modify model 1

This commit is contained in:
nl8590687 2018-04-12 09:31:32 +08:00
parent bd142eaeb1
commit 2b082b878d
2 changed files with 13 additions and 8 deletions

View File

@@ -80,14 +80,19 @@ class ModelSpeech(): # 语音模型类
layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(input_data) # 卷积层
#layer_h1_a = Activation('relu', name='relu0')(layer_h1_c)
layer_h1_a = LeakyReLU(alpha=0.3)(layer_h1_c) # 高级激活层
layer_h1 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1_a) # 池化层
layer_h1_cc = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(layer_h1_a) # 卷积层
#layer_h1_a = Activation('relu', name='relu0')(layer_h1_c)
layer_h1_aa = LeakyReLU(alpha=0.3)(layer_h1_cc) # 高级激活层
layer_h1 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1_aa) # 池化层
layer_h2 = BatchNormalization()(layer_h1)
layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(layer_h2) # 卷积层
layer_h3_a = LeakyReLU(alpha=0.3)(layer_h3_c) # 高级激活层
layer_h3_cc = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(layer_h3_a) # 卷积层
layer_h3_aa = LeakyReLU(alpha=0.3)(layer_h3_cc) # 高级激活层
#layer_h3_a = Activation('relu', name='relu1')(layer_h3_c)
layer_h3 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h3_a) # 池化层
layer_h3 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h3_aa) # 池化层
layer_h4 = Dropout(0.1)(layer_h3) # 随机中断部分神经网络连接,防止过拟合
@@ -97,8 +102,8 @@ class ModelSpeech(): # 语音模型类
layer_h7 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h6) # LSTM层
layer_h8 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h7) # LSTM层
layer_h9 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h8) # LSTM层
layer_h10 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h9) # LSTM层
#layer_h9 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h8) # LSTM层
#layer_h10 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h9) # LSTM层
#layer_h10 = Activation('softmax', name='softmax1')(layer_h9)
layer_h10_dropout = Dropout(0.1)(layer_h10) # 随机中断部分神经网络连接,防止过拟合
@@ -344,8 +349,8 @@ if(__name__=='__main__'):
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm1\\speech_model_e_1_step_100.model')
#ms.TrainModel(datapath, epoch = 2, batch_size = 8, save_step = 1)
ms.TestModel(datapath, str_dataset='dev', data_count = 8)
#ms.LoadModel(modelpath + 'm1\\speech_model_e_1_step_100.model')
ms.TrainModel(datapath, epoch = 2, batch_size = 8, save_step = 1)
#ms.TestModel(datapath, str_dataset='dev', data_count = 8)
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
#print('*[提示] 语音识别结果:\n',r)

View File

@@ -188,7 +188,7 @@ class ModelSpeech(): # 语音模型类
#data.LoadDataList('dev')
# 获取输入特征
#data_input = data.GetMfccFeature(wavsignal, fs)
data_input = data.GetFrequencyFeature(wavsignal, fs)
data_input = GetFrequencyFeature(wavsignal, fs)
input_length = len(data_input)
input_length = input_length // 4