修改了模型5的参数初始化
This commit is contained in:
parent
8b59183c00
commit
1425994db3
|
@ -116,7 +116,7 @@ class ModelSpeech(): # 语音模型类
|
|||
|
||||
|
||||
|
||||
def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = 'model_speech/LSTM_CNN_model'):
|
||||
def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = 'model_speech/speech_model2'):
|
||||
'''
|
||||
训练模型
|
||||
参数:
|
||||
|
@ -146,13 +146,13 @@ class ModelSpeech(): # 语音模型类
|
|||
self.SaveModel(comment='_e_'+str(epoch)+'_step_'+str(n_step * save_step))
|
||||
|
||||
|
||||
def LoadModel(self,filename='model_speech/LSTM_CNN_model.model'):
|
||||
def LoadModel(self,filename='model_speech/speech_model2.model'):
|
||||
'''
|
||||
加载模型参数
|
||||
'''
|
||||
self._model.load_weights(filename)
|
||||
|
||||
def SaveModel(self,filename='model_speech/LSTM_CNN_model',comment=''):
|
||||
def SaveModel(self,filename='model_speech/speech_model2',comment=''):
|
||||
'''
|
||||
保存模型参数
|
||||
'''
|
||||
|
|
|
@ -132,7 +132,7 @@ class ModelSpeech(): # 语音模型类
|
|||
#y_pred, labels, input_length, label_length = args
|
||||
y_true, y_pred = args
|
||||
#print(y_pred)
|
||||
y_pred = y_pred[:, :, 0:-2]
|
||||
y_pred = y_pred[:, 2:, :]
|
||||
#return K.ctc_decode(y_pred,self.MS_OUTPUT_SIZE)
|
||||
return K.ctc_batch_cost(y_true, y_pred, y_true.shape[1], y_pred.shape[1])
|
||||
|
||||
|
|
|
@ -321,12 +321,12 @@ class ModelSpeech(): # 语音模型类
|
|||
print(shape)
|
||||
|
||||
|
||||
#print(test_input_data)
|
||||
print('test_input_data:',test_input_data)
|
||||
y_p = self.test_func([test_input_data])
|
||||
print(type(y_p))
|
||||
print('y_p:',y_p)
|
||||
|
||||
for j in range(0,200):
|
||||
for j in range(0,0):
|
||||
mean = sum(y_p[0][0][j])/len(y_p[0][0][j])
|
||||
print('max y_p:',max(y_p[0][0][j]),'min y_p:',min(y_p[0][0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][0][j][100])
|
||||
print('argmin:',np.argmin(y_p[0][0][j]),'argmax:',np.argmax(y_p[0][0][j]))
|
||||
|
@ -338,15 +338,30 @@ class ModelSpeech(): # 语音模型类
|
|||
|
||||
|
||||
print(K.is_sparse(y_p))
|
||||
y_p = K.to_dense(y_p)
|
||||
#y_p = K.to_dense(y_p)
|
||||
print(K.is_sparse(y_p))
|
||||
|
||||
_list = []
|
||||
for i in y_p:
|
||||
list_i = []
|
||||
for j in i:
|
||||
list_j = []
|
||||
for k in j:
|
||||
list_j.append(np.argmin(k))
|
||||
list_i.append(list_j)
|
||||
_list .append(list_i)
|
||||
|
||||
#y_p = np.array(_list, dtype = np.float)
|
||||
y_p = _list
|
||||
#print(y_p,type(y_p),y_p.shape)
|
||||
#y_p = tf.sparse_to_dense(y_p,(2,397),1417,0)
|
||||
print(test_input_length.T)
|
||||
test_input_length = test_input_length.reshape(2,1)
|
||||
func_in_len = self.test_func_input_length([test_input_length])
|
||||
print(type(func_in_len))
|
||||
print(func_in_len)
|
||||
#in_len = np.ones(shape[0]) * shape[1]
|
||||
ctc_decoded = K.ctc_decode(y_p, input_length = func_in_len)
|
||||
ctc_decoded = K.ctc_decode(y_p[0][0], input_length = tf.squeeze(func_in_len[0][0][0]))
|
||||
|
||||
print(ctc_decoded)
|
||||
#ctc_decoded = ctc_decoded[0][0]
|
||||
|
|
|
@ -63,31 +63,31 @@ class ModelSpeech(): # 语音模型类
|
|||
# 每一帧使用13维mfcc特征及其13维一阶差分和13维二阶差分表示,最大信号序列长度为1500
|
||||
input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH))
|
||||
|
||||
layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="same")(input_data) # 卷积层
|
||||
layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(input_data) # 卷积层
|
||||
#layer_h1_a = Activation('relu', name='relu0')(layer_h1_c)
|
||||
layer_h1_a = LeakyReLU(alpha=0.3)(layer_h1_c) # 高级激活层
|
||||
layer_h1 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1_a) # 池化层
|
||||
|
||||
layer_h2 = BatchNormalization()(layer_h1)
|
||||
|
||||
layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="same")(layer_h2) # 卷积层
|
||||
layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(layer_h2) # 卷积层
|
||||
layer_h3_a = LeakyReLU(alpha=0.3)(layer_h3_c) # 高级激活层
|
||||
#layer_h3_a = Activation('relu', name='relu1')(layer_h3_c)
|
||||
layer_h3 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h3_a) # 池化层
|
||||
|
||||
layer_h4 = Dropout(0.1)(layer_h3) # 随机中断部分神经网络连接,防止过拟合
|
||||
|
||||
layer_h5 = Dense(256, use_bias=True, activation="softmax")(layer_h4) # 全连接层
|
||||
layer_h6 = Dense(256, use_bias=True, activation="softmax")(layer_h5) # 全连接层
|
||||
layer_h5 = Dense(256, use_bias=True, kernel_initializer='he_normal', activation="relu")(layer_h4) # 全连接层
|
||||
layer_h6 = Dense(256, use_bias=True, kernel_initializer='he_normal', activation="relu")(layer_h5) # 全连接层
|
||||
#layer_h4 = Activation('softmax', name='softmax0')(layer_h4_d1)
|
||||
|
||||
layer_h7a = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h6) # LSTM层
|
||||
layer_h7b = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h6) # LSTM层
|
||||
layer_h7a = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h6) # LSTM层
|
||||
layer_h7b = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, go_backwards=True, kernel_initializer='he_normal')(layer_h6) # LSTM层
|
||||
|
||||
layer_h7_merged = add([layer_h7a, layer_h7b])
|
||||
|
||||
layer_h8a = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h7_merged) # LSTM层
|
||||
layer_h8b = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h7_merged) # LSTM层
|
||||
layer_h8a = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h7_merged) # LSTM层
|
||||
layer_h8b = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, go_backwards=True, kernel_initializer='he_normal')(layer_h7_merged) # LSTM层
|
||||
|
||||
layer_h8 = concatenate([layer_h8a, layer_h8b])
|
||||
#layer_h10 = Activation('softmax', name='softmax1')(layer_h9)
|
||||
|
@ -95,7 +95,7 @@ class ModelSpeech(): # 语音模型类
|
|||
#layer_h10_dropout = Dropout(0.1)(layer_h10) # 随机中断部分神经网络连接,防止过拟合
|
||||
|
||||
#layer_h11 = Dense(512, use_bias=True, activation="softmax")(layer_h8) # 全连接层
|
||||
layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, activation="softmax")(layer_h8) # 全连接层
|
||||
layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h8) # 全连接层
|
||||
#layer_h6 = Dense(1283, activation="softmax")(layer_h5) # 全连接层
|
||||
|
||||
y_pred = Activation('softmax', name='softmax2')(layer_h12)
|
||||
|
@ -348,7 +348,7 @@ if(__name__=='__main__'):
|
|||
ms = ModelSpeech(datapath)
|
||||
|
||||
#ms.LoadModel(modelpath + 'speech_model_e_0_step_1.model')
|
||||
ms.TrainModel(datapath, epoch = 2, batch_size = 8, save_step = 1)
|
||||
ms.TrainModel(datapath, epoch = 2, batch_size = 8, save_step = 10)
|
||||
#ms.TestModel(datapath, str_dataset='dev', data_count = 32)
|
||||
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
|
||||
#print('*[提示] 语音识别结果:\n',r)
|
||||
|
|
Loading…
Reference in New Issue