Some minor changes

nl8590687 2018-04-08 22:44:14 +08:00
parent 618835d3bf
commit 5b127ac63c
5 changed files with 18 additions and 16 deletions

View File

@@ -7,7 +7,7 @@
 This project is implemented with Keras and TensorFlow, based on LSTM, CNN and CTC.
-The project can already be trained, but the loss stays stubbornly high during training.
+The project can now be trained normally; the current neural network models are being evaluated to determine which one performs best.
 To run this project, execute:
 ```shell

View File

@@ -33,7 +33,7 @@ class ModelSpeech(): # speech model class
 def __init__(self, datapath):
 '''
 Initialization
-The default size of the output pinyin representation is 1283, i.e. 1282 pinyin + 1 blank block
+The default size of the output pinyin representation is 1417, i.e. 1416 pinyin + 1 blank block
 '''
 MS_OUTPUT_SIZE = 1417
 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # dimensionality of each character vector in the network's final output
@@ -62,37 +62,37 @@ class ModelSpeech(): # speech model class
 # Each frame is represented by 13-dim MFCC features plus their 13-dim first-order and 13-dim second-order differences; the maximum signal sequence length is 1500
 input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH))
-layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="valid")(input_data) # convolutional layer
+layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(input_data) # convolutional layer
 #layer_h1_a = Activation('relu', name='relu0')(layer_h1_c)
 layer_h1_a = LeakyReLU(alpha=0.3)(layer_h1_c) # advanced activation layer
 layer_h1 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1_a) # pooling layer
 layer_h2 = BatchNormalization()(layer_h1)
-layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="valid")(layer_h2) # convolutional layer
+layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(layer_h2) # convolutional layer
 layer_h3_a = LeakyReLU(alpha=0.3)(layer_h3_c) # advanced activation layer
 #layer_h3_a = Activation('relu', name='relu1')(layer_h3_c)
 layer_h3 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h3_a) # pooling layer
 layer_h4 = Dropout(0.1)(layer_h3) # randomly drop some connections to prevent overfitting
-layer_h5 = Dense(256, use_bias=True, activation="softmax")(layer_h4) # fully connected layer
+layer_h5 = Dense(256, use_bias=True, activation="relu", kernel_initializer='he_normal')(layer_h4) # fully connected layer
-layer_h6 = Dense(256, use_bias=True, activation="softmax")(layer_h5) # fully connected layer
+layer_h6 = Dense(256, use_bias=True, activation="relu", kernel_initializer='he_normal')(layer_h5) # fully connected layer
 #layer_h4 = Activation('softmax', name='softmax0')(layer_h4_d1)
-layer_h7 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h6) # LSTM layer
+layer_h7 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h6) # LSTM layer
-layer_h8 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h7) # LSTM layer
+layer_h8 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h7) # LSTM layer
-layer_h9 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h8) # LSTM layer
+layer_h9 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h8) # LSTM layer
-layer_h10 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h9) # LSTM layer
+layer_h10 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h9) # LSTM layer
 #layer_h10 = Activation('softmax', name='softmax1')(layer_h9)
 layer_h10_dropout = Dropout(0.1)(layer_h10) # randomly drop some connections to prevent overfitting
-layer_h11 = Dense(512, use_bias=True, activation="softmax")(layer_h10_dropout) # fully connected layer
+layer_h11 = Dense(512, use_bias=True, activation="relu", kernel_initializer='he_normal')(layer_h10_dropout) # fully connected layer
-layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, activation="softmax")(layer_h11) # fully connected layer
+layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h11) # fully connected layer
 #layer_h6 = Dense(1283, activation="softmax")(layer_h5) # fully connected layer
-y_pred = Activation('softmax', name='softmax2')(layer_h12)
+y_pred = Activation('softmax', name='softmax')(layer_h12)
 model_data = Model(inputs = input_data, outputs = y_pred)
 #model_data.summary()
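Taken together, the new lines above swap every hidden softmax for ReLU/tanh, add `he_normal` weight initialization, and switch the convolutions from `padding="valid"` to `padding="same"`. For readers following the diff, here is a minimal, self-contained sketch of the layer stack as it reads after this commit. It is not code from the repository: the class wrapper and CTC loss wiring are omitted, and AUDIO_LENGTH / AUDIO_FEATURE_LENGTH / MS_OUTPUT_SIZE are filled in with the values mentioned in the comments of this diff.

```python
# Sketch only: the post-commit layer stack, pulled out of the class for readability.
from tensorflow.keras.layers import (Input, Conv1D, MaxPooling1D, BatchNormalization,
                                     Dropout, Dense, LSTM, LeakyReLU, Activation)
from tensorflow.keras.models import Model

AUDIO_LENGTH = 1500          # max signal sequence length (from the diff comments)
AUDIO_FEATURE_LENGTH = 39    # 13 MFCC + 13 first-order + 13 second-order differences
MS_OUTPUT_SIZE = 1417        # 1416 pinyin + 1 blank

input_data = Input(name='the_input', shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH))

# Two conv + pool blocks; padding="same" keeps the time-axis length, each pooling halves it.
x = Conv1D(256, 5, strides=1, use_bias=True, kernel_initializer='he_normal', padding='same')(input_data)
x = LeakyReLU(alpha=0.3)(x)
x = MaxPooling1D(pool_size=2)(x)
x = BatchNormalization()(x)
x = Conv1D(256, 5, strides=1, use_bias=True, kernel_initializer='he_normal', padding='same')(x)
x = LeakyReLU(alpha=0.3)(x)
x = MaxPooling1D(pool_size=2)(x)
x = Dropout(0.1)(x)

# Hidden Dense layers now use ReLU instead of softmax, with he_normal initialization.
x = Dense(256, activation='relu', kernel_initializer='he_normal')(x)
x = Dense(256, activation='relu', kernel_initializer='he_normal')(x)

# The LSTM stack now uses the conventional tanh activation.
for _ in range(4):
    x = LSTM(256, activation='tanh', return_sequences=True, kernel_initializer='he_normal')(x)

x = Dropout(0.1)(x)
x = Dense(512, activation='relu', kernel_initializer='he_normal')(x)
x = Dense(MS_OUTPUT_SIZE, kernel_initializer='he_normal')(x)
y_pred = Activation('softmax', name='softmax')(x)  # softmax only at the output

model_data = Model(inputs=input_data, outputs=y_pred)
model_data.summary()
```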

log.md
View File

@@ -8,6 +8,8 @@
 If there are any problems, they can be written down here directly.
 ## Log
+### 2018-04-08
+After several consecutive days of persistent effort, the loss can finally decrease. The cause turned out to be a problem with the initialization of the model's weight parameters, which led directly to vanishing gradients; training was so difficult that the loss refused to come down and the model kept underfitting. The first pitfall of tuning... (see the sketch after this hunk)
 ### 2018-04-05
 Modified the earlier models and want to try an image-style approach to see how it performs. Right now the problem of the loss not coming down and the accuracy not going up is a real headache.
 ### 2018-03-30
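As a rough illustration of the 2018-04-08 entry above (this is not code from the repository; shapes, layer count and output size are arbitrary), the following toy comparison contrasts average gradient magnitudes for the pre-commit style (softmax hidden activations, Keras' default glorot_uniform initializer) against the post-commit style (ReLU with he_normal), assuming TensorFlow 2.x:

```python
# Toy sketch: compare gradient magnitudes for the old vs. new hidden-layer configuration.
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, Model, layers

def build_stack(activation, initializer):
    inp = Input(shape=(200, 39))                       # arbitrary (frames, features)
    x = inp
    for _ in range(4):                                 # a few stacked hidden layers
        x = layers.Dense(256, activation=activation,
                         kernel_initializer=initializer)(x)
    out = layers.Dense(10, activation='softmax')(x)    # arbitrary output size
    return Model(inp, out)

def mean_grad_norm(model):
    x = np.random.randn(8, 200, 39).astype('float32')
    y = np.random.randint(0, 10, size=(8, 200))
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(y, pred))
    grads = tape.gradient(loss, model.trainable_variables)
    return np.mean([tf.norm(g).numpy() for g in grads if g is not None])

old = build_stack('softmax', 'glorot_uniform')   # pre-commit style
new = build_stack('relu', 'he_normal')           # post-commit style
# The softmax-hidden variant typically shows far smaller gradient norms,
# which is consistent with the "loss refuses to decrease" symptom in the log.
print('old-style mean gradient norm:', mean_grad_norm(old))
print('new-style mean gradient norm:', mean_grad_norm(new))
```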

View File

@@ -197,7 +197,7 @@ class DataSpeech():
 labels = []
 for i in range(0,batch_size):
 #input_length.append([1500])
-labels.append([1e-12]) # final CTC loss result; 0 means there is no loss on the CTC side
+labels.append([0]) # final CTC loss result; 0 means there is no loss on the CTC side
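Why a dummy value works here at all: in the usual Keras CTC setup (an assumption about this repository's training wiring, following the standard `K.ctc_batch_cost` recipe rather than code shown in this diff), the model's output already is the per-sample CTC loss, and the compiled loss function simply returns `y_pred`, so `y_true` is ignored and a zero placeholder per sample is enough. A generic sketch:

```python
# Generic sketch of the standard Keras CTC wiring; names and sizes are assumptions.
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model

T, FEAT, VOCAB = 100, 39, 1417            # frames, feature dim, pinyin size + blank
MAX_LABEL_LEN = 64                        # assumed maximum label length

audio = Input(name='the_input', shape=(T, FEAT))
y_pred = Dense(VOCAB, activation='softmax')(audio)   # stand-in for the real network

labels = Input(name='the_labels', shape=(MAX_LABEL_LEN,), dtype='float32')
input_length = Input(name='input_length', shape=(1,), dtype='int64')
label_length = Input(name='label_length', shape=(1,), dtype='int64')

# The Lambda layer computes the CTC loss inside the model itself.
loss_out = Lambda(
    lambda args: K.ctc_batch_cost(args[0], args[1], args[2], args[3]),
    output_shape=(1,), name='ctc')([labels, y_pred, input_length, label_length])

model = Model([audio, labels, input_length, label_length], loss_out)
# y_true is ignored entirely by the outer loss, so a zero placeholder per sample suffices.
model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
```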
@@ -217,7 +217,7 @@ class DataSpeech():
 #input_length.append(data_input.shape[1] // 4 - 2)
 #print(data_input.shape[0],len(data_input))
-input_length.append(data_input.shape[0] // 4 - 3)
+input_length.append(data_input.shape[0] // 4)
 #print(data_input, data_labels)
 #print('data_input length:',len(data_input))
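The `- 3` that disappears here is not arbitrary: with the convolutions previously using `padding="valid"` and `kernel_size=5`, each of the two conv layers trimmed 4 frames before its pooling layer halved the length, which works out to roughly `T // 4 - 3`; with `padding="same"` (as changed in the model file above), only the two pooling layers shrink the time axis, so the CTC input length is simply `T // 4`. A small sketch of that arithmetic, assuming the two conv + pool blocks shown in this commit:

```python
# Rough sketch: downsampled sequence length after two Conv1D(kernel_size=5) + MaxPooling1D(2) blocks.
def downsampled_length(t, padding):
    for _ in range(2):               # two conv + pool blocks
        if padding == 'valid':
            t = t - (5 - 1)          # Conv1D(kernel_size=5, padding='valid') trims 4 frames
        # padding='same' leaves the length unchanged
        t = t // 2                   # MaxPooling1D(pool_size=2)
    return t

for t in (1500, 777):
    print(t, downsampled_length(t, 'valid'), downsampled_length(t, 'same'))
    # 'valid' comes out as roughly t // 4 - 3 (up to integer rounding); 'same' is exactly t // 4
```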

View File

@@ -193,7 +193,7 @@ class DataSpeech():
 for i in range(batch_size):
 data_input, data_labels = self.GetData((ran_num + i) % self.DataNum) # take a fixed number of samples consecutively, starting from the random index
-input_length.append(data_input.shape[0] // 4 - 2)
+input_length.append(data_input.shape[0] // 4)
 #print(data_input, data_labels)
 #print('data_input length:',len(data_input))