Some minor changes
This commit is contained in:
parent 618835d3bf
commit 5b127ac63c
@@ -7,7 +7,7 @@
 This project is implemented with Keras on TensorFlow, using LSTM, CNN, and CTC.
 
-The project can now be trained, but the loss stays stubbornly high during training.
+The project now trains normally; we are preparing to evaluate which of the current neural network models works best.
 
 To run this project, execute:
 ```shell
@@ -33,7 +33,7 @@ class ModelSpeech():  # speech model class
     def __init__(self, datapath):
         '''
         Initialization
-        By default the output pinyin representation has size 1283, i.e. 1282 pinyin symbols + 1 blank
+        By default the output pinyin representation has size 1417, i.e. 1416 pinyin symbols + 1 blank
         '''
         MS_OUTPUT_SIZE = 1417
         self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE  # dimensionality of each character vector in the network's final output
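The docstring revision tracks the pinyin vocabulary: 1416 pinyin symbols plus one CTC blank. A hedged note on the sizing; the blank-last convention is an assumption based on Keras's TensorFlow-backed `ctc_batch_cost`, not something stated in this diff:

```python
# Vocabulary sizing implied by the docstring above: 1416 pinyin symbols
# plus one CTC blank. In Keras/TensorFlow CTC the blank is conventionally
# the last class index (num_classes - 1) -- an assumption, not confirmed
# by this repository.
NUM_PINYIN = 1416
MS_OUTPUT_SIZE = NUM_PINYIN + 1   # 1417, the network's output dimension
BLANK_INDEX = MS_OUTPUT_SIZE - 1  # 1416
```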
@@ -62,37 +62,37 @@ class ModelSpeech():  # speech model class
         # Each frame is represented by 13-dim MFCC features plus their 13-dim first-order and 13-dim second-order deltas; the maximum signal sequence length is 1500
         input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH))
 
-        layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="valid")(input_data)  # convolutional layer
+        layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(input_data)  # convolutional layer
         #layer_h1_a = Activation('relu', name='relu0')(layer_h1_c)
         layer_h1_a = LeakyReLU(alpha=0.3)(layer_h1_c)  # advanced activation layer
         layer_h1 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1_a)  # pooling layer
 
         layer_h2 = BatchNormalization()(layer_h1)
 
-        layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="valid")(layer_h2)  # convolutional layer
+        layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, kernel_initializer='he_normal', padding="same")(layer_h2)  # convolutional layer
         layer_h3_a = LeakyReLU(alpha=0.3)(layer_h3_c)  # advanced activation layer
         #layer_h3_a = Activation('relu', name='relu1')(layer_h3_c)
         layer_h3 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h3_a)  # pooling layer
 
         layer_h4 = Dropout(0.1)(layer_h3)  # randomly drop some connections to prevent overfitting
 
-        layer_h5 = Dense(256, use_bias=True, activation="softmax")(layer_h4)  # fully connected layer
-        layer_h6 = Dense(256, use_bias=True, activation="softmax")(layer_h5)  # fully connected layer
+        layer_h5 = Dense(256, use_bias=True, activation="relu", kernel_initializer='he_normal')(layer_h4)  # fully connected layer
+        layer_h6 = Dense(256, use_bias=True, activation="relu", kernel_initializer='he_normal')(layer_h5)  # fully connected layer
         #layer_h4 = Activation('softmax', name='softmax0')(layer_h4_d1)
 
-        layer_h7 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h6)  # LSTM layer
-        layer_h8 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h7)  # LSTM layer
-        layer_h9 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h8)  # LSTM layer
-        layer_h10 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h9)  # LSTM layer
+        layer_h7 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h6)  # LSTM layer
+        layer_h8 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h7)  # LSTM layer
+        layer_h9 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h8)  # LSTM layer
+        layer_h10 = LSTM(256, activation='tanh', use_bias=True, return_sequences=True, kernel_initializer='he_normal')(layer_h9)  # LSTM layer
         #layer_h10 = Activation('softmax', name='softmax1')(layer_h9)
 
         layer_h10_dropout = Dropout(0.1)(layer_h10)  # randomly drop some connections to prevent overfitting
 
-        layer_h11 = Dense(512, use_bias=True, activation="softmax")(layer_h10_dropout)  # fully connected layer
-        layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, activation="softmax")(layer_h11)  # fully connected layer
+        layer_h11 = Dense(512, use_bias=True, activation="relu", kernel_initializer='he_normal')(layer_h10_dropout)  # fully connected layer
+        layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h11)  # fully connected layer
         #layer_h6 = Dense(1283, activation="softmax")(layer_h5)  # fully connected layer
 
-        y_pred = Activation('softmax', name='softmax2')(layer_h12)
+        y_pred = Activation('softmax', name='softmax')(layer_h12)
         model_data = Model(inputs=input_data, outputs=y_pred)
         #model_data.summary()
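Pulling the changed lines together, this is roughly the revised network as one runnable piece. A minimal sketch, assuming AUDIO_LENGTH = 1500 and AUDIO_FEATURE_LENGTH = 39 (13 MFCC + 13 first-order deltas + 13 second-order deltas, per the comment above); it illustrates the hunk, and is not the repository's actual file:

```python
# A minimal sketch of the revised model after this hunk. Shapes are
# assumptions taken from the comments: 1500 frames x 39 features, 1417 outputs.
from keras.layers import (Input, Conv1D, MaxPooling1D, BatchNormalization,
                          Dropout, Dense, LSTM, LeakyReLU, Activation)
from keras.models import Model

def build_model(audio_length=1500, feature_length=39, output_size=1417):
    inputs = Input(name='the_input', shape=(audio_length, feature_length))
    x = Conv1D(256, 5, padding='same', kernel_initializer='he_normal')(inputs)
    x = LeakyReLU(alpha=0.3)(x)
    x = MaxPooling1D(pool_size=2)(x)   # time axis: /2
    x = BatchNormalization()(x)
    x = Conv1D(256, 5, padding='same', kernel_initializer='he_normal')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = MaxPooling1D(pool_size=2)(x)   # time axis: /4 overall
    x = Dropout(0.1)(x)
    x = Dense(256, activation='relu', kernel_initializer='he_normal')(x)
    x = Dense(256, activation='relu', kernel_initializer='he_normal')(x)
    for _ in range(4):                 # four stacked LSTM layers (tanh is the default activation)
        x = LSTM(256, return_sequences=True, kernel_initializer='he_normal')(x)
    x = Dropout(0.1)(x)
    x = Dense(512, activation='relu', kernel_initializer='he_normal')(x)
    x = Dense(output_size, kernel_initializer='he_normal')(x)
    y_pred = Activation('softmax', name='softmax')(x)
    return Model(inputs=inputs, outputs=y_pred)

model = build_model()  # model.summary() shows output shape (None, 375, 1417)
```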
log.md
@@ -8,6 +8,8 @@
 If there are any problems, write them down directly here.
 
 ## Log
+### 2018-04-08
+After several days of relentless effort, the loss can finally decrease. The cause turned out to be a problem with the initialization of the model's weights, which led directly to vanishing gradients: the model was hard to train, the loss refused to come down, and it kept underfitting. The first pitfall of tuning... (see the initialization sketch after this hunk)
 ### 2018-04-05
 Reworked the previous model, and also want to try an image-based approach. The problem of the loss not coming down and the accuracy not going up is a real headache right now.
 ### 2018-03-30
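The 2018-04-08 entry attributes the fix to weight initialization, which matches the `kernel_initializer='he_normal'` changes throughout this commit. A minimal before/after sketch, assuming standard Keras layers (Keras's default kernel initializer is glorot_uniform):

```python
# He-normal initialization draws weights with stddev = sqrt(2 / fan_in),
# which keeps activation variance roughly constant through ReLU-family
# layers and counteracts the vanishing gradients described above.
from keras.layers import Dense

layer_before = Dense(256, activation='relu')  # default glorot_uniform init
layer_after = Dense(256, activation='relu', kernel_initializer='he_normal')
```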
@@ -197,7 +197,7 @@ class DataSpeech():
         labels = []
         for i in range(0, batch_size):
             #input_length.append([1500])
-            labels.append([1e-12])  # the final CTC loss value; 0 means no CTC loss
+            labels.append([0])  # the final CTC loss value; 0 means no CTC loss
 
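The zeros appended to `labels` here are dummy targets: in the common Keras pattern for in-graph CTC (a Lambda wrapping `K.ctc_batch_cost` plus a pass-through loss), y_true is never used, so any placeholder works. A minimal sketch under that assumption; the layer names and the max label length of 64 are illustrative, not taken from this repository:

```python
# Sketch of the usual Keras in-graph CTC setup that makes all-zero dummy
# labels valid: the compiled loss just returns the Lambda's output, so
# y_true (the dummy labels) is ignored.
from keras import backend as K
from keras.layers import Input, Dense, Lambda
from keras.models import Model

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

# Stand-in acoustic model: any (batch, time, classes) softmax output works.
input_data = Input(name='the_input', shape=(1500, 39))
y_pred = Dense(1417, activation='softmax')(input_data)

labels = Input(name='the_labels', shape=(64,), dtype='float32')  # 64: assumed max label length
input_length = Input(name='input_length', shape=(1,), dtype='int64')
label_length = Input(name='label_length', shape=(1,), dtype='int64')

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
    [y_pred, labels, input_length, label_length])

model = Model(inputs=[input_data, labels, input_length, label_length],
              outputs=loss_out)
model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
```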
@@ -217,7 +217,7 @@ class DataSpeech():
 
             #input_length.append(data_input.shape[1] // 4 - 2)
             #print(data_input.shape[0], len(data_input))
-            input_length.append(data_input.shape[0] // 4 - 3)
+            input_length.append(data_input.shape[0] // 4)
             #print(data_input, data_labels)
             #print('data_input length:', len(data_input))
@@ -193,7 +193,7 @@ class DataSpeech():
         for i in range(batch_size):
             data_input, data_labels = self.GetData((ran_num + i) % self.DataNum)  # take a consecutive run of samples starting from a random index
 
-            input_length.append(data_input.shape[0] // 4 - 2)
+            input_length.append(data_input.shape[0] // 4)
             #print(data_input, data_labels)
             #print('data_input length:', len(data_input))
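Both `input_length` hunks drop the correction terms (`- 3` and `- 2`) in favor of a plain `// 4`. That is consistent with the model change above: the convolutions now use `padding="same"` (no frames trimmed), so only the two `MaxPooling1D(pool_size=2)` layers shorten the time axis, each by half. A small sketch of the arithmetic under that assumption:

```python
# CTC input length after the revised model: two pool_size=2 poolings each
# floor-halve the time axis, and "same"-padded convolutions preserve it,
# so the effective length is num_frames // 4.
def ctc_input_length(num_frames: int, num_pooling_layers: int = 2) -> int:
    length = num_frames
    for _ in range(num_pooling_layers):
        length //= 2  # MaxPooling1D(pool_size=2, padding="valid") floor-halves
    return length

assert ctc_input_length(1500) == 1500 // 4  # 375
```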