diff --git a/SpeechModel22.py b/SpeechModel22.py index e9b7c83..9afea1d 100644 --- a/SpeechModel22.py +++ b/SpeechModel22.py @@ -34,7 +34,7 @@ class ModelSpeech(): # 语音模型类 初始化 默认输出的拼音的表示大小是1283,即1282个拼音+1个空白块 ''' - MS_OUTPUT_SIZE = 1417 + MS_OUTPUT_SIZE = 1419 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 #self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch self.label_max_string_length = 64 @@ -87,7 +87,7 @@ class ModelSpeech(): # 语音模型类 #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 #layer_h6 = Dropout(0.2)(layer_h5) # 随机中断部分神经网络连接,防止过拟合 layer_h8 = Dense(256, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h7) # 全连接层 - layer_h9 = Dense(1417, use_bias=True, kernel_initializer='he_normal')(layer_h8) # 全连接层 + layer_h9 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h8) # 全连接层 y_pred = Activation('softmax', name='Activation0')(layer_h9) model_data = Model(inputs = input_data, outputs = y_pred) diff --git a/dict.txt b/dict.txt index 95334be..1a5227b 100644 --- a/dict.txt +++ b/dict.txt @@ -703,6 +703,7 @@ nin2 您 ning2 柠狞凝宁拧咛 ning3 拧 ning4 拧泞宁 +ni1 妞 niu2 牛 niu3 扭钮纽 nin4 拗 diff --git a/readdata22_2.py b/readdata22_2.py index 8b91ed5..6f1e9d6 100644 --- a/readdata22_2.py +++ b/readdata22_2.py @@ -74,9 +74,9 @@ class DataSpeech(): filename_symbollist_thchs30 = 'thchs30' + self.slash + 'train.syllable.txt' filename_symbollist_stcmds = 'st-cmds' + self.slash + 'train.syllable.txt' elif(self.type=='dev'): - filename_wavlist_thchs30 = 'thchs30' + self.slash + 'dev.wav.lst' + filename_wavlist_thchs30 = 'thchs30' + self.slash + 'cv.wav.lst' filename_wavlist_stcmds = 'st-cmds' + self.slash + 'dev.wav.txt' - filename_symbollist_thchs30 = 'thchs30' + self.slash + 'dev.syllable.txt' + filename_symbollist_thchs30 = 'thchs30' + self.slash + 'cv.syllable.txt' filename_symbollist_stcmds = 'st-cmds' + self.slash + 'dev.syllable.txt' elif(self.type=='test'): filename_wavlist_thchs30 = 'thchs30' + self.slash + 'test.wav.lst'