update daily 20170904

parent 740b65f884
commit 5f73fe0599

main.py | 93
@@ -8,8 +8,11 @@ import keras as kr
 import numpy as np
 
 from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten # ,Input,LSTM,Convolution1D,MaxPooling1D,Merge
-from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge # Conv2D, MaxPooling2D,Conv1D
+from keras.layers import Dense, Dropout, Input # , Flatten,LSTM,Convolution1D,MaxPooling1D,Merge
+from keras.layers import Conv1D,LSTM,MaxPooling1D, Lambda # , Merge, Conv2D, MaxPooling2D,Conv1D
+from keras import backend as K
 
+from readdata import DataSpeech
+
 class ModelSpeech(): # speech model class
     def __init__(self,MS_EMBED_SIZE = 64,BATCH_SIZE = 32):
@@ -22,24 +25,52 @@ class ModelSpeech(): # speech model class
 
     def CreateModel(self):
         '''
-        Define the LSTM/CNN model (not finished yet)
+        Define the CNN/LSTM/CTC model, using the Keras functional API
+        Input layer: sequence of 39-dimensional feature vectors; the maximum length of one utterance is set to 1500 (about 15 s)
+        Hidden layer 1: convolutional layer with 1024 units
+        Hidden layer 2: pooling layer with a pool window of size 2
+        Hidden layer 3: Dropout layer dropping 30% of the connections, to prevent overfitting
+        Hidden layer 4: recurrent (LSTM) layer
+        Hidden layer 5: Dropout layer dropping 30% of the connections, to prevent overfitting
+        Output layer: fully connected layer with 1279 units, softmax activation and the CTC loss as loss function
         '''
-        _model = Sequential()
-        _model.add(LSTM(self.MS_EMBED_SIZE, return_sequences=True, input_shape = (200,400))) # input_shape needs to be changed
-        _model.add(Dropout(0.3))
-        _model.add(Conv1D(self.QA_EMBED_SIZE // 2, 5, border_mode="valid"))
-        _model.add(MaxPooling1D(pool_length=2, border_mode="valid"))
-        _model.add(Dropout(0.3))
-        _model.add(Flatten())
+        # Each frame is represented by 13 MFCC features plus their 13 first-order and 13 second-order deltas; the maximum signal length is 1500
+        layer_input = Input((1500,39))
 
+        layer_h1 = Conv1D(256, 5, use_bias=True, padding="valid")(layer_input) # convolutional layer
+        layer_h2 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1) # pooling layer
+        layer_h3 = Dropout(0.2)(layer_h2) # randomly drop some connections, to prevent overfitting
+        layer_h4 = LSTM(256, activation='relu', use_bias=True)(layer_h3) # LSTM layer
+        layer_h5 = Dropout(0.2)(layer_h4) # randomly drop some connections, to prevent overfitting
+        layer_h6 = Dense(1279, activation="softmax")(layer_h5) # fully connected layer
+
+        #labels = Input(name='the_labels', shape=[60], dtype='float32')
+        layer_out = Lambda(ctc_lambda_func,output_shape=(1279,), name='ctc')(layer_h6) # CTC
+        _model = Model(inputs = layer_input, outputs = layer_out)
+
         #_model = Sequential()
-        #_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
-        _model.add(Dense(1279, activation="softmax"))
-        _model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
+        #_model.add(Conv1D(256, 5,input_shape=(1500,39), use_bias=True, padding="valid"))
+        #_model.add(MaxPooling1D(pool_size=2, strides=None, padding="valid"))
+        #_model.add(Dropout(0.3)) # randomly drop some connections
+
+        #_model.add(LSTM(256, activation='relu', use_bias=True))
+        #_model.add(Dropout(0.3)) # randomly drop some connections
+
+        #_model.add(Dense(1279, activation="softmax"))
+        ##_model.add(Lambda(ctc_lambda_func,output_shape=(1,),name='ctc'))
+
+        #_model.compile(optimizer="sgd", loss='categorical_crossentropy',metrics=["accuracy"])
+        _model.compile(optimizer="sgd", loss='ctc',metrics=["accuracy"])
         return _model
 
+    def ctc_lambda_func(args):
+        #labels, y_pred, input_length, label_length = args
+        y_pred = args
+        #y_pred = y_pred[:, 2:, :]
+        return K.ctc_decode(y_pred,1279)
+        #return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
     def TrainModel(self,datapath,epoch = 2,save_step=1000,filename='model_speech/LSTM_CNN_model'):
         '''
         Train the model
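Note: as committed, the new CreateModel above still has some loose ends: Model does not appear in any of the imports shown, ctc_lambda_func receives only the softmax output even though the commented-out K.ctc_batch_cost call needs the labels and both sequence lengths, K.ctc_decode is a decoding helper rather than a loss, and 'ctc' is not a loss name that stock Keras provides. For comparison, below is a minimal sketch of the usual CTC wiring in Keras 2 (the pattern used by Keras' image_ocr example); the helper names ctc_loss_lambda and build_ctc_model, the padded label length of 60 (taken from the commented-out the_labels line), and the layer sizes are illustrative assumptions, not part of this commit.

# Illustrative sketch only, not part of this commit.
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Conv1D, MaxPooling1D, LSTM, Lambda
from keras import backend as K

def ctc_loss_lambda(args):
    # K.ctc_batch_cost needs the label sequences and both lengths,
    # not just the softmax output.
    labels, y_pred, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

def build_ctc_model(audio_length=1500, feature_dim=39, label_max_len=60, num_symbols=1279):
    layer_input = Input(shape=(audio_length, feature_dim), name='the_input')
    h = Conv1D(256, 5, use_bias=True, padding='valid')(layer_input)
    h = MaxPooling1D(pool_size=2, strides=None, padding='valid')(h)
    h = Dropout(0.2)(h)
    # return_sequences=True keeps one softmax distribution per frame, which is what CTC expects.
    h = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(h)
    h = Dropout(0.2)(h)
    y_pred = Dense(num_symbols, activation='softmax', name='softmax_out')(h)

    # Extra inputs carrying the label sequences and the true lengths.
    labels = Input(name='the_labels', shape=[label_max_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    loss_out = Lambda(ctc_loss_lambda, output_shape=(1,), name='ctc')([labels, y_pred, input_length, label_length])

    model = Model(inputs=[layer_input, labels, input_length, label_length], outputs=loss_out)
    # The Lambda layer already computes the loss, so the compiled loss only passes it
    # through; Keras has no built-in loss called 'ctc'.
    model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
    return model

Note that the input_length fed to K.ctc_batch_cost must be the length after downsampling, here (1500 - 4) // 2 = 748 frames rather than the raw 1500.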
@@ -49,10 +80,26 @@ class ModelSpeech(): # speech model class
         save_step: save the model every this many steps
         filename: default filename to save to, without the file extension
         '''
-        for epoch in range(epoch):
-            pass
-        pass
+        data=DataSpeech(datapath)
+        data.LoadDataList('train')
+        num_data=DataSpeech.GetDataNum() # get the number of data samples
+        for epoch in range(epoch): # number of training epochs
+            n_step = 0 # number of data samples iterated in this epoch
+            while True:
+                try:
+                    data_input, data_label = data.GetData(n_step) # read data
+
+                    pass
+                    # a generator function still needs to be written here
+                    self._model.fit_generator(yielddatas, save_step)
+                    n_step += 1
+                except StopIteration:
+                    print('[error] generator error. please check data format.')
+                    break
+
+            self.SaveModel(comment='_e_'+str(epoch)+'_step_'+str(n_step))
+
 
     def LoadModel(self,filename='model_speech/LSTM_CNN_model.model'):
         '''
         Load the model parameters
@@ -71,13 +118,21 @@ class ModelSpeech(): # speech model class
         '''
         pass
 
+    def Predict(self,x):
+        '''
+        Predict the result
+        '''
+        r = predict_on_batch(x)
+        return r
+        pass
+
     @property
     def model(self):
         '''
         Return the Keras model
         '''
         return self._model
 
 
 if(__name__=='__main__'):
     pass
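Note: in the hunk above, Predict calls predict_on_batch(x) without going through self._model, LoadModel is still a stub, and TrainModel calls self.SaveModel(...), which is not defined in any of the hunks shown. A minimal sketch of what these three ModelSpeech methods could look like, assuming the weights are stored with Keras' save_weights/load_weights and that the '.model' suffix matches the default filename used above; everything else is an assumption, not part of this commit:

    # Illustrative sketch only, not part of this commit.
    def SaveModel(self, filename='model_speech/LSTM_CNN_model', comment=''):
        # save only the weights; the architecture is rebuilt by CreateModel()
        self._model.save_weights(filename + comment + '.model')

    def LoadModel(self, filename='model_speech/LSTM_CNN_model.model'):
        # assumes CreateModel() has already been called on this instance
        self._model.load_weights(filename)

    def Predict(self, x):
        # x: a batch of feature sequences shaped like the model input
        return self._model.predict_on_batch(x)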
readdata.py | 14
@@ -93,8 +93,18 @@ class DataSpeech():
             v=self.NumToVector(n)
             feat_out.append(v)
         # the return values are the matrix of MFCC feature vectors together with its first- and second-order delta matrices, and the corresponding matrix of pinyin labels
-        return feat_mfcc,feat_mfcc_d,feat_mfcc_dd,np.array(feat_out)
+        data_input = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd))
+        data_label = np.array(feat_out)
+        return data_input, data_label
+
+    def data_genetator(self, data_input, data_label):
+        '''
+        Data generator function, for training with Keras fit_generator
+        Takes as input the output produced by the GetData function
+        '''
+
+        pass
+
     def GetSymbolList(self):
         '''
         Load the list of pinyin symbols, used to label the data
@@ -109,7 +119,7 @@ class DataSpeech():
             txt_l=i.split('\t')
             list_symbol.append(txt_l[0])
         txt_obj.close()
-        list_symbol.append(' ')
+        list_symbol.append('_')
         return list_symbol
 
     def SymbolToNum(self,symbol):
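Note: data_genetator is only stubbed out here, while TrainModel in main.py already calls self._model.fit_generator(yielddatas, save_step) on a generator that does not exist yet. Below is a minimal sketch of such a generator as a DataSpeech method, assuming the four-input / dummy-target layout of the CTC model sketched earlier, that numpy is imported as np (as elsewhere in readdata.py), and that GetDataNum() and GetData() are usable as instance methods; the batch size and the padded label length of 60 are likewise assumptions:

    # Illustrative sketch only, not part of this commit.
    def data_genetator(self, batch_size=32, audio_length=1500, label_max_len=60):
        # Endlessly yield (inputs, targets) batches in the form expected by a
        # CTC model compiled with a pass-through loss.
        while True:
            X = np.zeros((batch_size, audio_length, 39))
            y = np.zeros((batch_size, label_max_len), dtype=np.int16)
            input_length = np.zeros((batch_size, 1), dtype=np.int64)
            label_length = np.zeros((batch_size, 1), dtype=np.int64)
            for i in range(batch_size):
                data_input, data_label = self.GetData(np.random.randint(0, self.GetDataNum()))
                X[i, :len(data_input)] = data_input
                y[i, :len(data_label)] = data_label
                # frames that remain after Conv1D(kernel 5, valid) and pooling by 2
                input_length[i] = (len(data_input) - 4) // 2
                label_length[i] = len(data_label)
            inputs = {'the_input': X, 'the_labels': y, 'input_length': input_length, 'label_length': label_length}
            targets = {'ctc': np.zeros((batch_size,))}  # dummy target; the Lambda layer already outputs the loss
            yield inputs, targets

TrainModel could then drive it with something like self._model.fit_generator(data.data_genetator(), steps_per_epoch=save_step), saving through SaveModel after each epoch.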