更新拼音参数和几条文件路径,声学模型文件跟之前版本不再兼容,需要重新训练
This commit is contained in:
parent
07c9b3600b
commit
6579229d7d
|
@ -28,9 +28,9 @@ class ModelSpeech(): # 语音模型类
|
||||||
def __init__(self, datapath):
|
def __init__(self, datapath):
|
||||||
'''
|
'''
|
||||||
初始化
|
初始化
|
||||||
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
|
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
|
||||||
'''
|
'''
|
||||||
MS_OUTPUT_SIZE = 1422
|
MS_OUTPUT_SIZE = 1424
|
||||||
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
||||||
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
||||||
self.label_max_string_length = 64
|
self.label_max_string_length = 64
|
||||||
|
|
|
@ -28,9 +28,9 @@ class ModelSpeech(): # 语音模型类
|
||||||
def __init__(self, datapath):
|
def __init__(self, datapath):
|
||||||
'''
|
'''
|
||||||
初始化
|
初始化
|
||||||
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
|
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
|
||||||
'''
|
'''
|
||||||
MS_OUTPUT_SIZE = 1422
|
MS_OUTPUT_SIZE = 1424
|
||||||
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
||||||
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
||||||
self.label_max_string_length = 64
|
self.label_max_string_length = 64
|
||||||
|
|
|
@ -32,9 +32,9 @@ class ModelSpeech(): # 语音模型类
|
||||||
def __init__(self, datapath):
|
def __init__(self, datapath):
|
||||||
'''
|
'''
|
||||||
初始化
|
初始化
|
||||||
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
|
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
|
||||||
'''
|
'''
|
||||||
MS_OUTPUT_SIZE = 1422
|
MS_OUTPUT_SIZE = 1424
|
||||||
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
||||||
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
||||||
self.label_max_string_length = 64
|
self.label_max_string_length = 64
|
||||||
|
|
|
@ -32,9 +32,9 @@ class ModelSpeech(): # 语音模型类
|
||||||
def __init__(self, datapath):
|
def __init__(self, datapath):
|
||||||
'''
|
'''
|
||||||
初始化
|
初始化
|
||||||
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
|
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
|
||||||
'''
|
'''
|
||||||
MS_OUTPUT_SIZE = 1422
|
MS_OUTPUT_SIZE = 1424
|
||||||
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
||||||
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
||||||
self.label_max_string_length = 64
|
self.label_max_string_length = 64
|
||||||
|
|
|
@ -29,9 +29,9 @@ class ModelSpeech(): # 语音模型类
|
||||||
def __init__(self, datapath):
|
def __init__(self, datapath):
|
||||||
'''
|
'''
|
||||||
初始化
|
初始化
|
||||||
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
|
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
|
||||||
'''
|
'''
|
||||||
MS_OUTPUT_SIZE = 1422
|
MS_OUTPUT_SIZE = 1424
|
||||||
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
|
||||||
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
|
||||||
self.label_max_string_length = 64
|
self.label_max_string_length = 64
|
||||||
|
|
14
test.py
14
test.py
|
@ -8,7 +8,7 @@
|
||||||
import platform as plat
|
import platform as plat
|
||||||
|
|
||||||
from SpeechModel251 import ModelSpeech
|
from SpeechModel251 import ModelSpeech
|
||||||
from LanguageModel import ModelLanguage
|
from LanguageModel2 import ModelLanguage
|
||||||
from keras import backend as K
|
from keras import backend as K
|
||||||
|
|
||||||
datapath = ''
|
datapath = ''
|
||||||
|
@ -16,7 +16,7 @@ modelpath = 'model_speech'
|
||||||
|
|
||||||
system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断
|
system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断
|
||||||
if(system_type == 'Windows'):
|
if(system_type == 'Windows'):
|
||||||
datapath = 'E:\\语音数据集'
|
datapath = 'D:\\语音数据集'
|
||||||
modelpath = modelpath + '\\'
|
modelpath = modelpath + '\\'
|
||||||
elif(system_type == 'Linux'):
|
elif(system_type == 'Linux'):
|
||||||
datapath = 'dataset'
|
datapath = 'dataset'
|
||||||
|
@ -29,14 +29,14 @@ else:
|
||||||
ms = ModelSpeech(datapath)
|
ms = ModelSpeech(datapath)
|
||||||
|
|
||||||
#ms.LoadModel(modelpath + 'm22_2\\0\\speech_model22_e_0_step_257000.model')
|
#ms.LoadModel(modelpath + 'm22_2\\0\\speech_model22_e_0_step_257000.model')
|
||||||
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_117000.model')
|
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_12000.model')
|
||||||
|
|
||||||
#ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True)
|
#ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True)
|
||||||
r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
|
r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
|
||||||
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
|
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
|
||||||
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
|
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
|
||||||
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV')
|
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\A11_167.WAV')
|
||||||
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
|
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\D4_750.wav')
|
||||||
|
|
||||||
K.clear_session()
|
K.clear_session()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue