From 6579229d7d4850a8c2d7b610eaae3e4e06103ac9 Mon Sep 17 00:00:00 2001 From: nl <3210346136@qq.com> Date: Fri, 29 Mar 2019 14:28:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8B=BC=E9=9F=B3=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E5=92=8C=E5=87=A0=E6=9D=A1=E6=96=87=E4=BB=B6=E8=B7=AF?= =?UTF-8?q?=E5=BE=84=EF=BC=8C=E5=A3=B0=E5=AD=A6=E6=A8=A1=E5=9E=8B=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E8=B7=9F=E4=B9=8B=E5=89=8D=E7=89=88=E6=9C=AC=E4=B8=8D?= =?UTF-8?q?=E5=86=8D=E5=85=BC=E5=AE=B9=EF=BC=8C=E9=9C=80=E8=A6=81=E9=87=8D?= =?UTF-8?q?=E6=96=B0=E8=AE=AD=E7=BB=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SpeechModel24.py | 4 ++-- SpeechModel25.py | 4 ++-- SpeechModel251.py | 4 ++-- SpeechModel251_p.py | 4 ++-- SpeechModel26.py | 4 ++-- test.py | 14 +++++++------- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/SpeechModel24.py b/SpeechModel24.py index d05247b..f9e125b 100644 --- a/SpeechModel24.py +++ b/SpeechModel24.py @@ -28,9 +28,9 @@ class ModelSpeech(): # 语音模型类 def __init__(self, datapath): ''' 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 + 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 ''' - MS_OUTPUT_SIZE = 1422 + MS_OUTPUT_SIZE = 1424 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 #self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch self.label_max_string_length = 64 diff --git a/SpeechModel25.py b/SpeechModel25.py index 50fbca1..fe6f999 100644 --- a/SpeechModel25.py +++ b/SpeechModel25.py @@ -28,9 +28,9 @@ class ModelSpeech(): # 语音模型类 def __init__(self, datapath): ''' 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 + 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 ''' - MS_OUTPUT_SIZE = 1422 + MS_OUTPUT_SIZE = 1424 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 #self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch self.label_max_string_length = 64 diff --git a/SpeechModel251.py b/SpeechModel251.py index 9bf2b03..0f2831e 100644 --- a/SpeechModel251.py +++ b/SpeechModel251.py @@ -32,9 +32,9 @@ class ModelSpeech(): # 语音模型类 def __init__(self, datapath): ''' 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 + 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 ''' - MS_OUTPUT_SIZE = 1422 + MS_OUTPUT_SIZE = 1424 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 #self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch self.label_max_string_length = 64 diff --git a/SpeechModel251_p.py b/SpeechModel251_p.py index 3487652..d79f67d 100644 --- a/SpeechModel251_p.py +++ b/SpeechModel251_p.py @@ -32,9 +32,9 @@ class ModelSpeech(): # 语音模型类 def __init__(self, datapath): ''' 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 + 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 ''' - MS_OUTPUT_SIZE = 1422 + MS_OUTPUT_SIZE = 1424 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 #self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch self.label_max_string_length = 64 diff --git a/SpeechModel26.py b/SpeechModel26.py index f6977d1..95097c3 100644 --- a/SpeechModel26.py +++ b/SpeechModel26.py @@ -29,9 +29,9 @@ class ModelSpeech(): # 语音模型类 def __init__(self, datapath): ''' 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 + 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 ''' - MS_OUTPUT_SIZE = 1422 + MS_OUTPUT_SIZE = 1424 self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 #self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch self.label_max_string_length = 64 diff --git a/test.py b/test.py index dbdcc7a..f7722b6 100644 --- a/test.py +++ b/test.py @@ -8,7 +8,7 @@ import platform as plat from SpeechModel251 import ModelSpeech -from LanguageModel import ModelLanguage +from LanguageModel2 import ModelLanguage from keras import backend as K datapath = '' @@ -16,7 +16,7 @@ modelpath = 'model_speech' system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' + datapath = 'D:\\语音数据集' modelpath = modelpath + '\\' elif(system_type == 'Linux'): datapath = 'dataset' @@ -29,14 +29,14 @@ else: ms = ModelSpeech(datapath) #ms.LoadModel(modelpath + 'm22_2\\0\\speech_model22_e_0_step_257000.model') -ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_117000.model') +ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_12000.model') #ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True) r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav') -#r = ms.RecognizeSpeech_FromFile('E:\语音数据集\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') -#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') -#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') -#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') +#r = ms.RecognizeSpeech_FromFile('D:\语音数据集\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') +#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') +#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\A11_167.WAV') +#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\D4_750.wav') K.clear_session()