From cbbb8ba99fae7347029fdff793ff4ba54121eee1 Mon Sep 17 00:00:00 2001
From: nl <3210346136@qq.com>
Date: Sat, 20 Nov 2021 20:32:09 +0800
Subject: [PATCH] feat: switch the acoustic model module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 asrserver.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/asrserver.py b/asrserver.py
index e4d8cd9..a332912 100644
--- a/asrserver.py
+++ b/asrserver.py
@@ -20,21 +20,29 @@
 
 """
 @author: nl8590687
-HTTP server program for the speech recognition API
+HTTP server program for the ASRT speech recognition API
 """
 import http.server
 import urllib
 import socket
-import keras
-from SpeechModel251 import ModelSpeech
-from LanguageModel import ModelLanguage
+from speech_model import ModelSpeech
+from speech_model_zoo import SpeechModel251
+from speech_features import Spectrogram
+from LanguageModel2 import ModelLanguage
 
-
-datapath = './'
-modelpath = 'model_speech/'
-ms = ModelSpeech(datapath)
-ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_12000.h5')
+audio_length = 1600
+audio_feature_length = 200
+channels = 1
+# The default pinyin output size is 1428, i.e. 1427 pinyin symbols plus 1 blank token
+output_size = 1428
+sm251 = SpeechModel251(
+    input_shape=(audio_length, audio_feature_length, channels),
+    output_size=output_size
+    )
+feat = Spectrogram()
+ms = ModelSpeech(sm251, feat, max_label_length=64)
+ms.load_model('save_models/' + sm251.get_model_name() + '.h5')
 
 ml = ModelLanguage('model_language')
 ml.LoadModel()
 
@@ -114,7 +122,7 @@ class ASRTHTTPHandle(http.server.BaseHTTPRequestHandler):
     def recognize(self, wavs, fs):
         r=''
         try:
-            r_speech = ms.RecognizeSpeech(wavs, fs)
+            r_speech = ms.recognize_speech(wavs, fs)
             print(r_speech)
             str_pinyin = r_speech
             r = ml.SpeechToText(str_pinyin)
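
Note for reviewers: below is a minimal standalone sketch of the new acoustic-model pipeline that this patch wires into asrserver.py, handy for exercising the change outside the HTTP server. The class and method names (SpeechModel251, Spectrogram, ModelSpeech, recognize_speech, load_model, get_model_name, ModelLanguage, LoadModel, SpeechToText) are taken from the diff above; reading the WAV file with the standard-library wave module and passing raw int16 samples plus the sample rate to recognize_speech are my own assumptions, not something this patch defines.

# Standalone usage sketch of the new speech_model / speech_model_zoo API
# (assumptions about WAV loading are noted in the review comment above).
import wave

import numpy as np

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage

AUDIO_LENGTH = 1600          # maximum number of feature frames per utterance
AUDIO_FEATURE_LENGTH = 200   # spectrogram feature dimension
CHANNELS = 1
OUTPUT_SIZE = 1428           # 1427 pinyin symbols plus 1 blank token

# Build the acoustic model the same way the patched asrserver.py does and load its weights.
sm251 = SpeechModel251(
    input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
    output_size=OUTPUT_SIZE,
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.h5')

# Language model: pinyin sequence -> Chinese text.
ml = ModelLanguage('model_language')
ml.LoadModel()


def wav_to_text(filename):
    """Recognize one mono WAV file and return the decoded text."""
    with wave.open(filename, 'rb') as wav:
        fs = wav.getframerate()
        # Assumption: recognize_speech accepts raw int16 samples plus the sample rate,
        # matching the (wavs, fs) arguments used in ASRTHTTPHandle.recognize above.
        samples = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)
    str_pinyin = ms.recognize_speech(samples, fs)  # acoustic model -> pinyin sequence
    return ml.SpeechToText(str_pinyin)             # language model -> final text


if __name__ == '__main__':
    print(wav_to_text('test.wav'))  # 'test.wav' is a hypothetical sample file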