feat: 切换默认声学模型到m251bn

This commit is contained in:
ailemon 2022-03-27 21:47:12 +08:00
parent f19b20702b
commit 087f51f8b4
7 changed files with 36 additions and 28 deletions

View File

@ -101,7 +101,7 @@ $ python3 client_http.py
请注意,开启API服务器之后,需要使用本ASRT项目对应的客户端软件来进行语音识别,详见Wiki文档[下载ASRT语音识别客户端SDK和Demo](https://wiki.ailemon.net/docs/asrt-doc/download)。
如果要训练和使用非251版模型,请在代码中 `import speech_model_zoo` 的相应位置做修改。
如果要训练和使用非251bn版模型,请在代码中 `import speech_model_zoo` 的相应位置做修改。
使用docker直接部署ASRT
```shell

View File

@ -97,7 +97,7 @@ To test whether it is successful or not that calls api service interface:
$ python3 client_http.py
```
If you want to train and use a different model (not Model 251), modify the code at the corresponding `import speech_model_zoo` locations in the code files.
If you want to train and use a different model (not Model 251bn), modify the code at the corresponding `import speech_model_zoo` locations in the code files.
If you run into any problem while running or using the program, please report it in an issue, and I will reply as soon as possible.

View File

@ -26,7 +26,7 @@ ASRT语音识别API的HTTP服务器程序
import http.server
import socket
from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage
@ -35,13 +35,13 @@ AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 默认输出的拼音的表示大小是1428即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ml = ModelLanguage('model_language')
ml.LoadModel()

View File

@ -23,12 +23,13 @@
ASRT语音识别基于HTTP协议的API服务器程序
"""
import argparse
import base64
import json
from flask import Flask, Response, request
from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage
from utils.ops import decode_wav_bytes
@ -36,10 +37,15 @@ from utils.ops import decode_wav_bytes
API_STATUS_CODE_OK = 200000 # OK
API_STATUS_CODE_CLIENT_ERROR = 400000
API_STATUS_CODE_CLIENT_ERROR_FORMAT = 400001 # 请求数据格式错误
API_STATUS_CODE_CLIENT_ERROR_FORMAT = 400002 # 请求数据配置不支持
API_STATUS_CODE_CLIENT_ERROR_CONFIG = 400002 # 请求数据配置不支持
API_STATUS_CODE_SERVER_ERROR = 500000
API_STATUS_CODE_SERVER_ERROR_RUNNING = 500001 # 服务器运行中出错
parser = argparse.ArgumentParser(description='ASRT HTTP+Json RESTful API Service')
parser.add_argument('--listen', default='0.0.0.0', type=str, help='the network to listen')
parser.add_argument('--port', default='20001', type=str, help='the port to listen')
args = parser.parse_args()
app = Flask("ASRT API Service")
AUDIO_LENGTH = 1600
@ -47,13 +53,13 @@ AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 默认输出的拼音的表示大小是1428即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ml = ModelLanguage('model_language')
ml.LoadModel()
@ -149,7 +155,7 @@ def recognition_post(level):
json_data = AsrtApiResponse(API_STATUS_CODE_OK, 'all level')
json_data.result = result
buffer = json_data.to_json()
print('output:', buffer)
print('ASRT Result:', result,'output:', buffer)
return Response(buffer, mimetype='application/json')
else:
request_data = request.get_json()
@ -165,6 +171,8 @@ def recognition_post(level):
# request_data['samples'][-100:])
json_data = AsrtApiResponse(API_STATUS_CODE_SERVER_ERROR, str(except_general))
buffer = json_data.to_json()
#print("input:", request_data, "\n", "output:", buffer)
print("output:", buffer, "error:", except_general)
return Response(buffer, mimetype='application/json')
@ -173,4 +181,4 @@ if __name__ == '__main__':
#app.run(host='0.0.0.0', port=20001)
# for production env
import waitress
waitress.serve(app, host='0.0.0.0', port=20001)
waitress.serve(app, host=args.listen, port=args.port)

View File

@ -26,7 +26,7 @@
import os
from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from data_loader import DataLoader
from speech_features import Spectrogram
@ -37,14 +37,14 @@ AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 默认输出的拼音的表示大小是1428即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
evalue_data = DataLoader('dev')
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ms.evaluate_model(data_loader=evalue_data, data_count=-1,
out_report=True, show_ratio=True, show_per_step=100)

View File

@ -26,7 +26,7 @@
import os
from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage
@ -37,14 +37,14 @@ AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 默认输出的拼音的表示大小是1428即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
res = ms.recognize_speech_from_file('filename.wav')
print('*[提示] 声学模型语音识别结果:\n', res)

View File

@ -28,9 +28,9 @@ import os
from tensorflow.keras.optimizers import Adam
from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from data_loader import DataLoader
from speech_features import Spectrogram
from speech_features import SpecAugment
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
@ -39,16 +39,16 @@ AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 默认输出的拼音的表示大小是1428即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
feat = SpecAugment()
train_data = DataLoader('train')
opt = Adam(lr = 0.0001, beta_1 = 0.9, beta_2 = 0.999, decay = 0.0, epsilon = 10e-8)
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
#ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
#ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ms.train_model(optimizer=opt, data_loader=train_data,
epochs=50, save_step=1, batch_size=16, last_epoch=0)
ms.save_model('save_models/' + sm251.get_model_name())
ms.save_model('save_models/' + sm251bn.get_model_name())