add GetFreqFeat4 and update readme

This commit is contained in:
nl 2019-03-18 14:28:54 +08:00
parent 4ef5fa32e2
commit ccc723f644
3 changed files with 35 additions and 2 deletions

View File

@ -81,7 +81,7 @@ CNN + LSTM/GRU + CTC
* 关于下载已经训练好的模型的问题
可以在Github本仓库下[releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases)里面的查看发布的各个版本软件的压缩包里获得完整源程序。
可以在Github本仓库下[releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases)里面查看发布的各个版本软件的压缩包,并从中获得包含已经训练好模型参数的完整源程序。
### Language Model 语言模型

View File

@ -83,7 +83,7 @@ The maximum length of the input audio is 16 seconds, and the output is the corre
* Questions about downloading trained models
The complete source program can be obtained from the archives of the various versions of the software released in the [releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases) page of Github.
The complete source program, including the trained model weights, can be obtained from the archives of each released version of the software on the [releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases) page of this GitHub repository.
### Language Model

View File

@ -130,6 +130,39 @@ def GetFrequencyFeature3(wavsignal, fs):
data_line = np.abs(fft(data_line)) / wav_length
data_input[i]=data_line[0:200] # 设置为400除以2的值即200是取一半数据因为是对称的
#print(data_input.shape)
data_input = np.log(data_input + 1)
return data_input
def GetFrequencyFeature4(wavsignal, fs):
'''
主要是用来修正3版的bug
'''
# wav波形 加时间窗以及时移10ms
time_window = 25 # 单位ms
window_length = fs / 1000 * time_window # 计算窗长度的公式目前全部为400固定值
wav_arr = np.array(wavsignal)
#wav_length = len(wavsignal[0])
wav_length = wav_arr.shape[1]
range0_end = int(len(wavsignal[0])/fs*1000 - time_window) // 10 + 1 # 计算循环终止的位置,也就是最终生成的窗数
data_input = np.zeros((range0_end, 200), dtype = np.float) # 用于存放最终的频率特征数据
data_line = np.zeros((1, 400), dtype = np.float)
for i in range(0, range0_end):
p_start = i * 160
p_end = p_start + 400
data_line = wav_arr[0, p_start:p_end]
data_line = data_line * w # 加窗
data_line = np.abs(fft(data_line)) / wav_length
data_input[i]=data_line[0:200] # 设置为400除以2的值即200是取一半数据因为是对称的
#print(data_input.shape)