add GetFreqFeat4 and update readme
parent 4ef5fa32e2
commit ccc723f644
@@ -81,7 +81,7 @@ CNN + LSTM/GRU + CTC

* About downloading already-trained models

-The complete source program can be obtained from the archives of the released versions on the repository's [releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases) page on GitHub.
+The complete source program, including the trained model weights, can be obtained from the archives of the released versions on the repository's [releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases) page on GitHub.

### Language Model 语言模型
@@ -83,7 +83,7 @@ The maximum length of the input audio is 16 seconds, and the output is the corre

* Questions about downloading trained models

-The complete source program can be obtained from the archives of the various released versions of the software on the [releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases) page of this GitHub repository.
+The complete source program, including the trained model weights, can be obtained from the archives of the various released versions of the software on the [releases](https://github.com/nl8590687/ASRT_SpeechRecognition/releases) page of this GitHub repository.

### Language Model
@@ -130,6 +130,39 @@ def GetFrequencyFeature3(wavsignal, fs):
        data_line = np.abs(fft(data_line)) / wav_length

        data_input[i] = data_line[0:200]  # keep 400/2 = 200 points, i.e. half of the data, because the spectrum is symmetric

    #print(data_input.shape)
    data_input = np.log(data_input + 1)
    return data_input

def GetFrequencyFeature4(wavsignal, fs):
    '''
    Mainly fixes the bug in version 3 (GetFrequencyFeature3)
    '''
    # Apply a time window to the wav waveform, shifted by 10 ms per step
    time_window = 25  # unit: ms
    window_length = fs / 1000 * time_window  # formula for the window length; currently always the fixed value 400

    wav_arr = np.array(wavsignal)
    #wav_length = len(wavsignal[0])
    wav_length = wav_arr.shape[1]

    range0_end = int(len(wavsignal[0])/fs*1000 - time_window) // 10 + 1  # where the loop ends, i.e. the final number of windows
    data_input = np.zeros((range0_end, 200), dtype = np.float)  # holds the final frequency feature data
    data_line = np.zeros((1, 400), dtype = np.float)

    for i in range(0, range0_end):
        p_start = i * 160
        p_end = p_start + 400

        data_line = wav_arr[0, p_start:p_end]

        data_line = data_line * w  # apply the window

        data_line = np.abs(fft(data_line)) / wav_length

        data_input[i] = data_line[0:200]  # keep 400/2 = 200 points, i.e. half of the data, because the spectrum is symmetric

    #print(data_input.shape)
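The hunk header reports 39 added lines, but the listing above stops before the end of GetFrequencyFeature4 and never shows where `w`, the window applied in `data_line = data_line * w`, is defined. Below is a minimal, self-contained sketch of the same feature extraction, assuming `w` is a module-level 400-point Hamming window shared with GetFrequencyFeature3, that `fft` comes from `scipy.fftpack` as elsewhere in this file, and that the function ends the same way as version 3 (log compression and return); the name `frequency_feature4_sketch` is hypothetical and not part of the repository.

```python
import numpy as np
from scipy.fftpack import fft  # assumption: same FFT import as the rest of the file

# Assumption: `w` is a module-level 400-point Hamming window, as used by GetFrequencyFeature3
w = 0.54 - 0.46 * np.cos(2 * np.pi * np.arange(400) / (400 - 1))

def frequency_feature4_sketch(wavsignal, fs=16000):
    '''Sketch of GetFrequencyFeature4: 25 ms windows, 10 ms hop, 200-bin log spectrogram.'''
    wav_arr = np.array(wavsignal)        # shape (1, num_samples), one audio channel
    wav_length = wav_arr.shape[1]

    time_window = 25                     # window length in ms
    # one frame per 10 ms hop, counting only hops where a full 25 ms window still fits
    num_frames = int(wav_length / fs * 1000 - time_window) // 10 + 1

    data_input = np.zeros((num_frames, 200), dtype=np.float64)
    for i in range(num_frames):
        p_start = i * 160                            # 10 ms hop at 16 kHz = 160 samples
        frame = wav_arr[0, p_start:p_start + 400]    # 25 ms window at 16 kHz = 400 samples
        frame = frame * w                            # apply the window
        spectrum = np.abs(fft(frame)) / wav_length
        data_input[i] = spectrum[0:200]              # keep half of the symmetric 400-point spectrum

    # assumed ending, mirroring GetFrequencyFeature3: log compression
    return np.log(data_input + 1)
```

For a 16-second clip at 16 kHz (256,000 samples, the maximum input length mentioned in the README), this produces a 1598 x 200 feature matrix.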