Implement reading of WAV files and of WAV file lists.
This commit is contained in:
parent
2cdeeaa9a7
commit
ab74ee4bfc
|
@ -10,6 +10,8 @@
|
|||
|
||||
This project will be implemented with TensorFlow, using RNN and CNN models.
|
||||
|
||||
本项目尚未完成,想要Fork的同学请手慢。
|
||||
|
||||
## Model
|
||||
模型
|
||||
|
||||
|
|
|
@ -1,19 +1,63 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import wave
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_wav_file(filename):
|
||||
def read_wav_data(filename):
    """Read a wav file and return its samples together with a time axis.

    The raw frames are decoded as 16-bit signed integers (``np.short``);
    the sample width stored in the file is not checked, so files with a
    different sample width will be misinterpreted.

    Args:
        filename: path of the wav file to read.

    Returns:
        A tuple ``(wave_data, time)``. ``wave_data`` is an integer array
        with one row per channel (for 16-bit data its shape is
        ``(num_channel, num_frame)``). ``time`` is a 1-D array holding the
        offset in seconds of every frame, i.e. ``frame_index / framerate``.
    """
    # The context manager closes the stream even if reading raises.
    with wave.open(filename, "rb") as wav:
        num_frame = wav.getnframes()      # number of frames
        num_channel = wav.getnchannels()  # number of channels
        framerate = wav.getframerate()    # frames per second
        str_data = wav.readframes(num_frame)  # read every frame

    # np.fromstring is deprecated for binary input; np.frombuffer is its
    # replacement. It yields a read-only view of the bytes, so copy it to
    # keep the returned array writable as before.
    wave_data = np.frombuffer(str_data, dtype=np.short).copy()
    # Samples are interleaved by channel: reshape to one column per channel,
    # then transpose so that each row holds one channel.
    wave_data = wave_data.reshape(-1, num_channel).T
    time = np.arange(0, num_frame) * (1.0 / framerate)
    return wave_data, time
|
||||
|
||||
def wav_show(wave_data, time):
    """Plot the first channel of a waveform against time and display it.

    Args:
        wave_data: indexable collection of channels; only ``wave_data[0]``
            is drawn.
        time: x-axis values passed to the plot call.
    """
    first_channel = wave_data[0]
    plt.plot(time, first_channel)
    plt.show()
|
||||
|
||||
|
||||
def get_wav_list(filename):
    """Read a wav list file and return its entries as a dictionary.

    Every non-empty line is split on single spaces; the first field becomes
    the key and the second field becomes the value. According to the
    original note, the dataset ships dedicated list files for the training,
    validation and test sets.

    Args:
        filename: path of the list file.

    Returns:
        A dict mapping the first field of each line to its second field.

    Raises:
        IndexError: if a non-empty line contains no space.
    """
    # ``with`` guarantees the file is closed; the handle used to be leaked.
    with open(filename, 'r') as list_file:
        txt_lines = list_file.read().split('\n')

    dic_filelist = {}
    for line in txt_lines:
        if line != '':
            fields = line.split(' ')
            dic_filelist[fields[0]] = fields[1]
    return dic_filelist
|
||||
|
||||
def get_wav_symbol(filename):
    """Read the speech symbols of every wav file in a dataset (placeholder).

    The intended result is a dictionary holding the symbol set, but the
    function is not implemented yet: it ignores ``filename``, prints
    ``test`` and returns ``None``.
    """
    print('test')
|
||||
#if(__name__=='__main__'):
|
||||
#dic=get_wav_list('E:\\语音数据集\\doc\\doc\\list\\train.wav.lst')
|
||||
#for i in dic:
|
||||
#print(i,dic[i])
|
||||
#wave_data, time = read_wav_data("C:\\Users\\nl\\Desktop\\A2_0.wav")
|
||||
#wav_show(wave_data,time)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
57
main.py
57
main.py
|
@ -1,22 +1,22 @@
|
|||
# -*- coding: encoding -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@author: nl8590687
|
||||
"""
|
||||
#LSTM_CNN
|
||||
# LSTM_CNN
|
||||
import keras as kr
|
||||
import numpy as np
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Flatten#,Input,LSTM,Convolution1D,MaxPooling1D,Merge
|
||||
from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge#Conv2D, MaxPooling2D,Conv1D
|
||||
from keras.layers import Dense, Dropout, Flatten # ,Input,LSTM,Convolution1D,MaxPooling1D,Merge
|
||||
from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge # Conv2D, MaxPooling2D,Conv1D
|
||||
|
||||
class ModelSpeech(): # 语音模型类
|
||||
def __init__(self, MS_EMBED_SIZE=64, BATCH_SIZE=32):
    """Store the hyper-parameters and build the network.

    Args:
        MS_EMBED_SIZE: size of the LSTM layer.
        BATCH_SIZE: number of samples in one training batch.
    """
    self.MS_EMBED_SIZE = MS_EMBED_SIZE  # LSTM size
    self.BATCH_SIZE = BATCH_SIZE  # batch size of one training step
    # The builder method of this class is ``CreateModel``; the former call
    # to ``createLSTMModel`` raised AttributeError because no method of
    # that name exists.
    self._model = self.CreateModel()
|
||||
|
||||
def CreateLSTMModel(self):# 定义训练模型,尚未完成
|
||||
def CreateModel(self): # 定义训练模型,尚未完成
|
||||
# 定义LSTM/CNN模型
|
||||
|
||||
_model = Sequential()
|
||||
|
@ -27,25 +27,30 @@ class ModelSpeech(): # 语音模型类
|
|||
_model.add(Dropout(0.3))
|
||||
_model.add(Flatten())
|
||||
|
||||
|
||||
|
||||
#_model = Sequential()
|
||||
#_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
|
||||
_model.add(Dense(1279, activation="softmax"))
|
||||
_model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
|
||||
return _model
|
||||
|
||||
def Train(self):
|
||||
# 训练模型
|
||||
|
||||
def LoadModel(self,filename='model_speech/LSTM_CNN.model'):
|
||||
self._model.load_weights(filename)
|
||||
#_model = Sequential()
|
||||
#_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
|
||||
_model.add(Dense(1279, activation="softmax"))
|
||||
_model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
|
||||
return _model
|
||||
|
||||
def TrainModel(self, datas, epoch=2, save_step=5000, filename='model_speech/LSTM_CNN_model'):
    """Train the model (placeholder).

    Not implemented yet: none of the arguments are used, the method only
    prints ``test`` and returns ``None``.
    """
    print('test')
|
||||
|
||||
def LoadModel(self, filename='model_speech/LSTM_CNN_model'):
    """Load model weights from ``filename`` with ``'.model'`` appended.

    ``SaveModel`` appends ``'.model'`` to the name it is given, so the same
    suffix is appended here. Without it, saving and then loading with the
    same (or the default) ``filename`` looked for a file that was never
    written.

    Args:
        filename: path of the weight file without the ``'.model'`` suffix.
    """
    self._model.load_weights(filename + '.model')
|
||||
|
||||
def SaveModel(self, filename='model_speech/LSTM_CNN_model'):
    """Save the model weights to ``filename`` with ``'.model'`` appended.

    Args:
        filename: path of the weight file without the ``'.model'`` suffix.
    """
    weights_path = filename + '.model'
    self._model.save_weights(weights_path)
|
||||
|
||||
def TestModel(self):
    """Evaluate the model (placeholder).

    Not implemented yet: the method only prints ``test`` and returns
    ``None``.
    """
    print('test')
|
||||
|
||||
@property
def model(self):
    """Return the Keras model object stored in ``self._model``."""
    keras_model = self._model
    return keras_model
|
||||
|
||||
def SaveModel(self,filename='model_speech/LSTM_CNN.model'):
|
||||
# 保存模型参数
|
||||
|
||||
def Test(self):
|
||||
# 测试检验模型效果
|
||||
|
||||
|
||||
print('test')
|
||||
|
||||
print('test')
|
||||
print(__name__)
|
|
@ -1,4 +1,4 @@
|
|||
# -*- coding: encoding -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
|
Loading…
Reference in New Issue