implement to read wav files and list files.

This commit is contained in:
nl8590687 2017-08-26 23:40:28 +08:00
parent 2cdeeaa9a7
commit ab74ee4bfc
4 changed files with 87 additions and 36 deletions

View File

@ -10,6 +10,8 @@
This project will use TensorFlow based on RNN and CNN to implement.
本项目尚未完成想要Fork的同学请手慢。
## Model
模型

View File

@ -1,19 +1,63 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import wave
import numpy as np
import matplotlib.pyplot as plt
def read_wav_file(filename):
def read_wav_data(filename):
'''
读取一个wav文件返回一个文件对象
读取一个wav文件返回声音信号的时域谱矩阵和播放时间
'''
#在这里添加代码
wav = wave.open(filename,"rb") # 打开一个wav格式的声音文件流
num_frame = wav.getnframes() # 获取帧数
num_channel=wav.getnchannels() # 获取声道数
framerate=wav.getframerate() # 获取帧速率
num_sample_width=wav.getsampwidth() # 获取实例的比特宽度,即每一帧的字节数
str_data = wav.readframes(num_frame) # 读取全部的帧
wav.close() # 关闭流
wave_data = np.fromstring(str_data, dtype = np.short) # 将声音文件数据转换为数组矩阵形式
wave_data.shape = -1, num_channel # 按照声道数将数组整形,单声道时候是一列数组,双声道时候是两列的矩阵
wave_data = wave_data.T # 将矩阵转置
time = np.arange(0, num_frame) * (1.0/framerate) # 计算声音的播放时间,单位为秒
return wave_data, time
def wav_show(wave_data, time): # 显示出来声音波形
#wave_data, time = read_wave_data("C:\\Users\\nl\\Desktop\\A2_0.wav")
#draw the wave
#plt.subplot(211)
plt.plot(time, wave_data[0])
#plt.subplot(212)
#plt.plot(time, wave_data[1], c = "g")
plt.show()
def get_wav_list(filename):
'''
读取一个wav文件列表
ps:在数据中专门有几个文件用于存放用于训练验证和测试的wav文件列表
读取一个wav文件列表返回一个存储该列表的字典类型值
ps:在数据中专门有几个文件用于存放用于训练验证和测试的wav文件列表
'''
#在这里添加代码
txt_obj=open(filename,'r') # 打开文件并读入
txt_text=txt_obj.read()
txt_lines=txt_text.split('\n') # 文本分割
dic_filelist={} # 初始化字典
for i in txt_lines:
if(i!=''):
txt_l=i.split(' ')
dic_filelist[txt_l[0]]=txt_l[1]
return dic_filelist
def get_wav_symbol(filename):
'''
读取指定数据集中所有wav文件对应的语音符号
返回一个存储符号集的字典类型值
'''
print('test')
#if(__name__=='__main__'):
#dic=get_wav_list('E:\\语音数据集\\doc\\doc\\list\\train.wav.lst')
#for i in dic:
#print(i,dic[i])
#wave_data, time = read_wav_data("C:\\Users\\nl\\Desktop\\A2_0.wav")
#wav_show(wave_data,time)

57
main.py
View File

@ -1,22 +1,22 @@
# -*- coding: encoding -*-
# -*- coding: utf-8 -*-
"""
@author: nl8590687
"""
#LSTM_CNN
# LSTM_CNN
import keras as kr
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten#,Input,LSTM,Convolution1D,MaxPooling1D,Merge
from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge#Conv2D, MaxPooling2D,Conv1D
from keras.layers import Dense, Dropout, Flatten # ,Input,LSTM,Convolution1D,MaxPooling1D,Merge
from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge # Conv2D, MaxPooling2D,Conv1D
class ModelSpeech(): # 语音模型类
def __init__(self,MS_EMBED_SIZE = 64,BATCH_SIZE = 32): # 初始化
self.MS_EMBED_SIZE = MS_EMBED_SIZE # LSTM 的大小
self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
self._model = self.createLSTMModel()
self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
self._model = self.createLSTMModel()
def CreateLSTMModel(self):# 定义训练模型,尚未完成
def CreateModel(self): # 定义训练模型,尚未完成
# 定义LSTM/CNN模型
_model = Sequential()
@ -27,25 +27,30 @@ class ModelSpeech(): # 语音模型类
_model.add(Dropout(0.3))
_model.add(Flatten())
#_model = Sequential()
#_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
_model.add(Dense(1279, activation="softmax"))
_model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
return _model
def Train(self):
# 训练模型
def LoadModel(self,filename='model_speech/LSTM_CNN.model'):
self._model.load_weights(filename)
#_model = Sequential()
#_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
_model.add(Dense(1279, activation="softmax"))
_model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
return _model
def TrainModel(self,datas,epoch = 2,save_step=5000,filename='model_speech/LSTM_CNN_model'): # 训练模型
print('test')
def LoadModel(self,filename='model_speech/LSTM_CNN_model'): # 加载模型参数
self._model.load_weights(filename)
def SaveModel(self,filename='model_speech/LSTM_CNN_model'): # 保存模型参数
self._model.save_weights(filename+'.model')
def TestModel(self): # 测试检验模型效果
print('test')
@property
def model(self): # 返回keras model
return self._model
def SaveModel(self,filename='model_speech/LSTM_CNN.model'):
# 保存模型参数
def Test(self):
# 测试检验模型效果
print('test')
print('test')
print(__name__)

View File

@ -1,4 +1,4 @@
# -*- coding: encoding -*-
# -*- coding: utf-8 -*-
import numpy as np