Implement reading of wav files and wav list files.

2017-08-26 23:40:28 +08:00 · 2017-08-26 23:40:28 +08:00 · ab74ee4bfc
parent 2cdeeaa9a7
commit ab74ee4bfc
4 changed files with 87 additions and 36 deletions
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@

 This project will use TensorFlow based on RNN and CNN to implement. 

+本项目尚未完成，想要Fork的同学请手慢。
+
 ## Model
 模型

--- a/general_function/file_wav.py
+++ b/general_function/file_wav.py
@@ -1,19 +1,63 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-

 import os
+import wave
+import numpy as np
+import matplotlib.pyplot as plt  

-def read_wav_file(filename):
+def read_wav_data(filename):
 	'''
-	读取一个wav文件，返回一个文件对象
+	读取一个wav文件，返回声音信号的时域谱矩阵和播放时间
 	'''
-	#在这里添加代码
+	wav = wave.open(filename,"rb") # 打开一个wav格式的声音文件流
+	num_frame = wav.getnframes() # 获取帧数
+	num_channel=wav.getnchannels() # 获取声道数
+	framerate=wav.getframerate() # 获取帧速率
+	num_sample_width=wav.getsampwidth() # 获取实例的比特宽度，即每一帧的字节数
+	str_data = wav.readframes(num_frame) # 读取全部的帧
+	wav.close() # 关闭流
+	wave_data = np.fromstring(str_data, dtype = np.short) # 将声音文件数据转换为数组矩阵形式
+	wave_data.shape = -1, num_channel # 按照声道数将数组整形，单声道时候是一列数组，双声道时候是两列的矩阵
+	wave_data = wave_data.T # 将矩阵转置
+	time = np.arange(0, num_frame) * (1.0/framerate)  # 计算声音的播放时间，单位为秒
+	return wave_data, time  
+	
+def wav_show(wave_data, time): # 显示出来声音波形
+	#wave_data, time = read_wave_data("C:\\Users\\nl\\Desktop\\A2_0.wav")     
+	#draw the wave  
+	#plt.subplot(211)  
+	plt.plot(time, wave_data[0])  
+	#plt.subplot(212)  
+	#plt.plot(time, wave_data[1], c = "g")  
+	plt.show()  
+
 	
 def get_wav_list(filename):
 	'''
-	读取一个wav文件列表
-	ps:在数据中专门有几个文件用于存放用于训练、验证和测试的wav文件列表
+	读取一个wav文件列表，返回一个存储该列表的字典类型值
+	ps:在数据中专门有几个文件用于存放用于训练、验证和测试的wav文件列表
 	'''
-	#在这里添加代码
+	txt_obj=open(filename,'r') # 打开文件并读入
+	txt_text=txt_obj.read()
+	txt_lines=txt_text.split('\n') # 文本分割
+	dic_filelist={} # 初始化字典
+	for i in txt_lines:
+		if(i!=''):
+			txt_l=i.split(' ')
+			dic_filelist[txt_l[0]]=txt_l[1]
+	return dic_filelist
+	
+def get_wav_symbol(filename):
+	'''
+	读取指定数据集中，所有wav文件对应的语音符号
+	返回一个存储符号集的字典类型值
+	'''
+	print('test')
+#if(__name__=='__main__'):
+	#dic=get_wav_list('E:\\语音数据集\\doc\\doc\\list\\train.wav.lst')
+	#for i in dic:
+		#print(i,dic[i])
+	#wave_data, time = read_wav_data("C:\\Users\\nl\\Desktop\\A2_0.wav")  
+	#wav_show(wave_data,time)
 	
-
-
-
--- a/main.py
+++ b/main.py
@@ -1,22 +1,22 @@
-# -*- coding: encoding -*-
+# -*- coding: utf-8 -*-
 """
@author: nl8590687
 """
-#LSTM_CNN
+# LSTM_CNN
 import keras as kr
 import numpy as np

 from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten#,Input,LSTM,Convolution1D,MaxPooling1D,Merge
-from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge#Conv2D, MaxPooling2D,Conv1D
+from keras.layers import Dense, Dropout, Flatten # ,Input,LSTM,Convolution1D,MaxPooling1D,Merge
+from keras.layers import Conv1D,LSTM,MaxPooling1D,Merge # Conv2D, MaxPooling2D,Conv1D

 class ModelSpeech(): # 语音模型类
 	def __init__(self,MS_EMBED_SIZE = 64,BATCH_SIZE = 32): # 初始化
 		self.MS_EMBED_SIZE = MS_EMBED_SIZE # LSTM 的大小
-        self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
-        self._model = self.createLSTMModel()
+		self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
+		self._model = self.createLSTMModel()

-	def CreateLSTMModel(self):# 定义训练模型，尚未完成
+	def CreateModel(self): # 定义训练模型，尚未完成
 		# 定义LSTM/CNN模型
 		
 		_model = Sequential()
@@ -27,25 +27,30 @@ class ModelSpeech(): # 语音模型类
 		_model.add(Dropout(0.3))
 		_model.add(Flatten())
 		
-
 		
-        #_model = Sequential()
-        #_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
-        _model.add(Dense(1279, activation="softmax"))
-        _model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
-        return _model
-
-	def Train(self):
-		# 训练模型
 		
-	def LoadModel(self,filename='model_speech/LSTM_CNN.model'):
-        self._model.load_weights(filename)
+		#_model = Sequential()
+		#_model.add(Merge([m_lstm, aenc], mode="concat", concat_axis=-1))
+		_model.add(Dense(1279, activation="softmax"))
+		_model.compile(optimizer="adam", loss='categorical_crossentropy',metrics=["accuracy"])
+		return _model
+
+	def TrainModel(self,datas,epoch = 2,save_step=5000,filename='model_speech/LSTM_CNN_model'): # 训练模型
+		print('test')
+
+	def LoadModel(self,filename='model_speech/LSTM_CNN_model'): # 加载模型参数
+		self._model.load_weights(filename)
+
+	def SaveModel(self,filename='model_speech/LSTM_CNN_model'): # 保存模型参数
+		self._model.save_weights(filename+'.model')
+
+	def TestModel(self): # 测试检验模型效果
+		print('test')
+
+	@property
+	def model(self): # 返回keras model
+		return self._model
 	
-	def SaveModel(self,filename='model_speech/LSTM_CNN.model'):
-		# 保存模型参数
-	
-	def Test(self):
-		# 测试检验模型效果
-	
-	
-print('test')
+
+print('test')
+print(__name__)
--- a/readdata.py
+++ b/readdata.py
@@ -1,4 +1,4 @@
-# -*- coding: encoding -*-
+# -*- coding: utf-8 -*-

 import numpy as np