From b6f828f2fb644c2cc84f285778873596dc0ff15a Mon Sep 17 00:00:00 2001 From: Pushkal Katara Date: Thu, 22 Aug 2019 02:40:23 +0530 Subject: [PATCH 1/3] SRNN Datapreprocessing script --- examples/pytorch/SRNN/SRNN_Example.ipynb | 60 +++++++----- examples/pytorch/SRNN/SRNN_Example.py | 20 ++-- examples/pytorch/SRNN/process_google.py | 120 ++++++++++------------- examples/pytorch/requirements.txt | 2 + 4 files changed, 102 insertions(+), 100 deletions(-) create mode 100644 examples/pytorch/requirements.txt diff --git a/examples/pytorch/SRNN/SRNN_Example.ipynb b/examples/pytorch/SRNN/SRNN_Example.ipynb index 4538688f4..1f5811015 100644 --- a/examples/pytorch/SRNN/SRNN_Example.ipynb +++ b/examples/pytorch/SRNN/SRNN_Example.ipynb @@ -25,7 +25,8 @@ "import sys\n", "import os\n", "import numpy as np\n", - "import torch" + "import torch\n", + "import h5py" ] }, { @@ -80,9 +81,16 @@ } ], "source": [ - "x_train_, y_train = np.load(DATA_DIR + 'x_train.npy'), np.load(DATA_DIR + 'y_train.npy')\n", - "x_val_, y_val = np.load(DATA_DIR + 'x_val.npy'), np.load(DATA_DIR + 'y_val.npy')\n", - "x_test_, y_test = np.load(DATA_DIR + 'x_test.npy'), np.load(DATA_DIR + 'y_test.npy')\n", + "# Copyright (c) Microsoft Corporation. All rights reserved.\n", + "f = h5py.File(DATA_DIR + 'train.h5','r')\n", + "x_train_ = np.array(f.get('X'))\n", + "y_train = np.array(f.get('Y'))\n", + "f = h5py.File(DATA_DIR + 'val.h5','r')\n", + "x_val_ = np.array(f.get('X'))\n", + "y_val = np.array(f.get('Y'))\n", + "f = h5py.File(DATA_DIR + 'test.h5','r')\n", + "x_test_ = np.array(f.get('X'))\n", + "y_test = np.array(f.get('Y'))\n", "# Mean-var normalize\n", "mean = np.mean(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)\n", "std = np.std(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)\n", @@ -161,28 +169,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch 0 batch 0 loss 4.295151 acc 0.031250\n", - "Epoch 0 batch 200 loss 1.002617 acc 0.718750\n", - "Epoch 1 batch 0 loss 0.647069 acc 0.796875\n", - "Epoch 1 batch 200 loss 0.469229 acc 0.835938\n", - "Epoch 2 batch 0 loss 0.388671 acc 0.882812\n", - "Epoch 2 batch 200 loss 0.396696 acc 0.859375\n", - "Epoch 3 batch 0 loss 0.266433 acc 0.921875\n", - "Epoch 3 batch 200 loss 0.281694 acc 0.867188\n", - "Epoch 4 batch 0 loss 0.302240 acc 0.906250\n", - "Epoch 4 batch 200 loss 0.245797 acc 0.929688\n", - "Validation accuracy: 0.911003\n", - "Epoch 5 batch 0 loss 0.202542 acc 0.945312\n", - "Epoch 5 batch 200 loss 0.192004 acc 0.929688\n", - "Epoch 6 batch 0 loss 0.256735 acc 0.921875\n", - "Epoch 6 batch 200 loss 0.279066 acc 0.921875\n", - "Epoch 7 batch 0 loss 0.228837 acc 0.945312\n", - "Epoch 7 batch 200 loss 0.222357 acc 0.937500\n", - "Epoch 8 batch 0 loss 0.164639 acc 0.960938\n", - "Epoch 8 batch 200 loss 0.160117 acc 0.945312\n", - "Epoch 9 batch 0 loss 0.173849 acc 0.953125\n", - "Epoch 9 batch 200 loss 0.201694 acc 0.929688\n", - "Validation accuracy: 0.912474\n" + "Epoch 0 batch 0 loss 2.049031 acc 0.632812\n", + "Epoch 0 batch 200 loss 0.739568 acc 0.695312\n", + "Epoch 1 batch 0 loss 0.536956 acc 0.843750\n", + "Epoch 1 batch 200 loss 0.402417 acc 0.882812\n", + "Epoch 2 batch 0 loss 0.299402 acc 0.921875\n", + "Epoch 2 batch 200 loss 0.316270 acc 0.882812\n", + "Epoch 3 batch 0 loss 0.237716 acc 0.929688\n", + "Epoch 3 batch 200 loss 0.215562 acc 0.929688\n", + "Epoch 4 batch 0 loss 0.235044 acc 0.929688\n", + "Epoch 4 batch 200 loss 0.177791 acc 0.945312\n", + "Validation accuracy: 0.913504\n", + "Epoch 5 batch 0 loss 0.181037 acc 0.945312\n", + "Epoch 5 batch 200 loss 0.167289 acc 0.937500\n", + "Epoch 6 batch 0 loss 0.201628 acc 0.921875\n", + "Epoch 6 batch 200 loss 0.266160 acc 0.914062\n", + "Epoch 7 batch 0 loss 0.199887 acc 0.937500\n", + "Epoch 7 batch 200 loss 0.154214 acc 0.929688\n", + "Epoch 8 batch 0 loss 0.193560 acc 0.945312\n", + "Epoch 8 batch 200 loss 0.194838 acc 0.937500\n", + "Epoch 9 batch 0 loss 0.205967 acc 0.921875\n", + "Epoch 9 batch 200 loss 0.186773 acc 0.937500\n", + "Validation accuracy: 0.913063\n" ] } ], diff --git a/examples/pytorch/SRNN/SRNN_Example.py b/examples/pytorch/SRNN/SRNN_Example.py index c30ecc589..cfa1e00c7 100644 --- a/examples/pytorch/SRNN/SRNN_Example.py +++ b/examples/pytorch/SRNN/SRNN_Example.py @@ -6,6 +6,7 @@ import os import numpy as np import torch +import h5py from edgeml_pytorch.graph.rnn import SRNN2 from edgeml_pytorch.trainer.srnnTrainer import SRNNTrainer @@ -16,12 +17,15 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") DATA_DIR = config.data_dir -x_train_ = np.load(DATA_DIR + 'x_train.npy') -y_train = np.load(DATA_DIR + 'y_train.npy') -x_val_ = np.load(DATA_DIR + 'x_val.npy') -y_val = np.load(DATA_DIR + 'y_val.npy') -x_test_ = np.load(DATA_DIR + 'x_test.npy') -y_test = np.load(DATA_DIR + 'y_test.npy') +f = h5py.File(DATA_DIR + 'train.h5','r') +x_train_ = np.array(f.get('X')) +y_train = np.array(f.get('Y')) +f = h5py.File(DATA_DIR + 'val.h5','r') +x_val_ = np.array(f.get('X')) +y_val = np.array(f.get('Y')) +f = h5py.File(DATA_DIR + 'test.h5','r') +x_test_ = np.array(f.get('X')) +y_test = np.array(f.get('Y')) # Mean-var normalize mean = np.mean(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0) @@ -60,7 +64,7 @@ Example OPTIONAL args for FastGRNNCell cellArgs = {'gate_non_linearity':"sigmoid",'update_non_linearity':"tanh", - 'wRank':None, 'uRank':None,'zetaInit':1.0, 'nuInit':-4.0, + 'wRank':None, 'uRank':None,'zetaInit':1.0, 'nuInit':-4.0, 'batch_first':False} ''' @@ -68,7 +72,7 @@ srnn2 = SRNN2(numInput, numClasses, hiddenDim0, hiddenDim1, cellType, dropoutProbability_l0, dropoutProbability_l1, - **cellArgs).to(device) + **cellArgs).to(device) trainer = SRNNTrainer(srnn2, learningRate, lossType='xentropy', device=device) trainer.train(brickSize, batchSize, epochs, x_train, x_val, y_train, y_val, diff --git a/examples/pytorch/SRNN/process_google.py b/examples/pytorch/SRNN/process_google.py index afce89a33..223e82848 100644 --- a/examples/pytorch/SRNN/process_google.py +++ b/examples/pytorch/SRNN/process_google.py @@ -1,4 +1,3 @@ - # Google Speech data feature extraction # Note that the 'testing_list.txt' and 'validation_list.txt' @@ -27,7 +26,7 @@ import numpy as np import scipy.io.wavfile as r import random - +import h5py # Various version can be created depending on which labels are chosen and which # are moved to the negative (noise) set. We use LABELMAP13 for most of our @@ -131,15 +130,15 @@ def createFileList(audioFileDir, testingList, np.save(outPrefix + 'file_test.npy', testingList) np.save(outPrefix + 'file_val.npy', validationList) - -def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen, - winstep): +def extractFeatures(fileList, LABELMAP, numLabels, maxlen, numFilt, samplerate, + winlen, winstep, X, Y): ''' Reads audio from files specified in fileList, extracts features and assigns labels to them. fileList: List of audio file names. LABELMAP: The label map to use. + numLabels: No of labels maxlen: maximum length of the audio file. Every other files is zero padded to maxlen numFilt: number of filters to use in MFCC @@ -147,62 +146,42 @@ def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen, assumed to be of same sample rate winLen: winLen to use for fbank in seconds winstep: winstep for fbank in seconds + X: dataset input + Y: dataset ground-truth ''' - def __extractFeatures(stackedWav, numSteps, numFilt, - samplerate, winlen, winstep): - ''' - [number of waves, Len(wave)] - returns [number of waves, numSteps, numFilt] - All waves are assumed to be of fixed length - ''' - assert stackedWav.ndim == 2, 'Should be [number of waves, len(wav)]' - extractedList = [] + def __extractFeatures(sample, numSteps, numFilt, samplerate, winLen, + winstep): eps = 1e-10 - for sample in stackedWav: - temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen, - winstep=winstep, nfilt=numFilt, - winfunc=np.hamming) - temp = np.log(temp + eps) - assert temp.ndim == 2, 'Should be [numSteps, numFilt]' - assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]' - extractedList.append(temp) - return np.array(extractedList) - + temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen, + winstep=winstep, nfilt=numFilt, winfunc=np.hamming) + temp = [np.log(temp + eps)] + assert temp.ndim == 2, 'Should be [numSteps, numFilt]' + assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]' + return np.array(temp) + fileList = np.array(fileList) - assert(fileList.ndim == 1) - allSamples = np.zeros((len(fileList), maxlen)) i = 0 - for i,file in enumerate(fileList): - _, x = r.read(file) - assert(len(x) <= maxlen) - allSamples[i, maxlen - len(x):maxlen] += x - i += 1 - assert allSamples.ndim == 2 winstepSamples = winstep * samplerate winlenSamples = winlen * samplerate - assert(winstepSamples.is_integer()) - assert(winlenSamples.is_integer()) numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1) - x = __extractFeatures(allSamples, numSteps, numFilt, samplerate, winlen, - winstep) - y_ = [t.split('/') for t in fileList] - y_ = [t[-2] for t in y_] - y = [] - for t in y_: - assert t in LABELMAP - y.append(LABELMAP[t]) - - def to_onehot(indices, numClasses): - assert indices.ndim == 1 - n = max(indices) + 1 - assert numClasses <= n - b = np.zeros((len(indices), numClasses)) - b[np.arange(len(indices)), indices] = 1 - return b - y = to_onehot(np.array(y), np.max(y) + 1) - return x, y + for i, file in enumerate(fileList): + print('Processing', file) + sample = np.zeros(maxlen) + _, data = r.read(file) + sample[maxlen-len(data):maxlen] += data + x = __extractFeatures(sample, numSteps, numFilt, samplerate, winlen, + winstep) + X[i] = x + y_ = file.split('/') + y_ = y_[-2] + y = LABELMAP[y_] + b = np.zeros(numLabels) + b[y] = 1 + Y[i] = b + i += 1 + print('Total Processed Samples:', i) -if __name__=='__main__': +if __name__ == '__main__': # ----------------------------------------- # # Configuration # ----------------------------------------- # @@ -216,7 +195,7 @@ def to_onehot(indices, numClasses): numLabels = 13 # 0 not assigned samplerate=16000 # For creation of training file list, testing file list - # and validation list. + # and validation list. audioFileDir = './GoogleSpeech/Raw/' testingList = './GoogleSpeech/Raw/testing_list.txt' validationList = './GoogleSpeech/Raw/validation_list.txt' @@ -249,17 +228,26 @@ def to_onehot(indices, numClasses): trainFileList_ = [audioFileDir + x for x in trainFileList] valFileList_ = [audioFileDir + x for x in valFileList] testFileList_ = [audioFileDir + x for x in testFileList] - x_test, y_test = extractFeatures(testFileList_, LABELMAP, maxlen, numFilt, + def generateDataset(datasetType, fileList, LABELMAP, numLabels, maxlen, + numFilt, samplerate, winlen, winstep): + winstepSamples = winstep * samplerate + winlenSamples = winlen * samplerate + assert(winstepSamples.is_integer()) + assert(winlenSamples.is_integer()) + numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1) + + with h5py.File(outDir+datasetType+'.h5') as f: + x = f.create_dataset("X", shape=(len(fileList), numSteps, numFilt), + dtype=np.float64) + y = f.create_dataset("Y", shape=(len(fileList), numLabels), + dtype=np.float64) + extractFeatures(fileList, LABELMAP, numLabels, maxlen, numFilt, + samplerate, winlen, winstep, x, y) + print(datasetType, 'dataset generated') + + generateDataset('test', testFileList_, LABELMAP, numLabels, maxlen, numFilt, samplerate, winlen, winstep) - x_val, y_val = extractFeatures(valFileList_, LABELMAP, maxlen, numFilt, + generateDataset('val', valFileList_, LABELMAP, numLabels, maxlen, numFilt, samplerate, winlen, winstep) - x_train, y_train = extractFeatures(trainFileList_, LABELMAP, maxlen, - numFilt, samplerate, winlen, winstep) - np.save(outDir + 'x_train', x_train);np.save(outDir + 'y_train', y_train) - np.save(outDir + 'x_test', x_test);np.save(outDir + 'y_test', y_test) - np.save(outDir + 'x_val', x_val);np.save(outDir + 'y_val', y_val) - print("Shape train", x_train.shape, y_train.shape) - print("Shape test", x_test.shape, y_test.shape) - print("Shape val", x_val.shape, y_val.shape) - - + generateDataset('train', trainFileList_, LABELMAP, numLabels, maxlen, + numFilt, samplerate, winlen, winstep) diff --git a/examples/pytorch/requirements.txt b/examples/pytorch/requirements.txt new file mode 100644 index 000000000..60af0cc89 --- /dev/null +++ b/examples/pytorch/requirements.txt @@ -0,0 +1,2 @@ +python-speech-features==0.6 +h5py==2.9.0 From 25b0210ee2b088f0df7a188fb9e6729865d6056e Mon Sep 17 00:00:00 2001 From: Pushkal Katara Date: Thu, 22 Aug 2019 02:45:54 +0530 Subject: [PATCH 2/3] clean --- examples/pytorch/SRNN/process_google.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/pytorch/SRNN/process_google.py b/examples/pytorch/SRNN/process_google.py index 223e82848..1aaf8aca1 100644 --- a/examples/pytorch/SRNN/process_google.py +++ b/examples/pytorch/SRNN/process_google.py @@ -158,7 +158,7 @@ def __extractFeatures(sample, numSteps, numFilt, samplerate, winLen, assert temp.ndim == 2, 'Should be [numSteps, numFilt]' assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]' return np.array(temp) - + fileList = np.array(fileList) i = 0 winstepSamples = winstep * samplerate @@ -169,11 +169,9 @@ def __extractFeatures(sample, numSteps, numFilt, samplerate, winLen, sample = np.zeros(maxlen) _, data = r.read(file) sample[maxlen-len(data):maxlen] += data - x = __extractFeatures(sample, numSteps, numFilt, samplerate, winlen, + X[i] = __extractFeatures(sample, numSteps, numFilt, samplerate, winlen, winstep) - X[i] = x - y_ = file.split('/') - y_ = y_[-2] + y_ = file.split('/')[-2] y = LABELMAP[y_] b = np.zeros(numLabels) b[y] = 1 From 58b2cb559373242e699b38c00b2032fa7285df8d Mon Sep 17 00:00:00 2001 From: Pushkal Katara Date: Thu, 22 Aug 2019 02:50:36 +0530 Subject: [PATCH 3/3] clean --- examples/pytorch/SRNN/process_google.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/pytorch/SRNN/process_google.py b/examples/pytorch/SRNN/process_google.py index 1aaf8aca1..e02743291 100644 --- a/examples/pytorch/SRNN/process_google.py +++ b/examples/pytorch/SRNN/process_google.py @@ -154,10 +154,10 @@ def __extractFeatures(sample, numSteps, numFilt, samplerate, winLen, eps = 1e-10 temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen, winstep=winstep, nfilt=numFilt, winfunc=np.hamming) - temp = [np.log(temp + eps)] + temp = np.log(temp + eps) assert temp.ndim == 2, 'Should be [numSteps, numFilt]' assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]' - return np.array(temp) + return np.array([temp]) fileList = np.array(fileList) i = 0