Skip to content
This repository was archived by the owner on Aug 21, 2020. It is now read-only.

Commit e657950

Browse files
committed
fixes conflicts from previous commit. Adds files from the master branch?
2 parents 17c5755 + 7eb0227 commit e657950

16 files changed

+866
-1282
lines changed

Classification_baselines/LibriSpeech/LibriSpeech-100.ipynb

-804
This file was deleted.

Classification_baselines/LibriSpeech/LibriSpeech.ipynb

+236-460
Large diffs are not rendered by default.

Utils/models.py

+108-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
import torch
12
from torch import nn
23
import torch.nn.functional as F
34
import numpy as np
4-
5+
import os.path
56

67
def new_size_conv(size, kernel, stride=1, padding=0):
    '''
    Computes the output length of a convolution along one dimension.
    Args:
        size: input length along the dimension
        kernel: convolution kernel size
        stride: convolution stride
        padding: padding amount (counted twice in the formula)
    Returns:
        result of np.floor, i.e. a floating point value rather than an int
    '''
    effective_span = size + 2*padding - (kernel - 1) - 1
    return np.floor(effective_span/stride + 1)
@@ -290,7 +291,85 @@ def forward(self, x):
290291

291292
return out
292293

293-
294+
295+
class audio_cnn_block(nn.Module):
    '''
    1D convolution block used to build audio cnn classifiers. Chains
    Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d.
    Args:
        n_input: input channels
        n_out: output channels
        kernel_size: convolution kernel size
    '''
    def __init__(self, n_input, n_out, kernel_size):
        super(audio_cnn_block, self).__init__()
        layers = [
            # Padding is fixed at 1 whatever kernel_size is.
            nn.Conv1d(n_input, n_out, kernel_size, padding=1),
            nn.BatchNorm1d(n_out),
            nn.ReLU(),
            # Pooling window of 4 applied with stride 4.
            nn.MaxPool1d(kernel_size=4, stride=4),
        ]
        self.cnn_block = nn.Sequential(*layers)

    def forward(self, x):
        '''
        Applies the block to x and returns the result.
        '''
        features = self.cnn_block(x)
        return features
314+
315+
316+
class audio_tiny_cnn(nn.Module):
    '''
    Template for convolutional audio classifiers: three audio_cnn_block
    stages, then a hidden fully connected layer with ReLU, then a linear
    output layer.
    '''
    def __init__(self, cnn_sizes, n_hidden, kernel_size, n_classes):
        '''
        Init
        Args:
            cnn_sizes: List of five sizes. Entries 0 to 3 are the channel
                counts chained through the three convolution blocks; entry 4
                is the input width of the first fully connected layer.
            n_hidden: number of hidden units in the first fully connected layer
            kernel_size: convolution kernel size
            n_classes: number of speakers to classify
        '''
        super(audio_tiny_cnn, self).__init__()
        self.down_path = nn.ModuleList()
        # Block i maps cnn_sizes[i] channels to cnn_sizes[i + 1] channels.
        for idx in range(3):
            stage = audio_cnn_block(cnn_sizes[idx], cnn_sizes[idx + 1],
                                    kernel_size)
            self.down_path.append(stage)
        hidden_layer = nn.Linear(cnn_sizes[4], n_hidden)
        self.fc = nn.Sequential(hidden_layer, nn.ReLU())
        self.out = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        '''
        Runs x through the convolution stages, flattens everything but the
        first dimension and applies the fully connected layers.
        '''
        for stage in self.down_path:
            x = stage(x)
        flat = x.view(x.size(0), -1)
        hidden = self.fc(flat)
        return self.out(hidden)
349+
350+
351+
def MFCC_cnn_classifier(n_classes):
    '''
    Builds speaker classifier that ingests MFCC's
    Args:
        n_classes: number of speakers to classify (size of the output layer)
    Returns:
        audio_tiny_cnn built with kernel size 3 and 512 hidden units
    '''
    # NOTE(review): 20 input channels presumably matches the number of MFCC
    # coefficients produced upstream - confirm against the transform used.
    in_size = 20
    n_hidden = 512
    # Last entry is the width of the flattened convolution output that feeds
    # the first fully connected layer.
    sizes_list = [in_size, 2*in_size, 4*in_size, 8*in_size, 8*in_size]
    # Forward the caller's n_classes instead of a hard-coded 125.
    return audio_tiny_cnn(cnn_sizes=sizes_list, n_hidden=n_hidden,
                          kernel_size=3, n_classes=n_classes)
360+
361+
362+
def ft_cnn_classifer(n_classes):
    '''
    Builds speaker classifier that ingests the abs value of fourier transforms
    Args:
        n_classes: number of speakers to classify (size of the output layer)
    Returns:
        audio_tiny_cnn built with kernel size 7 and 512 hidden units
    '''
    in_size = 94
    n_hidden = 512
    # Last entry is the width of the flattened convolution output that feeds
    # the first fully connected layer.
    # NOTE(review): the factor 14 presumably is the length left after the
    # three conv/pool stages for the expected input - confirm.
    sizes_list = [in_size, in_size, 2*in_size, 4*in_size, 14*4*in_size]
    # Forward the caller's n_classes instead of a hard-coded 125.
    return audio_tiny_cnn(cnn_sizes=sizes_list, n_hidden=n_hidden,
                          kernel_size=7, n_classes=n_classes)
371+
372+
294373
def weights_init(m):
295374
if isinstance(m, nn.Conv2d):
296375
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
@@ -302,4 +381,30 @@ def weights_init(m):
302381
elif isinstance(m, nn.Linear):
303382
nn.init.xavier_normal_(m.weight.data)
304383
nn.init.constant_(m.bias, 0)
305-
384+
385+
def save_checkpoint(model=None, optimizer=None, epoch=None,
                    data_descriptor=None, loss=None, accuracy=None, path='./',
                    filename='checkpoint', ext='.pth.tar'):
    '''
    Saves a training checkpoint with torch.save.
    Args:
        model: model whose state_dict is stored (required)
        optimizer: optimizer whose state_dict is stored; if None, None is
            stored under the 'optimizer' key
        epoch: value stored under 'epoch'
        data_descriptor: value stored under 'dataset'
        loss: value stored under 'loss'
        accuracy: value stored under 'accuracy'
        path: prefix of the output file. It is concatenated as-is with
            filename and ext, so a directory must end with a separator
        filename: file name without extension
        ext: file extension
    '''
    # optimizer defaults to None, so only ask for its state when one was given.
    optimizer_state = None
    if optimizer is not None:
        optimizer_state = optimizer.state_dict()
    state = {
        'epoch': epoch,
        # String form of the model's bound `type` method; load_checkpoint
        # compares this same string to detect an architecture mismatch.
        'arch': str(model.type),
        'state_dict': model.state_dict(),
        'optimizer': optimizer_state,
        'loss': loss,
        'accuracy': accuracy,
        'dataset': data_descriptor
    }
    torch.save(state, path+filename+ext)
398+
399+
400+
def load_checkpoint(model=None, optimizer=None, checkpoint=None):
    '''
    Restores model (and optionally optimizer) state from a checkpoint file
    and prints a summary of the stored metadata.
    Args:
        model: model to load the stored 'state_dict' into
        optimizer: if not None, the stored 'optimizer' state is loaded into it
        checkpoint: path to the checkpoint file
    Raises:
        AssertionError: if the file does not exist or the stored 'arch'
            string differs from str(model.type)
    '''
    assert os.path.isfile(checkpoint), 'Checkpoint not found, aborting load'
    chpt = torch.load(checkpoint)
    assert str(model.type) == chpt['arch'], \
        'Model arquitecture mismatch, aborting load'
    model.load_state_dict(chpt['state_dict'])
    if optimizer is not None:
        # load_state_dict must be called with the saved state; subscripting
        # the bound method raises TypeError.
        optimizer.load_state_dict(chpt['optimizer'])
    print('Succesfully loaded checkpoint \nDataset: %s \nEpoch: %s \nLoss: %s'
          '\nAccuracy: %s' % (chpt['dataset'], chpt['epoch'], chpt['loss'],
                              chpt['accuracy']))

Utils/transformations.py

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import torch
2+
import librosa as libr
3+
import numpy as np
4+
5+
6+
class ToMFCC:
    '''
    Transformation to convert soundfile loaded via LibriSpeechDataset to Mel-
    frequency cepstral coefficients (MFCCs)
    Args:
        number_of_mels: Number of mel bands (n_mels) used for the intermediate
                        mel spectrogram
    Returns:
        torch.float tensor
    '''
    def __init__(self, number_of_mels=128):
        self.number_of_mels = number_of_mels

    def __call__(self, y):
        dims = y.shape
        # Flatten to a 1-D signal of length dims[1].
        # NOTE(review): assumes y has shape (1, n_samples) - confirm with the
        # dataset loader.
        signal = np.reshape(y, (dims[1],))
        # Signal and sample rate are passed by keyword: recent librosa
        # releases no longer accept them positionally.
        mel = libr.feature.melspectrogram(y=signal, sr=16000,
                                          n_mels=self.number_of_mels,
                                          fmax=8000)
        y = libr.feature.mfcc(S=libr.power_to_db(mel))
        y = torch.from_numpy(y)
        return y.float()
25+
26+
27+
class STFT:
    '''
    Short-time Fourier transform (STFT) for librosa dataset
    Args:
        phase: If true, returns the magnitude followed by the phase,
               concatenated along the first dimension of the transposed
               result; if false only the magnitude is returned
    Returns:
        torch.float tensor
    '''
    def __init__(self, phase=False):
        self.phase = phase

    def __call__(self, y):
        n_samples = y.shape[1]
        # Flatten to a 1-D signal before the transform.
        spectrum = libr.core.stft(np.reshape(y, (n_samples,)))
        # Both outputs are transposed relative to what stft returns.
        magnitude = torch.from_numpy(np.abs(spectrum)).permute(1, 0)
        if not self.phase:
            return magnitude.float()
        angle = torch.from_numpy(np.angle(spectrum)).permute(1, 0)
        return torch.cat((magnitude, angle), dim=0).float()

cyphercat/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from .__version__ import __version__
44

55
from .utils import *
6+
from .train import *
67
from .models import *
78
from .metrics import *
89
from .load_data import *
10+
from .definitions import *

cyphercat/definitions.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import os
2+
3+
# Absolute path of the directory that contains this module.
CYCAT_DIR = os.path.split(os.path.abspath(__file__))[0]
# Parent directory of CYCAT_DIR.
REPO_DIR = os.path.dirname(CYCAT_DIR)

cyphercat/load_data.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def custom_preprocessor(out_dir=''):
1616
----------
1717
out_dir : string
1818
directory of unpacked data set
19+
1920
"""
2021

2122
# Get name of data set from output directory
@@ -56,23 +57,18 @@ def custom_preprocessor(out_dir=''):
5657
# For LFW
5758
if 'lfw' in data_name.lower():
5859

59-
os.rename(os.path.join(out_dir, 'lfw/'), os.path.join(out_dir, 'lfw_original/'))
60-
61-
lfw_dir = os.path.join(out_dir, 'lfw_original/')
60+
lfw_dir = out_dir + '_original/'
61+
os.rename(out_dir, lfw_dir)
62+
6263
people_dir = os.listdir(lfw_dir)
6364

6465
num_per_class = 20
6566

66-
new_dir = os.path.join(out_dir, 'lfw_' + str(num_per_class))
67-
68-
if not os.path.isdir(new_dir):
69-
os.makedirs(new_dir)
70-
7167
for p in people_dir:
7268
imgs = os.listdir(os.path.join(lfw_dir, p))
7369
if len(imgs) >= num_per_class:
74-
shutil.copytree(os.path.join(lfw_dir, p), os.path.join(new_dir, p))
75-
70+
shutil.copytree(os.path.join(lfw_dir, p), os.path.join(out_dir, p))
71+
7672
print('{} successfully downloaded and preprocessed.'.format(data_name))
7773

7874

0 commit comments

Comments
 (0)