-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_2_Audio_CNN_Function.py
145 lines (116 loc) · 6.19 KB
/
2_2_Audio_CNN_Function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#=================================================
# ML_Project__Auditory Attention Detection (on a part of KULeuven Dataset)
# 2_2_Audio_CNN_Function
# Foad Moslem ([email protected]) - Researcher | Aerodynamics
# Using Python 3.11.4 & Spyder IDE
#=================================================
#%%
# Spyder/IPython convenience: clear the console and wipe the interactive
# namespace before the rest of the file runs. This is strictly best-effort;
# outside an IPython session (plain ``python`` or a regular import) it is a
# no-op.
try:
    from IPython import get_ipython

    _shell = get_ipython()  # None when no IPython shell is active
    if _shell is not None:
        # run_line_magic() replaces the deprecated InteractiveShell.magic().
        _shell.run_line_magic('clear', '')
        _shell.run_line_magic('reset', '-f')
except Exception:
    # IPython missing or a magic unavailable in this front end: ignore, but
    # let KeyboardInterrupt/SystemExit propagate (a bare ``except`` would not).
    pass
#%% (Audio_CNN) Convolutional Neural Network For Audios
import torch.nn as nn # import the neural network module
# Defining the convolutional neural network class
class Audio_CNN(nn.Module):
    """Five-stage 2D CNN that maps a single-channel input to a fixed-size map.

    Every stage applies ``Conv2d -> MaxPool2d -> BatchNorm2d``. After the
    fifth stage one ReLU and one dropout are applied, and the result is
    resized with adaptive average pooling to ``output_size``.

    Args:
        dropout_p: Probability used by the final dropout layer.
        output_size: ``(height, width)`` produced by the final adaptive
            average pooling.

    Shape:
        Input: ``(N, 1, H, W)``. With the kernel, dilation, padding and
        pooling sizes below, the height shrinks by 6 + 16 + 32 = 54 before
        the final (2, 2) pooling and the width is divided by 4, 2 and 2, so
        ``H >= 56`` and ``W >= 16`` are needed for a non-empty feature map.
        Output: ``(N, 1, output_size[0], output_size[1])``.

    NOTE(review): there is no activation between the convolution stages
    (only max pooling); ReLU and dropout are applied once, after stage 5.
    This is kept exactly as originally implemented so that outputs and
    saved ``state_dict``s stay compatible -- confirm it is intentional.
    """

    def __init__(
        self,
        *,
        dropout_p: float = 0.4,
        output_size: tuple[int, int] = (48, 16),
    ) -> None:
        super().__init__()

        # Convolutions. The width padding offsets the (dilated) kernel width,
        # so only the height is reduced by conv2, conv3 and conv4.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(1, 7), dilation=(1, 1), padding=(0, 3))
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(7, 1), dilation=(1, 1), padding=(0, 0))
        self.conv3 = nn.Conv2d(32, 32, kernel_size=(3, 5), dilation=(8, 8), padding=(0, 16))
        self.conv4 = nn.Conv2d(32, 32, kernel_size=(3, 3), dilation=(16, 16), padding=(0, 16))
        self.conv5 = nn.Conv2d(32, 1, kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0))

        # Max pooling. A (1, 1) kernel leaves the tensor unchanged; those
        # layers are kept so every stage has the same structure.
        self.maxpool1 = nn.MaxPool2d(kernel_size=(1, 1))
        self.maxpool2 = nn.MaxPool2d(kernel_size=(1, 4))
        self.maxpool3 = nn.MaxPool2d(kernel_size=(1, 2))
        self.maxpool4 = nn.MaxPool2d(kernel_size=(1, 1))
        self.maxpool5 = nn.MaxPool2d(kernel_size=(2, 2))

        # Batch normalization, one per stage (channel count of that stage).
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.bn2 = nn.BatchNorm2d(num_features=32)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.bn4 = nn.BatchNorm2d(num_features=32)
        self.bn5 = nn.BatchNorm2d(num_features=1)

        # Head: dropout, ReLU and the fixed-size adaptive pooling.
        self.drop = nn.Dropout(p=dropout_p)
        self.relu = nn.ReLU()
        self.adaptive_pool = nn.AdaptiveAvgPool2d(output_size)

    def forward(self, x):
        """Run the network on ``x`` and return the pooled feature map.

        Args:
            x: Input tensor with one channel, ``(N, 1, H, W)``.

        Returns:
            Tensor of shape ``(N, 1, *output_size)``.
        """
        # Actual order per stage: Conv -> MaxPool -> BatchNorm.
        stages = (
            (self.conv1, self.maxpool1, self.bn1),
            (self.conv2, self.maxpool2, self.bn2),
            (self.conv3, self.maxpool3, self.bn3),
            (self.conv4, self.maxpool4, self.bn4),
            (self.conv5, self.maxpool5, self.bn5),
        )
        for conv, pool, bn in stages:
            x = bn(pool(conv(x)))

        # Single non-linearity and dropout after the last stage, then resize.
        x = self.drop(self.relu(x))
        return self.adaptive_pool(x)
#%%
""" Explanation:
#=================================================
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1,
return_indices=False, ceil_mode=False)
Applies a 2D max pooling over an input signal composed of several input
planes.
kernel_size: the size of the window to take a max over
stride: the stride of the window. Default value is kernel_size
padding: Implicit negative infinity padding to be added on both
sides
dilation: a parameter that controls the stride of elements in the
window
return_indices: if True, will return the max indices along with the
outputs. Useful for torch.nn.MaxUnpool2d later
ceil_mode: when True, will use ceil instead of floor to compute
the output shape
#=================================================
torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True,
track_running_stats=True, device=None, dtype=None)
Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs
with additional channel dimension)
num_features: C from an expected input of size (N,C,H,W)
eps: a value added to the denominator for numerical stability.
Default: 1e-5
momentum: the value used for the running_mean and running_var
computation. Can be set to None for cumulative moving average
(i.e. simple average). Default: 0.1
affine: a boolean value that when set to True, this module has learnable
affine parameters. Default: True
track_running_stats: a boolean value that when set to True, this module
tracks the running mean and variance, and when set to False, this
module does not track such statistics, and initializes statistics
buffers running_mean and running_var as None. When these buffers
are None, this module always uses batch statistics in both
training and eval modes. Default: True
#=================================================
torch.nn.Dropout(p=0.5, inplace=False)
During training, randomly zeroes some of the elements of the input
tensor with probability p using samples from a Bernoulli distribution.
Each channel will be zeroed out independently on every forward call.
This has proven to be an effective technique for regularization and
preventing the co-adaptation of neurons.
p: probability of an element to be zeroed. Default: 0.5
inplace: If set to True, will do this operation in-place. Default: False
#=================================================
torch.nn.ReLU(inplace=False)
Applies the rectified linear unit function element-wise
inplace: can optionally do the operation in-place. Default: False
#=================================================
"""