From 9fc0fb78a3a384af4dfd84b19f38558a73b6e1ba Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Thu, 17 Feb 2022 13:52:04 -0600 Subject: [PATCH 1/4] Make SE layer, inverted bottleneck, and MB utils generic for use with other models --- src/convnets/mobilenet.jl | 164 ++++++++++---------------------------- src/layers.jl | 53 ++++++++++++ src/utilities.jl | 8 ++ 3 files changed, 102 insertions(+), 123 deletions(-) diff --git a/src/convnets/mobilenet.jl b/src/convnets/mobilenet.jl index 1c9d574f9..e208c74cb 100644 --- a/src/convnets/mobilenet.jl +++ b/src/convnets/mobilenet.jl @@ -1,45 +1,5 @@ -# This is a utility function for making sure that all layers have a channel size divisible by 8. -function _make_divisible(v, divisor, min_value = nothing) - if isnothing(min_value) - min_value = divisor - end - new_v = max(min_value, floor(Int, v + divisor / 2) ÷ divisor * divisor) - # Make sure that round down does not go down by more than 10% - (new_v < 0.9 * v) ? new_v + divisor : new_v -end - # MobileNetv2 -""" - invertedresidualv2(inplanes, outplanes, stride, expand_ratio) - -Create a basic inverted residual block for MobileNetv2 -([reference](https://arxiv.org/abs/1801.04381)). - -# Arguments -- `inplanes`: The number of input feature maps -- `outplanes`: The number of output feature maps -- `stride`: The stride of the convolutional layer, has to be either 1 or 2 -- `expand_ratio`: The ratio of the inner bottleneck feature maps over the input feature maps -""" -function invertedresidualv2(inplanes, outplanes, stride, expand_ratio) - @assert stride in [1, 2] "`stride` has to be 1 or 2" - hidden_planes = floor(Int, inplanes * expand_ratio) - - if expand_ratio == 1 - invres = Chain(conv_bn((3, 3), hidden_planes, hidden_planes, relu6; - bias = false, stride, pad = 1, groups = hidden_planes)..., - conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)...) - else - invres = Chain(conv_bn((1, 1), inplanes, hidden_planes, relu6; bias = false)..., - conv_bn((3, 3), hidden_planes, hidden_planes, relu6; - bias = false, stride, pad = 1, groups = hidden_planes)..., - conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)...) - end - - (stride == 1 && inplanes == outplanes) ? SkipConnection(invres, +) : invres -end - """ mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000) @@ -59,21 +19,23 @@ Create a MobileNetv2 model. """ function mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000) # building first layer - inplanes = _make_divisible(32 * width_mult, width_mult == 0.1 ? 4 : 8) + inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8) layers = [] append!(layers, conv_bn((3, 3), 3, inplanes, stride = 2)) # building inverted residual blocks for (t, c, n, s) in configs - outplanes = _make_divisible(c * width_mult, width_mult == 0.1 ? 4 : 8) + outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8) for i in 1:n - push!(layers, invertedresidualv2(inplanes, outplanes, i == 1 ? s : 1, t)) + push!(layers, invertedresidual(3, inplanes, inplanes * t, outplanes, relu6; + stride = i == 1 ? s : 1)) inplanes = outplanes end end # building last several layers - outplanes = (width_mult > 1.0) ? _make_divisible(max_width * width_mult, width_mult == 0.1 ? 4 : 8) : max_width + outplanes = (width_mult > 1) ? _round_channels(max_width * width_mult, width_mult == 0.1 ? 
4 : 8) :
+                max_width
 
     return Chain(Chain(layers...,
                        conv_bn((1, 1), inplanes, outplanes, relu6, bias = false)...),
@@ -129,52 +91,6 @@ classifier(m::MobileNetv2) = m.layers[2:end]
 
 # MobileNetv3
 
-"""
-    selayer(channels, reduction = 4)
-
-Squeeze and Excitation layer used by MobileNetv3
-([reference](https://arxiv.org/abs/1905.02244)).
-"""
-selayer(channels, reduction = 4) =
-    SkipConnection(Chain(AdaptiveMeanPool((1, 1)),
-                         conv_bn((1, 1), channels, channels // reduction, relu; bias = false)...,
-                         conv_bn((1, 1), channels // reduction, channels, hardσ)...,), .*)
-
-"""
-    invertedresidualv3(inplanes, hidden_planes, outplanes, kernel_size, stride, use_se, use_hs)
-
-Create a basic inverted residual block for MobileNetv3
-([reference](https://arxiv.org/abs/1905.02244)).
-
-# Arguments
-- `inplanes`: The number of input feature maps
-- `hidden_planes`: The number of feature maps in the hidden layer
-- `outplanes`: The number of output feature maps
-- `kernel_size`: The kernel size of the convolutional layers
-- `stride`: The stride of the convolutional kernel, has to be either 1 or 2
-- `use_se`: If `true`, Squeeze and Excitation layer will be used
-- `use_hs`: If `true`, Hard-Swish activation function will be used
-"""
-function invertedresidualv3(inplanes, hidden_planes, outplanes, kernel_size,
-                            stride, use_se, use_hs)
-    @assert stride in [1, 2] "`stride` has to be 1 or 2"
-
-    if inplanes == hidden_planes
-        invres = Chain(conv_bn((kernel_size, kernel_size), hidden_planes, hidden_planes, use_hs ? hardswish : relu;
-                               bias = false, stride, pad = (kernel_size - 1) ÷ 2, groups = hidden_planes)...,
-                       use_se ? selayer(hidden_planes) : identity,
-                       conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)...)
-    else
-        invres = Chain(conv_bn((1, 1), inplanes, hidden_planes, use_hs ? hardswish : relu; bias = false)...,
-                       conv_bn((kernel_size, kernel_size), hidden_planes, hidden_planes, use_hs ? hardswish : relu;
-                               bias = false, stride, pad = (kernel_size - 1) ÷ 2, groups = hidden_planes)...,
-                       use_se ? selayer(hidden_planes) : identity,
-                       conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)...)
-    end
-
-    (stride == 1 && inplanes == outplanes) ? SkipConnection(invres, +) : invres
-end
-
 """
     mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000)
 
@@ -189,7 +105,7 @@ Create a MobileNetv3 model.
   - `k::Int` - The size of the convolutional kernel
   - `t::Number` - The expansion factor controlling the number of feature maps in the hidden layer
   - `c::Int` - The number of output feature maps for a given block
-  - `use_se::Bool` - Whether to use Squeeze and Excitation layer
+  - `r::Union{Int, Nothing}` - The reduction factor (`>= 1`, or `nothing` to skip) for squeeze and excite layers
   - `use_hs::Bool` - Whether to use Hard-Swish activation function
   - `s::Int` - The stride of the convolutional kernel
 - `max_width`: The maximum number of feature maps in any layer of the network
 - `nclasses`: The number of output classes
""" function mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000) # building first layer - inplanes = _make_divisible(16 * width_mult, 8) + inplanes = _round_channels(16 * width_mult, 8) layers = [] append!(layers, conv_bn((3, 3), 3, inplanes, hardswish; stride = 2)) explanes = 0 # building inverted residual blocks - for (k, t, c, use_se, use_hs, s) in configs + for (k, t, c, r, use_hs, s) in configs # inverted residual layers - outplanes = _make_divisible(c * width_mult, 8) - explanes = _make_divisible(inplanes * t, 8) - push!(layers, invertedresidualv3(inplanes, explanes, outplanes, k, s, use_se, use_hs)) + outplanes = _round_channels(c * width_mult, 8) + explanes = _round_channels(inplanes * t, 8) + activation = use_hs ? hardswish : relu + push!(layers, invertedresidual(k, inplanes, explanes, outplanes, activation; + stride = s, reduction = r)) inplanes = outplanes end # building last several layers output_channel = max_width - output_channel = width_mult > 1.0 ? _make_divisible(output_channel * width_mult, 8) : output_channel + output_channel = width_mult > 1.0 ? _round_channels(output_channel * width_mult, 8) : output_channel classifier = ( Dense(explanes, output_channel, hardswish), Dropout(0.2), @@ -228,35 +146,35 @@ end mobilenetv3_configs = Dict( :small => [ # k, t, c, SE, HS, s - (3, 1, 16, true, false, 2), - (3, 4.5, 24, false, false, 2), - (3, 3.67, 24, false, false, 1), - (5, 4, 40, true, true, 2), - (5, 6, 40, true, true, 1), - (5, 6, 40, true, true, 1), - (5, 3, 48, true, true, 1), - (5, 3, 48, true, true, 1), - (5, 6, 96, true, true, 2), - (5, 6, 96, true, true, 1), - (5, 6, 96, true, true, 1), + (3, 1, 16, 4, false, 2), + (3, 4.5, 24, nothing, false, 2), + (3, 3.67, 24, nothing, false, 1), + (5, 4, 40, 4, true, 2), + (5, 6, 40, 4, true, 1), + (5, 6, 40, 4, true, 1), + (5, 3, 48, 4, true, 1), + (5, 3, 48, 4, true, 1), + (5, 6, 96, 4, true, 2), + (5, 6, 96, 4, true, 1), + (5, 6, 96, 4, true, 1), ], :large => [ # k, t, c, SE, HS, s - (3, 1, 16, false, false, 1), - (3, 4, 24, false, false, 2), - (3, 3, 24, false, false, 1), - (5, 3, 40, true, false, 2), - (5, 3, 40, true, false, 1), - (5, 3, 40, true, false, 1), - (3, 6, 80, false, true, 2), - (3, 2.5, 80, false, true, 1), - (3, 2.3, 80, false, true, 1), - (3, 2.3, 80, false, true, 1), - (3, 6, 112, true, true, 1), - (3, 6, 112, true, true, 1), - (5, 6, 160, true, true, 2), - (5, 6, 160, true, true, 1), - (5, 6, 160, true, true, 1) + (3, 1, 16, nothing, false, 1), + (3, 4, 24, nothing, false, 2), + (3, 3, 24, nothing, false, 1), + (5, 3, 40, 4, false, 2), + (5, 3, 40, 4, false, 1), + (5, 3, 40, 4, false, 1), + (3, 6, 80, nothing, true, 2), + (3, 2.5, 80, nothing, true, 1), + (3, 2.3, 80, nothing, true, 1), + (3, 2.3, 80, nothing, true, 1), + (3, 6, 112, 4, true, 1), + (3, 6, 112, 4, true, 1), + (5, 6, 160, 4, true, 2), + (5, 6, 160, 4, true, 1), + (5, 6, 160, 4, true, 1) ] ) diff --git a/src/layers.jl b/src/layers.jl index 34d99a3e0..c35e1307b 100644 --- a/src/layers.jl +++ b/src/layers.jl @@ -90,6 +90,59 @@ function skip_identity(inplanes, outplanes) end skip_identity(inplanes, outplanes, downsample) = skip_identity(inplanes, outplanes) +""" + squeeze_excite(channels, reduction = 4) + +Squeeze and excitation layer used by MobileNet variants +([reference](https://arxiv.org/abs/1905.02244)). 
+ +# Arguments +- `channels`: the number of input/output feature maps +- `reduction = 4`: the reduction factor for the number of hidden feature maps + (must be >= 1) +""" +function squeeze_excite(channels, reduction = 4) + @assert (reduction >= 1) "`reduction` must be >= 1" + SkipConnection(Chain(AdaptiveMeanPool((1, 1)), + conv_bn((1, 1), channels, channels ÷ reduction, relu; bias = false)..., + conv_bn((1, 1), channels ÷ reduction, channels, hardσ)...), .*) +end + +""" + invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation = relu; + stride, reduction = nothing) + +Create a basic inverted residual block for MobileNetv3 +([reference](https://arxiv.org/abs/1905.02244)). + +# Arguments +- `inplanes`: The number of input feature maps +- `hidden_planes`: The number of feature maps in the hidden layer +- `outplanes`: The number of output feature maps +- `kernel_size`: The kernel size of the convolutional layers +- `stride`: The stride of the convolutional kernel, has to be either 1 or 2 +- `use_se`: If `true`, Squeeze and Excitation layer will be used +- `use_hs`: If `true`, Hard-Swish activation function will be used +""" +function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation = relu; + stride, reduction = nothing) + @assert stride in [1, 2] "`stride` has to be 1 or 2" + + pad = (kernel_size - 1) ÷ 2 + conv1 = (inplanes == hidden_planes) ? () : conv_bn((1, 1), inplanes, hidden_planes, activation; bias = false) + selayer = isnothing(reduction) ? identity : squeeze_excite(hidden_planes, reduction) + + invres = Chain(conv1..., + conv_bn(kernel_size, hidden_planes, hidden_planes, activation; + bias = false, stride, pad = pad, groups = hidden_planes)..., + selayer, + conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)...) + + (stride == 1 && inplanes == outplanes) ? SkipConnection(invres, +) : invres +end +invertedresidual(kernel_size::Integer, args...; kwargs...) = + invertedresidual((kernel_size, kernel_size), args...; kwargs...) + """ mlpblock(planes, hidden_planes; dropout = 0., dense = Dense, activation = gelu) diff --git a/src/utilities.jl b/src/utilities.jl index 57ea8eba9..5b2d847ca 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -1,6 +1,14 @@ # Utility function for classifier head of vision transformer-like models _seconddimmean(x) = dropdims(mean(x, dims = 2); dims = 2) +# utility function for making sure that all layers have a channel size divisible by 8 +# used by MobileNet variants +function _round_channels(channels, divisor, min_value = divisor) + new_channels = max(min_value, floor(Int, channels + divisor / 2) ÷ divisor * divisor) + # Make sure that round down does not go down by more than 10% + return (new_channels < 0.9 * v) ? new_channels + divisor : new_channels +end + """ addrelu(x, y) From 4d660b4f623938868c46f2567b1493a5cf5ef5a1 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Thu, 17 Feb 2022 14:05:32 -0600 Subject: [PATCH 2/4] Fix inverted residual docstring --- src/layers.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/layers.jl b/src/layers.jl index c35e1307b..818a28a82 100644 --- a/src/layers.jl +++ b/src/layers.jl @@ -112,17 +112,19 @@ end invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation = relu; stride, reduction = nothing) -Create a basic inverted residual block for MobileNetv3 +Create a basic inverted residual block for MobileNet variants ([reference](https://arxiv.org/abs/1905.02244)). 
# Arguments
+- `kernel_size`: The kernel size of the convolutional layers
 - `inplanes`: The number of input feature maps
 - `hidden_planes`: The number of feature maps in the hidden layer
 - `outplanes`: The number of output feature maps
-- `kernel_size`: The kernel size of the convolutional layers
+- `activation`: The activation function for the first two convolution layers
 - `stride`: The stride of the convolutional kernel, has to be either 1 or 2
-- `use_se`: If `true`, Squeeze and Excitation layer will be used
-- `use_hs`: If `true`, Hard-Swish activation function will be used
+- `reduction`: The reduction factor for the number of hidden feature maps
+  in a squeeze and excite layer (see [`squeeze_excite`](#)).
+  Must be >= 1 or `nothing` for no squeeze and excite layer.
 """
 function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation = relu;
                           stride, reduction = nothing)

From c8c90e0e4f2c05721fb091a8c3336e39caaad9bf Mon Sep 17 00:00:00 2001
From: Kyle Daruwalla
Date: Thu, 17 Feb 2022 16:36:11 -0600
Subject: [PATCH 3/4] Update src/utilities.jl

---
 src/utilities.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utilities.jl b/src/utilities.jl
index 5b2d847ca..c778c0cbb 100644
--- a/src/utilities.jl
+++ b/src/utilities.jl
@@ -6,7 +6,7 @@ _seconddimmean(x) = dropdims(mean(x, dims = 2); dims = 2)
 function _round_channels(channels, divisor, min_value = divisor)
     new_channels = max(min_value, floor(Int, channels + divisor / 2) ÷ divisor * divisor)
     # Make sure that round down does not go down by more than 10%
-    return (new_channels < 0.9 * v) ? new_channels + divisor : new_channels
+    return (new_channels < 0.9 * channels) ? new_channels + divisor : new_channels
 end
 
 """

From 84a50e096799ca9bf5bbc967859a53ec502b4db6 Mon Sep 17 00:00:00 2001
From: Kyle Daruwalla
Date: Fri, 18 Feb 2022 10:57:51 -0600
Subject: [PATCH 4/4] Fix broadcasting error in inverted residual

---
 src/layers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layers.jl b/src/layers.jl
index 818a28a82..99104785e 100644
--- a/src/layers.jl
+++ b/src/layers.jl
@@ -130,7 +130,7 @@ function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activ
     stride, reduction = nothing)
     @assert stride in [1, 2] "`stride` has to be 1 or 2"
 
-    pad = (kernel_size - 1) ÷ 2
+    pad = @. (kernel_size - 1) ÷ 2
     conv1 = (inplanes == hidden_planes) ? () : conv_bn((1, 1), inplanes, hidden_planes, activation; bias = false)
     selayer = isnothing(reduction) ? identity : squeeze_excite(hidden_planes, reduction)
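
A rough usage sketch of the helpers this series makes generic. `_round_channels`, `squeeze_excite`, and `invertedresidual` are internal, unexported functions, so the snippet assumes a checkout with all four patches applied and imports them explicitly; the channel counts and WHCN input shapes below are illustrative assumptions, not part of the patches.

```julia
using Flux
using Metalhead: _round_channels, squeeze_excite, invertedresidual  # internal helpers

# _round_channels snaps a (possibly fractional) channel count to a multiple of
# `divisor`, bumping back up by one `divisor` if rounding lost more than 10%.
_round_channels(32 * 0.75, 8)  # 24.0 is already a multiple of 8 -> 24
_round_channels(17, 8)         # nearest multiple is 16; 16 >= 0.9 * 17 -> 16
_round_channels(11, 8)         # nearest multiple is 8; 8 < 0.9 * 11 -> bump to 16

# A standalone squeeze-excite layer: global mean pool, 64 -> 16 -> 64 channels,
# with the result broadcast-multiplied against the input by the SkipConnection.
se = squeeze_excite(64, 4)
size(se(rand(Float32, 28, 28, 64, 1)))      # (28, 28, 64, 1)

# An MBConv-style block from the generic helper: 1x1 expansion 16 -> 64,
# 3x3 depthwise convolution, squeeze-excite with reduction 4, then a 1x1
# projection back to 16. With stride == 1 and inplanes == outplanes, the
# chain is wrapped in an additive skip connection.
block = invertedresidual(3, 16, 64, 16, hardswish; stride = 1, reduction = 4)
size(block(rand(Float32, 56, 56, 16, 1)))   # (56, 56, 16, 1)
```

Passing `reduction = nothing` (the default) drops the squeeze-excite stage, which is how the MobileNetv2 builder reuses the same block; `kernel_size` may be an `Integer` or a tuple thanks to the convenience method added in PATCH 1.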