diff --git a/CODEOWNERS b/CODEOWNERS
index afa98df95..1fafadbb1 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -52,3 +52,4 @@
 keras_contrib/optimizers/padam.py @MFreidank
 
 # wrappers
+keras_contrib/wrappers/cdropout.py @moritzmoritz98
\ No newline at end of file
diff --git a/keras_contrib/wrappers/__init__.py b/keras_contrib/wrappers/__init__.py
index e69de29bb..7490e705c 100644
--- a/keras_contrib/wrappers/__init__.py
+++ b/keras_contrib/wrappers/__init__.py
@@ -0,0 +1,3 @@
+from __future__ import absolute_import
+
+from .cdropout import ConcreteDropout
diff --git a/keras_contrib/wrappers/cdropout.py b/keras_contrib/wrappers/cdropout.py
new file mode 100644
index 000000000..a7c89a8b7
--- /dev/null
+++ b/keras_contrib/wrappers/cdropout.py
@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import numpy as np
+from keras import backend as K
+from keras.initializers import RandomUniform
+from keras.layers import InputSpec
+from keras.layers.wrappers import Wrapper
+from keras_contrib.utils.test_utils import to_tuple
+
+
+class ConcreteDropout(Wrapper):
+    """A wrapper that automates the choice of the dropout rate
+    through the 'Concrete Dropout' technique.
+
+    Note that currently only Dense layers with weights
+    and Conv layers (Conv1D, Conv2D, Conv3D) are supported.
+    For Dense layers, dropout is applied to the complete input,
+    whereas for Conv layers only the input channels are dropped.
+
+    # Example
+
+    ```python
+        # as first layer in a sequential model:
+        model = Sequential()
+        model.add(ConcreteDropout(Dense(8), n_data=5000, input_shape=(16,)))
+        # now model.output_shape == (None, 8)
+        # subsequent layers: no need for input shape
+        model.add(ConcreteDropout(Dense(32), n_data=5000))
+        # now model.output_shape == (None, 32)
+
+        # Conv layers are wrapped in exactly the same way.
+    ```
+
+    # Arguments
+        layer: The layer to be wrapped.
+        n_data: int. `n_data > 0`.
+            Number of samples in the dataset.
+        length_scale: float. `length_scale > 0`.
+            Prior lengthscale.
+        model_precision: float. `model_precision > 0`.
+            Model precision parameter (`1` for classification).
+            Also known as inverse observation noise.
+        prob_init: Tuple[float, float]. `0 < prob_init[0] <= prob_init[1]`.
+            Lower / upper bounds for the initial dropout rate.
+        temp: float. `temp > 0`. Temperature.
+            Determines the speed of dropout rate adjustments.
+        seed: Seed for random probability sampling.
+
+    # References
+        - [Concrete Dropout](https://arxiv.org/pdf/1705.07832.pdf)
+    """
+
+    def __init__(self,
+                 layer,
+                 n_data,
+                 length_scale=5e-2,
+                 model_precision=1,
+                 prob_init=(0.1, 0.5),
+                 temp=0.4,
+                 seed=None,
+                 **kwargs):
+        assert 'kernel_regularizer' not in kwargs
+        assert n_data > 0 and isinstance(n_data, int)
+        assert length_scale > 0.
+        assert prob_init[0] <= prob_init[1] and prob_init[0] > 0.
+        assert temp > 0.
+        assert model_precision > 0.
+        super(ConcreteDropout, self).__init__(layer, **kwargs)
+
+        self._n_data = n_data
+        self._length_scale = length_scale
+        self._model_precision = model_precision
+        self._prob_init = prob_init
+        self._temp = temp
+        self._seed = seed
+
+        eps = K.epsilon()
+        self.weight_regularizer = length_scale**2 / (model_precision * n_data + eps)
+        self.dropout_regularizer = 2 / (model_precision * n_data + eps)
+        self.supports_masking = True
+        self.p_logit = None
+        self.p = None
+
+    def _concrete_dropout(self, inputs, layer_type):
+        """Applies concrete dropout.
+        Used at training time (gradients can be propagated).
+
+        # Arguments
+            inputs: Input tensor.
+            layer_type: str. Either 'dense' or 'conv'.
+
+        # Returns
+            A tensor with the same shape as inputs and dropout applied.
+        """
+        assert layer_type in {'dense', 'conv'}
+        eps = K.cast_to_floatx(K.epsilon())
+
+        noise_shape = K.shape(inputs)
+        if layer_type == 'conv':
+            # broadcast the dropout mask over all non-channel dimensions
+            ones = (1,) * (len(K.int_shape(inputs)) - 2)
+            if K.image_data_format() == 'channels_first':
+                noise_shape = (noise_shape[0], noise_shape[1]) + ones
+            else:
+                noise_shape = (noise_shape[0],) + ones + (noise_shape[-1],)
+        unif_noise = K.random_uniform(shape=noise_shape,
+                                      seed=self._seed,
+                                      dtype=inputs.dtype)
+        # relaxed Bernoulli (Concrete) dropout mask
+        drop_prob = (
+            K.log(self.p + eps)
+            - K.log(1. - self.p + eps)
+            + K.log(unif_noise + eps)
+            - K.log(1. - unif_noise + eps)
+        )
+        drop_prob = K.sigmoid(drop_prob / self._temp)
+
+        # apply dropout (inverted: kept units are rescaled by 1 / retain_prob)
+        random_tensor = 1. - drop_prob
+        retain_prob = 1. - self.p
+        inputs *= random_tensor
+        inputs /= retain_prob
+
+        return inputs
+
+    def build(self, input_shape=None):
+        input_shape = to_tuple(input_shape)
+        if len(input_shape) == 2:  # Dense layer
+            input_dim = input_shape[-1]  # we drop only the last dim
+        elif 3 <= len(input_shape) <= 5:  # Conv layers
+            input_dim = (
+                input_shape[1]
+                if K.image_data_format() == 'channels_first'
+                else input_shape[-1]  # we drop only the channels
+            )
+        else:
+            raise ValueError(
+                'ConcreteDropout currently supports only Dense/Conv layers')
+
+        self.input_spec = InputSpec(shape=input_shape)
+        if not self.layer.built:
+            self.layer.build(input_shape)
+            self.layer.built = True
+
+        # initialize p_logit such that p = sigmoid(p_logit) lies in prob_init
+        init_min = np.log(self._prob_init[0]) - np.log(1. - self._prob_init[0])
+        init_max = np.log(self._prob_init[1]) - np.log(1. - self._prob_init[1])
+        self.p_logit = self.layer.add_weight(name='p_logit',
+                                             shape=(1,),
+                                             initializer=RandomUniform(
+                                                 init_min, init_max,
+                                                 seed=self._seed
+                                             ),
+                                             trainable=True)
+        self.p = K.squeeze(K.sigmoid(self.p_logit), axis=0)
+
+        super(ConcreteDropout, self).build(input_shape)
+
+        # initialize regularizer / prior KL term and add to layer loss
+        weight = self.layer.kernel
+        kernel_regularizer = (
+            self.weight_regularizer
+            * K.sum(K.square(weight))
+            / (1. - self.p)
+        )
+        dropout_regularizer = (
+            self.p * K.log(self.p)
+            + (1. - self.p) * K.log(1. - self.p)
+        ) * self.dropout_regularizer * input_dim
+        regularizer = K.sum(kernel_regularizer + dropout_regularizer)
+        self.layer.add_loss(regularizer)
+
+    def call(self, inputs, training=None):
+        def relaxed_dropped_inputs():
+            return self.layer.call(self._concrete_dropout(inputs, (
+                'dense' if len(K.int_shape(inputs)) == 2 else 'conv'
+            )))
+
+        return K.in_train_phase(relaxed_dropped_inputs,
+                                self.layer.call(inputs),
+                                training=training)
+
+    def get_config(self):
+        config = {'n_data': self._n_data,
+                  'length_scale': self._length_scale,
+                  'model_precision': self._model_precision,
+                  'prob_init': self._prob_init,
+                  'temp': self._temp,
+                  'seed': self._seed}
+        base_config = super(ConcreteDropout, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def compute_output_shape(self, input_shape):
+        return self.layer.compute_output_shape(input_shape)
diff --git a/tests/keras_contrib/wrappers/test_cdropout.py b/tests/keras_contrib/wrappers/test_cdropout.py
new file mode 100644
index 000000000..88e15f65d
--- /dev/null
+++ b/tests/keras_contrib/wrappers/test_cdropout.py
@@ -0,0 +1,224 @@
+import pytest
+import numpy as np
+
+from numpy.testing import assert_allclose
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_approx_equal
+from numpy.testing import assert_equal
+from keras import backend as K
+from keras.layers import Conv1D, Conv2D, Conv3D, Dense, Input
+from keras.models import Model
+from keras_contrib.utils.test_utils import layer_test
+from keras_contrib.wrappers import ConcreteDropout
+
+
+@pytest.fixture
+def clear_session_after_test():
+    """Overridden: session cleanup is done manually in the fixtures below.
+    """
+    pass
+
+
+@pytest.fixture(scope='module')
+def dense_model():
+    """Initialize the dense model under test. Executed once per module.
+    """
+    # DATA
+    in_dim = 20
+    init_prop = .1
+    np.random.seed(1)
+    X = np.random.randn(1, in_dim)
+
+    # MODEL
+    inputs = Input(shape=(in_dim,))
+    dense = Dense(1, use_bias=True)
+    # Model, normal
+    cd = ConcreteDropout(dense, in_dim, prob_init=(init_prop, init_prop))
+    x = cd(inputs)
+    model = Model(inputs, x)
+    model.compile(loss=None, optimizer='rmsprop')
+    # Model, reference w/o Dropout
+    x_ref = dense(inputs)
+    model_ref = Model(inputs, x_ref)
+    model_ref.compile(loss=None, optimizer='rmsprop')
+
+    yield {'model': model,
+           'model_ref': model_ref,
+           'concrete_dropout': cd,
+           'init_prop': init_prop,
+           'in_dim': in_dim,
+           'X': X}
+    if K.backend() == 'tensorflow' or K.backend() == 'cntk':
+        K.clear_session()
+
+
+def test_cdropout_dense_3rdweight(dense_model):
+    """Check that the 3rd weight (p_logit) equals its initial value.
+    """
+    model = dense_model['model']
+    init_prop = dense_model['init_prop']
+
+    W = model.get_weights()
+    p_logit_init = np.log(init_prop) - np.log(1. - init_prop)
+    assert_array_almost_equal(W[2], [p_logit_init])
+
+
+def test_cdropout_dense_identity(dense_model):
+    """Check that ConcreteDropout in the prediction phase equals no dropout.
+    """
+    model = dense_model['model']
+    model_ref = dense_model['model_ref']
+    X = dense_model['X']
+
+    out = model.predict(X)
+    out_ref = model_ref.predict(X)
+    assert_allclose(out, out_ref, atol=1e-5)
+
+
+def test_cdropout_dense_loss(dense_model):
+    """Check that ConcreteDropout registers exactly one loss.
+    """
+    model = dense_model['model']
+
+    assert_equal(len(model.losses), 1)
+
+
+def test_cdropout_dense_loss_value(dense_model):
+    """Check that the loss corresponds to the desired value.
+    """
+    model = dense_model['model']
+    X = dense_model['X']
+    cd = dense_model['concrete_dropout']
+    in_dim = dense_model['in_dim']
+
+    def sigmoid(x):
+        return 1. / (1. + np.exp(-x))
+
+    W = model.get_weights()
+    p = np.squeeze(sigmoid(W[2]))
+    kernel_regularizer = cd.weight_regularizer * np.sum(np.square(W[0])) / (1. - p)
+    dropout_regularizer = (p * np.log(p) + (1. - p) * np.log(1. - p))
+    dropout_regularizer *= cd.dropout_regularizer * in_dim
+    loss = np.sum(kernel_regularizer + dropout_regularizer)
+
+    eval_loss = model.evaluate(X)
+    assert_approx_equal(eval_loss, loss)
+
+
+@pytest.fixture(scope='module', params=['channels_first', 'channels_last'])
+def conv2d_model(request):
+    """Initialize the conv model under test. Executed once per param:
+    all tests are repeated for both `channels_first` and `channels_last`.
+    """
+    assert request.param in {'channels_last', 'channels_first'}
+    K.set_image_data_format(request.param)
+
+    # DATA
+    in_dim = 20
+    init_prop = .1
+    np.random.seed(1)
+    if K.image_data_format() == 'channels_last':
+        X = np.random.randn(1, in_dim, in_dim, 1)
+    elif K.image_data_format() == 'channels_first':
+        X = np.random.randn(1, 1, in_dim, in_dim)
+    else:
+        raise ValueError('Unknown data_format:', K.image_data_format())
+
+    # MODEL
+    if K.image_data_format() == 'channels_last':
+        inputs = Input(shape=(in_dim, in_dim, 1,))
+    elif K.image_data_format() == 'channels_first':
+        inputs = Input(shape=(1, in_dim, in_dim,))
+    else:
+        raise ValueError('Unknown data_format:', K.image_data_format())
+    conv2d = Conv2D(1, (3, 3))
+    # Model, normal
+    cd = ConcreteDropout(conv2d, in_dim, prob_init=(init_prop, init_prop))
+    x = cd(inputs)
+    model = Model(inputs, x)
+    model.compile(loss=None, optimizer='rmsprop')
+    # Model, reference w/o Dropout
+    x_ref = conv2d(inputs)
+    model_ref = Model(inputs, x_ref)
+    model_ref.compile(loss=None, optimizer='rmsprop')
+
+    yield {'model': model,
+           'model_ref': model_ref,
+           'concrete_dropout': cd,
+           'init_prop': init_prop,
+           'in_dim': in_dim,
+           'X': X}
+    if K.backend() == 'tensorflow' or K.backend() == 'cntk':
+        K.clear_session()
+
+
+def test_cdropout_conv2d_3rdweight(conv2d_model):
+    """Check that the 3rd weight (p_logit) equals its initial value.
+    """
+    model = conv2d_model['model']
+    init_prop = conv2d_model['init_prop']
+
+    W = model.get_weights()
+    p_logit_init = np.log(init_prop) - np.log(1. - init_prop)
+    assert_array_almost_equal(W[2], [p_logit_init])
+
+
+def test_cdropout_conv2d_identity(conv2d_model):
+    """Check that ConcreteDropout in the prediction phase equals no dropout.
+    """
+    model = conv2d_model['model']
+    model_ref = conv2d_model['model_ref']
+    X = conv2d_model['X']
+
+    out = model.predict(X)
+    out_ref = model_ref.predict(X)
+    assert_allclose(out, out_ref, atol=1e-5)
+
+
+def test_cdropout_conv2d_loss(conv2d_model):
+    """Check that ConcreteDropout registers exactly one loss.
+    """
+    model = conv2d_model['model']
+
+    assert_equal(len(model.losses), 1)
+
+
+def test_cdropout_conv2d_loss_value(conv2d_model):
+    """Check that the loss corresponds to the desired value.
+    """
+    model = conv2d_model['model']
+    X = conv2d_model['X']
+    cd = conv2d_model['concrete_dropout']
+
+    def sigmoid(x):
+        return 1. / (1. + np.exp(-x))
+
+    W = model.get_weights()
+    p = np.squeeze(sigmoid(W[2]))
+    kernel_regularizer = cd.weight_regularizer * np.sum(np.square(W[0])) / (1. - p)
+    dropout_regularizer = (p * np.log(p) + (1. - p) * np.log(1. - p))
+    dropout_regularizer *= cd.dropout_regularizer * 1  # only channels are dropped
+    loss = np.sum(kernel_regularizer + dropout_regularizer)
+
+    eval_loss = model.evaluate(X)
+    assert_approx_equal(eval_loss, loss)
+
+
+@pytest.mark.parametrize('n_data', [1, 60])
+@pytest.mark.parametrize('layer, args, shape', [(Dense, (2,), (None, 6)),
+                                                (Conv1D, (4, 3), (None, 6, 1)),
+                                                (Conv2D, (8, 7), (None, 12, 12, 3)),
+                                                (Conv3D, (16, 3), (1, 6, 6, 6, 1))])
+def test_cdropout_valid_layer(layer, args, shape, n_data):
+    """Original layer test with a variety of valid parameters.
+    """
+    layer_test(ConcreteDropout,
+               kwargs={'layer': layer(*args),
+                       'n_data': n_data},
+               input_shape=shape)
+
+    if K.backend() == 'tensorflow' or K.backend() == 'cntk':
+        K.clear_session()
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
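
For reviewers who want to try the wrapper end to end, the snippet below is a minimal usage sketch, not part of the diff. The toy data, layer sizes, and epoch count are illustrative assumptions; the rest uses only the `ConcreteDropout` wrapper introduced above plus the standard Keras 2 `K.function` / `K.learning_phase()` recipe for Monte-Carlo sampling.

```python
import numpy as np
from keras import backend as K
from keras.layers import Dense, Input
from keras.models import Model
from keras_contrib.wrappers import ConcreteDropout

# toy regression data (illustrative only)
n_data, in_dim = 1000, 16
X = np.random.randn(n_data, in_dim)
y = np.random.randn(n_data, 1)

# wrap a Dense layer; n_data must match the training-set size
inputs = Input(shape=(in_dim,))
cd = ConcreteDropout(Dense(8, activation='relu'), n_data=n_data)
x = cd(inputs)
outputs = Dense(1)(x)
model = Model(inputs, outputs)
model.compile(loss='mse', optimizer='adam')
model.fit(X, y, epochs=5, verbose=0)

# read out the learned dropout rate of the wrapped layer
print('learned dropout rate:', K.eval(cd.p))

# Monte-Carlo predictions: keep dropout active (learning_phase=1) and
# average several stochastic forward passes to estimate uncertainty
mc_forward = K.function([model.input, K.learning_phase()], [model.output])
samples = np.stack([mc_forward([X[:5], 1])[0] for _ in range(20)])
mean, std = samples.mean(axis=0), samples.std(axis=0)
```

Note that `model.predict` always takes the deterministic branch of `K.in_train_phase`, so passing `learning_phase=1` as above is what keeps the relaxed dropout mask active when an uncertainty estimate is wanted.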