diff --git a/README.md b/README.md index bdb8d1c..7369287 100755 --- a/README.md +++ b/README.md @@ -15,16 +15,106 @@ It is recommended that you consult [the current working branch](https://github.c A Stephen Fox endeavor to become an Applied AI Scientist. +## Background Resources + +### Key Ideas + +1. Make it simple to refine neural architectures +2. Focus on dropping model parameter size while __keeping performance as high as possible__ +3. Make the tools user-friendly, and clearly documented + +### Project Roadmap + +- Please see [the GitHub Project board](https://github.com/stephenjfox/Morph.py/projects/1) + +--- + +## Usage + +### Installation + +`pip install morph-py` + +### Code Example + +```python +import morph + +morph_optimizer = None +# train loop +for e in range(epoch_count): + + for input, target in dataloader: + optimizer.zero_grad() # optional: zero gradients or don't... + output = model(input) + + loss = loss_fn(output, target) + loss.backward() + optimizer.step() + + + # setup for comparing the morphed model + if morph_optimizer: + morph_optimizer.zero_grad() + morph_loss = loss_fn(morph_model(input), target) + + logging.info(f'Morph loss - Standard loss = {morph_loss - loss}') + + morph_loss.backward() + morph_optimizer.step() + + + # Experimentally supported: Initialize our morphing halfway through training + if e == epoch_count // 2: + # if you want to override your model + model = morph.once(model) + + # if you want to compare in parallel + morph_model = morph.once(model) + + # either way, you need to tell your optimizer about it + morph_optimizer = init_optimizer(params=morph_model.parameters()) + +``` + +## What is Morph.py? + +Morph.py is a Neural Network Architecture Optimization toolkit targeted at Deep Learning researchers + and practitioners. 
+* It acts outside of the current paradigm of [Neural Architecture Search](https://github.com/D-X-Y/awesome-NAS) + while still proving effective +* It helps you weigh a model's accuracy against its size (as measured by "count of model parameters") + * Consequently, you could be nearly as effective (given some margin of error) with a __much__ smaller + memory footprint +* Provides you, the researcher, with [better insight on how to improve your model](https://github.com/stephenjfox/Morph.py/projects/3) + +Please enjoy this [Google Slides presentation](https://goo.gl/ZzZrng) + +Coming soon: +* A walkthrough of the presentation (more detail than my presenter's notes) +* More [supported model architectures](https://github.com/stephenjfox/Morph.py/projects/2) + + +### Current support + +* Dynamic adjustment of a given layer's size +* Weight persistence across layer resizing + * To preserve all the hard work you put into training + +--- + +# Contributing + ## Setup (to work alongside me) `git clone https://github.com/stephenjfox/Morph.py.git` -## Requisites +### Requisites -### [Install Anaconda](https://www.anaconda.com/download/) +#### [Install Anaconda](https://www.anaconda.com/download/) * They've made it easier with the years. If you haven't already, please give it a try -### Install Pip +#### Install Pip 1. `conda install pip` 2. Proceed as normal @@ -34,4 +124,7 @@ A Stephen Fox endeavor to become an Applied AI Scientist. 
- Jupyter Notebook * And a few tools to make it better on your local environment like `nb_conda`, `nbconvert`, and `nb_conda_kernels` - Python 3.6+ because [Python 2 is dying](https://pythonclock.org/) -- PyTorch (`conda install torch torchvision`) +- PyTorch (`conda install torch torchvision -c pytorch`) + +All of these and more are covered in the `environment.yml` file: ++ Simply run `conda env create -f environment.yml -n ` \ No newline at end of file diff --git a/check-prune-widen.ipynb b/check-prune-widen.ipynb new file mode 100644 index 0000000..4a1a13e --- /dev/null +++ b/check-prune-widen.ipynb @@ -0,0 +1,171 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import morph" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "morph.nn" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "??morph.nn.once" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import morph.nn.shrink as ms" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from morph.testing.models import EasyMnist" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "some_linear = ms.nn.Linear(3, 2)\n", + "c = [c for c in some_linear.children()]\n", + "len(c)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EasyMnist(\n", + " (linear1): Linear(in_features=784, out_features=1000, bias=True)\n", + 
" (linear2): Linear(in_features=1000, out_features=30, bias=True)\n", + " (linear3): Linear(in_features=30, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EasyMnist()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Module(\n", + " (linear1): Linear(in_features=784, out_features=700, bias=True)\n", + " (linear2): Linear(in_features=700, out_features=21, bias=True)\n", + " (linear3): Linear(in_features=21, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ms.prune(EasyMnist())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/demo.py b/demo.py index 5a8be18..f1587e2 100644 --- a/demo.py +++ b/demo.py @@ -6,20 +6,25 @@ import morph.nn as net from morph.layers.sparse import sparsify -from morph._models import EasyMnist +from morph.testing.models import EasyMnist +def random_dataset(): + return TensorDataset(torch.randn(2, 28, 28)) + def main(): my_model = EasyMnist() # do one pass through the algorithm 
modified = morph.once(my_model) - print(modified) # proof that the thing wasn't tampered with + print(modified) # take a peek at the new layers. You take it from here - my_dataloader = DataLoader(TensorDataset(torch.randn(2, 28, 28))) + my_dataloader = DataLoader(random_dataset()) # get back the class that will do work morphed = net.Morph(my_model, epochs=5, dataloader=my_dataloader) + + # TODO: we need your loss function, but this is currentry __unsupported__ morphed.run_training() diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..8bb69f9 --- /dev/null +++ b/environment.yml @@ -0,0 +1,128 @@ +name: insight-project-pytorch +channels: + - pytorch + - anaconda-fusion + - defaults +dependencies: + - appnope=0.1.0=py37_0 + - asn1crypto=0.24.0=py37_0 + - astroid=2.1.0=py37_0 + - backcall=0.1.0=py37_0 + - blas=1.0=mkl + - bleach=3.1.0=py37_0 + - ca-certificates=2018.12.5=0 + - certifi=2018.11.29=py37_0 + - cffi=1.11.5=py37h6174b99_1 + - chardet=3.0.4=py37_1 + - cryptography=2.4.2=py37ha12b0ac_0 + - cycler=0.10.0=py37_0 + - dbus=1.13.6=h90a0687_0 + - decorator=4.3.0=py37_0 + - defusedxml=0.5.0=py37_1 + - entrypoints=0.3=py37_0 + - expat=2.2.6=h0a44026_0 + - freetype=2.9.1=hb4e5f40_0 + - gettext=0.19.8.1=h15daf44_3 + - glib=2.56.2=hd9629dc_0 + - icu=58.2=h4b95b61_1 + - idna=2.8=py37_0 + - intel-openmp=2019.1=144 + - ipykernel=5.1.0=py37h39e3cac_0 + - ipython=7.2.0=py37h39e3cac_0 + - ipython_genutils=0.2.0=py37_0 + - ipywidgets=7.4.2=py37_0 + - isort=4.3.4=py37_0 + - jedi=0.13.2=py37_0 + - jinja2=2.10=py37_0 + - jpeg=9b=he5867d9_2 + - jsonschema=2.6.0=py37_0 + - jupyter=1.0.0=py37_7 + - jupyter_client=5.2.4=py37_0 + - jupyter_console=6.0.0=py37_0 + - jupyter_core=4.4.0=py37_0 + - kiwisolver=1.0.1=py37h0a44026_0 + - lazy-object-proxy=1.3.1=py37h1de35cc_2 + - libcxx=4.0.1=hcfea43d_1 + - libcxxabi=4.0.1=hcfea43d_1 + - libedit=3.1.20181209=hb402a30_0 + - libffi=3.2.1=h475c297_4 + - libgfortran=3.0.1=h93005f0_2 + - libiconv=1.15=hdd342a3_7 + - 
libpng=1.6.36=ha441bb4_0 + - libsodium=1.0.16=h3efe00b_0 + - libtiff=4.0.10=hcb84e12_1001 + - markupsafe=1.1.0=py37h1de35cc_0 + - matplotlib=3.0.2=py37h54f8f79_0 + - mccabe=0.6.1=py37_1 + - mistune=0.8.4=py37h1de35cc_0 + - mkl=2019.1=144 + - mkl_fft=1.0.10=py37h5e564d8_0 + - mkl_random=1.0.2=py37h27c97d8_0 + - nb_conda_kernels=2.2.0=py37_0 + - nbconvert=5.4.0=py37_1 + - nbformat=4.4.0=py37_0 + - ncurses=6.1=h0a44026_1 + - ninja=1.8.2=py37h04f5b5a_1 + - notebook=5.7.4=py37_0 + - numpy=1.15.4=py37hacdab7b_0 + - numpy-base=1.15.4=py37h6575580_0 + - olefile=0.46=py37_0 + - openssl=1.1.1a=h1de35cc_0 + - pandoc=1.19.2.1=ha5e8f32_1 + - pandocfilters=1.4.2=py37_1 + - parso=0.3.1=py37_0 + - pcre=8.42=h378b8a2_0 + - pexpect=4.6.0=py37_0 + - pickleshare=0.7.5=py37_0 + - pillow=5.4.1=py37hb68e598_0 + - pip=18.1=py37_0 + - prometheus_client=0.5.0=py37_0 + - prompt_toolkit=2.0.7=py37_0 + - ptyprocess=0.6.0=py37_0 + - pycparser=2.19=py37_0 + - pygments=2.3.1=py37_0 + - pylint=2.2.2=py37_0 + - pyopenssl=18.0.0=py37_0 + - pyparsing=2.3.1=py37_0 + - pyqt=5.9.2=py37h655552a_2 + - pysocks=1.6.8=py37_0 + - python=3.7.2=haf84260_0 + - python-dateutil=2.7.5=py37_0 + - pytz=2018.9=py37_0 + - pyzmq=17.1.2=py37h1de35cc_0 + - qt=5.9.7=h468cd18_1 + - qtconsole=4.4.3=py37_0 + - readline=7.0=h1de35cc_5 + - requests=2.21.0=py37_0 + - rope=0.11.0=py37_0 + - send2trash=1.5.0=py37_0 + - setuptools=40.6.3=py37_0 + - sip=4.19.8=py37h0a44026_0 + - six=1.12.0=py37_0 + - sqlite=3.26.0=ha441bb4_0 + - terminado=0.8.1=py37_1 + - testpath=0.4.2=py37_0 + - tk=8.6.8=ha441bb4_0 + - tornado=5.1.1=py37h1de35cc_0 + - traitlets=4.3.2=py37_0 + - urllib3=1.24.1=py37_0 + - wcwidth=0.1.7=py37_0 + - webencodings=0.5.1=py37_1 + - wheel=0.32.3=py37_0 + - widgetsnbextension=3.4.2=py37_0 + - wrapt=1.11.0=py37h1de35cc_0 + - xz=5.2.4=h1de35cc_4 + - yapf=0.25.0=py37_0 + - zeromq=4.2.5=h0a44026_1 + - zlib=1.2.11=h1de35cc_3 + - pytorch=1.0.0=py3.7_1 + - torchvision=0.2.1=py_2 + - pip: + - docutils==0.14 + - pkginfo==1.5.0.1 + - 
class ValidationError(Exception):
    """Custom error that represents a validation issue, according to internal
    system rules.

    Args:
        msg: human-readable description of the rule that was violated; it
            becomes the exception's message (`str(error)`).
    """

    def __init__(self, msg):
        # Zero-argument super(), matching the other classes in this package.
        super().__init__(msg)
- """ - _input = x_batch.view(-1, 784) # shape for our linear1 - out1 = F.relu(self.linear1(x_batch)) - out2 = F.relu(self.linear2(out1)) - out3 = F.relu(self.linear3(out2)) - - return out3 \ No newline at end of file diff --git a/morph/layers/sparse.py b/morph/layers/sparse.py index 1f29dd6..d62b316 100644 --- a/morph/layers/sparse.py +++ b/morph/layers/sparse.py @@ -28,7 +28,7 @@ def percent_waste(layer: nn.Module) -> float: weight matrix/tensor to determine how many neurons can be spared """ w = layer.weight - non_sparse_w = torch.nonzero(sparsify(w)) + non_sparse_w = sparsify(w).nonzero() non_zero_count = non_sparse_w.numel() // len(non_sparse_w[0]) percent_size = non_zero_count / w.numel() diff --git a/morph/layers/sparse_test.py b/morph/layers/sparse_test.py new file mode 100644 index 0000000..aab6657 --- /dev/null +++ b/morph/layers/sparse_test.py @@ -0,0 +1,16 @@ +from unittest import main as test_main, TestCase, skip + +from .sparse import sparsify, torch + +class TestSparseFunctions(TestCase): + + @skip("Skipping value-wise comparison until better solution than iterating all tensor values") + def test_sparsify_selected_indices_should_have_sub_threshold_values(self): + test_threshold = 0.1 + test_tensor = torch.randn(3, 2) + expected = torch.where(test_tensor > test_threshold, test_tensor, torch.zeros(3, 2)) + self.assertEqual(expected, sparsify(test_tensor, test_threshold)) + + +if __name__ == "__main__": + test_main() \ No newline at end of file diff --git a/morph/layers/widen.py b/morph/layers/widen.py index e2ff0e6..3e737c2 100644 --- a/morph/layers/widen.py +++ b/morph/layers/widen.py @@ -3,11 +3,11 @@ import torch import torch.nn as nn -from ..nn.utils import layer_has_bias +from ..nn.utils import layer_has_bias, redo_layer +from ..utils import check, round # NOTE: should factor be {smaller, default at all}? -# TODO: Research - is there a better type for layer than nn.Module? 
def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module: """ Args: @@ -23,23 +23,18 @@ def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module: Returns: A new layer of the base type (e.g. nn.Linear) or `None` if in_place=True """ - if factor < 1.0: - raise ValueError('Cannot shrink with the widen() function') - if factor == 1.0: - raise ValueError("You shouldn't waste compute time if you're not changing anything") + check(factor > 1.0, "Your call to widen() should be increasing the size of your layers") # we know that layer.weight.size()[0] is the __output__ dimension in the linear case output_dim = 0 if isinstance(layer, nn.Linear): output_dim = layer.weight.size()[0] # FIXME: switch to layer.out_features? input_dim = layer.weight.size()[1] # FIXME: switch to layer.in_features? - # TODO: other classes, for robustness? - # TODO: Use dictionary look-ups instead, because they're faster? else: raise ValueError('unsupported layer type:', type(layer)) logging.debug(f"current dimensions: {(output_dim, input_dim)}") - new_size = round(factor * output_dim + .5) # round up, not down, if we can + new_size = round(factor * output_dim) # round up, not down, if we can # We're increasing layer width from output_dim to new_size, so let's save that for later size_diff = new_size - output_dim @@ -56,20 +51,26 @@ def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module: # TODO: cleanup duplication? Missing properties that will effect usability? if in_place: - layer.out_features = new_size - layer.weight = p_weights - layer.bias = p_bias - logging.warning( - 'Using experimental "in-place" version. May have unexpected affects on activation.' 
def write_layer_properties(layer, new_size, new_weights, new_bias):
    """Assigns properties to this `layer`, making the changes on a model in-line

    Args:
        layer: the module to mutate (e.g. an `nn.Linear`)
        new_size: new value for `layer.out_features`, or `None` to leave it as is
        new_weights: replacement for `layer.weight`, or `None` to leave it as is
        new_bias: replacement for `layer.bias`, or `None` to leave it as is

    Returns:
        None; `layer` is modified in place and a warning is logged on every call.
    """
    # Compare against None explicitly. Truth-testing a tensor that holds more
    # than one element raises a RuntimeError, and a plain truthiness test would
    # also skip legitimate "falsy" values such as a size of 0.
    if new_size is not None:
        layer.out_features = new_size
    if new_weights is not None:
        layer.weight = new_weights
    if new_bias is not None:
        layer.bias = new_bias
    logging.warning(
        'Using experimental "in-place" version. May have unexpected affects on activation.'
    )
def type_name(o):
    '''Return the bare class name of the given object (e.g. 'Linear').

    Comparing names is a shortcut for
    any(isinstance(some_obj, _type) for _type in [my, types, to, check])
    '''
    cls = type(o)
    return cls.__name__


def type_supported(type_name: str) -> bool:
    """Tell whether `type_name` is one of the supported layer class names."""
    # NOTE: a constants file was already considered. I don't like that precedent
    supported = ['Conv2d', 'Linear']
    return type_name in supported
""" diff --git a/morph/nn/resizing.py b/morph/nn/resizing.py new file mode 100644 index 0000000..79caee5 --- /dev/null +++ b/morph/nn/resizing.py @@ -0,0 +1,6 @@ +from collections import namedtuple + +Resizing = namedtuple('Resizing', ['input_size', 'output_size'], defaults=[0, 0]) +Resizing.__doc__ += ': Baseclass for a type that encapsulates a resized layer' +Resizing.input_size.__doc__ = "The layer's \"new\" input dimension size (Linear -> in_features, Conv2d -> in_channels)" +Resizing.output_size.__doc__ = "The layer's \"new\" output dimension size (Linear -> out_features, Conv2d -> out_channels)" diff --git a/morph/nn/shrink.py b/morph/nn/shrink.py index f273d37..661990b 100644 --- a/morph/nn/shrink.py +++ b/morph/nn/shrink.py @@ -1,27 +1,103 @@ -import torch +from morph.layers.sparse import percent_waste +from morph.utils import check, round +from .resizing import Resizing +from .utils import in_dim, out_dim, group_layers_by_algo +from .widen import resize_layers +from ._types import type_name + +from typing import List + import torch.nn as nn -#################### HELPERS #################### +class Shrinkage: + """ + An intermediary for the "Shrink" step of the three step Morphing algorithm. + Rather than have all of the state be free in the small scope of a mega-function, + these abstractions ease the way of implementing the shrinking and prune of the + network. + * Given that we have access to the total count of nodes, and how wasteful a layer was + we can deduce any necessary changes once given a new input dimension + * We expect input dimensions to change to accomodate the trimmed down earlier layers, + but we want an expansion further along to allow the opening of bottlenecks in the architecture + """ + + def __init__(self, input_dimension: int, initial_parameters: int, + waste_percentage: float): + self.input_dimension = input_dimension # TODO: is this relevant in any non-Linear case? 
def uniform_prune(net: nn.Module) -> nn.Module:
    """Shrink the layers of `net` down *to* 70% of their current width.

    This is a uniform resize with `width_factor=0.7`, so each resized layer
    loses roughly 30% of its output units (it is not a 70% reduction).
    The network's overall input and output dimensions are not altered.

    Args:
        net: the network whose layers should be narrowed

    Returns:
        The network produced by `resize_layers(net, width_factor=0.7)`.
    """
    return resize_layers(net, width_factor=0.7)
def group_layers_by_algo(children_list: CL) -> Tuple:
    """Group the layers into how they will be acted upon by my implementation of the algorithm:
    1. First child in the list (the "input" layer)
    2. Slice of all the children, those that are not first nor last
    3. Last child in the list (the "output" layer)

    Args:
        children_list: the network's layers (or `(name, layer)` pairs), in order

    Returns:
        A `(first, middle, last)` triple. `middle` is empty when there are
        exactly two children, so callers can always unpack three values.

    Raises:
        ValidationError: (raised by `check`) when fewer than two children are given
    """
    # validate input in case I slip up
    check(len(children_list) > 1, 'Your children_list must be more than a singleton')

    # No special case for two children: the [1:-1] slice is then simply empty,
    # which keeps the three-part shape that every caller unpacks.
    first = children_list[0]
    middle = children_list[1:-1]
    last = children_list[-1]

    return first, middle, last
- Eases type checking, rather than any(isinstance(some_obj, _type) for _type in [my, types, to, check]) - ''' - return type(o).__name__ - - -def type_supported(type_name: str) -> bool: - return type_name in ['Conv2d', 'Linear'] diff --git a/morph/nn/widen.py b/morph/nn/widen.py index 991aa1a..8f5e3ab 100644 --- a/morph/nn/widen.py +++ b/morph/nn/widen.py @@ -1 +1,53 @@ - \ No newline at end of file +import torch.nn as nn + +import logging + +from morph.nn.utils import group_layers_by_algo, make_children_list, out_dim, redo_layer +from morph.utils import round +from morph.nn._types import type_name, type_supported + + +def widen(net: nn.Module, width_factor: float = 1.4) -> nn.Module: + return resize_layers(net, width_factor) + + +def resize_layers(net: nn.Module, width_factor: float = 1.4) -> nn.Module: + """Perform a uniform layer widening, which increases the output dimension for + fully-connected layers and the number of filters for convolutional layers. + """ + + old_layers = make_children_list(net.named_children()) + (first_name, first_layer), middle, last = group_layers_by_algo(old_layers) + + first_layer_output_size = out_dim(first_layer) # count of the last layer's out features + + new_out_next_in = round(first_layer_output_size * width_factor) + + # NOTE: is there a better way to do this part? Maybe nn.Sequential? + network = nn.Module() # new network + + network.add_module(first_name, redo_layer(first_layer, new_out=new_out_next_in)) + + for name, child in middle: + if type_supported(type_name(child)): + + new_out = round(out_dim(child) * width_factor) + + new_layer = redo_layer(child, new_in=new_out_next_in, new_out=new_out) + new_out_next_in = new_out + network.add_module(name, new_layer) + elif type_is_nested(child): + raise NotImplementedError( + 'Currently do not support for nested structures (i.e. 
class EasyMnist(nn.Module):
    """Three fully-connected layers (784 -> 1000 -> 30 -> 10), each followed by ReLU."""

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(784, 1000)
        self.linear2 = nn.Linear(1000, 30)
        self.linear3 = nn.Linear(30, 10)

    def forward(self, x_batch: torch.Tensor):
        """Simple ReLU-based activations through all layers of the DNN.
        Simple and sufficiently deep neural network. No frills.

        Args:
            x_batch: a tensor that can be viewed as rows of 784 values
                (e.g. a batch of 28x28 images)

        Returns:
            A tensor with one row of 10 non-negative values per input row.
        """
        _input = x_batch.view(-1, 784)  # shape for our linear1
        # Feed the flattened tensor to linear1. Passing `x_batch` itself fails
        # for any input whose last dimension is not already 784.
        out1 = F.relu(self.linear1(_input))
        out2 = F.relu(self.linear2(out1))
        out3 = F.relu(self.linear3(out2))

        return out3
def round(value: float) -> int:
    """Rounds a `value` to the nearest integer, with halves going up.

    Performs differently from the standard Python `round`, which sends halves
    to the nearest even integer. Note that this name shadows the builtin in
    every module that imports it.

    Args:
        value: the number to round

    Returns:
        The nearest integer, e.g. 1.2 -> 1, 1.7 -> 2, 2.5 -> 3, -1.7 -> -2.
    """
    import math  # local import: nothing else in this module needs it

    # floor(), not int(): int() truncates toward zero, which mis-rounds
    # negative values (int(-1.7 + .5) == -1 instead of -2).
    return math.floor(value + .5)