diff --git a/how-to-guides/07-param-schedulers.ipynb b/how-to-guides/07-param-schedulers.ipynb new file mode 100644 index 0000000..f188323 --- /dev/null +++ b/how-to-guides/07-param-schedulers.ipynb @@ -0,0 +1,754 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# How to use ParamScheduler with Ignite" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import ast" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch.optim import SGD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook aims at presenting the use of ignite parameter schedulers.\n", + "\n", + "See the [PyTorch-Ignite implementation](https://github.com/pytorch/ignite/blob/master/ignite/handlers/param_scheduler.py) and [documentation](https://pytorch.org/ignite/master/contrib/handlers.html#module-ignite.handlers.param_scheduler) as well as the [PyTorch documentation](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate).\n", + "\n", + "# `PyTorch` schedulers\n", + "\n", + "We use the `LRScheduler` tool of `PyTorch-Ignite` which allows to easily integrate the schedulers of `PyTorch` and in particular to display the values." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from ignite.handlers import LRScheduler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Beware that not all `PyTorch` schedulers are compatible because they have more or less exotic modes." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.optim.lr_scheduler import (\n", + " StepLR,\n", + " ExponentialLR,\n", + " CosineAnnealingLR,\n", + " MultiStepLR,\n", + " LambdaLR,\n", + " MultiplicativeLR,\n", + " CyclicLR,\n", + " OneCycleLR\n", + " # ReduceLROnPlateau : not compatible\n", + " # CosineAnnealingWarmRestarts : not compatible\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def print_optimizer(cls, num_events=50, lr=1.0, *args, **kwargs):\n", + " optimizer = SGD(params=[torch.zeros(10)], lr=lr)\n", + " lr_scheduler = cls(optimizer=optimizer, *args, **kwargs)\n", + " LRScheduler.plot_values(num_events=num_events, lr_scheduler=lr_scheduler)\n", + " plt.title(\"{}\".format(cls))\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## LambdaLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.LambdaLR.html#torch.optim.lr_scheduler.LambdaLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0ada03fd78ca4fc394f48b9942de4437", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.05, description='lr', max=0.1, min=0.01, step=0.01), IntSlider(value…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_lambdalr(**kwargs):\n", + " print_optimizer(LambdaLR, lr_lambda=lambda epoch: 1 / (epoch+1), **kwargs)\n", + " \n", + "lambdalr_params = {\n", + " \"lr\": (0.01, 0.1, 0.01), \n", + " \"num_events\": (5, 100, 5),\n", + "}\n", + "\n", + "_ = widgets.interact(print_lambdalr, **lambdalr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MultiplicativeLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.MultiplicativeLR.html#torch.optim.lr_scheduler.MultiplicativeLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ae424390a0cf452ba245f67c168d5229", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.5, description='factor', max=1.0, min=0.05, step=0.05), FloatSlider(…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_multiplicativelr(factor, **kwargs):\n", + " print_optimizer(MultiplicativeLR, lr_lambda=lambda epoch: factor, **kwargs)\n", + "\n", + "multiplicativelr_params = {\n", + " \"lr\": (0.01, 0.1, 0.01), \n", + " \"num_events\": (5, 100, 5), \n", + " \"factor\": (0.05, 1.0, 0.05),\n", + "}\n", + " \n", + "_ = widgets.interact(print_multiplicativelr, **multiplicativelr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CyclicLR" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "51b759a6c9ec40c6bfa0495f23cf5b2b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.05, description='base_lr', max=0.1, min=0.01, step=0.01), FloatSlide…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_cycliclr(**kwargs):\n", + " print_optimizer(CyclicLR, **kwargs)\n", + "\n", + "cycliclr_params = {\n", + " \"base_lr\": (0.01, 0.1, 0.01),\n", + " \"max_lr\": (0.01, 0.05, 0.01), \n", + " \"num_events\": (5, 100, 5), \n", + " \"step_size_up\": (1, 10, 1),\n", + " \"step_size_down\": (1, 10, 1),\n", + " \"mode\": ['triangular', 'triangular2', 'exp_range'],\n", + " \"gamma\": (0.1, 2.0, 0.01),\n", + "}\n", + " \n", + "_ = widgets.interact(print_cycliclr, **cycliclr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## OneCycleLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.OneCycleLR.html#torch.optim.lr_scheduler.OneCycleLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "991afe00432946d2a697b5affbb05105", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.02, description='max_lr', max=0.05, min=0.01, step=0.01), IntSlider(…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_onecyclelr(**kwargs):\n", + " print_optimizer(OneCycleLR, **kwargs, num_events=kwargs[\"total_steps\"])\n", + "\n", + "onecyclelr_params = {\n", + " \"max_lr\": (0.01, 0.05, 0.01), \n", + " \"total_steps\": (1, 100, 1),\n", + " \"pct_start\": (0.1, 1.0, 0.1),\n", + " \"anneal_strategy\": ['cos', 'linear'],\n", + "} \n", + " \n", + "_ = widgets.interact(print_onecyclelr, **onecyclelr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## StepLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.StepLR.html#torch.optim.lr_scheduler.StepLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b56e289a5aa74cecb3136bb865281fc3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.05, description='lr', max=0.1, min=0.01, step=0.01), IntSlider(value…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_steplr(**kwargs):\n", + " print_optimizer(StepLR, **kwargs)\n", + "\n", + "steplr_params = {\n", + " \"lr\": (0.01, 0.1, 0.01), \n", + " \"num_events\": (5, 100, 5),\n", + " \"step_size\": (1, 10, 1), \n", + " \"gamma\": (0.01, 1.0, 0.01),\n", + "} \n", + " \n", + "_ = widgets.interact(print_steplr, **steplr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ExponentialLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ExponentialLR.html#torch.optim.lr_scheduler.ExponentialLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5da7df3041724625a8f71ddb5db0bfbf", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.05, description='lr', max=0.1, min=0.01, step=0.01), IntSlider(value…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_exponentiallr(**kwargs):\n", + " print_optimizer(ExponentialLR, **kwargs)\n", + "\n", + "exponentiallr_params = {\n", + " \"lr\": (0.01, 0.1, 0.01), \n", + " \"num_events\": (5, 100, 5),\n", + " \"gamma\": (0.01, 1.0, 0.01),\n", + "} \n", + " \n", + "_ = widgets.interact(print_exponentiallr, **exponentiallr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CosineAnnealingLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.CosineAnnealingLR.html#torch.optim.lr_scheduler.CosineAnnealingLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3502c0393e3743be98d031bdab54d3f2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.05, description='lr', max=0.1, min=0.01, step=0.01), IntSlider(value…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_cosineannealinglr(**kwargs):\n", + " print_optimizer(CosineAnnealingLR, **kwargs)\n", + "\n", + "cosineannealinglr_params = {\n", + " \"lr\": (0.01, 0.1, 0.01), \n", + " \"num_events\": (5, 100, 5),\n", + " \"T_max\": (1, 20, 1),\n", + " \"eta_min\": (0.0, 1.0, 0.1),\n", + "} \n", + " \n", + "_ = widgets.interact(print_cosineannealinglr, **cosineannealinglr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MultiStepLR\n", + "\n", + "See the [documentation](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.MultiStepLR.html#torch.optim.lr_scheduler.MultiStepLR)." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "adde4c098a7043708c1bc8d39cd3ff75", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.05, description='lr', max=0.1, min=0.01, step=0.01), IntSlider(value…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_multisteplr(**kwargs):\n", + " kwargs[\"milestones\"] = list(ast.literal_eval(kwargs[\"milestones\"]))\n", + " print_optimizer(MultiStepLR, **kwargs)\n", + "\n", + "multisteplr_params = {\n", + " \"lr\": (0.01, 0.1, 0.01), \n", + " \"num_events\": (5, 100, 5),\n", + " \"milestones\": \"10, 30, 40\",\n", + " \"gamma\": (0.01, 1.0, 0.01),\n", + "} \n", + " \n", + "_ = widgets.interact(print_multisteplr, **multisteplr_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# `PyTorch-Ignite` Schedulers\n", + "\n", + "Each scheduler can apply on a part of the parameters associated with the optimizer. This is a strong difference with `PyTorch`. On the other hand, there is an operator to concatenate schedulers. " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from ignite.contrib.handlers import (\n", + " LinearCyclicalScheduler,\n", + " PiecewiseLinear,\n", + " CosineAnnealingScheduler\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_values(cls, **kwargs):\n", + " cls.plot_values(**kwargs)\n", + " plt.title(\"{}\".format(cls))\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LinearCyclicalScheduler\n", + "\n", + "See the [documentation](https://pytorch.org/ignite/generated/ignite.handlers.param_scheduler.LinearCyclicalScheduler.html#ignite.handlers.param_scheduler.LinearCyclicalScheduler)." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "68b547d45efa4412b0c2d110f2af5dad", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=27, description='num_events', max=50, min=5), FloatSlider(value=0.55, de…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_linearcyclicalscheduler(**kwargs):\n", + " plot_values(LinearCyclicalScheduler, param_name=\"lr\", **kwargs)\n", + " \n", + "linearcyclicalscheduler_params = {\n", + " \"num_events\": (5, 50, 1),\n", + " \"start_value\": (0.1, 1., 0.01),\n", + " \"end_value\": (0., 0.1, 0.001),\n", + " \"cycle_size\": (0, 20, 1),\n", + "}\n", + "\n", + "_ = widgets.interact(print_linearcyclicalscheduler, **linearcyclicalscheduler_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PiecewiseLinear\n", + "\n", + "See the [documentation](https://pytorch.org/ignite/generated/ignite.handlers.param_scheduler.PiecewiseLinear.html#ignite.handlers.param_scheduler.PiecewiseLinear)." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0e39d64ccf60429a9575221783bb3e05", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=52, description='num_events', min=5), Text(value='(10, 0.5), (20, 0.45),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_piecewiselinear(**kwargs):\n", + " kwargs[\"milestones_values\"] = list(ast.literal_eval(kwargs[\"milestones_values\"]))\n", + " plot_values(PiecewiseLinear, param_name=\"lr\", **kwargs)\n", + " \n", + "piecewiselinear_params = {\n", + " \"num_events\": (5, 100, 1),\n", + " \"milestones_values\": \"(10, 0.5), (20, 0.45), (21, 0.3), (30, 0.1), (40, 0.1)\",\n", + "}\n", + "\n", + "_ = widgets.interact(print_piecewiselinear, **piecewiselinear_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CosineAnnealingScheduler\n", + "\n", + "See the [documentation](https://pytorch.org/ignite/generated/ignite.handlers.param_scheduler.CosineAnnealingScheduler.html#ignite.handlers.param_scheduler.CosineAnnealingScheduler)." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "676d6df2adf84e3b91066ac3d0221e8b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=77, description='num_events', max=150, min=5), FloatSlider(value=1.0, de…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_cosineannealingscheduler(**kwargs):\n", + " plot_values(CosineAnnealingScheduler, param_name=\"lr\", **kwargs)\n", + " \n", + "cosineannealingscheduler_params = {\n", + " \"num_events\": (5, 150, 1),\n", + " \"start_value\": (0.1, 2.0, 0.1),\n", + " \"end_value\": (0.001, 0.01, 0.001),\n", + " \"cycle_size\": (1, 20, 1),\n", + " \"start_value_mult\": (0.0, 1.9, 0.05),\n", + " \"end_value_mult\": (0.0, 3.0, 0.05),\n", + "}\n", + "\n", + "_ = widgets.interact(print_cosineannealingscheduler, **cosineannealingscheduler_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Warmup\n", + "\n", + "See the [documentation](https://pytorch.org/ignite/generated/ignite.handlers.param_scheduler.create_lr_scheduler_with_warmup.html#ignite.handlers.param_scheduler.create_lr_scheduler_with_warmup)." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from ignite.contrib.handlers import create_lr_scheduler_with_warmup" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c109dfdc40f742bcb5984c3e9c7ffe28", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=50, description='num_events', max=150, min=5), FloatSlider(value=0.25, d…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def print_warmup_optimizer(num_events=50, **kwargs):\n", + " optimizer = SGD(params=[torch.zeros(10)], lr=kwargs[\"warmup_end_value\"])\n", + " lr_scheduler = ExponentialLR(optimizer=optimizer, gamma=kwargs[\"gamma\"])\n", + " lr_values = [None] * num_events\n", + " del kwargs[\"gamma\"]\n", + " create_lr_scheduler_with_warmup(\n", + " lr_scheduler=lr_scheduler,\n", + " **kwargs,\n", + " output_simulated_values=lr_values,\n", + " )\n", + " lr_values = np.array(lr_values)\n", + " plt.plot(lr_values[:, 0], lr_values[:, 1], label=\"learning rate\")\n", + " plt.title(\"create_lr_scheduler_with_warmup + ExponentialLR\")\n", + " plt.show()\n", + " \n", + "warmup_params = {\n", + " \"num_events\": (5, 150, 1),\n", + " \"warmup_start_value\": (0.01, 0.5, 0.01),\n", + " \"warmup_end_value\": (0.5, 2.0, 0.1),\n", + " \"warmup_duration\": (5, 20, 1),\n", + " \"gamma\": (0.5, 1.0, 0.01),\n", + "}\n", + "\n", + "_ = widgets.interact(print_warmup_optimizer, **warmup_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example of an advanced case using multiparameters\n", + "\n", + "In the following case, we consider an optimizer with 3 groups of parameters. We associate a `PyTorch` scheduler on each group with warmup." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "t1 = torch.tensor([0.0])\n", + "t2 = torch.tensor([0.0])\n", + "t3 = torch.tensor([0.0])\n", + "\n", + "opt = torch.optim.SGD([\n", + " {\"params\": [t1], \"lr\": 0.7, \"weight_decay\": 0.0},\n", + " {\"params\": [t2], \"lr\": 0.2, \"weight_decay\": 0.0001},\n", + " {\"params\": [t3], \"lr\": 0.4, \"weight_decay\": 0.01},\n", + "])\n", + "\n", + "torch_lr_scheduler = ExponentialLR(optimizer=opt, gamma=0.98)\n", + "lr_values = [None] * 100\n", + "scheduler = create_lr_scheduler_with_warmup(torch_lr_scheduler,\n", + " warmup_start_value=0.0,\n", + " warmup_duration=10,\n", + " output_simulated_values=lr_values)\n", + "\n", + "lr_values = np.array(lr_values)\n", + "# lr_values.shape = (100, 3) <=> event index, lr for group 1, lr for group 2\n", + "# Plot simulated values\n", + "plt.plot(lr_values[:, 0], lr_values[:, 1], label=\"learning rate g1\")\n", + "plt.plot(lr_values[:, 0], lr_values[:, 2], label=\"learning rate g2\")\n", + "plt.plot(lr_values[:, 0], lr_values[:, 3], label=\"learning rate g3\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}