From 116a54bbef018196ded0fdd85650a872a38b9235 Mon Sep 17 00:00:00 2001 From: Vladimir Malinovskii Date: Tue, 11 Apr 2023 19:33:51 +0300 Subject: [PATCH 001/609] Changed deprecated argument "pretrained" to non-deprecated argument "weights" for torchvision models (#2263) * changed depricated pretrained argument to weights * Update intermediate_source/flask_rest_api_tutorial.py Co-authored-by: Nicolas Hug --------- Co-authored-by: Nicolas Hug Co-authored-by: Svetlana Karslioglu --- beginner_source/basics/saveloadrun_tutorial.py | 4 ++-- beginner_source/introyt/captumyt.py | 2 +- beginner_source/transfer_learning_tutorial.py | 4 ++-- intermediate_source/flask_rest_api_tutorial.py | 6 +++--- intermediate_source/tensorboard_profiler_tutorial.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py index 50c381cfcec..baccdf3cfdd 100644 --- a/beginner_source/basics/saveloadrun_tutorial.py +++ b/beginner_source/basics/saveloadrun_tutorial.py @@ -26,14 +26,14 @@ # state dictionary, called ``state_dict``. These can be persisted via the ``torch.save`` # method: -model = models.vgg16(pretrained=True) +model = models.vgg16(weights='IMAGENET1K_V1') torch.save(model.state_dict(), 'model_weights.pth') ########################## # To load model weights, you need to create an instance of the same model first, and then load the parameters # using ``load_state_dict()`` method. -model = models.vgg16() # we do not specify pretrained=True, i.e. do not load default weights +model = models.vgg16() # we do not specify weights, i.e. create untrained model model.load_state_dict(torch.load('model_weights.pth')) model.eval() diff --git a/beginner_source/introyt/captumyt.py b/beginner_source/introyt/captumyt.py index fe73613dcb2..2ff8e9e70b1 100644 --- a/beginner_source/introyt/captumyt.py +++ b/beginner_source/introyt/captumyt.py @@ -155,7 +155,7 @@ # now. # -model = models.resnet101(pretrained=True) +model = models.resnet101(weights='IMAGENET1K_V1') model = model.eval() diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py index be8bb87e134..15843ec074e 100644 --- a/beginner_source/transfer_learning_tutorial.py +++ b/beginner_source/transfer_learning_tutorial.py @@ -250,7 +250,7 @@ def visualize_model(model, num_images=6): # Load a pretrained model and reset final fully connected layer. # -model_ft = models.resnet18(pretrained=True) +model_ft = models.resnet18(weights='IMAGENET1K_V1') num_ftrs = model_ft.fc.in_features # Here the size of each output sample is set to 2. # Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)). @@ -295,7 +295,7 @@ def visualize_model(model, num_images=6): # `here `__. 
# -model_conv = torchvision.models.resnet18(pretrained=True) +model_conv = torchvision.models.resnet18(weights='IMAGENET1K_V1') for param in model_conv.parameters(): param.requires_grad = False diff --git a/intermediate_source/flask_rest_api_tutorial.py b/intermediate_source/flask_rest_api_tutorial.py index 66539e704c2..39c1a9d39f7 100644 --- a/intermediate_source/flask_rest_api_tutorial.py +++ b/intermediate_source/flask_rest_api_tutorial.py @@ -161,8 +161,8 @@ def transform_image(image_bytes): from torchvision import models -# Make sure to pass `pretrained` as `True` to use the pretrained weights: -model = models.densenet121(pretrained=True) +# Make sure to set `weights` as `'IMAGENET1K_V1'` to use the pretrained weights: +model = models.densenet121(weights='IMAGENET1K_V1') # Since we are using our model only for inference, switch to `eval` mode: model.eval() @@ -269,7 +269,7 @@ def get_prediction(image_bytes): # # app = Flask(__name__) # imagenet_class_index = json.load(open('/imagenet_class_index.json')) -# model = models.densenet121(pretrained=True) +# model = models.densenet121(weights='IMAGENET1K_V1') # model.eval() # # diff --git a/intermediate_source/tensorboard_profiler_tutorial.py b/intermediate_source/tensorboard_profiler_tutorial.py index 5b6cd440a58..7cd241d40ad 100644 --- a/intermediate_source/tensorboard_profiler_tutorial.py +++ b/intermediate_source/tensorboard_profiler_tutorial.py @@ -68,7 +68,7 @@ # To run on GPU, move model and loss to GPU device. device = torch.device("cuda:0") -model = torchvision.models.resnet18(pretrained=True).cuda(device) +model = torchvision.models.resnet18(weights='IMAGENET1K_V1').cuda(device) criterion = torch.nn.CrossEntropyLoss().cuda(device) optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) model.train() From 5974b5cd7f1f0ffd997f470c365df30287a64ec4 Mon Sep 17 00:00:00 2001 From: Dale Evans Date: Tue, 11 Apr 2023 09:34:09 -0700 Subject: [PATCH 002/609] Typo in tutorial, dist_barrier should be dist.barrier (#2281) Co-authored-by: Svetlana Karslioglu --- intermediate_source/FSDP_tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/FSDP_tutorial.rst b/intermediate_source/FSDP_tutorial.rst index cc9411e14b1..d69a03b68be 100644 --- a/intermediate_source/FSDP_tutorial.rst +++ b/intermediate_source/FSDP_tutorial.rst @@ -249,7 +249,7 @@ We add the following code snippets to a python script “FSDP_mnist.py”. 
if args.save_model: # use a barrier to make sure training is done on all ranks - dist_barrier() + dist.barrier() # state_dict for FSDP model is only available on Nightlies for now states = model.state_dict() if rank == 0: From c2aa09658fa47aaf02718bd4f3ae750eff60db59 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 11 Apr 2023 13:23:14 -0700 Subject: [PATCH 003/609] Update pyspelling to include beginner tutorials written in Python (#2279) * Fix kernel dimensions for LeNet model code example (#2192) * Change kernel to 5x5 in 1st Conv2d layer in model init Signed-off-by: Kiersten Stokes * Change kernel to 5x5 in 2nd Conv2d layer in model init * Fix dimensions of 1st Linear layer to match new expected size --------- Signed-off-by: Kiersten Stokes Co-authored-by: Suraj Subramanian <5676233+suraj813@users.noreply.github.com> Co-authored-by: Svetlana Karslioglu * Update pyspelling for all beginner tutorials in Python * Update * Update * Fix template tutorial, update gitignore * Apply suggestions from code review --------- Signed-off-by: Kiersten Stokes Co-authored-by: Kiersten Stokes Co-authored-by: Suraj Subramanian <5676233+suraj813@users.noreply.github.com> Co-authored-by: Nikita Shulga --- .gitignore | 3 + .pyspelling.yml | 25 ++- .../Intro_to_TorchScript_tutorial.py | 8 +- beginner_source/chatbot_tutorial.py | 91 ++++---- beginner_source/dcgan_faces_tutorial.py | 124 ++++++----- ...deploy_seq2seq_hybrid_frontend_tutorial.py | 50 +++-- beginner_source/fgsm_tutorial.py | 12 +- beginner_source/flava_finetuning_tutorial.py | 26 +-- .../hyperparameter_tuning_tutorial.py | 4 +- beginner_source/nn_tutorial.py | 108 +++++---- beginner_source/profiler.py | 2 +- beginner_source/saving_loading_models.py | 10 +- beginner_source/t5_tutorial.py | 32 +-- beginner_source/template_tutorial.py | 18 +- .../text_sentiment_ngrams_tutorial.py | 4 +- beginner_source/transfer_learning_tutorial.py | 8 +- beginner_source/transformer_tutorial.py | 40 ++-- beginner_source/translation_transformer.py | 28 ++- beginner_source/vt_tutorial.py | 34 +-- en-wordlist.txt | 209 ++++++++++++++++++ tutorials-wordlist.txt | 23 -- 21 files changed, 552 insertions(+), 307 deletions(-) create mode 100644 en-wordlist.txt delete mode 100644 tutorials-wordlist.txt diff --git a/.gitignore b/.gitignore index 2d9a9e5a634..ef7a026d9e8 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,6 @@ cleanup.sh # VSCode *.vscode + +# pyspelling +dictionary.dic diff --git a/.pyspelling.yml b/.pyspelling.yml index 04dcda37b75..015ac975b7f 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -1,11 +1,11 @@ spellchecker: aspell matrix: -- name: beginner +- name: python sources: - - beginner_source/data_loading_tutorial.py + - beginner_source/*.py dictionary: wordlists: - - tutorials-wordlist.txt + - en-wordlist.txt pipeline: - pyspelling.filters.python: group_comments: true @@ -13,13 +13,30 @@ matrix: context_visible_first: true delimiters: # Exclude figure rST tags - - open: '\.\.\s+(figure|literalinclude|)::' + - open: '\.\.\s+(figure|literalinclude|math|image|grid)::' + close: '\n' + # Exclude raw directive + - open: '\.\. (raw)::.*$\n*' close: '\n' # Exclude Python coding directives - open: '-\*- coding:' close: '\n' + # Exclude Authors: + - open: 'Author(|s):' + close: '\n' + # Exclude .rst directives: + - open: ':math:`.*`' + close: ' ' + # Ignore multiline content in codeblock + - open: '(?s)^::\n\n ' + close: '^\n' + # Ignore reStructuredText block directives + - open: '\.\. 
(code-block)::.*$\n*' + content: '(?P(^(?P[ ]+).*$\n))(?P(^([ \t]+.*|[ \t]*)$\n)*)' + close: '(^(?![ \t]+.*$))' - pyspelling.filters.markdown: - pyspelling.filters.html: ignores: - code - pre + - pyspelling.filters.url: diff --git a/beginner_source/Intro_to_TorchScript_tutorial.py b/beginner_source/Intro_to_TorchScript_tutorial.py index 02757752135..063abd442d1 100644 --- a/beginner_source/Intro_to_TorchScript_tutorial.py +++ b/beginner_source/Intro_to_TorchScript_tutorial.py @@ -2,7 +2,7 @@ Introduction to TorchScript =========================== -*James Reed (jamesreed@fb.com), Michael Suo (suo@fb.com)*, rev2 +**Authors:** James Reed (jamesreed@fb.com), Michael Suo (suo@fb.com), rev2 This tutorial is an introduction to TorchScript, an intermediate representation of a PyTorch model (subclass of ``nn.Module``) that @@ -147,7 +147,7 @@ def forward(self, x, h): ###################################################################### -# We’ve once again redefined our MyCell class, but here we’ve defined +# We’ve once again redefined our ``MyCell`` class, but here we’ve defined # ``MyDecisionGate``. This module utilizes **control flow**. Control flow # consists of things like loops and ``if``-statements. # @@ -202,7 +202,7 @@ def forward(self, x, h): # inputs* the network might see. # # What exactly has this done? It has invoked the ``Module``, recorded the -# operations that occured when the ``Module`` was run, and created an +# operations that occurred when the ``Module`` was run, and created an # instance of ``torch.jit.ScriptModule`` (of which ``TracedModule`` is an # instance) # @@ -283,7 +283,7 @@ def forward(self, x, h): # Looking at the ``.code`` output, we can see that the ``if-else`` branch # is nowhere to be found! Why? Tracing does exactly what we said it would: # run the code, record the operations *that happen* and construct a -# ScriptModule that does exactly that. Unfortunately, things like control +# ``ScriptModule`` that does exactly that. Unfortunately, things like control # flow are erased. # # How can we faithfully represent this module in TorchScript? We provide a diff --git a/beginner_source/chatbot_tutorial.py b/beginner_source/chatbot_tutorial.py index 3e4905126c4..02185a6ba3e 100644 --- a/beginner_source/chatbot_tutorial.py +++ b/beginner_source/chatbot_tutorial.py @@ -31,28 +31,28 @@ # :align: center # :alt: bot # -# .. code:: python -# -# > hello? -# Bot: hello . -# > where am I? -# Bot: you re in a hospital . -# > who are you? -# Bot: i m a lawyer . -# > how are you doing? -# Bot: i m fine . -# > are you my friend? -# Bot: no . -# > you're under arrest -# Bot: i m trying to help you ! -# > i'm just kidding -# Bot: i m sorry . -# > where are you from? -# Bot: san francisco . -# > it's time for me to leave -# Bot: i know . -# > goodbye -# Bot: goodbye . +# .. code-block:: python +# +# > hello? +# Bot: hello . +# > where am I? +# Bot: you re in a hospital . +# > who are you? +# Bot: i m a lawyer . +# > how are you doing? +# Bot: i m fine . +# > are you my friend? +# Bot: no . +# > you're under arrest +# Bot: i m trying to help you ! +# > i'm just kidding +# Bot: i m sorry . +# > where are you from? +# Bot: san francisco . +# > it's time for me to leave +# Bot: i know . +# > goodbye +# Bot: goodbye . 
# # **Tutorial Highlights** # @@ -65,7 +65,7 @@ # - Implement greedy-search decoding module # - Interact with trained chatbot # -# **Acknowledgements** +# **Acknowledgments** # # This tutorial borrows code from the following sources: # @@ -75,7 +75,7 @@ # 2) Sean Robertson’s practical-pytorch seq2seq-translation example: # https://github.com/spro/practical-pytorch/tree/master/seq2seq-translation # -# 3) FloydHub’s Cornell Movie Corpus preprocessing code: +# 3) FloydHub Cornell Movie Corpus preprocessing code: # https://github.com/floydhub/textutil-preprocess-cornell-movie-corpus # @@ -162,11 +162,11 @@ def printLines(file, n=10): # contains a tab-separated *query sentence* and a *response sentence* pair. # # The following functions facilitate the parsing of the raw -# *utterances.jsonl* data file. +# ``utterances.jsonl`` data file. # # - ``loadLinesAndConversations`` splits each line of the file into a dictionary of -# lines with fields: lineID, characterID, and text and then groups them -# into conversations with fields: conversationID, movieID, and lines. +# lines with fields: ``lineID``, ``characterID``, and text and then groups them +# into conversations with fields: ``conversationID``, ``movieID``, and lines. # - ``extractSentencePairs`` extracts pairs of sentences from # conversations # @@ -215,7 +215,7 @@ def extractSentencePairs(conversations): ###################################################################### # Now we’ll call these functions and create the file. We’ll call it -# *formatted_movie_lines.txt*. +# ``formatted_movie_lines.txt``. # # Define path to new file @@ -359,12 +359,12 @@ def readVocs(datafile, corpus_name): voc = Voc(corpus_name) return voc, pairs -# Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold +# Returns True if both sentences in a pair 'p' are under the MAX_LENGTH threshold def filterPair(p): # Input sequences need to preserve the last word for EOS token return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH -# Filter pairs using filterPair condition +# Filter pairs using the ``filterPair`` condition def filterPairs(pairs): return [pair for pair in pairs if filterPair(pair)] @@ -659,7 +659,7 @@ def __init__(self, hidden_size, embedding, n_layers=1, dropout=0): self.hidden_size = hidden_size self.embedding = embedding - # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size' + # Initialize GRU; the input_size and hidden_size parameters are both set to 'hidden_size' # because our input size is a word embedding with number of features == hidden_size self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout), bidirectional=True) @@ -958,7 +958,7 @@ def train(input_variable, lengths, target_variable, mask, max_target_len, encode input_variable = input_variable.to(device) target_variable = target_variable.to(device) mask = mask.to(device) - # Lengths for rnn packing should always be on the cpu + # Lengths for RNN packing should always be on the CPU lengths = lengths.to("cpu") # Initialize variables @@ -1007,7 +1007,7 @@ def train(input_variable, lengths, target_variable, mask, max_target_len, encode print_losses.append(mask_loss.item() * nTotal) n_totals += nTotal - # Perform backpropatation + # Perform backpropagation loss.backward() # Clip gradients: gradients are modified in place @@ -1032,8 +1032,8 @@ def train(input_variable, lengths, target_variable, mask, max_target_len, encode # lifting with the ``train`` function. 
# # One thing to note is that when we save our model, we save a tarball -# containing the encoder and decoder state_dicts (parameters), the -# optimizers’ state_dicts, the loss, the iteration, etc. Saving the model +# containing the encoder and decoder ``state_dicts`` (parameters), the +# optimizers’ ``state_dicts``, the loss, the iteration, etc. Saving the model # in this way will give us the ultimate flexibility with the checkpoint. # After loading a checkpoint, we will be able to use the model parameters # to run inference, or we can continue training right where we left off. @@ -1240,8 +1240,8 @@ def evaluateInput(encoder, decoder, searcher, voc): # Configure models model_name = 'cb_model' attn_model = 'dot' -#attn_model = 'general' -#attn_model = 'concat' +#``attn_model = 'general'`` +#``attn_model = 'concat'`` hidden_size = 500 encoder_n_layers = 2 decoder_n_layers = 2 @@ -1251,12 +1251,17 @@ def evaluateInput(encoder, decoder, searcher, voc): # Set checkpoint to load from; set to None if starting from scratch loadFilename = None checkpoint_iter = 4000 -#loadFilename = os.path.join(save_dir, model_name, corpus_name, -# '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size), -# '{}_checkpoint.tar'.format(checkpoint_iter)) +############################################################# +# Sample code to load from a checkpoint: +# +# .. code-block:: python +# +# loadFilename = os.path.join(save_dir, model_name, corpus_name, +# '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size), +# '{}_checkpoint.tar'.format(checkpoint_iter)) -# Load model if a loadFilename is provided +# Load model if a ``loadFilename`` is provided if loadFilename: # If loading on same machine the model was trained on checkpoint = torch.load(loadFilename) @@ -1319,7 +1324,7 @@ def evaluateInput(encoder, decoder, searcher, voc): encoder_optimizer.load_state_dict(encoder_optimizer_sd) decoder_optimizer.load_state_dict(decoder_optimizer_sd) -# If you have cuda, configure cuda to call +# If you have CUDA, configure CUDA to call for state in encoder_optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): @@ -1344,7 +1349,7 @@ def evaluateInput(encoder, decoder, searcher, voc): # To chat with your model, run the following block. # -# Set dropout layers to eval mode +# Set dropout layers to ``eval`` mode encoder.eval() decoder.eval() diff --git a/beginner_source/dcgan_faces_tutorial.py b/beginner_source/dcgan_faces_tutorial.py index a909f713393..2ee43d72d2f 100644 --- a/beginner_source/dcgan_faces_tutorial.py +++ b/beginner_source/dcgan_faces_tutorial.py @@ -15,7 +15,7 @@ # This tutorial will give an introduction to DCGANs through an example. We # will train a generative adversarial network (GAN) to generate new # celebrities after showing it pictures of many real celebrities. Most of -# the code here is from the dcgan implementation in +# the code here is from the DCGAN implementation in # `pytorch/examples `__, and this # document will give a thorough explanation of the implementation and shed # light on how and why this model works. But don’t worry, no prior @@ -30,8 +30,8 @@ # What is a GAN? # ~~~~~~~~~~~~~~ # -# GANs are a framework for teaching a DL model to capture the training -# data’s distribution so we can generate new data from that same +# GANs are a framework for teaching a deep learning model to capture the training +# data distribution so we can generate new data from that same # distribution. 
GANs were invented by Ian Goodfellow in 2014 and first # described in the paper `Generative Adversarial # Nets `__. @@ -145,35 +145,35 @@ # # Let’s define some inputs for the run: # -# - **dataroot** - the path to the root of the dataset folder. We will -# talk more about the dataset in the next section -# - **workers** - the number of worker threads for loading the data with -# the DataLoader -# - **batch_size** - the batch size used in training. The DCGAN paper -# uses a batch size of 128 -# - **image_size** - the spatial size of the images used for training. +# - ``dataroot`` - the path to the root of the dataset folder. We will +# talk more about the dataset in the next section. +# - ``workers`` - the number of worker threads for loading the data with +# the ``DataLoader``. +# - ``batch_size`` - the batch size used in training. The DCGAN paper +# uses a batch size of 128. +# - ``image_size`` - the spatial size of the images used for training. # This implementation defaults to 64x64. If another size is desired, # the structures of D and G must be changed. See # `here `__ for more -# details -# - **nc** - number of color channels in the input images. For color -# images this is 3 -# - **nz** - length of latent vector -# - **ngf** - relates to the depth of feature maps carried through the -# generator -# - **ndf** - sets the depth of feature maps propagated through the -# discriminator -# - **num_epochs** - number of training epochs to run. Training for +# details. +# - ``nc`` - number of color channels in the input images. For color +# images this is 3. +# - ``nz`` - length of latent vector. +# - ``ngf`` - relates to the depth of feature maps carried through the +# generator. +# - ``ndf`` - sets the depth of feature maps propagated through the +# discriminator. +# - ``num_epochs`` - number of training epochs to run. Training for # longer will probably lead to better results but will also take much -# longer -# - **lr** - learning rate for training. As described in the DCGAN paper, -# this number should be 0.0002 -# - **beta1** - beta1 hyperparameter for Adam optimizers. As described in -# paper, this number should be 0.5 -# - **ngpu** - number of GPUs available. If this is 0, code will run in +# longer. +# - ``lr`` - learning rate for training. As described in the DCGAN paper, +# this number should be 0.0002. +# - ``beta1`` - beta1 hyperparameter for Adam optimizers. As described in +# paper, this number should be 0.5. +# - ``ngpu`` - number of GPUs available. If this is 0, code will run in # CPU mode. If this number is greater than 0 it will run on that number -# of GPUs -# +# of GPUs. +# # Root directory for dataset dataroot = "data/celeba" @@ -206,7 +206,7 @@ # Learning rate for optimizers lr = 0.0002 -# Beta1 hyperparam for Adam optimizers +# Beta1 hyperparameter for Adam optimizers beta1 = 0.5 # Number of GPUs available. Use 0 for CPU mode. @@ -221,10 +221,10 @@ # dataset `__ which can # be downloaded at the linked site, or in `Google # Drive `__. -# The dataset will download as a file named *img_align_celeba.zip*. Once -# downloaded, create a directory named *celeba* and extract the zip file -# into that directory. Then, set the *dataroot* input for this notebook to -# the *celeba* directory you just created. The resulting directory +# The dataset will download as a file named ``img_align_celeba.zip``. Once +# downloaded, create a directory named ``celeba`` and extract the zip file +# into that directory. 
Then, set the ``dataroot`` input for this notebook to +# the ``celeba`` directory you just created. The resulting directory # structure should be: # # :: @@ -237,9 +237,9 @@ # -> 537394.jpg # ... # -# This is an important step because we will be using the ImageFolder +# This is an important step because we will be using the ``ImageFolder`` # dataset class, which requires there to be subdirectories in the -# dataset’s root folder. Now, we can create the dataset, create the +# dataset root folder. Now, we can create the dataset, create the # dataloader, set the device to run on, and finally visualize some of the # training data. # @@ -282,14 +282,14 @@ # ~~~~~~~~~~~~~~~~~~~~~ # # From the DCGAN paper, the authors specify that all model weights shall -# be randomly initialized from a Normal distribution with mean=0, -# stdev=0.02. The ``weights_init`` function takes an initialized model as +# be randomly initialized from a Normal distribution with ``mean=0``, +# ``stdev=0.02``. The ``weights_init`` function takes an initialized model as # input and reinitializes all convolutional, convolutional-transpose, and # batch normalization layers to meet this criteria. This function is # applied to the models immediately after initialization. # -# custom weights initialization called on netG and netD +# custom weights initialization called on ``netG`` and ``netD`` def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: @@ -319,10 +319,10 @@ def weights_init(m): # .. figure:: /_static/img/dcgan_generator.png # :alt: dcgan_generator # -# Notice, how the inputs we set in the input section (*nz*, *ngf*, and -# *nc*) influence the generator architecture in code. *nz* is the length -# of the z input vector, *ngf* relates to the size of the feature maps -# that are propagated through the generator, and *nc* is the number of +# Notice, how the inputs we set in the input section (``nz``, ``ngf``, and +# ``nc``) influence the generator architecture in code. ``nz`` is the length +# of the z input vector, ``ngf`` relates to the size of the feature maps +# that are propagated through the generator, and ``nc`` is the number of # channels in the output image (set to 3 for RGB images). Below is the # code for the generator. # @@ -338,22 +338,22 @@ def __init__(self, ngpu): nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), nn.BatchNorm2d(ngf * 8), nn.ReLU(True), - # state size. (ngf*8) x 4 x 4 + # state size. ``(ngf*8) x 4 x 4`` nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 4), nn.ReLU(True), - # state size. (ngf*4) x 8 x 8 + # state size. ``(ngf*4) x 8 x 8`` nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 2), nn.ReLU(True), - # state size. (ngf*2) x 16 x 16 + # state size. ``(ngf*2) x 16 x 16`` nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf), nn.ReLU(True), - # state size. (ngf) x 32 x 32 + # state size. ``(ngf) x 32 x 32`` nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), nn.Tanh() - # state size. (nc) x 64 x 64 + # state size. ``(nc) x 64 x 64`` ) def forward(self, input): @@ -369,12 +369,12 @@ def forward(self, input): # Create the generator netG = Generator(ngpu).to(device) -# Handle multi-gpu if desired +# Handle multi-GPU if desired if (device.type == 'cuda') and (ngpu > 1): netG = nn.DataParallel(netG, list(range(ngpu))) -# Apply the weights_init function to randomly initialize all weights -# to mean=0, stdev=0.02. 
+# Apply the ``weights_init`` function to randomly initialize all weights +# to ``mean=0``, ``stdev=0.02``. netG.apply(weights_init) # Print the model @@ -408,22 +408,22 @@ def __init__(self, ngpu): super(Discriminator, self).__init__() self.ngpu = ngpu self.main = nn.Sequential( - # input is (nc) x 64 x 64 + # input is ``(nc) x 64 x 64`` nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf) x 32 x 32 + # state size. ``(ndf) x 32 x 32`` nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 2), nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*2) x 16 x 16 + # state size. ``(ndf*2) x 16 x 16`` nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 4), nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*4) x 8 x 8 + # state size. ``(ndf*4) x 8 x 8`` nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 8), nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*8) x 4 x 4 + # state size. ``(ndf*8) x 4 x 4`` nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), nn.Sigmoid() ) @@ -440,12 +440,12 @@ def forward(self, input): # Create the Discriminator netD = Discriminator(ngpu).to(device) -# Handle multi-gpu if desired +# Handle multi-GPU if desired if (device.type == 'cuda') and (ngpu > 1): netD = nn.DataParallel(netD, list(range(ngpu))) -# Apply the weights_init function to randomly initialize all weights -# to mean=0, stdev=0.2. +# Apply the ``weights_init`` function to randomly initialize all weights +# like this: ``to mean=0, stdev=0.2``. netD.apply(weights_init) # Print the model @@ -485,7 +485,7 @@ def forward(self, input): # images form out of the noise. # -# Initialize BCELoss function +# Initialize the ``BCELoss`` function criterion = nn.BCELoss() # Create batch of latent vectors that we will use to visualize @@ -509,7 +509,8 @@ def forward(self, input): # we can train it. Be mindful that training GANs is somewhat of an art # form, as incorrect hyperparameter settings lead to mode collapse with # little explanation of what went wrong. Here, we will closely follow -# Algorithm 1 from Goodfellow’s paper, while abiding by some of the best +# Algorithm 1 from the `Goodfellow’s paper `__, +# while abiding by some of the best # practices shown in `ganhacks `__. # Namely, we will “construct different mini-batches for real and fake” # images, and also adjust G’s objective function to maximize @@ -523,7 +524,8 @@ def forward(self, input): # terms of Goodfellow, we wish to “update the discriminator by ascending # its stochastic gradient”. Practically, we want to maximize # :math:`log(D(x)) + log(1-D(G(z)))`. Due to the separate mini-batch -# suggestion from ganhacks, we will calculate this in two steps. First, we +# suggestion from `ganhacks `__, +# we will calculate this in two steps. First, we # will construct a batch of real samples from the training set, forward # pass through :math:`D`, calculate the loss (:math:`log(D(x))`), then # calculate the gradients in a backward pass. Secondly, we will construct @@ -545,7 +547,7 @@ def forward(self, input): # G’s gradients in a backward pass, and finally updating G’s parameters # with an optimizer step. It may seem counter-intuitive to use the real # labels as GT labels for the loss function, but this allows us to use the -# :math:`log(x)` part of the BCELoss (rather than the :math:`log(1-x)` +# :math:`log(x)` part of the ``BCELoss`` (rather than the :math:`log(1-x)` # part) which is exactly what we want. 
 #
 # Finally, we will do some statistic reporting and at the end of each
diff --git a/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py b/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py
index 3d1b4c13b32..5e985b58598 100644
--- a/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py
+++ b/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py
@@ -39,7 +39,7 @@
 # the Python runtime.
 #
 # The API for converting eager-mode PyTorch programs into TorchScript is
-# found in the torch.jit module. This module has two core modalities for
+# found in the ``torch.jit`` module. This module has two core modalities for
 # converting an eager-mode model to a TorchScript graph representation:
 # **tracing** and **scripting**. The ``torch.jit.trace`` function takes a
 # module or function and a set of example inputs. It then runs the example
@@ -74,18 +74,18 @@
 
 ######################################################################
-# Acknowledgements
+# Acknowledgments
 # ----------------
 #
 # This tutorial was inspired by the following sources:
 #
-# 1) Yuan-Kuei Wu’s pytorch-chatbot implementation:
+# 1) Yuan-Kuei Wu's pytorch-chatbot implementation:
 #    https://github.com/ywk991112/pytorch-chatbot
 #
-# 2) Sean Robertson’s practical-pytorch seq2seq-translation example:
+# 2) Sean Robertson's practical-pytorch seq2seq-translation example:
 #    https://github.com/spro/practical-pytorch/tree/master/seq2seq-translation
 #
-# 3) FloydHub’s Cornell Movie Corpus preprocessing code:
+# 3) FloydHub's Cornell Movie Corpus preprocessing code:
 #    https://github.com/floydhub/textutil-preprocess-cornell-movie-corpus
 #
@@ -290,7 +290,7 @@ def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
         self.hidden_size = hidden_size
         self.embedding = embedding
 
-        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
+        # Initialize GRU; the ``input_size`` and ``hidden_size`` parameters are both set to 'hidden_size'
         #   because our input size is a word embedding with number of features == hidden_size
         self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                           dropout=(0 if n_layers == 1 else dropout), bidirectional=True)
@@ -525,7 +525,7 @@ def forward(self, input_step, last_hidden, encoder_outputs):
 # we can use function type annotations as introduced in `PEP
 # 3107 `__. In addition,
 # it is possible to declare arguments of different types using
-# MyPy-style type annotations (see
+# Mypy-style type annotations (see
 # `doc `__).
 #
 #
@@ -618,7 +618,7 @@ def evaluate(searcher, voc, sentence, max_length=MAX_LENGTH):
     return decoded_words
 
 
-# Evaluate inputs from user input (stdin)
+# Evaluate inputs from user input (``stdin``)
 def evaluateInput(searcher, voc):
     input_sentence = ''
     while(1):
@@ -638,7 +638,7 @@ def evaluateInput(searcher, voc):
         except KeyError:
             print("Error: Encountered unknown word.")
 
-# Normalize input sentence and call evaluate()
+# Normalize input sentence and call ``evaluate()``
 def evaluateExample(sentence, searcher, voc):
     print("> " + sentence)
     # Normalize sentence
@@ -653,7 +653,7 @@ def evaluateExample(sentence, searcher, voc):
 # Load Pretrained Parameters
 # --------------------------
 #
-# Ok, its time to load our model!
+# Now, let's load our model!
 #
 # Use hosted model
 # ~~~~~~~~~~~~~~~~
 #
@@ -671,7 +671,7 @@ def evaluateExample(sentence, searcher, voc):
 # Use your own model
 # ~~~~~~~~~~~~~~~~~~
 #
-# To load your own pre-trained model:
+# To load your own pretrained model:
 #
 # 1) Set the ``loadFilename`` variable to the path to the checkpoint file
 #    that you wish to load. Note that if you followed the convention for
@@ -691,9 +691,9 @@ def evaluateExample(sentence, searcher, voc):
 # ~~~~~~~~~~~~~~~~~~~~~~
 #
 # Notice that we initialize and load parameters into our encoder and
-# decoder models as usual. If you are using tracing mode(`torch.jit.trace`)
-# for some part of your models, you must call .to(device) to set the device
-# options of the models and .eval() to set the dropout layers to test mode
+# decoder models as usual. If you are using tracing mode(``torch.jit.trace``)
+# for some part of your models, you must call ``.to(device)`` to set the device
+# options of the models and ``.eval()`` to set the dropout layers to test mode
 # **before** tracing the models. `TracedModule` objects do not inherit the
 # ``to`` or ``eval`` methods. Since in this tutorial we are only using
 # scripting instead of tracing, we only need to do this before we do
@@ -706,7 +706,7 @@ def evaluateExample(sentence, searcher, voc):
 # Configure models
 model_name = 'cb_model'
 attn_model = 'dot'
-#attn_model = 'general'
+#``attn_model = 'general'``
 #attn_model = 'concat'
 hidden_size = 500
 encoder_n_layers = 2
@@ -717,7 +717,13 @@ def evaluateExample(sentence, searcher, voc):
 # If you're loading your own model
 # Set checkpoint to load from
 checkpoint_iter = 4000
-# loadFilename = os.path.join(save_dir, model_name, corpus_name,
+
+#############################################################
+# Sample code to load from a checkpoint:
+#
+# .. code-block:: python
+#
+#    loadFilename = os.path.join(save_dir, model_name, corpus_name,
 #                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
 #                            '{}_checkpoint.tar'.format(checkpoint_iter))
 
@@ -743,13 +749,13 @@ def evaluateExample(sentence, searcher, voc):
 # Initialize encoder & decoder models
 encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
 decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
-# Load trained model params
+# Load trained model parameters
 encoder.load_state_dict(encoder_sd)
 decoder.load_state_dict(decoder_sd)
 # Use appropriate device
 encoder = encoder.to(device)
 decoder = decoder.to(device)
-# Set dropout layers to eval mode
+# Set dropout layers to ``eval`` mode
 encoder.eval()
 decoder.eval()
 print('Models built and ready to go!')
@@ -794,7 +800,7 @@ def evaluateExample(sentence, searcher, voc):
 # data-dependent control flow. In the case of scripting, we do necessary
 # language changes to make sure the implementation complies with
 # TorchScript. We initialize the scripted searcher the same way that we
-# would initialize an un-scripted variant.
+# would initialize an unscripted variant.
# ### Compile the whole greedy search model to TorchScript model @@ -847,7 +853,7 @@ def evaluateExample(sentence, searcher, voc): # Use appropriate device scripted_searcher.to(device) -# Set dropout layers to eval mode +# Set dropout layers to ``eval`` mode scripted_searcher.eval() # Evaluate examples @@ -855,8 +861,8 @@ def evaluateExample(sentence, searcher, voc): for s in sentences: evaluateExample(s, scripted_searcher, voc) -# Evaluate your input -#evaluateInput(traced_encoder, traced_decoder, scripted_searcher, voc) +# Evaluate your input by running +# ``evaluateInput(traced_encoder, traced_decoder, scripted_searcher, voc)`` ###################################################################### diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py index 69b7f2e9964..fa23680496c 100644 --- a/beginner_source/fgsm_tutorial.py +++ b/beginner_source/fgsm_tutorial.py @@ -120,7 +120,7 @@ # There are only three inputs for this tutorial, and are defined as # follows: # -# - **epsilons** - List of epsilon values to use for the run. It is +# - ``epsilons`` - List of epsilon values to use for the run. It is # important to keep 0 in the list because it represents the model # performance on the original test set. Also, intuitively we would # expect the larger the epsilon, the more noticeable the perturbations @@ -128,12 +128,12 @@ # accuracy. Since the data range here is :math:`[0,1]`, no epsilon # value should exceed 1. # -# - **pretrained_model** - path to the pretrained MNIST model which was +# - ``pretrained_model`` - path to the pretrained MNIST model which was # trained with # `pytorch/examples/mnist `__. # For simplicity, download the pretrained model `here `__. # -# - **use_cuda** - boolean flag to use CUDA if desired and available. +# - ``use_cuda`` - boolean flag to use CUDA if desired and available. # Note, a GPU with CUDA is not critical for this tutorial as a CPU will # not take much time. # @@ -263,7 +263,7 @@ def test( model, device, test_loader, epsilon ): output = model(data) init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability - # If the initial prediction is wrong, dont bother attacking, just move on + # If the initial prediction is wrong, don't bother attacking, just move on if init_pred.item() != target.item(): continue @@ -276,7 +276,7 @@ def test( model, device, test_loader, epsilon ): # Calculate gradients of model in backward pass loss.backward() - # Collect datagrad + # Collect ``datagrad`` data_grad = data.grad.data # Call FGSM Attack @@ -366,7 +366,7 @@ def test( model, device, test_loader, epsilon ): # Remember the idea of no free lunch? In this case, as epsilon increases # the test accuracy decreases **BUT** the perturbations become more easily # perceptible. In reality, there is a tradeoff between accuracy -# degredation and perceptibility that an attacker must consider. Here, we +# degradation and perceptibility that an attacker must consider. Here, we # show some examples of successful adversarial examples at each epsilon # value. Each row of the plot shows a different epsilon value. 
The first # row is the :math:`\epsilon=0` examples which represent the original diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index ef54c5ea06d..12e20f475f8 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -24,7 +24,7 @@ ###################################################################### # Installation # ----------------- -# We will use TextVQA dataset and bert tokenizer from HuggingFace for this +# We will use TextVQA dataset and ``bert tokenizer`` from Hugging Face for this # tutorial. So you need to install datasets and transformers in addition to TorchMultimodal. # # .. note:: @@ -40,21 +40,21 @@ # ###################################################################### -# Steps +# Steps # ----- -# -# 1. Download the HuggingFace dataset to a directory on your computer by running the following command: -# +# +# 1. Download the Hugging Face dataset to a directory on your computer by running the following command: +# # .. code-block:: -# +# # wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz # tar xf vocab.tar.gz -# +# # .. note:: # If you are running this tutorial in Google Colab, run these commands # in a new cell and prepend these commands with an exclamation mark (!) # -# +# # 2. For this tutorial, we treat VQA as a classification task where # the inputs are images and question (text) and the output is an answer class. # So we need to download the vocab file with answer classes and create the answer to @@ -62,7 +62,7 @@ # # We also load the `textvqa # dataset `__ containing 34602 training samples -# (images,questions and answers) from HuggingFace +# (images,questions and answers) from Hugging Face # # We see there are 3997 answer classes including a class representing # unknown answers. @@ -98,8 +98,8 @@ # 3. Next, we write the transform function to convert the image and text into # Tensors consumable by our model - For images, we use the transforms from # torchvision to convert to Tensor and resize to uniform sizes - For text, -# we tokenize (and pad) them using the BertTokenizer from HuggingFace - -# For answers (i.e. labels), we take the most frequently occuring answer +# we tokenize (and pad) them using the ``BertTokenizer`` from Hugging Face - +# For answers (i.e. labels), we take the most frequently occurring answer # as the label to train with: # @@ -133,8 +133,8 @@ def transform(tokenizer, input): ###################################################################### -# 4. Finally, we import the flava_model_for_classification from -# torchmultimodal. It loads the pretrained flava checkpoint by default and +# 4. Finally, we import the ``flava_model_for_classification`` from +# ``torchmultimodal``. It loads the pretrained FLAVA checkpoint by default and # includes a classification head. 
# # The model forward function passes the image through the visual encoder diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 7f93ad233ba..35ab5c9538e 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -389,7 +389,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): grace_period=1, reduction_factor=2) reporter = CLIReporter( - # parameter_columns=["l1", "l2", "lr", "batch_size"], + # ``parameter_columns=["l1", "l2", "lr", "batch_size"]``, metric_columns=["loss", "accuracy", "training_iteration"]) result = tune.run( partial(train_cifar, data_dir=data_dir), @@ -425,7 +425,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": # sphinx_gallery_start_ignore - # Fixes AttributeError: '_LoggingTee' object has no attribute 'fileno'. + # Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``. # This is only needed to run with sphinx-build. import sys sys.stdout.fileno = lambda: False diff --git a/beginner_source/nn_tutorial.py b/beginner_source/nn_tutorial.py index cdc20040654..bc32131b93a 100644 --- a/beginner_source/nn_tutorial.py +++ b/beginner_source/nn_tutorial.py @@ -2,10 +2,12 @@ """ What is `torch.nn` *really*? ============================ -by Jeremy Howard, `fast.ai `_. Thanks to Rachel Thomas and Francisco Ingham. + +**Authors:** Jeremy Howard, `fast.ai `_. Thanks to Rachel Thomas and Francisco Ingham. """ + ############################################################################### -# We recommend running this tutorial as a notebook, not a script. To download the notebook (.ipynb) file, +# We recommend running this tutorial as a notebook, not a script. To download the notebook (``.ipynb``) file, # click the link at the top of the page. # # PyTorch provides the elegantly designed modules and classes `torch.nn `_ , @@ -90,7 +92,7 @@ print(y_train.min(), y_train.max()) ############################################################################### -# Neural net from scratch (no torch.nn) +# Neural net from scratch (without ``torch.nn``) # --------------------------------------------- # # Let's first create a model using nothing but PyTorch tensor operations. We're assuming @@ -109,7 +111,7 @@ # # .. note:: We are initializing the weights here with # `Xavier initialisation `_ -# (by multiplying with 1/sqrt(n)). +# (by multiplying with ``1/sqrt(n)``). import math @@ -123,7 +125,7 @@ # let's just write a plain matrix multiplication and broadcasted addition # to create a simple linear model. We also need an activation function, so # we'll write `log_softmax` and use it. Remember: although PyTorch -# provides lots of pre-written loss functions, activation functions, and +# provides lots of prewritten loss functions, activation functions, and # so forth, you can easily write your own using plain python. PyTorch will # even create fast GPU or vectorized CPU code for your function # automatically. 
@@ -242,7 +244,7 @@ def accuracy(out, yb): print(loss_func(model(xb), yb), accuracy(model(xb), yb)) ############################################################################### -# Using torch.nn.functional +# Using ``torch.nn.functional`` # ------------------------------ # # We will now refactor our code, so that it does the same thing as before, only @@ -278,7 +280,7 @@ def model(xb): print(loss_func(model(xb), yb), accuracy(model(xb), yb)) ############################################################################### -# Refactor using nn.Module +# Refactor using ``nn.Module`` # ----------------------------- # Next up, we'll use ``nn.Module`` and ``nn.Parameter``, for a clearer and more # concise training loop. We subclass ``nn.Module`` (which itself is a class and @@ -320,22 +322,26 @@ def forward(self, xb): ############################################################################### # Previously for our training loop we had to update the values for each parameter # by name, and manually zero out the grads for each parameter separately, like this: +# # :: -# with torch.no_grad(): -# weights -= weights.grad * lr -# bias -= bias.grad * lr -# weights.grad.zero_() -# bias.grad.zero_() +# +# with torch.no_grad(): +# weights -= weights.grad * lr +# bias -= bias.grad * lr +# weights.grad.zero_() +# bias.grad.zero_() # # # Now we can take advantage of model.parameters() and model.zero_grad() (which # are both defined by PyTorch for ``nn.Module``) to make those steps more concise # and less prone to the error of forgetting some of our parameters, particularly # if we had a more complicated model: +# # :: -# with torch.no_grad(): -# for p in model.parameters(): p -= p.grad * lr -# model.zero_grad() +# +# with torch.no_grad(): +# for p in model.parameters(): p -= p.grad * lr +# model.zero_grad() # # # We'll wrap our little training loop in a ``fit`` function so we can run it @@ -365,8 +371,8 @@ def fit(): print(loss_func(model(xb), yb)) ############################################################################### -# Refactor using nn.Linear -# ------------------------- +# Refactor using ``nn.Linear`` +# ---------------------------- # # We continue to refactor our code. Instead of manually defining and # initializing ``self.weights`` and ``self.bias``, and calculating ``xb @ @@ -398,7 +404,7 @@ def forward(self, xb): print(loss_func(model(xb), yb)) ############################################################################### -# Refactor using optim +# Refactor using ``torch.optim`` # ------------------------------ # # Pytorch also has a package with various optimization algorithms, ``torch.optim``. @@ -406,15 +412,19 @@ def forward(self, xb): # of manually updating each parameter. # # This will let us replace our previous manually coded optimization step: +# # :: -# with torch.no_grad(): -# for p in model.parameters(): p -= p.grad * lr -# model.zero_grad() +# +# with torch.no_grad(): +# for p in model.parameters(): p -= p.grad * lr +# model.zero_grad() # # and instead use just: +# # :: -# opt.step() -# opt.zero_grad() +# +# opt.step() +# opt.zero_grad() # # (``optim.zero_grad()`` resets the gradient to 0 and we need to call it before # computing the gradient for the next minibatch.) 
@@ -473,15 +483,19 @@ def get_model(): train_ds = TensorDataset(x_train, y_train) ############################################################################### -# Previously, we had to iterate through minibatches of x and y values separately: +# Previously, we had to iterate through minibatches of ``x`` and ``y`` values separately: +# # :: -# xb = x_train[start_i:end_i] -# yb = y_train[start_i:end_i] +# +# xb = x_train[start_i:end_i] +# yb = y_train[start_i:end_i] # # # Now, we can do these two steps together: +# # :: -# xb,yb = train_ds[i*bs : i*bs+bs] +# +# xb,yb = train_ds[i*bs : i*bs+bs] # model, opt = get_model() @@ -499,13 +513,13 @@ def get_model(): print(loss_func(model(xb), yb)) ############################################################################### -# Refactor using DataLoader +# Refactor using ``DataLoader`` # ------------------------------ # -# Pytorch's ``DataLoader`` is responsible for managing batches. You can +# PyTorch's ``DataLoader`` is responsible for managing batches. You can # create a ``DataLoader`` from any ``Dataset``. ``DataLoader`` makes it easier # to iterate over batches. Rather than having to use ``train_ds[i*bs : i*bs+bs]``, -# the DataLoader gives us each minibatch automatically. +# the ``DataLoader`` gives us each minibatch automatically. from torch.utils.data import DataLoader @@ -513,16 +527,20 @@ def get_model(): train_dl = DataLoader(train_ds, batch_size=bs) ############################################################################### -# Previously, our loop iterated over batches (xb, yb) like this: +# Previously, our loop iterated over batches ``(xb, yb)`` like this: +# # :: -# for i in range((n-1)//bs + 1): -# xb,yb = train_ds[i*bs : i*bs+bs] -# pred = model(xb) # -# Now, our loop is much cleaner, as (xb, yb) are loaded automatically from the data loader: +# for i in range((n-1)//bs + 1): +# xb,yb = train_ds[i*bs : i*bs+bs] +# pred = model(xb) +# +# Now, our loop is much cleaner, as ``(xb, yb)`` are loaded automatically from the data loader: +# # :: -# for xb,yb in train_dl: -# pred = model(xb) +# +# for xb,yb in train_dl: +# pred = model(xb) model, opt = get_model() @@ -538,7 +556,7 @@ def get_model(): print(loss_func(model(xb), yb)) ############################################################################### -# Thanks to Pytorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``, +# Thanks to PyTorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``, # our training loop is now dramatically smaller and easier to understand. Let's # now try to add the basic features necessary to create effective models in practice. # @@ -573,7 +591,7 @@ def get_model(): # # (Note that we always call ``model.train()`` before training, and ``model.eval()`` # before inference, because these are used by layers such as ``nn.BatchNorm2d`` -# and ``nn.Dropout`` to ensure appropriate behaviour for these different phases.) +# and ``nn.Dropout`` to ensure appropriate behavior for these different phases.) model, opt = get_model() @@ -667,11 +685,11 @@ def get_data(train_ds, valid_ds, bs): # Because none of the functions in the previous section assume anything about # the model form, we'll be able to use them to train a CNN without any modification. # -# We will use Pytorch's predefined +# We will use PyTorch's predefined # `Conv2d `_ class # as our convolutional layer. We define a CNN with 3 convolutional layers. # Each convolution is followed by a ReLU. At the end, we perform an -# average pooling. 
(Note that ``view`` is PyTorch's version of numpy's +# average pooling. (Note that ``view`` is PyTorch's version of Numpy's # ``reshape``) class Mnist_CNN(nn.Module): @@ -702,7 +720,7 @@ def forward(self, xb): fit(epochs, model, loss_func, opt, train_dl, valid_dl) ############################################################################### -# nn.Sequential +# Using ``nn.Sequential`` # ------------------------ # # ``torch.nn`` has another handy class we can use to simplify our code: @@ -729,7 +747,7 @@ def preprocess(x): return x.view(-1, 1, 28, 28) ############################################################################### -# The model created with ``Sequential`` is simply: +# The model created with ``Sequential`` is simple: model = nn.Sequential( Lambda(preprocess), @@ -748,7 +766,7 @@ def preprocess(x): fit(epochs, model, loss_func, opt, train_dl, valid_dl) ############################################################################### -# Wrapping DataLoader +# Wrapping ``DataLoader`` # ----------------------------- # # Our CNN is fairly concise, but it only works with MNIST, because: @@ -862,7 +880,7 @@ def preprocess(x, y): # ``torch.nn``, ``torch.optim``, ``Dataset``, and ``DataLoader``. So let's summarize # what we've seen: # -# - **torch.nn** +# - ``torch.nn``: # # + ``Module``: creates a callable which behaves like a function, but can also # contain state(such as neural net layer weights). It knows what ``Parameter`` (s) it diff --git a/beginner_source/profiler.py b/beginner_source/profiler.py index 450e450b0f0..95d077f7ba3 100644 --- a/beginner_source/profiler.py +++ b/beginner_source/profiler.py @@ -6,7 +6,7 @@ PyTorch includes a profiler API that is useful to identify the time and memory costs of various PyTorch operations in your code. Profiler can be easily integrated in your code, and the results can be printed as a table -or retured in a JSON trace file. +or returned in a JSON trace file. .. note:: Profiler supports multithreaded models. Profiler runs in the diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py index f3c74828fa8..d4b328156ce 100644 --- a/beginner_source/saving_loading_models.py +++ b/beginner_source/saving_loading_models.py @@ -60,7 +60,7 @@ # linear layers, etc.) and registered buffers (batchnorm's running_mean) # have entries in the model’s *state_dict*. Optimizer # objects (``torch.optim``) also have a *state_dict*, which contains -# information about the optimizer’s state, as well as the hyperparameters +# information about the optimizer's state, as well as the hyperparameters # used. # # Because *state_dict* objects are Python dictionaries, they can be easily @@ -158,9 +158,9 @@ # # .. note:: # The 1.6 release of PyTorch switched ``torch.save`` to use a new -# zipfile-based file format. ``torch.load`` still retains the ability to +# zip file-based format. ``torch.load`` still retains the ability to # load files in the old format. If for any reason you want ``torch.save`` -# to use the old format, pass the kwarg ``_use_new_zipfile_serialization=False``. +# to use the old format, pass the ``kwarg``parameter ``_use_new_zipfile_serialization=False``. # # When saving a model for inference, it is only necessary to save the # trained model’s learned parameters. Saving the model’s *state_dict* with @@ -302,7 +302,7 @@ # # When saving a general checkpoint, to be used for either inference or # resuming training, you must save more than just the model’s -# *state_dict*. 
It is important to also save the optimizer’s *state_dict*, +# *state_dict*. It is important to also save the optimizer's *state_dict*, # as this contains buffers and parameters that are updated as the model # trains. Other items that you may want to save are the epoch you left off # on, the latest recorded training loss, external ``torch.nn.Embedding`` @@ -503,7 +503,7 @@ # # When loading a model on a GPU that was trained and saved on CPU, set the # ``map_location`` argument in the ``torch.load()`` function to -# *cuda:device_id*. This loads the model to a given GPU device. Next, be +# ``cuda:device_id``. This loads the model to a given GPU device. Next, be # sure to call ``model.to(torch.device('cuda'))`` to convert the model’s # parameter tensors to CUDA tensors. Finally, be sure to use the # ``.to(torch.device('cuda'))`` function on all model inputs to prepare diff --git a/beginner_source/t5_tutorial.py b/beginner_source/t5_tutorial.py index 6b4742170d9..8f77cd278ea 100644 --- a/beginner_source/t5_tutorial.py +++ b/beginner_source/t5_tutorial.py @@ -2,7 +2,7 @@ T5-Base Model for Summarization, Sentiment Classification, and Translation ========================================================================== -**Author**: `Pendo Abbo `__, `Joe Cummings `__ +**Authors**: `Pendo Abbo `__, `Joe Cummings `__ """ @@ -10,12 +10,12 @@ # Overview # -------- # -# This tutorial demonstrates how to use a pre-trained T5 Model for summarization, sentiment classification, and +# This tutorial demonstrates how to use a pretrained T5 Model for summarization, sentiment classification, and # translation tasks. We will demonstrate how to use the torchtext library to: # -# 1. Build a text pre-processing pipeline for a T5 model -# 2. Instantiate a pre-trained T5 model with base configuration -# 3. Read in the CNNDM, IMDB, and Multi30k datasets and pre-process their texts in preparation for the model +# 1. Build a text preprocessing pipeline for a T5 model +# 2. Instantiate a pretrained T5 model with base configuration +# 3. Read in the CNNDM, IMDB, and Multi30k datasets and preprocess their texts in preparation for the model # 4. Perform text summarization, sentiment classification, and translation # # .. note:: @@ -33,8 +33,8 @@ # 3. Truncate the sequences to a specified maximum length # 4. Add end-of-sequence (EOS) and padding token IDs # -# T5 uses a SentencePiece model for text tokenization. Below, we use a pre-trained SentencePiece model to build -# the text pre-processing pipeline using torchtext's T5Transform. Note that the transform supports both +# T5 uses a ``SentencePiece`` model for text tokenization. Below, we use a pretrained ``SentencePiece`` model to build +# the text preprocessing pipeline using torchtext's T5Transform. Note that the transform supports both # batched and non-batched text input (for example, one can either pass a single sentence or a list of sentences), however the T5 model expects the input to be batched. # @@ -53,7 +53,7 @@ ) ####################################################################### -# Alternatively, we can also use the transform shipped with the pre-trained models that does all of the above out-of-the-box +# Alternatively, we can also use the transform shipped with the pretrained models that does all of the above out-of-the-box # # .. code-block:: # @@ -66,9 +66,9 @@ # Model Preparation # ----------------- # -# torchtext provides SOTA pre-trained models that can be used directly for NLP tasks or fine-tuned on downstream tasks. 
Below -# we use the pre-trained T5 model with standard base configuration to perform text summarization, sentiment classification, and -# translation. For additional details on available pre-trained models, see `the torchtext documentation `__ +# torchtext provides SOTA pretrained models that can be used directly for NLP tasks or fine-tuned on downstream tasks. Below +# we use the pretrained T5 model with standard base configuration to perform text summarization, sentiment classification, and +# translation. For additional details on available pretrained models, see `the torchtext documentation `__ # # from torchtext.models import T5_BASE_GENERATION @@ -81,8 +81,8 @@ ####################################################################### -# GenerationUtils -# ------------------ +# Using ``GenerationUtils`` +# ------------------------- # # We can use torchtext's ``GenerationUtils`` to produce an output sequence based on the input sequence provided. This calls on the # model's encoder and decoder, and iteratively expands the decoded sequences until the end-of-sequence token is generated @@ -103,8 +103,8 @@ # datapipes and hence support standard flow-control and mapping/transformation using user defined # functions and transforms. # -# Below we demonstrate how to pre-process the CNNDM dataset to include the prefix necessary for the -# model to indentify the task it is performing. The CNNDM dataset has a train, validation, and test +# Below we demonstrate how to preprocess the CNNDM dataset to include the prefix necessary for the +# model to identify the task it is performing. The CNNDM dataset has a train, validation, and test # split. Below we demo on the test split. # # The T5 model uses the prefix "summarize" for text summarization. For more information on task @@ -160,7 +160,7 @@ def apply_prefix(task, x): # This dataset has a train and test split. Below we demo on the test split. # # The T5 model was trained on the SST2 dataset (also available in torchtext) for sentiment classification using the -# prefix "sst2 sentence". Therefore, we will use this prefix to perform sentiment classification on the IMDB dataset. +# prefix ``sst2 sentence``. Therefore, we will use this prefix to perform sentiment classification on the IMDB dataset. # from torchtext.datasets import IMDB diff --git a/beginner_source/template_tutorial.py b/beginner_source/template_tutorial.py index 7e3e0220f0e..520bd40eb03 100644 --- a/beginner_source/template_tutorial.py +++ b/beginner_source/template_tutorial.py @@ -8,13 +8,13 @@ .. grid:: 2 - .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn + .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn * Item 1 * Item 2 * Item 3 - .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites + .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites * PyTorch v2.0.0 * GPU ??? @@ -35,18 +35,18 @@ respects regular expressions. For example to run only ``neural_style_transfer_tutorial.py``, use the following command: - + .. code-block:: sh - + GALLERY_PATTERN="neural_style_transfer_tutorial.py" make html - + or - + .. code-block:: sh - + GALLERY_PATTERN="neural_style_transfer_tutorial.py" sphinx-build . _build - -* Make a copy of this repo and add only your + +* Make a copy of this repository and add only your tutorial to the `beginner_source` directory removing all other tutorials. Then run ``make html``. 
diff --git a/beginner_source/text_sentiment_ngrams_tutorial.py b/beginner_source/text_sentiment_ngrams_tutorial.py index 1dd7466344c..9036cdd7214 100644 --- a/beginner_source/text_sentiment_ngrams_tutorial.py +++ b/beginner_source/text_sentiment_ngrams_tutorial.py @@ -1,6 +1,6 @@ """ Text classification with the torchtext library -================================== +============================================== In this tutorial, we will show how to use the torchtext library to build the dataset for the text classification analysis. Users will have the flexibility to @@ -133,7 +133,7 @@ def collate_batch(batch): # Define the model # ---------------- # -# The model is composed of the `nn.EmbeddingBag `__ layer plus a linear layer for the classification purpose. ``nn.EmbeddingBag`` with the default mode of "mean" computes the mean value of a “bag” of embeddings. Although the text entries here have different lengths, nn.EmbeddingBag module requires no padding here since the text lengths are saved in offsets. +# The model is composed of the `nn.EmbeddingBag `__ layer plus a linear layer for the classification purpose. ``nn.EmbeddingBag`` with the default mode of "mean" computes the mean value of a “bag” of embeddings. Although the text entries here have different lengths, ``nn.EmbeddingBag`` module requires no padding here since the text lengths are saved in offsets. # # Additionally, since ``nn.EmbeddingBag`` accumulates the average across # the embeddings on the fly, ``nn.EmbeddingBag`` can enhance the diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py index 15843ec074e..b4460bb4fb2 100644 --- a/beginner_source/transfer_learning_tutorial.py +++ b/beginner_source/transfer_learning_tutorial.py @@ -20,7 +20,7 @@ These two major transfer learning scenarios look as follows: -- **Finetuning the convnet**: Instead of random initialization, we +- **Finetuning the ConvNet**: Instead of random initialization, we initialize the network with a pretrained network, like the one that is trained on imagenet 1000 dataset. Rest of the training looks as usual. @@ -108,7 +108,7 @@ # augmentations. def imshow(inp, title=None): - """Imshow for Tensor.""" + """Display image for Tensor.""" inp = inp.numpy().transpose((1, 2, 0)) mean = np.array([0.485, 0.456, 0.406]) std = np.array([0.229, 0.224, 0.225]) @@ -244,7 +244,7 @@ def visualize_model(model, num_images=6): model.train(mode=was_training) ###################################################################### -# Finetuning the convnet +# Finetuning the ConvNet # ---------------------- # # Load a pretrained model and reset final fully connected layer. @@ -253,7 +253,7 @@ def visualize_model(model, num_images=6): model_ft = models.resnet18(weights='IMAGENET1K_V1') num_ftrs = model_ft.fc.in_features # Here the size of each output sample is set to 2. -# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)). +# Alternatively, it can be generalized to ``nn.Linear(num_ftrs, len(class_names))``. 
model_ft.fc = nn.Linear(num_ftrs, 2) model_ft = model_ft.to(device) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 1917b56ee8a..fab8e3a9a59 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -1,5 +1,5 @@ """ -Language Modeling with nn.Transformer and TorchText +Language Modeling with ``nn.Transformer`` and torchtext =============================================================== This is a tutorial on training a sequence-to-sequence model that uses the @@ -78,12 +78,12 @@ def init_weights(self) -> None: def forward(self, src: Tensor, src_mask: Tensor) -> Tensor: """ - Args: - src: Tensor, shape [seq_len, batch_size] - src_mask: Tensor, shape [seq_len, seq_len] + Arguments: + src: Tensor, shape ``[seq_len, batch_size]`` + src_mask: Tensor, shape ``[seq_len, seq_len]`` Returns: - output Tensor of shape [seq_len, batch_size, ntoken] + output Tensor of shape ``[seq_len, batch_size, ntoken]`` """ src = self.encoder(src) * math.sqrt(self.d_model) src = self.pos_encoder(src) @@ -93,7 +93,7 @@ def forward(self, src: Tensor, src_mask: Tensor) -> Tensor: def generate_square_subsequent_mask(sz: int) -> Tensor: - """Generates an upper-triangular matrix of -inf, with zeros on diag.""" + """Generates an upper-triangular matrix of ``-inf``, with zeros on ``diag``.""" return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1) @@ -120,8 +120,8 @@ def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000): def forward(self, x: Tensor) -> Tensor: """ - Args: - x: Tensor, shape [seq_len, batch_size, embedding_dim] + Arguments: + x: Tensor, shape ``[seq_len, batch_size, embedding_dim]`` """ x = x + self.pe[:x.size(0)] return self.dropout(x) @@ -182,7 +182,7 @@ def data_process(raw_text_iter: dataset.IterableDataset) -> Tensor: data = [torch.tensor(vocab(tokenizer(item)), dtype=torch.long) for item in raw_text_iter] return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) -# train_iter was "consumed" by the process of building the vocab, +# ``train_iter`` was "consumed" by the process of building the vocab, # so we have to create it again train_iter, val_iter, test_iter = WikiText2() train_data = data_process(train_iter) @@ -192,15 +192,15 @@ def data_process(raw_text_iter: dataset.IterableDataset) -> Tensor: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') def batchify(data: Tensor, bsz: int) -> Tensor: - """Divides the data into bsz separate sequences, removing extra elements + """Divides the data into ``bsz`` separate sequences, removing extra elements that wouldn't cleanly fit. 
- Args: + Arguments: data: Tensor, shape [N] bsz: int, batch size Returns: - Tensor of shape [N // bsz, bsz] + Tensor of shape ``[N // bsz, bsz]`` """ seq_len = data.size(0) // bsz data = data[:seq_len * bsz] @@ -209,7 +209,7 @@ def batchify(data: Tensor, bsz: int) -> Tensor: batch_size = 20 eval_batch_size = 10 -train_data = batchify(train_data, batch_size) # shape [seq_len, batch_size] +train_data = batchify(train_data, batch_size) # shape ``[seq_len, batch_size]`` val_data = batchify(val_data, eval_batch_size) test_data = batchify(test_data, eval_batch_size) @@ -238,12 +238,12 @@ def batchify(data: Tensor, bsz: int) -> Tensor: def get_batch(source: Tensor, i: int) -> Tuple[Tensor, Tensor]: """ Args: - source: Tensor, shape [full_seq_len, batch_size] + source: Tensor, shape ``[full_seq_len, batch_size]`` i: int Returns: - tuple (data, target), where data has shape [seq_len, batch_size] and - target has shape [seq_len * batch_size] + tuple (data, target), where data has shape ``[seq_len, batch_size]`` and + target has shape ``[seq_len * batch_size]`` """ seq_len = min(bptt, len(source) - 1 - i) data = source[i:i+seq_len] @@ -258,15 +258,15 @@ def get_batch(source: Tensor, i: int) -> Tuple[Tensor, Tensor]: ###################################################################### -# The model hyperparameters are defined below. The vocab size is +# The model hyperparameters are defined below. The ``vocab`` size is # equal to the length of the vocab object. # ntokens = len(vocab) # size of vocabulary emsize = 200 # embedding dimension -d_hid = 200 # dimension of the feedforward network model in nn.TransformerEncoder -nlayers = 2 # number of nn.TransformerEncoderLayer in nn.TransformerEncoder -nhead = 2 # number of heads in nn.MultiheadAttention +d_hid = 200 # dimension of the feedforward network model in ``nn.TransformerEncoder`` +nlayers = 2 # number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder`` +nhead = 2 # number of heads in ``nn.MultiheadAttention`` dropout = 0.2 # dropout probability model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout).to(device) diff --git a/beginner_source/translation_transformer.py b/beginner_source/translation_transformer.py index 6e2538d1599..c5553246e38 100644 --- a/beginner_source/translation_transformer.py +++ b/beginner_source/translation_transformer.py @@ -1,6 +1,6 @@ """ -Language Translation with nn.Transformer and torchtext -====================================================== +Language Translation with ``nn.Transformer`` and torchtext +========================================================== This tutorial shows: - How to train a translation model from scratch using Transformer. @@ -40,12 +40,16 @@ token_transform = {} vocab_transform = {} - +################################################################################### # Create source and target language tokenizer. Make sure to install the dependencies. -# pip install -U torchdata -# pip install -U spacy -# python -m spacy download en_core_web_sm -# python -m spacy download de_core_news_sm +# +# .. 
code-block:: python +# +# pip install -U torchdata +# pip install -U spacy +# python -m spacy download en_core_web_sm +# python -m spacy download de_core_news_sm + token_transform[SRC_LANGUAGE] = get_tokenizer('spacy', language='de_core_news_sm') token_transform[TGT_LANGUAGE] = get_tokenizer('spacy', language='en_core_web_sm') @@ -71,8 +75,8 @@ def yield_tokens(data_iter: Iterable, language: str) -> List[str]: specials=special_symbols, special_first=True) -# Set UNK_IDX as the default index. This index is returned when the token is not found. -# If not set, it throws RuntimeError when the queried token is not found in the Vocabulary. +# Set ``UNK_IDX`` as the default index. This index is returned when the token is not found. +# If not set, it throws ``RuntimeError`` when the queried token is not found in the Vocabulary. for ln in [SRC_LANGUAGE, TGT_LANGUAGE]: vocab_transform[ln].set_default_index(UNK_IDX) @@ -89,7 +93,7 @@ def yield_tokens(data_iter: Iterable, language: str) -> List[str]: # encodings to provide position information of input tokens to the model. The second part is the # actual `Transformer `__ model. # Finally, the output of the Transformer model is passed through linear layer -# that gives un-normalized probabilities for each token in the target language. +# that gives unnormalized probabilities for each token in the target language. # @@ -205,7 +209,7 @@ def create_mask(src, tgt): ###################################################################### # Let's now define the parameters of our model and instantiate the same. Below, we also -# define our loss function which is the cross-entropy loss and the optmizer used for training. +# define our loss function which is the cross-entropy loss and the optimizer used for training. # torch.manual_seed(0) @@ -258,7 +262,7 @@ def tensor_transform(token_ids: List[int]): torch.tensor(token_ids), torch.tensor([EOS_IDX]))) -# src and tgt language text transforms to convert raw strings into tensors indices +# ``src`` and ``tgt`` language text transforms to convert raw strings into tensors indices text_transform = {} for ln in [SRC_LANGUAGE, TGT_LANGUAGE]: text_transform[ln] = sequential_transforms(token_transform[ln], #Tokenization diff --git a/beginner_source/vt_tutorial.py b/beginner_source/vt_tutorial.py index f3b649f827e..1b0a93b8b4b 100644 --- a/beginner_source/vt_tutorial.py +++ b/beginner_source/vt_tutorial.py @@ -30,7 +30,7 @@ # Convolutional Neural Networks (CNNs) have been the main models for image # classification since deep learning took off in 2012, but CNNs typically # require hundreds of millions of images for training to achieve the -# SOTAresults. DeiT is a vision transformer model that requires a lot less +# SOTA results. DeiT is a vision transformer model that requires a lot less # data and computing resources for training to compete with the leading # CNNs in performing image classification, which is made possible by two # key components of of DeiT: @@ -50,16 +50,20 @@ # Classifying Images with DeiT # ------------------------------- # -# Follow the README at the DeiT repo for detailed information on how to +# Follow the ``README.md`` at the DeiT repository for detailed information on how to # classify images using DeiT, or for a quick test, first install the -# required packages: - -# pip install torch torchvision timm pandas requests +# required packages: +# +# .. 
code-block:: python +# +# pip install torch torchvision timm pandas requests ####################################################### -# To run in Google Colab, uncomment the following line: - -# !pip install timm pandas requests +# To run in Google Colab, install dependencies by running the following command: +# +# .. code-block:: python +# +# !pip install timm pandas requests ############################# # then run the script below: @@ -94,8 +98,8 @@ ###################################################################### # The output should be 269, which, according to the ImageNet list of class -# index to `labels file `_, maps to ‘timber -# wolf, grey wolf, gray wolf, Canis lupus’. +# index to `labels file `_, maps to ``timber +# wolf, grey wolf, gray wolf, Canis lupus``. # # Now that we have verified that we can use the DeiT model to classify # images, let’s see how to modify the model so it can run on iOS and @@ -120,7 +124,7 @@ ###################################################################### -# The scripted model file fbdeit_scripted.pt of size about 346MB is +# The scripted model file ``fbdeit_scripted.pt`` of size about 346MB is # generated. # @@ -138,8 +142,8 @@ # Now run the code below: # -# Use 'x86' for server inference (the old 'fbgemm' is still available but 'x86' is the recommended default) and 'qnnpack' for mobile inference. -backend = "x86" # replaced with qnnpack causing much worse inference speed for quantized model on this notebook +# Use 'x86' for server inference (the old 'fbgemm' is still available but 'x86' is the recommended default) and ``qnnpack`` for mobile inference. +backend = "x86" # replaced with ``qnnpack`` causing much worse inference speed for quantized model on this notebook model.qconfig = torch.quantization.get_default_qconfig(backend) torch.backends.quantized.engine = backend @@ -150,7 +154,7 @@ ###################################################################### # This generates the scripted and quantized version of the model -# fbdeit_quantized_scripted.pt, with size about 89MB, a 74% reduction of +# ``fbdeit_quantized_scripted.pt``, with size about 89MB, a 74% reduction of # the non-quantized model size of 346MB! # @@ -177,7 +181,7 @@ ###################################################################### -# The generated fbdeit_optimized_scripted_quantized.pt file has about the +# The generated ``fbdeit_optimized_scripted_quantized.pt`` file has about the # same size as the quantized, scripted, but non-optimized model. The # inference result remains the same. 
# diff --git a/en-wordlist.txt b/en-wordlist.txt new file mode 100644 index 00000000000..c1447668122 --- /dev/null +++ b/en-wordlist.txt @@ -0,0 +1,209 @@ +APIs +Args +Autograd +BCE +BOS +Bahdanau +BatchNorm +CHW +CIFAR +CLS +CNNDM +CNNs +CUDA +Chatbots +Colab +Conv +ConvNet +DCGAN +DCGANs +DataLoaders +DeiT +EOS +FGSM +FLAVA +FloydHub +FloydHub's +GAN +GANs +GPUs +GRU +GRUs +Goodfellow +Goodfellow’s +GreedySearchDecoder +Hugging Face +IMDB +ImageNet +Initializations +Iteratively +JSON +Kuei +LSTM +LeNet +LeakyReLU +LeakyReLUs +Luong +MLP +MNIST +Mypy +NLP +NaN +NeurIPS +NumPy +Numericalization +Numpy's +Profiler +PyTorch's +RGB +RNN +RNNs +Radford +ReLU +SST2 +Sigmoid +SoTA +TensorBoard +TextVQA +Tokenization +TorchMultimodal +TorchScript +Unescape +VQA +Wikitext +accuracies +activations +adversarially +al +backend +backprop +backpropagated +backpropagates +backpropagation +batchnorm's +benchmarking +boolean +broadcasted +chatbot +chatbot's +checkpointing +composable +concat +contrastive +conv +convolutional +cpu +csv +datafile +dataloader +dataloaders +datapipes +dataset +datasets +dataset’s +deserialize +deserialized +dir +downsample +embeddings +encodings +eq +et +evaluateInput +fastai +fbgemm +feedforward +finetune +finetuning +helpdesk +helpdesks +hyperparameter +hyperparameters +imagenet +io +iterable +iteratively +jit +jpg +labelled +learnable +loadFilename +manualSeed +matplotlib +minibatch +minibatches +minimax +misclassification +misclassified +modularity +modularized +multimodal +multimodality +multithreaded +namespace +natively +ndarrays +num +numericalize +numpy +optimizable +optimizer's +optimizers +overfitting +parallelizable +parallelization +perceptibility +prepend +preprocess +preprocessing +pretrained +prewritten +profiler +profilers +pytorch +quantized +quantizing +randint +readably +reinitializes +relu +reproducibility +rescale +rewinded +runtime +runtime +runtimes +softmax +src +stacktrace +stateful +storages +strided +subclasses +subdirectories +submodule +summarization +tanh +th +thresholding +tokenization +tokenize +tokenizer +torchaudio +torchdata +torchtext +torchtext's +torchvision +traceback +tradeoff +uncomment +uncommented +unimodal +unnormalized +unpickling +utils +vectorized +voc +walkthrough +warmstart +warmstarting diff --git a/tutorials-wordlist.txt b/tutorials-wordlist.txt deleted file mode 100644 index 822e2fb2525..00000000000 --- a/tutorials-wordlist.txt +++ /dev/null @@ -1,23 +0,0 @@ -csv -DataLoaders -dataloader -dataset -datasets -dir -imagenet -io -jpg -ndarrays -Numpy's -numpy -preprocess -preprocessing -pytorch -rescale -runtime -th -subclasses -submodule -tanh -torchvision -uncomment From 48d8207a6e80fb1152ba0f9fad7f090ebdb01859 Mon Sep 17 00:00:00 2001 From: BJ Hargrave Date: Tue, 11 Apr 2023 17:06:39 -0400 Subject: [PATCH 004/609] Fix assert statements to compare the expected and actual values (#2204) Signed-off-by: BJ Hargrave Co-authored-by: Svetlana Karslioglu --- beginner_source/introyt/tensors_deeper_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beginner_source/introyt/tensors_deeper_tutorial.py b/beginner_source/introyt/tensors_deeper_tutorial.py index 1f6d7248851..8b2c1630aff 100644 --- a/beginner_source/introyt/tensors_deeper_tutorial.py +++ b/beginner_source/introyt/tensors_deeper_tutorial.py @@ -544,11 +544,11 @@ print(c) # contents of c have changed assert c is d # test c & d are same object, not just containing equal values -assert id(c), old_id # make sure that our new c is the same object as 
the old one +assert id(c) == old_id # make sure that our new c is the same object as the old one torch.rand(2, 2, out=c) # works for creation too! print(c) # c has changed again -assert id(c), old_id # still the same object! +assert id(c) == old_id # still the same object! ########################################################################## From 87fa403a8e1d027e7cdff615e46d6e782d8902ed Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 14 Apr 2023 09:43:54 -0700 Subject: [PATCH 005/609] Fix storing artifacts (#2283) * Fix storing artifacts * Fix * Fix * Fix * Fix path * Fix path * Fix path * Fix * Fix * Fix * Fix * Fix * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update --- .circleci/config.yml | 17 ++++++++++++++--- .circleci/config.yml.in | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 65f1237d739..38bbfb83818 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -139,6 +139,18 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts + # Copy docs with plot to a docs dir + if docker exec -it "$id" sh -c "test -d ./workspace/docs_with_plot/docs/"; then + mkdir /home/circleci/project/docs + docker cp "$id:/var/lib/jenkins/workspace/docs_with_plot/docs/." /home/circleci/project/docs + echo "Directory copied successfully" + else + echo "No docs_with_plot directory. Skipping..." + fi + + - store_artifacts: + path: ./docs + destination: tutorials pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults environment: @@ -151,6 +163,8 @@ pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_default environment: DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7" resource_class: medium + + <<: *pytorch_tutorial_build_defaults pytorch_windows_build_worker: &pytorch_windows_build_worker @@ -181,9 +195,6 @@ pytorch_windows_build_worker: &pytorch_windows_build_worker - beginner_source/data - intermediate_source/data - prototype_source/data - - store_artifacts: - path: ./docs/build/html - destination: docs jobs: pytorch_tutorial_pr_build_manager: diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 68e7b4172ba..46e0d9d3ecb 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -139,6 +139,18 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts + # Copy docs with plot to a docs dir + if docker exec -it "$id" sh -c "test -d ./workspace/docs_with_plot/docs/"; then + mkdir /home/circleci/project/docs + docker cp "$id:/var/lib/jenkins/workspace/docs_with_plot/docs/." /home/circleci/project/docs + echo "Directory copied successfully" + else + echo "No docs_with_plot directory. Skipping..." 
+ fi + + - store_artifacts: + path: ./docs + destination: tutorials pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults environment: @@ -151,6 +163,8 @@ pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_default environment: DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7" resource_class: medium + + <<: *pytorch_tutorial_build_defaults {% raw %} pytorch_windows_build_worker: &pytorch_windows_build_worker @@ -181,9 +195,6 @@ pytorch_windows_build_worker: &pytorch_windows_build_worker - beginner_source/data - intermediate_source/data - prototype_source/data - - store_artifacts: - path: ./docs/build/html - destination: docs {% endraw %} jobs: {{ jobs("pr") }} From 12c8be8fdf32351b80bbe23fc2c05e2a325b19df Mon Sep 17 00:00:00 2001 From: "sangjune.park" Date: Tue, 18 Apr 2023 05:47:16 +0900 Subject: [PATCH 006/609] add : torchserve version required for multi-worker (#2285) Co-authored-by: Svetlana Karslioglu --- intermediate_source/torchserve_with_ipex.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/torchserve_with_ipex.rst b/intermediate_source/torchserve_with_ipex.rst index caef6926763..fbf705a7c47 100644 --- a/intermediate_source/torchserve_with_ipex.rst +++ b/intermediate_source/torchserve_with_ipex.rst @@ -204,7 +204,7 @@ We'll compare the following three configurations: (2) `torch.set_num_threads `_ = ``number of physical cores / number of workers`` (no core pinning) -(3) core pinning via the launch script +(3) core pinning via the launch script (Required Torchserve>=0.6.1) After this exercise, we'll have verified that we prefer avoiding logical cores and prefer local memory access via core pinning with a real TorchServe use case. From 8c1d408f1015bce924c833adf76a3af21f2e603c Mon Sep 17 00:00:00 2001 From: qihqi Date: Mon, 17 Apr 2023 16:13:30 -0700 Subject: [PATCH 007/609] Add a section to advertise use of flatbuffer format for mobile models. (#2286) * Add a section to advertise use of flatbuffer format for mobile models. * Apply suggestions from code review Co-authored-by: Svetlana Karslioglu * Add a section on caveats. 
* Apply suggestions from code review Co-authored-by: Svetlana Karslioglu --------- Co-authored-by: Svetlana Karslioglu --- recipes_source/mobile_perf.rst | 67 ++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/recipes_source/mobile_perf.rst b/recipes_source/mobile_perf.rst index ace505aac06..aae1447cbf8 100644 --- a/recipes_source/mobile_perf.rst +++ b/recipes_source/mobile_perf.rst @@ -199,6 +199,73 @@ You can check how it looks in code in `pytorch android application example Date: Tue, 18 Apr 2023 17:36:22 -0700 Subject: [PATCH 008/609] Pyspelling: Python intermediate tutorials A-M (#2287) * Pyspelling: Python intermediate tutorials A-M --- .pyspelling.yml | 15 ++++ en-wordlist.txt | 80 +++++++++++++++++++ .../autograd_saved_tensors_hooks_tutorial.py | 28 ++++--- .../ax_multiobjective_nas_tutorial.py | 46 +++++------ .../char_rnn_classification_tutorial.py | 10 +-- .../char_rnn_generation_tutorial.py | 6 +- .../custom_function_conv_bn_tutorial.py | 58 +++++++------- intermediate_source/ensembling.py | 30 +++---- .../flask_rest_api_tutorial.py | 4 +- intermediate_source/forward_ad_usage.py | 28 +++---- intermediate_source/fx_conv_bn_fuser.py | 8 +- intermediate_source/fx_profiling_tutorial.py | 10 +-- intermediate_source/jacobians_hessians.py | 58 +++++++------- intermediate_source/mario_rl_tutorial.py | 23 +++--- intermediate_source/memory_format_tutorial.py | 46 +++++------ intermediate_source/mnist_train_nas.py | 2 +- .../model_parallel_tutorial.py | 6 +- 17 files changed, 276 insertions(+), 182 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index 015ac975b7f..9dce7c8215a 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -3,6 +3,21 @@ matrix: - name: python sources: - beginner_source/*.py + - intermediate_source/autograd_saved_tensors_hooks_tutorial.py + - intermediate_source/ax_multiobjective_nas_tutorial.py + - intermediate_source/char_rnn_classification_tutorial.py + - intermediate_source/char_rnn_generation_tutorial.py + - intermediate_source/custom_function_conv_bn_tutorial.py + - intermediate_source/ensembling.py + #- intermediate_source/flask_rest_api_tutorial.py + - intermediate_source/forward_ad_usage.py + - intermediate_source/fx_conv_bn_fuser.py + - intermediate_source/fx_profiling_tutorial.py + - intermediate_source/jacobians_hessians.py + - intermediate_source/mario_rl_tutorial.py + - intermediate_source/mnist_train_nas.py + - intermediate_source/memory_format_tutorial.py + - intermediate_source/model_parallel_tutorial.py dictionary: wordlists: - en-wordlist.txt diff --git a/en-wordlist.txt b/en-wordlist.txt index c1447668122..9a4a99d1df2 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -2,6 +2,7 @@ APIs Args Autograd BCE +BN BOS Bahdanau BatchNorm @@ -10,18 +11,26 @@ CIFAR CLS CNNDM CNNs +CPUs CUDA Chatbots Colab Conv ConvNet +ConvNets DCGAN DCGANs +DDQN +DNN DataLoaders DeiT +DenseNet EOS +FC FGSM FLAVA +FX +FX's FloydHub FloydHub's GAN @@ -29,57 +38,85 @@ GANs GPUs GRU GRUs +GeForce Goodfellow Goodfellow’s GreedySearchDecoder +HVP Hugging Face IMDB ImageNet Initializations Iteratively JSON +JVP +Jacobian +Kiuk +Kubernetes Kuei LSTM +LSTMs LeNet LeakyReLU LeakyReLUs +Lua Luong MLP +MLPs MNIST Mypy +NAS +NCHW +NES NLP NaN NeurIPS NumPy Numericalization Numpy's +OpenAI +Plotly +Prec Profiler PyTorch's RGB +RL RNN RNNs +RTX Radford ReLU +ResNet SST2 +Sequentials Sigmoid SoTA +TPU TensorBoard TextVQA Tokenization TorchMultimodal TorchScript +TorchX +Tunable Unescape VQA Wikitext +Xeon accuracies activations adversarially al 
+autodiff +autograd backend +backends backprop +backpropagate backpropagated backpropagates backpropagation +batchnorm batchnorm's benchmarking boolean @@ -89,12 +126,15 @@ chatbot's checkpointing composable concat +config contrastive conv convolutional cpu csv +cuDNN datafile +dataframe dataloader dataloaders datapipes @@ -105,26 +145,43 @@ deserialize deserialized dir downsample +downsamples embeddings encodings +ensembling eq et evaluateInput +extensibility fastai fbgemm feedforward finetune finetuning +fp +functorch +fuser +grayscale +hardcode helpdesk helpdesks +hessian +hessians +hvp hyperparameter hyperparameters imagenet +initializations +inlined +interpretable io iterable iteratively +jacobian +jacobians jit jpg +kwargs labelled learnable loadFilename @@ -139,6 +196,7 @@ modularity modularized multimodal multimodality +multiobjective multithreaded namespace natively @@ -153,26 +211,37 @@ overfitting parallelizable parallelization perceptibility +pipelining +pointwise +precomputing prepend preprocess preprocessing +prespecified pretrained prewritten +primals profiler profilers pytorch quantized quantizing +queryable randint readably reinitializes relu reproducibility rescale +resnet +restride rewinded +romanized +runnable runtime runtime runtimes +scalable softmax src stacktrace @@ -180,29 +249,40 @@ stateful storages strided subclasses +subclassing subdirectories submodule +subreddit summarization tanh th thresholding +timestep +timesteps tokenization tokenize tokenizer torchaudio torchdata +torchscriptable torchtext torchtext's torchvision +torchviz traceback tradeoff +tradeoffs uncomment uncommented +unfused unimodal unnormalized unpickling utils +vectorization +vectorize vectorized +vhp voc walkthrough warmstart diff --git a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py index f549301626d..f16b170ee6a 100644 --- a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py +++ b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py @@ -1,6 +1,6 @@ """ Hooks for autograd saved tensors -======================= +================================ """ @@ -13,8 +13,7 @@ # packing/unpacking process. # # This tutorial assumes you are familiar with how backpropagation works in -# theory. If not, read this first: -# https://colab.research.google.com/drive/1aWNdmYt7RcHMbUk-Xz2Cv5-cGFSWPXe0#scrollTo=AHcEJ6nXUb7W +# theory. If not, read `this `_ first. # @@ -107,7 +106,7 @@ def f(x): ###################################################################### # In the example above, executing without grad would only have kept ``x`` -# and ``y`` in the scope, But the graph additionnally stores ``f(x)`` and +# and ``y`` in the scope, But the graph additionally stores ``f(x)`` and # ``f(f(x))``. Hence, running a forward pass during training will be more # costly in memory usage than during evaluation (more precisely, when # autograd is not required). @@ -182,7 +181,7 @@ def unpack_hook(x): ###################################################################### -# The ``pack_hook`` function will be called everytime an operation saves +# The ``pack_hook`` function will be called every time an operation saves # a tensor for backward. # The output of ``pack_hook`` is then stored in the computation graph # instead of the original tensor. 
@@ -218,8 +217,9 @@ def unpack_hook(x): # ###################################################################### -# **Returning and int** - +# Returning an ``int`` +# ^^^^^^^^^^^^^^^^^^^^ +# # Returning the index of a Python list # Relatively harmless but with debatable usefulness @@ -240,8 +240,9 @@ def unpack(x): assert(x.grad.equal(2 * x)) ###################################################################### -# **Returning a tuple** - +# Returning a tuple +# ^^^^^^^^^^^^^^^^^ +# # Returning some tensor and a function how to unpack it # Quite unlikely to be useful in its current form @@ -262,9 +263,10 @@ def unpack(packed): assert(torch.allclose(x.grad, 2 * x)) ###################################################################### -# **Returning a str** - -# Returning the __repr__ of the tensor +# Returning a ``str`` +# ^^^^^^^^^^^^^^^^^^^ +# +# Returning the ``__repr__ of`` the tensor # Probably never do this x = torch.randn(5, requires_grad=True) @@ -337,7 +339,7 @@ def forward(self, x): ###################################################################### -# In practice, on a A100 GPU, for a resnet-152 with batch size 256, this +# In practice, on a A100 GPU, for a ResNet-152 with batch size 256, this # corresponds to a GPU memory usage reduction from 48GB to 5GB, at the # cost of a 6x slowdown. # diff --git a/intermediate_source/ax_multiobjective_nas_tutorial.py b/intermediate_source/ax_multiobjective_nas_tutorial.py index 7c43f59473c..79b096b9e64 100644 --- a/intermediate_source/ax_multiobjective_nas_tutorial.py +++ b/intermediate_source/ax_multiobjective_nas_tutorial.py @@ -48,7 +48,7 @@ # Our goal is to optimize the PyTorch Lightning training job defined in # `mnist_train_nas.py `__. # To do this using TorchX, we write a helper function that takes in -# the values of the architcture and hyperparameters of the training +# the values of the architecture and hyperparameters of the training # job and creates a `TorchX AppDef `__ # with the appropriate settings. # @@ -72,12 +72,12 @@ def trainer( trial_idx: int = -1, ) -> specs.AppDef: - # define the log path so we can pass it to the TorchX AppDef + # define the log path so we can pass it to the TorchX ``AppDef`` if trial_idx >= 0: log_path = Path(log_path).joinpath(str(trial_idx)).absolute().as_posix() return utils.python( - # command line args to the training script + # command line arguments to the training script "--log_path", log_path, "--hidden_size_1", @@ -126,15 +126,15 @@ def trainer( tracker_base="/tmp/", component=trainer, # NOTE: To launch this job on a cluster instead of locally you can - # specify a different scheduler and adjust args appropriately. + # specify a different scheduler and adjust arguments appropriately. scheduler="local_cwd", component_const_params={"log_path": log_dir}, cfg={}, ) ###################################################################### -# Setting up the SearchSpace -# -------------------------- +# Setting up the ``SearchSpace`` +# ------------------------------ # # First, we define our search space. Ax supports both range parameters # of type integer and float as well as choice parameters which can have @@ -154,7 +154,7 @@ def trainer( parameters = [ # NOTE: In a real-world setting, hidden_size_1 and hidden_size_2 # should probably be powers of 2, but in our simple example this - # would mean that num_params can't take on that many values, which + # would mean that ``num_params`` can't take on that many values, which # in turn makes the Pareto frontier look pretty weird. 
RangeParameter( name="hidden_size_1", @@ -189,7 +189,7 @@ def trainer( upper=0.5, parameter_type=ParameterType.FLOAT, ), - ChoiceParameter( # NOTE: ChoiceParameters don't require log-scale + ChoiceParameter( # NOTE: ``ChoiceParameters`` don't require log-scale name="batch_size", values=[32, 64, 128, 256], parameter_type=ParameterType.INT, @@ -212,7 +212,7 @@ def trainer( # # Ax has the concept of a `Metric `__ # that defines properties of outcomes and how observations are obtained -# for these outcomes. This allows e.g. encodig how data is fetched from +# for these outcomes. This allows e.g. encoding how data is fetched from # some distributed execution backend and post-processed before being # passed as input to Ax. # @@ -229,7 +229,7 @@ def trainer( # index (see the ``trainer()`` function above). We will define a metric # class that is aware of that logging directory. By subclassing # `TensorboardCurveMetric `__ -# we get the logic to read and parse the Tensorboard logs for free. +# we get the logic to read and parse the TensorBoard logs for free. # from ax.metrics.tensorboard import TensorboardCurveMetric @@ -237,10 +237,10 @@ def trainer( class MyTensorboardMetric(TensorboardCurveMetric): - # NOTE: We need to tell the new Tensorboard metric how to get the id / - # file handle for the tensorboard logs from a trial. In this case + # NOTE: We need to tell the new TensorBoard metric how to get the id / + # file handle for the TensorBoard logs from a trial. In this case # our convention is to just save a separate file per trial in - # the pre-specified log dir. + # the prespecified log dir. @classmethod def get_ids_from_trials(cls, trials): return { @@ -257,9 +257,9 @@ def is_available_while_running(cls): ###################################################################### -# Now we can instatiate the metrics for accuracy and the number of +# Now we can instantiate the metrics for accuracy and the number of # model parameters. Here `curve_name` is the name of the metric in the -# Tensorboard logs, while `name` is the metric name used internally +# TensorBoard logs, while `name` is the metric name used internally # by Ax. We also specify `lower_is_better` to indicate the favorable # direction of the two metrics. # @@ -277,8 +277,8 @@ def is_available_while_running(cls): ###################################################################### -# Setting up the OptimizationConfig -# ---------------------------------- +# Setting up the ``OptimizationConfig`` +# ------------------------------------- # # The way to tell Ax what it should optimize is by means of an # `OptimizationConfig `__. @@ -335,8 +335,8 @@ def is_available_while_running(cls): ) ###################################################################### -# Choosing the GenerationStrategy -# ------------------------------- +# Choosing the Generation Strategy +# -------------------------------- # # A `GenerationStrategy `__ # is the abstract representation of how we would like to perform the @@ -366,7 +366,7 @@ def is_available_while_running(cls): # Configuring the Scheduler # ------------------------- # -# The `Scheduler` (TODO: link) acts as the loop control for the optimization. +# The ``Scheduler`` acts as the loop control for the optimization. # It communicates with the backend to launch trials, check their status, # and retrieve results. In the case of this tutorial, it is simply reading # and parsing the locally saved logs. 
In a remote execution setting, @@ -404,7 +404,7 @@ def is_available_while_running(cls): # ------------------------ # # Now that everything is configured, we can let Ax run the optimization -# in a fully automated fashion. The Scheduler will periodially check +# in a fully automated fashion. The Scheduler will periodically check # the logs for the status of all currently running trials, and if a # trial completes the scheduler will update its status on the # experiment and fetch the observations needed for the Bayesian @@ -479,7 +479,7 @@ def is_available_while_running(cls): from ax.plot.diagnostic import interact_cross_validation_plotly from ax.utils.notebook.plotting import init_notebook_plotting, render -cv = cross_validate(model=gs.model) # The surrogate model is stored on the GenerationStrategy +cv = cross_validate(model=gs.model) # The surrogate model is stored on the ``GenerationStrategy`` compute_diagnostics(cv) interact_cross_validation_plotly(cv) @@ -508,7 +508,7 @@ def is_available_while_running(cls): ###################################################################### -# Acknowledgements +# Acknowledgments # ---------------- # # We thank the TorchX team (in particular Kiuk Chung and Tristan Rice) diff --git a/intermediate_source/char_rnn_classification_tutorial.py b/intermediate_source/char_rnn_classification_tutorial.py index 78cbc111151..f36b92fb17e 100644 --- a/intermediate_source/char_rnn_classification_tutorial.py +++ b/intermediate_source/char_rnn_classification_tutorial.py @@ -61,7 +61,7 @@ and extract it to the current directory. Included in the ``data/names`` directory are 18 text files named as -"[Language].txt". Each file contains a bunch of names, one name per +``[Language].txt``. Each file contains a bunch of names, one name per line, mostly romanized (but we still need to convert from Unicode to ASCII). @@ -179,7 +179,7 @@ def lineToTensor(line): # tutorial `__) # is just 2 linear layers which operate on an input and hidden state, with -# a LogSoftmax layer after the output. +# a ``LogSoftmax`` layer after the output. # # .. figure:: https://i.imgur.com/Z2xbySO.png # :alt: @@ -230,7 +230,7 @@ def initHidden(self): # For the sake of efficiency we don't want to be creating a new Tensor for # every step, so we will use ``lineToTensor`` instead of # ``letterToTensor`` and use slices. This could be further optimized by -# pre-computing batches of Tensors. +# precomputing batches of Tensors. # input = lineToTensor('Albert') @@ -372,7 +372,7 @@ def timeSince(since): output, loss = train(category_tensor, line_tensor) current_loss += loss - # Print iter number, loss, name and guess + # Print ``iter`` number, loss, name and guess if iter % print_every == 0: guess, guess_i = categoryFromOutput(output) correct = '✓' if guess == category else '✗ (%s)' % category @@ -495,7 +495,7 @@ def predict(input_line, n_predictions=3): # - ``model.py`` (defines the RNN) # - ``train.py`` (runs training) # - ``predict.py`` (runs ``predict()`` with command line arguments) -# - ``server.py`` (serve prediction as a JSON API with bottle.py) +# - ``server.py`` (serve prediction as a JSON API with ``bottle.py``) # # Run ``train.py`` to train and save the network. 
# diff --git a/intermediate_source/char_rnn_generation_tutorial.py b/intermediate_source/char_rnn_generation_tutorial.py index ee7b0d14fd3..431c2bf43d9 100644 --- a/intermediate_source/char_rnn_generation_tutorial.py +++ b/intermediate_source/char_rnn_generation_tutorial.py @@ -234,7 +234,7 @@ def inputTensor(line): tensor[li][0][all_letters.find(letter)] = 1 return tensor -# LongTensor of second letter to end (EOS) for target +# ``LongTensor`` of second letter to end (EOS) for target def targetTensor(line): letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))] letter_indexes.append(n_letters - 1) # EOS @@ -322,7 +322,7 @@ def timeSince(since): print_every = 5000 plot_every = 500 all_losses = [] -total_loss = 0 # Reset every plot_every iters +total_loss = 0 # Reset every ``plot_every`` ``iters`` start = time.time() @@ -429,6 +429,6 @@ def samples(category, start_letters='ABC'): # choosing a start letter # - Get better results with a bigger and/or better shaped network # -# - Try the nn.LSTM and nn.GRU layers +# - Try the ``nn.LSTM`` and ``nn.GRU`` layers # - Combine multiple of these RNNs as a higher level network # diff --git a/intermediate_source/custom_function_conv_bn_tutorial.py b/intermediate_source/custom_function_conv_bn_tutorial.py index 8838fc0d32a..a9fcd8838ae 100644 --- a/intermediate_source/custom_function_conv_bn_tutorial.py +++ b/intermediate_source/custom_function_conv_bn_tutorial.py @@ -35,7 +35,7 @@ For simplicity, in this tutorial we hardcode `bias=False`, `stride=1`, `padding=0`, `dilation=1`, and `groups=1` for Conv2D. For BatchNorm2D, we hardcode `eps=1e-3`, `momentum=0.1`, `affine=False`, and `track_running_statistics=False`. Another small difference -is that we add epsilon in the denomator outside of the square root in the computation +is that we add epsilon in the denominator outside of the square root in the computation of batch norm. [0] https://nenadmarkus.com/p/fusing-batchnorm-and-conv/ @@ -72,7 +72,7 @@ def backward(ctx, grad_out): return convolution_backward(grad_out, X, weight) ###################################################################### -# When testing with gradcheck, it is important to use double precision +# When testing with ``gradcheck``, it is important to use double precision weight = torch.rand(5, 3, 3, 3, requires_grad=True, dtype=torch.double) X = torch.rand(10, 3, 7, 7, requires_grad=True, dtype=torch.double) torch.autograd.gradcheck(Conv2D.apply, (X, weight)) @@ -80,38 +80,38 @@ def backward(ctx, grad_out): ###################################################################### # Backward Formula Implementation for Batch Norm # ------------------------------------------------------------------- -# Batch Norm has two modes: training and eval mode. In training mode -# the sample statistics are a function of the inputs. In eval mode, +# Batch Norm has two modes: training and ``eval`` mode. In training mode +# the sample statistics are a function of the inputs. In ``eval`` mode, # we use the saved running statistics, which are not a function of the inputs. # This makes non-training mode's backward significantly simpler. Below # we implement and test only the training mode case. 
def unsqueeze_all(t): - # Helper function to unsqueeze all the dimensions that we reduce over + # Helper function to ``unsqueeze`` all the dimensions that we reduce over return t[None, :, None, None] def batch_norm_backward(grad_out, X, sum, sqrt_var, N, eps): - # We use the formula: out = (X - mean(X)) / (sqrt(var(X)) + eps) - # in batch norm 2d's forward. To simplify our derivation, we follow the + # We use the formula: ``out = (X - mean(X)) / (sqrt(var(X)) + eps)`` + # in batch norm 2D forward. To simplify our derivation, we follow the # chain rule and compute the gradients as follows before accumulating # them all into a final grad_input. - # 1) 'grad of out wrt var(X)' * 'grad of var(X) wrt X' - # 2) 'grad of out wrt mean(X)' * 'grad of mean(X) wrt X' - # 3) 'grad of out wrt X in the numerator' * 'grad of X wrt X' + # 1) ``grad of out wrt var(X)`` * ``grad of var(X) wrt X`` + # 2) ``grad of out wrt mean(X)`` * ``grad of mean(X) wrt X`` + # 3) ``grad of out wrt X in the numerator`` * ``grad of X wrt X`` # We then rewrite the formulas to use as few extra buffers as possible tmp = ((X - unsqueeze_all(sum) / N) * grad_out).sum(dim=(0, 2, 3)) tmp *= -1 - d_denom = tmp / (sqrt_var + eps)**2 # d_denom = -num / denom**2 - # It is useful to delete tensors when you no longer need them with `del` - # For example, we could've done `del tmp` here because we won't use it later - # In this case, it's not a big difference because tmp only has size of (C,) + d_denom = tmp / (sqrt_var + eps)**2 # ``d_denom = -num / denom**2`` + # It is useful to delete tensors when you no longer need them with ``del`` + # For example, we could've done ``del tmp`` here because we won't use it later + # In this case, it's not a big difference because ``tmp`` only has size of (C,) # The important thing is avoid allocating NCHW-sized tensors unnecessarily - d_var = d_denom / (2 * sqrt_var) # denom = torch.sqrt(var) + eps - # Compute d_mean_dx before allocating the final NCHW-sized grad_input buffer + d_var = d_denom / (2 * sqrt_var) # ``denom = torch.sqrt(var) + eps`` + # Compute ``d_mean_dx`` before allocating the final NCHW-sized grad_input buffer d_mean_dx = grad_out / unsqueeze_all(sqrt_var + eps) d_mean_dx = unsqueeze_all(-d_mean_dx.sum(dim=(0, 2, 3)) / N) - # d_mean_dx has already been reassigned to a C-sized buffer so no need to worry + # ``d_mean_dx`` has already been reassigned to a C-sized buffer so no need to worry - # (1) unbiased_var(x) = ((X - unsqueeze_all(mean))**2).sum(dim=(0, 2, 3)) / (N - 1) + # ``(1) unbiased_var(x) = ((X - unsqueeze_all(mean))**2).sum(dim=(0, 2, 3)) / (N - 1)`` grad_input = X * unsqueeze_all(d_var * N) grad_input += unsqueeze_all(-d_var * sum) grad_input *= 2 / ((N - 1) * N) @@ -120,13 +120,13 @@ def batch_norm_backward(grad_out, X, sum, sqrt_var, N, eps): # (3) Add 'grad_out / ' without allocating an extra buffer grad_input *= unsqueeze_all(sqrt_var + eps) grad_input += grad_out - grad_input /= unsqueeze_all(sqrt_var + eps) # sqrt_var + eps > 0! 
+ grad_input /= unsqueeze_all(sqrt_var + eps) # ``sqrt_var + eps > 0!`` return grad_input class BatchNorm(torch.autograd.Function): @staticmethod def forward(ctx, X, eps=1e-3): - # Don't save keepdim'd values for backward + # Don't save ``keepdim`` values for backward sum = X.sum(dim=(0, 2, 3)) var = X.var(unbiased=True, dim=(0, 2, 3)) N = X.numel() / X.size(1) @@ -149,7 +149,7 @@ def backward(ctx, grad_out): return batch_norm_backward(grad_out, X, ctx.sum, ctx.sqrt_var, ctx.N, ctx.eps) ###################################################################### -# Testing with gradcheck +# Testing with ``gradcheck`` a = torch.rand(1, 2, 3, 4, requires_grad=True, dtype=torch.double) torch.autograd.gradcheck(BatchNorm.apply, (a,), fast_mode=False) @@ -228,7 +228,7 @@ def reset_parameters(self) -> None: nn.init.kaiming_uniform_(self.conv_weight, a=math.sqrt(5)) ###################################################################### -# Use gradcheck to validate the correctness of our backward formula +# Use ``gradcheck`` to validate the correctness of our backward formula weight = torch.rand(5, 3, 3, 3, requires_grad=True, dtype=torch.double) X = torch.rand(2, 3, 4, 4, requires_grad=True, dtype=torch.double) torch.autograd.gradcheck(FusedConvBN2DFunction.apply, (X, weight)) @@ -236,7 +236,7 @@ def reset_parameters(self) -> None: ###################################################################### # Testing out our new Layer # ------------------------------------------------------------------- -# Use FusedConvBN to train a basic network +# Use ``FusedConvBN`` to train a basic network # The code below is after some light modifications to the example here: # https://github.com/pytorch/examples/tree/master/mnist import torch.optim as optim @@ -350,20 +350,20 @@ def test(model, device, test_loader): ###################################################################### # A Comparison of Memory Usage # ------------------------------------------------------------------- -# If cuda is enabled, print out memory usage for both `fused=True` and `fused=False` -# For an example run on RTX 3070, CuDNN 8.0.5: fused peak memory: 1.56GB, +# If CUDA is enabled, print out memory usage for both `fused=True` and `fused=False` +# For an example run on NVIDIA GeForce RTX 3070, NVIDIA CUDA® Deep Neural Network library (cuDNN) 8.0.5: fused peak memory: 1.56GB, # unfused peak memory: 2.68GB # # It is important to note that the *peak* memory usage for this model may vary depending -# the specific CuDNN convolution algorithm used. For shallower models, it +# the specific cuDNN convolution algorithm used. For shallower models, it # may be possible for the peak memory allocated of the fused model to exceed # that of the unfused model! This is because the memory allocated to compute -# certain CuDNN convolution algorithms can be high enough to "hide" the typical peak +# certain cuDNN convolution algorithms can be high enough to "hide" the typical peak # you would expect to be near the start of the backward pass. # # For this reason, we also record and display the memory allocated at the end # of the forward pass as an approximation, and to demonstrate that we indeed -# allocate one fewer buffer per fused conv-bn pair. +# allocate one fewer buffer per fused ``conv-bn`` pair. 
from statistics import mean torch.backends.cudnn.enabled = True @@ -384,7 +384,7 @@ def test(model, device, test_loader): scheduler.step() peak_memory_allocated.append(torch.cuda.max_memory_allocated()) torch.cuda.reset_peak_memory_stats() - print("CuDNN version:", torch.backends.cudnn.version()) + print("cuDNN version:", torch.backends.cudnn.version()) print() print("Peak memory allocated:") print(f"fused: {peak_memory_allocated[0]/1024**3:.2f}GB, unfused: {peak_memory_allocated[1]/1024**3:.2f}GB") diff --git a/intermediate_source/ensembling.py b/intermediate_source/ensembling.py index 8b3c21e4086..8102b7bc184 100644 --- a/intermediate_source/ensembling.py +++ b/intermediate_source/ensembling.py @@ -77,15 +77,15 @@ def forward(self, x): predictions2 = [model(minibatch) for model in models] ###################################################################### -# Using vmap to vectorize the ensemble +# Using ``vmap`` to vectorize the ensemble # ------------------------------------ # -# Let's use vmap to speed up the for-loop. We must first prepare the models -# for use with vmap. +# Let's use ``vmap`` to speed up the for-loop. We must first prepare the models +# for use with ``vmap``. # # First, let’s combine the states of the model together by stacking each # parameter. For example, ``model[i].fc1.weight`` has shape ``[784, 128]``; we are -# going to stack the .fc1.weight of each of the 10 models to produce a big +# going to stack the ``.fc1.weight`` of each of the 10 models to produce a big # weight of shape ``[10, 784, 128]``. # # PyTorch offers the ``torch.func.stack_module_state`` convenience function to do @@ -95,7 +95,7 @@ def forward(self, x): params, buffers = stack_module_state(models) ###################################################################### -# Next, we need to define a function to vmap over. The function should, +# Next, we need to define a function to ``vmap`` over. The function should, # given parameters and buffers and inputs, run the model using those # parameters, buffers, and inputs. We'll use ``torch.func.functional_call`` # to help out: @@ -114,9 +114,9 @@ def fmodel(params, buffers, x): ###################################################################### # Option 1: get predictions using a different minibatch for each model. # -# By default, vmap maps a function across the first dimension of all inputs to +# By default, ``vmap`` maps a function across the first dimension of all inputs to # the passed-in function. After using ``stack_module_state``, each of -# the params and buffers have an additional dimension of size 'num_models' at +# the ``params`` and buffers have an additional dimension of size 'num_models' at # the front, and minibatches has a dimension of size 'num_models'. print([p.size(0) for p in params.values()]) # show the leading 'num_models' dimension @@ -127,14 +127,14 @@ def fmodel(params, buffers, x): predictions1_vmap = vmap(fmodel)(params, buffers, minibatches) -# verify the vmap predictions match the +# verify the ``vmap`` predictions match the assert torch.allclose(predictions1_vmap, torch.stack(predictions_diff_minibatch_loop), atol=1e-3, rtol=1e-5) ###################################################################### # Option 2: get predictions using the same minibatch of data. # -# vmap has an in_dims arg that specifies which dimensions to map over. -# By using ``None``, we tell vmap we want the same minibatch to apply for all of +# ``vmap`` has an ``in_dims`` argument that specifies which dimensions to map over. 
+# By using ``None``, we tell ``vmap`` we want the same minibatch to apply for all of # the 10 models. predictions2_vmap = vmap(fmodel, in_dims=(0, 0, None))(params, buffers, minibatch) @@ -143,9 +143,9 @@ def fmodel(params, buffers, x): ###################################################################### # A quick note: there are limitations around what types of functions can be -# transformed by vmap. The best functions to transform are ones that are pure +# transformed by ``vmap``. The best functions to transform are ones that are pure # functions: a function where the outputs are only determined by the inputs -# that have no side effects (e.g. mutation). vmap is unable to handle mutation +# that have no side effects (e.g. mutation). ``vmap`` is unable to handle mutation # of arbitrary Python data structures, but it is able to handle many in-place # PyTorch operations. @@ -165,11 +165,11 @@ def fmodel(params, buffers, x): print(f'Predictions with vmap {with_vmap.timeit(100)}') ###################################################################### -# There's a large speedup using vmap! +# There's a large speedup using ``vmap``! # -# In general, vectorization with vmap should be faster than running a function +# In general, vectorization with ``vmap`` should be faster than running a function # in a for-loop and competitive with manual batching. There are some exceptions -# though, like if we haven’t implemented the vmap rule for a particular +# though, like if we haven’t implemented the ``vmap`` rule for a particular # operation or if the underlying kernels weren’t optimized for older hardware # (GPUs). If you see any of these cases, please let us know by opening an issue # on GitHub. diff --git a/intermediate_source/flask_rest_api_tutorial.py b/intermediate_source/flask_rest_api_tutorial.py index 39c1a9d39f7..690fa975a5c 100644 --- a/intermediate_source/flask_rest_api_tutorial.py +++ b/intermediate_source/flask_rest_api_tutorial.py @@ -53,7 +53,7 @@ # Simple Web Server # ----------------- # -# Following is a simple webserver, taken from Flask's documentation +# Following is a simple web server, taken from Flask's documentation from flask import Flask @@ -114,7 +114,7 @@ def predict(): # ~~~~~~~~~~~~~~~~~~~ # # DenseNet model requires the image to be of 3 channel RGB image of size -# 224 x 224. We will also normalise the image tensor with the required mean +# 224 x 224. We will also normalize the image tensor with the required mean # and standard deviation values. You can read more about it # `here `_. # diff --git a/intermediate_source/forward_ad_usage.py b/intermediate_source/forward_ad_usage.py index ef194d65023..10965d64ab9 100644 --- a/intermediate_source/forward_ad_usage.py +++ b/intermediate_source/forward_ad_usage.py @@ -25,7 +25,7 @@ to dual numbers[0]. As the forward pass is performed, if any input tensors are dual tensors, -extra computation is performed to propogate this "sensitivity" of the +extra computation is performed to propagate this "sensitivity" of the function. 
""" @@ -68,7 +68,7 @@ def fn(x, y): plain_tensor = torch.randn(10, 10) dual_output = fn(dual_input, plain_tensor) - # Unpacking the dual returns a namedtuple with ``primal`` and ``tangent`` + # Unpacking the dual returns a ``namedtuple`` with ``primal`` and ``tangent`` # as attributes jvp = fwAD.unpack_dual(dual_output).tangent @@ -136,7 +136,7 @@ class Fn(torch.autograd.Function): @staticmethod def forward(ctx, foo): result = torch.exp(foo) - # Tensors stored in ctx can be used in the subsequent forward grad + # Tensors stored in ``ctx`` can be used in the subsequent forward grad # computation. ctx.result = result return result @@ -144,7 +144,7 @@ def forward(ctx, foo): @staticmethod def jvp(ctx, gI): gO = gI * ctx.result - # If the tensor stored in ctx will not also be used in the backward pass, + # If the tensor stored in`` ctx`` will not also be used in the backward pass, # one can manually free it using ``del`` del ctx.result return gO @@ -161,9 +161,9 @@ def jvp(ctx, gI): # It is important to use ``autograd.gradcheck`` to verify that your # custom autograd Function computes the gradients correctly. By default, -# gradcheck only checks the backward-mode (reverse-mode) AD gradients. Specify +# ``gradcheck`` only checks the backward-mode (reverse-mode) AD gradients. Specify # ``check_forward_ad=True`` to also check forward grads. If you did not -# implement the backward formula for your function, you can also tell gradcheck +# implement the backward formula for your function, you can also tell ``gradcheck`` # to skip the tests that require backward-mode AD by specifying # ``check_backward_ad=False``, ``check_undefined_grad=False``, and # ``check_batched_grad=False``. @@ -198,11 +198,11 @@ def fn(x, y): return x ** 2 + y ** 2 # Here is a basic example to compute the JVP of the above function. -# The jvp(func, primals, tangents) returns func(*primals) as well as the -# computed jvp. Each primal must be associated with a tangent of the same shape. +# The ``jvp(func, primals, tangents)`` returns ``func(*primals)`` as well as the +# computed Jacobian-vector product (JVP). Each primal must be associated with a tangent of the same shape. primal_out, tangent_out = ft.jvp(fn, (primal0, primal1), (tangent0, tangent1)) -# functorch.jvp requires every primal to be associated with a tangent. +# ``functorch.jvp`` requires every primal to be associated with a tangent. # If we only want to associate certain inputs to `fn` with tangents, # then we'll need to create a new function that captures inputs without tangents: primal = torch.randn(10, 10) @@ -216,7 +216,7 @@ def fn(x, y): ###################################################################### # Using the functional API with Modules # -------------------------------------------------------------------- -# To use ``nn.Module`` with functorch.jvp to compute Jacobian-vector products +# To use ``nn.Module`` with ``functorch.jvp`` to compute Jacobian-vector products # with respect to the model parameters, we need to reformulate the # ``nn.Module`` as a function that accepts both the model parameters and inputs # to the module. 
@@ -225,16 +225,16 @@ def fn(x, y): input = torch.randn(16, 5) tangents = tuple([torch.rand_like(p) for p in model.parameters()]) -# Given a torch.nn.Module, ft.make_functional_with_buffers extracts the state -# (params and buffers) and returns a functional version of the model that +# Given a ``torch.nn.Module``, ``ft.make_functional_with_buffers`` extracts the state +# (``params`` and buffers) and returns a functional version of the model that # can be invoked like a function. # That is, the returned ``func`` can be invoked like # ``func(params, buffers, input)``. -# ft.make_functional_with_buffers is analogous to the nn.Modules stateless API +# ``ft.make_functional_with_buffers`` is analogous to the ``nn.Modules`` stateless API # that you saw previously and we're working on consolidating the two. func, params, buffers = ft.make_functional_with_buffers(model) -# Because jvp requires every input to be associated with a tangent, we need to +# Because ``jvp`` requires every input to be associated with a tangent, we need to # create a new function that, when given the parameters, produces the output def func_params_only(params): return func(params, buffers, input) diff --git a/intermediate_source/fx_conv_bn_fuser.py b/intermediate_source/fx_conv_bn_fuser.py index c06f5f76835..90620ceba4e 100644 --- a/intermediate_source/fx_conv_bn_fuser.py +++ b/intermediate_source/fx_conv_bn_fuser.py @@ -32,7 +32,7 @@ # For this tutorial, we are going to create a model consisting of convolutions # and batch norms. Note that this model has some tricky components - some of # the conv/batch norm patterns are hidden within Sequentials and one of the -# BatchNorms is wrapped in another Module. +# ``BatchNorms`` is wrapped in another Module. class WrappedBatchNorm(nn.Module): def __init__(self): @@ -137,7 +137,7 @@ def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): def _parent_name(target : str) -> Tuple[str, str]: """ - Splits a qualname into parent path and last atom. + Splits a ``qualname`` into parent path and last atom. For example, `foo.bar.baz` -> (`foo.bar`, `baz`) """ *parent, name = target.rsplit('.', 1) @@ -242,9 +242,9 @@ def benchmark(model, iters=20): print("Fused time: ", benchmark(fused_rn18)) ###################################################################### # As we previously saw, the output of our FX transformation is -# (Torchscriptable) PyTorch code, we can easily `jit.script` the output to try +# ("torchscriptable") PyTorch code, we can easily ``jit.script`` the output to try # and increase our performance even more. In this way, our FX model -# transformation composes with Torchscript with no issues. +# transformation composes with TorchScript with no issues. jit_rn18 = torch.jit.script(fused_rn18) print("jit time: ", benchmark(jit_rn18)) diff --git a/intermediate_source/fx_profiling_tutorial.py b/intermediate_source/fx_profiling_tutorial.py index 06726e4dd6c..18d8bc67cf4 100644 --- a/intermediate_source/fx_profiling_tutorial.py +++ b/intermediate_source/fx_profiling_tutorial.py @@ -117,7 +117,7 @@ def __init__(self, mod : torch.nn.Module): ###################################################################### # Next, let's override our first method: ``run()``. ``Interpreter``'s ``run`` - # method is the top-level entrypoint for execution of the model. We will + # method is the top-level entry point for execution of the model. We will # want to intercept this so that we can record the total runtime of the # model. 
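# For orientation, here is a condensed, illustrative sketch of the ``run()`` override
# described above. The tutorial's ``ProfilingInterpreter`` does the same thing and
# additionally profiles each node; the class and variable names here are examples only:
import time
import torch
import torch.fx
from torch.fx import Interpreter

class TimedInterpreter(Interpreter):
    def __init__(self, mod: torch.nn.Module):
        gm = torch.fx.symbolic_trace(mod)
        super().__init__(gm)
        self.total_runtime_sec = []

    def run(self, *args):
        t_start = time.time()
        return_val = super().run(*args)  # execute the traced module as usual
        # Record how long the whole model execution took
        self.total_runtime_sec.append(time.time() - t_start)
        return return_val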
@@ -129,7 +129,7 @@ def run(self, *args) -> Any: # Record the time we finished running the model t_end = time.time() # Store the total elapsed time this model execution took in the - # ProfilingInterpreter + # ``ProfilingInterpreter`` self.total_runtime_sec.append(t_end - t_start) return return_val @@ -176,7 +176,7 @@ def summary(self, should_sort : bool = False) -> str: # time each node took with respect to the whole network. pct_total = mean_runtime / mean_total_runtime * 100 # Record the node's type, name of the node, mean runtime, and - # percent runtim + # percent runtime. node_summaries.append( [node.op, str(node), mean_runtime, pct_total]) @@ -214,7 +214,7 @@ def summary(self, should_sort : bool = False) -> str: ###################################################################### # There are two things we should call out here: # -# * MaxPool2d takes up the most time. This is a known issue: +# * ``MaxPool2d`` takes up the most time. This is a known issue: # https://github.com/pytorch/pytorch/issues/51393 # * BatchNorm2d also takes up significant time. We can continue this # line of thinking and optimize this in the Conv-BN Fusion with FX @@ -226,7 +226,7 @@ def summary(self, should_sort : bool = False) -> str: # As we can see, using FX we can easily capture PyTorch programs (even # ones we don't have the source code for!) in a machine-interpretable # format and use that for analysis, such as the performance analysis -# we've done here. FX opens up an exiciting world of possibilities for +# we've done here. FX opens up an exciting world of possibilities for # working with PyTorch programs. # # Finally, since FX is still in beta, we would be happy to hear any diff --git a/intermediate_source/jacobians_hessians.py b/intermediate_source/jacobians_hessians.py index 3da8bda11f1..b8b96c30a3e 100644 --- a/intermediate_source/jacobians_hessians.py +++ b/intermediate_source/jacobians_hessians.py @@ -62,7 +62,7 @@ def compute_jac(xp): ###################################################################### # Instead of computing the jacobian row-by-row, we can use PyTorch's # ``torch.vmap`` function transform to get rid of the for-loop and vectorize the -# computation. We can’t directly apply vmap to ``torch.autograd.grad``; +# computation. We can’t directly apply ``vmap`` to ``torch.autograd.grad``; # instead, PyTorch provides a ``torch.func.vjp`` transform that composes with # ``torch.vmap``: @@ -76,15 +76,15 @@ def compute_jac(xp): assert torch.allclose(ft_jacobian, jacobian) ###################################################################### -# In a later tutorial a composition of reverse-mode AD and vmap will give us +# In a later tutorial a composition of reverse-mode AD and ``vmap`` will give us # per-sample-gradients. -# In this tutorial, composing reverse-mode AD and vmap gives us Jacobian +# In this tutorial, composing reverse-mode AD and ``vmap`` gives us Jacobian # computation! -# Various compositions of vmap and autodiff transforms can give us different +# Various compositions of ``vmap`` and autodiff transforms can give us different # interesting quantities. # # PyTorch provides ``torch.func.jacrev`` as a convenience function that performs -# the vmap-vjp composition to compute jacobians. ``jacrev`` accepts an argnums +# the ``vmap-vjp`` composition to compute jacobians. ``jacrev`` accepts an ``argnums`` # argument that says which argument we would like to compute Jacobians with # respect to. 
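# A small, self-contained illustration (not taken from the patch) of what ``argnums``
# selects: the positional argument that the Jacobian is computed with respect to.
# The function and shapes below are made up for the example:
import torch
from torch.func import jacrev

def f(w, x):
    return torch.tanh(w @ x)

w = torch.randn(3, 4)
x = torch.randn(4)

jac_wrt_x = jacrev(f, argnums=1)(w, x)  # d f / d x, shape (3, 4)
jac_wrt_w = jacrev(f, argnums=0)(w, x)  # d f / d w, shape (3, 3, 4)
print(jac_wrt_x.shape, jac_wrt_w.shape)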
@@ -92,7 +92,7 @@ def compute_jac(xp): ft_jacobian = jacrev(predict, argnums=2)(weight, bias, x) -# confirm +# Confirm by running the following: assert torch.allclose(ft_jacobian, jacobian) ###################################################################### @@ -100,10 +100,10 @@ def compute_jac(xp): # The function transform version is much faster (and becomes even faster the # more outputs there are). # -# In general, we expect that vectorization via vmap can help eliminate overhead +# In general, we expect that vectorization via ``vmap`` can help eliminate overhead # and give better utilization of your hardware. # -# vmap does this magic by pushing the outer loop down into the function's +# ``vmap`` does this magic by pushing the outer loop down into the function's # primitive operations in order to obtain better performance. # # Let's make a quick function to evaluate performance and deal with @@ -133,34 +133,34 @@ def get_perf(first, first_descriptor, second, second_descriptor): print(with_vmap_timer) ###################################################################### -# Let's do a relative performance comparison of the above with our get_perf function: +# Let's do a relative performance comparison of the above with our ``get_perf`` function: get_perf(no_vmap_timer, "without vmap", with_vmap_timer, "vmap") ###################################################################### -# Furthemore, it’s pretty easy to flip the problem around and say we want to +# Furthermore, it’s pretty easy to flip the problem around and say we want to # compute Jacobians of the parameters to our model (weight, bias) instead of the input -# note the change in input via argnums params of 0,1 to map to weight and bias +# note the change in input via ``argnums`` parameters of 0,1 to map to weight and bias ft_jac_weight, ft_jac_bias = jacrev(predict, argnums=(0, 1))(weight, bias, x) ###################################################################### -# reverse-mode Jacobian (jacrev) vs forward-mode Jacobian (jacfwd) -# -------------------------------------------------------------------- +# Reverse-mode Jacobian (``jacrev``) vs forward-mode Jacobian (``jacfwd``) +# ------------------------------------------------------------------------ # # We offer two APIs to compute jacobians: ``jacrev`` and ``jacfwd``: # -# - jacrev uses reverse-mode AD. As you saw above it is a composition of our -# vjp and vmap transforms. -# - jacfwd uses forward-mode AD. It is implemented as a composition of our -# jvp and vmap transforms. +# - ``jacrev`` uses reverse-mode AD. As you saw above it is a composition of our +# ``vjp`` and ``vmap`` transforms. +# - ``jacfwd`` uses forward-mode AD. It is implemented as a composition of our +# ``jvp`` and ``vmap`` transforms. # -# jacfwd and jacrev can be substituted for each other but they have different +# ``jacfwd`` and ``jacrev`` can be substituted for each other but they have different # performance characteristics. # # As a general rule of thumb, if you’re computing the jacobian of an :math:`R^N \to R^M` -# function, and there are many more outputs than inputs (i.e. :math:`M > N`) then -# jacfwd is preferred, otherwise use jacrev. There are exceptions to this rule, +# function, and there are many more outputs than inputs (for example, :math:`M > N`) then +# ``jacfwd`` is preferred, otherwise use ``jacrev``. 
There are exceptions to this rule, # but a non-rigorous argument for this follows: # # In reverse-mode AD, we are computing the jacobian row-by-row, while in @@ -217,7 +217,7 @@ def get_perf(first, first_descriptor, second, second_descriptor): print(f'jacrev time: {jacrev_timing}') ####################################################################### -# and a relative perf comparison: +# and a relative performance comparison: get_perf(jacrev_timing, "jacrev", jacfwd_timing, "jacfwd") @@ -228,7 +228,7 @@ def get_perf(first, first_descriptor, second, second_descriptor): # Hessians are the jacobian of the jacobian (or the partial derivative of # the partial derivative, aka second order). # -# This suggests that one can just compose functorch’s jacobian transforms to +# This suggests that one can just compose functorch jacobian transforms to # compute the Hessian. # Indeed, under the hood, ``hessian(f)`` is simply ``jacfwd(jacrev(f))``. # @@ -238,7 +238,7 @@ def get_perf(first, first_descriptor, second, second_descriptor): from torch.func import hessian -# lets reduce the size in order not to blow out colab. Hessians require +# lets reduce the size in order not to overwhelm Colab. Hessians require # significant memory: Din = 512 Dout = 32 @@ -251,8 +251,8 @@ def get_perf(first, first_descriptor, second, second_descriptor): hess_revrev = jacrev(jacrev(predict, argnums=2), argnums=2)(weight, bias, x) ####################################################################### -# Let's verify we have the same result regardless of using hessian api or -# using jacfwd(jacfwd()) +# Let's verify we have the same result regardless of using hessian API or +# using ``jacfwd(jacfwd())``. torch.allclose(hess_api, hess_fwdfwd) @@ -265,7 +265,7 @@ def get_perf(first, first_descriptor, second, second_descriptor): # shape ``(B, N)`` and a function that goes from :math:`R^N \to R^M`, we would like # a Jacobian of shape ``(B, M, N)``. # -# The easiest way to do this is to use vmap: +# The easiest way to do this is to use ``vmap``: batch_size = 64 Din = 31 @@ -284,7 +284,7 @@ def get_perf(first, first_descriptor, second, second_descriptor): ####################################################################### # If you have a function that goes from (B, N) -> (B, M) instead and are # certain that each input produces an independent output, then it's also -# sometimes possible to do this without using vmap by summing the outputs +# sometimes possible to do this without using ``vmap`` by summing the outputs # and then computing the Jacobian of that function: def predict_with_output_summed(weight, bias, x): @@ -295,10 +295,10 @@ def predict_with_output_summed(weight, bias, x): ####################################################################### # If you instead have a function that goes from :math:`R^N \to R^M` but inputs that -# are batched, you compose vmap with jacrev to compute batched jacobians: +# are batched, you compose ``vmap`` with ``jacrev`` to compute batched jacobians: # # Finally, batch hessians can be computed similarly. It's easiest to think -# about them by using vmap to batch over hessian computation, but in some +# about them by using ``vmap`` to batch over hessian computation, but in some # cases the sum trick also works. 
compute_batch_hessian = vmap(hessian(predict, argnums=2), in_dims=(None, None, 0)) diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py index 4445704cd1b..ff653d54c11 100755 --- a/intermediate_source/mario_rl_tutorial.py +++ b/intermediate_source/mario_rl_tutorial.py @@ -3,10 +3,7 @@ Train a Mario-playing RL Agent ================ -Authors: `Yuansong Feng `__, `Suraj -Subramanian `__, `Howard -Wang `__, `Steven -Guo `__. +**Authors:** `Yuansong Feng `__, `Suraj Subramanian `__, `Howard Wang `__, `Steven Guo `__. This tutorial walks you through the fundamentals of Deep Reinforcement @@ -308,9 +305,9 @@ def act(self, state): Given a state, choose an epsilon-greedy action and update value of step. Inputs: - state(LazyFrame): A single observation of the current state, dimension is (state_dim) + state(``LazyFrame``): A single observation of the current state, dimension is (state_dim) Outputs: - action_idx (int): An integer representing which action Mario will perform + ``action_idx`` (``int``): An integer representing which action Mario will perform """ # EXPLORE if np.random.rand() < self.exploration_rate: @@ -359,11 +356,11 @@ def cache(self, state, next_state, action, reward, done): Store the experience to self.memory (replay buffer) Inputs: - state (LazyFrame), - next_state (LazyFrame), - action (int), - reward (float), - done(bool)) + state (``LazyFrame``), + next_state (``LazyFrame``), + action (``int``), + reward (``float``), + done(``bool``)) """ def first_if_tuple(x): return x[0] if isinstance(x, tuple) else x @@ -399,7 +396,7 @@ def recall(self): # In our implementation, we share feature generator ``features`` across # :math:`Q_{online}` and :math:`Q_{target}`, but maintain separate FC # classifiers for each. :math:`\theta_{target}` (the parameters of -# :math:`Q_{target}`) is frozen to prevent updation by backprop. Instead, +# :math:`Q_{target}`) is frozen to prevent updating by backprop. Instead, # it is periodically synced with :math:`\theta_{online}` (more on this # later). 
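# A hedged sketch of the two ideas described above, with illustrative names (the
# tutorial wires this up inside its own network and agent classes): the target network
# is a frozen copy of the online network, and its weights are refreshed periodically.
import copy
import torch.nn as nn

def make_target(online: nn.Module) -> nn.Module:
    target = copy.deepcopy(online)
    for p in target.parameters():
        p.requires_grad = False  # frozen: never updated by backprop
    return target

def sync_target(online: nn.Module, target: nn.Module) -> None:
    # Called every few thousand training steps
    target.load_state_dict(online.state_dict())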
# @@ -408,7 +405,7 @@ def recall(self): class MarioNet(nn.Module): - """mini cnn structure + """mini CNN structure input -> (conv2d + relu) x 3 -> flatten -> (dense + relu) x 2 -> output """ diff --git a/intermediate_source/memory_format_tutorial.py b/intermediate_source/memory_format_tutorial.py index af2842bf333..f08980265de 100644 --- a/intermediate_source/memory_format_tutorial.py +++ b/intermediate_source/memory_format_tutorial.py @@ -40,7 +40,7 @@ N, C, H, W = 10, 3, 32, 32 x = torch.empty(N, C, H, W) -print(x.stride()) # Ouputs: (3072, 1024, 32, 1) +print(x.stride()) # Outputs: (3072, 1024, 32, 1) ###################################################################### # Conversion operator @@ -56,11 +56,11 @@ ###################################################################### # Alternative option x = x.contiguous(memory_format=torch.channels_last) -print(x.stride()) # Ouputs: (3072, 1, 96, 3) +print(x.stride()) # Outputs: (3072, 1, 96, 3) ###################################################################### # Format checks -print(x.is_contiguous(memory_format=torch.channels_last)) # Ouputs: True +print(x.is_contiguous(memory_format=torch.channels_last)) # Outputs: True ###################################################################### # There are minor difference between the two APIs ``to`` and @@ -82,8 +82,8 @@ # sizes are 1 in order to properly represent the intended memory # format special_x = torch.empty(4, 1, 4, 4) -print(special_x.is_contiguous(memory_format=torch.channels_last)) # Ouputs: True -print(special_x.is_contiguous(memory_format=torch.contiguous_format)) # Ouputs: True +print(special_x.is_contiguous(memory_format=torch.channels_last)) # Outputs: True +print(special_x.is_contiguous(memory_format=torch.contiguous_format)) # Outputs: True ###################################################################### # Same thing applies to explicit permutation API ``permute``. In @@ -100,36 +100,36 @@ ###################################################################### # Create as channels last x = torch.empty(N, C, H, W, memory_format=torch.channels_last) -print(x.stride()) # Ouputs: (3072, 1, 96, 3) +print(x.stride()) # Outputs: (3072, 1, 96, 3) ###################################################################### # ``clone`` preserves memory format y = x.clone() -print(y.stride()) # Ouputs: (3072, 1, 96, 3) +print(y.stride()) # Outputs: (3072, 1, 96, 3) ###################################################################### # ``to``, ``cuda``, ``float`` ... preserves memory format if torch.cuda.is_available(): y = x.cuda() - print(y.stride()) # Ouputs: (3072, 1, 96, 3) + print(y.stride()) # Outputs: (3072, 1, 96, 3) ###################################################################### # ``empty_like``, ``*_like`` operators preserves memory format y = torch.empty_like(x) -print(y.stride()) # Ouputs: (3072, 1, 96, 3) +print(y.stride()) # Outputs: (3072, 1, 96, 3) ###################################################################### # Pointwise operators preserves memory format z = x + y -print(z.stride()) # Ouputs: (3072, 1, 96, 3) +print(z.stride()) # Outputs: (3072, 1, 96, 3) ###################################################################### -# Conv, Batchnorm modules using cudnn backends support channels last -# (only works for CudNN >= 7.6). Convolution modules, unlike binary +# ``Conv``, ``Batchnorm`` modules using ``cudnn`` backends support channels last +# (only works for cuDNN >= 7.6). 
Convolution modules, unlike binary # p-wise operator, have channels last as the dominating memory format. -# IFF all inputs are in contiguous memory format, the operator -# produces output in contiguous memory format. Otherwise, output wil -# be in channels last memroy format. +# If all inputs are in contiguous memory format, the operator +# produces output in contiguous memory format. Otherwise, output will +# be in channels last memory format. if torch.backends.cudnn.version() >= 7603: model = torch.nn.Conv2d(8, 4, 3).cuda().half() @@ -139,7 +139,7 @@ input = input.to(device="cuda", memory_format=torch.channels_last, dtype=torch.float16) out = model(input) - print(out.is_contiguous(memory_format=torch.channels_last)) # Ouputs: True + print(out.is_contiguous(memory_format=torch.channels_last)) # Outputs: True ###################################################################### # When input tensor reaches a operator without channels last support, @@ -152,13 +152,13 @@ # Performance Gains # -------------------------------------------------------------------- # Channels last memory format optimizations are available on both GPU and CPU. -# On GPU, the most significant performance gains are observed on NVidia's +# On GPU, the most significant performance gains are observed on NVIDIA's # hardware with Tensor Cores support running on reduced precision # (``torch.float16``). -# We were able to archive over 22% perf gains with channels last +# We were able to archive over 22% performance gains with channels last # comparing to contiguous format, both while utilizing # 'AMP (Automated Mixed Precision)' training scripts. -# Our scripts uses AMP supplied by NVidia +# Our scripts uses AMP supplied by NVIDIA # https://github.com/NVIDIA/apex. # # ``python main_amp.py -a resnet50 --b 200 --workers 16 --opt-level O2 ./data`` @@ -196,7 +196,7 @@ # Epoch: [0][80/125] Time 0.260 (0.335) Speed 770.324 (597.659) Loss 2.2505953312 (1.0879) Prec@1 50.500 (52.938) Prec@5 100.000 (100.000) ###################################################################### -# Passing ``--channels-last true`` allows running a model in Channels last format with observed 22% perf gain. +# Passing ``--channels-last true`` allows running a model in Channels last format with observed 22% performance gain. 
# # ``python main_amp.py -a resnet50 --b 200 --workers 16 --opt-level O2 --channels-last true ./data`` @@ -237,12 +237,12 @@ # Epoch: [0][80/125] Time 0.198 (0.269) Speed 1011.827 (743.883) Loss 2.8196096420 (2.4011) Prec@1 47.500 (50.938) Prec@5 100.000 (100.000) ###################################################################### -# The following list of models has the full support of Channels last and showing 8%-35% perf gains on Volta devices: +# The following list of models has the full support of Channels last and showing 8%-35% performance gains on Volta devices: # ``alexnet``, ``mnasnet0_5``, ``mnasnet0_75``, ``mnasnet1_0``, ``mnasnet1_3``, ``mobilenet_v2``, ``resnet101``, ``resnet152``, ``resnet18``, ``resnet34``, ``resnet50``, ``resnext50_32x4d``, ``shufflenet_v2_x0_5``, ``shufflenet_v2_x1_0``, ``shufflenet_v2_x1_5``, ``shufflenet_v2_x2_0``, ``squeezenet1_0``, ``squeezenet1_1``, ``vgg11``, ``vgg11_bn``, ``vgg13``, ``vgg13_bn``, ``vgg16``, ``vgg16_bn``, ``vgg19``, ``vgg19_bn``, ``wide_resnet101_2``, ``wide_resnet50_2`` # ###################################################################### -# The following list of models has the full support of Channels last and showing 26%-76% perf gains on Intel(R) Xeon(R) Ice Lake (or newer) CPUs: +# The following list of models has the full support of Channels last and showing 26%-76% performance gains on Intel(R) Xeon(R) Ice Lake (or newer) CPUs: # ``alexnet``, ``densenet121``, ``densenet161``, ``densenet169``, ``googlenet``, ``inception_v3``, ``mnasnet0_5``, ``mnasnet1_0``, ``resnet101``, ``resnet152``, ``resnet18``, ``resnet34``, ``resnet50``, ``resnext101_32x8d``, ``resnext50_32x4d``, ``shufflenet_v2_x0_5``, ``shufflenet_v2_x1_0``, ``squeezenet1_0``, ``squeezenet1_1``, ``vgg11``, ``vgg11_bn``, ``vgg13``, ``vgg13_bn``, ``vgg16``, ``vgg16_bn``, ``vgg19``, ``vgg19_bn``, ``wide_resnet101_2``, ``wide_resnet50_2`` # @@ -381,7 +381,7 @@ def attribute(m): # ---------- # There are still many things to do, such as: # -# - Resolving ambiguity of N1HW and NC11 Tensors; +# - Resolving ambiguity of ``N1HW`` and ``NC11`` Tensors; # - Testing of Distributed Training support; # - Improving operators coverage. # diff --git a/intermediate_source/mnist_train_nas.py b/intermediate_source/mnist_train_nas.py index e3141e3d958..4ae6d894fce 100644 --- a/intermediate_source/mnist_train_nas.py +++ b/intermediate_source/mnist_train_nas.py @@ -1,5 +1,5 @@ """ -Example training code for ax_multiobjective_nas_tutorial.py +Example training code for ``ax_multiobjective_nas_tutorial.py`` """ import argparse diff --git a/intermediate_source/model_parallel_tutorial.py b/intermediate_source/model_parallel_tutorial.py index 7e5cb84c499..d7a4da73371 100644 --- a/intermediate_source/model_parallel_tutorial.py +++ b/intermediate_source/model_parallel_tutorial.py @@ -259,11 +259,11 @@ def forward(self, x): ret = [] for s_next in splits: - # A. s_prev runs on cuda:1 + # A. ``s_prev`` runs on ``cuda:1`` s_prev = self.seq2(s_prev) ret.append(self.fc(s_prev.view(s_prev.size(0), -1))) - # B. s_next runs on cuda:0, which can run concurrently with A + # B. ``s_next`` runs on ``cuda:0``, which can run concurrently with A s_prev = self.seq1(s_next).to('cuda:1') s_prev = self.seq2(s_prev) @@ -339,7 +339,7 @@ def forward(self, x): # still opportunities to further accelerate the training process. For example, # all operations on ``cuda:0`` is placed on its default stream. It means that # computations on the next split cannot overlap with the copy operation of the -# prev split. 
However, as prev and next splits are different tensors, there is +# ``prev`` split. However, as ``prev`` and next splits are different tensors, there is # no problem to overlap one's computation with the other one's copy. The # implementation need to use multiple streams on both GPUs, and different # sub-network structures require different stream management strategies. As no From c3c21da78161a22f8aff66b5fe9cea32e54e359b Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 21 Apr 2023 11:53:26 -0700 Subject: [PATCH 009/609] Spellcheck intermediate 2 python (#2291) * Pyspelling: intermediate Python tutorials N-Z --- .pyspelling.yml | 23 ++---- en-wordlist.txt | 77 +++++++++++++++++++ .../flask_rest_api_tutorial.py | 6 +- intermediate_source/neural_tangent_kernels.py | 12 +-- intermediate_source/nvfuser_intro_tutorial.py | 50 ++++++------ intermediate_source/parametrizations.py | 10 +-- intermediate_source/per_sample_grads.py | 20 ++--- intermediate_source/pipeline_tutorial.py | 33 ++++---- intermediate_source/pruning_tutorial.py | 4 +- intermediate_source/reinforcement_ppo.py | 48 ++++++------ .../reinforcement_q_learning.py | 18 ++--- .../scaled_dot_product_attention_tutorial.py | 39 +++++----- .../seq2seq_translation_tutorial.py | 12 +-- .../tensorboard_profiler_tutorial.py | 16 ++-- intermediate_source/torch_compile_tutorial.py | 6 +- 15 files changed, 222 insertions(+), 152 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index 9dce7c8215a..ffe9f469d03 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -3,21 +3,7 @@ matrix: - name: python sources: - beginner_source/*.py - - intermediate_source/autograd_saved_tensors_hooks_tutorial.py - - intermediate_source/ax_multiobjective_nas_tutorial.py - - intermediate_source/char_rnn_classification_tutorial.py - - intermediate_source/char_rnn_generation_tutorial.py - - intermediate_source/custom_function_conv_bn_tutorial.py - - intermediate_source/ensembling.py - #- intermediate_source/flask_rest_api_tutorial.py - - intermediate_source/forward_ad_usage.py - - intermediate_source/fx_conv_bn_fuser.py - - intermediate_source/fx_profiling_tutorial.py - - intermediate_source/jacobians_hessians.py - - intermediate_source/mario_rl_tutorial.py - - intermediate_source/mnist_train_nas.py - - intermediate_source/memory_format_tutorial.py - - intermediate_source/model_parallel_tutorial.py + - intermediate_source/*.py dictionary: wordlists: - en-wordlist.txt @@ -30,9 +16,14 @@ matrix: # Exclude figure rST tags - open: '\.\.\s+(figure|literalinclude|math|image|grid)::' close: '\n' + # Exclude roles: + - open: ':(?:(class|py:mod|mod|func)):`' + content: '[^`]*' + close: '`' # Exclude raw directive - open: '\.\. (raw)::.*$\n*' close: '\n' + # Exclude # Exclude Python coding directives - open: '-\*- coding:' close: '\n' @@ -46,7 +37,7 @@ matrix: - open: '(?s)^::\n\n ' close: '^\n' # Ignore reStructuredText block directives - - open: '\.\. (code-block)::.*$\n*' + - open: '\.\. 
(code-block|math)::.*$\n*' content: '(?P(^(?P[ ]+).*$\n))(?P(^([ \t]+.*|[ \t]*)$\n)*)' close: '(^(?![ \t]+.*$))' - pyspelling.filters.markdown: diff --git a/en-wordlist.txt b/en-wordlist.txt index 9a4a99d1df2..025098fd7ee 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -1,4 +1,5 @@ APIs +ATen Args Autograd BCE @@ -13,6 +14,8 @@ CNNDM CNNs CPUs CUDA +CartPole +Cayley Chatbots Colab Conv @@ -20,12 +23,16 @@ ConvNet ConvNets DCGAN DCGANs +DDP DDQN DNN +DQN DataLoaders +DeepMind DeiT DenseNet EOS +EPS FC FGSM FLAVA @@ -33,11 +40,16 @@ FX FX's FloydHub FloydHub's +Frobenius +GAE GAN GANs +GLOO +GPU's GPUs GRU GRUs +GTC GeForce Goodfellow Goodfellow’s @@ -45,6 +57,7 @@ GreedySearchDecoder HVP Hugging Face IMDB +IOT ImageNet Initializations Iteratively @@ -56,25 +69,32 @@ Kubernetes Kuei LSTM LSTMs +LeCun LeNet LeakyReLU LeakyReLUs +Lipschitz Lua Luong MLP MLPs MNIST +MacBook Mypy NAS +NCCL NCHW NES NLP +NTK NaN +NanoGPT NeurIPS NumPy Numericalization Numpy's OpenAI +PPO Plotly Prec Profiler @@ -83,11 +103,16 @@ RGB RL RNN RNNs +RPC RTX Radford ReLU ResNet +SDPA +SGD +SPD SST2 +STN Sequentials Sigmoid SoTA @@ -95,19 +120,31 @@ TPU TensorBoard TextVQA Tokenization +TorchDynamo +TorchInductor TorchMultimodal +TorchRL +TorchRL's TorchScript TorchX Tunable +UI Unescape VQA +VS Code Wikitext Xeon accuracies activations adversarially +affine al +allocator +allocator's +allocators +approximators autodiff +autoencoder autograd backend backends @@ -121,9 +158,13 @@ batchnorm's benchmarking boolean broadcasted +bytecode +cardinality chatbot chatbot's checkpointing +colorbar +compilable composable concat config @@ -141,11 +182,17 @@ datapipes dataset datasets dataset’s +deallocation +decorrelated deserialize deserialized +deterministically +dimensionality dir downsample downsamples +dropdown +duration embeddings encodings ensembling @@ -161,6 +208,7 @@ finetuning fp functorch fuser +geomean grayscale hardcode helpdesk @@ -174,6 +222,7 @@ imagenet initializations inlined interpretable +invariance io iterable iteratively @@ -181,9 +230,11 @@ jacobian jacobians jit jpg +judgements kwargs labelled learnable +learnings loadFilename manualSeed matplotlib @@ -194,9 +245,11 @@ misclassification misclassified modularity modularized +multihead multimodal multimodality multiobjective +multiprocessed multithreaded namespace natively @@ -204,19 +257,27 @@ ndarrays num numericalize numpy +nvFuser +nvFuser's optimizable optimizer's optimizers overfitting parallelizable parallelization +parametrization +parametrizations +parametrized +parametrizing perceptibility pipelining pointwise +precompute precomputing prepend preprocess preprocessing +prepruned prespecified pretrained prewritten @@ -229,6 +290,11 @@ quantizing queryable randint readably +recomputation +regressor +reimplement +reimplementing +reimplements reinitializes relu reproducibility @@ -236,6 +302,7 @@ rescale resnet restride rewinded +rollout romanized runnable runtime @@ -243,6 +310,8 @@ runtime runtimes scalable softmax +sparsify +specificities src stacktrace stateful @@ -252,6 +321,8 @@ subclasses subclassing subdirectories submodule +submodules +subnetworks subreddit summarization tanh @@ -262,6 +333,8 @@ timesteps tokenization tokenize tokenizer +tooltip +topologies torchaudio torchdata torchscriptable @@ -277,7 +350,11 @@ uncommented unfused unimodal unnormalized +unoptimized +unparametrized unpickling +unpruned +updation utils vectorization vectorize diff --git a/intermediate_source/flask_rest_api_tutorial.py 
b/intermediate_source/flask_rest_api_tutorial.py index 690fa975a5c..0975ff93125 100644 --- a/intermediate_source/flask_rest_api_tutorial.py +++ b/intermediate_source/flask_rest_api_tutorial.py @@ -318,10 +318,10 @@ def get_prediction(image_bytes): # # .. code-block:: python # -# import requests +# import requests # -# resp = requests.post("http://localhost:5000/predict", -# files={"file": open('/cat.jpg','rb')}) +# resp = requests.post("http://localhost:5000/predict", +# files={"file": open('/cat.jpg','rb')}) ####################################################################### # Printing `resp.json()` will now show the following: diff --git a/intermediate_source/neural_tangent_kernels.py b/intermediate_source/neural_tangent_kernels.py index 5d897bfa31f..ca1de89daf1 100644 --- a/intermediate_source/neural_tangent_kernels.py +++ b/intermediate_source/neural_tangent_kernels.py @@ -58,7 +58,7 @@ def forward(self, x): # we will need a function that accepts the parameters of the model and a single # input (as opposed to a batch of inputs!) and returns a single output. # -# We'll use ``torch.func.functional_call``, which allows us to call an nn.Module +# We'll use ``torch.func.functional_call``, which allows us to call an ``nn.Module`` # using different parameters/buffers, to help accomplish the first step. # # Keep in mind that the model was originally written to accept a batch of input @@ -200,10 +200,10 @@ def func_x2(params): output, vjp_fn = vjp(func_x1, params) def get_ntk_slice(vec): - # This computes vec @ J(x2).T + # This computes ``vec @ J(x2).T`` # `vec` is some unit vector (a single slice of the Identity matrix) vjps = vjp_fn(vec) - # This computes J(X1) @ vjps + # This computes ``J(X1) @ vjps`` _, jvps = jvp(func_x2, (params,), vjps) return jvps @@ -211,10 +211,10 @@ def get_ntk_slice(vec): basis = torch.eye(output.numel(), dtype=output.dtype, device=output.device).view(output.numel(), -1) return vmap(get_ntk_slice)(basis) - # get_ntk(x1, x2) computes the NTK for a single data point x1, x2 - # Since the x1, x2 inputs to empirical_ntk_ntk_vps are batched, + # ``get_ntk(x1, x2)`` computes the NTK for a single data point x1, x2 + # Since the x1, x2 inputs to ``empirical_ntk_ntk_vps`` are batched, # we actually wish to compute the NTK between every pair of data points - # between {x1} and {x2}. That's what the vmaps here do. + # between {x1} and {x2}. That's what the ``vmaps`` here do. result = vmap(vmap(get_ntk, (None, 0)), (0, None))(x1, x2) if compute == 'full': diff --git a/intermediate_source/nvfuser_intro_tutorial.py b/intermediate_source/nvfuser_intro_tutorial.py index 91166fcce1e..155c1471a72 100644 --- a/intermediate_source/nvfuser_intro_tutorial.py +++ b/intermediate_source/nvfuser_intro_tutorial.py @@ -71,7 +71,7 @@ # networks, so improving the speed of these operations can improve # overall network training speed. Future releases of nvFuser will # improve the performance of Linear Layers, but for now we will -# specifically look at the Bias-Dropout-Add-LayerNorm section of this +# specifically look at the ``Bias-Dropout-Add-LayerNorm`` section of this # Transformer Block. # # .. 
figure:: /_static/img/nvfuser_intro/nvfuser_transformer_block.png @@ -154,7 +154,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""): # Run model, forward and backward output = forward_func() output.backward(grad_output) - # delete gradiens to avoid profiling the gradient accumulation + # delete gradients to avoid profiling the gradient accumulation for p in parameters: p.grad = None @@ -165,7 +165,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""): # Run model, forward and backward output = forward_func() output.backward(grad_output) - # delete gradiens to avoid profiling the gradient accumulation + # delete gradients to avoid profiling the gradient accumulation for p in parameters: p.grad = None @@ -265,7 +265,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""): # nvFuser took around 2.4s in total to compile these high speed # GPU functions. # -# nvFuser’s capabilities extend well beyond this initial performance gain. +# nvFuser's capabilities extend well beyond this initial performance gain. # ###################################################################### @@ -281,7 +281,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""): # To use nvFuser on inputs that change shape from iteration, we # generate new input and output gradient tensors and make a few # different sizes. Since the last dimension is shared with the -# parameters and cannot be changed dynamically in LayerNorm, we +# parameters and cannot be changed dynamically in ``LayerNorm``, we # perturb the first two dimensions of the input and gradient tensors. # @@ -390,7 +390,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""): # ###################################################################### -# Defining novel operations with nvFuser and FuncTorch +# Defining novel operations with nvFuser and functorch # ---------------------------------------------------- # # One of the primary benefits of nvFuser is the ability to define @@ -398,8 +398,8 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""): # just-in-time compiled into efficient kernels. # # PyTorch has strong performance for any individual operation, -# especially composite operations like LayerNorm. However, if -# LayerNorm wasn’t already implemented in PyTorch as a composite +# especially composite operations like ``LayerNorm``. However, if +# ``LayerNorm`` wasn’t already implemented in PyTorch as a composite # operation, then you’d have to define it as a series of simpler # (primitive) operations. Let’s make such a definition and run it # without nvFuser. @@ -488,7 +488,7 @@ def primitive_definition( # # However, the performance is still slower than the original eager # mode performance of the composite definition. TorchScript works well -# when predefined composite operations are used, however TorchScript’s +# when predefined composite operations are used, however TorchScript # application of Autograd saves all of the activations for each # operator in the fusion for re-use in the backwards pass. However, # this is not typically the optimal choice. Especially when chaining @@ -499,7 +499,7 @@ def primitive_definition( # It’s possible to optimize away many of these unnecessary memory # accesses, but it requires building a connected forward and backward # graph which isn’t possible with TorchScript. 
The -# `memory_efficient_fusion` pass in FuncTorch, however, is such an +# ``memory_efficient_fusion`` pass in functorch, however, is such an # optimization pass. To use this pass, we have to redefine our # function to pull the constants inside (for now it’s easiest to make # non-tensor constants literals in the function definition): @@ -527,11 +527,11 @@ def primitive_definition_for_memory_efficient_fusion( ###################################################################### # Now, instead of passing our function to TorchScript, we will pass it -# to FuncTorch’s optimization pass. +# to functorch optimization pass. # -# Optimize the model with FuncTorch tracing and the memory efficiency +# Optimize the model with functorch tracing and the memory efficiency # optimization pass memory_efficient_primitive_definition = memory_efficient_fusion( primitive_definition_for_memory_efficient_fusion @@ -550,22 +550,22 @@ def primitive_definition_for_memory_efficient_fusion( ###################################################################### # This recovers even more speed, but it’s still not as fast as -# TorchScripts original performance with the composite definition. +# TorchScript original performance with the composite definition. # However, this is still faster than running this new definition # without nvFuser, and is still faster than the composite definition # without nvFuser. # # .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_5.png # -# .. note:: FuncTorch’s memory efficient pass is experimental and still +# .. note:: The functorch memory efficient pass is experimental and still # actively in development. # Future versions of the API are expected to achieve performance # closer to that of TorchScript with the composite definition. # -# .. note:: FuncTorch’s memory efficient pass specializes on the shapes of +# .. note:: The functorch memory efficient pass specializes on the shapes of # the inputs to the function. If new inputs are provided with # different shapes, then you need to construct a new function -# using `memory_efficient_fusion` and apply it to the new inputs. +# using ``memory_efficient_fusion`` and apply it to the new inputs. ###################################################################### @@ -577,10 +577,10 @@ def primitive_definition_for_memory_efficient_fusion( # an entirely new operation in PyTorch – which takes a lot of time and # knowledge of the lower-level PyTorch code as well as parallel # programming – or writing the operation in simpler PyTorch ops and -# settling for poor performance. For example, let's replace LayerNorm -# in our example with RMSNorm. Even though RMSNorm is a bit simpler -# than LayerNorm, it doesn’t have an existing compound operation in -# PyTorch. See the `Root Mean Square Layer Normalization `__ paper for more information about RMSNorm. +# settling for poor performance. For example, let's replace ``LayerNorm`` +# in our example with ``RMSNorm``. Even though ``RMSNorm`` is a bit simpler +# than ``LayerNorm``, it doesn’t have an existing compound operation in +# PyTorch. See the `Root Mean Square Layer Normalization `__ paper for more information about ``RMSNorm``. # As before, we’ll define our new transformer block with # primitive PyTorch operations. # @@ -608,7 +608,7 @@ def with_rms_norm( # As before, we’ll get a baseline by running PyTorch without nvFuser. # -# Profile rms_norm +# Profile ``rms_norm`` func = functools.partial( with_rms_norm, input1, @@ -625,7 +625,7 @@ def with_rms_norm( # With nvFuser through TorchScript. 
# -# Profile scripted rms_norm +# Profile scripted ``rms_norm`` scripted_with_rms_norm = torch.jit.script(with_rms_norm) func = functools.partial( scripted_with_rms_norm, @@ -656,7 +656,7 @@ def with_rms_norm_for_memory_efficient_fusion( return norm_output -# Profile memory efficient rms_norm +# Profile memory efficient ``rms_norm`` memory_efficient_rms_norm = memory_efficient_fusion( with_rms_norm_for_memory_efficient_fusion ) @@ -666,12 +666,12 @@ def with_rms_norm_for_memory_efficient_fusion( ###################################################################### # .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_6.png # -# Since RMSNorm is simpler than LayerNorm the performance of our new +# Since ``RMSNorm`` is simpler than ``LayerNorm`` the performance of our new # transformer block is a little higher than the primitive definition # without nvFuser (354 iterations per second compared with 260 # iterations per second). With TorchScript, the iterations per second # increases by 2.68x and 3.36x to 952 iterations per second and 1,191 -# iterations per second with TorchScript and FuncTorch’s memory +# iterations per second with TorchScript and functorch memory # efficient optimization pass, respectively. The performance of this # new operation nearly matches the performance of the composite Layer # Norm definition with TorchScript. diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index 0f71a0aafe6..086a4300674 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -19,7 +19,7 @@ This approach proposes to decouple the learning of the parameters from the learning of their norms. To do so, the parameter is divided by its `Frobenius norm `_ -and a separate parameter encoding its norm is learnt. +and a separate parameter encoding its norm is learned. A similar regularization was proposed for GANs under the name of "`spectral normalization `_". This method controls the Lipschitz constant of the network by dividing its parameters by @@ -84,7 +84,7 @@ def forward(self, x): # 2) It does not separate the layer and the parametrization. If the parametrization were # more difficult, we would have to rewrite its code for each layer that we want to use it # in. -# 3) It recomputes the parametrization everytime we use the layer. If we use the layer +# 3) It recomputes the parametrization every time we use the layer. If we use the layer # several times during the forward pass, (imagine the recurrent kernel of an RNN), it # would compute the same ``A`` every time that the layer is called. # @@ -258,8 +258,8 @@ def forward(self, X): print((torch.symeig(X).eigenvalues > 0.).all()) # X is positive definite ############################################################################### -# Intializing parametrizations -# ---------------------------- +# Initializing parametrizations +# ----------------------------- # # Parametrizations come with a mechanism to initialize them. If we implement a method # ``right_inverse`` with signature @@ -327,7 +327,7 @@ def right_inverse(self, A): ############################################################################### # The name of this method comes from the fact that we would often expect # that ``forward(right_inverse(X)) == X``. This is a direct way of rewriting that -# the forward afer the initalization with value ``X`` should return the value ``X``. +# the forward after the initialization with value ``X`` should return the value ``X``. 
# This constraint is not strongly enforced in practice. In fact, at times, it might be of # interest to relax this relation. For example, consider the following implementation # of a randomized pruning method: diff --git a/intermediate_source/per_sample_grads.py b/intermediate_source/per_sample_grads.py index 9d2c774e9fc..c423679229c 100644 --- a/intermediate_source/per_sample_grads.py +++ b/intermediate_source/per_sample_grads.py @@ -70,7 +70,7 @@ def loss_fn(predictions, targets): predictions = model(data) # move the entire mini-batch through the model loss = loss_fn(predictions, targets) -loss.backward() # back propogate the 'average' gradient of this mini-batch +loss.backward() # back propagate the 'average' gradient of this mini-batch ###################################################################### # In contrast to the above approach, per-sample-gradient computation is @@ -114,7 +114,7 @@ def compute_sample_grads(data, targets): # Our strategy is to define a function that computes the loss and then apply # transforms to construct a function that computes per-sample-gradients. # -# We'll use the ``torch.func.functional_call`` function to treat an nn.Module +# We'll use the ``torch.func.functional_call`` function to treat an ``nn.Module`` # like a function. # # First, let’s extract the state from ``model`` into two dictionaries, @@ -146,16 +146,16 @@ def compute_loss(params, buffers, sample, target): ###################################################################### # Now, let’s use the ``grad`` transform to create a new function that computes # the gradient with respect to the first argument of ``compute_loss`` -# (i.e. the params). +# (i.e. the ``params``). ft_compute_grad = grad(compute_loss) ###################################################################### # The ``ft_compute_grad`` function computes the gradient for a single -# (sample, target) pair. We can use vmap to get it to compute the gradient +# (sample, target) pair. We can use ``vmap`` to get it to compute the gradient # over an entire batch of samples and targets. Note that # ``in_dims=(None, None, 0, 0)`` because we wish to map ``ft_compute_grad`` over -# the 0th dimension of the data and targets, and use the same params and +# the 0th dimension of the data and targets, and use the same ``params`` and # buffers for each. ft_compute_sample_grad = vmap(ft_compute_grad, in_dims=(None, None, 0, 0)) @@ -174,16 +174,16 @@ def compute_loss(params, buffers, sample, target): ###################################################################### # A quick note: there are limitations around what types of functions can be -# transformed by vmap. The best functions to transform are ones that are pure +# transformed by ``vmap``. The best functions to transform are ones that are pure # functions: a function where the outputs are only determined by the inputs, -# and that have no side effects (e.g. mutation). vmap is unable to handle +# and that have no side effects (e.g. mutation). ``vmap`` is unable to handle # mutation of arbitrary Python data structures, but it is able to handle many # in-place PyTorch operations. # # Performance comparison # ---------------------- # -# Curious about how the performance of vmap compares? +# Curious about how the performance of ``vmap`` compares? 
# # Currently the best results are obtained on newer GPU's such as the A100 # (Ampere) where we've seen up to 25x speedups on this example, but here are @@ -218,9 +218,9 @@ def get_perf(first, first_descriptor, second, second_descriptor): # the naive method. But it’s cool that composing ``vmap`` and ``grad`` give us a # nice speedup. # -# In general, vectorization with vmap should be faster than running a function +# In general, vectorization with ``vmap`` should be faster than running a function # in a for-loop and competitive with manual batching. There are some exceptions -# though, like if we haven’t implemented the vmap rule for a particular +# though, like if we haven’t implemented the ``vmap`` rule for a particular # operation or if the underlying kernels weren’t optimized for older hardware # (GPUs). If you see any of these cases, please let us know by opening an issue # at on GitHub. diff --git a/intermediate_source/pipeline_tutorial.py b/intermediate_source/pipeline_tutorial.py index bdd6cabb3f2..33561f60592 100644 --- a/intermediate_source/pipeline_tutorial.py +++ b/intermediate_source/pipeline_tutorial.py @@ -35,7 +35,7 @@ # As a result, our focus is on ``nn.TransformerEncoder`` and we split the model # such that half of the ``nn.TransformerEncoderLayer`` are on one GPU and the # other half are on another. To do this, we pull out the ``Encoder`` and -# ``Decoder`` sections into seperate modules and then build an nn.Sequential +# ``Decoder`` sections into separate modules and then build an ``nn.Sequential`` # representing the original Transformer module. import sys @@ -134,16 +134,17 @@ def forward(self, x): # length 6: # # .. math:: -# \begin{bmatrix} -# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} -# \end{bmatrix} -# \Rightarrow -# \begin{bmatrix} -# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & -# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & -# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & -# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} -# \end{bmatrix} +# +# \begin{bmatrix} +# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} +# \end{bmatrix} +# \Rightarrow +# \begin{bmatrix} +# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & +# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & +# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & +# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} +# \end{bmatrix} # # These columns are treated as independent by the model, which means that # the dependence of ``G`` and ``F`` can not be learned, but allows more @@ -172,11 +173,11 @@ def data_process(raw_text_iter): device = torch.device("cuda") def batchify(data, bsz): - # Divide the dataset into bsz parts. + # Divide the dataset into ``bsz`` parts. nbatch = data.size(0) // bsz # Trim off any extra elements that wouldn't cleanly fit (remainders). data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. + # Evenly divide the data across the ``bsz` batches. 
data = data.view(bsz, -1).t().contiguous() return data.to(device) @@ -245,9 +246,9 @@ def get_batch(source, i): ntokens = len(vocab) # the size of vocabulary emsize = 4096 # embedding dimension -nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder -nlayers = 12 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder -nhead = 16 # the number of heads in the multiheadattention models +nhid = 4096 # the dimension of the feedforward network model in ``nn.TransformerEncoder`` +nlayers = 12 # the number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder`` +nhead = 16 # the number of heads in the Multihead Attention models dropout = 0.2 # the dropout value from torch.distributed import rpc diff --git a/intermediate_source/pruning_tutorial.py b/intermediate_source/pruning_tutorial.py index d8de5a7502a..ba6701c8c35 100644 --- a/intermediate_source/pruning_tutorial.py +++ b/intermediate_source/pruning_tutorial.py @@ -339,8 +339,8 @@ def forward(self, x): # pruning this technique implements (supported options are ``global``, # ``structured``, and ``unstructured``). This is needed to determine # how to combine masks in the case in which pruning is applied -# iteratively. In other words, when pruning a pre-pruned parameter, -# the current prunining techique is expected to act on the unpruned +# iteratively. In other words, when pruning a prepruned parameter, +# the current pruning technique is expected to act on the unpruned # portion of the parameter. Specifying the ``PRUNING_TYPE`` will # enable the ``PruningContainer`` (which handles the iterative # application of pruning masks) to correctly identify the slice of the diff --git a/intermediate_source/reinforcement_ppo.py b/intermediate_source/reinforcement_ppo.py index 8dee73969db..dc6eca94931 100644 --- a/intermediate_source/reinforcement_ppo.py +++ b/intermediate_source/reinforcement_ppo.py @@ -15,7 +15,7 @@ Key learnings: -- How to create an environment in TorchRL, transform its outputs, and collect data from this env; +- How to create an environment in TorchRL, transform its outputs, and collect data from this environment; - How to make your classes talk to each other using :class:`tensordict.TensorDict`; - The basics of building your training loop with TorchRL: @@ -166,7 +166,7 @@ # When using ``frame_skip`` it is good practice to # correct the other frame counts by the number of frames we are grouping # together. If we configure a total count of X frames for training but -# use a ``frame_skip`` of Y, we will be actually collecting XY frames in total +# use a ``frame_skip`` of Y, we will be actually collecting ``XY`` frames in total # which exceeds our predefined budget. # frame_skip = 1 @@ -187,7 +187,7 @@ # The size of these sub-batches is controlled by ``sub_batch_size``. # sub_batch_size = 64 # cardinality of the sub-samples gathered from the current data in the inner loop -num_epochs = 10 # optimisation steps per batch of data collected +num_epochs = 10 # optimization steps per batch of data collected clip_epsilon = ( 0.2 # clip value for PPO loss: see the equation in the intro for more context. ) @@ -201,9 +201,9 @@ # # In RL, an *environment* is usually the way we refer to a simulator or a # control system. Various libraries provide simulation environments for reinforcement -# learning, including Gymnasium (previously OpenAI Gym), DeepMind control suite, and +# learning, including Gymnasium (previously OpenAI Gym), DeepMind Control Suite, and # many others. 
-# As a generalistic library, TorchRL's goal is to provide an interchangeable interface +# As a general library, TorchRL's goal is to provide an interchangeable interface # to a large panel of RL simulators, allowing you to easily swap one environment # with another. For example, creating a wrapped gym environment can be achieved with few characters: # @@ -214,12 +214,12 @@ # There are a few things to notice in this code: first, we created # the environment by calling the ``GymEnv`` wrapper. If extra keyword arguments # are passed, they will be transmitted to the ``gym.make`` method, hence covering -# the most common env construction commands. +# the most common environment construction commands. # Alternatively, one could also directly create a gym environment using ``gym.make(env_name, **kwargs)`` # and wrap it in a `GymWrapper` class. # # Also the ``device`` argument: for gym, this only controls the device where -# input action and observered states will be stored, but the execution will always +# input action and observed states will be stored, but the execution will always # be done on CPU. The reason for this is simply that gym does not support on-device # execution, unless specified otherwise. For other libraries, we have control over # the execution device and, as much as we can, we try to stay consistent in terms of @@ -232,8 +232,8 @@ # the policy. In Gym, this is usually achieved via wrappers. TorchRL takes a different # approach, more similar to other pytorch domain libraries, through the use of transforms. # To add transforms to an environment, one should simply wrap it in a :class:`TransformedEnv` -# instance, and append the sequence of transforms to it. The transformed env will inherit -# the device and meta-data of the wrapped env, and transform these depending on the sequence +# instance, and append the sequence of transforms to it. The transformed environment will inherit +# the device and meta-data of the wrapped environment, and transform these depending on the sequence # of transforms it contains. # # Normalization @@ -255,7 +255,7 @@ # to communicate. You could think of it as a python dictionary with some extra # tensor features. In practice, this means that many modules we will be working # with need to be told what key to read (``in_keys``) and what key to write -# (``out_keys``) in the tensordict they will receive. Usually, if ``out_keys`` +# (``out_keys``) in the ``tensordict`` they will receive. Usually, if ``out_keys`` # is omitted, it is assumed that the ``in_keys`` entries will be updated # in-place. For our transforms, the only entry we are interested in is referred # to as ``"observation"`` and our transform layers will be told to modify this @@ -295,7 +295,7 @@ # environment specs, but you can easily check that your environment specs are # adequate. # In our example, the :class:`GymWrapper` and :class:`GymEnv` that inherits -# from it already take care of setting the proper specs for your env so +# from it already take care of setting the proper specs for your environment so # you should not have to care about this. 
# # Nevertheless, let's see a concrete example using our transformed @@ -312,7 +312,7 @@ print("action_spec (as defined by input_spec):", env.action_spec) ###################################################################### -# the :func:`check_env_specs` function runs a small rollout and compares its output against the environemnt +# the :func:`check_env_specs` function runs a small rollout and compares its output against the environment # specs. If no error is raised, we can be confident that the specs are properly defined: # check_env_specs(env) @@ -328,7 +328,7 @@ # observation may be composite, meaning that it could be composed of more than one # tensor. This is not a problem for TorchRL, since the whole set of observations # is automatically packed in the output :class:`tensordict.TensorDict`. After executing a rollout -# (ie a sequence of environment steps and random action generations) over a given +# (for example, a sequence of environment steps and random action generations) over a given # number of steps, we will retrieve a :class:`tensordict.TensorDict` instance with a shape # that matches this trajectory length: # @@ -340,7 +340,7 @@ # Our rollout data has a shape of ``torch.Size([3])`, which matches the number of steps # we ran it for. The ``"next"`` entry points to the data coming after the current step. # In most cases, the ``"next""`` data at time `t` matches the data at ``t+1``, but this -# may not be the case if we are using some specific transformations (e.g. mutli-step). +# may not be the case if we are using some specific transformations (for example, multi-step). # # Policy # ------ @@ -360,13 +360,13 @@ # f_{\theta}(\text{observation}) = \mu_{\theta}(\text{observation}), \sigma^{+}_{\theta}(\text{observation}) # # The only extra-difficulty that is brought up here is to split our output in two -# equal parts and map the second to a scrictly positive space. +# equal parts and map the second to a strictly positive space. # # We design the policy in three steps: # # 1. Define a neural network ``D_obs`` -> ``2 * D_action``. Indeed, our ``loc`` (mu) and ``scale`` (sigma) both have dimension ``D_action``; # -# 2. Append a :class:`NormalParamExtractor` to extract a location and a scale (ie splits the input in two equal parts +# 2. Append a :class:`NormalParamExtractor` to extract a location and a scale (for example, splits the input in two equal parts # and applies a positive transformation to the scale parameter); # # 3. Create a probabilistic :class:`TensorDictModule` that can create this distribution and sample from it. @@ -384,7 +384,7 @@ ) ###################################################################### -# To enable the policy to "talk" with the environment through the tensordict +# To enable the policy to "talk" with the environment through the ``tensordict`` # data carrier, we wrap the ``nn.Module`` in a :class:`TensorDictModule`. This # class will simply ready the ``in_keys`` it is provided with and write the # outputs in-place at the registered ``out_keys``. @@ -429,7 +429,7 @@ # won't be used at inference time. This module will read the observations and # return an estimation of the discounted return for the following trajectory. # This allows us to amortize learning by relying on the some utility estimation -# that is learnt on-the-fly during training. Our value network share the same +# that is learned on-the-fly during training. Our value network share the same # structure as the policy, but for simplicity we assign it its own set of # parameters. 
# @@ -484,7 +484,7 @@ # As for the policy and environment before, the data collector will return # :class:`tensordict.TensorDict` instances with a total number of elements that will # match ``frames_per_batch``. Using :class:`tensordict.TensorDict` to pass data to the -# training loop allows you to write dataloading pipelines +# training loop allows you to write data loading pipelines # that are 100% oblivious to the actual specificities of the rollout content. # collector = SyncDataCollector( @@ -525,7 +525,7 @@ # Loss function # ------------- # -# The PPO loss can be directly imported from torchrl for convenience using the +# The PPO loss can be directly imported from TorchRL for convenience using the # :class:`ClipPPOLoss` class. This is the easiest way of utilizing PPO: # it hides away the mathematical operations of PPO and the control flow that # goes with it. @@ -536,7 +536,7 @@ # To compute the advantage, one just needs to (1) build the advantage module, which # utilizes our value operator, and (2) pass each batch of data through it before each # epoch. -# The GAE module will update the input tensordict with new ``"advantage"`` and +# The GAE module will update the input ``tensordict`` with new ``"advantage"`` and # ``"value_target"`` entries. # The ``"value_target"`` is a gradient-free tensor that represents the empirical # value that the value network should represent with the input observation. @@ -612,7 +612,7 @@ + loss_vals["loss_entropy"] ) - # Optimization: backward, grad clipping and optim step + # Optimization: backward, grad clipping and optimization step loss_value.backward() # this is not strictly mandatory but it's good practice to keep # your gradient norm bounded @@ -633,8 +633,8 @@ # We evaluate the policy once every 10 batches of data. # Evaluation is rather simple: execute the policy without exploration # (take the expected value of the action distribution) for a given - # number of steps (1000, which is our env horizon). - # The ``rollout`` method of the env can take a policy as argument: + # number of steps (1000, which is our ``env`` horizon). + # The ``rollout`` method of the ``env`` can take a policy as argument: # it will then execute this policy at each step. with set_exploration_mode("mean"), torch.no_grad(): # execute a rollout with the trained policy diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py index 083ce07f77a..78dc7e2fc6e 100644 --- a/intermediate_source/reinforcement_q_learning.py +++ b/intermediate_source/reinforcement_q_learning.py @@ -17,9 +17,9 @@ `Gymnasium's website `__. .. figure:: /_static/img/cartpole.gif - :alt: cartpole + :alt: CartPole - cartpole + CartPole As the agent observes the current state of the environment and chooses an action, the environment *transitions* to a new state, and also @@ -45,7 +45,7 @@ `gymnasium `__ for the environment, installed by using `pip`. This is a fork of the original OpenAI Gym project and maintained by the same team since Gym v0.19. -If you are running this in Google colab, run: +If you are running this in Google Colab, run: .. code-block:: bash @@ -82,7 +82,7 @@ plt.ion() -# if gpu is to be used +# if GPU is to be used device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -96,7 +96,7 @@ # batch are decorrelated. It has been shown that this greatly stabilizes # and improves the DQN training procedure. 
# -# For this, we're going to need two classses: +# For this, we're going to need two classes: # # - ``Transition`` - a named tuple representing a single transition in # our environment. It essentially maps (state, action) pairs @@ -172,7 +172,7 @@ def __len__(self): # # .. math:: \delta = Q(s, a) - (r + \gamma \max_a' Q(s', a)) # -# To minimise this error, we will use the `Huber +# To minimize this error, we will use the `Huber # loss `__. The Huber loss acts # like the mean squared error when the error is small, but like the mean # absolute error when the error is large - this makes it more robust to @@ -233,7 +233,7 @@ def forward(self, x): # probability of choosing a random action will start at ``EPS_START`` # and will decay exponentially towards ``EPS_END``. ``EPS_DECAY`` # controls the rate of the decay. -# - ``plot_durations`` - a helper for plotting the durations of episodes, +# - ``plot_durations`` - a helper for plotting the duration of episodes, # along with an average over the last 100 episodes (the measure used in # the official evaluations). The plot will be underneath the cell # containing the main training loop, and will update after every @@ -246,7 +246,7 @@ def forward(self, x): # EPS_END is the final value of epsilon # EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay # TAU is the update rate of the target network -# LR is the learning rate of the AdamW optimizer +# LR is the learning rate of the ``AdamW`` optimizer BATCH_SIZE = 128 GAMMA = 0.99 EPS_START = 0.9 @@ -391,7 +391,7 @@ def optimize_model(): # # Below, `num_episodes` is set to 600 if a GPU is available, otherwise 50 # episodes are scheduled so training does not take too long. However, 50 -# episodes is insufficient for to observe good performance on cartpole. +# episodes is insufficient for to observe good performance on CartPole. # You should see the model constantly achieve 500 steps within 600 training # episodes. Training RL agents can be a noisy process, so restarting training # can produce better results if convergence is not observed. diff --git a/intermediate_source/scaled_dot_product_attention_tutorial.py b/intermediate_source/scaled_dot_product_attention_tutorial.py index fbc76a15799..669e516f2c2 100644 --- a/intermediate_source/scaled_dot_product_attention_tutorial.py +++ b/intermediate_source/scaled_dot_product_attention_tutorial.py @@ -88,7 +88,7 @@ def benchmark_torch_function_in_microseconds(f, *args, **kwargs): # Lets explore the speed of each of the 3 implementations from torch.backends.cuda import sdp_kernel, SDPBackend -# Helpful arg mapper +# Helpful arguments mapper backend_map = { SDPBackend.MATH: {"enable_math": True, "enable_flash": False, "enable_mem_efficient": False}, SDPBackend.FLASH_ATTENTION: {"enable_math": False, "enable_flash": True, "enable_mem_efficient": False}, @@ -130,8 +130,8 @@ def benchmark_torch_function_in_microseconds(f, *args, **kwargs): # ~~~~~~~~~~~~~~~~~~~~~ # # Below is an example implementation of a multi-headed causal self -# attention block inspired by Andrej Karpathy’s -# `NanoGPT `__ repository. +# attention block inspired by +# `Andrej Karpathy NanoGPT `__ repository. 
# class CausalSelfAttention(nn.Module): @@ -186,12 +186,12 @@ def forward(self, x): print(model) -###################################################################### -# NestedTensor and Dense tensor support -# ------------------------------------- +##################################################################### +# ``NestedTensor`` and Dense tensor support +# ----------------------------------------- # -# SDPA supports both NestedTensor and Dense tensor inputs. NestedTensors handle the case where the input is a batch of variable length sequences -# without needing to pad each sequence to the maximum length in the batch. For more information about NestedTensors see +# SDPA supports both ``NestedTensor`` and Dense tensor inputs. ``NestedTensors`` handle the case where the input is a batch of variable length sequences +# without needing to pad each sequence to the maximum length in the batch. For more information about ``NestedTensors`` see # `torch.nested `__ and `NestedTensors Tutorial `__. # @@ -236,7 +236,7 @@ def generate_rand_batch( random_nt, _ = generate_rand_batch(32, 512, embed_dimension, pad_percentage=0.5, dtype=dtype, device=device) random_dense, _ = generate_rand_batch(32, 512, embed_dimension, pad_percentage=None, dtype=dtype, device=device) -# Currently the fused implementations don't support NestedTensor for training +# Currently the fused implementations don't support ``NestedTensor`` for training model.eval() with sdp_kernel(**backend_map[SDPBackend.FLASH_ATTENTION]): @@ -248,14 +248,14 @@ def generate_rand_batch( ###################################################################### -# Using SDPA with torch.compile -# ============================ +# Using SDPA with ``torch.compile`` +# ================================= # # With the release of PyTorch 2.0, a new feature called # ``torch.compile()`` has been introduced, which can provide # significant performance improvements over eager mode. # Scaled dot product attention is fully composable with ``torch.compile()``. -# To demonstrate this, let's compile the CausalSelfAttention module using +# To demonstrate this, let's compile the ``CausalSelfAttention`` module using # ``torch.compile()`` and observe the resulting performance improvements. # @@ -303,7 +303,9 @@ def generate_rand_batch( print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)) # For even more insights, you can export the trace and use ``chrome://tracing`` to view the results -# prof.export_chrome_trace("compiled_causal_attention_trace.json"). +# :: +# +# prof.export_chrome_trace("compiled_causal_attention_trace.json"). @@ -315,15 +317,14 @@ def generate_rand_batch( # on the same set of functions for both modules. # The reason for this here is that ``torch.compile`` is very good at removing the # framework overhead associated with PyTorch. If your model is launching -# large, efficient CUDA kernels, which in this case CausaulSelfAttention +# large, efficient CUDA kernels, which in this case ``CausaulSelfAttention`` # is, then the overhead of PyTorch can be hidden. # # In reality, your module does not normally consist of a singular -# CausalSelfAttention block. When experimenting with Andrej Karpathy’s -# `NanoGPT `__ repository, compiling +# ``CausalSelfAttention`` block. When experimenting with `Andrej Karpathy NanoGPT `__ repository, compiling # the module took the time per train step from: ``6090.49ms`` to -# ``3273.17ms``! This was done on commit: ae3a8d5 of NanoGPT training on -# the shakespeare dataset. +# ``3273.17ms``! 
This was done on commit: ``ae3a8d5`` of NanoGPT training on +# the Shakespeare dataset. # @@ -335,7 +336,7 @@ def generate_rand_batch( # ``torch.nn.functional.scaled_dot_product_attention``. We have shown how # the ``sdp_kernel`` context manager can be used to assert a certain # implementation is used on GPU. As well, we built a simple -# CausalSelfAttention module that works with NestedTensor and is torch +# ``CausalSelfAttention`` module that works with ``NestedTensor`` and is torch # compilable. In the process we have shown how to the profiling tools can # be used to explore the performance characteristics of a user defined # module. diff --git a/intermediate_source/seq2seq_translation_tutorial.py b/intermediate_source/seq2seq_translation_tutorial.py index e8a5651c57b..853cb2aed45 100644 --- a/intermediate_source/seq2seq_translation_tutorial.py +++ b/intermediate_source/seq2seq_translation_tutorial.py @@ -106,7 +106,7 @@ # yet, someone did the extra work of splitting language pairs into # individual text files here: https://www.manythings.org/anki/ # -# The English to French pairs are too big to include in the repo, so +# The English to French pairs are too big to include in the repository, so # download to ``data/eng-fra.txt`` before continuing. The file is a tab # separated list of translation pairs: # @@ -301,10 +301,10 @@ def prepareData(lang1, lang2, reverse=False): # length and order, which makes it ideal for translation between two # languages. # -# Consider the sentence "Je ne suis pas le chat noir" → "I am not the -# black cat". Most of the words in the input sentence have a direct +# Consider the sentence ``Je ne suis pas le chat noir`` → ``I am not the +# black cat``. Most of the words in the input sentence have a direct # translation in the output sentence, but are in slightly different -# orders, e.g. "chat noir" and "black cat". Because of the "ne/pas" +# orders, e.g. ``chat noir`` and ``black cat``. Because of the ``ne/pas`` # construction there is also one more word in the input sentence. It would # be difficult to produce a correct translation directly from the sequence # of input words. @@ -844,8 +844,8 @@ def evaluateAndShowAttention(input_sentence): # - Chat → Response # - Question → Answer # -# - Replace the embeddings with pre-trained word embeddings such as word2vec or -# GloVe +# - Replace the embeddings with pretrained word embeddings such as ``word2vec`` or +# ``GloVe`` # - Try with more layers, more hidden units, and more sentences. Compare # the training time and results. # - If you use a translation file where pairs have two of the same phrase diff --git a/intermediate_source/tensorboard_profiler_tutorial.py b/intermediate_source/tensorboard_profiler_tutorial.py index 7cd241d40ad..440f2257e1a 100644 --- a/intermediate_source/tensorboard_profiler_tutorial.py +++ b/intermediate_source/tensorboard_profiler_tutorial.py @@ -54,7 +54,7 @@ ###################################################################### # Then prepare the input data. For this tutorial, we use the CIFAR10 dataset. -# Transform it to the desired format and use DataLoader to load each batch. +# Transform it to the desired format and use ``DataLoader`` to load each batch. transform = T.Compose( [T.Resize(224), @@ -116,7 +116,7 @@ def train(data): # - ``profile_memory`` - Track tensor memory allocation/deallocation. Note, for old version of pytorch with version # before 1.10, if you suffer long profiling time, please disable it or upgrade to new version. 
# - ``with_stack`` - Record source information (file and line number) for the ops. -# If the TensorBoard is launched in VSCode (`reference `_), +# If the TensorBoard is launched in VS Code (`reference `_), # clicking a stack frame will navigate to the specific code line. with torch.profiler.profile( @@ -217,13 +217,13 @@ def train(data): # The "Total" duration includes its child operators’ time. # # - View call stack -# Click the "View Callstack" of an operator, the operators with same name but different call stacks will be shown. -# Then click a "View Callstack" in this sub-table, the call stack frames will be shown. +# Click the ``View Callstack`` of an operator, the operators with same name but different call stacks will be shown. +# Then click a ``View Callstack`` in this sub-table, the call stack frames will be shown. # # .. image:: ../../_static/img/profiler_callstack.png # :scale: 25 % # -# If the TensorBoard is launched inside VSCode +# If the TensorBoard is launched inside VS Code # (`Launch Guide `_), # clicking a call stack frame will navigate to the specific code line. # @@ -279,8 +279,8 @@ def train(data): # 5. Improve performance with the help of profiler # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# At the bottom of "Overview" page, the suggestion in "Performance Recommendation" hints the bottleneck is DataLoader. -# The PyTorch DataLoader uses single process by default. +# At the bottom of "Overview" page, the suggestion in "Performance Recommendation" hints the bottleneck is ``DataLoader``. +# The PyTorch ``DataLoader`` uses single process by default. # User could enable multi-process data loading by setting the parameter ``num_workers``. # `Here `_ is more details. # @@ -350,7 +350,7 @@ def train(data): # In the memory events table, the allocation and release events are paired into one entry. The "operator" column shows # the immediate ATen operator that is causing the allocation. Notice that in PyTorch, ATen operators commonly use # ``aten::empty`` to allocate memory. For example, ``aten::ones`` is implemented as ``aten::empty`` followed by an -# ``aten::fill_``. Solely display the opeartor name as ``aten::empty`` is of little help. It will be shown as +# ``aten::fill_``. Solely display the operator name as ``aten::empty`` is of little help. It will be shown as # ``aten::ones (aten::empty)`` in this special case. The "Allocation Time", "Release Time" and "Duration" # columns' data might be missing if the event occurs outside of the time range. # diff --git a/intermediate_source/torch_compile_tutorial.py b/intermediate_source/torch_compile_tutorial.py index ad1c5d41be9..fcea4ed6611 100644 --- a/intermediate_source/torch_compile_tutorial.py +++ b/intermediate_source/torch_compile_tutorial.py @@ -41,7 +41,7 @@ # Basic Usage # ------------ # -# ``torch.compile`` is included in the latest PyTorch nightlies. +# ``torch.compile`` is included in the latest PyTorch.. # Running TorchInductor on GPU requires Triton, which is included with the PyTorch 2.0 nightly # binary. If Triton is still missing, try installing ``torchtriton`` via pip # (``pip install torchtriton --extra-index-url "https://download.pytorch.org/whl/nightly/cu117"`` @@ -125,7 +125,7 @@ def init_model(): # First, let's compare inference. # # Note that in the call to ``torch.compile``, we have have the additional -# ``mode`` kwarg, which we will discuss below. +# ``mode`` argument, which we will discuss below. 
def evaluate(mod, inp): return mod(inp) @@ -184,7 +184,7 @@ def evaluate(mod, inp): # GPU compute and the observed speedup may be less significant. # # You may also see different speedup results depending on the chosen ``mode`` -# kwarg. Since our model and data are small, we want to reduce overhead as +# argument. Since our model and data are small, we want to reduce overhead as # much as possible, and so we chose ``"reduce-overhead"``. For your own models, # you may need to experiment with different modes to maximize speedup. You can # read more about modes `here `__. From 7a9632aac911a706798901781bee6871dea6aa0d Mon Sep 17 00:00:00 2001 From: Sam Baumann Date: Mon, 24 Apr 2023 11:08:57 -0400 Subject: [PATCH 010/609] modified some text for clarity (#2292) --- beginner_source/blitz/autograd_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/blitz/autograd_tutorial.py b/beginner_source/blitz/autograd_tutorial.py index 5d468d387f6..3b33ffca9a8 100644 --- a/beginner_source/blitz/autograd_tutorial.py +++ b/beginner_source/blitz/autograd_tutorial.py @@ -40,7 +40,7 @@ shape (1,1000). .. note:: - This tutorial work only on CPU and will not work on GPU (even if tensors are moved to CUDA). + This tutorial works only on the CPU and will not work on GPU devices (even if tensors are moved to CUDA). """ import torch From 7939159f2d8352ae9a6e117fdbc4c67b650bb3b2 Mon Sep 17 00:00:00 2001 From: heloa-net <13964550+heloa-net@users.noreply.github.com> Date: Mon, 24 Apr 2023 11:57:15 -0600 Subject: [PATCH 011/609] Fix #1983 by moving input and model to device (#2249) Fixes #1983 Moved input to device on line `234` instead of `232` for readability. `y` on line `232` can't be moved to device due to being an `int` I've found out about this issue while following the tutorial from Quickstart and reaching "Optimization model parameters". I was iterating on the code started on Quickstart and the model had been sent to the device. [Optimization tutorial] (https://github.com/pytorch/tutorials/blob/main/beginner_source/basics/optimization_tutorial.py) doesn't move the model or input to the device and this is when I got the error described in pytorch#1983. In light of this, should optimization_tutorial.py also be updated? Test plan: * Tested in Colab using GPU, [link](https://colab.research.google.com/drive/1X7BJHd42FN9N_8DiXd7h4ScplN2-OmLo?usp=sharing) Co-authored-by: Svetlana Karslioglu --- beginner_source/basics/quickstart_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py index de4a8b45437..2bb4622d4e4 100644 --- a/beginner_source/basics/quickstart_tutorial.py +++ b/beginner_source/basics/quickstart_tutorial.py @@ -215,7 +215,7 @@ def test(dataloader, model, loss_fn): # The process for loading a model includes re-creating the model structure and loading # the state dictionary into it. 
-model = NeuralNetwork() +model = NeuralNetwork().to(device) model.load_state_dict(torch.load("model.pth")) ############################################################# @@ -237,6 +237,7 @@ def test(dataloader, model, loss_fn): model.eval() x, y = test_data[0][0], test_data[0][1] with torch.no_grad(): + x = x.to(device) pred = model(x) predicted, actual = classes[pred[0].argmax(0)], classes[y] print(f'Predicted: "{predicted}", Actual: "{actual}"') From 3bb7d5b87a9fb9acb8fa0fc80f3363e72cf39add Mon Sep 17 00:00:00 2001 From: washing <41882421+washing1127@users.noreply.github.com> Date: Tue, 25 Apr 2023 23:32:27 +0800 Subject: [PATCH 012/609] Update autogradyt_tutorial.py (#2295) Add backslashes in front of `sin` and `cos` in mathematical formulas to normalize their formatting. --- beginner_source/introyt/autogradyt_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beginner_source/introyt/autogradyt_tutorial.py b/beginner_source/introyt/autogradyt_tutorial.py index 1ce49f3cd03..a2ed238e52b 100644 --- a/beginner_source/introyt/autogradyt_tutorial.py +++ b/beginner_source/introyt/autogradyt_tutorial.py @@ -153,7 +153,7 @@ ####################################################################### # This ``grad_fn`` gives us a hint that when we execute the # backpropagation step and compute gradients, we’ll need to compute the -# derivative of :math:`sin(x)` for all this tensor’s inputs. +# derivative of :math:`\sin(x)` for all this tensor’s inputs. # # Let’s perform some more computations: # @@ -222,8 +222,8 @@ # out = d.sum() # # Adding a constant, as we did to compute ``d``, does not change the -# derivative. That leaves :math:`c = 2 * b = 2 * sin(a)`, the derivative -# of which should be :math:`2 * cos(a)`. Looking at the graph above, +# derivative. That leaves :math:`c = 2 * b = 2 * \sin(a)`, the derivative +# of which should be :math:`2 * \cos(a)`. Looking at the graph above, # that’s just what we see. # # Be aware that only *leaf nodes* of the computation have their gradients From 490335258488c0bb67411657617a7b0c79a5385b Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 25 Apr 2023 11:45:11 -0700 Subject: [PATCH 013/609] Remove nvFuser intro tutorial (#2294) * Remove nvFuser intro tutorial * Add a redirect to homepage --- index.rst | 7 - intermediate_source/nvfuser_intro_tutorial.py | 687 ------------------ .../nvfuser_intro_tutorial.rst | 8 + 3 files changed, 8 insertions(+), 694 deletions(-) delete mode 100644 intermediate_source/nvfuser_intro_tutorial.py create mode 100644 intermediate_source/nvfuser_intro_tutorial.rst diff --git a/index.rst b/index.rst index 4fbc574b2f9..48550e6c4ac 100644 --- a/index.rst +++ b/index.rst @@ -543,13 +543,6 @@ What's new in PyTorch tutorials? :link: intermediate/torchserve_with_ipex_2 :tags: Model-Optimization,Production -.. customcarditem:: - :header: Introduction to nvFuser - :card_description: An introduction to nvFuser - :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: intermediate/nvfuser_intro_tutorial.html - :tags: Model-Optimization - .. customcarditem:: :header: Multi-Objective Neural Architecture Search with Ax :card_description: Learn how to use Ax to search over architectures find optimal tradeoffs between accuracy and latency. 
diff --git a/intermediate_source/nvfuser_intro_tutorial.py b/intermediate_source/nvfuser_intro_tutorial.py deleted file mode 100644 index 155c1471a72..00000000000 --- a/intermediate_source/nvfuser_intro_tutorial.py +++ /dev/null @@ -1,687 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Getting Started - Accelerate Your Scripts with nvFuser -**************************** - -**Authors**: `Christian Sarofeen `_ -`Piotr Bialecki `_ -`Kevin Stephano `_ -`Jie Jiang `_ -`Masaki Kozuki `_ -`Neal Vaidya` - - -Introduction ------------- - -This tutorial will demonstrate how you can accelerate your networks -with nvFuser. nvFuser is a Deep Learning Compiler that just-in-time -compiles fast and flexible GPU specific code to reliably accelerate -users' networks automatically, providing speedups for deep learning -networks running on Volta and later CUDA accelerators by generating -fast custom “fusion” kernels at runtime. nvFuser is specifically -designed to meet the unique requirements of the PyTorch community, -and it supports diverse network architectures and programs with -dynamic inputs of varying shapes and strides. - -Importing Packages and Selecting a Device ------------------------------------------ -In order to run this tutorial and see the benefits of using nvFuser, -you would need to install the `1.12.0` PyTorch release as well as -`functorch` `0.2` or newer version of them. `functorch` also needs -`networkx` for its smart recomputation heuristics which you can -install via `pip install networkx`. -Additionally, a GPU is required. -""" - -import torch -import torch.nn.functional as F -import functorch -from functorch.compile import memory_efficient_fusion -from copy import deepcopy -from typing import List -import time -import functools -import random - -random.seed(42) - -if torch.__version__ < (1, 12, 0): - raise RuntimeError( - "PyTorch >= 1.12.0 required, but your environment uses torch=={}".format( - torch.__version__ - ) - ) - -major, minor, _ = functorch.__version__.split(".") -if int(major) == 0 and int(minor) < 2: - raise RuntimeError( - "FuncTorch >= 0.2.0 required, but your environment uses functorch=={}".format( - functorch.__version__ - ) - ) - -###################################################################### -# The Transformer Block -# --------------------- -# The network topology we’re going to focus on is the Transformer -# Block for networks like BERT. As of writing this tutorial, nvFuser -# provides acceleration of pointwise, reduction, and normalization -# operations. These simple operations are the backbone of large -# networks, so improving the speed of these operations can improve -# overall network training speed. Future releases of nvFuser will -# improve the performance of Linear Layers, but for now we will -# specifically look at the ``Bias-Dropout-Add-LayerNorm`` section of this -# Transformer Block. -# -# .. figure:: /_static/img/nvfuser_intro/nvfuser_transformer_block.png -# -# First, let’s define the forward pass for this section of our network. -# For when we’ll use TorchScript on this function, we decorate the -# function with type information of the function parameters. This isn’t -# always required, but it can often help to provide this information to -# TorchScript because it is a strictly typed system. Since we have -# PyTorch’s autograd system, we don’t need to explicitly define the -# backwards pass. 
- - -def composite_definition( - input1: torch.Tensor, - input2: torch.Tensor, - weight: torch.Tensor, - bias1: torch.Tensor, - bias2: torch.Tensor, - normalization_axis: int, - dropout_prob: float, -) -> torch.Tensor: - bias1_out = input1 + bias1 - dropout_out = F.dropout(bias1_out, dropout_prob, training=True) - norm_input = dropout_out + input2 - norm_output = F.layer_norm( - norm_input, (input1.size(normalization_axis),), weight, bias2 - ) - return norm_output - - -###################################################################### -# Setup and Performance Metrics -# --------------------- -# Next, we initialize some inputs, parameters, and a simulated gradient -# output tensor for the backwards pass since we aren’t including a -# loss function. -# - - -# Setup initial tensors and parameters -input_size = [64, 128, 1024] -device = "cuda" -dtype = torch.float32 - -# Create sample inputs -input1 = torch.randn(*input_size, device=device, dtype=dtype, requires_grad=True) -input2 = torch.rand_like(input1).requires_grad_() - -# Precompute a grad output tensor, for this example it's the same size -# as the inputs -grad_output = torch.rand_like(input1) - -# Randomly initialize the model parameters -weight = torch.nn.Parameter(torch.randn(input_size[2], dtype=dtype, device=device)) -bias1 = torch.nn.Parameter(torch.randn(input_size[2], dtype=dtype, device=device)) -bias2 = torch.nn.Parameter(torch.randn(input_size[2], dtype=dtype, device=device)) - -parameters = [input1, input2, weight, bias1, bias2] - -###################################################################### -# To produce a baseline performance we will measure the speed of our -# forward and backward passes in PyTorch’s default eager mode. To get -# accurate and comparable measurements, we perform a few warm up -# iterations. Then, we time many iterations of the forward and backward -# pass using performance counters combined with proper GPU -# synchronization, then compute the average iterations per second. -# It’s important to be very careful when measuring performance on the -# GPU, as we want to remove any initialization costs and need -# synchronization since it’s an asynchronous device. Since we will -# measure many variations of this problem with and without nvFuser we -# define a helper method called `profile_workload` and will use -# `functool.partial` to concisely profile the workload. -# - -# Utility to profile the workload -def profile_workload(forward_func, grad_output, iteration_count=100, label=""): - # Perform warm-up iterations - for _ in range(3): - # Run model, forward and backward - output = forward_func() - output.backward(grad_output) - # delete gradients to avoid profiling the gradient accumulation - for p in parameters: - p.grad = None - - # Synchronize the GPU before starting the timer - torch.cuda.synchronize() - start = time.perf_counter() - for _ in range(iteration_count): - # Run model, forward and backward - output = forward_func() - output.backward(grad_output) - # delete gradients to avoid profiling the gradient accumulation - for p in parameters: - p.grad = None - - # Synchronize the GPU before stopping the timer - torch.cuda.synchronize() - stop = time.perf_counter() - iters_per_second = iteration_count / (stop - start) - if label: - print(label) - print("Average iterations per second: {:.2f}".format(iters_per_second)) - - -###################################################################### -# We can now measure a baseline performance of PyTorch’s eager mode -# (without nvFuser). 
-# - - -# Run and profile eager mode execution on the composite definition of our -# operations. -func = functools.partial( - composite_definition, - input1, - input2, - weight, - bias1, - bias2, - normalization_axis=2, - dropout_prob=0.1, -) -profile_workload( - func, grad_output, iteration_count=100, label="Eager Mode - Composite definition" -) - -###################################################################### -# It’s important for PyTorch and nvFuser to work well across diverse -# GPU architectures. For our measurements we’ve run this tutorial on -# five GPUs ranging from consumer to enterprise grade. Our baseline -# geometric mean (geomean) performance across these GPUs is 850 -# iterations per second, plotted in the figure below. -# -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_0.png -# As we run different variations of this script with nvFuser, we will -# continue to add the results to this figure for the same GPUs. -# - -###################################################################### -# TorchScript & nvFuser -# --------------------- -# nvFuser is the default fusion system in TorchScript since PyTorch -# version 1.12, so to turn on nvFuser we need to enable TorchScript. -# This will allow nvFuser to automatically generate fast kernels and -# take over execution of these operations. TorchScript can be a -# challenging system to get working, but with our current definition -# of our operators, all we need to do is wrap our function in the -# `torch.jit.script` compile function. We can then simply run our -# workload as before. -# - -scripted_composite_definition = torch.jit.script(composite_definition) -func = functools.partial( - scripted_composite_definition, - input1, - input2, - weight, - bias1, - bias2, - normalization_axis=2, - dropout_prob=0.1, -) -profile_workload( - func, grad_output, iteration_count=100, label="TorchScript - Composite definition" -) - -###################################################################### -# Before we get to the results, it is important to mention here that -# nvFuser does not generate the exact same sequence of random numbers, -# as random number generation in PyTorch is dependent on the precise -# parallelization scheme used for the GPU function. Therefore, if you -# want to validate the output of nvFuser with the output without -# nvFuser, it would require disabling the random number generation -# functions. In this example, we would simply need to change -# `dropout_out = F.dropout(bias1_out, dropout_prob, training=True)` -# to -# `dropout_out = F.dropout(bias1_out, dropout_prob, training=False)` -# as the dropout function is the only function in this example that -# depends on random number generation. -# -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_1.png -# -# Our geomean performance with nvFuser is 1,394 images per second -# which is a geomean of 1.64x faster than eager mode. We did not -# include the time that TorchScript and nvFuser take to compile the -# program and GPU functions. For real end-to-end training the -# compile time of TorchScript and nvFuser are negligible. For -# example, in this tutorial the combination of TorchScript and -# nvFuser took around 2.4s in total to compile these high speed -# GPU functions. -# -# nvFuser's capabilities extend well beyond this initial performance gain. 
-# - -###################################################################### -# nvFuser & Dynamic Shapes -# --------------------- -# It is challenging for Deep Learning Compilers to provide performance -# gains when the user changes the input sizes of the tensors. However, -# supporting changing shapes has always been a fundamental design -# criteria for nvFuser, as processing different-sized input tensors is -# critical to many applications like Natural Language Processing and -# Graph Neural Networks. -# -# To use nvFuser on inputs that change shape from iteration, we -# generate new input and output gradient tensors and make a few -# different sizes. Since the last dimension is shared with the -# parameters and cannot be changed dynamically in ``LayerNorm``, we -# perturb the first two dimensions of the input and gradient tensors. -# - -SHAPE_COUNT = 20 -dynamic_sizes = deepcopy(input_size) - -inputs1: List[torch.Tensor] = [] -inputs2: List[torch.Tensor] = [] -grad_outputs: List[torch.Tensor] = [] - - -# Create some random shapes -for _ in range(SHAPE_COUNT): - dynamic_sizes[0] = input_size[0] + random.randrange(-2, 3) - dynamic_sizes[1] = input_size[1] + random.randrange(-2, 3) - input = torch.randn(*dynamic_sizes, device=device, dtype=dtype, requires_grad=True) - inputs1.append(input) - inputs2.append(torch.rand_like(input)) - grad_outputs.append(torch.rand_like(input)) - -###################################################################### -# No changes from before are required for running with TorchScript, we -# simply reuse the previous definition that we wrapped in -# `torch.jit.script`. -# -# We’ll start as usual by performing some warm-up iterations, however -# we won’t show nvFuser all of the input sizes, we’ll only show one -# size for the warm-up. -# - -# Perform warm-up iterations -for _ in range(3): - dynamic_input1 = inputs1[0] - dynamic_input2 = inputs2[0] - dynamic_grad_output = grad_outputs[0] - # Run model, forward and backward - output = scripted_composite_definition( - dynamic_input1, - dynamic_input2, - weight, - bias1, - bias2, - normalization_axis=2, - dropout_prob=0.1, - ) - output.backward(dynamic_grad_output) - -###################################################################### -# Now, we can measure the performance metrics of nvFuser as we have -# previously. 
-# - -# Profile manually as our helper function expects static inputs -iteration_count = 100 -# Synchronize the GPU before starting the timer -torch.cuda.synchronize() -start = time.perf_counter() -for i in range(iteration_count): - dynamic_input1 = inputs1[i % SHAPE_COUNT] - dynamic_input2 = inputs2[i % SHAPE_COUNT] - dynamic_grad_output = grad_outputs[i % SHAPE_COUNT] - dynamic_parameters = [dynamic_input1, dynamic_input2, weight, bias1, bias2] - - # Run model, forward and backward - output = scripted_composite_definition( - dynamic_input1, - dynamic_input2, - weight, - bias1, - bias2, - normalization_axis=2, - dropout_prob=0.1, - ) - output.backward(dynamic_grad_output) - # Delete the gradients to avoid profiling the gradient accumulation - for p in dynamic_parameters: - p.grad = None - -# Synchronize the GPU before stopping the timer -torch.cuda.synchronize() -stop = time.perf_counter() -iters_per_second = iteration_count / (stop - start) -print("TorchScript - Random Sizes") -print("Average iterations per second: {:.2f}".format(iters_per_second)) - -###################################################################### -# Performance across our GPUs is very similar to the previous -# performance seen. Only the performance of the A100 degraded -# slightly, but is still much higher than without nvFuser. The small -# change in performance of the A100 is actually related to the -# additional CPU overhead that dynamic shapes cause in nvFuser. -# nvFuser at runtime has to infer how to run the different sized -# kernels, so additional CPU time is consumed. This CPU time is -# present with all GPUs, but since the A100 runs its functions so fast -# this CPU overhead cannot be fully hidden by the asynchronous nature -# of GPU execution. -# -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_2.png -# -# .. note:: Today, nvFuser in TorchScript is the only exposure of -# nvFuser that allows for dynamic shape changes, although we will -# expand this capability to other systems in the future. For more -# insight into how dynamic shapes are implemented in nvFuser, you can -# view this presentation from GTC 2021: -# https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s31952/ -# - -###################################################################### -# Defining novel operations with nvFuser and functorch -# ---------------------------------------------------- -# -# One of the primary benefits of nvFuser is the ability to define -# novel operations composed of PyTorch “primitives” which are then -# just-in-time compiled into efficient kernels. -# -# PyTorch has strong performance for any individual operation, -# especially composite operations like ``LayerNorm``. However, if -# ``LayerNorm`` wasn’t already implemented in PyTorch as a composite -# operation, then you’d have to define it as a series of simpler -# (primitive) operations. Let’s make such a definition and run it -# without nvFuser. 
-# - - -def primitive_definition( - input1: torch.Tensor, - input2: torch.Tensor, - weight: torch.Tensor, - bias1: torch.Tensor, - bias2: torch.Tensor, - normalization_axis: int, - dropout_prob: float, - keepdim: bool, -) -> torch.Tensor: - bias1_out = input1 + bias1 - dropout_out = F.dropout(bias1_out, dropout_prob, training=True) - norm_input = dropout_out + input2 - mean = norm_input.mean(normalization_axis, keepdim=keepdim) - diff = norm_input - mean - diff_sq = diff * diff - var = diff_sq.mean(normalization_axis, keepdim=keepdim) - pre_shift_scale_norm_output = (norm_input - mean) / torch.sqrt(var + 1e-12) - norm_output = weight * pre_shift_scale_norm_output + bias2 - return norm_output - - -# Profile primitive definition -func = functools.partial( - primitive_definition, - input1, - input2, - weight, - bias1, - bias2, - normalization_axis=2, - dropout_prob=0.1, - keepdim=True, -) -profile_workload( - func, grad_output, iteration_count=100, label="Eager Mode - Primitive Definition" -) - -###################################################################### -# While the above is mathematically equivalent to our previous -# definition, benchmarking our new function with the original static -# shape using TorchScript and nvFuser shows the iterations per second -# decreases – mostly due to the cost of accessing memory to save -# intermediate results. -# -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_3.png -# -# The geomean iterations per second is 260 iterations per second, -# 3.26x slower than the composite definition in eager mode and 5.35x -# slower than the nvFuser composite operation! For more information on -# why there’s such a drastic decrease in compute speed please see this -# presentation from GTC 2022: -# https://www.nvidia.com/en-us/on-demand/session/gtcspring22-s41958/ -# -# nvFuser with TorchScript can improve the performance of this -# operation even though it’s defined with primitive PyTorch -# operations. Simply by enabling TorchScript on the new function -# (just like before), we can see much of the performance returns. -# - -# Profile scripted primitive definition -scripted_primitive_definition = torch.jit.script(primitive_definition) -func = functools.partial( - scripted_primitive_definition, - input1, - input2, - weight, - bias1, - bias2, - normalization_axis=2, - dropout_prob=0.1, - keepdim=True, -) -profile_workload( - func, grad_output, iteration_count=100, label="TorchScript - Primitive definition" -) - -###################################################################### -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_4.png -# -# However, the performance is still slower than the original eager -# mode performance of the composite definition. TorchScript works well -# when predefined composite operations are used, however TorchScript -# application of Autograd saves all of the activations for each -# operator in the fusion for re-use in the backwards pass. However, -# this is not typically the optimal choice. Especially when chaining -# together multiple simple operations, it is often much faster to -# recompute some intermediate tensors rather than spend the time -# storing and retrieving several saved results from memory. -# -# It’s possible to optimize away many of these unnecessary memory -# accesses, but it requires building a connected forward and backward -# graph which isn’t possible with TorchScript. The -# ``memory_efficient_fusion`` pass in functorch, however, is such an -# optimization pass. 
To use this pass, we have to redefine our -# function to pull the constants inside (for now it’s easiest to make -# non-tensor constants literals in the function definition): -# - - -def primitive_definition_for_memory_efficient_fusion( - input1: torch.Tensor, - input2: torch.Tensor, - weight: torch.Tensor, - bias1: torch.Tensor, - bias2: torch.Tensor, -) -> torch.Tensor: - bias1_out = input1 + bias1 - dropout_out = F.dropout(bias1_out, 0.1, training=True) - norm_input = dropout_out + input2 - mean = norm_input.mean(2, keepdim=True) - diff = norm_input - mean - diff_sq = diff * diff - var = diff_sq.mean(2, keepdim=True) - pre_shift_scale_norm_output = (norm_input - mean) / torch.sqrt(var + 1e-12) - norm_output = weight * pre_shift_scale_norm_output + bias2 - return norm_output - - -###################################################################### -# Now, instead of passing our function to TorchScript, we will pass it -# to functorch optimization pass. -# - - -# Optimize the model with functorch tracing and the memory efficiency -# optimization pass -memory_efficient_primitive_definition = memory_efficient_fusion( - primitive_definition_for_memory_efficient_fusion -) - -# Profile memory efficient primitive definition -func = functools.partial( - memory_efficient_primitive_definition, input1, input2, weight, bias1, bias2 -) -profile_workload( - func, - grad_output, - iteration_count=100, - label="FuncTorch - Primitive definition", -) - -###################################################################### -# This recovers even more speed, but it’s still not as fast as -# TorchScript original performance with the composite definition. -# However, this is still faster than running this new definition -# without nvFuser, and is still faster than the composite definition -# without nvFuser. -# -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_5.png -# -# .. note:: The functorch memory efficient pass is experimental and still -# actively in development. -# Future versions of the API are expected to achieve performance -# closer to that of TorchScript with the composite definition. -# -# .. note:: The functorch memory efficient pass specializes on the shapes of -# the inputs to the function. If new inputs are provided with -# different shapes, then you need to construct a new function -# using ``memory_efficient_fusion`` and apply it to the new inputs. - - -###################################################################### -# Transformer Block With a Novel Normalization -# ---------------------------------------------------- -# The ability to quickly execute chains of simple operations is -# important as not every operation has a composite operation defined -# in PyTorch. Previously, this meant researchers either had to define -# an entirely new operation in PyTorch – which takes a lot of time and -# knowledge of the lower-level PyTorch code as well as parallel -# programming – or writing the operation in simpler PyTorch ops and -# settling for poor performance. For example, let's replace ``LayerNorm`` -# in our example with ``RMSNorm``. Even though ``RMSNorm`` is a bit simpler -# than ``LayerNorm``, it doesn’t have an existing compound operation in -# PyTorch. See the `Root Mean Square Layer Normalization `__ paper for more information about ``RMSNorm``. -# As before, we’ll define our new transformer block with -# primitive PyTorch operations. 
-# - - -def with_rms_norm( - input1: torch.Tensor, - input2: torch.Tensor, - weight: torch.Tensor, - bias: torch.Tensor, - normalization_axis: int, - dropout_prob: float, - keepdim: bool, -) -> torch.Tensor: - bias_out = input1 + bias - dropout_out = F.dropout(bias_out, dropout_prob, training=True) - norm_input = dropout_out + input2 - var = norm_input.mul(norm_input).mean(normalization_axis, keepdim) - pre_shift_scale_norm_output = norm_input / torch.sqrt(var + 1e-12) - norm_output = weight * pre_shift_scale_norm_output - return norm_output - - -###################################################################### -# As before, we’ll get a baseline by running PyTorch without nvFuser. -# - -# Profile ``rms_norm`` -func = functools.partial( - with_rms_norm, - input1, - input2, - weight, - bias1, - normalization_axis=2, - dropout_prob=0.1, - keepdim=True, -) -profile_workload(func, grad_output, iteration_count=100, label="Eager Mode - RMS Norm") - -###################################################################### -# With nvFuser through TorchScript. -# - -# Profile scripted ``rms_norm`` -scripted_with_rms_norm = torch.jit.script(with_rms_norm) -func = functools.partial( - scripted_with_rms_norm, - input1, - input2, - weight, - bias1, - normalization_axis=2, - dropout_prob=0.1, - keepdim=True, -) -profile_workload(func, grad_output, iteration_count=100, label="TorchScript - RMS Norm") - -###################################################################### -# With nvFuser through Functorch. -# - - -def with_rms_norm_for_memory_efficient_fusion( - input1: torch.Tensor, input2: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor -) -> torch.Tensor: - bias_out = input1 + bias - dropout_out = torch.nn.functional.dropout(bias_out, 0.1) - norm_input = dropout_out + input2 - var = norm_input.mul(norm_input).mean(2, keepdim=True) - pre_shift_scale_norm_output = norm_input / torch.sqrt(var + 1e-12) - norm_output = weight * pre_shift_scale_norm_output - return norm_output - - -# Profile memory efficient ``rms_norm`` -memory_efficient_rms_norm = memory_efficient_fusion( - with_rms_norm_for_memory_efficient_fusion -) -func = functools.partial(memory_efficient_rms_norm, input1, input2, weight, bias1) -profile_workload(func, grad_output, iteration_count=100, label="FuncTorch - RMS Norm") - -###################################################################### -# .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_6.png -# -# Since ``RMSNorm`` is simpler than ``LayerNorm`` the performance of our new -# transformer block is a little higher than the primitive definition -# without nvFuser (354 iterations per second compared with 260 -# iterations per second). With TorchScript, the iterations per second -# increases by 2.68x and 3.36x to 952 iterations per second and 1,191 -# iterations per second with TorchScript and functorch memory -# efficient optimization pass, respectively. The performance of this -# new operation nearly matches the performance of the composite Layer -# Norm definition with TorchScript. -# -# nvFuser is here to provide the ability to define novel operations in -# simple PyTorch and get performance that’s close to a highly optimized -# composite operation in PyTorch. We believe this will enable research -# into novel network topologies without paying for sometimes devastating -# effects on speed of training. 
nvFuser provides this unique ability as -# it’s able to analyze users’ programs to provide performance as fast as a -# highly hand tuned implementation, regardless of how the operations are -# defined. nvFuser still cannot support every operation in PyTorch, -# however its capabilities will continue to grow over time. diff --git a/intermediate_source/nvfuser_intro_tutorial.rst b/intermediate_source/nvfuser_intro_tutorial.rst new file mode 100644 index 00000000000..965500d71ee --- /dev/null +++ b/intermediate_source/nvfuser_intro_tutorial.rst @@ -0,0 +1,8 @@ +Getting Started - Accelerate Your Scripts with nvFuser +====================================================== + +This tutorial has been deprecated. Redirecting to homepage in 3 seconds... + +.. raw:: html + + From 339bf13464457efe417b8d23f506ce575925fbc7 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 25 Apr 2023 13:18:44 -0700 Subject: [PATCH 014/609] Pyspelling: Advanced Python tutorials (#2293) * Pyspelling: Advanced Python tutorials --- .pyspelling.yml | 1 + advanced_source/ddp_pipeline.py | 37 ++++++++++--------- .../dynamic_quantization_tutorial.py | 10 ++--- advanced_source/neural_style_tutorial.py | 37 ++++++++++--------- advanced_source/numpy_extensions_tutorial.py | 8 ++-- .../super_resolution_with_onnxruntime.py | 20 +++++----- en-wordlist.txt | 15 ++++++++ 7 files changed, 74 insertions(+), 54 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index ffe9f469d03..9c9b18800cc 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -4,6 +4,7 @@ matrix: sources: - beginner_source/*.py - intermediate_source/*.py + - advanced_source/*.py dictionary: wordlists: - en-wordlist.txt diff --git a/advanced_source/ddp_pipeline.py b/advanced_source/ddp_pipeline.py index 67040532194..1eb956a7836 100644 --- a/advanced_source/ddp_pipeline.py +++ b/advanced_source/ddp_pipeline.py @@ -75,7 +75,7 @@ def forward(self, x): # As a result, our focus is on ``nn.TransformerEncoder`` and we split the model # such that half of the ``nn.TransformerEncoderLayer`` are on one GPU and the # other half are on another. To do this, we pull out the ``Encoder`` and -# ``Decoder`` sections into seperate modules and then build an nn.Sequential +# ``Decoder`` sections into separate modules and then build an ``nn.Sequential`` # representing the original Transformer module. @@ -151,16 +151,17 @@ def run_worker(rank, world_size): # length 6: # # .. 
math:: -# \begin{bmatrix} -# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} -# \end{bmatrix} -# \Rightarrow -# \begin{bmatrix} -# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & -# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & -# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & -# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} -# \end{bmatrix} +# +# \begin{bmatrix} +# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} +# \end{bmatrix} +# \Rightarrow +# \begin{bmatrix} +# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & +# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & +# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & +# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} +# \end{bmatrix} # # These columns are treated as independent by the model, which means that # the dependence of ``G`` and ``F`` can not be learned, but allows more @@ -192,11 +193,11 @@ def data_process(raw_text_iter): device = torch.device(2 * rank) def batchify(data, bsz, rank, world_size, is_train=False): - # Divide the dataset into bsz parts. + # Divide the dataset into ``bsz`` parts. nbatch = data.size(0) // bsz # Trim off any extra elements that wouldn't cleanly fit (remainders). data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. + # Evenly divide the data across the ``bsz`` batches. data = data.view(bsz, -1).t().contiguous() # Divide the data across the ranks only for training data. if is_train: @@ -261,14 +262,14 @@ def get_batch(source, i): # # The pipeline is then initialized with 8 transformer layers on one GPU and 8 # transformer layers on the other GPU. One pipe is setup across GPUs 0 and 1 and -# another across GPUs 2 and 3. Both pipes are then replicated using DistributedDataParallel. +# another across GPUs 2 and 3. Both pipes are then replicated using ``DistributedDataParallel``. # In 'run_worker' ntokens = len(vocab) # the size of vocabulary emsize = 4096 # embedding dimension - nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder - nlayers = 8 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder - nhead = 16 # the number of heads in the multiheadattention models + nhid = 4096 # the dimension of the feedforward network model in ``nn.TransformerEncoder`` + nlayers = 8 # the number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder`` + nhead = 16 # the number of heads in the Multihead Attention models dropout = 0.2 # the dropout value from torch.distributed import rpc @@ -287,7 +288,7 @@ def get_batch(source, i): ) ) - # Num gpus for model parallelism. + # Number of GPUs for model parallelism. 
num_gpus = 2 partition_len = ((nlayers - 1) // num_gpus) + 1 diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py index 571c0e4a831..9cc07a1d956 100644 --- a/advanced_source/dynamic_quantization_tutorial.py +++ b/advanced_source/dynamic_quantization_tutorial.py @@ -130,12 +130,12 @@ def tokenize(self, path): corpus = Corpus(model_data_filepath + 'wikitext-2') ###################################################################### -# 3. Load the pre-trained model +# 3. Load the pretrained model # ----------------------------- # # This is a tutorial on dynamic quantization, a quantization technique # that is applied after a model has been trained. Therefore, we'll simply load some -# pre-trained weights into this model architecture; these weights were obtained +# pretrained weights into this model architecture; these weights were obtained # by training for five epochs using the default settings in the word language model # example. @@ -159,7 +159,7 @@ def tokenize(self, path): print(model) ###################################################################### -# Now let's generate some text to ensure that the pre-trained model is working +# Now let's generate some text to ensure that the pretrained model is working # properly - similarly to before, we follow # `here `_ @@ -200,11 +200,11 @@ def tokenize(self, path): # create test data set def batchify(data, bsz): - # Work out how cleanly we can divide the dataset into bsz parts. + # Work out how cleanly we can divide the dataset into ``bsz`` parts. nbatch = data.size(0) // bsz # Trim off any extra elements that wouldn't cleanly fit (remainders). data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. + # Evenly divide the data across the ``bsz`` batches. return data.view(bsz, -1).t().contiguous() test_data = batchify(corpus.test, eval_batch_size) diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py index 099cb330859..3d84fc508bc 100644 --- a/advanced_source/neural_style_tutorial.py +++ b/advanced_source/neural_style_tutorial.py @@ -44,7 +44,7 @@ # - ``PIL``, ``PIL.Image``, ``matplotlib.pyplot`` (load and display # images) # - ``torchvision.transforms`` (transform PIL images into tensors) -# - ``torchvision.models`` (train or load pre-trained models) +# - ``torchvision.models`` (train or load pretrained models) # - ``copy`` (to deep copy the models; system package) from __future__ import print_function @@ -84,7 +84,7 @@ # torch library are trained with tensor values ranging from 0 to 1. If you # try to feed the networks with 0 to 255 tensor images, then the activated # feature maps will be unable to sense the intended content and style. -# However, pre-trained networks from the Caffe library are trained with 0 +# However, pretrained networks from the Caffe library are trained with 0 # to 255 tensor images. # # @@ -96,7 +96,7 @@ # with name ``images`` in your current working directory. # desired size of the output image -imsize = 512 if torch.cuda.is_available() else 128 # use small size if no gpu +imsize = 512 if torch.cuda.is_available() else 128 # use small size if no GPU loader = transforms.Compose([ transforms.Resize(imsize), # scale imported image @@ -220,7 +220,7 @@ def gram_matrix(input): # b=number of feature maps # (c,d)=dimensions of a f. 
map (N=c*d) - features = input.view(a * b, c * d) # resise F_XL into \hat F_XL + features = input.view(a * b, c * d) # resize F_XL into \hat F_XL G = torch.mm(features, features.t()) # compute the gram product @@ -251,7 +251,7 @@ def forward(self, input): # Importing the Model # ------------------- # -# Now we need to import a pre-trained neural network. We will use a 19 +# Now we need to import a pretrained neural network. We will use a 19 # layer VGG network like the one used in the paper. # # PyTorch’s implementation of VGG is a module divided into two child @@ -277,7 +277,7 @@ def forward(self, input): cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device) # create a module to normalize input image so we can easily put it in a -# nn.Sequential +# ``nn.Sequential`` class Normalization(nn.Module): def __init__(self, mean, std): super(Normalization, self).__init__() @@ -288,14 +288,14 @@ def __init__(self, mean, std): self.std = torch.tensor(std).view(-1, 1, 1) def forward(self, img): - # normalize img + # normalize ``img`` return (img - self.mean) / self.std ###################################################################### # A ``Sequential`` module contains an ordered list of child modules. For -# instance, ``vgg19.features`` contains a sequence (Conv2d, ReLU, MaxPool2d, -# Conv2d, ReLU…) aligned in the right order of depth. We need to add our +# instance, ``vgg19.features`` contains a sequence (``Conv2d``, ``ReLU``, ``MaxPool2d``, +# ``Conv2d``, ``ReLU``…) aligned in the right order of depth. We need to add our # content loss and style loss layers immediately after the convolution # layer they are detecting. To do this we must create a new ``Sequential`` # module that has content loss and style loss modules correctly inserted. @@ -312,12 +312,12 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, # normalization module normalization = Normalization(normalization_mean, normalization_std).to(device) - # just in order to have an iterable access to or list of content/syle + # just in order to have an iterable access to or list of content/style # losses content_losses = [] style_losses = [] - # assuming that cnn is a nn.Sequential, so we make a new nn.Sequential + # assuming that ``cnn`` is a ``nn.Sequential``, so we make a new ``nn.Sequential`` # to put in modules that are supposed to be activated sequentially model = nn.Sequential(normalization) @@ -328,8 +328,8 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, name = 'conv_{}'.format(i) elif isinstance(layer, nn.ReLU): name = 'relu_{}'.format(i) - # The in-place version doesn't play very nicely with the ContentLoss - # and StyleLoss we insert below. So we replace with out-of-place + # The in-place version doesn't play very nicely with the ``ContentLoss`` + # and ``StyleLoss`` we insert below. So we replace with out-of-place # ones here. 
layer = nn.ReLU(inplace=False) elif isinstance(layer, nn.MaxPool2d): @@ -371,8 +371,11 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, # input_img = content_img.clone() -# if you want to use white noise instead uncomment the below line: -# input_img = torch.randn(content_img.data.size(), device=device) +# if you want to use white noise by using the following code: +# +# :: +# +# input_img = torch.randn(content_img.data.size(), device=device) # add the original input image to the figure: plt.figure() @@ -385,7 +388,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, # # As Leon Gatys, the author of the algorithm, suggested `here `__, we will use # L-BFGS algorithm to run our gradient descent. Unlike training a network, -# we want to train the input image in order to minimise the content/style +# we want to train the input image in order to minimize the content/style # losses. We will create a PyTorch L-BFGS optimizer ``optim.LBFGS`` and pass # our image to it as the tensor to optimize. # @@ -400,7 +403,7 @@ def get_input_optimizer(input_img): # Finally, we must define a function that performs the neural transfer. For # each iteration of the networks, it is fed an updated input and computes # new losses. We will run the ``backward`` methods of each loss module to -# dynamicaly compute their gradients. The optimizer requires a “closure” +# dynamically compute their gradients. The optimizer requires a “closure” # function, which reevaluates the module and returns the loss. # # We still have one final constraint to address. The network may try to diff --git a/advanced_source/numpy_extensions_tutorial.py b/advanced_source/numpy_extensions_tutorial.py index afc9a118c30..8ccd92d3765 100644 --- a/advanced_source/numpy_extensions_tutorial.py +++ b/advanced_source/numpy_extensions_tutorial.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Creating Extensions Using numpy and scipy +Creating Extensions Using NumPy and SciPy ========================================= **Author**: `Adam Paszke `_ @@ -27,7 +27,7 @@ # This layer doesn’t particularly do anything useful or mathematically # correct. # -# It is aptly named BadFFTFunction +# It is aptly named ``BadFFTFunction`` # # **Layer Implementation** @@ -48,7 +48,7 @@ def backward(ctx, grad_output): return grad_output.new(result) # since this layer does not have any parameters, we can -# simply declare this as a function, rather than as an nn.Module class +# simply declare this as a function, rather than as an ``nn.Module`` class def incorrect_fft(input): @@ -75,7 +75,7 @@ def incorrect_fft(input): # Implementation of a layer with learnable weights, where cross-correlation # has a filter (kernel) that represents weights. # -# The backward pass computes the gradient wrt the input and the gradient wrt the filter. +# The backward pass computes the gradient ``wrt`` the input and the gradient ``wrt`` the filter. from numpy import flip import numpy as np diff --git a/advanced_source/super_resolution_with_onnxruntime.py b/advanced_source/super_resolution_with_onnxruntime.py index 91dfc806398..eb184e85109 100644 --- a/advanced_source/super_resolution_with_onnxruntime.py +++ b/advanced_source/super_resolution_with_onnxruntime.py @@ -37,12 +37,12 @@ # and is widely used in image processing or video editing. For this # tutorial, we will use a small super-resolution model. # -# First, let's create a SuperResolution model in PyTorch. +# First, let's create a ``SuperResolution`` model in PyTorch. 
# This model uses the efficient sub-pixel convolution layer described in # `"Real-Time Single Image and Video Super-Resolution Using an Efficient # Sub-Pixel Convolutional Neural Network" - Shi et al `__ # for increasing the resolution of an image by an upscale factor. -# The model expects the Y component of the YCbCr of an image as an input, and +# The model expects the Y component of the ``YCbCr`` of an image as an input, and # outputs the upscaled Y component in super resolution. # # `The @@ -87,7 +87,7 @@ def _initialize_weights(self): ###################################################################### # Ordinarily, you would now train this model; however, for this tutorial, -# we will instead download some pre-trained weights. Note that this model +# we will instead download some pretrained weights. Note that this model # was not trained fully for good accuracy and is used here for # demonstration purposes only. # @@ -154,9 +154,9 @@ def _initialize_weights(self): # the same values when run in ONNX Runtime. # # But before verifying the model's output with ONNX Runtime, we will check -# the ONNX model with ONNX's API. +# the ONNX model with ONNX API. # First, ``onnx.load("super_resolution.onnx")`` will load the saved model and -# will output a onnx.ModelProto structure (a top-level file/container format for bundling a ML model. +# will output a ``onnx.ModelProto`` structure (a top-level file/container format for bundling a ML model. # For more information `onnx.proto documentation `__.). # Then, ``onnx.checker.check_model(onnx_model)`` will verify the model's structure # and confirm that the model has a valid schema. @@ -181,7 +181,7 @@ def _initialize_weights(self): # In order to run the model with ONNX Runtime, we need to create an # inference session for the model with the chosen configuration # parameters (here we use the default config). -# Once the session is created, we evaluate the model using the run() api. +# Once the session is created, we evaluate the model using the run() API. # The output of this call is a list containing the outputs of the model # computed by ONNX Runtime. # @@ -205,7 +205,7 @@ def to_numpy(tensor): ###################################################################### # We should see that the output of PyTorch and ONNX Runtime runs match -# numerically with the given precision (rtol=1e-03 and atol=1e-05). +# numerically with the given precision (``rtol=1e-03`` and ``atol=1e-05``). # As a side-note, if they do not match then there is an issue in the # ONNX exporter, so please contact us in that case. # @@ -230,13 +230,13 @@ def to_numpy(tensor): # ###################################################################### -# First, let's load the image, pre-process it using standard PIL +# First, let's load the image, preprocess it using standard PIL # python library. Note that this preprocessing is the standard practice of # processing data for training/testing neural networks. # # We first resize the image to fit the size of the model's input (224x224). # Then we split the image into its Y, Cb, and Cr components. -# These components represent a greyscale image (Y), and +# These components represent a grayscale image (Y), and # the blue-difference (Cb) and red-difference (Cr) chroma components. # The Y component being more sensitive to the human eye, we are # interested in this component which we will be transforming. 
@@ -262,7 +262,7 @@ def to_numpy(tensor): ###################################################################### # Now, as a next step, let's take the tensor representing the -# greyscale resized cat image and run the super-resolution model in +# grayscale resized cat image and run the super-resolution model in # ONNX Runtime as explained previously. # diff --git a/en-wordlist.txt b/en-wordlist.txt index 025098fd7ee..0b7a5417953 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -3,10 +3,12 @@ ATen Args Autograd BCE +BFGS BN BOS Bahdanau BatchNorm +Bethge CHW CIFAR CLS @@ -14,6 +16,7 @@ CNNDM CNNs CPUs CUDA +Caffe CartPole Cayley Chatbots @@ -33,6 +36,7 @@ DeiT DenseNet EOS EPS +Ecker FC FGSM FLAVA @@ -45,11 +49,13 @@ GAE GAN GANs GLOO +GPT GPU's GPUs GRU GRUs GTC +Gatys GeForce Goodfellow Goodfellow’s @@ -93,7 +99,9 @@ NeurIPS NumPy Numericalization Numpy's +ONNX OpenAI +PIL PPO Plotly Prec @@ -108,11 +116,13 @@ RTX Radford ReLU ResNet +Runtime's SDPA SGD SPD SST2 STN +SciPy Sequentials Sigmoid SoTA @@ -130,6 +140,7 @@ TorchX Tunable UI Unescape +VGG VQA VS Code Wikitext @@ -163,6 +174,7 @@ cardinality chatbot chatbot's checkpointing +chroma colorbar compilable composable @@ -219,6 +231,7 @@ hvp hyperparameter hyperparameters imagenet +inferencing initializations inlined interpretable @@ -333,6 +346,7 @@ timesteps tokenization tokenize tokenizer +tokenizes tooltip topologies torchaudio @@ -355,6 +369,7 @@ unparametrized unpickling unpruned updation +upscaled utils vectorization vectorize From fa4866ece7aa20c4c6cd7ee3bb8ac00ff1474623 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 25 Apr 2023 20:34:49 +0000 Subject: [PATCH 015/609] Remove `updation` from wordlist --- en-wordlist.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/en-wordlist.txt b/en-wordlist.txt index 0b7a5417953..fdf5df67d8d 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -368,7 +368,6 @@ unoptimized unparametrized unpickling unpruned -updation upscaled utils vectorization From 417d1af18fdf001086c98414e95313e6c93fa78b Mon Sep 17 00:00:00 2001 From: Remi Delbouys Date: Wed, 26 Apr 2023 17:06:52 +0200 Subject: [PATCH 016/609] Fix image URL in tutorial Intro_to_TorchScript (#2299) Fix url to this GIF: https://raw.githubusercontent.com/pytorch/pytorch/main/docs/source/_static/img/dynamic_graph.gif Currently, the image is not shown in the tutorial. --- beginner_source/Intro_to_TorchScript_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/Intro_to_TorchScript_tutorial.py b/beginner_source/Intro_to_TorchScript_tutorial.py index 063abd442d1..d369c4fbf80 100644 --- a/beginner_source/Intro_to_TorchScript_tutorial.py +++ b/beginner_source/Intro_to_TorchScript_tutorial.py @@ -158,7 +158,7 @@ def forward(self, x, h): # have to explicitly define derivatives for all constructs in the # language. # -# .. figure:: https://github.com/pytorch/pytorch/raw/master/docs/source/_static/img/dynamic_graph.gif +# .. 
figure:: https://github.com/pytorch/pytorch/raw/main/docs/source/_static/img/dynamic_graph.gif # :alt: How autograd works # # How autograd works From 35125b1feb32269309dcca749a134673bf2f3514 Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Wed, 26 Apr 2023 14:59:17 -0700 Subject: [PATCH 017/609] switch to v100 (#2301) --- .circleci/config.yml | 4 ++-- .circleci/regenerate.py | 8 ++++---- .jenkins/get_files_to_run.py | 4 ++-- .jenkins/metadata.json | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 38bbfb83818..78e1e9b117f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -204,7 +204,7 @@ jobs: resource_class: gpu.nvidia.small.multi pytorch_tutorial_pr_build_worker_1: <<: *pytorch_tutorial_build_worker_defaults - resource_class: gpu.nvidia.medium + resource_class: gpu.nvidia.large pytorch_tutorial_pr_build_worker_10: <<: *pytorch_tutorial_build_worker_defaults pytorch_tutorial_pr_build_worker_11: @@ -249,7 +249,7 @@ jobs: resource_class: gpu.nvidia.small.multi pytorch_tutorial_trunk_build_worker_1: <<: *pytorch_tutorial_build_worker_defaults - resource_class: gpu.nvidia.medium + resource_class: gpu.nvidia.large pytorch_tutorial_trunk_build_worker_10: <<: *pytorch_tutorial_build_worker_defaults pytorch_tutorial_trunk_build_worker_11: diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index 42da5c34f52..f47ee1dfa6f 100644 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -27,10 +27,10 @@ def jobs(pr_or_trunk, num_workers=20, indentation=2): jobs = {} # all tutorials that need gpu.nvidia.small.multi machines will be routed by - # get_files_to_run.py to 0th worker, similarly for gpu.nvidia.medium and the + # get_files_to_run.py to 0th worker, similarly for gpu.nvidia.large and the # 1st worker needs_gpu_nvidia_small_multi = [0] - needs_gpu_nvidia_medium = [1] + needs_gpu_nvidia_large = [1] jobs[f"pytorch_tutorial_{pr_or_trunk}_build_manager"] = { "<<": "*pytorch_tutorial_build_manager_defaults" } @@ -38,8 +38,8 @@ def jobs(pr_or_trunk, num_workers=20, indentation=2): job_info = {"<<": "*pytorch_tutorial_build_worker_defaults"} if i in needs_gpu_nvidia_small_multi: job_info["resource_class"] = "gpu.nvidia.small.multi" - if i in needs_gpu_nvidia_medium: - job_info["resource_class"] = "gpu.nvidia.medium" + if i in needs_gpu_nvidia_large: + job_info["resource_class"] = "gpu.nvidia.large" jobs[f"pytorch_tutorial_{pr_or_trunk}_build_worker_{i}"] = job_info return indent(indentation, jobs).replace("'", "") diff --git a/.jenkins/get_files_to_run.py b/.jenkins/get_files_to_run.py index fc5d4310ac7..ae04d387b46 100644 --- a/.jenkins/get_files_to_run.py +++ b/.jenkins/get_files_to_run.py @@ -44,7 +44,7 @@ def add_to_shard(i, filename): filter(lambda x: get_needs_machine(x) == "gpu.nvidia.small.multi", all_files,) ) needs_gpu_nvidia_medium = list( - filter(lambda x: get_needs_machine(x) == "gpu.nvidia.medium", all_files,) + filter(lambda x: get_needs_machine(x) == "gpu.nvidia.large", all_files,) ) for filename in needs_gpu_nvidia_small_multi: # currently, the only job that uses gpu.nvidia.small.multi is the 0th worker, @@ -52,7 +52,7 @@ def add_to_shard(i, filename): add_to_shard(0, filename) all_other_files.remove(filename) for filename in needs_gpu_nvidia_medium: - # currently, the only job that uses gpu.nvidia.medium is the 1st worker, + # currently, the only job that uses gpu.nvidia.large is the 1st worker, # so we'll add all the jobs that need this 
machine to the 1st worker add_to_shard(1, filename) all_other_files.remove(filename) diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json index 9a881ccfa88..40c0e13c74e 100644 --- a/.jenkins/metadata.json +++ b/.jenkins/metadata.json @@ -26,6 +26,6 @@ "needs": "gpu.nvidia.small.multi" }, "intermediate_source/torch_compile_tutorial.py": { - "needs": "gpu.nvidia.medium" + "needs": "gpu.nvidia.large" } } From 8c0785e09dd656609f9d4bb195c8e9358714267b Mon Sep 17 00:00:00 2001 From: William Wen Date: Thu, 27 Apr 2023 12:30:05 -0700 Subject: [PATCH 018/609] use better model for torch.compile tutorial (#2297) Co-authored-by: Svetlana Karslioglu --- intermediate_source/torch_compile_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intermediate_source/torch_compile_tutorial.py b/intermediate_source/torch_compile_tutorial.py index fcea4ed6611..2f5d2e966e1 100644 --- a/intermediate_source/torch_compile_tutorial.py +++ b/intermediate_source/torch_compile_tutorial.py @@ -117,9 +117,9 @@ def generate_data(b): N_ITERS = 10 -from torchvision.models import resnet18 +from torchvision.models import densenet121 def init_model(): - return resnet18().to(torch.float32).cuda() + return densenet121().to(torch.float32).cuda() ###################################################################### # First, let's compare inference. From 07a7ae2816cbe053a5049b7b8433bdadcb3ae802 Mon Sep 17 00:00:00 2001 From: Suraj Subramanian <5676233+subramen@users.noreply.github.com> Date: Thu, 27 Apr 2023 18:05:01 -0400 Subject: [PATCH 019/609] Call out using set_device when initing pg --- beginner_source/ddp_series_fault_tolerance.rst | 2 +- beginner_source/ddp_series_multigpu.rst | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/beginner_source/ddp_series_fault_tolerance.rst b/beginner_source/ddp_series_fault_tolerance.rst index a05c2e1a9ca..2bc63d7ec7d 100644 --- a/beginner_source/ddp_series_fault_tolerance.rst +++ b/beginner_source/ddp_series_fault_tolerance.rst @@ -117,7 +117,7 @@ Process group initialization - os.environ["MASTER_PORT"] = "12355" - init_process_group(backend="nccl", rank=rank, world_size=world_size) + init_process_group(backend="nccl") - + torch.cuda.set_device(int(os.environ["LOCAL_RANK"])) Use Torchrun-provided env variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/beginner_source/ddp_series_multigpu.rst b/beginner_source/ddp_series_multigpu.rst index 73e49c6c299..885afbb25b7 100644 --- a/beginner_source/ddp_series_multigpu.rst +++ b/beginner_source/ddp_series_multigpu.rst @@ -83,6 +83,8 @@ Constructing the process group initializes the distributed process group. - Read more about `choosing a DDP backend `__ +- `set_device `__ + sets the default GPU for each process. This is important to prevent hangs or excessive memory utilization on `GPU:0` .. 
code:: diff @@ -95,6 +97,7 @@ Constructing the process group + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + init_process_group(backend="nccl", rank=rank, world_size=world_size) + + torch.cuda.set_device(rank) Constructing the DDP model From c26d6a5db19db5d4c3d90d72f949e200246bacf3 Mon Sep 17 00:00:00 2001 From: William Wen Date: Mon, 1 May 2023 06:34:08 -0700 Subject: [PATCH 020/609] Add note and check for GPU capability (#2303) * add note and check for GPU capability * Use device capability instead of device name Co-authored-by: Nikita Shulga * Allow H100 --------- Co-authored-by: Nikita Shulga --- intermediate_source/torch_compile_tutorial.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/intermediate_source/torch_compile_tutorial.py b/intermediate_source/torch_compile_tutorial.py index 2f5d2e966e1..d4b8e54b9ed 100644 --- a/intermediate_source/torch_compile_tutorial.py +++ b/intermediate_source/torch_compile_tutorial.py @@ -33,9 +33,25 @@ # - ``numpy`` # - ``scipy`` # - ``tabulate`` -# -# Note: a modern NVIDIA GPU (Volta or Ampere) is recommended for this tutorial. -# + +###################################################################### +# NOTE: a modern NVIDIA GPU (H100, A100, or V100) is recommended for this tutorial in +# order to reproduce the speedup numbers shown below and documented elsewhere. + +import torch +import warnings + +gpu_ok = False +if torch.cuda.is_available(): + device_cap = torch.cuda.get_device_capability() + if device_cap in ((7, 0), (8, 0), (9, 0)): + gpu_ok = True + +if not gpu_ok: + warnings.warn( + "GPU is not NVIDIA V100, A100, or H100. Speedup numbers may be lower " + "than expected." + ) ###################################################################### # Basic Usage @@ -51,8 +67,6 @@ # ``torch.compile``. We can then call the returned optimized # function in place of the original function. 
-import torch - def foo(x, y): a = torch.sin(x) b = torch.cos(x) From 63c6dbbd51418eda6380b32faa273c4bb06ea0ad Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Tue, 2 May 2023 14:18:23 -0700 Subject: [PATCH 021/609] update docker image 11.7 to 12.1 (#2306) --- .circleci/config.yml | 4 ++-- .circleci/config.yml.in | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 78e1e9b117f..8bf2a6321ae 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -154,14 +154,14 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" CUDA_VERSION: "9" resource_class: gpu.nvidia.small <<: *pytorch_tutorial_build_defaults pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" resource_class: medium diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 46e0d9d3ecb..41f226eaeee 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -154,14 +154,14 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" CUDA_VERSION: "9" resource_class: gpu.nvidia.small <<: *pytorch_tutorial_build_defaults pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" resource_class: medium From 9efe789bfc3763ec359b60f12b5e6dda4e6d5db0 Mon Sep 17 00:00:00 2001 From: Saar Tochner Date: Fri, 5 May 2023 01:26:12 +0300 Subject: [PATCH 022/609] Fix typo in DCGAN Tutorial (#2304) Co-authored-by: Svetlana Karslioglu --- beginner_source/dcgan_faces_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/dcgan_faces_tutorial.py b/beginner_source/dcgan_faces_tutorial.py index 2ee43d72d2f..d98683741e5 100644 --- a/beginner_source/dcgan_faces_tutorial.py +++ b/beginner_source/dcgan_faces_tutorial.py @@ -514,7 +514,7 @@ def forward(self, input): # practices shown in `ganhacks `__. # Namely, we will “construct different mini-batches for real and fake” # images, and also adjust G’s objective function to maximize -# :math:`logD(G(z))`. Training is split up into two main parts. Part 1 +# :math:`log(D(G(z)))`. Training is split up into two main parts. Part 1 # updates the Discriminator and Part 2 updates the Generator. 
# # **Part 1 - Train the Discriminator** From 2b5de98684646b78dc5d5e85012da5d43472fe4f Mon Sep 17 00:00:00 2001 From: Junghwan Park <9343724+9bow@users.noreply.github.com> Date: Wed, 10 May 2023 00:14:10 +0900 Subject: [PATCH 023/609] FIX: add neccessary backticks (#2309) (#2315) --- beginner_source/basics/saveloadrun_tutorial.py | 2 +- beginner_source/transformer_tutorial.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py index baccdf3cfdd..691a479938f 100644 --- a/beginner_source/basics/saveloadrun_tutorial.py +++ b/beginner_source/basics/saveloadrun_tutorial.py @@ -33,7 +33,7 @@ # To load model weights, you need to create an instance of the same model first, and then load the parameters # using ``load_state_dict()`` method. -model = models.vgg16() # we do not specify weights, i.e. create untrained model +model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model model.load_state_dict(torch.load('model_weights.pth')) model.eval() diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index fab8e3a9a59..d93b3d55fe7 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -135,7 +135,7 @@ def forward(self, x: Tensor) -> Tensor: ###################################################################### # This tutorial uses ``torchtext`` to generate Wikitext-2 dataset. -# To access torchtext datasets, please install torchdata following instructions at https://github.com/pytorch/data. +# To access torchtext datasets, please install torchdata following instructions at https://github.com/pytorch/data. # %% # .. code-block:: bash # @@ -175,7 +175,7 @@ def forward(self, x: Tensor) -> Tensor: train_iter = WikiText2(split='train') tokenizer = get_tokenizer('basic_english') vocab = build_vocab_from_iterator(map(tokenizer, train_iter), specials=['']) -vocab.set_default_index(vocab['']) +vocab.set_default_index(vocab['']) def data_process(raw_text_iter: dataset.IterableDataset) -> Tensor: """Converts raw text into a flat Tensor.""" @@ -196,7 +196,7 @@ def batchify(data: Tensor, bsz: int) -> Tensor: that wouldn't cleanly fit. Arguments: - data: Tensor, shape [N] + data: Tensor, shape ``[N]`` bsz: int, batch size Returns: From af117606af1300e14844ba70265bcd2d8f0997c9 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Tue, 9 May 2023 19:53:48 +0200 Subject: [PATCH 024/609] Fix typo in dist_overview.rst (#2316) Link to generic_join.rst was missing the "j" --- beginner_source/dist_overview.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/dist_overview.rst b/beginner_source/dist_overview.rst index 542eb31e038..12e9bfa0e55 100644 --- a/beginner_source/dist_overview.rst +++ b/beginner_source/dist_overview.rst @@ -131,7 +131,7 @@ DDP materials are listed below: 4. The `Shard Optimizer States With ZeroRedundancyOptimizer <../recipes/zero_redundancy_optimizer.html>`__ recipe demonstrates how `ZeroRedundancyOptimizer `__ helps to reduce optimizer memory footprint. -5. The `Distributed Training with Uneven Inputs Using the Join Context Manager <../advanced/generic_oin.html>`__ +5. The `Distributed Training with Uneven Inputs Using the Join Context Manager <../advanced/generic_join.html>`__ tutorial walks through using the generic join context for distributed training with uneven inputs. 
torch.distributed.elastic From 44bcbdc2dd349765077c767d433196832e262777 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 9 May 2023 13:12:17 -0700 Subject: [PATCH 025/609] Pyspelling python recipes (#2302) * Pyspelling: Add check for Python recipes * Fix --- .pyspelling.yml | 6 +- en-wordlist.txt | 36 ++++ recipes_source/recipes/Captum_Recipe.py | 2 +- recipes_source/recipes/amp_recipe.py | 87 +++++---- recipes_source/recipes/benchmark.py | 60 +++--- .../recipes/defining_a_neural_network.py | 6 +- .../recipes/dynamic_quantization.py | 30 +-- recipes_source/recipes/loading_data_recipe.py | 14 +- recipes_source/recipes/profiler_recipe.py | 183 ++++++++++-------- .../recipes/save_load_across_devices.py | 12 +- ...saving_and_loading_a_general_checkpoint.py | 9 +- ...saving_and_loading_models_for_inference.py | 6 +- .../saving_multiple_models_in_one_file.py | 12 +- .../recipes/tensorboard_with_pytorch.py | 58 +++--- recipes_source/recipes/timer_quick_start.py | 75 +++---- recipes_source/recipes/tuning_guide.py | 105 +++++----- ...using_parameters_from_a_different_model.py | 4 +- recipes_source/recipes/what_is_state_dict.py | 4 +- .../recipes/zeroing_out_gradients.py | 4 +- 19 files changed, 384 insertions(+), 329 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index 9c9b18800cc..b068dec2478 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -5,6 +5,7 @@ matrix: - beginner_source/*.py - intermediate_source/*.py - advanced_source/*.py + - recipes_source/*/*.py dictionary: wordlists: - en-wordlist.txt @@ -21,10 +22,13 @@ matrix: - open: ':(?:(class|py:mod|mod|func)):`' content: '[^`]*' close: '`' + # Exclude reStructuredText hyperlinks + - open: '\s' + content: '\w*' + close: '_' # Exclude raw directive - open: '\.\. (raw)::.*$\n*' close: '\n' - # Exclude # Exclude Python coding directives - open: '-\*- coding:' close: '\n' diff --git a/en-wordlist.txt b/en-wordlist.txt index fdf5df67d8d..2a4c87925f4 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -1,5 +1,6 @@ APIs ATen +AVX Args Autograd BCE @@ -15,8 +16,11 @@ CLS CNNDM CNNs CPUs +CPython CUDA Caffe +Captum +Captum's CartPole Cayley Chatbots @@ -28,6 +32,7 @@ DCGAN DCGANs DDP DDQN +DLRM DNN DQN DataLoaders @@ -64,8 +69,10 @@ HVP Hugging Face IMDB IOT +ISA ImageNet Initializations +Interpretability Iteratively JSON JVP @@ -93,6 +100,7 @@ NCHW NES NLP NTK +NUMA NaN NanoGPT NeurIPS @@ -101,6 +109,7 @@ Numericalization Numpy's ONNX OpenAI +OpenMP PIL PPO Plotly @@ -115,11 +124,13 @@ RPC RTX Radford ReLU +ReLUs ResNet Runtime's SDPA SGD SPD +SSD SST2 STN SciPy @@ -128,6 +139,7 @@ Sigmoid SoTA TPU TensorBoard +TensorBoards TextVQA Tokenization TorchDynamo @@ -157,6 +169,7 @@ approximators autodiff autoencoder autograd +autotuner backend backends backprop @@ -164,12 +177,15 @@ backpropagate backpropagated backpropagates backpropagation +backtrace batchnorm batchnorm's benchmarking +bitwise boolean broadcasted bytecode +cancelation cardinality chatbot chatbot's @@ -201,10 +217,13 @@ deserialized deterministically dimensionality dir +discontiguous +distractor downsample downsamples dropdown duration +elementwise embeddings encodings ensembling @@ -242,6 +261,7 @@ iteratively jacobian jacobians jit +jitter jpg judgements kwargs @@ -250,6 +270,7 @@ learnable learnings loadFilename manualSeed +matmul matplotlib minibatch minibatches @@ -272,6 +293,7 @@ numericalize numpy nvFuser nvFuser's +oneDNN optimizable optimizer's optimizers @@ -283,8 +305,13 @@ parametrizations parametrized parametrizing perceptibility +pickleable 
pipelining pointwise +postprocessing +preallocate +preallocates +preallocation precompute precomputing prepend @@ -336,7 +363,11 @@ subdirectories submodule submodules subnetworks +subprocess +subprocesses subreddit +subregion +subregion's summarization tanh th @@ -361,6 +392,7 @@ tradeoff tradeoffs uncomment uncommented +underflowing unfused unimodal unnormalized @@ -368,6 +400,9 @@ unoptimized unparametrized unpickling unpruned +unscale +unscaled +unscales upscaled utils vectorization @@ -377,4 +412,5 @@ vhp voc walkthrough warmstart +warmstarted warmstarting diff --git a/recipes_source/recipes/Captum_Recipe.py b/recipes_source/recipes/Captum_Recipe.py index 18c81c33d6e..d164374df7f 100644 --- a/recipes_source/recipes/Captum_Recipe.py +++ b/recipes_source/recipes/Captum_Recipe.py @@ -59,7 +59,7 @@ normalize = transforms.Compose([ transforms.ToTensor(), # converts the image to a tensor with values between 0 and 1 - transforms.Normalize( # normalize to follow 0-centered imagenet pixel rgb distribution + transforms.Normalize( # normalize to follow 0-centered imagenet pixel RGB distribution mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py index eb92b90d0cc..2cdd37c8035 100644 --- a/recipes_source/recipes/amp_recipe.py +++ b/recipes_source/recipes/amp_recipe.py @@ -78,7 +78,7 @@ def make_model(in_size, out_size, num_layers): # Creates data in default precision. # The same data is used for both default and mixed precision trials below. -# You don't need to manually change inputs' dtype when enabling mixed precision. +# You don't need to manually change inputs' ``dtype`` when enabling mixed precision. data = [torch.randn(batch_size, in_size, device="cuda") for _ in range(num_batches)] targets = [torch.randn(batch_size, out_size, device="cuda") for _ in range(num_batches)] @@ -103,38 +103,38 @@ def make_model(in_size, out_size, num_layers): end_timer_and_print("Default precision:") ########################################################## -# Adding autocast -# --------------- +# Adding ``torch.autocast`` +# ------------------------- # Instances of `torch.autocast `_ # serve as context managers that allow regions of your script to run in mixed precision. # -# In these regions, CUDA ops run in a dtype chosen by autocast +# In these regions, CUDA ops run in a ``dtype`` chosen by ``autocast`` # to improve performance while maintaining accuracy. # See the `Autocast Op Reference `_ -# for details on what precision autocast chooses for each op, and under what circumstances. +# for details on what precision ``autocast`` chooses for each op, and under what circumstances. for epoch in range(0): # 0 epochs, this section is for illustration only for input, target in zip(data, targets): - # Runs the forward pass under autocast. + # Runs the forward pass under ``autocast``. with torch.autocast(device_type='cuda', dtype=torch.float16): output = net(input) - # output is float16 because linear layers autocast to float16. + # output is float16 because linear layers ``autocast`` to float16. assert output.dtype is torch.float16 loss = loss_fn(output, target) - # loss is float32 because mse_loss layers autocast to float32. + # loss is float32 because ``mse_loss`` layers ``autocast`` to float32. assert loss.dtype is torch.float32 - # Exits autocast before backward(). - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for corresponding forward ops. 
+ # Exits ``autocast`` before backward(). + # Backward passes under ``autocast`` are not recommended. + # Backward ops run in the same ``dtype`` ``autocast`` chose for corresponding forward ops. loss.backward() opt.step() opt.zero_grad() # set_to_none=True here can modestly improve performance ########################################################## -# Adding GradScaler -# ----------------- +# Adding ``GradScaler`` +# --------------------- # `Gradient scaling `_ # helps prevent gradients with small magnitudes from flushing to zero # ("underflowing") when training with mixed precision. @@ -142,11 +142,11 @@ def make_model(in_size, out_size, num_layers): # `torch.cuda.amp.GradScaler `_ # performs the steps of gradient scaling conveniently. -# Constructs scaler once, at the beginning of the convergence run, using default args. -# If your network fails to converge with default GradScaler args, please file an issue. -# The same GradScaler instance should be used for the entire convergence run. +# Constructs a ``scaler`` once, at the beginning of the convergence run, using default arguments. +# If your network fails to converge with default ``GradScaler`` arguments, please file an issue. +# The same ``GradScaler`` instance should be used for the entire convergence run. # If you perform multiple convergence runs in the same script, each run should use -# a dedicated fresh GradScaler instance. GradScaler instances are lightweight. +# a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight. scaler = torch.cuda.amp.GradScaler() for epoch in range(0): # 0 epochs, this section is for illustration only @@ -155,11 +155,11 @@ def make_model(in_size, out_size, num_layers): output = net(input) loss = loss_fn(output, target) - # Scales loss. Calls backward() on scaled loss to create scaled gradients. + # Scales loss. Calls ``backward()`` on scaled loss to create scaled gradients. scaler.scale(loss).backward() - # scaler.step() first unscales the gradients of the optimizer's assigned params. - # If these gradients do not contain infs or NaNs, optimizer.step() is then called, + # ``scaler.step()`` first unscales the gradients of the optimizer's assigned parameters. + # If these gradients do not contain ``inf``s or ``NaN``s, optimizer.step() is then called, # otherwise, optimizer.step() is skipped. scaler.step(opt) @@ -207,10 +207,10 @@ def make_model(in_size, out_size, num_layers): loss = loss_fn(output, target) scaler.scale(loss).backward() - # Unscales the gradients of optimizer's assigned params in-place + # Unscales the gradients of optimizer's assigned parameters in-place scaler.unscale_(opt) - # Since the gradients of optimizer's assigned params are now unscaled, clips as usual. + # Since the gradients of optimizer's assigned parameters are now unscaled, clips as usual. # You may use the same value for max_norm here as you would without gradient scaling. torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=0.1) @@ -225,7 +225,7 @@ def make_model(in_size, out_size, num_layers): # `scaler.state_dict `_ and # `scaler.load_state_dict `_. # -# When saving, save the scaler state dict alongside the usual model and optimizer state dicts. +# When saving, save the ``scaler`` state dict alongside the usual model and optimizer state ``dicts``. # Do this either at the beginning of an iteration before any forward passes, or at the end of # an iteration after ``scaler.update()``. 
@@ -236,23 +236,26 @@ def make_model(in_size, out_size, num_layers): # torch.save(checkpoint, "filename") ########################################################## -# When resuming, load the scaler state dict alongside the model and optimizer state dicts. - -# Read checkpoint as desired, e.g., -# dev = torch.cuda.current_device() -# checkpoint = torch.load("filename", -# map_location = lambda storage, loc: storage.cuda(dev)) +# When resuming, load the ``scaler`` state dict alongside the model and optimizer state ``dicts``. +# Read checkpoint as desired, for example: +# +# .. code-block:: +# +# dev = torch.cuda.current_device() +# checkpoint = torch.load("filename", +# map_location = lambda storage, loc: storage.cuda(dev)) +# net.load_state_dict(checkpoint["model"]) opt.load_state_dict(checkpoint["optimizer"]) scaler.load_state_dict(checkpoint["scaler"]) ########################################################## # If a checkpoint was created from a run *without* Amp, and you want to resume training *with* Amp, -# load model and optimizer states from the checkpoint as usual. The checkpoint won't contain a saved scaler state, so +# load model and optimizer states from the checkpoint as usual. The checkpoint won't contain a saved ``scaler`` state, so # use a fresh instance of ``GradScaler``. # -# If a checkpoint was created from a run *with* Amp and you want to resume training *without* Amp, -# load model and optimizer states from the checkpoint as usual, and ignore the saved scaler state. +# If a checkpoint was created from a run *with* Amp and you want to resume training *without* ``Amp``, +# load model and optimizer states from the checkpoint as usual, and ignore the saved ``scaler`` state. ########################################################## # Inference/Evaluation @@ -273,7 +276,7 @@ def make_model(in_size, out_size, num_layers): # * Custom autograd functions (subclasses of ``torch.autograd.Function``) # # If you perform multiple convergence runs in the same script, each run should use -# a dedicated fresh GradScaler instance. GradScaler instances are lightweight. +# a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight. # # If you're registering a custom C++ op with the dispatcher, see the # `autocast section `_ @@ -293,9 +296,9 @@ def make_model(in_size, out_size, num_layers): # as much as you can without running OOM. # * Try to avoid excessive CPU-GPU synchronization (``.item()`` calls, or printing values from CUDA tensors). # * Try to avoid sequences of many small CUDA ops (coalesce these into a few large CUDA ops if you can). -# 2. Your network may be GPU compute bound (lots of matmuls/convolutions) but your GPU does not have Tensor Cores. +# 2. Your network may be GPU compute bound (lots of ``matmuls``/convolutions) but your GPU does not have Tensor Cores. # In this case a reduced speedup is expected. -# 3. Matmul dimensions are not Tensor Core-friendly. Make sure matmuls' participating sizes are multiples of 8. +# 3. The ``matmul`` dimensions are not Tensor Core-friendly. Make sure ``matmuls`` participating sizes are multiples of 8. # (For NLP models with encoders/decoders, this can be subtle. Also, convolutions used to have similar size constraints # for Tensor Core use, but for CuDNN versions 7.3 and later, no such constraints exist. See # `here `_ for guidance.) 
@@ -307,19 +310,19 @@ def make_model(in_size, out_size, num_layers): # # If you're confident your Amp usage is correct, you may need to file an issue, but before doing so, it's helpful to gather the following information: # -# 1. Disable ``autocast`` or ``GradScaler`` individually (by passing ``enabled=False`` to their constructor) and see if infs/NaNs persist. +# 1. Disable ``autocast`` or ``GradScaler`` individually (by passing ``enabled=False`` to their constructor) and see if ``infs``/``NaNs`` persist. # 2. If you suspect part of your network (e.g., a complicated loss function) overflows , run that forward region in ``float32`` -# and see if infs/NaNs persist. +# and see if ``infs``/``NaN``s persist. # `The autocast docstring `_'s last code snippet -# shows forcing a subregion to run in ``float32`` (by locally disabling autocast and casting the subregion's inputs). +# shows forcing a subregion to run in ``float32`` (by locally disabling ``autocast`` and casting the subregion's inputs). # -# Type mismatch error (may manifest as CUDNN_STATUS_BAD_PARAM) +# Type mismatch error (may manifest as ``CUDNN_STATUS_BAD_PARAM``) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Autocast tries to cover all ops that benefit from or require casting. +# ``Autocast`` tries to cover all ops that benefit from or require casting. # `Ops that receive explicit coverage `_ # are chosen based on numerical properties, but also on experience. -# If you see a type mismatch error in an autocast-enabled forward region or a backward pass following that region, -# it's possible autocast missed an op. +# If you see a type mismatch error in an ``autocast`` enabled forward region or a backward pass following that region, +# it's possible ``autocast`` missed an op. # # Please file an issue with the error backtrace. ``export TORCH_SHOW_CPP_STACKTRACES=1`` before running your script to provide # fine-grained information on which backend op is failing. diff --git a/recipes_source/recipes/benchmark.py b/recipes_source/recipes/benchmark.py index bd2e28e9654..d02157a83e4 100644 --- a/recipes_source/recipes/benchmark.py +++ b/recipes_source/recipes/benchmark.py @@ -39,11 +39,11 @@ # 1. Defining functions to benchmark # 2. Benchmarking with ``timeit.Timer`` # 3. Benchmarking with ``torch.utils.benchmark.Timer`` -# 4. Benchmarking with `Blocked Autorange` +# 4. Benchmarking with ``Blocked Autorange`` # 5. Comparing benchmark results # 6. Saving/Loading benchmark results -# 7. Generating inputs with `Fuzzed Parameters` -# 8. Collecting instruction counts with `Callgrind` +# 7. Generating inputs with ``Fuzzed Parameters`` +# 8. Collecting instruction counts with ``Callgrind`` # # 1. Defining functions to benchmark # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,7 +63,7 @@ def batched_dot_mul_sum(a, b): def batched_dot_bmm(a, b): - '''Computes batched dot by reducing to bmm''' + '''Computes batched dot by reducing to ``bmm``''' a = a.reshape(-1, 1, a.shape[-1]) b = b.reshape(-1, b.shape[-1], 1) return torch.bmm(a, b).flatten(-3) @@ -159,11 +159,11 @@ def batched_dot_bmm(a, b): # # Another important difference, and the reason why the results diverge # is that PyTorch benchmark module runs in a single thread by default. -# We can change the number of threads with the num_threads arg. +# We can change the number of threads with the ``num_threads`` argument. 
# # ``torch.utils.benchmark.Timer`` takes several additional arguments -# including: `label`, `sub_label`, `description` and `env` which change -# the ``__repr__`` of the measurement object returned and are used for +# including: ``label``, ``sub_label``, ``description`` and ``env`` which change +# the __repr__ of the measurement object returned and are used for # grouping the results (more on this later). # @@ -227,7 +227,7 @@ def batched_dot_bmm(a, b): setup='from __main__ import batched_dot_bmm', globals={'x': x}) -# Ran each twice to show difference before/after warmup +# Ran each twice to show difference before/after warm-up print(f'mul_sum(x, x): {t0.timeit(100) / 100 * 1e6:>5.1f} us') print(f'mul_sum(x, x): {t0.timeit(100) / 100 * 1e6:>5.1f} us') print(f'bmm(x, x): {t1.timeit(100) / 100 * 1e6:>5.1f} us') @@ -253,7 +253,7 @@ def batched_dot_bmm(a, b): setup='from __main__ import batched_dot_bmm', globals={'x': x}) -# Run only once since benchmark module does warmup for us +# Run only once since benchmark module does warm-up for us print(t0.timeit(100)) print(t1.timeit(100)) @@ -278,7 +278,7 @@ def batched_dot_bmm(a, b): # version using the ``timeit`` module takes much longer than the second # run. This is because ``bmm`` calls into `cuBLAS` which needs to be # loaded the first time it's called which takes some time. This is why -# it's important to do a warmup run before benchmarking, luckily for +# it's important to do a warm-up run before benchmarking, luckily for # us, PyTorch's ``benchmark`` module takes care of that. # # The difference in the results between ``timeit`` and ``benchmark`` modules @@ -469,7 +469,7 @@ def batched_dot_bmm(a, b): # ###################################################################### -# The results above indicate that the version which reduces to bmm +# The results above indicate that the version which reduces to ``bmm`` # is better for larger tensors running on multiple threads, while for # smaller and/or single thread code, the other version is better. # @@ -485,14 +485,14 @@ def batched_dot_bmm(a, b): # 6. Saving/Loading benchmark results # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# `Measurements` (and `CallgrindStats` which are described in section 8) -# are pickleable. This makes A/B testing easy, as you can collect +# `Measurements` (and ``CallgrindStats`` which are described in section 8) +# can be serialized by the ``pickle`` module. This makes A/B testing easy, as you can collect # measurements from two separate environments, pickle them, and then # load both in a single environment. Timer even takes an `env` # constructor argument so that such A/B testing works seamlessly. # # Let's imagine that rather than two Python functions, the add/sum -# and bmm approaches were in two different builds of PyTorch. +# and ``bmm`` approaches were in two different builds of PyTorch. # The example below demonstrates how one might A/B test them. For # simplicity, we only use a subset of shapes, and simply round trip # results through pickle rather than actually using multiple environments @@ -549,14 +549,14 @@ def batched_dot_bmm(a, b): # is a good idea to run benchmarks on a number of different inputs. # However, creating all these input tensors can be tedious which is # where ``torch.utils.benchmark.Fuzzer`` and related classes come in. -# Let's take a look at how we can use the Fuzzer to create some test +# Let's take a look at how we can use the ``Fuzzer`` to create some test # cases for the benchmark. 
# from torch.utils.benchmark import Fuzzer, FuzzedParameter, FuzzedTensor, ParameterAlias # Generates random tensors with 128 to 10000000 elements and sizes k0 and k1 chosen from a -# loguniform distribution in [1, 10000], 40% of which will be discontiguous on average. +# ``loguniform`` distribution in [1, 10000], 40% of which will be discontiguous on average. example_fuzzer = Fuzzer( parameters = [ FuzzedParameter('k0', minval=1, maxval=10000, distribution='loguniform'), @@ -615,11 +615,11 @@ def batched_dot_bmm(a, b): # ###################################################################### -# There is a lot of flexibility for defining your own Fuzzers which +# There is a lot of flexibility for defining your own ``fuzzers`` which # is great for creating a powerful set of inputs to benchmark. But to # make things even simpler, PyTorch benchmark module comes with some -# buitin Fuzzers for common benchmarking needs. Let's take a look at -# how we can use one of these builtin fuzzers. +# built-in ``fuzzers`` for common benchmarking needs. Let's take a look at +# how we can use one of these built-in ``fuzzers``. # from torch.utils.benchmark.op_fuzzers import binary @@ -671,8 +671,8 @@ def batched_dot_bmm(a, b): # ###################################################################### -# 8. Collecting instruction counts with `Callgrind` -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 8. Collecting instruction counts with ``Callgrind`` +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # One of the challenges of optimizing code is the variation and opacity of # wall time. There are many sources of non-determinism, from adaptive clock @@ -723,7 +723,7 @@ def batched_dot_bmm(a, b): cpp_sources=batched_dot_src, extra_cflags=['-O3'], extra_include_paths=[ - # `load_inline` needs to know where to find Pybind11 headers. + # `load_inline` needs to know where to find ``pybind11`` headers. os.path.join(os.getenv('CONDA_PREFIX'), 'include') ], functions=['batched_dot_mul_sum_v0', 'batched_dot_mul_sum_v1'] @@ -742,7 +742,7 @@ def batched_dot_bmm(a, b): import textwrap def pretty_print(result): - """Import machinery for cpp_lib.so can get repetitive to look at.""" + """Import machinery for ``cpp_lib.so`` can get repetitive to look at.""" print(repr(result).replace(textwrap.indent(module_import_str, " "), " import cpp_lib")) @@ -801,7 +801,7 @@ def pretty_print(result): # 1 measurement, 100000 runs , 1 thread # -# Let's use Callgrind to determine which is better. +# Let's use ``Callgrind`` to determine which is better. stats_v0 = t0.collect_callgrind() stats_v1 = t1.collect_callgrind() @@ -819,7 +819,7 @@ def pretty_print(result): delta = stats_v1.delta(stats_v0).denoise() # `.transform` is a convenience API for transforming function names. It is -# useful for increasing cancelation when diff-ing instructions, as well as +# useful for increasing cancelation when ``diff-ing`` instructions, as well as # just generally improving readability. replacements = ( ("???:void pybind11", "pybind11"), @@ -835,21 +835,19 @@ def pretty_print(result): torch.set_printoptions(linewidth=160) # Once parsed, the instruction counts make clear that passing `a` and `b` -# by reference is more efficient as it skips some c10::TensorImpl bookkeeping -# for the intermediate Tensors, and is also works better with PyBind11. This +# by reference is more efficient as it skips some ``c10::TensorImpl`` bookkeeping +# for the intermediate Tensors, and is also works better with ``pybind11``. 
This # is consistent with our noisy wall time observations. print(delta) ###################################################################### -# .. code-block:: none -# :caption: Output +# .. code-block:: # # # cpp_lib.batched_dot_mul_sum_v0(x, x) # setup: # import cpp_lib # x = torch.randn(2, 2) -# # All Noisy symbols removed # Instructions: 2392671 2392671 # Baseline: 4367 4367 @@ -862,7 +860,6 @@ def pretty_print(result): # setup: # import cpp_lib # x = torch.randn(2, 2) -# # All Noisy symbols removed # Instructions: 2378978 2378978 # Baseline: 4367 4367 @@ -877,7 +874,6 @@ def pretty_print(result): # -1600 ???:wrap_pybind_function_impl_(at::Tensor (&)(...), std::integer_sequence)::{lambda(...) # -5200 ???:c10::intrusive_ptr::reset_() # -5935 ???:0x000000000022c0e0 -# # Total: -13693 # diff --git a/recipes_source/recipes/defining_a_neural_network.py b/recipes_source/recipes/defining_a_neural_network.py index 3992a092820..07d91edcd8c 100644 --- a/recipes_source/recipes/defining_a_neural_network.py +++ b/recipes_source/recipes/defining_a_neural_network.py @@ -54,7 +54,7 @@ ###################################################################### -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Our network will recognize images. We will use a process built into @@ -64,7 +64,7 @@ # blurriness, etc.) from the input image. # # There are two requirements for defining the ``Net`` class of your model. -# The first is writing an ``__init__`` function that references +# The first is writing an __init__ function that references # ``nn.Module``. This function is where you define the fully connected # layers in your neural network. # @@ -140,7 +140,7 @@ def forward(self, x): x = self.dropout1(x) # Flatten x with start_dim=1 x = torch.flatten(x, 1) - # Pass data through fc1 + # Pass data through ``fc1`` x = self.fc1(x) x = F.relu(x) x = self.dropout2(x) diff --git a/recipes_source/recipes/dynamic_quantization.py b/recipes_source/recipes/dynamic_quantization.py index 648e756df94..cdb3d22da72 100644 --- a/recipes_source/recipes/dynamic_quantization.py +++ b/recipes_source/recipes/dynamic_quantization.py @@ -11,7 +11,7 @@ ------------- There are a number of trade-offs that can be made when designing neural -networks. During model developmenet and training you can alter the +networks. During model development and training you can alter the number of layers and number of parameters in a recurrent neural network and trade-off accuracy against model size and/or model latency or throughput. Such changes can take lot of time and compute resources @@ -108,7 +108,7 @@ 1: Set Up ~~~~~~~~~~~~~~~ -This is a straightfoward bit of code to set up for the rest of the +This is a straightforward bit of code to set up for the rest of the recipe. The unique module we are importing here is torch.quantization which @@ -126,12 +126,12 @@ import time # define a very, very simple LSTM for demonstration purposes -# in this case, we are wrapping nn.LSTM, one layer, no pre or post processing +# in this case, we are wrapping ``nn.LSTM``, one layer, no preprocessing or postprocessing # inspired by -# https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html, by Robert Guthrie -# and https://pytorch.org/tutorials/advanced/dynamic_quantization_tutorial.html +# `Sequence Models and Long Short-Term Memory Networks tutorial `__. 
class lstm_for_demonstration(nn.Module): - """Elementary Long Short Term Memory style model which simply wraps nn.LSTM + """Elementary Long Short Term Memory style model which simply wraps ``nn.LSTM`` Not to be used for anything other than demonstration. """ def __init__(self,in_dim,out_dim,depth): @@ -162,7 +162,7 @@ def forward(self,inputs,hidden): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Now we get to the fun part. First we create an instance of the model -# called float\_lstm then we are going to quantize it. We're going to use +# called ``float\_lstm`` then we are going to quantize it. We're going to use # the # # :: @@ -198,7 +198,7 @@ def forward(self,inputs,hidden): ###################################################################### # 3. Look at Model Size # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Ok, so we've quantized the model. What does that get us? Well the first +# We've quantized the model. What does that get us? Well the first # benefit is that we've replaced the FP32 model parameters with INT8 # values (and some recorded scale factors). This means about 75% less data # to store and move around. With the default values the reduction shown @@ -232,16 +232,24 @@ def print_size_of_model(model, label=""): # # As you will see the quantized version of this super-simple network runs # faster. This will generally be true of more complex networks but as they -# say "your milage may vary" depending on a number of factors including +# say "your mileage may vary" depending on a number of factors including # the structure of the model and the hardware you are running on. # # compare the performance print("Floating point FP32") -# %timeit float_lstm.forward(inputs, hidden) + +##################################################################### +# .. code-block:: python +# +# %timeit float_lstm.forward(inputs, hidden) print("Quantized INT8") -# %timeit quantized_lstm.forward(inputs,hidden) + +###################################################################### +# .. code-block:: python +# +# %timeit quantized_lstm.forward(inputs,hidden) ###################################################################### diff --git a/recipes_source/recipes/loading_data_recipe.py b/recipes_source/recipes/loading_data_recipe.py index cac42e9440c..63efbdc01ce 100644 --- a/recipes_source/recipes/loading_data_recipe.py +++ b/recipes_source/recipes/loading_data_recipe.py @@ -19,7 +19,7 @@ * `torchtext `__ with more to come. -Using the Yesno dataset from ``torchaudio.datasets.YESNO``, we will +Using the ``yesno`` dataset from ``torchaudio.datasets.YESNO``, we will demonstrate how to effectively and efficiently load data from a PyTorch ``Dataset`` into a PyTorch ``DataLoader``. """ @@ -66,11 +66,11 @@ # 2. Access the data in the dataset # --------------------------------------------------------------- # -# The Yesno dataset in ``torchaudio`` features sixty recordings of one +# The ``yesno`` dataset in ``torchaudio`` features sixty recordings of one # individual saying yes or no in Hebrew; with each recording being eight # words long (`read more here `__). # -# ``torchaudio.datasets.YESNO`` creates a dataset for YesNo. +# ``torchaudio.datasets.YESNO`` creates a dataset for ``yesno``. torchaudio.datasets.YESNO( root='./', url='http://www.openslr.org/resources/1/waves_yesno.tar.gz', @@ -81,21 +81,21 @@ # Each item in the dataset is a tuple of the form: (waveform, sample_rate, # labels). 
# -# You must set a ``root`` for the Yesno dataset, which is where the +# You must set a ``root`` for the ``yesno`` dataset, which is where the # training and testing dataset will exist. The other parameters are # optional, with their default values shown. Here is some additional # useful info on the other parameters: # * ``download``: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. # -# Let’s access our Yesno data: +# Let’s access our ``yesno`` data: # -# A data point in Yesno is a tuple (waveform, sample_rate, labels) where labels +# A data point in ``yesno`` is a tuple (waveform, sample_rate, labels) where labels # is a list of integers with 1 for yes and 0 for no. yesno_data = torchaudio.datasets.YESNO('./', download=True) -# Pick data point number 3 to see an example of the the yesno_data: +# Pick data point number 3 to see an example of the the ``yesno_data``: n = 3 waveform, sample_rate, labels = yesno_data[n] print("Waveform: {}\nSample rate: {}\nLabels: {}".format(waveform, sample_rate, labels)) diff --git a/recipes_source/recipes/profiler_recipe.py b/recipes_source/recipes/profiler_recipe.py index 4a53c7c0b59..a88ea87feca 100644 --- a/recipes_source/recipes/profiler_recipe.py +++ b/recipes_source/recipes/profiler_recipe.py @@ -34,7 +34,7 @@ # 4. Using profiler to analyze memory consumption # 5. Using tracing functionality # 6. Examining stack traces -# 7. Visualizing data as a flamegraph +# 7. Visualizing data as a flame graph # 8. Using profiler to analyze long-running jobs # # 1. Import all necessary libraries @@ -77,7 +77,7 @@ # - ``use_cuda`` - whether to measure execution time of CUDA kernels. # # Note: when using CUDA, profiler also shows the runtime CUDA events -# occuring on the host. +# occurring on the host. ###################################################################### # Let's see how we can use profiler to analyze the execution time: @@ -96,7 +96,7 @@ # If multiple profiler ranges are active at the same time (e.g. in # parallel PyTorch threads), each profiling context manager tracks only # the operators of its corresponding range. -# Profiler also automatically profiles the async tasks launched +# Profiler also automatically profiles the asynchronous tasks launched # with ``torch.jit._fork`` and (in case of a backward pass) # the backward pass operators launched with ``backward()`` call. # @@ -125,7 +125,7 @@ ###################################################################### # Here we see that, as expected, most of the time is spent in convolution (and specifically in ``mkldnn_convolution`` -# for PyTorch compiled with MKL-DNN support). +# for PyTorch compiled with ``MKL-DNN`` support). # Note the difference between self cpu time and cpu time - operators can call other operators, self cpu time excludes time # spent in children operator calls, while total cpu time includes it. You can choose to sort by the self cpu time by passing # ``sort_by="self_cpu_time_total"`` into the ``table`` call. 
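#
# As a short illustration (assuming the ``prof`` object recorded above),
# such a call might look like:
#
# .. code-block:: python
#
#    print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=10))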
@@ -135,25 +135,29 @@ print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=10)) -# (omitting some columns) -# --------------------------------- ------------ ------------------------------------------- -# Name CPU total Input Shapes -# --------------------------------- ------------ ------------------------------------------- -# model_inference 57.503ms [] -# aten::conv2d 8.008ms [5,64,56,56], [64,64,3,3], [], ..., []] -# aten::convolution 7.956ms [[5,64,56,56], [64,64,3,3], [], ..., []] -# aten::_convolution 7.909ms [[5,64,56,56], [64,64,3,3], [], ..., []] -# aten::mkldnn_convolution 7.834ms [[5,64,56,56], [64,64,3,3], [], ..., []] -# aten::conv2d 6.332ms [[5,512,7,7], [512,512,3,3], [], ..., []] -# aten::convolution 6.303ms [[5,512,7,7], [512,512,3,3], [], ..., []] -# aten::_convolution 6.273ms [[5,512,7,7], [512,512,3,3], [], ..., []] -# aten::mkldnn_convolution 6.233ms [[5,512,7,7], [512,512,3,3], [], ..., []] -# aten::conv2d 4.751ms [[5,256,14,14], [256,256,3,3], [], ..., []] -# --------------------------------- ------------ ------------------------------------------- -# Self CPU time total: 57.549ms +######################################################################################## +# The output might look like this (omitting some columns): +# +# :: +# +# --------------------------------- ------------ ------------------------------------------- +# Name CPU total Input Shapes +# --------------------------------- ------------ ------------------------------------------- +# model_inference 57.503ms [] +# aten::conv2d 8.008ms [5,64,56,56], [64,64,3,3], [], ..., []] +# aten::convolution 7.956ms [[5,64,56,56], [64,64,3,3], [], ..., []] +# aten::_convolution 7.909ms [[5,64,56,56], [64,64,3,3], [], ..., []] +# aten::mkldnn_convolution 7.834ms [[5,64,56,56], [64,64,3,3], [], ..., []] +# aten::conv2d 6.332ms [[5,512,7,7], [512,512,3,3], [], ..., []] +# aten::convolution 6.303ms [[5,512,7,7], [512,512,3,3], [], ..., []] +# aten::_convolution 6.273ms [[5,512,7,7], [512,512,3,3], [], ..., []] +# aten::mkldnn_convolution 6.233ms [[5,512,7,7], [512,512,3,3], [], ..., []] +# aten::conv2d 4.751ms [[5,256,14,14], [256,256,3,3], [], ..., []] +# --------------------------------- ------------ ------------------------------------------- +# Self CPU time total: 57.549ms ###################################################################### -# Note the occurence of ``aten::convolution`` twice with different input shapes. +# Note the occurrence of ``aten::convolution`` twice with different input shapes. ###################################################################### # Profiler can also be used to analyze performance of models executed on GPUs: @@ -172,28 +176,29 @@ # (Note: the first use of CUDA profiling may bring an extra overhead.) ###################################################################### -# The resulting table output: - -# (omitting some columns) -# ------------------------------------------------------- ------------ ------------ -# Name Self CUDA CUDA total -# ------------------------------------------------------- ------------ ------------ -# model_inference 0.000us 11.666ms -# aten::conv2d 0.000us 10.484ms -# aten::convolution 0.000us 10.484ms -# aten::_convolution 0.000us 10.484ms -# aten::_convolution_nogroup 0.000us 10.484ms -# aten::thnn_conv2d 0.000us 10.484ms -# aten::thnn_conv2d_forward 10.484ms 10.484ms -# void at::native::im2col_kernel(long, float co... 
3.844ms 3.844ms -# sgemm_32x32x32_NN 3.206ms 3.206ms -# sgemm_32x32x32_NN_vec 3.093ms 3.093ms -# ------------------------------------------------------- ------------ ------------ -# Self CPU time total: 23.015ms -# Self CUDA time total: 11.666ms +# The resulting table output (omitting some columns): +# +# :: +# +# ------------------------------------------------------- ------------ ------------ +# Name Self CUDA CUDA total +# ------------------------------------------------------- ------------ ------------ +# model_inference 0.000us 11.666ms +# aten::conv2d 0.000us 10.484ms +# aten::convolution 0.000us 10.484ms +# aten::_convolution 0.000us 10.484ms +# aten::_convolution_nogroup 0.000us 10.484ms +# aten::thnn_conv2d 0.000us 10.484ms +# aten::thnn_conv2d_forward 10.484ms 10.484ms +# void at::native::im2col_kernel(long, float co... 3.844ms 3.844ms +# sgemm_32x32x32_NN 3.206ms 3.206ms +# sgemm_32x32x32_NN_vec 3.093ms 3.093ms +# ------------------------------------------------------- ------------ ------------ +# Self CPU time total: 23.015ms +# Self CUDA time total: 11.666ms ###################################################################### -# Note the occurence of on-device kernels in the output (e.g. ``sgemm_32x32x32_NN``). +# Note the occurrence of on-device kernels in the output (e.g. ``sgemm_32x32x32_NN``). ###################################################################### # 4. Using profiler to analyze memory consumption @@ -233,28 +238,32 @@ print(prof.key_averages().table(sort_by="cpu_memory_usage", row_limit=10)) -# (omitting some columns) -# --------------------------------- ------------ ------------ ------------ -# Name CPU Mem Self CPU Mem # of Calls -# --------------------------------- ------------ ------------ ------------ -# aten::empty 94.79 Mb 94.79 Mb 121 -# aten::batch_norm 47.41 Mb 0 b 20 -# aten::_batch_norm_impl_index 47.41 Mb 0 b 20 -# aten::native_batch_norm 47.41 Mb 0 b 20 -# aten::conv2d 47.37 Mb 0 b 20 -# aten::convolution 47.37 Mb 0 b 20 -# aten::_convolution 47.37 Mb 0 b 20 -# aten::mkldnn_convolution 47.37 Mb 0 b 20 -# aten::max_pool2d 11.48 Mb 0 b 1 -# aten::max_pool2d_with_indices 11.48 Mb 11.48 Mb 1 -# --------------------------------- ------------ ------------ ------------ -# Self CPU time total: 53.064ms +############################################################################# +# The output might look like this (omitting some columns): +# +# :: +# +# --------------------------------- ------------ ------------ ------------ +# Name CPU Mem Self CPU Mem # of Calls +# --------------------------------- ------------ ------------ ------------ +# aten::empty 94.79 Mb 94.79 Mb 121 +# aten::batch_norm 47.41 Mb 0 b 20 +# aten::_batch_norm_impl_index 47.41 Mb 0 b 20 +# aten::native_batch_norm 47.41 Mb 0 b 20 +# aten::conv2d 47.37 Mb 0 b 20 +# aten::convolution 47.37 Mb 0 b 20 +# aten::_convolution 47.37 Mb 0 b 20 +# aten::mkldnn_convolution 47.37 Mb 0 b 20 +# aten::max_pool2d 11.48 Mb 0 b 1 +# aten::max_pool2d_with_indices 11.48 Mb 11.48 Mb 1 +# --------------------------------- ------------ ------------ ------------ +# Self CPU time total: 53.064ms ###################################################################### # 5. 
Using tracing functionality # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# Profiling results can be outputted as a .json trace file: +# Profiling results can be outputted as a ``.json`` trace file: model = models.resnet18().cuda() inputs = torch.randn(5, 3, 224, 224).cuda() @@ -286,34 +295,36 @@ # Print aggregated stats print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cuda_time_total", row_limit=2)) -# (omitting some columns) -# ------------------------- ----------------------------------------------------------- -# Name Source Location -# ------------------------- ----------------------------------------------------------- -# aten::thnn_conv2d_forward .../torch/nn/modules/conv.py(439): _conv_forward -# .../torch/nn/modules/conv.py(443): forward -# .../torch/nn/modules/module.py(1051): _call_impl -# .../site-packages/torchvision/models/resnet.py(63): forward -# .../torch/nn/modules/module.py(1051): _call_impl -# -# aten::thnn_conv2d_forward .../torch/nn/modules/conv.py(439): _conv_forward -# .../torch/nn/modules/conv.py(443): forward -# .../torch/nn/modules/module.py(1051): _call_impl -# .../site-packages/torchvision/models/resnet.py(59): forward -# .../torch/nn/modules/module.py(1051): _call_impl -# -# ------------------------- ----------------------------------------------------------- -# Self CPU time total: 34.016ms -# Self CUDA time total: 11.659ms +################################################################################# +# The output might look like this (omitting some columns): +# +# :: +# +# ------------------------- ----------------------------------------------------------- +# Name Source Location +# ------------------------- ----------------------------------------------------------- +# aten::thnn_conv2d_forward .../torch/nn/modules/conv.py(439): _conv_forward +# .../torch/nn/modules/conv.py(443): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# .../site-packages/torchvision/models/resnet.py(63): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# aten::thnn_conv2d_forward .../torch/nn/modules/conv.py(439): _conv_forward +# .../torch/nn/modules/conv.py(443): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# .../site-packages/torchvision/models/resnet.py(59): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# ------------------------- ----------------------------------------------------------- +# Self CPU time total: 34.016ms +# Self CUDA time total: 11.659ms ###################################################################### -# Note the two convolutions and the two callsites in ``torchvision/models/resnet.py`` script. +# Note the two convolutions and the two call sites in ``torchvision/models/resnet.py`` script. # # (Warning: stack tracing adds an extra profiling overhead.) ###################################################################### -# 7. Visualizing data as a flamegraph +# 7. Visualizing data as a flame graph # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Execution time (``self_cpu_time_total`` and ``self_cuda_time_total`` metrics) and stack traces @@ -322,12 +333,14 @@ prof.export_stacks("/tmp/profiler_stacks.txt", "self_cuda_time_total") ###################################################################### -# We recommend using e.g. `Flamegraph tool `_ to generate an -# interactive SVG: - -# git clone https://github.com/brendangregg/FlameGraph -# cd FlameGraph -# ./flamegraph.pl --title "CUDA time" --countname "us." 
/tmp/profiler_stacks.txt > perf_viz.svg +# We recommend using `Flamegraph tool `_ to generate an +# interactive ``.svg`` file: +# +# .. code-block:: sh +# +# git clone https://github.com/brendangregg/FlameGraph +# cd FlameGraph +# ./flamegraph.pl --title "CUDA time" --countname "us." /tmp/profiler_stacks.txt > perf_viz.svg ###################################################################### # diff --git a/recipes_source/recipes/save_load_across_devices.py b/recipes_source/recipes/save_load_across_devices.py index c2d86fbab50..cd311a62365 100644 --- a/recipes_source/recipes/save_load_across_devices.py +++ b/recipes_source/recipes/save_load_across_devices.py @@ -31,7 +31,7 @@ # ----- # # 1. Import all necessary libraries for loading our data -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # 3. Save on a GPU, load on a CPU # 4. Save on a GPU, load on a GPU # 5. Save on a CPU, load on a GPU @@ -50,7 +50,7 @@ ###################################################################### -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # For sake of example, we will create a neural network for training @@ -180,11 +180,3 @@ def forward(self, x): # Congratulations! You have successfully saved and loaded models across # devices in PyTorch. # -# Learn More -# ---------- -# -# Take a look at these other recipes to continue your learning: -# -# - TBD -# - TBD -# diff --git a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py index cc872ae8042..31b14f3a28a 100644 --- a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py +++ b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py @@ -152,11 +152,4 @@ def forward(self, x): # # Congratulations! You have successfully saved and loaded a general # checkpoint for inference and/or resuming training in PyTorch. -# -# Learn More -# ---------- -# -# Take a look at these other recipes to continue your learning: -# -# - TBD -# - TBD +# diff --git a/recipes_source/recipes/saving_and_loading_models_for_inference.py b/recipes_source/recipes/saving_and_loading_models_for_inference.py index 22ae01aec76..cd24b77c1de 100644 --- a/recipes_source/recipes/saving_and_loading_models_for_inference.py +++ b/recipes_source/recipes/saving_and_loading_models_for_inference.py @@ -44,7 +44,7 @@ # ----- # # 1. Import all necessary libraries for loading our data -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # 3. Initialize the optimizer # 4. Save and load the model via ``state_dict`` # 5. Save and load the entire model @@ -62,7 +62,7 @@ ###################################################################### -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # For sake of example, we will create a neural network for training @@ -153,7 +153,7 @@ def forward(self, x): ###################################################################### -# Again here, remember that you must call model.eval() to set dropout and +# Again here, remember that you must call ``model.eval()`` to set dropout and # batch normalization layers to evaluation mode before running inference. # # Congratulations! 
You have successfully saved and load models for diff --git a/recipes_source/recipes/saving_multiple_models_in_one_file.py b/recipes_source/recipes/saving_multiple_models_in_one_file.py index 24f466f76fb..aeff7803969 100644 --- a/recipes_source/recipes/saving_multiple_models_in_one_file.py +++ b/recipes_source/recipes/saving_multiple_models_in_one_file.py @@ -36,7 +36,7 @@ # ----- # # 1. Import all necessary libraries for loading our data -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # 3. Initialize the optimizer # 4. Save multiple models # 5. Load multiple models @@ -151,12 +151,4 @@ def forward(self, x): # # Congratulations! You have successfully saved and loaded multiple models # in PyTorch. -# -# Learn More -# ---------- -# -# Take a look at these other recipes to continue your learning: -# -# - TBD -# - TBD -# +# diff --git a/recipes_source/recipes/tensorboard_with_pytorch.py b/recipes_source/recipes/tensorboard_with_pytorch.py index e3b055ac24c..3b9455b7f44 100644 --- a/recipes_source/recipes/tensorboard_with_pytorch.py +++ b/recipes_source/recipes/tensorboard_with_pytorch.py @@ -95,25 +95,25 @@ def train_model(iter): # # Install TensorBoard through the command line to visualize data you logged # -# :: -# -# $ pip install tensorboard -# +# .. code-block:: sh +# +# pip install tensorboard +# # # Now, start TensorBoard, specifying the root log directory you used above. # Argument ``logdir`` points to directory where TensorBoard will look to find # event files that it can display. TensorBoard will recursively walk -# the directory structure rooted at logdir, looking for .*tfevents.* files. +# the directory structure rooted at ``logdir``, looking for ``.*tfevents.*`` files. +# +# .. code-block:: sh +# +# tensorboard --logdir=runs # -# :: -# -# $ tensorboard --logdir=runs -# # Go to the URL it provides OR to `http://localhost:6006/ `_ -# +# # .. image:: ../../_static/img/thumbnails/tensorboard_scalars.png # :scale: 40 % -# +# # This dashboard shows how the loss and accuracy change with every epoch. # You can use it to also track training speed, learning rate, and other # scalar values. It’s helpful to compare these metrics across different @@ -123,33 +123,33 @@ def train_model(iter): ###################################################################### # Share TensorBoard dashboards -# ----- -# +# ---------------------------- +# # `TensorBoard.dev `_ lets you upload and share -# your ML experiment results with anyone. Use TensorBoard.dev to host, +# your ML experiment results with anyone. Use ``TensorBoard.dev`` to host, # track, and share your TensorBoard dashboards. -# -# Install the latest version of TensorBoard to use the uploader. # -# :: -# -# $ pip install tensorboard --upgrade +# Install the latest version of TensorBoard to use the ``uploader``. +# +# .. code-block:: sh +# +# pip install tensorboard --upgrade # # Use a simple command to upload and share your TensorBoard. # -# :: -# -# $ tensorboard dev upload --logdir runs \ -# --name "My latest experiment" \ # optional -# --description "Simple comparison of several hyperparameters" # optional -# +# .. code-block:: sh +# +# tensorboard dev upload --logdir runs \ +# --name "My latest experiment" \ # optional +# --description "Simple comparison of several hyperparameters" # optional +# # For help, run ``$ tensorboard dev --help``. # -# **Note:** Uploaded TensorBoards are public and visible to everyone. +# **Note:** Uploaded TensorBoards are public and visible to everyone. 
# Do not upload sensitive data. # -# View your TensorBoard live at URL provided in your terminal. -# E.g. `https://tensorboard.dev/experiment/AdYd1TgeTlaLWXx6I8JUbA `_ +# View your TensorBoard live at URL provided in your terminal. +# For example: `https://tensorboard.dev/experiment/AdYd1TgeTlaLWXx6I8JUbA `_ # # # .. image:: ../../_static/img/thumbnails/tensorboard_dev.png @@ -157,7 +157,7 @@ def train_model(iter): # # # .. note:: -# TensorBoard.dev currently supports scalars, graphs, histograms, distributions, hparams, and text dashboards. +# ``TensorBoard.dev`` currently supports scalars, graphs, histograms, distributions, ``hparams``, and text dashboards. ######################################################################## # Learn More diff --git a/recipes_source/recipes/timer_quick_start.py b/recipes_source/recipes/timer_quick_start.py index 15887263938..b93e13dcbd2 100644 --- a/recipes_source/recipes/timer_quick_start.py +++ b/recipes_source/recipes/timer_quick_start.py @@ -9,16 +9,15 @@ builtin `Timer` class is not required for this tutorial, however we assume that the reader is familiar with the fundamentals of performance work. -A more comprehensive performace tuning tutorial is available at: - - https://pytorch.org/tutorials/recipes/recipes/benchmark.html +For a more comprehensive performance tuning tutorial, see +`PyTorch Benchmark `__. **Contents:** 1. `Defining a Timer <#defining-a-timer>`__ - 2. `Wall time: \`Timer.blocked_autorange(...)\` <#wall-time-timer-blocked-autorange>`__ + 2. `Wall time: Timer.blocked_autorange(...) <#wall-time-timer-blocked-autorange>`__ 3. `C++ snippets <#c-snippets>`__ - 4. `Instruction counts: \`Timer.collect_callgrind(...)\` <#instruction-counts-timer-collect-callgrind>`__ + 4. `Instruction counts: Timer.collect_callgrind(...) <#instruction-counts-timer-collect-callgrind>`__ 5. `Instruction counts: Delving deeper <#instruction-counts-delving-deeper>`__ 6. `A/B testing with Callgrind <#a-b-testing-with-callgrind>`__ 7. `Wrapping up <#wrapping-up>`__ @@ -46,20 +45,22 @@ y = torch.ones((128,)) """, - # Alternately, `globals` can be used to pass variables from the outer scope. + # Alternatively, ``globals`` can be used to pass variables from the outer scope. # ------------------------------------------------------------------------- - # globals={ - # "x": torch.ones((128,)), - # "y": torch.ones((128,)), - # }, + # :: + # + # globals={ + # "x": torch.ones((128,)), + # "y": torch.ones((128,)), + # }, # Control the number of threads that PyTorch uses. (Default: 1) num_threads=1, ) ############################################################################### -# 2. Wall time: `Timer.blocked_autorange(...)` -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 2. Wall time: ``Timer.blocked_autorange(...)`` +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # This method will handle details such as picking a suitable number if repeats, # fixing the number of threads, and providing a convenient representation of @@ -126,11 +127,11 @@ # ############################################################################### -# 4. Instruction counts: `Timer.collect_callgrind(...)` -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 4. 
Instruction counts: ``Timer.collect_callgrind(...)`` +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# For deep dive investigations, `Timer.collect_callgrind` wraps -# `Callgrind ` in order to +# For deep dive investigations, ``Timer.collect_callgrind`` wraps +# `Callgrind `__ in order to # collect instruction counts. These are useful as they offer fine grained and # deterministic (or very low noise in the case of Python) insights into how a # snippet is run. @@ -161,13 +162,13 @@ # 5. Instruction counts: Delving deeper # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# The string representation of CallgrindStats is similar to that of +# The string representation of ``CallgrindStats`` is similar to that of # Measurement. `Noisy symbols` are a Python concept (removing calls in the # CPython interpreter which are known to be noisy). # # For more detailed analysis, however, we will want to look at specific calls. -# `CallgrindStats.stats()` returns a FunctionCounts object to make this easier. -# Conceptually, FunctionCounts can be thought of as a tuple of pairs with some +# ``CallgrindStats.stats()`` returns a ``FunctionCounts`` object to make this easier. +# Conceptually, ``FunctionCounts`` can be thought of as a tuple of pairs with some # utility methods, where each pair is `(number of instructions, file path and # function name)`. # @@ -175,14 +176,18 @@ # One generally doesn't care about absolute path. For instance, the full path # and function name for a multiply call is something like: # -# /the/prefix/to/your/pytorch/install/dir/pytorch/build/aten/src/ATen/core/TensorMethods.cpp:at::Tensor::mul(at::Tensor const&) const [/the/path/to/your/conda/install/miniconda3/envs/ab_ref/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so] +# :: +# +# /the/prefix/to/your/pytorch/install/dir/pytorch/build/aten/src/ATen/core/TensorMethods.cpp:at::Tensor::mul(at::Tensor const&) const [/the/path/to/your/conda/install/miniconda3/envs/ab_ref/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so] # # when in reality, all of the information that we're interested in can be # represented in: # -# build/aten/src/ATen/core/TensorMethods.cpp:at::Tensor::mul(at::Tensor const&) const +# :: +# +# build/aten/src/ATen/core/TensorMethods.cpp:at::Tensor::mul(at::Tensor const&) const # -# CallgrindStats.as_standardized() makes a best effort to strip low signal +# ``CallgrindStats.as_standardized()`` makes a best effort to strip low signal # portions of the file path, as well as the shared object and is generally # recommended. # @@ -251,8 +256,8 @@ def group_by_file(fn_name: str): # ############################################################################### -# 6. A/B testing with Callgrind -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 6. A/B testing with ``Callgrind`` +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # One of the most useful features of instruction counts is they allow fine # grained comparison of computation, which is critical when analyzing @@ -275,8 +280,8 @@ def group_by_file(fn_name: str): ############################################################################### # Often we want to A/B test two different environments. (e.g. testing a PR, or -# experimenting with compile flags.) This is quite simple, as CallgrindStats, -# FunctionCounts, and Measurement are all pickleable. Simply save measurements +# experimenting with compile flags.) This is quite simple, as ``CallgrindStats``, +# ``FunctionCounts``, and Measurement are all pickleable. 
Simply save measurements # from each environment, and load them in a single process for analysis. # @@ -329,7 +334,7 @@ def extract_fn_name(fn: str): ############################################################################### # So the broadcasting version takes an extra 580 instructions per call (recall # that we're collecting 100 runs per sample), or about 10%. There are quite a -# few TensorIterator calls, so lets drill down to those. FunctionCounts.filter +# few ``TensorIterator`` calls, so lets drill down to those. ``FunctionCounts.filter`` # makes this easy. # @@ -356,7 +361,7 @@ def extract_fn_name(fn: str): # ############################################################################### -# This makes plain what is going on: there is a fast path in TensorIterator +# This makes plain what is going on: there is a fast path in ``TensorIterator`` # setup, but in the {128} x {1} case we miss it and have to do a more general # analysis which is more expensive. The most prominent call omitted by the # filter is `c10::SmallVectorImpl::operator=(...)`, which is also part @@ -380,15 +385,15 @@ def extract_fn_name(fn: str): # 8. Footnotes # ~~~~~~~~~~~~ # -# - Implied `import torch` +# - Implied ``import torch`` # If `globals` does not contain "torch", Timer will automatically -# populate it. This means that `Timer("torch.empty(())")` will work. +# populate it. This means that ``Timer("torch.empty(())")`` will work. # (Though other imports should be placed in `setup`, -# e.g. `Timer("np.zeros(())", "import numpy as np")`) +# e.g. ``Timer("np.zeros(())", "import numpy as np")``) # -# - REL_WITH_DEB_INFO +# - ``REL_WITH_DEB_INFO`` # In order to provide full information about the PyTorch internals which -# are executed, Callgrind needs access to C++ debug symbols. This is -# accomplished by setting REL_WITH_DEB_INFO=1 when building PyTorch. -# Otherwise function calls will be opaque. (The resultant CallgrindStats +# are executed, ``Callgrind`` needs access to C++ debug symbols. This is +# accomplished by setting ``REL_WITH_DEB_INFO=1`` when building PyTorch. +# Otherwise function calls will be opaque. (The resultant ``CallgrindStats`` # will warn if debug symbols are missing.) diff --git a/recipes_source/recipes/tuning_guide.py b/recipes_source/recipes/tuning_guide.py index 66deb01416b..7c8aa135b11 100644 --- a/recipes_source/recipes/tuning_guide.py +++ b/recipes_source/recipes/tuning_guide.py @@ -13,8 +13,8 @@ """ ############################################################################### -# Enable async data loading and augmentation -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Enable asynchronous data loading and augmentation +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # `torch.utils.data.DataLoader `_ # supports asynchronous data loading and data augmentation in separate worker # subprocesses. The default setting for ``DataLoader`` is ``num_workers=0``, @@ -172,7 +172,7 @@ def fused_gelu(x): # * profiler related: # `torch.autograd.profiler.emit_nvtx `_, # `torch.autograd.profiler.profile `_ -# * autograd gradcheck: +# * autograd ``gradcheck``: # `torch.autograd.gradcheck `_ # or # `torch.autograd.gradgradcheck `_ @@ -188,8 +188,10 @@ def fused_gelu(x): # NUMA or non-uniform memory access is a memory layout design used in data center machines meant to take advantage of locality of memory in multi-socket machines with multiple memory controllers and blocks. 
Generally speaking, all deep learning workloads, training or inference, get better performance without accessing hardware resources across NUMA nodes. Thus, inference can be run with multiple instances, each instance runs on one socket, to raise throughput. For training tasks on single node, distributed training is recommended to make each training process run on one socket. # # In general cases the following command executes a PyTorch script on cores on the Nth node only, and avoids cross-socket memory access to reduce memory access overhead. - -# numactl --cpunodebind=N --membind=N python +# +# .. code-block:: sh +# +# numactl --cpunodebind=N --membind=N python ############################################################################### # More detailed descriptions can be found `here `_. @@ -198,49 +200,59 @@ def fused_gelu(x): # Utilize OpenMP # ~~~~~~~~~~~~~~ # OpenMP is utilized to bring better performance for parallel computation tasks. -# OMP_NUM_THREADS is the easiest switch that can be used to accelerate computations. It determines number of threads used for OpenMP computations. -# CPU affinity setting controls how workloads are distributed over multiple cores. It affects communication overhead, cache line invalidation overhead, or page thrashing, thus proper setting of CPU affinity brings performance benefits. GOMP_CPU_AFFINITY or KMP_AFFINITY determines how to bind OpenMP* threads to physical processing units. Detailed information can be found `here `_. +# ``OMP_NUM_THREADS`` is the easiest switch that can be used to accelerate computations. It determines number of threads used for OpenMP computations. +# CPU affinity setting controls how workloads are distributed over multiple cores. It affects communication overhead, cache line invalidation overhead, or page thrashing, thus proper setting of CPU affinity brings performance benefits. ``GOMP_CPU_AFFINITY`` or ``KMP_AFFINITY`` determines how to bind OpenMP* threads to physical processing units. Detailed information can be found `here `_. ############################################################################### # With the following command, PyTorch run the task on N OpenMP threads. - -# export OMP_NUM_THREADS=N +# +# .. code-block:: sh +# +# export OMP_NUM_THREADS=N ############################################################################### -# Typically, the following environment variables are used to set for CPU affinity with GNU OpenMP implementation. OMP_PROC_BIND specifies whether threads may be moved between processors. Setting it to CLOSE keeps OpenMP threads close to the primary thread in contiguous place partitions. OMP_SCHEDULE determines how OpenMP threads are scheduled. GOMP_CPU_AFFINITY binds threads to specific CPUs. - -# export OMP_SCHEDULE=STATIC -# export OMP_PROC_BIND=CLOSE -# export GOMP_CPU_AFFINITY="N-M" +# Typically, the following environment variables are used to set for CPU affinity with GNU OpenMP implementation. ``OMP_PROC_BIND`` specifies whether threads may be moved between processors. Setting it to CLOSE keeps OpenMP threads close to the primary thread in contiguous place partitions. ``OMP_SCHEDULE`` determines how OpenMP threads are scheduled. ``GOMP_CPU_AFFINITY`` binds threads to specific CPUs. +# +# .. 
code-block:: sh +# +# export OMP_SCHEDULE=STATIC +# export OMP_PROC_BIND=CLOSE +# export GOMP_CPU_AFFINITY="N-M" ############################################################################### -# Intel OpenMP Runtime Library (libiomp) -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# By default, PyTorch uses GNU OpenMP (GNU libgomp) for parallel computation. On Intel platforms, Intel OpenMP Runtime Library (libiomp) provides OpenMP API specification support. It sometimes brings more performance benefits compared to libgomp. Utilizing environment variable LD_PRELOAD can switch OpenMP library to libiomp: - -# export LD_PRELOAD=/libiomp5.so:$LD_PRELOAD +# Intel OpenMP Runtime Library (``libiomp``) +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# By default, PyTorch uses GNU OpenMP (GNU ``libgomp``) for parallel computation. On Intel platforms, Intel OpenMP Runtime Library (``libiomp``) provides OpenMP API specification support. It sometimes brings more performance benefits compared to ``libgomp``. Utilizing environment variable ``LD_PRELOAD`` can switch OpenMP library to ``libiomp``: +# +# .. code-block:: sh +# +# export LD_PRELOAD=/libiomp5.so:$LD_PRELOAD ############################################################################### -# Similar to CPU affinity settings in GNU OpenMP, environment variables are provided in libiomp to control CPU affinity settings. -# KMP_AFFINITY binds OpenMP threads to physical processing units. KMP_BLOCKTIME sets the time, in milliseconds, that a thread should wait, after completing the execution of a parallel region, before sleeping. In most cases, setting KMP_BLOCKTIME to 1 or 0 yields good performances. +# Similar to CPU affinity settings in GNU OpenMP, environment variables are provided in ``libiomp`` to control CPU affinity settings. +# ``KMP_AFFINITY`` binds OpenMP threads to physical processing units. ``KMP_BLOCKTIME`` sets the time, in milliseconds, that a thread should wait, after completing the execution of a parallel region, before sleeping. In most cases, setting ``KMP_BLOCKTIME`` to 1 or 0 yields good performances. # The following commands show a common settings with Intel OpenMP Runtime Library. - -# export KMP_AFFINITY=granularity=fine,compact,1,0 -# export KMP_BLOCKTIME=1 +# +# .. code-block:: sh +# +# export KMP_AFFINITY=granularity=fine,compact,1,0 +# export KMP_BLOCKTIME=1 ############################################################################### # Switch Memory allocator # ~~~~~~~~~~~~~~~~~~~~~~~ -# For deep learning workloads, Jemalloc or TCMalloc can get better performance by reusing memory as much as possible than default malloc funtion. `Jemalloc `_ is a general purpose malloc implementation that emphasizes fragmentation avoidance and scalable concurrency support. `TCMalloc `_ also features a couple of optimizations to speed up program executions. One of them is holding memory in caches to speed up access of commonly-used objects. Holding such caches even after deallocation also helps avoid costly system calls if such memory is later re-allocated. -# Use environment variable LD_PRELOAD to take advantage of one of them. - -# export LD_PRELOAD=:$LD_PRELOAD +# For deep learning workloads, ``Jemalloc`` or ``TCMalloc`` can get better performance by reusing memory as much as possible than default ``malloc`` function. `Jemalloc `_ is a general purpose ``malloc`` implementation that emphasizes fragmentation avoidance and scalable concurrency support. `TCMalloc `_ also features a couple of optimizations to speed up program executions. 
One of them is holding memory in caches to speed up access of commonly-used objects. Holding such caches even after deallocation also helps avoid costly system calls if such memory is later re-allocated. +# Use environment variable ``LD_PRELOAD`` to take advantage of one of them. +# +# .. code-block:: sh +# +# export LD_PRELOAD=:$LD_PRELOAD ############################################################################### # Use oneDNN Graph with TorchScript for inference # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # oneDNN Graph can significantly boost inference performance. It fuses some compute-intensive operations such as convolution, matmul with their neighbor operations. -# In PyTorch 2.0, it is supported as a beta feature for Float32 & BFloat16 data-types. +# In PyTorch 2.0, it is supported as a beta feature for ``Float32`` & ``BFloat16`` data-types. # oneDNN Graph receives the model’s graph and identifies candidates for operator-fusion with respect to the shape of the example input. # A model should be JIT-traced using an example input. # Speed-up would then be observed after a couple of warm-up iterations for inputs with the same shape as the example input. @@ -255,7 +267,7 @@ def fused_gelu(x): # sample input should be of the same shape as expected inputs sample_input = [torch.rand(32, 3, 224, 224)] -# Using resnet50 from TorchVision in this example for illustrative purposes, +# Using resnet50 from torchvision in this example for illustrative purposes, # but the line below can indeed be modified to use custom models as well. model = getattr(torchvision.models, "resnet50")().eval() # Tracing the model with example input @@ -267,16 +279,17 @@ def fused_gelu(x): # Once a model is JIT-traced with a sample input, it can then be used for inference after a couple of warm-up runs. with torch.no_grad(): - # a couple of warmup runs + # a couple of warm-up runs traced_model(*sample_input) traced_model(*sample_input) - # speedup would be observed after warmup runs + # speedup would be observed after warm-up runs traced_model(*sample_input) ############################################################################### -# While the JIT fuser for oneDNN Graph also supports inference with BFloat16 datatype, -# performance benefit with oneDNN Graph is only exhibited by machines with AVX512_BF16 ISA. -# The following code snippets serves as an example of using BFloat16 datatype for inference with oneDNN Graph: +# While the JIT fuser for oneDNN Graph also supports inference with ``BFloat16`` datatype, +# performance benefit with oneDNN Graph is only exhibited by machines with AVX512_BF16 +# instruction set architecture (ISA). +# The following code snippets serves as an example of using ``BFloat16`` datatype for inference with oneDNN Graph: # AMP for JIT mode is enabled by default, and is divergent with its eager mode counterpart torch._C._jit_set_autocast_mode(False) @@ -284,7 +297,7 @@ def fused_gelu(x): with torch.no_grad(), torch.cpu.amp.autocast(cache_enabled=False, dtype=torch.bfloat16): model = torch.jit.trace(model, (example_input)) model = torch.jit.freeze(model) - # a couple of warmup runs + # a couple of warm-up runs model(example_input) model(example_input) # speedup would be observed in subsequent runs. 
@@ -292,9 +305,9 @@ def fused_gelu(x): ############################################################################### -# Train a model on CPU with PyTorch DistributedDataParallel(DDP) functionality -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# For small scale models or memory-bound models, such as DLRM, training on CPU is also a good choice. On a machine with multiple sockets, distributed training brings a high-efficient hardware resource usage to accelerate the training process. `Torch-ccl `_, optimized with Intel(R) oneCCL (collective commnications library) for efficient distributed deep learning training implementing such collectives like allreduce, allgather, alltoall, implements PyTorch C10D ProcessGroup API and can be dynamically loaded as external ProcessGroup. Upon optimizations implemented in PyTorch DDP moduel, torhc-ccl accelerates communication operations. Beside the optimizations made to communication kernels, torch-ccl also features simultaneous computation-communication functionality. +# Train a model on CPU with PyTorch ``DistributedDataParallel``(DDP) functionality +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# For small scale models or memory-bound models, such as DLRM, training on CPU is also a good choice. On a machine with multiple sockets, distributed training brings a high-efficient hardware resource usage to accelerate the training process. `Torch-ccl `_, optimized with Intel(R) ``oneCCL`` (collective communications library) for efficient distributed deep learning training implementing such collectives like ``allreduce``, ``allgather``, ``alltoall``, implements PyTorch C10D ``ProcessGroup`` API and can be dynamically loaded as external ``ProcessGroup``. Upon optimizations implemented in PyTorch DDP module, ``torch-ccl`` accelerates communication operations. Beside the optimizations made to communication kernels, ``torch-ccl`` also features simultaneous computation-communication functionality. ############################################################################### # GPU specific optimizations @@ -335,7 +348,7 @@ def fused_gelu(x): # * memory copies: ``tensor.cuda()``, ``cuda_tensor.cpu()`` and equivalent # ``tensor.to(device)`` calls # * ``cuda_tensor.nonzero()`` -# * python control flow which depends on results of operations performed on cuda +# * python control flow which depends on results of operations performed on CUDA # tensors e.g. ``if (cuda_tensor != 0).all()`` # @@ -386,7 +399,7 @@ def fused_gelu(x): # ############################################################################### -# Pre-allocate memory in case of variable input length +# Preallocate memory in case of variable input length # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Models for speech recognition or for NLP are often trained on input tensors # with variable sequence length. Variable length can be problematic for PyTorch @@ -397,14 +410,14 @@ def fused_gelu(x): # buffers. This process is time consuming and causes fragmentation in the # caching allocator which may result in out-of-memory errors. # -# A typical solution is to implement pre-allocation. It consists of the +# A typical solution is to implement preallocation. It consists of the # following steps: # # #. generate a (usually random) batch of inputs with maximum sequence length # (either corresponding to max length in the training dataset or to some # predefined threshold) # #. 
execute a forward and a backward pass with the generated batch, do not -# execute an optimizer or a learning rate scheduler, this step pre-allocates +# execute an optimizer or a learning rate scheduler, this step preallocates # buffers of maximum size, which can be reused in subsequent # training iterations # #. zero out gradients @@ -429,8 +442,8 @@ def fused_gelu(x): # from PyTorch documentation. ############################################################################### -# Skip unnecessary all-reduce if training with DistributedDataParallel and gradient accumulation -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Skip unnecessary all-reduce if training with ``DistributedDataParallel`` and gradient accumulation +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # By default # `torch.nn.parallel.DistributedDataParallel `_ # executes gradient all-reduce after every backward pass to compute the average @@ -447,7 +460,7 @@ def fused_gelu(x): # perform the required gradient all-reduce. ############################################################################### -# Match the order of layers in constructors and during the execution if using DistributedDataParallel(find_unused_parameters=True) +# Match the order of layers in constructors and during the execution if using ``DistributedDataParallel``(find_unused_parameters=True) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # `torch.nn.parallel.DistributedDataParallel `_ # with ``find_unused_parameters=True`` uses the order of layers and parameters diff --git a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py index 410bc992c6e..c04ae7d4be4 100644 --- a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py +++ b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py @@ -34,7 +34,7 @@ # ----- # # 1. Import all necessary libraries for loading our data -# 2. Define and intialize the neural network A and B +# 2. Define and initialize the neural network A and B # 3. Save model A # 4. Load into model B # @@ -51,7 +51,7 @@ ###################################################################### -# 2. Define and intialize the neural network A and B +# 2. Define and initialize the neural network A and B # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # For sake of example, we will create a neural network for training diff --git a/recipes_source/recipes/what_is_state_dict.py b/recipes_source/recipes/what_is_state_dict.py index 5e7f259fd7b..838d0c0d4ff 100644 --- a/recipes_source/recipes/what_is_state_dict.py +++ b/recipes_source/recipes/what_is_state_dict.py @@ -39,7 +39,7 @@ # ----- # # 1. Import all necessary libraries for loading our data -# 2. Define and intialize the neural network +# 2. Define and initialize the neural network # 3. Initialize the optimizer # 4. Access the model and optimizer ``state_dict`` # @@ -56,7 +56,7 @@ ###################################################################### -# 2. Define and intialize the neural network +# 2. 
Define and initialize the neural network # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # For sake of example, we will create a neural network for training diff --git a/recipes_source/recipes/zeroing_out_gradients.py b/recipes_source/recipes/zeroing_out_gradients.py index f080bf30bd0..b3c25654d93 100644 --- a/recipes_source/recipes/zeroing_out_gradients.py +++ b/recipes_source/recipes/zeroing_out_gradients.py @@ -28,7 +28,7 @@ Setup ----- -Since we will be training data in this recipe, if you are in a runable +Since we will be training data in this recipe, if you are in a runnable notebook, it is best to switch the runtime to GPU or TPU. Before we begin, we need to install ``torch`` and ``torchvision`` if they aren’t already available. @@ -180,7 +180,7 @@ def forward(self, x): ###################################################################### # You can also use ``model.zero_grad()``. This is the same as using # ``optimizer.zero_grad()`` as long as all your model parameters are in -# that optimizer. Use your best judgement to decide which one to use. +# that optimizer. Use your best judgment to decide which one to use. # # Congratulations! You have successfully zeroed out gradients PyTorch. # From 5de40c6e29b5179657042923bcfec8ec17fdfa51 Mon Sep 17 00:00:00 2001 From: Junghwan Park <9343724+9bow@users.noreply.github.com> Date: Thu, 11 May 2023 00:08:52 +0900 Subject: [PATCH 026/609] FIX: replace word from option to operator (#2310) (#2314) --- beginner_source/basics/tensorqs_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/basics/tensorqs_tutorial.py b/beginner_source/basics/tensorqs_tutorial.py index 3e870f3f0c2..1a086fc5ad8 100644 --- a/beginner_source/basics/tensorqs_tutorial.py +++ b/beginner_source/basics/tensorqs_tutorial.py @@ -133,7 +133,7 @@ ###################################################################### # **Joining tensors** You can use ``torch.cat`` to concatenate a sequence of tensors along a given dimension. # See also `torch.stack `__, -# another tensor joining option that is subtly different from ``torch.cat``. +# another tensor joining operator that is subtly different from ``torch.cat``. t1 = torch.cat([tensor, tensor, tensor], dim=1) print(t1) From 5f0556a36d4cbd3b8af1451fe5e9bd8e497db90e Mon Sep 17 00:00:00 2001 From: Suraj Subramanian <5676233+subramen@users.noreply.github.com> Date: Thu, 11 May 2023 11:36:14 -0400 Subject: [PATCH 027/609] Add dist_tuto to resources (#2317) Co-authored-by: Svetlana Karslioglu --- beginner_source/ddp_series_theory.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/beginner_source/ddp_series_theory.rst b/beginner_source/ddp_series_theory.rst index 7c22352f70e..7a0226fc46a 100644 --- a/beginner_source/ddp_series_theory.rst +++ b/beginner_source/ddp_series_theory.rst @@ -37,6 +37,8 @@ ensures each device gets a non-overlapping input batch. The model is replicated each replica calculates gradients and simultaneously synchronizes with the others using the `ring all-reduce algorithm `__. +This `illustrative tutorial `__ provides a more in-depth python view of the mechanics of DDP. 
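As an editorial illustration only (this sketch is not part of the patch above; the
``nccl`` backend, the one-GPU-per-process mapping, and the ``model``/``dataset``
arguments are assumptions), the mechanics described here roughly correspond to the
following minimal DDP training function, launched once per device with ``torchrun``
or ``mp.spawn`` and the usual ``MASTER_ADDR``/``MASTER_PORT`` environment variables set:

.. code-block:: python

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP
    from torch.utils.data import DataLoader, DistributedSampler

    def run(rank, world_size, model, dataset):
        # one process per GPU; ``rank`` identifies this replica
        dist.init_process_group("nccl", rank=rank, world_size=world_size)
        ddp_model = DDP(model.to(rank), device_ids=[rank])
        # DistributedSampler hands each rank a non-overlapping shard of the data
        sampler = DistributedSampler(dataset)
        loader = DataLoader(dataset, batch_size=32, sampler=sampler)
        optimizer = torch.optim.SGD(ddp_model.parameters(), lr=0.01)
        for epoch in range(2):
            sampler.set_epoch(epoch)  # reshuffle the shards every epoch
            for inputs, labels in loader:
                optimizer.zero_grad()
                outputs = ddp_model(inputs.to(rank))
                loss = torch.nn.functional.cross_entropy(outputs, labels.to(rank))
                loss.backward()  # gradients are all-reduced across replicas during backward
                optimizer.step()
        dist.destroy_process_group()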
+ Why you should prefer DDP over DataParallel (DP) ------------------------------------------------- @@ -66,3 +68,4 @@ Further Reading API `__ - `DDP Internal Design `__ +- `DDP Mechanics Tutorial `__ From be6e86342233bdc932deace52d7388073fbb66b4 Mon Sep 17 00:00:00 2001 From: Suraj Subramanian <5676233+subramen@users.noreply.github.com> Date: Tue, 16 May 2023 18:40:44 -0400 Subject: [PATCH 028/609] Change order of optimizer.step() to reduce memory footprint --- beginner_source/basics/optimization_tutorial.py | 2 +- beginner_source/basics/quickstart_tutorial.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py index e10496fabb6..0fb508d1ccc 100644 --- a/beginner_source/basics/optimization_tutorial.py +++ b/beginner_source/basics/optimization_tutorial.py @@ -155,9 +155,9 @@ def train_loop(dataloader, model, loss_fn, optimizer): loss = loss_fn(pred, y) # Backpropagation - optimizer.zero_grad() loss.backward() optimizer.step() + optimizer.zero_grad() if batch % 100 == 0: loss, current = loss.item(), (batch + 1) * len(X) diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py index 2bb4622d4e4..07a1be517d1 100644 --- a/beginner_source/basics/quickstart_tutorial.py +++ b/beginner_source/basics/quickstart_tutorial.py @@ -152,9 +152,9 @@ def train(dataloader, model, loss_fn, optimizer): loss = loss_fn(pred, y) # Backpropagation - optimizer.zero_grad() loss.backward() optimizer.step() + optimizer.zero_grad() if batch % 100 == 0: loss, current = loss.item(), (batch + 1) * len(X) From f87faf7d1316ce9ddac67f73c34892efdcdc834b Mon Sep 17 00:00:00 2001 From: Evan Shrestha Date: Wed, 17 May 2023 10:06:24 -0500 Subject: [PATCH 029/609] Fix typo in `torchscript_freezing.py` comment: `SciptModule` -> `ScriptModule` (#2323) --- prototype_source/torchscript_freezing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype_source/torchscript_freezing.py b/prototype_source/torchscript_freezing.py index 0b6115c3dc7..ca21451d6e8 100644 --- a/prototype_source/torchscript_freezing.py +++ b/prototype_source/torchscript_freezing.py @@ -12,7 +12,7 @@ ------------ Model freezing can be invoked using API below: - ``torch.jit.freeze(mod : ScriptModule, names : str[]) -> SciptModule`` + ``torch.jit.freeze(mod : ScriptModule, names : str[]) -> ScriptModule`` Note the input module can either be the result of scripting or tracing. 
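As an editorial aside that is not part of this patch, a typical freezing call might
look like the sketch below; the ``resnet18`` model and the input shape are placeholder
assumptions used only for illustration:

.. code-block:: python

    import torch
    import torchvision

    model = torchvision.models.resnet18(weights='IMAGENET1K_V1')
    scripted = torch.jit.script(model.eval())  # freezing expects an eval-mode ScriptModule
    frozen = torch.jit.freeze(scripted)        # folds weights and attributes into the graph
    output = frozen(torch.rand(1, 3, 224, 224))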
See https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html From 9b4809789024587e14dde7da501f0a0808c2adbe Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 17 May 2023 13:09:10 -0700 Subject: [PATCH 030/609] Add the assign workflow (#2320) Adding a workflow that allows users without repo access assign issues to themselves by commenting on an issue with the docathon label: "/assigntome" --- .github/workflows/docathon-assign.yml | 65 +++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .github/workflows/docathon-assign.yml diff --git a/.github/workflows/docathon-assign.yml b/.github/workflows/docathon-assign.yml new file mode 100644 index 00000000000..2aa0ca5dbe2 --- /dev/null +++ b/.github/workflows/docathon-assign.yml @@ -0,0 +1,65 @@ +name: Assign User on Comment + +on: + workflow_dispatch: + issue_comment: + types: [created] + +jobs: + assign: + runs-on: ubuntu-latest + steps: + - name: Install Dependencies + uses: actions/setup-node@v3 + with: + node-version: '18' + - name: Install @octokit/core + run: | + npm i @octokit/core @octokit/rest + - name: Check for "/assigntome" in comment + uses: actions/github-script@v4 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + script: | + const issueComment = context.payload.comment.body; + const assignRegex = /\/assigntome/i; + if (assignRegex.test(issueComment)) { + const assignee = context.payload.comment.user.login; + const issueNumber = context.payload.issue.number; + const { Octokit } = require("@octokit/rest"); + const octokit = new Octokit({ + auth: process.env.GITHUB_TOKEN, + }); + const { data: issue } = await octokit.issues.get({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber + }); + const hasLabel = issue.labels.some(label => label.name === 'docathon-h1-2023'); + if (hasLabel) { + if (issue.assignee !== null) { + await octokit.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber, + body: "The issue is already assigned. Please pick an open and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)." + }); + } else { + octokit.issues.addAssignees({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber, + assignees: [assignee] + }); + } + } else { + const commmentMessage = "This issue does not have the correct label. Please pick an open and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)." + await octokit.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber, + body: commmentMessage + }); + } + } From c877c592826eeba10548bbe324be325a7d91f8ae Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 17 May 2023 14:54:37 -0700 Subject: [PATCH 031/609] Update docathon-assign.yml --- .github/workflows/docathon-assign.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docathon-assign.yml b/.github/workflows/docathon-assign.yml index 2aa0ca5dbe2..1810872303b 100644 --- a/.github/workflows/docathon-assign.yml +++ b/.github/workflows/docathon-assign.yml @@ -43,7 +43,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: issueNumber, - body: "The issue is already assigned. 
Please pick an open and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)." + body: "The issue is already assigned. Please pick an opened and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)." }); } else { octokit.issues.addAssignees({ @@ -54,7 +54,7 @@ jobs: }); } } else { - const commmentMessage = "This issue does not have the correct label. Please pick an open and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)." + const commmentMessage = "This issue does not have the correct label. Please pick an opened and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)." await octokit.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, From dfbba6e0c48ad534ac33171cd0e0a4ea091baf2b Mon Sep 17 00:00:00 2001 From: Kai Fricke Date: Thu, 18 May 2023 16:17:29 +0100 Subject: [PATCH 032/609] Update hyperparameter optimization with Ray Tune tutorial (#2318) * Update code * Reformat * Output * remove total time s --------- Co-authored-by: Svetlana Karslioglu --- .../hyperparameter_tuning_tutorial.py | 208 ++++++++++-------- requirements.txt | 2 +- 2 files changed, 117 insertions(+), 93 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 35ab5c9538e..202a2f1a30b 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -40,7 +40,6 @@ Let's start with the imports: """ from functools import partial -import numpy as np import os import torch import torch.nn as nn @@ -50,7 +49,7 @@ import torchvision import torchvision.transforms as transforms from ray import tune -from ray.tune import CLIReporter +from ray.air import Checkpoint, session from ray.tune.schedulers import ASHAScheduler ###################################################################### @@ -64,23 +63,26 @@ def load_data(data_dir="./data"): - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) - ]) + transform = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + ) trainset = torchvision.datasets.CIFAR10( - root=data_dir, train=True, download=True, transform=transform) + root=data_dir, train=True, download=True, transform=transform + ) testset = torchvision.datasets.CIFAR10( - root=data_dir, train=False, download=True, transform=transform) + root=data_dir, train=False, download=True, transform=transform + ) return trainset, testset + ###################################################################### # Configurable neural network # --------------------------- -# We can only tune those parameters that are configurable. In this example, we can specify +# We can only tune those parameters that are configurable. 
+# In this example, we can specify # the layer sizes of the fully connected layers: @@ -97,32 +99,40 @@ def __init__(self, l1=120, l2=84): def forward(self, x): x = self.pool(F.relu(self.conv1(x))) x = self.pool(F.relu(self.conv2(x))) - x = x.view(-1, 16 * 5 * 5) + x = torch.flatten(x, 1) # flatten all dimensions except batch x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x + ###################################################################### # The train function # ------------------ # Now it gets interesting, because we introduce some changes to the example `from the PyTorch # documentation `_. # -# We wrap the training script in a function ``train_cifar(config, checkpoint_dir=None, data_dir=None)``. -# As you can guess, the ``config`` parameter will receive the hyperparameters we would like to -# train with. The ``checkpoint_dir`` parameter is used to restore checkpoints. The ``data_dir`` specifies -# the directory where we load and store the data, so multiple runs can share the same data source. +# We wrap the training script in a function ``train_cifar(config, data_dir=None)``. +# The ``config`` parameter will receive the hyperparameters we would like to +# train with. The ``data_dir`` specifies the directory where we load and store the data, +# so that multiple runs can share the same data source. +# We also load the model and optimizer state at the start of the run, if a checkpoint +# is provided. Further down in this tutorial you will find information on how +# to save the checkpoint and what it is used for. # # .. code-block:: python # # net = Net(config["l1"], config["l2"]) # -# if checkpoint_dir: -# model_state, optimizer_state = torch.load( -# os.path.join(checkpoint_dir, "checkpoint")) -# net.load_state_dict(model_state) -# optimizer.load_state_dict(optimizer_state) +# checkpoint = session.get_checkpoint() +# +# if checkpoint: +# checkpoint_state = checkpoint.to_dict() +# start_epoch = checkpoint_state["epoch"] +# net.load_state_dict(checkpoint_state["net_state_dict"]) +# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) +# else: +# start_epoch = 0 # # The learning rate of the optimizer is made configurable, too: # @@ -171,11 +181,17 @@ def forward(self, x): # # .. code-block:: python # -# with tune.checkpoint_dir(epoch) as checkpoint_dir: -# path = os.path.join(checkpoint_dir, "checkpoint") -# torch.save((net.state_dict(), optimizer.state_dict()), path) +# checkpoint_data = { +# "epoch": epoch, +# "net_state_dict": net.state_dict(), +# "optimizer_state_dict": optimizer.state_dict(), +# } +# checkpoint = Checkpoint.from_dict(checkpoint_data) # -# tune.report(loss=(val_loss / val_steps), accuracy=correct / total) +# session.report( +# {"loss": val_loss / val_steps, "accuracy": correct / total}, +# checkpoint=checkpoint, +# ) # # Here we first save a checkpoint and then report some metrics back to Ray Tune. Specifically, # we send the validation loss and accuracy back to Ray Tune. Ray Tune can then use these metrics @@ -187,7 +203,8 @@ def forward(self, x): # schedulers like # `Population Based Training `_. # Also, by saving the checkpoint we can later load the trained models and validate them -# on a test set. +# on a test set. Lastly, saving checkpoints is useful for fault tolerance, and it allows +# us to interrupt training and continue training later. 
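#
# As an editorial aside (not part of this patch), the save-and-resume pattern just
# described can be sketched in isolation before looking at the full training function
# below. The model, optimizer, and metric values are placeholders, and the calls only
# do real work when executed inside a Ray Tune trial; the ``Checkpoint`` and
# ``session`` APIs are the same ones used in this tutorial.
#
# .. code-block:: python
#
#     net = nn.Linear(4, 2)                  # placeholder model
#     optimizer = optim.SGD(net.parameters(), lr=0.1)
#
#     checkpoint = session.get_checkpoint()  # ``None`` on a fresh trial
#     if checkpoint:
#         state = checkpoint.to_dict()
#         start_epoch = state["epoch"]
#         net.load_state_dict(state["net_state_dict"])
#         optimizer.load_state_dict(state["optimizer_state_dict"])
#     else:
#         start_epoch = 0
#
#     for epoch in range(start_epoch, 10):
#         val_loss, val_acc = 0.0, 0.0        # placeholders for real validation metrics
#         checkpoint = Checkpoint.from_dict({
#             "epoch": epoch,
#             "net_state_dict": net.state_dict(),
#             "optimizer_state_dict": optimizer.state_dict(),
#         })
#         session.report({"loss": val_loss, "accuracy": val_acc}, checkpoint=checkpoint)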
# # Full training function # ~~~~~~~~~~~~~~~~~~~~~~ @@ -195,7 +212,7 @@ def forward(self, x): # The full code example looks like this: -def train_cifar(config, checkpoint_dir=None, data_dir=None): +def train_cifar(config, data_dir=None): net = Net(config["l1"], config["l2"]) device = "cpu" @@ -208,30 +225,31 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None): criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) - if checkpoint_dir: - model_state, optimizer_state = torch.load( - os.path.join(checkpoint_dir, "checkpoint")) - net.load_state_dict(model_state) - optimizer.load_state_dict(optimizer_state) + checkpoint = session.get_checkpoint() + + if checkpoint: + checkpoint_state = checkpoint.to_dict() + start_epoch = checkpoint_state["epoch"] + net.load_state_dict(checkpoint_state["net_state_dict"]) + optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) + else: + start_epoch = 0 trainset, testset = load_data(data_dir) test_abs = int(len(trainset) * 0.8) train_subset, val_subset = random_split( - trainset, [test_abs, len(trainset) - test_abs]) + trainset, [test_abs, len(trainset) - test_abs] + ) trainloader = torch.utils.data.DataLoader( - train_subset, - batch_size=int(config["batch_size"]), - shuffle=True, - num_workers=8) + train_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8 + ) valloader = torch.utils.data.DataLoader( - val_subset, - batch_size=int(config["batch_size"]), - shuffle=True, - num_workers=8) + val_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8 + ) - for epoch in range(10): # loop over the dataset multiple times + for epoch in range(start_epoch, 10): # loop over the dataset multiple times running_loss = 0.0 epoch_steps = 0 for i, data in enumerate(trainloader, 0): @@ -252,8 +270,10 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None): running_loss += loss.item() epoch_steps += 1 if i % 2000 == 1999: # print every 2000 mini-batches - print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, - running_loss / epoch_steps)) + print( + "[%d, %5d] loss: %.3f" + % (epoch + 1, i + 1, running_loss / epoch_steps) + ) running_loss = 0.0 # Validation loss @@ -275,13 +295,20 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None): val_loss += loss.cpu().numpy() val_steps += 1 - with tune.checkpoint_dir(epoch) as checkpoint_dir: - path = os.path.join(checkpoint_dir, "checkpoint") - torch.save((net.state_dict(), optimizer.state_dict()), path) - - tune.report(loss=(val_loss / val_steps), accuracy=correct / total) + checkpoint_data = { + "epoch": epoch, + "net_state_dict": net.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + } + checkpoint = Checkpoint.from_dict(checkpoint_data) + + session.report( + {"loss": val_loss / val_steps, "accuracy": correct / total}, + checkpoint=checkpoint, + ) print("Finished Training") + ###################################################################### # As you can see, most of the code is adapted directly from the original example. 
# @@ -296,7 +323,8 @@ def test_accuracy(net, device="cpu"): trainset, testset = load_data() testloader = torch.utils.data.DataLoader( - testset, batch_size=4, shuffle=False, num_workers=2) + testset, batch_size=4, shuffle=False, num_workers=2 + ) correct = 0 total = 0 @@ -311,6 +339,7 @@ def test_accuracy(net, device="cpu"): return correct / total + ###################################################################### # The function also expects a ``device`` parameter, so we can do the # test set validation on a GPU. @@ -322,14 +351,14 @@ def test_accuracy(net, device="cpu"): # .. code-block:: python # # config = { -# "l1": tune.sample_from(lambda _: 2**np.random.randint(2, 9)), -# "l2": tune.sample_from(lambda _: 2**np.random.randint(2, 9)), +# "l1": tune.choice([2 ** i for i in range(9)]), +# "l2": tune.choice([2 ** i for i in range(9)]), # "lr": tune.loguniform(1e-4, 1e-1), # "batch_size": tune.choice([2, 4, 8, 16]) # } # -# The ``tune.sample_from()`` function makes it possible to define your own sample -# methods to obtain hyperparameters. In this example, the ``l1`` and ``l2`` parameters +# The ``tune.choice()`` accepts a list of values that are uniformly sampled from. +# In this example, the ``l1`` and ``l2`` parameters # should be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or 256. # The ``lr`` (learning rate) should be uniformly sampled between 0.0001 and 0.1. Lastly, # the batch size is a choice between 2, 4, 8, and 16. @@ -353,7 +382,6 @@ def test_accuracy(net, device="cpu"): # config=config, # num_samples=num_samples, # scheduler=scheduler, -# progress_reporter=reporter, # checkpoint_at_end=True) # # You can specify the number of CPUs, which are then available e.g. @@ -377,34 +405,30 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): data_dir = os.path.abspath("./data") load_data(data_dir) config = { - "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)), - "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)), + "l1": tune.choice([2**i for i in range(9)]), + "l2": tune.choice([2**i for i in range(9)]), "lr": tune.loguniform(1e-4, 1e-1), - "batch_size": tune.choice([2, 4, 8, 16]) + "batch_size": tune.choice([2, 4, 8, 16]), } scheduler = ASHAScheduler( metric="loss", mode="min", max_t=max_num_epochs, grace_period=1, - reduction_factor=2) - reporter = CLIReporter( - # ``parameter_columns=["l1", "l2", "lr", "batch_size"]``, - metric_columns=["loss", "accuracy", "training_iteration"]) + reduction_factor=2, + ) result = tune.run( partial(train_cifar, data_dir=data_dir), resources_per_trial={"cpu": 2, "gpu": gpus_per_trial}, config=config, num_samples=num_samples, scheduler=scheduler, - progress_reporter=reporter) + ) best_trial = result.get_best_trial("loss", "min", "last") - print("Best trial config: {}".format(best_trial.config)) - print("Best trial final validation loss: {}".format( - best_trial.last_result["loss"])) - print("Best trial final validation accuracy: {}".format( - best_trial.last_result["accuracy"])) + print(f"Best trial config: {best_trial.config}") + print(f"Best trial final validation loss: {best_trial.last_result['loss']}") + print(f"Best trial final validation accuracy: {best_trial.last_result['accuracy']}") best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"]) device = "cpu" @@ -414,10 +438,10 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): best_trained_model = nn.DataParallel(best_trained_model) best_trained_model.to(device) - best_checkpoint_dir = 
best_trial.checkpoint.value - model_state, optimizer_state = torch.load(os.path.join( - best_checkpoint_dir, "checkpoint")) - best_trained_model.load_state_dict(model_state) + best_checkpoint = best_trial.checkpoint.to_air_checkpoint() + best_checkpoint_data = best_checkpoint.to_dict() + + best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"]) test_acc = test_accuracy(best_trained_model, device) print("Best trial test set accuracy: {}".format(test_acc)) @@ -428,6 +452,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): # Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``. # This is only needed to run with sphinx-build. import sys + sys.stdout.fileno = lambda: False # sphinx_gallery_end_ignore # You can change the number of GPUs per trial here: @@ -439,30 +464,29 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): # # :: # -# Number of trials: 10 (10 TERMINATED) -# +-----+------+------+-------------+--------------+---------+------------+--------------------+ -# | ... | l1 | l2 | lr | batch_size | loss | accuracy | training_iteration | -# |-----+------+------+-------------+--------------+---------+------------+--------------------| -# | ... | 64 | 4 | 0.00011629 | 2 | 1.87273 | 0.244 | 2 | -# | ... | 32 | 64 | 0.000339763 | 8 | 1.23603 | 0.567 | 8 | -# | ... | 8 | 16 | 0.00276249 | 16 | 1.1815 | 0.5836 | 10 | -# | ... | 4 | 64 | 0.000648721 | 4 | 1.31131 | 0.5224 | 8 | -# | ... | 32 | 16 | 0.000340753 | 8 | 1.26454 | 0.5444 | 8 | -# | ... | 8 | 4 | 0.000699775 | 8 | 1.99594 | 0.1983 | 2 | -# | ... | 256 | 8 | 0.0839654 | 16 | 2.3119 | 0.0993 | 1 | -# | ... | 16 | 128 | 0.0758154 | 16 | 2.33575 | 0.1327 | 1 | -# | ... | 16 | 8 | 0.0763312 | 16 | 2.31129 | 0.1042 | 4 | -# | ... | 128 | 16 | 0.000124903 | 4 | 2.26917 | 0.1945 | 1 | -# +-----+------+------+-------------+--------------+---------+------------+--------------------+ -# -# -# Best trial config: {'l1': 8, 'l2': 16, 'lr': 0.00276249, 'batch_size': 16, 'data_dir': '...'} -# Best trial final validation loss: 1.181501 -# Best trial final validation accuracy: 0.5836 -# Best trial test set accuracy: 0.5806 +# Number of trials: 10/10 (10 TERMINATED) +# +-----+--------------+------+------+-------------+--------+---------+------------+ +# | ... | batch_size | l1 | l2 | lr | iter | loss | accuracy | +# |-----+--------------+------+------+-------------+--------+---------+------------| +# | ... | 2 | 1 | 256 | 0.000668163 | 1 | 2.31479 | 0.0977 | +# | ... | 4 | 64 | 8 | 0.0331514 | 1 | 2.31605 | 0.0983 | +# | ... | 4 | 2 | 1 | 0.000150295 | 1 | 2.30755 | 0.1023 | +# | ... | 16 | 32 | 32 | 0.0128248 | 10 | 1.66912 | 0.4391 | +# | ... | 4 | 8 | 128 | 0.00464561 | 2 | 1.7316 | 0.3463 | +# | ... | 8 | 256 | 8 | 0.00031556 | 1 | 2.19409 | 0.1736 | +# | ... | 4 | 16 | 256 | 0.00574329 | 2 | 1.85679 | 0.3368 | +# | ... | 8 | 2 | 2 | 0.00325652 | 1 | 2.30272 | 0.0984 | +# | ... | 2 | 2 | 2 | 0.000342987 | 2 | 1.76044 | 0.292 | +# | ... | 4 | 64 | 32 | 0.003734 | 8 | 1.53101 | 0.4761 | +# +-----+--------------+------+------+-------------+--------+---------+------------+ +# +# Best trial config: {'l1': 64, 'l2': 32, 'lr': 0.0037339984519545164, 'batch_size': 4} +# Best trial final validation loss: 1.5310075663924216 +# Best trial final validation accuracy: 0.4761 +# Best trial test set accuracy: 0.4737 # # Most trials have been stopped early in order to avoid wasting resources. 
-# The best performing trial achieved a validation accuracy of about 58%, which could +# The best performing trial achieved a validation accuracy of about 47%, which could # be confirmed on the test set. # # So that's it! You can now tune the parameters of your PyTorch models. diff --git a/requirements.txt b/requirements.txt index ecb41a8618f..0811ded54c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ bs4 awscliv2==2.1.1 flask spacy==3.4.1 -ray[tune]==1.13.0 +ray[tune]==2.4.0 tensorboard jinja2==3.0.3 pytorch-lightning From 11aec454234a993d7e9148d1452658770d0b7270 Mon Sep 17 00:00:00 2001 From: Anupam Sharma <29808870+anp-scp@users.noreply.github.com> Date: Fri, 19 May 2023 02:23:48 +0530 Subject: [PATCH 033/609] A torchtext tutorial to pre-process a non-built-in dataset (#2307) * added intro * added setup section * import packages and read dataset * remove attributions from dataset * added functions for tokenization * building the vocabulary * added some comments * Numericalize sentences using vocabulary * bucket batching * applied padding * view the final result * added torchtext logo * added card in index.rst * added entry in toctree * updated Makefile for downloading dataset * get dataset from data folder * updated comment * updated function to view results, and added some sources * updated typo * fixed hyperlinks * changed title and introduction * fixed indentation issue * fixed typo * fixed typo * replaced Finninsh with German as spacy German model is already there in build * fixed issue in title * use another dataset * addressed review comments for PR #2307 * corrected spelling mistakes * followed pyspelling's configuration for the shell commands * added words used in beginner_source/torchtext_custom_dataset_tutorial.py --------- Co-authored-by: Nayef Ahmed <22487263+Nayef211@users.noreply.github.com> --- Makefile | 4 + .../thumbnails/cropped/torch_text_logo.png | Bin 0 -> 8545 bytes .../torchtext_custom_dataset_tutorial.py | 384 ++++++++++++++++++ en-wordlist.txt | 4 + index.rst | 8 + 5 files changed, 400 insertions(+) create mode 100644 _static/img/thumbnails/cropped/torch_text_logo.png create mode 100644 beginner_source/torchtext_custom_dataset_tutorial.py diff --git a/Makefile b/Makefile index a01ea69bb50..ed0ade00465 100644 --- a/Makefile +++ b/Makefile @@ -106,6 +106,10 @@ download: wget -nv -N http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz -P $(DATADIR) tar $(TAROPTS) -xzf $(DATADIR)/vocab.tar.gz -C ./beginner_source/data/ + # Download dataset for beginner_source/torchtext_custom_dataset_tutorial.py + wget -nv -N https://www.manythings.org/anki/deu-eng.zip -P $(DATADIR) + unzip -o $(DATADIR)/deu-eng.zip -d beginner_source/data/ + docs: make download diff --git a/_static/img/thumbnails/cropped/torch_text_logo.png b/_static/img/thumbnails/cropped/torch_text_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..3fe736d60e282d080ac3165e3a5aad3c69bcc934 GIT binary patch literal 8545 zcmeHtc~n!$mw!M34GpNYD1tzD2hxoo5kzDW1UC}4Hj6AG1X+}@8e}JcqSy)wz6gX} zW$PCZ5M|%NEsbn#g2-Y-q9_=`A|MINRQ%4&`OTU0n}6oaobMm=IIrGIRo%K(x2o>_ zeCl!icl#4^$X!ST0wIUBK8i;mB%Q?fCTWn<;=>pLkB#Rn?JN<9hbgiv9#Y`9hNm^& z4uOc$LLlO=AP_>375@o=2*n@}lVk+Kgn>Zp2+nCZX$A_U&)S?gidYx_-EA(RgB+QQ zSi55~6B~b&S2$=*31lG<>gTXWEr}6B{88VK(-WCm?^IkL2ca#&${))!IlmlEM)^fz5l=!%xsuj5 zc=r%nYx9M3GZO;XyXi}!+K|P9q*vw^ zlty;0{QhYERO?KO=d~x{kz$mH~at3($qvRWtoshyUoA6t zqNVY0kOO_?VPkVTl05au( zMJ?Mw7Mjjobw;v-o{~!QWdfTE^(IL^sYu5z_`{qctbGcf#vF#@L(4^vOtCH2x+><@ 
z3sipDnS-r#C+#cE8f1x=wJeWDEa`ENmulN7dedzl9TjJ1*ilhbew5gK`Owc~2~`~% zm^lDXFZx4E^q3r;BQ`bz>5>iOlqzL5>cl6U`;Ejh$>Ck3B(uhdMWx(jGNWNAn8xo| z_HcFHWVhJ~A;axw|Nb_O=sgvz{kv{Sxqg$1!K1ssNyxr?r&1`avPT(bA_-HY&lBNi z->$L7=c|a1-hB|uxpOzRLP!^`-Bl(h`P}nS>#I66=f3!OsY)mLl9~*YJf@FksNonD z())#MsVJ$`cGejyRjLU8HPFo7eif^`W6>t@myoLOjU~Z?E|Yv0ZL^`~px?&j3c6XB zGf-p+opJ(94mDYbs?3~SB&2-wdT{2T#UK0RXRN^3o^pD-Ofo1jtr@dk-%e7lbVgAW zj=YoFznQk*U=D9##@f$tjPbbp#=(51vJ&7cxcL8xUc6ACSij%&62+AJ>6{9ZsD7tOM% z9{8Kn1;olBYonI-9G!2`pg{)%Yr9B`Y+eG9F2XTqX?JF&`#*ZMi#y;x@jDbQ?m9uD zl&c}6FdbwWU~SBVejg8^LPY^@a|L_GyDm3yxiX*b4W}Lx_o7KCcR!ASJaa<-t%fk_ zCF}ADU;?)%*V!#s&*5#Z4J!w2p=nAgp|(~cBKCE`%FaPr%b0N*QnY7tg{HH6V{q(F zN2RJZvCuQBw%4i-Cp+}l6d$(NGFwn@J8{wh++v+e$C2zS%p`kBg63tRn4%l%2# zpopYXa(UnyoEmy?T<(5FiB)=wj?%4oskTxRh-V5*e^XR;h<3jrci;JUFu)3mCNO(R z2d6pt2&dBY06wJ3e@nO3mP*8V?^IqjYJy-VpodfiK9TO%*&-pk*z<|gx>432!&i*J zmO!dD3*NdVrLLjZKyy1Kl^z@6>>jsd2Z~Z(3X(KM=lXW9=D_1{;%wxjJ50Cc+d~>c zw`X>O0x%U(K#c^NY#D8e9e`r8T$GfH-S&?;3xh*ZEF-9ij?|vt)#C^?<$4*CI4-R5 zkSd`>QLx5lr@ptC?Uic`r1R<7pqmx7caZ96s3`-fbq(JYK$AM3r-L@mMLM3qlS&t+ zJy&ze7Et@pNeM6XXy=0%yi$}pdO3h8@XFh>^~(d6CeM=MJA72M z_IX;X$XWDk$&%UOmvCMlB_TUBjBUomLOct6f|A0Cw)a!~Ldh2&h}P~lS5p)-x$$Yl z^D7#pZj&sbgSEO&H|zc3ZIySMPf|XQ`#_}^ihk_%0GobonmW1sD2=kwB5UZD4|}xS zXRSj>rNHxi;<2SbZ7(_ey{guZ;6c3|!ljNT#Y%*ajm4ky`0zNx?N-x;H3gA-=0%Os zzg3Sc(~0j)axRRE7N#e{LVuF!Xj=P4>foP33(|71L;teZVImPyp~Q^{iGo$P$UnBWlE}whW8)3Dloau>O zMPM5q*Gw`7%lTzFpQz{V&cbQOtn9De-Q6TzR#;v}s}(8Rdyk}4=S-eyGUY2?ar{YK zHMW5FAbG<8W}#7ZQ9CE+MNALN1-KqM{ClWH_0hrgRjnef2@1T~c)43bmMm}PVLxEc zl$oL~t+iOjFm}bwM14G$-1(=eAR1ziTQ3b1Jyo?PycTZqJBf8BX~Nxc_ZxFL&zAVa^}#e^ zE2pAyd_1`Vx>ElOn2pwx+R3&(tX+j!V;0sRB7aM8{##FWabmS=`da{#x^P!yP>``I zjj;okZLiE@dMyyRwGB(VaeR@HpM;<3_WR{p%LK?BwVXj5Y_OP}PzwSmMYD?7_;(f1in(Fb`>>@6VNv^ST`J>*Sc$&v z)jPRFhBJ;-wPpjU1Kjd?(NK<%R_ce1rSm4>XN$da5l0%C+e7TyBo8`;;V)4P#*Lka z!SXTTe2u*y9o?ERaEhr{=B~6tg4Q#56>xC%{G9tPpQL!HV1%u!^PppFST9&}3my+j zYBZc}&+;tJh2GG5HqtTw+(=(J_30%L zXjNlYrTeqt8+d$IVm3?}$b8Jzh%&8KjnpO83u{s9=<#4prwez*VV|w|(QASJO0OjS zV>&^?FExw4;NnKJKjmKEkiDxpcps;4t&x3YaWq8S6W}WmN#3T_mS4l3kqTw7Yx`I! z^OF)&SQR6Oz_<;vmTi(wI%3tzGG!i=A<<{{@qV;^;ElMgr{ToK*dFmxa@bn#gaPv= z$zae8m#1YSV-h}nU~hXL4mD*WPxlt&hb)N4M1tZR=G2u1y5F37Id4+-PPV^43>=u! z>*H$qbWTuP(;Z4o+}n7=`U~^wgzN+BKOYG6xdgf=>*Y8e?H<{XjXbgny_;ok`8x=0 zoOVtD#h-Ac-9|dL+6IiQS3nb+U8Hiq7O47*9-g zv8&#-7Itt-VBJWv2WU8Xjwg3N96$fi{)1q`E1*Q1_?i$>zILcu=bP`OcE{M`xL+As zV3%4Rnvm{4f%ljk2HX7Rg+XjU$h4$3^nv|x*K9|Bb`cN^9SSDz`}Hy^&F<_p+9D2XLbTbCC{=?NTk z8s0KsFo84yqA6w`37`1HKN@l-n|BqZD+GO<<%ewxDSx*W$W<8FE}RLt7!*{feR|dU z+K$mjTjb5Y=i|C-T=FAKx0bzwf%WpvQ|p6Y>&_UayRF(F5EuX@5a(_G8;QU75E}8N z0`Lb>V*)#_>J+4kqw28wr}_PldqXVcpf9M%RGvL_?Xz0aBlR;mi}ezEhubbcHMEv0 z9mn=)M(VLl;k#*U*+Zgpsb0pc<4`Z~8?2&DjhKv$I!>+$s8X!G*77MRUuzi)y~gWJ zcKYVOyX=fRq_zaD=(94gEsRC1gx(3zCdbiM9t3|LAH{VbJqr7o%&#pX9-9q=Kof;( zzmVlOJ5>$}nXG;>so-@J+lKNMS-Ol5#J0%|f_s~sFty(`Caa8!QbT`c-JTwgid%G4 zFShrIc_@J?S=k^htyr6YZpvS+7F|GmOf`0j$8A3h_yyvhEzzlIe za@R8E(EQI0q3g)fFqfqO0gi*)sKYodsiae$OKfwonZs-^kG(xBkkXqLmAV)2)~e4y zwpi_b2^fshOW+3uVK{esXIO0k%2?F@^w8?ZMzzkh*(vqTqU)Y>4^{0{8+%3l^$sJ`%e@jZnekFd z0miI*s1x)Yd#XN*qUGilc|R95j6DF2*icPb3BTn$*I0vYRNC(2X|gTA`E4qnbc zliS}VtqSUm;E8tG5_61Bz< zn7nW*&w_l$;c91)R(%%Fhsg|W6@{pPb`=yYW!)0N;QgvDFGN8Ct>x!L!%{)-*acc! 
z6a%RpXLI#-tCt}9bvoYWQVG3@9@W&6izH3pNoBE-GhW)Rc6+zV)BQirYIc|cwt0xe z5}L7!Q0|3R854*mfV`E4sZb}pgCC^1j3M=ZiVlky!803{Fr;c1dnu^EcV}go;lq>$ zK{Se$NvsoI0PtxJP(ZZtaBe3)Z-i(5rtPiKV+BIWtdF)1ILuJM#&TCP8%LiX7ti>_ zy)3C-tk`w9U#iJrUR1Rw9TIY7uNBiS8~6iwx!XZ1|HGjbH(DKV7$R&>_;Urq_OjRb zl;P5x&d<})s#)J6<%_gk+`aE_R*Nu~R7#DQjd7+Kp08M2)5?(II?P!>E43&=(Pa*c zh>7K7CYHiQV@F0`o4ui}a>JhQFHgNvuSJ&V;3TQ=P2Aonrc2S0W)O7k+ND{aHpyw&?Wo)lBIXCL`%zq*IWA#rQU?C(Z@0B+b7->D24K!u z?XR>fN}^-;OlC0ZgpR=5Y&ew|>uv8-jX7?O%} z0C8S7f%r=JhW%@zprm{{##Hp^=E=C(P8NI9AKk>hhRj{u;62+Lv@-NBodL%HZ>sJf z9W8LWvazXILDk&oqw);`Q7!<>JFshv2Ih<#_8Z<9 zdvVA9VNex!vS*ynej>~zzR>5Z=UIt6PzoalM6mQyt$uSn$25zl0#k+_uxm^eJa_7v zrNv8ORD^lOoyrdW;KZP2F*-81HjRDvl>%XWR|_scNzUw8sj@brDJ%AuiCD#nM{ub$ zy0$4ZJ#E^xC6_lkz0wrhX7DF4O5z7Le&jPdvAgt@`Mu3Ha4C{&tGit-C0?qRr@pDp z-wAt_0i>6FeDz)wu|NIsx}o6n$ARE209`!t0Hpb?^;Je^X6}|(K?iBt+a*(;Ov#wh zHLc>`RA#xT#TbyDW-X>A(1mxy%S?TIm$&&q>z5EfX|l?QgU^8@FDaDIRO3g4GYOdh z0=`VN)UGS@X0&>%P&FwODCd@!-$Oc7ysGTgV6ei)!xGr+evEbGc<#7$?JH z!y7$A5_{x)Amn$+s~OeAzM-7O&31!W-P3Pv0~S{>>6^zg#Z!y1U2q@$s}8q+Png(+ z9RkOEW~9n@%HpyRM+G8~Z7u9uP*9My7 zhF-m@Z441MnFbbF?U(`HtTo_iYUJ2-bIuFl0VJnsg3jhK*A?9M- zcgBn%=?eWQuO<`$JPRmz#7zXqbPw=Hk(BE*fg5!q#2Bt7880#A0Mpjr+Rlvj1!x`_eOkXR<~~S7r>wUH)4pTO8~nEm%wVv@%6lvvalXFf3>k+Cbd!+SmB*>FbC7 zb>#ZE1&>jzVZ-`fRwi4OsX(APNAIpY4r_Bb|2RHar_3>*_4Br&XAT1Wi(jMHMr}zc z2kpW7kqC{_DNOzE^Ddf8edjTOQ&5 zCFw;`!>`QSl^@@kefE56^kHVRO2Zq+bm7U(!`PO;zTsLPpt!i3AqT6bLS%ZsWvvFN zm%S#)eF2>G!PO2?RRLKLQ z(d-KMy{?VHE$dmsFyA*(Stjo$5fO!E8R1h#AF|V~oxmqxMMa(FwgRuFAHQM7QzHyQ zIq%VM>H?P8zS_=PM)W!Ww_Hu1~eNH*q9wj1BM@HFB1LYG%(a>nMM5@E)oP*Cc=w<(|LbH;ALrujB68!qaoZ4_0A zkCmKkl=U_Qh31P$OUsT>O#f(W8q!^5Yx!8(cW9|SFv?C6JZKVb_e~;Kg$ERvX33PD zu?b0uDrqSF0lmj%l@sSk;FQv_w~9Nc>-F@mgm6O2LN{A2A-rKp!C-5-j$B8Vc(VDt z*&SS&3x;F!pE`T&i-#))+`a44kamuQ*4LtbIhvx#%TwfKSZYzzxCFac0i>tHU`5&} z*3#O}%G;VH(%OcjmMf&cTwNXPMIy)m+#(P+f#>?CNgmjWHpZ`3hmFO=h4<~Y!2-B3 zd8-Ij;G2(@_3Il2m>}$V7?jJNr`|!GvuHTTpTBZ$TP68|9HeSb{h6hLed{o14|$G0 z4+pVYr!dz0gFVzs2aco^r2zTcig@oL!Iadc1J~dW3DC>=*fl>W1P(R?&aNs|___#q zCqq^*q4z1DXlN6fR<8k|(XPT+8CAQhWq?^630E)X+d_I!Q+m{`z2}-@HW@h3O#!Yh z-{8aI&Hp`5um73cepaO@eS=8)EJJg3e*R>L=}TD&S;XCa|6IvVU}}W?E(zJ4cH)<; zG~av##<1YJrzy(^x`s`}hBYj;$DX!3gjO%kqp!~Q$8BWn5pQsF*8o&~wwLZg^u0KX zFSX|6QCq6s!icKRO1=a`R$Rk_i3>JjaZvf8OD9I3lUo-g)+x{wb{hQ#M|wyO6mPZT z@Q(6mc*i3Pl(Pf1O2pW)zu3cgVWlyRM<>3B+OnPWFoIr&f0q&q?boUgzw&xcWh9-T-**4d$mE`uE)lX;R!sm{4$?iOS zXbfF=*FzHcwa6*}m%U(H%7i#%A!1$sf%M3;%(hTuietcutlMm8?6J1< zD)gZlwS!$0+g-PB2 z=9BqN?NjQNiSP$Yn=1%e9BBO}zU#`cvP`LS-HqVB+~F|hU*d1f|9AcGSfDLOxSpKS zJLmfYX%1*S>+OW~!D6)$o4}6^h;0%Z5E9@`eD9NxL~I35eQ^6;+xR{2D0mlNg!4fc z!@d3eH86V(Fnjmy2mdO7ki5J3dz}xUe8d0qF0LQ3Y3pwA(7bre>7tj%MQ;;NsyBEb z^fCGdx|sdC`WWKA{U-YiObqq+U@#^a%-y!)<^LcM5ai|S6Z!ua=+9L<1_ZW=HF!t( z`j{L8iGz<%i1$UrfrEX$I3G~D^WRH<_l@xOcRK1D;2jv^8x)Ap*Ed$kLv8!db)5f0 p9q9H`FHlGA-`gPs`CJV5pnCr&i!>Oa--DV6>@oYJ`_ + +This tutorial illustrates the usage of torchtext on a dataset that is not built-in. In the tutorial, +we will preprocess a dataset that can be further utilized to train a sequence-to-sequence +model for machine translation (something like, in this tutorial: `Sequence to Sequence Learning +with Neural Networks `_) but without using legacy version +of torchtext. + +In this tutorial, we will learn how to: + +* Read a dataset +* Tokenize sentence +* Apply transforms to sentence +* Perform bucket batching + +Let us assume that we need to prepare a dataset to train a model that can perform English to +German translation. 
We will use a tab-delimited German - English sentence pairs provided by +the `Tatoeba Project `_ which can be downloaded from +`this link `__. + +Sentence pairs for other languages can be found in `this link `\ +__. +""" + +# %% +# Setup +# ----- +# +# First, download the dataset, extract the zip, and note the path to the file `deu.txt`. +# +# Ensure that following packages are installed: +# +# * `Torchdata 0.6.0 `_ (`Installation instructions \ +# `__) +# * `Torchtext 0.15.0 `_ (`Installation instructions \ +# `__) +# * `Spacy `__ +# +# Here, we are using `Spacy` to tokenize text. In simple words tokenization means to +# convert a sentence to list of words. Spacy is a python package used for various Natural +# Language Processing (NLP) tasks. +# +# Download the English and German models from Spacy as shown below: +# +# .. code-block:: shell +# +# python -m spacy download en_core_web_sm +# python -m spacy download de_core_news_sm +# + + +# %% +# Let us start by importing required modules: + +import torchdata.datapipes as dp +import torchtext.transforms as T +import spacy +from torchtext.vocab import build_vocab_from_iterator +eng = spacy.load("en_core_web_sm") # Load the English model to tokenize English text +de = spacy.load("de_core_news_sm") # Load the German model to tokenize German text + +# %% +# Now we will load the dataset + +FILE_PATH = 'data/deu.txt' +data_pipe = dp.iter.IterableWrapper([FILE_PATH]) +data_pipe = dp.iter.FileOpener(data_pipe, mode='rb') +data_pipe = data_pipe.parse_csv(skip_lines=0, delimiter='\t', as_tuple=True) + +# %% +# In the above code block, we are doing following things: +# +# 1. At line 2, we are creating an iterable of filenames +# 2. At line 3, we pass the iterable to `FileOpener` which then +# opens the file in read mode +# 3. At line 4, we call a function to parse the file, which +# again returns an iterable of tuples representing each rows +# of the tab-delimited file +# +# DataPipes can be thought of something like a dataset object, on which +# we can perform various operations. +# Check `this tutorial `_ for more details on +# DataPipes. +# +# We can verify if the iterable has the pair of sentences as shown +# below: + +for sample in data_pipe: + print(sample) + break + +# %% +# Note that we also have attribution details along with pair of sentences. We will +# write a small function to remove the attribution details: + +def removeAttribution(row): + """ + Function to keep the first two elements in a tuple + """ + return row[:2] +data_pipe = data_pipe.map(removeAttribution) + +# %% +# The `map` function at line 6 in above code block can be used to apply some function +# on each elements of `data_pipe`. Now, we can verify that the `data_pipe` only contains +# pair of sentences. + + +for sample in data_pipe: + print(sample) + break + +# %% +# Now, let us define few functions to perform tokenization: + +def engTokenize(text): + """ + Tokenize an English text and return a list of tokens + """ + return [token.text for token in eng.tokenizer(text)] + +def deTokenize(text): + """ + Tokenize a German text and return a list of tokens + """ + return [token.text for token in de.tokenizer(text)] + +# %% +# Above function accepts a text and returns a list of words +# as shown below: + +print(engTokenize("Have a good day!!!")) +print(deTokenize("Haben Sie einen guten Tag!!!")) + +# %% +# Building the vocabulary +# ----------------------- +# Let us consider an English sentence as the source and a German sentence as the target. 
+# +# Vocabulary can be considered as the set of unique words we have in the dataset. +# We will build vocabulary for both our source and target now. +# +# Let us define a function to get tokens from elements of tuples in the iterator. + + +def getTokens(data_iter, place): + """ + Function to yield tokens from an iterator. Since, our iterator contains + tuple of sentences (source and target), `place` parameters defines for which + index to return the tokens for. `place=0` for source and `place=1` for target + """ + for english, german in data_iter: + if place == 0: + yield engTokenize(english) + else: + yield deTokenize(german) + +# %% +# Now, we will build vocabulary for source: + +source_vocab = build_vocab_from_iterator( + getTokens(data_pipe,0), + min_freq=2, + specials= ['', '', '', ''], + special_first=True +) +source_vocab.set_default_index(source_vocab['']) + +# %% +# The code above, builds the vocabulary from the iterator. In the above code block: +# +# * At line 2, we call the `getTokens()` function with `place=0` as we need vocabulary for +# source sentences. +# * At line 3, we set `min_freq=2`. This means, the function will skip those words that occurs +# less than 2 times. +# * At line 4, we specify some special tokens: +# +# * `` for start of sentence +# * `` for end of sentence +# * `` for unknown words. An example of unknown word is the one skipped because of +# `min_freq=2`. +# * `` is the padding token. While training, a model we mostly train in batches. In a +# batch, there can be sentences of different length. So, we pad the shorter sentences with +# `` token to make length of all sequences in the batch equal. +# +# * At line 5, we set `special_first=True`. Which means `` will get index 0, `` index 1, +# `` index 2, and will get index 3 in the vocabulary. +# * At line 7, we set default index as index of ``. That means if some word is not in +# vocabulary, we will use `` instead of that unknown word. +# +# Similarly, we will build vocabulary for target sentences: + +target_vocab = build_vocab_from_iterator( + getTokens(data_pipe,1), + min_freq=2, + specials= ['', '', '', ''], + special_first=True +) +target_vocab.set_default_index(target_vocab['']) + +# %% +# Note that the example above shows how can we add special tokens to our vocabulary. The +# special tokens may change based on the requirements. +# +# Now, we can verify that special tokens are placed at the beginning and then other words. +# In the below code, `source_vocab.get_itos()` returns a list with tokens at index based on +# vocabulary. + +print(source_vocab.get_itos()[:9]) + +# %% +# Numericalize sentences using vocabulary +# --------------------------------------- +# After building the vocabulary, we need to convert our sentences to corresponding indices. +# Let us define some functions for this: + +def getTransform(vocab): + """ + Create transforms based on given vocabulary. The returned transform is applied to sequence + of tokens. + """ + text_tranform = T.Sequential( + ## converts the sentences to indices based on given vocabulary + T.VocabTransform(vocab=vocab), + ## Add at beginning of each sentence. 1 because the index for in vocabulary is + # 1 as seen in previous section + T.AddToken(1, begin=True), + ## Add at beginning of each sentence. 2 because the index for in vocabulary is + # 2 as seen in previous section + T.AddToken(2, begin=False) + ) + return text_tranform + +# %% +# Now, let us see how to use the above function. 
The function returns an object of `Transforms` +# which we will use on our sentence. Let us take a random sentence and check how the transform +# works. + +temp_list = list(data_pipe) +some_sentence = temp_list[798][0] +print("Some sentence=", end="") +print(some_sentence) +transformed_sentence = getTransform(source_vocab)(engTokenize(some_sentence)) +print("Transformed sentence=", end="") +print(transformed_sentence) +index_to_string = source_vocab.get_itos() +for index in transformed_sentence: + print(index_to_string[index], end=" ") + +# %% +# In the above code,: +# +# * At line 2, we take a source sentence from list that we created from `data_pipe` at line 1 +# * At line 5, we get a transform based on a source vocabulary and apply it to a tokenized +# sentence. Note that transforms take list of words and not a sentence. +# * At line 8, we get the mapping of index to string and then use it get the transformed +# sentence +# +# Now we will use DataPipe functions to apply transform to all our sentences. +# Let us define some more functions for this. + +def applyTransform(sequence_pair): + """ + Apply transforms to sequence of tokens in a sequence pair + """ + + return ( + getTransform(source_vocab)(engTokenize(sequence_pair[0])), + getTransform(target_vocab)(deTokenize(sequence_pair[1])) + ) +data_pipe = data_pipe.map(applyTransform) ## Apply the function to each element in the iterator +temp_list = list(data_pipe) +print(temp_list[0]) + +# %% +# Make batches (with bucket batch) +# -------------------------------- +# Generally, we train models in batches. While working for sequence to sequence models, it is +# recommended to keep the length of sequences in a batch similar. For that we will use +# `bucketbatch` function of `data_pipe`. +# +# Let us define some functions that will be used by the `bucketbatch` function. + +def sortBucket(bucket): + """ + Function to sort a given bucket. Here, we want to sort based on the length of + source and target sequence. + """ + return sorted(bucket, key=lambda x: (len(x[0]), len(x[1]))) + +# %% +# Now, we will apply the `bucketbatch` function: + +data_pipe = data_pipe.bucketbatch( + batch_size = 4, batch_num=5, bucket_num=1, + use_in_batch_shuffle=False, sort_key=sortBucket +) + +# %% +# In the above code block: +# +# * We keep batch size = 4. +# * `batch_num` is the number of batches to keep in a bucket +# * `bucket_num` is the number of buckets to keep in a pool for shuffling +# * `sort_key` specifies the function that takes a bucket and sorts it +# +# Now, let us consider a batch of source sentences as `X` and a batch of target sentences as `y`. +# Generally, while training a model, we predict on a batch of `X` and compare the result with `y`. +# But, a batch in our `data_pipe` is of the form `[(X_1,y_1), (X_2,y_2), (X_3,y_3), (X_4,y_4)]`: + +print(list(data_pipe)[0]) +# %% +# So, we will now convert them into the form: `((X_1,X_2,X_3,X_4), (y_1,y_2,y_3,y_4))`. +# For this we will write a small function: + +def separateSourceTarget(sequence_pairs): + """ + input of form: `[(X_1,y_1), (X_2,y_2), (X_3,y_3), (X_4,y_4)]` + output of form: `((X_1,X_2,X_3,X_4), (y_1,y_2,y_3,y_4))` + """ + sources,targets = zip(*sequence_pairs) + return sources,targets + +## Apply the function to each element in the iterator +data_pipe = data_pipe.map(separateSourceTarget) +print(list(data_pipe)[0]) + +# %% +# Now, we have the data as desired. 
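# %%
# (Editorial addition, not part of the original tutorial.) A quick way to see what the
# bucketing above achieved is to inspect the token lengths inside one batch: the
# sequences in a batch have similar, but usually not identical, lengths, which is
# exactly why the padding step below is still needed. This sketch only assumes the
# `data_pipe` built above and reads a single batch from it.

sample_sources, sample_targets = next(iter(data_pipe))
print([len(sequence) for sequence in sample_sources])  # lengths of the source sequences in this batch
print([len(sequence) for sequence in sample_targets])  # lengths of the corresponding target sequences

# %%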
+# +# Padding +# ------- +# As discussed earlier while building vocabulary, we need to pad shorter sentences in a batch to +# make all the sequences in a batch of equal length. We can perform padding as follows: + +def applyPadding(pair_of_sequences): + """ + Convert sequences to tensors and apply padding + """ + return (T.ToTensor(0)(list(pair_of_sequences[0])), T.ToTensor(0)(list(pair_of_sequences[1]))) +## `T.ToTensor(0)` returns a transform that converts the sequence to `torch.tensor` and also applies +# padding. Here, `0` is passed to the constructor to specify the index of the `` token in the +# vocabulary. +data_pipe = data_pipe.map(applyPadding) + +# %% +# Now, we can use the index to string mapping to see how the sequence would look with tokens +# instead of indices: + +source_index_to_string = source_vocab.get_itos() +target_index_to_string = target_vocab.get_itos() + +def showSomeTransformedSentences(data_pipe): + """ + Function to show how the sentences look like after applying all transforms. + Here we try to print actual words instead of corresponding index + """ + for sources,targets in data_pipe: + if sources[0][-1] != 0: + continue # Just to visualize padding of shorter sentences + for i in range(4): + source = "" + for token in sources[i]: + source += " " + source_index_to_string[token] + target = "" + for token in targets[i]: + target += " " + target_index_to_string[token] + print(f"Source: {source}") + print(f"Traget: {target}") + break + +showSomeTransformedSentences(data_pipe) +# %% +# In the above output we can observe that the shorter sentences are padded with ``. Now, we +# can use `data_pipe` while writing our training function. +# +# Some parts of this tutorial was inspired from `this article +# `__. diff --git a/en-wordlist.txt b/en-wordlist.txt index 2a4c87925f4..145d270e7b2 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -35,6 +35,8 @@ DDQN DLRM DNN DQN +DataPipe +DataPipes DataLoaders DeepMind DeiT @@ -137,6 +139,7 @@ SciPy Sequentials Sigmoid SoTA +Spacy TPU TensorBoard TensorBoards @@ -376,6 +379,7 @@ timestep timesteps tokenization tokenize +tokenized tokenizer tokenizes tooltip diff --git a/index.rst b/index.rst index 48550e6c4ac..6e6d687d0c1 100644 --- a/index.rst +++ b/index.rst @@ -264,6 +264,13 @@ What's new in PyTorch tutorials? :link: beginner/translation_transformer.html :tags: Text +.. customcarditem:: + :header: Pre-process custom text dataset using Torchtext + :card_description: Learn how to use torchtext to prepare a custom dataset + :image: _static/img/thumbnails/cropped/torch_text_logo.png + :link: beginner/torchtext_custom_dataset_tutorial.html + :tags: Text + .. Reinforcement Learning @@ -870,6 +877,7 @@ Additional Resources intermediate/seq2seq_translation_tutorial beginner/text_sentiment_ngrams_tutorial beginner/translation_transformer + beginner/torchtext_custom_dataset_tutorial .. toctree:: From 7a24657deb4bab5c3c2b2b0ef7842708cfdafc2f Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 23 May 2023 06:54:33 -0700 Subject: [PATCH 034/609] Create PULL_REQUEST_TEMPLATE.md (#2328) - Add a template for pull requests. 
--- .github/PULL_REQUEST_TEMPLATE.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000000..0392eb3a00d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,11 @@ +Fixes #ISSUE_NUMBER + +## Description + + +## Checklist + +- [ ] The issue that is being fixed is referred in the description (see above "Fixes #ISSUE_NUMBER") +- [ ] Only one issue is addressed in this pull request +- [ ] Labels from the issue that this PR is fixing are added to this pull request +- [ ] No unnessessary issues are included into this pull request. From da8b1e32fa3da9d9ce130961507035f32a328952 Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Wed, 31 May 2023 07:13:19 -0700 Subject: [PATCH 035/609] uncommit_later (#2358) --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8bf2a6321ae..b7084096c4b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -154,14 +154,14 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" CUDA_VERSION: "9" resource_class: gpu.nvidia.small <<: *pytorch_tutorial_build_defaults pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" resource_class: medium From 963a29dcf873c1c3314d2663a72f30e0c77c3d90 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 31 May 2023 08:11:28 -0700 Subject: [PATCH 036/609] Followup after https://github.com/pytorch/tutorials/pull/2358 Noop, but both config.yml.in and config.yml should have been changed --- .circleci/config.yml.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 41f226eaeee..0694d221aad 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -154,14 +154,14 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" CUDA_VERSION: "9" resource_class: gpu.nvidia.small <<: *pytorch_tutorial_build_defaults pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7" + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" resource_class: medium From 88789696cc0f7b6376e2686348ae7fc7c4fbc45f Mon Sep 17 00:00:00 2001 From: Karl F Date: Wed, 31 May 2023 17:43:44 +0200 Subject: [PATCH 037/609] Fix dead 
URL to docs.ray (#2359) Co-authored-by: Svetlana Karslioglu --- beginner_source/hyperparameter_tuning_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 202a2f1a30b..228879fa5f2 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -201,7 +201,7 @@ def forward(self, x): # # The checkpoint saving is optional, however, it is necessary if we wanted to use advanced # schedulers like -# `Population Based Training `_. +# `Population Based Training `_. # Also, by saving the checkpoint we can later load the trained models and validate them # on a test set. Lastly, saving checkpoints is useful for fault tolerance, and it allows # us to interrupt training and continue training later. From 29d4d24a9d948b10751cdb1cc18b32f5b11517ef Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 31 May 2023 10:36:08 -0700 Subject: [PATCH 038/609] Propagate labels from the issue to the PR (#2357) * Add a GH action that propagates labels from an issue with docathon label to the PR that references it. --------- Co-authored-by: Nikita Shulga --- .github/scripts/docathon-label-sync.py | 43 +++++++++++++++++++++++ .github/workflows/docathon-label-sync.yml | 27 ++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 .github/scripts/docathon-label-sync.py create mode 100644 .github/workflows/docathon-label-sync.yml diff --git a/.github/scripts/docathon-label-sync.py b/.github/scripts/docathon-label-sync.py new file mode 100644 index 00000000000..597f4b5e034 --- /dev/null +++ b/.github/scripts/docathon-label-sync.py @@ -0,0 +1,43 @@ +import os +from github import Github +import sys +import re + +def main(): + token = os.environ.get('GITHUB_TOKEN') + + repo_owner = "pytorch" + repo_name = "tutorials" + pull_request_number = int(sys.argv[1]) + + g = Github(token) + repo = g.get_repo(f'{repo_owner}/{repo_name}') + pull_request = repo.get_pull(pull_request_number) + pull_request_body = pull_request.body + + # get issue number from the PR body + if not re.search(r'#\d{1,5}', pull_request_body): + print("The pull request does not mention an issue.") + return + issue_number = int(re.findall(r'#(\d{1,5})', pull_request_body)[0]) + issue = repo.get_issue(issue_number) + issue_labels = issue.labels + docathon_label_present = any(label.name == 'docathon-h1-2023' for label in issue_labels) + + # if the issue has a docathon label, add all labels from the issue to the PR. 
+ if not docathon_label_present: + print("The 'docathon-h1-2023' label is not present in the issue.") + return + pull_request_labels = pull_request.get_labels() + issue_label_names = [label.name for label in issue_labels] + labels_to_add = [label for label in issue_label_names if label not in pull_request_labels] + if not labels_to_add: + print("The pull request already has the same labels.") + return + pull_request.set_labels(*labels_to_add) + print("Labels added to the pull request!") + + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/docathon-label-sync.yml b/.github/workflows/docathon-label-sync.yml new file mode 100644 index 00000000000..fb4a532ed28 --- /dev/null +++ b/.github/workflows/docathon-label-sync.yml @@ -0,0 +1,27 @@ +name: Docathon Labels Sync + +on: + pull_request: + types: [opened, synchronize, edited] + +jobs: + check-labels: + runs-on: ubuntu-latest + + steps: + - name: Check if PR mentions an issue and get labels + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.x + - name: Install dependencies + run: | + pip install requests + pip install PyGithub + - name: Run Python script + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: python ./.github/scripts/docathon-label-sync.py ${{ github.event.pull_request.number }} From ddcc10c5b46cd08bf642b5d2b19545d1beb33f55 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 31 May 2023 11:10:43 -0700 Subject: [PATCH 039/609] Run docathon-label-sync on PR-target To enable it for forked pull-requests --- .github/workflows/docathon-label-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docathon-label-sync.yml b/.github/workflows/docathon-label-sync.yml index fb4a532ed28..2d855877417 100644 --- a/.github/workflows/docathon-label-sync.yml +++ b/.github/workflows/docathon-label-sync.yml @@ -1,7 +1,7 @@ name: Docathon Labels Sync on: - pull_request: + pull_request_target: types: [opened, synchronize, edited] jobs: From 814201046de6170e38a81de5069f3b8fd667cfa5 Mon Sep 17 00:00:00 2001 From: Kishan Savant <66986430+NeoKish@users.noreply.github.com> Date: Thu, 1 Jun 2023 00:40:05 +0530 Subject: [PATCH 040/609] Fixes #1460 (#2364) --- intermediate_source/char_rnn_classification_tutorial.py | 2 +- intermediate_source/char_rnn_generation_tutorial.py | 2 +- intermediate_source/seq2seq_translation_tutorial.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/intermediate_source/char_rnn_classification_tutorial.py b/intermediate_source/char_rnn_classification_tutorial.py index f36b92fb17e..9b1f255a51b 100644 --- a/intermediate_source/char_rnn_classification_tutorial.py +++ b/intermediate_source/char_rnn_classification_tutorial.py @@ -2,7 +2,7 @@ """ NLP From Scratch: Classifying Names with a Character-Level RNN ************************************************************** -**Author**: `Sean Robertson `_ +**Author**: `Sean Robertson `_ We will be building and training a basic character-level RNN to classify words. 
This tutorial, along with the following two, show how to do diff --git a/intermediate_source/char_rnn_generation_tutorial.py b/intermediate_source/char_rnn_generation_tutorial.py index 431c2bf43d9..6068c84cd0e 100644 --- a/intermediate_source/char_rnn_generation_tutorial.py +++ b/intermediate_source/char_rnn_generation_tutorial.py @@ -2,7 +2,7 @@ """ NLP From Scratch: Generating Names with a Character-Level RNN ************************************************************* -**Author**: `Sean Robertson `_ +**Author**: `Sean Robertson `_ This is our second of three tutorials on "NLP From Scratch". In the `first tutorial ` diff --git a/intermediate_source/seq2seq_translation_tutorial.py b/intermediate_source/seq2seq_translation_tutorial.py index 853cb2aed45..7953854e60a 100644 --- a/intermediate_source/seq2seq_translation_tutorial.py +++ b/intermediate_source/seq2seq_translation_tutorial.py @@ -2,7 +2,7 @@ """ NLP From Scratch: Translation with a Sequence to Sequence Network and Attention ******************************************************************************* -**Author**: `Sean Robertson `_ +**Author**: `Sean Robertson `_ This is the third and final tutorial on doing "NLP From Scratch", where we write our own classes and functions to preprocess the data to do our NLP From 921f4fb4f48aac1b676e0a6fe6abe47a5bb7be7e Mon Sep 17 00:00:00 2001 From: Kishan Savant <66986430+NeoKish@users.noreply.github.com> Date: Thu, 1 Jun 2023 00:40:39 +0530 Subject: [PATCH 041/609] Fixes #1943 (#2361) --- beginner_source/README.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/README.txt b/beginner_source/README.txt index 1cc37db7dca..5017c80b86b 100644 --- a/beginner_source/README.txt +++ b/beginner_source/README.txt @@ -23,4 +23,4 @@ Beginner Tutorials 6. transformer_translation.py Language Translation with Transformers - https://pytorch.org/tutorials/beginner/transformer_tutorial.html + https://pytorch.org/tutorials/beginner/translation_transformer.html From 510f82ea036b8b2b7d7f57baa53bcc719fa124e3 Mon Sep 17 00:00:00 2001 From: frasertajima <69366820+frasertajima@users.noreply.github.com> Date: Wed, 31 May 2023 13:17:24 -0700 Subject: [PATCH 042/609] Update transformer_tutorial.py (#2363) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix to "perhaps there is a misprint at line 40 #2111"; review of referenced paper https://arxiv.org/pdf/1706.03762.pdf section 3.2.3 suggests: "Similarly, self-attention layers in the decoder allow each position in the decoder to attend to all positions in the decoder up to and including that position. We need to prevent leftward information flow in the decoder to preserve the auto-regressive property. We implement this inside of scaled dot-product attention by masking out (setting to −∞) all values in the input of the softmax which correspond to illegal connections. See Figure 2." Thus the suggested change in reference from nn.Transform.Encoder to nn.Transform.Decoder seems reasonable. --- beginner_source/transformer_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index d93b3d55fe7..57d1f8d8591 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -37,7 +37,7 @@ # ``nn.TransformerEncoder`` consists of multiple layers of # `nn.TransformerEncoderLayer `__. 
# Along with the input sequence, a square attention mask is required because the -# self-attention layers in ``nn.TransformerEncoder`` are only allowed to attend +# self-attention layers in ``nn.TransformerDecoder`` are only allowed to attend # the earlier positions in the sequence. For the language modeling task, any # tokens on the future positions should be masked. To produce a probability # distribution over output words, the output of the ``nn.TransformerEncoder`` From dfc6aa22c9905778d3687360ed83fcc28cac1b7d Mon Sep 17 00:00:00 2001 From: TheMemoryDealer <32904619+TheMemoryDealer@users.noreply.github.com> Date: Wed, 31 May 2023 21:59:13 +0100 Subject: [PATCH 043/609] Update dynamic_quantization_bert_tutorial.rst (#2369) As per suggestion in #1114 --- intermediate_source/dynamic_quantization_bert_tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.rst b/intermediate_source/dynamic_quantization_bert_tutorial.rst index d618df87d58..53ac2cd0afb 100644 --- a/intermediate_source/dynamic_quantization_bert_tutorial.rst +++ b/intermediate_source/dynamic_quantization_bert_tutorial.rst @@ -68,7 +68,7 @@ built-in F1 score calculation helper function. .. code:: shell pip install sklearn - pip install transformers + pip install transformers==4.29.2 Because we will be using the beta parts of the PyTorch, it is From d55a262b2279d35672eae674232ae135b45db368 Mon Sep 17 00:00:00 2001 From: frasertajima <69366820+frasertajima@users.noreply.github.com> Date: Wed, 31 May 2023 15:28:33 -0700 Subject: [PATCH 044/609] Update super_resolution_with_onnxruntime.py (#2370) Fix for #1781 Rather than manually update the version number with the current stable version (e.g., 2.0.1), as long as ONNX maintains compatibility with the lastest stable version that reference should be sufficient and constantly up to date. --- advanced_source/super_resolution_with_onnxruntime.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/advanced_source/super_resolution_with_onnxruntime.py b/advanced_source/super_resolution_with_onnxruntime.py index eb184e85109..835a79bd3a0 100644 --- a/advanced_source/super_resolution_with_onnxruntime.py +++ b/advanced_source/super_resolution_with_onnxruntime.py @@ -16,10 +16,7 @@ and `ONNX Runtime `__. You can get binary builds of ONNX and ONNX Runtime with ``pip install onnx onnxruntime``. -Note that ONNX Runtime is compatible with Python versions 3.5 to 3.7. - -``NOTE``: This tutorial needs PyTorch master branch which can be installed by following -the instructions `here `__ +ONNX Runtime recommends using the latest stable runtime for PyTorch. """ From 7aff96cb8e2ccdcf5fe9f6eeb08f28b374eeb9f9 Mon Sep 17 00:00:00 2001 From: frasertajima <69366820+frasertajima@users.noreply.github.com> Date: Wed, 31 May 2023 16:16:51 -0700 Subject: [PATCH 045/609] Update nn_tutorial.py (#2368) * Update nn_tutorial.py Fix to #1303 "add pyplot.show() in beginner tutorial." Comments to issue suggested manually commenting out pyplot.show for users not using colab. 
--------- Co-authored-by: Svetlana Karslioglu --- beginner_source/nn_tutorial.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/beginner_source/nn_tutorial.py b/beginner_source/nn_tutorial.py index bc32131b93a..7ee7df3b435 100644 --- a/beginner_source/nn_tutorial.py +++ b/beginner_source/nn_tutorial.py @@ -75,6 +75,11 @@ import numpy as np pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray") +# ``pyplot.show()`` only if not on Colab +try: + import google.colab +except ImportError: + pyplot.show() print(x_train.shape) ############################################################################### From 4673b1434bb03eaaa4a4519d9fe9da545b02748e Mon Sep 17 00:00:00 2001 From: Suhas G Date: Thu, 1 Jun 2023 01:17:56 +0200 Subject: [PATCH 046/609] Add model.eval() in neural_style_tutorial.py (#2371) --- advanced_source/neural_style_tutorial.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py index 3d84fc508bc..54085fb1e98 100644 --- a/advanced_source/neural_style_tutorial.py +++ b/advanced_source/neural_style_tutorial.py @@ -423,6 +423,9 @@ def run_style_transfer(cnn, normalization_mean, normalization_std, # We want to optimize the input and not the model parameters so we # update all the requires_grad fields accordingly input_img.requires_grad_(True) + # We also put the model in evaluation mode, so that specific layers + # such as dropout or batch normalization layers behave correctly. + model.eval() model.requires_grad_(False) optimizer = get_input_optimizer(input_img) From d686b662932a380a58b7683425faa00c06bcf502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Luis=20Castro=20Garc=C3=ADa?= <81191337+JoseLuisC99@users.noreply.github.com> Date: Wed, 31 May 2023 19:19:00 -0600 Subject: [PATCH 047/609] Fix train loop in trainingyt.py (#2372) * refactored train loop in trainingyt.py, resolves issue #2230 * Simplified numpy function call, resolves issue #1038 --- beginner_source/introyt/trainingyt.py | 20 ++++++++++++-------- intermediate_source/torchvision_tutorial.rst | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/beginner_source/introyt/trainingyt.py b/beginner_source/introyt/trainingyt.py index 929e06c1b57..d9f585411e8 100644 --- a/beginner_source/introyt/trainingyt.py +++ b/beginner_source/introyt/trainingyt.py @@ -290,15 +290,19 @@ def train_one_epoch(epoch_index, tb_writer): model.train(True) avg_loss = train_one_epoch(epoch_number, writer) - # We don't need gradients on to do reporting - model.train(False) - + running_vloss = 0.0 - for i, vdata in enumerate(validation_loader): - vinputs, vlabels = vdata - voutputs = model(vinputs) - vloss = loss_fn(voutputs, vlabels) - running_vloss += vloss + # Set the model to evaluation mode, disabling dropout and using population + # statistics for batch normalization. + model.eval() + + # Disable gradient computation and reduce memory consumption. 
+ with torch.no_grad(): + for i, vdata in enumerate(validation_loader): + vinputs, vlabels = vdata + voutputs = model(vinputs) + vloss = loss_fn(voutputs, vlabels) + running_vloss += vloss avg_vloss = running_vloss / (i + 1) print('LOSS train {} valid {}'.format(avg_loss, avg_vloss)) diff --git a/intermediate_source/torchvision_tutorial.rst b/intermediate_source/torchvision_tutorial.rst index 9e3d1b9655c..21d47e258f7 100644 --- a/intermediate_source/torchvision_tutorial.rst +++ b/intermediate_source/torchvision_tutorial.rst @@ -145,7 +145,7 @@ Let’s write a ``torch.utils.data.Dataset`` class for this dataset. num_objs = len(obj_ids) boxes = [] for i in range(num_objs): - pos = np.where(masks[i]) + pos = np.nonzero(masks[i]) xmin = np.min(pos[1]) xmax = np.max(pos[1]) ymin = np.min(pos[0]) From 0bee138587b12312df68237d4e57886896898c6e Mon Sep 17 00:00:00 2001 From: Beniamin Condrea <30630733+BeniaminC@users.noreply.github.com> Date: Wed, 31 May 2023 20:09:35 -0700 Subject: [PATCH 048/609] Added matplotlib dependency to blitz tutorial (#2366) * Added matplotlib dependency to blitz tutorial. * Removed a modified file from pull request --------- Co-authored-by: Carl Parker --- beginner_source/deep_learning_60min_blitz.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/beginner_source/deep_learning_60min_blitz.rst b/beginner_source/deep_learning_60min_blitz.rst index 09ac232cc49..6c96c403455 100644 --- a/beginner_source/deep_learning_60min_blitz.rst +++ b/beginner_source/deep_learning_60min_blitz.rst @@ -20,11 +20,12 @@ Goal of this tutorial: - Understand PyTorch’s Tensor library and neural networks at a high level. - Train a small neural network to classify images -To run the tutorials below, make sure you have the `torch`_ and `torchvision`_ -packages installed. +To run the tutorials below, make sure you have the `torch`_, `torchvision`_, +and `matplotlib`_ packages installed. .. _torch: https://github.com/pytorch/pytorch .. _torchvision: https://github.com/pytorch/vision +.. _matplotlib: https://github.com/matplotlib/matplotlib .. toctree:: :hidden: From d3686263fea8c2c625f0c4a3d46a2e28485701e1 Mon Sep 17 00:00:00 2001 From: Fabio Gomez Date: Thu, 1 Jun 2023 10:44:16 -0500 Subject: [PATCH 049/609] Fix formatting in the FX Graph Mode Quantization guide (#2362) * removed ### lines and numbered in headlines * removed numbered from titles * added blank lines to show code * Remove the empty TODO placeholder --------- Co-authored-by: Svetlana Karslioglu --- .../fx_graph_mode_quant_guide.rst | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/prototype_source/fx_graph_mode_quant_guide.rst b/prototype_source/fx_graph_mode_quant_guide.rst index bb360861b9f..9072e488a4b 100644 --- a/prototype_source/fx_graph_mode_quant_guide.rst +++ b/prototype_source/fx_graph_mode_quant_guide.rst @@ -4,7 +4,7 @@ **Author**: `Jerry Zhang `_ FX Graph Mode Quantization requires a symbolically traceable model. -We use the FX framework (TODO: link) to convert a symbolically traceable nn.Module instance to IR, +We use the FX framework to convert a symbolically traceable nn.Module instance to IR, and we operate on the IR to execute the quantization passes. Please post your question about symbolically tracing your model in `PyTorch Discussion Forum `_ @@ -22,16 +22,19 @@ You can use any combination of these options: b. 
Write your own observed and quantized submodule -#################################################################### If the code that is not symbolically traceable does not need to be quantized, we have the following two options to run FX Graph Mode Quantization: -1.a. Symbolically trace only the code that needs to be quantized + + +Symbolically trace only the code that needs to be quantized ----------------------------------------------------------------- When the whole model is not symbolically traceable but the submodule we want to quantize is symbolically traceable, we can run quantization only on that submodule. + before: .. code:: python + class M(nn.Module): def forward(self, x): x = non_traceable_code_1(x) @@ -42,6 +45,7 @@ before: after: .. code:: python + class FP32Traceable(nn.Module): def forward(self, x): x = traceable_code(x) @@ -69,8 +73,7 @@ Note if original model needs to be preserved, you will have to copy it yourself before calling the quantization APIs. -##################################################### -1.b. Skip symbolically trace the non-traceable code +Skip symbolically trace the non-traceable code --------------------------------------------------- When we have some non-traceable code in the module, and this part of code doesn’t need to be quantized, we can factor out this part of the code into a submodule and skip symbolically trace that submodule. @@ -134,8 +137,7 @@ quantization code: If the code that is not symbolically traceable needs to be quantized, we have the following two options: -########################################################## -2.a Refactor your code to make it symbolically traceable +Refactor your code to make it symbolically traceable -------------------------------------------------------- If it is easy to refactor the code and make the code symbolically traceable, we can refactor the code and remove the use of non-traceable constructs in python. @@ -167,15 +169,10 @@ after: return x.permute(0, 2, 1, 3) -quantization code: - This can be combined with other approaches and the quantization code depends on the model. - - -####################################################### -2.b. Write your own observed and quantized submodule +Write your own observed and quantized submodule ----------------------------------------------------- If the non-traceable code can’t be refactored to be symbolically traceable, @@ -207,8 +204,8 @@ non-traceable logic, wrapped in a module class FP32NonTraceable: ... - -2. Define observed version of FP32NonTraceable +2. Define observed version of +FP32NonTraceable .. code:: python From c5501e78a19f7cae71cc91fb5a9ead1c283e9ee3 Mon Sep 17 00:00:00 2001 From: Mariia Mykhailova Date: Thu, 1 Jun 2023 08:49:51 -0700 Subject: [PATCH 050/609] Redirect "Finetuning Torchvision Models" to "TorchVision Object Detection Finetuning Tutorial" (#2378) --- .../finetuning_torchvision_models_tutorial.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 beginner_source/finetuning_torchvision_models_tutorial.rst diff --git a/beginner_source/finetuning_torchvision_models_tutorial.rst b/beginner_source/finetuning_torchvision_models_tutorial.rst new file mode 100644 index 00000000000..711f4b0f99b --- /dev/null +++ b/beginner_source/finetuning_torchvision_models_tutorial.rst @@ -0,0 +1,10 @@ +Finetuning Torchvision Models +============================= + +This tutorial has been moved to https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html + +It will redirect in 3 seconds. + +.. 
raw:: html + + From 9633e5f141eefbe62e5dcb8168b9e34d505058d0 Mon Sep 17 00:00:00 2001 From: Sergii Dymchenko Date: Thu, 1 Jun 2023 09:31:10 -0700 Subject: [PATCH 051/609] Fix docathon-label-sync.py to not fail on PRs without description (#2379) See https://github.com/pytorch/tutorials/actions/runs/5140794478/jobs/9252588225?pr=2377 as an example --- .github/scripts/docathon-label-sync.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/scripts/docathon-label-sync.py b/.github/scripts/docathon-label-sync.py index 597f4b5e034..5da80f24f5b 100644 --- a/.github/scripts/docathon-label-sync.py +++ b/.github/scripts/docathon-label-sync.py @@ -14,6 +14,9 @@ def main(): repo = g.get_repo(f'{repo_owner}/{repo_name}') pull_request = repo.get_pull(pull_request_number) pull_request_body = pull_request.body + # PR without description + if pull_request_body is None: + return # get issue number from the PR body if not re.search(r'#\d{1,5}', pull_request_body): From d9fd5bae719632632f96865bc198dd266905bacc Mon Sep 17 00:00:00 2001 From: Qasim Khan Date: Thu, 1 Jun 2023 21:39:27 +0500 Subject: [PATCH 052/609] Change batchify desc to remove ambiguity (#2383) Co-authored-by: Carl Parker --- beginner_source/transformer_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 57d1f8d8591..cce52eefdb3 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -149,7 +149,7 @@ def forward(self, x: Tensor) -> Tensor: # into ``batch_size`` columns. If the data does not divide evenly into # ``batch_size`` columns, then the data is trimmed to fit. For instance, with # the alphabet as the data (total length of 26) and ``batch_size=4``, we would -# divide the alphabet into 4 sequences of length 6: +# divide the alphabet into sequences of length 6, resulting in 4 of such sequences. # # .. math:: # \begin{bmatrix} From 4cd44ae2dd4cfdd5f923302d4e6af234b5af0ece Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Thu, 1 Jun 2023 12:19:25 -0500 Subject: [PATCH 053/609] Change formatting of code blocks for correct rendering in Colab (#2398) --- .../tensorboard_profiler_tutorial.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/intermediate_source/tensorboard_profiler_tutorial.py b/intermediate_source/tensorboard_profiler_tutorial.py index 440f2257e1a..2b241071b7f 100644 --- a/intermediate_source/tensorboard_profiler_tutorial.py +++ b/intermediate_source/tensorboard_profiler_tutorial.py @@ -18,7 +18,7 @@ ----- To install ``torch`` and ``torchvision`` use the following command: -:: +.. code-block:: pip install torch torchvision @@ -160,7 +160,7 @@ def train(data): # # Install PyTorch Profiler TensorBoard Plugin. # -# :: +# .. code-block:: # # pip install torch_tb_profiler # @@ -168,7 +168,7 @@ def train(data): ###################################################################### # Launch the TensorBoard. # -# :: +# .. code-block:: # # tensorboard --logdir=./log # @@ -176,7 +176,7 @@ def train(data): ###################################################################### # Open the TensorBoard profile URL in Google Chrome browser or Microsoft Edge browser. # -# :: +# .. 
code-block:: # # http://localhost:6006/#pytorch_profiler # @@ -287,7 +287,7 @@ def train(data): # In this example, we follow the "Performance Recommendation" and set ``num_workers`` as below, # pass a different name such as ``./log/resnet18_4workers`` to ``tensorboard_trace_handler``, and run it again. # -# :: +# .. code-block:: # # train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4) # @@ -316,7 +316,7 @@ def train(data): # # You can try it by using existing example on Azure # -# :: +# .. code-block:: # # pip install azure-storage-blob # tensorboard --logdir=https://torchtbprofiler.blob.core.windows.net/torchtbprofiler/demo/memory_demo_1_10 @@ -366,7 +366,7 @@ def train(data): # # You can try it by using existing example on Azure: # -# :: +# .. code-block:: # # pip install azure-storage-blob # tensorboard --logdir=https://torchtbprofiler.blob.core.windows.net/torchtbprofiler/demo/distributed_bert From 7e72b705cb7d6057cae513c1259e144b2a99e887 Mon Sep 17 00:00:00 2001 From: Mateusz Nowak <37732935+noqqaqq@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:34:47 +0200 Subject: [PATCH 054/609] README.txt - fix unreachable link (#2386) Co-authored-by: sekyondaMeta <127536312+sekyondaMeta@users.noreply.github.com> --- prototype_source/README.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prototype_source/README.txt b/prototype_source/README.txt index 94c182dcca0..4ab9ce8f6a9 100644 --- a/prototype_source/README.txt +++ b/prototype_source/README.txt @@ -1,8 +1,8 @@ Prototype Tutorials ------------------ 1. distributed_rpc_profiling.rst - Profiling PyTorch RPC-Based Workloads - https://github.com/pytorch/tutorials/blob/release/1.6/prototype_source/distributed_rpc_profiling.rst + Profiling PyTorch RPC-Based Workloads + https://github.com/pytorch/tutorials/blob/main/prototype_source/distributed_rpc_profiling.rst 2. graph_mode_static_quantization_tutorial.py Graph Mode Post Training Static Quantization in PyTorch @@ -21,8 +21,8 @@ Prototype Tutorials https://github.com/pytorch/tutorials/blob/main/prototype_source/torchscript_freezing.py 6. vulkan_workflow.rst - Vulkan Backend User Workflow - https://pytorch.org/tutorials/intermediate/vulkan_workflow.html + Vulkan Backend User Workflow + https://pytorch.org/tutorials/intermediate/vulkan_workflow.html 7. fx_graph_mode_ptq_static.rst FX Graph Mode Post Training Static Quantization From 0be50f4e48ea4af0bc27dee5af936a41700e61fc Mon Sep 17 00:00:00 2001 From: Sergii Dymchenko Date: Thu, 1 Jun 2023 10:46:55 -0700 Subject: [PATCH 055/609] Fix typo in a PR template (#2377) --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 0392eb3a00d..8c3604b99fb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -8,4 +8,4 @@ Fixes #ISSUE_NUMBER - [ ] The issue that is being fixed is referred in the description (see above "Fixes #ISSUE_NUMBER") - [ ] Only one issue is addressed in this pull request - [ ] Labels from the issue that this PR is fixing are added to this pull request -- [ ] No unnessessary issues are included into this pull request. +- [ ] No unnecessary issues are included into this pull request. 
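A short illustration of the batchify arithmetic whose description was reworded in PATCH 052/609 above: with the alphabet (26 tokens) and ``batch_size=4``, integer division gives sequences of length 6, the 2 leftover tokens are trimmed, and the result has 4 columns. The sketch below is illustrative only — the function name and the final ``view``/transpose are assumptions about a typical implementation, not a quote of the tutorial's code.

import torch

def batchify_sketch(data: torch.Tensor, batch_size: int) -> torch.Tensor:
    # 26 // 4 = 6 -> keep 24 tokens, drop 2, reshape into 4 columns of length 6
    seq_len = data.size(0) // batch_size
    data = data[:seq_len * batch_size]
    return data.view(batch_size, seq_len).t().contiguous()

alphabet = torch.arange(26)
print(batchify_sketch(alphabet, 4).shape)  # torch.Size([6, 4])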
From aa400c32d7c602d895116fbf92e5600264bd5616 Mon Sep 17 00:00:00 2001 From: Qasim Khan Date: Thu, 1 Jun 2023 23:41:27 +0500 Subject: [PATCH 056/609] Fixes module 'get_filesystem' error (#2397) * Add temporary fix for embeddings bug Co-authored-by: Svetlana Karslioglu --- beginner_source/introyt/tensorboardyt_tutorial.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/beginner_source/introyt/tensorboardyt_tutorial.py b/beginner_source/introyt/tensorboardyt_tutorial.py index 4c7c356fd0c..29e83066726 100644 --- a/beginner_source/introyt/tensorboardyt_tutorial.py +++ b/beginner_source/introyt/tensorboardyt_tutorial.py @@ -64,6 +64,13 @@ # PyTorch TensorBoard support from torch.utils.tensorboard import SummaryWriter +# In case you are using an environment that has TensorFlow installed, +# such as Google Colab, uncomment the following code to avoid +# a bug with saving embeddings to your TensorBoard directory + +# import tensorflow as tf +# import tensorboard as tb +# tf.io.gfile = tb.compat.tensorflow_stub.io.gfile ###################################################################### # Showing Images in TensorBoard From e2a7ab0f009cce4555f28c70711ee67ae85ad08a Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Thu, 1 Jun 2023 15:31:52 -0500 Subject: [PATCH 057/609] Clear plot at beginning of loop so that non-empty image renders (#2401) Co-authored-by: Svetlana Karslioglu --- intermediate_source/mario_rl_tutorial.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py index ff653d54c11..8d02f3daf34 100755 --- a/intermediate_source/mario_rl_tutorial.py +++ b/intermediate_source/mario_rl_tutorial.py @@ -711,17 +711,18 @@ def record(self, episode, epsilon, step): f"{datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'):>20}\n" ) - for metric in ["ep_rewards", "ep_lengths", "ep_avg_losses", "ep_avg_qs"]: - plt.plot(getattr(self, f"moving_avg_{metric}")) - plt.savefig(getattr(self, f"{metric}_plot")) + for metric in ["ep_lengths", "ep_avg_losses", "ep_avg_qs", "ep_rewards"]: plt.clf() + plt.plot(getattr(self, f"moving_avg_{metric}"), label=f"moving_avg_{metric}") + plt.legend() + plt.savefig(getattr(self, f"{metric}_plot")) ###################################################################### # Let’s play! # """"""""""""""" # -# In this example we run the training loop for 10 episodes, but for Mario to truly learn the ways of +# In this example we run the training loop for 40 episodes, but for Mario to truly learn the ways of # his world, we suggest running the loop for at least 40,000 episodes! 
# use_cuda = torch.cuda.is_available() @@ -735,7 +736,7 @@ def record(self, episode, epsilon, step): logger = MetricLogger(save_dir) -episodes = 10 +episodes = 40 for e in range(episodes): state = env.reset() From e1ec4bdaa7bbf167a354ba4c7aab8f17c1831bf7 Mon Sep 17 00:00:00 2001 From: Hemanth Sai <73033596+HemanthSai7@users.noreply.github.com> Date: Fri, 2 Jun 2023 02:31:26 +0530 Subject: [PATCH 058/609] Optimize DataLoader iteration in WrappedDataLoader (#2375) --- beginner_source/nn_tutorial.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/beginner_source/nn_tutorial.py b/beginner_source/nn_tutorial.py index 7ee7df3b435..183aca1748b 100644 --- a/beginner_source/nn_tutorial.py +++ b/beginner_source/nn_tutorial.py @@ -795,8 +795,7 @@ def __len__(self): return len(self.dl) def __iter__(self): - batches = iter(self.dl) - for b in batches: + for b in self.dl: yield (self.func(*b)) train_dl, valid_dl = get_data(train_ds, valid_ds, bs) From d07875659aa9ca8b092344a2ecc487d4b3309ff3 Mon Sep 17 00:00:00 2001 From: TheMemoryDealer <32904619+TheMemoryDealer@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:47:44 +0100 Subject: [PATCH 059/609] Patch 3 (#2389) * Updates #836 as suggested in https://github.com/pytorch/pytorch/issues/16885#issuecomment-551779897 --- beginner_source/former_torchies/parallelism_tutorial.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/beginner_source/former_torchies/parallelism_tutorial.py b/beginner_source/former_torchies/parallelism_tutorial.py index 18c14c43167..a11d844e1bd 100644 --- a/beginner_source/former_torchies/parallelism_tutorial.py +++ b/beginner_source/former_torchies/parallelism_tutorial.py @@ -53,7 +53,10 @@ def forward(self, x): class MyDataParallel(nn.DataParallel): def __getattr__(self, name): - return getattr(self.module, name) + try: + return super().__getattr__(name) + except AttributeError: + return getattr(self.module, name) ######################################################################## # **Primitives on which DataParallel is implemented upon:** From 56a2faf3a561cff3a7e98675a7d2080d84e30f96 Mon Sep 17 00:00:00 2001 From: Mike Brown Date: Thu, 1 Jun 2023 17:30:45 -0500 Subject: [PATCH 060/609] Address Err in char_rnn tutorial issue (#2374) * address bug; do a little editing Signed-off-by: Mike Brown * Update intermediate_source/char_rnn_classification_tutorial.py Signed-off-by: Mike Brown Co-authored-by: Svetlana Karslioglu --- .../char_rnn_classification_tutorial.py | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/intermediate_source/char_rnn_classification_tutorial.py b/intermediate_source/char_rnn_classification_tutorial.py index 9b1f255a51b..0c0aa3e988b 100644 --- a/intermediate_source/char_rnn_classification_tutorial.py +++ b/intermediate_source/char_rnn_classification_tutorial.py @@ -4,11 +4,14 @@ ************************************************************** **Author**: `Sean Robertson `_ -We will be building and training a basic character-level RNN to classify -words. This tutorial, along with the following two, show how to do -preprocess data for NLP modeling "from scratch", in particular not using -many of the convenience functions of `torchtext`, so you can see how -preprocessing for NLP modeling works at a low level. +We will be building and training a basic character-level Recurrent Neural +Network (RNN) to classify words. 
This tutorial, along with two other +Natural Language Processing (NLP) "from scratch" tutorials +:doc:`/intermediate/char_rnn_generation_tutorial` and +:doc:`/intermediate/seq2seq_translation_tutorial`, show how to +preprocess data to model NLP. In particular these tutorials do not +use many of the convenience functions of `torchtext`, so you can see how +preprocessing to model NLP works at a low level. A character-level RNN reads words as a series of characters - outputting a prediction and "hidden state" at each step, feeding its @@ -32,13 +35,15 @@ (-2.68) Dutch -**Recommended Reading:** +Recommended Preparation +======================= -I assume you have at least installed PyTorch, know Python, and -understand Tensors: +Before starting this tutorial it is recommended that you have installed PyTorch, +and have a basic understanding of Python programming language and Tensors: - https://pytorch.org/ For installation instructions - :doc:`/beginner/deep_learning_60min_blitz` to get started with PyTorch in general + and learn the basics of Tensors - :doc:`/beginner/pytorch_with_examples` for a wide and deep overview - :doc:`/beginner/former_torchies_tutorial` if you are former Lua Torch user @@ -181,10 +186,6 @@ def lineToTensor(line): # is just 2 linear layers which operate on an input and hidden state, with # a ``LogSoftmax`` layer after the output. # -# .. figure:: https://i.imgur.com/Z2xbySO.png -# :alt: -# -# import torch.nn as nn @@ -195,13 +196,13 @@ def __init__(self, input_size, hidden_size, output_size): self.hidden_size = hidden_size self.i2h = nn.Linear(input_size + hidden_size, hidden_size) - self.i2o = nn.Linear(input_size + hidden_size, output_size) + self.h2o = nn.Linear(hidden_size, output_size) self.softmax = nn.LogSoftmax(dim=1) def forward(self, input, hidden): combined = torch.cat((input, hidden), 1) hidden = self.i2h(combined) - output = self.i2o(combined) + output = self.h2o(hidden) output = self.softmax(output) return output, hidden From 9b5405667b99c37c990981c9646383e46ad39a79 Mon Sep 17 00:00:00 2001 From: zabboud <91271094+zabboud@users.noreply.github.com> Date: Thu, 1 Jun 2023 18:44:00 -0400 Subject: [PATCH 061/609] Fixes #2083 - explain model.eval, torch.no_grad (#2400) Co-authored-by: Svetlana Karslioglu --- beginner_source/basics/optimization_tutorial.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py index 0fb508d1ccc..a1603510b96 100644 --- a/beginner_source/basics/optimization_tutorial.py +++ b/beginner_source/basics/optimization_tutorial.py @@ -149,6 +149,9 @@ def forward(self, x): def train_loop(dataloader, model, loss_fn, optimizer): size = len(dataloader.dataset) + # Set the model to training mode - important for batch normalization and dropout layers + # Unnecessary in this situation but added for best practices + model.train() for batch, (X, y) in enumerate(dataloader): # Compute prediction and loss pred = model(X) @@ -165,10 +168,15 @@ def train_loop(dataloader, model, loss_fn, optimizer): def test_loop(dataloader, model, loss_fn): + # Set the model to evaluation mode - important for batch normalization and dropout layers + # Unnecessary in this situation but added for best practices + model.eval() size = len(dataloader.dataset) num_batches = len(dataloader) test_loss, correct = 0, 0 + # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode + # also serves to reduce unnecessary gradient computations 
and memory usage for tensors with requires_grad=True with torch.no_grad(): for X, y in dataloader: pred = model(X) From d41e23baf7efb3f0ce5f1839ef43b75a1ceb8aa4 Mon Sep 17 00:00:00 2001 From: Qasim Khan Date: Fri, 2 Jun 2023 03:56:59 +0500 Subject: [PATCH 062/609] Copy float_model using load_model (#2385) --- prototype_source/fx_graph_mode_ptq_static.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prototype_source/fx_graph_mode_ptq_static.rst b/prototype_source/fx_graph_mode_ptq_static.rst index f97b1f0a5f2..091673ed2e4 100644 --- a/prototype_source/fx_graph_mode_ptq_static.rst +++ b/prototype_source/fx_graph_mode_ptq_static.rst @@ -214,9 +214,9 @@ Download the `torchvision resnet18 model Date: Thu, 1 Jun 2023 23:04:55 -0400 Subject: [PATCH 063/609] resolve issue 1818 by modifying mean and standard deviation in the transforms.Normalize (#2405) * Fixes #2083 - explain model.eval, torch.no_grad * set norm to mean & std of CIFAR10(pytorch#1818) --------- Co-authored-by: Svetlana Karslioglu --- beginner_source/introyt/introyt1_tutorial.py | 23 ++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/beginner_source/introyt/introyt1_tutorial.py b/beginner_source/introyt/introyt1_tutorial.py index f52c3902c03..a5d65bcab16 100644 --- a/beginner_source/introyt/introyt1_tutorial.py +++ b/beginner_source/introyt/introyt1_tutorial.py @@ -288,7 +288,7 @@ def num_flat_features(self, x): transform = transforms.Compose( [transforms.ToTensor(), - transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))]) ########################################################################## @@ -297,9 +297,28 @@ def num_flat_features(self, x): # - ``transforms.ToTensor()`` converts images loaded by Pillow into # PyTorch tensors. # - ``transforms.Normalize()`` adjusts the values of the tensor so -# that their average is zero and their standard deviation is 0.5. Most +# that their average is zero and their standard deviation is 1.0. Most # activation functions have their strongest gradients around x = 0, so # centering our data there can speed learning. +# The values passed to the transform are the means (first tuple) and the +# standard deviations (second tuple) of the rgb values of the images in +# the dataset. You can calculate these values yourself by running these +# few lines of code: +# ``` +# from torch.utils.data import ConcatDataset +# transform = transforms.Compose([transforms.ToTensor()]) +# trainset = torchvision.datasets.CIFAR10(root='./data', train=True, +# download=True, transform=transform) +# +# #stack all train images together into a tensor of shape +# #(50000, 3, 32, 32) +# x = torch.stack([sample[0] for sample in ConcatDataset([trainset])]) +# +# #get the mean of each channel +# mean = torch.mean(x, dim=(0,2,3)) #tensor([0.4914, 0.4822, 0.4465]) +# std = torch.std(x, dim=(0,2,3)) #tensor([0.2470, 0.2435, 0.2616]) +# +# ``` # # There are many more transforms available, including cropping, centering, # rotation, and reflection. 
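A quick sanity check for the CIFAR-10 statistics introduced in PATCH 063/609 above: once ``Normalize`` uses the dataset's own per-channel mean and standard deviation, recomputing those statistics on the transformed images should give values close to 0 and 1. The snippet below is an illustrative check rather than part of the tutorial; it assumes ``torchvision`` is installed, that the dataset can be downloaded to ``./data``, and that roughly 600 MB of RAM is available for stacking all 50,000 images.

import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
x = torch.stack([img for img, _ in trainset])   # shape (50000, 3, 32, 32)
print(torch.mean(x, dim=(0, 2, 3)))             # approximately [0., 0., 0.]
print(torch.std(x, dim=(0, 2, 3)))              # approximately [1., 1., 1.]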
From 64dc7022385a579a3afa809f2a44b1ccee1eaa27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Luis=20Castro=20Garc=C3=ADa?= <81191337+JoseLuisC99@users.noreply.github.com> Date: Fri, 2 Jun 2023 08:18:16 -0600 Subject: [PATCH 064/609] Replace usage of copy.deepcopy() in Computer Vision Transfer Learning Tutorial, resolves issue #2332 (#2404) --- beginner_source/transfer_learning_tutorial.py | 128 +++++++++--------- 1 file changed, 66 insertions(+), 62 deletions(-) diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py index b4460bb4fb2..b09efc11749 100644 --- a/beginner_source/transfer_learning_tutorial.py +++ b/beginner_source/transfer_learning_tutorial.py @@ -46,7 +46,7 @@ import matplotlib.pyplot as plt import time import os -import copy +from tempfile import TemporaryDirectory cudnn.benchmark = True plt.ion() # interactive mode @@ -146,67 +146,71 @@ def imshow(inp, title=None): def train_model(model, criterion, optimizer, scheduler, num_epochs=25): since = time.time() - best_model_wts = copy.deepcopy(model.state_dict()) - best_acc = 0.0 - - for epoch in range(num_epochs): - print(f'Epoch {epoch}/{num_epochs - 1}') - print('-' * 10) - - # Each epoch has a training and validation phase - for phase in ['train', 'val']: - if phase == 'train': - model.train() # Set model to training mode - else: - model.eval() # Set model to evaluate mode - - running_loss = 0.0 - running_corrects = 0 - - # Iterate over data. - for inputs, labels in dataloaders[phase]: - inputs = inputs.to(device) - labels = labels.to(device) - - # zero the parameter gradients - optimizer.zero_grad() - - # forward - # track history if only in train - with torch.set_grad_enabled(phase == 'train'): - outputs = model(inputs) - _, preds = torch.max(outputs, 1) - loss = criterion(outputs, labels) - - # backward + optimize only if in training phase - if phase == 'train': - loss.backward() - optimizer.step() - - # statistics - running_loss += loss.item() * inputs.size(0) - running_corrects += torch.sum(preds == labels.data) - if phase == 'train': - scheduler.step() - - epoch_loss = running_loss / dataset_sizes[phase] - epoch_acc = running_corrects.double() / dataset_sizes[phase] - - print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') - - # deep copy the model - if phase == 'val' and epoch_acc > best_acc: - best_acc = epoch_acc - best_model_wts = copy.deepcopy(model.state_dict()) - - print() - - time_elapsed = time.time() - since - print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s') - print(f'Best val Acc: {best_acc:4f}') - - # load best model weights - model.load_state_dict(best_model_wts) + # Create a temporary directory to save training checkpoints + with TemporaryDirectory() as tempdir: + best_model_params_path = os.path.join(tempdir, 'best_model_params.pt') + + torch.save(model.state_dict(), best_model_params_path) + best_acc = 0.0 + + for epoch in range(num_epochs): + print(f'Epoch {epoch}/{num_epochs - 1}') + print('-' * 10) + + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + model.train() # Set model to training mode + else: + model.eval() # Set model to evaluate mode + + running_loss = 0.0 + running_corrects = 0 + + # Iterate over data. 
+ for inputs, labels in dataloaders[phase]: + inputs = inputs.to(device) + labels = labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = criterion(outputs, labels) + + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + optimizer.step() + + # statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + if phase == 'train': + scheduler.step() + + epoch_loss = running_loss / dataset_sizes[phase] + epoch_acc = running_corrects.double() / dataset_sizes[phase] + + print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') + + # deep copy the model + if phase == 'val' and epoch_acc > best_acc: + best_acc = epoch_acc + torch.save(model.state_dict(), best_model_params_path) + + print() + + time_elapsed = time.time() - since + print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s') + print(f'Best val Acc: {best_acc:4f}') + + # load best model weights + model.load_state_dict(torch.load(best_model_params_path)) return model From 5b804b84f1877ce6ac13ebfb8c15d114c0e5743a Mon Sep 17 00:00:00 2001 From: arunppsg Date: Fri, 2 Jun 2023 20:06:06 +0530 Subject: [PATCH 065/609] fix cropping to include last column and last row (#2384) --- beginner_source/data_loading_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beginner_source/data_loading_tutorial.py b/beginner_source/data_loading_tutorial.py index 322d9b3009c..3afb7dffa84 100644 --- a/beginner_source/data_loading_tutorial.py +++ b/beginner_source/data_loading_tutorial.py @@ -268,8 +268,8 @@ def __call__(self, sample): h, w = image.shape[:2] new_h, new_w = self.output_size - top = np.random.randint(0, h - new_h) - left = np.random.randint(0, w - new_w) + top = np.random.randint(0, h - new_h + 1) + left = np.random.randint(0, w - new_w + 1) image = image[top: top + new_h, left: left + new_w] @@ -294,7 +294,7 @@ def __call__(self, sample): ###################################################################### # .. note:: -# In the example above, `RandomCrop` uses an external library's random number generator +# In the example above, `RandomCrop` uses an external library's random number generator # (in this case, Numpy's `np.random.int`). This can result in unexpected behavior with `DataLoader` # (see `here `_). # In practice, it is safer to stick to PyTorch's random number generator, e.g. by using `torch.randint` instead. 
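The one-character change in PATCH 065/609 above is an off-by-one fix: ``np.random.randint(low, high)`` samples from the half-open interval ``[low, high)``, so without the ``+ 1`` the offset ``h - new_h`` can never be drawn and a crop can never include the last row (or, for ``left``, the last column). A small illustrative sketch of the two behaviours, with arbitrary sizes:

import numpy as np

h, new_h = 10, 8  # image height and crop height; valid top offsets are 0, 1, 2
# Old code: the exclusive upper bound means offset 2 is never produced.
old_offsets = {np.random.randint(0, h - new_h) for _ in range(10_000)}
# Fixed code: offsets 0..2 are all possible, so crops can reach the bottom row.
new_offsets = {np.random.randint(0, h - new_h + 1) for _ in range(10_000)}
print(sorted(old_offsets))  # [0, 1]
print(sorted(new_offsets))  # [0, 1, 2] (with overwhelming probability over 10,000 draws)

Note that ``torch.randint(low, high, size)`` also treats ``high`` as exclusive, so the PyTorch-native alternative mentioned in the note above needs the same ``+ 1``.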
From fd9a6a7f5fff58b00a0b83c13c96a70cbf513f98 Mon Sep 17 00:00:00 2001 From: Mateusz Nowak <37732935+noqqaqq@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:45:11 +0200 Subject: [PATCH 066/609] Enumerate over dataset instead of simple loop (#2407) Co-authored-by: noqqaqq Co-authored-by: Nicolas Hug --- beginner_source/data_loading_tutorial.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/beginner_source/data_loading_tutorial.py b/beginner_source/data_loading_tutorial.py index 3afb7dffa84..d5326f6e9a6 100644 --- a/beginner_source/data_loading_tutorial.py +++ b/beginner_source/data_loading_tutorial.py @@ -165,9 +165,7 @@ def __getitem__(self, idx): fig = plt.figure() -for i in range(len(face_dataset)): - sample = face_dataset[i] - +for i, sample in enumerate(face_dataset): print(i, sample['image'].shape, sample['landmarks'].shape) ax = plt.subplot(1, 4, i + 1) @@ -356,9 +354,7 @@ def __call__(self, sample): ToTensor() ])) -for i in range(len(transformed_dataset)): - sample = transformed_dataset[i] - +for i, sample in enumerate(transformed_dataset): print(i, sample['image'].size(), sample['landmarks'].size()) if i == 3: From b966c1fc9ff17eb0da60b5c7546a83589e22831d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Luis=20Castro=20Garc=C3=ADa?= <81191337+JoseLuisC99@users.noreply.github.com> Date: Fri, 2 Jun 2023 10:33:11 -0600 Subject: [PATCH 067/609] Implement function for BERT quantization tutorial, resolves issue #1971 (#2403) Co-authored-by: Carl Parker --- .../dynamic_quantization_bert_tutorial.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.rst b/intermediate_source/dynamic_quantization_bert_tutorial.rst index 53ac2cd0afb..39cff5a22c5 100644 --- a/intermediate_source/dynamic_quantization_bert_tutorial.rst +++ b/intermediate_source/dynamic_quantization_bert_tutorial.rst @@ -255,6 +255,9 @@ model before and after the dynamic quantization. torch.manual_seed(seed) set_seed(42) + # Initialize a global random number generator + global_rng = random.Random() + 2.2 Load the fine-tuned BERT model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -525,6 +528,21 @@ We can serialize and save the quantized model for the future use using .. 
code:: python + def ids_tensor(shape, vocab_size, rng=None, name=None): + # Creates a random int32 tensor of the shape within the vocab size + if rng is None: + rng = global_rng + + total_dims = 1 + for dim in shape: + total_dims *= dim + + values = [] + for _ in range(total_dims): + values.append(rng.randint(0, vocab_size - 1)) + + return torch.tensor(data=values, dtype=torch.long, device='cpu').view(shape).contiguous() + input_ids = ids_tensor([8, 128], 2) token_type_ids = ids_tensor([8, 128], 2) attention_mask = ids_tensor([8, 128], vocab_size=2) From 769cff98ea01d67b9f82910c4be2e095b31deb46 Mon Sep 17 00:00:00 2001 From: Alok Kumar Jha <92216931+akjalok@users.noreply.github.com> Date: Fri, 2 Jun 2023 22:40:07 +0530 Subject: [PATCH 068/609] Fix the loss initialization in intermediate_source/char_rnn_generation_tutorial.py (#2380) * changed the loss init to make it less confusing --------- Co-authored-by: Nicolas Hug Co-authored-by: Svetlana Karslioglu --- intermediate_source/char_rnn_generation_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/char_rnn_generation_tutorial.py b/intermediate_source/char_rnn_generation_tutorial.py index 6068c84cd0e..d0c1c553865 100644 --- a/intermediate_source/char_rnn_generation_tutorial.py +++ b/intermediate_source/char_rnn_generation_tutorial.py @@ -278,7 +278,7 @@ def train(category_tensor, input_line_tensor, target_line_tensor): rnn.zero_grad() - loss = 0 + loss = torch.Tensor([0]) # you can also just simply use ``loss = 0`` for i in range(input_line_tensor.size(0)): output, hidden = rnn(category_tensor, input_line_tensor[i], hidden) From 83cbc8de29a9ad40aaffb782206a316f8966a257 Mon Sep 17 00:00:00 2001 From: Youshaa Murhij Date: Fri, 2 Jun 2023 20:19:13 +0300 Subject: [PATCH 069/609] Update transformer_tutorial.py | Resolving issue #1778 (#2402) * Update transformer_tutorial.py Add description for positional encoding calculation for Transformers * Update Positional Encoding description in transformer_tutorial.py * Update transformer_tutorial.py --------- Co-authored-by: Carl Parker --- beginner_source/transformer_tutorial.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index cce52eefdb3..5ed9a0d1390 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -103,6 +103,15 @@ def generate_square_subsequent_mask(sz: int) -> Tensor: # positional encodings have the same dimension as the embeddings so that # the two can be summed. Here, we use ``sine`` and ``cosine`` functions of # different frequencies. +# The ``div_term`` in the code is calculated as +# ``torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))``. +# This calculation is based on the original Transformer paper’s formulation +# for positional encoding. The purpose of this calculation is to create +# a range of values that decrease exponentially. +# This allows the model to learn to attend to positions based on their relative distances. +# The ``math.log(10000.0)`` term in the exponent represents the maximum effective +# input length (in this case, ``10000``). Dividing this term by ``d_model`` scales +# the values to be within a reasonable range for the exponential function. 
# class PositionalEncoding(nn.Module): From 420037e77a0d3dd8fc7952d48c7be2c591b4e625 Mon Sep 17 00:00:00 2001 From: TheMemoryDealer <32904619+TheMemoryDealer@users.noreply.github.com> Date: Fri, 2 Jun 2023 21:27:30 +0100 Subject: [PATCH 070/609] Fix run_demo(demo_model_parallel, world_size) issue (#2367) In the function demo_model_parallel, dev0 and dev1 are computed in a way that assigns two distinct GPUs to each process. This is achieved by doubling the rank and applying modulus operation with twice the world_size. Assuming 8 gpus world_size is set to 4, leading to the creation of 4 processes. Each of these processes is allocated two distinct GPUs. For instance, the first process (process 0) is assigned GPUs 0 and 1, the second process (process 1) is assigned GPUs 2 and 3, and so forth. --- intermediate_source/ddp_tutorial.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/intermediate_source/ddp_tutorial.rst b/intermediate_source/ddp_tutorial.rst index 366db8db130..a8955569df5 100644 --- a/intermediate_source/ddp_tutorial.rst +++ b/intermediate_source/ddp_tutorial.rst @@ -269,8 +269,8 @@ either the application or the model ``forward()`` method. setup(rank, world_size) # setup mp_model and devices for this process - dev0 = (rank * 2) % world_size - dev1 = (rank * 2 + 1) % world_size + dev0 = rank * 2 + dev1 = rank * 2 + 1 mp_model = ToyMpModel(dev0, dev1) ddp_mp_model = DDP(mp_model) @@ -293,6 +293,7 @@ either the application or the model ``forward()`` method. world_size = n_gpus run_demo(demo_basic, world_size) run_demo(demo_checkpoint, world_size) + world_size = n_gpus//2 run_demo(demo_model_parallel, world_size) Initialize DDP with torch.distributed.run/torchrun From 4648254675e467170e17374a4bf954291e3ce819 Mon Sep 17 00:00:00 2001 From: Qasim Khan Date: Sat, 3 Jun 2023 01:28:34 +0500 Subject: [PATCH 071/609] Fix dependencies and kernel crash in captumyt.py (#2408) * Update captum dependencies (matplotlib and flask-compress) * Use resnet18 due to RAM limitation Google Colab crashes due to insufficient RAM (more than 12 GB is required) if resnet101 or resnet50 are used. Thus, resnet18 is used instead (approximately 6 GB is used). 
--- beginner_source/introyt/captumyt.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/beginner_source/introyt/captumyt.py b/beginner_source/introyt/captumyt.py index 2ff8e9e70b1..cf63b6109b6 100644 --- a/beginner_source/introyt/captumyt.py +++ b/beginner_source/introyt/captumyt.py @@ -98,21 +98,24 @@ Before you get started, you need to have a Python environment with: - Python version 3.6 or higher -- For the Captum Insights example, Flask 1.1 or higher +- For the Captum Insights example, Flask 1.1 or higher and Flask-Compress + (the latest version is recommended) - PyTorch version 1.2 or higher (the latest version is recommended) - TorchVision version 0.6 or higher (the latest version is recommended) - Captum (the latest version is recommended) +- Matplotlib version 3.3.4, since Captum currently uses a Matplotlib + function whose arguments have been renamed in later versions To install Captum in an Anaconda or pip virtual environment, use the appropriate command for your environment below: With ``conda``:: - conda install pytorch torchvision captum -c pytorch + conda install pytorch torchvision captum flask-compress matplotlib=3.3.4 -c pytorch With ``pip``:: - pip install torch torchvision captum + pip install torch torchvision captum matplotlib==3.3.4 Flask-Compress Restart this notebook in the environment you set up, and you’re ready to go! @@ -155,7 +158,7 @@ # now. # -model = models.resnet101(weights='IMAGENET1K_V1') +model = models.resnet18(weights='IMAGENET1K_V1') model = model.eval() From fa9be972d2d7c4953c3927365a033f9de74cfbfb Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 2 Jun 2023 14:45:07 -0700 Subject: [PATCH 072/609] Set global device back to cpu at the end of tutorial (#2411) We are using sphinx to render those tutorials, which does not start a new process to render, so one needs to restore global state to default value, by calling `torch.set_default_device('cpu')` Co-authored-by: Svetlana Karslioglu --- recipes_source/recipes/changing_default_device.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/recipes_source/recipes/changing_default_device.py b/recipes_source/recipes/changing_default_device.py index 103560fd743..f5e50b3f0be 100644 --- a/recipes_source/recipes/changing_default_device.py +++ b/recipes_source/recipes/changing_default_device.py @@ -43,6 +43,9 @@ print(mod.weight.device) print(mod(torch.randn(128, 20)).device) +# And then globally return it back to CPU +torch.set_default_device('cpu') + ################################################################ # This function imposes a slight performance cost on every Python # call to the torch API (not just factory functions). If this From 3a58c5197c343f8adf39ed4ec3f3bd0e666f3a6a Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 2 Jun 2023 15:07:09 -0700 Subject: [PATCH 073/609] Revert "Update transformer_tutorial.py | Resolving issue #1778 (#2402)" (#2412) This reverts commit 83cbc8de29a9ad40aaffb782206a316f8966a257. --- beginner_source/transformer_tutorial.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 5ed9a0d1390..cce52eefdb3 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -103,15 +103,6 @@ def generate_square_subsequent_mask(sz: int) -> Tensor: # positional encodings have the same dimension as the embeddings so that # the two can be summed. 
Here, we use ``sine`` and ``cosine`` functions of # different frequencies. -# The ``div_term`` in the code is calculated as -# ``torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))``. -# This calculation is based on the original Transformer paper’s formulation -# for positional encoding. The purpose of this calculation is to create -# a range of values that decrease exponentially. -# This allows the model to learn to attend to positions based on their relative distances. -# The ``math.log(10000.0)`` term in the exponent represents the maximum effective -# input length (in this case, ``10000``). Dividing this term by ``d_model`` scales -# the values to be within a reasonable range for the exponential function. # class PositionalEncoding(nn.Module): From 9e001571138e4fee947934e8f8f24b92ee669ddb Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 2 Jun 2023 16:33:53 -0700 Subject: [PATCH 074/609] Make DCGan tutorial results reproducible (#2414) By using deterministic algorithm That should prevent repo size increase by 70Mb after every commit, but will make tutorial slightly slower (though not significantly) Also, remove unused/absolete imports --- beginner_source/dcgan_faces_tutorial.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/beginner_source/dcgan_faces_tutorial.py b/beginner_source/dcgan_faces_tutorial.py index d98683741e5..1a1f9c38606 100644 --- a/beginner_source/dcgan_faces_tutorial.py +++ b/beginner_source/dcgan_faces_tutorial.py @@ -112,7 +112,6 @@ # will be explained in the coming sections. # -from __future__ import print_function #%matplotlib inline import argparse import os @@ -120,7 +119,6 @@ import torch import torch.nn as nn import torch.nn.parallel -import torch.backends.cudnn as cudnn import torch.optim as optim import torch.utils.data import torchvision.datasets as dset @@ -137,6 +135,7 @@ print("Random Seed: ", manualSeed) random.seed(manualSeed) torch.manual_seed(manualSeed) +torch.use_deterministic_algorithms(True) # Needed for reproducible results ###################################################################### From f1cb62c9cacc6d0f781dbca6b4c27007b6de42da Mon Sep 17 00:00:00 2001 From: BJ Hargrave Date: Mon, 5 Jun 2023 12:22:42 -0400 Subject: [PATCH 075/609] Remove improper src_mask from encoder tutorial (#2423) Fixes https://github.com/pytorch/tutorials/issues/1877 The tutorial is using a transformer encoder and the mask used was for masking a decoder which is not part of the tutorial. The mask is removed. Some variable names are changed to better reflect the purpose of the variable. Also, some unused imports are removed. Signed-off-by: BJ Hargrave --- beginner_source/transformer_tutorial.py | 45 ++++++++----------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index cce52eefdb3..a3fc3ab16eb 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -36,12 +36,8 @@ # of the word (see the next paragraph for more details). The # ``nn.TransformerEncoder`` consists of multiple layers of # `nn.TransformerEncoderLayer `__. -# Along with the input sequence, a square attention mask is required because the -# self-attention layers in ``nn.TransformerDecoder`` are only allowed to attend -# the earlier positions in the sequence. For the language modeling task, any -# tokens on the future positions should be masked. 
To produce a probability -# distribution over output words, the output of the ``nn.TransformerEncoder`` -# model is passed through a linear layer followed by a log-softmax function. +# To produce a probability distribution over output words, the output of +# the ``nn.TransformerEncoder`` model is passed through a linear layer. # import math @@ -51,7 +47,6 @@ import torch from torch import nn, Tensor -import torch.nn.functional as F from torch.nn import TransformerEncoder, TransformerEncoderLayer from torch.utils.data import dataset @@ -64,19 +59,19 @@ def __init__(self, ntoken: int, d_model: int, nhead: int, d_hid: int, self.pos_encoder = PositionalEncoding(d_model, dropout) encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout) self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) - self.encoder = nn.Embedding(ntoken, d_model) + self.embedding = nn.Embedding(ntoken, d_model) self.d_model = d_model - self.decoder = nn.Linear(d_model, ntoken) + self.linear = nn.Linear(d_model, ntoken) self.init_weights() def init_weights(self) -> None: initrange = 0.1 - self.encoder.weight.data.uniform_(-initrange, initrange) - self.decoder.bias.data.zero_() - self.decoder.weight.data.uniform_(-initrange, initrange) + self.embedding.weight.data.uniform_(-initrange, initrange) + self.linear.bias.data.zero_() + self.linear.weight.data.uniform_(-initrange, initrange) - def forward(self, src: Tensor, src_mask: Tensor) -> Tensor: + def forward(self, src: Tensor, src_mask: Tensor = None) -> Tensor: """ Arguments: src: Tensor, shape ``[seq_len, batch_size]`` @@ -85,18 +80,13 @@ def forward(self, src: Tensor, src_mask: Tensor) -> Tensor: Returns: output Tensor of shape ``[seq_len, batch_size, ntoken]`` """ - src = self.encoder(src) * math.sqrt(self.d_model) + src = self.embedding(src) * math.sqrt(self.d_model) src = self.pos_encoder(src) output = self.transformer_encoder(src, src_mask) - output = self.decoder(output) + output = self.linear(output) return output -def generate_square_subsequent_mask(sz: int) -> Tensor: - """Generates an upper-triangular matrix of ``-inf``, with zeros on ``diag``.""" - return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1) - - ###################################################################### # ``PositionalEncoding`` module injects some information about the # relative or absolute position of the tokens in the sequence. The @@ -286,7 +276,6 @@ def get_batch(source: Tensor, i: int) -> Tuple[Tensor, Tensor]: # to prevent gradients from exploding. # -import copy import time criterion = nn.CrossEntropyLoss() @@ -299,16 +288,13 @@ def train(model: nn.Module) -> None: total_loss = 0. log_interval = 200 start_time = time.time() - src_mask = generate_square_subsequent_mask(bptt).to(device) num_batches = len(train_data) // bptt for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)): data, targets = get_batch(train_data, i) - seq_len = data.size(0) - if seq_len != bptt: # only on last batch - src_mask = src_mask[:seq_len, :seq_len] - output = model(data, src_mask) - loss = criterion(output.view(-1, ntokens), targets) + output = model(data) + output_flat = output.view(-1, ntokens) + loss = criterion(output_flat, targets) optimizer.zero_grad() loss.backward() @@ -330,14 +316,11 @@ def train(model: nn.Module) -> None: def evaluate(model: nn.Module, eval_data: Tensor) -> float: model.eval() # turn on evaluation mode total_loss = 0. 
- src_mask = generate_square_subsequent_mask(bptt).to(device) with torch.no_grad(): for i in range(0, eval_data.size(0) - 1, bptt): data, targets = get_batch(eval_data, i) seq_len = data.size(0) - if seq_len != bptt: - src_mask = src_mask[:seq_len, :seq_len] - output = model(data, src_mask) + output = model(data) output_flat = output.view(-1, ntokens) total_loss += seq_len * criterion(output_flat, targets).item() return total_loss / (len(eval_data) - 1) From 47b9ea4628764a19e867d8a0227a81fc2138c0bc Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Mon, 5 Jun 2023 11:26:48 -0700 Subject: [PATCH 076/609] Move from CircleCI to GHA (#2280) TODO: - Increase number of runner and see if it reduces the build time - Upload previews to the docs bucket --- .circleci/config.yml | 366 +------------------------- .circleci/config.yml.in | 213 --------------- .circleci/regenerate.py | 112 -------- .github/workflows/build-tutorials.yml | 182 +++++++++++++ .jenkins/build.sh | 38 +-- .jenkins/get_files_to_run.py | 8 +- 6 files changed, 203 insertions(+), 716 deletions(-) delete mode 100644 .circleci/config.yml.in delete mode 100644 .circleci/regenerate.py create mode 100644 .github/workflows/build-tutorials.yml diff --git a/.circleci/config.yml b/.circleci/config.yml index b7084096c4b..70b2c7fd5b0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -306,6 +306,10 @@ jobs: workflows: build: + when: + and: # All must be true to trigger + - equal: [ branch1, << pipeline.git.branch >> ] + - equal: [ branch2, << pipeline.git.branch >> ] jobs: # Build jobs that only run on PR - pytorch_tutorial_pr_build_worker_0: @@ -314,365 +318,3 @@ workflows: ignore: - master - main - - pytorch_tutorial_pr_build_worker_1: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_2: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_3: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_4: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_5: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_6: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_7: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_8: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_9: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_10: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_11: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_12: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_13: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_14: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_15: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_16: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_17: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_18: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_worker_19: - filters: - branches: - ignore: - - master - - main - - pytorch_tutorial_pr_build_manager: - filters: - 
branches: - ignore: - - master - - main - requires: - - pytorch_tutorial_pr_build_worker_0 - - pytorch_tutorial_pr_build_worker_1 - - pytorch_tutorial_pr_build_worker_2 - - pytorch_tutorial_pr_build_worker_3 - - pytorch_tutorial_pr_build_worker_4 - - pytorch_tutorial_pr_build_worker_5 - - pytorch_tutorial_pr_build_worker_6 - - pytorch_tutorial_pr_build_worker_7 - - pytorch_tutorial_pr_build_worker_8 - - pytorch_tutorial_pr_build_worker_9 - - pytorch_tutorial_pr_build_worker_10 - - pytorch_tutorial_pr_build_worker_11 - - pytorch_tutorial_pr_build_worker_12 - - pytorch_tutorial_pr_build_worker_13 - - pytorch_tutorial_pr_build_worker_14 - - pytorch_tutorial_pr_build_worker_15 - - pytorch_tutorial_pr_build_worker_16 - - pytorch_tutorial_pr_build_worker_17 - - pytorch_tutorial_pr_build_worker_18 - - pytorch_tutorial_pr_build_worker_19 - # Build jobs that only run on trunk - - pytorch_tutorial_trunk_build_worker_0: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_1: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_2: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_3: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_4: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_5: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_6: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_7: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_8: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_9: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_10: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_11: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_12: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_13: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_14: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_15: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_16: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_17: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_18: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_worker_19: - context: org-member - filters: - branches: - only: - - master - - main - - pytorch_tutorial_trunk_build_manager: - context: org-member - filters: - branches: - only: - - master - - main - requires: - - pytorch_tutorial_trunk_build_worker_0 - - pytorch_tutorial_trunk_build_worker_1 - - pytorch_tutorial_trunk_build_worker_2 - - pytorch_tutorial_trunk_build_worker_3 - - pytorch_tutorial_trunk_build_worker_4 - - 
pytorch_tutorial_trunk_build_worker_5 - - pytorch_tutorial_trunk_build_worker_6 - - pytorch_tutorial_trunk_build_worker_7 - - pytorch_tutorial_trunk_build_worker_8 - - pytorch_tutorial_trunk_build_worker_9 - - pytorch_tutorial_trunk_build_worker_10 - - pytorch_tutorial_trunk_build_worker_11 - - pytorch_tutorial_trunk_build_worker_12 - - pytorch_tutorial_trunk_build_worker_13 - - pytorch_tutorial_trunk_build_worker_14 - - pytorch_tutorial_trunk_build_worker_15 - - pytorch_tutorial_trunk_build_worker_16 - - pytorch_tutorial_trunk_build_worker_17 - - pytorch_tutorial_trunk_build_worker_18 - - pytorch_tutorial_trunk_build_worker_19 -# - pytorch_tutorial_windows_pr_build_worker_0: -# filters: -# branches: -# ignore: -# - master -# - main -# - pytorch_tutorial_windows_pr_build_worker_1: -# filters: -# branches: -# ignore: -# - master -# - main -# - pytorch_tutorial_windows_pr_build_worker_2: -# filters: -# branches: -# ignore: -# - master -# - main -# - pytorch_tutorial_windows_pr_build_worker_3: -# filters: -# branches: -# ignore: -# - master -# - main -# - pytorch_tutorial_windows_trunk_build_worker_0: -# context: org-member -# filters: -# branches: -# only: -# - master -# - main -# - pytorch_tutorial_windows_trunk_build_worker_1: -# context: org-member -# filters: -# branches: -# only: -# - master -# - main -# - pytorch_tutorial_windows_trunk_build_worker_2: -# context: org-member -# filters: -# branches: -# only: -# - master -# - main -# - pytorch_tutorial_windows_trunk_build_worker_3: -# context: org-member -# filters: -# branches: -# only: -# - master -# - main diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in deleted file mode 100644 index 0694d221aad..00000000000 --- a/.circleci/config.yml.in +++ /dev/null @@ -1,213 +0,0 @@ -# run python regenerate.py to generate config.yml from config.yml.in - -version: 2.1 - -executors: - windows-with-nvidia-gpu: - machine: - resource_class: windows.gpu.nvidia.medium - image: windows-server-2019-nvidia:stable - shell: bash.exe - -install_official_git_client: &install_official_git_client - name: Install Official Git Client - no_output_timeout: "1h" - command: | - set -e - sudo apt-get -qq update - sudo apt-get -qq install openssh-client git - -# This system setup script is meant to run before the CI-related scripts, e.g., -# installing Git client, checking out code, setting up CI env, and -# building/testing. -setup_linux_system_environment: &setup_linux_system_environment - name: Set Up System Environment - no_output_timeout: "1h" - command: | - set -ex - - # Stop background apt updates. Hypothetically, the kill should not - # be necessary, because stop is supposed to send a kill signal to - # the process, but we've added it for good luck. Also - # hypothetically, it's supposed to be unnecessary to wait for - # the process to block. We also have that line for good luck. - # If you like, try deleting them and seeing if it works. 
- sudo systemctl stop apt-daily.service || true - sudo systemctl kill --kill-who=all apt-daily.service || true - - sudo systemctl stop unattended-upgrades.service || true - sudo systemctl kill --kill-who=all unattended-upgrades.service || true - - # wait until `apt-get update` has been killed - while systemctl is-active --quiet apt-daily.service - do - sleep 1; - done - while systemctl is-active --quiet unattended-upgrades.service - do - sleep 1; - done - - # See if we actually were successful - systemctl list-units --all | cat - - sudo apt-get purge -y unattended-upgrades - - cat /etc/apt/sources.list - - ps auxfww | grep [a]pt - ps auxfww | grep dpkg - -pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults - machine: - image: ubuntu-2004-cuda-11.4:202110-01 - steps: - - checkout - - run: - <<: *setup_linux_system_environment - - run: - name: Set Up CI Environment - no_output_timeout: "1h" - command: | - set -e - - sudo apt-get -y update - sudo apt-get -y install expect-dev moreutils - - sudo pip3 -q install awscli==1.16.35 - - if [ -n "${CUDA_VERSION}" ]; then - nvidia-smi - fi - - # This IAM user only allows read-write access to ECR - export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_ONLY} - export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_ONLY} - eval $(aws ecr get-login --region us-east-1 --no-include-email) - - run: - name: Build - no_output_timeout: "20h" - command: | - set -e - - # for some reason, pip installs it in a different place than what is looked at in the py file - sudo pip3 install requests --target=/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages - export pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py) - echo "PyTorchDockerImageTag: "${pyTorchDockerImageTag} - - cat >/home/circleci/project/ci_build_script.sh \</dev/null - if [ -n "${CUDA_VERSION}" ]; then - export id=$(docker run --gpus all -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) - else - export id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) - fi - - echo "declare -x JOB_BASE_NAME=${CIRCLE_JOB}" > /home/circleci/project/env - echo "declare -x COMMIT_ID=${CIRCLE_SHA1}" >> /home/circleci/project/env - echo "declare -x COMMIT_SOURCE=${CIRCLE_BRANCH}" >> /home/circleci/project/env - # DANGER! DO NOT REMOVE THE `set +x` SETTING HERE! 
- set +x - if [[ "$CIRCLE_BRANCH" == master || "$CIRCLE_BRANCH" == main ]]; then - if [ -z "${CIRCLECI_AWS_ACCESS_KEY_FOR_PYTORCH_TUTORIAL_BUILD_MASTER_S3_BUCKET}" ]; then exit 1; fi - if [ -z "${CIRCLECI_AWS_SECRET_KEY_FOR_PYTORCH_TUTORIAL_BUILD_MASTER_S3_BUCKET}" ]; then exit 1; fi - if [ -z "${GITHUB_PYTORCHBOT_USERNAME}" ]; then exit 1; fi - if [ -z "${GITHUB_PYTORCHBOT_TOKEN}" ]; then exit 1; fi - - echo "declare -x AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_PYTORCH_TUTORIAL_BUILD_MASTER_S3_BUCKET}" >> /home/circleci/project/env - echo "declare -x AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_PYTORCH_TUTORIAL_BUILD_MASTER_S3_BUCKET}" >> /home/circleci/project/env - echo "declare -x GITHUB_PYTORCHBOT_USERNAME=${GITHUB_PYTORCHBOT_USERNAME}" >> /home/circleci/project/env - echo "declare -x GITHUB_PYTORCHBOT_TOKEN=${GITHUB_PYTORCHBOT_TOKEN}" >> /home/circleci/project/env - else - echo "declare -x AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_PYTORCH_TUTORIAL_BUILD_PR_S3_BUCKET}" >> /home/circleci/project/env - echo "declare -x AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_PYTORCH_TUTORIAL_BUILD_PR_S3_BUCKET}" >> /home/circleci/project/env - fi - set -x - - echo 'rm /opt/cache/bin/*' | docker exec -u root -i "$id" bash - docker cp /home/circleci/project/. "$id:/var/lib/jenkins/workspace" - - export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' - echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts - # Copy docs with plot to a docs dir - if docker exec -it "$id" sh -c "test -d ./workspace/docs_with_plot/docs/"; then - mkdir /home/circleci/project/docs - docker cp "$id:/var/lib/jenkins/workspace/docs_with_plot/docs/." /home/circleci/project/docs - echo "Directory copied successfully" - else - echo "No docs_with_plot directory. Skipping..." - fi - - - store_artifacts: - path: ./docs - destination: tutorials - -pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults - environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" - CUDA_VERSION: "9" - resource_class: gpu.nvidia.small - <<: *pytorch_tutorial_build_defaults - -pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults - environment: - DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" - resource_class: medium - - - <<: *pytorch_tutorial_build_defaults -{% raw %} -pytorch_windows_build_worker: &pytorch_windows_build_worker - executor: windows-with-nvidia-gpu - steps: - - checkout - - run: - name: Install Cuda - no_output_timeout: 30m - command: | - .circleci/scripts/windows_cuda_install.sh - - run: - name: Generate cache key - # This will refresh cache on Sundays, build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - keys: - - data-{{ checksum "Makefile" }}-{{ checksum ".circleci-weekly" }} - - run: - name: test - no_output_timeout: "1h" - command: | - .circleci/scripts/build_for_windows.sh - - save_cache: - key: data-{{ checksum "Makefile" }}-{{ checksum ".circleci-weekly" }} - paths: - - advanced_source/data - - beginner_source/data - - intermediate_source/data - - prototype_source/data -{% endraw %} -jobs: - {{ jobs("pr") }} - - {{ jobs("trunk") }} - - {{ windows_jobs() }} - -workflows: - build: - jobs: - # Build jobs that only run on PR - {{ workflows_jobs("pr") }} - # Build jobs that only run on trunk - {{ workflows_jobs("trunk") }} -# {{ windows_workflows_jobs() }} diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py deleted file mode 100644 index f47ee1dfa6f..00000000000 --- a/.circleci/regenerate.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python3 - -# regenrates config.yml based on config.yml.in - -from copy import deepcopy -import os.path - -import jinja2 -import yaml -from jinja2 import select_autoescape - -WORKFLOWS_JOBS_PR = {"filters": {"branches": {"ignore": ["master", "main"]}}} - -WORKFLOWS_JOBS_TRUNK = { - "context": "org-member", - "filters": {"branches": {"only": ["master", "main"]}}, -} - - -def indent(indentation, data_list): - return ("\n" + " " * indentation).join( - yaml.dump(data_list, default_flow_style=False).splitlines() - ) - - -def jobs(pr_or_trunk, num_workers=20, indentation=2): - jobs = {} - - # all tutorials that need gpu.nvidia.small.multi machines will be routed by - # get_files_to_run.py to 0th worker, similarly for gpu.nvidia.large and the - # 1st worker - needs_gpu_nvidia_small_multi = [0] - needs_gpu_nvidia_large = [1] - jobs[f"pytorch_tutorial_{pr_or_trunk}_build_manager"] = { - "<<": "*pytorch_tutorial_build_manager_defaults" - } - for i in range(num_workers): - job_info = {"<<": "*pytorch_tutorial_build_worker_defaults"} - if i in needs_gpu_nvidia_small_multi: - job_info["resource_class"] = "gpu.nvidia.small.multi" - if i in needs_gpu_nvidia_large: - job_info["resource_class"] = "gpu.nvidia.large" - jobs[f"pytorch_tutorial_{pr_or_trunk}_build_worker_{i}"] = job_info - - return indent(indentation, jobs).replace("'", "") - - -def workflows_jobs(pr_or_trunk, indentation=6, num_workers=20): - jobs = [] - job_info = deepcopy( - WORKFLOWS_JOBS_PR if pr_or_trunk == "pr" else WORKFLOWS_JOBS_TRUNK - ) - - for i in range(num_workers): - jobs.append( - {f"pytorch_tutorial_{pr_or_trunk}_build_worker_{i}": deepcopy(job_info)} - ) - - job_info["requires"] = [ - f"pytorch_tutorial_{pr_or_trunk}_build_worker_{i}" for i in range(num_workers) - ] - jobs.append({f"pytorch_tutorial_{pr_or_trunk}_build_manager": deepcopy(job_info)}) - return indent(indentation, jobs) - - -def windows_jobs(indentation=2, num_workers=4): - jobs = {} - for i in range(num_workers): - jobs[f"pytorch_tutorial_windows_pr_build_worker_{i}"] = { - "<<": "*pytorch_windows_build_worker" - } - jobs[f"pytorch_tutorial_windows_trunk_build_worker_{i}"] = { - "<<": "*pytorch_windows_build_worker" - } - return indent(indentation, jobs).replace("'", "") - - -def windows_workflows_jobs(indentation=6, num_workers=4): - jobs = [] - job_info = WORKFLOWS_JOBS_PR - for i in range(num_workers): - jobs.append( - {f"pytorch_tutorial_windows_pr_build_worker_{i}": deepcopy(job_info)} - ) - - job_info = WORKFLOWS_JOBS_TRUNK - for i in range(num_workers): - jobs.append( - {f"pytorch_tutorial_windows_trunk_build_worker_{i}": 
deepcopy(job_info)} - ) - - return ("\n#").join(indent(indentation, jobs).splitlines()) - - -if __name__ == "__main__": - - directory = os.path.dirname(__file__) - env = jinja2.Environment( - loader=jinja2.FileSystemLoader(directory), - lstrip_blocks=True, - autoescape=select_autoescape(enabled_extensions=("html", "xml")), - keep_trailing_newline=True, - ) - with open(os.path.join(directory, "config.yml"), "w") as f: - f.write( - env.get_template("config.yml.in").render( - jobs=jobs, - workflows_jobs=workflows_jobs, - windows_jobs=windows_jobs, - windows_workflows_jobs=windows_workflows_jobs, - ) - ) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml new file mode 100644 index 00000000000..222de26b9bb --- /dev/null +++ b/.github/workflows/build-tutorials.yml @@ -0,0 +1,182 @@ +name: Build tutorials + +on: + pull_request: + push: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} + cancel-in-progress: true + +jobs: + worker: + name: pytorch_tutorial_build_worker + strategy: + matrix: + include: + - { shard: 1, num_shards: 6, runner: "linux.16xlarge.nvidia.gpu" } + - { shard: 2, num_shards: 6, runner: "linux.g5.4xlarge.nvidia.gpu" } + - { shard: 3, num_shards: 6, runner: "linux.4xlarge.nvidia.gpu" } + - { shard: 4, num_shards: 6, runner: "linux.4xlarge.nvidia.gpu" } + - { shard: 5, num_shards: 6, runner: "linux.4xlarge.nvidia.gpu" } + - { shard: 6, num_shards: 6, runner: "linux.4xlarge.nvidia.gpu" } + fail-fast: false + runs-on: ${{ matrix.runner }} + env: + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" + CUDA_VERSION: "9" + steps: + - name: Setup SSH (Click me for login details) + uses: pytorch/test-infra/.github/actions/setup-ssh@main + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + instructions: | + All testing is done inside the container, to start an interactive session run: + docker exec -it $(docker container ps --format '{{.ID}}') bash + + - name: Checkout Tutorials + uses: actions/checkout@v3 + + - name: Setup Linux + uses: pytorch/pytorch/.github/actions/setup-linux@main + + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + uses: pytorch/test-infra/.github/actions/setup-nvidia@main + + - name: Calculate docker image + shell: bash + id: docker-image + run: | + set -ex + + # for some reason, pip installs it in a different place than what is looked at in the py file + pip3 install requests==2.26 + pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py) + + echo "docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}" + + - name: Pull docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@main + with: + docker-image: ${{ steps.docker-image.outputs.docker-image }} + + - name: Build + shell: bash + env: + DOCKER_IMAGE: ${{ steps.docker-image.outputs.docker-image }} + NUM_WORKERS: ${{ matrix.num_shards }} + WORKER_ID: ${{ matrix.shard }} + COMMIT_ID: ${{ github.sha }} + JOB_TYPE: worker + COMMIT_SOURCE: ${{ github.ref }} + run: | + set -ex + + chmod +x ".jenkins/build.sh" + + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e WORKER_ID \ + -e NUM_WORKERS \ + -e COMMIT_ID \ + -e JOB_TYPE \ + -e COMMIT_SOURCE \ + --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ + --tty \ + --detach \ + --user jenkins \ + --name="${container_name}" \ + -v 
"${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ + -w /var/lib/jenkins/workspace \ + "${DOCKER_IMAGE}" + ) + + echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash + + docker exec -t "${container_name}" sh -c ".jenkins/build.sh" + + - name: Teardown Linux + uses: pytorch/test-infra/.github/actions/teardown-linux@main + if: always() + + manager: + name: pytorch_tutorial_build_manager + needs: worker + runs-on: [self-hosted, linux.2xlarge] + env: + DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9" + CUDA_VERSION: "9" + steps: + - name: Setup SSH (Click me for login details) + uses: pytorch/test-infra/.github/actions/setup-ssh@main + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + instructions: | + All testing is done inside the container, to start an interactive session run: + docker exec -it $(docker container ps --format '{{.ID}}') bash + + - name: Checkout Tutorials + uses: actions/checkout@v3 + + - name: Setup Linux + uses: pytorch/pytorch/.github/actions/setup-linux@main + + - name: Calculate docker image + shell: bash + id: docker-image + run: | + set -ex + + # for some reason, pip installs it in a different place than what is looked at in the py file + pip3 install requests==2.26 + pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py) + + echo "docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}" + + - name: Pull docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@main + with: + docker-image: ${{ steps.docker-image.outputs.docker-image }} + + - name: Build + shell: bash + env: + DOCKER_IMAGE: ${{ steps.docker-image.outputs.docker-image }} + NUM_WORKERS: 6 + WORKER_ID: ${{ matrix.shard }} + COMMIT_ID: ${{ github.sha }} + JOB_TYPE: manager + COMMIT_SOURCE: ${{ github.ref }} + GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.PYTORCHBOT_TOKEN }} + run: | + set -ex + + chmod +x ".jenkins/build.sh" + + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e WORKER_ID \ + -e NUM_WORKERS \ + -e COMMIT_ID \ + -e JOB_TYPE \ + -e COMMIT_SOURCE \ + -e GITHUB_PYTORCHBOT_TOKEN \ + --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ + --tty \ + --detach \ + --user jenkins \ + --name="${container_name}" \ + -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ + -w /var/lib/jenkins/workspace \ + "${DOCKER_IMAGE}" + ) + + echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash + + docker exec -t "${container_name}" sh -c ".jenkins/build.sh" + + - name: Teardown Linux + uses: pytorch/test-infra/.github/actions/teardown-linux@main + if: always() diff --git a/.jenkins/build.sh b/.jenkins/build.sh index d09b0a8782a..f13966ff84b 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -1,10 +1,8 @@ +#!/bin/bash + set -ex -if [[ "$COMMIT_SOURCE" == master || "$COMMIT_SOURCE" == main ]]; then - export BUCKET_NAME=pytorch-tutorial-build-master -else - export BUCKET_NAME=pytorch-tutorial-build-pull-request -fi +export BUCKET_NAME=pytorch-tutorial-build-pull-request # set locale for click dependency in spacy export LC_ALL=C.UTF-8 @@ -25,7 +23,7 @@ pip install -r $DIR/../requirements.txt # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link # pip uninstall -y torch torchvision torchaudio torchtext -# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext +# pip install --pre --upgrade -f 
https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext # pip uninstall -y torch torchvision torchaudio torchtext # pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu116/torch_test.html torch torchdata torchvision torchaudio torchtext @@ -37,8 +35,7 @@ awsv2 -i awsv2 configure set default.s3.multipart_threshold 5120MB # Decide whether to parallelize tutorial builds, based on $JOB_BASE_NAME -export NUM_WORKERS=20 -if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then +if [[ "${JOB_TYPE}" == "worker" ]]; then # Step 1: Remove runnable code from tutorials that are not supposed to be run python $DIR/remove_runnable_code.py beginner_source/aws_distributed_training_tutorial.py beginner_source/aws_distributed_training_tutorial.py || true # python $DIR/remove_runnable_code.py advanced_source/ddp_pipeline_tutorial.py advanced_source/ddp_pipeline_tutorial.py || true @@ -47,7 +44,7 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then # python $DIR/remove_runnable_code.py intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true # Temp remove for 1.10 release. # python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true - + # TODO: Fix bugs in these tutorials to make them runnable again # python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true @@ -56,7 +53,6 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then # Step 2: Keep certain tutorials based on file count, and remove runnable code in all other tutorials # IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename. - export WORKER_ID=$(echo "${JOB_BASE_NAME}" | tr -dc '0-9') FILES_TO_RUN=$(python .jenkins/get_files_to_run.py) echo "FILES_TO_RUN: " ${FILES_TO_RUN} @@ -116,26 +112,18 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then # Step 6: Copy generated files to S3, tag with commit ID 7z a worker_${WORKER_ID}.7z docs - awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${COMMIT_ID}/worker_${WORKER_ID}.7z --acl public-read -elif [[ "${JOB_BASE_NAME}" == *manager ]]; then + awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${COMMIT_ID}/worker_${WORKER_ID}.7z +elif [[ "${JOB_TYPE}" == "manager" ]]; then # Step 1: Generate no-plot HTML pages for all tutorials make html-noplot cp -r _build/html docs # Step 2: Wait for all workers to finish - set +e - for ((worker_id=0;worker_id List[str]: sources = [x.relative_to(REPO_BASE_DIR) for x in REPO_BASE_DIR.glob("*_source/**/*.py") if 'data' not in x.parts] - return [str(x) for x in sources] + return sorted([str(x) for x in sources]) def read_metadata() -> Dict[str, Any]: @@ -87,8 +87,8 @@ def parse_args() -> Any: from argparse import ArgumentParser parser = ArgumentParser("Select files to run") parser.add_argument("--dry-run", action="store_true") - parser.add_argument("--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", 20))) - parser.add_argument("--shard-num", type=int, default=int(os.environ.get("WORKER_ID", 0))) + parser.add_argument("--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", "20"))) + parser.add_argument("--shard-num", type=int, default=int(os.environ.get("WORKER_ID", "1"))) return parser.parse_args() @@ -96,7 +96,7 @@ def main() -> None: args = parse_args() all_files = get_all_files() - files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num] + files_to_run = 
calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num - 1] if not args.dry_run: remove_other_files(all_files, compute_files_to_keep(files_to_run)) stripped_file_names = [Path(x).stem for x in files_to_run] From 121f71a0bd928fb018c1e409efe31f88478f1818 Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:59:50 -0700 Subject: [PATCH 077/609] Upload docs preview (#2426) Add step to upload docs preview in manager preview at: https://docs-preview.pytorch.org/pytorch/tutorials/2426/index.html --- .github/workflows/build-tutorials.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 222de26b9bb..c242a1897c6 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -177,6 +177,16 @@ jobs: docker exec -t "${container_name}" sh -c ".jenkins/build.sh" + - name: Upload docs preview + uses: seemethere/upload-artifact-s3@v5 + if: ${{ github.event_name == 'pull_request' }} + with: + retention-days: 14 + s3-bucket: doc-previews + if-no-files-found: error + path: docs + s3-prefix: pytorch/tutorials/${{ github.event.pull_request.number }} + - name: Teardown Linux uses: pytorch/test-infra/.github/actions/teardown-linux@main if: always() From 1068abed74b30919179c40e88fa81514b8a3f5e0 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 5 Jun 2023 17:13:41 -0700 Subject: [PATCH 078/609] [BE] Delete `from __future__ import` (#2429) As Python-2.7 is long past EOL, and all tutorials are living in the future :) --- advanced_source/cpp_frontend.rst | 3 --- advanced_source/neural_style_tutorial.py | 2 -- beginner_source/chatbot_tutorial.py | 5 ----- beginner_source/data_loading_tutorial.py | 1 - beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py | 5 ----- beginner_source/fgsm_tutorial.py | 1 - beginner_source/transfer_learning_tutorial.py | 2 -- intermediate_source/char_rnn_classification_tutorial.py | 1 - intermediate_source/char_rnn_generation_tutorial.py | 1 - intermediate_source/dynamic_quantization_bert_tutorial.rst | 2 -- intermediate_source/seq2seq_translation_tutorial.py | 1 - intermediate_source/spatial_transformer_tutorial.py | 1 - prototype_source/graph_mode_dynamic_bert_tutorial.rst | 2 -- prototype_source/numeric_suite_tutorial.py | 1 - 14 files changed, 28 deletions(-) diff --git a/advanced_source/cpp_frontend.rst b/advanced_source/cpp_frontend.rst index 11033951ece..901658183c7 100644 --- a/advanced_source/cpp_frontend.rst +++ b/advanced_source/cpp_frontend.rst @@ -1216,9 +1216,6 @@ tensors and display them with matplotlib: .. 
code-block:: python - from __future__ import print_function - from __future__ import unicode_literals - import argparse import matplotlib.pyplot as plt diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py index 54085fb1e98..4c42c228448 100644 --- a/advanced_source/neural_style_tutorial.py +++ b/advanced_source/neural_style_tutorial.py @@ -47,8 +47,6 @@ # - ``torchvision.models`` (train or load pretrained models) # - ``copy`` (to deep copy the models; system package) -from __future__ import print_function - import torch import torch.nn as nn import torch.nn.functional as F diff --git a/beginner_source/chatbot_tutorial.py b/beginner_source/chatbot_tutorial.py index 02185a6ba3e..44310cc3620 100644 --- a/beginner_source/chatbot_tutorial.py +++ b/beginner_source/chatbot_tutorial.py @@ -92,11 +92,6 @@ # After that, let’s import some necessities. # -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - import torch from torch.jit import script, trace import torch.nn as nn diff --git a/beginner_source/data_loading_tutorial.py b/beginner_source/data_loading_tutorial.py index d5326f6e9a6..7ec18236b33 100644 --- a/beginner_source/data_loading_tutorial.py +++ b/beginner_source/data_loading_tutorial.py @@ -18,7 +18,6 @@ """ -from __future__ import print_function, division import os import torch import pandas as pd diff --git a/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py b/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py index 5e985b58598..508fa5a057a 100644 --- a/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py +++ b/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py @@ -101,11 +101,6 @@ # maximum length output that the model is capable of producing. # -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - import torch import torch.nn as nn import torch.nn.functional as F diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py index fa23680496c..e200f09a712 100644 --- a/beginner_source/fgsm_tutorial.py +++ b/beginner_source/fgsm_tutorial.py @@ -90,7 +90,6 @@ # into the implementation. # -from __future__ import print_function import torch import torch.nn as nn import torch.nn.functional as F diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py index b09efc11749..f08312522c8 100644 --- a/beginner_source/transfer_learning_tutorial.py +++ b/beginner_source/transfer_learning_tutorial.py @@ -33,8 +33,6 @@ # License: BSD # Author: Sasank Chilamkurthy -from __future__ import print_function, division - import torch import torch.nn as nn import torch.optim as optim diff --git a/intermediate_source/char_rnn_classification_tutorial.py b/intermediate_source/char_rnn_classification_tutorial.py index 0c0aa3e988b..0957b109b3a 100644 --- a/intermediate_source/char_rnn_classification_tutorial.py +++ b/intermediate_source/char_rnn_classification_tutorial.py @@ -74,7 +74,6 @@ ``{language: [names ...]}``. The generic variables "category" and "line" (for language and name in our case) are used for later extensibility. 
""" -from __future__ import unicode_literals, print_function, division from io import open import glob import os diff --git a/intermediate_source/char_rnn_generation_tutorial.py b/intermediate_source/char_rnn_generation_tutorial.py index d0c1c553865..5e0f6308c01 100644 --- a/intermediate_source/char_rnn_generation_tutorial.py +++ b/intermediate_source/char_rnn_generation_tutorial.py @@ -75,7 +75,6 @@ and end up with a dictionary ``{language: [names ...]}``. """ -from __future__ import unicode_literals, print_function, division from io import open import glob import os diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.rst b/intermediate_source/dynamic_quantization_bert_tutorial.rst index 39cff5a22c5..6ece2a9d405 100644 --- a/intermediate_source/dynamic_quantization_bert_tutorial.rst +++ b/intermediate_source/dynamic_quantization_bert_tutorial.rst @@ -92,8 +92,6 @@ In this step we import the necessary Python modules for the tutorial. .. code:: python - from __future__ import absolute_import, division, print_function - import logging import numpy as np import os diff --git a/intermediate_source/seq2seq_translation_tutorial.py b/intermediate_source/seq2seq_translation_tutorial.py index 7953854e60a..776197fbbd1 100644 --- a/intermediate_source/seq2seq_translation_tutorial.py +++ b/intermediate_source/seq2seq_translation_tutorial.py @@ -78,7 +78,6 @@ **Requirements** """ -from __future__ import unicode_literals, print_function, division from io import open import unicodedata import string diff --git a/intermediate_source/spatial_transformer_tutorial.py b/intermediate_source/spatial_transformer_tutorial.py index b566e7e4e0b..49b6b0f0a2b 100644 --- a/intermediate_source/spatial_transformer_tutorial.py +++ b/intermediate_source/spatial_transformer_tutorial.py @@ -27,7 +27,6 @@ # License: BSD # Author: Ghassen Hamrouni -from __future__ import print_function import torch import torch.nn as nn import torch.nn.functional as F diff --git a/prototype_source/graph_mode_dynamic_bert_tutorial.rst b/prototype_source/graph_mode_dynamic_bert_tutorial.rst index 2a296ccfa6b..b7757d8a1de 100644 --- a/prototype_source/graph_mode_dynamic_bert_tutorial.rst +++ b/prototype_source/graph_mode_dynamic_bert_tutorial.rst @@ -40,8 +40,6 @@ Once all the necesessary packages are downloaded and installed we setup the code .. 
code:: python - from __future__ import absolute_import, division, print_function - import logging import numpy as np import os diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 35052f4b2f4..ee486d43c3b 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -24,7 +24,6 @@ ############################################################################## -from __future__ import print_function, division, absolute_import import numpy as np import torch import torch.nn as nn From d541f746d1d60b787f389e915e96391a6c26d4f2 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 6 Jun 2023 13:20:42 +0000 Subject: [PATCH 079/609] [BE] Cleanup + set random seed - Remove unnecessary brackets - Use f-strings - Set random seed for reproducibility --- beginner_source/fgsm_tutorial.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py index e200f09a712..6071cb2fb35 100644 --- a/beginner_source/fgsm_tutorial.py +++ b/beginner_source/fgsm_tutorial.py @@ -98,13 +98,6 @@ import numpy as np import matplotlib.pyplot as plt -# NOTE: This is a hack to get around "User-agent" limitations when downloading MNIST datasets -# see, https://github.com/pytorch/vision/issues/3497 for more information -from six.moves import urllib -opener = urllib.request.build_opener() -opener.addheaders = [('User-agent', 'Mozilla/5.0')] -urllib.request.install_opener(opener) - ###################################################################### # Implementation @@ -140,6 +133,8 @@ epsilons = [0, .05, .1, .15, .2, .25, .3] pretrained_model = "data/lenet_mnist_model.pth" use_cuda=True +# Set random seed for reproducibility +torch.manual_seed(42) ###################################################################### @@ -178,18 +173,18 @@ def forward(self, x): test_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([ transforms.ToTensor(), - ])), + ])), batch_size=1, shuffle=True) # Define what device we are using print("CUDA Available: ",torch.cuda.is_available()) -device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu") +device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu") # Initialize the network model = Net().to(device) # Load the pretrained model -model.load_state_dict(torch.load(pretrained_model, map_location='cpu')) +model.load_state_dict(torch.load(pretrained_model, weights_only=True, map_location='cpu')) # Set the model in evaluation mode. 
In this case this is for the Dropout layers model.eval() @@ -289,7 +284,7 @@ def test( model, device, test_loader, epsilon ): if final_pred.item() == target.item(): correct += 1 # Special case for saving 0 epsilon examples - if (epsilon == 0) and (len(adv_examples) < 5): + if epsilon == 0 and len(adv_examples) < 5: adv_ex = perturbed_data.squeeze().detach().cpu().numpy() adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) ) else: @@ -300,7 +295,7 @@ def test( model, device, test_loader, epsilon ): # Calculate final accuracy for this epsilon final_acc = correct/float(len(test_loader)) - print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc)) + print(f"Epsilon: {epsilon}\tTest Accuracy = {correct} / {len(test_loader)} = {final_acc}") # Return the accuracy and an adversarial example return final_acc, adv_examples @@ -386,9 +381,9 @@ def test( model, device, test_loader, epsilon ): plt.xticks([], []) plt.yticks([], []) if j == 0: - plt.ylabel("Eps: {}".format(epsilons[i]), fontsize=14) + plt.ylabel(f"Eps: {epsilons[i]}", fontsize=14) orig,adv,ex = examples[i][j] - plt.title("{} -> {}".format(orig, adv)) + plt.title(f"{orig} -> {adv}") plt.imshow(ex, cmap="gray") plt.tight_layout() plt.show() From 1d90341f2704f2adaa8039d0b23a49bc54c256fd Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 6 Jun 2023 12:31:29 -0700 Subject: [PATCH 080/609] [BE] Simplify `ids_tensor` (#2431) Remove `global_rng` and use `torch.randint` to feel the tensor of shape `shape` with values in range `[0, vocab_size)` Co-authored-by: Svetlana Karslioglu --- .../dynamic_quantization_bert_tutorial.rst | 17 ++--------------- .../graph_mode_dynamic_bert_tutorial.rst | 17 ++--------------- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.rst b/intermediate_source/dynamic_quantization_bert_tutorial.rst index 6ece2a9d405..dd76d08956f 100644 --- a/intermediate_source/dynamic_quantization_bert_tutorial.rst +++ b/intermediate_source/dynamic_quantization_bert_tutorial.rst @@ -253,8 +253,6 @@ model before and after the dynamic quantization. torch.manual_seed(seed) set_seed(42) - # Initialize a global random number generator - global_rng = random.Random() 2.2 Load the fine-tuned BERT model @@ -526,20 +524,9 @@ We can serialize and save the quantized model for the future use using .. 
code:: python - def ids_tensor(shape, vocab_size, rng=None, name=None): + def ids_tensor(shape, vocab_size): # Creates a random int32 tensor of the shape within the vocab size - if rng is None: - rng = global_rng - - total_dims = 1 - for dim in shape: - total_dims *= dim - - values = [] - for _ in range(total_dims): - values.append(rng.randint(0, vocab_size - 1)) - - return torch.tensor(data=values, dtype=torch.long, device='cpu').view(shape).contiguous() + return torch.randint(0, vocab_size, shape=shape, dtype=torch.int, device='cpu') input_ids = ids_tensor([8, 128], 2) token_type_ids = ids_tensor([8, 128], 2) diff --git a/prototype_source/graph_mode_dynamic_bert_tutorial.rst b/prototype_source/graph_mode_dynamic_bert_tutorial.rst index b7757d8a1de..5d76ddef79a 100644 --- a/prototype_source/graph_mode_dynamic_bert_tutorial.rst +++ b/prototype_source/graph_mode_dynamic_bert_tutorial.rst @@ -60,22 +60,9 @@ Once all the necesessary packages are downloaded and installed we setup the code from torch.quantization import per_channel_dynamic_qconfig from torch.quantization import quantize_dynamic_jit - global_rng = random.Random() - - def ids_tensor(shape, vocab_size, rng=None, name=None): + def ids_tensor(shape, vocab_size): # Creates a random int32 tensor of the shape within the vocab size - if rng is None: - rng = global_rng - - total_dims = 1 - for dim in shape: - total_dims *= dim - - values = [] - for _ in range(total_dims): - values.append(rng.randint(0, vocab_size - 1)) - - return torch.tensor(data=values, dtype=torch.long, device='cpu').view(shape).contiguous() + return torch.randint(0, vocab_size, shape=shape, dtype=torch.int, device='cpu') # Setup logging logger = logging.getLogger(__name__) From 2284ab2381c623291ccb4a476e3753aac5671fd9 Mon Sep 17 00:00:00 2001 From: Laith Hasanian Date: Tue, 6 Jun 2023 13:12:38 -0700 Subject: [PATCH 081/609] Update torch_compile_tutorial.py to use unused parameter (#2436) I noticed when reading through these docs that the two examples did not use the parameter 'y'. I assume it was meant to be used so I updated the code in the examples. Another possibility is that we don't need param 'y' and only need 'x'. 
Let me know if that is the case and I will fix this :) --- intermediate_source/torch_compile_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intermediate_source/torch_compile_tutorial.py b/intermediate_source/torch_compile_tutorial.py index d4b8e54b9ed..b8a37cc0a7e 100644 --- a/intermediate_source/torch_compile_tutorial.py +++ b/intermediate_source/torch_compile_tutorial.py @@ -69,7 +69,7 @@ def foo(x, y): a = torch.sin(x) - b = torch.cos(x) + b = torch.cos(y) return a + b opt_foo1 = torch.compile(foo) print(opt_foo1(torch.randn(10, 10), torch.randn(10, 10))) @@ -80,7 +80,7 @@ def foo(x, y): @torch.compile def opt_foo2(x, y): a = torch.sin(x) - b = torch.cos(x) + b = torch.cos(y) return a + b print(opt_foo2(torch.randn(10, 10), torch.randn(10, 10))) From 6e0fd0a8d239018e3010466614cb993d13acb32e Mon Sep 17 00:00:00 2001 From: Pratik Hublikar <57823560+neuralninja27@users.noreply.github.com> Date: Wed, 7 Jun 2023 02:24:58 +0530 Subject: [PATCH 082/609] Update mario_rl_tutorial.py (#2381) * Update mario_rl_tutorial.py Fixes #1620 --------- Co-authored-by: Vincent Moens Co-authored-by: Svetlana Karslioglu --- intermediate_source/mario_rl_tutorial.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py index 8d02f3daf34..eb46feb2ad0 100755 --- a/intermediate_source/mario_rl_tutorial.py +++ b/intermediate_source/mario_rl_tutorial.py @@ -53,6 +53,8 @@ # Super Mario environment for OpenAI Gym import gym_super_mario_bros +from tensordict import TensorDict +from torchrl.data import TensorDictReplayBuffer, LazyMemmapStorage ###################################################################### # RL Definitions @@ -348,7 +350,7 @@ def act(self, state): class Mario(Mario): # subclassing for continuity def __init__(self, state_dim, action_dim, save_dir): super().__init__(state_dim, action_dim, save_dir) - self.memory = deque(maxlen=100000) + self.memory = TensorDictReplayBuffer(storage=LazyMemmapStorage(100000)) self.batch_size = 32 def cache(self, state, next_state, action, reward, done): @@ -373,14 +375,15 @@ def first_if_tuple(x): reward = torch.tensor([reward], device=self.device) done = torch.tensor([done], device=self.device) - self.memory.append((state, next_state, action, reward, done,)) + # self.memory.append((state, next_state, action, reward, done,)) + self.memory.add(TensorDict({"state": state, "next_state": next_state, "action": action, "reward": reward, "done": done}, batch_size=[])) def recall(self): """ Retrieve a batch of experiences from memory """ - batch = random.sample(self.memory, self.batch_size) - state, next_state, action, reward, done = map(torch.stack, zip(*batch)) + batch = self.memory.sample(self.batch_size) + state, next_state, action, reward, done = (batch.get(key) for key in ("state", "next_state", "action", "reward", "done")) return state, next_state, action.squeeze(), reward.squeeze(), done.squeeze() From 730029b88c4bbac8febd9618bbcf5af60945c89c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 6 Jun 2023 16:05:39 -0700 Subject: [PATCH 083/609] Update rpc_ddp_tutorial.rst (#2437) Update the github username of an author --- advanced_source/rpc_ddp_tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/advanced_source/rpc_ddp_tutorial.rst b/advanced_source/rpc_ddp_tutorial.rst index 747c12f6d4f..5c7aeffb2f9 100644 --- a/advanced_source/rpc_ddp_tutorial.rst +++ b/advanced_source/rpc_ddp_tutorial.rst @@ -1,6 
+1,6 @@ Combining Distributed DataParallel with Distributed RPC Framework ================================================================= -**Authors**: `Pritam Damania `_ and `Yi Wang `_ +**Authors**: `Pritam Damania `_ and `Yi Wang `_ .. note:: |edit| View and edit this tutorial in `github `__. From eaa2e901bf28add46763429f34055d901057a905 Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Wed, 7 Jun 2023 10:27:29 -0700 Subject: [PATCH 084/609] Set random seed (#2438) To make tutorial builds predictable, but still keep randomness when one rans it on Collab. Also, reset default_device after every tutorial runCo-authored-by: Nikita Shulga Co-authored-by: Nikita Shulga --- conf.py | 9 ++++++++- recipes_source/recipes/changing_default_device.py | 3 --- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/conf.py b/conf.py index eaa25a956c6..5f88045adb3 100644 --- a/conf.py +++ b/conf.py @@ -34,6 +34,7 @@ import pytorch_sphinx_theme import torch import glob +import random import shutil from custom_directives import IncludeDirective, GalleryItemDirective, CustomGalleryItemDirective, CustomCalloutItemDirective, CustomCardItemDirective import distutils.file_util @@ -85,6 +86,11 @@ # -- Sphinx-gallery configuration -------------------------------------------- +def reset_seeds(gallery_conf, fname): + torch.manual_seed(42) + torch.set_default_device(None) + random.seed(10) + sphinx_gallery_conf = { 'examples_dirs': ['beginner_source', 'intermediate_source', 'advanced_source', 'recipes_source', 'prototype_source'], @@ -94,7 +100,8 @@ 'backreferences_dir': None, 'first_notebook_cell': ("# For tips on running notebooks in Google Colab, see\n" "# https://pytorch.org/tutorials/beginner/colab\n" - "%matplotlib inline") + "%matplotlib inline"), + 'reset_modules': (reset_seeds) } if os.getenv('GALLERY_PATTERN'): diff --git a/recipes_source/recipes/changing_default_device.py b/recipes_source/recipes/changing_default_device.py index f5e50b3f0be..103560fd743 100644 --- a/recipes_source/recipes/changing_default_device.py +++ b/recipes_source/recipes/changing_default_device.py @@ -43,9 +43,6 @@ print(mod.weight.device) print(mod(torch.randn(128, 20)).device) -# And then globally return it back to CPU -torch.set_default_device('cpu') - ################################################################ # This function imposes a slight performance cost on every Python # call to the torch API (not just factory functions). 
If this From d9938ee19e585d81e4d0f1b30596f621a5113b91 Mon Sep 17 00:00:00 2001 From: Boadi Samson Date: Thu, 8 Jun 2023 00:20:16 +0200 Subject: [PATCH 085/609] created original copy of the model by loading from disk (#2406) * created original copy of the model by loading from disk * Update fx_graph_mode_ptq_dynamic.py --------- Co-authored-by: Svetlana Karslioglu --- prototype_source/fx_graph_mode_ptq_dynamic.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/prototype_source/fx_graph_mode_ptq_dynamic.py b/prototype_source/fx_graph_mode_ptq_dynamic.py index eda88ff5c01..98ece5f3d31 100644 --- a/prototype_source/fx_graph_mode_ptq_dynamic.py +++ b/prototype_source/fx_graph_mode_ptq_dynamic.py @@ -239,9 +239,27 @@ def evaluate(model_, data_source): .set_object_type(nn.LSTM, default_dynamic_qconfig) .set_object_type(nn.Linear, default_dynamic_qconfig) ) -# Deepcopying the original model because quantization api changes the model inplace and we want +# Load model to create the original model because quantization api changes the model inplace and we want # to keep the original model for future comparison -model_to_quantize = copy.deepcopy(model) + + +model_to_quantize = LSTMModel( + ntoken = ntokens, + ninp = 512, + nhid = 256, + nlayers = 5, +) + +model_to_quantize.load_state_dict( + torch.load( + model_data_filepath + 'word_language_model_quantize.pth', + map_location=torch.device('cpu') + ) + ) + +model_to_quantize.eval() + + prepared_model = prepare_fx(model_to_quantize, qconfig_mapping, example_inputs) print("prepared model:", prepared_model) quantized_model = convert_fx(prepared_model) @@ -289,4 +307,4 @@ def time_model_evaluation(model, test_data): # 3. Conclusion # ------------- # This tutorial introduces the api for post training dynamic quantization in FX Graph Mode, -# which dynamically quantizes the same modules as Eager Mode Quantization. \ No newline at end of file +# which dynamically quantizes the same modules as Eager Mode Quantization. From fc7494da87419c78c57aa0f9e778a581872e3387 Mon Sep 17 00:00:00 2001 From: William Wen Date: Thu, 8 Jun 2023 07:32:50 -0700 Subject: [PATCH 086/609] Typo fix to torch_compile_tutorial.py (#2446) "evaluating and training ResNet-18 on random data" --> "evaluating and training a ``torchvision`` model on random data", since speedups are no longer demonstrated on resnet18. --- intermediate_source/torch_compile_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/torch_compile_tutorial.py b/intermediate_source/torch_compile_tutorial.py index b8a37cc0a7e..40a53c263ed 100644 --- a/intermediate_source/torch_compile_tutorial.py +++ b/intermediate_source/torch_compile_tutorial.py @@ -105,7 +105,7 @@ def forward(self, x): # # Let's now demonstrate that using ``torch.compile`` can speed # up real models. We will compare standard eager mode and -# ``torch.compile`` by evaluating and training ResNet-18 on random data. +# ``torch.compile`` by evaluating and training a ``torchvision`` model on random data. # # Before we start, we need to define some utility functions. 
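As a rough illustration of the eager-versus-``torch.compile`` comparison described in the hunk above, the following minimal sketch times an untrained ``torchvision`` ``resnet18`` on random inputs. It assumes ``torch`` 2.0+ and ``torchvision`` are installed; the model choice, batch size, and timing helper are illustrative assumptions, not the utility functions the tutorial itself defines.

# Minimal eager vs. torch.compile timing sketch on random data (illustrative only).
import time

import torch
import torchvision.models as models

device = "cuda" if torch.cuda.is_available() else "cpu"
model = models.resnet18().to(device)   # an untrained model is enough for a timing comparison
opt_model = torch.compile(model)       # compiled wrapper around the same module

x = torch.randn(16, 3, 224, 224, device=device)

def timed(fn, n_iters=10):
    fn()                               # warm-up; triggers compilation for the compiled model
    if device == "cuda":
        torch.cuda.synchronize()
    start = time.time()
    for _ in range(n_iters):
        fn()
    if device == "cuda":
        torch.cuda.synchronize()
    return (time.time() - start) / n_iters

with torch.no_grad():
    eager_time = timed(lambda: model(x))
    compiled_time = timed(lambda: opt_model(x))

print(f"eager: {eager_time:.4f}s  compiled: {compiled_time:.4f}s")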
From 3b6d83b2130903be87693a4813f0be70990710a0 Mon Sep 17 00:00:00 2001 From: BJ Hargrave Date: Thu, 8 Jun 2023 11:55:04 -0400 Subject: [PATCH 087/609] Change paper reference to a paper matching the model used (#2424) Fixes https://github.com/pytorch/tutorials/issues/1642 Signed-off-by: BJ Hargrave Co-authored-by: sekyondaMeta <127536312+sekyondaMeta@users.noreply.github.com> --- intermediate_source/seq2seq_translation_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intermediate_source/seq2seq_translation_tutorial.py b/intermediate_source/seq2seq_translation_tutorial.py index 776197fbbd1..ea583821f85 100644 --- a/intermediate_source/seq2seq_translation_tutorial.py +++ b/intermediate_source/seq2seq_translation_tutorial.py @@ -45,7 +45,7 @@ :alt: To improve upon this model we'll use an `attention -mechanism `__, which lets the decoder +mechanism `__, which lets the decoder learn to focus over a specific range of the input sequence. **Recommended Reading:** @@ -66,8 +66,8 @@ Statistical Machine Translation `__ - `Sequence to Sequence Learning with Neural Networks `__ -- `Neural Machine Translation by Jointly Learning to Align and - Translate `__ +- `Effective Approaches to Attention-based Neural Machine + Translation `__ - `A Neural Conversational Model `__ You will also find the previous tutorials on From 1fe4025f750d8042e5245fe9cbc50627ca2d6abf Mon Sep 17 00:00:00 2001 From: BJ Hargrave Date: Thu, 8 Jun 2023 12:29:49 -0400 Subject: [PATCH 088/609] Set the random seed for reproducibility of the output (#2428) We also fix the code to use the scripted_cell just created. Fixes https://github.com/pytorch/tutorials/issues/1449 Signed-off-by: BJ Hargrave --- beginner_source/Intro_to_TorchScript_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beginner_source/Intro_to_TorchScript_tutorial.py b/beginner_source/Intro_to_TorchScript_tutorial.py index d369c4fbf80..21ee32ff384 100644 --- a/beginner_source/Intro_to_TorchScript_tutorial.py +++ b/beginner_source/Intro_to_TorchScript_tutorial.py @@ -33,6 +33,7 @@ import torch # This is all you need to use both PyTorch and TorchScript! print(torch.__version__) +torch.manual_seed(191009) # set the seed for reproducibility ###################################################################### @@ -308,7 +309,7 @@ def forward(self, x, h): # New inputs x, h = torch.rand(3, 4), torch.rand(3, 4) -traced_cell(x, h) +print(scripted_cell(x, h)) ###################################################################### From 2bdd8460fd912024cdecb9ddd2a2b4b4c1473e50 Mon Sep 17 00:00:00 2001 From: Degao Chu Date: Fri, 9 Jun 2023 23:55:15 +0800 Subject: [PATCH 089/609] Update example link in FSDP_adavnced_tutorial.rst (#2448) Co-authored-by: Svetlana Karslioglu --- intermediate_source/FSDP_adavnced_tutorial.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intermediate_source/FSDP_adavnced_tutorial.rst b/intermediate_source/FSDP_adavnced_tutorial.rst index cce90e8787e..748c8593306 100644 --- a/intermediate_source/FSDP_adavnced_tutorial.rst +++ b/intermediate_source/FSDP_adavnced_tutorial.rst @@ -75,7 +75,7 @@ highlight different available features in FSDP that are helpful for training large scale model above 3B parameters. Also, we cover specific features for Transformer based models. The code for this tutorial is available in `Pytorch Examples -`__. +`__. 
*Setup* @@ -97,13 +97,13 @@ Please create a `data` folder, download the WikiHow dataset from `wikihowAll.csv `wikihowSep.cs `__, and place them in the `data` folder. We will use the wikihow dataset from `summarization_dataset -`__. +`__. Next, we add the following code snippets to a Python script “T5_training.py”. .. note:: The full source code for this tutorial is available in `PyTorch examples - `__. + `__. 1.3 Import necessary packages: From 203f567555b1c4cc86d03fd36f7b7ae4c3405262 Mon Sep 17 00:00:00 2001 From: NM512 <70328564+NM512@users.noreply.github.com> Date: Sat, 10 Jun 2023 01:32:35 +0900 Subject: [PATCH 090/609] Update transformer_tutorial.py (#2451) Co-authored-by: NM512 Co-authored-by: Svetlana Karslioglu --- beginner_source/transformer_tutorial.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index a3fc3ab16eb..2f87117752f 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -2,7 +2,7 @@ Language Modeling with ``nn.Transformer`` and torchtext =============================================================== -This is a tutorial on training a sequence-to-sequence model that uses the +This is a tutorial on training a model to predict the next word in a sequence using the `nn.Transformer `__ module. The PyTorch 1.2 release includes a standard transformer module based on the @@ -29,7 +29,9 @@ ###################################################################### # In this tutorial, we train a ``nn.TransformerEncoder`` model on a -# language modeling task. The language modeling task is to assign a +# language modeling task. Please note that this tutorial does not cover +# the training of `nn.TransformerDecoder `__, as depicted in +# the right half of the diagram above. The language modeling task is to assign a # probability for the likelihood of a given word (or a sequence of words) # to follow a sequence of words. A sequence of tokens are passed to the embedding # layer first, followed by a positional encoding layer to account for the order @@ -130,6 +132,7 @@ def forward(self, x: Tensor) -> Tensor: # .. 
code-block:: bash # # %%bash +# pip install portalocker # pip install torchdata # # The vocab object is built based on the train dataset and is used to numericalize From a5376f73f619b7ab3d1a29a7b39bcffa3b91fd0f Mon Sep 17 00:00:00 2001 From: Qasim Khan Date: Fri, 9 Jun 2023 22:45:43 +0500 Subject: [PATCH 091/609] Fix Attention in seq2seq_translation_tutorial AttnDecoderRNN (#2452) * replace old decoder diagram with new one * remove 1 from encoder1 and decoder1 * fix attention in AttnDecoderRNN * Fix formatting going over max character count --------- Co-authored-by: Svetlana Karslioglu --- .../attention-decoder-network.png | Bin 36999 -> 44701 bytes .../seq2seq_translation_tutorial.py | 38 +++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/_static/img/seq-seq-images/attention-decoder-network.png b/_static/img/seq-seq-images/attention-decoder-network.png index 243f87c6e972bcb712cfc1d3fa250bc725f8de31..d31d42a5af19b96af1163fbdc6fcd0e5d4a9d79f 100755 GIT binary patch literal 44701 zcmeEv1zc2F`~MgiAYynEf-)P4CxH(~4FseFK%iItSerT+z~__ZVt1@Spwm_8 ze}`(JDY_t#t}jej6$Y`>H!{@&QLqT@y`x}eG=y5gC|GV$Ff;3zo73wW=@?k*K&h5Kk%=DgpQzqF6Dax=Awx?j1Sk<pc*V`-A)w40W2iyiT1&c5RqX2Lc`Y(&ne%AsY z%yfX;-0gB%pttCl_F6F7?_!9Jjy0>Ynw6aCEhc@DTP$jNccg#W+CiHTJ5fs=b3-Yp zuAV8-u&&)+vrMe4oO_Mx+V7RHaqeHm!1CZaUlk}AIqcsMFsQrFX<%)nt7o;}+FlV1 z3N?ipng6`-9@NZC@814+@3*L9X^9@>-3Qz+(ubPvKiz(TIq={=tAQ?1xTjil1&Rz|5YzpmdvgYTp}nsEl_%XVvVvM$0_$aWy8P+`?AL?oSQ-GJ z^zZBKuEgISzMlqBR1d%kn58{1kd}I;Ixr)fpWmp?{scDoQV+fSzh#htI{S0vx9`|$MFjvp`|E$vl--e#l+AYO3y;~n(JgAcdRSo6~-G|r$ueTQ*p-kl)7 z-P!+t1OF{;A6!Ae(ozRKs}3fRIn>Au2AI5`n}faDzx|4yAhPJQfz}Ry{Q#f$VV#ME zZLheU5#2+rR(+uzeQ5{EO^$^tTMnZgs#^9xUbGK4-u7?$Z3d zn%}m9_W{gC$JBZsWGI-~O#vtq)Q9eluD;Gb0J2+HqfL;YqK+XH0uZNz&%g}$Y55-% zqEDu?+BMy1mchho4$Kl@0UjI&-Wi}9Gq=>U0UH|W>gt&tTo>rRy-V-a*qcK?U4i-M z@Na8q1k+P6-@8d$fN%hfeg*Xar8E_SnnEoB2%unQ5fR}O0<1BMpcTv#YNGdRN6^>) zs+@86&+fhNb1azN!GzztHFKSx4CH^h+7DVpF9h^u4wlftXWO56#uqkW_!Y-s*+1`K zVSj(iz`VaWzZ$r|z_R}UmH`+(^a%bgBLF;xZ-ZSl6n=q+AD$8HKmPZ^6C*QoYrrP$ zA=rO0k{kfXZve@Gx%kKJAQB%fo-2*{Ivu4l_B8x zUOvLNj{Ke;^4oL{y`BNy@*kRlZ)yAB3je1*0_!hv=}-0%e%U@7`#v%HlY9^smVLou zA6Ngw_Wx!+!u~}5UN&9N>>i*US%RU~Fu=G2?&|-nJL@<22?tB<2e{r@f|?2?Ip9wzKR{`bNJ zT}!Arz!m-qM7Y5zB6x%I*AU^$k?)BJ2NYnRtuh~c{t6KeK;p+n1orQ(CIWKi_X#xZ z`y0OtCI3Z%CLpilYmuD&An^YC^anlvfByH^;!Fqa?XQ~uu>HT8kGDV1z87Ze?eaJv zsJ~C+?pgxxb|XXwCBW$Z*M^0DPVATuF#9VQ|9cR-gD};1(*nO6jPI-1zYF6`yYZ8M z9wpp={_h1Z`uG0AAoCYz@Lz(=UygiFQpj|$3J+pKzXB(me+xg6<-azxuoo=)hA8tt z6;BR2;5!1!zAXJ$@RKjN@na$i^Dh`P)2|@P-z05?Wj}x6uVpBIqV%-4;J*bKcB4A~ zJf?Bb0pAN5bacNsG5?n?%NKp=Um)Sjk?%<|m=AK^52UAqh}Bm<%Ry}R$CjR0zDoki zSNRLy&1Yfy)&z#VMBINMlzivFH%!OO@UNg3Ul8MOf+YuH$KMW14nXj)Wfxyz$#=_z zVq)FnIKMA|?25bpKrGqsfbT|2OpOf8Aiz`r+W>pwfPDDBAYT51XWKI$s2m3zd|jk1k{!80dXOU^#G6zG4~& zTE`C#2!DGW<6HCUnEs)Obxivm@Vx**=L=!5x(Brc_S^rDFfcRzJoA9{V8`oM@bGsb z4EwI_U+andnS|k6bL^P@p-FaoxCeae$9LMfYX#MZL3Hf?3Rw8j^@(2uR9Sz8H5^1l zesI?C->Y?gYo6Wz{bnt+$NAUi@$bRp_k$B17|aZ8ySt_Re^i5+nfZWP?0X_?2cN&D z8V6-RHmCq!p0%5<`PWj7A3^jy(2jmr)c7w--eo!v+4d*)e@Axh!A$sGWDWQ#G~jDX z{-^zZ`LZJBgB=A2)9`@9eT}aNqxOU2>wd1@UyHAQqARzDe&3L?!nR9K|9PtK%lE=}154`zNBpndIq`!F`oGL|WOLDBzEc@@)GK>R*cy zU$N-#wE=DqGQPn&Fz-s5|2*q(&;j2ITTGz_;9rP+e=WxR;PT%W#ebhlavadcuQ29s zlKs55gub5$wD%1!-^Y^u;K?e#v}7EAUrY9P4?P`xfBEmrI=i0PKhGi^bintrNdL=K z0lX-dUu4*`9q`z%%+B8tVh7ab2QoUl->>j*eidQ= zYqG!k{fVzZc=zbRFJ9~&*gpaN10;JtlmH#RWdr>*2XOv^#x{;1N^VMMYPd<8z26vsxwYtu`jG8L^ zNIM+2Na;GR@XO#MiCz+Ita(?BB;~If@R`8D`g&YNQez9@FHqC=%KDwo4QeKx1}+uL z^|z~Ux*r04igk_?1%XP1#14TCGfe 