diff --git a/intermediate_source/transformer_building_blocks.py b/intermediate_source/transformer_building_blocks.py
index 8b88707004..df2fb90f96 100644
--- a/intermediate_source/transformer_building_blocks.py
+++ b/intermediate_source/transformer_building_blocks.py
@@ -71,7 +71,7 @@
 # ===============================
 # First, we will briefly introduce the four technologies mentioned in the introduction
 #
-# * `torch.nested <https://pytorch.org/docs/stable/nested.html>`_
+# * `torch.nested <https://docs.pytorch.org/docs/stable/nested.html>`_
 #
 # Nested tensors generalize the shape of regular dense tensors, allowing for
 # representation of ragged-sized data with the same tensor UX. In the context of
@@ -157,7 +157,7 @@
 # skipped, performance and memory usage improve.
 #
 # We'll demonstrate the above by building upon the ``MultiheadAttention`` layer in the
-# `Nested Tensor tutorial <https://pytorch.org/tutorials/prototype/nestedtensor.html>`_
+# `Nested Tensor tutorial <https://docs.pytorch.org/tutorials/prototype/nestedtensor.html>`_
 # and comparing it to the ``nn.MultiheadAttention`` layer.
 import torch