pytorch · NicolasHug · Sep 3, 2025 · Sep 2, 2025
@@ -8,10 +8,10 @@ torchaudio: an audio library for PyTorch
 ![TorchAudio Logo](docs/source/_static/img/logo.png)
 
 > [!NOTE]
-> **We are in the process of refactoring TorchAudio and transitioning it into a
->  maintenance phase. This process will include removing some user-facing
->  features: those features are deprecated from TorchAudio 2.8 and will be removed in 2.9.
->  Our main goals are to reduce redundancies with the rest of the
+> **We have transitioned TorchAudio into a
+>  maintenance phase. This process removed some user-facing
+>  features. These features were deprecated from TorchAudio 2.8 and removed in 2.9.
+>  Our main goals were to reduce redundancies with the rest of the
 >  PyTorch ecosystem, make it easier to maintain, and create a version of
 >  TorchAudio that is more tightly scoped to its strengths: processing audio
 >  data for ML. Please see

@@ -8,8 +8,7 @@ It provides signal and data processing functions, datasets,
 model implementations and application components.
 
 .. note::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result:
+    Starting with version 2.8, we have transitioned into a maintenance phase. As a result:
 
     - Some APIs were deprecated in 2.8 and removed as of 2.9.
     - The decoding and encoding capabilities of PyTorch for both audio and video

diff --git a/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py b/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py
@@ -5,12 +5,11 @@
 **Author**: `Yuekai Zhang <[email protected]>`__
 
 .. warning::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result:
+    Starting with version 2.9, we have transitioned TorchAudio into a maintenance phase. As a result:
 
-    - The APIs described in this tutorial are deprecated in 2.8 and will be removed in 2.9.
+    - The APIs described in this tutorial were deprecated in 2.8 and have been removed in 2.9.
     - The decoding and encoding capabilities of PyTorch for both audio and video
-      are being consolidated into TorchCodec.
+      have been consolidated into TorchCodec.
 
     Please see https://github.com/pytorch/audio/issues/3902 for more information.
 

diff --git a/examples/tutorials/ctc_forced_alignment_api_tutorial.py b/examples/tutorials/ctc_forced_alignment_api_tutorial.py
@@ -5,12 +5,11 @@
 **Author**: `Xiaohui Zhang <[email protected]>`__, `Moto Hira <[email protected]>`__
 
 .. warning::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result:
+    Starting with version 2.9, we have transitioned TorchAudio into a maintenance phase. As a result:
 
-    - The APIs described in this tutorial are deprecated in 2.8 and will be removed in 2.9.
+    - The APIs described in this tutorial were deprecated in 2.8 and have been removed in 2.9.
     - The decoding and encoding capabilities of PyTorch for both audio and video
-      are being consolidated into TorchCodec.
+      have been consolidated into TorchCodec.
 
     Please see https://github.com/pytorch/audio/issues/3902 for more information.
 

@@ -17,28 +17,28 @@ def load_with_torchcodec(
     backend: Optional[str] = None,
 ) -> Tuple[torch.Tensor, int]:
     """Load audio data from source using TorchCodec's AudioDecoder.
-    
+
     .. note::
-        
+
         This function supports the same API as :func:`~torchaudio.load`, and
         relies on TorchCodec's decoding capabilities under the hood. It is
         provided for convenience, but we do recommend that you port your code to
         natively use ``torchcodec``'s ``AudioDecoder`` class for better
         performance:
         https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
-        In TorchAudio 2.9, :func:`~torchaudio.load` will be relying on
+        As of TorchAudio 2.9, :func:`~torchaudio.load` relies on
         :func:`~torchaudio.load_with_torchcodec`. Note that some parameters of
         :func:`~torchaudio.load`, like ``normalize``, ``buffer_size``, and
         ``backend``, are ignored by :func:`~torchaudio.load_with_torchcodec`.
-    
-    
+
+
     Args:
         uri (path-like object or file-like object):
             Source of audio data. The following types are accepted:
-            
+
             * ``path-like``: File path or URL.
             * ``file-like``: Object with ``read(size: int) -> bytes`` method.
-            
+
         frame_offset (int, optional):
             Number of samples to skip before start reading data.
         num_frames (int, optional):
@@ -58,17 +58,17 @@ def load_with_torchcodec(
             Not used by TorchCodec AudioDecoder. Provided for API compatibility.
         backend (str or None, optional):
             Not used by TorchCodec AudioDecoder. Provided for API compatibility.
-            
+
     Returns:
         (torch.Tensor, int): Resulting Tensor and sample rate.
         Always returns float32 tensors. If ``channels_first=True``, shape is
         `[channel, time]`, otherwise `[time, channel]`.
-        
+
     Raises:
         ImportError: If torchcodec is not available.
         ValueError: If unsupported parameters are used.
         RuntimeError: If TorchCodec fails to decode the audio.
-        
+
     Note:
         - TorchCodec always returns normalized float32 samples, so the ``normalize``
           parameter has no effect.
@@ -84,7 +84,7 @@ def load_with_torchcodec(
             "TorchCodec is required for load_with_torchcodec. "
             "Please install torchcodec to use this function."
         ) from e
-    
+
     # Parameter validation and warnings
     if not normalize:
         import warnings
@@ -94,71 +94,71 @@ def load_with_torchcodec(
             UserWarning,
             stacklevel=2
         )
-    
+
     if buffer_size != 4096:
         import warnings
         warnings.warn(
             "The 'buffer_size' parameter is not used by TorchCodec AudioDecoder.",
             UserWarning,
             stacklevel=2
         )
-        
+
     if backend is not None:
         import warnings
         warnings.warn(
             "The 'backend' parameter is not used by TorchCodec AudioDecoder.",
             UserWarning,
             stacklevel=2
         )
-    
+
     if format is not None:
         import warnings
         warnings.warn(
             "The 'format' parameter is not supported by TorchCodec AudioDecoder.",
             UserWarning,
             stacklevel=2
         )
-    
+
     # Create AudioDecoder
     try:
         decoder = AudioDecoder(uri)
     except Exception as e:
         raise RuntimeError(f"Failed to create AudioDecoder for {uri}: {e}") from e
-    
+
     # Get sample rate from metadata
     sample_rate = decoder.metadata.sample_rate
     if sample_rate is None:
         raise RuntimeError("Unable to determine sample rate from audio metadata")
-    
+
     # Decode the entire file first, then subsample manually
     # This is the simplest approach since torchcodec uses time-based indexing
     try:
         audio_samples = decoder.get_all_samples()
     except Exception as e:
         raise RuntimeError(f"Failed to decode audio samples: {e}") from e
-        
+
     data = audio_samples.data
-    
+
     # Apply frame_offset and num_frames (which are actually sample offsets)
     if frame_offset > 0:
         if frame_offset >= data.shape[1]:
             # Return empty tensor if offset is beyond available data
             empty_shape = (data.shape[0], 0) if channels_first else (0, data.shape[0])
             return torch.zeros(empty_shape, dtype=torch.float32), sample_rate
         data = data[:, frame_offset:]
-    
+
     if num_frames == 0:
         # Return empty tensor if num_frames is 0
         empty_shape = (data.shape[0], 0) if channels_first else (0, data.shape[0])
         return torch.zeros(empty_shape, dtype=torch.float32), sample_rate
     elif num_frames > 0:
         data = data[:, :num_frames]
-    
+
     # TorchCodec returns data in [channel, time] format by default
     # Handle channels_first parameter
     if not channels_first:
         data = data.transpose(0, 1)  # [channel, time] -> [time, channel]
-    
+
     return data, sample_rate
 
 
@@ -177,70 +177,70 @@ def save_with_torchcodec(
     """Save audio data to file using TorchCodec's AudioEncoder.
 
     .. note::
-        
+
         This function supports the same API as :func:`~torchaudio.save`, and
         relies on TorchCodec's encoding capabilities under the hood. It is
         provided for convenience, but we do recommend that you port your code to
         natively use ``torchcodec``'s ``AudioEncoder`` class for better
         performance:
         https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
-        In TorchAudio 2.9, :func:`~torchaudio.save` will be relying on
+        As of TorchAudio 2.9, :func:`~torchaudio.save` relies on
         :func:`~torchaudio.save_with_torchcodec`. Note that some parameters of
         :func:`~torchaudio.save`, like ``format``, ``encoding``,
         ``bits_per_sample``, ``buffer_size``, and ``backend``, are ignored by
         are ignored by :func:`~torchaudio.save_with_torchcodec`.
-    
+
     This function provides a TorchCodec-based alternative to torchaudio.save
     with the same API. TorchCodec's AudioEncoder provides efficient encoding
     with FFmpeg under the hood.
-    
+
     Args:
         uri (path-like object):
             Path to save the audio file. The file extension determines the format.
-            
+
         src (torch.Tensor):
             Audio data to save. Must be a 1D or 2D tensor with float32 values
             in the range [-1, 1]. If 2D, shape should be [channel, time] when
             channels_first=True, or [time, channel] when channels_first=False.
-            
+
         sample_rate (int):
             Sample rate of the audio data.
-            
+
         channels_first (bool, optional):
             Indicates whether the input tensor has channels as the first dimension.
             If True, expects [channel, time]. If False, expects [time, channel].
             Default: True.
-            
+
         format (str or None, optional):
             Audio format hint. Not used by TorchCodec (format is determined by
             file extension). A warning is issued if provided.
             Default: None.
-            
+
         encoding (str or None, optional):
             Audio encoding. Not fully supported by TorchCodec AudioEncoder.
             A warning is issued if provided. Default: None.
-            
+
         bits_per_sample (int or None, optional):
             Bits per sample. Not directly supported by TorchCodec AudioEncoder.
             A warning is issued if provided. Default: None.
-            
+
         buffer_size (int, optional):
             Not used by TorchCodec AudioEncoder. Provided for API compatibility.
             A warning is issued if not default value. Default: 4096.
-            
+
         backend (str or None, optional):
             Not used by TorchCodec AudioEncoder. Provided for API compatibility.
             A warning is issued if provided. Default: None.
-            
+
         compression (float, int or None, optional):
             Compression level or bit rate. Maps to bit_rate parameter in
             TorchCodec AudioEncoder. Default: None.
-            
+
     Raises:
         ImportError: If torchcodec is not available.
         ValueError: If input parameters are invalid.
         RuntimeError: If TorchCodec fails to encode the audio.
-        
+
     Note:
         - TorchCodec AudioEncoder expects float32 samples in [-1, 1] range.
         - Some parameters (format, encoding, bits_per_sample, buffer_size, backend)
@@ -256,7 +256,7 @@ def save_with_torchcodec(
             "TorchCodec is required for save_with_torchcodec. "
             "Please install torchcodec to use this function."
         ) from e
-    
+
     # Parameter validation and warnings
     if format is not None:
         import warnings
@@ -266,49 +266,49 @@ def save_with_torchcodec(
             UserWarning,
             stacklevel=2
         )
-    
+
     if encoding is not None:
         import warnings
         warnings.warn(
             "The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder.",
             UserWarning,
             stacklevel=2
         )
-    
+
     if bits_per_sample is not None:
         import warnings
         warnings.warn(
             "The 'bits_per_sample' parameter is not directly supported by TorchCodec AudioEncoder.",
             UserWarning,
             stacklevel=2
         )
-    
+
     if buffer_size != 4096:
         import warnings
         warnings.warn(
             "The 'buffer_size' parameter is not used by TorchCodec AudioEncoder.",
             UserWarning,
             stacklevel=2
         )
-        
+
     if backend is not None:
         import warnings
         warnings.warn(
             "The 'backend' parameter is not used by TorchCodec AudioEncoder.",
             UserWarning,
             stacklevel=2
         )
-    
+
     # Input validation
     if not isinstance(src, torch.Tensor):
         raise ValueError(f"Expected src to be a torch.Tensor, got {type(src)}")
-    
+
     if src.dtype != torch.float32:
         src = src.float()
-    
+
     if sample_rate <= 0:
         raise ValueError(f"sample_rate must be positive, got {sample_rate}")
-    
+
     # Handle tensor shape and channels_first
     if src.ndim == 1:
         # Convert to 2D: [1, time] for channels_first=True
@@ -324,13 +324,13 @@ def save_with_torchcodec(
             data = src.transpose(0, 1)  # [time, channel] -> [channel, time]
     else:
         raise ValueError(f"Expected 1D or 2D tensor, got {src.ndim}D tensor")
-    
+
     # Create AudioEncoder
     try:
         encoder = AudioEncoder(data, sample_rate=sample_rate)
     except Exception as e:
         raise RuntimeError(f"Failed to create AudioEncoder: {e}") from e
-    
+
     # Determine bit_rate from compression parameter
     bit_rate = None
     if compression is not None:
@@ -344,7 +344,7 @@ def save_with_torchcodec(
                 UserWarning,
                 stacklevel=2
             )
-    
+
     # Save to file
     try:
         encoder.to_file(uri, bit_rate=bit_rate)