Merge branch 'develop' into preload-waveform-pipeline

hbredin · web-flow · commit b2a07e859219 · 2025-11-19T11:43:50.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,9 +2,11 @@
 
 ## next
 
-- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/))
+- BREAKING(util): make `Binarize.__call__` return `string` tracks (instead of `int`) ([@benniekiss](https://github.com/benniekiss/))
 - feat(cli): add option to apply pipeline on a directory of audio files
-- improve(util): make `permutate` faster thanks to vectorized cost function ([@joonaskalda](https://github.com/joonaskalda/))
+- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/))
+- feat(pipeline): add `Pipeline.cuda()` convenience method ([@tkanarsky](https://github.com/tkanarsky/))
+- improve(util): make `permutate` faster thanks to vectorized cost function
 
 ## Version 4.0.1 (2025-10-10)
 
diff --git a/src/pyannote/audio/core/pipeline.py b/src/pyannote/audio/core/pipeline.py
@@ -21,6 +21,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import annotations
 import os
 import warnings
 from collections import OrderedDict
@@ -476,7 +477,7 @@ def __call__(self, file: AudioFile, preload: bool = False, **kwargs):
 
         return self.apply(file, **kwargs)
 
-    def to(self, device: torch.device):
+    def to(self, device: torch.device) -> Pipeline:
         """Send pipeline to `device`"""
 
         if not isinstance(device, torch.device):
@@ -497,3 +498,14 @@ def to(self, device: torch.device):
         self.device = device
 
         return self
+
+    def cuda(self, device: torch.device | int | None = None) -> Pipeline:
+        """Send pipeline to a CUDA device (`torch.device`, integer index, or None for plain "cuda")"""
+        if device is None:  # no device given: target the unindexed "cuda" device
+            return self.to(torch.device("cuda"))
+        elif isinstance(device, int):  # an integer is used as the CUDA device index
+            return self.to(torch.device("cuda", device))
+        else:  # anything else must expose `.type` (presumably a `torch.device`)
+            if device.type != "cuda":  # non-CUDA devices are rejected; callers should use `to` instead
+                raise ValueError("Expected CUDA device. Use `Pipeline.to(device)` for other devices.")
+            return self.to(device)
diff --git a/src/pyannote/audio/pipelines/utils/diarization.py b/src/pyannote/audio/pipelines/utils/diarization.py
@@ -215,7 +215,7 @@ def to_annotation(
             min_duration_off=min_duration_off,
         )
 
-        return binarize(discrete_diarization).rename_tracks(generator="string")
+        return binarize(discrete_diarization)
 
     @staticmethod
     def to_diarization(
diff --git a/src/pyannote/audio/utils/signal.py b/src/pyannote/audio/utils/signal.py
@@ -38,7 +38,7 @@
 import numpy as np
 import scipy.signal
 from pyannote.core import Annotation, Segment, SlidingWindowFeature, Timeline
-from pyannote.core.utils.generators import pairwise
+from pyannote.core.utils.generators import pairwise, string_generator
 
 
 @singledispatch
@@ -271,10 +271,11 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
 
         # annotation meant to store 'active' regions
         active = Annotation()
+        track_generator = string_generator()
 
         for k, k_scores in enumerate(scores.data.T):
-
             label = k if scores.labels is None else scores.labels[k]
+            track = next(track_generator)
 
             # initial state
             start = timestamps[0]
@@ -287,7 +288,7 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
                     # switching from active to inactive
                     if y < self.offset:
                         region = Segment(start - self.pad_onset, t + self.pad_offset)
-                        active[region, k] = label
+                        active[region, track] = label
                         start = t
                         is_active = False
 
@@ -301,7 +302,7 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
             # if active at the end, add final region
             if is_active:
                 region = Segment(start - self.pad_onset, t + self.pad_offset)
-                active[region, k] = label
+                active[region, track] = label
 
         # because of padding, some active regions might be overlapping: merge them.
         # also: fill same speaker gaps shorter than min_duration_off

Original file line number	Diff line number	Diff line change
`@@ -215,7 +215,7 @@ def to_annotation(`
`215`	`215`	`min_duration_off=min_duration_off,`
`216`	`216`	`)`
`217`	`217`
`218`		`- return binarize(discrete_diarization).rename_tracks(generator="string")`
	`218`	`+ return binarize(discrete_diarization)`
`219`	`219`
`220`	`220`	`@staticmethod`
`221`	`221`	`def to_diarization(`