Skip to content

Commit b2a07e8

Browse files
authored
Merge branch 'develop' into preload-waveform-pipeline
2 parents 0a88a7f + 05f4907 commit b2a07e8

File tree

4 files changed

+23
-8
lines changed

4 files changed

+23
-8
lines changed

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22

33
## next
44

5-
- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/))
5+
- BREAKING(util): make `Binarize.__call__` return `string` tracks (instead of `int`) ([@benniekiss](https://github.com/benniekiss/))
66
- feat(cli): add option to apply pipeline on a directory of audio files
7-
- improve(util): make `permutate` faster thanks to vectorized cost function ([@joonaskalda](https://github.com/joonaskalda/))
7+
- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/))
8+
- feat(pipeline): add `Pipeline.cuda()` convenience method ([@tkanarsky](https://github.com/tkanarsky/))
9+
- improve(util): make `permutate` faster thanks to vectorized cost function ([@joonaskalda](https://github.com/joonaskalda/))
810

911
## Version 4.0.1 (2025-10-10)
1012

src/pyannote/audio/core/pipeline.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
# SOFTWARE.
2323

24+
from __future__ import annotations
2425
import os
2526
import warnings
2627
from collections import OrderedDict
@@ -476,7 +477,7 @@ def __call__(self, file: AudioFile, preload: bool = False, **kwargs):
476477

477478
return self.apply(file, **kwargs)
478479

479-
def to(self, device: torch.device):
480+
def to(self, device: torch.device) -> Pipeline:
480481
"""Send pipeline to `device`"""
481482

482483
if not isinstance(device, torch.device):
@@ -497,3 +498,14 @@ def to(self, device: torch.device):
497498
self.device = device
498499

499500
return self
501+
502+
def cuda(self, device: torch.device | int | str | None = None) -> Pipeline:
    """Send pipeline to (optionally specified) cuda device

    Parameters
    ----------
    device : torch.device, int, str or None, optional
        Target CUDA device. `None` selects `torch.device("cuda")`, an
        integer is used as the CUDA device index, and a `torch.device`
        or a device string (e.g. "cuda:1") is used as given.

    Returns
    -------
    pipeline : Pipeline
        The value returned by `self.to(...)`.

    Raises
    ------
    ValueError
        If the requested device is not a CUDA device.
    """
    if device is None:
        target = torch.device("cuda")
    elif isinstance(device, int):
        target = torch.device("cuda", device)
    elif isinstance(device, torch.device):
        target = device
    else:
        # Device strings such as "cuda:1" used to fail with an opaque
        # AttributeError on `device.type`; normalize them instead.
        target = torch.device(device)

    # Single validation point for every accepted input form.
    if target.type != "cuda":
        raise ValueError("Expected CUDA device. Use `Pipeline.to(device)` for other devices.")

    return self.to(target)

src/pyannote/audio/pipelines/utils/diarization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def to_annotation(
215215
min_duration_off=min_duration_off,
216216
)
217217

218-
return binarize(discrete_diarization).rename_tracks(generator="string")
218+
return binarize(discrete_diarization)
219219

220220
@staticmethod
221221
def to_diarization(

src/pyannote/audio/utils/signal.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
import numpy as np
3939
import scipy.signal
4040
from pyannote.core import Annotation, Segment, SlidingWindowFeature, Timeline
41-
from pyannote.core.utils.generators import pairwise
41+
from pyannote.core.utils.generators import pairwise, string_generator
4242

4343

4444
@singledispatch
@@ -271,10 +271,11 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
271271

272272
# annotation meant to store 'active' regions
273273
active = Annotation()
274+
track_generator = string_generator()
274275

275276
for k, k_scores in enumerate(scores.data.T):
276-
277277
label = k if scores.labels is None else scores.labels[k]
278+
track = next(track_generator)
278279

279280
# initial state
280281
start = timestamps[0]
@@ -287,7 +288,7 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
287288
# switching from active to inactive
288289
if y < self.offset:
289290
region = Segment(start - self.pad_onset, t + self.pad_offset)
290-
active[region, k] = label
291+
active[region, track] = label
291292
start = t
292293
is_active = False
293294

@@ -301,7 +302,7 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
301302
# if active at the end, add final region
302303
if is_active:
303304
region = Segment(start - self.pad_onset, t + self.pad_offset)
304-
active[region, k] = label
305+
active[region, track] = label
305306

306307
# because of padding, some active regions might be overlapping: merge them.
307308
# also: fill same speaker gaps shorter than min_duration_off

0 commit comments

Comments
 (0)