From e9901f7ce00a02472e44710e3fd69df190d22fe5 Mon Sep 17 00:00:00 2001 From: antoinelaurent Date: Thu, 6 Nov 2025 17:01:56 +0100 Subject: [PATCH 1/3] Update pipeline.py Allow preloading of the waveform in pipeline call --- src/pyannote/audio/core/pipeline.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/pyannote/audio/core/pipeline.py b/src/pyannote/audio/core/pipeline.py index 079aba5c3..317546296 100644 --- a/src/pyannote/audio/core/pipeline.py +++ b/src/pyannote/audio/core/pipeline.py @@ -406,7 +406,7 @@ def classes(self) -> List | Iterator: """ raise NotImplementedError() - def __call__(self, file: AudioFile, **kwargs): + def __call__(self, file: AudioFile, preload_waveform: bool = False, **kwargs): fix_reproducibility(getattr(self, "device", torch.device("cpu"))) if not self.instantiated: @@ -432,8 +432,23 @@ def __call__(self, file: AudioFile, **kwargs): file = Audio.validate_file(file) + preproc_waveform = False + + # Check if the instance has preprocessors and wrap the file if so if hasattr(self, "preprocessors"): file = ProtocolFile(file, lazy=self.preprocessors) + preproc_waveform = "waveform" in self.preprocessors + + # Load the waveform if requested, unless already available + if preload_waveform: + if preproc_waveform or "waveform" in file: + raise ValueError( + "Cannot preload waveform: it is already loaded in the file or through a preprocessor." 
+ ) + + audio = Audio() + file["waveform"], file["sample_rate"] = audio(file) + file.pop("channel", None) # send file duration to telemetry as well as # requested number of speakers in case of diarization From e882e0b6e05a92ced74c6e80256f5e9b5db87656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Sat, 8 Nov 2025 17:21:04 +0100 Subject: [PATCH 2/3] chore: added docstring and renamed option to `preload` --- src/pyannote/audio/core/pipeline.py | 42 +++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/pyannote/audio/core/pipeline.py b/src/pyannote/audio/core/pipeline.py index 317546296..205b33ade 100644 --- a/src/pyannote/audio/core/pipeline.py +++ b/src/pyannote/audio/core/pipeline.py @@ -406,7 +406,23 @@ def classes(self) -> List | Iterator: """ raise NotImplementedError() - def __call__(self, file: AudioFile, preload_waveform: bool = False, **kwargs): + def __call__(self, file: AudioFile, preload: bool = False, **kwargs): + """Validate file, (optionally) load it in memory, then process it + + Parameters + ---------- + file : AudioFile + File to process + preload : bool, optional + Whether to preload waveform before applying the pipeline. 
+ kwargs : keyword arguments, optional + Additional keyword arguments passed to `self.apply(...)` + + Returns + ------- + output : Any + Whatever `self.apply(...)` returns + """ fix_reproducibility(getattr(self, "device", torch.device("cpu"))) if not self.instantiated: @@ -432,22 +448,26 @@ def __call__(self, file: AudioFile, preload_waveform: bool = False, **kwargs): file = Audio.validate_file(file) - preproc_waveform = False - - # Check if the instance has preprocessors and wrap the file if so + # check if the instance has preprocessors and wrap the file if so if hasattr(self, "preprocessors"): file = ProtocolFile(file, lazy=self.preprocessors) - preproc_waveform = "waveform" in self.preprocessors - # Load the waveform if requested, unless already available - if preload_waveform: - if preproc_waveform or "waveform" in file: + # pre-load the audio in memory if requested + if preload: + # raise error if `waveform` is already in memory (or will be via a preprocessor) + if ( + "waveform" in getattr(self, "preprocessors", dict()) + or "waveform" in file + ): raise ValueError( - "Cannot preload waveform: it is already loaded in the file or through a preprocessor." + "Cannot preload audio: `waveform` key is already available or will be via a preprocessor."
) - audio = Audio() - file["waveform"], file["sample_rate"] = audio(file) + # load waveform in memory (and keep track of its original sample rate) + file["waveform"], file["sample_rate"] = Audio()(file) + + # the above line already took care of channel selection, + # therefore we remove the `channel` key from the file file.pop("channel", None) # send file duration to telemetry as well as From 0a88a7f0b76eeebfc08a5b9fe7585a9117971f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Sat, 8 Nov 2025 17:36:09 +0100 Subject: [PATCH 3/3] doc: update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3f5cb195..ccd02b647 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,9 @@ ## next +- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/)) - feat(cli): add option to apply pipeline on a directory of audio files -- improve(util): make `permutate` faster thanks to vectorized cost function +- improve(util): make `permutate` faster thanks to vectorized cost function ([@joonaskalda](https://github.com/joonaskalda/)) ## Version 4.0.1 (2025-10-10)