Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

## next

- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/))
- feat(cli): add option to apply pipeline on a directory of audio files
- improve(util): make `permutate` faster thanks to vectorized cost function
- improve(util): make `permutate` faster thanks to vectorized cost function ([@joonaskalda](https://github.com/joonaskalda/))

## Version 4.0.1 (2025-10-10)

Expand Down
37 changes: 36 additions & 1 deletion src/pyannote/audio/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,23 @@ def classes(self) -> List | Iterator:
"""
raise NotImplementedError()

def __call__(self, file: AudioFile, **kwargs):
def __call__(self, file: AudioFile, preload: bool = False, **kwargs):
"""Validate file, (optionally) load it in memory, then process it

Parameters
----------
file : AudioFile
File to process
preload : bool, optional
Whether to preload the waveform in memory before applying the pipeline.
Defaults to False.
kwargs : keyword arguments, optional
Additional keyword arguments passed to `self.apply(...)`

Returns
-------
output : Any
Whatever `self.apply(...)` returns
"""
fix_reproducibility(getattr(self, "device", torch.device("cpu")))

if not self.instantiated:
Expand All @@ -432,9 +448,28 @@ def __call__(self, file: AudioFile, **kwargs):

file = Audio.validate_file(file)

# check if the instance has preprocessors and wrap the file if so
if hasattr(self, "preprocessors"):
file = ProtocolFile(file, lazy=self.preprocessors)

# pre-load the audio in memory if requested
if preload:
# raise error if `waveform` is already in memory (or will be via a preprocessor)
if (
"waveform" in getattr(self, "preprocessors", dict())
or "waveform" in file
):
raise ValueError(
"Cannot preload audio: `waveform` key is already available or will be via a preprocessor."
)

# load waveform in memory (and keep track of its original sample rate)
file["waveform"], file["sample_rate"] = Audio()(file)

# the above line already took care of channel selection,
# therefore we remove the `channel` key from the file
file.pop("channel", None)

# send file duration to telemetry as well as
# requested number of speakers in case of diarization
track_pipeline_apply(self, file, **kwargs)
Expand Down