From 6f5fc3a50f451ed4204e609b8d12441b17dbca89 Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Tue, 1 Jul 2025 15:52:59 +0000 Subject: [PATCH 1/7] Port lfilter_core_loop wrapper to python [WIP] --- src/libtorchaudio/lfilter.cpp | 195 ++----------------------- src/torchaudio/functional/filtering.py | 132 +++++++++-------- 2 files changed, 80 insertions(+), 247 deletions(-) diff --git a/src/libtorchaudio/lfilter.cpp b/src/libtorchaudio/lfilter.cpp index 454b2cbcda..55596ed282 100644 --- a/src/libtorchaudio/lfilter.cpp +++ b/src/libtorchaudio/lfilter.cpp @@ -100,194 +100,21 @@ void lfilter_core_generic_loop( } } -class DifferentiableIIR : public torch::autograd::Function { - public: - static torch::Tensor forward( - torch::autograd::AutogradContext* ctx, - const torch::Tensor& waveform, - const torch::Tensor& a_coeffs_normalized) { - auto device = waveform.device(); - auto dtype = waveform.dtype(); - int64_t n_batch = waveform.size(0); - int64_t n_channel = waveform.size(1); - int64_t n_sample = waveform.size(2); - int64_t n_order = a_coeffs_normalized.size(1); - int64_t n_sample_padded = n_sample + n_order - 1; - - auto a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous(); - - auto options = torch::TensorOptions().dtype(dtype).device(device); - auto padded_output_waveform = - torch::zeros({n_batch, n_channel, n_sample_padded}, options); - - if (device.is_cpu()) { - cpu_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform); - } else if (device.is_cuda()) { -#ifdef USE_CUDA - cuda_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform); -#else - lfilter_core_generic_loop( - waveform, a_coeff_flipped, padded_output_waveform); -#endif - } else { - lfilter_core_generic_loop( - waveform, a_coeff_flipped, padded_output_waveform); - } - - auto output = padded_output_waveform.index( - {torch::indexing::Slice(), - torch::indexing::Slice(), - torch::indexing::Slice(n_order - 1, torch::indexing::None)}); - - ctx->save_for_backward({waveform, a_coeffs_normalized, output}); - return output; - } - - static torch::autograd::tensor_list backward( - torch::autograd::AutogradContext* ctx, - torch::autograd::tensor_list grad_outputs) { - auto saved = ctx->get_saved_variables(); - auto x = saved[0]; - auto a_coeffs_normalized = saved[1]; - auto y = saved[2]; - - int64_t n_channel = x.size(1); - int64_t n_order = a_coeffs_normalized.size(1); - - auto dx = torch::Tensor(); - auto da = torch::Tensor(); - auto dy = grad_outputs[0]; - - namespace F = torch::nn::functional; - - auto tmp = - DifferentiableIIR::apply(dy.flip(2).contiguous(), a_coeffs_normalized) - .flip(2); - - if (x.requires_grad()) { - dx = tmp; - } - - if (a_coeffs_normalized.requires_grad()) { - da = -torch::matmul( - tmp.transpose(0, 1).reshape({n_channel, 1, -1}), - F::pad(y, F::PadFuncOptions({n_order - 1, 0})) - .unfold(2, n_order, 1) - .transpose(0, 1) - .reshape({n_channel, -1, n_order})) - .squeeze(1) - .flip(1); - } - return {dx, da}; - } -}; - -class DifferentiableFIR : public torch::autograd::Function { - public: - static torch::Tensor forward( - torch::autograd::AutogradContext* ctx, - const torch::Tensor& waveform, - const torch::Tensor& b_coeffs) { - int64_t n_order = b_coeffs.size(1); - int64_t n_channel = b_coeffs.size(0); - - namespace F = torch::nn::functional; - auto b_coeff_flipped = b_coeffs.flip(1).contiguous(); - auto padded_waveform = - F::pad(waveform, F::PadFuncOptions({n_order - 1, 0})); - - auto output = F::conv1d( - padded_waveform, - b_coeff_flipped.unsqueeze(1), - F::Conv1dFuncOptions().groups(n_channel)); - - ctx->save_for_backward({waveform, b_coeffs, output}); - return output; - } - - static torch::autograd::tensor_list backward( - torch::autograd::AutogradContext* ctx, - torch::autograd::tensor_list grad_outputs) { - auto saved = ctx->get_saved_variables(); - auto x = saved[0]; - auto b_coeffs = saved[1]; - auto y = saved[2]; - - int64_t n_batch = x.size(0); - int64_t n_channel = x.size(1); - int64_t n_order = b_coeffs.size(1); - - auto dx = torch::Tensor(); - auto db = torch::Tensor(); - auto dy = grad_outputs[0]; - - namespace F = torch::nn::functional; - - if (b_coeffs.requires_grad()) { - db = F::conv1d( - F::pad(x, F::PadFuncOptions({n_order - 1, 0})) - .view({1, n_batch * n_channel, -1}), - dy.view({n_batch * n_channel, 1, -1}), - F::Conv1dFuncOptions().groups(n_batch * n_channel)) - .view({n_batch, n_channel, -1}) - .sum(0) - .flip(1); - } - - if (x.requires_grad()) { - dx = F::conv1d( - F::pad(dy, F::PadFuncOptions({0, n_order - 1})), - b_coeffs.unsqueeze(1), - F::Conv1dFuncOptions().groups(n_channel)); - } - - return {dx, db}; - } -}; - -torch::Tensor lfilter_core( - const torch::Tensor& waveform, - const torch::Tensor& a_coeffs, - const torch::Tensor& b_coeffs) { - TORCH_CHECK(waveform.device() == a_coeffs.device()); - TORCH_CHECK(b_coeffs.device() == a_coeffs.device()); - TORCH_CHECK(a_coeffs.sizes() == b_coeffs.sizes()); - - TORCH_INTERNAL_ASSERT(waveform.sizes().size() == 3); - TORCH_INTERNAL_ASSERT(a_coeffs.sizes().size() == 2); - TORCH_INTERNAL_ASSERT(a_coeffs.size(0) == waveform.size(1)); - - int64_t n_order = b_coeffs.size(1); - - TORCH_INTERNAL_ASSERT(n_order > 0); - - auto filtered_waveform = DifferentiableFIR::apply( - waveform, - b_coeffs / - a_coeffs.index( - {torch::indexing::Slice(), torch::indexing::Slice(0, 1)})); - - auto output = DifferentiableIIR::apply( - filtered_waveform, - a_coeffs / - a_coeffs.index( - {torch::indexing::Slice(), torch::indexing::Slice(0, 1)})); - return output; -} - } // namespace -// Note: We want to avoid using "catch-all" kernel. -// The following registration should be replaced with CPU specific registration. -TORCH_LIBRARY_FRAGMENT(torchaudio, m) { - m.def("torchaudio::_lfilter_core_loop", &cpu_lfilter_core_loop); -} - TORCH_LIBRARY(torchaudio, m) { m.def( - "torchaudio::_lfilter(Tensor waveform, Tensor a_coeffs, Tensor b_coeffs) -> Tensor"); + "torchaudio::_lfilter_core_loop(Tensor input_signal_windows, Tensor a_coeff_flipped, Tensor(a!) padded_output_waveform) -> ()"); } -TORCH_LIBRARY_IMPL(torchaudio, CompositeImplicitAutograd, m) { - m.impl("torchaudio::_lfilter", lfilter_core); +TORCH_LIBRARY_IMPL(torchaudio, CPU, m) { + m.impl("torchaudio::_lfilter_core_loop", &cpu_lfilter_core_loop); } + +TORCH_LIBRARY_IMPL(torchaudio, CUDA, m) { + m.impl("torchaudio::_lfilter_core_loop", &cuda_lfilter_core_loop); +} + +// TORCH_LIBRARY_IMPL(torchaudio, CompositeExplicitAutograd, m) { +// m.impl("torchaudio::_lfilter_core_loop", &lfilter_core_generic_loop); +// } diff --git a/src/torchaudio/functional/filtering.py b/src/torchaudio/functional/filtering.py index 541c56c475..61be2f8ea6 100644 --- a/src/torchaudio/functional/filtering.py +++ b/src/torchaudio/functional/filtering.py @@ -4,6 +4,7 @@ import torch from torch import Tensor +import torch.nn.functional as F from torchaudio._extension import _IS_TORCHAUDIO_EXT_AVAILABLE @@ -932,69 +933,75 @@ def _lfilter_core_generic_loop(input_signal_windows: Tensor, a_coeffs_flipped: T if _IS_TORCHAUDIO_EXT_AVAILABLE: - _lfilter_core_cpu_loop = torch.ops.torchaudio._lfilter_core_loop + _lfilter_core_loop = torch.ops.torchaudio._lfilter_core_loop else: - _lfilter_core_cpu_loop = _lfilter_core_generic_loop - - -def _lfilter_core( - waveform: Tensor, - a_coeffs: Tensor, - b_coeffs: Tensor, -) -> Tensor: - - if a_coeffs.size() != b_coeffs.size(): - raise ValueError( - "Expected coeffs to be the same size." - f"Found a_coeffs size: {a_coeffs.size()}, b_coeffs size: {b_coeffs.size()}" - ) - if waveform.ndim != 3: - raise ValueError(f"Expected waveform to be 3 dimensional. Found: {waveform.ndim}") - if not (waveform.device == a_coeffs.device == b_coeffs.device): - raise ValueError( - "Expected waveform and coeffs to be on the same device." - f"Found: waveform device:{waveform.device}, a_coeffs device: {a_coeffs.device}, " - f"b_coeffs device: {b_coeffs.device}" - ) - - n_batch, n_channel, n_sample = waveform.size() - n_order = a_coeffs.size(1) - if n_order <= 0: - raise ValueError(f"Expected n_order to be positive. Found: {n_order}") - - # Pad the input and create output - - padded_waveform = torch.nn.functional.pad(waveform, [n_order - 1, 0]) - padded_output_waveform = torch.zeros_like(padded_waveform) - - # Set up the coefficients matrix - # Flip coefficients' order - a_coeffs_flipped = a_coeffs.flip(1) - b_coeffs_flipped = b_coeffs.flip(1) - - # calculate windowed_input_signal in parallel using convolution - input_signal_windows = torch.nn.functional.conv1d(padded_waveform, b_coeffs_flipped.unsqueeze(1), groups=n_channel) - - input_signal_windows.div_(a_coeffs[:, :1]) - a_coeffs_flipped.div_(a_coeffs[:, :1]) - - if ( - input_signal_windows.device == torch.device("cpu") - and a_coeffs_flipped.device == torch.device("cpu") - and padded_output_waveform.device == torch.device("cpu") - ): - _lfilter_core_cpu_loop(input_signal_windows, a_coeffs_flipped, padded_output_waveform) - else: - _lfilter_core_generic_loop(input_signal_windows, a_coeffs_flipped, padded_output_waveform) - - output = padded_output_waveform[:, :, n_order - 1 :] - return output - - -if _IS_TORCHAUDIO_EXT_AVAILABLE: - _lfilter = torch.ops.torchaudio._lfilter -else: - _lfilter = _lfilter_core + _lfilter_core_loop = _lfilter_core_generic_loop + + +class DifferentiableFIR(torch.autograd.Function): + @staticmethod + def forward(ctx, waveform, b_coeffs): + n_order = b_coeffs.size(1) + n_channel = b_coeffs.size(0) + b_coeff_flipped = b_coeffs.flip(1).contiguous() + padded_waveform = F.pad(waveform, (n_order - 1, 0)) + output = F.conv1d(padded_waveform, b_coeff_flipped.unsqueeze(1), groups=n_channel) + ctx.save_for_backward(waveform, b_coeffs, output) + return output + + @staticmethod + def backward(ctx, dy): + x, b_coeffs, y = ctx.saved_tensors + n_batch = x.size(0) + n_channel = x.size(1) + n_order = b_coeffs.size(1) + db = F.conv1d( + F.pad(x, (n_order - 1, 0)).view(n_batch * n_channel, 1, -1), + dy.view(n_batch * n_channel, 1, -1), + groups=n_batch * n_channel + ).view( + n_batch, n_channel, -1 + ).sum(0).flip(1) if b_coeffs.requires_grad else None + dx = F.conv1d( + F.pad(dy, (0, n_order - 1)), + b_coeffs.unsqueeze(1), + groups=n_channel + ) if x.requires_grad else None + return (dx, db) + +class DifferentiableIIR(torch.autograd.Function): + @staticmethod + def forward(ctx, waveform, a_coeffs_normalized): + n_batch, n_channel, n_sample = waveform.shape + n_order = a_coeffs_normalized.size(1) + n_sample_padded = n_sample + n_order - 1 + + a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous(); + padded_output_waveform = torch.zeros(n_batch, n_channel, n_sample_padded, + device=waveform.device, dtype=waveform.dtype) + _lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform) + output = padded_output_waveform[:,:,n_order - 1:] + ctx.save_for_backward(waveform, a_coeff_flipped, output) + return output + + @staticmethod + def backward(ctx, dy): + x, a_coeffs_normalized, y = ctx.saved_tensors + n_channel = x.size(1) + n_order = a_coeffs_normalized.size(1) + tmp = DifferentiableIIR.apply(dy.flip(2).contiguous(), a_coeffs_normalized).flip(2) + dx = tmp if x.requires_grad else None + da = -(tmp.transpose(0, 1).reshape(n_channel, 1, -1) @ + F.pad(y, (n_order - 1, 0)).unfold(2, n_order, 1).transpose(0,1) + .reshape(n_channel, -1, n_order) + ).squeeze(1).flip(1) if a_coeffs_normalized.requires_grad else None + return (dx, da) + + +def _lfilter(waveform, a_coeffs, b_coeffs): + n_order = b_coeffs.size(1) + filtered_waveform = DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1]) + return DifferentiableIIR.apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1]) def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = True, batching: bool = True) -> Tensor: @@ -1066,7 +1073,6 @@ def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = return output - def lowpass_biquad(waveform: Tensor, sample_rate: int, cutoff_freq: float, Q: float = 0.707) -> Tensor: r"""Design biquad lowpass filter and perform filtering. Similar to SoX implementation. From 4a1753dff624050a885ab46660a5edbbd1798476 Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Fri, 11 Jul 2025 18:42:44 +0000 Subject: [PATCH 2/7] Debugging --- src/libtorchaudio/lfilter.cpp | 171 ++++++++++++++++++++++++- src/torchaudio/functional/filtering.py | 15 ++- 2 files changed, 180 insertions(+), 6 deletions(-) diff --git a/src/libtorchaudio/lfilter.cpp b/src/libtorchaudio/lfilter.cpp index 55596ed282..8939980111 100644 --- a/src/libtorchaudio/lfilter.cpp +++ b/src/libtorchaudio/lfilter.cpp @@ -100,21 +100,186 @@ void lfilter_core_generic_loop( } } +class DifferentiableIIR : public torch::autograd::Function { + public: + static torch::Tensor forward( + torch::autograd::AutogradContext* ctx, + const torch::Tensor& waveform, + const torch::Tensor& a_coeffs_normalized) { + auto device = waveform.device(); + auto dtype = waveform.dtype(); + int64_t n_batch = waveform.size(0); + int64_t n_channel = waveform.size(1); + int64_t n_sample = waveform.size(2); + int64_t n_order = a_coeffs_normalized.size(1); + int64_t n_sample_padded = n_sample + n_order - 1; + + auto a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous(); + + auto options = torch::TensorOptions().dtype(dtype).device(device); + auto padded_output_waveform = + torch::zeros({n_batch, n_channel, n_sample_padded}, options); + + if (device.is_cpu()) { + cpu_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform); + } else if (device.is_cuda()) { +#ifdef USE_CUDA + cuda_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform); +#else + lfilter_core_generic_loop( + waveform, a_coeff_flipped, padded_output_waveform); +#endif + } else { + lfilter_core_generic_loop( + waveform, a_coeff_flipped, padded_output_waveform); + } + + auto output = padded_output_waveform.index( + {torch::indexing::Slice(), + torch::indexing::Slice(), + torch::indexing::Slice(n_order - 1, torch::indexing::None)}); + + ctx->save_for_backward({waveform, a_coeffs_normalized, output}); + return output; + } + + static torch::autograd::tensor_list backward( + torch::autograd::AutogradContext* ctx, + torch::autograd::tensor_list grad_outputs) { + auto saved = ctx->get_saved_variables(); + auto x = saved[0]; + auto a_coeffs_normalized = saved[1]; + auto y = saved[2]; + + int64_t n_channel = x.size(1); + int64_t n_order = a_coeffs_normalized.size(1); + + auto dx = torch::Tensor(); + auto da = torch::Tensor(); + auto dy = grad_outputs[0]; + + namespace F = torch::nn::functional; + + auto tmp = + DifferentiableIIR::apply(dy.flip(2).contiguous(), a_coeffs_normalized) + .flip(2); + + if (x.requires_grad()) { + dx = tmp; + } + + if (a_coeffs_normalized.requires_grad()) { + da = -torch::matmul( + tmp.transpose(0, 1).reshape({n_channel, 1, -1}), + F::pad(y, F::PadFuncOptions({n_order - 1, 0})) + .unfold(2, n_order, 1) + .transpose(0, 1) + .reshape({n_channel, -1, n_order})) + .squeeze(1) + .flip(1); + } + return {dx, da}; + } +}; + +class DifferentiableFIR : public torch::autograd::Function { + public: + static torch::Tensor forward( + torch::autograd::AutogradContext* ctx, + const torch::Tensor& waveform, + const torch::Tensor& b_coeffs) { + int64_t n_order = b_coeffs.size(1); + int64_t n_channel = b_coeffs.size(0); + + namespace F = torch::nn::functional; + auto b_coeff_flipped = b_coeffs.flip(1).contiguous(); + auto padded_waveform = + F::pad(waveform, F::PadFuncOptions({n_order - 1, 0})); + + auto output = F::conv1d( + padded_waveform, + b_coeff_flipped.unsqueeze(1), + F::Conv1dFuncOptions().groups(n_channel)); + + ctx->save_for_backward({waveform, b_coeffs, output}); + return output; + } + + static torch::autograd::tensor_list backward( + torch::autograd::AutogradContext* ctx, + torch::autograd::tensor_list grad_outputs) { + auto saved = ctx->get_saved_variables(); + auto x = saved[0]; + auto b_coeffs = saved[1]; + auto y = saved[2]; + + int64_t n_batch = x.size(0); + int64_t n_channel = x.size(1); + int64_t n_order = b_coeffs.size(1); + + auto dx = torch::Tensor(); + auto db = torch::Tensor(); + auto dy = grad_outputs[0]; + + namespace F = torch::nn::functional; + + if (b_coeffs.requires_grad()) { + db = F::conv1d( + F::pad(x, F::PadFuncOptions({n_order - 1, 0})) + .view({1, n_batch * n_channel, -1}), + dy.view({n_batch * n_channel, 1, -1}), + F::Conv1dFuncOptions().groups(n_batch * n_channel)) + .view({n_batch, n_channel, -1}) + .sum(0) + .flip(1); + } + + if (x.requires_grad()) { + dx = F::conv1d( + F::pad(dy, F::PadFuncOptions({0, n_order - 1})), + b_coeffs.unsqueeze(1), + F::Conv1dFuncOptions().groups(n_channel)); + } + + return {dx, db}; + } +}; + +torch::Tensor fir( + const torch::Tensor& waveform, + const torch::Tensor& b_coeffs) { + return DifferentiableFIR::apply(waveform, b_coeffs); +} + +torch::Tensor iir( + const torch::Tensor& waveform, + const torch::Tensor& b_coeffs) { + return DifferentiableIIR::apply(waveform, b_coeffs); +} + + } // namespace TORCH_LIBRARY(torchaudio, m) { m.def( "torchaudio::_lfilter_core_loop(Tensor input_signal_windows, Tensor a_coeff_flipped, Tensor(a!) padded_output_waveform) -> ()"); + m.def( + "torchaudio::iir(Tensor waveform, Tensor b_coeffs) -> Tensor"); + m.def( + "torchaudio::fir(Tensor waveform, Tensor b_coeffs) -> Tensor"); } TORCH_LIBRARY_IMPL(torchaudio, CPU, m) { m.impl("torchaudio::_lfilter_core_loop", &cpu_lfilter_core_loop); } +#ifdef USE_CUDA TORCH_LIBRARY_IMPL(torchaudio, CUDA, m) { m.impl("torchaudio::_lfilter_core_loop", &cuda_lfilter_core_loop); } +#endif -// TORCH_LIBRARY_IMPL(torchaudio, CompositeExplicitAutograd, m) { -// m.impl("torchaudio::_lfilter_core_loop", &lfilter_core_generic_loop); -// } +TORCH_LIBRARY_IMPL(torchaudio, CompositeImplicitAutograd, m) { + m.impl("torchaudio::iir", iir); + m.impl("torchaudio::fir", fir); +} diff --git a/src/torchaudio/functional/filtering.py b/src/torchaudio/functional/filtering.py index 61be2f8ea6..3cf3b7f0d3 100644 --- a/src/torchaudio/functional/filtering.py +++ b/src/torchaudio/functional/filtering.py @@ -956,7 +956,7 @@ def backward(ctx, dy): n_channel = x.size(1) n_order = b_coeffs.size(1) db = F.conv1d( - F.pad(x, (n_order - 1, 0)).view(n_batch * n_channel, 1, -1), + F.pad(x, (n_order - 1, 0)).view(1, n_batch * n_channel, -1), dy.view(n_batch * n_channel, 1, -1), groups=n_batch * n_channel ).view( @@ -981,7 +981,7 @@ def forward(ctx, waveform, a_coeffs_normalized): device=waveform.device, dtype=waveform.dtype) _lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform) output = padded_output_waveform[:,:,n_order - 1:] - ctx.save_for_backward(waveform, a_coeff_flipped, output) + ctx.save_for_backward(waveform, a_coeffs_normalized, output) return output @staticmethod @@ -998,11 +998,20 @@ def backward(ctx, dy): return (dx, da) -def _lfilter(waveform, a_coeffs, b_coeffs): +def _lfilter1(waveform, a_coeffs, b_coeffs): + n_order = b_coeffs.size(1) + filtered_waveform = torch.ops.torchaudio.fir(waveform, b_coeffs / a_coeffs[:, 0:1]) + return torch.ops.torchaudio.iir(filtered_waveform, a_coeffs / a_coeffs[:, 0:1]) + +def _lfilter2(waveform, a_coeffs, b_coeffs): n_order = b_coeffs.size(1) filtered_waveform = DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1]) return DifferentiableIIR.apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1]) +_lfilter = _lfilter2 + +def fir_part(waveform, a_coeffs, b_coeffs): + return DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1]) def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = True, batching: bool = True) -> Tensor: r"""Perform an IIR filter by evaluating difference equation, using differentiable implementation From a5fc28e2457e675c4229686adfdc6dc33794f96a Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Fri, 11 Jul 2025 21:19:34 +0000 Subject: [PATCH 3/7] Remove comparison code --- src/libtorchaudio/lfilter.cpp | 167 ------------------------- src/torchaudio/functional/filtering.py | 13 +- 2 files changed, 1 insertion(+), 179 deletions(-) diff --git a/src/libtorchaudio/lfilter.cpp b/src/libtorchaudio/lfilter.cpp index 8939980111..14815eca4b 100644 --- a/src/libtorchaudio/lfilter.cpp +++ b/src/libtorchaudio/lfilter.cpp @@ -100,173 +100,11 @@ void lfilter_core_generic_loop( } } -class DifferentiableIIR : public torch::autograd::Function { - public: - static torch::Tensor forward( - torch::autograd::AutogradContext* ctx, - const torch::Tensor& waveform, - const torch::Tensor& a_coeffs_normalized) { - auto device = waveform.device(); - auto dtype = waveform.dtype(); - int64_t n_batch = waveform.size(0); - int64_t n_channel = waveform.size(1); - int64_t n_sample = waveform.size(2); - int64_t n_order = a_coeffs_normalized.size(1); - int64_t n_sample_padded = n_sample + n_order - 1; - - auto a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous(); - - auto options = torch::TensorOptions().dtype(dtype).device(device); - auto padded_output_waveform = - torch::zeros({n_batch, n_channel, n_sample_padded}, options); - - if (device.is_cpu()) { - cpu_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform); - } else if (device.is_cuda()) { -#ifdef USE_CUDA - cuda_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform); -#else - lfilter_core_generic_loop( - waveform, a_coeff_flipped, padded_output_waveform); -#endif - } else { - lfilter_core_generic_loop( - waveform, a_coeff_flipped, padded_output_waveform); - } - - auto output = padded_output_waveform.index( - {torch::indexing::Slice(), - torch::indexing::Slice(), - torch::indexing::Slice(n_order - 1, torch::indexing::None)}); - - ctx->save_for_backward({waveform, a_coeffs_normalized, output}); - return output; - } - - static torch::autograd::tensor_list backward( - torch::autograd::AutogradContext* ctx, - torch::autograd::tensor_list grad_outputs) { - auto saved = ctx->get_saved_variables(); - auto x = saved[0]; - auto a_coeffs_normalized = saved[1]; - auto y = saved[2]; - - int64_t n_channel = x.size(1); - int64_t n_order = a_coeffs_normalized.size(1); - - auto dx = torch::Tensor(); - auto da = torch::Tensor(); - auto dy = grad_outputs[0]; - - namespace F = torch::nn::functional; - - auto tmp = - DifferentiableIIR::apply(dy.flip(2).contiguous(), a_coeffs_normalized) - .flip(2); - - if (x.requires_grad()) { - dx = tmp; - } - - if (a_coeffs_normalized.requires_grad()) { - da = -torch::matmul( - tmp.transpose(0, 1).reshape({n_channel, 1, -1}), - F::pad(y, F::PadFuncOptions({n_order - 1, 0})) - .unfold(2, n_order, 1) - .transpose(0, 1) - .reshape({n_channel, -1, n_order})) - .squeeze(1) - .flip(1); - } - return {dx, da}; - } -}; - -class DifferentiableFIR : public torch::autograd::Function { - public: - static torch::Tensor forward( - torch::autograd::AutogradContext* ctx, - const torch::Tensor& waveform, - const torch::Tensor& b_coeffs) { - int64_t n_order = b_coeffs.size(1); - int64_t n_channel = b_coeffs.size(0); - - namespace F = torch::nn::functional; - auto b_coeff_flipped = b_coeffs.flip(1).contiguous(); - auto padded_waveform = - F::pad(waveform, F::PadFuncOptions({n_order - 1, 0})); - - auto output = F::conv1d( - padded_waveform, - b_coeff_flipped.unsqueeze(1), - F::Conv1dFuncOptions().groups(n_channel)); - - ctx->save_for_backward({waveform, b_coeffs, output}); - return output; - } - - static torch::autograd::tensor_list backward( - torch::autograd::AutogradContext* ctx, - torch::autograd::tensor_list grad_outputs) { - auto saved = ctx->get_saved_variables(); - auto x = saved[0]; - auto b_coeffs = saved[1]; - auto y = saved[2]; - - int64_t n_batch = x.size(0); - int64_t n_channel = x.size(1); - int64_t n_order = b_coeffs.size(1); - - auto dx = torch::Tensor(); - auto db = torch::Tensor(); - auto dy = grad_outputs[0]; - - namespace F = torch::nn::functional; - - if (b_coeffs.requires_grad()) { - db = F::conv1d( - F::pad(x, F::PadFuncOptions({n_order - 1, 0})) - .view({1, n_batch * n_channel, -1}), - dy.view({n_batch * n_channel, 1, -1}), - F::Conv1dFuncOptions().groups(n_batch * n_channel)) - .view({n_batch, n_channel, -1}) - .sum(0) - .flip(1); - } - - if (x.requires_grad()) { - dx = F::conv1d( - F::pad(dy, F::PadFuncOptions({0, n_order - 1})), - b_coeffs.unsqueeze(1), - F::Conv1dFuncOptions().groups(n_channel)); - } - - return {dx, db}; - } -}; - -torch::Tensor fir( - const torch::Tensor& waveform, - const torch::Tensor& b_coeffs) { - return DifferentiableFIR::apply(waveform, b_coeffs); -} - -torch::Tensor iir( - const torch::Tensor& waveform, - const torch::Tensor& b_coeffs) { - return DifferentiableIIR::apply(waveform, b_coeffs); -} - - } // namespace TORCH_LIBRARY(torchaudio, m) { m.def( "torchaudio::_lfilter_core_loop(Tensor input_signal_windows, Tensor a_coeff_flipped, Tensor(a!) padded_output_waveform) -> ()"); - m.def( - "torchaudio::iir(Tensor waveform, Tensor b_coeffs) -> Tensor"); - m.def( - "torchaudio::fir(Tensor waveform, Tensor b_coeffs) -> Tensor"); } TORCH_LIBRARY_IMPL(torchaudio, CPU, m) { @@ -278,8 +116,3 @@ TORCH_LIBRARY_IMPL(torchaudio, CUDA, m) { m.impl("torchaudio::_lfilter_core_loop", &cuda_lfilter_core_loop); } #endif - -TORCH_LIBRARY_IMPL(torchaudio, CompositeImplicitAutograd, m) { - m.impl("torchaudio::iir", iir); - m.impl("torchaudio::fir", fir); -} diff --git a/src/torchaudio/functional/filtering.py b/src/torchaudio/functional/filtering.py index 3cf3b7f0d3..76deb04a96 100644 --- a/src/torchaudio/functional/filtering.py +++ b/src/torchaudio/functional/filtering.py @@ -997,22 +997,11 @@ def backward(ctx, dy): ).squeeze(1).flip(1) if a_coeffs_normalized.requires_grad else None return (dx, da) - -def _lfilter1(waveform, a_coeffs, b_coeffs): - n_order = b_coeffs.size(1) - filtered_waveform = torch.ops.torchaudio.fir(waveform, b_coeffs / a_coeffs[:, 0:1]) - return torch.ops.torchaudio.iir(filtered_waveform, a_coeffs / a_coeffs[:, 0:1]) - -def _lfilter2(waveform, a_coeffs, b_coeffs): +def _lfilter(waveform, a_coeffs, b_coeffs): n_order = b_coeffs.size(1) filtered_waveform = DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1]) return DifferentiableIIR.apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1]) -_lfilter = _lfilter2 - -def fir_part(waveform, a_coeffs, b_coeffs): - return DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1]) - def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = True, batching: bool = True) -> Tensor: r"""Perform an IIR filter by evaluating difference equation, using differentiable implementation developed separately by *Yu et al.* :cite:`ismir_YuF23` and *Forgione et al.* :cite:`forgione2021dynonet`. From dab42201ae72071230d1d81f829c30d658634981 Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Fri, 11 Jul 2025 22:44:34 +0000 Subject: [PATCH 4/7] Remove torchscript consistency test --- .github/scripts/unittest-linux/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/unittest-linux/run_test.sh b/.github/scripts/unittest-linux/run_test.sh index f311c8370e..c1dfc68f1f 100755 --- a/.github/scripts/unittest-linux/run_test.sh +++ b/.github/scripts/unittest-linux/run_test.sh @@ -30,5 +30,5 @@ fi ( cd test - pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs" + pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not torchscript_consistency" ) From 3b3e0dd6765c78fe6cbb5ecfb040207f2ba667eb Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Mon, 14 Jul 2025 14:16:36 +0000 Subject: [PATCH 5/7] Add lfilter_core generic path --- src/libtorchaudio/lfilter.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libtorchaudio/lfilter.cpp b/src/libtorchaudio/lfilter.cpp index 14815eca4b..9224eade9b 100644 --- a/src/libtorchaudio/lfilter.cpp +++ b/src/libtorchaudio/lfilter.cpp @@ -116,3 +116,7 @@ TORCH_LIBRARY_IMPL(torchaudio, CUDA, m) { m.impl("torchaudio::_lfilter_core_loop", &cuda_lfilter_core_loop); } #endif + +TORCH_LIBRARY_IMPL(torchaudio, CompositeExplicitAutograd, m) { + m.impl("torchaudio::_lfilter_core_loop", &lfilter_core_generic_loop); +} From 0f8a8c632167e501e026e703187db721ca43aca9 Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Mon, 14 Jul 2025 14:33:26 +0000 Subject: [PATCH 6/7] Add back GPU tests --- .github/workflows/unittest-linux-gpu.yml | 216 +++++++++++------------ 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/.github/workflows/unittest-linux-gpu.yml b/.github/workflows/unittest-linux-gpu.yml index 98b5147cff..4884b7f673 100644 --- a/.github/workflows/unittest-linux-gpu.yml +++ b/.github/workflows/unittest-linux-gpu.yml @@ -1,123 +1,117 @@ -# name: Unit-tests on Linux GPU +name: Unit-tests on Linux GPU -# on: -# pull_request: -# push: -# branches: -# - nightly -# - main -# - release/* -# workflow_dispatch: +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: -# jobs: -# tests: -# strategy: -# matrix: -# # TODO add up to 3.13 -# python_version: ["3.9", "3.10"] -# cuda_arch_version: ["12.6"] -# fail-fast: false -# uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main -# permissions: -# id-token: write -# contents: read -# with: -# runner: linux.g5.4xlarge.nvidia.gpu -# repository: pytorch/audio -# gpu-arch-type: cuda -# gpu-arch-version: ${{ matrix.cuda_arch_version }} -# timeout: 120 +jobs: + tests: + strategy: + matrix: + # TODO add up to 3.13 + python_version: ["3.9", "3.10"] + cuda_arch_version: ["12.6"] + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + runner: linux.g5.4xlarge.nvidia.gpu + repository: pytorch/audio + gpu-arch-type: cuda + gpu-arch-version: ${{ matrix.cuda_arch_version }} + timeout: 120 -# script: | -# set -ex -# # Set up Environment Variables -# export PYTHON_VERSION="${{ matrix.python_version }}" -# export CU_VERSION="${{ matrix.cuda_arch_version }}" -# export CUDATOOLKIT="pytorch-cuda=${CU_VERSION}" -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_APPLY_CMVN_SLIDING=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_FBANK_FEATS=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_KALDI_PITCH_FEATS=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_TEMPORARY_DISABLED=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_DECODER=true -# export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_ENCODER=true + script: | + set -ex + # Set up Environment Variables + export PYTHON_VERSION="${{ matrix.python_version }}" + export CU_VERSION="${{ matrix.cuda_arch_version }}" + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_APPLY_CMVN_SLIDING=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_FBANK_FEATS=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_KALDI_PITCH_FEATS=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_TEMPORARY_DISABLED=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_DECODER=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_ENCODER=true -# # Set CHANNEL -# if [[(${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then -# export CHANNEL=test -# else -# export CHANNEL=nightly -# fi + # Set CHANNEL + if [[(${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then + export CHANNEL=test + else + export CHANNEL=nightly + fi -# echo "::group::Create conda env" -# # Mark Build Directory Safe -# git config --global --add safe.directory /__w/audio/audio -# conda create --quiet -y --prefix ci_env python="${PYTHON_VERSION}" -# conda activate ./ci_env + echo "::group::Create conda env" + # Mark Build Directory Safe + git config --global --add safe.directory /__w/audio/audio + conda create --quiet -y --prefix ci_env python="${PYTHON_VERSION}" + conda activate ./ci_env -# echo "::endgroup::" -# echo "::group::Install PyTorch" -# conda install \ -# --yes \ -# --quiet \ -# -c "pytorch-${CHANNEL}" \ -# -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${CU_VERSION}*"] \ -# "${CUDATOOLKIT}" + echo "::endgroup::" + echo "::group::Install Pytorch" + pip3 install --pre torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cu128" -# echo "::endgroup::" -# echo "::group::Install TorchAudio" -# conda install --quiet --yes 'cmake>=3.18.0' ninja -# pip3 install --progress-bar off -v -e . --no-use-pep517 + echo "::endgroup::" + echo "::group::Install TorchAudio" + conda install --quiet --yes 'cmake>=3.18.0' ninja + pip3 install --progress-bar off -v -e . --no-use-pep517 -# echo "::endgroup::" -# echo "::group::Build FFmpeg" -# .github/scripts/ffmpeg/build_gpu.sh + echo "::endgroup::" + echo "::group::Build FFmpeg" + .github/scripts/ffmpeg/build_gpu.sh -# echo "::endgroup::" -# echo "::group::Install other Dependencies" -# conda install \ -# --quiet --yes \ -# -c conda-forge \ -# -c numba/label/dev \ -# sox libvorbis 'librosa==0.10.0' parameterized 'requests>=2.20' -# pip3 install --progress-bar off \ -# kaldi-io \ -# SoundFile \ -# coverage \ -# pytest \ -# pytest-cov \ -# 'scipy==1.7.3' \ -# transformers \ -# expecttest \ -# unidecode \ -# inflect \ -# Pillow \ -# sentencepiece \ -# pytorch-lightning \ -# 'protobuf<4.21.0' \ -# demucs \ -# tinytag \ -# flashlight-text \ -# git+https://github.com/kpu/kenlm/ \ -# git+https://github.com/pytorch/fairseq.git@e47a4c8 + echo "::endgroup::" + echo "::group::Install other Dependencies" + conda install \ + --quiet --yes \ + -c conda-forge \ + -c numba/label/dev \ + sox libvorbis 'librosa==0.10.0' parameterized 'requests>=2.20' + pip3 install --progress-bar off \ + kaldi-io \ + SoundFile \ + coverage \ + pytest \ + pytest-cov \ + 'scipy==1.7.3' \ + transformers \ + expecttest \ + unidecode \ + inflect \ + Pillow \ + sentencepiece \ + pytorch-lightning \ + 'protobuf<4.21.0' \ + demucs \ + tinytag \ + flashlight-text \ + git+https://github.com/kpu/kenlm/ \ + git+https://github.com/pytorch/fairseq.git@e47a4c8 -# echo "::endgroup::" -# echo "::group::Run tests" -# export PATH="${PWD}/third_party/install/bin/:${PATH}" + echo "::endgroup::" + echo "::group::Run tests" + export PATH="${PWD}/third_party/install/bin/:${PATH}" -# declare -a args=( -# '-v' -# '--cov=torchaudio' -# "--junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml" -# '--durations' '100' -# '-k' 'cuda or gpu' -# ) + declare -a args=( + '-v' + '--cov=torchaudio' + "--junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml" + '--durations' '100' + '-k' 'cuda or gpu' + ) -# cd test -# python3 -m torch.utils.collect_env -# env | grep TORCHAUDIO || true -# pytest "${args[@]}" torchaudio_unittest -# coverage html + cd test + python3 -m torch.utils.collect_env + env | grep TORCHAUDIO || true + pytest "${args[@]}" torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not torchscript_consistency" + coverage html From a53009591097364512b125e461eaa4c476de9be7 Mon Sep 17 00:00:00 2001 From: Sam Anklesaria Date: Mon, 14 Jul 2025 14:43:31 +0000 Subject: [PATCH 7/7] Fix CHANNEL variable --- .github/workflows/unittest-linux-gpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-linux-gpu.yml b/.github/workflows/unittest-linux-gpu.yml index 4884b7f673..175e0f6ab8 100644 --- a/.github/workflows/unittest-linux-gpu.yml +++ b/.github/workflows/unittest-linux-gpu.yml @@ -59,7 +59,7 @@ jobs: echo "::endgroup::" echo "::group::Install Pytorch" - pip3 install --pre torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cu128" + pip3 install --pre torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/${CHANNEL}/cu128" echo "::endgroup::" echo "::group::Install TorchAudio"