2 changes: 1 addition & 1 deletion pytensor/gradient.py
@@ -1951,10 +1951,10 @@ def random_projection():
mode_for_cost = mode

cost_fn = fn_maker(tensor_pt, cost, name="gradient.py cost", mode=mode_for_cost)

symbolic_grad = grad(cost, tensor_pt, disconnected_inputs="ignore")

grad_fn = fn_maker(tensor_pt, symbolic_grad, name="gradient.py symbolic grad")
grad_fn.dprint(print_shape=True)
Review comment (Member): Reminder: revert changes in this file


for test_num in range(n_tests):
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps, out_type)
4 changes: 2 additions & 2 deletions pytensor/tensor/basic.py
@@ -921,7 +921,7 @@ def zeros_like(model, dtype=None, opt=False):
return fill(_model, ret)


def zeros(shape, dtype=None):
def zeros(shape, dtype=None) -> TensorVariable:
"""Create a `TensorVariable` filled with zeros, closer to NumPy's syntax than ``alloc``."""
if not (
isinstance(shape, np.ndarray | Sequence)
@@ -933,7 +933,7 @@ def zeros(shape, dtype=None):
return alloc(np.array(0, dtype=dtype), *shape)


def ones(shape, dtype=None):
def ones(shape, dtype=None) -> TensorVariable:
"""Create a `TensorVariable` filled with ones, closer to NumPy's syntax than ``alloc``."""
if not (
isinstance(shape, np.ndarray | Sequence)
191 changes: 155 additions & 36 deletions pytensor/tensor/signal/conv.py
@@ -1,72 +1,92 @@
from typing import TYPE_CHECKING, Literal, cast
from typing import TYPE_CHECKING, Literal
from typing import cast as type_cast

import numpy as np
from numpy import convolve as numpy_convolve
from scipy.signal import convolve2d as scipy_convolve2d

from pytensor.gradient import DisconnectedType
from pytensor.graph import Apply, Constant
from pytensor.graph.op import Op
from pytensor.link.c.op import COp
from pytensor.scalar import as_scalar
from pytensor.scalar.basic import upcast
from pytensor.tensor.basic import as_tensor_variable, join, zeros
from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.math import maximum, minimum, switch
from pytensor.tensor.type import vector
from pytensor.tensor.pad import pad
from pytensor.tensor.subtensor import flip
from pytensor.tensor.type import tensor
from pytensor.tensor.variable import TensorVariable


if TYPE_CHECKING:
from pytensor.tensor import TensorLike


class Convolve1d(COp):
class AbstractConvolveNd:
__props__ = ()
gufunc_signature = "(n),(k),()->(o)"
ndim: int

@property
def gufunc_signature(self):
data_signature = ",".join([f"n{i}" for i in range(self.ndim)])
kernel_signature = ",".join([f"k{i}" for i in range(self.ndim)])
output_signature = ",".join([f"o{i}" for i in range(self.ndim)])

return f"({data_signature}),({kernel_signature}),()->({output_signature})"

def make_node(self, in1, in2, full_mode):
in1 = as_tensor_variable(in1)
in2 = as_tensor_variable(in2)
full_mode = as_scalar(full_mode)

if not (in1.ndim == 1 and in2.ndim == 1):
raise ValueError("Convolution inputs must be vector (ndim=1)")
ndim = self.ndim
if not (in1.ndim == ndim and in2.ndim == self.ndim):
raise ValueError(
f"Convolution inputs must have ndim={ndim}, got: in1={in1.ndim}, in2={in2.ndim}"
)
if not full_mode.dtype == "bool":
raise ValueError("Convolution mode must be a boolean type")
raise ValueError("Convolution full_mode flag must be a boolean type")

dtype = upcast(in1.dtype, in2.dtype)
n = in1.type.shape[0]
k = in2.type.shape[0]
match full_mode:
case Constant():
static_mode = "full" if full_mode.data else "valid"
case _:
static_mode = None

if n is None or k is None or static_mode is None:
out_shape = (None,)
elif static_mode == "full":
out_shape = (n + k - 1,)
else: # mode == "valid":
out_shape = (max(n, k) - min(n, k) + 1,)
if static_mode is None:
out_shape = (None,) * ndim
else:
out_shape = []
# TODO: Raise if static shapes are not valid (one input size doesn't dominate the other)
for n, k in zip(in1.type.shape, in2.type.shape):
if n is None or k is None:
out_shape.append(None)
elif static_mode == "full":
out_shape.append(
n + k - 1,
)
else: # mode == "valid":
out_shape.append(
max(n, k) - min(n, k) + 1,
)
out_shape = tuple(out_shape)
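A worked example of the static-shape rule above (a sketch with hypothetical shapes, assuming the dominance requirement noted in the TODO holds; static_dim is an illustrative helper, not part of the diff):

# Illustration only: static output shape per dimension for "full" vs "valid".
def static_dim(n: int, k: int, full: bool) -> int:
    return n + k - 1 if full else max(n, k) - min(n, k) + 1

# A (5, 8) input convolved with a (3, 3) kernel:
assert [static_dim(n, k, full=True) for n, k in zip((5, 8), (3, 3))] == [7, 10]   # full
assert [static_dim(n, k, full=False) for n, k in zip((5, 8), (3, 3))] == [3, 6]   # valid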

out = vector(dtype=dtype, shape=out_shape)
return Apply(self, [in1, in2, full_mode], [out])
dtype = upcast(in1.dtype, in2.dtype)

def perform(self, node, inputs, outputs):
# We use numpy_convolve as that's what scipy would use if method="direct" was passed.
# And mode != "same", which this Op doesn't cover anyway.
in1, in2, full_mode = inputs
outputs[0][0] = numpy_convolve(in1, in2, mode="full" if full_mode else "valid")
out = tensor(dtype=dtype, shape=out_shape)
return Apply(self, [in1, in2, full_mode], [out])

def infer_shape(self, fgraph, node, shapes):
_, _, full_mode = node.inputs
in1_shape, in2_shape, _ = shapes
n = in1_shape[0]
k = in2_shape[0]
shape_valid = maximum(n, k) - minimum(n, k) + 1
shape_full = n + k - 1
shape = switch(full_mode, shape_full, shape_valid)
return [[shape]]
out_shape = [
switch(full_mode, n + k - 1, maximum(n, k) - minimum(n, k) + 1)
for n, k in zip(in1_shape, in2_shape)
]

return [out_shape]

def connection_pattern(self, node):
return [[True], [True], [False]]
@@ -75,22 +95,34 @@ def L_op(self, inputs, outputs, output_grads):
in1, in2, full_mode = inputs
[grad] = output_grads

n = in1.shape[0]
k = in2.shape[0]
n = in1.shape
k = in2.shape
# Note: this assumes the shape of one input dominates the other over all dimensions (which is required for a valid forward)

# If mode is "full", or mode is "valid" and k >= n, then in1_bar mode should use "valid" convolve
# The expression below is equivalent to ~(full_mode | (k >= n))
full_mode_in1_bar = ~full_mode & (k < n)
full_mode_in1_bar = ~full_mode & (k < n).any()
# If mode is "full", or mode is "valid" and n >= k, then in2_bar mode should use "valid" convolve
# The expression below is equivalent to ~(full_mode | (n >= k))
full_mode_in2_bar = ~full_mode & (n < k)
full_mode_in2_bar = ~full_mode & (n < k).any()

return [
self(grad, in2[::-1], full_mode_in1_bar),
self(grad, in1[::-1], full_mode_in2_bar),
self(grad, flip(in2), full_mode_in1_bar),
self(grad, flip(in1), full_mode_in2_bar),
DisconnectedType()(),
]
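The mode selection above, restated with plain Python booleans (a sketch under the same shape-dominance assumption; grad_full_flags is a hypothetical helper, not part of the diff):

# Illustration only: the full/valid flag passed to each gradient convolution,
# given the forward full_mode flag and dominant sizes n (data) and k (kernel).
def grad_full_flags(full_mode: bool, n: int, k: int) -> tuple[bool, bool]:
    full_mode_in1_bar = (not full_mode) and (k < n)  # ~(full_mode | (k >= n))
    full_mode_in2_bar = (not full_mode) and (n < k)  # ~(full_mode | (n >= k))
    return full_mode_in1_bar, full_mode_in2_bar

assert grad_full_flags(True, n=8, k=3) == (False, False)   # forward "full": both grads use "valid"
assert grad_full_flags(False, n=8, k=3) == (True, False)   # forward "valid", n >= k: in1_bar uses "full"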


class Convolve1d(AbstractConvolveNd, COp):
__props__ = ()
ndim = 1

def perform(self, node, inputs, outputs):
# We use numpy_convolve as that's what scipy would use if method="direct" was passed.
# And mode != "same", which this Op doesn't cover anyway.
in1, in2, full_mode = inputs
outputs[0][0] = numpy_convolve(in1, in2, mode="full" if full_mode else "valid")

def c_code_cache_version(self):
return (2,)

@@ -210,4 +242,91 @@ def convolve1d(
mode = "valid"

full_mode = as_scalar(np.bool_(mode == "full"))
return cast(TensorVariable, blockwise_convolve_1d(in1, in2, full_mode))
return type_cast(TensorVariable, blockwise_convolve_1d(in1, in2, full_mode))
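For context, a short usage sketch of the convolve1d wrapper, showing the Blockwise batching it relies on (shapes and variable names are illustrative only):

# Illustration only: convolve1d broadcasts over leading batch dimensions via Blockwise.
import numpy as np
from pytensor.tensor import matrix, vector
from pytensor.tensor.signal.conv import convolve1d

x = matrix("x")                      # batch of signals, shape (batch, n)
k = vector("k")                      # a single kernel, shape (m,)
out = convolve1d(x, k, mode="full")  # shape (batch, n + m - 1)

x_val = np.random.normal(size=(4, 10)).astype(x.dtype)
k_val = np.random.normal(size=(3,)).astype(k.dtype)
assert out.eval({x: x_val, k: k_val}).shape == (4, 12)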


class Convolve2d(AbstractConvolveNd, Op):
__props__ = ()
ndim = 2

def perform(self, node, inputs, outputs):
in1, in2, full_mode = inputs

# if all(inpt.dtype.kind in ['f', 'c'] for inpt in inputs):
# outputs[0][0] = scipy_convolve(in1, in2, mode=self.mode, method='fft')
#
# else:
# TODO: Why is .item() needed???
outputs[0][0] = scipy_convolve2d(
in1,
in2,
mode="full" if full_mode.item() else "valid",
)


blockwise_convolve_2d = Blockwise(Convolve2d())


def convolve2d(
in1: "TensorLike",
in2: "TensorLike",
mode: Literal["full", "valid", "same"] = "full",
boundary: Literal["fill", "wrap", "symm"] = "fill",
fillvalue: float | int = 0,
) -> TensorVariable:
"""Convolve two two-dimensional arrays.

Convolve in1 and in2, with the output size determined by the mode argument.

Parameters
----------
in1 : (..., N, M) tensor_like
First input.
in2 : (..., K, L) tensor_like
Second input.
mode : {'full', 'valid', 'same'}, optional
A string indicating the size of the output:
- 'full': The output is the full discrete linear convolution of the inputs, with shape (..., N+K-1, M+L-1).
- 'valid': The output consists only of elements that do not rely on zero-padding, with shape (..., max(N, K) - min(N, K) + 1, max(M, L) - min(M, L) + 1).
- 'same': The output is the same size as in1, centered with respect to the 'full' output.
boundary : {'fill', 'wrap', 'symm'}, optional
A string indicating how to handle boundaries:
- 'fill': Pads the input arrays with fillvalue.
- 'wrap': Circularly wraps the input arrays.
- 'symm': Symmetrically reflects the input arrays.
fillvalue : float or int, optional
The value to use for padding when boundary is 'fill'. Default is 0.

Returns
-------
out : tensor_variable
The discrete linear convolution of in1 with in2.

"""
in1 = as_tensor_variable(in1)
in2 = as_tensor_variable(in2)

if mode == "same":
raise NotImplementedError("same mode not implemented for convolve2d")

if mode != "valid" and (boundary != "fill" or fillvalue != 0):
# We use a valid convolution on an appropriately padded input
*_, k, l = in2.shape
ndim = max(in1.type.ndim, in2.type.ndim)

pad_width = zeros((ndim, 2), dtype="int64")
pad_width = pad_width[-2, :].set(k - 1)
pad_width = pad_width[-1, :].set(l - 1)
if boundary == "fill":
in1 = pad(
in1, pad_width=pad_width, mode="constant", constant_values=fillvalue
)
elif boundary == "wrap":
in1 = pad(in1, pad_width=pad_width, mode="wrap")

elif boundary == "symm":
in1 = pad(in1, pad_width=pad_width, mode="symmetric")

mode = "valid"

full_mode = as_scalar(np.bool_(mode == "full"))
return type_cast(TensorVariable, blockwise_convolve_2d(in1, in2, full_mode))
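A short usage sketch of the new wrapper (assumes this branch of pytensor is installed; shapes and names are illustrative and follow the docstring above):

# Illustration only: output shapes for "full" and "valid" 2d convolutions.
import numpy as np
from pytensor.tensor import matrix
from pytensor.tensor.signal.conv import convolve2d

x = matrix("x")  # shape (N, M)
w = matrix("w")  # shape (K, L)
full = convolve2d(x, w, mode="full")    # shape (N + K - 1, M + L - 1)
valid = convolve2d(x, w, mode="valid")  # shape (N - K + 1, M - L + 1) when x dominates

x_val = np.random.normal(size=(8, 8)).astype(x.dtype)
w_val = np.random.normal(size=(3, 3)).astype(w.dtype)
assert full.eval({x: x_val, w: w_val}).shape == (10, 10)
assert valid.eval({x: x_val, w: w_val}).shape == (6, 6)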
67 changes: 65 additions & 2 deletions tests/tensor/signal/test_conv.py
@@ -3,13 +3,15 @@
import numpy as np
import pytest
from scipy.signal import convolve as scipy_convolve
from scipy.signal import convolve2d as scipy_convolve2d

from pytensor import config, function, grad
from pytensor.graph.rewriting import rewrite_graph
from pytensor.graph.traversal import ancestors, io_toposort
from pytensor.tensor import matrix, tensor, vector
from pytensor.tensor.basic import expand_dims
from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.signal.conv import Convolve1d, convolve1d
from pytensor.tensor.signal.conv import Convolve1d, convolve1d, convolve2d
from tests import unittest_tools as utt


@@ -46,7 +48,7 @@ def test_convolve1d_batch():
res = out.eval({x: x_test, y: y_test})
# Second entry of x, y are just y, x respectively,
# so res[0] and res[1] should be identical.
rtol = 1e-6 if config.floatX == "float32" else 1e-15
rtol = 1e-6 if config.floatX == "float32" else 1e-12
res_np = np.convolve(x_test[0], y_test[0])
np.testing.assert_allclose(res[0], res_np, rtol=rtol)
np.testing.assert_allclose(res[1], res_np, rtol=rtol)
@@ -100,6 +102,7 @@ def test_convolve1d_valid_grad(static_shape):
"local_useless_unbatched_blockwise",
),
)
grad_out.dprint()
Review comment (Member), suggested change: remove the grad_out.dprint() debug line.

[conv_node] = [
node
for node in io_toposort([larger, smaller], [grad_out])
@@ -137,3 +140,63 @@ def convolve1d_grad_benchmarker(convolve_mode, mode, benchmark):
@pytest.mark.parametrize("convolve_mode", ["full", "valid"])
def test_convolve1d_grad_benchmark_c(convolve_mode, benchmark):
convolve1d_grad_benchmarker(convolve_mode, "FAST_RUN", benchmark)


@pytest.mark.parametrize(
"kernel_shape", [(3, 3), (5, 3), (5, 8)], ids=lambda x: f"kernel_shape={x}"
)
@pytest.mark.parametrize(
"data_shape", [(3, 3), (5, 5), (8, 8)], ids=lambda x: f"data_shape={x}"
)
Review thread on lines +145 to +150:

ricardoV94 (Member), Jul 14, 2025: I would like a parametrization where one of the dimensions is larger and the other smaller than the respective dimensions of the other input, something like (5, 5) vs (3, 7) (with both input orders), especially for the grad. This is something that cannot happen in Conv1D and I want to be sure we are doing it correctly.

jessegrabowski (Member, Author): I think in that case we can swap the inputs then swap them back?

ricardoV94: Not if you only have runtime shapes.

jessegrabowski: scipy will do this swap internally when mode="valid", see here. This helper is called by both convolve and convolve2d. Our gradients will be wrong if we don't take that into account.

ricardoV94: Yeah, I hate that; it should be an implementation detail under the hood and not affect us, though?

ricardoV94: I think numpy convolve also does it and it's not a problem for us?

jessegrabowski: I guess it's "luck", because the L_op calls self.perform, so gradient inputs will also be flipped.

ricardoV94: Luck? Wait, aren't we just talking about the same issue that's addressed by #1522 (and before that by doing the worst-case-scenario pad and throwing away the waste)?

jessegrabowski, Jul 14, 2025: I don't think so. My point is that the shapes of the gradient inputs are the same as the shapes of the inputs, and we call the same function again. So the flipping/unflipping is done for us correctly in both cases. If the gradient for conv1d didn't end up itself being a convolution, we would have had problems.
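A small check of the scipy behaviour discussed in this thread (a sketch; the claim that scipy swaps inputs internally is the reviewers', restated here, and the array sizes are illustrative):

# Illustration only: with mode="valid", scipy returns the same result for either
# argument order, since it reorders the inputs internally so the larger array
# comes first and convolution itself is commutative.
import numpy as np
from scipy.signal import convolve2d

rng = np.random.default_rng(0)
a = rng.normal(size=(5, 5))
b = rng.normal(size=(3, 3))
np.testing.assert_allclose(
    convolve2d(a, b, mode="valid"),
    convolve2d(b, a, mode="valid"),
)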

@pytest.mark.parametrize("mode", ["full", "valid", "same"][:-1])
@pytest.mark.parametrize(
"boundary, boundary_kwargs",
[
("fill", {"fillvalue": 0}),
("fill", {"fillvalue": 0.5}),
("wrap", {}),
("symm", {}),
],
)
def test_convolve2d(kernel_shape, data_shape, mode, boundary, boundary_kwargs):
data = matrix("data")
kernel = matrix("kernel")
op = partial(convolve2d, mode=mode, boundary=boundary, **boundary_kwargs)
conv_result = op(data, kernel)

fn = function([data, kernel], conv_result)

rng = np.random.default_rng((26, kernel_shape, data_shape, sum(map(ord, mode))))
data_val = rng.normal(size=data_shape).astype(data.dtype)
kernel_val = rng.normal(size=kernel_shape).astype(kernel.dtype)

np.testing.assert_allclose(
fn(data_val, kernel_val),
scipy_convolve2d(
data_val, kernel_val, mode=mode, boundary=boundary, **boundary_kwargs
),
atol=1e-6 if config.floatX == "float32" else 1e-8,
)

utt.verify_grad(lambda k: op(data_val, k).sum(), [kernel_val])


def test_batched_1d_agrees_with_diagonal_2d():
Review comment (Member): Title is not very accurate, right? More something like test_batched_1d_agrees_with_2d_row_filter?

data = matrix("data")
kernel_1d = vector("kernel_1d")
kernel_2d = expand_dims(kernel_1d, 0)

output_1d = convolve1d(data, kernel_1d, mode="valid")
output_2d = convolve2d(data, kernel_2d, mode="valid")

grad_1d = grad(output_1d.sum(), kernel_1d).ravel()
grad_2d = grad(output_2d.sum(), kernel_1d).ravel()

fn = function([data, kernel_1d], [output_1d, output_2d, grad_1d, grad_2d])

data_val = np.random.normal(size=(10, 8)).astype(config.floatX)
kernel_1d_val = np.random.normal(size=(3,)).astype(config.floatX)

forward_1d, forward_2d, backward_1d, backward_2d = fn(data_val, kernel_1d_val)
np.testing.assert_allclose(forward_1d, forward_2d)
np.testing.assert_allclose(backward_1d, backward_2d)