dde.gradients support 3D outputs (#1928)

Jerry-Jzy · web-flow · commit 9546d7ec5d8b · 2025-03-09T19:40:57.000-04:00
diff --git a/deepxde/gradients/gradients.py b/deepxde/gradients/gradients.py
@@ -19,7 +19,9 @@ def jacobian(ys, xs, i=None, j=None):
       computation.
 
     Args:
-        ys: Output Tensor of shape (batch_size, dim_y).
+        ys: Output Tensor of shape (batch_size, dim_y) or (batch_size_out, batch_size,
+            dim_y). Here, the `batch_size` is the same one for `xs`, and
+            `batch_size_out` is the batch size for an additional/outer dimension.
         xs: Input Tensor of shape (batch_size, dim_x).
         i (int or None): `i`th row. If `i` is ``None``, returns the `j`th column
             J[:, `j`].
@@ -29,6 +31,9 @@ def jacobian(ys, xs, i=None, j=None):
 
     Returns:
         (`i`, `j`)th entry J[`i`, `j`], `i`th row J[`i`, :], or `j`th column J[:, `j`].
+        For a single entry J[`i`, `j`], the output shape is (batch_size, 1) when `ys`
+        has shape (batch_size, dim_y), and (batch_size_out, batch_size, 1) when `ys`
+        has shape (batch_size_out, batch_size, dim_y) (forward-mode autodiff only).
     """
     if config.autodiff == "reverse":
         return gradients_reverse.jacobian(ys, xs, i=i, j=j)
@@ -48,14 +53,18 @@ def hessian(ys, xs, component=0, i=0, j=0):
       computation.
 
     Args:
-        ys: Output Tensor of shape (batch_size, dim_y).
+        ys: Output Tensor of shape (batch_size, dim_y) or (batch_size_out, batch_size,
+            dim_y). Here, the `batch_size` is the same one for `xs`, and
+            `batch_size_out` is the batch size for an additional/outer dimension.
         xs: Input Tensor of shape (batch_size, dim_x).
         component: `ys[:, component]` is used as y to compute the Hessian.
         i (int): `i`th row.
         j (int): `j`th column.
 
     Returns:
-        H[`i`, `j`].
+        H[`i`, `j`]. When `ys` has shape (batch_size, dim_y), the output shape is
+        (batch_size, 1). When `ys` has shape (batch_size_out, batch_size, dim_y),
+        the output shape is (batch_size_out, batch_size, 1).
     """
     if config.autodiff == "reverse":
         return gradients_reverse.hessian(ys, xs, component=component, i=i, j=j)
diff --git a/deepxde/gradients/gradients_forward.py b/deepxde/gradients/gradients_forward.py
@@ -87,14 +87,14 @@ def grad_fn(x):
         # Compute J[i, j]
         if (i, j) not in self.J:
             if backend_name == "tensorflow.compat.v1":
-                self.J[i, j] = self.J[j][:, i : i + 1]
+                self.J[i, j] = self.J[j][..., i : i + 1]
             elif backend_name in ["tensorflow", "pytorch", "jax"]:
                 # In backend tensorflow/pytorch/jax, a tuple of a tensor/tensor/array
                 # and a callable is returned, so that it is consistent with the argument,
                 # which is also a tuple. This is useful for further computation, e.g.,
                 # Hessian.
                 self.J[i, j] = (
-                    self.J[j][0][:, i : i + 1],
+                    self.J[j][0][..., i : i + 1],
                     lambda x: self.J[j][1](x)[i : i + 1],
                 )
         return self.J[i, j]
diff --git a/deepxde/gradients/gradients_reverse.py b/deepxde/gradients/gradients_reverse.py
@@ -3,6 +3,7 @@
 __all__ = ["hessian", "jacobian"]
 
 from .jacobian import Jacobian, Jacobians
+from .. import backend as bkd
 from ..backend import backend_name, tf, torch, jax, paddle
 
 
@@ -17,6 +18,14 @@ def __call__(self, i=None, j=None):
                     "Reverse-mode autodiff doesn't support computing a column."
                 )
             i = 0
+        if backend_name in ["tensorflow.compat.v1", "tensorflow", "pytorch", "paddle"]:
+            ndim_y = bkd.ndim(self.ys)
+        elif backend_name == "jax":
+            ndim_y = bkd.ndim(self.ys[0])
+        if ndim_y == 3:
+            raise NotImplementedError(
+                "Reverse-mode autodiff doesn't support 3D output"
+            )
 
         # Compute J[i, :]
         if i not in self.J:
diff --git a/deepxde/gradients/jacobian.py b/deepxde/gradients/jacobian.py
@@ -11,7 +11,9 @@ class Jacobian(ABC):
     It is lazy evaluation, i.e., it only computes J[i, j] when needed.
 
     Args:
-        ys: Output Tensor of shape (batch_size, dim_y).
+        ys: Output Tensor of shape (batch_size, dim_y) or (batch_size_out, batch_size,
+            dim_y). Here, the `batch_size` is the same one for `xs`, and
+            `batch_size_out` is the batch size for an additional/outer dimension.
         xs: Input Tensor of shape (batch_size, dim_x).
     """
 
@@ -20,22 +22,22 @@ def __init__(self, ys, xs):
         self.xs = xs
 
         if backend_name in ["tensorflow.compat.v1", "paddle"]:
-            self.dim_y = ys.shape[1]
+            self.dim_y = ys.shape[-1]
         elif backend_name in ["tensorflow", "pytorch"]:
             if config.autodiff == "reverse":
                 # For reverse-mode AD, only a tensor is passed.
-                self.dim_y = ys.shape[1]
+                self.dim_y = ys.shape[-1]
             elif config.autodiff == "forward":
                 # For forward-mode AD, a tuple of a tensor and a callable is passed, 
                 # similar to backend jax.
-                self.dim_y = ys[0].shape[1]
+                self.dim_y = ys[0].shape[-1]
         elif backend_name == "jax":
             # For backend jax, a tuple of a jax array and a callable is passed as one of
             # the arguments, since jax does not support computational graph explicitly.
             # The array is used to control the dimensions and the callable is used to
             # obtain the derivative function, which can be used to compute the
             # derivatives.
-            self.dim_y = ys[0].shape[1]
+            self.dim_y = ys[0].shape[-1]
         self.dim_x = xs.shape[1]
 
         self.J = {}
@@ -114,7 +116,9 @@ def __call__(self, ys, xs, i=None, j=None):
         #     x = torch.from_numpy(x)
         #     x.requires_grad_()
         #     f(x)
-        if backend_name in ["tensorflow.compat.v1", "tensorflow"]:
+        if backend_name == "tensorflow.compat.v1":
+            key = (ys.ref(), xs.ref())
+        elif backend_name == "tensorflow":
             if config.autodiff == "reverse":
                 key = (ys.ref(), xs.ref())
             elif config.autodiff == "forward":