@@ -7,7 +7,7 @@
 
 import pytensor.scalar as ps
 from pytensor.compile.function import function
-from pytensor.gradient import grad, hessian, jacobian
+from pytensor.gradient import grad, jacobian
 from pytensor.graph import Apply, Constant, FunctionGraph
 from pytensor.graph.basic import ancestors, truncated_graph_inputs
 from pytensor.graph.op import ComputeMapType, HasInnerGraph, Op, StorageMapType
@@ -483,6 +483,7 @@ def __init__(
         jac: bool = True,
         hess: bool = False,
         hessp: bool = False,
+        use_vectorized_jac: bool = False,
         optimizer_kwargs: dict | None = None,
     ):
         if not cast(TensorVariable, objective).ndim == 0:
@@ -495,6 +496,7 @@ def __init__(
             )
 
         self.fgraph = FunctionGraph([x, *args], [objective])
+        self.use_vectorized_jac = use_vectorized_jac
 
         if jac:
             grad_wrt_x = cast(
@@ -504,7 +506,12 @@ def __init__(
 
         if hess:
             hess_wrt_x = cast(
-                Variable, hessian(self.fgraph.outputs[0], self.fgraph.inputs[0])
+                Variable,
+                jacobian(
+                    self.fgraph.outputs[-1],
+                    self.fgraph.inputs[0],
+                    vectorize=use_vectorized_jac,
+                ),
             )
             self.fgraph.add_output(hess_wrt_x)
 
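For context (not part of the diff): this hunk swaps the direct `hessian` call for the Jacobian of the gradient output appended to the graph just above, which is the same matrix for a scalar objective but lets the new `use_vectorized_jac` flag control how the graph is built. A minimal sketch of that equivalence, assuming a recent pytensor where `jacobian` accepts the `vectorize` keyword (variable names are illustrative only):

```python
import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor.gradient import grad, hessian, jacobian

x = pt.vector("x")
obj = (x**2).sum() + pt.prod(x)  # any scalar objective

g = grad(obj, x)
H_from_grad = jacobian(g, x, vectorize=False)  # scan-based, as with the default flag
H_direct = hessian(obj, x)                     # what the removed line computed

fn = pytensor.function([x], [H_from_grad, H_direct])
a, b = fn(np.array([1.0, 2.0, 3.0]))
np.testing.assert_allclose(a, b)  # both routes yield the same Hessian
```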
@@ -563,7 +570,7 @@ def L_op(self, inputs, outputs, output_grads):
             implicit_f,
             [inner_x, *inner_args],
             disconnected_inputs="ignore",
-            vectorize=True,
+            vectorize=self.use_vectorized_jac,
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,
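For context (again, not part of the diff): the Jacobian assembled here feeds `implict_optimization_grads`, which, judging by its inputs, implements the implicit-function-theorem gradient. Writing the optimality or root condition as $f(x^*(\theta), \theta) = 0$ and differentiating through the solution gives

$$
\frac{\partial f}{\partial x}\,\frac{d x^*}{d\theta} + \frac{\partial f}{\partial \theta} = 0
\quad\Longrightarrow\quad
\frac{d x^*}{d\theta} = -\left(\frac{\partial f}{\partial x}\right)^{-1}\frac{\partial f}{\partial \theta},
$$

so `df_dx` and the `df_dtheta` columns are needed either way; the flag only changes whether that Jacobian graph is built with scan or with vmap.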
@@ -583,6 +590,7 @@ def minimize(
     method: str = "BFGS",
     jac: bool = True,
     hess: bool = False,
+    use_vectorized_jac: bool = False,
     optimizer_kwargs: dict | None = None,
 ) -> tuple[TensorVariable, TensorVariable]:
     """
@@ -592,18 +600,21 @@ def minimize(
     ----------
     objective : TensorVariable
         The objective function to minimize. This should be a pytensor variable representing a scalar value.
-
-    x : TensorVariable
+    x: TensorVariable
         The variable with respect to which the objective function is minimized. It must be an input to the
         computational graph of `objective`.
-
-    method : str, optional
+    method: str, optional
         The optimization method to use. Default is "BFGS". See scipy.optimize.minimize for other options.
-
-    jac : bool, optional
-        Whether to compute and use the gradient of teh objective function with respect to x for optimization.
+    jac: bool, optional
+        Whether to compute and use the gradient of the objective function with respect to x for optimization.
         Default is True.
-
+    hess: bool, optional
+        Whether to compute and use the Hessian of the objective function with respect to x for optimization.
+        Default is False. Note that some methods require this, while others do not support it.
+    use_vectorized_jac: bool, optional
+        Whether to use a vectorized graph (vmap) to compute the jacobian (and/or hessian) matrix. If False, a
+        scan will be used instead. This comes down to a memory/compute trade-off. Vectorized graphs can be faster,
+        but use more memory. Default is False.
     optimizer_kwargs
         Additional keyword arguments to pass to scipy.optimize.minimize
 
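A hypothetical usage sketch of the new flag (the least-squares setup and variable names below are illustrative, not taken from the patch):

```python
import pytensor.tensor as pt
from pytensor.tensor.optimize import minimize

X = pt.matrix("X")
y = pt.vector("y")
beta = pt.vector("beta")
loss = ((y - pt.dot(X, beta)) ** 2).sum()  # scalar objective in beta

# Default: Jacobian/Hessian graphs are built with scan (lower memory).
beta_hat, success = minimize(loss, beta, method="Newton-CG", hess=True)

# Vectorized (vmap-style) graphs: often faster to evaluate, but can use more memory.
beta_hat_v, success_v = minimize(
    loss, beta, method="Newton-CG", hess=True, use_vectorized_jac=True
)
```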
@@ -626,6 +637,7 @@ def minimize(
         method=method,
         jac=jac,
         hess=hess,
+        use_vectorized_jac=use_vectorized_jac,
         optimizer_kwargs=optimizer_kwargs,
     )
 
@@ -806,6 +818,7 @@ def __init__(
         method: str = "hybr",
         jac: bool = True,
         optimizer_kwargs: dict | None = None,
+        use_vectorized_jac: bool = False,
     ):
         if cast(TensorVariable, variables).ndim != cast(TensorVariable, equations).ndim:
             raise ValueError(
@@ -820,7 +833,9 @@ def __init__(
 
         if jac:
             jac_wrt_x = jacobian(
-                self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True
+                self.fgraph.outputs[0],
+                self.fgraph.inputs[0],
+                vectorize=use_vectorized_jac,
             )
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
@@ -927,6 +942,7 @@ def root(
     variables: TensorVariable,
     method: str = "hybr",
     jac: bool = True,
+    use_vectorized_jac: bool = False,
     optimizer_kwargs: dict | None = None,
 ) -> tuple[TensorVariable, TensorVariable]:
     """
@@ -945,6 +961,10 @@ def root(
     jac : bool, optional
         Whether to compute and use the Jacobian of the `equations` with respect to `variables`.
         Default is True. Most methods require this.
+    use_vectorized_jac: bool, optional
+        Whether to use a vectorized graph (vmap) to compute the jacobian matrix. If False, a scan will be used instead.
+        This comes down to a memory/compute trade-off. Vectorized graphs can be faster, but use more memory.
+        Default is False.
     optimizer_kwargs : dict, optional
         Additional keyword arguments to pass to `scipy.optimize.root`.
 
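Similarly, a hypothetical sketch for `root` with the new flag (the two-equation system is made up for illustration):

```python
import pytensor.tensor as pt
from pytensor.tensor.optimize import root

v = pt.vector("v", shape=(2,))
equations = pt.stack([v[0] ** 2 + v[1] - 3.0, v[0] - v[1] ** 2 + 1.0])

# jac=True (default) builds the Jacobian of `equations` w.r.t. `v`;
# the new flag switches that graph from scan to a vectorized (vmap) form.
solution, success = root(equations, v, use_vectorized_jac=True)
```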
@@ -968,6 +988,7 @@ def root(
         method=method,
         jac=jac,
         optimizer_kwargs=optimizer_kwargs,
+        use_vectorized_jac=use_vectorized_jac,
     )
 
     solution, success = cast(