    get_normalized_batch_axes,
    scalar_elemwise,
)
-from pytensor.tensor.shape import shape, specify_broadcastable
+from pytensor.tensor.shape import shape, specify_shape
from pytensor.tensor.type import (
    DenseTensorType,
    complex_dtypes,
    continuous_dtypes,
    discrete_dtypes,
+    float_dtypes,
    int_dtypes,
    tensor,
    uint_dtypes,
@@ -2986,9 +2987,7 @@ def clip(x, min, max):

class Dot(Op):
    """
-    Computes the dot product of two variables. For two matrices, this is
-    equivalent to matrix multiplication. For two vectors, this is the inner
-    product.
+    Computes the dot product of two matrix variables.

    Notes
    -----
@@ -3001,97 +3000,57 @@ class Dot(Op):

    """

+    gufunc_signature = "(m,n),(n,p)->(m,p)"
+    gufunc_spec = ("matmul", 2, 1)
    __props__ = ()

-    # the rationale for Dot22 is related to getting GEMM Ops into the
-    # graph. See Dot22 in tensor.blas for details.
-
-    def make_node(self, *inputs):
-        inputs = list(map(as_tensor_variable, inputs))
+    def make_node(self, x, y):
+        x = as_tensor_variable(x)
+        y = as_tensor_variable(y)

-        if len(inputs) != 2:
-            raise TypeError(f"Two arguments required, {len(inputs)} given")
-        if inputs[0].ndim not in (1, 2):
+        if x.type.ndim != 2:
            raise TypeError(
-                "Input 0 (0-indexed) must have ndim of "
-                f"1 or 2, {int(inputs[0].ndim)} given. Consider calling "
-                "pytensor.tensor.dot instead."
+                f"Dot Op expects a 2D tensor as input 0, got {x} with {x.type.ndim} dimensions"
            )
-        if inputs[1].ndim not in (1, 2):
+        if y.type.ndim != 2:
            raise TypeError(
-                "Input 1 (0-indexed) must have ndim of "
-                f"1 or 2, {int(inputs[1].ndim)} given. Consider calling "
-                "pytensor.tensor.dot instead."
+                f"Dot Op expects a 2D tensor as input 1, got {y} with {y.type.ndim} dimensions"
            )

-        sx, sy = (input.type.shape for input in inputs)
+        sx, sy = x.type.shape, y.type.shape
        if sx[-1] is not None and sy[0] is not None and sx[-1] != sy[0]:
            raise ValueError(
                f"Incompatible shared dimension for dot product: {sx}, {sy}"
            )
+        sz = sx[:-1] + sy[-1:]
+        outputs = [tensor(dtype=ps.upcast(x.type.dtype, y.type.dtype), shape=sz)]
+        return Apply(self, [x, y], outputs)

-        if len(sy) == 2:
-            sz = sx[:-1] + sy[-1:]
-        elif len(sy) == 1:
-            sz = sx[:-1]
-
-        i_dtypes = [input.type.dtype for input in inputs]
-        outputs = [tensor(dtype=ps.upcast(*i_dtypes), shape=sz)]
-        return Apply(self, inputs, outputs)
-
-    def perform(self, node, inp, out):
-        x, y = inp
-        (z,) = out
-
-        # the asarray is here because dot between two vectors
-        # gives a numpy float object but we need to return a 0d
-        # ndarray
-        z[0] = np.asarray(np.dot(x, y))
+    def perform(self, node, inputs, output_storage):
+        output_storage[0][0] = np.matmul(*inputs)

    def grad(self, inp, grads):
        x, y = inp
        (gz,) = grads
-        xdim, ydim, gdim = x.type.ndim, y.type.ndim, gz.type.ndim
-
-        # grad is scalar, so x is vector and y is vector
-        if gdim == 0:
-            xgrad = gz * y
-            ygrad = gz * x
-
-        # x is vector, y is matrix, grad is vector
-        elif xdim == 1 and ydim == 2:
-            xgrad = dot(gz, y.T)
-            ygrad = outer(x.T, gz)

-        # x is matrix, y is vector, grad is vector
-        elif xdim == 2 and ydim == 1:
-            xgrad = outer(gz, y.T)
-            ygrad = dot(x.T, gz)
-
-        # x is matrix, y is matrix, grad is matrix
-        elif xdim == ydim == 2:
-            xgrad = dot(gz, y.T)
-            ygrad = dot(x.T, gz)
+        xgrad = self(gz, y.T)
+        ygrad = self(x.T, gz)

        # If x or y contain broadcastable dimensions but only one of
        # them know that a matching dimensions is broadcastable, the
        # above code don't always return the right broadcast pattern.
        # This cause problem down the road. See gh-1461.
-        if xgrad.broadcastable != x.broadcastable:
-            xgrad = specify_broadcastable(
-                xgrad, *(ax for (ax, b) in enumerate(x.type.broadcastable) if b)
-            )
-        if ygrad.broadcastable != y.broadcastable:
-            ygrad = specify_broadcastable(
-                ygrad, *(ax for (ax, b) in enumerate(y.type.broadcastable) if b)
-            )
+        if xgrad.type.shape != x.type.shape:
+            xgrad = specify_shape(xgrad, x.type.shape)
+        if ygrad.type.shape != y.type.shape:
+            ygrad = specify_shape(ygrad, y.type.shape)

-        rval = xgrad, ygrad
+        if xgrad.type.dtype not in float_dtypes:
+            raise TypeError("Dot grad x output must be a float type")
+        if ygrad.type.dtype not in float_dtypes:
+            raise TypeError("Dot grad y output must be a float type")

-        for elem in rval:
-            assert elem.dtype.find("float") != -1
-
-        return rval
+        return xgrad, ygrad

    def R_op(self, inputs, eval_points):
        # R_op for a \dot b evaluated at c for a and d for b is
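
For context on the grad rewrite above: with both inputs now guaranteed to be matrices, only the matrix-matrix branch of the old code survives, i.e. the pullback of an output gradient gz through x @ y is gz @ y.T for x and x.T @ gz for y. A quick NumPy finite-difference check of that identity (illustrative only, not part of the diff; shapes and seed chosen arbitrarily):

import numpy as np

rng = np.random.default_rng(0)
x, y = rng.normal(size=(3, 4)), rng.normal(size=(4, 5))
gz = np.ones((3, 5))  # output gradient of sum(x @ y)

# Forward finite differences of d sum(x @ y) / dx, compared to gz @ y.T
eps = 1e-6
fd = np.empty_like(x)
for i in range(3):
    for j in range(4):
        dx = np.zeros_like(x)
        dx[i, j] = eps
        fd[i, j] = (np.sum((x + dx) @ y) - np.sum(x @ y)) / eps
assert np.allclose(fd, gz @ y.T, atol=1e-4)
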
@@ -3116,24 +3075,7 @@ def R_op(self, inputs, eval_points):

    def infer_shape(self, fgraph, node, shapes):
        xshp, yshp = shapes
-        x, y = node.inputs
-
-        # vector / vector
-        if x.ndim == 1 and y.ndim == 1:
-            return [()]
-        # matrix / vector
-        if x.ndim == 2 and y.ndim == 1:
-            return [xshp[:-1]]
-        # vector / matrix
-        if x.ndim == 1 and y.ndim == 2:
-            return [yshp[-1:]]
-        # matrix / matrix
-        if x.ndim == 2 and y.ndim == 2:
-            return [xshp[:-1] + yshp[-1:]]
-        raise NotImplementedError()
-
-    def __str__(self):
-        return "dot"
+        return [[xshp[0], yshp[1]]]


_dot = Dot()
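
With the op restricted to 2D inputs, infer_shape collapses to rows-of-x by columns-of-y, matching the static shape computed in make_node. A small sketch of the resulting shape inference (assumes the usual `import pytensor.tensor as pt` alias; not part of the diff):

import pytensor.tensor as pt

x = pt.tensor("x", shape=(3, None))
y = pt.tensor("y", shape=(None, 5))
z = x @ y  # routed through the 2D-only Dot / Matmul machinery
print(z.type.shape)  # expected: (3, 5)
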
@@ -3215,7 +3157,24 @@ def dense_dot(a, b):
    elif a.ndim > 2 or b.ndim > 2:
        return tensordot(a, b, [[a.ndim - 1], [np.maximum(0, b.ndim - 2)]])
    else:
-        return _dot(a, b)
+        row_vector = a.ndim == 1
+        if row_vector:
+            # Promote to row matrix
+            a = a[None]
+
+        col_vector = b.ndim == 1
+        if col_vector:
+            # Promote to column matrix
+            b = b[:, None]
+
+        out = _dot(a, b)
+        if row_vector:
+            # If we promoted a to a row matrix, we need to squeeze the first dimension
+            out = out.squeeze(0)
+        if col_vector:
+            # If we promoted b to a column matrix, we need to squeeze the last dimension
+            out = out.squeeze(-1)
+        return out


def tensordot(
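
The vector cases that the stricter Dot no longer accepts are handled in dense_dot above by promoting to matrices and squeezing the result, so `pt.dot` keeps its NumPy-like semantics. A brief illustration of the intended dimensionalities (assumes `import pytensor.tensor as pt`; not part of the diff):

import pytensor.tensor as pt

m = pt.matrix("m")  # (r, c)
v = pt.vector("v")  # (c,)

mv = pt.dot(m, v)  # v promoted to (c, 1), result squeezed back to (r,)
vv = pt.dot(v, v)  # both promoted, result squeezed to a 0-d scalar

print(mv.ndim, vv.ndim)  # expected: 1 0
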
@@ -3921,11 +3880,7 @@ def logsumexp(x, axis=None, keepdims=False):
    return log(sum(exp(x), axis=axis, keepdims=keepdims))


-_matmul = Blockwise(
-    _dot,
-    signature="(m,k),(k,n)->(m,n)",
-    gufunc_spec=("numpy.matmul", 2, 1),
-)
+_matmul = Blockwise(_dot, name="Matmul")


def matmul(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None):
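
Since Dot now declares gufunc_signature and gufunc_spec itself, the Blockwise wrapper no longer needs them spelled out, and batched behaviour should be unchanged. A rough usage sketch of matmul broadcasting over leading batch dimensions (illustrative only; shapes are made up):

import pytensor.tensor as pt

a = pt.tensor("a", shape=(7, 3, 4))  # batch of 7 matrices
b = pt.tensor("b", shape=(4, 5))     # single matrix, broadcast across the batch

c = pt.matmul(a, b)
print(c.type.shape)  # expected: (7, 3, 5)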