### `.github/workflows/unittests.yml` (6 changes: 4 additions & 2 deletions)

```diff
@@ -23,7 +23,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         python -m pip install pylint flake8
-        pip install -e ".[full]"
+        pip install -e ".[full,torch]" --extra-index-url https://download.pytorch.org/whl/cpu
     # - name: Lint with pylint
     #   run: python -m pylint --disable=all -e W0311 --jobs=0 --indent-string=' ' **/*.py
     - name: Lint with flake8
@@ -45,6 +45,8 @@ jobs:
       with:
         python-version: 3.11
     - name: Install Dependencies
-      run: pip install -e ".[full]"
+      run: |
+        pip install --upgrade pip
+        pip install -e ".[full,torch]" --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Run unittest
      run: python3 -m unittest -v
```
### `docs/autograd_tests.md` (new file: 51 additions)
# Autograd Test Cases

This document explains the mathematical ideas behind the unit tests found in
`tests/test_autograd.py` and their KlongPy counterparts in
`tests/kgtests/autograd`.

KlongPy provides minimal reverse-mode automatic differentiation. The following
examples verify that the gradient computations are correct for both the NumPy
and Torch backends.

## Scalar square

We test the derivative of $f(x)=x^2$. From the
[definition of the derivative](https://en.wikipedia.org/wiki/Derivative),
$\frac{\mathrm d}{\mathrm dx}x^2=2x$. The test evaluates this gradient at
$x=3$ and expects the value `6`.

In the Klong test suite the alias ``∂`` is bound to ``backend.grad``. Calling
``∂(square;3)`` therefore computes the same derivative using the del symbol.
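
A quick way to confirm the expected value outside the test suite is a central
finite difference (a minimal sketch; `numeric_grad` is an illustrative helper,
not part of KlongPy):

```python
def square(x):
    return x * x

def numeric_grad(f, x, h=1e-6):
    # Central-difference approximation of f'(x).
    return (f(x + h) - f(x - h)) / (2 * h)

# d/dx x^2 evaluated at x = 3 is 2 * 3 = 6.
assert abs(numeric_grad(square, 3.0) - 6.0) < 1e-6
```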

## Matrix multiplication

The function $f(X)=\sum X X$ multiplies a matrix by itself and sums all
elements of the result, i.e. $f(X)=\mathbf{1}^\top X X\,\mathbf{1}$.
Matrix calculus shows that the gradient is
$\nabla f(X)=\mathbf{1}\mathbf{1}^\top X^\top + X^\top\mathbf{1}\mathbf{1}^\top$,
so entry $(k,l)$ equals the sum of row $l$ of $X$ plus the sum of column $k$
of $X$. For
$X=\begin{bmatrix}1&2\\3&4\end{bmatrix}$
the row sums are $(3,7)$ and the column sums are $(4,6)$, giving the gradient
$\begin{bmatrix}7&11\\9&13\end{bmatrix}$.
See
[the matrix calculus article](https://en.wikipedia.org/wiki/Matrix_calculus)
for details.
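
A quick NumPy check of this closed form (a sketch for illustration; it mirrors
the test's expected values but is not the test code itself):

```python
import numpy as np

X = np.array([[1.0, 2.0], [3.0, 4.0]])
ones = np.ones((2, 1))

# f(X) = 1^T (X @ X) 1: the sum of all entries of X @ X.
# Gradient: 1 1^T X^T + X^T 1 1^T, i.e. entry (k, l) is
# (sum of row l of X) + (sum of column k of X).
grad = ones @ ones.T @ X.T + X.T @ ones @ ones.T
assert np.array_equal(grad, np.array([[7.0, 11.0], [9.0, 13.0]]))
```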

## Elementwise product

The function $f(x)=\sum (x+1)(x+2)$ is differentiated using the product rule
([Wikipedia](https://en.wikipedia.org/wiki/Product_rule)). The gradient of each
component is $(x+2)+(x+1)=2x+3$, so the resulting array should equal `2*x + 3`.
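
The product-rule result is easy to confirm numerically (a minimal sketch using
a central difference; the loop over `np.eye` perturbs one component at a time):

```python
import numpy as np

def f(x):
    return np.sum((x + 1) * (x + 2))

x = np.array([0.0, 1.0, 2.0])
analytic = 2 * x + 3  # product rule: (x + 2) + (x + 1)

h = 1e-6
numeric = np.array([(f(x + h * e) - f(x - h * e)) / (2 * h) for e in np.eye(len(x))])
assert np.allclose(numeric, analytic)
```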

## Dot product

For $f(x,y)=\sum x\,y$ (the dot product), the gradient with respect to `x` is
`y` and with respect to `y` is `x`.
See the article on the
[dot product](https://en.wikipedia.org/wiki/Dot_product) for background.
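
Written directly against torch's autograd rather than the KlongPy harness
(assuming torch is installed; this is not the test code itself):

```python
import torch

x = torch.tensor([1.0, 2.0], requires_grad=True)
y = torch.tensor([3.0, 4.0], requires_grad=True)
torch.sum(x * y).backward()
assert torch.equal(x.grad, y.detach())  # df/dx = y
assert torch.equal(y.grad, x.detach())  # df/dy = x
```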

## Stop operator

The `stop` function detaches its argument from the autograd graph. In
$f(x)=\sum\mathrm{stop}(x)\,x$ the first occurrence of `x` is treated as a
constant, so the gradient simplifies to `x`.
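
In torch terms, `stop` plays the role of `detach`; a sketch of the same
identity (again, not the actual test code):

```python
import torch

x = torch.tensor([1.0, 2.0], requires_grad=True)
f = torch.sum(x.detach() * x)  # the detached factor is a constant to autograd
f.backward()
assert torch.equal(x.grad, x.detach())  # gradient simplifies to x
```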

### `klongpy/backend.py` (165 changes: 66 additions & 99 deletions)

```diff
@@ -1,103 +1,70 @@
 """Backend selection utilities for klongpy."""
 
 import os
-import warnings
-
-# Attempt to import CuPy. If not available, set use_gpu to False.
-use_gpu = bool(os.environ.get('USE_GPU') == '1')
-if use_gpu:
-    try:
-        import cupy as np
-        use_gpu = True
-    except ImportError:
-        import numpy as np
-        use_gpu = False
-else:
-    import numpy as np
-
-
-def is_supported_type(x):
-    """
-    CuPy does not support strings or jagged arrays.
-    Note: add any other unsupported types here.
-    """
-    if isinstance(x, str) or is_jagged_array(x):
-        return False
-    return True
-
-
-def is_jagged_array(x):
-    """
-    Check if an array is jagged.
-    """
-    if isinstance(x, list):
-        # If the lengths of sublists vary, it's a jagged array.
-        return len(set(map(len, x))) > 1
-    return False
-
-
-if use_gpu:
-    import cupy
-    import numpy
-
-    class CuPyReductionKernelWrapper:
-        def __init__(self, fn, reduce_fn_1, reduce_fn_2):
-            self.fn = fn
-            self.reduce_fn_1 = reduce_fn_1
-            self.reduce_fn_2 = reduce_fn_2
-
-        def __call__(self, *args, **kwargs):
-            return self.fn(*args, **kwargs)
-
-        def reduce(self, x):
-            return self.reduce_fn_1(x) if x.ndim == 1 else self.reduce_fn_2(x[0], x[1])
-
-    add_reduce_2 = cupy.ElementwiseKernel(
-        'T x, T y',
-        'T z',
-        'z = (x + y)',
-        'add_reduce_2')
-    np.add = CuPyReductionKernelWrapper(cupy.add, cupy.sum, add_reduce_2)
-
-    def subtract_reduce_1(x):
-        return 2*x[0] - cupy.sum(x)
-
-    subtract_reduce_2 = cupy.ElementwiseKernel(
-        'T x, T y',
-        'T z',
-        'z = (x - y)',
-        'subtract_reduce_2')
-    np.subtract = CuPyReductionKernelWrapper(cupy.subtract, subtract_reduce_1, subtract_reduce_2)
-
-    multiply_reduce_1 = cupy.ReductionKernel(
-        'T x',
-        'T y',
-        'x',
-        'a * b',
-        'y = a',
-        '1',
-        'multiply_reduce_1'
-    )
-    multiply_reduce_2 = cupy.ElementwiseKernel(
-        'T x, T y',
-        'T z',
-        'z = (x * y)',
-        'multiply_reduce_2')
-    np.multiply = CuPyReductionKernelWrapper(cupy.multiply, multiply_reduce_1, multiply_reduce_2)
-
-    def divide_reduce_1(x):
-        raise NotImplementedError()
-
-    divide_reduce_2 = cupy.ElementwiseKernel(
-        'T x, T y',
-        'T z',
-        'z = (x / y)',
-        'divide_reduce_2')
-    np.divide = CuPyReductionKernelWrapper(cupy.divide, divide_reduce_1, divide_reduce_2)
-
-    np.isarray = lambda x: isinstance(x, (numpy.ndarray, cupy.ndarray))
-
-    # np.hstack = lambda x: cupy.hstack(x) if use_gpu and is_supported_type(x) else numpy.hstack(x)
-else:
-    np.seterr(divide='ignore')
-    warnings.filterwarnings("error", category=np.VisibleDeprecationWarning)
-    np.isarray = lambda x: isinstance(x, np.ndarray)
-
-np
+from importlib import import_module
+from typing import Any
+
+import numpy as _np
+
+# default numpy compatibility shim for legacy modules
+_np.seterr(divide="ignore")
+_np.isarray = lambda x: isinstance(x, _np.ndarray)
+np = _np
+
+BACKEND = os.environ.get("KLONGPY_BACKEND", "numpy").lower()
+
+
+def current():
+    """Return the currently selected backend module."""
+    return import_module(f"klongpy.backends.{BACKEND}_backend")
+
+
+def set_backend(name: str) -> None:
+    """Select the computation backend.
+
+    Parameters
+    ----------
+    name: str
+        Either ``"numpy"`` or ``"torch"``.
+    """
+    global BACKEND
+    name = name.lower()
+    if name not in {"numpy", "torch"}:
+        raise ValueError(f"unknown backend '{name}'")
+    if name == "torch":
+        import_module("klongpy.backends.torch_backend")
+    BACKEND = name
+
+
+def array(obj: Any, *, dtype: Any | None = None, requires_grad: bool = False) -> Any:
+    """Create an array or tensor using the active backend."""
+    return current().array(obj, dtype=dtype, requires_grad=requires_grad)
+
+
+def add(a: Any, b: Any) -> Any:
+    """Element-wise addition via the active backend."""
+    return current().add(a, b)
+
+
+def mul(a: Any, b: Any) -> Any:
+    """Element-wise multiplication via the active backend."""
+    return current().mul(a, b)
+
+
+def matmul(a: Any, b: Any) -> Any:
+    """Matrix multiplication via the active backend."""
+    return current().matmul(a, b)
+
+
+def sum(a: Any, axis: int | None = None) -> Any:
+    """Sum elements of ``a`` via the active backend."""
+    return current().sum(a, axis=axis)
+
+
+def grad(fn: Any, wrt: int = 0) -> Any:
+    """Return gradient function via the active backend."""
+    return current().grad(fn, wrt=wrt)
+
+
+def stop(x: Any) -> Any:
+    """Detach ``x`` from the autograd graph via the active backend."""
+    return current().stop(x)
```
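
Based only on the signatures in this diff, usage would look roughly like the
sketch below; whether the returned gradient function is called as `df(x)`, and
whether the NumPy backend honors `requires_grad`, is an assumption, since the
backend modules themselves are not shown here:

```python
from klongpy import backend

backend.set_backend("numpy")  # or "torch", if installed

def f(x):
    return backend.sum(backend.mul(x, x))  # f(x) = sum(x * x)

df = backend.grad(f)  # gradient with respect to argument 0 (wrt=0)
x = backend.array([1.0, 2.0, 3.0], requires_grad=True)
print(df(x))  # expected: 2 * x  (assumed calling convention)
```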

### `klongpy/backends/__init__.py` (new file: 10 additions)

```python
"""Backend implementations."""

from . import numpy_backend

try:
    from . import torch_backend  # noqa: F401
except Exception:  # torch may not be available
    torch_backend = None

__all__ = ["numpy_backend", "torch_backend"]
```
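
Because the torch import is optional, downstream code can feature-test it
before selecting the backend (a small sketch):

```python
from klongpy import backends

if backends.torch_backend is None:
    print("torch backend unavailable; falling back to numpy")
```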