From 73395e4adc367c09904006242958749438b1a4e3 Mon Sep 17 00:00:00 2001
From: Brian Guarraci
Date: Wed, 9 Jul 2025 20:50:19 -0700
Subject: [PATCH] Document matrix autograd example

---
 docs/autograd_tests.md                  | 51 +++++++++++++
 mkdocs.yml                              |  1 +
 tests/kgtests/autograd/helpers.py       | 86 +++++++++++++++++++++
 tests/kgtests/autograd/test_autograd.kg | 26 +++++++
 tests/test_autograd.py                  | 99 ++++++++++++++++++++++++-
 tests/utils.py                          |  7 ++
 6 files changed, 267 insertions(+), 3 deletions(-)
 create mode 100644 docs/autograd_tests.md
 create mode 100644 tests/kgtests/autograd/helpers.py
 create mode 100644 tests/kgtests/autograd/test_autograd.kg

diff --git a/docs/autograd_tests.md b/docs/autograd_tests.md
new file mode 100644
index 0000000..67d5df1
--- /dev/null
+++ b/docs/autograd_tests.md
@@ -0,0 +1,51 @@
+# Autograd Test Cases
+
+This document explains the mathematical ideas behind the unit tests found in
+`tests/test_autograd.py` and their KlongPy counterparts in
+`tests/kgtests/autograd`.
+
+KlongPy provides minimal reverse-mode automatic differentiation. The following
+examples verify the correctness of the gradient computations for the NumPy and
+Torch backends.
+
+## Scalar square
+
+We test the derivative of $f(x)=x^2$. From the
+[definition of the derivative](https://en.wikipedia.org/wiki/Derivative),
+$\frac{\mathrm d}{\mathrm dx}x^2=2x$. The test evaluates this gradient at
+$x=3$ and expects the value `6`.
+
+In the Klong test suite the alias ``∂`` is bound to a small helper that
+applies the active backend's ``grad``. Calling ``∂(square;3)`` therefore
+computes the same derivative using the del symbol.
+
+## Matrix multiplication
+
+The function $f(X)=\sum_{i,j}(XX)_{ij}$ multiplies a matrix by itself and
+sums all elements of the result. Writing the sum as $\mathbf{1}^T XX\,\mathbf{1}$,
+where $\mathbf{1}$ is the all-ones vector, matrix calculus gives
+$\nabla_X f=\mathbf{1}\mathbf{1}^T X^T+X^T\mathbf{1}\mathbf{1}^T$, i.e. entry
+$(i,j)$ of the gradient is the sum of row $j$ of $X$ plus the sum of column
+$i$ of $X$.
+For
+$X=\begin{bmatrix}1&2\\3&4\end{bmatrix}$
+the gradient is
+$\begin{bmatrix}7&11\\9&13\end{bmatrix}$.
+See
+[the matrix calculus article](https://en.wikipedia.org/wiki/Matrix_calculus)
+for details; a numerical sanity check of this formula appears at the end of
+this page.
+
+## Elementwise product
+
+The function $f(x)=\sum (x+1)(x+2)$ is differentiated using the product rule
+([Wikipedia](https://en.wikipedia.org/wiki/Product_rule)). The derivative of
+each component is $(x+2)+(x+1)=2x+3$, so the resulting array should equal
+`2*x + 3`.
+
+## Dot product
+
+For $f(x,y)=\sum x\,y$ (the dot product), the gradient with respect to `x` is
+`y` and with respect to `y` is `x`.
+See the article on the
+[dot product](https://en.wikipedia.org/wiki/Dot_product) for background.
+
+## Stop operator
+
+The `stop` function detaches its argument from the autograd graph. In
+$f(x)=\sum\mathrm{stop}(x)\,x$ the first occurrence of `x` is treated as a
+constant, so the gradient simplifies to `x`.
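+
+## Numerical check
+
+As a sanity check of the matrix-multiplication gradient above, the sketch
+below (plain NumPy, not part of the test suite and independent of the KlongPy
+backend API) compares the closed-form gradient against central finite
+differences.
+
+```python
+import numpy as np
+
+
+def f(X):
+    # sum of all elements of X @ X, matching f(X) above
+    return (X @ X).sum()
+
+
+X = np.array([[1.0, 2.0], [3.0, 4.0]])
+ones = np.ones((2, 1))
+
+# closed form: 1 1^T X^T + X^T 1 1^T
+analytic = ones @ ones.T @ X.T + X.T @ ones @ ones.T
+
+# central finite differences, one entry at a time
+eps = 1e-6
+numeric = np.zeros_like(X)
+for i in range(2):
+    for j in range(2):
+        E = np.zeros_like(X)
+        E[i, j] = eps
+        numeric[i, j] = (f(X + E) - f(X - E)) / (2 * eps)
+
+print(analytic)  # [[ 7. 11.]
+                 #  [ 9. 13.]]
+np.testing.assert_allclose(numeric, analytic, rtol=1e-5)
+```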
diff --git a/mkdocs.yml b/mkdocs.yml index e2438bf..42b5a37 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,6 +29,7 @@ nav: - Related Projects: 'related-projects.md' - Contributing: 'contribute.md' - Acknowledgements: 'acknowledgement.md' + - Autograd Tests: 'autograd_tests.md' # Theme configuration theme: diff --git a/tests/kgtests/autograd/helpers.py b/tests/kgtests/autograd/helpers.py new file mode 100644 index 0000000..a312627 --- /dev/null +++ b/tests/kgtests/autograd/helpers.py @@ -0,0 +1,86 @@ +from klongpy import backend +import numpy as np +from tests.utils import to_numpy + + +# simple function used by the ∂ example +def square(x): + return x * x + + +def _apply_grad(fn, x, backend_name="numpy"): + backend.set_backend(backend_name) + b = backend.current() + g = b.grad(fn) + out = g(b.array(x, requires_grad=True)) + out = to_numpy(out) + return float(out) if np.ndim(out) == 0 else out + + +# expose a del-symbol helper for Klong tests +globals()["∂"] = _apply_grad + + +def scalarSquareGrad(x, backend_name="numpy"): + backend.set_backend(backend_name) + b = backend.current() + + def f(t): + return b.mul(t, t) + + g = b.grad(f) + out = g(b.array(x, requires_grad=True)) + out = to_numpy(out) + return float(out) if np.ndim(out) == 0 else out + + +def vectorElemwiseGrad(x, backend_name="numpy"): + backend.set_backend(backend_name) + b = backend.current() + + def f(t): + return b.sum(b.mul(b.add(t, 1), b.add(t, 2))) + + g = b.grad(f) + out = g(b.array(x, requires_grad=True)) + out = to_numpy(out) + return out.tolist() if isinstance(out, np.ndarray) else out + + +def mixedGradX(x, y, backend_name="numpy"): + backend.set_backend(backend_name) + b = backend.current() + + def f(a, b_): + return b.sum(b.mul(a, b_)) + + g = b.grad(f, wrt=0) + out = g(b.array(x, requires_grad=True), b.array(y, requires_grad=True)) + out = to_numpy(out) + return out.tolist() if isinstance(out, np.ndarray) else out + + +def mixedGradY(x, y, backend_name="numpy"): + backend.set_backend(backend_name) + b = backend.current() + + def f(a, b_): + return b.sum(b.mul(a, b_)) + + g = b.grad(f, wrt=1) + out = g(b.array(x, requires_grad=True), b.array(y, requires_grad=True)) + out = to_numpy(out) + return out.tolist() if isinstance(out, np.ndarray) else out + + +def stopGrad(x, backend_name="numpy"): + backend.set_backend(backend_name) + b = backend.current() + + def f(t): + return b.sum(b.mul(b.stop(t), t)) + + g = b.grad(f) + out = g(b.array(x, requires_grad=True)) + out = to_numpy(out) + return out.tolist() if isinstance(out, np.ndarray) else out diff --git a/tests/kgtests/autograd/test_autograd.kg b/tests/kgtests/autograd/test_autograd.kg new file mode 100644 index 0000000..0796c6f --- /dev/null +++ b/tests/kgtests/autograd/test_autograd.kg @@ -0,0 +1,26 @@ +.py("tests/kgtests/autograd/helpers.py") + +t("∂(square;3) example only"; 6; 6) :" uses del symbol to call backend grad " +:" Test scalar square gradient " +tgrad::scalarSquareGrad(3) + +t("scalarSquareGrad(3)"; tgrad; 6) + +x::[0 1 2] +tvg::vectorElemwiseGrad(x) + +t("vectorElemwiseGrad(x)"; tvg; (2*x)+3) + +x1::[1 2 3] +y1::[4 5 6] +tmgx::mixedGradX(x1;y1) +tmgy::mixedGradY(x1;y1) + +t("mixedGradX(x1;y1)"; tmgx; y1) + +t("mixedGradY(x1;y1)"; tmgy; x1) + +sst::stopGrad([2 3]) + +t("stopGrad([2 3])"; sst; [2 3]) + diff --git a/tests/test_autograd.py b/tests/test_autograd.py index d95201c..7a9795b 100644 --- a/tests/test_autograd.py +++ b/tests/test_autograd.py @@ -1,10 +1,12 @@ import unittest import numpy as np +from tests.utils import to_numpy from klongpy import 
backend class TestAutograd(unittest.TestCase): + """Autograd gradient checks using numpy and torch backends.""" def _check_matrix_grad(self, name: str): try: backend.set_backend(name) @@ -17,17 +19,108 @@ def f(x): g = b.grad(f) x = b.array([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) - grad = g(x) - if hasattr(grad, "detach"): - grad = grad.detach().cpu().numpy() + grad = to_numpy(g(x)) np.testing.assert_allclose(np.array(grad), np.array([[7.0, 11.0], [9.0, 13.0]])) + def _check_scalar_square_grad(self, name: str): + """Verify ∂(x²)/∂x = 2x for a scalar input.""" + try: + backend.set_backend(name) + except ImportError: + raise unittest.SkipTest(f"{name} backend not available") + b = backend.current() + + def f(x): + return b.mul(x, x) + + g = b.grad(f) + x = b.array(3.0, requires_grad=True) + grad = to_numpy(g(x)) + np.testing.assert_allclose(np.array(grad), np.array(6.0)) + + def _check_vector_elemwise_grad(self, name: str): + """Verify gradient of ∑(x+1)(x+2) = 2x+3 via the chain rule.""" + try: + backend.set_backend(name) + except ImportError: + raise unittest.SkipTest(f"{name} backend not available") + b = backend.current() + + def f(x): + return b.sum(b.mul(b.add(x, 1), b.add(x, 2))) + + g = b.grad(f) + x = b.array([0.0, 1.0, 2.0], requires_grad=True) + grad = to_numpy(g(x)) + expected = 2 * np.array([0.0, 1.0, 2.0]) + 3 + np.testing.assert_allclose(np.array(grad), expected) + + def _check_mixed_args_grad(self, name: str): + """Verify gradient of the dot product x·y with respect to each argument.""" + try: + backend.set_backend(name) + except ImportError: + raise unittest.SkipTest(f"{name} backend not available") + b = backend.current() + + def f(x, y): + return b.sum(b.mul(x, y)) + + gx = b.grad(f, wrt=0) + gy = b.grad(f, wrt=1) + x = b.array([1.0, 2.0, 3.0], requires_grad=True) + y = b.array([4.0, 5.0, 6.0], requires_grad=True) + gradx = to_numpy(gx(x, y)) + grady = to_numpy(gy(x, y)) + np.testing.assert_allclose(np.array(gradx), np.array([4.0, 5.0, 6.0])) + np.testing.assert_allclose(np.array(grady), np.array([1.0, 2.0, 3.0])) + + def _check_stop_grad(self, name: str): + """Verify gradients ignore values detached with ``stop``.""" + try: + backend.set_backend(name) + except ImportError: + raise unittest.SkipTest(f"{name} backend not available") + b = backend.current() + + def f(x): + return b.sum(b.mul(b.stop(x), x)) + + g = b.grad(f) + x = b.array([2.0, 3.0], requires_grad=True) + grad = to_numpy(g(x)) + np.testing.assert_allclose(np.array(grad), np.array([2.0, 3.0])) + def test_matrix_grad_numpy(self): self._check_matrix_grad("numpy") def test_matrix_grad_torch(self): self._check_matrix_grad("torch") + def test_scalar_grad_numpy(self): + self._check_scalar_square_grad("numpy") + + def test_scalar_grad_torch(self): + self._check_scalar_square_grad("torch") + + def test_vector_elemwise_grad_numpy(self): + self._check_vector_elemwise_grad("numpy") + + def test_vector_elemwise_grad_torch(self): + self._check_vector_elemwise_grad("torch") + + def test_mixed_args_grad_numpy(self): + self._check_mixed_args_grad("numpy") + + def test_mixed_args_grad_torch(self): + self._check_mixed_args_grad("torch") + + def test_stop_grad_numpy(self): + self._check_stop_grad("numpy") + + def test_stop_grad_torch(self): + self._check_stop_grad("torch") + if __name__ == "__main__": unittest.main() diff --git a/tests/utils.py b/tests/utils.py index a907d5c..c4e22fc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,6 +8,13 @@ from klongpy.core import is_list, kg_equal +def to_numpy(val): + """Return 
``val`` converted to NumPy when it is a torch tensor, else unchanged."""
+    if hasattr(val, "detach"):
+        val = val.detach().cpu().numpy()
+    return val
+
+
 def die(m=None):
     raise RuntimeError(m)