Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/affine.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "subexpr.h"
#include "utils/CSR_Matrix.h"

expr *new_linear(expr *u, const CSR_Matrix *A);
expr *new_linear(expr *u, const CSR_Matrix *A, const double *b);

expr *new_add(expr *left, expr *right);
expr *new_neg(expr *child);
Expand Down
3 changes: 2 additions & 1 deletion include/subexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ struct int_double_pair;

/* Type-specific expression structures that "inherit" from expr */

/* Linear operator: y = A * x */
/* Linear operator: y = A * x + b */
typedef struct linear_op_expr
{
expr base;
CSC_Matrix *A_csc;
CSR_Matrix *A_csr;
double *b; /* constant offset vector (NULL if no offset) */
} linear_op_expr;

/* Power: y = x^p */
Expand Down
27 changes: 24 additions & 3 deletions python/atoms/linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args)
{
PyObject *child_capsule;
PyObject *data_obj, *indices_obj, *indptr_obj;
PyObject *b_obj = Py_None; /* Optional offset vector */
int m, n;
if (!PyArg_ParseTuple(args, "OOOOii", &child_capsule, &data_obj, &indices_obj,
&indptr_obj, &m, &n))

/* Accept optional b array: (child, data, indices, indptr, m, n[, b]) */
if (!PyArg_ParseTuple(args, "OOOOii|O", &child_capsule, &data_obj, &indices_obj,
&indptr_obj, &m, &n, &b_obj))
{
return NULL;
}
Expand Down Expand Up @@ -46,8 +49,26 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args)
Py_DECREF(indices_array);
Py_DECREF(indptr_array);

expr *node = new_linear(child, A);
/* Parse optional b offset vector */
double *b_data = NULL;
PyArrayObject *b_array = NULL;

if (b_obj != Py_None)
{
b_array = (PyArrayObject *) PyArray_FROM_OTF(b_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
if (!b_array)
{
free_csr_matrix(A);
return NULL;
}
b_data = (double *) PyArray_DATA(b_array);
}

expr *node = new_linear(child, A, b_data);

/* Clean up */
free_csr_matrix(A);
Py_XDECREF(b_array);

if (!node)
{
Expand Down
220 changes: 220 additions & 0 deletions python/tests/test_linear_op_offset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
"""Tests for linear_op with constant offset: A @ x + b.

Tests verify that linear_op correctly handles:
1. Forward pass: y = A @ x + b
2. Gradient computation via chain rule
"""

import numpy as np
import dnlp_diff_engine as de


def test_linear_op_with_offset_forward():
    """Forward pass of linear_op(x, A, b) must evaluate A @ x + b.

    linear_op is an internal expression type, so it is wrapped in log
    before being handed to the problem as an objective.
    """
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[1, 1]], b = [5]  ->  y = x[0] + x[1] + 5
    data = np.array([1.0, 1.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    lin = de.make_linear(var, data, indices, indptr, 1, 2, offset)
    objective = de.make_log(lin)  # log(.) makes it a valid objective

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([2.0, 3.0])

    # log(2 + 3 + 5) = log(10)
    obj = de.problem_objective_forward(problem, point)
    assert np.isclose(obj, np.log(10.0)), f"Expected log(10), got {obj}"


def test_linear_op_with_offset_gradient():
    """Gradient of log(A @ x + b) must follow the chain rule."""
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[1, 1]], b = [5]  ->  objective is log(x[0] + x[1] + 5)
    data = np.array([1.0, 1.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    objective = de.make_log(
        de.make_linear(var, data, indices, indptr, 1, 2, offset)
    )

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([2.0, 3.0])

    # Forward value: log(2 + 3 + 5) = log(10)
    assert np.isclose(de.problem_objective_forward(problem, point), np.log(10.0))

    # d/dx_i log(x[0] + x[1] + 5) = 1/(x[0] + x[1] + 5) = 1/10 for both.
    grad = de.problem_gradient(problem)
    expected = 1.0 / 10.0
    np.testing.assert_allclose(grad, [expected, expected], rtol=1e-5)


def test_linear_op_without_offset():
    """Passing None for b must reproduce the plain A @ x behaviour."""
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[2, 3]]  ->  y = 2*x[0] + 3*x[1]
    data = np.array([2.0, 3.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)

    # Explicit None for the offset argument.
    objective = de.make_log(
        de.make_linear(var, data, indices, indptr, 1, 2, None)
    )

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([1.0, 1.0])

    # log(2*1 + 3*1) = log(5)
    assert np.isclose(de.problem_objective_forward(problem, point), np.log(5.0))

    # grad log(2x + 3y) = [2, 3] / (2x + 3y) = [2/5, 3/5] at (1, 1)
    grad = de.problem_gradient(problem)
    np.testing.assert_allclose(grad, [2.0/5.0, 3.0/5.0], rtol=1e-5)


def test_linear_op_with_offset_hessian():
    """Hessian of log(A @ x + b): every entry equals -1/(x[0]+x[1]+5)**2."""
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # objective: log(x[0] + x[1] + 5)
    data = np.array([1.0, 1.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    objective = de.make_log(
        de.make_linear(var, data, indices, indptr, 1, 2, offset)
    )

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    # Evaluate at u = [2, 3] so the log argument is 10.
    de.problem_objective_forward(problem, np.array([2.0, 3.0]))

    obj_factor = 1.0
    hess_data, hess_indices, hess_indptr, hess_shape = de.problem_hessian(
        problem, obj_factor, np.array([])
    )

    # Dense Hessian is [[h, h], [h, h]] with h = -1/100.
    expected_h = -1.0 / 100.0

    # Sparse storage may hold 3 (symmetric triangle) or 4 entries.
    assert len(hess_data) >= 3, f"Expected at least 3 Hessian entries, got {len(hess_data)}"

    # Every stored entry must equal h.
    for entry in hess_data:
        np.testing.assert_allclose(entry, expected_h, rtol=1e-5)


def test_linear_op_vector_with_offset():
    """Vector-valued y = A @ x + b, consumed through sum(log(y))."""
    num_vars = 3
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[1, 2, 0], [0, 1, 3]] (2x3), b = [1, 2]:
    #   y[0] = x[0] + 2*x[1] + 1
    #   y[1] = x[1] + 3*x[2] + 2
    data = np.array([1.0, 2.0, 1.0, 3.0])
    indices = np.array([0, 1, 1, 2], dtype=np.int32)
    indptr = np.array([0, 2, 4], dtype=np.int32)
    offset = np.array([1.0, 2.0])

    lin = de.make_linear(var, data, indices, indptr, 2, 3, offset)

    # objective: sum(log(A @ x + b))
    objective = de.make_sum(de.make_log(lin), -1)

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([1.0, 1.0, 1.0])

    # At x = [1, 1, 1]: y = [4, 6], so objective = log(4) + log(6).
    obj = de.problem_objective_forward(problem, point)
    np.testing.assert_allclose(obj, np.log(4.0) + np.log(6.0), rtol=1e-5)

    # Chain rule through sum(log(.)):
    #   d/dx[0] = 1/y[0]           = 1/4
    #   d/dx[1] = 2/y[0] + 1/y[1]  = 2/4 + 1/6
    #   d/dx[2] = 3/y[1]           = 3/6
    grad = de.problem_gradient(problem)
    expected_grad = np.array([1.0/4.0, 2.0/4.0 + 1.0/6.0, 3.0/6.0])
    np.testing.assert_allclose(grad, expected_grad, rtol=1e-5)


def test_linear_op_sparse_matrix_with_offset():
    """Offset support must compose with a genuinely sparse matrix."""
    num_vars = 5
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A is a 3x5 sparse matrix:
    #   A = [[1, 0, 2, 0, 0],
    #        [0, 3, 0, 4, 0],
    #        [0, 0, 0, 0, 5]]
    #   b = [10, 20, 30]
    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    indices = np.array([0, 2, 1, 3, 4], dtype=np.int32)
    indptr = np.array([0, 2, 4, 5], dtype=np.int32)
    offset = np.array([10.0, 20.0, 30.0])

    lin = de.make_linear(var, data, indices, indptr, 3, 5, offset)
    objective = de.make_sum(de.make_log(lin), -1)

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.ones(5)

    # At x = ones(5):
    #   y[0] = 1 + 2 + 10 = 13
    #   y[1] = 3 + 4 + 20 = 27
    #   y[2] = 5 + 30     = 35
    obj = de.problem_objective_forward(problem, point)
    np.testing.assert_allclose(obj, np.log(13.0) + np.log(27.0) + np.log(35.0), rtol=1e-5)

    # Each nonzero A[i][j] contributes A[i][j]/y[i] to grad[j]:
    #   grad = [1/13, 3/27, 2/13, 4/27, 5/35]
    grad = de.problem_gradient(problem)
    expected_grad = np.array([1.0/13.0, 3.0/27.0, 2.0/13.0, 4.0/27.0, 5.0/35.0])
    np.testing.assert_allclose(grad, expected_grad, rtol=1e-5)
33 changes: 31 additions & 2 deletions src/affine/linear_op.c
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
#include "affine.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Forward pass for the linear operator node: y = A * x + b.
 * Evaluates the child subtree first, then applies the sparse
 * matrix-vector product and the optional constant offset. */
static void forward(expr *node, const double *u)
{
    linear_op_expr *op = (linear_op_expr *) node;
    expr *child = node->left;

    /* Recurse into the child so child->value holds x. */
    child->forward(child, u);

    /* y = A * x */
    csr_matvec(op->A_csr, child->value, node->value, child->var_id);

    /* y += b when an offset vector was supplied (b may be NULL). */
    if (op->b != NULL)
    {
        for (int i = 0; i < node->size; i++)
        {
            node->value[i] += op->b[i];
        }
    }
}

static bool is_affine(const expr *node)
Expand All @@ -30,6 +41,13 @@ static void free_type_data(expr *node)
}

free_csc_matrix(lin_node->A_csc);

if (lin_node->b != NULL)
{
free(lin_node->b);
lin_node->b = NULL;
}

lin_node->A_csr = NULL;
lin_node->A_csc = NULL;
}
Expand All @@ -39,7 +57,7 @@ static void jacobian_init(expr *node)
node->jacobian = ((linear_op_expr *) node)->A_csr;
}

expr *new_linear(expr *u, const CSR_Matrix *A)
expr *new_linear(expr *u, const CSR_Matrix *A, const double *b)
{
assert(u->d2 == 1);
/* Allocate the type-specific struct */
Expand All @@ -55,5 +73,16 @@ expr *new_linear(expr *u, const CSR_Matrix *A)
copy_csr_matrix(A, lin_node->A_csr);
lin_node->A_csc = csr_to_csc(A);

/* Initialize offset (copy b if provided, otherwise NULL) */
if (b != NULL)
{
lin_node->b = (double *) malloc(A->m * sizeof(double));
memcpy(lin_node->b, b, A->m * sizeof(double));
}
else
{
lin_node->b = NULL;
}

return node;
}
2 changes: 1 addition & 1 deletion tests/forward_pass/affine/test_linear_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const char *test_linear_op()
memcpy(A->p, Ap, 4 * sizeof(int));

expr *var = new_variable(3, 1, 2, 6);
expr *linear_node = new_linear(var, A);
expr *linear_node = new_linear(var, A, NULL);
double x[6] = {0, 0, 1, 2, 3, 0};
linear_node->forward(linear_node, x);

Expand Down
6 changes: 3 additions & 3 deletions tests/jacobian_tests/test_composite.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const char *test_jacobian_composite_log()
memcpy(A->p, Ap, 3 * sizeof(int));

expr *u = new_variable(3, 1, 2, 6);
expr *Au = new_linear(u, A);
expr *Au = new_linear(u, A, NULL);
expr *log_node = new_log(Au);
log_node->forward(log_node, u_vals);
log_node->jacobian_init(log_node);
Expand Down Expand Up @@ -69,8 +69,8 @@ const char *test_jacobian_composite_log_add()

expr *x = new_variable(3, 1, 2, 7);
expr *y = new_variable(2, 1, 5, 7);
expr *Ax_expr = new_linear(x, A);
expr *By_expr = new_linear(y, B);
expr *Ax_expr = new_linear(x, A, NULL);
expr *By_expr = new_linear(y, B, NULL);
expr *log_Ax = new_log(Ax_expr);
expr *log_By = new_log(By_expr);
expr *sum = new_add(log_Ax, log_By);
Expand Down
Loading
Loading