Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/affine.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "subexpr.h"
#include "utils/CSR_Matrix.h"

expr *new_linear(expr *u, const CSR_Matrix *A);
expr *new_linear(expr *u, const CSR_Matrix *A, const double *b);

expr *new_add(expr *left, expr *right);
expr *new_neg(expr *child);
Expand Down
3 changes: 2 additions & 1 deletion include/subexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ struct int_double_pair;

/* Type-specific expression structures that "inherit" from expr */

/* Linear operator: y = A * x */
/* Linear operator: y = A * x + b */
typedef struct linear_op_expr
{
expr base;
CSC_Matrix *A_csc;
CSR_Matrix *A_csr;
double *b; /* constant offset vector (NULL if no offset) */
} linear_op_expr;

/* Power: y = x^p */
Expand Down
27 changes: 24 additions & 3 deletions python/atoms/linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args)
{
PyObject *child_capsule;
PyObject *data_obj, *indices_obj, *indptr_obj;
PyObject *b_obj = Py_None; /* Optional offset vector */
int m, n;
if (!PyArg_ParseTuple(args, "OOOOii", &child_capsule, &data_obj, &indices_obj,
&indptr_obj, &m, &n))

/* Accept optional b array: (child, data, indices, indptr, m, n[, b]) */
if (!PyArg_ParseTuple(args, "OOOOii|O", &child_capsule, &data_obj, &indices_obj,
&indptr_obj, &m, &n, &b_obj))
{
return NULL;
}
Expand Down Expand Up @@ -46,8 +49,26 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args)
Py_DECREF(indices_array);
Py_DECREF(indptr_array);

expr *node = new_linear(child, A);
/* Parse optional b offset vector */
double *b_data = NULL;
PyArrayObject *b_array = NULL;

if (b_obj != Py_None)
{
b_array = (PyArrayObject *) PyArray_FROM_OTF(b_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
if (!b_array)
{
free_csr_matrix(A);
return NULL;
}
b_data = (double *) PyArray_DATA(b_array);
}

expr *node = new_linear(child, A, b_data);

/* Clean up */
free_csr_matrix(A);
Py_XDECREF(b_array);

if (!node)
{
Expand Down
220 changes: 220 additions & 0 deletions python/tests/test_linear_op_offset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
"""Tests for linear_op with constant offset: A @ x + b.

Tests verify that linear_op correctly handles:
1. Forward pass: y = A @ x + b
2. Gradient computation via chain rule
"""

import numpy as np
import dnlp_diff_engine as de


def test_linear_op_with_offset_forward():
    """Forward pass of linear_op(x, A, b) must evaluate A @ x + b.

    linear_op is an internal expression type, so it is wrapped in log
    before being handed to the problem as an objective.
    """
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[1, 1]], b = [5]  ->  y = x[0] + x[1] + 5
    data = np.array([1.0, 1.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    lin = de.make_linear(var, data, indices, indptr, 1, 2, offset)
    objective = de.make_log(lin)  # log(.) makes it a valid objective

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([2.0, 3.0])

    # log(2 + 3 + 5) = log(10)
    obj = de.problem_objective_forward(problem, point)
    assert np.isclose(obj, np.log(10.0)), f"Expected log(10), got {obj}"


def test_linear_op_with_offset_gradient():
    """Gradient of log(A @ x + b) must follow the chain rule."""
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[1, 1]], b = [5]  ->  objective is log(x[0] + x[1] + 5)
    data = np.array([1.0, 1.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    objective = de.make_log(
        de.make_linear(var, data, indices, indptr, 1, 2, offset)
    )

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([2.0, 3.0])

    # Forward value: log(2 + 3 + 5) = log(10)
    assert np.isclose(de.problem_objective_forward(problem, point), np.log(10.0))

    # d/dx_i log(x[0] + x[1] + 5) = 1/(x[0] + x[1] + 5) = 1/10 for both.
    grad = de.problem_gradient(problem)
    expected = 1.0 / 10.0
    np.testing.assert_allclose(grad, [expected, expected], rtol=1e-5)


def test_linear_op_without_offset():
    """Passing None for b must reproduce the plain A @ x behaviour."""
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[2, 3]]  ->  y = 2*x[0] + 3*x[1]
    data = np.array([2.0, 3.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)

    # Explicit None for the offset argument.
    objective = de.make_log(
        de.make_linear(var, data, indices, indptr, 1, 2, None)
    )

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([1.0, 1.0])

    # log(2*1 + 3*1) = log(5)
    assert np.isclose(de.problem_objective_forward(problem, point), np.log(5.0))

    # grad log(2x + 3y) = [2, 3] / (2x + 3y) = [2/5, 3/5] at (1, 1)
    grad = de.problem_gradient(problem)
    np.testing.assert_allclose(grad, [2.0/5.0, 3.0/5.0], rtol=1e-5)


def test_linear_op_with_offset_hessian():
    """Hessian of log(A @ x + b): every entry equals -1/(x[0]+x[1]+5)**2."""
    num_vars = 2
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # objective: log(x[0] + x[1] + 5)
    data = np.array([1.0, 1.0])
    indices = np.array([0, 1], dtype=np.int32)
    indptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    objective = de.make_log(
        de.make_linear(var, data, indices, indptr, 1, 2, offset)
    )

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    # Evaluate at u = [2, 3] so the log argument is 10.
    de.problem_objective_forward(problem, np.array([2.0, 3.0]))

    obj_factor = 1.0
    hess_data, hess_indices, hess_indptr, hess_shape = de.problem_hessian(
        problem, obj_factor, np.array([])
    )

    # Dense Hessian is [[h, h], [h, h]] with h = -1/100.
    expected_h = -1.0 / 100.0

    # Sparse storage may hold 3 (symmetric triangle) or 4 entries.
    assert len(hess_data) >= 3, f"Expected at least 3 Hessian entries, got {len(hess_data)}"

    # Every stored entry must equal h.
    for entry in hess_data:
        np.testing.assert_allclose(entry, expected_h, rtol=1e-5)


def test_linear_op_vector_with_offset():
    """Vector-valued y = A @ x + b, consumed through sum(log(y))."""
    num_vars = 3
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A = [[1, 2, 0], [0, 1, 3]] (2x3), b = [1, 2]:
    #   y[0] = x[0] + 2*x[1] + 1
    #   y[1] = x[1] + 3*x[2] + 2
    data = np.array([1.0, 2.0, 1.0, 3.0])
    indices = np.array([0, 1, 1, 2], dtype=np.int32)
    indptr = np.array([0, 2, 4], dtype=np.int32)
    offset = np.array([1.0, 2.0])

    lin = de.make_linear(var, data, indices, indptr, 2, 3, offset)

    # objective: sum(log(A @ x + b))
    objective = de.make_sum(de.make_log(lin), -1)

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.array([1.0, 1.0, 1.0])

    # At x = [1, 1, 1]: y = [4, 6], so objective = log(4) + log(6).
    obj = de.problem_objective_forward(problem, point)
    np.testing.assert_allclose(obj, np.log(4.0) + np.log(6.0), rtol=1e-5)

    # Chain rule through sum(log(.)):
    #   d/dx[0] = 1/y[0]           = 1/4
    #   d/dx[1] = 2/y[0] + 1/y[1]  = 2/4 + 1/6
    #   d/dx[2] = 3/y[1]           = 3/6
    grad = de.problem_gradient(problem)
    expected_grad = np.array([1.0/4.0, 2.0/4.0 + 1.0/6.0, 3.0/6.0])
    np.testing.assert_allclose(grad, expected_grad, rtol=1e-5)


def test_linear_op_sparse_matrix_with_offset():
    """Offset support must compose with a genuinely sparse matrix."""
    num_vars = 5
    var = de.make_variable(num_vars, 1, 0, num_vars)  # column vector (num_vars x 1)

    # A is a 3x5 sparse matrix:
    #   A = [[1, 0, 2, 0, 0],
    #        [0, 3, 0, 4, 0],
    #        [0, 0, 0, 0, 5]]
    #   b = [10, 20, 30]
    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    indices = np.array([0, 2, 1, 3, 4], dtype=np.int32)
    indptr = np.array([0, 2, 4, 5], dtype=np.int32)
    offset = np.array([10.0, 20.0, 30.0])

    lin = de.make_linear(var, data, indices, indptr, 3, 5, offset)
    objective = de.make_sum(de.make_log(lin), -1)

    problem = de.make_problem(objective, [])
    de.problem_init_derivatives(problem)

    point = np.ones(5)

    # At x = ones(5):
    #   y[0] = 1 + 2 + 10 = 13
    #   y[1] = 3 + 4 + 20 = 27
    #   y[2] = 5 + 30     = 35
    obj = de.problem_objective_forward(problem, point)
    np.testing.assert_allclose(obj, np.log(13.0) + np.log(27.0) + np.log(35.0), rtol=1e-5)

    # Each nonzero A[i][j] contributes A[i][j]/y[i] to grad[j]:
    #   grad = [1/13, 3/27, 2/13, 4/27, 5/35]
    grad = de.problem_gradient(problem)
    expected_grad = np.array([1.0/13.0, 3.0/27.0, 2.0/13.0, 4.0/27.0, 5.0/35.0])
    np.testing.assert_allclose(grad, expected_grad, rtol=1e-5)
33 changes: 31 additions & 2 deletions src/affine/linear_op.c
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
#include "affine.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Forward pass for the linear operator node: y = A * x + b.
 * Evaluates the child subtree first, then applies the sparse
 * matrix-vector product and the optional constant offset. */
static void forward(expr *node, const double *u)
{
    linear_op_expr *op = (linear_op_expr *) node;
    expr *child = node->left;

    /* Recurse into the child so child->value holds x. */
    child->forward(child, u);

    /* y = A * x */
    csr_matvec(op->A_csr, child->value, node->value, child->var_id);

    /* y += b when an offset vector was supplied (b may be NULL). */
    if (op->b != NULL)
    {
        for (int i = 0; i < node->size; i++)
        {
            node->value[i] += op->b[i];
        }
    }
}

static bool is_affine(const expr *node)
Expand All @@ -30,6 +41,13 @@ static void free_type_data(expr *node)
}

free_csc_matrix(lin_node->A_csc);

if (lin_node->b != NULL)
{
free(lin_node->b);
lin_node->b = NULL;
}

lin_node->A_csr = NULL;
lin_node->A_csc = NULL;
}
Expand All @@ -39,7 +57,7 @@ static void jacobian_init(expr *node)
node->jacobian = ((linear_op_expr *) node)->A_csr;
}

expr *new_linear(expr *u, const CSR_Matrix *A)
expr *new_linear(expr *u, const CSR_Matrix *A, const double *b)
{
assert(u->d2 == 1);
/* Allocate the type-specific struct */
Expand All @@ -55,5 +73,16 @@ expr *new_linear(expr *u, const CSR_Matrix *A)
copy_csr_matrix(A, lin_node->A_csr);
lin_node->A_csc = csr_to_csc(A);

/* Initialize offset (copy b if provided, otherwise NULL) */
if (b != NULL)
{
lin_node->b = (double *) malloc(A->m * sizeof(double));
memcpy(lin_node->b, b, A->m * sizeof(double));
}
else
{
lin_node->b = NULL;
}

return node;
}
2 changes: 1 addition & 1 deletion tests/forward_pass/affine/test_linear_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const char *test_linear_op()
memcpy(A->p, Ap, 4 * sizeof(int));

expr *var = new_variable(3, 1, 2, 6);
expr *linear_node = new_linear(var, A);
expr *linear_node = new_linear(var, A, NULL);
double x[6] = {0, 0, 1, 2, 3, 0};
linear_node->forward(linear_node, x);

Expand Down
6 changes: 3 additions & 3 deletions tests/jacobian_tests/test_composite.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const char *test_jacobian_composite_log()
memcpy(A->p, Ap, 3 * sizeof(int));

expr *u = new_variable(3, 1, 2, 6);
expr *Au = new_linear(u, A);
expr *Au = new_linear(u, A, NULL);
expr *log_node = new_log(Au);
log_node->forward(log_node, u_vals);
log_node->jacobian_init(log_node);
Expand Down Expand Up @@ -69,8 +69,8 @@ const char *test_jacobian_composite_log_add()

expr *x = new_variable(3, 1, 2, 7);
expr *y = new_variable(2, 1, 5, 7);
expr *Ax_expr = new_linear(x, A);
expr *By_expr = new_linear(y, B);
expr *Ax_expr = new_linear(x, A, NULL);
expr *By_expr = new_linear(y, B, NULL);
expr *log_Ax = new_log(Ax_expr);
expr *log_By = new_log(By_expr);
expr *sum = new_add(log_Ax, log_By);
Expand Down
Loading
Loading