diff --git a/include/affine.h b/include/affine.h index a4c65d7..5905e61 100644 --- a/include/affine.h +++ b/include/affine.h @@ -5,7 +5,7 @@ #include "subexpr.h" #include "utils/CSR_Matrix.h" -expr *new_linear(expr *u, const CSR_Matrix *A); +expr *new_linear(expr *u, const CSR_Matrix *A, const double *b); expr *new_add(expr *left, expr *right); expr *new_neg(expr *child); diff --git a/include/subexpr.h b/include/subexpr.h index d8e56a5..aef2be5 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -10,12 +10,13 @@ struct int_double_pair; /* Type-specific expression structures that "inherit" from expr */ -/* Linear operator: y = A * x */ +/* Linear operator: y = A * x + b */ typedef struct linear_op_expr { expr base; CSC_Matrix *A_csc; CSR_Matrix *A_csr; + double *b; /* constant offset vector (NULL if no offset) */ } linear_op_expr; /* Power: y = x^p */ diff --git a/python/atoms/linear.h b/python/atoms/linear.h index c7afcae..041e69e 100644 --- a/python/atoms/linear.h +++ b/python/atoms/linear.h @@ -7,9 +7,12 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args) { PyObject *child_capsule; PyObject *data_obj, *indices_obj, *indptr_obj; + PyObject *b_obj = Py_None; /* Optional offset vector */ int m, n; - if (!PyArg_ParseTuple(args, "OOOOii", &child_capsule, &data_obj, &indices_obj, - &indptr_obj, &m, &n)) + + /* Accept optional b array: (child, data, indices, indptr, m, n[, b]) */ + if (!PyArg_ParseTuple(args, "OOOOii|O", &child_capsule, &data_obj, &indices_obj, + &indptr_obj, &m, &n, &b_obj)) { return NULL; } @@ -46,8 +49,26 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args) Py_DECREF(indices_array); Py_DECREF(indptr_array); - expr *node = new_linear(child, A); + /* Parse optional b offset vector */ + double *b_data = NULL; + PyArrayObject *b_array = NULL; + + if (b_obj != Py_None) + { + b_array = (PyArrayObject *) PyArray_FROM_OTF(b_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!b_array) + { + free_csr_matrix(A); + return NULL; + } + b_data = (double *) PyArray_DATA(b_array); + } + + expr *node = new_linear(child, A, b_data); + + /* Clean up */ free_csr_matrix(A); + Py_XDECREF(b_array); if (!node) { diff --git a/python/tests/test_linear_op_offset.py b/python/tests/test_linear_op_offset.py new file mode 100644 index 0000000..025af5d --- /dev/null +++ b/python/tests/test_linear_op_offset.py @@ -0,0 +1,220 @@ +"""Tests for linear_op with constant offset: A @ x + b. + +Tests verify that linear_op correctly handles: +1. Forward pass: y = A @ x + b +2. Gradient computation via chain rule +""" + +import numpy as np +import dnlp_diff_engine as de + + +def test_linear_op_with_offset_forward(): + """Test linear_op(x, A, b) computes A @ x + b correctly in forward pass. + + Note: linear_op is an internal expression type and must be wrapped in + another expression (like log) to be used as an objective. + """ + n_vars = 2 + x = de.make_variable(n_vars, 1, 0, n_vars) # Column vector (n_vars x 1) + + # A @ x + b where A = [[1, 1]], b = [5] + # Should compute x[0] + x[1] + 5 + A_data = np.array([1.0, 1.0]) + A_indices = np.array([0, 1], dtype=np.int32) + A_indptr = np.array([0, 2], dtype=np.int32) + b = np.array([5.0]) + + linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b) + # Wrap in log to create a valid objective + log_expr = de.make_log(linear_with_offset) + + prob = de.make_problem(log_expr, []) + de.problem_init_derivatives(prob) + + # Test at u = [2.0, 3.0] + u = np.array([2.0, 3.0]) + + # Forward: log(2 + 3 + 5) = log(10) + obj = de.problem_objective_forward(prob, u) + assert np.isclose(obj, np.log(10.0)), f"Expected log(10), got {obj}" + + +def test_linear_op_with_offset_gradient(): + """Test gradient of log(A @ x + b).""" + n_vars = 2 + x = de.make_variable(n_vars, 1, 0, n_vars) # Column vector (n_vars x 1) + + # A @ x + b where A = [[1, 1]], b = [5] + # log(x[0] + x[1] + 5) + A_data = np.array([1.0, 1.0]) + A_indices = np.array([0, 1], dtype=np.int32) + A_indptr = np.array([0, 2], dtype=np.int32) + b = np.array([5.0]) + + linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b) + log_expr = de.make_log(linear_with_offset) + + prob = de.make_problem(log_expr, []) + de.problem_init_derivatives(prob) + + # Test at u = [2.0, 3.0] + u = np.array([2.0, 3.0]) + + # Forward: log(2 + 3 + 5) = log(10) + obj = de.problem_objective_forward(prob, u) + assert np.isclose(obj, np.log(10.0)) + + # Gradient: d/dx log(x+y+5) = 1/(x+y+5) for both + grad = de.problem_gradient(prob) + expected = 1.0 / 10.0 + np.testing.assert_allclose(grad, [expected, expected], rtol=1e-5) + + +def test_linear_op_without_offset(): + """Test linear_op(x, A) still works (no b parameter).""" + n_vars = 2 + x = de.make_variable(n_vars, 1, 0, n_vars) # Column vector (n_vars x 1) + + # A @ x where A = [[2, 3]] + A_data = np.array([2.0, 3.0]) + A_indices = np.array([0, 1], dtype=np.int32) + A_indptr = np.array([0, 2], dtype=np.int32) + + # No b parameter - pass None explicitly + linear_no_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, None) + log_expr = de.make_log(linear_no_offset) + + prob = de.make_problem(log_expr, []) + de.problem_init_derivatives(prob) + + u = np.array([1.0, 1.0]) + obj = de.problem_objective_forward(prob, u) + assert np.isclose(obj, np.log(5.0)) # log(2*1 + 3*1) + + grad = de.problem_gradient(prob) + # d/dx log(2x + 3y) = [2, 3] / (2x + 3y) = [2/5, 3/5] + np.testing.assert_allclose(grad, [2.0/5.0, 3.0/5.0], rtol=1e-5) + + +def test_linear_op_with_offset_hessian(): + """Test Hessian of log(A @ x + b).""" + n_vars = 2 + x = de.make_variable(n_vars, 1, 0, n_vars) # Column vector (n_vars x 1) + + # log(x[0] + x[1] + 5) + A_data = np.array([1.0, 1.0]) + A_indices = np.array([0, 1], dtype=np.int32) + A_indptr = np.array([0, 2], dtype=np.int32) + b = np.array([5.0]) + + linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b) + log_expr = de.make_log(linear_with_offset) + + prob = de.make_problem(log_expr, []) + de.problem_init_derivatives(prob) + + # Test at u = [2.0, 3.0] + u = np.array([2.0, 3.0]) + de.problem_objective_forward(prob, u) + + # Hessian: d²/dx² log(x+y+5) = -1/(x+y+5)² for all entries + obj_factor = 1.0 + hess_data, hess_indices, hess_indptr, hess_shape = de.problem_hessian( + prob, obj_factor, np.array([]) + ) + + # The Hessian should be [[h, h], [h, h]] where h = -1/100 + expected_h = -1.0 / 100.0 + + # Check that entries are correct + assert len(hess_data) >= 3, f"Expected at least 3 Hessian entries, got {len(hess_data)}" + + # Check values + for val in hess_data: + np.testing.assert_allclose(val, expected_h, rtol=1e-5) + + +def test_linear_op_vector_with_offset(): + """Test linear_op with vector output: y = A @ x + b where y is a vector.""" + n_vars = 3 + x = de.make_variable(n_vars, 1, 0, n_vars) # Column vector (n_vars x 1) + + # A is 2x3, b is 2-vector + # A = [[1, 2, 0], [0, 1, 3]] + # b = [1, 2] + # y[0] = x[0] + 2*x[1] + 1 + # y[1] = x[1] + 3*x[2] + 2 + A_data = np.array([1.0, 2.0, 1.0, 3.0]) + A_indices = np.array([0, 1, 1, 2], dtype=np.int32) + A_indptr = np.array([0, 2, 4], dtype=np.int32) + b = np.array([1.0, 2.0]) + + linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 2, 3, b) + + # sum(log(A @ x + b)) + log_expr = de.make_log(linear_with_offset) + sum_expr = de.make_sum(log_expr, -1) + + prob = de.make_problem(sum_expr, []) + de.problem_init_derivatives(prob) + + # Test at u = [1, 1, 1] + u = np.array([1.0, 1.0, 1.0]) + + # y[0] = 1 + 2 + 1 = 4 + # y[1] = 1 + 3 + 2 = 6 + # sum(log(y)) = log(4) + log(6) + obj = de.problem_objective_forward(prob, u) + expected_obj = np.log(4.0) + np.log(6.0) + np.testing.assert_allclose(obj, expected_obj, rtol=1e-5) + + # Gradient: + # d/dx[0] = 1/y[0] * 1 = 1/4 + # d/dx[1] = 1/y[0] * 2 + 1/y[1] * 1 = 2/4 + 1/6 = 0.5 + 0.1667 + # d/dx[2] = 1/y[1] * 3 = 3/6 = 0.5 + grad = de.problem_gradient(prob) + expected_grad = np.array([1.0/4.0, 2.0/4.0 + 1.0/6.0, 3.0/6.0]) + np.testing.assert_allclose(grad, expected_grad, rtol=1e-5) + + +def test_linear_op_sparse_matrix_with_offset(): + """Test linear_op with sparse matrix and offset.""" + n_vars = 5 + x = de.make_variable(n_vars, 1, 0, n_vars) # Column vector (n_vars x 1) + + # A is 3x5 sparse matrix (only some entries non-zero) + # A = [[1, 0, 2, 0, 0], + # [0, 3, 0, 4, 0], + # [0, 0, 0, 0, 5]] + # b = [10, 20, 30] + A_data = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + A_indices = np.array([0, 2, 1, 3, 4], dtype=np.int32) + A_indptr = np.array([0, 2, 4, 5], dtype=np.int32) + b = np.array([10.0, 20.0, 30.0]) + + linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 3, 5, b) + log_expr = de.make_log(linear_with_offset) + sum_expr = de.make_sum(log_expr, -1) + + prob = de.make_problem(sum_expr, []) + de.problem_init_derivatives(prob) + + # Test at u = [1, 1, 1, 1, 1] + u = np.ones(5) + + # y[0] = 1*1 + 2*1 + 10 = 13 + # y[1] = 3*1 + 4*1 + 20 = 27 + # y[2] = 5*1 + 30 = 35 + obj = de.problem_objective_forward(prob, u) + expected_obj = np.log(13.0) + np.log(27.0) + np.log(35.0) + np.testing.assert_allclose(obj, expected_obj, rtol=1e-5) + + grad = de.problem_gradient(prob) + # d/dx[0] = 1/13 + # d/dx[1] = 3/27 + # d/dx[2] = 2/13 + # d/dx[3] = 4/27 + # d/dx[4] = 5/35 + expected_grad = np.array([1.0/13.0, 3.0/27.0, 2.0/13.0, 4.0/27.0, 5.0/35.0]) + np.testing.assert_allclose(grad, expected_grad, rtol=1e-5) diff --git a/src/affine/linear_op.c b/src/affine/linear_op.c index 69ab9f6..0078f2e 100644 --- a/src/affine/linear_op.c +++ b/src/affine/linear_op.c @@ -1,16 +1,27 @@ #include "affine.h" #include #include +#include static void forward(expr *node, const double *u) { expr *x = node->left; + linear_op_expr *lin_node = (linear_op_expr *) node; /* child's forward pass */ node->left->forward(node->left, u); /* y = A * x */ - csr_matvec(((linear_op_expr *) node)->A_csr, x->value, node->value, x->var_id); + csr_matvec(lin_node->A_csr, x->value, node->value, x->var_id); + + /* y += b (if offset exists) */ + if (lin_node->b != NULL) + { + for (int i = 0; i < node->size; i++) + { + node->value[i] += lin_node->b[i]; + } + } } static bool is_affine(const expr *node) @@ -30,6 +41,13 @@ static void free_type_data(expr *node) } free_csc_matrix(lin_node->A_csc); + + if (lin_node->b != NULL) + { + free(lin_node->b); + lin_node->b = NULL; + } + lin_node->A_csr = NULL; lin_node->A_csc = NULL; } @@ -39,7 +57,7 @@ static void jacobian_init(expr *node) node->jacobian = ((linear_op_expr *) node)->A_csr; } -expr *new_linear(expr *u, const CSR_Matrix *A) +expr *new_linear(expr *u, const CSR_Matrix *A, const double *b) { assert(u->d2 == 1); /* Allocate the type-specific struct */ @@ -55,5 +73,16 @@ expr *new_linear(expr *u, const CSR_Matrix *A) copy_csr_matrix(A, lin_node->A_csr); lin_node->A_csc = csr_to_csc(A); + /* Initialize offset (copy b if provided, otherwise NULL) */ + if (b != NULL) + { + lin_node->b = (double *) malloc(A->m * sizeof(double)); + memcpy(lin_node->b, b, A->m * sizeof(double)); + } + else + { + lin_node->b = NULL; + } + return node; } diff --git a/tests/forward_pass/affine/test_linear_op.h b/tests/forward_pass/affine/test_linear_op.h index 72e06d4..6034ea8 100644 --- a/tests/forward_pass/affine/test_linear_op.h +++ b/tests/forward_pass/affine/test_linear_op.h @@ -24,7 +24,7 @@ const char *test_linear_op() memcpy(A->p, Ap, 4 * sizeof(int)); expr *var = new_variable(3, 1, 2, 6); - expr *linear_node = new_linear(var, A); + expr *linear_node = new_linear(var, A, NULL); double x[6] = {0, 0, 1, 2, 3, 0}; linear_node->forward(linear_node, x); diff --git a/tests/jacobian_tests/test_composite.h b/tests/jacobian_tests/test_composite.h index b35923a..0a52cc9 100644 --- a/tests/jacobian_tests/test_composite.h +++ b/tests/jacobian_tests/test_composite.h @@ -18,7 +18,7 @@ const char *test_jacobian_composite_log() memcpy(A->p, Ap, 3 * sizeof(int)); expr *u = new_variable(3, 1, 2, 6); - expr *Au = new_linear(u, A); + expr *Au = new_linear(u, A, NULL); expr *log_node = new_log(Au); log_node->forward(log_node, u_vals); log_node->jacobian_init(log_node); @@ -69,8 +69,8 @@ const char *test_jacobian_composite_log_add() expr *x = new_variable(3, 1, 2, 7); expr *y = new_variable(2, 1, 5, 7); - expr *Ax_expr = new_linear(x, A); - expr *By_expr = new_linear(y, B); + expr *Ax_expr = new_linear(x, A, NULL); + expr *By_expr = new_linear(y, B, NULL); expr *log_Ax = new_log(Ax_expr); expr *log_By = new_log(By_expr); expr *sum = new_add(log_Ax, log_By); diff --git a/tests/jacobian_tests/test_elementwise_mult.h b/tests/jacobian_tests/test_elementwise_mult.h index 3c5644e..5f10bf3 100644 --- a/tests/jacobian_tests/test_elementwise_mult.h +++ b/tests/jacobian_tests/test_elementwise_mult.h @@ -91,8 +91,8 @@ const char *test_jacobian_elementwise_mult_3() double u_vals[10] = {0, 0, 1.0, 2.0, 3.0, 0, 0, 4.0, 5.0, 6.0}; expr *x = new_variable(3, 1, 2, 10); expr *y = new_variable(3, 1, 7, 10); - expr *Ax = new_linear(x, A); - expr *By = new_linear(y, B); + expr *Ax = new_linear(x, A, NULL); + expr *By = new_linear(y, B, NULL); expr *node = new_elementwise_mult(Ax, By); node->forward(node, u_vals); @@ -141,7 +141,7 @@ const char *test_jacobian_elementwise_mult_4() double u_vals[10] = {0, 0, 1.0, 2.0, 3.0, 0, 0, 4.0, 5.0, 6.0}; expr *x = new_variable(3, 1, 2, 10); - expr *Ax = new_linear(x, A); + expr *Ax = new_linear(x, A, NULL); expr *node = new_elementwise_mult(Ax, Ax); node->forward(node, u_vals); diff --git a/tests/jacobian_tests/test_left_matmul.h b/tests/jacobian_tests/test_left_matmul.h index 09557ca..f79eee7 100644 --- a/tests/jacobian_tests/test_left_matmul.h +++ b/tests/jacobian_tests/test_left_matmul.h @@ -154,7 +154,7 @@ const char *test_jacobian_left_matmul_log_composite() memcpy(A->i, A_i, 7 * sizeof(int)); memcpy(A->x, A_x, 7 * sizeof(double)); - expr *Bx = new_linear(x, B); + expr *Bx = new_linear(x, B, NULL); expr *log_Bx = new_log(Bx); expr *A_log_Bx = new_left_matmul(log_Bx, A); diff --git a/tests/jacobian_tests/test_quad_form.h b/tests/jacobian_tests/test_quad_form.h index cea6176..48345ef 100644 --- a/tests/jacobian_tests/test_quad_form.h +++ b/tests/jacobian_tests/test_quad_form.h @@ -67,7 +67,7 @@ int Ap[4] = {0, 4, 7, 10}; memcpy(A->x, Ax, 10 * sizeof(double)); memcpy(A->i, Ai, 10 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); -expr *Au = new_linear(u, A); +expr *Au = new_linear(u, A, NULL); expr *node = new_quad_form(Au, Q); node->jacobian_init(node); diff --git a/tests/jacobian_tests/test_quad_over_lin.h b/tests/jacobian_tests/test_quad_over_lin.h index 3360846..24a24fa 100644 --- a/tests/jacobian_tests/test_quad_over_lin.h +++ b/tests/jacobian_tests/test_quad_over_lin.h @@ -74,7 +74,7 @@ const char *test_quad_over_lin3() // Create variables with global indices expr *x = new_variable(3, 1, 2, 8); expr *y = new_variable(1, 1, 7, 8); - expr *Ax_expr = new_linear(x, A); + expr *Ax_expr = new_linear(x, A, NULL); expr *node = new_quad_over_lin(Ax_expr, y); double u_vals[8] = {0, 0, 1.0, 2.0, 3.0, 0, 0, 4.0}; @@ -114,7 +114,7 @@ const char *test_quad_over_lin4() // Create variables with global indices expr *x = new_variable(3, 1, 5, 8); expr *y = new_variable(1, 1, 2, 8); - expr *Ax_expr = new_linear(x, A); + expr *Ax_expr = new_linear(x, A, NULL); expr *node = new_quad_over_lin(Ax_expr, y); double u_vals[8] = {0, 0, 4, 0, 0, 1.0, 2.0, 3.0}; @@ -154,7 +154,7 @@ const char *test_quad_over_lin5() // Create variables with global indices expr *u = new_variable(8, 1, 0, 8); expr *y = new_variable(1, 1, 2, 8); - expr *Ax_expr = new_linear(u, A); + expr *Ax_expr = new_linear(u, A, NULL); expr *node = new_quad_over_lin(Ax_expr, y); double u_vals[8] = {1, 2, 4, 3, 2, 1.0, 2.0, 3.0}; diff --git a/tests/wsum_hess/elementwise/test_log.h b/tests/wsum_hess/elementwise/test_log.h index 61b78c4..5df089a 100644 --- a/tests/wsum_hess/elementwise/test_log.h +++ b/tests/wsum_hess/elementwise/test_log.h @@ -61,7 +61,7 @@ const char *test_wsum_hess_log_composite() memcpy(A_csr->p, Ap, 4 * sizeof(int)); expr *x = new_variable(5, 1, 0, 5); - expr *Ax_node = new_linear(x, A_csr); + expr *Ax_node = new_linear(x, A_csr, NULL); expr *log_node = new_log(Ax_node); log_node->forward(log_node, u_vals); log_node->jacobian_init(log_node); diff --git a/tests/wsum_hess/test_left_matmul.h b/tests/wsum_hess/test_left_matmul.h index b1f0923..28d1cec 100644 --- a/tests/wsum_hess/test_left_matmul.h +++ b/tests/wsum_hess/test_left_matmul.h @@ -159,7 +159,7 @@ const char *test_wsum_hess_left_matmul_composite() memcpy(A->i, A_i, 7 * sizeof(int)); memcpy(A->x, A_x, 7 * sizeof(double)); - expr *Bx = new_linear(x, B); + expr *Bx = new_linear(x, B, NULL); expr *log_Bx = new_log(Bx); expr *A_log_Bx = new_left_matmul(log_Bx, A); diff --git a/tests/wsum_hess/test_multiply.h b/tests/wsum_hess/test_multiply.h index b74c426..ed95c67 100644 --- a/tests/wsum_hess/test_multiply.h +++ b/tests/wsum_hess/test_multiply.h @@ -68,8 +68,8 @@ const char *test_wsum_hess_multiply_sparse_random() /* Create variable and linear operators */ expr *x = new_variable(10, 1, 0, 10); - expr *Ax_node = new_linear(x, A); - expr *Bx_node = new_linear(x, B); + expr *Ax_node = new_linear(x, A, NULL); + expr *Bx_node = new_linear(x, B, NULL); /* Create multiply node */ expr *mult_node = new_elementwise_mult(Ax_node, Bx_node); @@ -149,8 +149,8 @@ const char *test_wsum_hess_multiply_linear_ops() /* Create linear operator expressions */ expr *x = new_variable(3, 1, 0, 3); - expr *Ax_node = new_linear(x, A); - expr *Bx_node = new_linear(x, B); + expr *Ax_node = new_linear(x, A, NULL); + expr *Bx_node = new_linear(x, B, NULL); /* Create elementwise multiply node */ expr *mult_node = new_elementwise_mult(Ax_node, Bx_node); diff --git a/tests/wsum_hess/test_sum.h b/tests/wsum_hess/test_sum.h index 20387c6..e6aff23 100644 --- a/tests/wsum_hess/test_sum.h +++ b/tests/wsum_hess/test_sum.h @@ -22,7 +22,7 @@ const char *test_wsum_hess_sum_log_linear() double w = 1.5; expr *x = new_variable(2, 1, 0, 2); - expr *Ax_node = new_linear(x, A); + expr *Ax_node = new_linear(x, A, NULL); expr *log_node = new_log(Ax_node); expr *sum_node = new_sum(log_node, -1);