From b9071a7c25dd0c8289026f18b78e5aeff55f1552 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Wed, 21 Jan 2026 21:53:18 -0500 Subject: [PATCH 1/2] Add diag_vec atom for creating diagonal matrices from vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements diag_vec which converts a vector of size n into an n×n diagonal matrix. Includes forward pass, Jacobian, and Hessian computations. Co-Authored-By: Claude Opus 4.5 --- include/affine.h | 1 + python/atoms/diag_vec.h | 32 +++++++ python/bindings.c | 2 + src/affine/diag_vec.c | 140 +++++++++++++++++++++++++++++++ src/dnlp_diff_engine/__init__.py | 1 + 5 files changed, 176 insertions(+) create mode 100644 python/atoms/diag_vec.h create mode 100644 src/affine/diag_vec.c diff --git a/include/affine.h b/include/affine.h index 5905e61..2405009 100644 --- a/include/affine.h +++ b/include/affine.h @@ -21,5 +21,6 @@ expr *new_variable(int d1, int d2, int var_id, int n_vars); expr *new_index(expr *child, int d1, int d2, const int *indices, int n_idxs); expr *new_reshape(expr *child, int d1, int d2); expr *new_broadcast(expr *child, int target_d1, int target_d2); +expr *new_diag_vec(expr *child); #endif /* AFFINE_H */ diff --git a/python/atoms/diag_vec.h b/python/atoms/diag_vec.h new file mode 100644 index 0000000..b1056d6 --- /dev/null +++ b/python/atoms/diag_vec.h @@ -0,0 +1,32 @@ +#ifndef ATOM_DIAG_VEC_H +#define ATOM_DIAG_VEC_H + +#include "common.h" + +static PyObject *py_make_diag_vec(PyObject *self, PyObject *args) +{ + PyObject *child_capsule; + + if (!PyArg_ParseTuple(args, "O", &child_capsule)) + { + return NULL; + } + + expr *child = (expr *) PyCapsule_GetPointer(child_capsule, EXPR_CAPSULE_NAME); + if (!child) + { + return NULL; + } + + expr *node = new_diag_vec(child); + if (!node) + { + PyErr_SetString(PyExc_RuntimeError, "failed to create diag_vec node"); + return NULL; + } + + expr_retain(node); + return PyCapsule_New(node, EXPR_CAPSULE_NAME, expr_capsule_destructor); +} + +#endif /* ATOM_DIAG_VEC_H */ diff --git a/python/bindings.c b/python/bindings.c index 10f263d..6998f0f 100644 --- a/python/bindings.c +++ b/python/bindings.c @@ -11,6 +11,7 @@ #include "atoms/const_vector_mult.h" #include "atoms/constant.h" #include "atoms/cos.h" +#include "atoms/diag_vec.h" #include "atoms/entr.h" #include "atoms/exp.h" #include "atoms/getters.h" @@ -95,6 +96,7 @@ static PyMethodDef DNLPMethods[] = { "Create prod_axis_one node"}, {"make_sin", py_make_sin, METH_VARARGS, "Create sin node"}, {"make_cos", py_make_cos, METH_VARARGS, "Create cos node"}, + {"make_diag_vec", py_make_diag_vec, METH_VARARGS, "Create diag_vec node"}, {"make_tan", py_make_tan, METH_VARARGS, "Create tan node"}, {"make_sinh", py_make_sinh, METH_VARARGS, "Create sinh node"}, {"make_tanh", py_make_tanh, METH_VARARGS, "Create tanh node"}, diff --git a/src/affine/diag_vec.c b/src/affine/diag_vec.c new file mode 100644 index 0000000..6f6a9b2 --- /dev/null +++ b/src/affine/diag_vec.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "affine.h" +#include +#include +#include + +/* diag_vec: converts a vector of size n into an n×n diagonal matrix. + * In Fortran (column-major) order, element i of the input maps to + * position i*(n+1) in the flattened output (the diagonal positions). */ + +static void forward(expr *node, const double *u) +{ + expr *x = node->left; + int n = x->size; + + /* child's forward pass */ + x->forward(x, u); + + /* zero-initialize output */ + memset(node->value, 0, node->size * sizeof(double)); + + /* place input elements on the diagonal */ + for (int i = 0; i < n; i++) + { + node->value[i * (n + 1)] = x->value[i]; + } +} + +static void jacobian_init(expr *node) +{ + expr *x = node->left; + int n = x->size; + x->jacobian_init(x); + + CSR_Matrix *Jx = x->jacobian; + + /* Output Jacobian has n² rows, but only n rows (diagonal positions) are non-zero. + * We allocate space for the same nnz as child, but with n² rows. */ + CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz); + + /* Build row pointers: rows at diagonal positions copy from child, + * all other rows are empty */ + J->p[0] = 0; + int child_row = 0; + for (int out_row = 0; out_row < node->size; out_row++) + { + if (out_row == child_row * (n + 1) && child_row < n) + { + /* This is a diagonal position - copy sparsity from child row */ + int len = Jx->p[child_row + 1] - Jx->p[child_row]; + memcpy(J->i + J->p[out_row], Jx->i + Jx->p[child_row], len * sizeof(int)); + J->p[out_row + 1] = J->p[out_row] + len; + child_row++; + } + else + { + /* Non-diagonal row - empty */ + J->p[out_row + 1] = J->p[out_row]; + } + } + + node->jacobian = J; +} + +static void eval_jacobian(expr *node) +{ + expr *x = node->left; + int n = x->size; + x->eval_jacobian(x); + + CSR_Matrix *J = node->jacobian; + CSR_Matrix *Jx = x->jacobian; + + /* Copy values from child Jacobian to diagonal positions */ + int child_row = 0; + for (int out_row = 0; out_row < node->size && child_row < n; out_row++) + { + if (out_row == child_row * (n + 1)) + { + int len = J->p[out_row + 1] - J->p[out_row]; + memcpy(J->x + J->p[out_row], Jx->x + Jx->p[child_row], len * sizeof(double)); + child_row++; + } + } +} + +static void wsum_hess_init(expr *node) +{ + expr *x = node->left; + + /* initialize child's wsum_hess */ + x->wsum_hess_init(x); + + /* workspace for extracting diagonal weights */ + node->dwork = (double *) calloc(x->size, sizeof(double)); + + /* Copy child's Hessian structure (diag_vec is linear, so its own Hessian is zero) */ + CSR_Matrix *Hx = x->wsum_hess; + node->wsum_hess = new_csr_matrix(Hx->m, Hx->n, Hx->nnz); + memcpy(node->wsum_hess->p, Hx->p, (Hx->m + 1) * sizeof(int)); + memcpy(node->wsum_hess->i, Hx->i, Hx->nnz * sizeof(int)); +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + expr *x = node->left; + int n = x->size; + + /* Extract weights from diagonal positions of w (which has n² elements) */ + for (int i = 0; i < n; i++) + { + node->dwork[i] = w[i * (n + 1)]; + } + + /* Evaluate child's Hessian with extracted weights */ + x->eval_wsum_hess(x, node->dwork); + memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +expr *new_diag_vec(expr *child) +{ + /* child must be a vector: either column (n, 1) or row (1, n) */ + assert(child->d1 == 1 || child->d2 == 1); + + /* n is the number of elements (works for both row and column vectors) */ + int n = child->size; + expr *node = (expr *) calloc(1, sizeof(expr)); + init_expr(node, n, n, child->n_vars, forward, jacobian_init, eval_jacobian, + is_affine, wsum_hess_init, eval_wsum_hess, NULL); + node->left = child; + expr_retain(child); + + return node; +} diff --git a/src/dnlp_diff_engine/__init__.py b/src/dnlp_diff_engine/__init__.py index b1ab670..fae5fff 100644 --- a/src/dnlp_diff_engine/__init__.py +++ b/src/dnlp_diff_engine/__init__.py @@ -20,6 +20,7 @@ "make_promote", "make_index", "make_reshape", + "make_diag_vec", "make_log", "make_exp", "make_power", From 1168e68a188f5a5309e10f1aae27f4b574376d4c Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Wed, 21 Jan 2026 22:04:31 -0500 Subject: [PATCH 2/2] Clean up diag_vec jacobian_init and eval_jacobian - Use standard CSR building pattern (J->p[row] = nnz) - Use next_diag counter instead of checking row == child_row * (n+1) - Simplify eval_jacobian to O(n) loop computing out_row directly Co-Authored-By: Claude Opus 4.5 --- src/affine/diag_vec.c | 44 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/src/affine/diag_vec.c b/src/affine/diag_vec.c index 6f6a9b2..5d2a303 100644 --- a/src/affine/diag_vec.c +++ b/src/affine/diag_vec.c @@ -34,31 +34,25 @@ static void jacobian_init(expr *node) x->jacobian_init(x); CSR_Matrix *Jx = x->jacobian; - - /* Output Jacobian has n² rows, but only n rows (diagonal positions) are non-zero. - * We allocate space for the same nnz as child, but with n² rows. */ CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz); - /* Build row pointers: rows at diagonal positions copy from child, - * all other rows are empty */ - J->p[0] = 0; - int child_row = 0; - for (int out_row = 0; out_row < node->size; out_row++) + /* Output has n² rows but only n diagonal positions are non-empty. + * Diagonal position i is at row i*(n+1) in Fortran order. */ + int nnz = 0; + int next_diag = 0; + for (int row = 0; row < node->size; row++) { - if (out_row == child_row * (n + 1) && child_row < n) + J->p[row] = nnz; + if (row == next_diag) { - /* This is a diagonal position - copy sparsity from child row */ + int child_row = row / (n + 1); int len = Jx->p[child_row + 1] - Jx->p[child_row]; - memcpy(J->i + J->p[out_row], Jx->i + Jx->p[child_row], len * sizeof(int)); - J->p[out_row + 1] = J->p[out_row] + len; - child_row++; - } - else - { - /* Non-diagonal row - empty */ - J->p[out_row + 1] = J->p[out_row]; + memcpy(J->i + nnz, Jx->i + Jx->p[child_row], len * sizeof(int)); + nnz += len; + next_diag += n + 1; } } + J->p[node->size] = nnz; node->jacobian = J; } @@ -72,16 +66,12 @@ static void eval_jacobian(expr *node) CSR_Matrix *J = node->jacobian; CSR_Matrix *Jx = x->jacobian; - /* Copy values from child Jacobian to diagonal positions */ - int child_row = 0; - for (int out_row = 0; out_row < node->size && child_row < n; out_row++) + /* Copy values from child row i to output diagonal row i*(n+1) */ + for (int i = 0; i < n; i++) { - if (out_row == child_row * (n + 1)) - { - int len = J->p[out_row + 1] - J->p[out_row]; - memcpy(J->x + J->p[out_row], Jx->x + Jx->p[child_row], len * sizeof(double)); - child_row++; - } + int out_row = i * (n + 1); + int len = J->p[out_row + 1] - J->p[out_row]; + memcpy(J->x + J->p[out_row], Jx->x + Jx->p[i], len * sizeof(double)); } }