diff --git a/include/bivariate.h b/include/bivariate.h index 77623e1..4a3e674 100644 --- a/include/bivariate.h +++ b/include/bivariate.h @@ -10,4 +10,10 @@ expr *new_quad_over_lin(expr *left, expr *right); expr *new_rel_entr_first_arg_scalar(expr *left, expr *right); expr *new_rel_entr_second_arg_scalar(expr *left, expr *right); +/* Left matrix multiplication: A @ f(x) where A is a constant matrix */ +expr *new_left_matmul(expr *u, const CSR_Matrix *A); + +/* Right matrix multiplication: f(x) @ A where A is a constant matrix */ +expr *new_right_matmul(expr *u, const CSR_Matrix *A); + #endif /* BIVARIATE_H */ diff --git a/include/subexpr.h b/include/subexpr.h index 1b6d6a2..4368662 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -73,4 +73,26 @@ typedef struct elementwise_mult_expr CSR_Matrix *CSR_work2; } elementwise_mult_expr; +/* Left matrix multiplication: y = A * f(x) where f(x) is an expression. Note that +here A does not have global column indices but it is a local matrix. This is an +important distinction compared to linear_op_expr. */ +typedef struct left_matmul_expr +{ + expr base; + CSR_Matrix *A; + CSR_Matrix *AT; + CSC_Matrix *CSC_work; +} left_matmul_expr; + +/* Right matrix multiplication: y = f(x) * A where f(x) is an expression. + * f(x) has shape p x n, A has shape n x q, output y has shape p x q. + * Uses vec(y) = B * vec(f(x)) where B = A^T kron I_p. 
*/ +typedef struct right_matmul_expr +{ + expr base; + CSR_Matrix *B; /* B = A^T kron I_p */ + CSR_Matrix *BT; /* B^T for backpropagating Hessian weights */ + CSC_Matrix *CSC_work; +} right_matmul_expr; + #endif /* SUBEXPR_H */ diff --git a/include/utils/CSC_Matrix.h b/include/utils/CSC_Matrix.h index 51e6f6a..ed044dc 100644 --- a/include/utils/CSC_Matrix.h +++ b/include/utils/CSC_Matrix.h @@ -49,4 +49,18 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, */ void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C); +CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork); +void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork); + +/* Allocate CSR matrix for C = A @ B where A is CSR, B is CSC + * Precomputes sparsity pattern. No workspace required. + */ +CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B); + +/* Fill values of C = A @ B where A is CSR, B is CSC + * C must have sparsity pattern already computed + */ +void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B, + CSR_Matrix *C); + #endif /* CSC_MATRIX_H */ diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_Matrix.h index 6dbeb24..94a3b0d 100644 --- a/include/utils/CSR_Matrix.h +++ b/include/utils/CSR_Matrix.h @@ -34,9 +34,18 @@ void free_csr_matrix(CSR_Matrix *matrix); /* Copy CSR matrix A to C */ void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C); +/* Build block-diagonal repeat A_blk = I_p kron A. Returns newly allocated CSR + * matrix of size (p*A->m) x (p*A->n) with nnz = p*A->nnz. */ +CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p); + +/* Build left-repeated Kronecker A_kron = A kron I_p. Returns newly allocated CSR + * matrix of size (A->m * p) x (A->n * p) with nnz = A->nnz * p. */ +CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p); + /* matvec y = Ax, where A indices minus col_offset gives x indices. Returns y as * dense. 
*/ void csr_matvec(const CSR_Matrix *A, const double *x, double *y, int col_offset); +void csr_matvec_wo_offset(const CSR_Matrix *A, const double *x, double *y); /* C = z^T A is assumed to have one row. C must have column indices pre-computed and transposed matrix AT must be provided. Fills in values of C only. @@ -141,6 +150,7 @@ void insert_idx(int idx, int *arr, int len); double csr_get_value(const CSR_Matrix *A, int row, int col); +/* iwork must be of size A->n*/ CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork); CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork); diff --git a/src/affine/linear_op.c b/src/affine/linear_op.c index 4e9caa8..89a258a 100644 --- a/src/affine/linear_op.c +++ b/src/affine/linear_op.c @@ -1,4 +1,5 @@ #include "affine.h" +#include #include static void forward(expr *node, const double *u) @@ -9,7 +10,7 @@ static void forward(expr *node, const double *u) node->left->forward(node->left, u); /* y = A * x */ - csr_matvec(node->jacobian, x->value, node->value, x->var_id); + csr_matvec(((linear_op_expr *) node)->A_csr, x->value, node->value, x->var_id); } static bool is_affine(const expr *node) @@ -20,21 +21,31 @@ static bool is_affine(const expr *node) static void free_type_data(expr *node) { linear_op_expr *lin_node = (linear_op_expr *) node; - /* memory pointing to by A_csr will already be freed when the - jacobian is freed, so free_csr_matrix(lin_node->A_csr) should - be commented out */ - // free_csr_matrix(lin_node->A_csr); + /* memory pointing to by A_csr will be freed when the jacobian is freed, + so if the jacobian is not null we must not free A_csr. 
*/ + + if (!node->jacobian) + { + free_csr_matrix(lin_node->A_csr); + } + free_csc_matrix(lin_node->A_csc); lin_node->A_csr = NULL; lin_node->A_csc = NULL; } +static void jacobian_init(expr *node) +{ + node->jacobian = ((linear_op_expr *) node)->A_csr; +} + expr *new_linear(expr *u, const CSR_Matrix *A) { + assert(u->d2 == 1); /* Allocate the type-specific struct */ linear_op_expr *lin_node = (linear_op_expr *) calloc(1, sizeof(linear_op_expr)); expr *node = &lin_node->base; - init_expr(node, A->m, 1, u->n_vars, forward, NULL, NULL, is_affine, + init_expr(node, A->m, 1, u->n_vars, forward, jacobian_init, NULL, is_affine, free_type_data); node->left = u; expr_retain(u); @@ -44,8 +55,5 @@ expr *new_linear(expr *u, const CSR_Matrix *A) copy_csr_matrix(A, lin_node->A_csr); lin_node->A_csc = csr_to_csc(A); - /* what if we have A @ phi(x). Then I don't think this is correct. */ - node->jacobian = lin_node->A_csr; - return node; } diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c new file mode 100644 index 0000000..517f48c --- /dev/null +++ b/src/bivariate/left_matmul.c @@ -0,0 +1,136 @@ +#include "bivariate.h" +#include "subexpr.h" +#include + +/* This file implement the atom 'left_matmul' corresponding to the operation y = + A @ f(x), where A is a given matrix and f(x) is an arbitrary expression. + Here, f(x) can be a vector-valued expression and a matrix-valued + expression. The dimensions are A - m x n, f(x) - n x p, y - m x p. + Note that here A does not have global column indices but it is a local matrix. + This is an important distinction compared to linear_op_expr. + + * To compute the forward pass: vec(y) = A_kron @ vec(f(x)), + where A_kron = I_p kron A is a Kronecker product of size (m*p) x (n*p), + or more specificely, a block-diagonal matrix with p blocks of A along the + diagonal. + + * To compute the Jacobian: J_y = A_kron @ J_f(x), where J_f(x) is the + Jacobian of f(x) of size (n*p) x n_vars. 
+ + * To compute the contribution to the Lagrange Hessian: we form + w = A_kron^T @ lambda and then evaluate the hessian of f(x). + + Working in terms of A_kron unifies the implementation of f(x) being + vector-valued or matrix-valued. + + +*/ + +// todo: put this in common somewhere +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static void forward(expr *node, const double *u) +{ + expr *x = node->left; + + /* child's forward pass */ + node->left->forward(node->left, u); + + /* y = A_kron @ vec(f(x)) */ + csr_matvec_wo_offset(((left_matmul_expr *) node)->A, x->value, node->value); +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +static void free_type_data(expr *node) +{ + left_matmul_expr *lin_node = (left_matmul_expr *) node; + free_csr_matrix(lin_node->A); + free_csr_matrix(lin_node->AT); + if (lin_node->CSC_work) + { + free_csc_matrix(lin_node->CSC_work); + } + lin_node->A = NULL; + lin_node->AT = NULL; + lin_node->CSC_work = NULL; +} + +static void jacobian_init(expr *node) +{ + expr *x = node->left; + left_matmul_expr *lin_node = (left_matmul_expr *) node; + + /* initialize child's jacobian and precompute sparsity of its transpose */ + x->jacobian_init(x); + lin_node->CSC_work = csr_to_csc_fill_sparsity(x->jacobian, node->iwork); + + /* precompute sparsity of this node's jacobian */ + node->jacobian = csr_csc_matmul_alloc(lin_node->A, lin_node->CSC_work); +} + +static void eval_jacobian(expr *node) +{ + expr *x = node->left; + left_matmul_expr *lin_node = (left_matmul_expr *) node; + + /* evaluate child's jacobian and convert to CSC */ + x->eval_jacobian(x); + csr_to_csc_fill_values(x->jacobian, lin_node->CSC_work, node->iwork); + + /* compute this node's jacobian */ + csr_csc_matmul_fill_values(lin_node->A, lin_node->CSC_work, node->jacobian); +} + +static void wsum_hess_init(expr *node) +{ + /* initialize child's hessian */ + expr *x = node->left; + x->wsum_hess_init(x); + + /* allocate this node's hessian 
with the same sparsity as child's */ + node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, x->wsum_hess->nnz); + memcpy(node->wsum_hess->p, x->wsum_hess->p, (node->n_vars + 1) * sizeof(int)); + memcpy(node->wsum_hess->i, x->wsum_hess->i, x->wsum_hess->nnz * sizeof(int)); + + /* work for computing A^T w*/ + int A_n = ((left_matmul_expr *) node)->A->n; + node->dwork = (double *) malloc(A_n * sizeof(double)); +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + /* compute A^T w*/ + left_matmul_expr *lin_node = (left_matmul_expr *) node; + csr_matvec_wo_offset(lin_node->AT, w, node->dwork); + + node->left->eval_wsum_hess(node->left, node->dwork); + memcpy(node->wsum_hess->x, node->left->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); +} + +expr *new_left_matmul(expr *u, const CSR_Matrix *A) +{ + /* Allocate the type-specific struct */ + left_matmul_expr *lin_node = + (left_matmul_expr *) calloc(1, sizeof(left_matmul_expr)); + expr *node = &lin_node->base; + init_expr(node, A->m, u->d2, u->n_vars, forward, jacobian_init, eval_jacobian, + is_affine, free_type_data); + node->left = u; + expr_retain(u); + node->wsum_hess_init = wsum_hess_init; + node->eval_wsum_hess = eval_wsum_hess; + + /* Initialize type-specific fields */ + lin_node->A = block_diag_repeat_csr(A, node->d2); + int alloc = MAX(lin_node->A->n, node->n_vars); + node->iwork = (int *) malloc(alloc * sizeof(int)); + lin_node->AT = transpose(lin_node->A, node->iwork); + lin_node->CSC_work = NULL; + + return node; +} diff --git a/src/bivariate/multiply.c b/src/bivariate/multiply.c index 63c9290..a6f9667 100644 --- a/src/bivariate/multiply.c +++ b/src/bivariate/multiply.c @@ -29,18 +29,8 @@ static void forward(expr *node, const double *u) static void jacobian_init(expr *node) { - /* if a child is a variable we initialize its jacobian for a - short chain rule implementation */ - if (node->left->var_id != NOT_A_VARIABLE) - { - node->left->jacobian_init(node->left); - } - - if 
(node->right->var_id != NOT_A_VARIABLE) - { - node->right->jacobian_init(node->right); - } - + node->left->jacobian_init(node->left); + node->right->jacobian_init(node->right); node->dwork = (double *) malloc(2 * node->size * sizeof(double)); int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz; node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max); @@ -180,6 +170,9 @@ static void eval_wsum_hess(expr *node, const double *w) /* Compute CT = C^T = A^T diag(w) B */ AT_fill_values(C, CT, node->iwork); + /* TODO: should fill sparsity before. Maybe we can map values from C to + * hessian directly instead?*/ + /* Hessian = C + CT = B^T diag(w) A + A^T diag(w) B */ sum_csr_matrices(C, CT, node->wsum_hess); } diff --git a/src/bivariate/quad_over_lin.c b/src/bivariate/quad_over_lin.c index adc4a2b..41b8dc5 100644 --- a/src/bivariate/quad_over_lin.c +++ b/src/bivariate/quad_over_lin.c @@ -68,7 +68,7 @@ static void jacobian_init(expr *node) /* compute required allocation and allocate jacobian */ bool *col_nz = (bool *) calloc( node->n_vars, sizeof(bool)); /* TODO: could use iwork here instead*/ - int nonzero_cols = count_nonzero_cols(lin_x->base.jacobian, col_nz); + int nonzero_cols = count_nonzero_cols(lin_x->A_csr, col_nz); node->jacobian = new_csr_matrix(1, node->n_vars, nonzero_cols + 1); /* precompute column indices */ diff --git a/src/bivariate/right_matmul.c b/src/bivariate/right_matmul.c new file mode 100644 index 0000000..0c349fb --- /dev/null +++ b/src/bivariate/right_matmul.c @@ -0,0 +1,135 @@ +#include "bivariate.h" +#include "subexpr.h" +#include + +/* This file implements the atom 'right_matmul' corresponding to the operation y = + f(x) @ A, where A is a given matrix and f(x) is an arbitrary expression. + Here, f(x) can be a vector-valued expression and a matrix-valued + expression. The dimensions are f(x) - p x n, A - n x q, y - p x q. + Note that here A does not have global column indices but it is a local matrix. 
+ This is an important distinction compared to linear_op_expr. + + * To compute the forward pass: vec(y) = B @ vec(f(x)), + where B = A^T kron I_p is a Kronecker product of size (p*q) x (p*n). + + * To compute the Jacobian: J_y = B @ J_f(x), where J_f(x) is the + Jacobian of f(x) of size (p*n) x n_vars. + + * To compute the contribution to the Lagrange Hessian: we form + w_child = B^T @ w and then evaluate the hessian of f(x). +*/ + +static void forward(expr *node, const double *u) +{ + expr *x = node->left; + + /* child's forward pass */ + node->left->forward(node->left, u); + + /* y = x * A, vec(y) = B @ vec(x) */ + csr_matvec_wo_offset(((right_matmul_expr *) node)->B, x->value, node->value); +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +static void free_type_data(expr *node) +{ + right_matmul_expr *right_node = (right_matmul_expr *) node; + free_csr_matrix(right_node->B); + free_csr_matrix(right_node->BT); + if (right_node->CSC_work) + { + free_csc_matrix(right_node->CSC_work); + } + right_node->B = NULL; + right_node->BT = NULL; + right_node->CSC_work = NULL; +} + +static void jacobian_init(expr *node) +{ + expr *x = node->left; + right_matmul_expr *right_node = (right_matmul_expr *) node; + + /* initialize child's jacobian and precompute sparsity of its transpose */ + x->jacobian_init(x); + right_node->CSC_work = csr_to_csc_fill_sparsity(x->jacobian, node->iwork); + + /* precompute sparsity of this node's jacobian */ + node->jacobian = csr_csc_matmul_alloc(right_node->B, right_node->CSC_work); +} + +static void eval_jacobian(expr *node) +{ + expr *x = node->left; + right_matmul_expr *right_node = (right_matmul_expr *) node; + + /* evaluate child's jacobian and convert to CSC*/ + x->eval_jacobian(x); + csr_to_csc_fill_values(x->jacobian, right_node->CSC_work, node->iwork); + + /* compute this node's jacobian */ + csr_csc_matmul_fill_values(right_node->B, right_node->CSC_work, node->jacobian); +} + +static 
void wsum_hess_init(expr *node) +{ + /* initialize child's hessian */ + expr *x = node->left; + x->wsum_hess_init(x); + + /* allocate this node's hessian with the same sparsity as child's */ + node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, x->wsum_hess->nnz); + memcpy(node->wsum_hess->p, x->wsum_hess->p, (node->n_vars + 1) * sizeof(int)); + memcpy(node->wsum_hess->i, x->wsum_hess->i, x->wsum_hess->nnz * sizeof(int)); + + /* Allocate workspace for B^T @ w */ + node->dwork = (double *) malloc(x->d1 * x->d2 * sizeof(double)); +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + /* Compute B^T @ w, where B = A^T ⊗ I_p */ + right_matmul_expr *right_node = (right_matmul_expr *) node; + + /* B^T @ w computes the weights for the child expression */ + csr_matvec_wo_offset(right_node->BT, w, node->dwork); + + /* Propagate to child */ + node->left->eval_wsum_hess(node->left, node->dwork); + memcpy(node->wsum_hess->x, node->left->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); +} + +expr *new_right_matmul(expr *u, const CSR_Matrix *A) +{ + /* Allocate the type-specific struct */ + right_matmul_expr *right_matmul_node = + (right_matmul_expr *) calloc(1, sizeof(right_matmul_expr)); + expr *node = &right_matmul_node->base; + + /* Output dimensions: u is p x n, A is n x q, output is p x q */ + int p = u->d1; + int q = A->n; + + init_expr(node, p, q, u->n_vars, forward, jacobian_init, eval_jacobian, + is_affine, free_type_data); + node->left = u; + expr_retain(u); + node->wsum_hess_init = wsum_hess_init; + node->eval_wsum_hess = eval_wsum_hess; + + /* create B = A^T kron I_p and its transpose */ + node->iwork = (int *) malloc(node->n_vars * sizeof(int)); + CSR_Matrix *AT = transpose(A, node->iwork); + right_matmul_node->B = kron_identity_csr(AT, p); + right_matmul_node->BT = kron_identity_csr(A, p); + free_csr_matrix(AT); + + right_matmul_node->CSC_work = NULL; + + return node; +} diff --git a/src/elementwise_univariate/common.c 
b/src/elementwise_univariate/common.c index c1c412e..2af054e 100644 --- a/src/elementwise_univariate/common.c +++ b/src/elementwise_univariate/common.c @@ -21,7 +21,7 @@ void jacobian_init_elementwise(expr *node) /* otherwise it will be a linear operator */ else { - CSR_Matrix *J = child->jacobian; + CSR_Matrix *J = ((linear_op_expr *) child)->A_csr; node->jacobian = new_csr_matrix(J->m, J->n, J->nnz); node->dwork = (double *) malloc(node->size * sizeof(double)); diff --git a/src/expr.c b/src/expr.c index fe6d1ac..3e8bad3 100644 --- a/src/expr.c +++ b/src/expr.c @@ -41,6 +41,12 @@ void free_expr(expr *node) node->left = NULL; node->right = NULL; + /* free type-specific data */ + if (node->free_type_data) + { + node->free_type_data(node); + } + /* free value array and jacobian */ free(node->value); free_csr_matrix(node->jacobian); @@ -53,12 +59,6 @@ void free_expr(expr *node) node->dwork = NULL; node->iwork = NULL; - /* free type-specific data */ - if (node->free_type_data) - { - node->free_type_data(node); - } - /* free the node itself */ free(node); } diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c index 841c2b3..d4389ab 100644 --- a/src/utils/CSC_Matrix.c +++ b/src/utils/CSC_Matrix.c @@ -122,6 +122,35 @@ static inline double sparse_wdot(const double *a_x, const int *a_i, int a_nnz, return sum; } +/* Unweighted sparse dot product of two sorted index arrays */ +static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz, + const double *b_x, const int *b_i, int b_nnz) +{ + int ii = 0; + int jj = 0; + double sum = 0.0; + + while (ii < a_nnz && jj < b_nnz) + { + if (a_i[ii] == b_i[jj]) + { + sum += a_x[ii] * b_x[jj]; + ii++; + jj++; + } + else if (a_i[ii] < b_i[jj]) + { + ii++; + } + else + { + jj++; + } + } + + return sum; +} + void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C) { int i, j, ii, jj; @@ -196,6 +225,70 @@ CSC_Matrix *csr_to_csc(const CSR_Matrix *A) free(count); return C; } + +CSC_Matrix 
*csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork) +{ + CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz); + + int i, j, start; + int *count = iwork; + memset(count, 0, A->n * sizeof(int)); + + // ------------------------------------------------------------------- + // compute nnz in each column of A + // ------------------------------------------------------------------- + for (i = 0; i < A->m; ++i) + { + for (j = A->p[i]; j < A->p[i + 1]; ++j) + { + count[A->i[j]]++; + } + } + + // ------------------------------------------------------------------ + // compute column pointers + // ------------------------------------------------------------------ + C->p[0] = 0; + for (i = 0; i < A->n; ++i) + { + C->p[i + 1] = C->p[i] + count[i]; + count[i] = C->p[i]; + } + + // ------------------------------------------------------------------ + // fill row indices + // ------------------------------------------------------------------ + for (i = 0; i < A->m; ++i) + { + for (j = A->p[i]; j < A->p[i + 1]; ++j) + { + C->i[count[A->i[j]]] = i; + count[A->i[j]]++; + } + } + + return C; +} + +void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork) +{ + int i, j; + int *count = iwork; + memcpy(count, C->p, A->n * sizeof(int)); + + // ------------------------------------------------------------------ + // fill values + // ------------------------------------------------------------------ + for (i = 0; i < A->m; ++i) + { + for (j = A->p[i]; j < A->p[i + 1]; ++j) + { + C->x[count[A->i[j]]] = A->x[j]; + count[A->i[j]]++; + } + } +} + CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) { /* A is m x n, B is m x p, C = B^T A is p x n */ @@ -304,3 +397,88 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, } } } + +static bool has_overlap(const int *a_idx, int a_len, const int *b_idx, int b_len) +{ + int ai = 0, bi = 0; + while (ai < a_len && bi < b_len) + { + if (a_idx[ai] == b_idx[bi]) return true; + if (a_idx[ai] < 
b_idx[bi]) + { + ai++; + } + else + { + bi++; + } + } + return false; +} + +/* Fill values of C = A @ B where A is CSR, B is CSC. */ +void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B, + CSR_Matrix *C) +{ + for (int i = 0; i < A->m; i++) + { + for (int jj = C->p[i]; jj < C->p[i + 1]; jj++) + { + int j = C->i[jj]; + + int a_nnz = A->p[i + 1] - A->p[i]; + int b_nnz = B->p[j + 1] - B->p[j]; + + /* Compute dot product of row i of A and column j of B */ + double sum = sparse_dot(A->x + A->p[i], A->i + A->p[i], a_nnz, + B->x + B->p[j], B->i + B->p[j], b_nnz); + + C->x[jj] = sum; + } + } +} + +/* C = A @ B where A is CSR (m x n), B is CSC (n x p). Result C is CSR (m x p) + with precomputed sparsity pattern */ +CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B) +{ + int m = A->m; + int p = B->n; + + int len_a, len_b; + + int *Cp = (int *) malloc((m + 1) * sizeof(int)); + iVec *Ci = iVec_new(m); + + Cp[0] = 0; + + // -------------------------------------------------------------- + // countz nnz and fill column indices + // -------------------------------------------------------------- + int nnz = 0; + for (int i = 0; i < A->m; i++) + { + len_a = A->p[i + 1] - A->p[i]; + + for (int j = 0; j < B->n; j++) + { + len_b = B->p[j + 1] - B->p[j]; + + if (has_overlap(A->i + A->p[i], len_a, B->i + B->p[j], len_b)) + { + iVec_append(Ci, j); + nnz++; + } + } + + Cp[i + 1] = nnz; + } + + CSR_Matrix *C = new_csr_matrix(m, p, nnz); + memcpy(C->p, Cp, (m + 1) * sizeof(int)); + memcpy(C->i, Ci->data, nnz * sizeof(int)); + free(Cp); + iVec_free(Ci); + + return C; +} diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c index 6b63c7e..2dbae1a 100644 --- a/src/utils/CSR_Matrix.c +++ b/src/utils/CSR_Matrix.c @@ -39,6 +39,76 @@ void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C) memcpy(C->x, A->x, A->nnz * sizeof(double)); } +CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p) +{ + assert(p > 0); + + int m = A->m; + int n = 
A->n; + int nnz = A->nnz; + + CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p); + + int nnz_cursor = 0; + for (int block = 0; block < p; block++) + { + int row_offset = block * m; + int col_offset = block * n; + + for (int row = 0; row < m; row++) + { + int dest_row = row_offset + row; + A_kron->p[dest_row] = nnz_cursor; + + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + A_kron->i[nnz_cursor] = A->i[j] + col_offset; + A_kron->x[nnz_cursor] = A->x[j]; + nnz_cursor++; + } + + A_kron->p[dest_row + 1] = nnz_cursor; + } + } + + return A_kron; +} + +CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p) +{ + assert(p > 0); + + int m = A->m; + int n = A->n; + int nnz = A->nnz; + + CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p); + + int nnz_cursor = 0; + for (int row_block = 0; row_block < m; row_block++) + { + for (int diag_idx = 0; diag_idx < p; diag_idx++) + { + int dest_row = row_block * p + diag_idx; + A_kron->p[dest_row] = nnz_cursor; + + /* Copy entries from row_block of A, adjusting column indices */ + for (int j = A->p[row_block]; j < A->p[row_block + 1]; j++) + { + int col_block = A->i[j]; + /* Column in result: col_block * p + diag_idx */ + A_kron->i[nnz_cursor] = col_block * p + diag_idx; + A_kron->x[nnz_cursor] = A->x[j]; + nnz_cursor++; + } + + A_kron->p[dest_row + 1] = nnz_cursor; + } + } + + return A_kron; +} + void csr_matvec(const CSR_Matrix *A, const double *x, double *y, int col_offset) { for (int row = 0; row < A->m; row++) @@ -52,6 +122,19 @@ void csr_matvec(const CSR_Matrix *A, const double *x, double *y, int col_offset) } } +void csr_matvec_wo_offset(const CSR_Matrix *A, const double *x, double *y) +{ + for (int row = 0; row < A->m; row++) + { + double sum = 0.0; + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + sum += A->x[j] * x[A->i[j]]; + } + y[row] = sum; + } +} + int count_nonzero_cols(const CSR_Matrix *A, bool *col_nz) { for (int row = 0; row < A->m; row++) diff --git a/tests/all_tests.c 
b/tests/all_tests.c index f59d647..6486947 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -16,6 +16,7 @@ #include "jacobian_tests/test_composite.h" #include "jacobian_tests/test_elementwise_mult.h" #include "jacobian_tests/test_hstack.h" +#include "jacobian_tests/test_left_matmul.h" #include "jacobian_tests/test_log.h" #include "jacobian_tests/test_neg.h" #include "jacobian_tests/test_prod.h" @@ -23,6 +24,7 @@ #include "jacobian_tests/test_quad_form.h" #include "jacobian_tests/test_quad_over_lin.h" #include "jacobian_tests/test_rel_entr.h" +#include "jacobian_tests/test_right_matmul.h" #include "jacobian_tests/test_sum.h" #include "jacobian_tests/test_trace.h" #include "problem/test_problem.h" @@ -37,11 +39,13 @@ #include "wsum_hess/elementwise/test_trig.h" #include "wsum_hess/elementwise/test_xexp.h" #include "wsum_hess/test_hstack.h" +#include "wsum_hess/test_left_matmul.h" #include "wsum_hess/test_multiply.h" #include "wsum_hess/test_prod.h" #include "wsum_hess/test_quad_form.h" #include "wsum_hess/test_quad_over_lin.h" #include "wsum_hess/test_rel_entr.h" +#include "wsum_hess/test_right_matmul.h" #include "wsum_hess/test_sum.h" #include "wsum_hess/test_trace.h" @@ -105,6 +109,11 @@ int main(void) mu_run_test(test_wsum_hess_multiply_2, tests_run); mu_run_test(test_jacobian_trace_variable, tests_run); mu_run_test(test_jacobian_trace_composite, tests_run); + mu_run_test(test_jacobian_left_matmul_log, tests_run); + mu_run_test(test_jacobian_left_matmul_log_matrix, tests_run); + mu_run_test(test_jacobian_left_matmul_log_composite, tests_run); + mu_run_test(test_jacobian_right_matmul_log, tests_run); + mu_run_test(test_jacobian_right_matmul_log_vector, tests_run); printf("\n--- Weighted Sum of Hessian Tests ---\n"); mu_run_test(test_wsum_hess_log, tests_run); @@ -140,6 +149,11 @@ int main(void) mu_run_test(test_wsum_hess_multiply_sparse_random, tests_run); mu_run_test(test_wsum_hess_multiply_1, tests_run); mu_run_test(test_wsum_hess_multiply_2, tests_run); 
+ mu_run_test(test_wsum_hess_left_matmul, tests_run); + mu_run_test(test_wsum_hess_left_matmul_matrix, tests_run); + mu_run_test(test_wsum_hess_left_matmul_composite, tests_run); + mu_run_test(test_wsum_hess_right_matmul, tests_run); + mu_run_test(test_wsum_hess_right_matmul_vector, tests_run); // This test leads to seg fault // mu_run_test(test_wsum_hess_trace_variable, tests_run); @@ -153,6 +167,7 @@ int main(void) mu_run_test(test_csr_sum2, tests_run); mu_run_test(test_transpose, tests_run); mu_run_test(test_AT_alloc_and_fill, tests_run); + mu_run_test(test_kron_identity_csr, tests_run); mu_run_test(test_csr_to_csc1, tests_run); mu_run_test(test_csr_to_csc2, tests_run); mu_run_test(test_csr_vecmat_values_sparse, tests_run); diff --git a/tests/jacobian_tests/test_left_matmul.h b/tests/jacobian_tests/test_left_matmul.h new file mode 100644 index 0000000..09557ca --- /dev/null +++ b/tests/jacobian_tests/test_left_matmul.h @@ -0,0 +1,183 @@ +#include +#include + +#include "bivariate.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" + +const char *test_jacobian_left_matmul_log() +{ + /* Test Jacobian of A @ log(x) where: + * x is 3x1 variable at x = [1, 2, 3] + * A is 4x3 sparse matrix [1, 0, 2; 3, 0, 4; 5, 0, 6; 7, 0, 0] + * Output: A @ log(x) is 4x1 + * + * Jacobian is d(A @ log(x))/dx = A @ diag(1/x) + * At x = [1, 2, 3], this is: + * [1, 0, 2/3 ] + * [3, 0, 4/3 ] + * [5, 0, 2 ] + * [7, 0, 0 ] + * + * Stored in CSR format (4x3 sparse): + * nnz = 7 + * p = [0, 2, 4, 6, 7] + * i = [0, 2, 0, 2, 0, 2, 0] + * x = [1.0, 2.0/3.0, 3.0, 4.0/3.0, 5.0, 2.0, 7.0] + */ + double x_vals[3] = {1.0, 2.0, 3.0}; + expr *x = new_variable(3, 1, 0, 3); + + /* Create sparse matrix A in CSR format */ + CSR_Matrix *A = new_csr_matrix(4, 3, 7); + int A_p[5] = {0, 2, 4, 6, 7}; + int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; + double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->p, A_p, 5 * sizeof(int)); + memcpy(A->i, A_i, 7 * 
sizeof(int)); + memcpy(A->x, A_x, 7 * sizeof(double)); + + expr *log_x = new_log(x); + expr *A_log_x = new_left_matmul(log_x, A); + + A_log_x->forward(A_log_x, x_vals); + A_log_x->jacobian_init(A_log_x); + A_log_x->eval_jacobian(A_log_x); + + /* Expected jacobian values: A @ diag(1/x) */ + double expected_Ax[7] = { + 1.0, /* row 0, col 0: 1 * (1/1) */ + 2.0 / 3.0, /* row 0, col 2: 2 * (1/3) */ + 3.0, /* row 1, col 0: 3 * (1/1) */ + 4.0 / 3.0, /* row 1, col 2: 4 * (1/3) */ + 5.0, /* row 2, col 0: 5 * (1/1) */ + 2.0, /* row 2, col 2: 6 * (1/3) */ + 7.0 /* row 3, col 0: 7 * (1/1) */ + }; + int expected_Ai[7] = {0, 2, 0, 2, 0, 2, 0}; + int expected_Ap[5] = {0, 2, 4, 6, 7}; + + mu_assert("vals fail", cmp_double_array(A_log_x->jacobian->x, expected_Ax, 7)); + mu_assert("cols fail", cmp_int_array(A_log_x->jacobian->i, expected_Ai, 7)); + mu_assert("rows fail", cmp_int_array(A_log_x->jacobian->p, expected_Ap, 5)); + + free_csr_matrix(A); + free_expr(A_log_x); + return 0; +} + +const char *test_jacobian_left_matmul_log_matrix() +{ + /* x is 3x2, vectorized column-wise: [1,2,3 | 4,5,6] */ + double x_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + expr *x = new_variable(3, 2, 0, 6); + + /* Create sparse matrix A in CSR format (4x3) */ + CSR_Matrix *A = new_csr_matrix(4, 3, 7); + int A_p[5] = {0, 2, 4, 6, 7}; + int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; + double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->p, A_p, 5 * sizeof(int)); + memcpy(A->i, A_i, 7 * sizeof(int)); + memcpy(A->x, A_x, 7 * sizeof(double)); + + expr *log_x = new_log(x); + expr *A_log_x = new_left_matmul(log_x, A); + + A_log_x->forward(A_log_x, x_vals); + A_log_x->jacobian_init(A_log_x); + A_log_x->eval_jacobian(A_log_x); + + /* Expected Jacobian: block-diagonal repeat of A scaled by diag(1./x) */ + double expected_Ax[14] = {/* first column block (x = [1, 2, 3]) */ + 1.0, 2.0 / 3.0, 3.0, 4.0 / 3.0, 5.0, 2.0, 7.0, + /* second column block (x = [4, 5, 6]) */ + 0.25, 1.0 / 3.0, 0.75, 2.0 / 3.0, 1.25, 1.0, 
1.75}; + int expected_Ai[14] = {0, 2, 0, 2, 0, 2, 0, 3, 5, 3, 5, 3, 5, 3}; + int expected_Ap[9] = {0, 2, 4, 6, 7, 9, 11, 13, 14}; + + mu_assert("vals fail", cmp_double_array(A_log_x->jacobian->x, expected_Ax, 14)); + mu_assert("cols fail", cmp_int_array(A_log_x->jacobian->i, expected_Ai, 14)); + mu_assert("rows fail", cmp_int_array(A_log_x->jacobian->p, expected_Ap, 9)); + + free_csr_matrix(A); + free_expr(A_log_x); + return 0; +} + +const char *test_jacobian_left_matmul_log_composite() +{ + /* Test Jacobian of A @ log(B @ x) where: + * x is 3x1 variable at x = [1, 2, 3] + * B is 3x3 dense matrix of all ones + * A is 4x3 sparse matrix [1, 0, 2; 3, 0, 4; 5, 0, 6; 7, 0, 0] + * + * B @ x = [1+2+3, 1+2+3, 1+2+3]^T = [6, 6, 6]^T + * log(B @ x) = [log(6), log(6), log(6)]^T + * A @ log(B @ x) is 4x1 + * + * Jacobian is d(A @ log(B @ x))/dx = A @ diag(1/(B@x)) @ B + * Where B@x = [6, 6, 6], so diag(1/(B@x)) = diag([1/6, 1/6, 1/6]) + * diag(1/(B@x)) @ B gives [1/6, 1/6, 1/6; 1/6, 1/6, 1/6; 1/6, 1/6, 1/6] + * A @ (diag(1/(B@x)) @ B) is 4x3 dense (all entries nonzero): + * Row 0: [1, 0, 2] @ [1/6, 1/6, 1/6; ...] = [1/6 + 2/6, ...] = [3/6, 3/6, 3/6] = + * [1/2, 1/2, 1/2] Row 1: [3, 0, 4] @ [...] = [3/6 + 4/6, ...] = [7/6, 7/6, 7/6] + * Row 2: [5, 0, 6] @ [...] = [5/6 + 6/6, ...] = [11/6, 11/6, 11/6] + * Row 3: [7, 0, 0] @ [...] 
= [7/6, 7/6, 7/6] + * + * Stored in CSR format (4x3 all dense): + * nnz = 12 + * p = [0, 3, 6, 9, 12] + * i = [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2] + * x = [1/2, 1/2, 1/2, 7/6, 7/6, 7/6, 11/6, 11/6, 11/6, 7/6, 7/6, 7/6] + */ + double x_vals[3] = {1.0, 2.0, 3.0}; + expr *x = new_variable(3, 1, 0, 3); + + /* Create B matrix (3x3 all ones) */ + CSR_Matrix *B = new_csr_matrix(3, 3, 9); + int B_p[4] = {0, 3, 6, 9}; + int B_i[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; + double B_x[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + memcpy(B->p, B_p, 4 * sizeof(int)); + memcpy(B->i, B_i, 9 * sizeof(int)); + memcpy(B->x, B_x, 9 * sizeof(double)); + + /* Create A matrix */ + CSR_Matrix *A = new_csr_matrix(4, 3, 7); + int A_p[5] = {0, 2, 4, 6, 7}; + int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; + double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->p, A_p, 5 * sizeof(int)); + memcpy(A->i, A_i, 7 * sizeof(int)); + memcpy(A->x, A_x, 7 * sizeof(double)); + + expr *Bx = new_linear(x, B); + expr *log_Bx = new_log(Bx); + expr *A_log_Bx = new_left_matmul(log_Bx, A); + + A_log_Bx->forward(A_log_Bx, x_vals); + A_log_Bx->jacobian_init(A_log_Bx); + A_log_Bx->eval_jacobian(A_log_Bx); + + /* Expected jacobian values: A @ diag(1/(B@x)) @ B */ + double expected_Ax[12] = { + 0.5, 0.5, 0.5, /* row 0: [1/2, 1/2, 1/2] */ + 7.0 / 6.0, 7.0 / 6.0, 7.0 / 6.0, /* row 1: [7/6, 7/6, 7/6] */ + 11.0 / 6.0, 11.0 / 6.0, 11.0 / 6.0, /* row 2: [11/6, 11/6, 11/6] */ + 7.0 / 6.0, 7.0 / 6.0, 7.0 / 6.0 /* row 3: [7/6, 7/6, 7/6] */ + }; + int expected_Ai[12] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; + int expected_Ap[5] = {0, 3, 6, 9, 12}; + + mu_assert("vals fail", cmp_double_array(A_log_Bx->jacobian->x, expected_Ax, 12)); + mu_assert("cols fail", cmp_int_array(A_log_Bx->jacobian->i, expected_Ai, 12)); + mu_assert("rows fail", cmp_int_array(A_log_Bx->jacobian->p, expected_Ap, 5)); + + free_csr_matrix(A); + free_csr_matrix(B); + free_expr(A_log_Bx); + return 0; +} diff --git a/tests/jacobian_tests/test_right_matmul.h 
b/tests/jacobian_tests/test_right_matmul.h new file mode 100644 index 0000000..931df44 --- /dev/null +++ b/tests/jacobian_tests/test_right_matmul.h @@ -0,0 +1,102 @@ +#include <stdio.h> +#include <string.h> + +#include "affine.h" +#include "bivariate.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" + +const char *test_jacobian_right_matmul_log() +{ + /* Test Jacobian of log(x) @ A where: + * x is 2x2 variable at x = [[1, 2], [3, 4]] (vectorized column-wise: [1, 3, 2, + * 4]) A is 2x3 sparse matrix [1, 0, 2; 3, 0, 4] Output: log(x) @ A is 2x3 + */ + double x_vals[4] = {1.0, 3.0, 2.0, 4.0}; // column-wise vectorization + expr *x = new_variable(2, 2, 0, 4); + + /* Create sparse matrix A in CSR format (2x3) */ + CSR_Matrix *A = new_csr_matrix(2, 3, 4); + int A_p[3] = {0, 2, 4}; + int A_i[4] = {0, 2, 0, 2}; + double A_x[4] = {1.0, 2.0, 3.0, 4.0}; + memcpy(A->p, A_p, 3 * sizeof(int)); + memcpy(A->i, A_i, 4 * sizeof(int)); + memcpy(A->x, A_x, 4 * sizeof(double)); + + expr *log_x = new_log(x); + expr *log_x_A = new_right_matmul(log_x, A); + + log_x_A->forward(log_x_A, x_vals); + log_x_A->jacobian_init(log_x_A); + log_x_A->eval_jacobian(log_x_A); + + /* Expected jacobian values */ + double expected_Ax[8] = { + 1.0, /* row 0, col 0 */ + 1.5, /* row 0, col 2 */ + 1.0 / 3.0, /* row 1, col 1 */ + 0.75, /* row 1, col 3 */ + 2.0, /* row 4, col 0 */ + 2.0, /* row 4, col 2 */ + 2.0 / 3.0, /* row 5, col 1 */ + 1.0 /* row 5, col 3 */ + }; + int expected_Ai[8] = {0, 2, 1, 3, 0, 2, 1, 3}; + int expected_Ap[7] = {0, 2, 4, 4, 4, 6, 8}; + + mu_assert("vals fail", cmp_double_array(log_x_A->jacobian->x, expected_Ax, 8)); + mu_assert("cols fail", cmp_int_array(log_x_A->jacobian->i, expected_Ai, 8)); + mu_assert("rows fail", cmp_int_array(log_x_A->jacobian->p, expected_Ap, 7)); + + free_csr_matrix(A); + free_expr(log_x_A); + return 0; +} + +const char *test_jacobian_right_matmul_log_vector() +{ + /* Test Jacobian of log(x) @ A where: + * x is 1x3 variable 
at x = [1, 2, 3] + * A is 3x2 sparse matrix [[1, 0], [2, 3], [0, 4]] + * Output: log(x) @ A is 1x2 + */ + double x_vals[3] = {1.0, 2.0, 3.0}; + expr *x = new_variable(1, 3, 0, 3); + + /* Create sparse matrix A in CSR format (3x2) */ + CSR_Matrix *A = new_csr_matrix(3, 2, 4); + int A_p[4] = {0, 1, 3, 4}; + int A_i[4] = {0, 0, 1, 1}; + double A_x[4] = {1.0, 2.0, 3.0, 4.0}; + memcpy(A->p, A_p, 4 * sizeof(int)); + memcpy(A->i, A_i, 4 * sizeof(int)); + memcpy(A->x, A_x, 4 * sizeof(double)); + + expr *log_x = new_log(x); + expr *log_x_A = new_right_matmul(log_x, A); + + log_x_A->forward(log_x_A, x_vals); + log_x_A->jacobian_init(log_x_A); + log_x_A->eval_jacobian(log_x_A); + + /* Expected jacobian values: A^T @ diag(1/x) */ + double expected_Ax[4] = { + 1.0, /* row 0, col 0 */ + 1.0, /* row 0, col 1 */ + 1.5, /* row 1, col 1 */ + 4.0 / 3.0 /* row 1, col 2 */ + }; + int expected_Ai[4] = {0, 1, 1, 2}; + int expected_Ap[3] = {0, 2, 4}; + + mu_assert("vals fail", cmp_double_array(log_x_A->jacobian->x, expected_Ax, 4)); + mu_assert("cols fail", cmp_int_array(log_x_A->jacobian->i, expected_Ai, 4)); + mu_assert("rows fail", cmp_int_array(log_x_A->jacobian->p, expected_Ap, 3)); + + free_csr_matrix(A); + free_expr(log_x_A); + return 0; +} diff --git a/tests/utils/test_csr_matrix.h b/tests/utils/test_csr_matrix.h index b2cfced..7a42970 100644 --- a/tests/utils/test_csr_matrix.h +++ b/tests/utils/test_csr_matrix.h @@ -435,3 +435,50 @@ const char *test_AT_alloc_and_fill() return 0; } + +const char *test_kron_identity_csr() +{ + /* Test A kron I_p where: + * A is 2x3: + * [1 0 2] + * [3 0 4] + * + * p = 2, so result should be 4x6 (2*2 rows, 3*2 cols) + * A kron I_2 = + * [1 0 | 0 0 | 2 0] + * [0 1 | 0 0 | 0 2] + * [-----|-----|-----] + * [3 0 | 0 0 | 4 0] + * [0 3 | 0 0 | 0 4] + */ + CSR_Matrix *A = new_csr_matrix(2, 3, 4); + double Ax[4] = {1.0, 2.0, 3.0, 4.0}; + int Ai[4] = {0, 2, 0, 2}; + int Ap[3] = {0, 2, 4}; + memcpy(A->x, Ax, 4 * sizeof(double)); + memcpy(A->i, Ai, 4 * 
sizeof(int)); + memcpy(A->p, Ap, 3 * sizeof(int)); + + CSR_Matrix *result = kron_identity_csr(A, 2); + + /* Expected: 4x6 with 8 nonzeros + * Row 0: [1, 0, 0, 0, 2, 0] -> cols {0, 4}, vals {1, 2} + * Row 1: [0, 1, 0, 0, 0, 2] -> cols {1, 5}, vals {1, 2} + * Row 2: [3, 0, 0, 0, 4, 0] -> cols {0, 4}, vals {3, 4} + * Row 3: [0, 3, 0, 0, 0, 4] -> cols {1, 5}, vals {3, 4} + */ + double expected_x[8] = {1.0, 2.0, 1.0, 2.0, 3.0, 4.0, 3.0, 4.0}; + int expected_i[8] = {0, 4, 1, 5, 0, 4, 1, 5}; + int expected_p[5] = {0, 2, 4, 6, 8}; + + mu_assert("dimensions incorrect", result->m == 4 && result->n == 6); + mu_assert("nnz incorrect", result->nnz == 8); + mu_assert("vals incorrect", cmp_double_array(result->x, expected_x, 8)); + mu_assert("cols incorrect", cmp_int_array(result->i, expected_i, 8)); + mu_assert("rows incorrect", cmp_int_array(result->p, expected_p, 5)); + + free_csr_matrix(A); + free_csr_matrix(result); + + return 0; +} diff --git a/tests/wsum_hess/test_left_matmul.h b/tests/wsum_hess/test_left_matmul.h new file mode 100644 index 0000000..b1f0923 --- /dev/null +++ b/tests/wsum_hess/test_left_matmul.h @@ -0,0 +1,263 @@ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "affine.h" +#include "bivariate.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" + +const char *test_wsum_hess_left_matmul() +{ + /* Test weighted sum of Hessian of A @ log(x) where: + * x is 3x1 variable at x = [1, 2, 3] + * A is 4x3 sparse matrix [1, 0, 2; 3, 0, 4; 5, 0, 6; 7, 0, 0] + * Output: A @ log(x) is 4x1 + * Weights w = [1, 2, 3, 4] + * + * For log(x_i), the Hessian is diagonal with h_ii = -1/x_i^2 + * The output f = A @ log(x) has: + * f[0] = log(x[0]) + 2*log(x[2]) + * f[1] = 3*log(x[0]) + 4*log(x[2]) + * f[2] = 5*log(x[0]) + 6*log(x[2]) + * f[3] = 7*log(x[0]) + * + * Weighted sum of Hessian: sum_k w[k] * d²f[k]/dx² + * For each variable x[i], we need to find which outputs f[k] depend on it. 
+ * + * x[0] affects f[0], f[1], f[2], f[3] with coefficients 1, 3, 5, 7 + * x[1] doesn't affect any output (column 1 of A is all zeros) + * x[2] affects f[0], f[1], f[2] with coefficients 2, 4, 6 + * + * d²f[k]/dx[i]² = A[k,i] * (-1/x[i]²) + * + * wsum_hess[i,i] = sum_k w[k] * A[k,i] * (-1/x[i]²) + * = (-1/x[i]²) * sum_k w[k] * A[k,i] + * = (-1/x[i]²) * (A^T @ w)[i] + * + * A^T @ w: + * [1, 3, 5, 7] @ [1, 2, 3, 4] = 1*1 + 3*2 + 5*3 + 7*4 = 1 + 6 + 15 + 28 = 50 + * [0, 0, 0, 0] @ [1, 2, 3, 4] = 0 + * [2, 4, 6, 0] @ [1, 2, 3, 4] = 2*1 + 4*2 + 6*3 = 2 + 8 + 18 = 28 + * + * wsum_hess[0,0] = -50 / 1² = -50 + * wsum_hess[1,1] = 0 + * wsum_hess[2,2] = -28 / 3² = -28/9 + */ + double x_vals[3] = {1.0, 2.0, 3.0}; + double w[4] = {1.0, 2.0, 3.0, 4.0}; + + expr *x = new_variable(3, 1, 0, 3); + + /* Create sparse matrix A in CSR format */ + CSR_Matrix *A = new_csr_matrix(4, 3, 7); + int A_p[5] = {0, 2, 4, 6, 7}; + int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; + double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->p, A_p, 5 * sizeof(int)); + memcpy(A->i, A_i, 7 * sizeof(int)); + memcpy(A->x, A_x, 7 * sizeof(double)); + + expr *log_x = new_log(x); + expr *A_log_x = new_left_matmul(log_x, A); + + A_log_x->forward(A_log_x, x_vals); + A_log_x->jacobian_init(A_log_x); + A_log_x->wsum_hess_init(A_log_x); + A_log_x->eval_wsum_hess(A_log_x, w); + + /* Expected wsum_hess: diagonal matrix with all 3 entries + * (sparsity matches child's diagonal Hessian) */ + double expected_x[3] = { + -50.0, /* position [0,0]: -50 / 1² */ + -0.0, /* position [1,1]: 0 * (-1/4) = 0 */ + -28.0 / 9.0 /* position [2,2]: -28 / 9 */ + }; + int expected_i[3] = {0, 1, 2}; + int expected_p[4] = {0, 1, 2, 3}; /* each row has 1 diagonal entry */ + + mu_assert("vals incorrect", + cmp_double_array(A_log_x->wsum_hess->x, expected_x, 3)); + mu_assert("cols incorrect", cmp_int_array(A_log_x->wsum_hess->i, expected_i, 3)); + mu_assert("rows incorrect", cmp_int_array(A_log_x->wsum_hess->p, expected_p, 4)); + + 
free_csr_matrix(A); + free_expr(A_log_x); + return 0; +} + +const char *test_wsum_hess_left_matmul_composite() +{ + /* Test weighted sum of Hessian of A @ log(B @ x) where: + * x is 3x1 variable at x = [1, 2, 3] + * B is 3x3 dense matrix of all ones + * A is 4x3 sparse matrix [1, 0, 2; 3, 0, 4; 5, 0, 6; 7, 0, 0] + * Weights w = [1, 2, 3, 4] + * + * B @ x = [6, 6, 6]^T + * log(B @ x) = [log(6), log(6), log(6)]^T + * f = A @ log(B @ x) is 4x1 + * + * The Hessian is more complex due to the composite chain rule. + * df/dx = A @ diag(1/(B@x)) @ B + * + * The second derivative involves: + * d²f/dx² = A @ d(diag(1/y))/dy @ dy/dx @ B + A @ diag(1/y) @ 0 + * where y = B @ x + * + * d(diag(1/y))/dy = -diag(1/y²) + * dy/dx = B + * + * So: d²f[k]/dx² = -sum_j sum_i A[k,i] * (1/(B@x)_i²) * B[i,j] * B[i,l] + * This is dense and symmetric. + * + * For our case with B = all ones, B@x = [6,6,6], so 1/(B@x)² = 1/36 for each + * element + * + * For wsum_hess = sum_k w_k * d²f_k/dx² + * We use the chain rule: weight w goes through A + * weighted_w_prime[i] = sum_k w_k * A[k,i] + * Then: wsum_hess[j,l] = -sum_i weighted_w_prime[i] * (1/36) * B[i,j] * B[i,l] + * + * A^T @ w = [50, 0, 28]^T (from A^T w) + * So weighted contribution for each element of B@x is -[50, 0, 28] / 36 + * + * The result is wsum_hess = B^T @ diag(-[50, 0, 28] / 36) @ B. + * Since B is all ones, every entry of B^T @ diag(c) @ B equals sum_i c_i, + * so each of the 9 entries is + * -(50 + 0 + 28) / 36 = -78/36 = -13/6 + * + * Stored as dense 3x3: + * nnz = 9 (fully dense) + * p = [0, 3, 6, 9] + * i = [0, 1, 2, 0, 1, 2, 0, 1, 2] + * x = [-13/6 repeated nine times] + */ + double x_vals[3] = {1.0, 2.0, 3.0}; + double w[4] = {1.0, 2.0, 3.0, 4.0}; + + expr *x = new_variable(3, 1, 0, 3); + + /* Create B matrix (3x3 all ones) */ + CSR_Matrix *B = new_csr_matrix(3, 3, 9); + int B_p[4] = {0, 3, 6, 9}; + int B_i[9] = {0, 1, 2, 0, 1, 
2}; + double B_x[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + memcpy(B->p, B_p, 4 * sizeof(int)); + memcpy(B->i, B_i, 9 * sizeof(int)); + memcpy(B->x, B_x, 9 * sizeof(double)); + + /* Create A matrix */ + CSR_Matrix *A = new_csr_matrix(4, 3, 7); + int A_p[5] = {0, 2, 4, 6, 7}; + int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; + double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->p, A_p, 5 * sizeof(int)); + memcpy(A->i, A_i, 7 * sizeof(int)); + memcpy(A->x, A_x, 7 * sizeof(double)); + + expr *Bx = new_linear(x, B); + expr *log_Bx = new_log(Bx); + expr *A_log_Bx = new_left_matmul(log_Bx, A); + + A_log_Bx->forward(A_log_Bx, x_vals); + A_log_Bx->jacobian_init(A_log_Bx); + A_log_Bx->wsum_hess_init(A_log_Bx); + A_log_Bx->eval_wsum_hess(A_log_Bx, w); + + /* Expected wsum_hess: 3x3 dense matrix (9 nonzeros) + * All entries are -13/6 because the hessian of log(B@x) w.r.t. x + * becomes dense when B is dense (all ones) */ + double expected_x[9] = { + -13.0 / 6.0, -13.0 / 6.0, -13.0 / 6.0, /* row 0 */ + -13.0 / 6.0, -13.0 / 6.0, -13.0 / 6.0, /* row 1 */ + -13.0 / 6.0, -13.0 / 6.0, -13.0 / 6.0 /* row 2 */ + }; + int expected_i[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; + int expected_p[4] = {0, 3, 6, 9}; /* each row has 3 entries */ + + mu_assert("vals incorrect", + cmp_double_array(A_log_Bx->wsum_hess->x, expected_x, 9)); + mu_assert("cols incorrect", + cmp_int_array(A_log_Bx->wsum_hess->i, expected_i, 9)); + mu_assert("rows incorrect", + cmp_int_array(A_log_Bx->wsum_hess->p, expected_p, 4)); + + free_csr_matrix(A); + free_csr_matrix(B); + free_expr(A_log_Bx); + return 0; +} + +const char *test_wsum_hess_left_matmul_matrix() +{ + /* Test weighted sum of Hessian of A @ log(x) where: + * x is 3x2 variable, vectorized column-wise: [1, 2, 3, 4, 5, 6] + * A is 4x3 sparse matrix [1, 0, 2; 3, 0, 4; 5, 0, 6; 7, 0, 0] + * Output: A @ log(x) is 4x2, vectorized to 8x1 + * Weights w = [1, 2, 3, 4, 5, 6, 7, 8] + * + * The operation is block-diagonal: + * - Column 0 of x: [1, 2, 3] with 
weights w[0:4] = [1, 2, 3, 4] + * - Column 1 of x: [4, 5, 6] with weights w[4:8] = [5, 6, 7, 8] + * + * For column 0 (variables x[0:3]): + * A^T @ w[0:4] = [1*1 + 3*2 + 5*3 + 7*4, 0, 2*1 + 4*2 + 6*3] + * = [50, 0, 28] + * wsum_hess[0,0] = -50 / 1² = -50 + * wsum_hess[1,1] = 0 + * wsum_hess[2,2] = -28 / 3² = -28/9 + * + * For column 1 (variables x[3:6]): + * A^T @ w[4:8] = [1*5 + 3*6 + 5*7 + 7*8, 0, 2*5 + 4*6 + 6*7] + * = [114, 0, 76] + * wsum_hess[3,3] = -114 / 4² = -114/16 = -57/8 + * wsum_hess[4,4] = 0 + * wsum_hess[5,5] = -76 / 6² = -76/36 = -19/9 + */ + double x_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double w[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + + expr *x = new_variable(3, 2, 0, 6); + + /* Create sparse matrix A in CSR format */ + CSR_Matrix *A = new_csr_matrix(4, 3, 7); + int A_p[5] = {0, 2, 4, 6, 7}; + int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; + double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->p, A_p, 5 * sizeof(int)); + memcpy(A->i, A_i, 7 * sizeof(int)); + memcpy(A->x, A_x, 7 * sizeof(double)); + + expr *log_x = new_log(x); + expr *A_log_x = new_left_matmul(log_x, A); + + A_log_x->forward(A_log_x, x_vals); + A_log_x->jacobian_init(A_log_x); + A_log_x->wsum_hess_init(A_log_x); + A_log_x->eval_wsum_hess(A_log_x, w); + + /* Expected wsum_hess: 6x6 diagonal matrix with all 6 entries */ + double expected_x[6] = { + -50.0, /* position [0,0]: column 0, variable 0 */ + -0.0, /* position [1,1]: column 0, variable 1 */ + -28.0 / 9.0, /* position [2,2]: column 0, variable 2 */ + -57.0 / 8.0, /* position [3,3]: column 1, variable 0 */ + -0.0, /* position [4,4]: column 1, variable 1 */ + -19.0 / 9.0 /* position [5,5]: column 1, variable 2 */ + }; + int expected_i[6] = {0, 1, 2, 3, 4, 5}; + int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; /* each row has 1 diagonal entry */ + + mu_assert("vals incorrect", + cmp_double_array(A_log_x->wsum_hess->x, expected_x, 6)); + mu_assert("cols incorrect", cmp_int_array(A_log_x->wsum_hess->i, expected_i, 6)); + 
mu_assert("rows incorrect", cmp_int_array(A_log_x->wsum_hess->p, expected_p, 7)); + + free_csr_matrix(A); + free_expr(A_log_x); + return 0; +} diff --git a/tests/wsum_hess/test_right_matmul.h b/tests/wsum_hess/test_right_matmul.h new file mode 100644 index 0000000..ca109f8 --- /dev/null +++ b/tests/wsum_hess/test_right_matmul.h @@ -0,0 +1,110 @@ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "affine.h" +#include "bivariate.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" + +const char *test_wsum_hess_right_matmul() +{ + /* Test weighted sum of Hessian of log(x) @ A where: + * x is 2x2 variable at x = [[1, 2], [3, 4]] (vectorized column-wise: [1, 3, 2, + * 4]) A is 2x3 sparse matrix [[1, 0, 2], [3, 0, 4]] Output: log(x) @ A is 2x3 + * Weights w is 2x3 (vectorized column-wise: w = [w00, w10, w01, w11, w02, w12]) + * = [1, 2, 3, 4, 5, 6] + */ + double x_vals[4] = {1.0, 3.0, 2.0, 4.0}; // column-wise vectorization + double w[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; // weights for 2x3 output + + expr *x = new_variable(2, 2, 0, 4); + + /* Create sparse matrix A in CSR format (2x3) */ + CSR_Matrix *A = new_csr_matrix(2, 3, 4); + int A_p[3] = {0, 2, 4}; + int A_i[4] = {0, 2, 0, 2}; + double A_x[4] = {1.0, 2.0, 3.0, 4.0}; + memcpy(A->p, A_p, 3 * sizeof(int)); + memcpy(A->i, A_i, 4 * sizeof(int)); + memcpy(A->x, A_x, 4 * sizeof(double)); + + expr *log_x = new_log(x); + expr *log_x_A = new_right_matmul(log_x, A); + + log_x_A->forward(log_x_A, x_vals); + log_x_A->jacobian_init(log_x_A); + log_x_A->wsum_hess_init(log_x_A); + log_x_A->eval_wsum_hess(log_x_A, w); + + /* Expected wsum_hess: diagonal matrix with 4 entries */ + double expected_x[4] = { + -11.0, /* position [0,0]: -11 */ + -14.0 / 9.0, /* position [1,1]: -14/9 */ + -23.0 / 4.0, /* position [2,2]: -23/4 */ + -15.0 / 8.0 /* position [3,3]: -15/8 */ + }; + int expected_i[4] = {0, 1, 2, 3}; + int expected_p[5] = {0, 1, 2, 3, 4}; /* each row has 1 diagonal entry 
*/ + + mu_assert("vals incorrect", + cmp_double_array(log_x_A->wsum_hess->x, expected_x, 4)); + mu_assert("cols incorrect", cmp_int_array(log_x_A->wsum_hess->i, expected_i, 4)); + mu_assert("rows incorrect", cmp_int_array(log_x_A->wsum_hess->p, expected_p, 5)); + + free_csr_matrix(A); + free_expr(log_x_A); + return 0; +} + +const char *test_wsum_hess_right_matmul_vector() +{ + /* Test weighted sum of Hessian of log(x) @ A where: + * x is 1x3 variable at x = [1, 2, 3] + * A is 3x2 sparse matrix [[1, 0], [2, 3], [0, 4]] + * Output: log(x) @ A is 1x2 + * Weights w is 1x2 (vectorized: w = [w0, w1] = [1, 2]) + */ + double x_vals[3] = {1.0, 2.0, 3.0}; + double w[2] = {1.0, 2.0}; // weights for 1x2 output + + expr *x = new_variable(1, 3, 0, 3); + + /* Create sparse matrix A in CSR format (3x2) */ + CSR_Matrix *A = new_csr_matrix(3, 2, 4); + int A_p[4] = {0, 1, 3, 4}; + int A_i[4] = {0, 0, 1, 1}; + double A_x[4] = {1.0, 2.0, 3.0, 4.0}; + memcpy(A->p, A_p, 4 * sizeof(int)); + memcpy(A->i, A_i, 4 * sizeof(int)); + memcpy(A->x, A_x, 4 * sizeof(double)); + + expr *log_x = new_log(x); + expr *log_x_A = new_right_matmul(log_x, A); + + log_x_A->forward(log_x_A, x_vals); + log_x_A->jacobian_init(log_x_A); + log_x_A->wsum_hess_init(log_x_A); + log_x_A->eval_wsum_hess(log_x_A, w); + + /* Expected wsum_hess: diagonal matrix with 3 entries */ + double expected_x[3] = { + -1.0, /* position [0,0]: -1 */ + -2.0, /* position [1,1]: -2 */ + -8.0 / 9.0 /* position [2,2]: -8/9 */ + }; + int expected_i[3] = {0, 1, 2}; + int expected_p[4] = {0, 1, 2, 3}; /* each row has 1 diagonal entry */ + + mu_assert("vals incorrect", + cmp_double_array(log_x_A->wsum_hess->x, expected_x, 3)); + mu_assert("cols incorrect", cmp_int_array(log_x_A->wsum_hess->i, expected_i, 3)); + mu_assert("rows incorrect", cmp_int_array(log_x_A->wsum_hess->p, expected_p, 4)); + + free_csr_matrix(A); + free_expr(log_x_A); + return 0; +}