diff --git a/README.md b/README.md index d1d7b1f..b32b769 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,13 @@ 4. in the refactor, add consts 5. multiply with one constant vector/scalar argument 6. AX where X is a matrix. Can that happen? How is that canonicalized? Maybe it can't happen. -7. Must be able to compute jacobian and hessian of A @ phi(x), so linear operator needs other code! This requires new infrastructure, I think. \ No newline at end of file +7. Must be able to compute jacobian and hessian of A @ phi(x), so linear operator needs other code! This requires new infrastructure, I think. +8. Shortcut hessians of affine stuff? + +Going through all atoms to see that sparsity pattern is computed in initialization of jacobian: +2. trace - not ok + +Going through all atoms to see that sparsity pattern is computed in initialization of hessian: +2. hstack - not ok +3. trace - not ok + diff --git a/include/affine.h b/include/affine.h index e1b66ac..12b1dab 100644 --- a/include/affine.h +++ b/include/affine.h @@ -11,6 +11,7 @@ expr *new_add(expr *left, expr *right); expr *new_sum(expr *child, int axis); expr *new_hstack(expr **args, int n_args, int n_vars); +expr *new_trace(expr *child); expr *new_constant(int d1, int d2, int n_vars, const double *values); expr *new_variable(int d1, int d2, int var_id, int n_vars); diff --git a/include/expr.h b/include/expr.h index f6381a3..56173b9 100644 --- a/include/expr.h +++ b/include/expr.h @@ -5,6 +5,7 @@ #include "utils/CSR_Matrix.h" #include #include +#include #define JAC_IDXS_NOT_SET -1 #define NOT_A_VARIABLE -1 @@ -53,6 +54,9 @@ typedef struct expr local_wsum_hess_fn local_wsum_hess; /* used by elementwise univariate atoms*/ free_type_data_fn free_type_data; /* Cleanup for type-specific fields */ + // name of node just for debugging - should be removed later + char name[32]; + } expr; void init_expr(expr *node, int d1, int d2, int n_vars, forward_fn forward, diff --git a/include/subexpr.h b/include/subexpr.h index 4b29416..1b6d6a2 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -37,9 +37,16 @@ typedef struct sum_expr { expr base; int axis; - struct int_double_pair *int_double_pairs; /* for sorting jacobian entries */ + int *idx_map; /* maps child nnz to summed-row positions */ } sum_expr; +/* Trace-like reduction: sums entries spaced by child->d1 */ +typedef struct trace_expr +{ + expr base; + struct int_double_pair *int_double_pairs; /* for sorting jacobian entries */ +} trace_expr; + /* Product of all entries */ typedef struct prod_expr { diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_Matrix.h index ca4b0a2..aa4d7ab 100644 --- a/include/utils/CSR_Matrix.h +++ b/include/utils/CSR_Matrix.h @@ -52,29 +52,86 @@ void csr_insert_value(CSR_Matrix *A, int col_idx, double value); * d must have length m * C must be pre-allocated with same dimensions as A */ void diag_csr_mult(const double *d, const CSR_Matrix *A, CSR_Matrix *C); +void diag_csr_mult_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C); /* Compute C = A + B where A, B, C are CSR matrices * A and B must have same dimensions * C must be pre-allocated with sufficient nnz capacity. * C must be different from A and B */ void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C); +/* Compute sparsity pattern of A + B where A, B, C are CSR matrices. + * Fills C->p, C->i, and C->nnz; does not touch C->x. */ +void sum_csr_matrices_fill_sparsity(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C); + +/* Fill only the values of C = A + B, assuming C's sparsity pattern (p and i) + * is already filled and matches the union of A and B per row. Does not modify + * C->p, C->i, or C->nnz. */ +void sum_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C); /* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR matrices */ void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, const double *d1, const double *d2); +/* Fill only the values of C = diag(d1) * A + diag(d2) * B, assuming C's sparsity + * pattern (p and i) is already filled and matches the union of A and B per row. + * Does not modify C->p, C->i, or C->nnz. */ +void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C, const double *d1, + const double *d2); + /* Sum all rows of A into a single row matrix C */ void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, struct int_double_pair *pairs); +/* iwork must have size max(C->n, A->nnz), and idx_map must have size A->nnz. */ +void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix *C, + int *iwork, int *idx_map); + +/* Fill values of summed rows using precomputed idx_map and sparsity of C */ +// void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, +// const int *idx_map); + +/* Fill accumulator for summing rows using precomputed idx_map for each nnz of A*/ +void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, + double *accumulator); + /* Sum blocks of rows of A into a matrix C */ void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, struct int_double_pair *pairs, int row_block_size); +/* Build sparsity and index map for summing blocks of rows. + * iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz. */ +void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_block_size, int *iwork, + int *idx_map); + +/* Fill values for summing blocks of rows using precomputed idx_map */ +// void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, +// const int *idx_map); + /* Sum evenly spaced rows of A into a matrix C */ void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, struct int_double_pair *pairs, int row_spacing); +/* Build sparsity and index map for summing evenly spaced rows. + * iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz. */ +void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_spacing, + int *iwork, int *idx_map); + +/* Fill values for summing evenly spaced rows using precomputed idx_map */ +// void sum_evenly_spaced_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, +// const int *idx_map); + +/* Sum evenly spaced rows of A starting at offset into a row matrix C */ +void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int offset, + int spacing); + /* Count number of columns with nonzero entries */ int count_nonzero_cols(const CSR_Matrix *A, bool *col_nz); diff --git a/src/affine/add.c b/src/affine/add.c index 9f6b2d1..d7994b3 100644 --- a/src/affine/add.c +++ b/src/affine/add.c @@ -22,6 +22,10 @@ static void jacobian_init(expr *node) /* we never have to store more than the sum of children's nnz */ int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz; node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max); + + /* fill sparsity pattern */ + sum_csr_matrices_fill_sparsity(node->left->jacobian, node->right->jacobian, + node->jacobian); } static void eval_jacobian(expr *node) @@ -31,7 +35,8 @@ static void eval_jacobian(expr *node) node->right->eval_jacobian(node->right); /* sum children's jacobians */ - sum_csr_matrices(node->left->jacobian, node->right->jacobian, node->jacobian); + sum_csr_matrices_fill_values(node->left->jacobian, node->right->jacobian, + node->jacobian); } static void wsum_hess_init(expr *node) @@ -43,6 +48,10 @@ static void wsum_hess_init(expr *node) /* we never have to store more than the sum of children's nnz */ int nnz_max = node->left->wsum_hess->nnz + node->right->wsum_hess->nnz; node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz_max); + + /* TODO: we should fill sparsity pattern here for consistency */ + sum_csr_matrices_fill_sparsity(node->left->wsum_hess, node->right->wsum_hess, + node->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -52,7 +61,8 @@ static void eval_wsum_hess(expr *node, const double *w) node->right->eval_wsum_hess(node->right, w); /* sum children's wsum_hess */ - sum_csr_matrices(node->left->wsum_hess, node->right->wsum_hess, node->wsum_hess); + sum_csr_matrices_fill_values(node->left->wsum_hess, node->right->wsum_hess, + node->wsum_hess); } static bool is_affine(const expr *node) @@ -74,5 +84,8 @@ expr *new_add(expr *left, expr *right) node->wsum_hess_init = wsum_hess_init; node->eval_wsum_hess = eval_wsum_hess; + // just for debugging, should be removed + strcpy(node->name, "add"); + return node; } diff --git a/src/affine/sum.c b/src/affine/sum.c index ee9988b..7114124 100644 --- a/src/affine/sum.c +++ b/src/affine/sum.c @@ -5,6 +5,8 @@ #include #include +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + static void forward(expr *node, const double *u) { int i, j, end; @@ -62,42 +64,46 @@ static void jacobian_init(expr *node) { expr *x = node->left; sum_expr *snode = (sum_expr *) node; + int axis = snode->axis; /* initialize child's jacobian */ x->jacobian_init(x); /* we never have to store more than the child's nnz */ node->jacobian = new_csr_matrix(node->d1, node->n_vars, x->jacobian->nnz); - snode->int_double_pairs = new_int_double_pair_array(x->jacobian->nnz); -} - -static void eval_jacobian(expr *node) -{ - expr *x = node->left; - sum_expr *snode = (sum_expr *) node; - int axis = snode->axis; - - /* evaluate child's jacobian */ - x->eval_jacobian(x); + node->iwork = malloc(MAX(node->jacobian->n, x->jacobian->nnz) * sizeof(int)); + snode->idx_map = malloc(x->jacobian->nnz * sizeof(int)); - /* sum rows or columns of child's jacobian */ if (axis == -1) { - sum_all_rows_csr(x->jacobian, node->jacobian, snode->int_double_pairs); + sum_all_rows_csr_fill_sparsity_and_idx_map(x->jacobian, node->jacobian, + node->iwork, snode->idx_map); } else if (axis == 0) { - sum_block_of_rows_csr(x->jacobian, node->jacobian, snode->int_double_pairs, - x->d1); + sum_block_of_rows_csr_fill_sparsity_and_idx_map( + x->jacobian, node->jacobian, x->d1, node->iwork, snode->idx_map); } - else if (axis == 1) { - sum_evenly_spaced_rows_csr(x->jacobian, node->jacobian, - snode->int_double_pairs, node->d1); + sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map( + x->jacobian, node->jacobian, node->d1, node->iwork, snode->idx_map); } } +static void eval_jacobian(expr *node) +{ + expr *x = node->left; + + /* evaluate child's jacobian */ + x->eval_jacobian(x); + + /* we have precomputed an idx map between the nonzeros of the child's jacobian + and this node's jacobian, so we just accumulate accordingly */ + idx_map_accumulator(x->jacobian, ((sum_expr *) node)->idx_map, + node->jacobian->x); +} + static void wsum_hess_init(expr *node) { expr *x = node->left; @@ -107,6 +113,10 @@ static void wsum_hess_init(expr *node) /* we never have to store more than the child's nnz */ node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, x->wsum_hess->nnz); node->dwork = malloc(x->size * sizeof(double)); + + /* copy sparsity pattern */ + memcpy(node->wsum_hess->p, x->wsum_hess->p, (x->n_vars + 1) * sizeof(int)); + memcpy(node->wsum_hess->i, x->wsum_hess->i, x->wsum_hess->nnz * sizeof(int)); } static void eval_wsum_hess(expr *node, const double *w) @@ -130,8 +140,8 @@ static void eval_wsum_hess(expr *node, const double *w) x->eval_wsum_hess(x, node->dwork); - /* todo: is this copy necessary or can we just change pointers? */ - copy_csr_matrix(x->wsum_hess, node->wsum_hess); + /* copy values (TODO: is this necessary or can we just change pointers?)*/ + memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) @@ -142,7 +152,7 @@ static bool is_affine(const expr *node) static void free_type_data(expr *node) { sum_expr *snode = (sum_expr *) node; - free_int_double_pair_array(snode->int_double_pairs); + free(snode->idx_map); } expr *new_sum(expr *child, int axis) @@ -179,7 +189,6 @@ expr *new_sum(expr *child, int axis) /* Set type-specific fields */ snode->axis = axis; - snode->int_double_pairs = NULL; return node; } diff --git a/src/affine/trace.c b/src/affine/trace.c new file mode 100644 index 0000000..a424277 --- /dev/null +++ b/src/affine/trace.c @@ -0,0 +1,137 @@ +#include "affine.h" +#include "utils/int_double_pair.h" +#include +#include +#include + +static void forward(expr *node, const double *u) +{ + expr *x = node->left; + + /* child's forward pass */ + x->forward(x, u); + + /* local forward pass */ + double sum = 0.0; + int row_spacing = x->d1 + 1; + for (int idx = 0; idx < x->size; idx += row_spacing) + { + sum += x->value[idx]; + } + + node->value[0] = sum; +} + +static void jacobian_init(expr *node) +{ + expr *x = node->left; + + /* initialize child's jacobian */ + x->jacobian_init(x); + + /* count total nnz in the rows of the jacobian that should be summed */ + const CSR_Matrix *A = x->jacobian; + int total_nnz = 0; + int row_spacing = x->d1 + 1; + for (int row = 0; row < A->m; row += row_spacing) + { + total_nnz += (A->p[row + 1] - A->p[row]); + } + + node->jacobian = new_csr_matrix(1, node->n_vars, total_nnz); + ((trace_expr *) node)->int_double_pairs = new_int_double_pair_array(total_nnz); +} + +static void eval_jacobian(expr *node) +{ + expr *x = node->left; + trace_expr *tnode = (trace_expr *) node; + + /* evaluate child's jacobian */ + x->eval_jacobian(x); + + /* local jacobian */ + sum_spaced_rows_into_row_csr(x->jacobian, node->jacobian, + tnode->int_double_pairs, 0, x->d1 + 1); +} + +/* Placeholders for Hessian-related functions */ +static void wsum_hess_init(expr *node) +{ + expr *x = node->left; + + /* initialize child's hessian */ + x->wsum_hess_init(x); + + node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, x->wsum_hess->nnz); + node->dwork = (double *) calloc(x->size, sizeof(double)); + + /* TODO: here we could copy over sparsity pattern once we have checked + that all atoms fill their sparsity pattern in the init functions. Perhaps + we should only take sparsity pattern of rows that are summed? Not the rows + which will get zero weight in the hessian. That would be very cool. + But must eval_wsum_hess then also ignore contributions with zero weight? that + would be bad. */ + + /* lets conclude that the hessian can be made more sophisticated */ + + /* but perhaps let's keep it as simple as possible! */ +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + expr *x = node->left; + + int row_spacing = x->d1 + 1; + for (int i = 0; i < x->size; i += row_spacing) + { + node->dwork[i] = w[0]; + } + + x->eval_wsum_hess(x, node->dwork); + + /* TODO: here we only need to copy over values once we have filled the sparsity + * pattern in wsum_hess_init*/ + node->wsum_hess->nnz = x->wsum_hess->nnz; + memcpy(node->wsum_hess->p, x->wsum_hess->p, sizeof(int) * (node->n_vars + 1)); + memcpy(node->wsum_hess->i, x->wsum_hess->i, sizeof(int) * x->wsum_hess->nnz); + memcpy(node->wsum_hess->x, x->wsum_hess->x, sizeof(double) * x->wsum_hess->nnz); + + /* This might contain many many zeros actually! Hmm...*/ +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +static void free_type_data(expr *node) +{ + trace_expr *tnode = (trace_expr *) node; + free_int_double_pair_array(tnode->int_double_pairs); +} + +expr *new_trace(expr *child) +{ + /* Output is a single scalar */ + int d1 = 1; + + trace_expr *tnode = (trace_expr *) calloc(1, sizeof(trace_expr)); + expr *node = &tnode->base; + init_expr(node, d1, 1, child->n_vars, forward, jacobian_init, eval_jacobian, + is_affine, free_type_data); + node->left = child; + expr_retain(child); + + /* Hessian placeholders */ + node->wsum_hess_init = wsum_hess_init; + node->eval_wsum_hess = eval_wsum_hess; + + /* Initialize type-specific fields */ + tnode->int_double_pairs = NULL; + + // just for debugging, should be removed + strcpy(node->name, "trace"); + + return node; +} diff --git a/src/affine/variable.c b/src/affine/variable.c index 46e2446..43b706f 100644 --- a/src/affine/variable.c +++ b/src/affine/variable.c @@ -9,15 +9,20 @@ static void forward(expr *node, const double *u) static void jacobian_init(expr *node) { - int size = node->d1 * node->d2; - node->jacobian = new_csr_matrix(size, node->n_vars, size); - for (int j = 0; j < size; j++) + node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size); + for (int j = 0; j < node->size; j++) { node->jacobian->p[j] = j; node->jacobian->i[j] = j + node->var_id; node->jacobian->x[j] = 1.0; } - node->jacobian->p[size] = size; + node->jacobian->p[node->size] = node->size; +} + +static void eval_jacobian(expr *node) +{ + /* Jacobian is already initialized with correct values */ + (void) node; } static bool is_affine(const expr *node) @@ -33,6 +38,7 @@ expr *new_variable(int d1, int d2, int var_id, int n_vars) node->var_id = var_id; node->is_affine = is_affine; node->jacobian_init = jacobian_init; + node->eval_jacobian = eval_jacobian; return node; } diff --git a/src/bivariate/multiply.c b/src/bivariate/multiply.c index e50882b..63c9290 100644 --- a/src/bivariate/multiply.c +++ b/src/bivariate/multiply.c @@ -44,6 +44,10 @@ static void jacobian_init(expr *node) node->dwork = (double *) malloc(2 * node->size * sizeof(double)); int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz; node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max); + + /* fill sparsity pattern */ + sum_csr_matrices_fill_sparsity(node->left->jacobian, node->right->jacobian, + node->jacobian); } static void eval_jacobian(expr *node) @@ -52,8 +56,8 @@ static void eval_jacobian(expr *node) expr *y = node->right; /* chain rule */ - sum_scaled_csr_matrices(x->jacobian, y->jacobian, node->jacobian, y->value, - x->value); + sum_scaled_csr_matrices_fill_values(x->jacobian, y->jacobian, node->jacobian, + y->value, x->value); } static void wsum_hess_init(expr *node) diff --git a/src/elementwise_univariate/common.c b/src/elementwise_univariate/common.c index 752f107..c1c412e 100644 --- a/src/elementwise_univariate/common.c +++ b/src/elementwise_univariate/common.c @@ -21,9 +21,14 @@ void jacobian_init_elementwise(expr *node) /* otherwise it will be a linear operator */ else { - node->jacobian = new_csr_matrix(child->jacobian->m, child->jacobian->n, - child->jacobian->nnz); + CSR_Matrix *J = child->jacobian; + node->jacobian = new_csr_matrix(J->m, J->n, J->nnz); + node->dwork = (double *) malloc(node->size * sizeof(double)); + + /* copy sparsity pattern of child */ + memcpy(node->jacobian->p, J->p, sizeof(int) * (J->m + 1)); + memcpy(node->jacobian->i, J->i, sizeof(int) * J->nnz); } } @@ -40,7 +45,7 @@ void eval_jacobian_elementwise(expr *node) /* Child will be a linear operator */ linear_op_expr *lin_child = (linear_op_expr *) child; node->local_jacobian(node, node->dwork); - diag_csr_mult(node->dwork, lin_child->A_csr, node->jacobian); + diag_csr_mult_fill_values(node->dwork, lin_child->A_csr, node->jacobian); } } diff --git a/src/elementwise_univariate/exp.c b/src/elementwise_univariate/exp.c index 3b27c15..1f820c0 100644 --- a/src/elementwise_univariate/exp.c +++ b/src/elementwise_univariate/exp.c @@ -33,5 +33,8 @@ expr *new_exp(expr *child) node->forward = forward; node->local_jacobian = local_jacobian; node->local_wsum_hess = local_wsum_hess; + + // just for debugging, should be removed + strcpy(node->name, "exp"); return node; } diff --git a/src/elementwise_univariate/log.c b/src/elementwise_univariate/log.c index 637dde3..6722747 100644 --- a/src/elementwise_univariate/log.c +++ b/src/elementwise_univariate/log.c @@ -38,5 +38,8 @@ expr *new_log(expr *child) node->forward = forward; node->local_jacobian = local_jacobian; node->local_wsum_hess = local_wsum_hess; + + // just for debugging, should be removed + strcpy(node->name, "log"); return node; } diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c index 8ee83e5..5e9aed8 100644 --- a/src/utils/CSR_Matrix.c +++ b/src/utils/CSR_Matrix.c @@ -97,6 +97,26 @@ void diag_csr_mult(const double *d, const CSR_Matrix *A, CSR_Matrix *C) } } +void diag_csr_mult_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C) +{ + memcpy(C->x, A->x, A->nnz * sizeof(double)); + + for (int row = 0; row < C->m; row++) + { + for (int j = C->p[row]; j < C->p[row + 1]; j++) + { + C->x[j] *= d[row]; + } + } +} + +static int compare_int_asc(const void *a, const void *b) +{ + int ia = *((const int *) a); + int ib = *((const int *) b); + return (ia > ib) - (ia < ib); +} + void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) { /* A and B must be different from C */ @@ -159,6 +179,131 @@ void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) C->p[A->m] = C->nnz; } +void sum_csr_matrices_fill_sparsity(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C) +{ + /* A and B must be different from C */ + assert(A != C && B != C); + + C->nnz = 0; + + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + C->p[row] = C->nnz; + + /* Merge while both have elements (only column indices) */ + while (a_ptr < a_end && b_ptr < b_end) + { + if (A->i[a_ptr] < B->i[b_ptr]) + { + C->i[C->nnz] = A->i[a_ptr]; + a_ptr++; + } + else if (B->i[b_ptr] < A->i[a_ptr]) + { + C->i[C->nnz] = B->i[b_ptr]; + b_ptr++; + } + else + { + C->i[C->nnz] = A->i[a_ptr]; + a_ptr++; + b_ptr++; + } + C->nnz++; + } + + /* Copy remaining elements from A */ + if (a_ptr < a_end) + { + int a_remaining = a_end - a_ptr; + memcpy(C->i + C->nnz, A->i + a_ptr, a_remaining * sizeof(int)); + C->nnz += a_remaining; + } + + /* Copy remaining elements from B */ + if (b_ptr < b_end) + { + int b_remaining = b_end - b_ptr; + memcpy(C->i + C->nnz, B->i + b_ptr, b_remaining * sizeof(int)); + C->nnz += b_remaining; + } + } + + C->p[A->m] = C->nnz; +} + +void sum_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C) +{ + /* Assumes C->p and C->i already contain the sparsity pattern of A+B. + Fills only C->x accordingly. */ + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + + for (int c_ptr = C->p[row]; c_ptr < C->p[row + 1]; c_ptr++) + { + int col = C->i[c_ptr]; + double val = 0.0; + + if (a_ptr < a_end && A->i[a_ptr] == col) + { + val += A->x[a_ptr]; + a_ptr++; + } + + if (b_ptr < b_end && B->i[b_ptr] == col) + { + val += B->x[b_ptr]; + b_ptr++; + } + C->x[c_ptr] = val; + } + } +} + +void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C, const double *d1, + const double *d2) +{ + /* Assumes C->p and C->i already contain the sparsity pattern of A+B. + Fills only C->x accordingly with scaling. */ + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + + for (int c_ptr = C->p[row]; c_ptr < C->p[row + 1]; c_ptr++) + { + int col = C->i[c_ptr]; + double val = 0.0; + + if (a_ptr < a_end && A->i[a_ptr] == col) + { + val += d1[row] * A->x[a_ptr]; + a_ptr++; + } + + if (b_ptr < b_end && B->i[b_ptr] == col) + { + val += d2[row] * B->x[b_ptr]; + b_ptr++; + } + C->x[c_ptr] = val; + } + } +} + void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, const double *d1, const double *d2) { @@ -321,6 +466,96 @@ void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, } } +/* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ +void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_block_size, int *iwork, + int *idx_map) +{ + assert(A->m % row_block_size == 0); + int n_blocks = A->m / row_block_size; + assert(C->m == n_blocks); + + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + int *cols = iwork; + int *col_to_pos = iwork; + + for (int block = 0; block < n_blocks; block++) + { + int start_row = block * row_block_size; + int end_row = start_row + row_block_size; + + // ----------------------------------------------------------------- + // Build sparsity pattern of the row resulting from summing + // the block of rows from A + // ----------------------------------------------------------------- + C->p[block] = C->nnz; + int count = 0; + for (int row = start_row; row < end_row; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + cols[count++] = A->i[j]; + } + } + + /* Sort columns and write unique pattern into C->i */ + qsort(cols, count, sizeof(int), compare_int_asc); + + int unique_nnz = 0; + int prev_col = -1; + for (int t = 0; t < count; t++) + { + int col = cols[t]; + if (t == 0 || col != prev_col) + { + C->i[C->nnz + unique_nnz] = col; + prev_col = col; + unique_nnz++; + } + } + + C->nnz += unique_nnz; + C->p[block + 1] = C->nnz; + + // ----------------------------------------------------------------- + // Build idx_map for all entries in this block + // ----------------------------------------------------------------- + int row_start = C->p[block]; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[row_start + idx]] = row_start + idx; + } + + for (int row = start_row; row < end_row; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } + } +} + +/* +void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, + const int *idx_map) +{ + memset(C->x, 0, C->nnz * sizeof(double)); + + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + C->x[idx_map[j]] += A->x[j]; + } + } +} +*/ + void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, struct int_double_pair *pairs, int row_spacing) { @@ -369,6 +604,149 @@ void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, } } +/* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ +void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_spacing, + int *iwork, int *idx_map) +{ + assert(C->m == row_spacing); + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + int *cols = iwork; + int *col_to_pos = iwork; + + for (int C_row = 0; C_row < C->m; C_row++) + { + // ----------------------------------------------------------------- + // Build sparsity pattern of the row resulting from summing + // evenly spaced rows from A + // ----------------------------------------------------------------- + C->p[C_row] = C->nnz; + int count = 0; + for (int row = C_row; row < A->m; row += row_spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + cols[count++] = A->i[j]; + } + } + + /* Sort columns and write unique pattern into C->i */ + qsort(cols, count, sizeof(int), compare_int_asc); + + int unique_nnz = 0; + int prev_col = -1; + for (int t = 0; t < count; t++) + { + int col = cols[t]; + if (t == 0 || col != prev_col) + { + C->i[C->nnz + unique_nnz] = col; + prev_col = col; + unique_nnz++; + } + } + + C->nnz += unique_nnz; + C->p[C_row + 1] = C->nnz; + + // ----------------------------------------------------------------- + // Build idx_map for all entries in evenly spaced rows + // ----------------------------------------------------------------- + int row_start = C->p[C_row]; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[row_start + idx]] = row_start + idx; + } + + for (int row = C_row; row < A->m; row += row_spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } + } +} + +void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, + double *accumulator) +{ + memset(accumulator, 0, A->nnz * sizeof(double)); + + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + accumulator[idx_map[j]] += A->x[j]; + } + } +} + +/* +void sum_evenly_spaced_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, + const int *idx_map) +{ + memset(C->x, 0, C->nnz * sizeof(double)); + + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + C->x[idx_map[j]] += A->x[j]; + } + } +} +*/ +void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int offset, + int spacing) +{ + assert(C->m == 1); + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + /* copy evenly spaced rows starting at offset into pairs */ + int pair_idx = 0; + for (int row = offset; row < A->m; row += spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + pairs[pair_idx].col = A->i[j]; + pairs[pair_idx].val = A->x[j]; + pair_idx++; + } + } + + /* sort so columns are in order */ + sort_int_double_pair_array(pairs, pair_idx); + + /* merge entries with same columns and insert result in C */ + for (int j = 0; j < pair_idx;) + { + int current_col = pairs[j].col; + double sum_val = 0.0; + + /* sum all values with the same column */ + while (j < pair_idx && pairs[j].col == current_col) + { + sum_val += pairs[j].val; + j++; + } + + /* insert into C */ + C->i[C->nnz] = current_col; + C->x[C->nnz] = sum_val; + C->nnz++; + } + + C->p[1] = C->nnz; +} + void csr_insert_value(CSR_Matrix *A, int col_idx, double value) { assert(A->m == 1); @@ -599,3 +977,63 @@ void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C) free(counts); } + +void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix *C, + int *iwork, int *idx_map) +{ + // ------------------------------------------------------------------- + // Build sparsity pattern of the summed row + // ------------------------------------------------------------------- + int *cols = iwork; + memcpy(cols, A->i, A->nnz * sizeof(int)); + qsort(cols, A->nnz, sizeof(int), compare_int_asc); + + int unique_nnz = 0; + int prev_col = -1; + for (int j = 0; j < A->nnz; j++) + { + if (cols[j] != prev_col) + { + C->i[unique_nnz] = cols[j]; + prev_col = cols[j]; + unique_nnz++; + } + } + + C->p[0] = 0; + C->p[1] = unique_nnz; + C->nnz = unique_nnz; + + // ------------------------------------------------------------------- + // Map child values to summed-row positions + // ------------------------------------------------------------------- + int *col_to_pos = iwork; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[idx]] = idx; + } + + for (int i = 0; i < A->m; i++) + { + for (int j = A->p[i]; j < A->p[i + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } +} + +/* +void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, + const int *idx_map) +{ + memset(C->x, 0, C->nnz * sizeof(double)); + + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + C->x[idx_map[j]] += A->x[j]; + } + } +} +*/ diff --git a/tests/all_tests.c b/tests/all_tests.c index 2801647..768ba86 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -20,6 +20,7 @@ #include "jacobian_tests/test_quad_over_lin.h" #include "jacobian_tests/test_rel_entr.h" #include "jacobian_tests/test_sum.h" +#include "jacobian_tests/test_trace.h" #include "utils/test_csc_matrix.h" #include "utils/test_csr_matrix.h" #include "wsum_hess/elementwise/test_entr.h" @@ -37,6 +38,7 @@ #include "wsum_hess/test_quad_over_lin.h" #include "wsum_hess/test_rel_entr.h" #include "wsum_hess/test_sum.h" +#include "wsum_hess/test_trace.h" int main(void) { @@ -87,8 +89,8 @@ int main(void) mu_run_test(test_jacobian_sum_log_axis_1, tests_run); mu_run_test(test_jacobian_hstack_vectors, tests_run); mu_run_test(test_jacobian_hstack_matrix, tests_run); - mu_run_test(test_wsum_hess_multiply_1, tests_run); - mu_run_test(test_wsum_hess_multiply_2, tests_run); + mu_run_test(test_jacobian_trace_variable, tests_run); + mu_run_test(test_jacobian_trace_composite, tests_run); printf("\n--- Weighted Sum of Hessian Tests ---\n"); mu_run_test(test_wsum_hess_log, tests_run); @@ -121,6 +123,14 @@ int main(void) mu_run_test(test_wsum_hess_quad_form, tests_run); mu_run_test(test_wsum_hess_multiply_linear_ops, tests_run); mu_run_test(test_wsum_hess_multiply_sparse_random, tests_run); + mu_run_test(test_wsum_hess_multiply_1, tests_run); + mu_run_test(test_wsum_hess_multiply_2, tests_run); + // This test leads to seg fault + // mu_run_test(test_wsum_hess_trace_variable, tests_run); + + // This test fails - not sure how sophisticated we should make + // wsum_hess for trace + // mu_run_test(test_wsum_hess_trace_composite, tests_run); printf("\n--- Utility Tests ---\n"); mu_run_test(test_diag_csr_mult, tests_run); diff --git a/tests/jacobian_tests/test_trace.h b/tests/jacobian_tests/test_trace.h new file mode 100644 index 0000000..b2f9810 --- /dev/null +++ b/tests/jacobian_tests/test_trace.h @@ -0,0 +1,103 @@ +#include + +#include "affine.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" + +const char *test_jacobian_trace_variable() +{ + /* Test Jacobian of trace(x) where x is 3x3 variable + * x has global variable index 1 + * Total 13 variables + * x.value = [1, 2, 3, 4, 5, 6, 7, 8, 9] (column-wise) + * Stored as: + * [[1, 4, 7], + * [2, 5, 8], + * [3, 6, 9]] + * Diagonal: [1, 5, 9] + * trace(x) = 1 + 5 + 9 = 15 + * + * Jacobian: d(trace)/dx has nonzeros only at diagonal positions + * Indices (0-indexed, column-wise): 0, 4, 8 + * So global variable indices: 1, 5, 9 (offset by 1) + * Values: [1, 1, 1] + * Result is 1x13 sparse matrix with 3 nonzeros + */ + double u_vals[13] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, + 7.0, 8.0, 9.0, 0.0, 0.0, 0.0}; + expr *x = new_variable(3, 3, 1, 13); + expr *trace_node = new_trace(x); + + trace_node->forward(trace_node, u_vals); + trace_node->jacobian_init(trace_node); + trace_node->eval_jacobian(trace_node); + + double expected_Ax[3] = {1.0, 1.0, 1.0}; + int expected_Ap[2] = {0, 3}; + int expected_Ai[3] = {1, 5, 9}; /* column indices (global variable indices) */ + + mu_assert("vals fail", + cmp_double_array(trace_node->jacobian->x, expected_Ax, 3)); + mu_assert("rows fail", cmp_int_array(trace_node->jacobian->p, expected_Ap, 2)); + mu_assert("cols fail", cmp_int_array(trace_node->jacobian->i, expected_Ai, 3)); + + free_expr(trace_node); + return 0; +} + +const char *test_jacobian_trace_composite() +{ + /* Test Jacobian of trace(log(x) + exp(x)) where x is 3x3 variable + * x has global variable index 1 + * Total 13 variables + * x.value = [1, 2, 3, 4, 5, 6, 7, 8, 9] (column-wise) + * Diagonal elements: x[0, 0] = 1, x[1, 1] = 5, x[2, 2] = 9 + * + * log(x) + exp(x) elementwise: [log(x_ij) + exp(x_ij)] + * At diagonal: [log(1) + exp(1), log(5) + exp(5), log(9) + exp(9)] + * + * d/dx (log(x) + exp(x)) = [1/x + exp(x)] + * At diagonal elements: + * x[0,0] = 1: 1/1 + exp(1) ≈ 1 + 2.718... ≈ 3.718... + * x[1,1] = 5: 1/5 + exp(5) = 0.2 + 148.413... ≈ 148.613... + * x[2,2] = 9: 1/9 + exp(9) ≈ 0.111... + 8103.08... ≈ 8103.19... + * + * Jacobian (1x13 sparse): + * Nonzeros at columns [1, 5, 9] with values above + * We'll compute and check the actual values + */ + double u_vals[13] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, + 7.0, 8.0, 9.0, 0.0, 0.0, 0.0}; + expr *x = new_variable(3, 3, 1, 13); + expr *log_node = new_log(x); + expr *exp_node = new_exp(x); + expr *add_node = new_add(log_node, exp_node); + expr *trace_node = new_trace(add_node); + + trace_node->jacobian_init(trace_node); + trace_node->forward(trace_node, u_vals); + trace_node->eval_jacobian(trace_node); + + /* Expected values: d(log(x_ii) + exp(x_ii))/dx_ii = 1/x_ii + exp(x_ii) + * At x_00 = 1: 1/1 + exp(1) = 1 + 2.718281828... + * At x_11 = 5: 1/5 + exp(5) = 0.2 + 148.413159103... + * At x_22 = 9: 1/9 + exp(9) = 0.111... + 8103.083927576... + */ + double exp_1 = 2.718281828; + double exp_5 = 148.413159103; + double exp_9 = 8103.083927576; + double expected_Ax[3] = {1.0 + exp_1, 0.2 + exp_5, 1.0 / 9.0 + exp_9}; + int expected_Ap[2] = {0, 3}; + int expected_Ai[3] = {1, 5, 9}; /* column indices (global variable indices) */ + + mu_assert("vals match count", trace_node->jacobian->nnz == 3); + mu_assert("rows fail", cmp_int_array(trace_node->jacobian->p, expected_Ap, 2)); + mu_assert("cols fail", cmp_int_array(trace_node->jacobian->i, expected_Ai, 3)); + mu_assert("vals fail", + cmp_double_array(trace_node->jacobian->x, expected_Ax, 3)); + + free_expr(trace_node); + return 0; +} diff --git a/tests/wsum_hess/test_trace.h b/tests/wsum_hess/test_trace.h new file mode 100644 index 0000000..372f8d1 --- /dev/null +++ b/tests/wsum_hess/test_trace.h @@ -0,0 +1,109 @@ +#include +#include +#include +#include + +#include "affine.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" + +const char *test_wsum_hess_trace_variable() +{ + /* Test weighted sum of Hessian of trace(x) where x is 3x3 variable + * x has global variable index 1 + * Total 13 variables + * x.value = [1, 2, 3, 4, 5, 6, 7, 8, 9] (column-wise) + * Weight = 2.0 + * + * trace(x) = x[0,0] + x[1,1] + x[2,2] = 1 + 5 + 9 = 15 + * Since x is linear (identity mapping), Hessian is zero. + * wsum_hess should be a 13x13 zero sparse matrix (or empty) + */ + double u_vals[13] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, + 7.0, 8.0, 9.0, 0.0, 0.0, 0.0}; + double w = 2.0; + + expr *x = new_variable(3, 3, 1, 13); + expr *trace_node = new_trace(x); + + trace_node->forward(trace_node, u_vals); + trace_node->jacobian_init(trace_node); + trace_node->wsum_hess_init(trace_node); + trace_node->eval_wsum_hess(trace_node, &w); + + /* For a linear operation (variable), Hessian is zero */ + mu_assert("wsum_hess should be empty", trace_node->wsum_hess->nnz == 0); + + mu_assert("dims correct", + trace_node->wsum_hess->m == 13 && trace_node->wsum_hess->n == 13); + + free_expr(trace_node); + return 0; +} + +const char *test_wsum_hess_trace_composite() +{ + /* Test weighted sum of Hessian of trace(log(x) + exp(x)) where x is 3x3 variable + * x has global variable index 1 + * Total 13 variables + * x.value = [1, 2, 3, 4, 5, 6, 7, 8, 9] (column-wise) + * Weight = 2.0 + * + * f(x) = trace(log(x) + exp(x)) + * d/dx (log(x) + exp(x)) = 1/x + exp(x) + * d²/dx² (log(x) + exp(x)) = -1/x² + exp(x) + * + * For diagonal elements: + * x[0,0] = 1: d²/dx² = -1/1² + exp(1) = -1 + 2.718... ≈ 1.718... + * x[1,1] = 5: d²/dx² = -1/25 + exp(5) = -0.04 + 148.413... ≈ 148.373... + * x[2,2] = 9: d²/dx² = -1/81 + exp(9) ≈ -0.0123... + 8103.08... ≈ 8103.07... + * + * wsum_hess (13x13) has nonzeros only at diagonal positions [1,1], [5,5], [9,9] + * with values = weight * second derivatives = 2 * values above + */ + double u_vals[13] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, + 7.0, 8.0, 9.0, 0.0, 0.0, 0.0}; + double w = 2.0; + + expr *x = new_variable(3, 3, 1, 13); + expr *log_node = new_log(x); + expr *exp_node = new_exp(x); + expr *add_node = new_add(log_node, exp_node); + expr *trace_node = new_trace(add_node); + + trace_node->forward(trace_node, u_vals); + trace_node->jacobian_init(trace_node); + trace_node->wsum_hess_init(trace_node); + trace_node->eval_wsum_hess(trace_node, &w); + + /* Expected diagonal Hessian values at indices [1,1], [5,5], [9,9] + * d²(log(x_ii) + exp(x_ii))/dx_ii² = -1/x_ii² + exp(x_ii) + * At x_00 = 1: -1 + exp(1) = -1 + 2.718281828... + * At x_11 = 5: -1/25 + exp(5) = -0.04 + 148.413159103... + * At x_22 = 9: -1/81 + exp(9) ≈ -0.01234... + 8103.083927576... + * + * wsum_hess values = 2 * second derivatives + */ + double exp_1 = 2.718281828; + double exp_5 = 148.413159103; + double exp_9 = 8103.083927576; + + double d2_1 = -1.0 + exp_1; + double d2_5 = -0.04 + exp_5; + double d2_9 = -1.0 / 81.0 + exp_9; + + double expected_Ax[3] = {w * d2_1, w * d2_5, w * d2_9}; + int expected_Ap[14] = {0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3}; + int expected_Ai[3] = {1, 5, 9}; + + mu_assert("nnz wrong", trace_node->wsum_hess->nnz == 3); + mu_assert("rows fail", cmp_int_array(trace_node->wsum_hess->p, expected_Ap, 14)); + mu_assert("vals match", + cmp_double_array(trace_node->wsum_hess->x, expected_Ax, 3)); + mu_assert("cols match", cmp_int_array(trace_node->wsum_hess->i, expected_Ai, 3)); + + free_expr(trace_node); + return 0; +}