From 984f75225976e49b1b51bda1c5a606e0b78068bd Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Feb 2026 10:23:04 -0800 Subject: [PATCH 1/3] refactored and moved things from CSR_Matrix --- include/utils/CSR_Matrix.h | 104 +-- include/utils/CSR_sum.h | 89 ++ .../{linalg.h => linalg_sparse_matmuls.h} | 0 src/affine/add.c | 1 + src/affine/hstack.c | 1 + src/affine/sum.c | 1 + src/affine/trace.c | 1 + src/bivariate/left_matmul.c | 2 +- src/bivariate/multiply.c | 1 + src/bivariate/right_matmul.c | 2 +- src/problem.c | 1 + src/utils/CSR_Matrix.c | 738 ----------------- src/utils/CSR_sum.c | 761 ++++++++++++++++++ .../{linalg.c => linalg_sparse_matmuls.c} | 0 tests/all_tests.c | 2 +- tests/utils/test_csr_matrix.h | 1 + ..._linalg.h => test_linalg_sparse_matmuls.h} | 2 +- 17 files changed, 867 insertions(+), 840 deletions(-) create mode 100644 include/utils/CSR_sum.h rename include/utils/{linalg.h => linalg_sparse_matmuls.h} (100%) create mode 100644 src/utils/CSR_sum.c rename src/utils/{linalg.c => linalg_sparse_matmuls.c} (100%) rename tests/utils/{test_linalg.h => test_linalg_sparse_matmuls.h} (99%) diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_Matrix.h index 8c3f523..00d75ee 100644 --- a/include/utils/CSR_Matrix.h +++ b/include/utils/CSR_Matrix.h @@ -25,16 +25,17 @@ typedef struct CSR_Matrix int nnz; } CSR_Matrix; -/* Allocate a new CSR matrix with given dimensions and nnz */ +/* constructors and destructors */ CSR_Matrix *new_csr_matrix(int m, int n, int nnz); CSR_Matrix *new_csr(const CSR_Matrix *A); - -/* Free a CSR matrix */ void free_csr_matrix(CSR_Matrix *matrix); - -/* Copy CSR matrix A to C */ void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C); +/* transpose functionality (iwork must be of size A->n) */ +CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork); +CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork); +void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork); + /* Build block-diagonal repeat A_blk = I_p kron A. Returns newly allocated CSR * matrix of size (p*A->m) x (p*A->n) with nnz = p*A->nnz. */ CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p); @@ -64,92 +65,6 @@ void csr_insert_value(CSR_Matrix *A, int col_idx, double value); void diag_csr_mult(const double *d, const CSR_Matrix *A, CSR_Matrix *C); void diag_csr_mult_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C); -/* Compute C = A + B where A, B, C are CSR matrices - * A and B must have same dimensions - * C must be pre-allocated with sufficient nnz capacity. - * C must be different from A and B */ -void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C); -/* Compute sparsity pattern of A + B where A, B, C are CSR matrices. - * Fills C->p, C->i, and C->nnz; does not touch C->x. */ -void sum_csr_matrices_fill_sparsity(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C); - -/* Fill only the values of C = A + B, assuming C's sparsity pattern (p and i) - * is already filled and matches the union of A and B per row. Does not modify - * C->p, C->i, or C->nnz. */ -void sum_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C); - -/* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR matrices */ -void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, - const double *d1, const double *d2); - -/* Fill only the values of C = diag(d1) * A + diag(d2) * B, assuming C's sparsity - * pattern (p and i) is already filled and matches the union of A and B per row. - * Does not modify C->p, C->i, or C->nnz. */ -void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C, const double *d1, - const double *d2); - -/* Sum all rows of A into a single row matrix C */ -void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, - struct int_double_pair *pairs); - -/* iwork must have size max(C->n, A->nnz), and idx_map must have size A->nnz. */ -void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix *C, - int *iwork, int *idx_map); - -/* Fill values of summed rows using precomputed idx_map and sparsity of C */ -// void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, -// const int *idx_map); - -/* Fill accumulator for summing rows using precomputed idx_map for each nnz of A. - Must memset accumulator to zero before calling. */ -void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, - double *accumulator); -void idx_map_accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, - double *accumulator, int spacing); - -/* Sum blocks of rows of A into a matrix C */ -void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, - struct int_double_pair *pairs, int row_block_size); - -/* Build sparsity and index map for summing blocks of rows. - * iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz. */ -void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, - CSR_Matrix *C, - int row_block_size, int *iwork, - int *idx_map); - -/* Fill values for summing blocks of rows using precomputed idx_map */ -// void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, -// const int *idx_map); - -/* Sum evenly spaced rows of A into a matrix C */ -void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, - struct int_double_pair *pairs, int row_spacing); - -/* Build sparsity and index map for summing evenly spaced rows. - * iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz. */ -void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, - CSR_Matrix *C, - int row_spacing, - int *iwork, int *idx_map); - -/* Fill values for summing evenly spaced rows using precomputed idx_map */ -// void sum_evenly_spaced_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, -// const int *idx_map); - -/* Sum evenly spaced rows of A starting at offset into a row matrix C */ -void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, - struct int_double_pair *pairs, int offset, - int spacing); -/* Fills the sparsity and index map for summing spaced rows into a row matrix */ -void sum_spaced_rows_into_row_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, - CSR_Matrix *C, - int spacing, int *iwork, - int *idx_map); - /* Count number of columns with nonzero entries */ int count_nonzero_cols(const CSR_Matrix *A, bool *col_nz); @@ -158,13 +73,6 @@ void insert_idx(int idx, int *arr, int len); double csr_get_value(const CSR_Matrix *A, int row, int col); -/* iwork must be of size A->n*/ -CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork); -CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork); - -/* Fill values of A^T given sparsity pattern is already computed */ -void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork); - /* Expand symmetric CSR matrix A to full matrix C. A is assumed to store only upper triangle. C must be pre-allocated with sufficient nnz */ void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C); diff --git a/include/utils/CSR_sum.h b/include/utils/CSR_sum.h new file mode 100644 index 0000000..fcd8038 --- /dev/null +++ b/include/utils/CSR_sum.h @@ -0,0 +1,89 @@ +#ifndef CSR_SUM_H +#define CSR_SUM_H + +#include "utils/CSR_Matrix.h" + +/* forward declaration */ +struct int_double_pair; + +/* Compute C = A + B where A, B, C are CSR matrices + * A and B must have same dimensions + * C must be pre-allocated with sufficient nnz capacity. + * C must be different from A and B */ +void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C); + +/* Compute sparsity pattern of A + B where A, B, C are CSR matrices. + * Fills C->p, C->i, and C->nnz; does not touch C->x. */ +void sum_csr_matrices_fill_sparsity(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C); + +/* Fill only the values of C = A + B, assuming C's sparsity pattern (p and i) + * is already filled and matches the union of A and B per row. Does not modify + * C->p, C->i, or C->nnz. */ +void sum_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C); + +/* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR matrices */ +void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, + const double *d1, const double *d2); + +/* Fill only the values of C = diag(d1) * A + diag(d2) * B, assuming C's sparsity + * pattern (p and i) is already filled and matches the union of A and B per row. + * Does not modify C->p, C->i, or C->nnz. */ +void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C, const double *d1, + const double *d2); + +/* Sum all rows of A into a single row matrix C */ +void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs); + +/* iwork must have size max(C->n, A->nnz), and idx_map must have size A->nnz. */ +void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix *C, + int *iwork, int *idx_map); + +/* Fill values of summed rows using precomputed idx_map and sparsity of C */ +// void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, +// const int *idx_map); + +/* Fill accumulator for summing rows using precomputed idx_map for each nnz of A. + Must memset accumulator to zero before calling. */ +void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, + double *accumulator); +void idx_map_accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, + double *accumulator, int spacing); + +/* Sum blocks of rows of A into a matrix C */ +void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int row_block_size); + +/* Build sparsity and index map for summing blocks of rows. + * iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz. */ +void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_block_size, int *iwork, + int *idx_map); + +/* Sum evenly spaced rows of A into a matrix C */ +void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int row_spacing); + +/* Build sparsity and index map for summing evenly spaced rows. + * iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz. */ +void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_spacing, + int *iwork, int *idx_map); + +/* Sum evenly spaced rows of A starting at offset into a row matrix C */ +void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int offset, + int spacing); + +/* Fills the sparsity and index map for summing spaced rows into a row matrix */ +void sum_spaced_rows_into_row_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int spacing, int *iwork, + int *idx_map); + +#endif /* CSR_SUM_H */ diff --git a/include/utils/linalg.h b/include/utils/linalg_sparse_matmuls.h similarity index 100% rename from include/utils/linalg.h rename to include/utils/linalg_sparse_matmuls.h diff --git a/src/affine/add.c b/src/affine/add.c index 2d1542f..ce7b4b1 100644 --- a/src/affine/add.c +++ b/src/affine/add.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "affine.h" +#include "utils/CSR_sum.h" #include #include #include diff --git a/src/affine/hstack.c b/src/affine/hstack.c index d1ab63c..e2235d6 100644 --- a/src/affine/hstack.c +++ b/src/affine/hstack.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "affine.h" +#include "utils/CSR_sum.h" #include #include #include diff --git a/src/affine/sum.c b/src/affine/sum.c index 875cb66..d1fec14 100644 --- a/src/affine/sum.c +++ b/src/affine/sum.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "affine.h" +#include "utils/CSR_sum.h" #include "utils/int_double_pair.h" #include "utils/mini_numpy.h" #include "utils/utils.h" diff --git a/src/affine/trace.c b/src/affine/trace.c index 7095a77..1732c40 100644 --- a/src/affine/trace.c +++ b/src/affine/trace.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "affine.h" +#include "utils/CSR_sum.h" #include "utils/int_double_pair.h" #include "utils/utils.h" #include diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c index 7b1baee..31d8251 100644 --- a/src/bivariate/left_matmul.c +++ b/src/bivariate/left_matmul.c @@ -18,7 +18,7 @@ #include "bivariate.h" #include "subexpr.h" #include "utils/Timer.h" -#include "utils/linalg.h" +#include "utils/linalg_sparse_matmuls.h" #include #include #include diff --git a/src/bivariate/multiply.c b/src/bivariate/multiply.c index 70a6187..d8836ae 100644 --- a/src/bivariate/multiply.c +++ b/src/bivariate/multiply.c @@ -17,6 +17,7 @@ */ #include "bivariate.h" #include "subexpr.h" +#include "utils/CSR_sum.h" #include #include #include diff --git a/src/bivariate/right_matmul.c b/src/bivariate/right_matmul.c index 81cb317..9d8a4a7 100644 --- a/src/bivariate/right_matmul.c +++ b/src/bivariate/right_matmul.c @@ -17,7 +17,7 @@ */ #include "bivariate.h" #include "subexpr.h" -#include "utils/linalg.h" +#include "utils/linalg_sparse_matmuls.h" #include /* This file implements the atom 'right_matmul' corresponding to the operation y = diff --git a/src/problem.c b/src/problem.c index aae28bc..3413858 100644 --- a/src/problem.c +++ b/src/problem.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "problem.h" +#include "utils/CSR_sum.h" #include "utils/utils.h" #include #include diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c index fe6688a..513a457 100644 --- a/src/utils/CSR_Matrix.c +++ b/src/utils/CSR_Matrix.c @@ -220,625 +220,6 @@ void diag_csr_mult_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix } } -void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) -{ - /* A and B must be different from C */ - assert(A != C && B != C); - - C->nnz = 0; - - for (int row = 0; row < A->m; row++) - { - int a_ptr = A->p[row]; - int a_end = A->p[row + 1]; - int b_ptr = B->p[row]; - int b_end = B->p[row + 1]; - C->p[row] = C->nnz; - - /* Merge while both have elements */ - while (a_ptr < a_end && b_ptr < b_end) - { - if (A->i[a_ptr] < B->i[b_ptr]) - { - C->i[C->nnz] = A->i[a_ptr]; - C->x[C->nnz] = A->x[a_ptr]; - a_ptr++; - } - else if (B->i[b_ptr] < A->i[a_ptr]) - { - C->i[C->nnz] = B->i[b_ptr]; - C->x[C->nnz] = B->x[b_ptr]; - b_ptr++; - } - else - { - C->i[C->nnz] = A->i[a_ptr]; - C->x[C->nnz] = A->x[a_ptr] + B->x[b_ptr]; - a_ptr++; - b_ptr++; - } - C->nnz++; - } - - /* Copy remaining elements from A */ - if (a_ptr < a_end) - { - int a_remaining = a_end - a_ptr; - memcpy(C->i + C->nnz, A->i + a_ptr, a_remaining * sizeof(int)); - memcpy(C->x + C->nnz, A->x + a_ptr, a_remaining * sizeof(double)); - C->nnz += a_remaining; - } - - /* Copy remaining elements from B */ - if (b_ptr < b_end) - { - int b_remaining = b_end - b_ptr; - memcpy(C->i + C->nnz, B->i + b_ptr, b_remaining * sizeof(int)); - memcpy(C->x + C->nnz, B->x + b_ptr, b_remaining * sizeof(double)); - C->nnz += b_remaining; - } - } - - C->p[A->m] = C->nnz; -} - -void sum_csr_matrices_fill_sparsity(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C) -{ - /* A and B must be different from C */ - assert(A != C && B != C); - - C->nnz = 0; - - for (int row = 0; row < A->m; row++) - { - int a_ptr = A->p[row]; - int a_end = A->p[row + 1]; - int b_ptr = B->p[row]; - int b_end = B->p[row + 1]; - C->p[row] = C->nnz; - - /* Merge while both have elements (only column indices) */ - while (a_ptr < a_end && b_ptr < b_end) - { - if (A->i[a_ptr] < B->i[b_ptr]) - { - C->i[C->nnz] = A->i[a_ptr]; - a_ptr++; - } - else if (B->i[b_ptr] < A->i[a_ptr]) - { - C->i[C->nnz] = B->i[b_ptr]; - b_ptr++; - } - else - { - C->i[C->nnz] = A->i[a_ptr]; - a_ptr++; - b_ptr++; - } - C->nnz++; - } - - /* Copy remaining elements from A */ - if (a_ptr < a_end) - { - int a_remaining = a_end - a_ptr; - memcpy(C->i + C->nnz, A->i + a_ptr, a_remaining * sizeof(int)); - C->nnz += a_remaining; - } - - /* Copy remaining elements from B */ - if (b_ptr < b_end) - { - int b_remaining = b_end - b_ptr; - memcpy(C->i + C->nnz, B->i + b_ptr, b_remaining * sizeof(int)); - C->nnz += b_remaining; - } - } - - C->p[A->m] = C->nnz; -} - -void sum_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C) -{ - /* Assumes C->p and C->i already contain the sparsity pattern of A+B. - Fills only C->x accordingly. */ - for (int row = 0; row < A->m; row++) - { - int a_ptr = A->p[row]; - int a_end = A->p[row + 1]; - int b_ptr = B->p[row]; - int b_end = B->p[row + 1]; - - for (int c_ptr = C->p[row]; c_ptr < C->p[row + 1]; c_ptr++) - { - int col = C->i[c_ptr]; - double val = 0.0; - - if (a_ptr < a_end && A->i[a_ptr] == col) - { - val += A->x[a_ptr]; - a_ptr++; - } - - if (b_ptr < b_end && B->i[b_ptr] == col) - { - val += B->x[b_ptr]; - b_ptr++; - } - C->x[c_ptr] = val; - } - } -} - -void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C, const double *d1, - const double *d2) -{ - /* Assumes C->p and C->i already contain the sparsity pattern of A+B. - Fills only C->x accordingly with scaling. */ - for (int row = 0; row < A->m; row++) - { - int a_ptr = A->p[row]; - int a_end = A->p[row + 1]; - int b_ptr = B->p[row]; - int b_end = B->p[row + 1]; - - for (int c_ptr = C->p[row]; c_ptr < C->p[row + 1]; c_ptr++) - { - int col = C->i[c_ptr]; - double val = 0.0; - - if (a_ptr < a_end && A->i[a_ptr] == col) - { - val += d1[row] * A->x[a_ptr]; - a_ptr++; - } - - if (b_ptr < b_end && B->i[b_ptr] == col) - { - val += d2[row] * B->x[b_ptr]; - b_ptr++; - } - C->x[c_ptr] = val; - } - } -} - -void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, - const double *d1, const double *d2) -{ - C->nnz = 0; - - for (int row = 0; row < A->m; row++) - { - int a_ptr = A->p[row]; - int a_end = A->p[row + 1]; - int b_ptr = B->p[row]; - int b_end = B->p[row + 1]; - C->p[row] = C->nnz; - - /* Merge while both have elements */ - while (a_ptr < a_end && b_ptr < b_end) - { - if (A->i[a_ptr] < B->i[b_ptr]) - { - C->i[C->nnz] = A->i[a_ptr]; - C->x[C->nnz] = d1[row] * A->x[a_ptr]; - a_ptr++; - } - else if (B->i[b_ptr] < A->i[a_ptr]) - { - C->i[C->nnz] = B->i[b_ptr]; - C->x[C->nnz] = d2[row] * B->x[b_ptr]; - b_ptr++; - } - else - { - C->i[C->nnz] = A->i[a_ptr]; - C->x[C->nnz] = d1[row] * A->x[a_ptr] + d2[row] * B->x[b_ptr]; - a_ptr++; - b_ptr++; - } - C->nnz++; - } - - /* Copy remaining elements from A */ - if (a_ptr < a_end) - { - int a_remaining = a_end - a_ptr; - for (int j = 0; j < a_remaining; j++) - { - C->i[C->nnz + j] = A->i[a_ptr + j]; - C->x[C->nnz + j] = d1[row] * A->x[a_ptr + j]; - } - C->nnz += a_remaining; - } - - /* Copy remaining elements from B */ - if (b_ptr < b_end) - { - int b_remaining = b_end - b_ptr; - for (int j = 0; j < b_remaining; j++) - { - C->i[C->nnz + j] = B->i[b_ptr + j]; - C->x[C->nnz + j] = d2[row] * B->x[b_ptr + j]; - } - C->nnz += b_remaining; - } - } - - C->p[A->m] = C->nnz; -} - -void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, int_double_pair *pairs) -{ - assert(C->m == 1); - C->n = A->n; - C->p[0] = 0; - - /* copy A's values and column indices into pairs */ - set_int_double_pair_array(pairs, A->i, A->x, A->nnz); - - /* sort so columns are in order */ - sort_int_double_pair_array(pairs, A->nnz); - - /* merge entries with same columns and insert result in C */ - C->nnz = 0; - for (int j = 0; j < A->nnz;) - { - int current_col = pairs[j].col; - double sum_val = 0.0; - - /* sum all values with the same column */ - while (j < A->nnz && pairs[j].col == current_col) - { - sum_val += pairs[j].val; - j++; - } - - /* insert into C */ - C->i[C->nnz] = current_col; - C->x[C->nnz] = sum_val; - C->nnz++; - } - - C->p[1] = C->nnz; -} - -void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, - int_double_pair *pairs, int row_block_size) -{ - assert(A->m % row_block_size == 0); - int n_blocks = A->m / row_block_size; - assert(C->m == n_blocks); - C->n = A->n; - C->p[0] = 0; - - C->nnz = 0; - for (int block = 0; block < n_blocks; block++) - { - int start_row = block * row_block_size; - int end_row = start_row + row_block_size; - - /* copy block rows' values and column indices into pairs */ - int pair_idx = 0; - for (int row = start_row; row < end_row; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - pairs[pair_idx].col = A->i[j]; - pairs[pair_idx].val = A->x[j]; - pair_idx++; - } - } - - /* sort so columns are in order */ - sort_int_double_pair_array(pairs, pair_idx); - - /* merge entries with same columns and insert result in C */ - for (int j = 0; j < pair_idx;) - { - int current_col = pairs[j].col; - double sum_val = 0.0; - - /* sum all values with the same column */ - while (j < pair_idx && pairs[j].col == current_col) - { - sum_val += pairs[j].val; - j++; - } - - /* insert into C */ - C->i[C->nnz] = current_col; - C->x[C->nnz] = sum_val; - C->nnz++; - } - - C->p[block + 1] = C->nnz; - } -} - -/* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ -void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, - CSR_Matrix *C, - int row_block_size, int *iwork, - int *idx_map) -{ - assert(A->m % row_block_size == 0); - int n_blocks = A->m / row_block_size; - assert(C->m == n_blocks); - - C->n = A->n; - C->p[0] = 0; - C->nnz = 0; - - int *cols = iwork; - int *col_to_pos = iwork; - - for (int block = 0; block < n_blocks; block++) - { - int start_row = block * row_block_size; - int end_row = start_row + row_block_size; - - // ----------------------------------------------------------------- - // Build sparsity pattern of the row resulting from summing - // the block of rows from A - // ----------------------------------------------------------------- - C->p[block] = C->nnz; - int count = 0; - for (int row = start_row; row < end_row; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - cols[count++] = A->i[j]; - } - } - - /* Sort columns and write unique pattern into C->i */ - sort_int_array(cols, count); - - int unique_nnz = 0; - int prev_col = -1; - for (int t = 0; t < count; t++) - { - int col = cols[t]; - if (t == 0 || col != prev_col) - { - C->i[C->nnz + unique_nnz] = col; - prev_col = col; - unique_nnz++; - } - } - - C->nnz += unique_nnz; - C->p[block + 1] = C->nnz; - - // ----------------------------------------------------------------- - // Build idx_map for all entries in this block - // ----------------------------------------------------------------- - int row_start = C->p[block]; - for (int idx = 0; idx < unique_nnz; idx++) - { - col_to_pos[C->i[row_start + idx]] = row_start + idx; - } - - for (int row = start_row; row < end_row; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - idx_map[j] = col_to_pos[A->i[j]]; - } - } - } -} - -/* -void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, - const int *idx_map) -{ - memset(C->x, 0, C->nnz * sizeof(double)); - - for (int row = 0; row < A->m; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - C->x[idx_map[j]] += A->x[j]; - } - } -} -*/ - -void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, - struct int_double_pair *pairs, int row_spacing) -{ - assert(C->m == row_spacing); - C->n = A->n; - C->p[0] = 0; - C->nnz = 0; - - for (int C_row = 0; C_row < C->m; C_row++) - { - /* copy evenly spaced rows into pairs */ - int pair_idx = 0; - for (int row = C_row; row < A->m; row += row_spacing) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - pairs[pair_idx].col = A->i[j]; - pairs[pair_idx].val = A->x[j]; - pair_idx++; - } - } - - /* sort so columns are in order */ - sort_int_double_pair_array(pairs, pair_idx); - - /* merge entries with same columns and insert result in C */ - for (int j = 0; j < pair_idx;) - { - int current_col = pairs[j].col; - double sum_val = 0.0; - - /* sum all values with the same column */ - while (j < pair_idx && pairs[j].col == current_col) - { - sum_val += pairs[j].val; - j++; - } - - /* insert into C */ - C->i[C->nnz] = current_col; - C->x[C->nnz] = sum_val; - C->nnz++; - } - - C->p[C_row + 1] = C->nnz; - } -} - -/* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ -void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, - CSR_Matrix *C, - int row_spacing, - int *iwork, int *idx_map) -{ - assert(C->m == row_spacing); - C->n = A->n; - C->p[0] = 0; - C->nnz = 0; - - int *cols = iwork; - int *col_to_pos = iwork; - - for (int C_row = 0; C_row < C->m; C_row++) - { - // ----------------------------------------------------------------- - // Build sparsity pattern of the row resulting from summing - // evenly spaced rows from A - // ----------------------------------------------------------------- - C->p[C_row] = C->nnz; - int count = 0; - for (int row = C_row; row < A->m; row += row_spacing) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - cols[count++] = A->i[j]; - } - } - - /* Sort columns and write unique pattern into C->i */ - sort_int_array(cols, count); - - int unique_nnz = 0; - int prev_col = -1; - for (int t = 0; t < count; t++) - { - int col = cols[t]; - if (t == 0 || col != prev_col) - { - C->i[C->nnz + unique_nnz] = col; - prev_col = col; - unique_nnz++; - } - } - - C->nnz += unique_nnz; - C->p[C_row + 1] = C->nnz; - - // ----------------------------------------------------------------- - // Build idx_map for all entries in evenly spaced rows - // ----------------------------------------------------------------- - int row_start = C->p[C_row]; - for (int idx = 0; idx < unique_nnz; idx++) - { - col_to_pos[C->i[row_start + idx]] = row_start + idx; - } - - for (int row = C_row; row < A->m; row += row_spacing) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - idx_map[j] = col_to_pos[A->i[j]]; - } - } - } -} - -void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, - double *accumulator) -{ - /* don't forget to initialze accumulator to 0 before calling this */ - for (int row = 0; row < A->m; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - accumulator[idx_map[j]] += A->x[j]; - } - } -} - -void idx_map_accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, - double *accumulator, int spacing) -{ - /* don't forget to initialze accumulator to 0 before calling this */ - for (int row = 0; row < A->m; row += spacing) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - accumulator[idx_map[j]] += A->x[j]; - } - } -} - -void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, - struct int_double_pair *pairs, int offset, - int spacing) -{ - assert(C->m == 1); - C->n = A->n; - C->p[0] = 0; - C->nnz = 0; - - /* copy evenly spaced rows starting at offset into pairs */ - int pair_idx = 0; - for (int row = offset; row < A->m; row += spacing) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - pairs[pair_idx].col = A->i[j]; - pairs[pair_idx].val = A->x[j]; - pair_idx++; - } - } - - /* sort so columns are in order */ - sort_int_double_pair_array(pairs, pair_idx); - - /* merge entries with same columns and insert result in C */ - for (int j = 0; j < pair_idx;) - { - int current_col = pairs[j].col; - double sum_val = 0.0; - - /* sum all values with the same column */ - while (j < pair_idx && pairs[j].col == current_col) - { - sum_val += pairs[j].val; - j++; - } - - /* insert into C */ - C->i[C->nnz] = current_col; - C->x[C->nnz] = sum_val; - C->nnz++; - } - - C->p[1] = C->nnz; -} - void csr_insert_value(CSR_Matrix *A, int col_idx, double value) { assert(A->m == 1); @@ -1069,122 +450,3 @@ void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C) free(counts); } - -void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix *C, - int *iwork, int *idx_map) -{ - // ------------------------------------------------------------------- - // Build sparsity pattern of the summed row - // ------------------------------------------------------------------- - int *cols = iwork; - memcpy(cols, A->i, A->nnz * sizeof(int)); - sort_int_array(cols, A->nnz); - - int unique_nnz = 0; - int prev_col = -1; - for (int j = 0; j < A->nnz; j++) - { - if (cols[j] != prev_col) - { - C->i[unique_nnz] = cols[j]; - prev_col = cols[j]; - unique_nnz++; - } - } - - C->p[0] = 0; - C->p[1] = unique_nnz; - C->nnz = unique_nnz; - - // ------------------------------------------------------------------- - // Map child values to summed-row positions. col_to_pos maps - // column indices to positions in C's row. - // ------------------------------------------------------------------- - int *col_to_pos = iwork; - for (int idx = 0; idx < unique_nnz; idx++) - { - col_to_pos[C->i[idx]] = idx; - } - - for (int i = 0; i < A->m; i++) - { - for (int j = A->p[i]; j < A->p[i + 1]; j++) - { - idx_map[j] = col_to_pos[A->i[j]]; - } - } -} - -/* -void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, - const int *idx_map) -{ - memset(C->x, 0, C->nnz * sizeof(double)); - - for (int row = 0; row < A->m; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - C->x[idx_map[j]] += A->x[j]; - } - } -} -*/ - -/* - * Sums evenly spaced rows from A into a single row in C and fills an index map. - * A: input CSR matrix - * C: output CSR matrix (must have m=1) - * spacing: row spacing - * iwork: workspace of size at least max(A->n, A->nnz) - * idx_map: output index map, size at least A->nnz - */ -void sum_spaced_rows_into_row_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, - CSR_Matrix *C, - int spacing, int *iwork, - int *idx_map) -{ - assert(C->m == 1); - C->n = A->n; - - /* gather all column indices from the spaced rows */ - int count = 0; - for (int row = 0; row < A->m; row += spacing) - { - int len = A->p[row + 1] - A->p[row]; - memcpy(iwork + count, A->i + A->p[row], len * sizeof(int)); - count += len; - } - - /* fill sparsity pattern */ - sort_int_array(iwork, count); - int unique_nnz = 0; - int prev_col = -1; - for (int i = 0; i < count; i++) - { - int col = iwork[i]; - if (col != prev_col) - { - C->i[unique_nnz++] = col; - prev_col = col; - } - } - C->nnz = unique_nnz; - C->p[0] = 0; - C->p[1] = C->nnz; - - /* fill idx_map */ - int *col_to_pos = iwork; - for (int idx = 0; idx < unique_nnz; idx++) - { - col_to_pos[C->i[idx]] = idx; - } - - for (int row = 0; row < A->m; row += spacing) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - idx_map[j] = col_to_pos[A->i[j]]; - } - } -} diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c new file mode 100644 index 0000000..fd0a810 --- /dev/null +++ b/src/utils/CSR_sum.c @@ -0,0 +1,761 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the DNLP-differentiation-engine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "utils/CSR_sum.h" +#include "utils/CSR_Matrix.h" +#include "utils/int_double_pair.h" +#include "utils/utils.h" +#include +#include + +void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) +{ + /* A and B must be different from C */ + assert(A != C && B != C); + + C->nnz = 0; + + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + C->p[row] = C->nnz; + + /* Merge while both have elements */ + while (a_ptr < a_end && b_ptr < b_end) + { + if (A->i[a_ptr] < B->i[b_ptr]) + { + C->i[C->nnz] = A->i[a_ptr]; + C->x[C->nnz] = A->x[a_ptr]; + a_ptr++; + } + else if (B->i[b_ptr] < A->i[a_ptr]) + { + C->i[C->nnz] = B->i[b_ptr]; + C->x[C->nnz] = B->x[b_ptr]; + b_ptr++; + } + else + { + C->i[C->nnz] = A->i[a_ptr]; + C->x[C->nnz] = A->x[a_ptr] + B->x[b_ptr]; + a_ptr++; + b_ptr++; + } + C->nnz++; + } + + /* Copy remaining elements from A */ + if (a_ptr < a_end) + { + int a_remaining = a_end - a_ptr; + memcpy(C->i + C->nnz, A->i + a_ptr, a_remaining * sizeof(int)); + memcpy(C->x + C->nnz, A->x + a_ptr, a_remaining * sizeof(double)); + C->nnz += a_remaining; + } + + /* Copy remaining elements from B */ + if (b_ptr < b_end) + { + int b_remaining = b_end - b_ptr; + memcpy(C->i + C->nnz, B->i + b_ptr, b_remaining * sizeof(int)); + memcpy(C->x + C->nnz, B->x + b_ptr, b_remaining * sizeof(double)); + C->nnz += b_remaining; + } + } + + C->p[A->m] = C->nnz; +} + +void sum_csr_matrices_fill_sparsity(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C) +{ + /* A and B must be different from C */ + assert(A != C && B != C); + + C->nnz = 0; + + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + C->p[row] = C->nnz; + + /* Merge while both have elements (only column indices) */ + while (a_ptr < a_end && b_ptr < b_end) + { + if (A->i[a_ptr] < B->i[b_ptr]) + { + C->i[C->nnz] = A->i[a_ptr]; + a_ptr++; + } + else if (B->i[b_ptr] < A->i[a_ptr]) + { + C->i[C->nnz] = B->i[b_ptr]; + b_ptr++; + } + else + { + C->i[C->nnz] = A->i[a_ptr]; + a_ptr++; + b_ptr++; + } + C->nnz++; + } + + /* Copy remaining elements from A */ + if (a_ptr < a_end) + { + int a_remaining = a_end - a_ptr; + memcpy(C->i + C->nnz, A->i + a_ptr, a_remaining * sizeof(int)); + C->nnz += a_remaining; + } + + /* Copy remaining elements from B */ + if (b_ptr < b_end) + { + int b_remaining = b_end - b_ptr; + memcpy(C->i + C->nnz, B->i + b_ptr, b_remaining * sizeof(int)); + C->nnz += b_remaining; + } + } + + C->p[A->m] = C->nnz; +} + +void sum_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C) +{ + /* Assumes C->p and C->i already contain the sparsity pattern of A+B. + Fills only C->x accordingly. */ + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + + for (int c_ptr = C->p[row]; c_ptr < C->p[row + 1]; c_ptr++) + { + int col = C->i[c_ptr]; + double val = 0.0; + + if (a_ptr < a_end && A->i[a_ptr] == col) + { + val += A->x[a_ptr]; + a_ptr++; + } + + if (b_ptr < b_end && B->i[b_ptr] == col) + { + val += B->x[b_ptr]; + b_ptr++; + } + C->x[c_ptr] = val; + } + } +} + +void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, + CSR_Matrix *C, const double *d1, + const double *d2) +{ + /* Assumes C->p and C->i already contain the sparsity pattern of A+B. + Fills only C->x accordingly with scaling. */ + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + + for (int c_ptr = C->p[row]; c_ptr < C->p[row + 1]; c_ptr++) + { + int col = C->i[c_ptr]; + double val = 0.0; + + if (a_ptr < a_end && A->i[a_ptr] == col) + { + val += d1[row] * A->x[a_ptr]; + a_ptr++; + } + + if (b_ptr < b_end && B->i[b_ptr] == col) + { + val += d2[row] * B->x[b_ptr]; + b_ptr++; + } + C->x[c_ptr] = val; + } + } +} + +void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, + const double *d1, const double *d2) +{ + C->nnz = 0; + + for (int row = 0; row < A->m; row++) + { + int a_ptr = A->p[row]; + int a_end = A->p[row + 1]; + int b_ptr = B->p[row]; + int b_end = B->p[row + 1]; + C->p[row] = C->nnz; + + /* Merge while both have elements */ + while (a_ptr < a_end && b_ptr < b_end) + { + if (A->i[a_ptr] < B->i[b_ptr]) + { + C->i[C->nnz] = A->i[a_ptr]; + C->x[C->nnz] = d1[row] * A->x[a_ptr]; + a_ptr++; + } + else if (B->i[b_ptr] < A->i[a_ptr]) + { + C->i[C->nnz] = B->i[b_ptr]; + C->x[C->nnz] = d2[row] * B->x[b_ptr]; + b_ptr++; + } + else + { + C->i[C->nnz] = A->i[a_ptr]; + C->x[C->nnz] = d1[row] * A->x[a_ptr] + d2[row] * B->x[b_ptr]; + a_ptr++; + b_ptr++; + } + C->nnz++; + } + + /* Copy remaining elements from A */ + if (a_ptr < a_end) + { + int a_remaining = a_end - a_ptr; + for (int j = 0; j < a_remaining; j++) + { + C->i[C->nnz + j] = A->i[a_ptr + j]; + C->x[C->nnz + j] = d1[row] * A->x[a_ptr + j]; + } + C->nnz += a_remaining; + } + + /* Copy remaining elements from B */ + if (b_ptr < b_end) + { + int b_remaining = b_end - b_ptr; + for (int j = 0; j < b_remaining; j++) + { + C->i[C->nnz + j] = B->i[b_ptr + j]; + C->x[C->nnz + j] = d2[row] * B->x[b_ptr + j]; + } + C->nnz += b_remaining; + } + } + + C->p[A->m] = C->nnz; +} + +void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, int_double_pair *pairs) +{ + assert(C->m == 1); + C->n = A->n; + C->p[0] = 0; + + /* copy A's values and column indices into pairs */ + set_int_double_pair_array(pairs, A->i, A->x, A->nnz); + + /* sort so columns are in order */ + sort_int_double_pair_array(pairs, A->nnz); + + /* merge entries with same columns and insert result in C */ + C->nnz = 0; + for (int j = 0; j < A->nnz;) + { + int current_col = pairs[j].col; + double sum_val = 0.0; + + /* sum all values with the same column */ + while (j < A->nnz && pairs[j].col == current_col) + { + sum_val += pairs[j].val; + j++; + } + + /* insert into C */ + C->i[C->nnz] = current_col; + C->x[C->nnz] = sum_val; + C->nnz++; + } + + C->p[1] = C->nnz; +} + +void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, + int_double_pair *pairs, int row_block_size) +{ + assert(A->m % row_block_size == 0); + int n_blocks = A->m / row_block_size; + assert(C->m == n_blocks); + C->n = A->n; + C->p[0] = 0; + + C->nnz = 0; + for (int block = 0; block < n_blocks; block++) + { + int start_row = block * row_block_size; + int end_row = start_row + row_block_size; + + /* copy block rows' values and column indices into pairs */ + int pair_idx = 0; + for (int row = start_row; row < end_row; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + pairs[pair_idx].col = A->i[j]; + pairs[pair_idx].val = A->x[j]; + pair_idx++; + } + } + + /* sort so columns are in order */ + sort_int_double_pair_array(pairs, pair_idx); + + /* merge entries with same columns and insert result in C */ + for (int j = 0; j < pair_idx;) + { + int current_col = pairs[j].col; + double sum_val = 0.0; + + /* sum all values with the same column */ + while (j < pair_idx && pairs[j].col == current_col) + { + sum_val += pairs[j].val; + j++; + } + + /* insert into C */ + C->i[C->nnz] = current_col; + C->x[C->nnz] = sum_val; + C->nnz++; + } + + C->p[block + 1] = C->nnz; + } +} + +/* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ +void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_block_size, int *iwork, + int *idx_map) +{ + assert(A->m % row_block_size == 0); + int n_blocks = A->m / row_block_size; + assert(C->m == n_blocks); + + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + int *cols = iwork; + int *col_to_pos = iwork; + + for (int block = 0; block < n_blocks; block++) + { + int start_row = block * row_block_size; + int end_row = start_row + row_block_size; + + // ----------------------------------------------------------------- + // Build sparsity pattern of the row resulting from summing + // the block of rows from A + // ----------------------------------------------------------------- + C->p[block] = C->nnz; + int count = 0; + for (int row = start_row; row < end_row; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + cols[count++] = A->i[j]; + } + } + + /* Sort columns and write unique pattern into C->i */ + sort_int_array(cols, count); + + int unique_nnz = 0; + int prev_col = -1; + for (int t = 0; t < count; t++) + { + int col = cols[t]; + if (t == 0 || col != prev_col) + { + C->i[C->nnz + unique_nnz] = col; + prev_col = col; + unique_nnz++; + } + } + + C->nnz += unique_nnz; + C->p[block + 1] = C->nnz; + + // ----------------------------------------------------------------- + // Build idx_map for all entries in this block + // ----------------------------------------------------------------- + int row_start = C->p[block]; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[row_start + idx]] = row_start + idx; + } + + for (int row = start_row; row < end_row; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } + } +} + +/* +void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, + const int *idx_map) +{ + memset(C->x, 0, C->nnz * sizeof(double)); + + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + C->x[idx_map[j]] += A->x[j]; + } + } +} +*/ + +void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int row_spacing) +{ + assert(C->m == row_spacing); + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + for (int C_row = 0; C_row < C->m; C_row++) + { + /* copy evenly spaced rows into pairs */ + int pair_idx = 0; + for (int row = C_row; row < A->m; row += row_spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + pairs[pair_idx].col = A->i[j]; + pairs[pair_idx].val = A->x[j]; + pair_idx++; + } + } + + /* sort so columns are in order */ + sort_int_double_pair_array(pairs, pair_idx); + + /* merge entries with same columns and insert result in C */ + for (int j = 0; j < pair_idx;) + { + int current_col = pairs[j].col; + double sum_val = 0.0; + + /* sum all values with the same column */ + while (j < pair_idx && pairs[j].col == current_col) + { + sum_val += pairs[j].val; + j++; + } + + /* insert into C */ + C->i[C->nnz] = current_col; + C->x[C->nnz] = sum_val; + C->nnz++; + } + + C->p[C_row + 1] = C->nnz; + } +} + +/* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ +void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int row_spacing, + int *iwork, int *idx_map) +{ + assert(C->m == row_spacing); + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + int *cols = iwork; + int *col_to_pos = iwork; + + for (int C_row = 0; C_row < C->m; C_row++) + { + // ----------------------------------------------------------------- + // Build sparsity pattern of the row resulting from summing + // evenly spaced rows from A + // ----------------------------------------------------------------- + C->p[C_row] = C->nnz; + int count = 0; + for (int row = C_row; row < A->m; row += row_spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + cols[count++] = A->i[j]; + } + } + + /* Sort columns and write unique pattern into C->i */ + sort_int_array(cols, count); + + int unique_nnz = 0; + int prev_col = -1; + for (int t = 0; t < count; t++) + { + int col = cols[t]; + if (t == 0 || col != prev_col) + { + C->i[C->nnz + unique_nnz] = col; + prev_col = col; + unique_nnz++; + } + } + + C->nnz += unique_nnz; + C->p[C_row + 1] = C->nnz; + + // ----------------------------------------------------------------- + // Build idx_map for all entries in evenly spaced rows + // ----------------------------------------------------------------- + int row_start = C->p[C_row]; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[row_start + idx]] = row_start + idx; + } + + for (int row = C_row; row < A->m; row += row_spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } + } +} + +void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, + double *accumulator) +{ + /* don't forget to initialze accumulator to 0 before calling this */ + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + accumulator[idx_map[j]] += A->x[j]; + } + } +} + +void idx_map_accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, + double *accumulator, int spacing) +{ + /* don't forget to initialze accumulator to 0 before calling this */ + for (int row = 0; row < A->m; row += spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + accumulator[idx_map[j]] += A->x[j]; + } + } +} + +void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, + struct int_double_pair *pairs, int offset, + int spacing) +{ + assert(C->m == 1); + C->n = A->n; + C->p[0] = 0; + C->nnz = 0; + + /* copy evenly spaced rows starting at offset into pairs */ + int pair_idx = 0; + for (int row = offset; row < A->m; row += spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + pairs[pair_idx].col = A->i[j]; + pairs[pair_idx].val = A->x[j]; + pair_idx++; + } + } + + /* sort so columns are in order */ + sort_int_double_pair_array(pairs, pair_idx); + + /* merge entries with same columns and insert result in C */ + for (int j = 0; j < pair_idx;) + { + int current_col = pairs[j].col; + double sum_val = 0.0; + + /* sum all values with the same column */ + while (j < pair_idx && pairs[j].col == current_col) + { + sum_val += pairs[j].val; + j++; + } + + /* insert into C */ + C->i[C->nnz] = current_col; + C->x[C->nnz] = sum_val; + C->nnz++; + } + + C->p[1] = C->nnz; +} + +void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix *C, + int *iwork, int *idx_map) +{ + // ------------------------------------------------------------------- + // Build sparsity pattern of the summed row + // ------------------------------------------------------------------- + int *cols = iwork; + memcpy(cols, A->i, A->nnz * sizeof(int)); + sort_int_array(cols, A->nnz); + + int unique_nnz = 0; + int prev_col = -1; + for (int j = 0; j < A->nnz; j++) + { + if (cols[j] != prev_col) + { + C->i[unique_nnz] = cols[j]; + prev_col = cols[j]; + unique_nnz++; + } + } + + C->p[0] = 0; + C->p[1] = unique_nnz; + C->nnz = unique_nnz; + + // ------------------------------------------------------------------- + // Map child values to summed-row positions. col_to_pos maps + // column indices to positions in C's row. + // ------------------------------------------------------------------- + int *col_to_pos = iwork; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[idx]] = idx; + } + + for (int i = 0; i < A->m; i++) + { + for (int j = A->p[i]; j < A->p[i + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } +} + +/* +void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, + const int *idx_map) +{ + memset(C->x, 0, C->nnz * sizeof(double)); + + for (int row = 0; row < A->m; row++) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + C->x[idx_map[j]] += A->x[j]; + } + } +} +*/ + +/* + * Sums evenly spaced rows from A into a single row in C and fills an index map. + * A: input CSR matrix + * C: output CSR matrix (must have m=1) + * spacing: row spacing + * iwork: workspace of size at least max(A->n, A->nnz) + * idx_map: output index map, size at least A->nnz + */ +void sum_spaced_rows_into_row_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, + CSR_Matrix *C, + int spacing, int *iwork, + int *idx_map) +{ + assert(C->m == 1); + C->n = A->n; + + /* gather all column indices from the spaced rows */ + int count = 0; + for (int row = 0; row < A->m; row += spacing) + { + int len = A->p[row + 1] - A->p[row]; + memcpy(iwork + count, A->i + A->p[row], len * sizeof(int)); + count += len; + } + + /* fill sparsity pattern */ + sort_int_array(iwork, count); + int unique_nnz = 0; + int prev_col = -1; + for (int i = 0; i < count; i++) + { + int col = iwork[i]; + if (col != prev_col) + { + C->i[unique_nnz++] = col; + prev_col = col; + } + } + C->nnz = unique_nnz; + C->p[0] = 0; + C->p[1] = C->nnz; + + /* fill idx_map */ + int *col_to_pos = iwork; + for (int idx = 0; idx < unique_nnz; idx++) + { + col_to_pos[C->i[idx]] = idx; + } + + for (int row = 0; row < A->m; row += spacing) + { + for (int j = A->p[row]; j < A->p[row + 1]; j++) + { + idx_map[j] = col_to_pos[A->i[j]]; + } + } +} diff --git a/src/utils/linalg.c b/src/utils/linalg_sparse_matmuls.c similarity index 100% rename from src/utils/linalg.c rename to src/utils/linalg_sparse_matmuls.c diff --git a/tests/all_tests.c b/tests/all_tests.c index 3b23e45..6aa699a 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -46,7 +46,7 @@ #include "utils/test_csc_matrix.h" #include "utils/test_csr_csc_conversion.h" #include "utils/test_csr_matrix.h" -#include "utils/test_linalg.h" +#include "utils/test_linalg_sparse_matmuls.h" #include "wsum_hess/elementwise/test_entr.h" #include "wsum_hess/elementwise/test_exp.h" #include "wsum_hess/elementwise/test_hyperbolic.h" diff --git a/tests/utils/test_csr_matrix.h b/tests/utils/test_csr_matrix.h index 7a42970..09b2e7d 100644 --- a/tests/utils/test_csr_matrix.h +++ b/tests/utils/test_csr_matrix.h @@ -5,6 +5,7 @@ #include "minunit.h" #include "test_helpers.h" #include "utils/CSR_Matrix.h" +#include "utils/CSR_sum.h" #include "utils/int_double_pair.h" const char *test_diag_csr_mult() diff --git a/tests/utils/test_linalg.h b/tests/utils/test_linalg_sparse_matmuls.h similarity index 99% rename from tests/utils/test_linalg.h rename to tests/utils/test_linalg_sparse_matmuls.h index e7a6896..6fd5bfa 100644 --- a/tests/utils/test_linalg.h +++ b/tests/utils/test_linalg_sparse_matmuls.h @@ -7,7 +7,7 @@ #include "test_helpers.h" #include "utils/CSC_Matrix.h" #include "utils/CSR_Matrix.h" -#include "utils/linalg.h" +#include "utils/linalg_sparse_matmuls.h" /* Test block_left_multiply_fill_sparsity with simple case: single block */ const char *test_block_left_multiply_single_block() From a2e08a85570135c35403727d4c6b3eb86090b37e Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Feb 2026 10:27:28 -0800 Subject: [PATCH 2/3] clean up documentation of CSR_Matrix.h --- include/utils/CSR_Matrix.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_Matrix.h index 00d75ee..7ba03b5 100644 --- a/include/utils/CSR_Matrix.h +++ b/include/utils/CSR_Matrix.h @@ -36,16 +36,13 @@ CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork); CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork); void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork); -/* Build block-diagonal repeat A_blk = I_p kron A. Returns newly allocated CSR - * matrix of size (p*A->m) x (p*A->n) with nnz = p*A->nnz. */ +/* Build (I_p kron A) = blkdiag(A, A, ..., A) of size (p*A->m) x (p*A->n) */ CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p); -/* Build left-repeated Kronecker A_kron = A kron I_p. Returns newly allocated CSR - * matrix of size (A->m * p) x (A->n * p) with nnz = A->nnz * p. */ +/* Build (A kron I_p) of size (A->m * p) x (A->n * p) with nnz = A->nnz * p. */ CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p); -/* matvec y = Ax, where A indices minus col_offset gives x indices. Returns y as - * dense. */ +/* y = Ax, where y is returned as dense */ void csr_matvec(const CSR_Matrix *A, const double *x, double *y, int col_offset); void csr_matvec_wo_offset(const CSR_Matrix *A, const double *x, double *y); From ff5dd34225fc9e7e2b69ecdb2adf27401daf442f Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Feb 2026 11:22:46 -0800 Subject: [PATCH 3/3] replace right matmul with left matmul --- include/utils/CSR_Matrix.h | 5 +- src/bivariate/right_matmul.c | 132 ++++------------------------------- 2 files changed, 14 insertions(+), 123 deletions(-) diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_Matrix.h index 7ba03b5..dc0bad3 100644 --- a/include/utils/CSR_Matrix.h +++ b/include/utils/CSR_Matrix.h @@ -46,9 +46,8 @@ CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p); void csr_matvec(const CSR_Matrix *A, const double *x, double *y, int col_offset); void csr_matvec_wo_offset(const CSR_Matrix *A, const double *x, double *y); -/* C = z^T A is assumed to have one row. C must have column indices pre-computed -and transposed matrix AT must be provided. Fills in values of C only. - */ +/* Computes values of the row matrix C = z^T A (column indices must have been + pre-computed) and transposed matrix AT must be provided) */ void csr_matvec_fill_values(const CSR_Matrix *AT, const double *z, CSR_Matrix *C); /* Insert value into CSR matrix A with just one row at col_idx. Assumes that A diff --git a/src/bivariate/right_matmul.c b/src/bivariate/right_matmul.c index 9d8a4a7..6ec593b 100644 --- a/src/bivariate/right_matmul.c +++ b/src/bivariate/right_matmul.c @@ -15,137 +15,29 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "affine.h" #include "bivariate.h" #include "subexpr.h" +#include "utils/CSR_Matrix.h" #include "utils/linalg_sparse_matmuls.h" #include /* This file implements the atom 'right_matmul' corresponding to the operation y = f(x) @ A, where A is a given matrix and f(x) is an arbitrary expression. - Here, f(x) can be a vector-valued expression and a matrix-valued - expression. The dimensions are f(x) - p x n, A - n x q, y - p x q. - Note that here A does not have global column indices but it is a local matrix. - This is an important distinction compared to linear_op_expr. - - * To compute the forward pass: vec(y) = B @ vec(f(x)), - where B = A^T kron I_p is a Kronecker product of size (p*q) x (p*n). - - * To compute the Jacobian: J_y = B @ J_f(x), where J_f(x) is the - Jacobian of f(x) of size (p*n) x n_vars. - - * To compute the contribution to the Lagrange Hessian: we form - w_child = B^T @ w and then evaluate the hessian of f(x). -*/ - -static void forward(expr *node, const double *u) -{ - expr *x = node->left; - - /* child's forward pass */ - node->left->forward(node->left, u); - - /* y = x * A, vec(y) = B @ vec(x) */ - csr_matvec_wo_offset(((right_matmul_expr *) node)->B, x->value, node->value); -} - -static bool is_affine(const expr *node) -{ - return node->left->is_affine(node->left); -} - -static void free_type_data(expr *node) -{ - right_matmul_expr *right_node = (right_matmul_expr *) node; - free_csr_matrix(right_node->B); - free_csr_matrix(right_node->BT); - if (right_node->CSC_work) - { - free_csc_matrix(right_node->CSC_work); - } - right_node->B = NULL; - right_node->BT = NULL; - right_node->CSC_work = NULL; -} - -static void jacobian_init(expr *node) -{ - expr *x = node->left; - right_matmul_expr *right_node = (right_matmul_expr *) node; - - /* initialize child's jacobian and precompute sparsity of its transpose */ - x->jacobian_init(x); - right_node->CSC_work = csr_to_csc_fill_sparsity(x->jacobian, node->iwork); - - /* precompute sparsity of this node's jacobian */ - node->jacobian = csr_csc_matmul_alloc(right_node->B, right_node->CSC_work); -} - -static void eval_jacobian(expr *node) -{ - expr *x = node->left; - right_matmul_expr *right_node = (right_matmul_expr *) node; - - /* evaluate child's jacobian and convert to CSC*/ - x->eval_jacobian(x); - csr_to_csc_fill_values(x->jacobian, right_node->CSC_work, node->iwork); - - /* compute this node's jacobian */ - csr_csc_matmul_fill_values(right_node->B, right_node->CSC_work, node->jacobian); -} - -static void wsum_hess_init(expr *node) -{ - /* initialize child's hessian */ - expr *x = node->left; - x->wsum_hess_init(x); - - /* allocate this node's hessian with the same sparsity as child's */ - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, x->wsum_hess->nnz); - memcpy(node->wsum_hess->p, x->wsum_hess->p, (node->n_vars + 1) * sizeof(int)); - memcpy(node->wsum_hess->i, x->wsum_hess->i, x->wsum_hess->nnz * sizeof(int)); - - /* Allocate workspace for B^T @ w */ - node->dwork = (double *) malloc(x->d1 * x->d2 * sizeof(double)); -} - -static void eval_wsum_hess(expr *node, const double *w) -{ - /* Compute B^T @ w, where B = A^T ⊗ I_p */ - right_matmul_expr *right_node = (right_matmul_expr *) node; - - /* B^T @ w computes the weights for the child expression */ - csr_matvec_wo_offset(right_node->BT, w, node->dwork); - - /* Propagate to child */ - node->left->eval_wsum_hess(node->left, node->dwork); - memcpy(node->wsum_hess->x, node->left->wsum_hess->x, - node->wsum_hess->nnz * sizeof(double)); -} - + We implement this by expressing right matmul in terms of left matmul and + transpose: f(x) @ A = (A^T @ f(x)^T)^T. */ expr *new_right_matmul(expr *u, const CSR_Matrix *A) { - /* Allocate the type-specific struct */ - right_matmul_expr *right_matmul_node = - (right_matmul_expr *) calloc(1, sizeof(right_matmul_expr)); - expr *node = &right_matmul_node->base; - - /* Output dimensions: u is p x n, A is n x q, output is p x q */ - int p = u->d1; - int q = A->n; + /* We can express right matmul using left matmul and transpose: + u @ A = (A^T @ u^T)^T. */ + int *work_transpose = (int *) malloc(A->n * sizeof(int)); + CSR_Matrix *AT = transpose(A, work_transpose); - init_expr(node, p, q, u->n_vars, forward, jacobian_init, eval_jacobian, - is_affine, wsum_hess_init, eval_wsum_hess, free_type_data); - node->left = u; - expr_retain(u); + expr *u_transpose = new_transpose(u); + expr *left_matmul = new_left_matmul(u_transpose, AT); + expr *node = new_transpose(left_matmul); - /* create B = A^T kron I_p and its transpose */ - node->iwork = (int *) malloc(node->n_vars * sizeof(int)); - CSR_Matrix *AT = transpose(A, node->iwork); - right_matmul_node->B = kron_identity_csr(AT, p); - right_matmul_node->BT = kron_identity_csr(A, p); free_csr_matrix(AT); - - right_matmul_node->CSC_work = NULL; - + free(work_transpose); return node; }