Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/affine.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ expr *new_variable(int d1, int d2, int var_id, int n_vars);

expr *new_index(expr *child, const int *indices, int n_idxs);
expr *new_reshape(expr *child, int d1, int d2);
expr *new_broadcast(expr *child, int target_d1, int target_d2);

#endif /* AFFINE_H */
16 changes: 16 additions & 0 deletions include/subexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,20 @@ typedef struct index_expr
bool has_duplicates; /* True if indices have duplicates (affects Hessian path) */
} index_expr;

/* Broadcast types */
typedef enum
{
BROADCAST_ROW, /* (1, n) -> (m, n) */
BROADCAST_COL, /* (m, 1) -> (m, n) */
BROADCAST_SCALAR /* (1, 1) -> (m, n) */
} broadcast_type;

typedef struct broadcast_expr
{
expr base;
broadcast_type type;
int m; /* target rows */
int n; /* target cols */
} broadcast_expr;

#endif /* SUBEXPR_H */
3 changes: 2 additions & 1 deletion include/utils/mini_numpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ void repeat(double *result, const double *a, int len, int repeats);
* Example: a = [1, 2], len = 2, tiles = 3
* result = [1, 2, 1, 2, 1, 2]
*/
void tile(double *result, const double *a, int len, int tiles);
void tile_double(double *result, const double *a, int len, int tiles);
void tile_int(int *result, const int *a, int len, int tiles);

/* Fill array with 'size' copies of 'value'
* Example: size = 5, value = 3.0
Expand Down
275 changes: 275 additions & 0 deletions src/affine/broadcast.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
#include "affine.h"
#include "subexpr.h"
#include "utils/mini_numpy.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Broadcast expands an array to a larger shape by replicating along dimensions.
* Supports three types:
* 1. "row": (1, n) -> (m, n) - replicate rows
* 2. "col": (m, 1) -> (m, n) - replicate columns
* 3. "scalar": (1, 1) -> (m, n) - replicate in both dimensions
*/

static void forward(expr *node, const double *u)
{
expr *x = node->left;
broadcast_expr *bcast = (broadcast_expr *) node;

x->forward(x, u);

if (bcast->type == BROADCAST_ROW)
{
/* (1, n) -> (m, n): replicate row m times */
for (int j = 0; j < bcast->n; j++)
{
for (int i = 0; i < bcast->m; i++)
{
node->value[i + j * bcast->m] = x->value[j];
}
}
}
else if (bcast->type == BROADCAST_COL)
{
/* (m, 1) -> (m, n): replicate column n times */
for (int j = 0; j < bcast->n; j++)
{
memcpy(node->value + j * bcast->m, x->value, bcast->m * sizeof(double));
}
}
else
{
/* (1, 1) -> (m, n): fill with scalar value */
for (int k = 0; k < node->size; k++)
{
node->value[k] = x->value[0];
}
}
}

static void jacobian_init(expr *node)
{
expr *x = node->left;
x->jacobian_init(x);
broadcast_expr *bcast = (broadcast_expr *) node;
int total_nnz;

// --------------------------------------------------------------------
// count number of nonzeros
// --------------------------------------------------------------------
if (bcast->type == BROADCAST_ROW)
{
/* Row broadcast: (1, n) -> (m, n) */
total_nnz = x->jacobian->nnz * bcast->m;
}
else if (bcast->type == BROADCAST_COL)
{
/* Column broadcast: (m, 1) -> (m, n) */
total_nnz = x->jacobian->nnz * bcast->n;
}
else
{
/* Scalar broadcast: (1, 1) -> (m, n) */
total_nnz = x->jacobian->nnz * bcast->m * bcast->n;
}

node->jacobian = new_csr_matrix(node->size, node->n_vars, total_nnz);

// ---------------------------------------------------------------------
// fill sparsity pattern
// ---------------------------------------------------------------------
CSR_Matrix *Jx = x->jacobian;
CSR_Matrix *J = node->jacobian;
J->nnz = 0;

if (bcast->type == BROADCAST_ROW)
{
for (int i = 0; i < bcast->n; i++)
{
int nnz_in_row = Jx->p[i + 1] - Jx->p[i];

/* copy columns indices */
tile_int(J->i + J->nnz, Jx->i + Jx->p[i], nnz_in_row, bcast->m);

/* set row pointers */
for (int rep = 0; rep < bcast->m; rep++)
{
J->p[i * bcast->m + rep] = J->nnz;
J->nnz += nnz_in_row;
}
}
}
else if (bcast->type == BROADCAST_COL)
{

/* copy column indices */
tile_int(J->i, Jx->i, Jx->nnz, bcast->n);

/* set row pointers */
int offset = 0;
for (int i = 0; i < bcast->n; i++)
{
for (int j = 0; j < bcast->m; j++)
{
J->p[i * bcast->m + j] = offset;
offset += Jx->p[1] - Jx->p[0];
}
}
assert(offset == total_nnz);
J->p[node->size] = total_nnz;
}
else
{
/* copy column indices */
tile_int(J->i, Jx->i, Jx->nnz, bcast->m * bcast->n);

/* set row pointers */
int offset = 0;
int nnz = Jx->p[1] - Jx->p[0];
for (int i = 0; i < bcast->m * bcast->n; i++)
{
J->p[i] = offset;
offset += nnz;
}
assert(offset == total_nnz);
J->p[node->size] = total_nnz;
}
}

static void eval_jacobian(expr *node)
{
node->left->eval_jacobian(node->left);

broadcast_expr *bcast = (broadcast_expr *) node;
CSR_Matrix *Jx = node->left->jacobian;
CSR_Matrix *J = node->jacobian;
J->nnz = 0;

if (bcast->type == BROADCAST_ROW)
{
for (int i = 0; i < bcast->n; i++)
{
int nnz_in_row = Jx->p[i + 1] - Jx->p[i];
tile_double(J->x + J->nnz, Jx->x + Jx->p[i], nnz_in_row, bcast->m);
J->nnz += nnz_in_row * bcast->m;
}
}
else if (bcast->type == BROADCAST_COL)
{
tile_double(J->x, Jx->x, Jx->nnz, bcast->n);
}
else
{
tile_double(J->x, Jx->x, Jx->nnz, bcast->m * bcast->n);
}
}

static void wsum_hess_init(expr *node)
{
expr *x = node->left;
x->wsum_hess_init(x);

/* Same sparsity as child - weights get summed */
node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, x->wsum_hess->nnz);
memcpy(node->wsum_hess->p, x->wsum_hess->p, (x->wsum_hess->m + 1) * sizeof(int));
memcpy(node->wsum_hess->i, x->wsum_hess->i, x->wsum_hess->nnz * sizeof(int));

/* allocate space for weight vector */
node->dwork = malloc(node->size * sizeof(double));
}

static void eval_wsum_hess(expr *node, const double *w)
{
broadcast_expr *bcast = (broadcast_expr *) node;
expr *x = node->left;

/* Zero out the work array first */
memset(node->dwork, 0, x->size * sizeof(double));

if (bcast->type == BROADCAST_ROW)
{
/* (1, n) -> (m, n): each input element has m weights to sum */
for (int j = 0; j < bcast->n; j++)
{
for (int i = 0; i < bcast->m; i++)
{
node->dwork[j] += w[i + j * bcast->m];
}
}
}
else if (bcast->type == BROADCAST_COL)
{
/* (m, 1) -> (m, n): each input element has n weights to sum */
for (int j = 0; j < bcast->n; j++)
{
for (int i = 0; i < bcast->m; i++)
{
node->dwork[i] += w[i + j * bcast->m];
}
}
}
else
{
/* (1, 1) -> (m, n): scalar has m*n weights to sum */
node->dwork[0] = 0.0;
for (int k = 0; k < bcast->m * bcast->n; k++)
{
node->dwork[0] += w[k];
}
}

x->eval_wsum_hess(x, node->dwork);
memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double));
}

static bool is_affine(const expr *node)
{
return node->left->is_affine(node->left);
}

expr *new_broadcast(expr *child, int target_d1, int target_d2)
{
// ---------------------------------------------------------------------------
// determine broadcast type
// ---------------------------------------------------------------------------
broadcast_type type;
int m = target_d1;
int n = target_d2;

if (child->d1 == 1 && child->d2 == n)
{
type = BROADCAST_ROW;
}
else if (child->d1 == m && child->d2 == 1)
{
type = BROADCAST_COL;
}
else if (child->d1 == 1 && child->d2 == 1)
{
type = BROADCAST_SCALAR;
}
else
{
assert(false);
}

broadcast_expr *bcast = (broadcast_expr *) calloc(1, sizeof(broadcast_expr));
expr *node = (expr *) bcast;

// --------------------------------------------------------------------------
// initialize the rest of the expression
// --------------------------------------------------------------------------
init_expr(node, target_d1, target_d2, child->n_vars, forward, jacobian_init,
eval_jacobian, is_affine, NULL);
node->left = child;
expr_retain(child);
node->wsum_hess_init = wsum_hess_init;
node->eval_wsum_hess = eval_wsum_hess;
bcast->type = type;
bcast->m = m;
bcast->n = n;

return node;
}
2 changes: 1 addition & 1 deletion src/affine/sum.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static void eval_wsum_hess(expr *node, const double *w)
}
else if (axis == 1)
{
tile(node->dwork, w, x->d1, x->d2);
tile_double(node->dwork, w, x->d1, x->d2);
}

x->eval_wsum_hess(x, node->dwork);
Expand Down
15 changes: 14 additions & 1 deletion src/utils/mini_numpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,20 @@ void repeat(double *result, const double *a, int len, int repeats)
}
}

void tile(double *result, const double *a, int len, int tiles)
/* TODO: we can use memcpy here */
void tile_double(double *result, const double *a, int len, int tiles)
{
int idx = 0;
for (int i = 0; i < tiles; i++)
{
for (int j = 0; j < len; j++)
{
result[idx++] = a[j];
}
}
}

void tile_int(int *result, const int *a, int len, int tiles)
{
int idx = 0;
for (int i = 0; i < tiles; i++)
Expand Down
12 changes: 12 additions & 0 deletions tests/all_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

/* Include all test headers */
#include "forward_pass/affine/test_add.h"
#include "forward_pass/affine/test_broadcast.h"
#include "forward_pass/affine/test_hstack.h"
#include "forward_pass/affine/test_linear_op.h"
#include "forward_pass/affine/test_neg.h"
Expand All @@ -13,6 +14,7 @@
#include "forward_pass/composite/test_composite.h"
#include "forward_pass/elementwise/test_exp.h"
#include "forward_pass/elementwise/test_log.h"
#include "jacobian_tests/test_broadcast.h"
#include "jacobian_tests/test_composite.h"
#include "jacobian_tests/test_const_scalar_mult.h"
#include "jacobian_tests/test_const_vector_mult.h"
Expand Down Expand Up @@ -41,6 +43,7 @@
#include "wsum_hess/elementwise/test_power.h"
#include "wsum_hess/elementwise/test_trig.h"
#include "wsum_hess/elementwise/test_xexp.h"
#include "wsum_hess/test_broadcast.h"
#include "wsum_hess/test_const_scalar_mult.h"
#include "wsum_hess/test_const_vector_mult.h"
#include "wsum_hess/test_hstack.h"
Expand Down Expand Up @@ -76,6 +79,9 @@ int main(void)
mu_run_test(test_sum_axis_1, tests_run);
mu_run_test(test_hstack_forward_vectors, tests_run);
mu_run_test(test_hstack_forward_matrix, tests_run);
mu_run_test(test_broadcast_row, tests_run);
mu_run_test(test_broadcast_col, tests_run);
mu_run_test(test_broadcast_matrix, tests_run);

printf("\n--- Jacobian Tests ---\n");
mu_run_test(test_neg_jacobian, tests_run);
Expand Down Expand Up @@ -121,6 +127,9 @@ int main(void)
mu_run_test(test_sum_of_index, tests_run);
mu_run_test(test_promote_scalar_jacobian, tests_run);
mu_run_test(test_promote_scalar_to_matrix_jacobian, tests_run);
mu_run_test(test_broadcast_row_jacobian, tests_run);
mu_run_test(test_broadcast_col_jacobian, tests_run);
mu_run_test(test_broadcast_scalar_to_matrix_jacobian, tests_run);
mu_run_test(test_wsum_hess_multiply_1, tests_run);
mu_run_test(test_wsum_hess_multiply_2, tests_run);
mu_run_test(test_jacobian_trace_variable, tests_run);
Expand Down Expand Up @@ -177,6 +186,9 @@ int main(void)
mu_run_test(test_wsum_hess_left_matmul_composite, tests_run);
mu_run_test(test_wsum_hess_right_matmul, tests_run);
mu_run_test(test_wsum_hess_right_matmul_vector, tests_run);
mu_run_test(test_wsum_hess_broadcast_row, tests_run);
mu_run_test(test_wsum_hess_broadcast_col, tests_run);
mu_run_test(test_wsum_hess_broadcast_scalar_to_matrix, tests_run);
// This test leads to seg fault
// mu_run_test(test_wsum_hess_trace_variable, tests_run);

Expand Down
Loading
Loading