diff --git a/CMakeLists.txt b/CMakeLists.txt index f3c5531..d604fe0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,11 @@ target_compile_options(dnlp_diff PRIVATE $<$:-Os -DNDEBUG> ) +# This is needed for clock_gettime on Linux without compiler extensions +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_compile_definitions(_POSIX_C_SOURCE=200809L) +endif() + # Enable position-independent code for shared library compatibility set_property(TARGET dnlp_diff PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/include/problem.h b/include/problem.h index 8f685b5..63bd408 100644 --- a/include/problem.h +++ b/include/problem.h @@ -3,6 +3,17 @@ #include "expr.h" #include "utils/CSR_Matrix.h" +#include "utils/Timer.h" + +typedef struct +{ + double time_init_derivatives; + double time_eval_jacobian; + double time_eval_gradient; + double time_eval_hessian; + double time_forward_obj; + double time_forward_constraints; +} stats; typedef struct problem { @@ -21,6 +32,9 @@ typedef struct problem CSR_Matrix *lagrange_hessian; int *hess_idx_map; /* Maps all wsum_hess nnz to lagrange_hessian (obj + constraints) */ + + /* Statistics for performance measurement */ + stats stats; } problem; /* Retains objective and constraints (shared ownership with caller) */ diff --git a/include/utils/Timer.h b/include/utils/Timer.h new file mode 100644 index 0000000..d7638df --- /dev/null +++ b/include/utils/Timer.h @@ -0,0 +1,25 @@ +#ifndef TIMER_H +#define TIMER_H + +#include "time.h" + +typedef struct +{ + struct timespec start, end; +} Timer; + +// Macro to compute elapsed time in seconds +#define GET_ELAPSED_SECONDS(timer) \ + (((timer).end.tv_sec - (timer).start.tv_sec) + \ + ((double) ((timer).end.tv_nsec - (timer).start.tv_nsec) * 1e-9)) + +#define RUN_AND_TIME(func, timer, time_variable, result_var, ...) \ + do \ + { \ + clock_gettime(CLOCK_MONOTONIC, &timer.start); \ + (result_var) = func(__VA_ARGS__); \ + clock_gettime(CLOCK_MONOTONIC, &timer.end); \ + (time_variable) += GET_ELAPSED_SECONDS(timer); \ + } while (0) + +#endif // TIMER_H \ No newline at end of file diff --git a/include/utils/utils.h b/include/utils/utils.h new file mode 100644 index 0000000..f88cff6 --- /dev/null +++ b/include/utils/utils.h @@ -0,0 +1,9 @@ +#ifndef UTILS_H +#define UTILS_H + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +/* Sort an array of integers in ascending order */ +void sort_int_array(int *array, int size); + +#endif // UTILS_H diff --git a/src/affine/sum.c b/src/affine/sum.c index ba3193b..c124796 100644 --- a/src/affine/sum.c +++ b/src/affine/sum.c @@ -1,12 +1,11 @@ #include "affine.h" #include "utils/int_double_pair.h" #include "utils/mini_numpy.h" +#include "utils/utils.h" #include #include #include -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - static void forward(expr *node, const double *u) { int i, j, end; diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c index 517f48c..ea7fe5e 100644 --- a/src/bivariate/left_matmul.c +++ b/src/bivariate/left_matmul.c @@ -26,8 +26,7 @@ */ -// todo: put this in common somewhere -#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#include "utils/utils.h" static void forward(expr *node, const double *u) { diff --git a/src/bivariate/multiply.c b/src/bivariate/multiply.c index 44c5bf5..f30e916 100644 --- a/src/bivariate/multiply.c +++ b/src/bivariate/multiply.c @@ -29,7 +29,6 @@ static void forward(expr *node, const double *u) static void jacobian_init(expr *node) { - printf("jacobian_init elementwise_mult\n \n \n"); node->left->jacobian_init(node->left); node->right->jacobian_init(node->right); node->dwork = (double *) malloc(2 * node->size * sizeof(double)); @@ -143,6 +142,9 @@ static void wsum_hess_init(expr *node) * 2 * nnz(C) */ assert(C->m == node->n_vars && C->n == node->n_vars); node->wsum_hess = new_csr_matrix(C->m, C->n, 2 * C->nnz); + + /* fill sparsity pattern Hessian = C + C^T */ + sum_csr_matrices_fill_sparsity(C, CT, node->wsum_hess); } } @@ -171,11 +173,8 @@ static void eval_wsum_hess(expr *node, const double *w) /* Compute CT = C^T = A^T diag(w) B */ AT_fill_values(C, CT, node->iwork); - /* TODO: should fill sparsity before. Maybe we can map values from C to - * hessian directly instead?*/ - /* Hessian = C + CT = B^T diag(w) A + A^T diag(w) B */ - sum_csr_matrices(C, CT, node->wsum_hess); + sum_csr_matrices_fill_values(C, CT, node->wsum_hess); } } diff --git a/src/other/quad_form.c b/src/other/quad_form.c index 04badb8..1fb68a3 100644 --- a/src/other/quad_form.c +++ b/src/other/quad_form.c @@ -27,7 +27,6 @@ static void forward(expr *node, const double *u) static void jacobian_init(expr *node) { - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; assert(node->left->var_id != NOT_A_VARIABLE); assert(node->left->d2 == 1); expr *x = node->left; diff --git a/src/problem.c b/src/problem.c index 90763a2..0372494 100644 --- a/src/problem.c +++ b/src/problem.c @@ -1,32 +1,25 @@ #include "problem.h" +#include "utils/utils.h" +#include #include #include #include -// TODO: move this into common file -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - /* forward declaration */ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork); -/* Helper function to compare integers for qsort */ -static int compare_int_asc(const void *a, const void *b) -{ - int ia = *((const int *) a); - int ib = *((const int *) b); - return (ia > ib) - (ia < ib); -} - problem *new_problem(expr *objective, expr **constraints, int n_constraints) { problem *prob = (problem *) calloc(1, sizeof(problem)); if (!prob) return NULL; - /* Retain objective (shared ownership with caller) */ + /* objective */ prob->objective = objective; expr_retain(objective); + prob->n_vars = objective->n_vars; - /* Copy and retain constraints array */ + /* constraints array */ + prob->total_constraint_size = 0; prob->n_constraints = n_constraints; if (n_constraints > 0) { @@ -34,43 +27,31 @@ problem *new_problem(expr *objective, expr **constraints, int n_constraints) for (int i = 0; i < n_constraints; i++) { prob->constraints[i] = constraints[i]; + prob->total_constraint_size += constraints[i]->size; expr_retain(constraints[i]); } } - else - { - prob->constraints = NULL; - } - - /* Compute total constraint size */ - prob->total_constraint_size = 0; - for (int i = 0; i < n_constraints; i++) - { - prob->total_constraint_size += constraints[i]->size; - } - - prob->n_vars = objective->n_vars; - /* Allocate value arrays */ - if (prob->total_constraint_size > 0) - { - prob->constraint_values = - (double *) calloc(prob->total_constraint_size, sizeof(double)); - } - else - { - prob->constraint_values = NULL; - } + /* allocation */ + prob->constraint_values = + (double *) calloc(prob->total_constraint_size, sizeof(double)); prob->gradient_values = (double *) calloc(prob->n_vars, sizeof(double)); - /* Derivative structures allocated by problem_init_derivatives */ - prob->jacobian = NULL; + /* Initialize statistics */ + prob->stats.time_init_derivatives = 0.0; + prob->stats.time_eval_jacobian = 0.0; + prob->stats.time_eval_hessian = 0.0; + prob->stats.time_forward_obj = 0.0; + prob->stats.time_forward_constraints = 0.0; return prob; } void problem_init_derivatives(problem *prob) { + Timer timer; + clock_gettime(CLOCK_MONOTONIC, &timer.start); + // ------------------------------------------------------------------------------- // Jacobian structure // ------------------------------------------------------------------------------- @@ -85,6 +66,26 @@ void problem_init_derivatives(problem *prob) prob->jacobian = new_csr_matrix(prob->total_constraint_size, prob->n_vars, nnz); + /* set sparsity pattern of jacobian */ + CSR_Matrix *H = prob->jacobian; + H->p[0] = 0; + int row_offset = 0; + int nnz_offset = 0; + for (int i = 0; i < prob->n_constraints; i++) + { + expr *c = prob->constraints[i]; + + for (int r = 1; r <= c->jacobian->m; r++) + { + H->p[row_offset + r] = nnz_offset + c->jacobian->p[r]; + } + + memcpy(H->i + nnz_offset, c->jacobian->i, c->jacobian->nnz * sizeof(int)); + row_offset += c->jacobian->m; + nnz_offset += c->jacobian->nnz; + } + assert(nnz_offset == nnz); + // ------------------------------------------------------------------------------- // Lagrange Hessian structure // ------------------------------------------------------------------------------- @@ -102,6 +103,9 @@ void problem_init_derivatives(problem *prob) int *iwork = (int *) malloc(MAX(nnz, prob->n_vars) * sizeof(int)); problem_lagrange_hess_fill_sparsity(prob, iwork); free(iwork); + + clock_gettime(CLOCK_MONOTONIC, &timer.end); + prob->stats.time_init_derivatives += GET_ELAPSED_SECONDS(timer); } static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork) @@ -134,7 +138,7 @@ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork) } /* find unique columns */ - qsort(cols, count, sizeof(int), compare_int_asc); + sort_int_array(cols, count); int prev_col = -1; for (int j = 0; j < count; j++) { @@ -214,13 +218,24 @@ void free_problem(problem *prob) double problem_objective_forward(problem *prob, const double *u) { + Timer timer; + clock_gettime(CLOCK_MONOTONIC, &timer.start); + /* Evaluate objective only */ prob->objective->forward(prob->objective, u); - return prob->objective->value[0]; + double result = prob->objective->value[0]; + + clock_gettime(CLOCK_MONOTONIC, &timer.end); + prob->stats.time_forward_obj += GET_ELAPSED_SECONDS(timer); + + return result; } void problem_constraint_forward(problem *prob, const double *u) { + Timer timer; + clock_gettime(CLOCK_MONOTONIC, &timer.start); + /* Evaluate constraints only and copy values */ int offset = 0; for (int i = 0; i < prob->n_constraints; i++) @@ -230,78 +245,67 @@ void problem_constraint_forward(problem *prob, const double *u) memcpy(prob->constraint_values + offset, c->value, c->size * sizeof(double)); offset += c->size; } + + clock_gettime(CLOCK_MONOTONIC, &timer.end); + prob->stats.time_forward_constraints += GET_ELAPSED_SECONDS(timer); } void problem_gradient(problem *prob) { - /* Jacobian on objective */ + Timer timer; + clock_gettime(CLOCK_MONOTONIC, &timer.start); + + /* evaluate jacobian of objective */ prob->objective->eval_jacobian(prob->objective); - /* Zero gradient array */ + /* copy sparse jacobian to dense gradient */ memset(prob->gradient_values, 0, prob->n_vars * sizeof(double)); - - /* Copy sparse jacobian row to dense gradient - * Objective jacobian is 1 x n_vars */ CSR_Matrix *jac = prob->objective->jacobian; for (int k = jac->p[0]; k < jac->p[1]; k++) { - int col = jac->i[k]; - prob->gradient_values[col] = jac->x[k]; + prob->gradient_values[jac->i[k]] = jac->x[k]; } + + clock_gettime(CLOCK_MONOTONIC, &timer.end); + prob->stats.time_eval_gradient += GET_ELAPSED_SECONDS(timer); } void problem_jacobian(problem *prob) { - CSR_Matrix *stacked = prob->jacobian; - - /* Initialize row pointers */ - stacked->p[0] = 0; + Timer timer; + clock_gettime(CLOCK_MONOTONIC, &timer.start); - int row_offset = 0; + CSR_Matrix *J = prob->jacobian; int nnz_offset = 0; - /* TODO: here we eventually want to evaluate affine jacobians only once. - And we only need to copy the values. The sparsity pattern should have been set - before. */ for (int i = 0; i < prob->n_constraints; i++) { expr *c = prob->constraints[i]; - - /* Evaluate jacobian */ c->eval_jacobian(c); - - CSR_Matrix *cjac = c->jacobian; - - /* Copy row pointers with offset */ - for (int r = 1; r <= cjac->m; r++) - { - stacked->p[row_offset + r] = nnz_offset + cjac->p[r]; - } - - /* Copy column indices and values */ - int constraint_nnz = cjac->p[cjac->m]; - memcpy(stacked->i + nnz_offset, cjac->i, constraint_nnz * sizeof(int)); - memcpy(stacked->x + nnz_offset, cjac->x, constraint_nnz * sizeof(double)); - - row_offset += cjac->m; - nnz_offset += constraint_nnz; + memcpy(J->x + nnz_offset, c->jacobian->x, c->jacobian->nnz * sizeof(double)); + nnz_offset += c->jacobian->nnz; } - /* Update actual nnz (may be less than allocated) */ - stacked->nnz = nnz_offset; + /* update actual nnz (may be less than allocated) */ + J->nnz = nnz_offset; + + clock_gettime(CLOCK_MONOTONIC, &timer.end); + prob->stats.time_eval_jacobian += GET_ELAPSED_SECONDS(timer); } void problem_hessian(problem *prob, double obj_w, const double *w) { + Timer timer; + clock_gettime(CLOCK_MONOTONIC, &timer.start); + // ------------------------------------------------------------------------ // evaluate hessian of objective and constraints // ------------------------------------------------------------------------ expr *obj = prob->objective; - expr **constrs = prob->constraints; - obj->eval_wsum_hess(obj, &obj_w); int offset = 0; + expr **constrs = prob->constraints; for (int i = 0; i < prob->n_constraints; i++) { constrs[i]->eval_wsum_hess(constrs[i], w + offset); @@ -327,4 +331,7 @@ void problem_hessian(problem *prob, double obj_w, const double *w) idx_map_accumulator(constrs[i]->wsum_hess, idx_map + offset, H->x); offset += constrs[i]->wsum_hess->nnz; } + + clock_gettime(CLOCK_MONOTONIC, &timer.end); + prob->stats.time_eval_hessian += GET_ELAPSED_SECONDS(timer); } diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c index d4389ab..40b6844 100644 --- a/src/utils/CSC_Matrix.c +++ b/src/utils/CSC_Matrix.c @@ -153,24 +153,24 @@ static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz, void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C) { - int i, j, ii, jj; - for (i = 0; i < C->m; i++) + int j, ii, jj; + for (ii = 0; ii < C->m; ii++) { - for (jj = C->p[i]; jj < C->p[i + 1]; jj++) + for (jj = C->p[ii]; jj < C->p[ii + 1]; jj++) { j = C->i[jj]; - if (j < i) + if (j < ii) { - C->x[jj] = csr_get_value(C, j, i); + C->x[jj] = csr_get_value(C, j, ii); } else { - int nnz_ai = A->p[i + 1] - A->p[i]; + int nnz_ai = A->p[ii + 1] - A->p[ii]; int nnz_aj = A->p[j + 1] - A->p[j]; /* compute Cij = weighted inner product of column i and column j */ - double sum = sparse_wdot(A->x + A->p[i], A->i + A->p[i], nnz_ai, + double sum = sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai, A->x + A->p[j], A->i + A->p[j], nnz_aj, d); C->x[jj] = sum; @@ -183,7 +183,7 @@ CSC_Matrix *csr_to_csc(const CSR_Matrix *A) { CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz); - int i, j, start; + int i, j; int *count = malloc(A->n * sizeof(int)); memset(count, 0, A->n * sizeof(int)); @@ -230,7 +230,7 @@ CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork) { CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz); - int i, j, start; + int i, j; int *count = iwork; memset(count, 0, A->n * sizeof(int)); @@ -294,7 +294,6 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) /* A is m x n, B is m x p, C = B^T A is p x n */ int n = A->n; int p = B->n; - int m = A->m; int nnz = 0; int i, j, ii, jj; diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c index 2dbae1a..4fad6e5 100644 --- a/src/utils/CSR_Matrix.c +++ b/src/utils/CSR_Matrix.c @@ -1,5 +1,6 @@ #include "utils/CSR_Matrix.h" #include "utils/int_double_pair.h" +#include "utils/utils.h" #include #include #include @@ -193,13 +194,6 @@ void diag_csr_mult_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix } } -static int compare_int_asc(const void *a, const void *b) -{ - int ia = *((const int *) a); - int ib = *((const int *) b); - return (ia > ib) - (ia < ib); -} - void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) { /* A and B must be different from C */ @@ -453,14 +447,6 @@ void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matri C->p[A->m] = C->nnz; } -/* Helper function for qsort to compare column indices */ -static int compare_cols(const void *a, const void *b) -{ - int col_a = *((int *) a); - int col_b = *((int *) b); - return col_a - col_b; -} - void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, int_double_pair *pairs) { assert(C->m == 1); @@ -586,7 +572,7 @@ void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, } /* Sort columns and write unique pattern into C->i */ - qsort(cols, count, sizeof(int), compare_int_asc); + sort_int_array(cols, count); int unique_nnz = 0; int prev_col = -1; @@ -718,7 +704,7 @@ void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, } /* Sort columns and write unique pattern into C->i */ - qsort(cols, count, sizeof(int), compare_int_asc); + sort_int_array(cols, count); int unique_nnz = 0; int prev_col = -1; @@ -758,8 +744,7 @@ void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, double *accumulator) { - // memset(accumulator, 0, A->nnz * sizeof(double)); - + /* don't forget to initialze accumulator to 0 before calling this */ for (int row = 0; row < A->m; row++) { for (int j = A->p[row]; j < A->p[row + 1]; j++) @@ -769,21 +754,6 @@ void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map, } } -/* -void sum_evenly_spaced_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, - const int *idx_map) -{ - memset(C->x, 0, C->nnz * sizeof(double)); - - for (int row = 0; row < A->m; row++) - { - for (int j = A->p[row]; j < A->p[row + 1]; j++) - { - C->x[idx_map[j]] += A->x[j]; - } - } -} -*/ void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, struct int_double_pair *pairs, int offset, int spacing) @@ -862,7 +832,7 @@ CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork) { CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz); - int i, j, start; + int i, j; int *count = iwork; memset(count, 0, A->n * sizeof(int)); @@ -1069,7 +1039,7 @@ void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix // ------------------------------------------------------------------- int *cols = iwork; memcpy(cols, A->i, A->nnz * sizeof(int)); - qsort(cols, A->nnz, sizeof(int), compare_int_asc); + sort_int_array(cols, A->nnz); int unique_nnz = 0; int prev_col = -1; diff --git a/src/utils/utils.c b/src/utils/utils.c new file mode 100644 index 0000000..2b420d6 --- /dev/null +++ b/src/utils/utils.c @@ -0,0 +1,15 @@ +#include "utils/utils.h" +#include + +/* Helper function to compare integers for qsort */ +static int compare_int_asc(const void *a, const void *b) +{ + int ia = *((const int *) a); + int ib = *((const int *) b); + return (ia > ib) - (ia < ib); +} + +void sort_int_array(int *array, int size) +{ + qsort(array, size, sizeof(int), compare_int_asc); +}