diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3c5531..d604fe0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,11 @@ target_compile_options(dnlp_diff PRIVATE
   $<$<CONFIG:MinSizeRel>:-Os -DNDEBUG>
 )
 
+# This is needed for clock_gettime on Linux without compiler extensions
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    add_compile_definitions(_POSIX_C_SOURCE=200809L)
+endif()
+
 # Enable position-independent code for shared library compatibility
 set_property(TARGET dnlp_diff PROPERTY POSITION_INDEPENDENT_CODE ON)
 
diff --git a/include/problem.h b/include/problem.h
index 8f685b5..63bd408 100644
--- a/include/problem.h
+++ b/include/problem.h
@@ -3,6 +3,17 @@
 
 #include "expr.h"
 #include "utils/CSR_Matrix.h"
+#include "utils/Timer.h"
+
+typedef struct
+{
+    double time_init_derivatives;
+    double time_eval_jacobian;
+    double time_eval_gradient;
+    double time_eval_hessian;
+    double time_forward_obj;
+    double time_forward_constraints;
+} stats;
 
 typedef struct problem
 {
@@ -21,6 +32,9 @@ typedef struct problem
     CSR_Matrix *lagrange_hessian;
     int *hess_idx_map; /* Maps all wsum_hess nnz to lagrange_hessian (obj +
                           constraints) */
+
+    /* Statistics for performance measurement */
+    stats stats;
 } problem;
 
 /* Retains objective and constraints (shared ownership with caller) */
diff --git a/include/utils/Timer.h b/include/utils/Timer.h
new file mode 100644
index 0000000..d7638df
--- /dev/null
+++ b/include/utils/Timer.h
@@ -0,0 +1,25 @@
+#ifndef TIMER_H
+#define TIMER_H
+
+#include "time.h"
+
+typedef struct
+{
+    struct timespec start, end;
+} Timer;
+
+// Macro to compute elapsed time in seconds
+#define GET_ELAPSED_SECONDS(timer)                                                  \
+    (((timer).end.tv_sec - (timer).start.tv_sec) +                                  \
+     ((double) ((timer).end.tv_nsec - (timer).start.tv_nsec) * 1e-9))
+
+#define RUN_AND_TIME(func, timer, time_variable, result_var, ...)                   \
+    do                                                                              \
+    {                                                                               \
+        clock_gettime(CLOCK_MONOTONIC, &timer.start);                               \
+        (result_var) = func(__VA_ARGS__);                                           \
+        clock_gettime(CLOCK_MONOTONIC, &timer.end);                                 \
+        (time_variable) += GET_ELAPSED_SECONDS(timer);                              \
+    } while (0)
+
+#endif // TIMER_H
\ No newline at end of file
diff --git a/include/utils/utils.h b/include/utils/utils.h
new file mode 100644
index 0000000..f88cff6
--- /dev/null
+++ b/include/utils/utils.h
@@ -0,0 +1,9 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/* Sort an array of integers in ascending order */
+void sort_int_array(int *array, int size);
+
+#endif // UTILS_H
diff --git a/src/affine/sum.c b/src/affine/sum.c
index ba3193b..c124796 100644
--- a/src/affine/sum.c
+++ b/src/affine/sum.c
@@ -1,12 +1,11 @@
 #include "affine.h"
 #include "utils/int_double_pair.h"
 #include "utils/mini_numpy.h"
+#include "utils/utils.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
 static void forward(expr *node, const double *u)
 {
     int i, j, end;
diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c
index 517f48c..ea7fe5e 100644
--- a/src/bivariate/left_matmul.c
+++ b/src/bivariate/left_matmul.c
@@ -26,8 +26,7 @@
 
 */
 
-// todo: put this in common somewhere
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#include "utils/utils.h"
 
 static void forward(expr *node, const double *u)
 {
diff --git a/src/bivariate/multiply.c b/src/bivariate/multiply.c
index 44c5bf5..f30e916 100644
--- a/src/bivariate/multiply.c
+++ b/src/bivariate/multiply.c
@@ -29,7 +29,6 @@ static void forward(expr *node, const double *u)
 
 static void jacobian_init(expr *node)
 {
-    printf("jacobian_init elementwise_mult\n \n \n");
     node->left->jacobian_init(node->left);
     node->right->jacobian_init(node->right);
     node->dwork = (double *) malloc(2 * node->size * sizeof(double));
@@ -143,6 +142,9 @@ static void wsum_hess_init(expr *node)
          * 2 * nnz(C) */
         assert(C->m == node->n_vars && C->n == node->n_vars);
         node->wsum_hess = new_csr_matrix(C->m, C->n, 2 * C->nnz);
+
+        /* fill sparsity pattern Hessian = C + C^T */
+        sum_csr_matrices_fill_sparsity(C, CT, node->wsum_hess);
     }
 }
 
@@ -171,11 +173,8 @@ static void eval_wsum_hess(expr *node, const double *w)
         /* Compute CT = C^T = A^T diag(w) B */
         AT_fill_values(C, CT, node->iwork);
 
-        /* TODO: should fill sparsity before. Maybe we can map values from C to
-         * hessian directly instead?*/
-
         /* Hessian = C + CT = B^T diag(w) A + A^T diag(w) B */
-        sum_csr_matrices(C, CT, node->wsum_hess);
+        sum_csr_matrices_fill_values(C, CT, node->wsum_hess);
     }
 }
 
diff --git a/src/other/quad_form.c b/src/other/quad_form.c
index 04badb8..1fb68a3 100644
--- a/src/other/quad_form.c
+++ b/src/other/quad_form.c
@@ -27,7 +27,6 @@ static void forward(expr *node, const double *u)
 
 static void jacobian_init(expr *node)
 {
-    CSR_Matrix *Q = ((quad_form_expr *) node)->Q;
     assert(node->left->var_id != NOT_A_VARIABLE);
     assert(node->left->d2 == 1);
     expr *x = node->left;
diff --git a/src/problem.c b/src/problem.c
index 90763a2..0372494 100644
--- a/src/problem.c
+++ b/src/problem.c
@@ -1,32 +1,25 @@
 #include "problem.h"
+#include "utils/utils.h"
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-// TODO: move this into common file
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
 /* forward declaration */
 static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork);
 
-/* Helper function to compare integers for qsort */
-static int compare_int_asc(const void *a, const void *b)
-{
-    int ia = *((const int *) a);
-    int ib = *((const int *) b);
-    return (ia > ib) - (ia < ib);
-}
-
 problem *new_problem(expr *objective, expr **constraints, int n_constraints)
 {
     problem *prob = (problem *) calloc(1, sizeof(problem));
     if (!prob) return NULL;
 
-    /* Retain objective (shared ownership with caller) */
+    /* objective */
     prob->objective = objective;
     expr_retain(objective);
+    prob->n_vars = objective->n_vars;
 
-    /* Copy and retain constraints array */
+    /* constraints array */
+    prob->total_constraint_size = 0;
     prob->n_constraints = n_constraints;
     if (n_constraints > 0)
     {
@@ -34,43 +27,31 @@ problem *new_problem(expr *objective, expr **constraints, int n_constraints)
         for (int i = 0; i < n_constraints; i++)
         {
             prob->constraints[i] = constraints[i];
+            prob->total_constraint_size += constraints[i]->size;
             expr_retain(constraints[i]);
         }
     }
-    else
-    {
-        prob->constraints = NULL;
-    }
-
-    /* Compute total constraint size */
-    prob->total_constraint_size = 0;
-    for (int i = 0; i < n_constraints; i++)
-    {
-        prob->total_constraint_size += constraints[i]->size;
-    }
-
-    prob->n_vars = objective->n_vars;
 
-    /* Allocate value arrays */
-    if (prob->total_constraint_size > 0)
-    {
-        prob->constraint_values =
-            (double *) calloc(prob->total_constraint_size, sizeof(double));
-    }
-    else
-    {
-        prob->constraint_values = NULL;
-    }
+    /* allocation */
+    prob->constraint_values =
+        (double *) calloc(prob->total_constraint_size, sizeof(double));
     prob->gradient_values = (double *) calloc(prob->n_vars, sizeof(double));
 
-    /* Derivative structures allocated by problem_init_derivatives */
-    prob->jacobian = NULL;
+    /* Initialize statistics */
+    prob->stats.time_init_derivatives = 0.0;
+    prob->stats.time_eval_jacobian = 0.0;
+    prob->stats.time_eval_hessian = 0.0;
+    prob->stats.time_forward_obj = 0.0;
+    prob->stats.time_forward_constraints = 0.0;
 
     return prob;
 }
 
 void problem_init_derivatives(problem *prob)
 {
+    Timer timer;
+    clock_gettime(CLOCK_MONOTONIC, &timer.start);
+
     // -------------------------------------------------------------------------------
     //                           Jacobian structure
     // -------------------------------------------------------------------------------
@@ -85,6 +66,26 @@ void problem_init_derivatives(problem *prob)
 
     prob->jacobian = new_csr_matrix(prob->total_constraint_size, prob->n_vars, nnz);
 
+    /* set sparsity pattern of jacobian */
+    CSR_Matrix *H = prob->jacobian;
+    H->p[0] = 0;
+    int row_offset = 0;
+    int nnz_offset = 0;
+    for (int i = 0; i < prob->n_constraints; i++)
+    {
+        expr *c = prob->constraints[i];
+
+        for (int r = 1; r <= c->jacobian->m; r++)
+        {
+            H->p[row_offset + r] = nnz_offset + c->jacobian->p[r];
+        }
+
+        memcpy(H->i + nnz_offset, c->jacobian->i, c->jacobian->nnz * sizeof(int));
+        row_offset += c->jacobian->m;
+        nnz_offset += c->jacobian->nnz;
+    }
+    assert(nnz_offset == nnz);
+
     // -------------------------------------------------------------------------------
     //                        Lagrange Hessian structure
     // -------------------------------------------------------------------------------
@@ -102,6 +103,9 @@ void problem_init_derivatives(problem *prob)
     int *iwork = (int *) malloc(MAX(nnz, prob->n_vars) * sizeof(int));
     problem_lagrange_hess_fill_sparsity(prob, iwork);
     free(iwork);
+
+    clock_gettime(CLOCK_MONOTONIC, &timer.end);
+    prob->stats.time_init_derivatives += GET_ELAPSED_SECONDS(timer);
 }
 
 static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork)
@@ -134,7 +138,7 @@ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork)
         }
 
         /* find unique columns */
-        qsort(cols, count, sizeof(int), compare_int_asc);
+        sort_int_array(cols, count);
         int prev_col = -1;
         for (int j = 0; j < count; j++)
         {
@@ -214,13 +218,24 @@ void free_problem(problem *prob)
 
 double problem_objective_forward(problem *prob, const double *u)
 {
+    Timer timer;
+    clock_gettime(CLOCK_MONOTONIC, &timer.start);
+
     /* Evaluate objective only */
     prob->objective->forward(prob->objective, u);
-    return prob->objective->value[0];
+    double result = prob->objective->value[0];
+
+    clock_gettime(CLOCK_MONOTONIC, &timer.end);
+    prob->stats.time_forward_obj += GET_ELAPSED_SECONDS(timer);
+
+    return result;
 }
 
 void problem_constraint_forward(problem *prob, const double *u)
 {
+    Timer timer;
+    clock_gettime(CLOCK_MONOTONIC, &timer.start);
+
     /* Evaluate constraints only and copy values */
     int offset = 0;
     for (int i = 0; i < prob->n_constraints; i++)
@@ -230,78 +245,67 @@ void problem_constraint_forward(problem *prob, const double *u)
         memcpy(prob->constraint_values + offset, c->value, c->size * sizeof(double));
         offset += c->size;
     }
+
+    clock_gettime(CLOCK_MONOTONIC, &timer.end);
+    prob->stats.time_forward_constraints += GET_ELAPSED_SECONDS(timer);
 }
 
 void problem_gradient(problem *prob)
 {
-    /* Jacobian on objective */
+    Timer timer;
+    clock_gettime(CLOCK_MONOTONIC, &timer.start);
+
+    /* evaluate jacobian of objective */
     prob->objective->eval_jacobian(prob->objective);
 
-    /* Zero gradient array */
+    /* copy sparse jacobian to dense gradient */
     memset(prob->gradient_values, 0, prob->n_vars * sizeof(double));
-
-    /* Copy sparse jacobian row to dense gradient
-     * Objective jacobian is 1 x n_vars */
     CSR_Matrix *jac = prob->objective->jacobian;
     for (int k = jac->p[0]; k < jac->p[1]; k++)
     {
-        int col = jac->i[k];
-        prob->gradient_values[col] = jac->x[k];
+        prob->gradient_values[jac->i[k]] = jac->x[k];
     }
+
+    clock_gettime(CLOCK_MONOTONIC, &timer.end);
+    prob->stats.time_eval_gradient += GET_ELAPSED_SECONDS(timer);
 }
 
 void problem_jacobian(problem *prob)
 {
-    CSR_Matrix *stacked = prob->jacobian;
-
-    /* Initialize row pointers */
-    stacked->p[0] = 0;
+    Timer timer;
+    clock_gettime(CLOCK_MONOTONIC, &timer.start);
 
-    int row_offset = 0;
+    CSR_Matrix *J = prob->jacobian;
     int nnz_offset = 0;
 
-    /* TODO: here we eventually want to evaluate affine jacobians only once.
-    And we only need to copy the values. The sparsity pattern should have been set
-    before. */
     for (int i = 0; i < prob->n_constraints; i++)
     {
         expr *c = prob->constraints[i];
-
-        /* Evaluate jacobian */
         c->eval_jacobian(c);
-
-        CSR_Matrix *cjac = c->jacobian;
-
-        /* Copy row pointers with offset */
-        for (int r = 1; r <= cjac->m; r++)
-        {
-            stacked->p[row_offset + r] = nnz_offset + cjac->p[r];
-        }
-
-        /* Copy column indices and values */
-        int constraint_nnz = cjac->p[cjac->m];
-        memcpy(stacked->i + nnz_offset, cjac->i, constraint_nnz * sizeof(int));
-        memcpy(stacked->x + nnz_offset, cjac->x, constraint_nnz * sizeof(double));
-
-        row_offset += cjac->m;
-        nnz_offset += constraint_nnz;
+        memcpy(J->x + nnz_offset, c->jacobian->x, c->jacobian->nnz * sizeof(double));
+        nnz_offset += c->jacobian->nnz;
     }
 
-    /* Update actual nnz (may be less than allocated) */
-    stacked->nnz = nnz_offset;
+    /* update actual nnz (may be less than allocated) */
+    J->nnz = nnz_offset;
+
+    clock_gettime(CLOCK_MONOTONIC, &timer.end);
+    prob->stats.time_eval_jacobian += GET_ELAPSED_SECONDS(timer);
 }
 
 void problem_hessian(problem *prob, double obj_w, const double *w)
 {
+    Timer timer;
+    clock_gettime(CLOCK_MONOTONIC, &timer.start);
+
     // ------------------------------------------------------------------------
     //             evaluate hessian of objective and constraints
     // ------------------------------------------------------------------------
     expr *obj = prob->objective;
-    expr **constrs = prob->constraints;
-
     obj->eval_wsum_hess(obj, &obj_w);
 
     int offset = 0;
+    expr **constrs = prob->constraints;
     for (int i = 0; i < prob->n_constraints; i++)
     {
         constrs[i]->eval_wsum_hess(constrs[i], w + offset);
@@ -327,4 +331,7 @@ void problem_hessian(problem *prob, double obj_w, const double *w)
         idx_map_accumulator(constrs[i]->wsum_hess, idx_map + offset, H->x);
         offset += constrs[i]->wsum_hess->nnz;
     }
+
+    clock_gettime(CLOCK_MONOTONIC, &timer.end);
+    prob->stats.time_eval_hessian += GET_ELAPSED_SECONDS(timer);
 }
diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c
index d4389ab..40b6844 100644
--- a/src/utils/CSC_Matrix.c
+++ b/src/utils/CSC_Matrix.c
@@ -153,24 +153,24 @@ static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz,
 
 void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
 {
-    int i, j, ii, jj;
-    for (i = 0; i < C->m; i++)
+    int j, ii, jj;
+    for (ii = 0; ii < C->m; ii++)
     {
-        for (jj = C->p[i]; jj < C->p[i + 1]; jj++)
+        for (jj = C->p[ii]; jj < C->p[ii + 1]; jj++)
         {
             j = C->i[jj];
 
-            if (j < i)
+            if (j < ii)
             {
-                C->x[jj] = csr_get_value(C, j, i);
+                C->x[jj] = csr_get_value(C, j, ii);
             }
             else
             {
-                int nnz_ai = A->p[i + 1] - A->p[i];
+                int nnz_ai = A->p[ii + 1] - A->p[ii];
                 int nnz_aj = A->p[j + 1] - A->p[j];
 
                 /* compute Cij = weighted inner product of column i and column j */
-                double sum = sparse_wdot(A->x + A->p[i], A->i + A->p[i], nnz_ai,
+                double sum = sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
                                          A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
 
                 C->x[jj] = sum;
@@ -183,7 +183,7 @@ CSC_Matrix *csr_to_csc(const CSR_Matrix *A)
 {
     CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz);
 
-    int i, j, start;
+    int i, j;
     int *count = malloc(A->n * sizeof(int));
 
     memset(count, 0, A->n * sizeof(int));
@@ -230,7 +230,7 @@ CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork)
 {
     CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz);
 
-    int i, j, start;
+    int i, j;
     int *count = iwork;
     memset(count, 0, A->n * sizeof(int));
 
@@ -294,7 +294,6 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B)
     /* A is m x n, B is m x p, C = B^T A is p x n */
     int n = A->n;
     int p = B->n;
-    int m = A->m;
     int nnz = 0;
     int i, j, ii, jj;
 
diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c
index 2dbae1a..4fad6e5 100644
--- a/src/utils/CSR_Matrix.c
+++ b/src/utils/CSR_Matrix.c
@@ -1,5 +1,6 @@
 #include "utils/CSR_Matrix.h"
 #include "utils/int_double_pair.h"
+#include "utils/utils.h"
 #include <assert.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -193,13 +194,6 @@ void diag_csr_mult_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix
     }
 }
 
-static int compare_int_asc(const void *a, const void *b)
-{
-    int ia = *((const int *) a);
-    int ib = *((const int *) b);
-    return (ia > ib) - (ia < ib);
-}
-
 void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C)
 {
     /* A and B must be different from C */
@@ -453,14 +447,6 @@ void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matri
     C->p[A->m] = C->nnz;
 }
 
-/* Helper function for qsort to compare column indices */
-static int compare_cols(const void *a, const void *b)
-{
-    int col_a = *((int *) a);
-    int col_b = *((int *) b);
-    return col_a - col_b;
-}
-
 void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, int_double_pair *pairs)
 {
     assert(C->m == 1);
@@ -586,7 +572,7 @@ void sum_block_of_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A,
         }
 
         /* Sort columns and write unique pattern into C->i */
-        qsort(cols, count, sizeof(int), compare_int_asc);
+        sort_int_array(cols, count);
 
         int unique_nnz = 0;
         int prev_col = -1;
@@ -718,7 +704,7 @@ void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A,
         }
 
         /* Sort columns and write unique pattern into C->i */
-        qsort(cols, count, sizeof(int), compare_int_asc);
+        sort_int_array(cols, count);
 
         int unique_nnz = 0;
         int prev_col = -1;
@@ -758,8 +744,7 @@ void sum_evenly_spaced_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A,
 void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map,
                          double *accumulator)
 {
-    // memset(accumulator, 0, A->nnz * sizeof(double));
-
+    /* don't forget to initialze accumulator to 0 before calling this */
     for (int row = 0; row < A->m; row++)
     {
         for (int j = A->p[row]; j < A->p[row + 1]; j++)
@@ -769,21 +754,6 @@ void idx_map_accumulator(const CSR_Matrix *A, const int *idx_map,
     }
 }
 
-/*
-void sum_evenly_spaced_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C,
-                                            const int *idx_map)
-{
-    memset(C->x, 0, C->nnz * sizeof(double));
-
-    for (int row = 0; row < A->m; row++)
-    {
-        for (int j = A->p[row]; j < A->p[row + 1]; j++)
-        {
-            C->x[idx_map[j]] += A->x[j];
-        }
-    }
-}
-*/
 void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C,
                                   struct int_double_pair *pairs, int offset,
                                   int spacing)
@@ -862,7 +832,7 @@ CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork)
 {
     CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz);
 
-    int i, j, start;
+    int i, j;
     int *count = iwork;
     memset(count, 0, A->n * sizeof(int));
 
@@ -1069,7 +1039,7 @@ void sum_all_rows_csr_fill_sparsity_and_idx_map(const CSR_Matrix *A, CSR_Matrix
     // -------------------------------------------------------------------
     int *cols = iwork;
     memcpy(cols, A->i, A->nnz * sizeof(int));
-    qsort(cols, A->nnz, sizeof(int), compare_int_asc);
+    sort_int_array(cols, A->nnz);
 
     int unique_nnz = 0;
     int prev_col = -1;
diff --git a/src/utils/utils.c b/src/utils/utils.c
new file mode 100644
index 0000000..2b420d6
--- /dev/null
+++ b/src/utils/utils.c
@@ -0,0 +1,15 @@
+#include "utils/utils.h"
+#include <stdlib.h>
+
+/* Helper function to compare integers for qsort */
+static int compare_int_asc(const void *a, const void *b)
+{
+    int ia = *((const int *) a);
+    int ib = *((const int *) b);
+    return (ia > ib) - (ia < ib);
+}
+
+void sort_int_array(int *array, int size)
+{
+    qsort(array, size, sizeof(int), compare_int_asc);
+}