diff --git a/include/affine.h b/include/affine.h index c1dd13b..f76376a 100644 --- a/include/affine.h +++ b/include/affine.h @@ -19,5 +19,6 @@ expr *new_constant(int d1, int d2, int n_vars, const double *values); expr *new_variable(int d1, int d2, int var_id, int n_vars); expr *new_index(expr *child, const int *indices, int n_idxs); +expr *new_reshape(expr *child, int d1, int d2); #endif /* AFFINE_H */ diff --git a/pyproject.toml b/pyproject.toml index 6e89dda..fc4c8ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ wheel.packages = ["src/dnlp_diff_engine"] build-dir = "build/{wheel_tag}" [tool.scikit-build.cmake.define] -CMAKE_BUILD_TYPE = "Release" +CMAKE_BUILD_TYPE = "Debug" [tool.pytest.ini_options] testpaths = ["python/tests"] diff --git a/python/atoms/getters.h b/python/atoms/getters.h new file mode 100644 index 0000000..538680a --- /dev/null +++ b/python/atoms/getters.h @@ -0,0 +1,43 @@ +#ifndef ATOM_GETTERS_H +#define ATOM_GETTERS_H + +#include "common.h" + +static PyObject *py_get_expr_dimensions(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + + if (!PyArg_ParseTuple(args, "O", &expr_capsule)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + return NULL; + } + + // Return tuple (d1, d2) + return Py_BuildValue("(ii)", node->d1, node->d2); +} + +static PyObject *py_get_expr_size(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + + if (!PyArg_ParseTuple(args, "O", &expr_capsule)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + return NULL; + } + + return Py_BuildValue("i", node->size); +} + +#endif /* ATOM_GETTERS_H */ diff --git a/python/atoms/reshape.h b/python/atoms/reshape.h new file mode 100644 index 0000000..8547a79 --- /dev/null +++ b/python/atoms/reshape.h @@ -0,0 +1,33 @@ +#ifndef ATOM_RESHAPE_H +#define ATOM_RESHAPE_H + +#include "common.h" + +static PyObject *py_make_reshape(PyObject *self, PyObject *args) +{ + PyObject *child_capsule; + int d1, d2; + + if (!PyArg_ParseTuple(args, "Oii", &child_capsule, &d1, &d2)) + { + return NULL; + } + + expr *child = (expr *) PyCapsule_GetPointer(child_capsule, EXPR_CAPSULE_NAME); + if (!child) + { + return NULL; + } + + expr *node = new_reshape(child, d1, d2); + if (!node) + { + PyErr_SetString(PyExc_RuntimeError, "failed to create reshape node"); + return NULL; + } + + expr_retain(node); + return PyCapsule_New(node, EXPR_CAPSULE_NAME, expr_capsule_destructor); +} + +#endif /* ATOM_RESHAPE_H */ diff --git a/python/bindings.c b/python/bindings.c index 1042701..97c2906 100644 --- a/python/bindings.c +++ b/python/bindings.c @@ -12,6 +12,7 @@ #include "atoms/cos.h" #include "atoms/entr.h" #include "atoms/exp.h" +#include "atoms/getters.h" #include "atoms/index.h" #include "atoms/left_matmul.h" #include "atoms/linear.h" @@ -24,6 +25,7 @@ #include "atoms/quad_form.h" #include "atoms/quad_over_lin.h" #include "atoms/rel_entr.h" +#include "atoms/reshape.h" #include "atoms/right_matmul.h" #include "atoms/sin.h" #include "atoms/sinh.h" @@ -90,6 +92,11 @@ static PyMethodDef DNLPMethods[] = { "Create quad_over_lin node (sum(x^2) / y)"}, {"make_rel_entr", py_make_rel_entr, METH_VARARGS, "Create rel_entr node: x * log(x/y) elementwise"}, + {"get_expr_dimensions", py_get_expr_dimensions, METH_VARARGS, + "Get the dimensions (d1, d2) of an expression"}, + {"get_expr_size", py_get_expr_size, METH_VARARGS, + "Get the total size of an expression"}, + {"make_reshape", py_make_reshape, METH_VARARGS, "Create reshape atom"}, {"make_problem", py_make_problem, METH_VARARGS, "Create problem from objective and constraints"}, {"problem_init_derivatives", py_problem_init_derivatives, METH_VARARGS, diff --git a/src/affine/constant.c b/src/affine/constant.c index 145a075..ff556cb 100644 --- a/src/affine/constant.c +++ b/src/affine/constant.c @@ -43,7 +43,7 @@ static bool is_affine(const expr *node) expr *new_constant(int d1, int d2, int n_vars, const double *values) { expr *node = new_expr(d1, d2, n_vars); - memcpy(node->value, values, d1 * d2 * sizeof(double)); + memcpy(node->value, values, node->size * sizeof(double)); node->forward = forward; node->is_affine = is_affine; node->jacobian_init = jacobian_init; diff --git a/src/affine/index.c b/src/affine/index.c index c407daa..81def9b 100644 --- a/src/affine/index.c +++ b/src/affine/index.c @@ -93,21 +93,21 @@ static void wsum_hess_init(expr *node) many numerical zeros in child->wsum_hess that are actually structural zeros, but we do not try to exploit that sparsity right now. */ - CSR_Matrix *H_child = x->wsum_hess; - node->wsum_hess = new_csr_matrix(H_child->m, H_child->n, H_child->nnz); - memcpy(node->wsum_hess->p, H_child->p, (H_child->m + 1) * sizeof(int)); - memcpy(node->wsum_hess->i, H_child->i, H_child->nnz * sizeof(int)); + CSR_Matrix *Hx = x->wsum_hess; + node->wsum_hess = new_csr_matrix(Hx->m, Hx->n, Hx->nnz); + memcpy(node->wsum_hess->p, Hx->p, (Hx->m + 1) * sizeof(int)); + memcpy(node->wsum_hess->i, Hx->i, Hx->nnz * sizeof(int)); } static void eval_wsum_hess(expr *node, const double *w) { - expr *child = node->left; + expr *x = node->left; index_expr *idx = (index_expr *) node; if (idx->has_duplicates) { /* zero and accumulate for repeated indices */ - memset(node->dwork, 0, child->size * sizeof(double)); + memset(node->dwork, 0, x->size * sizeof(double)); for (int i = 0; i < idx->n_idxs; i++) { node->dwork[idx->indices[i]] += w[i]; @@ -122,12 +122,9 @@ static void eval_wsum_hess(expr *node, const double *w) } } - /* delegate to child */ - child->eval_wsum_hess(child, node->dwork); - - /* copy values from child */ - memcpy(node->wsum_hess->x, child->wsum_hess->x, - child->wsum_hess->nnz * sizeof(double)); + /* evalute hessian of child */ + x->eval_wsum_hess(x, node->dwork); + memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) @@ -168,6 +165,5 @@ expr *new_index(expr *child, const int *indices, int n_idxs) /* detect duplicates for Hessian optimization */ idx->has_duplicates = check_for_duplicates(indices, n_idxs, child->size); - return node; } diff --git a/src/affine/promote.c b/src/affine/promote.c index fca5c80..8f499d9 100644 --- a/src/affine/promote.c +++ b/src/affine/promote.c @@ -1,4 +1,5 @@ #include "affine.h" +#include #include #include @@ -10,35 +11,32 @@ static void forward(expr *node, const double *u) node->left->forward(node->left, u); /* broadcast scalar value to all output elements */ - double val = node->left->value[0]; for (int i = 0; i < node->size; i++) { - node->value[i] = val; + node->value[i] = node->left->value[0]; } } static void jacobian_init(expr *node) { - node->left->jacobian_init(node->left); - - /* Each output row copies the single row from child's jacobian */ - CSR_Matrix *child_jac = node->left->jacobian; - int nnz = node->size * child_jac->nnz; + expr *x = node->left; + x->jacobian_init(x); + /* each output row copies the single row from child's jacobian */ + int nnz = node->size * x->jacobian->nnz; node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz); - CSR_Matrix *jac = node->jacobian; - /* Build sparsity pattern by replicating child's single row */ - int child_nnz = child_jac->p[1] - child_jac->p[0]; - jac->nnz = 0; + /* fill sparsity pattern */ + CSR_Matrix *J = node->jacobian; + J->nnz = 0; for (int row = 0; row < node->size; row++) { - jac->p[row] = jac->nnz; - memcpy(jac->i + jac->nnz, child_jac->i + child_jac->p[0], - child_nnz * sizeof(int)); - jac->nnz += child_nnz; + J->p[row] = J->nnz; + memcpy(J->i + J->nnz, x->jacobian->i, x->jacobian->nnz * sizeof(int)); + J->nnz += x->jacobian->nnz; } - jac->p[node->size] = jac->nnz; + assert(J->nnz == nnz); + J->p[node->size] = J->nnz; } static void eval_jacobian(expr *node) @@ -95,6 +93,7 @@ static bool is_affine(const expr *node) expr *new_promote(expr *child, int d1, int d2) { + assert(child->size == 1); expr *node = new_expr(d1, d2, child->n_vars); node->forward = forward; node->jacobian_init = jacobian_init; diff --git a/src/affine/reshape.c b/src/affine/reshape.c new file mode 100644 index 0000000..d5aeeba --- /dev/null +++ b/src/affine/reshape.c @@ -0,0 +1,70 @@ +#include "affine.h" +#include +#include +#include + +/* Reshape changes the shape of an expression without permuting data. + * Only Fortran (column-major) order is supported, where reshape is a no-op + * for the underlying data layout. */ + +static void forward(expr *node, const double *u) +{ + node->left->forward(node->left, u); + memcpy(node->value, node->left->value, node->size * sizeof(double)); +} + +static void jacobian_init(expr *node) +{ + expr *x = node->left; + x->jacobian_init(x); + node->jacobian = new_csr_matrix(node->size, node->n_vars, x->jacobian->nnz); + CSR_Matrix *jac = node->jacobian; + memcpy(jac->p, x->jacobian->p, (x->size + 1) * sizeof(int)); + memcpy(jac->i, x->jacobian->i, x->jacobian->nnz * sizeof(int)); +} + +static void eval_jacobian(expr *node) +{ + expr *x = node->left; + x->eval_jacobian(x); + memcpy(node->jacobian->x, x->jacobian->x, x->jacobian->nnz * sizeof(double)); +} + +static void wsum_hess_init(expr *node) +{ + node->left->wsum_hess_init(node->left); + CSR_Matrix *child_hess = node->left->wsum_hess; + node->wsum_hess = new_csr_matrix(child_hess->m, child_hess->n, child_hess->nnz); + memcpy(node->wsum_hess->p, child_hess->p, (child_hess->m + 1) * sizeof(int)); + memcpy(node->wsum_hess->i, child_hess->i, child_hess->nnz * sizeof(int)); + node->wsum_hess->nnz = child_hess->nnz; +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + node->left->eval_wsum_hess(node->left, w); + CSR_Matrix *child_hess = node->left->wsum_hess; + CSR_Matrix *hess = node->wsum_hess; + memcpy(hess->x, child_hess->x, child_hess->nnz * sizeof(double)); +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +expr *new_reshape(expr *child, int d1, int d2) +{ + assert(d1 * d2 == child->size); + expr *node = new_expr(d1, d2, child->n_vars); + node->left = child; + expr_retain(child); + node->forward = forward; + node->is_affine = is_affine; + node->jacobian_init = jacobian_init; + node->eval_jacobian = eval_jacobian; + node->wsum_hess_init = wsum_hess_init; + node->eval_wsum_hess = eval_wsum_hess; + + return node; +} diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c index ea7fe5e..dba2097 100644 --- a/src/bivariate/left_matmul.c +++ b/src/bivariate/left_matmul.c @@ -1,5 +1,6 @@ #include "bivariate.h" #include "subexpr.h" +#include #include /* This file implement the atom 'left_matmul' corresponding to the operation y = @@ -113,6 +114,7 @@ static void eval_wsum_hess(expr *node, const double *w) expr *new_left_matmul(expr *u, const CSR_Matrix *A) { + assert(u->d1 == A->n); /* Allocate the type-specific struct */ left_matmul_expr *lin_node = (left_matmul_expr *) calloc(1, sizeof(left_matmul_expr)); diff --git a/src/dnlp_diff_engine/__init__.py b/src/dnlp_diff_engine/__init__.py index a8cda90..c880684 100644 --- a/src/dnlp_diff_engine/__init__.py +++ b/src/dnlp_diff_engine/__init__.py @@ -29,8 +29,10 @@ def _convert_matmul(expr, children): # One of them should be a Constant, the other a variable expression left_arg, right_arg = expr.args - if isinstance(left_arg, cp.Constant): + if left_arg.is_constant(): # A @ f(x) -> left_matmul + # TODO: why is this always dense? What's going on here? + # we later convert it to csr.... A = np.asarray(left_arg.value, dtype=np.float64) if A.ndim == 1: A = A.reshape(1, -1) # Convert 1D to row vector @@ -44,8 +46,11 @@ def _convert_matmul(expr, children): m, n, ) - elif isinstance(right_arg, cp.Constant): + elif right_arg.is_constant(): # f(x) @ A -> right_matmul + + # TODO: why is this always dense? What's going on here? + # we later convert it to csr.... A = np.asarray(right_arg.value, dtype=np.float64) if A.ndim == 1: A = A.reshape(-1, 1) # Convert 1D to column vector @@ -69,7 +74,7 @@ def _convert_multiply(expr, children): left_arg, right_arg = expr.args # Check if left is a constant - if isinstance(left_arg, cp.Constant): + if left_arg.is_constant(): value = np.asarray(left_arg.value, dtype=np.float64) # Scalar constant @@ -83,7 +88,7 @@ def _convert_multiply(expr, children): return _diffengine.make_const_vector_mult(children[1], vector) # Check if right is a constant - elif isinstance(right_arg, cp.Constant): + elif right_arg.is_constant(): value = np.asarray(right_arg.value, dtype=np.float64) # Scalar constant @@ -109,9 +114,11 @@ def _extract_flat_indices_from_index(expr): return indices_per_dim[0].astype(np.int32) elif len(indices_per_dim) == 2: # Fortran order: idx = row + col * n_rows - return np.add.outer( - indices_per_dim[0], indices_per_dim[1] * parent_shape[0] - ).flatten(order="F").astype(np.int32) + return ( + np.add.outer(indices_per_dim[0], indices_per_dim[1] * parent_shape[0]) + .flatten(order="F") + .astype(np.int32) + ) else: raise NotImplementedError("index with >2 dimensions not supported") @@ -121,24 +128,6 @@ def _extract_flat_indices_from_special_index(expr): return np.reshape(expr._select_mat, expr._select_mat.size, order="F").astype(np.int32) -def _convert_reshape(expr, children): - """Convert reshape - only Fortran order is supported. - - For Fortran order, reshape is a no-op since the underlying data layout - is unchanged. We just pass through the child expression. - - Note: Only order='F' (Fortran/column-major) is supported. This is the - default and most common case in CVXPY. C order would require permutation. - """ - if expr.order != 'F': - raise NotImplementedError( - f"reshape with order='{expr.order}' not supported. " - "Only order='F' (Fortran) is currently supported." - ) - # Pass through - data layout is unchanged with Fortran order - return children[0] - - def _convert_rel_entr(_expr, children): """Convert rel_entr(x, y) = x * log(x/y) elementwise.""" return _diffengine.make_rel_entr(children[0], children[1]) @@ -169,6 +158,25 @@ def _convert_quad_form(expr, children): ) +def _convert_reshape(expr, children): + """Convert reshape - only Fortran order is supported. + + For Fortran order, reshape is a no-op since the underlying data layout + is unchanged. We create a reshape node that updates dimension metadata. + + Note: Only order='F' (Fortran/column-major) is supported. This is the + default and most common case in CVXPY. C order would require permutation. + """ + if expr.order != "F": + raise NotImplementedError( + f"reshape with order='{expr.order}' not supported. " + "Only order='F' (Fortran) is currently supported." + ) + + d1, d2 = expr.shape + return _diffengine.make_reshape(children[0], d1, d2) + + # Mapping from CVXPY atom names to C diff engine functions # Converters receive (expr, children) where expr is the CVXPY expression ATOM_CONVERTERS = { @@ -217,7 +225,6 @@ def _convert_quad_form(expr, children): "special_index": lambda expr, children: _diffengine.make_index( children[0], _extract_flat_indices_from_special_index(expr) ), - # Reshape (Fortran order only - pass-through since data layout unchanged) "reshape": _convert_reshape, }