diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..544eccb --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,137 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Overview + +DNLP-diff-engine is a C library with Python bindings that provides automatic differentiation for nonlinear optimization problems. It builds expression trees (ASTs) from CVXPY problems and computes first and second derivatives (gradients, Jacobians, Hessians) needed by NLP solvers like IPOPT. + +## Build Commands + +### Python Package (Recommended) + +```bash +# Install in development mode (builds C library + Python bindings) +pip install -e . + +# Run all Python tests +pytest + +# Run specific test file +pytest tests/python/test_unconstrained.py + +# Run specific test +pytest tests/python/test_unconstrained.py::test_sum_log +``` + +### Standalone C Library + +```bash +# Build core C library and tests +cmake -B build -S . +cmake --build build + +# Run C tests +./build/all_tests +``` + +### Legacy Python Build (without pip) + +```bash +# Build Python bindings manually (from project root) +cd python && cmake -B build -S . && cmake --build build && cd .. + +# Run tests from python/ directory +cd python && python tests/test_problem_native.py +``` + +## Architecture + +### Expression Tree System + +The core abstraction is the `expr` struct (in `include/expr.h`) representing a node in an expression AST. Each node stores: +- Shape information (`d1`, `d2`, `size`, `n_vars`) +- Function pointers for evaluation: `forward`, `jacobian_init`, `eval_jacobian`, `eval_wsum_hess` +- Computed values: `value`, `jacobian` (CSR), `wsum_hess` (CSR) +- Child pointers (`left`, `right`) and reference counting (`refcount`) + +### Atom Categories + +Atoms are organized by mathematical properties in `src/`: + +- **`affine/`** - Linear operations: `variable`, `constant`, `add`, `neg`, `sum`, `promote`, `hstack`, `trace`, `linear_op` +- **`elementwise_univariate/`** - Scalar functions applied elementwise: `log`, `exp`, `entr`, `power`, `logistic`, trigonometric, hyperbolic +- **`bivariate/`** - Two-argument operations: `multiply`, `quad_over_lin`, `rel_entr` +- **`other/`** - Special atoms not fitting above categories + +Each atom implements its own `forward`, `jacobian_init`, `eval_jacobian`, and `eval_wsum_hess` functions following a consistent pattern. + +### Problem Struct + +The `problem` struct (in `include/problem.h`) encapsulates an optimization problem: +- Single `objective` expression (scalar) +- Array of `constraints` expressions +- Pre-allocated storage for `constraint_values`, `gradient_values`, `jacobian`, `lagrange_hessian` + +Key oracle methods: +- `problem_objective_forward(prob, u)` - Evaluate objective at point u +- `problem_constraint_forward(prob, u)` - Evaluate all constraints at u +- `problem_gradient(prob)` - Compute objective gradient (after forward) +- `problem_jacobian(prob)` - Compute stacked constraint Jacobian (after forward) +- `problem_hessian(prob, obj_w, lambda)` - Compute Lagrangian Hessian + +### Python Bindings + +The Python package `dnlp_diff_engine` (in `src/dnlp_diff_engine/`) provides: + +**High-level API** (`__init__.py`): +- `C_problem` class wraps the C problem struct +- `convert_problem()` builds expression trees from CVXPY Problem objects +- Atoms are mapped via `ATOM_CONVERTERS` dictionary + +**Low-level C extension** (`_core` module, built from `python/bindings.c`): +- Atom constructors: `make_variable`, `make_constant`, `make_log`, `make_exp`, `make_add`, etc. +- Problem interface: `make_problem`, `problem_init_derivatives`, `problem_objective_forward`, `problem_gradient`, `problem_jacobian`, `problem_hessian` + +### Derivative Computation Flow + +1. Call `problem_init_derivatives()` to allocate Jacobian/Hessian storage and compute sparsity patterns +2. Call forward pass (`objective_forward` / `constraint_forward`) to propagate values through tree +3. Call derivative functions (`gradient`, `jacobian`, `hessian`) which traverse tree computing derivatives + +Jacobian uses chain rule: each node computes local Jacobian, combined via sparse matrix operations. +Hessian computes weighted sum: `obj_w * H_obj + sum(lambda_i * H_constraint_i)` + +### Sparse Matrix Utilities + +`include/utils/` contains CSR and CSC sparse matrix implementations used throughout for efficient derivative storage and computation. + +## Key Directories + +- `include/` - Header files defining public API +- `src/` - C implementation files +- `src/dnlp_diff_engine/` - Python package (installed via pip) +- `python/` - Python bindings C code and binding headers +- `python/atoms/` - Python binding headers for each atom type +- `python/problem/` - Python binding headers for problem interface +- `tests/` - C tests using minunit framework +- `tests/python/` - Python tests (run via pytest) +- `tests/jacobian_tests/` - Jacobian correctness tests (C) +- `tests/wsum_hess/` - Hessian correctness tests (C) + +## Adding a New Atom + +1. Create header in `include/` declaring the constructor function +2. Create implementation in appropriate `src/` subdirectory +3. Implement: `forward`, `jacobian_init`, `eval_jacobian`, `eval_wsum_hess` (optional), `free_type_data` (if needed) +4. Add Python binding header in `python/atoms/` +5. Register in `python/bindings.c` (both include and method table) +6. Add converter entry in `src/dnlp_diff_engine/__init__.py` `ATOM_CONVERTERS` dict +7. Rebuild: `pip install -e .` +8. Add tests in `tests/` (C) and `tests/python/` (Python) + +## License Header + +```c +// SPDX-License-Identifier: Apache-2.0 +``` diff --git a/CMakeLists.txt b/CMakeLists.txt index 3159cba..935b00f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,47 +1,72 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.15) project(DNLP_Diff_Engine C) set(CMAKE_C_STANDARD 99) -set(CMAKE_BUILD_TYPE Debug) -# Debug-friendly flags -add_compile_options(-g -O0) +# Set build type if not specified (standalone builds) +if(NOT CMAKE_BUILD_TYPE AND NOT SKBUILD) + set(CMAKE_BUILD_TYPE Debug) +endif() -# Warning flags +# Warning flags (always enabled) add_compile_options( -Wall # Enable most warnings -Wextra # Extra warnings -Wpedantic # Strict ISO C compliance -Wshadow # Warn about variable shadowing -Wformat=2 # Extra format string checks - #-Wconversion # Warn about implicit conversions - #-Wsign-conversion # Warn about sign conversions -Wcast-qual # Warn about cast that removes qualifiers -Wcast-align # Warn about pointer cast alignment issues -Wunused # Warn about unused variables/functions -Wdouble-promotion # Warn about float->double promotion -Wnull-dereference # Warn about null pointer dereference - #-Wstrict-prototypes # Warn about missing prototypes ) +# Debug flags only for standalone debug builds +if(CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT SKBUILD) + add_compile_options(-g -O0) +endif() + # Include directories include_directories(${PROJECT_SOURCE_DIR}/include) -include_directories(${PROJECT_SOURCE_DIR}/tests) # Source files - automatically gather all .c files from src/ file(GLOB_RECURSE SOURCES "src/*.c") -# Create library +# Create core library add_library(dnlp_diff ${SOURCES}) target_link_libraries(dnlp_diff m) -# Enable testing -enable_testing() +# ============================================================================= +# Python bindings (built when using scikit-build-core) +# ============================================================================= +if(SKBUILD) + find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module NumPy) -# Single test executable combining all tests -add_executable(all_tests - tests/all_tests.c - tests/test_helpers.c -) -target_link_libraries(all_tests dnlp_diff) -add_test(NAME AllTests COMMAND all_tests) + # Create Python extension module + Python3_add_library(_core MODULE python/bindings.c) + target_include_directories(_core PRIVATE + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/python + ${Python3_NumPy_INCLUDE_DIRS} + ) + target_link_libraries(_core PRIVATE dnlp_diff) + + # Install to the package directory + install(TARGETS _core LIBRARY DESTINATION dnlp_diff_engine) +endif() + +# ============================================================================= +# C tests (only for standalone builds) +# ============================================================================= +if(NOT SKBUILD) + include_directories(${PROJECT_SOURCE_DIR}/tests) + enable_testing() + + add_executable(all_tests + tests/all_tests.c + tests/test_helpers.c + ) + target_link_libraries(all_tests dnlp_diff) + add_test(NAME AllTests COMMAND all_tests) +endif() diff --git a/README.md b/README.md index b32b769..b939ced 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,66 @@ -3. more tests for chain rule elementwise univariate hessian -4. in the refactor, add consts -5. multiply with one constant vector/scalar argument -6. AX where X is a matrix. Can that happen? How is that canonicalized? Maybe it can't happen. -7. Must be able to compute jacobian and hessian of A @ phi(x), so linear operator needs other code! This requires new infrastructure, I think. -8. Shortcut hessians of affine stuff? +# DNLP Diff Engine -Going through all atoms to see that sparsity pattern is computed in initialization of jacobian: -2. trace - not ok +A C library with Python bindings for automatic differentiation of nonlinear optimization problems. Builds expression trees from CVXPY problems and computes gradients, Jacobians, and Hessians needed by NLP solvers like IPOPT. -Going through all atoms to see that sparsity pattern is computed in initialization of hessian: -2. hstack - not ok -3. trace - not ok +## Installation +### Using uv (recommended) + +```bash +uv venv .venv +source .venv/bin/activate +uv pip install -e ".[test]" +``` + +### Using pip + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -e ".[test]" +``` + +## Running Tests + +```bash +# Run all tests +pytest + +# Run specific test file +pytest tests/python/test_unconstrained.py + +# Run specific test +pytest tests/python/test_unconstrained.py::test_sum_log +``` + +## Usage + +```python +import cvxpy as cp +import numpy as np +from dnlp_diff_engine import C_problem + +# Define a CVXPY problem +x = cp.Variable(3) +problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) + +# Convert to C problem struct +prob = C_problem(problem) +prob.init_derivatives() + +# Evaluate at a point +u = np.array([1.0, 2.0, 3.0]) +obj_val = prob.objective_forward(u) +gradient = prob.gradient() + +print(f"Objective: {obj_val}") +print(f"Gradient: {gradient}") +``` + +## Building the C Library (standalone) + +```bash +cmake -B build -S . +cmake --build build +./build/all_tests +``` diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..d473508 --- /dev/null +++ b/TODO.md @@ -0,0 +1,13 @@ +3. more tests for chain rule elementwise univariate hessian +4. in the refactor, add consts +5. multiply with one constant vector/scalar argument +6. AX where X is a matrix. Can that happen? How is that canonicalized? Maybe it can't happen. +7. Must be able to compute jacobian and hessian of A @ phi(x), so linear operator needs other code! This requires new infrastructure, I think. +8. Shortcut hessians of affine stuff? + +Going through all atoms to see that sparsity pattern is computed in initialization of jacobian: +2. trace - not ok + +Going through all atoms to see that sparsity pattern is computed in initialization of hessian: +2. hstack - not ok +3. trace - not ok diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..535ccdb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["scikit-build-core>=0.5", "numpy"] +build-backend = "scikit_build_core.build" + +[project] +name = "dnlp-diff-engine" +version = "0.1.0" +description = "Automatic differentiation engine for DNLP optimization problems" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "numpy", + "scipy", + "cvxpy", +] + +[project.optional-dependencies] +test = [ + "pytest>=7.0", +] +dev = [ + "pytest>=7.0", + "ruff", +] + +[tool.scikit-build] +cmake.source-dir = "." +wheel.packages = ["src/dnlp_diff_engine"] +build-dir = "build/{wheel_tag}" + +[tool.scikit-build.cmake.define] +CMAKE_BUILD_TYPE = "Release" + +[tool.pytest.ini_options] +testpaths = ["tests/python"] +python_files = ["test_*.py"] +python_functions = ["test_*"] + +[tool.ruff] +line-length = 100 +target-version = "py310" + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] diff --git a/python/bindings.c b/python/bindings.c index 6e202d3..ff68c0e 100644 --- a/python/bindings.c +++ b/python/bindings.c @@ -58,10 +58,10 @@ static PyMethodDef DNLPMethods[] = { "Compute Lagrangian Hessian"}, {NULL, NULL, 0, NULL}}; -static struct PyModuleDef dnlp_module = {PyModuleDef_HEAD_INIT, "DNLP_diff_engine", +static struct PyModuleDef dnlp_module = {PyModuleDef_HEAD_INIT, "dnlp_diff_engine._core", NULL, -1, DNLPMethods}; -PyMODINIT_FUNC PyInit_DNLP_diff_engine(void) +PyMODINIT_FUNC PyInit__core(void) { if (ensure_numpy() < 0) return NULL; return PyModule_Create(&dnlp_module); diff --git a/python/tests/test_unconstrained.py b/python/tests/test_unconstrained.py deleted file mode 100644 index 99994a6..0000000 --- a/python/tests/test_unconstrained.py +++ /dev/null @@ -1,433 +0,0 @@ -import cvxpy as cp -import numpy as np -from convert import C_problem - -# Note: Current implementation supports: -# - Elementwise ops on variables: log(x), exp(x) -# - Add of elementwise results: log(x) + exp(y) -# - Sum reductions: sum(log(x)) -# - Multiple variables with separate operations -# -# NOT YET SUPPORTED (see unsupported section at bottom of file): -# - Elementwise ops on add results: log(x + y) - - -def test_sum_log(): - """Test sum(log(x)) objective and gradient.""" - x = cp.Variable(4) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) - prob = C_problem(problem) - - u = np.array([1.0, 2.0, 3.0, 4.0]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(u)) - assert np.allclose(obj_val, expected) - - # Gradient: d/dx sum(log(x)) = 1/x - grad = prob.gradient() - expected_grad = 1.0 / u - assert np.allclose(grad, expected_grad) - - -def test_sum_exp(): - """Test sum(exp(x)) objective and gradient.""" - x = cp.Variable(3) - problem = cp.Problem(cp.Minimize(cp.sum(cp.exp(x)))) - prob = C_problem(problem) - - u = np.array([0.0, 1.0, 2.0]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.exp(u)) - assert np.allclose(obj_val, expected) - - # Gradient: d/dx sum(exp(x)) = exp(x) - grad = prob.gradient() - expected_grad = np.exp(u) - assert np.allclose(grad, expected_grad) - - -def test_variable_reuse(): - """Test sum(log(x) + exp(x)) - same variable used twice.""" - x = cp.Variable(2) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x) + cp.exp(x)))) - prob = C_problem(problem) - - u = np.array([1.0, 2.0]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(u) + np.exp(u)) - assert np.allclose(obj_val, expected) - - # Gradient: d/dx_i = 1/x_i + exp(x_i) - grad = prob.gradient() - expected_grad = 1.0 / u + np.exp(u) - assert np.allclose(grad, expected_grad) - - -def test_variable_used_multiple_times(): - """Test sum(log(x) + exp(x) + log(x)) - variable used 3 times.""" - x = cp.Variable(3) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x) + cp.exp(x) + cp.log(x)))) - prob = C_problem(problem) - - u = np.array([1.0, 2.0, 3.0]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(2 * np.log(u) + np.exp(u)) - assert np.allclose(obj_val, expected) - - # Gradient: d/dx_i = 2/x_i + exp(x_i) - grad = prob.gradient() - expected_grad = 2.0 / u + np.exp(u) - assert np.allclose(grad, expected_grad) - - -def test_larger_variable(): - """Test sum(log(x)) with larger variable (100 elements).""" - x = cp.Variable(100) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) - prob = C_problem(problem) - - u = np.linspace(1.0, 10.0, 100) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(u)) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = 1.0 / u - assert np.allclose(grad, expected_grad) - - -def test_matrix_variable(): - """Test sum(log(X)) with 2D matrix variable (3x4).""" - X = cp.Variable((3, 4)) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(X)))) - prob = C_problem(problem) - - u = np.arange(1.0, 13.0) # 12 elements - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(u)) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = 1.0 / u - assert np.allclose(grad, expected_grad) - - -def test_two_variables_separate_ops(): - """Test sum(log(x)) + sum(exp(y)) - two variables with separate ops.""" - x = cp.Variable(2) - y = cp.Variable(2) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)) + cp.sum(cp.exp(y)))) - prob = C_problem(problem) - - x_vals = np.array([1.0, 2.0]) - y_vals = np.array([0.5, 1.0]) - u = np.concatenate([x_vals, y_vals]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(x_vals)) + np.sum(np.exp(y_vals)) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = np.concatenate([1.0 / x_vals, np.exp(y_vals)]) - assert np.allclose(grad, expected_grad) - - -def test_two_variables_same_sum(): - """Test sum(log(x) + log(y)) - two variables added before sum.""" - x = cp.Variable(2) - y = cp.Variable(2) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x) + cp.log(y)))) - prob = C_problem(problem) - - x_vals = np.array([1.0, 2.0]) - y_vals = np.array([3.0, 4.0]) - u = np.concatenate([x_vals, y_vals]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(x_vals) + np.log(y_vals)) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = np.concatenate([1.0 / x_vals, 1.0 / y_vals]) - assert np.allclose(grad, expected_grad) - - -def test_mixed_sizes(): - """Test sum(log(a)) + sum(log(b)) + sum(log(c)) with different sized variables.""" - a = cp.Variable(2) - b = cp.Variable(5) - c = cp.Variable(3) - problem = cp.Problem(cp.Minimize( - cp.sum(cp.log(a)) + cp.sum(cp.log(b)) + cp.sum(cp.log(c)) - )) - prob = C_problem(problem) - - a_vals = np.array([1.0, 2.0]) - b_vals = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - c_vals = np.array([1.0, 2.0, 3.0]) - u = np.concatenate([a_vals, b_vals, c_vals]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(a_vals)) + np.sum(np.log(b_vals)) + np.sum(np.log(c_vals)) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = 1.0 / u - assert np.allclose(grad, expected_grad) - - -def test_multiple_variables_log_exp(): - """Test sum(log(a)) + sum(log(b)) + sum(exp(c)) + sum(exp(d)).""" - a = cp.Variable(2) - b = cp.Variable(2) - c = cp.Variable(2) - d = cp.Variable(2) - obj = cp.sum(cp.log(a)) + cp.sum(cp.log(b)) + cp.sum(cp.exp(c)) + cp.sum(cp.exp(d)) - problem = cp.Problem(cp.Minimize(obj)) - prob = C_problem(problem) - - a_vals = np.array([1.0, 2.0]) - b_vals = np.array([0.5, 1.0]) - c_vals = np.array([0.1, 0.2]) - d_vals = np.array([0.1, 0.1]) - u = np.concatenate([a_vals, b_vals, c_vals, d_vals]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = (np.sum(np.log(a_vals)) + np.sum(np.log(b_vals)) + - np.sum(np.exp(c_vals)) + np.sum(np.exp(d_vals))) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = np.concatenate([ - 1.0 / a_vals, - 1.0 / b_vals, - np.exp(c_vals), - np.exp(d_vals) - ]) - assert np.allclose(grad, expected_grad) - - -def test_three_variables_mixed(): - """Test sum(log(x) + exp(y) + log(z)) - three variables mixed.""" - x = cp.Variable(2) - y = cp.Variable(2) - z = cp.Variable(2) - obj = cp.sum(cp.log(x) + cp.exp(y) + cp.log(z)) - problem = cp.Problem(cp.Minimize(obj)) - prob = C_problem(problem) - - x_vals = np.array([1.0, 2.0]) - y_vals = np.array([0.5, 1.0]) - z_vals = np.array([2.0, 3.0]) - u = np.concatenate([x_vals, y_vals, z_vals]) - prob.init_derivatives() - - # Objective - obj_val = prob.objective_forward(u) - expected = np.sum(np.log(x_vals) + np.exp(y_vals) + np.log(z_vals)) - assert np.allclose(obj_val, expected) - - # Gradient - grad = prob.gradient() - expected_grad = np.concatenate([1.0 / x_vals, np.exp(y_vals), 1.0 / z_vals]) - assert np.allclose(grad, expected_grad) - - -def test_repeated_evaluations(): - """Test repeated evaluations at different points.""" - x = cp.Variable(3) - problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) - prob = C_problem(problem) - - u1 = np.array([1.0, 2.0, 3.0]) - prob.init_derivatives() - - # First evaluation - obj1 = prob.objective_forward(u1) - grad1 = prob.gradient() - - # Second evaluation - u2 = np.array([2.0, 3.0, 4.0]) - obj2 = prob.objective_forward(u2) - grad2 = prob.gradient() - - assert np.allclose(obj1, np.sum(np.log(u1))) - assert np.allclose(obj2, np.sum(np.log(u2))) - assert np.allclose(grad1, 1.0 / u1) - assert np.allclose(grad2, 1.0 / u2) - - -# ============================================================================= -# UNSUPPORTED PATTERNS -# ============================================================================= -# The following patterns are NOT YET SUPPORTED because the current -# implementation of elementwise univariate ops (log, exp, etc.) in -# src/elementwise_univariate/common.c assumes that the child expression -# is either a Variable or a LinearOp (which includes negation as -I). -# -# When the child is an Add expression (like x + y), the code incorrectly -# casts it to linear_op_expr and reads invalid memory, causing crashes. -# -# To support these patterns, we would need to generalize the jacobian -# computation in elementwise ops to handle arbitrary child expressions -# by using the child's jacobian directly rather than assuming specific -# structure. -# ============================================================================= - -# def test_elementwise_on_add(): -# """NOT SUPPORTED: log(x + y) - elementwise op on add result. -# -# This fails because log's jacobian computation assumes its child -# is a Variable or LinearOp, but here it's an Add expression. -# """ -# x = cp.Variable(2) -# y = cp.Variable(2) -# problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x + y)))) -# prob = C_problem(problem) -# -# x_vals = np.array([1.0, 2.0]) -# y_vals = np.array([0.5, 1.0]) -# u = np.concatenate([x_vals, y_vals]) -# prob.init_derivatives() -# -# # Expected objective: sum(log(x + y)) -# obj_val = prob.objective_forward(u) -# expected = np.sum(np.log(x_vals + y_vals)) -# assert np.allclose(obj_val, expected) -# -# # Expected gradient: d/dx_i = 1/(x_i + y_i), d/dy_i = 1/(x_i + y_i) -# grad = prob.gradient() -# grad_xy = 1.0 / (x_vals + y_vals) -# expected_grad = np.concatenate([grad_xy, grad_xy]) -# assert np.allclose(grad, expected_grad) - - -# def test_log_of_sum(): -# """NOT SUPPORTED: log(sum(x)) - elementwise op on sum result. -# -# This fails because log's jacobian computation assumes its child -# is a Variable or LinearOp, but here it's a Sum expression. -# """ -# x = cp.Variable(4) -# problem = cp.Problem(cp.Minimize(cp.log(cp.sum(x)))) -# prob = C_problem(problem) -# -# u = np.array([1.0, 2.0, 3.0, 4.0]) -# prob.init_derivatives() -# -# # Expected objective: log(sum(x)) -# obj_val = prob.objective_forward(u) -# expected = np.log(np.sum(u)) -# assert np.allclose(obj_val, expected) -# -# # Expected gradient: d/dx_i log(sum(x)) = 1/sum(x) -# grad = prob.gradient() -# expected_grad = np.ones_like(u) / np.sum(u) -# assert np.allclose(grad, expected_grad) - - -# def test_mixed_elementwise_on_add(): -# """NOT SUPPORTED: log(x + y) + exp(y + z) - multiple elementwise on adds. -# -# Same underlying issue - elementwise ops don't support Add children. -# """ -# x = cp.Variable(2) -# y = cp.Variable(2) -# z = cp.Variable(2) -# obj = cp.sum(cp.log(x + y)) + cp.sum(cp.exp(y + z)) -# problem = cp.Problem(cp.Minimize(obj)) -# prob = C_problem(problem) -# -# x_vals = np.array([1.0, 2.0]) -# y_vals = np.array([0.5, 1.0]) -# z_vals = np.array([0.1, 0.2]) -# u = np.concatenate([x_vals, y_vals, z_vals]) -# prob.init_derivatives() -# -# # Expected objective -# obj_val = prob.objective_forward(u) -# expected = np.sum(np.log(x_vals + y_vals)) + np.sum(np.exp(y_vals + z_vals)) -# assert np.allclose(obj_val, expected) - - -# def test_nested_add_in_elementwise(): -# """NOT SUPPORTED: log(x + y + z) - nested adds inside elementwise. -# -# Same underlying issue. -# """ -# x = cp.Variable(2) -# y = cp.Variable(2) -# z = cp.Variable(2) -# problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x + y + z)))) -# prob = C_problem(problem) -# -# x_vals = np.array([1.0, 2.0]) -# y_vals = np.array([0.5, 1.0]) -# z_vals = np.array([0.5, 0.5]) -# u = np.concatenate([x_vals, y_vals, z_vals]) -# prob.init_derivatives() -# -# # Expected objective -# obj_val = prob.objective_forward(u) -# expected = np.sum(np.log(x_vals + y_vals + z_vals)) -# assert np.allclose(obj_val, expected) - - -if __name__ == "__main__": - test_sum_log() - print("test_sum_log passed!") - test_sum_exp() - print("test_sum_exp passed!") - test_variable_reuse() - print("test_variable_reuse passed!") - test_variable_used_multiple_times() - print("test_variable_used_multiple_times passed!") - test_larger_variable() - print("test_larger_variable passed!") - test_matrix_variable() - print("test_matrix_variable passed!") - test_two_variables_separate_ops() - print("test_two_variables_separate_ops passed!") - test_two_variables_same_sum() - print("test_two_variables_same_sum passed!") - test_mixed_sizes() - print("test_mixed_sizes passed!") - test_multiple_variables_log_exp() - print("test_multiple_variables_log_exp passed!") - test_three_variables_mixed() - print("test_three_variables_mixed passed!") - test_repeated_evaluations() - print("test_repeated_evaluations passed!") - print("\nAll unconstrained tests passed!") diff --git a/python/convert.py b/src/dnlp_diff_engine/__init__.py similarity index 76% rename from python/convert.py rename to src/dnlp_diff_engine/__init__.py index 669f269..e9eee0e 100644 --- a/python/convert.py +++ b/src/dnlp_diff_engine/__init__.py @@ -1,15 +1,25 @@ +""" +DNLP Diff Engine - Automatic differentiation for nonlinear optimization. + +This package provides automatic differentiation capabilities for CVXPY problems, +computing gradients, Jacobians, and Hessians needed by NLP solvers. +""" + import cvxpy as cp import numpy as np -from scipy import sparse from cvxpy.reductions.inverse_data import InverseData -import DNLP_diff_engine as diffengine +from scipy import sparse + +from . import _core as _diffengine + +__all__ = ["C_problem", "convert_problem", "build_variable_dict"] def _chain_add(children): """Chain multiple children with binary adds: a + b + c -> add(add(a, b), c).""" result = children[0] for child in children[1:]: - result = diffengine.make_add(result, child) + result = _diffengine.make_add(result, child) return result @@ -17,22 +27,22 @@ def _chain_add(children): # Converters receive (expr, children) where expr is the CVXPY expression ATOM_CONVERTERS = { # Elementwise unary - "log": lambda expr, children: diffengine.make_log(children[0]), - "exp": lambda expr, children: diffengine.make_exp(children[0]), + "log": lambda _expr, children: _diffengine.make_log(children[0]), + "exp": lambda _expr, children: _diffengine.make_exp(children[0]), # Affine unary - "NegExpression": lambda expr, children: diffengine.make_neg(children[0]), - "Promote": lambda expr, children: diffengine.make_promote( + "NegExpression": lambda _expr, children: _diffengine.make_neg(children[0]), + "Promote": lambda expr, children: _diffengine.make_promote( children[0], expr.shape[0] if len(expr.shape) >= 1 else 1, expr.shape[1] if len(expr.shape) >= 2 else 1, ), # N-ary (handles 2+ args) - "AddExpression": lambda expr, children: _chain_add(children), + "AddExpression": lambda _expr, children: _chain_add(children), # Reductions - "Sum": lambda expr, children: diffengine.make_sum(children[0], -1), + "Sum": lambda _expr, children: _diffengine.make_sum(children[0], -1), } @@ -59,7 +69,7 @@ def build_variable_dict(variables: list) -> tuple[dict, int]: d1, d2 = shape[0], 1 else: # scalar d1, d2 = 1, 1 - c_var = diffengine.make_variable(d1, d2, offset, n_vars) + c_var = _diffengine.make_variable(d1, d2, offset, n_vars) var_dict[var.id] = c_var return var_dict, n_vars @@ -76,7 +86,7 @@ def _convert_expr(expr, var_dict: dict, n_vars: int): value = np.asarray(expr.value, dtype=np.float64).flatten() d1 = expr.shape[0] if len(expr.shape) >= 1 else 1 d2 = expr.shape[1] if len(expr.shape) >= 2 else 1 - return diffengine.make_constant(d1, d2, n_vars, value) + return _diffengine.make_constant(d1, d2, n_vars, value) # Recursive case: atoms atom_name = type(expr).__name__ @@ -135,29 +145,29 @@ def __init__(self, cvxpy_problem: cp.Problem): c_constraints = [ _convert_expr(c.expr, var_dict, n_vars) for c in cvxpy_problem.constraints ] - self._capsule = diffengine.make_problem(c_obj, c_constraints) + self._capsule = _diffengine.make_problem(c_obj, c_constraints) self._allocated = False def init_derivatives(self): """Initialize derivative structures. Must be called before forward/gradient/jacobian.""" - diffengine.problem_init_derivatives(self._capsule) + _diffengine.problem_init_derivatives(self._capsule) self._allocated = True def objective_forward(self, u: np.ndarray) -> float: """Evaluate objective. Returns obj_value float.""" - return diffengine.problem_objective_forward(self._capsule, u) + return _diffengine.problem_objective_forward(self._capsule, u) def constraint_forward(self, u: np.ndarray) -> np.ndarray: """Evaluate constraints only. Returns constraint_values array.""" - return diffengine.problem_constraint_forward(self._capsule, u) + return _diffengine.problem_constraint_forward(self._capsule, u) def gradient(self) -> np.ndarray: """Compute gradient of objective. Call objective_forward first. Returns gradient array.""" - return diffengine.problem_gradient(self._capsule) + return _diffengine.problem_gradient(self._capsule) def jacobian(self) -> sparse.csr_matrix: - """Compute jacobian of constraints. Call constraint_forward first. Returns scipy CSR matrix.""" - data, indices, indptr, shape = diffengine.problem_jacobian(self._capsule) + """Compute constraint Jacobian. Call constraint_forward first.""" + data, indices, indptr, shape = _diffengine.problem_jacobian(self._capsule) return sparse.csr_matrix((data, indices, indptr), shape=shape) def hessian(self, obj_factor: float, lagrange: np.ndarray) -> sparse.csr_matrix: @@ -174,7 +184,7 @@ def hessian(self, obj_factor: float, lagrange: np.ndarray) -> sparse.csr_matrix: Returns: scipy CSR matrix of shape (n_vars, n_vars) """ - data, indices, indptr, shape = diffengine.problem_hessian( + data, indices, indptr, shape = _diffengine.problem_hessian( self._capsule, obj_factor, lagrange ) return sparse.csr_matrix((data, indices, indptr), shape=shape) diff --git a/python/tests/test_constrained.py b/tests/python/test_constrained.py similarity index 90% rename from python/tests/test_constrained.py rename to tests/python/test_constrained.py index 4409b13..f7a933b 100644 --- a/python/tests/test_constrained.py +++ b/tests/python/test_constrained.py @@ -1,6 +1,9 @@ +"""Tests for constrained optimization problems.""" + import cvxpy as cp import numpy as np -from convert import C_problem + +from dnlp_diff_engine import C_problem # Note: CVXPY converts constraints A >= B to B - A <= 0 # So constr.expr for "log(x) >= 0" is "0 - log(x)" = -log(x) @@ -325,32 +328,3 @@ def test_hessian_repeated_evaluations(): assert np.allclose(H1.toarray(), expected_H1) assert np.allclose(H2.toarray(), expected_H2) - - -if __name__ == "__main__": - test_single_constraint() - print("test_single_constraint passed!") - test_two_constraints() - print("test_two_constraints passed!") - test_three_constraints_different_sizes() - print("test_three_constraints_different_sizes passed!") - test_multiple_variables() - print("test_multiple_variables passed!") - test_larger_scale() - print("test_larger_scale passed!") - test_repeated_evaluations() - print("test_repeated_evaluations passed!") - - # Hessian tests - test_hessian_objective_only() - print("test_hessian_objective_only passed!") - test_hessian_with_constraint() - print("test_hessian_with_constraint passed!") - test_hessian_obj_factor_zero() - print("test_hessian_obj_factor_zero passed!") - test_hessian_two_constraints() - print("test_hessian_two_constraints passed!") - test_hessian_repeated_evaluations() - print("test_hessian_repeated_evaluations passed!") - - print("\nAll constrained tests passed!") diff --git a/python/tests/test_problem_native.py b/tests/python/test_problem_native.py similarity index 93% rename from python/tests/test_problem_native.py rename to tests/python/test_problem_native.py index 2e3a310..4e3f0bd 100644 --- a/python/tests/test_problem_native.py +++ b/tests/python/test_problem_native.py @@ -1,6 +1,9 @@ +"""Tests for the low-level C problem interface.""" + import numpy as np from scipy import sparse -import DNLP_diff_engine as diffengine + +from dnlp_diff_engine import _core as diffengine def test_problem_objective_forward(): @@ -113,21 +116,21 @@ def test_problem_multiple_constraints(): """Test problem with multiple different constraints (low-level).""" n_vars = 3 x = diffengine.make_variable(n_vars, 1, 0, n_vars) - + # Objective: sum(log(x)) log_x = diffengine.make_log(x) objective = diffengine.make_sum(log_x, -1) - + # Multiple constraints using the same variable: # Constraint 1: log(x) - reused from objective # Constraint 2: exp(x) exp_x = diffengine.make_exp(x) constraints = [log_x, exp_x] - + prob = diffengine.make_problem(objective, constraints) u = np.array([1.0, 2.0, 3.0]) diffengine.problem_init_derivatives(prob) - + # Test forward pass obj_val = diffengine.problem_objective_forward(prob, u) constraint_vals = diffengine.problem_constraint_forward(prob, u) @@ -146,8 +149,8 @@ def test_problem_multiple_constraints(): diffengine.problem_constraint_forward(prob, u) data, indices, indptr, shape = diffengine.problem_jacobian(prob) jac = sparse.csr_matrix((data, indices, indptr), shape=shape) - - # Expected Jacobian: + + # Expected Jacobian: # log(x): diag(1/u) # exp(x): diag(exp(u)) expected_jac = np.vstack([ @@ -156,13 +159,3 @@ def test_problem_multiple_constraints(): ]) assert jac.shape == (6, 3) assert np.allclose(jac.toarray(), expected_jac) - - -if __name__ == "__main__": - test_problem_objective_forward() - test_problem_constraint_forward() - test_problem_gradient() - test_problem_jacobian() - test_problem_no_constraints() - test_problem_multiple_constraints() - print("All native problem tests passed!") diff --git a/tests/python/test_unconstrained.py b/tests/python/test_unconstrained.py new file mode 100644 index 0000000..bcb0c54 --- /dev/null +++ b/tests/python/test_unconstrained.py @@ -0,0 +1,223 @@ +"""Tests for unconstrained optimization problems.""" + +import cvxpy as cp +import numpy as np + +from dnlp_diff_engine import C_problem + + +def test_sum_log(): + """Test sum(log(x)) objective and gradient.""" + x = cp.Variable(4) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) + prob = C_problem(problem) + + u = np.array([1.0, 2.0, 3.0, 4.0]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(u)) + assert np.allclose(obj_val, expected) + + # Gradient: d/dx sum(log(x)) = 1/x + grad = prob.gradient() + expected_grad = 1.0 / u + assert np.allclose(grad, expected_grad) + + +def test_sum_exp(): + """Test sum(exp(x)) objective and gradient.""" + x = cp.Variable(3) + problem = cp.Problem(cp.Minimize(cp.sum(cp.exp(x)))) + prob = C_problem(problem) + + u = np.array([0.0, 1.0, 2.0]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.exp(u)) + assert np.allclose(obj_val, expected) + + # Gradient: d/dx sum(exp(x)) = exp(x) + grad = prob.gradient() + expected_grad = np.exp(u) + assert np.allclose(grad, expected_grad) + + +def test_variable_reuse(): + """Test sum(log(x) + exp(x)) - same variable used twice.""" + x = cp.Variable(2) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x) + cp.exp(x)))) + prob = C_problem(problem) + + u = np.array([1.0, 2.0]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(u) + np.exp(u)) + assert np.allclose(obj_val, expected) + + # Gradient: d/dx_i = 1/x_i + exp(x_i) + grad = prob.gradient() + expected_grad = 1.0 / u + np.exp(u) + assert np.allclose(grad, expected_grad) + + +def test_variable_used_multiple_times(): + """Test sum(log(x) + exp(x) + log(x)) - variable used 3 times.""" + x = cp.Variable(3) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x) + cp.exp(x) + cp.log(x)))) + prob = C_problem(problem) + + u = np.array([1.0, 2.0, 3.0]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(2 * np.log(u) + np.exp(u)) + assert np.allclose(obj_val, expected) + + # Gradient: d/dx_i = 2/x_i + exp(x_i) + grad = prob.gradient() + expected_grad = 2.0 / u + np.exp(u) + assert np.allclose(grad, expected_grad) + + +def test_larger_variable(): + """Test sum(log(x)) with larger variable (100 elements).""" + x = cp.Variable(100) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) + prob = C_problem(problem) + + u = np.linspace(1.0, 10.0, 100) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(u)) + assert np.allclose(obj_val, expected) + + # Gradient + grad = prob.gradient() + expected_grad = 1.0 / u + assert np.allclose(grad, expected_grad) + + +def test_matrix_variable(): + """Test sum(log(X)) with 2D matrix variable (3x4).""" + X = cp.Variable((3, 4)) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(X)))) + prob = C_problem(problem) + + u = np.arange(1.0, 13.0) # 12 elements + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(u)) + assert np.allclose(obj_val, expected) + + # Gradient + grad = prob.gradient() + expected_grad = 1.0 / u + assert np.allclose(grad, expected_grad) + + +def test_two_variables_separate_ops(): + """Test sum(log(x)) + sum(exp(y)) - two variables with separate ops.""" + x = cp.Variable(2) + y = cp.Variable(2) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)) + cp.sum(cp.exp(y)))) + prob = C_problem(problem) + + x_vals = np.array([1.0, 2.0]) + y_vals = np.array([0.5, 1.0]) + u = np.concatenate([x_vals, y_vals]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(x_vals)) + np.sum(np.exp(y_vals)) + assert np.allclose(obj_val, expected) + + # Gradient + grad = prob.gradient() + expected_grad = np.concatenate([1.0 / x_vals, np.exp(y_vals)]) + assert np.allclose(grad, expected_grad) + + +def test_two_variables_same_sum(): + """Test sum(log(x) + log(y)) - two variables added before sum.""" + x = cp.Variable(2) + y = cp.Variable(2) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x) + cp.log(y)))) + prob = C_problem(problem) + + x_vals = np.array([1.0, 2.0]) + y_vals = np.array([3.0, 4.0]) + u = np.concatenate([x_vals, y_vals]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(x_vals) + np.log(y_vals)) + assert np.allclose(obj_val, expected) + + # Gradient + grad = prob.gradient() + expected_grad = np.concatenate([1.0 / x_vals, 1.0 / y_vals]) + assert np.allclose(grad, expected_grad) + + +def test_mixed_sizes(): + """Test sum(log(a)) + sum(log(b)) + sum(log(c)) with different sized variables.""" + a = cp.Variable(2) + b = cp.Variable(5) + c = cp.Variable(3) + problem = cp.Problem(cp.Minimize( + cp.sum(cp.log(a)) + cp.sum(cp.log(b)) + cp.sum(cp.log(c)) + )) + prob = C_problem(problem) + + a_vals = np.array([1.0, 2.0]) + b_vals = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + c_vals = np.array([1.0, 2.0, 3.0]) + u = np.concatenate([a_vals, b_vals, c_vals]) + prob.init_derivatives() + + # Objective + obj_val = prob.objective_forward(u) + expected = np.sum(np.log(a_vals)) + np.sum(np.log(b_vals)) + np.sum(np.log(c_vals)) + assert np.allclose(obj_val, expected) + + # Gradient + grad = prob.gradient() + expected_grad = 1.0 / u + assert np.allclose(grad, expected_grad) + + +def test_repeated_evaluations(): + """Test repeated evaluations at different points.""" + x = cp.Variable(3) + problem = cp.Problem(cp.Minimize(cp.sum(cp.log(x)))) + prob = C_problem(problem) + + u1 = np.array([1.0, 2.0, 3.0]) + prob.init_derivatives() + + # First evaluation + obj1 = prob.objective_forward(u1) + grad1 = prob.gradient() + + # Second evaluation + u2 = np.array([2.0, 3.0, 4.0]) + obj2 = prob.objective_forward(u2) + grad2 = prob.gradient() + + assert np.allclose(obj1, np.sum(np.log(u1))) + assert np.allclose(obj2, np.sum(np.log(u2))) + assert np.allclose(grad1, 1.0 / u1) + assert np.allclose(grad2, 1.0 / u2)