From 8a1a5340867fd3e60743c7743ef01355599ce140 Mon Sep 17 00:00:00 2001 From: Yasser Naji Date: Sun, 12 Oct 2025 21:40:46 +0100 Subject: [PATCH 1/4] Lasso: Create module + struct --- crates/RustQuant_ml/src/lasso.rs | 49 ++++++++++++++++++++++++++++++++ crates/RustQuant_ml/src/lib.rs | 4 +++ 2 files changed, 53 insertions(+) create mode 100644 crates/RustQuant_ml/src/lasso.rs diff --git a/crates/RustQuant_ml/src/lasso.rs b/crates/RustQuant_ml/src/lasso.rs new file mode 100644 index 00000000..a086e673 --- /dev/null +++ b/crates/RustQuant_ml/src/lasso.rs @@ -0,0 +1,49 @@ +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// RustQuant: A Rust library for quantitative finance tools. +// Copyright (C) 2023 https://github.com/avhz +// Dual licensed under Apache 2.0 and MIT. +// See: +// - LICENSE-APACHE.md +// - LICENSE-MIT.md +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//! Module for Lasso algorithms. + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// IMPORTS +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +use nalgebra::{DMatrix, DVector}; +use RustQuant_error::RustQuantError; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// STRUCTS, ENUMS, AND TRAITS +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Struct to hold the input data for a Lasso regression. +#[allow(clippy::module_name_repetitions)] +#[derive(Clone, Debug)] +pub struct LassoInput { + /// The features matrix. + pub x: DMatrix, + /// The output data vector, also known as the response vector. + pub y: DVector, + /// The regularization parameter. + pub lambda: T, + /// Include the intercept. + pub fit_intercept: bool, + /// The maximum number of iterations for training. + pub max_iter: usize, + /// The tolerance for the convergence. + pub tolerance: T, +} + +/// Struct to hold the output data for lasso. +#[allow(clippy::module_name_repetitions)] +#[derive(Clone, Debug)] +pub struct LassoOutput { + /// The intercept of the lasso regression, + pub intercept: T, + /// The coefficients of the lasso regression, + pub coefficients: DVector, +} diff --git a/crates/RustQuant_ml/src/lib.rs b/crates/RustQuant_ml/src/lib.rs index 1280bd0d..919d6af5 100644 --- a/crates/RustQuant_ml/src/lib.rs +++ b/crates/RustQuant_ml/src/lib.rs @@ -33,3 +33,7 @@ pub use linear_regression::*; /// Logistic regression. pub mod logistic_regression; pub use logistic_regression::*; + +/// lasso regression. +pub mod lasso; +pub use lasso::*; From c07bb50cea85d5fa277ee73e04d7d1030af02bbe Mon Sep 17 00:00:00 2001 From: Yasser Naji Date: Sun, 12 Oct 2025 21:42:43 +0100 Subject: [PATCH 2/4] Lasso: Implementations for constructor, fitting and create output struct --- crates/RustQuant_ml/src/lasso.rs | 102 +++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/crates/RustQuant_ml/src/lasso.rs b/crates/RustQuant_ml/src/lasso.rs index a086e673..1171275f 100644 --- a/crates/RustQuant_ml/src/lasso.rs +++ b/crates/RustQuant_ml/src/lasso.rs @@ -47,3 +47,105 @@ pub struct LassoOutput { /// The coefficients of the lasso regression, pub coefficients: DVector, } + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// IMPLEMENTATIONS +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +impl LassoInput { + /// Create a new `LassoInput` struct. + #[must_use] + pub fn new( + x: DMatrix, + y: DVector, + lambda: f64, + fit_intercept: bool, + max_iter: usize, + tolerance: f64, + ) -> Self { + Self { x, y, lambda, fit_intercept, max_iter, tolerance } + } + + /// Fits a Lasso regression to the input data. + /// Returns the intercept and coefficients. + /// The intercept is the first value of the coefficients. + pub fn fit(&self) -> Result, RustQuantError> { + let n_cols = self.x.ncols(); + let n_rows = self.x.nrows() as f64; + let mut features_matrix = self.x.clone(); + let mut response_vec = self.y.clone(); + let feature_means = DVector::from_iterator( + self.x.ncols(), + (0..self.x.ncols()).map(|j| self.x.column(j).mean()) + ); + + if self.fit_intercept { + + features_matrix = self.x.clone(); + for j in 0..self.x.ncols() { + let mean = feature_means[j]; + for i in 0..self.x.nrows() { + features_matrix[(i, j)] -= mean; + } + } + response_vec = &self.y - DVector::from_element(self.x.nrows(), self.y.mean()); + } + + let mut residual = response_vec; + let mut coefficients = DVector::::zeros(n_cols); + + for _ in 0..self.max_iter { + let mut max_delta: f64 = 0.0; + for j in 0..n_cols { + + let feature_vals_col_j = features_matrix.column(j); + let col_norm: f64 = feature_vals_col_j.dot(&feature_vals_col_j); + let rho: f64 = (residual.dot(&feature_vals_col_j) + coefficients[j] * col_norm) / n_rows; + + let new_coefficient_j: f64 = if rho < -self.lambda { + (rho + self.lambda) / (col_norm / n_rows) + } else if rho > self.lambda { + (rho - self.lambda) / (col_norm / n_rows) + } else { + 0.0 + }; + + let delta: f64 = new_coefficient_j - coefficients[j]; + if delta.abs() > 0.0 { + residual -= &feature_vals_col_j * delta; + } + coefficients[j] = new_coefficient_j; + max_delta = max_delta.max(delta.abs()); + } + + if max_delta < self.tolerance { + break; + } + } + + let intercept: f64 = if self.fit_intercept { + self.y.mean() - feature_means.dot(&coefficients) + } else { + 0.0 + }; + coefficients = coefficients.insert_row(0, intercept); + + Ok(LassoOutput { + intercept, + coefficients, + }) + } +} + +impl LassoOutput { + /// Predicts the output for the given input data. + pub fn predict(&self, input: DMatrix) -> Result, RustQuantError> { + let intercept = DVector::from_element( + input.nrows(), + self.intercept + ); + let coefficients = self.coefficients.clone().remove_row(0); + let predictions = input * coefficients + intercept; + Ok(predictions) + } +} From a5974b4b2cea2c1f33aaf8e05adc2720b258b7d5 Mon Sep 17 00:00:00 2001 From: Yasser Naji Date: Sun, 12 Oct 2025 21:47:21 +0100 Subject: [PATCH 3/4] Lasso: Create unit tests --- crates/RustQuant_ml/src/lasso.rs | 140 +++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/crates/RustQuant_ml/src/lasso.rs b/crates/RustQuant_ml/src/lasso.rs index 1171275f..66d0c6db 100644 --- a/crates/RustQuant_ml/src/lasso.rs +++ b/crates/RustQuant_ml/src/lasso.rs @@ -149,3 +149,143 @@ impl LassoOutput { Ok(predictions) } } + + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// UNIT TESTS +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#[cfg(test)] +mod tests_lasso_regression { + use super::*; + use RustQuant_utils::assert_approx_equal; + + struct DataForTests { + training_set: DMatrix, + testing_set: DMatrix, + response: DVector, + } + + fn setup_test() -> DataForTests { + DataForTests { + training_set: DMatrix::from_row_slice( + 4, + 3, + &[ + -0.083_784_355, -0.633_485_70, -0.399_266_60, + -0.982_943_745, 1.090_797_46, -0.468_123_05, + -1.875_067_321, -0.913_727_27, 0.326_962_08, + -0.186_144_661, 1.001_639_71, -0.412_746_90], + ), + + testing_set: DMatrix::from_row_slice( + 4, + 3, + &[ + 0.562_036_47, 0.595_846_45, -0.411_653_01, + 0.663_358_26, 0.452_091_83, -0.294_327_15, + -0.602_897_28, 0.896_743_96, 1.218_573_96, + 0.698_377_69, 0.572_216_51, 0.244_111_43], + ), + + response: DVector::from_row_slice( + &[ + -0.445_151_96, + -1.847_803_64, + -0.628_825_31, + -0.861_080_69 + ] + ), + } + } + + #[test] + fn test_lasso_without_intercept() -> Result<(), RustQuantError> { + + let data: DataForTests = setup_test(); + + let input: LassoInput = LassoInput { + x: data.training_set, + y: data.response, + lambda: 0.01, + fit_intercept: false, + max_iter: 1000, + tolerance: 1e-4, + }; + + let output: LassoOutput = input.fit()?; + let predictions = output.predict(data.testing_set)?; + + for (i, coefficient) in output.coefficients.iter().enumerate() { + assert_approx_equal!( + coefficient, + &[ + 0.0, + 0.743_965_706_491_596_7, + -0.304_713_846_510_641_43, + 1.355_162_653_724_116_22, + ][i], + f64::EPSILON + ); + } + + for (i, pred) in predictions.iter().enumerate() { + assert_approx_equal!( + pred, + &[ + -0.321_283_589_676_737_6, + -0.04310400559445471, + 0.9295807191488583, + 0.6760174510230131 + ][i], + f64::EPSILON + ); + } + Ok(()) + } + + #[test] + fn test_lasso_with_intercept() -> Result<(), RustQuantError> { + + let data: DataForTests = setup_test(); + + let input: LassoInput = LassoInput { + x: data.training_set, + y: data.response, + lambda: 0.01, + fit_intercept: true, + max_iter: 1000, + tolerance: 1e-4, + }; + + let output: LassoOutput = input.fit()?; + let predictions = output.predict(data.testing_set)?; + + for (i, coefficient) in output.coefficients.iter().enumerate() { + assert_approx_equal!( + coefficient, + &[ + 0.009_633_706_736_496_328, + 0.750_479_303_541_854_1, + -0.301_997_087_876_784_5, + 1.373_605_833_196_545_3, + ][i], + f64::EPSILON + ); + } + + for (i, pred) in predictions.iter().enumerate() { + assert_approx_equal!( + pred, + &[ + -0.313_962_423_203_417_3, + -0.033_349_554_520_968_38, + 0.960_198_011_081_136_2, + 0.696_256_873_679_798_4, + ][i], + f64::EPSILON + ); + } + Ok(()) + } +} From 48d9a0d36a1a40ccb6ff45d1b8b74fddb61561d2 Mon Sep 17 00:00:00 2001 From: Yasser Naji Date: Mon, 20 Oct 2025 23:11:37 +0100 Subject: [PATCH 4/4] Lasso: Remove redundant variable --- crates/RustQuant_ml/src/lasso.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/RustQuant_ml/src/lasso.rs b/crates/RustQuant_ml/src/lasso.rs index 66d0c6db..8c09869d 100644 --- a/crates/RustQuant_ml/src/lasso.rs +++ b/crates/RustQuant_ml/src/lasso.rs @@ -73,7 +73,7 @@ impl LassoInput { let n_cols = self.x.ncols(); let n_rows = self.x.nrows() as f64; let mut features_matrix = self.x.clone(); - let mut response_vec = self.y.clone(); + let mut residuals = self.y.clone(); let feature_means = DVector::from_iterator( self.x.ncols(), (0..self.x.ncols()).map(|j| self.x.column(j).mean()) @@ -88,10 +88,9 @@ impl LassoInput { features_matrix[(i, j)] -= mean; } } - response_vec = &self.y - DVector::from_element(self.x.nrows(), self.y.mean()); + residuals -= DVector::from_element(self.x.nrows(), self.y.mean()); } - let mut residual = response_vec; let mut coefficients = DVector::::zeros(n_cols); for _ in 0..self.max_iter { @@ -100,7 +99,7 @@ impl LassoInput { let feature_vals_col_j = features_matrix.column(j); let col_norm: f64 = feature_vals_col_j.dot(&feature_vals_col_j); - let rho: f64 = (residual.dot(&feature_vals_col_j) + coefficients[j] * col_norm) / n_rows; + let rho: f64 = (residuals.dot(&feature_vals_col_j) + coefficients[j] * col_norm) / n_rows; let new_coefficient_j: f64 = if rho < -self.lambda { (rho + self.lambda) / (col_norm / n_rows) @@ -112,7 +111,7 @@ impl LassoInput { let delta: f64 = new_coefficient_j - coefficients[j]; if delta.abs() > 0.0 { - residual -= &feature_vals_col_j * delta; + residuals -= &feature_vals_col_j * delta; } coefficients[j] = new_coefficient_j; max_delta = max_delta.max(delta.abs());