From a993ff3a315bcaa1baa38c37c4351ada1b3eccb3 Mon Sep 17 00:00:00 2001
From: Russell Richie
Date: Fri, 27 Mar 2026 13:19:11 -0400
Subject: [PATCH] fix: guard against n_splits > n_samples in rolling-origin CV

When a model group has very few training samples (e.g. sparse commercial
or post-valuation sub-models), KFold raises ValueError if n_splits
exceeds the number of rows. Cap n_splits at len(X) and return a penalty
MAPE of 1.0 when fewer than 2 samples are available, so Optuna can still
complete gracefully.

Co-Authored-By: Claude Sonnet 4.6
---
 openavmkit/tuning.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/openavmkit/tuning.py b/openavmkit/tuning.py
index 1e7706e..2c4c594 100644
--- a/openavmkit/tuning.py
+++ b/openavmkit/tuning.py
@@ -485,6 +485,16 @@ def _catboost_rolling_origin_cv(
 def _lightgbm_rolling_origin_cv(X, y, params, n_splits=5, random_state=42, cat_vars=None):
+    n_samples = len(X)
+    n_splits = min(n_splits, n_samples)
+    if n_splits < 2:
+        import warnings
+        warnings.warn(
+            f"Not enough samples ({n_samples}) for cross-validation with n_splits={n_splits}. "
+            "Returning penalty MAPE of 1.0.",
+            UserWarning,
+        )
+        return 1.0
     kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
     mape_scores = []