From a993ff3a315bcaa1baa38c37c4351ada1b3eccb3 Mon Sep 17 00:00:00 2001 From: Russell Richie Date: Fri, 27 Mar 2026 13:19:11 -0400 Subject: [PATCH 1/2] fix: guard against n_splits > n_samples in rolling-origin CV When a model group has very few training samples (e.g. sparse commercial or post-valuation sub-models), KFold raises ValueError if n_splits exceeds the number of rows. Cap n_splits at len(X) and return a penalty MAPE of 1.0 when fewer than 2 samples are available, so Optuna can still complete gracefully. Co-Authored-By: Claude Sonnet 4.6 --- openavmkit/tuning.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/openavmkit/tuning.py b/openavmkit/tuning.py index 1e7706e..2c4c594 100644 --- a/openavmkit/tuning.py +++ b/openavmkit/tuning.py @@ -485,6 +485,16 @@ def _catboost_rolling_origin_cv( def _lightgbm_rolling_origin_cv(X, y, params, n_splits=5, random_state=42, cat_vars=None): + n_samples = len(X) + n_splits = min(n_splits, n_samples) + if n_splits < 2: + import warnings + warnings.warn( + f"Not enough samples ({n_samples}) for cross-validation with n_splits={n_splits}. " + "Returning penalty MAPE of 1.0.", + UserWarning, + ) + return 1.0 kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state) mape_scores = [] From d24658d21c8971605d8acdab9ef7067c40b706c0 Mon Sep 17 00:00:00 2001 From: Russell Richie Date: Fri, 27 Mar 2026 13:34:03 -0400 Subject: [PATCH 2/2] fix: guard against all-NA scores in calc_correlations When a model group has very few sales (e.g. vacant land with 9 records), pandas' idxmin() raises ValueError('Encountered all NA values') instead of returning NaN as in older pandas versions. Break out of the variable elimination loop early when all scores are NA so downstream code is unaffected. Co-Authored-By: Claude Sonnet 4.6 --- openavmkit/utilities/stats.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openavmkit/utilities/stats.py b/openavmkit/utilities/stats.py index 75303d3..a2336aa 100644 --- a/openavmkit/utilities/stats.py +++ b/openavmkit/utilities/stats.py @@ -713,6 +713,9 @@ def calc_correlations( score = strength * clarity * clarity + # Guard against all-NA scores (too few samples to compute correlations) + if score.isna().all(): + break min_score_idx = score.idxmin() try: min_score = score[min_score_idx]