From a993ff3a315bcaa1baa38c37c4351ada1b3eccb3 Mon Sep 17 00:00:00 2001
From: Russell Richie <drussellmrichie@github.com>
Date: Fri, 27 Mar 2026 13:19:11 -0400
Subject: [PATCH 1/2] fix: guard against n_splits > n_samples in rolling-origin
 CV

When a model group has very few training samples (e.g. sparse commercial
or post-valuation sub-models), KFold raises ValueError if n_splits
exceeds the number of rows. In _lightgbm_rolling_origin_cv, cap n_splits
at len(X); when fewer than 2 samples are available, emit a UserWarning
and return a penalty MAPE of 1.0 so Optuna can still complete
gracefully.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 openavmkit/tuning.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/openavmkit/tuning.py b/openavmkit/tuning.py
index 1e7706e..2c4c594 100644
--- a/openavmkit/tuning.py
+++ b/openavmkit/tuning.py
@@ -485,6 +485,16 @@ def _catboost_rolling_origin_cv(
 
 
 def _lightgbm_rolling_origin_cv(X, y, params, n_splits=5, random_state=42, cat_vars=None):
+    n_samples = len(X)
+    n_splits = min(n_splits, n_samples)
+    if n_splits < 2:
+        import warnings
+        warnings.warn(
+            f"Not enough samples ({n_samples}) for cross-validation with n_splits={n_splits}. "
+            "Returning penalty MAPE of 1.0.",
+            UserWarning,
+        )
+        return 1.0
     kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
     mape_scores = []
 

From d24658d21c8971605d8acdab9ef7067c40b706c0 Mon Sep 17 00:00:00 2001
From: Russell Richie <drussellmrichie@github.com>
Date: Fri, 27 Mar 2026 13:34:03 -0400
Subject: [PATCH 2/2] fix: guard against all-NA scores in calc_correlations

When a model group has very few sales (e.g. vacant land with 9 records),
pandas' idxmin() raises ValueError('Encountered all NA values') instead
of returning NaN as in older pandas versions. Break out of the
variable-elimination loop early when all scores are NA so downstream
code is unaffected.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 openavmkit/utilities/stats.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/openavmkit/utilities/stats.py b/openavmkit/utilities/stats.py
index 75303d3..a2336aa 100644
--- a/openavmkit/utilities/stats.py
+++ b/openavmkit/utilities/stats.py
@@ -713,6 +713,9 @@ def calc_correlations(
 
         score = strength * clarity * clarity
 
+        # Guard against all-NA scores (too few samples to compute correlations)
+        if score.isna().all():
+            break
         min_score_idx = score.idxmin()
         try:
             min_score = score[min_score_idx]