From cf20386861eab41a286bf9f5d1a111224c0a432a Mon Sep 17 00:00:00 2001
From: drussellmrichie
Date: Wed, 1 Apr 2026 16:34:51 -0400
Subject: [PATCH] fix: skip non-numeric columns in calc_r2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

calc_r2 called data[var].astype(float) on every variable without checking
whether the column is numeric. Any string/categorical column (e.g. luc,
bldg_com_struct) raised:

  ValueError: could not convert string to float: '100A'

Add a pd.api.types.is_numeric_dtype guard immediately after the existing
ill-posed-model check. Non-numeric vars now get NaN R² and are skipped
cleanly, consistent with the rest of the function's error-handling pattern.
---
 openavmkit/utilities/stats.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/openavmkit/utilities/stats.py b/openavmkit/utilities/stats.py
index 75303d3e..ded45cca 100644
--- a/openavmkit/utilities/stats.py
+++ b/openavmkit/utilities/stats.py
@@ -1132,6 +1132,14 @@ def calc_r2(
             results["coef_sign"].append(float("nan"))
             continue # skip ill-posed models
 
+        # Skip non-numeric columns (e.g. string categoricals); .astype(float) would raise ValueError
+        if not pd.api.types.is_numeric_dtype(data[var]):
+            results["variable"].append(var)
+            results["r2"].append(float("nan"))
+            results["adj_r2"].append(float("nan"))
+            results["coef_sign"].append(float("nan"))
+            continue
+
         X = sm.add_constant(data[var].astype(float), has_constant='add')
 
         # Align y with X using the same filtered rows