From bb74cfd70bb948502504e6af675aecd7f35ca173 Mon Sep 17 00:00:00 2001
From: Russell Richie
Date: Fri, 27 Mar 2026 12:45:14 -0400
Subject: [PATCH] Fix ArrowNotImplementedError in SalesScrutinyStudy with pyarrow >= 22

When a DataFrame column has Arrow null dtype (all-null values with no
known type), calling .astype(str) does not convert it to a string dtype
with newer versions of pyarrow (observed with pyarrow 22 + pandas 2.3).
Subsequent string concatenation with a large_string-typed column raises:

  ArrowNotImplementedError: Function 'binary_join_element_wise' has no
  kernel matching input types (null, large_string, large_string)

Fix: go through Python object dtype first (.astype(object)) before
.astype(str), and apply the same to model_group to ensure both sides of
the concatenation are plain Python object strings. For ss_id, nulls are
also replaced with an empty string (.fillna("")) before the string cast.

Co-Authored-By: Claude Sonnet 4.6
---
 openavmkit/sales_scrutiny_study.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openavmkit/sales_scrutiny_study.py b/openavmkit/sales_scrutiny_study.py
index d981bc7..49da1d1 100644
--- a/openavmkit/sales_scrutiny_study.py
+++ b/openavmkit/sales_scrutiny_study.py
@@ -111,8 +111,8 @@ def __init__(self, df: pd.DataFrame, settings: dict, model_group: str):
         for key in stuff:
             df = stuff[key]
             df, cluster_fields = _mark_sales_scrutiny_clusters(df, settings)
-            df["ss_id"] = df["ss_id"].astype(str)
-            df["ss_id"] = df["model_group"] + "_" + key + "_" + df["ss_id"]
+            df["ss_id"] = df["ss_id"].astype(object).fillna("").astype(str)
+            df["ss_id"] = df["model_group"].astype(object).astype(str) + "_" + key + "_" + df["ss_id"]
             per_area = ""
             denominator = ""
             if key == "i":