From 00ef947d56fe7159e561851e56dc4df4df480cec Mon Sep 17 00:00:00 2001 From: connorschwartz <46463980+connorschwartz@users.noreply.github.com> Date: Mon, 9 Feb 2026 20:51:50 -0500 Subject: [PATCH] Update data.py The number of nearest neighbors for spatial lag calculations is configurable for other variables under process.enrich.spatial_lag.fields. Spatial lag for sale price is calculated with different code that currently is not configurable. This PR updates the code so that the sale price spatial lag calculation can be adjusted in a similar way. Also, if k is greater than the number of sales in the training set, the calculation fails. So, we can instead use the minimum of k and the length of the training set. --- openavmkit/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openavmkit/data.py b/openavmkit/data.py index 10cda32..32d2ea5 100644 --- a/openavmkit/data.py +++ b/openavmkit/data.py @@ -842,7 +842,7 @@ def _enrich_sup_spatial_lag_for_model_group( df_sub = df_sub[~pd.isna(df_sub["latitude"]) & ~pd.isna(df_sub["longitude"])] # Choose the number of nearest neighbors to use - k = 5 # adjust this number as needed + k = s_sl.get("sale_price", 5) # adjust this number as needed df_sub_train = df_sub.loc[df_sub["key_sale"].isin(train_keys)].copy() @@ -878,7 +878,7 @@ def _enrich_sup_spatial_lag_for_model_group( # Query the tree: for each parcel in df_universe, find the k nearest sales # distances: shape (n_universe, k); indices: corresponding indices in df_sales - distances, indices = sales_tree.query(universe_coords, k=k) + distances, indices = sales_tree.query(universe_coords, k=min(len(sales_coords_train), k)) # Ensure that distances and indices are 2D arrays (if k==1, reshape them) if k == 1: