From 3468661c6f5f12854549a36a124e2884bb471837 Mon Sep 17 00:00:00 2001
From: Damonamajor <56321109+Damonamajor@users.noreply.github.com>
Date: Tue, 16 Sep 2025 10:05:15 -0500
Subject: [PATCH 1/2] Update formulas.R

---
 R/formulas.R | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/R/formulas.R b/R/formulas.R
index b78e134..7ea776a 100644
--- a/R/formulas.R
+++ b/R/formulas.R
@@ -178,8 +178,16 @@ prb <- function(assessed, sale_price, na.rm = FALSE) {
 # Calculate the Gini cofficients needed for KI and MKI
 calc_gini <- function(assessed, sale_price) {
   df <- data.frame(av = assessed, sp = sale_price)
-  df <- df[order(df$sp, -df$av), ]
-  assessed_price <- df$av
+  # This Gini coefficient algorithm is sensitive to the order of the input
+  # observations: If multiple observations share the same sale price but have
+  # different estimates, the output coefficients will be different depending
+  # on which of the sales with identical prices gets ordered first in the
+  # input dataframe. To ensure a stable sort order, Quintos recommends
+  # sorting by ascending sale price and then by descending estimate to break
+  # any ties. This produces "worst case" MKI/KI statistics, but ensures those
+  # statistics are deterministic. See this issue for more discussion:
+  # https://github.com/ccao-data/assesspy/issues/33#issuecomment-3180632954
+  df <- df[order(df$sp, -df$av), ] assessed_price <- df$av
   sale_price <- df$sp
   n <- length(assessed_price)
 

From 33d1bbc4cb3eeb62ba03f85681b825572ecf1f7d Mon Sep 17 00:00:00 2001
From: Damonamajor <56321109+Damonamajor@users.noreply.github.com>
Date: Tue, 16 Sep 2025 10:05:55 -0500
Subject: [PATCH 2/2] Update formulas.R

---
 R/formulas.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/formulas.R b/R/formulas.R
index 7ea776a..3433dca 100644
--- a/R/formulas.R
+++ b/R/formulas.R
@@ -187,7 +187,8 @@ calc_gini <- function(assessed, sale_price) {
   # any ties. This produces "worst case" MKI/KI statistics, but ensures those
   # statistics are deterministic. See this issue for more discussion:
   # https://github.com/ccao-data/assesspy/issues/33#issuecomment-3180632954
-  df <- df[order(df$sp, -df$av), ] assessed_price <- df$av
+  df <- df[order(df$sp, -df$av), ]
+  assessed_price <- df$av
   sale_price <- df$sp
   n <- length(assessed_price)
 