From 8fae41faa2e98296745aefd870cac878c36cd5b1 Mon Sep 17 00:00:00 2001 From: karldw Date: Sun, 19 Oct 2025 10:38:45 -0700 Subject: [PATCH 1/3] Fixes for KP stat --- R/fitstats.R | 20 ++++++++++++++------ tests/fixest_tests.R | 6 +++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/R/fitstats.R b/R/fitstats.R index e6ee23b8..2d2a3eb4 100644 --- a/R/fitstats.R +++ b/R/fitstats.R @@ -1619,8 +1619,7 @@ kp_stat = function(x){ # internal function => x must be a fixest object # # The code here is a translation of the ranktest.jl function from the Vcov.jl package - # from @matthieugomez (see https://github.com/matthieugomez/Vcov.jl) - # + # from @matthieugomez (see https://github.com/FixedEffects/Vcov.jl) if(!isTRUE(x$iv) || !x$iv_stage == 2) return(NA) @@ -1650,7 +1649,7 @@ kp_stat = function(x){ } else { PI = coef(summary(x, stage = 1)) } - PI = PI[, colnames(PI) %in% x$iv_inst_names_xpd, drop = FALSE] + PI = as.matrix(PI[, colnames(PI) %in% x$iv_inst_names_xpd, drop = FALSE]) Fmat = chol(crossprod(Z_proj)) Gmat = chol(crossprod(X_proj)) @@ -1707,9 +1706,18 @@ kp_stat = function(x){ vcov = x$summary_flags$vcov ssc = x$summary_flags$ssc - meat = vcov(x_new, vcov = vcov, ssc = ssc, sandwich = FALSE) - vhat = solve(K, t(solve(K, meat))) - + if(ncol(x$scores) == ncol(x_new$scores)) { + meat = vcov(x_new, vcov = vcov, ssc = ssc, sandwich = FALSE) + vhat = solve(K, t(solve(K, meat))) + } else { + warning(stringmagic::string_magic( + "KP calculation not implemented for this case:\n", + " * vcov type: {VCOV_TYPE}\n", + " * n_endo: {n_endo}\n", + " * n_inst: {n_inst}" + )) + vhat <- matrix(NA_real_, nrow=ncol(kronv), ncol=ncol(kronv)) + } # DOF correction now n = nobs(x) - identical(VCOV_TYPE, "cluster") df_resid = degrees_freedom(x, "resid", stage = 1) diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R index cd46b457..b02a3629 100644 --- a/tests/fixest_tests.R +++ b/tests/fixest_tests.R @@ -2966,11 +2966,15 @@ names(base) = c("y", "x1", "x_endo_1", "x_inst_1", "fe") set.seed(2) base$x_inst_2 = 0.2 * base$y + 0.2 * base$x_endo_1 + rnorm(150, sd = 0.5) base$x_endo_2 = 0.2 * base$y - 0.2 * base$x_inst_1 + rnorm(150, sd = 0.5) +base$cl_var = rep(1:50, 3) # using `fe` to cluster causes a vcov PSD warning # Checking a basic estimation est_iv = feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1 + x_inst_2, base) -fitstat(est_iv, ~ f + ivf + ivf2 + wald + ivwald + ivwald2 + wh + sargan + rmse + g + n + ll + sq.cor + r2) +fitstat(est_iv, ~ f + ivf + ivf2 + wald + ivwald + ivwald2 + wh + sargan + kpr + rmse + g + n + ll + sq.cor + r2) + +est_iv_uneven = feols(y ~ x1 | x_endo_1 ~ x_inst_1 + x_inst_2, base, cluster = "cl_var") +fitstat(est_iv_uneven, ~ f + ivf + ivf2 + wald + ivwald + ivwald2 + wh + sargan + kpr + rmse + g + n + ll + sq.cor + r2) est_fe = feols(y ~ x1 | fe, base) From 2a5cc606d624d62a9b71aad926e56511607b3119 Mon Sep 17 00:00:00 2001 From: karldw Date: Wed, 29 Oct 2025 19:53:07 -0700 Subject: [PATCH 2/3] Clarify KP warning to list supported cases --- R/fitstats.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/fitstats.R b/R/fitstats.R index 2d2a3eb4..129efa93 100644 --- a/R/fitstats.R +++ b/R/fitstats.R @@ -1710,11 +1710,11 @@ kp_stat = function(x){ meat = vcov(x_new, vcov = vcov, ssc = ssc, sandwich = FALSE) vhat = solve(K, t(solve(K, meat))) } else { - warning(stringmagic::string_magic( - "KP calculation not implemented for this case:\n", - " * vcov type: {VCOV_TYPE}\n", - " * n_endo: {n_endo}\n", - " * n_inst: {n_inst}" + warning(paste0( + "KP calculation is only implemented for the cases:\n", + " * vcov type is IID or\n", + " * number of endogenous variables == number of instruments", + "Results are NA for other cases." )) vhat <- matrix(NA_real_, nrow=ncol(kronv), ncol=ncol(kronv)) } From f7d91c6af124c598c9f4f36087d9314feed3bcbd Mon Sep 17 00:00:00 2001 From: karldw Date: Sun, 23 Nov 2025 18:56:11 -0800 Subject: [PATCH 3/3] In KP calc, return NA earlier --- R/fitstats.R | 33 +++++++++++++++++---------------- tests/fixest_tests.R | 1 + 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/R/fitstats.R b/R/fitstats.R index 129efa93..4a977f26 100644 --- a/R/fitstats.R +++ b/R/fitstats.R @@ -1624,6 +1624,18 @@ kp_stat = function(x){ if(!isTRUE(x$iv) || !x$iv_stage == 2) return(NA) + n_endo = length(x$iv_first_stage) + n_inst = x$iv_n_inst + VCOV_TYPE = attr(x$cov.scaled, "type") + if (!identical(VCOV_TYPE, "IID") && n_inst != n_endo) { + warning(paste0( + "KP calculation is only implemented for the cases:\n", + " * vcov type is IID or\n", + " * number of endogenous variables == number of instruments" + )) + return(NA) + } + # Necessary data X_proj = as.matrix(resid(summary(x, stage = 1))) @@ -1636,8 +1648,8 @@ kp_stat = function(x){ Z = model.matrix(x, type = "iv.inst") Z_proj = proj_on_U(x, Z) - k = n_endo = ncol(X_proj) - l = n_inst = ncol(Z) + k = n_endo + l = n_inst # We assume l >= k q = min(k, l) - 1 @@ -1690,8 +1702,6 @@ kp_stat = function(x){ # There is need to compute the vcov specifically for this case # We do it the same way as it was for x - VCOV_TYPE = attr(x$cov.scaled, "type") - if(identical(VCOV_TYPE, "IID")){ vlab = chol(tcrossprod(kronv) / nrow(X_proj)) @@ -1706,18 +1716,9 @@ kp_stat = function(x){ vcov = x$summary_flags$vcov ssc = x$summary_flags$ssc - if(ncol(x$scores) == ncol(x_new$scores)) { - meat = vcov(x_new, vcov = vcov, ssc = ssc, sandwich = FALSE) - vhat = solve(K, t(solve(K, meat))) - } else { - warning(paste0( - "KP calculation is only implemented for the cases:\n", - " * vcov type is IID or\n", - " * number of endogenous variables == number of instruments", - "Results are NA for other cases." - )) - vhat <- matrix(NA_real_, nrow=ncol(kronv), ncol=ncol(kronv)) - } + meat = vcov(x_new, vcov = vcov, ssc = ssc, sandwich = FALSE) + vhat = solve(K, t(solve(K, meat))) + # DOF correction now n = nobs(x) - identical(VCOV_TYPE, "cluster") df_resid = degrees_freedom(x, "resid", stage = 1) diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R index b02a3629..3077fb76 100644 --- a/tests/fixest_tests.R +++ b/tests/fixest_tests.R @@ -2972,6 +2972,7 @@ base$cl_var = rep(1:50, 3) # using `fe` to cluster causes a vcov PSD warning est_iv = feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1 + x_inst_2, base) fitstat(est_iv, ~ f + ivf + ivf2 + wald + ivwald + ivwald2 + wh + sargan + kpr + rmse + g + n + ll + sq.cor + r2) +test(c(length(est_iv$iv_first_stage), est_iv$iv_n_inst), c(2, 2)) # kpr relies on accessing these est_iv_uneven = feols(y ~ x1 | x_endo_1 ~ x_inst_1 + x_inst_2, base, cluster = "cl_var") fitstat(est_iv_uneven, ~ f + ivf + ivf2 + wald + ivwald + ivwald2 + wh + sargan + kpr + rmse + g + n + ll + sq.cor + r2)