From 32c3ab307d434d36e2a04b655cc22fcbef8ff526 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 26 Nov 2025 22:46:06 +0000
Subject: [PATCH 01/20] Initial plan


From 092623be8942e2b4cb52f95958f3b31d54a995cc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 26 Nov 2025 22:57:36 +0000
Subject: [PATCH 02/20] Implement ar_test and ar_confint functions for
 Anderson-Rubin inference

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>
---
 NAMESPACE            |   4 +
 NEWS.md              |   2 +
 R/ar_test.R          | 554 +++++++++++++++++++++++++++++++++++++++++++
 tests/fixest_tests.R |  77 ++++++
 4 files changed, 637 insertions(+)
 create mode 100644 R/ar_test.R

diff --git a/NAMESPACE b/NAMESPACE
index 1111e2fa..1bc8b6d2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,8 @@ export(extralines_register)
 # misc funs
 export(collinearity)
 export(r2, fitstat, fitstat_register, wald)
+# AR test
+export(ar_test, ar_confint)
 export(fixest_startup_msg)
 export(check_conv_feols)
 export(demeaning_algo)
@@ -74,6 +76,8 @@ export(esttex, esttable)
 # Base methods
 S3method(print, fixest)
 S3method(print, fixest_fitstat)
+S3method(print, fixest_ar)
+S3method(print, fixest_ar_confint)
 S3method(print, vec_n_unik)
 S3method(print, list_n_unik)
 S3method(print, osize)
diff --git a/NEWS.md b/NEWS.md
index b8013cbd..33bc3ca2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,8 @@
 
 ## New features
 
+- Added Anderson-Rubin (AR) weak-IV robust test for `feols` IV models via `ar_test()`. This test is robust to weak instruments and can be used to test hypotheses about endogenous variable coefficients. The function `ar_confint()` is also provided to compute AR confidence intervals by inverting the test.
+
 - `etable`: arguments `extralines` and `headers` are more robust and the behavior is slightly modified. You can now position the values using integer indexes which give the columns position, inseadof column spans. It also errors more gracefully. This change is retro compatible. 
 
 - in multiple estimations in which at least one estimation contains only missing values: no error is thrown any more
diff --git a/R/ar_test.R b/R/ar_test.R
new file mode 100644
index 00000000..1a357c6d
--- /dev/null
+++ b/R/ar_test.R
@@ -0,0 +1,554 @@
+#----------------------------------------------#
+# Author: Laurent Berge (+ AI-assisted development)
+# Date creation: 2024
+# ~: Anderson-Rubin test for IV models
+#----------------------------------------------#
+
+
+
+#' Anderson-Rubin test for IV models
+#'
+#' @description
+#' Performs the Anderson-Rubin test for instrumental variable (IV) models. 
+#' This test is robust to weak instruments and can be used to test hypotheses 
+#' about the coefficients of endogenous variables.
+#'
+#' @param object A `fixest` object obtained from [`feols`] with an IV specification 
+#'   (i.e., formula includes `| endo ~ inst`).
+#' @param beta0 Numeric vector. The hypothesized values for the endogenous variable 
+#'   coefficients under the null hypothesis. If `NULL` (default), tests 
+#'   H0: beta = 0 for all endogenous variables. The length must match the number 
+#'   of endogenous variables in the model.
+#' @param vcov Versatile argument to specify the VCOV. If `NULL` (default), uses 
+#'   the same VCOV specification as the original model. See [`vcov.fixest`] for 
+#'   details on available options.
+#' @param ... Additional arguments passed to [`summary.fixest`].
+#'
+#' @details
+#' The Anderson-Rubin (AR) test is a weak-instrument robust test for the null 
+
+#' hypothesis H0: beta = beta0, where beta is the vector of coefficients on the 
+#' endogenous variables.
+#'
+#' The test proceeds as follows:
+#' \enumerate{
+#'   \item Construct the transformed outcome: y* = y - X * beta0, where X contains 
+#'     the endogenous variables.
+#'   \item Regress y* on the exogenous controls (W), instruments (Z), and any 
+#'     fixed effects, using the same VCOV specification as the original model.
+#'   \item Test the joint significance of the instrument coefficients (H0: pi = 0).
+#' }
+#'
+#' The AR test statistic follows an F-distribution under the null, regardless of 
+#' instrument strength. This makes it particularly useful when instruments may be 
+#' weak.
+#'
+#' @return
+#' An object of class `fixest_ar` containing:
+#' \item{beta0}{The hypothesized coefficient values tested.}
+#' \item{stat}{The AR test statistic (F-statistic).}
+#' \item{df1}{Numerator degrees of freedom (number of instruments).}
+#' \item{df2}{Denominator degrees of freedom.}
+#' \item{pvalue}{The p-value of the test.}
+#' \item{method}{Character string: "Anderson-Rubin test".}
+#' \item{vcov_type}{The type of VCOV used for the test.}
+#' \item{endo_names}{Names of the endogenous variables.}
+#' \item{inst_names}{Names of the instruments.}
+#' \item{model}{The original `feols` IV object.}
+#'
+#' @seealso 
+#' [`ar_confint`] for computing AR confidence intervals, [`wald`] for standard 
+#' Wald tests, [`feols`] for IV estimation.
+#'
+#' @references
+#' Anderson, T. W. and Rubin, H. (1949). "Estimation of the Parameters of a Single 
+#' Equation in a Complete System of Stochastic Equations." Annals of Mathematical 
+#' Statistics, 20(1), 46-63.
+#'
+#' @examples
+#' # Simple IV example
+#' set.seed(123)
+#' n <- 1000
+#' z <- rnorm(n)
+#' x <- 0.5 * z + rnorm(n)
+#' y <- 1 + 0.8 * x + rnorm(n)
+#' data <- data.frame(y = y, x = x, z = z)
+#'
+#' # IV estimation
+#' est <- feols(y ~ 1 | x ~ z, data = data)
+#'
+#' # Test H0: beta = 0
+#' ar_test(est)
+#'
+#' # Test H0: beta = 0.8 (close to true value, should not reject)
+#' ar_test(est, beta0 = 0.8)
+#'
+#' # Test H0: beta = 0 (far from true value, should reject)
+#' ar_test(est, beta0 = 0)
+#'
+#' # With clustered standard errors
+#' data$cl <- rep(1:100, each = 10)
+#' est_cl <- feols(y ~ 1 | x ~ z, data = data, vcov = ~cl)
+#' ar_test(est_cl)
+#'
+#' @export
+ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
+  
+  # Input validation
+  check_arg(object, "class(fixest)")
+  
+  # Check that object is an IV model
+  if (!isTRUE(object$iv)) {
+    stop("The `ar_test` function requires an IV model. ",
+         "The input object is not an IV estimation (no endogenous variables/instruments found). ",
+         "Please provide a `feols` object estimated with an IV formula ",
+         "(e.g., `feols(y ~ controls | endo ~ inst, data)`).")
+  }
+  
+  # Check that it's a second stage estimation
+  if (isTRUE(object$iv_stage == 1)) {
+    stop("The `ar_test` function requires a second-stage IV estimation. ",
+         "The input object appears to be a first-stage estimation. ",
+         "Please use the main IV estimation result, not `summary(est, stage = 1)`.")
+  }
+  
+  # Get endogenous variable names and instrument names
+  endo_names <- object$iv_endo_names
+  inst_names <- object$iv_inst_names_xpd  # expanded instrument names
+  n_endo <- length(endo_names)
+  n_inst <- length(inst_names)
+  
+  # Handle beta0
+  if (is.null(beta0)) {
+    beta0 <- rep(0, n_endo)
+  } else {
+    check_arg(beta0, "numeric vector no na")
+    if (length(beta0) == 1 && n_endo > 1) {
+      # If scalar beta0 provided but multiple endogenous vars, error
+      stop("Argument `beta0` has length 1 but there are ", n_endo, 
+           " endogenous variables (", paste(endo_names, collapse = ", "), "). ",
+           "Please provide a vector of length ", n_endo, ".")
+    }
+    if (length(beta0) != n_endo) {
+      stop("Argument `beta0` has length ", length(beta0), " but there are ", 
+           n_endo, " endogenous variables. ",
+           "Please provide a vector of length ", n_endo, ".")
+    }
+  }
+  
+  # Name beta0 for clarity
+  names(beta0) <- endo_names
+  
+  # Fetch the data
+  data <- fetch_data(object, "To apply `ar_test`, ")
+  
+  # Get the dependent variable
+  fml_linear <- formula(object, type = "linear")
+  y_vec <- eval(fml_linear[[2]], data)
+  
+  # Get the endogenous variable matrix
+  endo_mat <- model.matrix(object, data = data, type = "iv.endo", collin.rm = FALSE)
+  
+  # Compute transformed outcome: y* = y - X * beta0
+  y_tilde <- as.numeric(y_vec - endo_mat %*% beta0)
+  
+  # Store y_tilde in a temporary column
+  temp_y_name <- paste0("..ar_y_tilde_", sample.int(1e6, 1))
+  data[[temp_y_name]] <- y_tilde
+  
+  # Build the AR regression formula
+  # We need: y_tilde ~ controls + instruments | fixed_effects
+  
+  # Get the exogenous controls part (the linear formula RHS)
+  fml_linear_rhs <- object$fml_all$linear
+  if (length(fml_linear_rhs) == 3) {
+    controls_str <- deparse_long(fml_linear_rhs[[3]])
+  } else {
+    controls_str <- "1"
+  }
+  
+  # Get the instrument formula RHS
+  fml_iv <- object$fml_all$iv
+  inst_str <- deparse_long(fml_iv[[3]])
+  
+  # Get the fixed effects part if any
+  fml_fe <- object$fml_all$fixef
+  if (!is.null(fml_fe)) {
+    fe_str <- paste0(" | ", deparse_long(fml_fe[[2]]))
+  } else {
+    fe_str <- ""
+  }
+  
+  # Build the new formula: y_tilde ~ controls + instruments | FE
+  new_fml_str <- paste0(temp_y_name, " ~ ", controls_str, " + ", inst_str, fe_str)
+  new_fml <- as.formula(new_fml_str, env = parent.frame())
+  
+  # Get weights if present
+  weights_val <- NULL
+  if (!is.null(object$weights)) {
+    weights_val <- object$weights
+  }
+  
+  # Get vcov specification - use original if not specified
+  if (is.null(vcov)) {
+    # Try to get the vcov from the summarized object
+    if (!is.null(object$cov.scaled)) {
+      vcov <- attr(object$cov.scaled, "type")
+      if (is.null(vcov)) vcov <- "iid"
+    } else {
+      vcov <- "iid"
+    }
+  }
+  
+  # Run the AR regression
+  ar_fit <- feols(new_fml, data = data, weights = weights_val, vcov = vcov, ...)
+  
+  # Summarize to ensure we have the VCOV
+  ar_fit_sum <- summary(ar_fit, vcov = vcov, ...)
+  
+  # Get the coefficient names in the AR regression that correspond to instruments
+  ar_coef_names <- names(ar_fit_sum$coefficients)
+  
+  # Find which coefficients correspond to instruments
+  # The instrument names in the AR fit should match the expanded instrument names
+  inst_in_fit <- intersect(ar_coef_names, inst_names)
+  
+  if (length(inst_in_fit) == 0) {
+    # Instruments may have different names due to expansion
+    # Try matching without the "fit_" prefix or similar transformations
+    inst_in_fit <- ar_coef_names[ar_coef_names %in% inst_names]
+    
+    if (length(inst_in_fit) == 0) {
+      # Try a more flexible approach
+      # The instruments should be the new coefficients compared to a model without them
+      # For now, let's assume they're the last n_inst coefficients or use the iv formula vars
+      inst_formula_vars <- all.vars(fml_iv[[3]])
+      for (v in inst_formula_vars) {
+        matches <- grep(paste0("^", v, "$|^", v, "::"), ar_coef_names, value = TRUE)
+        inst_in_fit <- c(inst_in_fit, matches)
+      }
+      inst_in_fit <- unique(inst_in_fit)
+    }
+  }
+  
+  if (length(inst_in_fit) == 0) {
+    stop("Could not identify instrument coefficients in the AR regression. ",
+         "This may be a bug - please report it.")
+  }
+  
+  # Perform the Wald test on the instrument coefficients
+  wald_result <- wald(ar_fit_sum, keep = inst_in_fit, print = FALSE)
+  
+  # Build the result object
+  res <- list(
+    beta0 = beta0,
+    stat = wald_result$stat,
+    df1 = wald_result$df1,
+    df2 = wald_result$df2,
+    pvalue = wald_result$p,
+    method = "Anderson-Rubin test",
+    vcov_type = wald_result$vcov,
+    endo_names = endo_names,
+    inst_names = inst_names,
+    inst_tested = inst_in_fit,
+    model = object
+  )
+  
+  class(res) <- "fixest_ar"
+  
+  return(res)
+}
+
+
+#' @rdname ar_test
+#' @export
+print.fixest_ar = function(x, ...) {
+  
+  cat("Anderson-Rubin Test\n")
+  cat("-------------------\n")
+  
+  # Show the null hypothesis
+  endo_str <- paste(x$endo_names, collapse = ", ")
+  beta0_str <- paste(format(x$beta0, digits = 4), collapse = ", ")
+  
+  if (length(x$endo_names) == 1) {
+    cat("H0: ", x$endo_names, " = ", format(x$beta0, digits = 4), "\n", sep = "")
+  } else {
+    cat("H0: (", endo_str, ") = (", beta0_str, ")\n", sep = "")
+  }
+  
+  cat("\n")
+  
+  # Show test statistics
+  cat("Stat (F): ", format(x$stat, digits = 4), "\n", sep = "")
+  cat("DoF:      ", x$df1, " and ", x$df2, "\n", sep = "")
+  
+  # Format p-value
+  if (x$pvalue < 2.2e-16) {
+    cat("P-value:  < 2.2e-16 ***\n")
+  } else if (x$pvalue < 0.001) {
+    cat("P-value:  ", format(x$pvalue, digits = 4), " ***\n", sep = "")
+  } else if (x$pvalue < 0.01) {
+    cat("P-value:  ", format(x$pvalue, digits = 4), " **\n", sep = "")
+  } else if (x$pvalue < 0.05) {
+    cat("P-value:  ", format(x$pvalue, digits = 4), " *\n", sep = "")
+  } else if (x$pvalue < 0.1) {
+    cat("P-value:  ", format(x$pvalue, digits = 4), " .\n", sep = "")
+  } else {
+    cat("P-value:  ", format(x$pvalue, digits = 4), "\n", sep = "")
+  }
+  
+  cat("VCOV:     ", x$vcov_type, "\n", sep = "")
+  cat("---\n")
+  cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
+  
+  invisible(x)
+}
+
+
+#' Anderson-Rubin confidence interval for IV models
+#'
+#' @description
+#' Computes Anderson-Rubin confidence intervals by inverting the AR test.
+#' These confidence intervals are robust to weak instruments.
+#'
+#' @param object A `fixest` object obtained from [`feols`] with an IV specification.
+#' @param level Numeric scalar between 0 and 1. The confidence level. Default is 0.95.
+#' @param grid Numeric vector. The grid of values over which to compute the AR test. 
+#'   If `NULL` (default), a grid is automatically constructed around the point estimate.
+#' @param grid_range Numeric vector of length 2. If `grid` is `NULL`, the grid is 
+#'   constructed from `grid_range[1]` to `grid_range[2]`. If `NULL` (default), 
+#'   the range is set to the point estimate plus/minus 5 standard errors.
+#' @param n_grid Integer. The number of grid points to use if `grid` is `NULL`. 
+#'   Default is 100.
+#' @param vcov Versatile argument to specify the VCOV. See [`ar_test`] for details.
+#' @param ... Additional arguments passed to [`ar_test`].
+#'
+#' @details
+#' This function only supports models with a single endogenous variable. For models 
+#' with multiple endogenous variables, the confidence region is multi-dimensional 
+#' and cannot be easily represented.
+#'
+#' The AR confidence interval is constructed by inverting the AR test: it includes 
+#' all values of beta0 for which the AR test does not reject at the specified level.
+#'
+#' Note that AR confidence intervals may be:
+#' \itemize{
+#'   \item Empty (if the model is severely misspecified)
+#'   \item Unbounded (if instruments are very weak)
+#'   \item Disconnected (consisting of multiple disjoint intervals)
+#' }
+#'
+#' @return
+#' An object of class `fixest_ar_confint` containing:
+#' \item{level}{The confidence level used.}
+#' \item{grid}{The grid of values tested.}
+#' \item{pvalues}{The p-values at each grid point.}
+#' \item{accept}{The grid values that were not rejected.}
+#' \item{intervals}{A matrix with columns "lower" and "upper" giving the 
+#'   confidence interval(s). Each row is a separate interval (in case of 
+#'   disconnected regions).}
+#' \item{endo_name}{The name of the endogenous variable.}
+#' \item{point_est}{The 2SLS point estimate.}
+#' \item{model}{The original `feols` IV object.}
+#'
+#' @seealso 
+#' [`ar_test`] for the Anderson-Rubin test, [`confint.fixest`] for standard 
+#' confidence intervals.
+#'
+#' @examples
+#' # Simple IV example
+#' set.seed(123)
+#' n <- 500
+#' z <- rnorm(n)
+#' x <- 0.5 * z + rnorm(n)  # moderate instrument strength
+#' y <- 1 + 0.8 * x + rnorm(n)
+#' data <- data.frame(y = y, x = x, z = z)
+#'
+#' # IV estimation
+#' est <- feols(y ~ 1 | x ~ z, data = data)
+#'
+#' # AR confidence interval
+#' ar_ci <- ar_confint(est)
+#' print(ar_ci)
+#'
+#' # Compare with standard confidence interval
+#' confint(est)
+#'
+#' @export
+ar_confint = function(object, level = 0.95, grid = NULL, grid_range = NULL, 
+                      n_grid = 100, vcov = NULL, ...) {
+  
+  # Input validation
+  check_arg(object, "class(fixest)")
+  check_arg(level, "numeric scalar GT{0} LT{1}")
+  check_arg(n_grid, "integer scalar GE{10}")
+  
+  # Check that object is an IV model
+  if (!isTRUE(object$iv)) {
+    stop("The `ar_confint` function requires an IV model. ",
+         "Please provide a `feols` object estimated with an IV formula.")
+  }
+  
+  # Currently only support single endogenous variable
+  endo_names <- object$iv_endo_names
+  if (length(endo_names) > 1) {
+    stop("The `ar_confint` function currently only supports models with a single ",
+         "endogenous variable. This model has ", length(endo_names), 
+         " endogenous variables: ", paste(endo_names, collapse = ", "), ".")
+  }
+  
+  endo_name <- endo_names[1]
+  
+  # Get the point estimate and SE
+  # Need to handle the case where the coefficient name might have "fit_" prefix
+  coef_names <- names(object$coefficients)
+  endo_coef_name <- paste0("fit_", endo_name)
+  if (!endo_coef_name %in% coef_names) {
+    endo_coef_name <- endo_name
+  }
+  
+  if (!endo_coef_name %in% coef_names) {
+    stop("Could not find the coefficient for the endogenous variable '", 
+         endo_name, "' in the model.")
+  }
+  
+  point_est <- object$coefficients[endo_coef_name]
+  
+  # Get SE for grid construction
+  obj_sum <- summary(object, vcov = vcov, ...)
+  se_est <- se(obj_sum)[endo_coef_name]
+  
+  # Construct grid if not provided
+  if (is.null(grid)) {
+    if (is.null(grid_range)) {
+      # Default: point estimate +/- 5 SEs
+      grid_range <- c(point_est - 5 * se_est, point_est + 5 * se_est)
+    }
+    check_arg(grid_range, "numeric vector len(2) no na")
+    grid <- seq(grid_range[1], grid_range[2], length.out = n_grid)
+  } else {
+    check_arg(grid, "numeric vector no na")
+  }
+  
+  # Compute AR test for each grid point
+  pvalues <- numeric(length(grid))
+  for (i in seq_along(grid)) {
+    ar_res <- ar_test(object, beta0 = grid[i], vcov = vcov, ...)
+    pvalues[i] <- ar_res$pvalue
+  }
+  
+  # Identify accepted values (where p-value > 1 - level, i.e., not rejected)
+  alpha <- 1 - level
+  accept <- grid[pvalues > alpha]
+  
+  # Find contiguous intervals
+  if (length(accept) == 0) {
+    # Empty confidence set
+    intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
+    colnames(intervals) <- c("lower", "upper")
+  } else {
+    # Find breaks in the accepted values
+    # Two consecutive grid values are in different intervals if there's a 
+    # rejected value between them
+    accept_idx <- which(pvalues > alpha)
+    breaks <- which(diff(accept_idx) > 1)
+    
+    # Build intervals
+    n_intervals <- length(breaks) + 1
+    intervals <- matrix(NA_real_, nrow = n_intervals, ncol = 2)
+    colnames(intervals) <- c("lower", "upper")
+    
+    start_idx <- 1
+    for (i in seq_len(n_intervals)) {
+      if (i <= length(breaks)) {
+        end_idx <- breaks[i]
+      } else {
+        end_idx <- length(accept_idx)
+      }
+      
+      interval_accept_idx <- accept_idx[start_idx:end_idx]
+      intervals[i, "lower"] <- grid[min(interval_accept_idx)]
+      intervals[i, "upper"] <- grid[max(interval_accept_idx)]
+      
+      start_idx <- end_idx + 1
+    }
+    
+    # Check if intervals extend to grid boundaries (possibly unbounded)
+    if (intervals[1, "lower"] == min(grid)) {
+      intervals[1, "lower"] <- -Inf
+    }
+    if (intervals[n_intervals, "upper"] == max(grid)) {
+      intervals[n_intervals, "upper"] <- Inf
+    }
+  }
+  
+  res <- list(
+    level = level,
+    grid = grid,
+    pvalues = pvalues,
+    accept = accept,
+    intervals = intervals,
+    endo_name = endo_name,
+    point_est = unname(point_est),
+    model = object
+  )
+  
+  class(res) <- "fixest_ar_confint"
+  
+  return(res)
+}
+
+
+#' @rdname ar_confint
+#' @export
+print.fixest_ar_confint = function(x, ...) {
+  
+  cat("Anderson-Rubin Confidence Interval\n")
+  cat("-----------------------------------\n")
+  cat("Variable: ", x$endo_name, "\n", sep = "")
+  cat("Level:    ", format(x$level * 100, digits = 2), "%\n", sep = "")
+  cat("\n")
+  
+  cat("2SLS point estimate: ", format(x$point_est, digits = 4), "\n", sep = "")
+  cat("\n")
+  
+  if (nrow(x$intervals) == 0) {
+    cat("AR Confidence Set: EMPTY\n")
+    cat("  (This may indicate model misspecification)\n")
+  } else if (nrow(x$intervals) == 1) {
+    lower <- x$intervals[1, "lower"]
+    upper <- x$intervals[1, "upper"]
+    
+    if (is.infinite(lower) && is.infinite(upper)) {
+      cat("AR Confidence Set: (-Inf, Inf)\n")
+      cat("  (Instruments may be very weak)\n")
+    } else if (is.infinite(lower)) {
+      cat("AR Confidence Set: (-Inf, ", format(upper, digits = 4), "]\n", sep = "")
+      cat("  (Lower bound may be unbounded - consider expanding the grid)\n")
+    } else if (is.infinite(upper)) {
+      cat("AR Confidence Set: [", format(lower, digits = 4), ", Inf)\n", sep = "")
+      cat("  (Upper bound may be unbounded - consider expanding the grid)\n")
+    } else {
+      cat("AR Confidence Set: [", format(lower, digits = 4), ", ", 
+          format(upper, digits = 4), "]\n", sep = "")
+    }
+  } else {
+    cat("AR Confidence Set: (disconnected)\n")
+    for (i in seq_len(nrow(x$intervals))) {
+      lower <- x$intervals[i, "lower"]
+      upper <- x$intervals[i, "upper"]
+      
+      lower_bracket <- if (is.infinite(lower)) "(" else "["
+      upper_bracket <- if (is.infinite(upper)) ")" else "]"
+      lower_str <- if (is.infinite(lower)) "-Inf" else format(lower, digits = 4)
+      upper_str <- if (is.infinite(upper)) "Inf" else format(upper, digits = 4)
+      
+      cat("  Interval ", i, ": ", lower_bracket, lower_str, ", ", 
+          upper_str, upper_bracket, "\n", sep = "")
+    }
+    cat("  (Multiple intervals may indicate misspecification or non-convex confidence region)\n")
+  }
+  
+  invisible(x)
+}
diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
index 6549ed1e..390d984c 100644
--- a/tests/fixest_tests.R
+++ b/tests/fixest_tests.R
@@ -3167,6 +3167,83 @@ test(dim(fixest_data(est_mult)), dim(base))
 test(nrow(fixest_data(est_mult, "esti")), 45)
 
 
+####
+#### Anderson-Rubin test ####
+####
+
+chunk("AR test")
+
+# Simple IV example with known DGP
+set.seed(42)
+n = 500
+z = rnorm(n)
+x = 0.5 * z + rnorm(n)  # moderate instrument strength
+true_beta = 0.8
+y = 1 + true_beta * x + rnorm(n)
+data_ar = data.frame(y = y, x = x, z = z, cl = rep(1:50, each = 10))
+
+# Basic IV estimation
+est_iv_ar = feols(y ~ 1 | x ~ z, data = data_ar)
+
+# Test 1: AR test should run without error
+ar_res = ar_test(est_iv_ar)
+test("fixest_ar" %in% class(ar_res), TRUE)
+
+# Test 2: AR test with beta0 = 0 (null should be rejected since true_beta = 0.8)
+ar_res_null = ar_test(est_iv_ar, beta0 = 0)
+test(ar_res_null$beta0, 0)
+test(ar_res_null$pvalue < 0.05, TRUE)  # should reject
+
+# Test 3: AR test with beta0 close to true value (should not reject)
+ar_res_true = ar_test(est_iv_ar, beta0 = true_beta)
+test(ar_res_true$pvalue > 0.05, TRUE)  # should not reject
+
+# Test 4: AR test returns correct structure
+test(is.numeric(ar_res$stat), TRUE)
+test(is.numeric(ar_res$pvalue), TRUE)
+test(is.numeric(ar_res$df1), TRUE)
+test(is.numeric(ar_res$df2), TRUE)
+test(ar_res$method, "Anderson-Rubin test")
+
+# Test 5: AR test with clustered SEs
+est_iv_cl = feols(y ~ 1 | x ~ z, data = data_ar, vcov = ~cl)
+ar_res_cl = ar_test(est_iv_cl)
+test("fixest_ar" %in% class(ar_res_cl), TRUE)
+
+# Test 6: AR test should error on non-IV model
+est_ols = feols(y ~ x, data = data_ar)
+test(ar_test(est_ols), "err")
+
+# Test 7: ar_confint should work for single endogenous variable
+ar_ci = ar_confint(est_iv_ar, grid_range = c(0, 2), n_grid = 20)
+test("fixest_ar_confint" %in% class(ar_ci), TRUE)
+test(ar_ci$point_est > 0, TRUE)  # point estimate should be positive
+test(nrow(ar_ci$intervals) >= 1, TRUE)  # should have at least one interval
+
+# Test 8: IV with controls and fixed effects
+data_ar$w = rnorm(n)  # exogenous control
+data_ar$fe = rep(1:5, each = 100)  # fixed effect
+est_iv_fe = feols(y ~ w | fe | x ~ z, data = data_ar)
+ar_res_fe = ar_test(est_iv_fe)
+test("fixest_ar" %in% class(ar_res_fe), TRUE)
+
+# Test 9: Multiple instruments  
+data_ar$z2 = 0.3 * x + rnorm(n, sd = 0.8)
+est_iv_2inst = feols(y ~ 1 | x ~ z + z2, data = data_ar)
+ar_res_2inst = ar_test(est_iv_2inst)
+test("fixest_ar" %in% class(ar_res_2inst), TRUE)
+
+# Test 10: ar_confint errors with multiple endogenous vars
+data_ar$x2 = 0.3 * z + rnorm(n)
+est_iv_2endo = feols(y ~ 1 | x + x2 ~ z + z2, data = data_ar)
+test(ar_confint(est_iv_2endo), "err")  # should error with multiple endo vars
+
+# Test 11: Verify print methods work without error
+print(ar_res)
+print(ar_ci)
+
+
+
 
 
From cc7c1ac8cd5880fb8cd398cb9f56b1c1fdf3f9ba Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 26 Nov 2025 23:01:22 +0000
Subject: [PATCH 03/20] Fix code review comments: simplify validation, robust
 temp names, clean interval logic

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>
---
 R/ar_test.R | 43 ++++++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 1a357c6d..10da6bb8 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -26,7 +26,6 @@
 #'
 #' @details
 #' The Anderson-Rubin (AR) test is a weak-instrument robust test for the null 
-
 #' hypothesis H0: beta = beta0, where beta is the vector of coefficients on the 
 #' endogenous variables.
 #'
@@ -123,15 +122,9 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
     beta0 <- rep(0, n_endo)
   } else {
     check_arg(beta0, "numeric vector no na")
-    if (length(beta0) == 1 && n_endo > 1) {
-      # If scalar beta0 provided but multiple endogenous vars, error
-      stop("Argument `beta0` has length 1 but there are ", n_endo, 
-           " endogenous variables (", paste(endo_names, collapse = ", "), "). ",
-           "Please provide a vector of length ", n_endo, ".")
-    }
     if (length(beta0) != n_endo) {
       stop("Argument `beta0` has length ", length(beta0), " but there are ", 
-           n_endo, " endogenous variables. ",
+           n_endo, " endogenous variables (", paste(endo_names, collapse = ", "), "). ",
            "Please provide a vector of length ", n_endo, ".")
     }
   }
@@ -152,8 +145,12 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
   # Compute transformed outcome: y* = y - X * beta0
   y_tilde <- as.numeric(y_vec - endo_mat %*% beta0)
   
-  # Store y_tilde in a temporary column
-  temp_y_name <- paste0("..ar_y_tilde_", sample.int(1e6, 1))
+  # Store y_tilde in a temporary column with unique name
+  temp_y_name <- "..ar_y_tilde"
+  # Ensure uniqueness if column already exists
+  while (temp_y_name %in% names(data)) {
+    temp_y_name <- paste0("..ar_y_tilde_", sample.int(1e6, 1))
+  }
   data[[temp_y_name]] <- y_tilde
   
   # Build the AR regression formula
@@ -181,7 +178,7 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
   
   # Build the new formula: y_tilde ~ controls + instruments | FE
   new_fml_str <- paste0(temp_y_name, " ~ ", controls_str, " + ", inst_str, fe_str)
-  new_fml <- as.formula(new_fml_str, env = parent.frame())
+  new_fml <- as.formula(new_fml_str)
   
   # Get weights if present
   weights_val <- NULL
@@ -214,21 +211,14 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
   inst_in_fit <- intersect(ar_coef_names, inst_names)
   
   if (length(inst_in_fit) == 0) {
-    # Instruments may have different names due to expansion
-    # Try matching without the "fit_" prefix or similar transformations
-    inst_in_fit <- ar_coef_names[ar_coef_names %in% inst_names]
-    
-    if (length(inst_in_fit) == 0) {
-      # Try a more flexible approach
-      # The instruments should be the new coefficients compared to a model without them
-      # For now, let's assume they're the last n_inst coefficients or use the iv formula vars
-      inst_formula_vars <- all.vars(fml_iv[[3]])
-      for (v in inst_formula_vars) {
-        matches <- grep(paste0("^", v, "$|^", v, "::"), ar_coef_names, value = TRUE)
-        inst_in_fit <- c(inst_in_fit, matches)
-      }
-      inst_in_fit <- unique(inst_in_fit)
+    # Try a more flexible approach using the instrument formula variables
+    # This handles cases where instruments are expanded (e.g., factor variables)
+    inst_formula_vars <- all.vars(fml_iv[[3]])
+    for (v in inst_formula_vars) {
+      matches <- grep(paste0("^", v, "$|^", v, "::"), ar_coef_names, value = TRUE)
+      inst_in_fit <- c(inst_in_fit, matches)
     }
+    inst_in_fit <- unique(inst_in_fit)
   }
   
   if (length(inst_in_fit) == 0) {
@@ -475,9 +465,12 @@ ar_confint = function(object, level = 0.95, grid = NULL, grid_range = NULL,
     }
     
     # Check if intervals extend to grid boundaries (possibly unbounded)
+    # Only check the boundaries of the extreme intervals
+    # First interval's lower bound: check if it touches the grid minimum
     if (intervals[1, "lower"] == min(grid)) {
       intervals[1, "lower"] <- -Inf
     }
+    # Last interval's upper bound: check if it touches the grid maximum
     if (intervals[n_intervals, "upper"] == max(grid)) {
       intervals[n_intervals, "upper"] <- Inf
     }

From 9a347983378a6ad756f87cecbb53d2357ce6e355 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 26 Nov 2025 23:03:29 +0000
Subject: [PATCH 04/20] Improve robustness: limit temp name attempts, escape
 regex special chars

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>
---
 R/ar_test.R | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 10da6bb8..02edf5f8 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -147,9 +147,15 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
   
   # Store y_tilde in a temporary column with unique name
   temp_y_name <- "..ar_y_tilde"
-  # Ensure uniqueness if column already exists
-  while (temp_y_name %in% names(data)) {
+  # Ensure uniqueness if column already exists (with a maximum of 100 attempts)
+  attempt <- 0
+  while (temp_y_name %in% names(data) && attempt < 100) {
     temp_y_name <- paste0("..ar_y_tilde_", sample.int(1e6, 1))
+    attempt <- attempt + 1
+  }
+  if (temp_y_name %in% names(data)) {
+    stop("Could not create a unique temporary column name in the data. ",
+         "This is unexpected - please report this issue.")
   }
   data[[temp_y_name]] <- y_tilde
   
@@ -215,7 +221,10 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
     # This handles cases where instruments are expanded (e.g., factor variables)
     inst_formula_vars <- all.vars(fml_iv[[3]])
     for (v in inst_formula_vars) {
-      matches <- grep(paste0("^", v, "$|^", v, "::"), ar_coef_names, value = TRUE)
+      # Escape special regex characters in variable name
+      v_escaped <- gsub("([\\^$*+?{}\\[\\]()|.])", "\\\\\\1", v)
+      pattern <- paste0("^", v_escaped, "($|::)")
+      matches <- grep(pattern, ar_coef_names, value = TRUE)
       inst_in_fit <- c(inst_in_fit, matches)
     }
     inst_in_fit <- unique(inst_in_fit)

From 3621eb448c56863691815a466b176d632fdfb2d6 Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Wed, 26 Nov 2025 23:11:44 +0000
Subject: [PATCH 05/20] Change ar_test header

---
 R/ar_test.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 02edf5f8..70c9416f 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -1,6 +1,6 @@
 #----------------------------------------------#
-# Author: Laurent Berge (+ AI-assisted development)
-# Date creation: 2024
+# Author: Dianyi Yang (+ AI)
+# Date creation: Wed Nov 26 23:11:00 2025
 # ~: Anderson-Rubin test for IV models
 #----------------------------------------------#
 

From c0f529645b0eb56ff8f3a4e3d37eb125ac4e488d Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Wed, 26 Nov 2025 23:36:41 +0000
Subject: [PATCH 06/20] pass correct vcov flag

---
 R/ar_test.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 70c9416f..b63e282d 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -195,8 +195,8 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
   # Get vcov specification - use original if not specified
   if (is.null(vcov)) {
     # Try to get the vcov from the summarized object
-    if (!is.null(object$cov.scaled)) {
-      vcov <- attr(object$cov.scaled, "type")
+    if (!is.null(object$summary_flags$vcov)) {
+      vcov <- object$summary_flags$vcov
       if (is.null(vcov)) vcov <- "iid"
     } else {
       vcov <- "iid"

From 1235f7e215d33d2baf1bc3df5db51036d1087905 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 27 Nov 2025 22:03:43 +0000
Subject: [PATCH 07/20] Refactor AR test: add internal core, iid/non-iid
 distributions, exact/numeric CI, unified API

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>
---
 R/ar_test.R          | 1015 ++++++++++++++++++++++++++++++------------
 tests/fixest_tests.R |   44 +-
 2 files changed, 770 insertions(+), 289 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index b63e282d..6aedd067 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -5,6 +5,561 @@
 #----------------------------------------------#
 
 
+# =============================================================================
+# INTERNAL CORE FUNCTION
+# =============================================================================
+
+#' Internal core function for Anderson-Rubin test
+#'
+#' @description
+#' This is an internal function that computes the AR test statistic and p-value.
+#' It is called by both ar_test() and ar_confint().
+#'
+#' @param object A `fixest` object obtained from [`feols`] with an IV specification.
+#' @param beta0 Numeric vector. The hypothesized values for the endogenous variable 
+#'   coefficients under the null hypothesis.
+#' @param vcov Versatile argument to specify the VCOV.
+#' @param ... Additional arguments passed to [`feols`].
+#'
+#' @return A list containing:
+#' \item{stat}{The test statistic (F or Chi-squared depending on vcov).}
+#' \item{df1}{Numerator degrees of freedom.}
+#' \item{df2}{Denominator degrees of freedom (NA for Chi-squared).}
+#' \item{pvalue}{The p-value.}
+#' \item{dist}{Distribution used: "F" or "Chi-squared".}
+#' \item{vcov_type}{The VCOV type used.}
+#' \item{endo_names}{Names of endogenous variables.}
+#' \item{inst_names}{Names of instruments.}
+#' \item{inst_tested}{Instrument coefficients actually tested.}
+#'
+#' @keywords internal
+.ar_test_core = function(object, beta0, vcov = NULL, ...) {
+  
+  # Get endogenous variable names and instrument names
+  endo_names <- object$iv_endo_names
+  inst_names <- object$iv_inst_names_xpd  # expanded instrument names
+  n_endo <- length(endo_names)
+  
+  # Name beta0 for clarity
+  names(beta0) <- endo_names
+  
+  # Fetch the data
+  data <- fetch_data(object, "To apply AR test, ")
+  
+  # Get the dependent variable
+  fml_linear <- formula(object, type = "linear")
+  y_vec <- eval(fml_linear[[2]], data)
+  
+  # Get the endogenous variable matrix
+  endo_mat <- model.matrix(object, data = data, type = "iv.endo", collin.rm = FALSE)
+  
+  # Compute transformed outcome: y* = y - X * beta0
+  y_tilde <- as.numeric(y_vec - endo_mat %*% beta0)
+  
+  # Store y_tilde in a temporary column with unique name
+  temp_y_name <- "..ar_y_tilde"
+  # Ensure uniqueness if column already exists (with a maximum of 100 attempts)
+  attempt <- 0
+  while (temp_y_name %in% names(data) && attempt < 100) {
+    temp_y_name <- paste0("..ar_y_tilde_", sample.int(1e6, 1))
+    attempt <- attempt + 1
+  }
+  if (temp_y_name %in% names(data)) {
+    stop("Could not create a unique temporary column name in the data. ",
+         "This is unexpected - please report this issue.")
+  }
+  data[[temp_y_name]] <- y_tilde
+  
+  # Build the AR regression formula
+  # We need: y_tilde ~ controls + instruments | fixed_effects
+  
+  # Get the exogenous controls part (the linear formula RHS)
+  fml_linear_rhs <- object$fml_all$linear
+  if (length(fml_linear_rhs) == 3) {
+    controls_str <- deparse_long(fml_linear_rhs[[3]])
+  } else {
+    controls_str <- "1"
+  }
+  
+  # Get the instrument formula RHS
+  fml_iv <- object$fml_all$iv
+  inst_str <- deparse_long(fml_iv[[3]])
+  
+  # Get the fixed effects part if any
+  fml_fe <- object$fml_all$fixef
+  if (!is.null(fml_fe)) {
+    fe_str <- paste0(" | ", deparse_long(fml_fe[[2]]))
+  } else {
+    fe_str <- ""
+  }
+  
+  # Build the new formula: y_tilde ~ controls + instruments | FE
+  new_fml_str <- paste0(temp_y_name, " ~ ", controls_str, " + ", inst_str, fe_str)
+  new_fml <- as.formula(new_fml_str)
+  
+  # Get weights if present
+  weights_val <- NULL
+  if (!is.null(object$weights)) {
+    weights_val <- object$weights
+  }
+  
+  # Determine vcov specification
+  # If not specified, try to get from original object
+  if (is.null(vcov)) {
+    if (!is.null(object$cov.scaled)) {
+      vcov <- attr(object$cov.scaled, "type")
+    }
+    if (is.null(vcov)) {
+      vcov <- "iid"
+    }
+  }
+  
+  # Run the AR regression - inherit settings from original object where possible
+  ar_fit <- feols(new_fml, data = data, weights = weights_val, vcov = vcov, 
+                  notes = FALSE, ...)
+  
+  # Summarize to ensure we have the VCOV
+  ar_fit_sum <- summary(ar_fit, vcov = vcov, ...)
+  
+  # Get the coefficient names in the AR regression that correspond to instruments
+  ar_coef_names <- names(ar_fit_sum$coefficients)
+  
+  # Find which coefficients correspond to instruments
+  inst_in_fit <- intersect(ar_coef_names, inst_names)
+  
+  if (length(inst_in_fit) == 0) {
+    # Try a more flexible approach using the instrument formula variables
+    inst_formula_vars <- all.vars(fml_iv[[3]])
+    for (v in inst_formula_vars) {
+      v_escaped <- gsub("([\\^$*+?{}\\[\\]()|.])", "\\\\\\1", v)
+      pattern <- paste0("^", v_escaped, "($|::)")
+      matches <- grep(pattern, ar_coef_names, value = TRUE)
+      inst_in_fit <- c(inst_in_fit, matches)
+    }
+    inst_in_fit <- unique(inst_in_fit)
+  }
+  
+  if (length(inst_in_fit) == 0) {
+    stop("Could not identify instrument coefficients in the AR regression. ",
+         "This may be a bug - please report it.")
+  }
+  
+  # Perform the Wald test on the instrument coefficients
+  wald_result <- wald(ar_fit_sum, keep = inst_in_fit, print = FALSE)
+  
+  # Determine if vcov is iid or not for choosing distribution
+  vcov_type <- wald_result$vcov
+  is_iid <- identical(vcov_type, "IID") || identical(vcov_type, "iid") || 
+            identical(tolower(as.character(vcov_type)), "iid")
+  
+  # Compute statistic and p-value based on vcov type
+  df1 <- wald_result$df1
+  df2 <- wald_result$df2
+  
+  if (is_iid) {
+    # For iid: use F-distribution (exact under homoskedastic errors)
+    stat <- wald_result$stat
+    pvalue <- pf(stat, df1, df2, lower.tail = FALSE)
+    dist <- "F"
+  } else {
+    # For non-iid: use Chi-squared (asymptotic)
+    # Convert F-statistic to Chi-squared: chi2 = F * df1
+    stat <- wald_result$stat * df1
+    pvalue <- pchisq(stat, df = df1, lower.tail = FALSE)
+    dist <- "Chi-squared"
+    df2 <- NA_real_
+  }
+  
+  # Return results
+  list(
+    stat = stat,
+    df1 = df1,
+    df2 = df2,
+    pvalue = pvalue,
+    dist = dist,
+    vcov_type = vcov_type,
+    endo_names = endo_names,
+    inst_names = inst_names,
+    inst_tested = inst_in_fit,
+    is_iid = is_iid
+  )
+}
+
+
+# =============================================================================
+# EXACT AR CI FOR IID (SINGLE ENDOGENOUS VARIABLE)
+# =============================================================================
+
+#' Compute exact AR confidence interval for iid errors (single endogenous variable)
+#'
+#' @description
+#' Uses the closed-form quadratic formula for AR confidence sets when:
+#' - There is exactly one endogenous variable
+#' - The vcov is iid (homoskedastic errors)
+#'
+#' @param object A `fixest` object with IV and single endogenous variable.
+#' @param level Confidence level (e.g., 0.95).
+#'
+#' @return A list with:
+#' \item{method}{"AR"}
+#' \item{exact}{TRUE}
+#' \item{level}{The confidence level}
+#' \item{intervals}{Matrix with columns "lower" and "upper"}
+#'
+#' @keywords internal
+.ar_ci_exact_iid = function(object, level = 0.95) {
+  
+  # Fetch data
+  data <- fetch_data(object, "To compute AR CI, ")
+  
+  # Get y, endogenous variable, and instruments
+  fml_linear <- formula(object, type = "linear")
+  y <- eval(fml_linear[[2]], data)
+  
+  endo_mat <- model.matrix(object, data = data, type = "iv.endo", collin.rm = FALSE)
+  inst_mat <- model.matrix(object, data = data, type = "iv.inst", collin.rm = FALSE)
+  
+  # Get exogenous controls (if any)
+  exo_mat <- model.matrix(object, data = data, type = "iv.exo", collin.rm = FALSE)
+  
+  n <- length(y)
+  L <- ncol(inst_mat)  # number of instruments
+  
+  # Check for fixed effects - if present, we need to demean
+  has_fe <- !is.null(object$fixef_vars)
+  
+  if (has_fe || !is.null(exo_mat)) {
+    # We need to partial out controls and/or fixed effects
+    # Use demean if FE present, otherwise just residualize
+    
+    if (has_fe) {
+      # Demean all variables with respect to fixed effects
+      dm_data <- demean(object)
+      
+      # Extract demeaned variables - the order is: y, then X (controls), then IV parts
+      # We need to reconstruct the demeaned versions
+      # For simplicity, re-run the demeaning manually
+      
+      # Create a combined matrix of all variables to demean
+      all_vars <- cbind(y, endo_mat, inst_mat)
+      if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
+        all_vars <- cbind(all_vars, exo_mat)
+      }
+      
+      # Use fixest's demeaning
+      fixef_id <- object$fixef_id
+      slope_flag <- object$slope_flag
+      slope_vars <- NULL
+      if (!is.null(object$slope_variables_reordered)) {
+        slope_vars <- object$slope_variables_reordered
+      }
+      
+      # Demean using the internal algorithm
+      all_vars_dm <- cpp_demean(all_vars, 
+                                fixef_id, 
+                                weights = object$weights,
+                                iterMax = object$fixef.iter,
+                                diffMax = object$fixef.tol,
+                                nthreads = 1L,
+                                fixef.algo = demeaning_algo(internal = TRUE),
+                                slope.flag = if(!is.null(slope_flag)) slope_flag else integer(0),
+                                slope.vars = if(!is.null(slope_vars)) slope_vars else matrix(0, 0, 0),
+                                notes = FALSE)
+      
+      y_star <- all_vars_dm[, 1]
+      d_star <- all_vars_dm[, 2]
+      z_star <- all_vars_dm[, 2 + seq_len(L)]
+      
+      # If there are exogenous controls, also partial them out from the residuals
+      if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
+        exo_dm <- all_vars_dm[, (2 + L + 1):ncol(all_vars_dm), drop = FALSE]
+        # Further residualize y_star, d_star, z_star with respect to exo_dm
+        # Using QR decomposition
+        qr_exo <- qr(exo_dm)
+        y_star <- qr.resid(qr_exo, y_star)
+        d_star <- qr.resid(qr_exo, d_star)
+        z_star <- qr.resid(qr_exo, z_star)
+      }
+      
+      # Adjust degrees of freedom for FE
+      k <- sum(object$fixef_sizes - 1) + 1  # number of FE parameters absorbed
+      if (!is.null(exo_mat)) k <- k + ncol(exo_mat)
+      
+    } else {
+      # No FE, just partial out exogenous controls
+      qr_exo <- qr(cbind(1, exo_mat))  # include intercept
+      y_star <- qr.resid(qr_exo, y)
+      d_star <- qr.resid(qr_exo, endo_mat[, 1])
+      z_star <- qr.resid(qr_exo, inst_mat)
+      k <- ncol(exo_mat) + 1  # controls + intercept
+    }
+    
+  } else {
+    # No FE, no controls - just demean for intercept
+    y_star <- y - mean(y)
+    d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
+    z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
+    k <- 1  # just intercept
+  }
+  
+  # Compute the quadratic coefficients
+  # S_DD = sum(D*^2), S_YY = sum(Y*^2), S_DY = sum(D* * Y*)
+  S_DD <- sum(d_star^2)
+  S_YY <- sum(y_star^2)
+  S_DY <- sum(d_star * y_star)
+  
+  # P_Y = projection of Y* onto Z*, P_D = projection of D* onto Z*
+  qr_z <- qr(z_star)
+  P_Y <- qr.fitted(qr_z, y_star)
+  P_D <- qr.fitted(qr_z, d_star)
+  
+  S_PDD <- sum(P_D^2)
+  S_PYY <- sum(P_Y^2)
+  S_PDY <- sum(P_D * P_Y)
+  
+  # Critical value
+  alpha <- 1 - level
+  Fcrit <- qf(1 - alpha, df1 = L, df2 = n - k - L)
+  c_val <- Fcrit * L / (n - k - L)
+  
+  # Quadratic coefficients for: A * beta0^2 + B * beta0 + C >= 0
+  A <- c_val * S_DD - (c_val + 1) * S_PDD
+  B <- -2 * c_val * S_DY + 2 * (c_val + 1) * S_PDY
+  C <- c_val * S_YY - (c_val + 1) * S_PYY
+  
+  # Solve the quadratic inequality
+  discriminant <- B^2 - 4 * A * C
+  
+  if (abs(A) < 1e-14) {
+    # Linear case: B * beta0 + C >= 0
+    if (abs(B) < 1e-14) {
+      # Constant case
+      if (C >= 0) {
+        # Whole real line
+        intervals <- matrix(c(-Inf, Inf), nrow = 1, ncol = 2)
+      } else {
+        # Empty set
+        intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
+      }
+    } else {
+      root <- -C / B
+      if (B > 0) {
+        intervals <- matrix(c(root, Inf), nrow = 1, ncol = 2)
+      } else {
+        intervals <- matrix(c(-Inf, root), nrow = 1, ncol = 2)
+      }
+    }
+  } else if (discriminant < 0) {
+    # No real roots
+    if (A > 0) {
+      # Parabola opens upward, always positive
+      intervals <- matrix(c(-Inf, Inf), nrow = 1, ncol = 2)
+    } else {
+      # Parabola opens downward, always negative
+      intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
+    }
+  } else {
+    # Two real roots
+    sqrt_disc <- sqrt(discriminant)
+    root1 <- (-B - sqrt_disc) / (2 * A)
+    root2 <- (-B + sqrt_disc) / (2 * A)
+    
+    # Ensure root1 < root2
+    if (root1 > root2) {
+      tmp <- root1
+      root1 <- root2
+      root2 <- tmp
+    }
+    
+    if (A > 0) {
+      # Parabola opens upward: f(beta0) >= 0 for beta0 <= root1 or beta0 >= root2
+      intervals <- matrix(c(-Inf, root1, root2, Inf), nrow = 2, ncol = 2, byrow = TRUE)
+    } else {
+      # Parabola opens downward: f(beta0) >= 0 for root1 <= beta0 <= root2
+      intervals <- matrix(c(root1, root2), nrow = 1, ncol = 2)
+    }
+  }
+  
+  colnames(intervals) <- c("lower", "upper")
+  
+  list(
+    method = "AR",
+    exact = TRUE,
+    level = level,
+    intervals = intervals
+  )
+}
+
+
+# =============================================================================
+# NUMERIC AR CI FOR NON-IID VCOV
+# =============================================================================
+
+#' Compute numeric AR confidence interval by test inversion
+#'
+#' @description
+#' Inverts the AR test numerically to find the confidence set.
+#' Used when vcov is not iid or when exact computation is not possible.
+#'
+#' @param object A `fixest` object with IV.
+#' @param level Confidence level (e.g., 0.95).
+#' @param vcov VCOV specification.
+#' @param n_grid Number of coarse grid points. Default 200.
+#' @param tol Tolerance for root finding. Default 1e-8.
+#' @param ... Additional arguments.
+#'
+#' @return A list with:
+#' \item{method}{"AR"}
+#' \item{exact}{FALSE}
+#' \item{level}{The confidence level}
+#' \item{intervals}{Matrix with columns "lower" and "upper"}
+#'
+#' @keywords internal
+.ar_ci_numeric = function(object, level = 0.95, vcov = NULL, n_grid = 200, 
+                          tol = 1e-8, ...) {
+  
+  # Get point estimate and SE for grid construction
+  endo_names <- object$iv_endo_names
+  endo_name <- endo_names[1]
+  
+  coef_names <- names(object$coefficients)
+  endo_coef_name <- paste0("fit_", endo_name)
+  if (!endo_coef_name %in% coef_names) {
+    endo_coef_name <- endo_name
+  }
+  
+  point_est <- object$coefficients[endo_coef_name]
+  obj_sum <- summary(object, vcov = vcov, ...)
+  se_est <- se(obj_sum)[endo_coef_name]
+  
+  # Default grid range: point estimate +/- 5 SEs
+  grid_range <- c(point_est - 5 * se_est, point_est + 5 * se_est)
+  grid <- seq(grid_range[1], grid_range[2], length.out = n_grid)
+  
+  # Get the critical value
+  # First, run one AR test to determine distribution type
+  test_result <- .ar_test_core(object, beta0 = point_est, vcov = vcov, ...)
+  
+  if (test_result$dist == "F") {
+    crit <- qf(level, df1 = test_result$df1, df2 = test_result$df2)
+  } else {
+    crit <- qchisq(level, df = test_result$df1)
+  }
+  
+  # Evaluate g(beta0) = stat(beta0) - crit on the coarse grid
+  g_values <- numeric(n_grid)
+  for (i in seq_along(grid)) {
+    ar_res <- .ar_test_core(object, beta0 = grid[i], vcov = vcov, ...)
+    g_values[i] <- ar_res$stat - crit
+  }
+  
+  # Find sign changes (brackets for roots)
+  sign_changes <- which(diff(sign(g_values)) != 0)
+  
+  if (length(sign_changes) == 0) {
+    # No sign changes - either all accepted or all rejected
+    if (g_values[1] <= 0) {
+      # All accepted (potentially unbounded)
+      intervals <- matrix(c(-Inf, Inf), nrow = 1, ncol = 2)
+    } else {
+      # All rejected (empty set)
+      intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
+    }
+  } else {
+    # Find roots using uniroot
+    roots <- numeric(length(sign_changes))
+    
+    g_func <- function(beta0) {
+      ar_res <- .ar_test_core(object, beta0 = beta0, vcov = vcov, ...)
+      ar_res$stat - crit
+    }
+    
+    for (i in seq_along(sign_changes)) {
+      idx <- sign_changes[i]
+      root_result <- uniroot(g_func, 
+                            interval = c(grid[idx], grid[idx + 1]),
+                            tol = tol)
+      roots[i] <- root_result$root
+    }
+    
+    roots <- sort(roots)
+    
+    # Build intervals based on which regions have g <= 0 (accepted)
+    # Check the sign at the leftmost point to determine pattern
+    n_roots <- length(roots)
+    
+    if (g_values[1] <= 0) {
+      # Left edge is accepted
+      # Intervals: (-Inf or grid_min, root1], [root2, root3], ..., [rootN, Inf or grid_max]
+      # Number of intervals = (n_roots + 1) / 2 rounded up
+      
+      # Check if extends to left boundary
+      left_unbounded <- (grid[1] == grid_range[1])
+      right_unbounded <- (grid[n_grid] == grid_range[2]) && 
+                         (g_values[n_grid] <= 0)
+      
+      # Build interval list
+      interval_list <- list()
+      
+      # First interval
+      if (n_roots >= 1) {
+        lower <- if (left_unbounded) -Inf else grid_range[1]
+        interval_list[[1]] <- c(lower, roots[1])
+      }
+      
+      # Middle intervals (pairs of roots where acceptance resumes)
+      if (n_roots >= 2) {
+        for (j in seq(2, n_roots, by = 2)) {
+          if (j + 1 <= n_roots) {
+            interval_list[[length(interval_list) + 1]] <- c(roots[j], roots[j + 1])
+          } else if (j == n_roots) {
+            # Last root, check if extends to right
+            upper <- if (right_unbounded) Inf else grid_range[2]
+            interval_list[[length(interval_list) + 1]] <- c(roots[j], upper)
+          }
+        }
+      }
+      
+    } else {
+      # Left edge is rejected
+      # Intervals: [root1, root2], [root3, root4], ...
+      
+      right_unbounded <- (grid[n_grid] == grid_range[2]) && 
+                         (g_values[n_grid] <= 0)
+      
+      interval_list <- list()
+      for (j in seq(1, n_roots, by = 2)) {
+        if (j + 1 <= n_roots) {
+          interval_list[[length(interval_list) + 1]] <- c(roots[j], roots[j + 1])
+        } else {
+          # Odd number of roots, last one extends to right
+          upper <- if (right_unbounded) Inf else grid_range[2]
+          interval_list[[length(interval_list) + 1]] <- c(roots[j], upper)
+        }
+      }
+    }
+    
+    if (length(interval_list) == 0) {
+      intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
+    } else {
+      intervals <- do.call(rbind, interval_list)
+    }
+  }
+  
+  colnames(intervals) <- c("lower", "upper")
+  
+  list(
+    method = "AR",
+    exact = FALSE,
+    level = level,
+    intervals = intervals
+  )
+}
+
+
+# =============================================================================
+# MAIN EXPORTED FUNCTION: ar_test
+# =============================================================================
 
 #' Anderson-Rubin test for IV models
 #'
@@ -22,6 +577,14 @@
 #' @param vcov Versatile argument to specify the VCOV. If `NULL` (default), uses 
 #'   the same VCOV specification as the original model. See [`vcov.fixest`] for 
 #'   details on available options.
+#' @param level Numeric scalar between 0 and 1. The confidence level for the 
+#'   optional confidence interval. Default is 0.95.
+#' @param ci Logical or NULL. Whether to compute an AR confidence interval.
+#'   If `NULL` (default):
+#'   - Returns CI if vcov is "iid" AND there is exactly one endogenous variable
+#'   - Does not return CI otherwise
+#'   If `TRUE`: Attempts to compute CI (warns if not possible with multiple endo vars)
+#'   If `FALSE`: Does not compute CI
 #' @param ... Additional arguments passed to [`summary.fixest`].
 #'
 #' @details
@@ -38,26 +601,33 @@
 #'   \item Test the joint significance of the instrument coefficients (H0: pi = 0).
 #' }
 #'
-#' The AR test statistic follows an F-distribution under the null, regardless of 
-#' instrument strength. This makes it particularly useful when instruments may be 
-#' weak.
+#' For iid (homoskedastic) errors, the AR test statistic follows an exact 
+#' F-distribution. For non-iid errors (heteroskedastic, clustered, etc.), 
+#' a Chi-squared asymptotic distribution is used.
+#'
+#' When `ci = TRUE` (or by default for iid with single endo var):
+#' - For iid errors with a single endogenous variable, an exact closed-form 
+#'   AR confidence interval is computed using the quadratic formula.
+#' - For non-iid errors, the CI is computed by numerically inverting the AR test.
 #'
 #' @return
 #' An object of class `fixest_ar` containing:
 #' \item{beta0}{The hypothesized coefficient values tested.}
-#' \item{stat}{The AR test statistic (F-statistic).}
-#' \item{df1}{Numerator degrees of freedom (number of instruments).}
-#' \item{df2}{Denominator degrees of freedom.}
+#' \item{stat}{The AR test statistic.}
+#' \item{dist}{Distribution used: "F" or "Chi-squared".}
+#' \item{df1}{Numerator degrees of freedom.}
+#' \item{df2}{Denominator degrees of freedom (NA for Chi-squared).}
 #' \item{pvalue}{The p-value of the test.}
 #' \item{method}{Character string: "Anderson-Rubin test".}
 #' \item{vcov_type}{The type of VCOV used for the test.}
 #' \item{endo_names}{Names of the endogenous variables.}
 #' \item{inst_names}{Names of the instruments.}
 #' \item{model}{The original `feols` IV object.}
+#' \item{ci}{(Optional) A list containing the AR confidence interval, with elements:
+#'   `method`, `exact`, `level`, `intervals`.}
 #'
 #' @seealso 
-#' [`ar_confint`] for computing AR confidence intervals, [`wald`] for standard 
-#' Wald tests, [`feols`] for IV estimation.
+#' [`wald`] for standard Wald tests, [`feols`] for IV estimation.
 #'
 #' @references
 #' Anderson, T. W. and Rubin, H. (1949). "Estimation of the Parameters of a Single 
@@ -76,7 +646,7 @@
 #' # IV estimation
 #' est <- feols(y ~ 1 | x ~ z, data = data)
 #'
-#' # Test H0: beta = 0
+#' # Test H0: beta = 0 (default) - includes CI since vcov is iid
 #' ar_test(est)
 #'
 #' # Test H0: beta = 0.8 (close to true value, should not reject)
@@ -85,16 +655,20 @@
 #' # Test H0: beta = 0 (far from true value, should reject)
 #' ar_test(est, beta0 = 0)
 #'
-#' # With clustered standard errors
+#' # With clustered standard errors (Chi-squared distribution)
 #' data$cl <- rep(1:100, each = 10)
 #' est_cl <- feols(y ~ 1 | x ~ z, data = data, vcov = ~cl)
 #' ar_test(est_cl)
 #'
+#' # Force CI computation even for clustered SEs
+#' ar_test(est_cl, ci = TRUE)
+#'
 #' @export
-ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
+ar_test = function(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, ...) {
   
   # Input validation
   check_arg(object, "class(fixest)")
+  check_arg(level, "numeric scalar GT{0} LT{1}")
   
   # Check that object is an IV model
   if (!isTRUE(object$iv)) {
@@ -111,11 +685,9 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
          "Please use the main IV estimation result, not `summary(est, stage = 1)`.")
   }
   
-  # Get endogenous variable names and instrument names
+  # Get endogenous variable names
   endo_names <- object$iv_endo_names
-  inst_names <- object$iv_inst_names_xpd  # expanded instrument names
   n_endo <- length(endo_names)
-  n_inst <- length(inst_names)
   
   # Handle beta0
   if (is.null(beta0)) {
@@ -128,131 +700,57 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, ...) {
            "Please provide a vector of length ", n_endo, ".")
     }
   }
-  
-  # Name beta0 for clarity
   names(beta0) <- endo_names
   
-  # Fetch the data
-  data <- fetch_data(object, "To apply `ar_test`, ")
+  # Run the core test
+  core_result <- .ar_test_core(object, beta0 = beta0, vcov = vcov, ...)
   
-  # Get the dependent variable
-  fml_linear <- formula(object, type = "linear")
-  y_vec <- eval(fml_linear[[2]], data)
-  
-  # Get the endogenous variable matrix
-  endo_mat <- model.matrix(object, data = data, type = "iv.endo", collin.rm = FALSE)
-  
-  # Compute transformed outcome: y* = y - X * beta0
-  y_tilde <- as.numeric(y_vec - endo_mat %*% beta0)
-  
-  # Store y_tilde in a temporary column with unique name
-  temp_y_name <- "..ar_y_tilde"
-  # Ensure uniqueness if column already exists (with a maximum of 100 attempts)
-  attempt <- 0
-  while (temp_y_name %in% names(data) && attempt < 100) {
-    temp_y_name <- paste0("..ar_y_tilde_", sample.int(1e6, 1))
-    attempt <- attempt + 1
-  }
-  if (temp_y_name %in% names(data)) {
-    stop("Could not create a unique temporary column name in the data. ",
-         "This is unexpected - please report this issue.")
-  }
-  data[[temp_y_name]] <- y_tilde
-  
-  # Build the AR regression formula
-  # We need: y_tilde ~ controls + instruments | fixed_effects
-  
-  # Get the exogenous controls part (the linear formula RHS)
-  fml_linear_rhs <- object$fml_all$linear
-  if (length(fml_linear_rhs) == 3) {
-    controls_str <- deparse_long(fml_linear_rhs[[3]])
-  } else {
-    controls_str <- "1"
-  }
-  
-  # Get the instrument formula RHS
-  fml_iv <- object$fml_all$iv
-  inst_str <- deparse_long(fml_iv[[3]])
-  
-  # Get the fixed effects part if any
-  fml_fe <- object$fml_all$fixef
-  if (!is.null(fml_fe)) {
-    fe_str <- paste0(" | ", deparse_long(fml_fe[[2]]))
-  } else {
-    fe_str <- ""
-  }
-  
-  # Build the new formula: y_tilde ~ controls + instruments | FE
-  new_fml_str <- paste0(temp_y_name, " ~ ", controls_str, " + ", inst_str, fe_str)
-  new_fml <- as.formula(new_fml_str)
+  # Build the result object
+  res <- list(
+    beta0 = beta0,
+    stat = core_result$stat,
+    dist = core_result$dist,
+    df1 = core_result$df1,
+    df2 = core_result$df2,
+    pvalue = core_result$pvalue,
+    method = "Anderson-Rubin test",
+    vcov_type = core_result$vcov_type,
+    endo_names = endo_names,
+    inst_names = core_result$inst_names,
+    inst_tested = core_result$inst_tested,
+    model = object
+  )
   
-  # Get weights if present
-  weights_val <- NULL
-  if (!is.null(object$weights)) {
-    weights_val <- object$weights
-  }
+  # Decide whether to compute CI
+  compute_ci <- FALSE
   
-  # Get vcov specification - use original if not specified
-  if (is.null(vcov)) {
-    # Try to get the vcov from the summarized object
-    if (!is.null(object$summary_flags$vcov)) {
-      vcov <- object$summary_flags$vcov
-      if (is.null(vcov)) vcov <- "iid"
+  if (is.null(ci)) {
+    # Default: compute CI only if iid and single endo var
+    if (core_result$is_iid && n_endo == 1) {
+      compute_ci <- TRUE
+    }
+  } else if (isTRUE(ci)) {
+    if (n_endo > 1) {
+      warning("Cannot compute AR confidence interval for models with multiple ",
+              "endogenous variables. Returning test result only.")
     } else {
-      vcov <- "iid"
+      compute_ci <- TRUE
     }
   }
+  # If ci = FALSE, compute_ci stays FALSE
   
-  # Run the AR regression
-  ar_fit <- feols(new_fml, data = data, weights = weights_val, vcov = vcov, ...)
-  
-  # Summarize to ensure we have the VCOV
-  ar_fit_sum <- summary(ar_fit, vcov = vcov, ...)
-  
-  # Get the coefficient names in the AR regression that correspond to instruments
-  ar_coef_names <- names(ar_fit_sum$coefficients)
-  
-  # Find which coefficients correspond to instruments
-  # The instrument names in the AR fit should match the expanded instrument names
-  inst_in_fit <- intersect(ar_coef_names, inst_names)
-  
-  if (length(inst_in_fit) == 0) {
-    # Try a more flexible approach using the instrument formula variables
-    # This handles cases where instruments are expanded (e.g., factor variables)
-    inst_formula_vars <- all.vars(fml_iv[[3]])
-    for (v in inst_formula_vars) {
-      # Escape special regex characters in variable name
-      v_escaped <- gsub("([\\^$*+?{}\\[\\]()|.])", "\\\\\\1", v)
-      pattern <- paste0("^", v_escaped, "($|::)")
-      matches <- grep(pattern, ar_coef_names, value = TRUE)
-      inst_in_fit <- c(inst_in_fit, matches)
+  # Compute CI if requested
+  if (compute_ci) {
+    if (core_result$is_iid) {
+      # Use exact closed-form CI
+      ci_result <- .ar_ci_exact_iid(object, level = level)
+    } else {
+      # Use numeric inversion
+      ci_result <- .ar_ci_numeric(object, level = level, vcov = vcov, ...)
     }
-    inst_in_fit <- unique(inst_in_fit)
+    res$ci <- ci_result
   }
   
-  if (length(inst_in_fit) == 0) {
-    stop("Could not identify instrument coefficients in the AR regression. ",
-         "This may be a bug - please report it.")
-  }
-  
-  # Perform the Wald test on the instrument coefficients
-  wald_result <- wald(ar_fit_sum, keep = inst_in_fit, print = FALSE)
-  
-  # Build the result object
-  res <- list(
-    beta0 = beta0,
-    stat = wald_result$stat,
-    df1 = wald_result$df1,
-    df2 = wald_result$df2,
-    pvalue = wald_result$p,
-    method = "Anderson-Rubin test",
-    vcov_type = wald_result$vcov,
-    endo_names = endo_names,
-    inst_names = inst_names,
-    inst_tested = inst_in_fit,
-    model = object
-  )
-  
   class(res) <- "fixest_ar"
   
   return(res)
@@ -278,48 +776,111 @@ print.fixest_ar = function(x, ...) {
   
   cat("\n")
   
-  # Show test statistics
-  cat("Stat (F): ", format(x$stat, digits = 4), "\n", sep = "")
-  cat("DoF:      ", x$df1, " and ", x$df2, "\n", sep = "")
+  # Show test statistics with distribution info
+  if (x$dist == "F") {
+    cat("Stat (F):     ", format(x$stat, digits = 4), "\n", sep = "")
+    cat("DoF:          ", x$df1, " and ", x$df2, "\n", sep = "")
+  } else {
+    cat("Stat (Chi2):  ", format(x$stat, digits = 4), "\n", sep = "")
+    cat("DoF:          ", x$df1, "\n", sep = "")
+  }
   
   # Format p-value
   if (x$pvalue < 2.2e-16) {
-    cat("P-value:  < 2.2e-16 ***\n")
+    cat("P-value:      < 2.2e-16 ***\n")
   } else if (x$pvalue < 0.001) {
-    cat("P-value:  ", format(x$pvalue, digits = 4), " ***\n", sep = "")
+    cat("P-value:      ", format(x$pvalue, digits = 4), " ***\n", sep = "")
   } else if (x$pvalue < 0.01) {
-    cat("P-value:  ", format(x$pvalue, digits = 4), " **\n", sep = "")
+    cat("P-value:      ", format(x$pvalue, digits = 4), " **\n", sep = "")
   } else if (x$pvalue < 0.05) {
-    cat("P-value:  ", format(x$pvalue, digits = 4), " *\n", sep = "")
+    cat("P-value:      ", format(x$pvalue, digits = 4), " *\n", sep = "")
   } else if (x$pvalue < 0.1) {
-    cat("P-value:  ", format(x$pvalue, digits = 4), " .\n", sep = "")
+    cat("P-value:      ", format(x$pvalue, digits = 4), " .\n", sep = "")
   } else {
-    cat("P-value:  ", format(x$pvalue, digits = 4), "\n", sep = "")
+    cat("P-value:      ", format(x$pvalue, digits = 4), "\n", sep = "")
   }
   
-  cat("VCOV:     ", x$vcov_type, "\n", sep = "")
+  cat("Distribution: ", x$dist, "\n", sep = "")
+  cat("VCOV:         ", x$vcov_type, "\n", sep = "")
   cat("---\n")
   cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
   
+  # Print CI if present
+  if (!is.null(x$ci)) {
+    cat("\n")
+    .print_ar_ci(x$ci, x$endo_names[1])
+  }
+  
   invisible(x)
 }
 
 
+#' Internal function to print AR confidence interval
+#' @keywords internal
+.print_ar_ci = function(ci, endo_name) {
+  
+  cat("Anderson-Rubin Confidence Interval\n")
+  cat("-----------------------------------\n")
+  cat("Variable: ", endo_name, "\n", sep = "")
+  cat("Level:    ", format(ci$level * 100, digits = 2), "%\n", sep = "")
+  cat("Method:   ", if (ci$exact) "Exact (closed-form)" else "Numeric inversion", "\n", sep = "")
+  cat("\n")
+  
+  intervals <- ci$intervals
+  
+  if (nrow(intervals) == 0) {
+    cat("AR Confidence Set: EMPTY\n")
+    cat("  (This may indicate model misspecification)\n")
+  } else if (nrow(intervals) == 1) {
+    lower <- intervals[1, "lower"]
+    upper <- intervals[1, "upper"]
+    
+    if (is.infinite(lower) && is.infinite(upper)) {
+      cat("AR Confidence Set: (-Inf, Inf)\n")
+      cat("  (Instruments may be very weak)\n")
+    } else if (is.infinite(lower)) {
+      cat("AR Confidence Set: (-Inf, ", format(upper, digits = 4), "]\n", sep = "")
+    } else if (is.infinite(upper)) {
+      cat("AR Confidence Set: [", format(lower, digits = 4), ", Inf)\n", sep = "")
+    } else {
+      cat("AR Confidence Set: [", format(lower, digits = 4), ", ", 
+          format(upper, digits = 4), "]\n", sep = "")
+    }
+  } else {
+    cat("AR Confidence Set: (disconnected)\n")
+    for (i in seq_len(nrow(intervals))) {
+      lower <- intervals[i, "lower"]
+      upper <- intervals[i, "upper"]
+      
+      lower_bracket <- if (is.infinite(lower)) "(" else "["
+      upper_bracket <- if (is.infinite(upper)) ")" else "]"
+      lower_str <- if (is.infinite(lower)) "-Inf" else format(lower, digits = 4)
+      upper_str <- if (is.infinite(upper)) "Inf" else format(upper, digits = 4)
+      
+      cat("  Interval ", i, ": ", lower_bracket, lower_str, ", ", 
+          upper_str, upper_bracket, "\n", sep = "")
+    }
+    cat("  (Multiple intervals may indicate non-convex confidence region)\n")
+  }
+}
+
+
+# =============================================================================
+# BACKWARD COMPATIBLE ar_confint (now a wrapper)
+# =============================================================================
+
 #' Anderson-Rubin confidence interval for IV models
 #'
 #' @description
 #' Computes Anderson-Rubin confidence intervals by inverting the AR test.
 #' These confidence intervals are robust to weak instruments.
 #'
+#' Note: This function is provided for backward compatibility. The recommended
+#' approach is to use `ar_test(object, ci = TRUE)` which integrates the test
+#' and confidence interval computation.
+#'
 #' @param object A `fixest` object obtained from [`feols`] with an IV specification.
 #' @param level Numeric scalar between 0 and 1. The confidence level. Default is 0.95.
-#' @param grid Numeric vector. The grid of values over which to compute the AR test. 
-#'   If `NULL` (default), a grid is automatically constructed around the point estimate.
-#' @param grid_range Numeric vector of length 2. If `grid` is `NULL`, the grid is 
-#'   constructed from `grid_range[1]` to `grid_range[2]`. If `NULL` (default), 
-#'   the range is set to the point estimate plus/minus 5 standard errors.
-#' @param n_grid Integer. The number of grid points to use if `grid` is `NULL`. 
-#'   Default is 100.
 #' @param vcov Versatile argument to specify the VCOV. See [`ar_test`] for details.
 #' @param ... Additional arguments passed to [`ar_test`].
 #'
@@ -328,8 +889,9 @@ print.fixest_ar = function(x, ...) {
 #' with multiple endogenous variables, the confidence region is multi-dimensional 
 #' and cannot be easily represented.
 #'
-#' The AR confidence interval is constructed by inverting the AR test: it includes 
-#' all values of beta0 for which the AR test does not reject at the specified level.
+#' For iid errors, an exact closed-form confidence interval is computed.
+#' For non-iid errors (clustered, heteroskedastic, etc.), the confidence interval
+#' is computed by numerically inverting the AR test.
 #'
 #' Note that AR confidence intervals may be:
 #' \itemize{
@@ -341,9 +903,8 @@ print.fixest_ar = function(x, ...) {
 #' @return
 #' An object of class `fixest_ar_confint` containing:
 #' \item{level}{The confidence level used.}
-#' \item{grid}{The grid of values tested.}
-#' \item{pvalues}{The p-values at each grid point.}
-#' \item{accept}{The grid values that were not rejected.}
+#' \item{method}{"AR".}
+#' \item{exact}{Logical, TRUE if exact closed-form was used.}
 #' \item{intervals}{A matrix with columns "lower" and "upper" giving the 
 #'   confidence interval(s). Each row is a separate interval (in case of 
 #'   disconnected regions).}
@@ -352,36 +913,34 @@ print.fixest_ar = function(x, ...) {
 #' \item{model}{The original `feols` IV object.}
 #'
 #' @seealso 
-#' [`ar_test`] for the Anderson-Rubin test, [`confint.fixest`] for standard 
-#' confidence intervals.
+#' [`ar_test`] for the Anderson-Rubin test with integrated CI computation.
 #'
 #' @examples
 #' # Simple IV example
 #' set.seed(123)
 #' n <- 500
 #' z <- rnorm(n)
-#' x <- 0.5 * z + rnorm(n)  # moderate instrument strength
+#' x <- 0.5 * z + rnorm(n)
 #' y <- 1 + 0.8 * x + rnorm(n)
 #' data <- data.frame(y = y, x = x, z = z)
 #'
 #' # IV estimation
 #' est <- feols(y ~ 1 | x ~ z, data = data)
 #'
-#' # AR confidence interval
-#' ar_ci <- ar_confint(est)
-#' print(ar_ci)
+#' # AR confidence interval (exact for iid)
+#' ar_confint(est)
 #'
-#' # Compare with standard confidence interval
-#' confint(est)
+#' # With clustered SEs (numeric inversion)
+#' data$cl <- rep(1:50, each = 10)
+#' est_cl <- feols(y ~ 1 | x ~ z, data = data, vcov = ~cl)
+#' ar_confint(est_cl)
 #'
 #' @export
-ar_confint = function(object, level = 0.95, grid = NULL, grid_range = NULL, 
-                      n_grid = 100, vcov = NULL, ...) {
+ar_confint = function(object, level = 0.95, vcov = NULL, ...) {
   
   # Input validation
   check_arg(object, "class(fixest)")
   check_arg(level, "numeric scalar GT{0} LT{1}")
-  check_arg(n_grid, "integer scalar GE{10}")
   
   # Check that object is an IV model
   if (!isTRUE(object$iv)) {
@@ -399,8 +958,7 @@ ar_confint = function(object, level = 0.95, grid = NULL, grid_range = NULL,
   
   endo_name <- endo_names[1]
   
-  # Get the point estimate and SE
-  # Need to handle the case where the coefficient name might have "fit_" prefix
+  # Get point estimate
   coef_names <- names(object$coefficients)
   endo_coef_name <- paste0("fit_", endo_name)
   if (!endo_coef_name %in% coef_names) {
@@ -414,83 +972,34 @@ ar_confint = function(object, level = 0.95, grid = NULL, grid_range = NULL,
   
   point_est <- object$coefficients[endo_coef_name]
   
-  # Get SE for grid construction
-  obj_sum <- summary(object, vcov = vcov, ...)
-  se_est <- se(obj_sum)[endo_coef_name]
-  
-  # Construct grid if not provided
-  if (is.null(grid)) {
-    if (is.null(grid_range)) {
-      # Default: point estimate +/- 5 SEs
-      grid_range <- c(point_est - 5 * se_est, point_est + 5 * se_est)
+  # Determine vcov type
+  if (is.null(vcov)) {
+    if (!is.null(object$cov.scaled)) {
+      vcov_type <- attr(object$cov.scaled, "type")
+    } else {
+      vcov_type <- "iid"
     }
-    check_arg(grid_range, "numeric vector len(2) no na")
-    grid <- seq(grid_range[1], grid_range[2], length.out = n_grid)
   } else {
-    check_arg(grid, "numeric vector no na")
+    # Run a quick test to determine the effective vcov type
+    test_result <- .ar_test_core(object, beta0 = 0, vcov = vcov, ...)
+    vcov_type <- test_result$vcov_type
   }
   
-  # Compute AR test for each grid point
-  pvalues <- numeric(length(grid))
-  for (i in seq_along(grid)) {
-    ar_res <- ar_test(object, beta0 = grid[i], vcov = vcov, ...)
-    pvalues[i] <- ar_res$pvalue
-  }
-  
-  # Identify accepted values (where p-value > 1 - level, i.e., not rejected)
-  alpha <- 1 - level
-  accept <- grid[pvalues > alpha]
+  is_iid <- identical(vcov_type, "IID") || identical(vcov_type, "iid") || 
+            identical(tolower(as.character(vcov_type)), "iid")
   
-  # Find contiguous intervals
-  if (length(accept) == 0) {
-    # Empty confidence set
-    intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
-    colnames(intervals) <- c("lower", "upper")
+  # Compute CI
+  if (is_iid) {
+    ci_result <- .ar_ci_exact_iid(object, level = level)
   } else {
-    # Find breaks in the accepted values
-    # Two consecutive grid values are in different intervals if there's a 
-    # rejected value between them
-    accept_idx <- which(pvalues > alpha)
-    breaks <- which(diff(accept_idx) > 1)
-    
-    # Build intervals
-    n_intervals <- length(breaks) + 1
-    intervals <- matrix(NA_real_, nrow = n_intervals, ncol = 2)
-    colnames(intervals) <- c("lower", "upper")
-    
-    start_idx <- 1
-    for (i in seq_len(n_intervals)) {
-      if (i <= length(breaks)) {
-        end_idx <- breaks[i]
-      } else {
-        end_idx <- length(accept_idx)
-      }
-      
-      interval_accept_idx <- accept_idx[start_idx:end_idx]
-      intervals[i, "lower"] <- grid[min(interval_accept_idx)]
-      intervals[i, "upper"] <- grid[max(interval_accept_idx)]
-      
-      start_idx <- end_idx + 1
-    }
-    
-    # Check if intervals extend to grid boundaries (possibly unbounded)
-    # Only check the boundaries of the extreme intervals
-    # First interval's lower bound: check if it touches the grid minimum
-    if (intervals[1, "lower"] == min(grid)) {
-      intervals[1, "lower"] <- -Inf
-    }
-    # Last interval's upper bound: check if it touches the grid maximum
-    if (intervals[n_intervals, "upper"] == max(grid)) {
-      intervals[n_intervals, "upper"] <- Inf
-    }
+    ci_result <- .ar_ci_numeric(object, level = level, vcov = vcov, ...)
   }
   
   res <- list(
     level = level,
-    grid = grid,
-    pvalues = pvalues,
-    accept = accept,
-    intervals = intervals,
+    method = ci_result$method,
+    exact = ci_result$exact,
+    intervals = ci_result$intervals,
     endo_name = endo_name,
     point_est = unname(point_est),
     model = object
@@ -506,51 +1015,9 @@ ar_confint = function(object, level = 0.95, grid = NULL, grid_range = NULL,
 #' @export
 print.fixest_ar_confint = function(x, ...) {
   
-  cat("Anderson-Rubin Confidence Interval\n")
-  cat("-----------------------------------\n")
-  cat("Variable: ", x$endo_name, "\n", sep = "")
-  cat("Level:    ", format(x$level * 100, digits = 2), "%\n", sep = "")
-  cat("\n")
-  
-  cat("2SLS point estimate: ", format(x$point_est, digits = 4), "\n", sep = "")
-  cat("\n")
-  
-  if (nrow(x$intervals) == 0) {
-    cat("AR Confidence Set: EMPTY\n")
-    cat("  (This may indicate model misspecification)\n")
-  } else if (nrow(x$intervals) == 1) {
-    lower <- x$intervals[1, "lower"]
-    upper <- x$intervals[1, "upper"]
-    
-    if (is.infinite(lower) && is.infinite(upper)) {
-      cat("AR Confidence Set: (-Inf, Inf)\n")
-      cat("  (Instruments may be very weak)\n")
-    } else if (is.infinite(lower)) {
-      cat("AR Confidence Set: (-Inf, ", format(upper, digits = 4), "]\n", sep = "")
-      cat("  (Lower bound may be unbounded - consider expanding the grid)\n")
-    } else if (is.infinite(upper)) {
-      cat("AR Confidence Set: [", format(lower, digits = 4), ", Inf)\n", sep = "")
-      cat("  (Upper bound may be unbounded - consider expanding the grid)\n")
-    } else {
-      cat("AR Confidence Set: [", format(lower, digits = 4), ", ", 
-          format(upper, digits = 4), "]\n", sep = "")
-    }
-  } else {
-    cat("AR Confidence Set: (disconnected)\n")
-    for (i in seq_len(nrow(x$intervals))) {
-      lower <- x$intervals[i, "lower"]
-      upper <- x$intervals[i, "upper"]
-      
-      lower_bracket <- if (is.infinite(lower)) "(" else "["
-      upper_bracket <- if (is.infinite(upper)) ")" else "]"
-      lower_str <- if (is.infinite(lower)) "-Inf" else format(lower, digits = 4)
-      upper_str <- if (is.infinite(upper)) "Inf" else format(upper, digits = 4)
-      
-      cat("  Interval ", i, ": ", lower_bracket, lower_str, ", ", 
-          upper_str, upper_bracket, "\n", sep = "")
-    }
-    cat("  (Multiple intervals may indicate misspecification or non-convex confidence region)\n")
-  }
+  cat("2SLS point estimate: ", format(x$point_est, digits = 4), "\n\n", sep = "")
+  .print_ar_ci(list(level = x$level, exact = x$exact, intervals = x$intervals), 
+               x$endo_name)
   
   invisible(x)
 }
diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
index 390d984c..3764e365 100644
--- a/tests/fixest_tests.R
+++ b/tests/fixest_tests.R
@@ -3198,47 +3198,61 @@ test(ar_res_null$pvalue < 0.05, TRUE)  # should reject
 ar_res_true = ar_test(est_iv_ar, beta0 = true_beta)
 test(ar_res_true$pvalue > 0.05, TRUE)  # should not reject
 
-# Test 4: AR test returns correct structure
+# Test 4: AR test returns correct structure (now includes dist field)
 test(is.numeric(ar_res$stat), TRUE)
 test(is.numeric(ar_res$pvalue), TRUE)
 test(is.numeric(ar_res$df1), TRUE)
-test(is.numeric(ar_res$df2), TRUE)
 test(ar_res$method, "Anderson-Rubin test")
+test(ar_res$dist %in% c("F", "Chi-squared"), TRUE)
 
-# Test 5: AR test with clustered SEs
+# Test 5: AR test with iid should use F distribution and include CI by default
+test(ar_res$dist, "F")
+test(!is.null(ar_res$ci), TRUE)  # CI should be included for iid + single endo
+
+# Test 6: AR test with clustered SEs should use Chi-squared
 est_iv_cl = feols(y ~ 1 | x ~ z, data = data_ar, vcov = ~cl)
 ar_res_cl = ar_test(est_iv_cl)
 test("fixest_ar" %in% class(ar_res_cl), TRUE)
+test(ar_res_cl$dist, "Chi-squared")
+test(is.na(ar_res_cl$df2), TRUE)  # df2 should be NA for Chi-squared
 
-# Test 6: AR test should error on non-IV model
+# Test 7: AR test should error on non-IV model
 est_ols = feols(y ~ x, data = data_ar)
 test(ar_test(est_ols), "err")
 
-# Test 7: ar_confint should work for single endogenous variable
-ar_ci = ar_confint(est_iv_ar, grid_range = c(0, 2), n_grid = 20)
+# Test 8: ar_confint should work for single endogenous variable
+ar_ci = ar_confint(est_iv_ar)
 test("fixest_ar_confint" %in% class(ar_ci), TRUE)
-test(ar_ci$point_est > 0, TRUE)  # point estimate should be positive
-test(nrow(ar_ci$intervals) >= 1, TRUE)  # should have at least one interval
+test(ar_ci$point_est > 0, TRUE)
+test(nrow(ar_ci$intervals) >= 1, TRUE)
 
-# Test 8: IV with controls and fixed effects
-data_ar$w = rnorm(n)  # exogenous control
-data_ar$fe = rep(1:5, each = 100)  # fixed effect
+# Test 9: IV with controls and fixed effects
+data_ar$w = rnorm(n)
+data_ar$fe = rep(1:5, each = 100)
 est_iv_fe = feols(y ~ w | fe | x ~ z, data = data_ar)
 ar_res_fe = ar_test(est_iv_fe)
 test("fixest_ar" %in% class(ar_res_fe), TRUE)
 
-# Test 9: Multiple instruments  
+# Test 10: Multiple instruments  
 data_ar$z2 = 0.3 * x + rnorm(n, sd = 0.8)
 est_iv_2inst = feols(y ~ 1 | x ~ z + z2, data = data_ar)
 ar_res_2inst = ar_test(est_iv_2inst)
 test("fixest_ar" %in% class(ar_res_2inst), TRUE)
 
-# Test 10: ar_confint errors with multiple endogenous vars
+# Test 11: ar_confint errors with multiple endogenous vars
 data_ar$x2 = 0.3 * z + rnorm(n)
 est_iv_2endo = feols(y ~ 1 | x + x2 ~ z + z2, data = data_ar)
-test(ar_confint(est_iv_2endo), "err")  # should error with multiple endo vars
+test(ar_confint(est_iv_2endo), "err")
+
+# Test 12: ar_test with ci=TRUE should compute CI even for non-iid
+ar_res_cl_ci = ar_test(est_iv_cl, ci = TRUE)
+test(!is.null(ar_res_cl_ci$ci), TRUE)
+
+# Test 13: ar_test with ci=FALSE should not compute CI
+ar_res_no_ci = ar_test(est_iv_ar, ci = FALSE)
+test(is.null(ar_res_no_ci$ci), TRUE)
 
-# Test 11: Verify print methods work without error
+# Test 14: Verify print methods work without error
 print(ar_res)
 print(ar_ci)
 

From 38fdad2edd361abc756031a53263030d435a79a9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 27 Nov 2025 22:05:32 +0000
Subject: [PATCH 08/20] Fix floating-point comparison and remove leftover code
 in numeric CI

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>
---
 R/ar_test.R | 37 ++++++++++---------------------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 6aedd067..e36431e5 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -489,52 +489,35 @@
     n_roots <- length(roots)
     
     if (g_values[1] <= 0) {
-      # Left edge is accepted
-      # Intervals: (-Inf or grid_min, root1], [root2, root3], ..., [rootN, Inf or grid_max]
-      # Number of intervals = (n_roots + 1) / 2 rounded up
-      
-      # Check if extends to left boundary
-      left_unbounded <- (grid[1] == grid_range[1])
-      right_unbounded <- (grid[n_grid] == grid_range[2]) && 
-                         (g_values[n_grid] <= 0)
-      
-      # Build interval list
+      # Left edge is accepted - first interval starts at -Inf
       interval_list <- list()
       
-      # First interval
+      # First interval: from -Inf to first root
       if (n_roots >= 1) {
-        lower <- if (left_unbounded) -Inf else grid_range[1]
-        interval_list[[1]] <- c(lower, roots[1])
+        interval_list[[1]] <- c(-Inf, roots[1])
       }
       
-      # Middle intervals (pairs of roots where acceptance resumes)
+      # Subsequent intervals: pairs of roots (entry, exit)
       if (n_roots >= 2) {
         for (j in seq(2, n_roots, by = 2)) {
           if (j + 1 <= n_roots) {
             interval_list[[length(interval_list) + 1]] <- c(roots[j], roots[j + 1])
-          } else if (j == n_roots) {
-            # Last root, check if extends to right
-            upper <- if (right_unbounded) Inf else grid_range[2]
-            interval_list[[length(interval_list) + 1]] <- c(roots[j], upper)
+          } else {
+            # Odd root at end - extends to infinity
+            interval_list[[length(interval_list) + 1]] <- c(roots[j], Inf)
           }
         }
       }
       
     } else {
-      # Left edge is rejected
-      # Intervals: [root1, root2], [root3, root4], ...
-      
-      right_unbounded <- (grid[n_grid] == grid_range[2]) && 
-                         (g_values[n_grid] <= 0)
-      
+      # Left edge is rejected - intervals start at roots
       interval_list <- list()
       for (j in seq(1, n_roots, by = 2)) {
         if (j + 1 <= n_roots) {
           interval_list[[length(interval_list) + 1]] <- c(roots[j], roots[j + 1])
         } else {
-          # Odd number of roots, last one extends to right
-          upper <- if (right_unbounded) Inf else grid_range[2]
-          interval_list[[length(interval_list) + 1]] <- c(roots[j], upper)
+          # Odd number of roots - last interval extends to infinity
+          interval_list[[length(interval_list) + 1]] <- c(roots[j], Inf)
         }
       }
     }

From cb17c64d34b13af3a7faa19e941abbdea80b4600 Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Thu, 27 Nov 2025 22:48:54 +0000
Subject: [PATCH 09/20] use correct degrees of freedom + formatting

---
 R/ar_test.R | 564 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 349 insertions(+), 215 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index e36431e5..4364d38f 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -4,7 +4,6 @@
 # ~: Anderson-Rubin test for IV models
 #----------------------------------------------#
 
-
 # =============================================================================
 # INTERNAL CORE FUNCTION
 # =============================================================================
@@ -16,7 +15,7 @@
 #' It is called by both ar_test() and ar_confint().
 #'
 #' @param object A `fixest` object obtained from [`feols`] with an IV specification.
-#' @param beta0 Numeric vector. The hypothesized values for the endogenous variable 
+#' @param beta0 Numeric vector. The hypothesized values for the endogenous variable
 #'   coefficients under the null hypothesis.
 #' @param vcov Versatile argument to specify the VCOV.
 #' @param ... Additional arguments passed to [`feols`].
@@ -34,28 +33,32 @@
 #'
 #' @keywords internal
 .ar_test_core = function(object, beta0, vcov = NULL, ...) {
-  
   # Get endogenous variable names and instrument names
   endo_names <- object$iv_endo_names
-  inst_names <- object$iv_inst_names_xpd  # expanded instrument names
+  inst_names <- object$iv_inst_names_xpd # expanded instrument names
   n_endo <- length(endo_names)
-  
+
   # Name beta0 for clarity
   names(beta0) <- endo_names
-  
+
   # Fetch the data
   data <- fetch_data(object, "To apply AR test, ")
-  
+
   # Get the dependent variable
   fml_linear <- formula(object, type = "linear")
   y_vec <- eval(fml_linear[[2]], data)
-  
+
   # Get the endogenous variable matrix
-  endo_mat <- model.matrix(object, data = data, type = "iv.endo", collin.rm = FALSE)
-  
+  endo_mat <- model.matrix(
+    object,
+    data = data,
+    type = "iv.endo",
+    collin.rm = FALSE
+  )
+
   # Compute transformed outcome: y* = y - X * beta0
   y_tilde <- as.numeric(y_vec - endo_mat %*% beta0)
-  
+
   # Store y_tilde in a temporary column with unique name
   temp_y_name <- "..ar_y_tilde"
   # Ensure uniqueness if column already exists (with a maximum of 100 attempts)
@@ -65,14 +68,16 @@
     attempt <- attempt + 1
   }
   if (temp_y_name %in% names(data)) {
-    stop("Could not create a unique temporary column name in the data. ",
-         "This is unexpected - please report this issue.")
+    stop(
+      "Could not create a unique temporary column name in the data. ",
+      "This is unexpected - please report this issue."
+    )
   }
   data[[temp_y_name]] <- y_tilde
-  
+
   # Build the AR regression formula
   # We need: y_tilde ~ controls + instruments | fixed_effects
-  
+
   # Get the exogenous controls part (the linear formula RHS)
   fml_linear_rhs <- object$fml_all$linear
   if (length(fml_linear_rhs) == 3) {
@@ -80,11 +85,11 @@
   } else {
     controls_str <- "1"
   }
-  
+
   # Get the instrument formula RHS
   fml_iv <- object$fml_all$iv
   inst_str <- deparse_long(fml_iv[[3]])
-  
+
   # Get the fixed effects part if any
   fml_fe <- object$fml_all$fixef
   if (!is.null(fml_fe)) {
@@ -92,17 +97,24 @@
   } else {
     fe_str <- ""
   }
-  
+
   # Build the new formula: y_tilde ~ controls + instruments | FE
-  new_fml_str <- paste0(temp_y_name, " ~ ", controls_str, " + ", inst_str, fe_str)
+  new_fml_str <- paste0(
+    temp_y_name,
+    " ~ ",
+    controls_str,
+    " + ",
+    inst_str,
+    fe_str
+  )
   new_fml <- as.formula(new_fml_str)
-  
+
   # Get weights if present
   weights_val <- NULL
   if (!is.null(object$weights)) {
     weights_val <- object$weights
   }
-  
+
   # Determine vcov specification
   # If not specified, try to get from original object
   if (is.null(vcov)) {
@@ -113,20 +125,26 @@
       vcov <- "iid"
     }
   }
-  
+
   # Run the AR regression - inherit settings from original object where possible
-  ar_fit <- feols(new_fml, data = data, weights = weights_val, vcov = vcov, 
-                  notes = FALSE, ...)
-  
+  ar_fit <- feols(
+    new_fml,
+    data = data,
+    weights = weights_val,
+    vcov = vcov,
+    notes = FALSE,
+    ...
+  )
+
   # Summarize to ensure we have the VCOV
   ar_fit_sum <- summary(ar_fit, vcov = vcov, ...)
-  
+
   # Get the coefficient names in the AR regression that correspond to instruments
   ar_coef_names <- names(ar_fit_sum$coefficients)
-  
+
   # Find which coefficients correspond to instruments
   inst_in_fit <- intersect(ar_coef_names, inst_names)
-  
+
   if (length(inst_in_fit) == 0) {
     # Try a more flexible approach using the instrument formula variables
     inst_formula_vars <- all.vars(fml_iv[[3]])
@@ -138,24 +156,27 @@
     }
     inst_in_fit <- unique(inst_in_fit)
   }
-  
+
   if (length(inst_in_fit) == 0) {
-    stop("Could not identify instrument coefficients in the AR regression. ",
-         "This may be a bug - please report it.")
+    stop(
+      "Could not identify instrument coefficients in the AR regression. ",
+      "This may be a bug - please report it."
+    )
   }
-  
+
   # Perform the Wald test on the instrument coefficients
   wald_result <- wald(ar_fit_sum, keep = inst_in_fit, print = FALSE)
-  
+
   # Determine if vcov is iid or not for choosing distribution
   vcov_type <- wald_result$vcov
-  is_iid <- identical(vcov_type, "IID") || identical(vcov_type, "iid") || 
-            identical(tolower(as.character(vcov_type)), "iid")
-  
+  is_iid <- identical(vcov_type, "IID") ||
+    identical(vcov_type, "iid") ||
+    identical(tolower(as.character(vcov_type)), "iid")
+
   # Compute statistic and p-value based on vcov type
   df1 <- wald_result$df1
   df2 <- wald_result$df2
-  
+
   if (is_iid) {
     # For iid: use F-distribution (exact under homoskedastic errors)
     stat <- wald_result$stat
@@ -169,7 +190,7 @@
     dist <- "Chi-squared"
     df2 <- NA_real_
   }
-  
+
   # Return results
   list(
     stat = stat,
@@ -208,44 +229,58 @@
 #'
 #' @keywords internal
 .ar_ci_exact_iid = function(object, level = 0.95) {
-  
   # Fetch data
   data <- fetch_data(object, "To compute AR CI, ")
-  
+
   # Get y, endogenous variable, and instruments
   fml_linear <- formula(object, type = "linear")
   y <- eval(fml_linear[[2]], data)
-  
-  endo_mat <- model.matrix(object, data = data, type = "iv.endo", collin.rm = FALSE)
-  inst_mat <- model.matrix(object, data = data, type = "iv.inst", collin.rm = FALSE)
-  
+
+  endo_mat <- model.matrix(
+    object,
+    data = data,
+    type = "iv.endo",
+    collin.rm = FALSE
+  )
+  inst_mat <- model.matrix(
+    object,
+    data = data,
+    type = "iv.inst",
+    collin.rm = FALSE
+  )
+
   # Get exogenous controls (if any)
-  exo_mat <- model.matrix(object, data = data, type = "iv.exo", collin.rm = FALSE)
-  
+  exo_mat <- model.matrix(
+    object,
+    data = data,
+    type = "iv.exo",
+    collin.rm = FALSE
+  )
+
   n <- length(y)
-  L <- ncol(inst_mat)  # number of instruments
-  
+  L <- ncol(inst_mat) # number of instruments
+
   # Check for fixed effects - if present, we need to demean
   has_fe <- !is.null(object$fixef_vars)
-  
+
   if (has_fe || !is.null(exo_mat)) {
     # We need to partial out controls and/or fixed effects
     # Use demean if FE present, otherwise just residualize
-    
+
     if (has_fe) {
       # Demean all variables with respect to fixed effects
       dm_data <- demean(object)
-      
+
       # Extract demeaned variables - the order is: y, then X (controls), then IV parts
       # We need to reconstruct the demeaned versions
       # For simplicity, re-run the demeaning manually
-      
+
       # Create a combined matrix of all variables to demean
       all_vars <- cbind(y, endo_mat, inst_mat)
       if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
         all_vars <- cbind(all_vars, exo_mat)
       }
-      
+
       # Use fixest's demeaning
       fixef_id <- object$fixef_id
       slope_flag <- object$slope_flag
@@ -253,23 +288,25 @@
       if (!is.null(object$slope_variables_reordered)) {
         slope_vars <- object$slope_variables_reordered
       }
-      
+
       # Demean using the internal algorithm
-      all_vars_dm <- cpp_demean(all_vars, 
-                                fixef_id, 
-                                weights = object$weights,
-                                iterMax = object$fixef.iter,
-                                diffMax = object$fixef.tol,
-                                nthreads = 1L,
-                                fixef.algo = demeaning_algo(internal = TRUE),
-                                slope.flag = if(!is.null(slope_flag)) slope_flag else integer(0),
-                                slope.vars = if(!is.null(slope_vars)) slope_vars else matrix(0, 0, 0),
-                                notes = FALSE)
-      
+      all_vars_dm <- cpp_demean(
+        all_vars,
+        fixef_id,
+        weights = object$weights,
+        iterMax = object$fixef.iter,
+        diffMax = object$fixef.tol,
+        nthreads = 1L,
+        fixef.algo = demeaning_algo(internal = TRUE),
+        slope.flag = if (!is.null(slope_flag)) slope_flag else integer(0),
+        slope.vars = if (!is.null(slope_vars)) slope_vars else matrix(0, 0, 0),
+        notes = FALSE
+      )
+
       y_star <- all_vars_dm[, 1]
       d_star <- all_vars_dm[, 2]
       z_star <- all_vars_dm[, 2 + seq_len(L)]
-      
+
       # If there are exogenous controls, also partial them out from the residuals
       if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
         exo_dm <- all_vars_dm[, (2 + L + 1):ncol(all_vars_dm), drop = FALSE]
@@ -280,56 +317,63 @@
         d_star <- qr.resid(qr_exo, d_star)
         z_star <- qr.resid(qr_exo, z_star)
       }
-      
+
       # Adjust degrees of freedom for FE
-      k <- sum(object$fixef_sizes - 1) + 1  # number of FE parameters absorbed
+      k <- sum(object$fixef_sizes - 1) + 1 # number of FE parameters absorbed
       if (!is.null(exo_mat)) k <- k + ncol(exo_mat)
-      
     } else {
-      # No FE, just partial out exogenous controls
-      qr_exo <- qr(cbind(1, exo_mat))  # include intercept
-      y_star <- qr.resid(qr_exo, y)
-      d_star <- qr.resid(qr_exo, endo_mat[, 1])
-      z_star <- qr.resid(qr_exo, inst_mat)
-      k <- ncol(exo_mat) + 1  # controls + intercept
+      # No FE, just partial out exogenous controls (including intercept)
+      if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
+        qr_exo <- qr(exo_mat) # exo_mat should be the full design
+        y_star <- qr.resid(qr_exo, y)
+        d_star <- qr.resid(qr_exo, endo_mat[, 1])
+        z_star <- qr.resid(qr_exo, inst_mat)
+        k <- qr_exo$rank # rank, not ncol(exo_mat)+1
+      } else {
+        # No FE, no controls - just demean for intercept
+        y_star <- y - mean(y)
+        d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
+        z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
+        k <- 1
+      }
     }
-    
   } else {
     # No FE, no controls - just demean for intercept
     y_star <- y - mean(y)
     d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
     z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
-    k <- 1  # just intercept
+    k <- 1 # just intercept
   }
-  
+
   # Compute the quadratic coefficients
   # S_DD = sum(D*^2), S_YY = sum(Y*^2), S_DY = sum(D* * Y*)
   S_DD <- sum(d_star^2)
   S_YY <- sum(y_star^2)
   S_DY <- sum(d_star * y_star)
-  
+
   # P_Y = projection of Y* onto Z*, P_D = projection of D* onto Z*
   qr_z <- qr(z_star)
+  L <- qr_z$rank # instead of ncol(inst_mat) at the top
   P_Y <- qr.fitted(qr_z, y_star)
   P_D <- qr.fitted(qr_z, d_star)
-  
+
   S_PDD <- sum(P_D^2)
   S_PYY <- sum(P_Y^2)
   S_PDY <- sum(P_D * P_Y)
-  
+
   # Critical value
   alpha <- 1 - level
   Fcrit <- qf(1 - alpha, df1 = L, df2 = n - k - L)
   c_val <- Fcrit * L / (n - k - L)
-  
+
   # Quadratic coefficients for: A * beta0^2 + B * beta0 + C >= 0
   A <- c_val * S_DD - (c_val + 1) * S_PDD
   B <- -2 * c_val * S_DY + 2 * (c_val + 1) * S_PDY
   C <- c_val * S_YY - (c_val + 1) * S_PYY
-  
+
   # Solve the quadratic inequality
   discriminant <- B^2 - 4 * A * C
-  
+
   if (abs(A) < 1e-14) {
     # Linear case: B * beta0 + C >= 0
     if (abs(B) < 1e-14) {
@@ -363,25 +407,30 @@
     sqrt_disc <- sqrt(discriminant)
     root1 <- (-B - sqrt_disc) / (2 * A)
     root2 <- (-B + sqrt_disc) / (2 * A)
-    
+
     # Ensure root1 < root2
     if (root1 > root2) {
       tmp <- root1
       root1 <- root2
       root2 <- tmp
     }
-    
+
     if (A > 0) {
       # Parabola opens upward: f(beta0) >= 0 for beta0 <= root1 or beta0 >= root2
-      intervals <- matrix(c(-Inf, root1, root2, Inf), nrow = 2, ncol = 2, byrow = TRUE)
+      intervals <- matrix(
+        c(-Inf, root1, root2, Inf),
+        nrow = 2,
+        ncol = 2,
+        byrow = TRUE
+      )
     } else {
       # Parabola opens downward: f(beta0) >= 0 for root1 <= beta0 <= root2
       intervals <- matrix(c(root1, root2), nrow = 1, ncol = 2)
     }
   }
-  
+
   colnames(intervals) <- c("lower", "upper")
-  
+
   list(
     method = "AR",
     exact = TRUE,
@@ -415,47 +464,52 @@
 #' \item{intervals}{Matrix with columns "lower" and "upper"}
 #'
 #' @keywords internal
-.ar_ci_numeric = function(object, level = 0.95, vcov = NULL, n_grid = 200, 
-                          tol = 1e-8, ...) {
-  
+.ar_ci_numeric = function(
+  object,
+  level = 0.95,
+  vcov = NULL,
+  n_grid = 200,
+  tol = 1e-8,
+  ...
+) {
   # Get point estimate and SE for grid construction
   endo_names <- object$iv_endo_names
   endo_name <- endo_names[1]
-  
+
   coef_names <- names(object$coefficients)
   endo_coef_name <- paste0("fit_", endo_name)
   if (!endo_coef_name %in% coef_names) {
     endo_coef_name <- endo_name
   }
-  
+
   point_est <- object$coefficients[endo_coef_name]
   obj_sum <- summary(object, vcov = vcov, ...)
   se_est <- se(obj_sum)[endo_coef_name]
-  
+
   # Default grid range: point estimate +/- 5 SEs
   grid_range <- c(point_est - 5 * se_est, point_est + 5 * se_est)
   grid <- seq(grid_range[1], grid_range[2], length.out = n_grid)
-  
+
   # Get the critical value
   # First, run one AR test to determine distribution type
   test_result <- .ar_test_core(object, beta0 = point_est, vcov = vcov, ...)
-  
+
   if (test_result$dist == "F") {
     crit <- qf(level, df1 = test_result$df1, df2 = test_result$df2)
   } else {
     crit <- qchisq(level, df = test_result$df1)
   }
-  
+
   # Evaluate g(beta0) = stat(beta0) - crit on the coarse grid
   g_values <- numeric(n_grid)
   for (i in seq_along(grid)) {
     ar_res <- .ar_test_core(object, beta0 = grid[i], vcov = vcov, ...)
     g_values[i] <- ar_res$stat - crit
   }
-  
+
   # Find sign changes (brackets for roots)
   sign_changes <- which(diff(sign(g_values)) != 0)
-  
+
   if (length(sign_changes) == 0) {
     # No sign changes - either all accepted or all rejected
     if (g_values[1] <= 0) {
@@ -468,69 +522,76 @@
   } else {
     # Find roots using uniroot
     roots <- numeric(length(sign_changes))
-    
+
     g_func <- function(beta0) {
       ar_res <- .ar_test_core(object, beta0 = beta0, vcov = vcov, ...)
       ar_res$stat - crit
     }
-    
+
     for (i in seq_along(sign_changes)) {
       idx <- sign_changes[i]
-      root_result <- uniroot(g_func, 
-                            interval = c(grid[idx], grid[idx + 1]),
-                            tol = tol)
+      root_result <- uniroot(
+        g_func,
+        interval = c(grid[idx], grid[idx + 1]),
+        tol = tol
+      )
       roots[i] <- root_result$root
     }
-    
+
     roots <- sort(roots)
-    
+
     # Build intervals based on which regions have g <= 0 (accepted)
     # Check the sign at the leftmost point to determine pattern
     n_roots <- length(roots)
-    
+
     if (g_values[1] <= 0) {
       # Left edge is accepted - first interval starts at -Inf
       interval_list <- list()
-      
+
       # First interval: from -Inf to first root
       if (n_roots >= 1) {
         interval_list[[1]] <- c(-Inf, roots[1])
       }
-      
+
       # Subsequent intervals: pairs of roots (entry, exit)
       if (n_roots >= 2) {
         for (j in seq(2, n_roots, by = 2)) {
           if (j + 1 <= n_roots) {
-            interval_list[[length(interval_list) + 1]] <- c(roots[j], roots[j + 1])
+            interval_list[[length(interval_list) + 1]] <- c(
+              roots[j],
+              roots[j + 1]
+            )
           } else {
             # Odd root at end - extends to infinity
             interval_list[[length(interval_list) + 1]] <- c(roots[j], Inf)
           }
         }
       }
-      
     } else {
       # Left edge is rejected - intervals start at roots
       interval_list <- list()
       for (j in seq(1, n_roots, by = 2)) {
         if (j + 1 <= n_roots) {
-          interval_list[[length(interval_list) + 1]] <- c(roots[j], roots[j + 1])
+          interval_list[[length(interval_list) + 1]] <- c(
+            roots[j],
+            roots[j + 1]
+          )
         } else {
           # Odd number of roots - last interval extends to infinity
           interval_list[[length(interval_list) + 1]] <- c(roots[j], Inf)
         }
       }
     }
-    
+
     if (length(interval_list) == 0) {
       intervals <- matrix(NA_real_, nrow = 0, ncol = 2)
     } else {
       intervals <- do.call(rbind, interval_list)
     }
   }
-  
+
   colnames(intervals) <- c("lower", "upper")
-  
+
   list(
     method = "AR",
     exact = FALSE,
@@ -547,20 +608,20 @@
 #' Anderson-Rubin test for IV models
 #'
 #' @description
-#' Performs the Anderson-Rubin test for instrumental variable (IV) models. 
-#' This test is robust to weak instruments and can be used to test hypotheses 
+#' Performs the Anderson-Rubin test for instrumental variable (IV) models.
+#' This test is robust to weak instruments and can be used to test hypotheses
 #' about the coefficients of endogenous variables.
 #'
-#' @param object A `fixest` object obtained from [`feols`] with an IV specification 
+#' @param object A `fixest` object obtained from [`feols`] with an IV specification
 #'   (i.e., formula includes `| endo ~ inst`).
-#' @param beta0 Numeric vector. The hypothesized values for the endogenous variable 
-#'   coefficients under the null hypothesis. If `NULL` (default), tests 
-#'   H0: beta = 0 for all endogenous variables. The length must match the number 
+#' @param beta0 Numeric vector. The hypothesized values for the endogenous variable
+#'   coefficients under the null hypothesis. If `NULL` (default), tests
+#'   H0: beta = 0 for all endogenous variables. The length must match the number
 #'   of endogenous variables in the model.
-#' @param vcov Versatile argument to specify the VCOV. If `NULL` (default), uses 
-#'   the same VCOV specification as the original model. See [`vcov.fixest`] for 
+#' @param vcov Versatile argument to specify the VCOV. If `NULL` (default), uses
+#'   the same VCOV specification as the original model. See [`vcov.fixest`] for
 #'   details on available options.
-#' @param level Numeric scalar between 0 and 1. The confidence level for the 
+#' @param level Numeric scalar between 0 and 1. The confidence level for the
 #'   optional confidence interval. Default is 0.95.
 #' @param ci Logical or NULL. Whether to compute an AR confidence interval.
 #'   If `NULL` (default):
@@ -571,25 +632,25 @@
 #' @param ... Additional arguments passed to [`summary.fixest`].
 #'
 #' @details
-#' The Anderson-Rubin (AR) test is a weak-instrument robust test for the null 
-#' hypothesis H0: beta = beta0, where beta is the vector of coefficients on the 
+#' The Anderson-Rubin (AR) test is a weak-instrument robust test for the null
+#' hypothesis H0: beta = beta0, where beta is the vector of coefficients on the
 #' endogenous variables.
 #'
 #' The test proceeds as follows:
 #' \enumerate{
-#'   \item Construct the transformed outcome: y* = y - X * beta0, where X contains 
+#'   \item Construct the transformed outcome: y* = y - X * beta0, where X contains
 #'     the endogenous variables.
-#'   \item Regress y* on the exogenous controls (W), instruments (Z), and any 
+#'   \item Regress y* on the exogenous controls (W), instruments (Z), and any
 #'     fixed effects, using the same VCOV specification as the original model.
 #'   \item Test the joint significance of the instrument coefficients (H0: pi = 0).
 #' }
 #'
-#' For iid (homoskedastic) errors, the AR test statistic follows an exact 
-#' F-distribution. For non-iid errors (heteroskedastic, clustered, etc.), 
+#' For iid (homoskedastic) errors, the AR test statistic follows an exact
+#' F-distribution. For non-iid errors (heteroskedastic, clustered, etc.),
 #' a Chi-squared asymptotic distribution is used.
 #'
 #' When `ci = TRUE` (or by default for iid with single endo var):
-#' - For iid errors with a single endogenous variable, an exact closed-form 
+#' - For iid errors with a single endogenous variable, an exact closed-form
 #'   AR confidence interval is computed using the quadratic formula.
 #' - For non-iid errors, the CI is computed by numerically inverting the AR test.
 #'
@@ -609,12 +670,12 @@
 #' \item{ci}{(Optional) A list containing the AR confidence interval, with elements:
 #'   `method`, `exact`, `level`, `intervals`.}
 #'
-#' @seealso 
+#' @seealso
 #' [`wald`] for standard Wald tests, [`feols`] for IV estimation.
 #'
 #' @references
-#' Anderson, T. W. and Rubin, H. (1949). "Estimation of the Parameters of a Single 
-#' Equation in a Complete System of Stochastic Equations." Annals of Mathematical 
+#' Anderson, T. W. and Rubin, H. (1949). "Estimation of the Parameters of a Single
+#' Equation in a Complete System of Stochastic Equations." Annals of Mathematical
 #' Statistics, 20(1), 46-63.
 #'
 #' @examples
@@ -647,47 +708,66 @@
 #' ar_test(est_cl, ci = TRUE)
 #'
 #' @export
-ar_test = function(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, ...) {
-  
+ar_test = function(
+  object,
+  beta0 = NULL,
+  vcov = NULL,
+  level = 0.95,
+  ci = NULL,
+  ...
+) {
   # Input validation
   check_arg(object, "class(fixest)")
   check_arg(level, "numeric scalar GT{0} LT{1}")
-  
+
   # Check that object is an IV model
   if (!isTRUE(object$iv)) {
-    stop("The `ar_test` function requires an IV model. ",
-         "The input object is not an IV estimation (no endogenous variables/instruments found). ",
-         "Please provide a `feols` object estimated with an IV formula ",
-         "(e.g., `feols(y ~ controls | endo ~ inst, data)`).")
+    stop(
+      "The `ar_test` function requires an IV model. ",
+      "The input object is not an IV estimation (no endogenous variables/instruments found). ",
+      "Please provide a `feols` object estimated with an IV formula ",
+      "(e.g., `feols(y ~ controls | endo ~ inst, data)`)."
+    )
   }
-  
+
   # Check that it's a second stage estimation
   if (isTRUE(object$iv_stage == 1)) {
-    stop("The `ar_test` function requires a second-stage IV estimation. ",
-         "The input object appears to be a first-stage estimation. ",
-         "Please use the main IV estimation result, not `summary(est, stage = 1)`.")
+    stop(
+      "The `ar_test` function requires a second-stage IV estimation. ",
+      "The input object appears to be a first-stage estimation. ",
+      "Please use the main IV estimation result, not `summary(est, stage = 1)`."
+    )
   }
-  
+
   # Get endogenous variable names
   endo_names <- object$iv_endo_names
   n_endo <- length(endo_names)
-  
+
   # Handle beta0
   if (is.null(beta0)) {
     beta0 <- rep(0, n_endo)
   } else {
     check_arg(beta0, "numeric vector no na")
     if (length(beta0) != n_endo) {
-      stop("Argument `beta0` has length ", length(beta0), " but there are ", 
-           n_endo, " endogenous variables (", paste(endo_names, collapse = ", "), "). ",
-           "Please provide a vector of length ", n_endo, ".")
+      stop(
+        "Argument `beta0` has length ",
+        length(beta0),
+        " but there are ",
+        n_endo,
+        " endogenous variables (",
+        paste(endo_names, collapse = ", "),
+        "). ",
+        "Please provide a vector of length ",
+        n_endo,
+        "."
+      )
     }
   }
   names(beta0) <- endo_names
-  
+
   # Run the core test
   core_result <- .ar_test_core(object, beta0 = beta0, vcov = vcov, ...)
-  
+
   # Build the result object
   res <- list(
     beta0 = beta0,
@@ -703,10 +783,10 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, .
     inst_tested = core_result$inst_tested,
     model = object
   )
-  
+
   # Decide whether to compute CI
   compute_ci <- FALSE
-  
+
   if (is.null(ci)) {
     # Default: compute CI only if iid and single endo var
     if (core_result$is_iid && n_endo == 1) {
@@ -714,14 +794,16 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, .
     }
   } else if (isTRUE(ci)) {
     if (n_endo > 1) {
-      warning("Cannot compute AR confidence interval for models with multiple ",
-              "endogenous variables. Returning test result only.")
+      warning(
+        "Cannot compute AR confidence interval for models with multiple ",
+        "endogenous variables. Returning test result only."
+      )
     } else {
       compute_ci <- TRUE
     }
   }
   # If ci = FALSE, compute_ci stays FALSE
-  
+
   # Compute CI if requested
   if (compute_ci) {
     if (core_result$is_iid) {
@@ -733,9 +815,9 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, .
     }
     res$ci <- ci_result
   }
-  
+
   class(res) <- "fixest_ar"
-  
+
   return(res)
 }
 
@@ -743,22 +825,28 @@ ar_test = function(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, .
 #' @rdname ar_test
 #' @export
 print.fixest_ar = function(x, ...) {
-  
   cat("Anderson-Rubin Test\n")
   cat("-------------------\n")
-  
+
   # Show the null hypothesis
   endo_str <- paste(x$endo_names, collapse = ", ")
   beta0_str <- paste(format(x$beta0, digits = 4), collapse = ", ")
-  
+
   if (length(x$endo_names) == 1) {
-    cat("H0: ", x$endo_names, " = ", format(x$beta0, digits = 4), "\n", sep = "")
+    cat(
+      "H0: ",
+      x$endo_names,
+      " = ",
+      format(x$beta0, digits = 4),
+      "\n",
+      sep = ""
+    )
   } else {
     cat("H0: (", endo_str, ") = (", beta0_str, ")\n", sep = "")
   }
-  
+
   cat("\n")
-  
+
   # Show test statistics with distribution info
   if (x$dist == "F") {
     cat("Stat (F):     ", format(x$stat, digits = 4), "\n", sep = "")
@@ -767,7 +855,7 @@ print.fixest_ar = function(x, ...) {
     cat("Stat (Chi2):  ", format(x$stat, digits = 4), "\n", sep = "")
     cat("DoF:          ", x$df1, "\n", sep = "")
   }
-  
+
   # Format p-value
   if (x$pvalue < 2.2e-16) {
     cat("P-value:      < 2.2e-16 ***\n")
@@ -782,18 +870,18 @@ print.fixest_ar = function(x, ...) {
   } else {
     cat("P-value:      ", format(x$pvalue, digits = 4), "\n", sep = "")
   }
-  
+
   cat("Distribution: ", x$dist, "\n", sep = "")
   cat("VCOV:         ", x$vcov_type, "\n", sep = "")
   cat("---\n")
   cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
-  
+
   # Print CI if present
   if (!is.null(x$ci)) {
     cat("\n")
     .print_ar_ci(x$ci, x$endo_names[1])
   }
-  
+
   invisible(x)
 }
 
@@ -801,47 +889,77 @@ print.fixest_ar = function(x, ...) {
 #' Internal function to print AR confidence interval
 #' @keywords internal
 .print_ar_ci = function(ci, endo_name) {
-  
   cat("Anderson-Rubin Confidence Interval\n")
   cat("-----------------------------------\n")
   cat("Variable: ", endo_name, "\n", sep = "")
   cat("Level:    ", format(ci$level * 100, digits = 2), "%\n", sep = "")
-  cat("Method:   ", if (ci$exact) "Exact (closed-form)" else "Numeric inversion", "\n", sep = "")
+  cat(
+    "Method:   ",
+    if (ci$exact) "Exact (closed-form)" else "Numeric inversion",
+    "\n",
+    sep = ""
+  )
   cat("\n")
-  
+
   intervals <- ci$intervals
-  
+
   if (nrow(intervals) == 0) {
     cat("AR Confidence Set: EMPTY\n")
     cat("  (This may indicate model misspecification)\n")
   } else if (nrow(intervals) == 1) {
     lower <- intervals[1, "lower"]
     upper <- intervals[1, "upper"]
-    
+
     if (is.infinite(lower) && is.infinite(upper)) {
       cat("AR Confidence Set: (-Inf, Inf)\n")
       cat("  (Instruments may be very weak)\n")
     } else if (is.infinite(lower)) {
-      cat("AR Confidence Set: (-Inf, ", format(upper, digits = 4), "]\n", sep = "")
+      cat(
+        "AR Confidence Set: (-Inf, ",
+        format(upper, digits = 4),
+        "]\n",
+        sep = ""
+      )
     } else if (is.infinite(upper)) {
-      cat("AR Confidence Set: [", format(lower, digits = 4), ", Inf)\n", sep = "")
+      cat(
+        "AR Confidence Set: [",
+        format(lower, digits = 4),
+        ", Inf)\n",
+        sep = ""
+      )
     } else {
-      cat("AR Confidence Set: [", format(lower, digits = 4), ", ", 
-          format(upper, digits = 4), "]\n", sep = "")
+      cat(
+        "AR Confidence Set: [",
+        format(lower, digits = 4),
+        ", ",
+        format(upper, digits = 4),
+        "]\n",
+        sep = ""
+      )
     }
   } else {
     cat("AR Confidence Set: (disconnected)\n")
     for (i in seq_len(nrow(intervals))) {
       lower <- intervals[i, "lower"]
       upper <- intervals[i, "upper"]
-      
+
       lower_bracket <- if (is.infinite(lower)) "(" else "["
       upper_bracket <- if (is.infinite(upper)) ")" else "]"
       lower_str <- if (is.infinite(lower)) "-Inf" else format(lower, digits = 4)
       upper_str <- if (is.infinite(upper)) "Inf" else format(upper, digits = 4)
-      
-      cat("  Interval ", i, ": ", lower_bracket, lower_str, ", ", 
-          upper_str, upper_bracket, "\n", sep = "")
+
+      cat(
+        "  Interval ",
+        i,
+        ": ",
+        lower_bracket,
+        lower_str,
+        ", ",
+        upper_str,
+        upper_bracket,
+        "\n",
+        sep = ""
+      )
     }
     cat("  (Multiple intervals may indicate non-convex confidence region)\n")
   }
@@ -868,8 +986,8 @@ print.fixest_ar = function(x, ...) {
 #' @param ... Additional arguments passed to [`ar_test`].
 #'
 #' @details
-#' This function only supports models with a single endogenous variable. For models 
-#' with multiple endogenous variables, the confidence region is multi-dimensional 
+#' This function only supports models with a single endogenous variable. For models
+#' with multiple endogenous variables, the confidence region is multi-dimensional
 #' and cannot be easily represented.
 #'
 #' For iid errors, an exact closed-form confidence interval is computed.
@@ -888,14 +1006,14 @@ print.fixest_ar = function(x, ...) {
 #' \item{level}{The confidence level used.}
 #' \item{method}{"AR".}
 #' \item{exact}{Logical, TRUE if exact closed-form was used.}
-#' \item{intervals}{A matrix with columns "lower" and "upper" giving the 
-#'   confidence interval(s). Each row is a separate interval (in case of 
+#' \item{intervals}{A matrix with columns "lower" and "upper" giving the
+#'   confidence interval(s). Each row is a separate interval (in case of
 #'   disconnected regions).}
 #' \item{endo_name}{The name of the endogenous variable.}
 #' \item{point_est}{The 2SLS point estimate.}
 #' \item{model}{The original `feols` IV object.}
 #'
-#' @seealso 
+#' @seealso
 #' [`ar_test`] for the Anderson-Rubin test with integrated CI computation.
 #'
 #' @examples
@@ -920,41 +1038,50 @@ print.fixest_ar = function(x, ...) {
 #'
 #' @export
 ar_confint = function(object, level = 0.95, vcov = NULL, ...) {
-  
   # Input validation
   check_arg(object, "class(fixest)")
   check_arg(level, "numeric scalar GT{0} LT{1}")
-  
+
   # Check that object is an IV model
   if (!isTRUE(object$iv)) {
-    stop("The `ar_confint` function requires an IV model. ",
-         "Please provide a `feols` object estimated with an IV formula.")
+    stop(
+      "The `ar_confint` function requires an IV model. ",
+      "Please provide a `feols` object estimated with an IV formula."
+    )
   }
-  
+
   # Currently only support single endogenous variable
   endo_names <- object$iv_endo_names
   if (length(endo_names) > 1) {
-    stop("The `ar_confint` function currently only supports models with a single ",
-         "endogenous variable. This model has ", length(endo_names), 
-         " endogenous variables: ", paste(endo_names, collapse = ", "), ".")
+    stop(
+      "The `ar_confint` function currently only supports models with a single ",
+      "endogenous variable. This model has ",
+      length(endo_names),
+      " endogenous variables: ",
+      paste(endo_names, collapse = ", "),
+      "."
+    )
   }
-  
+
   endo_name <- endo_names[1]
-  
+
   # Get point estimate
   coef_names <- names(object$coefficients)
   endo_coef_name <- paste0("fit_", endo_name)
   if (!endo_coef_name %in% coef_names) {
     endo_coef_name <- endo_name
   }
-  
+
   if (!endo_coef_name %in% coef_names) {
-    stop("Could not find the coefficient for the endogenous variable '", 
-         endo_name, "' in the model.")
+    stop(
+      "Could not find the coefficient for the endogenous variable '",
+      endo_name,
+      "' in the model."
+    )
   }
-  
+
   point_est <- object$coefficients[endo_coef_name]
-  
+
   # Determine vcov type
   if (is.null(vcov)) {
     if (!is.null(object$cov.scaled)) {
@@ -967,17 +1094,18 @@ ar_confint = function(object, level = 0.95, vcov = NULL, ...) {
     test_result <- .ar_test_core(object, beta0 = 0, vcov = vcov, ...)
     vcov_type <- test_result$vcov_type
   }
-  
-  is_iid <- identical(vcov_type, "IID") || identical(vcov_type, "iid") || 
-            identical(tolower(as.character(vcov_type)), "iid")
-  
+
+  is_iid <- identical(vcov_type, "IID") ||
+    identical(vcov_type, "iid") ||
+    identical(tolower(as.character(vcov_type)), "iid")
+
   # Compute CI
   if (is_iid) {
     ci_result <- .ar_ci_exact_iid(object, level = level)
   } else {
     ci_result <- .ar_ci_numeric(object, level = level, vcov = vcov, ...)
   }
-  
+
   res <- list(
     level = level,
     method = ci_result$method,
@@ -987,9 +1115,9 @@ ar_confint = function(object, level = 0.95, vcov = NULL, ...) {
     point_est = unname(point_est),
     model = object
   )
-  
+
   class(res) <- "fixest_ar_confint"
-  
+
   return(res)
 }
 
@@ -997,10 +1125,16 @@ ar_confint = function(object, level = 0.95, vcov = NULL, ...) {
 #' @rdname ar_confint
 #' @export
 print.fixest_ar_confint = function(x, ...) {
-  
-  cat("2SLS point estimate: ", format(x$point_est, digits = 4), "\n\n", sep = "")
-  .print_ar_ci(list(level = x$level, exact = x$exact, intervals = x$intervals), 
-               x$endo_name)
-  
+  cat(
+    "2SLS point estimate: ",
+    format(x$point_est, digits = 4),
+    "\n\n",
+    sep = ""
+  )
+  .print_ar_ci(
+    list(level = x$level, exact = x$exact, intervals = x$intervals),
+    x$endo_name
+  )
+
   invisible(x)
 }

From f531aaebca478c664c8c29de02e563f0b2b33934 Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Fri, 28 Nov 2025 22:42:24 +0000
Subject: [PATCH 10/20] Refine argument passing (#2)

* pass correct argument to calculate test

* improve argument passing and modify the ci argument to take "numeric"

* set ci to false when there is more than one endo var

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Refine doc

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* remove compute_ci

* use vcov_arg_to_pass

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* set ci to false when conditions are not met

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 R/ar_test.R | 190 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 153 insertions(+), 37 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 4364d38f..2a7ba199 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -4,6 +4,50 @@
 # ~: Anderson-Rubin test for IV models
 #----------------------------------------------#
 
+# =============================================================================
+# INTERNAL HELPERS
+# =============================================================================
+
+#' Identify additional arguments beyond those supported by the exact AR CI
+#' @keywords internal
+.ar_exact_extra_args = function(object) {
+  call_obj <- object$call
+
+  if (is.null(call_obj)) {
+    return("call_missing")
+  }
+
+  call_list <- as.list(call_obj)[-1]
+  if (length(call_list) == 0) {
+    return(character(0))
+  }
+
+  arg_names <- names(call_list)
+  if (is.null(arg_names)) {
+    arg_names <- rep("", length(call_list))
+  }
+
+  allowed <- c("fml", "data", "vcov", "se")
+  extras <- character(0)
+  unnamed_seen <- 0L
+
+  for (nm in arg_names) {
+    if (!nzchar(nm)) {
+      unnamed_seen <- unnamed_seen + 1L
+      # Allow up to two unnamed positional arguments (formula, data)
+      if (unnamed_seen <= 2L) {
+        next
+      }
+      extras <- c(extras, paste0("positional#", unnamed_seen))
+    } else if (!(nm %in% allowed)) {
+      extras <- c(extras, nm)
+    }
+  }
+
+  unique(extras)
+}
+
+
 # =============================================================================
 # INTERNAL CORE FUNCTION
 # =============================================================================
@@ -32,7 +76,7 @@
 #' \item{inst_tested}{Instrument coefficients actually tested.}
 #'
 #' @keywords internal
-.ar_test_core = function(object, beta0, vcov = NULL, ...) {
+.ar_test_core = function(object, beta0, vcov = NULL, orig_call = NULL, ...) {
   # Get endogenous variable names and instrument names
   endo_names <- object$iv_endo_names
   inst_names <- object$iv_inst_names_xpd # expanded instrument names
@@ -116,28 +160,62 @@
   }
 
   # Determine vcov specification
-  # If not specified, try to get from original object
-  if (is.null(vcov)) {
-    if (!is.null(object$cov.scaled)) {
-      vcov <- attr(object$cov.scaled, "type")
-    }
-    if (is.null(vcov)) {
-      vcov <- "iid"
-    }
+  # We want to preserve the original call's `vcov` argument (which may be
+  # an expression like "hetero" or a formula) unless the user provided an
+  # explicit `vcov` to this function. Avoid using the human-readable
+  # attribute `attr(object$cov.scaled, "type")` here because it contains
+  # descriptive text (e.g. "Heteroskedasticity-robust") that `feols()`
+  # does not accept as a `vcov` argument.
+  vcov_provided <- !missing(vcov) && !is.null(vcov)
+  vcov_arg_to_pass <- NULL
+
+  if (vcov_provided) {
+    vcov_arg_to_pass <- vcov
+  } else if (!is.null(object$call) && !is.null(object$call$vcov)) {
+    # Use the original call's vcov expression if available
+    vcov_arg_to_pass <- object$call$vcov
+  } else if (!is.null(object$cov.scaled)) {
+    # Fall back to the canonical name if available (but not the descriptive attr)
+    # Use 'iid' as final fallback
+    vcov_arg_to_pass <- "iid"
+  } else {
+    vcov_arg_to_pass <- "iid"
   }
 
   # Run the AR regression - inherit settings from original object where possible
-  ar_fit <- feols(
-    new_fml,
-    data = data,
-    weights = weights_val,
-    vcov = vcov,
-    notes = FALSE,
-    ...
-  )
+  # Prefer to reuse an original call (provided via `orig_call` or from the
+  # model object) so we preserve options like weights, vcov, and other args.
+  if (is.null(orig_call)) orig_call <- object$call
+
+  if (is.call(orig_call) && length(orig_call) > 0) {
+    call_to_eval <- as.call(orig_call)
+    call_to_eval[[1]] <- as.name("feols")
+    call_to_eval$fml <- new_fml
+    call_to_eval$data <- as.name("data")
+    call_to_eval$vcov <- vcov_arg_to_pass
+    call_to_eval$notes <- FALSE
+
+    dots <- list(...)
+    if (length(dots) > 0) {
+      for (nm in names(dots)) {
+        if (nzchar(nm)) call_to_eval[[nm]] <- dots[[nm]]
+      }
+    }
+
+    ar_fit <- eval(call_to_eval)
+  } else {
+    ar_fit <- feols(
+      new_fml,
+      data = data,
+      weights = weights_val,
+      vcov = vcov_arg_to_pass,
+      notes = FALSE,
+      ...
+    )
+  }
 
   # Summarize to ensure we have the VCOV
-  ar_fit_sum <- summary(ar_fit, vcov = vcov, ...)
+  ar_fit_sum <- summary(ar_fit, vcov = vcov_arg_to_pass, ...)
 
   # Get the coefficient names in the AR regression that correspond to instruments
   ar_coef_names <- names(ar_fit_sum$coefficients)
@@ -470,6 +548,7 @@
   vcov = NULL,
   n_grid = 200,
   tol = 1e-8,
+  orig_call = NULL,
   ...
 ) {
   # Get point estimate and SE for grid construction
@@ -483,6 +562,7 @@
   }
 
   point_est <- object$coefficients[endo_coef_name]
+  # Summaries should use the same VCOV logic as the AR test core
   obj_sum <- summary(object, vcov = vcov, ...)
   se_est <- se(obj_sum)[endo_coef_name]
 
@@ -492,7 +572,13 @@
 
   # Get the critical value
   # First, run one AR test to determine distribution type
-  test_result <- .ar_test_core(object, beta0 = point_est, vcov = vcov, ...)
+  test_result <- .ar_test_core(
+    object,
+    beta0 = point_est,
+    vcov = vcov,
+    orig_call = orig_call,
+    ...
+  )
 
   if (test_result$dist == "F") {
     crit <- qf(level, df1 = test_result$df1, df2 = test_result$df2)
@@ -503,7 +589,13 @@
   # Evaluate g(beta0) = stat(beta0) - crit on the coarse grid
   g_values <- numeric(n_grid)
   for (i in seq_along(grid)) {
-    ar_res <- .ar_test_core(object, beta0 = grid[i], vcov = vcov, ...)
+    ar_res <- .ar_test_core(
+      object,
+      beta0 = grid[i],
+      vcov = vcov,
+      orig_call = orig_call,
+      ...
+    )
     g_values[i] <- ar_res$stat - crit
   }
 
@@ -524,7 +616,13 @@
     roots <- numeric(length(sign_changes))
 
     g_func <- function(beta0) {
-      ar_res <- .ar_test_core(object, beta0 = beta0, vcov = vcov, ...)
+      ar_res <- .ar_test_core(
+        object,
+        beta0 = beta0,
+        vcov = vcov,
+        orig_call = orig_call,
+        ...
+      )
       ar_res$stat - crit
     }
 
@@ -623,11 +721,12 @@
 #'   details on available options.
 #' @param level Numeric scalar between 0 and 1. The confidence level for the
 #'   optional confidence interval. Default is 0.95.
-#' @param ci Logical or NULL. Whether to compute an AR confidence interval.
+#' @param ci Logical, NULL, or the string "numeric". Whether to compute an AR confidence interval.
 #'   If `NULL` (default):
 #'   - Returns CI if vcov is "iid" AND there is exactly one endogenous variable
 #'   - Does not return CI otherwise
 #'   If `TRUE`: Attempts to compute CI (warns if not possible with multiple endo vars)
+#'   If `"numeric"` (the string): Forces numeric inversion of the AR test to compute the confidence interval, even if the exact method is not available.
 #'   If `FALSE`: Does not compute CI
 #' @param ... Additional arguments passed to [`summary.fixest`].
 #'
@@ -765,8 +864,13 @@ ar_test = function(
   }
   names(beta0) <- endo_names
 
-  # Run the core test
-  core_result <- .ar_test_core(object, beta0 = beta0, vcov = vcov, ...)
+  # Determine whether the exact CI is allowed: requires iid, single endo, and
+  # no extra arguments (weights, clusters, demean, etc.) in the original call.
+  extra_args <- .ar_exact_extra_args(object)
+  allow_exact_ci <- length(extra_args) == 0
+
+  # Run the core test (pass original call so internal call can reuse options)
+  core_result <- .ar_test_core(object, beta0 = beta0, vcov = vcov, orig_call = object$call, ...)
 
   # Build the result object
   res <- list(
@@ -784,34 +888,46 @@ ar_test = function(
     model = object
   )
 
-  # Decide whether to compute CI
-  compute_ci <- FALSE
-
+  # Determine whether to compute CI
   if (is.null(ci)) {
-    # Default: compute CI only if iid and single endo var
-    if (core_result$is_iid && n_endo == 1) {
-      compute_ci <- TRUE
+    # Default: compute CI only if iid, single endo, and exact CI allowed
+    if (core_result$is_iid && n_endo == 1 && allow_exact_ci) {
+      ci <- TRUE
+    } else {
+      ci <- FALSE
     }
-  } else if (isTRUE(ci)) {
+  } else if (ci != FALSE) {
     if (n_endo > 1) {
       warning(
         "Cannot compute AR confidence interval for models with multiple ",
         "endogenous variables. Returning test result only."
       )
-    } else {
-      compute_ci <- TRUE
+      ci <- FALSE
     }
   }
-  # If ci = FALSE, compute_ci stays FALSE
 
   # Compute CI if requested
-  if (compute_ci) {
-    if (core_result$is_iid) {
+  if (ci != FALSE) {
+    if (core_result$is_iid && allow_exact_ci && ci != "numeric") {
       # Use exact closed-form CI
       ci_result <- .ar_ci_exact_iid(object, level = level)
     } else {
       # Use numeric inversion
-      ci_result <- .ar_ci_numeric(object, level = level, vcov = vcov, ...)
+      if (core_result$is_iid && !allow_exact_ci) {
+        warning(
+          "Exact AR confidence interval requires a simple feols call with only ",
+          "fml, data, vcov, optionally se. Additional arguments (",
+          paste(extra_args, collapse = ", "),
+          ") detected; falling back to numeric inversion."
+        )
+      }
+      ci_result <- .ar_ci_numeric(
+        object,
+        level = level,
+        vcov = vcov,
+        orig_call = object$call,
+        ...
+      )
     }
     res$ci <- ci_result
   }

From 33b4d4139b20e0be5ae949d4bbc173e478c62efc Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Sat, 29 Nov 2025 07:02:28 +0800
Subject: [PATCH 11/20] Add weights support to exact AR CI calculation (#3)

* Initial plan

* Add weights support to exact AR CI calculation

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>

* Add tests for weighted AR CI calculation

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>

* Address code review feedback: fix comment and use object$weights

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>
---
 R/ar_test.R          | 131 ++++++++++++++++++++++++++++++++++---------
 tests/fixest_tests.R |  14 +++++
 2 files changed, 117 insertions(+), 28 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 2a7ba199..36f16a40 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -27,7 +27,7 @@
     arg_names <- rep("", length(call_list))
   }
 
-  allowed <- c("fml", "data", "vcov", "se")
+  allowed <- c("fml", "data", "vcov", "se", "weights")
   extras <- character(0)
   unnamed_seen <- 0L
 
@@ -338,6 +338,13 @@
   n <- length(y)
   L <- ncol(inst_mat) # number of instruments
 
+  # Get weights (numeric vector or NULL)
+  w <- object$weights
+  has_weights <- !is.null(w)
+  if (has_weights) {
+    sqrt_w <- sqrt(w)
+  }
+
   # Check for fixed effects - if present, we need to demean
   has_fe <- !is.null(object$fixef_vars)
 
@@ -368,6 +375,7 @@
       }
 
       # Demean using the internal algorithm
+      # Note: cpp_demean handles weighted demeaning for FE
       all_vars_dm <- cpp_demean(
         all_vars,
         fixef_id,
@@ -386,14 +394,26 @@
       z_star <- all_vars_dm[, 2 + seq_len(L)]
 
       # If there are exogenous controls, also partial them out from the residuals
+      # For weighted case, we transform by sqrt(w) before QR decomposition
       if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
         exo_dm <- all_vars_dm[, (2 + L + 1):ncol(all_vars_dm), drop = FALSE]
         # Further residualize y_star, d_star, z_star with respect to exo_dm
-        # Using QR decomposition
-        qr_exo <- qr(exo_dm)
-        y_star <- qr.resid(qr_exo, y_star)
-        d_star <- qr.resid(qr_exo, d_star)
-        z_star <- qr.resid(qr_exo, z_star)
+        # Using QR decomposition (weighted by transforming with sqrt(w))
+        if (has_weights) {
+          exo_dm_w <- exo_dm * sqrt_w
+          y_star_w <- y_star * sqrt_w
+          d_star_w <- d_star * sqrt_w
+          z_star_w <- z_star * sqrt_w
+          qr_exo <- qr(exo_dm_w)
+          y_star <- qr.resid(qr_exo, y_star_w) / sqrt_w
+          d_star <- qr.resid(qr_exo, d_star_w) / sqrt_w
+          z_star <- qr.resid(qr_exo, z_star_w) / sqrt_w
+        } else {
+          qr_exo <- qr(exo_dm)
+          y_star <- qr.resid(qr_exo, y_star)
+          d_star <- qr.resid(qr_exo, d_star)
+          z_star <- qr.resid(qr_exo, z_star)
+        }
       }
 
       # Adjust degrees of freedom for FE
@@ -402,42 +422,97 @@
     } else {
       # No FE, just partial out exogenous controls (including intercept)
       if (!is.null(exo_mat) && ncol(exo_mat) > 0) {
-        qr_exo <- qr(exo_mat) # exo_mat should be the full design
-        y_star <- qr.resid(qr_exo, y)
-        d_star <- qr.resid(qr_exo, endo_mat[, 1])
-        z_star <- qr.resid(qr_exo, inst_mat)
+        # For weighted case, transform by sqrt(w) before QR decomposition
+        if (has_weights) {
+          exo_mat_w <- exo_mat * sqrt_w
+          y_w <- y * sqrt_w
+          d_w <- endo_mat[, 1] * sqrt_w
+          z_w <- inst_mat * sqrt_w
+          qr_exo <- qr(exo_mat_w)
+          y_star <- qr.resid(qr_exo, y_w) / sqrt_w
+          d_star <- qr.resid(qr_exo, d_w) / sqrt_w
+          z_star <- qr.resid(qr_exo, z_w) / sqrt_w
+        } else {
+          qr_exo <- qr(exo_mat)
+          y_star <- qr.resid(qr_exo, y)
+          d_star <- qr.resid(qr_exo, endo_mat[, 1])
+          z_star <- qr.resid(qr_exo, inst_mat)
+        }
         k <- qr_exo$rank # rank, not ncol(exo_mat)+1
       } else {
         # No FE, no controls - just demean for intercept
-        y_star <- y - mean(y)
-        d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
-        z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
+        if (has_weights) {
+          # Use weighted means
+          w_sum <- sum(w)
+          y_wmean <- sum(w * y) / w_sum
+          d_wmean <- sum(w * endo_mat[, 1]) / w_sum
+          z_wmeans <- colSums(w * inst_mat) / w_sum
+          y_star <- y - y_wmean
+          d_star <- endo_mat[, 1] - d_wmean
+          z_star <- sweep(inst_mat, 2, z_wmeans)
+        } else {
+          y_star <- y - mean(y)
+          d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
+          z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
+        }
         k <- 1
       }
     }
   } else {
     # No FE, no controls - just demean for intercept
-    y_star <- y - mean(y)
-    d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
-    z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
+    if (has_weights) {
+      # Use weighted means
+      w_sum <- sum(w)
+      y_wmean <- sum(w * y) / w_sum
+      d_wmean <- sum(w * endo_mat[, 1]) / w_sum
+      z_wmeans <- colSums(w * inst_mat) / w_sum
+      y_star <- y - y_wmean
+      d_star <- endo_mat[, 1] - d_wmean
+      z_star <- sweep(inst_mat, 2, z_wmeans)
+    } else {
+      y_star <- y - mean(y)
+      d_star <- endo_mat[, 1] - mean(endo_mat[, 1])
+      z_star <- sweep(inst_mat, 2, colMeans(inst_mat))
+    }
     k <- 1 # just intercept
   }
 
   # Compute the quadratic coefficients
-  # S_DD = sum(D*^2), S_YY = sum(Y*^2), S_DY = sum(D* * Y*)
-  S_DD <- sum(d_star^2)
-  S_YY <- sum(y_star^2)
-  S_DY <- sum(d_star * y_star)
+  # For weighted case, use weighted sums: sum(w * x^2) instead of sum(x^2)
+  if (has_weights) {
+    S_DD <- sum(w * d_star^2)
+    S_YY <- sum(w * y_star^2)
+    S_DY <- sum(w * d_star * y_star)
+  } else {
+    S_DD <- sum(d_star^2)
+    S_YY <- sum(y_star^2)
+    S_DY <- sum(d_star * y_star)
+  }
 
   # P_Y = projection of Y* onto Z*, P_D = projection of D* onto Z*
-  qr_z <- qr(z_star)
-  L <- qr_z$rank # instead of ncol(inst_mat) at the top
-  P_Y <- qr.fitted(qr_z, y_star)
-  P_D <- qr.fitted(qr_z, d_star)
-
-  S_PDD <- sum(P_D^2)
-  S_PYY <- sum(P_Y^2)
-  S_PDY <- sum(P_D * P_Y)
+  # For weighted case, transform by sqrt(w) before QR decomposition
+  if (has_weights) {
+    z_star_w <- z_star * sqrt_w
+    y_star_w <- y_star * sqrt_w
+    d_star_w <- d_star * sqrt_w
+    qr_z <- qr(z_star_w)
+    L <- qr_z$rank
+    # Get weighted projections (in weighted space)
+    P_Y_w <- qr.fitted(qr_z, y_star_w)
+    P_D_w <- qr.fitted(qr_z, d_star_w)
+    # Compute weighted sums of squared projections
+    S_PDD <- sum(P_D_w^2)
+    S_PYY <- sum(P_Y_w^2)
+    S_PDY <- sum(P_D_w * P_Y_w)
+  } else {
+    qr_z <- qr(z_star)
+    L <- qr_z$rank
+    P_Y <- qr.fitted(qr_z, y_star)
+    P_D <- qr.fitted(qr_z, d_star)
+    S_PDD <- sum(P_D^2)
+    S_PYY <- sum(P_Y^2)
+    S_PDY <- sum(P_D * P_Y)
+  }
 
   # Critical value
   alpha <- 1 - level
diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
index 3764e365..0427717f 100644
--- a/tests/fixest_tests.R
+++ b/tests/fixest_tests.R
@@ -3256,6 +3256,20 @@ test(is.null(ar_res_no_ci$ci), TRUE)
 print(ar_res)
 print(ar_ci)
 
+# Test 15: AR test with weights should use exact CI (not numeric)
+data_ar$wgt = runif(n, 0.5, 2)
+est_iv_wgt = feols(y ~ 1 | x ~ z, data = data_ar, weights = ~wgt)
+ar_res_wgt = ar_test(est_iv_wgt)
+test("fixest_ar" %in% class(ar_res_wgt), TRUE)
+# Check that CI is computed and is exact (not numeric)
+test(!is.null(ar_res_wgt$ci), TRUE)
+test(ar_res_wgt$ci$exact, TRUE)
+
+# Test 16: ar_confint with weights should also work
+ar_ci_wgt = ar_confint(est_iv_wgt)
+test("fixest_ar_confint" %in% class(ar_ci_wgt), TRUE)
+test(ar_ci_wgt$exact, TRUE)
+
 
 
From c5e0c785ab548abdd07e71f375629c1e669524b2 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Sat, 29 Nov 2025 08:44:05 +0000
Subject: [PATCH 12/20] Update AR test to use realistic DGP validated against
 ivmodel (#4)

* Initial plan

* Update ar_test tests to use realistic DGP validated against ivmodel package

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>

* Fix test 2

* Fix up to tests 6 and 7
---
 tests/fixest_tests.R | 115 +++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 42 deletions(-)

diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
index 0427717f..35bb7299 100644
--- a/tests/fixest_tests.R
+++ b/tests/fixest_tests.R
@@ -3173,32 +3173,47 @@ test(nrow(fixest_data(est_mult, "esti")), 45)
 
 chunk("AR test")
 
-# Simple IV example with known DGP
-set.seed(42)
-n = 500
-z = rnorm(n)
-x = 0.5 * z + rnorm(n)  # moderate instrument strength
-true_beta = 0.8
-y = 1 + true_beta * x + rnorm(n)
-data_ar = data.frame(y = y, x = x, z = z, cl = rep(1:50, each = 10))
-
-# Basic IV estimation
-est_iv_ar = feols(y ~ 1 | x ~ z, data = data_ar)
-
-# Test 1: AR test should run without error
+# Realistic IV DGP with endogeneity (validated against ivmodel package)
+# Expected values from ivmodel package:
+#   With beta0=1: F=5.647947, df1=1, df2=97, p=0.019439
+#   CI: [1.2562, 2.5130]
+#   Weighted CI: [1.2147, 2.8270]
+set.seed(123)
+gamma = 0.5
+beta = 2
+n = 100
+
+e_1 = rnorm(n, mean = 0, sd = 1)
+z = rnorm(n, mean = 0, sd = 1)
+x = gamma * z + e_1
+w = rnorm(n, mean = 0, sd = 1)
+e_2 = rnorm(n, mean = 0, sd = 1) + 0.8 * e_1  # correlated errors create endogeneity
+y = beta * x + w + e_2
+
+data_ar = data.frame(y = y, x = x, z = z, w = w)
+
+# Basic IV estimation with exogenous control
+est_iv_ar = feols(y ~ w | x ~ z, data = data_ar)
+
+# Test 1: AR test should run without error and return correct class
 ar_res = ar_test(est_iv_ar)
 test("fixest_ar" %in% class(ar_res), TRUE)
 
-# Test 2: AR test with beta0 = 0 (null should be rejected since true_beta = 0.8)
+# Test 2: AR test with beta0 = 1 (validated against ivmodel package)
+# ivmodel gives: F=5.647947, df1=1, df2=97, p=0.019439
+ar_res_1 = ar_test(est_iv_ar, beta0 = 1)
+test(ar_res_1$beta0, 1)
+test(round(ar_res_1$stat, 6), 5.647947)
+test(ar_res_1$df1, 1)
+test(ar_res_1$df2, 97)
+test(round(ar_res_1$pvalue, 6), 0.019439)
+
+# Test 3: AR test with beta0 = 0 (should reject since true beta = 2)
 ar_res_null = ar_test(est_iv_ar, beta0 = 0)
 test(ar_res_null$beta0, 0)
 test(ar_res_null$pvalue < 0.05, TRUE)  # should reject
 
-# Test 3: AR test with beta0 close to true value (should not reject)
-ar_res_true = ar_test(est_iv_ar, beta0 = true_beta)
-test(ar_res_true$pvalue > 0.05, TRUE)  # should not reject
-
-# Test 4: AR test returns correct structure (now includes dist field)
+# Test 4: AR test returns correct structure
 test(is.numeric(ar_res$stat), TRUE)
 test(is.numeric(ar_res$pvalue), TRUE)
 test(is.numeric(ar_res$df1), TRUE)
@@ -3209,66 +3224,82 @@ test(ar_res$dist %in% c("F", "Chi-squared"), TRUE)
 test(ar_res$dist, "F")
 test(!is.null(ar_res$ci), TRUE)  # CI should be included for iid + single endo
 
-# Test 6: AR test with clustered SEs should use Chi-squared
-est_iv_cl = feols(y ~ 1 | x ~ z, data = data_ar, vcov = ~cl)
+# Test 6: Verify exact CI values (validated against ivmodel package)
+# ivmodel CI: [1.2562, 2.5130]
+test(round(ar_res$ci$intervals[1, "lower"], 4), 1.2562)
+test(round(ar_res$ci$intervals[1, "upper"], 4), 2.5130)
+test(ar_res$ci$exact, TRUE)
+
+# Test 7: Numeric CI should match exact CI
+ar_res_numeric = ar_test(est_iv_ar, beta0 = 1, ci = "numeric")
+test(round(ar_res_numeric$ci$intervals[1, "lower"], 4), 1.2562)
+test(round(ar_res_numeric$ci$intervals[1, "upper"], 4), 2.5130)
+test(ar_res_numeric$ci$exact, FALSE)
+
+# Test 8: AR test with clustered SEs should use Chi-squared
+data_ar$cl = rep(1:10, each = 10)
+est_iv_cl = feols(y ~ w | x ~ z, data = data_ar, vcov = ~cl)
 ar_res_cl = ar_test(est_iv_cl)
 test("fixest_ar" %in% class(ar_res_cl), TRUE)
 test(ar_res_cl$dist, "Chi-squared")
 test(is.na(ar_res_cl$df2), TRUE)  # df2 should be NA for Chi-squared
 
-# Test 7: AR test should error on non-IV model
-est_ols = feols(y ~ x, data = data_ar)
+# Test 9: AR test should error on non-IV model
+est_ols = feols(y ~ x + w, data = data_ar)
 test(ar_test(est_ols), "err")
 
-# Test 8: ar_confint should work for single endogenous variable
+# Test 10: ar_confint should work for single endogenous variable
 ar_ci = ar_confint(est_iv_ar)
 test("fixest_ar_confint" %in% class(ar_ci), TRUE)
 test(ar_ci$point_est > 0, TRUE)
 test(nrow(ar_ci$intervals) >= 1, TRUE)
 
-# Test 9: IV with controls and fixed effects
-data_ar$w = rnorm(n)
-data_ar$fe = rep(1:5, each = 100)
+# Test 11: IV with fixed effects
+data_ar$fe = rep(1:5, each = 20)
 est_iv_fe = feols(y ~ w | fe | x ~ z, data = data_ar)
 ar_res_fe = ar_test(est_iv_fe)
 test("fixest_ar" %in% class(ar_res_fe), TRUE)
 
-# Test 10: Multiple instruments  
+# Test 12: Multiple instruments
 data_ar$z2 = 0.3 * x + rnorm(n, sd = 0.8)
-est_iv_2inst = feols(y ~ 1 | x ~ z + z2, data = data_ar)
+est_iv_2inst = feols(y ~ w | x ~ z + z2, data = data_ar)
 ar_res_2inst = ar_test(est_iv_2inst)
 test("fixest_ar" %in% class(ar_res_2inst), TRUE)
 
-# Test 11: ar_confint errors with multiple endogenous vars
+# Test 13: ar_confint errors with multiple endogenous vars
 data_ar$x2 = 0.3 * z + rnorm(n)
-est_iv_2endo = feols(y ~ 1 | x + x2 ~ z + z2, data = data_ar)
+est_iv_2endo = feols(y ~ w | x + x2 ~ z + z2, data = data_ar)
 test(ar_confint(est_iv_2endo), "err")
 
-# Test 12: ar_test with ci=TRUE should compute CI even for non-iid
+# Test 14: ar_test with ci=TRUE should compute CI even for non-iid
 ar_res_cl_ci = ar_test(est_iv_cl, ci = TRUE)
 test(!is.null(ar_res_cl_ci$ci), TRUE)
 
-# Test 13: ar_test with ci=FALSE should not compute CI
+# Test 15: ar_test with ci=FALSE should not compute CI
 ar_res_no_ci = ar_test(est_iv_ar, ci = FALSE)
 test(is.null(ar_res_no_ci$ci), TRUE)
 
-# Test 14: Verify print methods work without error
+# Test 16: Verify print methods work without error
 print(ar_res)
 print(ar_ci)
 
-# Test 15: AR test with weights should use exact CI (not numeric)
-data_ar$wgt = runif(n, 0.5, 2)
-est_iv_wgt = feols(y ~ 1 | x ~ z, data = data_ar, weights = ~wgt)
-ar_res_wgt = ar_test(est_iv_wgt)
+# Test 17: AR test with weights (validated against ivmodel package)
+# ivmodel weighted CI: [1.2147, 2.8270]
+est_iv_wgt = feols(y ~ w | x ~ z, data = data_ar, weights = ~exp(z))
+ar_res_wgt = ar_test(est_iv_wgt, beta0 = 1, ci = TRUE)
 test("fixest_ar" %in% class(ar_res_wgt), TRUE)
-# Check that CI is computed and is exact (not numeric)
 test(!is.null(ar_res_wgt$ci), TRUE)
-test(ar_res_wgt$ci$exact, TRUE)
+test(ar_res_wgt$ci$intervals[1, "lower"], 1.2147, ~1e-4)
+test(ar_res_wgt$ci$intervals[1, "upper"], 2.8270, ~1e-4)
+
+# Test 18: Weighted numeric CI should match
+ar_res_wgt_numeric = ar_test(est_iv_wgt, beta0 = 1, ci = "numeric")
+test(ar_res_wgt_numeric$ci$intervals[1, "lower"], 1.2147, ~1e-4)
+test(ar_res_wgt_numeric$ci$intervals[1, "upper"], 2.8270, ~1e-4)
 
-# Test 16: ar_confint with weights should also work
+# Test 19: ar_confint with weights should also work
 ar_ci_wgt = ar_confint(est_iv_wgt)
 test("fixest_ar_confint" %in% class(ar_ci_wgt), TRUE)
-test(ar_ci_wgt$exact, TRUE)
 
 
From abb04d1da20d625cfc46016fcb790cf5ff860fb5 Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 08:58:08 +0000
Subject: [PATCH 13/20] Fix tests 12-19

---
 tests/fixest_tests.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
index 35bb7299..841b59de 100644
--- a/tests/fixest_tests.R
+++ b/tests/fixest_tests.R
@@ -3289,13 +3289,13 @@ est_iv_wgt = feols(y ~ w | x ~ z, data = data_ar, weights = ~exp(z))
 ar_res_wgt = ar_test(est_iv_wgt, beta0 = 1, ci = TRUE)
 test("fixest_ar" %in% class(ar_res_wgt), TRUE)
 test(!is.null(ar_res_wgt$ci), TRUE)
-test(ar_res_wgt$ci$intervals[1, "lower"], 1.2147, ~1e-4)
-test(ar_res_wgt$ci$intervals[1, "upper"], 2.8270, ~1e-4)
+test(round(ar_res_wgt$ci$intervals[1, "lower"], 4), 1.2147)
+test(round(ar_res_wgt$ci$intervals[1, "upper"], 4), 2.8270)
 
 # Test 18: Weighted numeric CI should match
 ar_res_wgt_numeric = ar_test(est_iv_wgt, beta0 = 1, ci = "numeric")
-test(ar_res_wgt_numeric$ci$intervals[1, "lower"], 1.2147, ~1e-4)
-test(ar_res_wgt_numeric$ci$intervals[1, "upper"], 2.8270, ~1e-4)
+test(round(ar_res_wgt_numeric$ci$intervals[1, "lower"], 4), 1.2147)
+test(round(ar_res_wgt_numeric$ci$intervals[1, "upper"], 4), 2.8270)
 
 # Test 19: ar_confint with weights should also work
 ar_ci_wgt = ar_confint(est_iv_wgt)

From a053e049ff6824bd7f1be95699efc1c3165d10d7 Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 08:59:32 +0000
Subject: [PATCH 14/20] fix cluster

---
 R/ar_test.R | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/R/ar_test.R b/R/ar_test.R
index 36f16a40..8054f71d 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -166,6 +166,31 @@
   # attribute `attr(object$cov.scaled, "type")` here because it contains
   # descriptive text (e.g. "Heteroskedasticity-robust") that `feols()`
   # does not accept as a `vcov` argument.
+  #
+  # When the original estimation was called with `vcov = ~cluster_id`, the
+  # expression stored in the call is a raw language object (class `call`).
+  # `feols()` and `vcov.fixest()` expect an actual formula object, so we
+  # coerce such calls back to formulas before reusing them.
+  as_formula_if_needed <- function(expr) {
+    if (is.null(expr) || inherits(expr, "formula")) {
+      return(expr)
+    }
+
+    if (!is.call(expr) || length(expr) == 0L) {
+      return(expr)
+    }
+
+    if (!identical(expr[[1]], as.name("~"))) {
+      return(expr)
+    }
+
+    env_linear <- environment(object$fml_all$linear)
+    if (is.null(env_linear)) {
+      env_linear <- parent.frame()
+    }
+
+    as.formula(expr, env = env_linear)
+  }
   vcov_provided <- !missing(vcov) && !is.null(vcov)
   vcov_arg_to_pass <- NULL
 
@@ -182,6 +207,8 @@
     vcov_arg_to_pass <- "iid"
   }
 
+  vcov_arg_to_pass <- as_formula_if_needed(vcov_arg_to_pass)
+
   # Run the AR regression - inherit settings from original object where possible
   # Prefer to reuse an original call (provided via `orig_call` or from the
   # model object) so we preserve options like weights, vcov, and other args.

From 3e110715bcebd087346786876e54f8fff1929aba Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 09:25:40 +0000
Subject: [PATCH 15/20] Simplify vcov passing

---
 R/ar_test.R | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 8054f71d..a879a30a 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -192,34 +192,22 @@
     as.formula(expr, env = env_linear)
   }
   vcov_provided <- !missing(vcov) && !is.null(vcov)
-  vcov_arg_to_pass <- NULL
-
-  if (vcov_provided) {
-    vcov_arg_to_pass <- vcov
-  } else if (!is.null(object$call) && !is.null(object$call$vcov)) {
-    # Use the original call's vcov expression if available
-    vcov_arg_to_pass <- object$call$vcov
-  } else if (!is.null(object$cov.scaled)) {
-    # Fall back to the canonical name if available (but not the descriptive attr)
-    # Use 'iid' as final fallback
-    vcov_arg_to_pass <- "iid"
-  } else {
-    vcov_arg_to_pass <- "iid"
-  }
-
-  vcov_arg_to_pass <- as_formula_if_needed(vcov_arg_to_pass)
 
   # Run the AR regression - inherit settings from original object where possible
   # Prefer to reuse an original call (provided via `orig_call` or from the
   # model object) so we preserve options like weights, vcov, and other args.
-  if (is.null(orig_call)) orig_call <- object$call
+  if (is.null(orig_call)) {
+    orig_call <- object$call
+  }
 
   if (is.call(orig_call) && length(orig_call) > 0) {
     call_to_eval <- as.call(orig_call)
     call_to_eval[[1]] <- as.name("feols")
     call_to_eval$fml <- new_fml
     call_to_eval$data <- as.name("data")
-    call_to_eval$vcov <- vcov_arg_to_pass
+    if (vcov_provided) {
+      call_to_eval$vcov <- vcov
+    }
     call_to_eval$notes <- FALSE
 
     dots <- list(...)
@@ -235,14 +223,13 @@
       new_fml,
       data = data,
       weights = weights_val,
-      vcov = vcov_arg_to_pass,
       notes = FALSE,
       ...
     )
   }
 
   # Summarize to ensure we have the VCOV
-  ar_fit_sum <- summary(ar_fit, vcov = vcov_arg_to_pass, ...)
+  ar_fit_sum <- summary(ar_fit, ...)
 
   # Get the coefficient names in the AR regression that correspond to instruments
   ar_coef_names <- names(ar_fit_sum$coefficients)

From d8f2dcf798f0fcec784f4878623e1d6573fd151b Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 09:39:48 +0000
Subject: [PATCH 16/20] no exact CI calculated with fixed effects

---
 R/ar_test.R | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index a879a30a..b6fcbbea 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -953,13 +953,19 @@ ar_test = function(
   }
   names(beta0) <- endo_names
 
-  # Determine whether the exact CI is allowed: requires iid, single endo, and
+  # Determine whether the exact CI is allowed: requires iid, single endo, no fixed effects and
   # no extra arguments (weights, clusters, demean, etc.) in the original call.
   extra_args <- .ar_exact_extra_args(object)
-  allow_exact_ci <- length(extra_args) == 0
+  allow_exact_ci <- length(extra_args) == 0 && is.null(object$fixef_id)
 
   # Run the core test (pass original call so internal call can reuse options)
-  core_result <- .ar_test_core(object, beta0 = beta0, vcov = vcov, orig_call = object$call, ...)
+  core_result <- .ar_test_core(
+    object,
+    beta0 = beta0,
+    vcov = vcov,
+    orig_call = object$call,
+    ...
+  )
 
   # Build the result object
   res <- list(

From eb945e531ae32b86f043a11274171503830485e9 Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 09:41:18 +0000
Subject: [PATCH 17/20] update docs

---
 R/ar_test.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index b6fcbbea..872575a9 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -837,10 +837,10 @@
 #' F-distribution. For non-iid errors (heteroskedastic, clustered, etc.),
 #' a Chi-squared asymptotic distribution is used.
 #'
-#' When `ci = TRUE` (or by default for iid with single endo var):
-#' - For iid errors with a single endogenous variable, an exact closed-form
+#' When `ci = TRUE` (or by default for iid with single endo var and no fixed effects),:
+#' - For iid errors with a single endogenous variable and no fixed effects, an exact closed-form
 #'   AR confidence interval is computed using the quadratic formula.
-#' - For non-iid errors, the CI is computed by numerically inverting the AR test.
+#' - For non-iid errors with a single endogenous variable, the CI is computed by numerically inverting the AR test.
 #'
 #' @return
 #' An object of class `fixest_ar` containing:

From fc1b63f9bb876324d7dbdf9ef9cce7f0998e910a Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 09:50:54 +0000
Subject: [PATCH 18/20] Further refine doc

---
 R/ar_test.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/ar_test.R b/R/ar_test.R
index 872575a9..7fafbca1 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -812,10 +812,10 @@
 #'   optional confidence interval. Default is 0.95.
 #' @param ci Logical, NULL, or the string "numeric". Whether to compute an AR confidence interval.
 #'   If `NULL` (default):
-#'   - Returns CI if vcov is "iid" AND there is exactly one endogenous variable
+#'   - Returns CI if vcov is "iid" AND there is exactly one endogenous variable with no fixed effects
 #'   - Does not return CI otherwise
 #'   If `TRUE`: Attempts to compute CI (warns if not possible with multiple endo vars)
-#'   If `"numeric"` (the string): Forces numeric inversion of the AR test to compute the confidence interval, even if the exact method is not available.
+#'   If `"numeric"` (the string): Forces numeric inversion of the AR test to compute the confidence interval, even if the exact method is available.
 #'   If `FALSE`: Does not compute CI
 #' @param ... Additional arguments passed to [`summary.fixest`].
 #'

From 4ee3a95aab4cabf37dd0081a4beeff038e24fc5b Mon Sep 17 00:00:00 2001
From: Dianyi Yang <dianyi.yang@politics.ox.ac.uk>
Date: Sat, 29 Nov 2025 09:53:13 +0000
Subject: [PATCH 19/20] Update markdowns and description

---
 DESCRIPTION                    |   2 +-
 man/ar_confint.Rd              |  80 +++++++++++++++++++++
 man/ar_test.Rd                 | 123 +++++++++++++++++++++++++++++++++
 man/dot-ar_ci_exact_iid.Rd     |  28 ++++++++
 man/dot-ar_ci_numeric.Rd       |  41 +++++++++++
 man/dot-ar_exact_extra_args.Rd |  12 ++++
 man/dot-ar_test_core.Rd        |  35 ++++++++++
 man/dot-print_ar_ci.Rd         |  12 ++++
 8 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 man/ar_confint.Rd
 create mode 100644 man/ar_test.Rd
 create mode 100644 man/dot-ar_ci_exact_iid.Rd
 create mode 100644 man/dot-ar_ci_numeric.Rd
 create mode 100644 man/dot-ar_exact_extra_args.Rd
 create mode 100644 man/dot-ar_test_core.Rd
 create mode 100644 man/dot-print_ar_ci.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index dff264b4..51a7f2ed 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,6 +35,6 @@ BugReports: https://github.com/lrberge/fixest/issues
 URL: https://lrberge.github.io/fixest/, https://github.com/lrberge/fixest
 VignetteBuilder: knitr
 LazyData: true
-RoxygenNote: 7.3.2.9000
+RoxygenNote: 7.3.3
 Roxygen: list(markdown = TRUE)
 Encoding: UTF-8
diff --git a/man/ar_confint.Rd b/man/ar_confint.Rd
new file mode 100644
index 00000000..eaa320f8
--- /dev/null
+++ b/man/ar_confint.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{ar_confint}
+\alias{ar_confint}
+\alias{print.fixest_ar_confint}
+\title{Anderson-Rubin confidence interval for IV models}
+\usage{
+ar_confint(object, level = 0.95, vcov = NULL, ...)
+
+\method{print}{fixest_ar_confint}(x, ...)
+}
+\arguments{
+\item{object}{A \code{fixest} object obtained from \code{\link{feols}} with an IV specification.}
+
+\item{level}{Numeric scalar between 0 and 1. The confidence level. Default is 0.95.}
+
+\item{vcov}{Versatile argument to specify the VCOV. See \code{\link{ar_test}} for details.}
+
+\item{...}{Additional arguments passed to \code{\link{ar_test}}.}
+}
+\value{
+An object of class \code{fixest_ar_confint} containing:
+\item{level}{The confidence level used.}
+\item{method}{"AR".}
+\item{exact}{Logical, TRUE if exact closed-form was used.}
+\item{intervals}{A matrix with columns "lower" and "upper" giving the
+confidence interval(s). Each row is a separate interval (in case of
+disconnected regions).}
+\item{endo_name}{The name of the endogenous variable.}
+\item{point_est}{The 2SLS point estimate.}
+\item{model}{The original \code{feols} IV object.}
+}
+\description{
+Computes Anderson-Rubin confidence intervals by inverting the AR test.
+These confidence intervals are robust to weak instruments.
+
+Note: This function is provided for backward compatibility. The recommended
+approach is to use \code{ar_test(object, ci = TRUE)} which integrates the test
+and confidence interval computation.
+}
+\details{
+This function only supports models with a single endogenous variable. For models
+with multiple endogenous variables, the confidence region is multi-dimensional
+and cannot be easily represented.
+
+For iid errors, an exact closed-form confidence interval is computed.
+For non-iid errors (clustered, heteroskedastic, etc.), the confidence interval
+is computed by numerically inverting the AR test.
+
+Note that AR confidence intervals may be:
+\itemize{
+\item Empty (if the model is severely misspecified)
+\item Unbounded (if instruments are very weak)
+\item Disconnected (consisting of multiple disjoint intervals)
+}
+}
+\examples{
+# Simple IV example
+set.seed(123)
+n <- 500
+z <- rnorm(n)
+x <- 0.5 * z + rnorm(n)
+y <- 1 + 0.8 * x + rnorm(n)
+data <- data.frame(y = y, x = x, z = z)
+
+# IV estimation
+est <- feols(y ~ 1 | x ~ z, data = data)
+
+# AR confidence interval (exact for iid)
+ar_confint(est)
+
+# With clustered SEs (numeric inversion)
+data$cl <- rep(1:50, each = 10)
+est_cl <- feols(y ~ 1 | x ~ z, data = data, vcov = ~cl)
+ar_confint(est_cl)
+
+}
+\seealso{
+\code{\link{ar_test}} for the Anderson-Rubin test with integrated CI computation.
+}
diff --git a/man/ar_test.Rd b/man/ar_test.Rd
new file mode 100644
index 00000000..89aea3ab
--- /dev/null
+++ b/man/ar_test.Rd
@@ -0,0 +1,123 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{ar_test}
+\alias{ar_test}
+\alias{print.fixest_ar}
+\title{Anderson-Rubin test for IV models}
+\usage{
+ar_test(object, beta0 = NULL, vcov = NULL, level = 0.95, ci = NULL, ...)
+
+\method{print}{fixest_ar}(x, ...)
+}
+\arguments{
+\item{object}{A \code{fixest} object obtained from \code{\link{feols}} with an IV specification
+(i.e., formula includes \verb{| endo ~ inst}).}
+
+\item{beta0}{Numeric vector. The hypothesized values for the endogenous variable
+coefficients under the null hypothesis. If \code{NULL} (default), tests
+H0: beta = 0 for all endogenous variables. The length must match the number
+of endogenous variables in the model.}
+
+\item{vcov}{Versatile argument to specify the VCOV. If \code{NULL} (default), uses
+the same VCOV specification as the original model. See \code{\link{vcov.fixest}} for
+details on available options.}
+
+\item{level}{Numeric scalar between 0 and 1. The confidence level for the
+optional confidence interval. Default is 0.95.}
+
+\item{ci}{Logical, NULL, or the string "numeric". Whether to compute an AR confidence interval.
+If \code{NULL} (default):
+\itemize{
+\item Returns CI if vcov is "iid" AND there is exactly one endogenous variable with no fixed effects
+\item Does not return CI otherwise
+If \code{TRUE}: Attempts to compute CI (warns if not possible with multiple endo vars)
+If \code{"numeric"} (the string): Forces numeric inversion of the AR test to compute the confidence interval, even if the exact method is available.
+If \code{FALSE}: Does not compute CI
+}}
+
+\item{...}{Additional arguments passed to \code{\link{summary.fixest}}.}
+}
+\value{
+An object of class \code{fixest_ar} containing:
+\item{beta0}{The hypothesized coefficient values tested.}
+\item{stat}{The AR test statistic.}
+\item{dist}{Distribution used: "F" or "Chi-squared".}
+\item{df1}{Numerator degrees of freedom.}
+\item{df2}{Denominator degrees of freedom (NA for Chi-squared).}
+\item{pvalue}{The p-value of the test.}
+\item{method}{Character string: "Anderson-Rubin test".}
+\item{vcov_type}{The type of VCOV used for the test.}
+\item{endo_names}{Names of the endogenous variables.}
+\item{inst_names}{Names of the instruments.}
+\item{model}{The original \code{feols} IV object.}
+\item{ci}{(Optional) A list containing the AR confidence interval, with elements:
+\code{method}, \code{exact}, \code{level}, \code{intervals}.}
+}
+\description{
+Performs the Anderson-Rubin test for instrumental variable (IV) models.
+This test is robust to weak instruments and can be used to test hypotheses
+about the coefficients of endogenous variables.
+}
+\details{
+The Anderson-Rubin (AR) test is a weak-instrument robust test for the null
+hypothesis H0: beta = beta0, where beta is the vector of coefficients on the
+endogenous variables.
+
+The test proceeds as follows:
+\enumerate{
+\item Construct the transformed outcome: y* = y - X * beta0, where X contains
+the endogenous variables.
+\item Regress y* on the exogenous controls (W), instruments (Z), and any
+fixed effects, using the same VCOV specification as the original model.
+\item Test the joint significance of the instrument coefficients (H0: pi = 0).
+}
+
+For iid (homoskedastic) errors, the AR test statistic follows an exact
+F-distribution. For non-iid errors (heteroskedastic, clustered, etc.),
+a Chi-squared asymptotic distribution is used.
+
+When \code{ci = TRUE} (or by default for iid with single endo var and no fixed effects),:
+\itemize{
+\item For iid errors with a single endogenous variable and no fixed effects, an exact closed-form
+AR confidence interval is computed using the quadratic formula.
+\item For non-iid errors with a single endogenous variable, the CI is computed by numerically inverting the AR test.
+}
+}
+\examples{
+# Simple IV example
+set.seed(123)
+n <- 1000
+z <- rnorm(n)
+x <- 0.5 * z + rnorm(n)
+y <- 1 + 0.8 * x + rnorm(n)
+data <- data.frame(y = y, x = x, z = z)
+
+# IV estimation
+est <- feols(y ~ 1 | x ~ z, data = data)
+
+# Test H0: beta = 0 (default) - includes CI since vcov is iid
+ar_test(est)
+
+# Test H0: beta = 0.8 (close to true value, should not reject)
+ar_test(est, beta0 = 0.8)
+
+# Test H0: beta = 0 (far from true value, should reject)
+ar_test(est, beta0 = 0)
+
+# With clustered standard errors (Chi-squared distribution)
+data$cl <- rep(1:100, each = 10)
+est_cl <- feols(y ~ 1 | x ~ z, data = data, vcov = ~cl)
+ar_test(est_cl)
+
+# Force CI computation even for clustered SEs
+ar_test(est_cl, ci = TRUE)
+
+}
+\references{
+Anderson, T. W. and Rubin, H. (1949). "Estimation of the Parameters of a Single
+Equation in a Complete System of Stochastic Equations." Annals of Mathematical
+Statistics, 20(1), 46-63.
+}
+\seealso{
+\code{\link{wald}} for standard Wald tests, \code{\link{feols}} for IV estimation.
+}
diff --git a/man/dot-ar_ci_exact_iid.Rd b/man/dot-ar_ci_exact_iid.Rd
new file mode 100644
index 00000000..4e494baa
--- /dev/null
+++ b/man/dot-ar_ci_exact_iid.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{.ar_ci_exact_iid}
+\alias{.ar_ci_exact_iid}
+\title{Compute exact AR confidence interval for iid errors (single endogenous variable)}
+\usage{
+.ar_ci_exact_iid(object, level = 0.95)
+}
+\arguments{
+\item{object}{A \code{fixest} object with IV and single endogenous variable.}
+
+\item{level}{Confidence level (e.g., 0.95).}
+}
+\value{
+A list with:
+\item{method}{"AR"}
+\item{exact}{TRUE}
+\item{level}{The confidence level}
+\item{intervals}{Matrix with columns "lower" and "upper"}
+}
+\description{
+Uses the closed-form quadratic formula for AR confidence sets when:
+\itemize{
+\item There is exactly one endogenous variable
+\item The vcov is iid (homoskedastic errors)
+}
+}
+\keyword{internal}
diff --git a/man/dot-ar_ci_numeric.Rd b/man/dot-ar_ci_numeric.Rd
new file mode 100644
index 00000000..60212feb
--- /dev/null
+++ b/man/dot-ar_ci_numeric.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{.ar_ci_numeric}
+\alias{.ar_ci_numeric}
+\title{Compute numeric AR confidence interval by test inversion}
+\usage{
+.ar_ci_numeric(
+  object,
+  level = 0.95,
+  vcov = NULL,
+  n_grid = 200,
+  tol = 1e-08,
+  orig_call = NULL,
+  ...
+)
+}
+\arguments{
+\item{object}{A \code{fixest} object with IV.}
+
+\item{level}{Confidence level (e.g., 0.95).}
+
+\item{vcov}{VCOV specification.}
+
+\item{n_grid}{Number of coarse grid points. Default 200.}
+
+\item{tol}{Tolerance for root finding. Default 1e-8.}
+
+\item{...}{Additional arguments.}
+}
+\value{
+A list with:
+\item{method}{"AR"}
+\item{exact}{FALSE}
+\item{level}{The confidence level}
+\item{intervals}{Matrix with columns "lower" and "upper"}
+}
+\description{
+Inverts the AR test numerically to find the confidence set.
+Used when vcov is not iid or when exact computation is not possible.
+}
+\keyword{internal}
diff --git a/man/dot-ar_exact_extra_args.Rd b/man/dot-ar_exact_extra_args.Rd
new file mode 100644
index 00000000..006489a2
--- /dev/null
+++ b/man/dot-ar_exact_extra_args.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{.ar_exact_extra_args}
+\alias{.ar_exact_extra_args}
+\title{Identify additional arguments beyond those supported by the exact AR CI}
+\usage{
+.ar_exact_extra_args(object)
+}
+\description{
+Identify additional arguments beyond those supported by the exact AR CI
+}
+\keyword{internal}
diff --git a/man/dot-ar_test_core.Rd b/man/dot-ar_test_core.Rd
new file mode 100644
index 00000000..270611e4
--- /dev/null
+++ b/man/dot-ar_test_core.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{.ar_test_core}
+\alias{.ar_test_core}
+\title{Internal core function for Anderson-Rubin test}
+\usage{
+.ar_test_core(object, beta0, vcov = NULL, orig_call = NULL, ...)
+}
+\arguments{
+\item{object}{A \code{fixest} object obtained from \code{\link{feols}} with an IV specification.}
+
+\item{beta0}{Numeric vector. The hypothesized values for the endogenous variable
+coefficients under the null hypothesis.}
+
+\item{vcov}{Versatile argument to specify the VCOV.}
+
+\item{...}{Additional arguments passed to \code{\link{feols}}.}
+}
+\value{
+A list containing:
+\item{stat}{The test statistic (F or Chi-squared depending on vcov).}
+\item{df1}{Numerator degrees of freedom.}
+\item{df2}{Denominator degrees of freedom (NA for Chi-squared).}
+\item{pvalue}{The p-value.}
+\item{dist}{Distribution used: "F" or "Chi-squared".}
+\item{vcov_type}{The VCOV type used.}
+\item{endo_names}{Names of endogenous variables.}
+\item{inst_names}{Names of instruments.}
+\item{inst_tested}{Instrument coefficients actually tested.}
+}
+\description{
+This is an internal function that computes the AR test statistic and p-value.
+It is called by both ar_test() and ar_confint().
+}
+\keyword{internal}
diff --git a/man/dot-print_ar_ci.Rd b/man/dot-print_ar_ci.Rd
new file mode 100644
index 00000000..90534376
--- /dev/null
+++ b/man/dot-print_ar_ci.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ar_test.R
+\name{.print_ar_ci}
+\alias{.print_ar_ci}
+\title{Internal function to print AR confidence interval}
+\usage{
+.print_ar_ci(ci, endo_name)
+}
+\description{
+Internal function to print AR confidence interval
+}
+\keyword{internal}

From 8b489af477ef5b730d090c53d219700d5f73bce9 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Sat, 29 Nov 2025 19:23:48 +0000
Subject: [PATCH 20/20] Fix zero weight problem (#5)

Fix obs_selection handling in AR test functions
* Initial plan

* Apply obs_selection filtering in .ar_test_core and .ar_ci_exact_iid

Co-authored-by: kv9898 <105025148+kv9898@users.noreply.github.com>

* pass weights to feols
---
 R/ar_test.R | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/R/ar_test.R b/R/ar_test.R
index 7fafbca1..cad5d4c7 100644
--- a/R/ar_test.R
+++ b/R/ar_test.R
@@ -88,6 +88,15 @@
   # Fetch the data
   data <- fetch_data(object, "To apply AR test, ")
 
+  # Apply obs_selection to filter data to the estimation sample
+  # obs_selection may contain indices for subset selection and/or 
+  # negative indices for removed observations (obsRemoved)
+  if (length(object$obs_selection) > 0) {
+    for (obs in object$obs_selection) {
+      data <- data[obs, , drop = FALSE]
+    }
+  }
+
   # Get the dependent variable
   fml_linear <- formula(object, type = "linear")
   y_vec <- eval(fml_linear[[2]], data)
@@ -205,6 +214,7 @@
     call_to_eval[[1]] <- as.name("feols")
     call_to_eval$fml <- new_fml
     call_to_eval$data <- as.name("data")
+    call_to_eval$weights <- as.name("weights_val")
     if (vcov_provided) {
       call_to_eval$vcov <- vcov
     }
@@ -324,6 +334,13 @@
   # Fetch data
   data <- fetch_data(object, "To compute AR CI, ")
 
+  # Apply obs_selection to filter data to the estimation sample
+  if (length(object$obs_selection) > 0) {
+    for (obs in object$obs_selection) {
+      data <- data[obs, , drop = FALSE]
+    }
+  }
+
   # Get y, endogenous variable, and instruments
   fml_linear <- formula(object, type = "linear")
   y <- eval(fml_linear[[2]], data)