Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ONGOING_ISSUES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

So far `fixest` objects are compatible with variances computed with the package `sandwich` with the following exceptions:

- `vcovHC` with `type = "HC4"`: The compatibility is only partial, i.e. it works for models without fixed-effects but breaks with fixed-effects estimations. This is because the hatvalues, needed for this kind of VCOV correction, are not defined when fixed-effects are used (actually I could make it compatible but the model would need to be reestimated with dummy variables, and this hardly squares with the idea of efficient fixed-effects estimation).

- `vcovBS`, i.e. bootstraped standard-errors: Absolutely not compatible so far, but it will become compatible later (hopefully).


Expand Down
2 changes: 1 addition & 1 deletion R/r-language.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ fml_split_internal = function(fml, split.lhs = FALSE){


fml_split = function(fml, i, split.lhs = FALSE, text = FALSE, raw = FALSE){
# I had to create that fonction to cope with the following formula:
# I had to create that function to cope with the following formula:
#
# y ~ x1 | fe1 | u ~ z
#
Expand Down
172 changes: 115 additions & 57 deletions R/sparse_model_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,18 +111,23 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"

# Allows passage of formula to sparse_model_matrix. A bit inefficient, but it works.
isFormula = FALSE
split_fml = NULL
if (inherits(object, "formula")) {
split_fml = fml_split_internal(object)
fml_parts = get_fml_parts(object, parts_as_formula = TRUE)
if (isTRUE(collin.rm)) {
message("Formula passed to sparse_model_matrix with collin.rm == TRUE. Estimating feols with formula.")
object = feols(object, data = data)
} else if (length(split_fml) == 3) {
} else if (!is.null(fml_parts$inst_fml)) {
message("Formula passed to sparse_model_matrix with an iv formula. Estimating feols with formula.")
object = feols(object, data = data)
} else {
isFormula = TRUE
}
}

# na.rm = FALSE doesn't work with type = "fixef" (which FE col gets NA?)
if (("fixef" %in% type & !na.rm)) {
# na.rm = TRUE
message("na.rm = FALSE doesn't work with type = 'fixef'. It has been set to TRUE.")
}

# Panel setup
Expand All @@ -132,45 +137,26 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"

# The formulas
if (isFormula) {
fml_linear = split_fml[[1]]

fml_0 = attr(stats::terms(fml_linear), "intercept") == 0
fake_intercept = !is.null(split_fml[[2]]) | fml_0
fml_parts = get_fml_parts(object, parts_as_formula = TRUE)
} else {
fml_linear = formula(object, type = "linear")

# we kick out the intercept if there is presence of fixed-effects
fml_0 = attr(stats::terms(fml_linear), "intercept") == 0
fake_intercept = !is.null(object$fixef_vars) && !(!is.null(object$slope_flag) && all(object$slope_flag < 0))
fake_intercept = fake_intercept | fml_0
}

fml_parts = get_fml_parts(formula(object), parts_as_formula = TRUE)
}
fml_0 = attr(stats::terms(fml_parts$exo_fml), "intercept") == 0
# we kick out the intercept if there is presence of fixed-effects
fake_intercept = !is.null(fml_parts$fe_fml) | fml_0
res = list()

if ("lhs" %in% type) {
lhs_text = deparse_long(fml_linear[[2]])
lhs = eval(fml_linear[[2]], data)
lhs = eval(fml_parts$y_fml[[2]], data)
lhs = Matrix::Matrix(lhs, sparse = TRUE, ncol = 1)

colnames(lhs) = lhs_text
colnames(lhs) = deparse_long(fml_parts$y_fml[[2]])

res[["lhs"]] = lhs
}

if ("rhs" %in% type && !isTRUE(object$onlyFixef)) {

fml = fml_linear

if (isFormula && (length(split_fml) == 3)) {
fml_iv = split_fml[[3]]
fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
} else if (isTRUE(object$iv)) {
fml_iv = object$fml_all$iv
fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
}

fml = .xpd(~ ..endo + ..rhs, ..rhs = fml_parts$exo_fml, ..endo = fml_parts$endo_fml)
vars = attr(stats::terms(fml), "term.labels")

linear.mat = vars_to_sparse_mat(vars = vars, data = data, object = object,
collin.rm = collin.rm, type = "rhs",
add_intercept = !fake_intercept)
Expand All @@ -180,18 +166,10 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"

if ("fixef" %in% type) {

if (isFormula && (length(split_fml) < 2)) {
mat_FE = NULL
} else if (!isFormula & length(object$fixef_id) == 0) {
if (is.null(fml_parts$fe_fml)) {
mat_FE = NULL
} else {

if (isFormula) {
fixef_fml = .xpd(~ ..fe, ..fe = split_fml[[2]])
} else {
fixef_fml = object$fml_all$fixef
}

fixef_fml = .xpd(~ ..fe, ..fe = fml_parts$fe_fml)
fixef_terms_full = fixef_terms(fixef_fml)
fe_vars = fixef_terms_full$fe_vars
slope_var_list = fixef_terms_full$slope_vars_list
Expand Down Expand Up @@ -334,7 +312,7 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"
# IV
#
if ("iv.endo" %in% type) {
fml = object$iv_endo_fml
fml = .xpd(~ ..endo, ..endo = fml_parts$endo_fml)
vars = attr(stats::terms(fml), "term.labels")
endo.mat = vars_to_sparse_mat(vars = vars, data = data, object,
collin.rm = collin.rm, type = "iv.endo")
Expand All @@ -343,7 +321,7 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"
}

if ("iv.inst" %in% type) {
fml = object$fml_all$iv
fml = .xpd(~ ..inst, ..inst = fml_parts$inst_fml)
vars = attr(stats::terms(fml), "term.labels")
inst.mat = vars_to_sparse_mat(vars = vars, data = data, object,
collin.rm = collin.rm, type = "iv.inst")
Expand All @@ -352,8 +330,7 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"
}

if ("iv.exo" %in% type) {

fml = object$fml_all$linear
fml = .xpd(~ ..exo, ..exo = fml_parts$exo_fml)
vars = attr(stats::terms(fml), "term.labels")
exo.mat = vars_to_sparse_mat(vars = vars, data = data, object = object,
collin.rm = collin.rm, type = "iv.exo",
Expand All @@ -363,23 +340,18 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"
}

if ("iv.rhs1" %in% type) {
fml = object$fml
if (object$iv_stage == 2) {
fml_iv = object$fml_all$iv
fml = .xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]])
}

fml = .xpd(~ ..inst + ..rhs, ..rhs = fml_parts$exo_fml, ..inst = fml_parts$inst_fml)
vars = attr(stats::terms(fml), "term.labels")
iv_rhs1 = vars_to_sparse_mat(vars = vars, data = data, object = object,
collin.rm = collin.rm, type = "iv.rhs1",
add_intercept = !fake_intercept)
collin.rm = collin.rm, type = "iv.rhs1",
add_intercept = !fake_intercept)

res[["iv.rhs1"]] = iv_rhs1
}

if ("iv.rhs2" %in% type) {
# Second stage
if (!object$iv_stage == 2) {
if (!isFormula && !object$iv_stage == 2) {
stop("In model.matrix, the type 'iv.rhs2' is only valid for second stage IV models. This estimation is the first stage.")
}

Expand All @@ -394,7 +366,7 @@ sparse_model_matrix = function(object, data, type = "rhs", sample = "estimation"

# II) we create the variables
fml = object$fml
fml = .xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]])
fml = .xpd(~ ..fit + ..rhs, ..fit = fit_vars, ..rhs = fml_parts$exo_fml)
vars = attr(stats::terms(fml), "term.labels")
iv_rhs2 = vars_to_sparse_mat(vars = vars, data = data, object = object,
collin.rm = collin.rm, type = "iv.rhs2",
Expand Down Expand Up @@ -700,7 +672,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL,

exo = lapply(object$iv_first_stage, function(x) names(stats::coef(x)))
exo = unique(unlist(exo, use.names = FALSE))

inst = setdiff(exo, names(object$coefficients))
}

Expand Down Expand Up @@ -733,7 +705,6 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL,
coef_names = names(object$coefficients)
add_intercept = "(Intercept)" %in% coef_names
}

keep = colnames(mat) %in% coef_names
if (length(keep) == 0) {
mat = NULL
Expand All @@ -751,3 +722,90 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL,

return(mat)
}

get_fml_parts <- function(formula, parts_as_formula = FALSE) {
has_lhs <- !is_rhs_only(formula)
fml_split_tilde <- sm_fml_breaker(formula, "~")

res <- list(
y_fml = NULL,
exo_fml = NULL,
fe_fml = NULL,
endo_fml = NULL,
inst_fml = NULL
)

# LHS
if (has_lhs) {
res$y_fml <- fml_split_tilde[[length(fml_split_tilde)]]
# Drop y and treat everything as RHS only formula
fml_split_tilde <- fml_split_tilde[-length(fml_split_tilde)]
}

# OLS
if (length(fml_split_tilde) == 1) {
fml_split <- sm_fml_breaker(fml_split_tilde[[1]], "|")
if (length(fml_split) == 2) {
res$exo_fml <- fml_split[[2]]
res$fe_fml <- fml_split[[1]]
} else {
res$exo_fml <- fml_split[[1]]
}
}

# IV
if (length(fml_split_tilde) == 2) {
res$inst_fml <- fml_split_tilde[[1]]

# This works b/c we know there is an IV
fml_W_T_split <- sm_fml_breaker(fml_split_tilde[[2]], "|")
if (length(fml_W_T_split) == 3) {
res$endo_fml <- fml_W_T_split[[1]]
res$fe_fml <- fml_W_T_split[[2]]
res$exo_fml <- fml_W_T_split[[3]]
} else {
res$endo_fml <- fml_W_T_split[[1]]
res$exo_fml <- fml_W_T_split[[2]]
}
}

if (parts_as_formula) {
res = lapply(res, function(rhs) {
if (is.null(rhs)) return(NULL)
fml <- ~.
fml[[2]] <- rhs
return(fml)
})
}
return(res)
}

is_rhs_only <- function(fml) {
# e.g. fml = ~ x
if (length(fml) == 2) {
return(TRUE)
}
# e.g. fml = ~ x | t ~ z
if (length(fml[[2]]) == 2) {
return(TRUE)
}
return(FALSE)
}

# fml_breaker but works with RHS only
sm_fml_breaker <- function(fml, op) {
res <- list()
k <- 1
while (is_operator(fml, op) & length(fml) == 3) {
res[[k]] <- fml[[3]]
k <- k + 1
fml <- fml[[2]]
}

if (length(fml) == 2) {
res[[k]] <- fml[[2]]
} else {
res[[k]] <- fml
}
res
}
10 changes: 9 additions & 1 deletion tests/fixest_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -2579,6 +2579,8 @@ test(names(m_lhs_rhs_fixef), c("y1", "fit_x2", "x1", "species"))
#### sparse_model_matrix ####
####

chunk("Sparse model matrix")

# unit tests: i(..., sparse = TRUE)
x <- c(1, 1, 3, 1, 3)
sp1 <- i(x, sparse = TRUE)
Expand Down Expand Up @@ -2712,6 +2714,7 @@ test(nrow(sm_lag), nobs(res_lag))
# Interacted fixef
res = feols(y1 ~ x1 + x2 + x3 | species^fe2, base)
sm_ife = sparse_model_matrix(res, data = base, type = "fixef", collin.rm = FALSE)
test(ncol(sm_ife), 45)

# fixef
res = feols(y1 ~ x1 + x2 + x3 | species + fe2, base)
Expand Down Expand Up @@ -2979,8 +2982,10 @@ names(base) = c("y", "x1", "x_endo_1", "x_inst_1", "fe")
set.seed(2)
base$x_inst_2 = 0.2 * base$y + 0.2 * base$x_endo_1 + rnorm(150, sd = 0.5)
base$x_endo_2 = 0.2 * base$y - 0.2 * base$x_inst_1 + rnorm(150, sd = 0.5)
base$w = sample(c(0.5, 0.25), nrow(base), replace = TRUE)
base$cl_var = rep(1:50, 3) # using `fe` to cluster causes a vcov PSD warning


# Checking a basic estimation
est_iv = feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1 + x_inst_2, base)

Expand All @@ -2991,9 +2996,12 @@ est_iv_uneven = feols(y ~ x1 | x_endo_1 ~ x_inst_1 + x_inst_2, base, cluster = "
fitstat(est_iv_uneven, ~ f + ivf + ivf2 + wald + ivwald + ivwald2 + wh + sargan + kpr + rmse + g + n + ll + sq.cor + r2)

est_fe = feols(y ~ x1 | fe, base)

fitstat(est_fe, ~ wf)

# https://github.com/lrberge/fixest/issues/584
est_weighted = feols(y ~ x1 | fe, base, weights = ~w)
fitstat(est_weighted, ~ rmse)

# fitstat works with `split` and `lean` (https://github.com/lrberge/fixest/issues/566)
est_split = feols(y ~ x1, base, fsplit = ~fe)
est_split_lean = feols(y ~ x1, base, fsplit = ~fe, lean = TRUE)
Expand Down
Loading