diff --git a/econml/dml/_rlearner.py b/econml/dml/_rlearner.py
index 6eca79aab..2effb3d17 100644
--- a/econml/dml/_rlearner.py
+++ b/econml/dml/_rlearner.py
@@ -98,8 +98,8 @@ def __init__(self, model_final):
     def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None,
             sample_weight=None, freq_weight=None, sample_var=None, groups=None):
         Y_res, T_res = nuisances
-        self._model_final.fit(X, T, T_res, Y_res, sample_weight=sample_weight,
-                              freq_weight=freq_weight, sample_var=sample_var)
+        self._model_final.fit(X, T, T_res, Y_res, **filter_none_kwargs(sample_weight=sample_weight,
+                              freq_weight=freq_weight, sample_var=sample_var, groups=groups))
         return self

     def predict(self, X=None):
diff --git a/econml/iv/dml/_dml.py b/econml/iv/dml/_dml.py
index 12e690e34..923502a12 100644
--- a/econml/iv/dml/_dml.py
+++ b/econml/iv/dml/_dml.py
@@ -157,7 +157,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None,
         XT_res = self._combine(X, T_res)
         XZ_res = self._combine(X, Z_res)
         filtered_kwargs = filter_none_kwargs(sample_weight=sample_weight,
-                                             freq_weight=freq_weight, sample_var=sample_var)
+                                             freq_weight=freq_weight, sample_var=sample_var, groups=groups)
         self._model_final.fit(XZ_res, XT_res, Y_res, **filtered_kwargs)

@@ -376,7 +376,8 @@ def __init__(self, *,
                  mc_iters=None,
                  mc_agg='mean',
                  random_state=None,
-                 allow_missing=False):
+                 allow_missing=False,
+                 cov_type="HC0"):
         self.model_y_xw = clone(model_y_xw, safe=False)
         self.model_t_xw = clone(model_t_xw, safe=False)
         self.model_t_xwz = clone(model_t_xwz, safe=False)
@@ -384,6 +385,7 @@ def __init__(self, *,
         self.projection = projection
         self.featurizer = clone(featurizer, safe=False)
         self.fit_cate_intercept = fit_cate_intercept
+        self.cov_type = cov_type

         super().__init__(discrete_outcome=discrete_outcome,
                          discrete_instrument=discrete_instrument,
@@ -403,7 +405,7 @@ def _gen_featurizer(self):
         return clone(self.featurizer, safe=False)

     def _gen_model_final(self):
-        return StatsModels2SLS(cov_type="HC0")
+        return StatsModels2SLS(cov_type=self.cov_type)

     def _gen_ortho_learner_model_final(self):
         return _OrthoIVModelFinal(self._gen_model_final(), self._gen_featurizer(), self.fit_cate_intercept)
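Taken together, the changes above let `groups` flow from the estimator's `fit` call down to the final-stage model. A minimal sketch of the intended usage, mirroring the tests added at the end of this diff (the data-generating details are illustrative only):

```python
import numpy as np
from econml.iv.dml import OrthoIV

rng = np.random.default_rng(0)
n, n_groups = 500, 25
X = rng.normal(size=(n, 3))
Z = rng.binomial(1, 0.5, n)                # binary instrument
T = rng.binomial(1, 0.5, n)                # binary treatment
groups = rng.integers(0, n_groups, n)      # one cluster label per row
Y = X[:, 0] + 2 * T + rng.normal(size=n)

# cov_type="clustered" is the new knob; the groups passed to fit() define the clusters
est = OrthoIV(discrete_treatment=True, discrete_instrument=True, cov_type="clustered")
est.fit(Y, T, Z=Z, X=X, groups=groups)
lb, ub = est.effect_interval(X[:5], alpha=0.05)   # cluster-robust confidence intervals
```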
diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py
index 9bc5529f7..2c0c93e9d 100644
--- a/econml/sklearn_extensions/linear_model.py
+++ b/econml/sklearn_extensions/linear_model.py
@@ -1693,7 +1693,7 @@ class StatsModelsLinearRegression(_StatsModelsWrapper):
     fit_intercept : bool, default True
         Whether to fit an intercept in this model
     cov_type : string, default "HC0"
-        The covariance approach to use. Supported values are "HCO", "HC1", and "nonrobust".
+        The covariance approach to use. Supported values are "HC0", "HC1", "nonrobust", and "clustered".
     enable_federation : bool, default False
         Whether to enable federation (aggregating this model's results with other models in a distributed setting).
         This requires additional memory proportional to the number of columns in X to the fourth power.
@@ -1704,10 +1704,10 @@ def __init__(self, fit_intercept=True, cov_type="HC0", *, enable_federation=Fals
         self.fit_intercept = fit_intercept
         self.enable_federation = enable_federation

-    def _check_input(self, X, y, sample_weight, freq_weight, sample_var):
+    def _check_input(self, X, y, sample_weight, freq_weight, sample_var, groups=None):
         """Check dimensions and other assertions."""
-        X, y, sample_weight, freq_weight, sample_var = check_input_arrays(
-            X, y, sample_weight, freq_weight, sample_var, dtype='numeric')
+        X, y, sample_weight, freq_weight, sample_var, groups = check_input_arrays(
+            X, y, sample_weight, freq_weight, sample_var, groups, dtype='numeric')
         if X is None:
             X = np.empty((y.shape[0], 0))
         if self.fit_intercept:
@@ -1720,6 +1720,8 @@ def _check_input(self, X, y, sample_weight, freq_weight, sample_var):
             freq_weight = np.ones(y.shape[0])
         if sample_var is None:
             sample_var = np.zeros(y.shape)
+        if groups is None:
+            groups = np.arange(y.shape[0])

         # check freq_weight should be integer and should be accompanied by sample_var
         if np.any(np.not_equal(np.mod(freq_weight, 1), 0)):
@@ -1753,7 +1755,7 @@ def _check_input(self, X, y, sample_weight, freq_weight, sample_var):

         # check array shape
         assert (X.shape[0] == y.shape[0] == sample_weight.shape[0] ==
-                freq_weight.shape[0] == sample_var.shape[0]), "Input lengths not compatible!"
+                freq_weight.shape[0] == sample_var.shape[0] == groups.shape[0]), "Input lengths not compatible!"
         if y.ndim >= 2:
             assert (y.ndim == sample_var.ndim and
                     y.shape[1] == sample_var.shape[1]), "Input shapes not compatible: {}, {}!".format(
@@ -1767,9 +1769,9 @@ def _check_input(self, X, y, sample_weight, freq_weight, sample_var):
         else:
             weighted_y = y * np.sqrt(sample_weight).reshape(-1, 1)
             sample_var = sample_var * (sample_weight.reshape(-1, 1))
-        return weighted_X, weighted_y, freq_weight, sample_var
+        return weighted_X, weighted_y, freq_weight, sample_var, groups

-    def fit(self, X, y, sample_weight=None, freq_weight=None, sample_var=None):
+    def fit(self, X, y, sample_weight=None, freq_weight=None, sample_var=None, groups=None):
         """
         Fits the model.
@@ -1788,13 +1790,15 @@ def fit(self, X, y, sample_weight=None, freq_weight=None, sample_var=None):
         sample_var : {(N,), (N, p)} nd array_like or None
             Variance of the outcome(s) of the original freq_weight[i] observations that were used to
             compute the mean outcome represented by observation i.
+        groups : (N,) array_like or None
+            Group labels for clustered standard errors.

         Returns
         -------
         self : StatsModelsLinearRegression
         """
         # TODO: Add other types of covariance estimation (e.g. Newey-West (HAC), HC2, HC3)
-        X, y, freq_weight, sample_var = self._check_input(X, y, sample_weight, freq_weight, sample_var)
+        X, y, freq_weight, sample_var, groups = self._check_input(X, y, sample_weight, freq_weight, sample_var, groups)

         WX = X * np.sqrt(freq_weight).reshape(-1, 1)
@@ -1840,6 +1844,8 @@ def fit(self, X, y, sample_weight=None, freq_weight=None, sample_var=None):
                 self.XXXy = np.einsum('nx,ny->yx', WX, wy)
                 self.XXXX = np.einsum('nw,nx->wx', WX, WX)
                 self.sample_var = np.average(sv, weights=freq_weight, axis=0) * n_obs
+            elif self.cov_type == 'clustered':
+                raise AttributeError("Clustered standard errors are not supported with federation enabled.")

         sigma_inv = np.linalg.pinv(self.XX)
@@ -1871,8 +1877,10 @@ def fit(self, X, y, sample_weight=None, freq_weight=None, sample_var=None):
             for j in range(self._n_out):
                 weighted_sigma = np.matmul(WX.T, WX * var_i[:, [j]])
                 self._var.append(correction * np.matmul(sigma_inv, np.matmul(weighted_sigma, sigma_inv)))
+        elif self.cov_type == 'clustered':
+            self._var = self._compute_clustered_variance_linear(WX, y - np.matmul(X, param), sigma_inv, groups)
         else:
-            raise AttributeError("Unsupported cov_type. Must be one of nonrobust, HC0, HC1.")
+            raise AttributeError("Unsupported cov_type. Must be one of nonrobust, HC0, HC1, clustered.")

         self._param_var = np.array(self._var)
@@ -1937,7 +1945,6 @@ def aggregate(models: List[StatsModelsLinearRegression]):
                 agg_model._var = correction * np.matmul(sigma_inv, np.matmul(weighted_sigma.squeeze(0), sigma_inv))
             else:
                 agg_model._var = [correction * np.matmul(sigma_inv, np.matmul(ws, sigma_inv)) for ws in weighted_sigma]
-
         else:
             assert agg_model.cov_type == 'nonrobust' or agg_model.cov_type is None
             sigma = XXyy - 2 * np.einsum('yx,xy->y', XXXy, param) + np.einsum('wx,wy,xy->y', XXXX, param, param)
@@ -1954,6 +1961,54 @@ def aggregate(models: List[StatsModelsLinearRegression]):

         return agg_model

+    def _compute_clustered_variance_linear(self, WX, eps_i, sigma_inv, groups):
+        """
+        Compute clustered standard errors for linear regression.
+
+        Parameters
+        ----------
+        WX : array_like
+            Weighted design matrix
+        eps_i : array_like
+            Residuals
+        sigma_inv : array_like
+            Inverse of X.T @ X
+        groups : array_like
+            Group labels for clustering
+
+        Returns
+        -------
+        var : array_like or list
+            Clustered variance matrix
+        """
+        n, k = WX.shape
+        group_ids, inverse_idx = np.unique(groups, return_inverse=True)
+        n_groups = len(group_ids)
+
+        # Group correction factor
+        group_correction = n_groups / (n_groups - 1)
+
+        if eps_i.ndim < 2:
+            # Single outcome case
+            WX_e = WX * eps_i.reshape(-1, 1)
+            group_sums = np.zeros((n_groups, k))
+            np.add.at(group_sums, inverse_idx, WX_e)
+            s = group_sums.T @ group_sums
+
+            return group_correction * np.matmul(sigma_inv, np.matmul(s, sigma_inv))
+        else:
+            # Multiple outcome case
+            var_list = []
+            for j in range(eps_i.shape[1]):
+                WX_e = WX * eps_i[:, [j]]
+                group_sums = np.zeros((n_groups, k))
+                np.add.at(group_sums, inverse_idx, WX_e)
+                s = group_sums.T @ group_sums
+
+                var_list.append(group_correction * np.matmul(sigma_inv, np.matmul(s, sigma_inv)))
+
+            return var_list
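For reference, the helper above implements the standard Liang-Zeger cluster-robust sandwich estimator. Writing $s_g = \sum_{i \in g} x_i \hat\varepsilon_i$ for the score sum of cluster $g$ (the rows of `group_sums`), it returns

$$
\widehat{V}(\hat\beta) = \frac{G}{G-1}\,(X^\top X)^{-1}\Big(\sum_{g=1}^{G} s_g s_g^\top\Big)(X^\top X)^{-1},
$$

where $G$ is the number of clusters, $X$ is the weighted design matrix `WX`, and $(X^\top X)^{-1}$ is `sigma_inv`. Note that $G/(G-1)$ is the only finite-sample correction applied here; statsmodels additionally multiplies by $(n-1)/(n-k)$, which the new test at the end of this diff accounts for.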


class StatsModelsRLM(_StatsModelsWrapper):
    """
@@ -2040,8 +2095,8 @@ class StatsModels2SLS(_StatsModelsWrapper):

    Parameters
    ----------
-    cov_type : {'HC0', 'HC1', 'nonrobust', or None}, default 'HC0'
-        Indicates how the covariance matrix is estimated.
+    cov_type : {'HC0', 'HC1', 'nonrobust', 'clustered', or None}, default 'HC0'
+        Indicates how the covariance matrix is estimated. With 'clustered', the groups passed to
+        fit() define the clusters; if groups is None, each observation is its own cluster.
    """

    def __init__(self, cov_type="HC0"):
@@ -2049,14 +2104,19 @@ def __init__(self, cov_type="HC0"):
         self.cov_type = cov_type
         return

-    def _check_input(self, Z, T, y, sample_weight):
+    def _check_input(self, Z, T, y, sample_weight, groups=None):
         """Check dimensions and other assertions."""
         # set default values for None
         if sample_weight is None:
             sample_weight = np.ones(y.shape[0])
+        if groups is None:
+            groups = np.arange(y.shape[0])
+        else:
+            groups = np.asarray(groups)

         # check array shape
-        assert (T.shape[0] == Z.shape[0] == y.shape[0] == sample_weight.shape[0]), "Input lengths not compatible!"
+        assert (T.shape[0] == Z.shape[0] == y.shape[0] == sample_weight.shape[0] == groups.shape[0]), \
+            "Input lengths not compatible!"

         # check dimension of instruments is more than dimension of treatments
         if Z.shape[1] < T.shape[1]:
@@ -2073,9 +2133,9 @@ def _check_input(self, Z, T, y, sample_weight):
             weighted_y = y * np.sqrt(sample_weight)
         else:
             weighted_y = y * np.sqrt(sample_weight).reshape(-1, 1)
-        return weighted_Z, weighted_T, weighted_y
+        return weighted_Z, weighted_T, weighted_y, groups

-    def fit(self, Z, T, y, sample_weight=None, freq_weight=None, sample_var=None):
+    def fit(self, Z, T, y, sample_weight=None, freq_weight=None, sample_var=None, groups=None):
         """
         Fits the model.
@@ -2096,7 +2156,8 @@ def fit(self, Z, T, y, sample_weight=None, freq_weight=None, sample_var=None):
         sample_var : {(N,), (N, p)} nd array_like or None
             Variance of the outcome(s) of the original freq_weight[i] observations that were used to
             compute the mean outcome represented by observation i.
-
+        groups : (N,) array_like or None
+            Group labels for clustered standard errors; if None, each observation is its own cluster.

         Returns
         -------
@@ -2105,7 +2166,7 @@ def fit(self, Z, T, y, sample_weight=None, freq_weight=None, sample_var=None):
         assert freq_weight is None, "freq_weight is not supported yet for this class!"
         assert sample_var is None, "sample_var is not supported yet for this class!"

-        Z, T, y = self._check_input(Z, T, y, sample_weight)
+        Z, T, y, groups = self._check_input(Z, T, y, sample_weight, groups)

         self._n_out = 0 if y.ndim < 2 else y.shape[1]
@@ -2164,8 +2225,58 @@ def fit(self, Z, T, y, sample_weight=None, freq_weight=None, sample_var=None):
                 weighted_sigma = np.matmul(that.T, that * var_i[:, [j]])
                 self._var.append(correction * np.matmul(thatT_that_inv, np.matmul(weighted_sigma, thatT_that_inv)))
+        elif self.cov_type == 'clustered':
+            self._var = self._compute_clustered_variance(that, y - np.dot(T, param), thatT_that_inv, groups)
         else:
-            raise AttributeError("Unsupported cov_type. Must be one of nonrobust, HC0, HC1.")
+            raise AttributeError("Unsupported cov_type. Must be one of nonrobust, HC0, HC1, clustered.")

         self._param_var = np.array(self._var)

         return self
+
+    def _compute_clustered_variance(self, that, eps_i, thatT_that_inv, groups):
+        """
+        Compute clustered standard errors.
+
+        Parameters
+        ----------
+        that : array_like
+            Fitted values from first stage
+        eps_i : array_like
+            Residuals
+        thatT_that_inv : array_like
+            Inverse of that.T @ that
+        groups : array_like
+            Group labels for clustering
+
+        Returns
+        -------
+        var : array_like or list
+            Clustered variance matrix
+        """
+        n, k = that.shape
+        group_ids, inverse_idx = np.unique(groups, return_inverse=True)
+        n_groups = len(group_ids)
+
+        # Group correction factor
+        group_correction = n_groups / (n_groups - 1)
+
+        if eps_i.ndim < 2:
+            # Single outcome case
+            that_e = that * eps_i.reshape(-1, 1)
+            group_sums = np.zeros((n_groups, k))
+            np.add.at(group_sums, inverse_idx, that_e)
+            s = group_sums.T @ group_sums
+
+            return group_correction * np.matmul(thatT_that_inv, np.matmul(s, thatT_that_inv))
+        else:
+            # Multiple outcome case
+            var_list = []
+            for j in range(eps_i.shape[1]):
+                that_e = that * eps_i[:, [j]]
+                group_sums = np.zeros((n_groups, k))
+                np.add.at(group_sums, inverse_idx, that_e)
+                s = group_sums.T @ group_sums
+
+                var_list.append(group_correction * np.matmul(thatT_that_inv, np.matmul(s, thatT_that_inv)))
+
+            return var_list
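Both new helpers accumulate the per-cluster score sums with `np.add.at` rather than a Python loop over clusters. A small self-contained sketch of that building block (all names here are illustrative, not part of the diff):

```python
import numpy as np

rng = np.random.default_rng(1)
n, k = 8, 2
scores = rng.normal(size=(n, k))             # rows of x_i * eps_i, one per observation
groups = np.array([2, 0, 0, 3, 2, 3, 0, 2])  # arbitrary integer cluster labels

# np.unique maps arbitrary labels to contiguous indices 0..G-1
_, inverse_idx = np.unique(groups, return_inverse=True)
G = inverse_idx.max() + 1
group_sums = np.zeros((G, k))
np.add.at(group_sums, inverse_idx, scores)   # unbuffered scatter-add of rows per cluster

# same result as an explicit loop over clusters
expected = np.stack([scores[inverse_idx == g].sum(axis=0) for g in range(G)])
assert np.allclose(group_sums, expected)

meat = group_sums.T @ group_sums             # inner term of the sandwich estimator
```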
diff --git a/econml/tests/test_clustered_se.py b/econml/tests/test_clustered_se.py
new file mode 100644
index 000000000..1bf11a78c
--- /dev/null
+++ b/econml/tests/test_clustered_se.py
@@ -0,0 +1,180 @@
+# Copyright (c) PyWhy contributors. All rights reserved.
+# Licensed under the MIT License.
+
+import unittest
+import numpy as np
+import pytest
+from sklearn.linear_model import LassoCV, LogisticRegression
+import statsmodels.api as sm
+from econml.dml import DML
+from econml.iv.dml import OrthoIV
+from econml.utilities import shape
+from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
+
+
+@pytest.mark.cate_api
+class TestClusteredSE(unittest.TestCase):
+
+    def test_clustered_se_dml(self):
+        """Test that DML works with clustered standard errors."""
+        np.random.seed(123)
+        n = 500
+        n_groups = 25
+
+        # Generate data with clustering structure
+        X = np.random.normal(0, 1, (n, 3))
+        W = np.random.normal(0, 1, (n, 2))
+        groups = np.random.randint(0, n_groups, n)
+        T = np.random.binomial(1, 0.5, n)
+
+        # Add group-level effects to create clustering
+        group_effects = np.random.normal(0, 1, n_groups)
+        Y = X[:, 0] + 2 * T + group_effects[groups] + np.random.normal(0, 0.5, n)
+
+        # Test DML with clustered standard errors via a custom model_final
+        est = DML(model_y=LassoCV(), model_t=LogisticRegression(),
+                  model_final=StatsModelsLinearRegression(fit_intercept=False, cov_type='clustered'),
+                  discrete_treatment=True)
+        est.fit(Y, T, X=X, W=W, groups=groups)
+
+        # Test basic functionality
+        effects = est.effect(X[:10])
+        self.assertEqual(shape(effects), (10,))
+
+        # Test confidence intervals
+        lb, ub = est.effect_interval(X[:10], alpha=0.05)
+        self.assertEqual(shape(lb), (10,))
+        self.assertEqual(shape(ub), (10,))
+        self.assertTrue(np.all(lb <= ub))
+
+        # Test that clustered SEs are different from non-clustered
+        est_regular = DML(model_y=LassoCV(), model_t=LogisticRegression(),
+                          model_final=StatsModelsLinearRegression(fit_intercept=False, cov_type='nonrobust'),
+                          discrete_treatment=True)
+        est_regular.fit(Y, T, X=X, W=W)
+
+        lb_regular, ub_regular = est_regular.effect_interval(X[:10], alpha=0.05)
+
+        # Confidence intervals should be different (not identical)
+        self.assertFalse(np.allclose(lb, lb_regular, atol=1e-10))
+        self.assertFalse(np.allclose(ub, ub_regular, atol=1e-10))
+
+    def test_clustered_se_iv(self):
+        """Test that OrthoIV works with clustered standard errors."""
+        np.random.seed(123)
+        n = 500
+        n_groups = 25
+
+        # Generate data with clustering structure
+        X = np.random.normal(0, 1, (n, 3))
+        W = np.random.normal(0, 1, (n, 2))
+        groups = np.random.randint(0, n_groups, n)
+        Z = np.random.binomial(1, 0.5, n)
+        T = np.random.binomial(1, 0.5, n)
+
+        # Add group-level effects to create clustering
+        group_effects = np.random.normal(0, 1, n_groups)
+        Y = X[:, 0] + 2 * T + group_effects[groups] + np.random.normal(0, 0.5, n)
+
+        # Test OrthoIV with clustered standard errors
+        est = OrthoIV(discrete_treatment=True, discrete_instrument=True,
+                      cov_type='clustered')
+        est.fit(Y, T, Z=Z, X=X, W=W, groups=groups)
+
+        # Test basic functionality
+        effects = est.effect(X[:10])
+        self.assertEqual(shape(effects), (10,))
+
+        # Test confidence intervals
+        lb, ub = est.effect_interval(X[:10], alpha=0.05)
+        self.assertEqual(shape(lb), (10,))
+        self.assertEqual(shape(ub), (10,))
+        self.assertTrue(np.all(lb <= ub))
+
+        # Test that clustered SEs are different from non-clustered
+        est_regular = OrthoIV(discrete_treatment=True, discrete_instrument=True,
+                              cov_type='nonrobust')
+        est_regular.fit(Y, T, Z=Z, X=X, W=W)
+
+        lb_regular, ub_regular = est_regular.effect_interval(X[:10], alpha=0.05)
+
+        # Confidence intervals should be different (not identical)
+        self.assertFalse(np.allclose(lb, lb_regular, atol=1e-10))
+        self.assertFalse(np.allclose(ub, ub_regular, atol=1e-10))
+
+    def test_clustered_se_without_groups_defaults_to_individual(self):
+        """Test that clustered SE without groups matches HC0 up to the n/(n-1) adjustment factor."""
+        np.random.seed(123)
+        n = 100
+        X = np.random.normal(0, 1, (n, 2))
+        T = np.random.binomial(1, 0.5, n)
+        Y = np.random.normal(0, 1, n)
+
+        # Clustered SE without groups (defaults to one cluster per observation)
+        np.random.seed(123)
+        est_clustered = DML(model_y=LassoCV(), model_t=LogisticRegression(),
+                            model_final=StatsModelsLinearRegression(fit_intercept=False, cov_type='clustered'),
+                            discrete_treatment=True)
+        est_clustered.fit(Y, T, X=X)
+
+        # HC0 for comparison
+        np.random.seed(123)
+        est_hc0 = DML(model_y=LassoCV(), model_t=LogisticRegression(),
+                      model_final=StatsModelsLinearRegression(fit_intercept=False, cov_type='HC0'),
+                      discrete_treatment=True)
+        est_hc0.fit(Y, T, X=X)
+
+        # Get confidence intervals
+        X_test = X[:5]
+        lb_clustered, ub_clustered = est_clustered.effect_interval(X_test, alpha=0.05)
+        lb_hc0, ub_hc0 = est_hc0.effect_interval(X_test, alpha=0.05)
+
+        # When each observation is its own cluster, the clustered SE equals the HC0 SE
+        # times sqrt(n/(n-1)), the finite-sample correction factor, so the interval
+        # widths should differ by exactly that factor
+        width_clustered = ub_clustered - lb_clustered
+        width_hc0 = ub_hc0 - lb_hc0
+        correction_factor = np.sqrt(n / (n - 1))
+        expected_width = width_hc0 * correction_factor
+        np.testing.assert_allclose(width_clustered, expected_width, rtol=1e-10)
+
+        # Test basic functionality still works
+        effects = est_clustered.effect(X_test)
+        self.assertEqual(shape(effects), (5,))
+        self.assertTrue(np.all(np.isfinite(effects)))
+        self.assertTrue(np.all(np.isfinite(lb_clustered)))
+        self.assertTrue(np.all(np.isfinite(ub_clustered)))
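The width identity asserted above is immediate from the sandwich formula given earlier: with singleton clusters ($G = n$), each score sum reduces to a single term $s_i = x_i \hat\varepsilon_i$, so the clustered "meat" coincides with the HC0 meat and

$$
\widehat V_{\text{clustered}} = \frac{n}{n-1}\,\widehat V_{\text{HC0}}
\quad\Longrightarrow\quad
\text{SE}_{\text{clustered}} = \sqrt{\frac{n}{n-1}}\;\text{SE}_{\text{HC0}}.
$$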
+
+    def test_clustered_se_matches_statsmodels(self):
+        """Test that our final stage clustered SE matches statsmodels exactly."""
+        np.random.seed(42)
+        n = 200
+        n_groups = 20
+
+        # Generate simple clustered data for direct comparison
+        X = np.random.normal(0, 1, (n, 2))
+        groups = np.random.randint(0, n_groups, n)
+        group_effects = np.random.normal(0, 0.5, n_groups)
+        Y = 1 + 2 * X[:, 0] + 3 * X[:, 1] + group_effects[groups] + np.random.normal(0, 0.5, n)
+
+        # Fit with our StatsModelsLinearRegression directly
+        X_with_intercept = np.column_stack([np.ones(n), X])
+        econml_model = StatsModelsLinearRegression(cov_type='clustered')
+        econml_model.fit(X, Y, groups=groups)
+        econml_se = econml_model.coef_stderr_[0]  # SE for the X[:, 0] coefficient
+
+        # Fit the equivalent model with statsmodels
+        sm_model = sm.OLS(Y, X_with_intercept).fit(cov_type='cluster', cov_kwds={'groups': groups})
+        sm_se = sm_model.bse[1]  # SE for the X[:, 0] coefficient
+
+        # Account for statsmodels' additional (n-1)/(n-k) adjustment
+        k = X_with_intercept.shape[1]  # number of parameters
+        sm_adjustment = np.sqrt((n - 1) / (n - k))
+        adjusted_sm_se = sm_se / sm_adjustment
+
+        # Should match very closely
+        relative_diff = abs(econml_se - adjusted_sm_se) / adjusted_sm_se
+        self.assertLess(relative_diff, 1e-4,
+                        f"EconML SE ({econml_se:.8f}) differs from adjusted statsmodels SE ({adjusted_sm_se:.8f})")
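A note on the adjustment in the last test: statsmodels' `cov_type='cluster'` applies the Stata-style small-sample correction

$$
c_{\text{sm}} = \frac{G}{G-1}\cdot\frac{n-1}{n-k},
$$

while the implementation in this diff uses only $G/(G-1)$, so the two standard errors differ by the deterministic factor $\sqrt{(n-1)/(n-k)}$. Dividing the statsmodels SE by that factor is what makes the comparison exact up to numerical tolerance.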