diff --git a/TODO.md b/TODO.md index 71a1ebfd..95ac15c2 100644 --- a/TODO.md +++ b/TODO.md @@ -82,6 +82,7 @@ Deferred items from PR reviews that were not addressed before merge. | HC2 / HC2 + Bell-McCaffrey on absorbed-FE fits currently raises `NotImplementedError` in three places: `TwoWayFixedEffects` unconditionally; `DifferenceInDifferences(absorb=..., vcov_type in {"hc2","hc2_bm"})`; `MultiPeriodDiD(absorb=..., vcov_type in {"hc2","hc2_bm"})`. Within-transformation preserves coefficients and residuals under FWL but not the hat matrix, so the reduced-design `h_ii` is not the diagonal of the full FE projection and CR2's block adjustment `A_g = (I - H_gg)^{-1/2}` is likewise wrong on absorbed cluster blocks. Lifting the guard needs HC2/CR2-BM computed from the full absorbed projection (unit/time FE dummies reconstructed internally, or a FE-aware hat-matrix formulation) and a parity harness against a full-dummy OLS run or R `fixest`/`clubSandwich`. HC1/CR1 are unaffected by this because they have no leverage term. | `twfe.py::fit`, `estimators.py::DifferenceInDifferences.fit`, `estimators.py::MultiPeriodDiD.fit` | Phase 1a | Medium | | Weighted CR2 Bell-McCaffrey cluster-robust (`vcov_type="hc2_bm"` + `cluster_ids` + `weights`) currently raises `NotImplementedError`. Weighted hat matrix and residual rebalancing need threading per clubSandwich WLS handling. | `linalg.py::_compute_cr2_bm` | Phase 1a | Medium | | Regenerate `benchmarks/data/clubsandwich_cr2_golden.json` from R (`Rscript benchmarks/R/generate_clubsandwich_golden.R`). Current JSON has `source: python_self_reference` as a stability anchor until an authoritative R run. | `benchmarks/R/generate_clubsandwich_golden.R` | Phase 1a | Medium | +| `honest_did.py:1907` `np.linalg.solve(A_sys, b_sys) / except LinAlgError: continue` is a silent basis-rejection in the vertex-enumeration loop that is algorithmically intentional (try the next basis). Consider surfacing a count of rejected bases as a diagnostic when ARP enumeration exhausts, so users see when the vertex search was heavily constrained. Not a silent failure in the sense of the Phase 2 audit (the algorithm is supposed to skip), but the diagnostic would help debug borderline cases. | `honest_did.py` | #334 | Low | #### Performance diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py index bb19ddf5..86052aee 100644 --- a/diff_diff/efficient_did_covariates.py +++ b/diff_diff/efficient_did_covariates.py @@ -172,7 +172,15 @@ def estimate_propensity_ratio_sieve( Selects K via AIC/BIC: ``IC(K) = 2*loss(K) + C_n*K/n``. - On singular basis: tries lower K. Short-circuits r_{g,g}(X) = 1. + Precondition check per K: if ``cond(Psi_{g'}' W Psi_{g'}) > 1/sqrt(eps)`` + (≈ 6.7e7), that K is skipped. LinAlgError on the `np.linalg.solve` call + or a non-finite beta skips as well. If at least one K succeeds but + others were skipped, emits a ``UserWarning`` listing the skipped K + values (silent-failure audit PR, axis-A finding #18). If every K is + skipped, the caller falls back to a constant ratio of 1 with a + separate "estimation failed for all K values" warning. + + Short-circuits ``r_{g,g}(X) = 1`` for same-cohort comparisons (PT-All). Parameters ---------- @@ -227,6 +235,10 @@ def estimate_propensity_ratio_sieve( best_ic = np.inf best_ratio = np.ones(n_units) # fallback: constant ratio 1 + singular_K: List[int] = [] # K values skipped due to rank deficiency (#18) + # Near-singular matrices solve without raising LinAlgError but return + # numerically meaningless beta. Rule-of-thumb threshold: 1/sqrt(eps). + cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps) for K in range(1, k_max + 1): n_basis = comb(K + d, d) @@ -249,13 +261,23 @@ def estimate_propensity_ratio_sieve( A = Psi_gp.T @ Psi_gp b = Psi_g.sum(axis=0) + # Precondition check (#18, axis A): reject near-singular A explicitly + # so np.linalg.solve can't silently return garbage coefficients. + with np.errstate(invalid="ignore", over="ignore"): + A_cond = float(np.linalg.cond(A)) + if not np.isfinite(A_cond) or A_cond > cond_threshold: + singular_K.append(K) + continue + try: beta = np.linalg.solve(A, b) except np.linalg.LinAlgError: + singular_K.append(K) continue # singular — try next K # Check for NaN/Inf in solution if not np.all(np.isfinite(beta)): + singular_K.append(K) continue # Predicted ratio for all units @@ -282,6 +304,18 @@ def estimate_propensity_ratio_sieve( UserWarning, stacklevel=2, ) + elif singular_K: + # Finding #18 (axis A): partial K-failure was previously silent. + # Surface it so users see that the selected basis order was + # forced by rank deficiency at higher K rather than by the IC. + warnings.warn( + f"Propensity ratio sieve: skipped K={singular_K} due to " + f"rank-deficient or non-finite normal equations. " + f"Selected basis used the remaining K values; " + f"this may indicate limited variation in the covariates.", + UserWarning, + stacklevel=2, + ) # Overlap diagnostics: warn if ratios require significant clipping n_extreme = int(np.sum((best_ratio < 1.0 / ratio_clip) | (best_ratio > ratio_clip))) @@ -329,6 +363,14 @@ def estimate_inverse_propensity_sieve( units on the RHS (not just group g), following the paper's algorithm step 4. + Precondition check per K: if ``cond(Psi_{g'}' W Psi_{g'}) > 1/sqrt(eps)`` + (≈ 6.7e7), that K is skipped. LinAlgError on the `np.linalg.solve` call + or a non-finite beta skips as well. If at least one K succeeds but + others were skipped, emits a ``UserWarning`` listing the skipped K + values (silent-failure audit PR, axis-A finding #18). If every K is + skipped, the caller falls back to unconditional ``n/n_group`` scaling + with a separate "estimation failed for all K values" warning. + Parameters ---------- covariate_matrix : ndarray, shape (n_units, n_covariates) @@ -377,6 +419,8 @@ def estimate_inverse_propensity_sieve( best_ic = np.inf best_s = np.full(n_units, fallback_ratio) # fallback: unconditional + singular_K: List[int] = [] # K values skipped due to rank deficiency (#18) + cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps) for K in range(1, k_max + 1): n_basis = comb(K + d, d) @@ -397,11 +441,20 @@ def estimate_inverse_propensity_sieve( # RHS: sum of basis over ALL units (not just one group) b = basis_all.sum(axis=0) + # Precondition check (#18, axis A): see ratio-sieve comment above. + with np.errstate(invalid="ignore", over="ignore"): + A_cond = float(np.linalg.cond(A)) + if not np.isfinite(A_cond) or A_cond > cond_threshold: + singular_K.append(K) + continue + try: beta = np.linalg.solve(A, b) except np.linalg.LinAlgError: + singular_K.append(K) continue if not np.all(np.isfinite(beta)): + singular_K.append(K) continue s_hat = basis_all @ beta @@ -423,6 +476,16 @@ def estimate_inverse_propensity_sieve( UserWarning, stacklevel=2, ) + elif singular_K: + # Finding #18 (axis A): partial K-failure was previously silent. + warnings.warn( + f"Inverse propensity sieve: skipped K={singular_K} due to " + f"rank-deficient or non-finite normal equations. " + f"Selected basis used the remaining K values; " + f"this may indicate limited variation in the covariates.", + UserWarning, + stacklevel=2, + ) # Overlap diagnostics: warn if s_hat values require clipping n_clipped = int(np.sum((best_s < 1.0) | (best_s > float(n_units)))) diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py index 2004f2e7..d26233fe 100644 --- a/diff_diff/staggered.py +++ b/diff_diff/staggered.py @@ -92,11 +92,29 @@ def _linear_regression( return beta, residuals -def _safe_inv(A: np.ndarray) -> np.ndarray: - """Invert a square matrix with lstsq fallback for near-singular cases.""" +def _safe_inv( + A: np.ndarray, + tracker: Optional[list] = None, +) -> np.ndarray: + """Invert a square matrix with lstsq fallback for near-singular cases. + + Parameters + ---------- + A : np.ndarray + Square matrix to invert. + tracker : list, optional + When provided, one condition-number sample of ``A`` is appended on + every LinAlgError fallback. ``CallawaySantAnna.fit()`` initializes + a list and emits a single aggregate `UserWarning` after the fit + finishes, rather than surfacing a separate warning per fallback. + Sibling of finding #17 in the Phase 2 silent-failures audit. + """ try: return np.linalg.solve(A, np.eye(A.shape[0])) except np.linalg.LinAlgError: + if tracker is not None: + with np.errstate(invalid="ignore", over="ignore"): + tracker.append(float(np.linalg.cond(A))) return np.linalg.lstsq(A, np.eye(A.shape[0]), rcond=None)[0] @@ -1436,6 +1454,12 @@ def fit( # Reset stale state from prior fit (prevents leaking event-study VCV) self._event_study_vcov = None + # Tracker for _safe_inv lstsq fallbacks across all analytical SE + # paths (PS Hessian, OR bread, event-study bread, etc.). Emit ONE + # aggregate warning at the end of fit rather than fanning out per + # cell. Sibling of PR #9 finding #17. + self._safe_inv_tracker: List[float] = [] + if not self.panel: warnings.warn( "panel=False uses repeated cross-section DRDID estimators " @@ -1976,6 +2000,26 @@ def fit( eff_data["effect"] + cband_crit_value * se_val, ) + # Consolidated _safe_inv lstsq-fallback warning (sibling of PR #9 + # finding #17). Rank-deficient PS Hessian / OR bread matrices in the + # analytical SE paths previously fell back to np.linalg.lstsq + # silently per cell. Now aggregated here into ONE UserWarning so + # a bad design surface doesn't quietly degrade analytical SEs. + if self._safe_inv_tracker: + n_fallbacks = len(self._safe_inv_tracker) + finite_conds = [c for c in self._safe_inv_tracker if np.isfinite(c)] + max_cond = max(finite_conds) if finite_conds else float("inf") + warnings.warn( + f"Rank-deficient matrix encountered {n_fallbacks} time(s) " + f"in analytical SE paths (propensity-score Hessian or " + f"outcome-regression bread); fell back to np.linalg.lstsq. " + f"Max condition number of affected matrix: {max_cond:.2e}. " + f"Analytical SEs may be numerically unstable; consider " + f"dropping collinear covariates or using n_bootstrap > 0.", + UserWarning, + stacklevel=2, + ) + # Store results # Retrieve event-study VCV from aggregation mixin (Phase 7d). # Clear it when bootstrap overwrites event-study SEs to prevent @@ -2276,7 +2320,7 @@ def _ipw_estimation( W_ps = W_ps * sw_all # R: Hessian.ps = crossprod(X * sqrt(W)) / n H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel - H_psi_inv = _safe_inv(H_psi) + H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker) D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)]) score_ps = (D_all - pscore_all)[:, None] * X_all_int @@ -2562,7 +2606,7 @@ def _doubly_robust( if sw_all is not None: W_ps = W_ps * sw_all H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel - H_psi_inv = _safe_inv(H_psi) + H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker) D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)]) score_ps = (D_all - pscore_all)[:, None] * X_all_int @@ -2584,7 +2628,7 @@ def _doubly_robust( X_c_int = X_control_with_intercept W_diag = sw_control if sw_control is not None else np.ones(n_c) XtWX = X_c_int.T @ (W_diag[:, None] * X_c_int) - bread = _safe_inv(XtWX) + bread = _safe_inv(XtWX, tracker=self._safe_inv_tracker) # M1: dATT/dbeta — gradient of DR ATT w.r.t. OR parameters X_t_int = X_treated_with_intercept @@ -2628,7 +2672,7 @@ def _doubly_robust( W_ps = pscore_all * (1 - pscore_all) H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel - H_psi_inv = _safe_inv(H_psi) + H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker) D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)]) score_ps = (D_all - pscore_all)[:, None] * X_all_int @@ -2645,7 +2689,7 @@ def _doubly_robust( # --- OR IF correction --- X_c_int = X_control_with_intercept XtX = X_c_int.T @ X_c_int - bread = _safe_inv(XtX) + bread = _safe_inv(XtX, tracker=self._safe_inv_tracker) X_t_int = X_treated_with_intercept M1 = ( @@ -3204,8 +3248,14 @@ def _outcome_regression_rc( # R's colMeans (= sum/n_all) for M1, matching the product exactly. W_ct = sw_ct if sw_ct is not None else np.ones(n_ct) W_cs = sw_cs if sw_cs is not None else np.ones(n_cs) - bread_t = _safe_inv(X_ct_int.T @ (W_ct[:, None] * X_ct_int)) - bread_s = _safe_inv(X_cs_int.T @ (W_cs[:, None] * X_cs_int)) + bread_t = _safe_inv( + X_ct_int.T @ (W_ct[:, None] * X_ct_int), + tracker=self._safe_inv_tracker, + ) + bread_s = _safe_inv( + X_cs_int.T @ (W_cs[:, None] * X_cs_int), + tracker=self._safe_inv_tracker, + ) # R: M1 = colMeans(w.cont * out.x) = sum(w_D * X) / n_all M1 = ( @@ -3407,7 +3457,7 @@ def _ipw_estimation_rc( W_ps = W_ps * sw_all # R: Hessian.ps = crossprod(X * sqrt(W)) / n H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all - H_psi_inv = _safe_inv(H_psi) + H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker) score_ps = (D_all - pscore)[:, None] * X_all_int if sw_all is not None: @@ -3744,7 +3794,7 @@ def _doubly_robust_rc( W_ps = W_ps * sw_all # R: Hessian.ps = crossprod(X * sqrt(W)) / n H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all - H_psi_inv = _safe_inv(H_psi) + H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker) score_ps = (D_all - pscore)[:, None] * X_all_int if sw_all is not None: @@ -3779,8 +3829,14 @@ def _doubly_robust_rc( # ===================================================================== W_ct_vals = sw_ct if sw_ct is not None else np.ones(n_ct) W_cs_vals = sw_cs if sw_cs is not None else np.ones(n_cs) - bread_ct = _safe_inv(X_ct_int.T @ (W_ct_vals[:, None] * X_ct_int)) - bread_cs = _safe_inv(X_cs_int.T @ (W_cs_vals[:, None] * X_cs_int)) + bread_ct = _safe_inv( + X_ct_int.T @ (W_ct_vals[:, None] * X_ct_int), + tracker=self._safe_inv_tracker, + ) + bread_cs = _safe_inv( + X_cs_int.T @ (W_cs_vals[:, None] * X_cs_int), + tracker=self._safe_inv_tracker, + ) # R: asy.lin.rep.ols (per-obs OLS score * bread) asy_lin_rep_ct = (W_ct_vals * resid_ct)[:, None] * X_ct_int @ bread_ct @@ -3818,8 +3874,14 @@ def _doubly_robust_rc( # ===================================================================== W_gt_vals = sw_gt if sw_gt is not None else np.ones(n_gt) W_gs_vals = sw_gs if sw_gs is not None else np.ones(n_gs) - bread_gt = _safe_inv(X_gt_int.T @ (W_gt_vals[:, None] * X_gt_int)) - bread_gs = _safe_inv(X_gs_int.T @ (W_gs_vals[:, None] * X_gs_int)) + bread_gt = _safe_inv( + X_gt_int.T @ (W_gt_vals[:, None] * X_gt_int), + tracker=self._safe_inv_tracker, + ) + bread_gs = _safe_inv( + X_gs_int.T @ (W_gs_vals[:, None] * X_gs_int), + tracker=self._safe_inv_tracker, + ) asy_lin_rep_gt = (W_gt_vals * resid_gt)[:, None] * X_gt_int @ bread_gt asy_lin_rep_gs = (W_gs_vals * resid_gs)[:, None] * X_gs_int @ bread_gs diff --git a/diff_diff/staggered_triple_diff.py b/diff_diff/staggered_triple_diff.py index 85086aa3..eba8855a 100644 --- a/diff_diff/staggered_triple_diff.py +++ b/diff_diff/staggered_triple_diff.py @@ -348,6 +348,13 @@ def fit( {} if (covariates and self.estimation_method in ("ipw", "dr")) else None ) + # Trackers for rank-deficient linalg solves across all (g, g_c, t) + # cells. `_compute_did_panel` appends to the OR-side tracker; + # `_compute_pscore` appends to the PS-side tracker. Both surface as + # ONE aggregate warning below rather than fanning out per cell. + self._lstsq_fallback_tracker: List[float] = [] + self._ps_lstsq_fallback_tracker: List[float] = [] + for g in treatment_groups: # In universal mode, skip the reference period (t == g-1-anticipation) # so it's omitted from GT estimation. The event-study mixin injects @@ -507,6 +514,47 @@ def fit( comparison_group_counts[(g, t)] = len(gc_labels) gmm_weights_store[(g, t)] = dict(zip(gc_labels, gmm_w.tolist())) + # Consolidated OR influence-function rank-deficiency warning. + # Finding #17 in the Phase 2 silent-failures audit: the per-pair OR + # solve at _compute_did_panel() previously fell back to lstsq with no + # signal, so near/fully singular X'WX in the covariate expansion went + # to the user as a normal result. + if self._lstsq_fallback_tracker: + n_cells = len(self._lstsq_fallback_tracker) + finite_conds = [c for c in self._lstsq_fallback_tracker if np.isfinite(c)] + max_cond = max(finite_conds) if finite_conds else float("inf") + warnings.warn( + f"Rank-deficient X'WX detected in the outcome-regression " + f"influence-function step for {n_cells} (g, g_c, t) pair(s); " + f"fell back to np.linalg.lstsq. " + f"Max condition number of affected X'WX: {max_cond:.2e}. " + f"Consider dropping collinear covariates or using " + f"estimation_method='ipw' to avoid the OR projection.", + UserWarning, + stacklevel=2, + ) + + # Consolidated PS-Hessian rank-deficiency warning (sibling of the + # OR path above). `_compute_pscore` previously fell back from + # `np.linalg.inv(X'WX)` to `np.linalg.lstsq` with no signal, so + # a rank-deficient propensity-score design silently degraded + # IPW/DR influence-function corrections. + if self._ps_lstsq_fallback_tracker: + n_cells = len(self._ps_lstsq_fallback_tracker) + finite_conds = [c for c in self._ps_lstsq_fallback_tracker if np.isfinite(c)] + max_cond = max(finite_conds) if finite_conds else float("inf") + warnings.warn( + f"Rank-deficient X'WX detected in the propensity-score " + f"Hessian for {n_cells} (g, g_c, t) pair(s); fell back to " + f"np.linalg.lstsq. Max condition number of affected X'WX: " + f"{max_cond:.2e}. IPW/DR influence-function corrections " + f"may be numerically unstable; consider dropping collinear " + f"propensity-score covariates or using " + f"estimation_method='reg' to avoid the PS path.", + UserWarning, + stacklevel=2, + ) + # Consolidated EPV summary warning if epv_diagnostics: low_epv = {k: v for k, v in epv_diagnostics.items() if v.get("is_low")} @@ -1330,6 +1378,14 @@ def _compute_did_panel( try: asy_linear_or = (np.linalg.solve(XpX, or_ex.T)).T except np.linalg.LinAlgError: + # Rank-deficient X'WX in the OR influence-function step. Record + # a condition-number sample so fit() can emit ONE aggregate + # warning across all (g, g_c, t) cells rather than fanning out. + tracker = getattr(self, "_lstsq_fallback_tracker", None) + if tracker is not None: + with np.errstate(invalid="ignore", over="ignore"): + cond = float(np.linalg.cond(XpX)) + tracker.append(cond) asy_linear_or = (np.linalg.lstsq(XpX, or_ex.T, rcond=None)[0]).T inf_treat_or = -(asy_linear_or @ M1) @@ -1434,6 +1490,14 @@ def _compute_pscore( try: hessian = np.linalg.inv(XWX) * n_pair except np.linalg.LinAlgError: + # Sibling of the OR-side LinAlgError at _compute_did_panel. Record + # a condition-number sample on the PS-Hessian tracker so fit() + # emits ONE aggregate warning covering all (g, g_c, t) cells + # that hit the rank-deficient PS path under IPW/DR inference. + ps_tracker = getattr(self, "_ps_lstsq_fallback_tracker", None) + if ps_tracker is not None: + with np.errstate(invalid="ignore", over="ignore"): + ps_tracker.append(float(np.linalg.cond(XWX))) hessian = np.linalg.lstsq(XWX, np.eye(XWX.shape[0]), rcond=None)[0] * n_pair return pscore, hessian diff --git a/diff_diff/survey.py b/diff_diff/survey.py index 14ae44d0..2ee8334e 100644 --- a/diff_diff/survey.py +++ b/diff_diff/survey.py @@ -1445,6 +1445,25 @@ def compute_survey_vcov( return np.zeros((k, k)) return np.full((k, k), np.nan) + # Precondition check: near-singular X'WX lets np.linalg.solve return + # unstable values without raising (finding #19, axis A). Threshold of + # 1/sqrt(eps) ≈ 6.7e7 is the standard rule of thumb — above it, the + # sandwich bread becomes numerically unreliable and the caller should + # be told so. + with np.errstate(invalid="ignore", over="ignore"): + XtWX_cond = float(np.linalg.cond(XtWX)) + cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps) + if np.isfinite(XtWX_cond) and XtWX_cond > cond_threshold: + warnings.warn( + f"X'WX is ill-conditioned (cond={XtWX_cond:.2e}) in the " + f"survey sandwich variance; variance estimates may be " + f"numerically unstable. This typically indicates near " + f"multicollinearity or zero-weight strata dominating the " + f"bread matrix.", + UserWarning, + stacklevel=2, + ) + # Sandwich: (X'WX)^{-1} meat (X'WX)^{-1} try: temp = np.linalg.solve(XtWX, meat) diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py index 6a385bd9..c11bdd50 100644 --- a/diff_diff/two_stage.py +++ b/diff_diff/two_stage.py @@ -1768,6 +1768,22 @@ def _compute_gmm_variance( try: bread = np.linalg.solve(XtWX_2, np.eye(k)) except np.linalg.LinAlgError: + # Sibling of finding #17 (axis A) — the TSL-variance bread + # fallback was previously silent. Note: X_2 is the Stage-2 + # indicator design (treatment / horizon / group dummies), not + # user covariates, so the diagnostic guidance points at that + # layer. + warnings.warn( + "Rank-deficient second-stage design matrix X_2'WX_2 in " + "TwoStageDiD TSL variance; falling back to np.linalg.lstsq " + "for the bread matrix. Analytical SEs may be numerically " + "unstable. The Stage-2 design is built from treatment, " + "event-time, or group indicators, so this typically " + "indicates a zero-weight or all-zero indicator column " + "(e.g. an aggregation path with no qualifying observations).", + UserWarning, + stacklevel=2, + ) bread = np.linalg.lstsq(XtWX_2, np.eye(k), rcond=None)[0] # 7. V = bread @ meat @ bread diff --git a/diff_diff/two_stage_bootstrap.py b/diff_diff/two_stage_bootstrap.py index ea23ef40..391cdbd4 100644 --- a/diff_diff/two_stage_bootstrap.py +++ b/diff_diff/two_stage_bootstrap.py @@ -197,6 +197,21 @@ def _compute_cluster_S_scores( try: bread = np.linalg.solve(XtX_2, np.eye(k)) except np.linalg.LinAlgError: + # Sibling of finding #17 (axis A) — the bootstrap bread + # fallback was previously silent. X_2 is the Stage-2 indicator + # design (treatment / horizon / group dummies), so a singular + # bread typically indicates a zero-weight or all-zero column. + warnings.warn( + "Rank-deficient second-stage design matrix X_2'WX_2 in " + "TwoStageDiD multiplier bootstrap bread; falling back to " + "np.linalg.lstsq. Bootstrap SEs may be numerically " + "unstable. The Stage-2 design is built from treatment, " + "event-time, or group indicators, so this typically " + "indicates a zero-weight or all-zero indicator column " + "(e.g. an aggregation path with no qualifying observations).", + UserWarning, + stacklevel=2, + ) bread = np.linalg.lstsq(XtX_2, np.eye(k), rcond=None)[0] return S, bread, unique_clusters diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 091b3847..a4da8c3a 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -301,6 +301,7 @@ This matches the behavior of R's `fixest::feols()` with absorbed FE. - Requires never-treated units as comparison group (identified by `first_treat=0` or `never_treated=True`) - Warns if no never-treated units exist (suggests alternative comparison strategies) - Limited pre-treatment periods reduce ability to test parallel trends +- **Note:** The analytical SE paths call `_safe_inv()` on the propensity-score Hessian (`H_psi`) and outcome-regression bread (`X'WX`) across every `(g, t)` cell. When these matrices are rank deficient, `np.linalg.solve` raises `LinAlgError` and `_safe_inv()` falls back to `np.linalg.lstsq`. Previously silent; now `fit()` emits ONE aggregate `UserWarning` at the end of the fit reporting the number of fallbacks and the max condition number, so a rank-deficient analytical SE path can't quietly ship degraded standard errors. Sibling of axis-A finding #17 in the Phase 2 silent-failures audit. *Estimator equation (as implemented):* @@ -753,6 +754,7 @@ See `docs/methodology/continuous-did.md` Section 4 for full details. - **Balanced panel**: Short balanced panel required ("large-n, fixed-T" regime). Does not handle unbalanced panels or repeated cross-sections - Warn if treatment varies within units (non-absorbing treatment) - Warn if propensity score estimates are near boundary values +- **Note:** Polynomial-sieve propensity fits now reject any K whose normal-equations matrix has condition number above `1/sqrt(eps)` (≈ 6.7e7) — previously a near-singular `np.linalg.solve` could return numerically meaningless coefficients without raising. If at least one K succeeds but others were skipped via this precondition, a `UserWarning` lists the skipped K values. If every K is skipped, the existing "estimation failed for all K values" fallback warning still fires. Axis-A finding #18 in the Phase 2 silent-failures audit. *Estimator equation -- single treatment date (Equations 3.2, 3.5):* @@ -1175,6 +1177,7 @@ Our implementation uses multiplier bootstrap on the GMM influence function: clus - **Zero-observation cohorts in group effects:** If all treated observations for a cohort have NaN `y_tilde` (excluded from estimation), that cohort's group effect is NaN with n_obs=0. - **Note:** Survey weights in TwoStageDiD GMM sandwich via weighted cross-products: bread uses (X'_2 W X_2)^{-1}, gamma_hat uses (X'_{10} W X_{10})^{-1}(X'_1 W X_2), per-cluster scores multiply by survey weights. PSU clustering, stratification, and FPC are fully supported in the meat matrix via `_compute_stratified_meat_from_psu_scores()`. When strata or FPC are present, the meat computation replaces `S' S` with the stratified formula `sum_h (1 - f_h) * (n_h/(n_h-1)) * centered_h' centered_h`. Strata also enters survey df (n_PSU - n_strata) for t-distribution inference. Bootstrap + survey supported (Phase 6) via PSU-level multiplier weights. - **Note:** Both the iterative FE solver (`_iterative_fe`, Stage 1) and the iterative alternating-projection demeaning helper (`_iterative_demean`, used in covariate residualization) emit `UserWarning` when `max_iter` exhausts without reaching `tol`, via `diff_diff.utils.warn_if_not_converged`. Silent return of the current iterate was classified as a silent failure under the Phase 2 audit and replaced with an explicit signal to match the logistic/Poisson IRLS pattern in `linalg.py`. +- **Note:** When the Stage-2 bread `X'_2 W X_2` is singular, both the analytical TSL variance (`two_stage.py`) and the multiplier-bootstrap bread (`two_stage_bootstrap.py`) now emit a `UserWarning` before falling back to `np.linalg.lstsq`. Previously this fallback was silent. Sibling of axis-A finding #17 in the Phase 2 silent-failures audit; surfaced by the repo-wide lstsq-fallback pattern grep that accompanied the StaggeredTripleDifference fix. - **Note:** The GMM sandwich and bootstrap paths both use `scipy.sparse.linalg.factorized` for the Stage 1 normal-equations solve `(X'_{10} W X_{10}) gamma = X'_1 W X_2` and fall back to dense `lstsq` when the sparse factorization raises `RuntimeError` on a near-singular matrix. Both fallback sites emit a `UserWarning` (silent-failure audit axis C) so callers know SE estimates came from the degraded path rather than the fast sparse path. **Reference implementation(s):** @@ -1695,6 +1698,8 @@ has no additional effect. - **Note:** `pscore_fallback` default changed from unconditional to error. Set `pscore_fallback="unconditional"` for legacy behavior. - Warns on singular GMM covariance matrix (falls back to pseudoinverse) +- **Note:** Rank-deficient X'WX in the per-pair outcome-regression influence-function step now emits ONE aggregate `UserWarning` at `fit()` time (counting affected (g, g_c, t) cells and reporting the max condition number), instead of silently falling back to `np.linalg.lstsq`. Axis-A finding #17 in the Phase 2 silent-failures audit. +- **Note:** The per-pair propensity-score Hessian inversion in `_compute_pscore` (used under `estimation_method` in `{ipw, dr}`) previously fell back from `np.linalg.inv(X'WX)` to `np.linalg.lstsq` silently. `fit()` now emits a sibling aggregate `UserWarning` with cell count + max condition number so rank-deficient PS designs can't quietly degrade IPW/DR influence-function corrections. Sibling of axis-A finding #17, surfaced during PR #334 CI review. *Data structure:* @@ -2719,6 +2724,12 @@ unequal selection probabilities). per-observation PSUs for the TSL meat computation, consistent with the stratified-no-PSU path. The adjustment factor is `n/(n-1)` (not HC1's `n/(n-k)`). +- **Note:** TSL now precondition-checks `X'WX` via `np.linalg.cond` before + solving the sandwich. If the condition number exceeds `1/sqrt(eps)` (≈ + 6.7e7) a `UserWarning` fires stating that the bread is ill-conditioned + and variance estimates may be numerically unstable. Previously a near- + singular `X'WX` could silently produce unstable SEs. Axis-A finding #19 + in the Phase 2 silent-failures audit. ### Weight Type Effects on Inference diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py index 951c46f5..ddce55a6 100644 --- a/tests/test_efficient_did.py +++ b/tests/test_efficient_did.py @@ -2052,3 +2052,85 @@ def test_inverse_propensity_sieve_fallback_warns(self): assert np.all(np.isfinite(s_hat)) # Should fall back to unconditional n/n_group = 100/2 = 50 assert np.allclose(s_hat, 50.0) + + +# --------------------------------------------------------------------------- +# Silent-failure audit PR #9: finding #18 — estimate_*_sieve silently +# `continue`'d past rank-deficient K values. Now we track skipped K and +# warn when we ship a result that wasn't the IC-winner across all K. +# --------------------------------------------------------------------------- + + +class TestSievePartialKSkipWarning: + """Finding #18 (axis A): partial K-failure no longer silent.""" + + def test_ratio_sieve_partial_skip_warns(self): + """If some K's are rank-deficient but at least one succeeds, + the function warns about the partial skip instead of swallowing it.""" + from diff_diff.efficient_did_covariates import estimate_propensity_ratio_sieve + + rng = np.random.default_rng(7) + n = 200 + # 1D covariate with discrete support {0, 1}. At K=1 the basis is + # [1, x]; at K>=2 the basis reaches size >= n_gp for most groups + # before hitting singularity, but with this discrete support the + # polynomial powers x^2, x^3, ... equal x, yielding rank-deficient + # normal equations deterministically. + X = rng.integers(0, 2, size=(n, 1)).astype(float) + mask_g = np.zeros(n, dtype=bool) + mask_g[:100] = True + mask_gp = np.zeros(n, dtype=bool) + mask_gp[100:] = True + with pytest.warns(UserWarning) as caught: + ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3) + assert np.all(np.isfinite(ratio)) + partial_skip_msgs = [ + str(w.message) for w in caught if "skipped K=" in str(w.message) + ] + assert partial_skip_msgs, ( + "Expected a partial-K-skip warning when some K's are rank deficient " + "but at least one succeeds; got none." + ) + # Message should name the specific K values that were skipped. + assert any("K=" in m for m in partial_skip_msgs) + + def test_inverse_propensity_sieve_partial_skip_warns(self): + """Same contract for the inverse propensity sieve.""" + from diff_diff.efficient_did_covariates import estimate_inverse_propensity_sieve + + rng = np.random.default_rng(7) + n = 200 + X = rng.integers(0, 2, size=(n, 1)).astype(float) + mask = np.zeros(n, dtype=bool) + mask[:100] = True + with pytest.warns(UserWarning) as caught: + s_hat = estimate_inverse_propensity_sieve(X, mask, k_max=3) + assert np.all(np.isfinite(s_hat)) + partial_skip_msgs = [ + str(w.message) for w in caught if "skipped K=" in str(w.message) + ] + assert partial_skip_msgs + + def test_ratio_sieve_no_warning_when_no_skips(self): + """Clean, well-conditioned covariates → no partial-skip warning.""" + from diff_diff.efficient_did_covariates import estimate_propensity_ratio_sieve + + rng = np.random.default_rng(101) + n = 300 + X = rng.normal(0, 1, (n, 2)) + mask_g = np.zeros(n, dtype=bool) + mask_g[:150] = True + mask_gp = np.zeros(n, dtype=bool) + mask_gp[150:] = True + import warnings as _w + + with _w.catch_warnings(record=True) as caught: + _w.simplefilter("always") + ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3) + assert np.all(np.isfinite(ratio)) + partial_skip_msgs = [ + str(w.message) for w in caught if "skipped K=" in str(w.message) + ] + assert partial_skip_msgs == [], ( + f"Unexpected partial-skip warning on clean data: {partial_skip_msgs}" + ) diff --git a/tests/test_staggered.py b/tests/test_staggered.py index 99770751..f7bedc3e 100644 --- a/tests/test_staggered.py +++ b/tests/test_staggered.py @@ -4157,3 +4157,77 @@ def test_skip_warning_survey_zero_mass(self): ) skip_warnings = [x for x in w if "could not be estimated" in str(x.message)] assert len(skip_warnings) > 0, "Expected skip warning for zero-mass survey cells" + + +# --------------------------------------------------------------------------- +# Silent-failure audit PR #9 follow-up: the CS analytical SE path calls +# `_safe_inv()` in ~13 places (PS Hessian, OR bread, etc.). Previously the +# LinAlgError → lstsq fallback was silent — a rank-deficient bread produced +# degraded SEs with no user-visible signal. Now fit() emits ONE aggregate +# warning tracking all fallbacks. +# --------------------------------------------------------------------------- + + +class TestCallawaySantAnnaSafeInvFallback: + def test_collinear_covariates_emit_safe_inv_warning(self): + """Perfectly collinear covariates should trigger the aggregate + `_safe_inv` lstsq-fallback warning across analytical SE paths.""" + data = generate_staggered_data( + n_units=150, n_periods=6, n_cohorts=3, seed=55 + ) + rng = np.random.default_rng(0) + # Add a covariate and a redundant (collinear) copy — forces rank- + # deficient X'WX in the OR bread and the PS Hessian within at + # least one (g, t) cell. + data["x1"] = rng.normal(0, 1, len(data)) + data["x2"] = 2.0 * data["x1"] + cs = CallawaySantAnna( + estimation_method="dr", + rank_deficient_action="silent", # suppress upstream solve_ols noise + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + cs.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + covariates=["x1", "x2"], + ) + fallback_warnings = [ + w for w in caught + if "Rank-deficient matrix encountered" in str(w.message) + and "analytical SE paths" in str(w.message) + ] + assert len(fallback_warnings) == 1, ( + f"Expected exactly one aggregate _safe_inv fallback warning; " + f"got {len(fallback_warnings)}: " + f"{[str(w.message) for w in fallback_warnings]}" + ) + + def test_well_conditioned_no_safe_inv_warning(self): + """Clean data should NOT trigger the aggregate warning — + regression-safety for the happy path.""" + data = generate_staggered_data( + n_units=200, n_periods=6, n_cohorts=3, seed=42 + ) + cs = CallawaySantAnna(estimation_method="dr") + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + cs.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + fallback_warnings = [ + w for w in caught + if "Rank-deficient matrix encountered" in str(w.message) + and "analytical SE paths" in str(w.message) + ] + assert fallback_warnings == [], ( + f"Unexpected _safe_inv fallback warning on clean data: " + f"{[str(w.message) for w in fallback_warnings]}" + ) diff --git a/tests/test_staggered_triple_diff.py b/tests/test_staggered_triple_diff.py index a47d1758..197c5803 100644 --- a/tests/test_staggered_triple_diff.py +++ b/tests/test_staggered_triple_diff.py @@ -534,3 +534,146 @@ def test_collinear_covariates_cached_ps_finite(self): for (g, t), eff in res.group_time_effects.items(): assert np.isfinite(eff["effect"]), f"Non-finite ATT at (g={g},t={t})" assert np.isfinite(eff["se"]), f"Non-finite SE at (g={g},t={t})" + + +# --------------------------------------------------------------------------- +# Silent-failure audit PR #9: finding #17 — OR influence-function solve +# fell back to np.linalg.lstsq silently when X'WX was rank deficient. Now +# tracked at _compute_did_panel() and aggregated into one fit-level warning. +# --------------------------------------------------------------------------- + + +class TestStaggeredTripleDiffORSolveFallback: + def test_collinear_covariates_emit_lstsq_fallback_warning(self): + """Perfectly collinear covariates should trigger the aggregate OR + lstsq-fallback warning. Previously this path was silent.""" + data = generate_staggered_ddd_data( + n_units=200, + treatment_effect=3.0, + add_covariates=True, + seed=55, + ) + # x3 ≡ 2·x1 makes covX = [intercept, x1, x2, x3] rank-deficient + # per pair, so XpX in _compute_did_panel() hits LinAlgError. + data["x3"] = 2.0 * data["x1"] + est = StaggeredTripleDifference( + estimation_method="dr", + rank_deficient_action="silent", # silence upstream rank-def noise + ) + import warnings as _w + + with _w.catch_warnings(record=True) as caught: + _w.simplefilter("always") + res = est.fit( + data, + "outcome", + "unit", + "period", + "first_treat", + "eligibility", + covariates=["x1", "x2", "x3"], + ) + or_warnings = [ + w for w in caught + if "outcome-regression influence-function step" in str(w.message) + ] + assert len(or_warnings) == 1, ( + f"Expected exactly one aggregate OR lstsq-fallback warning, " + f"got {len(or_warnings)}." + ) + msg = str(or_warnings[0].message) + assert "(g, g_c, t) pair(s)" in msg + assert "np.linalg.lstsq" in msg + # Point estimates should still be finite (lstsq fallback succeeded). + assert np.isfinite(res.overall_att) + + def test_collinear_covariates_emit_ps_hessian_warning(self): + """Collinear propensity-score covariates should trigger the aggregate + PS-Hessian lstsq-fallback warning under IPW/DR inference. Previously + this path was silent (sibling of the OR-side finding; surfaced by + PR #334 CI review).""" + data = generate_staggered_ddd_data( + n_units=200, + treatment_effect=3.0, + add_covariates=True, + seed=55, + ) + data["x3"] = 2.0 * data["x1"] + est = StaggeredTripleDifference( + estimation_method="ipw", # exercises PS path, skips OR projection + rank_deficient_action="silent", + ) + import warnings as _w + + with _w.catch_warnings(record=True) as caught: + _w.simplefilter("always") + est.fit( + data, + "outcome", + "unit", + "period", + "first_treat", + "eligibility", + covariates=["x1", "x2", "x3"], + ) + ps_warnings = [ + w for w in caught + if "propensity-score Hessian" in str(w.message) + ] + assert len(ps_warnings) == 1, ( + f"Expected exactly one aggregate PS-Hessian lstsq-fallback " + f"warning under IPW, got {len(ps_warnings)}: " + f"{[str(w.message) for w in ps_warnings]}" + ) + msg = str(ps_warnings[0].message) + assert "(g, g_c, t) pair(s)" in msg + assert "np.linalg.lstsq" in msg + assert "IPW/DR" in msg + + def test_well_conditioned_covariates_emit_no_lstsq_warning(self): + """Clean, well-conditioned covariates should NOT trigger either + OR or PS-Hessian aggregate warning — regression-safety for the + happy path.""" + data = generate_staggered_ddd_data( + n_units=300, + treatment_effect=3.0, + add_covariates=True, + seed=42, + ) + est = StaggeredTripleDifference(estimation_method="dr") + import warnings as _w + + with _w.catch_warnings(record=True) as caught: + _w.simplefilter("always") + est.fit( + data, + "outcome", + "unit", + "period", + "first_treat", + "eligibility", + covariates=["x1", "x2"], + ) + lstsq_warnings = [ + w for w in caught if "Rank-deficient X'WX" in str(w.message) + ] + assert lstsq_warnings == [], ( + f"Unexpected lstsq-fallback warning on clean covariates: " + f"{[str(w.message) for w in lstsq_warnings]}" + ) + + def test_no_covariates_no_warning(self, simple_data): + """Without covariates both OR and PS paths are skipped, so no + aggregate warning should be emitted.""" + est = StaggeredTripleDifference(estimation_method="reg") + import warnings as _w + + with _w.catch_warnings(record=True) as caught: + _w.simplefilter("always") + est.fit( + simple_data, "outcome", "unit", "period", "first_treat", "eligibility" + ) + lstsq_warnings = [ + w for w in caught if "Rank-deficient X'WX" in str(w.message) + ] + assert lstsq_warnings == [] diff --git a/tests/test_survey.py b/tests/test_survey.py index f8a51fa0..56736057 100644 --- a/tests/test_survey.py +++ b/tests/test_survey.py @@ -3336,3 +3336,77 @@ def test_item7_aweight_normalization_warning(self): sd = SurveyDesign(weights="w", weight_type="aweight") with pytest.warns(UserWarning, match="aweight weights normalized"): sd.resolve(df) + + +# --------------------------------------------------------------------------- +# Silent-failure audit PR #9: finding #19 — compute_survey_vcov called +# np.linalg.solve(XtWX, ...) with no precondition check. A near-singular +# X'WX (zero-weight strata dominating, near-collinear X) solves without +# raising but returns numerically unstable variance. Now warns when +# cond(X'WX) exceeds 1/sqrt(eps). +# --------------------------------------------------------------------------- + + +class TestSurveyVcovIllConditionedWarning: + def test_ill_conditioned_X_warns(self): + """Near-collinear design matrix triggers the cond-number warning.""" + n = 60 + rng = np.random.default_rng(11) + x1 = rng.normal(0, 1, n) + # x2 = x1 + tiny noise → near-collinear after weighting + x2 = x1 + rng.normal(0, 1e-9, n) + X = np.column_stack([np.ones(n), x1, x2]) + y = 1.0 + 0.5 * x1 + rng.normal(0, 0.3, n) + weights = np.ones(n) + + # Use WLS solve to get residuals — but we need residuals that give + # a non-zero meat so the function reaches the vcov solve. + coef = np.linalg.lstsq(X, y, rcond=None)[0] + resid = y - X @ coef + + resolved = ResolvedSurveyDesign( + weights=weights, + weight_type="pweight", + strata=None, + psu=np.arange(n), + fpc=None, + n_strata=0, + n_psu=n, + lonely_psu="remove", + ) + with pytest.warns(UserWarning, match="X'WX is ill-conditioned"): + vcov = compute_survey_vcov(X, resid, resolved) + # Warning does not suppress output — the caller still gets a matrix. + assert vcov.shape == (3, 3) + + def test_well_conditioned_X_no_warning(self): + """Clean design matrix should NOT trigger the warning — happy path.""" + n = 80 + rng = np.random.default_rng(22) + X = np.column_stack([np.ones(n), rng.normal(0, 1, n), rng.normal(0, 1, n)]) + y = 1.0 + 0.5 * X[:, 1] - 0.3 * X[:, 2] + rng.normal(0, 0.3, n) + weights = np.ones(n) + + coef, resid, _ = solve_ols(X, y, weights=weights, weight_type="pweight") + resolved = ResolvedSurveyDesign( + weights=weights, + weight_type="pweight", + strata=None, + psu=np.arange(n), + fpc=None, + n_strata=0, + n_psu=n, + lonely_psu="remove", + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + vcov = compute_survey_vcov(X, resid, resolved) + ill_cond_warnings = [ + w for w in caught if "X'WX is ill-conditioned" in str(w.message) + ] + assert ill_cond_warnings == [], ( + f"Unexpected cond-number warning on clean data: " + f"{[str(w.message) for w in ill_cond_warnings]}" + ) + assert vcov.shape == (3, 3) + assert np.all(np.isfinite(vcov)) diff --git a/tests/test_two_stage.py b/tests/test_two_stage.py index cf5a47b1..56f0d44b 100644 --- a/tests/test_two_stage.py +++ b/tests/test_two_stage.py @@ -1435,3 +1435,114 @@ def test_iterative_demean_no_warning_on_convergence(self): warnings.simplefilter("always") TwoStageDiD._iterative_demean(vals, units, times, idx) assert not any("did not converge" in str(x.message) for x in w) + + +# --------------------------------------------------------------------------- +# Silent-failure audit PR #9: sibling of finding #17 — both the analytical +# TSL variance (`two_stage.py`) and the multiplier-bootstrap bread +# (`two_stage_bootstrap.py`) previously fell back to `np.linalg.lstsq` +# silently when the Stage-2 `X'_2 W X_2` matrix was singular. They now +# emit a `UserWarning` with the same message shape as STD #17. +# --------------------------------------------------------------------------- + + +class TestTwoStageStage2BreadWarning: + """Sibling of STD finding #17: the TwoStage Stage-2 bread fallback + (`X'_2 W X_2` singular) was silent. X_2 is built from treatment/ + event-time/group indicators — not user covariates — so we force the + LinAlgError via patching `np.linalg.solve` rather than data crafting, + per the PR #334 CI review guidance.""" + + def test_analytical_bread_lstsq_fallback_warns(self): + """When np.linalg.solve on the Stage-2 bread raises, the analytical + TSL path must warn and still return a finite variance via lstsq.""" + from unittest.mock import patch + + import diff_diff.two_stage as ts_mod + + data = generate_test_data(n_units=80, n_periods=6, seed=77) + est = TwoStageDiD() + + real_solve = np.linalg.solve + + def raise_for_square_eye(a, b): + # Identify the Stage-2 bread call by shape: b is np.eye(k) + # (square identity). All other solve calls in the codepath + # have different b shapes. + if ( + isinstance(b, np.ndarray) + and b.ndim == 2 + and b.shape[0] == b.shape[1] + and np.allclose(b, np.eye(b.shape[0])) + ): + raise np.linalg.LinAlgError("forced by test") + return real_solve(a, b) + + with patch.object(ts_mod.np.linalg, "solve", side_effect=raise_for_square_eye): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + result = est.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + fallback = [ + w for w in caught + if "TwoStageDiD TSL variance" in str(w.message) + ] + assert len(fallback) >= 1, ( + "Expected TSL-variance bread fallback warning when np.linalg.solve " + f"was forced to raise; got warnings: " + f"{[str(w.message) for w in caught]}" + ) + msg = str(fallback[0].message) + assert "np.linalg.lstsq" in msg + assert "X_2'WX_2" in msg + # lstsq fallback must still produce a finite SE. + assert np.isfinite(result.overall_se) + + def test_bootstrap_bread_lstsq_fallback_warns(self): + """Same contract for the multiplier-bootstrap bread path.""" + from unittest.mock import patch + + import diff_diff.two_stage_bootstrap as tsb_mod + + data = generate_test_data(n_units=80, n_periods=6, seed=77) + est = TwoStageDiD(n_bootstrap=10, seed=0) + + real_solve = np.linalg.solve + + def raise_for_square_eye(a, b): + if ( + isinstance(b, np.ndarray) + and b.ndim == 2 + and b.shape[0] == b.shape[1] + and np.allclose(b, np.eye(b.shape[0])) + ): + raise np.linalg.LinAlgError("forced by test") + return real_solve(a, b) + + with patch.object(tsb_mod.np.linalg, "solve", side_effect=raise_for_square_eye): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + est.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + fallback = [ + w for w in caught + if "TwoStageDiD multiplier bootstrap bread" in str(w.message) + ] + assert len(fallback) >= 1, ( + "Expected bootstrap-bread fallback warning when np.linalg.solve " + f"was forced to raise; got warnings: " + f"{[str(w.message) for w in caught]}" + ) + msg = str(fallback[0].message) + assert "np.linalg.lstsq" in msg + assert "X_2'WX_2" in msg