From a5f38e18b78ccbf17995d0a9853e6737bcdd09f8 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 19 Apr 2026 08:08:55 -0400 Subject: [PATCH 1/5] Surface row-count for axis-E silent coercion / drop sites Three long-standing axis-E (silent data sanitization) sites now emit a UserWarning with the affected row count before the coercion, so users can no longer be silently shifted between the treated and control sides of an estimator or have rows vanish from a diagnostic pass without a signal. - WooldridgeDiD: NaN cohort values were filled to 0 (never-treated) both in `_filter_sample` and in `fit()`. Both now warn with the NaN row count before the fillna (finding #24). - ContinuousDiD: `first_treat=inf` was replaced with 0 silently. `fit()` now counts inf rows and warns before the replace, before any downstream drop-zero-dose / negative-dose validation (finding #26). - `_compute_outcome_changes` (the `check_parallel_trends` diff helper) dropped NaN first-differences without reporting the count. It now distinguishes the expected first-period-per-unit drops from excess drops caused by gaps / NaN outcomes and warns with the breakdown when excess drops are detected (finding #27). Finding #25 (TROP D-matrix coercion) was verified during scoping to already be resolved in `trop_local.py:60-66` via `n_missing_structural` + returned `missing_mask`; no code change required. REGISTRY updated under both WooldridgeDiD and ContinuousDiD to document the new warning contract. Covered by audit axis E (data sanitization). Findings #24, #26, #27 from docs/audits/silent-failures-findings.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/continuous_did.py | 15 ++++++++++- diff_diff/utils.py | 18 ++++++++++++- diff_diff/wooldridge.py | 27 ++++++++++++++++++- docs/methodology/REGISTRY.md | 2 ++ tests/test_continuous_did.py | 34 ++++++++++++++++++++---- tests/test_utils.py | 51 ++++++++++++++++++++++++++++++++++++ tests/test_wooldridge.py | 47 +++++++++++++++++++++++++++++++++ 7 files changed, 186 insertions(+), 8 deletions(-) diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py index 019796c3..23346840 100644 --- a/diff_diff/continuous_did.py +++ b/diff_diff/continuous_did.py @@ -227,7 +227,20 @@ def fit( f"Dose must be time-invariant. Units with varying dose: {bad_units[:5]}" ) - # Normalize first_treat: inf → 0 + # Normalize first_treat: inf → 0 (R-style never-treated encoding). Count + # rows recategorized so users can see how many units just crossed from + # "treated at some point" to "never treated" — silent recategorization + # here would shift the control composition (axis-E silent coercion). + inf_mask = np.isinf(df[first_treat].values) + n_inf_first_treat = int(inf_mask.sum()) + if n_inf_first_treat > 0: + warnings.warn( + f"{n_inf_first_treat} row(s) have inf in '{first_treat}'; " + f"treating the corresponding units as never-treated. Pass an " + f"explicit never-treated marker (0) if this is not intended.", + UserWarning, + stacklevel=2, + ) df[first_treat] = df[first_treat].replace([np.inf, float("inf")], 0) # Drop units with positive first_treat but zero dose (R convention) diff --git a/diff_diff/utils.py b/diff_diff/utils.py index 704a2138..00dc7b0e 100644 --- a/diff_diff/utils.py +++ b/diff_diff/utils.py @@ -925,7 +925,23 @@ def _compute_outcome_changes( data_sorted = data.sort_values([unit, time]) data_sorted["_outcome_change"] = data_sorted.groupby(unit)[outcome].diff() - # Remove NaN from first period of each unit + # Remove NaN from first period of each unit. The first period per unit + # has no prior observation to diff against, so n_units drops are + # expected. Anything beyond that is a silent side-effect of gaps or + # NaN outcomes — surface the excess via warning (axis-E drop counter). + n_units_observed = int(data_sorted[unit].nunique()) + n_dropped = int(data_sorted["_outcome_change"].isna().sum()) + n_unexpected_drops = max(0, n_dropped - n_units_observed) + if n_unexpected_drops > 0: + warnings.warn( + f"check_parallel_trends dropped {n_dropped} row(s) with NaN " + f"first-differences; {n_units_observed} are the expected " + f"first-period-per-unit drops, and {n_unexpected_drops} came " + f"from gaps or NaN outcomes. Parallel-trend statistics are " + f"computed on the remaining rows.", + UserWarning, + stacklevel=3, + ) changes_data = data_sorted.dropna(subset=["_outcome_change"]) treated_changes = changes_data[changes_data[treatment_group] == 1]["_outcome_change"].values diff --git a/diff_diff/wooldridge.py b/diff_diff/wooldridge.py index 4bc2c0ce..5ade4cce 100644 --- a/diff_diff/wooldridge.py +++ b/diff_diff/wooldridge.py @@ -13,6 +13,7 @@ from __future__ import annotations +import warnings from typing import Any, Dict, List, Optional, Tuple import numpy as np @@ -128,7 +129,19 @@ def _filter_sample( (see _build_interaction_matrix). """ df = data.copy() - # Normalise never-treated: fill NaN cohort with 0 + # Normalise never-treated: fill NaN cohort with 0. Report the row count so + # callers can see how many rows were recategorized as never-treated — a + # silent recategorization here would quietly move units between the + # treated and control sides of the estimator (axis-E silent coercion). + n_nan_cohort = int(df[cohort].isna().sum()) + if n_nan_cohort > 0: + warnings.warn( + f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 " + f"and treating the corresponding units as never-treated. Pass " + f"an explicit never-treated marker (0) if this is not intended.", + UserWarning, + stacklevel=3, + ) df[cohort] = df[cohort].fillna(0) treated_mask = df[cohort] > 0 @@ -396,6 +409,18 @@ def fit( ``NotImplementedError``. """ df = data.copy() + # See `_filter_sample` for the analogous warning; fit() does its own + # fillna earlier in the pipeline so we warn here too to cover the + # direct-fit path. + n_nan_cohort = int(df[cohort].isna().sum()) + if n_nan_cohort > 0: + warnings.warn( + f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 " + f"and treating the corresponding units as never-treated. Pass " + f"an explicit never-treated marker (0) if this is not intended.", + UserWarning, + stacklevel=2, + ) df[cohort] = df[cohort].fillna(0) # 0a. Validate cohort is time-invariant within unit diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index bdcfef34..9729ce1b 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -720,6 +720,7 @@ See `docs/methodology/continuous-did.md` Section 4 for full details. - [ ] Lowest-dose-as-control (Remark 3.1) - [x] Survey design support (Phase 3): weighted B-spline OLS, TSL on influence functions; bootstrap+survey supported (Phase 6) - **Note:** ContinuousDiD bootstrap with survey weights supported (Phase 6) via PSU-level multiplier weights +- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Pass `0` directly to avoid the warning. --- @@ -1303,6 +1304,7 @@ The saturated ETWFE regression includes: The interaction coefficient `δ_{g,t}` identifies `ATT(g, t)` under parallel trends. - **Note:** OLS path uses iterative alternating-projection within-transformation (uniform weights) for exact FE absorption on both balanced and unbalanced panels. One-pass demeaning (`y - ȳ_i - ȳ_t + ȳ`) is only exact for balanced panels. - **Note:** The weighted within-transformation (`utils.within_transform` with `weights`) is invoked on every WooldridgeDiD fit (survey weights when provided, `np.ones` otherwise) and emits a `UserWarning` on non-convergence per the shared convention documented under *Absorbed Fixed Effects with Survey Weights*. +- **Note:** NaN values in the `cohort` column are filled with 0 (treated as never-treated), both in `_filter_sample` and in `fit()`. This recategorization now emits a `UserWarning` reporting the affected row count so it is no longer silent (axis-E silent coercion under the Phase 2 audit). Pass `0` directly for never-treated units to avoid the warning. *Nonlinear extensions (Wooldridge 2023):* diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py index c519c3bb..41daa3cf 100644 --- a/tests/test_continuous_did.py +++ b/tests/test_continuous_did.py @@ -641,16 +641,40 @@ def test_few_treated_units(self): assert isinstance(results, ContinuousDiDResults) def test_inf_first_treat_normalization(self): - """first_treat=inf should be treated as never-treated.""" + """first_treat=inf should be treated as never-treated, and the caller + must receive a UserWarning reporting the affected row count so the + recategorization is not silent (axis-E counter).""" data = generate_continuous_did_data(n_units=50, n_periods=3, seed=42) data["first_treat"] = data["first_treat"].astype(float) - data.loc[data["first_treat"] == 0, "first_treat"] = np.inf + inf_mask = data["first_treat"] == 0 + n_inf_rows = int(inf_mask.sum()) + data.loc[inf_mask, "first_treat"] = np.inf est = ContinuousDiD() - results = est.fit( - data, "outcome", "unit", "period", "first_treat", "dose" - ) + + with pytest.warns( + UserWarning, + match=rf"{n_inf_rows} row\(s\) have inf in 'first_treat'", + ): + results = est.fit( + data, "outcome", "unit", "period", "first_treat", "dose" + ) assert results.n_control_units > 0 + def test_no_inf_first_treat_no_warning(self): + """No inf rows in first_treat — no recategorization warning.""" + import warnings + + data = generate_continuous_did_data(n_units=50, n_periods=3, seed=42) + data["first_treat"] = data["first_treat"].astype(float) + est = ContinuousDiD() + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + + inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)] + assert inf_warnings == [] + def test_custom_dvals(self): data = generate_continuous_did_data(n_units=100, n_periods=3, seed=42) custom_grid = np.array([1.0, 2.0, 3.0]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 61a0ed80..8cc4232c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -827,6 +827,57 @@ def test_changes_reflect_trend(self): np.testing.assert_array_almost_equal(treated_changes, 2.0, decimal=5) np.testing.assert_array_almost_equal(control_changes, 2.0, decimal=5) + def test_silent_on_balanced_panel(self): + """Balanced panel: only first-period-per-unit drops, no warning.""" + import warnings + + rng = np.random.default_rng(0) + rows = [] + for unit in range(10): + treated = int(unit >= 5) + for t in range(1, 5): + rows.append({ + "unit": unit, "period": t, + "treated": treated, "outcome": rng.normal(), + }) + df = pd.DataFrame(rows) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _compute_outcome_changes( + df, outcome="outcome", time="period", + treatment_group="treated", unit="unit", + ) + + drop_warnings = [ + x for x in w if "check_parallel_trends dropped" in str(x.message) + ] + assert drop_warnings == [] + + def test_warns_on_nan_outcomes_with_excess_drop_count(self): + """Extra NaN-outcome rows beyond first-period drops must surface via + a UserWarning reporting the excess count (axis-E drop counter).""" + rng = np.random.default_rng(0) + rows = [] + for unit in range(10): + treated = int(unit >= 5) + for t in range(1, 5): + rows.append({ + "unit": unit, "period": t, + "treated": treated, "outcome": rng.normal(), + }) + df = pd.DataFrame(rows) + df.loc[[5, 12, 22], "outcome"] = np.nan + + with pytest.warns( + UserWarning, + match=r"check_parallel_trends dropped \d+ row\(s\).*first-period-per-unit", + ): + _compute_outcome_changes( + df, outcome="outcome", time="period", + treatment_group="treated", unit="unit", + ) + # ============================================================================= # Tests for check_parallel_trends_robust diff --git a/tests/test_wooldridge.py b/tests/test_wooldridge.py index 19027fce..2cedbf56 100644 --- a/tests/test_wooldridge.py +++ b/tests/test_wooldridge.py @@ -1593,3 +1593,50 @@ def test_survey_aggregate_and_summary(self, survey_panel): s = r.summary() assert "Survey Design" in s assert "pweight" in s + + +class TestCohortNaNWarning: + """Axis-E: silent recategorization of NaN cohort rows as never-treated.""" + + @staticmethod + def _make_panel_with_nan_cohort(): + rows = [] + for unit in range(10): + cohort_val = np.nan if unit < 2 else 0.0 + for t in range(1, 5): + rows.append({ + "unit": unit, "time": t, "cohort": cohort_val, + "y": unit + t + np.random.default_rng(unit).normal(0, 0.1), + }) + return pd.DataFrame(rows) + + def test_fit_warns_on_nan_cohort_with_count(self): + df = self._make_panel_with_nan_cohort() + est = WooldridgeDiD(method="ols") + with pytest.warns(UserWarning, match=r"8 row\(s\) have NaN cohort values"): + try: + est.fit(df, outcome="y", unit="unit", time="time", cohort="cohort") + except Exception: + pass + + def test_fit_silent_on_clean_cohort(self): + import warnings + df = self._make_panel_with_nan_cohort() + df["cohort"] = df["cohort"].fillna(0) + est = WooldridgeDiD(method="ols") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + try: + est.fit(df, outcome="y", unit="unit", time="time", cohort="cohort") + except Exception: + pass + nan_warnings = [x for x in w if "NaN cohort values" in str(x.message)] + assert nan_warnings == [] + + def test_select_sample_helper_warns(self): + df = self._make_panel_with_nan_cohort() + with pytest.warns(UserWarning, match=r"8 row\(s\) have NaN cohort values"): + _filter_sample( + df, unit="unit", time="time", cohort="cohort", + control_group="never_treated", anticipation=0, + ) From d879c88c159d1f402020075ce726db1f5f5eac31 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 19 Apr 2026 08:11:05 -0400 Subject: [PATCH 2/5] Address local AI review P2/P3 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - P2 (wooldridge): Extract shared `_warn_and_fill_nan_cohort(df, cohort, stacklevel)` helper used by both `_filter_sample` and `fit()`. Removes the copy-paste warning block that was flagged as a future drift risk. - P2 (tests): Add `test_inf_first_treat_warning_counts_rows_not_units` on a 4-unit x 3-period panel. 2 units carry inf across all 3 periods (6 inf rows, 2 inf units) — the warning must report 6, not 2, because `.replace(inf, 0)` is row-level. - P3 (utils wording): The `_compute_outcome_changes` excess-drop warning said "gaps or NaN outcomes" but the code actually counts all NaN first-differences. Rephrased to "additional NaN first-differences (e.g. NaN outcomes or unit-period gaps upstream)" so the message doesn't over-claim what the helper can detect. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/utils.py | 5 ++-- diff_diff/wooldridge.py | 49 ++++++++++++++++-------------------- tests/test_continuous_did.py | 30 ++++++++++++++++++++++ tests/test_utils.py | 2 +- 4 files changed, 56 insertions(+), 30 deletions(-) diff --git a/diff_diff/utils.py b/diff_diff/utils.py index 00dc7b0e..caf44e49 100644 --- a/diff_diff/utils.py +++ b/diff_diff/utils.py @@ -936,8 +936,9 @@ def _compute_outcome_changes( warnings.warn( f"check_parallel_trends dropped {n_dropped} row(s) with NaN " f"first-differences; {n_units_observed} are the expected " - f"first-period-per-unit drops, and {n_unexpected_drops} came " - f"from gaps or NaN outcomes. Parallel-trend statistics are " + f"first-period-per-unit drops, and {n_unexpected_drops} are " + f"additional NaN first-differences (e.g. NaN outcomes or " + f"unit-period gaps upstream). Parallel-trend statistics are " f"computed on the remaining rows.", UserWarning, stacklevel=3, diff --git a/diff_diff/wooldridge.py b/diff_diff/wooldridge.py index 5ade4cce..fb63a29a 100644 --- a/diff_diff/wooldridge.py +++ b/diff_diff/wooldridge.py @@ -113,6 +113,26 @@ def _resolve_survey_for_wooldridge(survey_design, sample, cluster_ids, cluster_n return resolved, survey_weights, survey_weight_type, survey_metadata, df_inf +def _warn_and_fill_nan_cohort(df: pd.DataFrame, cohort: str, stacklevel: int) -> pd.DataFrame: + """Fill NaN cohort with 0 (never-treated) and warn with the row count. + + Used by both `_filter_sample` (pre-fit) and `WooldridgeDiD.fit()` so the + silent recategorization is surfaced on whichever entry path the caller + hits first. See REGISTRY.md §WooldridgeDiD (axis-E silent coercion). + """ + n_nan_cohort = int(df[cohort].isna().sum()) + if n_nan_cohort > 0: + warnings.warn( + f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 " + f"and treating the corresponding units as never-treated. Pass " + f"an explicit never-treated marker (0) if this is not intended.", + UserWarning, + stacklevel=stacklevel, + ) + df[cohort] = df[cohort].fillna(0) + return df + + def _filter_sample( data: pd.DataFrame, unit: str, @@ -129,20 +149,7 @@ def _filter_sample( (see _build_interaction_matrix). """ df = data.copy() - # Normalise never-treated: fill NaN cohort with 0. Report the row count so - # callers can see how many rows were recategorized as never-treated — a - # silent recategorization here would quietly move units between the - # treated and control sides of the estimator (axis-E silent coercion). - n_nan_cohort = int(df[cohort].isna().sum()) - if n_nan_cohort > 0: - warnings.warn( - f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 " - f"and treating the corresponding units as never-treated. Pass " - f"an explicit never-treated marker (0) if this is not intended.", - UserWarning, - stacklevel=3, - ) - df[cohort] = df[cohort].fillna(0) + df = _warn_and_fill_nan_cohort(df, cohort, stacklevel=3) treated_mask = df[cohort] > 0 @@ -409,19 +416,7 @@ def fit( ``NotImplementedError``. """ df = data.copy() - # See `_filter_sample` for the analogous warning; fit() does its own - # fillna earlier in the pipeline so we warn here too to cover the - # direct-fit path. - n_nan_cohort = int(df[cohort].isna().sum()) - if n_nan_cohort > 0: - warnings.warn( - f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 " - f"and treating the corresponding units as never-treated. Pass " - f"an explicit never-treated marker (0) if this is not intended.", - UserWarning, - stacklevel=2, - ) - df[cohort] = df[cohort].fillna(0) + df = _warn_and_fill_nan_cohort(df, cohort, stacklevel=2) # 0a. Validate cohort is time-invariant within unit cohort_per_unit = df.groupby(unit)[cohort].nunique() diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py index 41daa3cf..c880013c 100644 --- a/tests/test_continuous_did.py +++ b/tests/test_continuous_did.py @@ -675,6 +675,36 @@ def test_no_inf_first_treat_no_warning(self): inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)] assert inf_warnings == [] + def test_inf_first_treat_warning_counts_rows_not_units(self): + """The warning counts affected rows (not units). On a panel with + multiple periods per unit, each inf row must count separately so the + message surface matches the per-row semantics of `.replace(inf, 0)`.""" + # Build a 4-unit, 3-period panel (12 rows). 2 units have inf across + # all 3 periods → 6 inf rows, 2 units, so row-count != unit-count. + rows = [] + for unit in range(4): + ft = np.inf if unit < 2 else 2.0 + dose = 0.0 if unit < 2 else 1.0 + for t in range(1, 4): + rows.append({ + "unit": unit, "period": t, "outcome": float(unit + t), + "first_treat": ft, "dose": dose, + }) + data = pd.DataFrame(rows) + est = ContinuousDiD() + + with pytest.warns( + UserWarning, + match=r"6 row\(s\) have inf in 'first_treat'", + ): + try: + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + except Exception: + # Downstream validation may reject this minimal panel (too few + # treated for OLS). We only care that the inf-row warning fires + # with the correct row count. + pass + def test_custom_dvals(self): data = generate_continuous_did_data(n_units=100, n_periods=3, seed=42) custom_grid = np.array([1.0, 2.0, 3.0]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8cc4232c..1426909d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -871,7 +871,7 @@ def test_warns_on_nan_outcomes_with_excess_drop_count(self): with pytest.warns( UserWarning, - match=r"check_parallel_trends dropped \d+ row\(s\).*first-period-per-unit", + match=r"check_parallel_trends dropped \d+ row\(s\).*additional NaN first-differences", ): _compute_outcome_changes( df, outcome="outcome", time="period", From e5755c266492ddbe7147a4d1f727af6e844fd6f5 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 19 Apr 2026 08:28:41 -0400 Subject: [PATCH 3/5] Address CI AI review: close remaining axis-E sites + warning accuracy Three findings from the CI AI review on PR #331: P1 (newly identified): `ContinuousDiD.fit()` still silently rewrote `dose` to 0.0 for rows with `first_treat==0` but nonzero dose, in the same preprocessing block the PR had otherwise cleaned up. Counting those rows and emitting a `UserWarning` with the row count before the zeroing closes the axis-E slice for this estimator. REGISTRY updated with a matching **Note:** and two regression tests added (silent on clean data, warns with correct row count on polluted data). P2 (warning wording): The axis-E excess-drop warning in `_compute_outcome_changes` hardcoded "check_parallel_trends" but the function is called from `check_parallel_trends_robust` and `equivalence_test_trends`, not from the simple `check_parallel_trends`. Add a `caller_label` kwarg and thread the correct name from each public entry point; regression tests cover both callers surfacing the warning with their own name. P2 (inf asymmetry): The inf-first-treat counter used `np.isinf`, which counts both +inf and -inf, but the actual recode on the next line (`.replace([np.inf, float("inf")], 0)`) only touches +inf. A -inf input would have triggered a warning about "treating as never-treated" even though no such recode happened. Switch to `np.isposinf` so the warning count matches the recode set; add a test confirming -inf does not fire the warning. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/continuous_did.py | 31 ++++++++++---- diff_diff/utils.py | 15 +++++-- docs/methodology/REGISTRY.md | 3 +- tests/test_continuous_did.py | 78 ++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 31 +++++++++++++- 5 files changed, 145 insertions(+), 13 deletions(-) diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py index 23346840..368b947b 100644 --- a/diff_diff/continuous_did.py +++ b/diff_diff/continuous_did.py @@ -227,11 +227,15 @@ def fit( f"Dose must be time-invariant. Units with varying dose: {bad_units[:5]}" ) - # Normalize first_treat: inf → 0 (R-style never-treated encoding). Count - # rows recategorized so users can see how many units just crossed from - # "treated at some point" to "never treated" — silent recategorization - # here would shift the control composition (axis-E silent coercion). - inf_mask = np.isinf(df[first_treat].values) + # Normalize first_treat: +inf → 0 (R-style never-treated encoding). + # Count rows recategorized so users can see how many units just + # crossed from "treated at some point" to "never treated" — silent + # recategorization here would shift the control composition (axis-E + # silent coercion). Only positive infinity is recoded (to match the + # existing `.replace([np.inf, float("inf")], 0)` semantics on the + # next line); `-inf` is neither counted here nor recoded, so a + # downstream validator will reject it if present. + inf_mask = np.isposinf(df[first_treat].values) n_inf_first_treat = int(inf_mask.sum()) if n_inf_first_treat > 0: warnings.warn( @@ -278,9 +282,22 @@ def fit( stacklevel=2, ) - # Force dose=0 for never-treated units with nonzero dose + # Force dose=0 for never-treated units with nonzero dose. Report the + # affected row count via UserWarning so users can see whether their + # never-treated rows had unintended nonzero doses — silent zeroing + # here would quietly shift part of the control trajectory (axis-E + # silent coercion, paired with the `first_treat=inf -> 0` fix above). never_treated_mask = df[first_treat] == 0 - if (df.loc[never_treated_mask, dose] != 0).any(): + nonzero_dose_rows = never_treated_mask & (df[dose] != 0) + n_nonzero_dose_never_treated = int(nonzero_dose_rows.sum()) + if n_nonzero_dose_never_treated > 0: + warnings.warn( + f"{n_nonzero_dose_never_treated} row(s) have '{first_treat}'=0 " + f"(never-treated) but nonzero '{dose}'; zeroing the dose. Pass " + f"dose=0 for never-treated rows to avoid this coercion.", + UserWarning, + stacklevel=2, + ) df.loc[never_treated_mask, dose] = 0.0 # Verify balanced panel diff --git a/diff_diff/utils.py b/diff_diff/utils.py index caf44e49..47df08e1 100644 --- a/diff_diff/utils.py +++ b/diff_diff/utils.py @@ -821,7 +821,8 @@ def check_parallel_trends_robust( # Compute outcome changes treated_changes, control_changes = _compute_outcome_changes( - pre_data, outcome, time, treatment_group, unit + pre_data, outcome, time, treatment_group, unit, + caller_label="check_parallel_trends_robust", ) if len(treated_changes) < 2 or len(control_changes) < 2: @@ -897,7 +898,12 @@ def check_parallel_trends_robust( def _compute_outcome_changes( - data: pd.DataFrame, outcome: str, time: str, treatment_group: str, unit: Optional[str] = None + data: pd.DataFrame, + outcome: str, + time: str, + treatment_group: str, + unit: Optional[str] = None, + caller_label: str = "parallel-trend diagnostic", ) -> Tuple[np.ndarray, np.ndarray]: """ Compute period-to-period outcome changes for treated and control groups. @@ -934,7 +940,7 @@ def _compute_outcome_changes( n_unexpected_drops = max(0, n_dropped - n_units_observed) if n_unexpected_drops > 0: warnings.warn( - f"check_parallel_trends dropped {n_dropped} row(s) with NaN " + f"{caller_label}: dropped {n_dropped} row(s) with NaN " f"first-differences; {n_units_observed} are the expected " f"first-period-per-unit drops, and {n_unexpected_drops} are " f"additional NaN first-differences (e.g. NaN outcomes or " @@ -1018,7 +1024,8 @@ def equivalence_test_trends( # Compute outcome changes treated_changes, control_changes = _compute_outcome_changes( - pre_data, outcome, time, treatment_group, unit + pre_data, outcome, time, treatment_group, unit, + caller_label="equivalence_test_trends", ) # Need at least 2 observations per group to compute variance diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 9729ce1b..a688ec81 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -720,7 +720,8 @@ See `docs/methodology/continuous-did.md` Section 4 for full details. - [ ] Lowest-dose-as-control (Remark 3.1) - [x] Survey design support (Phase 3): weighted B-spline OLS, TSL on influence functions; bootstrap+survey supported (Phase 6) - **Note:** ContinuousDiD bootstrap with survey weights supported (Phase 6) via PSU-level multiplier weights -- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Pass `0` directly to avoid the warning. +- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Only `+inf` is recoded (matching the R convention); `-inf` passes through untouched and will be rejected by downstream validators. Pass `0` directly to avoid the warning. +- **Note:** Rows where `first_treat=0` (never-treated) carry a nonzero `dose` are silently zeroed for internal consistency (never-treated cells must have `D=0` in the dose response). The estimator now emits a `UserWarning` with the affected row count before the zeroing, so unintended nonzero doses on never-treated rows are no longer absorbed without a signal (axis-E silent coercion). --- diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py index c880013c..76e128f6 100644 --- a/tests/test_continuous_did.py +++ b/tests/test_continuous_did.py @@ -675,6 +675,84 @@ def test_no_inf_first_treat_no_warning(self): inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)] assert inf_warnings == [] + def test_nonzero_dose_on_never_treated_warns(self): + """first_treat=0 (never-treated) rows with nonzero dose must now surface + a UserWarning with the affected row count before the zeroing coercion. + Before PR #331's CI-review follow-up this was silent.""" + # 4 units x 3 periods (12 rows). 2 units are never-treated (first_treat=0) + # but carry dose=1.5 on every row — 6 rows should be reported. + rows = [] + for unit in range(4): + if unit < 2: + ft, dose_val = 0.0, 1.5 # never-treated with nonzero dose + else: + ft, dose_val = 2.0, 1.0 # treated + for t in range(1, 4): + rows.append({ + "unit": unit, "period": t, "outcome": float(unit + t), + "first_treat": ft, "dose": dose_val, + }) + data = pd.DataFrame(rows) + est = ContinuousDiD() + + with pytest.warns( + UserWarning, + match=r"6 row\(s\) have 'first_treat'=0 \(never-treated\) but nonzero 'dose'", + ): + try: + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + except Exception: + # Downstream validation may reject this minimal panel (too few + # treated for OLS); we only care about the dose-coercion warning. + pass + + def test_clean_never_treated_doses_silent(self): + """Never-treated rows with dose=0 must not trigger the coercion warning.""" + import warnings + data = generate_continuous_did_data(n_units=50, n_periods=3, seed=42) + # generate_continuous_did_data already sets dose=0 for never-treated. + est = ContinuousDiD() + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + + coerce_warnings = [ + x for x in w + if "never-treated" in str(x.message) and "nonzero 'dose'" in str(x.message) + ] + assert coerce_warnings == [] + + def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self): + """-inf first_treat is NOT recoded to 0 by `.replace([inf, float("inf")], 0)`, + so the recategorization warning (which used to count both +inf and -inf + via np.isinf) must not fire for -inf rows.""" + import warnings + rows = [] + for unit in range(4): + # Unit 0 carries -inf (not recoded, so downstream validation should + # see it as-is). Others are untreated with dose=0. + ft = -np.inf if unit == 0 else 0.0 + for t in range(1, 4): + rows.append({ + "unit": unit, "period": t, "outcome": float(unit + t), + "first_treat": ft, "dose": 0.0, + }) + data = pd.DataFrame(rows) + est = ContinuousDiD() + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + try: + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + except Exception: + pass + + inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)] + assert inf_warnings == [], ( + "-inf must not trigger the +inf recategorization warning" + ) + def test_inf_first_treat_warning_counts_rows_not_units(self): """The warning counts affected rows (not units). On a panel with multiple periods per unit, each inf row must count separately so the diff --git a/tests/test_utils.py b/tests/test_utils.py index 1426909d..72905f7e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -871,13 +871,42 @@ def test_warns_on_nan_outcomes_with_excess_drop_count(self): with pytest.warns( UserWarning, - match=r"check_parallel_trends dropped \d+ row\(s\).*additional NaN first-differences", + match=r"parallel-trend diagnostic: dropped \d+ row\(s\).*additional NaN first-differences", ): _compute_outcome_changes( df, outcome="outcome", time="period", treatment_group="treated", unit="unit", ) + def test_warning_label_reflects_public_caller(self): + """`check_parallel_trends_robust` and `equivalence_test_trends` must + each surface the axis-E excess-drop warning under their own name so + users can trace the signal back to the function they called.""" + rng = np.random.default_rng(0) + rows = [] + for unit in range(10): + treated = int(unit >= 5) + for t in range(1, 5): + rows.append({ + "unit": unit, "period": t, + "treated": treated, "outcome": rng.normal(), + }) + df = pd.DataFrame(rows) + df.loc[[5, 12, 22], "outcome"] = np.nan + + with pytest.warns(UserWarning, match="check_parallel_trends_robust:"): + check_parallel_trends_robust( + df, outcome="outcome", time="period", + treatment_group="treated", unit="unit", + n_permutations=100, seed=0, + ) + + with pytest.warns(UserWarning, match="equivalence_test_trends:"): + equivalence_test_trends( + df, outcome="outcome", time="period", + treatment_group="treated", unit="unit", + ) + # ============================================================================= # Tests for check_parallel_trends_robust From a036525f15ceb4becb2e4f8d00c1319804988fc5 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 19 Apr 2026 08:41:36 -0400 Subject: [PATCH 4/5] Address CI AI review: reject negative first_treat in ContinuousDiD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI AI re-review flagged (P1) that the previous commit claimed "-inf will be rejected by downstream validators" in both the code comment and REGISTRY.md, but no such validator existed. After the `+inf -> 0` normalization, `first_treat < 0` units fell out of both the treated (g > 0) and never-treated (g == 0) masks, so the affected units were silently excluded from the estimator — exactly the axis-E silent failure the PR was closing. - ContinuousDiD.fit() now validates `first_treat < 0` explicitly post-normalization and raises ValueError with the row count. -inf, -2, and any other negative value are all rejected. - REGISTRY.md note rewritten to match the implemented behavior. - Existing -inf test replaced with one that asserts `pytest.raises(ValueError)` matching the row-count message, plus a positive regression test confirming +inf warning stays silent on panels with only valid 0/positive `first_treat` values. - tests/test_utils.py::test_silent_on_balanced_panel tightened: the balanced-panel silence assertion now filters on any warning containing "dropped", so a regression that changed the warning label would no longer hide a genuine drop signal. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/continuous_did.py | 19 ++++++++++++--- docs/methodology/REGISTRY.md | 2 +- tests/test_continuous_did.py | 46 +++++++++++++++++++++++++++--------- tests/test_utils.py | 6 ++--- 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py index 368b947b..698af060 100644 --- a/diff_diff/continuous_did.py +++ b/diff_diff/continuous_did.py @@ -233,9 +233,9 @@ def fit( # recategorization here would shift the control composition (axis-E # silent coercion). Only positive infinity is recoded (to match the # existing `.replace([np.inf, float("inf")], 0)` semantics on the - # next line); `-inf` is neither counted here nor recoded, so a - # downstream validator will reject it if present. - inf_mask = np.isposinf(df[first_treat].values) + # next line). + first_treat_vals = df[first_treat].values + inf_mask = np.isposinf(first_treat_vals) n_inf_first_treat = int(inf_mask.sum()) if n_inf_first_treat > 0: warnings.warn( @@ -245,6 +245,19 @@ def fit( UserWarning, stacklevel=2, ) + # Reject negative first_treat values (including -inf) explicitly. + # Without this guard they would survive preprocessing but fall out of + # both the treated (g > 0) and never-treated (g == 0) masks, silently + # excluding the affected units. + negative_mask = first_treat_vals < 0 + n_negative_first_treat = int(negative_mask.sum()) + if n_negative_first_treat > 0: + raise ValueError( + f"{n_negative_first_treat} row(s) have negative '{first_treat}' " + f"values (including -inf). Valid values are 0 (never-treated) " + f"or a positive treatment period; such units would otherwise " + f"be silently excluded from both treated and control pools." + ) df[first_treat] = df[first_treat].replace([np.inf, float("inf")], 0) # Drop units with positive first_treat but zero dose (R convention) diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index a688ec81..08537681 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -720,7 +720,7 @@ See `docs/methodology/continuous-did.md` Section 4 for full details. - [ ] Lowest-dose-as-control (Remark 3.1) - [x] Survey design support (Phase 3): weighted B-spline OLS, TSL on influence functions; bootstrap+survey supported (Phase 6) - **Note:** ContinuousDiD bootstrap with survey weights supported (Phase 6) via PSU-level multiplier weights -- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Only `+inf` is recoded (matching the R convention); `-inf` passes through untouched and will be rejected by downstream validators. Pass `0` directly to avoid the warning. +- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Only `+inf` is recoded (matching the R convention). Any **negative** `first_treat` value (including `-inf`) raises `ValueError` with the row count, since such units would otherwise silently fall out of both the treated (`g > 0`) and never-treated (`g == 0`) masks. Pass `0` directly for never-treated units to avoid the warning. - **Note:** Rows where `first_treat=0` (never-treated) carry a nonzero `dose` are silently zeroed for internal consistency (never-treated cells must have `D=0` in the dose response). The estimator now emits a `UserWarning` with the affected row count before the zeroing, so unintended nonzero doses on never-treated rows are no longer absorbed without a signal (axis-E silent coercion). --- diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py index 76e128f6..dbdb6fa8 100644 --- a/tests/test_continuous_did.py +++ b/tests/test_continuous_did.py @@ -723,16 +723,20 @@ def test_clean_never_treated_doses_silent(self): ] assert coerce_warnings == [] - def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self): - """-inf first_treat is NOT recoded to 0 by `.replace([inf, float("inf")], 0)`, - so the recategorization warning (which used to count both +inf and -inf - via np.isinf) must not fire for -inf rows.""" - import warnings + def test_negative_first_treat_raises_with_row_count(self): + """Negative `first_treat` (including -inf) must raise ValueError with + the affected row count. Without this guard the affected units fall + out of both the treated (g > 0) and never-treated (g == 0) masks and + are silently excluded from the estimator.""" rows = [] for unit in range(4): - # Unit 0 carries -inf (not recoded, so downstream validation should - # see it as-is). Others are untreated with dose=0. - ft = -np.inf if unit == 0 else 0.0 + # Unit 0: -inf. Unit 1: -2. Others: valid (0 or positive). + if unit == 0: + ft = -np.inf + elif unit == 1: + ft = -2.0 + else: + ft = 0.0 for t in range(1, 4): rows.append({ "unit": unit, "period": t, "outcome": float(unit + t), @@ -741,6 +745,28 @@ def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self data = pd.DataFrame(rows) est = ContinuousDiD() + with pytest.raises( + ValueError, + match=r"6 row\(s\) have negative 'first_treat' values", + ): + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + + def test_positive_inf_warning_silent_when_no_inf(self): + """+inf warning is gated on +inf rows only; panels with only valid + non-negative values (including just 0 and positive periods) must + never trigger the recategorization warning.""" + import warnings + rows = [] + for unit in range(4): + ft = 0.0 if unit < 2 else 2.0 + for t in range(1, 4): + rows.append({ + "unit": unit, "period": t, "outcome": float(unit + t), + "first_treat": ft, "dose": 0.0 if unit < 2 else 1.0, + }) + data = pd.DataFrame(rows) + est = ContinuousDiD() + with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") try: @@ -749,9 +775,7 @@ def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self pass inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)] - assert inf_warnings == [], ( - "-inf must not trigger the +inf recategorization warning" - ) + assert inf_warnings == [] def test_inf_first_treat_warning_counts_rows_not_units(self): """The warning counts affected rows (not units). On a panel with diff --git a/tests/test_utils.py b/tests/test_utils.py index 72905f7e..61f2d353 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -849,9 +849,9 @@ def test_silent_on_balanced_panel(self): treatment_group="treated", unit="unit", ) - drop_warnings = [ - x for x in w if "check_parallel_trends dropped" in str(x.message) - ] + # Generic filter on "dropped" catches both the old and new label so a + # regression in the label wouldn't hide a real silent-drop warning. + drop_warnings = [x for x in w if "dropped" in str(x.message).lower()] assert drop_warnings == [] def test_warns_on_nan_outcomes_with_excess_drop_count(self): From 65d1bd804f175404b1663f4d40bd7156fc7db37a Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 19 Apr 2026 09:01:01 -0400 Subject: [PATCH 5/5] Address CI AI review: NaN first_treat + StaggeredTripleDifference coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new P1 findings from CI AI re-review: 1. ContinuousDiD.fit() still accepted NaN `first_treat` values. NaN survives preprocessing but satisfies neither the treated (g > 0) nor never-treated (g == 0) mask, so affected units were silently excluded from both pools — same silent-failure shape as the already-rejected `first_treat < 0`. Reject NaN explicitly with ValueError + row count. 2. `StaggeredTripleDifference.fit()` silently recoded `first_treat=np.inf -> 0` via `.replace([np.inf, float("inf")], 0)`. Its sibling `staggered.py:1508-1519` already surfaces this with a UserWarning; this PR mirrors that contract so the estimator no longer shifts units between treated and never-treated pools without signaling. REGISTRY gets a matching **Note:** under the StaggeredTripleDifference section. Regression tests: - test_nan_first_treat_raises_with_row_count (ContinuousDiD) on a 4-unit x 3-period panel with 3 NaN rows. - test_inf_first_treat_works (StaggeredTripleDifference) upgraded from silent-success to `pytest.warns(UserWarning, match=row-count)`. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/continuous_did.py | 13 +++++++++++++ diff_diff/staggered_triple_diff.py | 13 +++++++++++++ docs/methodology/REGISTRY.md | 1 + tests/test_continuous_did.py | 23 +++++++++++++++++++++++ tests/test_staggered_triple_diff.py | 17 ++++++++++++++--- 5 files changed, 64 insertions(+), 3 deletions(-) diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py index 698af060..8f433b25 100644 --- a/diff_diff/continuous_did.py +++ b/diff_diff/continuous_did.py @@ -235,6 +235,19 @@ def fit( # existing `.replace([np.inf, float("inf")], 0)` semantics on the # next line). first_treat_vals = df[first_treat].values + # Reject NaN first_treat explicitly. NaN survives preprocessing but + # satisfies neither the treated (g > 0) nor never-treated (g == 0) + # mask, so affected units would be silently excluded from the + # estimator (same silent-failure shape as `first_treat < 0`). + nan_mask = pd.isna(df[first_treat]) + n_nan_first_treat = int(nan_mask.sum()) + if n_nan_first_treat > 0: + raise ValueError( + f"{n_nan_first_treat} row(s) have NaN '{first_treat}' " + f"values. Valid values are 0 (never-treated) or a positive " + f"treatment period; such units would otherwise be silently " + f"excluded from both treated and control pools." + ) inf_mask = np.isposinf(first_treat_vals) n_inf_first_treat = int(inf_mask.sum()) if n_inf_first_treat > 0: diff --git a/diff_diff/staggered_triple_diff.py b/diff_diff/staggered_triple_diff.py index 758d518b..85086aa3 100644 --- a/diff_diff/staggered_triple_diff.py +++ b/diff_diff/staggered_triple_diff.py @@ -284,6 +284,19 @@ def fit( if first_treat != "first_treat": df["first_treat"] = df[first_treat] + # Surface the inf → 0 recategorization the same way StaggeredDiD does + # (see `staggered.py:1508-1519`). Silently recoding inf would shift + # units between treated and never-treated pools with no signal + # (axis-E silent coercion under the Phase 2 audit). + _inf_mask = np.isposinf(df["first_treat"].values) + if _inf_mask.any(): + n_inf_rows = int(_inf_mask.sum()) + warnings.warn( + f"{n_inf_rows} row(s) have first_treat=inf; recoding to 0 " + f"(never-treated). Use first_treat=0 to suppress this warning.", + UserWarning, + stacklevel=2, + ) df["first_treat"] = df["first_treat"].replace([np.inf, float("inf")], 0) precomputed = self._precompute_structures( diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 08537681..1504a67b 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -1692,6 +1692,7 @@ Balanced panel. Key variables: - `Q_i` (`eligibility`): binary, time-invariant eligibility indicator - Treatment: `D_{i,t} = 1{t >= S_i AND Q_i = 1}` (absorbing) - Covariates `X_i`: time-invariant (first observation per unit used) +- **Note:** `first_treat=inf` (R-style never-enabled marker) is accepted and normalized to `0` internally. The recoding now emits a `UserWarning` reporting the affected row count so the reclassification is not silent (axis-E silent coercion under the Phase 2 audit, mirroring the StaggeredDiD behavior). Pass `first_treat=0` directly to avoid the warning. *Estimator equation (Equation 4.1 in paper, as implemented):* diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py index dbdb6fa8..56c773cb 100644 --- a/tests/test_continuous_did.py +++ b/tests/test_continuous_did.py @@ -751,6 +751,29 @@ def test_negative_first_treat_raises_with_row_count(self): ): est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + def test_nan_first_treat_raises_with_row_count(self): + """NaN `first_treat` must raise ValueError with the row count. Without + this guard, NaN rows survive preprocessing but match neither the + treated (g > 0) nor never-treated (g == 0) mask, so the affected + units would be silently excluded.""" + rows = [] + for unit in range(4): + # Unit 0 has NaN first_treat across all 3 periods (3 NaN rows). + ft = np.nan if unit == 0 else 0.0 + for t in range(1, 4): + rows.append({ + "unit": unit, "period": t, "outcome": float(unit + t), + "first_treat": ft, "dose": 0.0, + }) + data = pd.DataFrame(rows) + est = ContinuousDiD() + + with pytest.raises( + ValueError, + match=r"3 row\(s\) have NaN 'first_treat' values", + ): + est.fit(data, "outcome", "unit", "period", "first_treat", "dose") + def test_positive_inf_warning_silent_when_no_inf(self): """+inf warning is gated on +inf rows only; panels with only valid non-negative values (including just 0 and positive periods) must diff --git a/tests/test_staggered_triple_diff.py b/tests/test_staggered_triple_diff.py index 6954b255..a47d1758 100644 --- a/tests/test_staggered_triple_diff.py +++ b/tests/test_staggered_triple_diff.py @@ -397,12 +397,23 @@ def test_missing_column_raises(self, simple_data): est.fit(simple_data, "outcome", "unit", "period", "nonexistent", "eligibility") def test_inf_first_treat_works(self): - """Never-enabled units encoded as inf should work.""" + """Never-enabled units encoded as inf should be recoded to 0, and the + recoding must surface a UserWarning with the affected row count + (axis-E silent coercion, mirroring the StaggeredDiD behavior).""" data = generate_staggered_ddd_data(n_units=100, seed=33) data["first_treat"] = data["first_treat"].astype(float) - data.loc[data["first_treat"] == 0, "first_treat"] = np.inf + inf_mask = data["first_treat"] == 0 + n_inf_rows = int(inf_mask.sum()) + data.loc[inf_mask, "first_treat"] = np.inf est = StaggeredTripleDifference() - res = est.fit(data, "outcome", "unit", "period", "first_treat", "eligibility") + + with pytest.warns( + UserWarning, + match=rf"{n_inf_rows} row\(s\) have first_treat=inf; recoding to 0", + ): + res = est.fit( + data, "outcome", "unit", "period", "first_treat", "eligibility" + ) assert np.isfinite(res.overall_att) def test_survey_design_invalid_type_raises(self, simple_data):