From ed7acc3583c6e4dd3e7991baa06589db61e5a1d5 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 25 Apr 2026 08:50:09 -0400 Subject: [PATCH 1/5] dCDH by_path + placebo: per-path backward-horizon placebos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 2 item 3 of the by_path follow-up. Adds per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max` under the existing joiners/leavers IF precedent applied backward, surfaced on the new `results.path_placebo_event_study[path][-l]` attribute (negative-int keys mirroring `placebo_event_study`). Bundled scope: - Analytical: extend `_compute_per_group_if_placebo_horizon` with `switcher_subset_mask` (parallel to the PR #357 multi_horizon extension); new `_compute_path_placebos` sibling helper of `_compute_path_effects`; cohort-recentered plug-in SE with path- specific divisor `N^{pl}_{l, path}`. - Bootstrap: new `_collect_path_placebo_bootstrap_inputs` collector + `_compute_dcdh_bootstrap` per-`(path, lag_l)` dispatch reusing `_bootstrap_one_target`; bootstrap propagation block in fit() enforcing the library-wide NaN-on-invalid contract from PR #364 (canonical pattern — non-finite bootstrap SE writes NaN to the full inference tuple, never falls back to analytical). - Reporting: `summary()` renders negative-keyed placebo rows alongside positive event-study rows in each path block; `to_dataframe(level="by_path")` emits negative-horizon rows; footer aggregate predicate covers the new surface. - R-parity: extend `extract_dcdh_by_path` with `n_placebos` param; new `multi_path_reversible_by_path_placebo` scenario in the R generator and `dcdh_dynr_golden_values.json`. Per-`(path, lag)` point estimates match R exactly; SE within Phase-2 envelope (~5% rtol). New parity class `TestDCDHDynRParityByPathPlacebo`. 
SE inherits the cross-path cohort-sharing deviation from R already documented for `path_effects` (full-panel cohort-centered plug-in vs R's per-path re-run): tracks R within tolerance on single-path-cohort panels, diverges on cohort-mixed panels. Bootstrap SE is a Monte Carlo analog of the analytical SE — same per-path centered IF input — and inherits the same deviation. Tests: - `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (8 analytical invariants + 5 bootstrap subclass tests under `@pytest.mark.slow`). - `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` (R-parity on `multi_path_reversible_by_path_placebo`). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 5 + benchmarks/R/generate_dcdh_dynr_test_values.R | 47 +- benchmarks/data/dcdh_dynr_golden_values.json | 152 ++++++ diff_diff/chaisemartin_dhaultfoeuille.py | 455 +++++++++++++++++- .../chaisemartin_dhaultfoeuille_bootstrap.py | 122 ++++- .../chaisemartin_dhaultfoeuille_results.py | 115 ++++- docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 362 ++++++++++++++ ...test_chaisemartin_dhaultfoeuille_parity.py | 121 +++++ 9 files changed, 1319 insertions(+), 62 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb4f1d69..d976d991 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- **`ChaisemartinDHaultfoeuille.by_path` + `placebo=True`** — per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max`. 
The same per-path SE convention used for the event-study (joiners/leavers IF precedent: switcher-side contributions zeroed for non-path groups; cohort structure and control pool unchanged; plug-in SE with path-specific divisor `N^{pl}_{l, path}`) is applied to backward horizons via the new `switcher_subset_mask` parameter on `_compute_per_group_if_placebo_horizon`. Surfaced on `results.path_placebo_event_study[path][-l]` (negative-int inner keys mirroring `placebo_event_study`); `summary()` renders the rows alongside per-path event-study horizons; `to_dataframe(level="by_path")` emits negative-horizon rows alongside the existing positive-horizon rows. **Bootstrap** (when `n_bootstrap > 0`) propagates per-`(path, lag)` percentile CI / p-value through the same `_bootstrap_one_target` dispatch as the per-path event-study, with the canonical NaN-on-invalid contract enforced on the new surface (PR #364 library-wide invariant). **SE inherits the cross-path cohort-sharing deviation from R** documented for `path_effects` (full-panel cohort-centered plug-in vs R's per-path re-run): tracks R within tolerance on single-path-cohort panels, diverges materially on cohort-mixed panels — the bootstrap SE is a Monte Carlo analog of the analytical SE and inherits the same deviation. R-parity confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` on the new `multi_path_reversible_by_path_placebo` scenario (point estimates exact match; SE within Phase-2 envelope rtol ≤ 5%); positive analytical + bootstrap invariants at `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (and the gated `::TestBootstrap` subclass). See `docs/methodology/REGISTRY.md` §ChaisemartinDHaultfoeuille `Note (Phase 3 by_path ...)` → "Per-path placebos" for the full contract. 
+ ## [3.3.0] - 2026-04-25 ### Added diff --git a/benchmarks/R/generate_dcdh_dynr_test_values.R b/benchmarks/R/generate_dcdh_dynr_test_values.R index 692992f4..3f62932f 100644 --- a/benchmarks/R/generate_dcdh_dynr_test_values.R +++ b/benchmarks/R/generate_dcdh_dynr_test_values.R @@ -572,7 +572,7 @@ scenarios$joiners_only_controls_trends_lin <- list( # per-path results live at res$by_level_1, res$by_level_2, ... in rank # order (1 = most frequent observed path). res$by_levels is a character # vector of comma-joined path labels (e.g. "0,1,1,1") in the same order. -extract_dcdh_by_path <- function(res, n_effects) { +extract_dcdh_by_path <- function(res, n_effects, n_placebos = 0) { by_levels <- res$by_levels out <- list() for (i in seq_along(by_levels)) { @@ -589,6 +589,26 @@ extract_dcdh_by_path <- function(res, n_effects) { n_obs = as.numeric(effects[h, "N"]) ) } + # Per-path placebos. When did_multiplegt_dyn is called with + # by_path=k AND placebo=N, each by_level_i has its own + # slot$results$Placebos table with N rows. Negative-keyed + # ("-1", "-2", ...) so the Python parity loop can iterate the + # full forward+backward horizon set with int(k) on the keys. + if (n_placebos > 0) { + placebos <- slot$results$Placebos + if (!is.null(placebos)) { + for (h in seq_len(min(n_placebos, nrow(placebos)))) { + horizons[[as.character(-h)]] <- list( + effect = as.numeric(placebos[h, "Estimate"]), + se = as.numeric(placebos[h, "SE"]), + ci_lo = as.numeric(placebos[h, "LB CI"]), + ci_hi = as.numeric(placebos[h, "UB CI"]), + n_switchers = as.numeric(placebos[h, "Switchers"]), + n_obs = as.numeric(placebos[h, "N"]) + ) + } + } + } out[[i]] <- list( path = by_levels[i], frequency_rank = i, @@ -644,6 +664,31 @@ scenarios$multi_path_reversible_by_path <- list( results = extract_dcdh_by_path(res14, n_effects = 3) ) +# Scenario 15: multi_path_reversible + by_path=3 + placebo=2 (per-path +# backward placebo case). 
Same deterministic DGP and n_periods=10 as +# scenario 14 (the DGP's `f_g_to_path` is sized for max_switch=6, fixed +# at L_max=3 + n_periods=10). For placebo=2: F_g=2 cohort has backward +# index F_g-1-2=-1 out of range, so those 20 switchers contribute NaN +# at lag=2; F_g in [3..7] (60 switchers) produce a valid lag=2 estimate. +# R drops the F_g=2 cohort from Placebo_2 automatically; the parity +# test compares only over the rows that R produced. +cat(" Scenario 15: multi_path_reversible_by_path_placebo\n") +d15 <- gen_reversible(n_groups = N_GOLDEN, n_periods = 10, + pattern = "multi_path_reversible", seed = 115, + L_max = 3) +res15 <- did_multiplegt_dyn( + df = d15, outcome = "outcome", group = "group", time = "period", + treatment = "treatment", effects = 3, placebo = 2, by_path = 3, + ci_level = 95 +) +scenarios$multi_path_reversible_by_path_placebo <- list( + data = export_data(d15), + params = list(pattern = "multi_path_reversible", n_groups = N_GOLDEN, + n_periods = 10, seed = 115, effects = 3, placebo = 2, + by_path = 3, ci_level = 95), + results = extract_dcdh_by_path(res15, n_effects = 3, n_placebos = 2) +) + # --------------------------------------------------------------------------- # Write output # --------------------------------------------------------------------------- diff --git a/benchmarks/data/dcdh_dynr_golden_values.json b/benchmarks/data/dcdh_dynr_golden_values.json index ccdf9281..05c37f79 100644 --- a/benchmarks/data/dcdh_dynr_golden_values.json +++ b/benchmarks/data/dcdh_dynr_golden_values.json @@ -756,6 +756,158 @@ } ] } + }, + "multi_path_reversible_by_path_placebo": { + "data": { + "group": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 
61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 108, 108, 108, 108, 108, 108, 108, 108, 
108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119], + "period": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "treatment": [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + "outcome": [11.8541755558, 12.7672819651, 13.8759529476, 14.5693337707, 12.6910953769, 14.2019642032, 13.8796744204, 14.4207524796, 13.8018772174, 14.2011778499, 11.4757087829, 13.5718947891, 
13.1673779946, 12.7328780187, 12.9101845771, 13.0829722805, 14.336912529, 14.0105835684, 14.3226328588, 14.0074247646, 11.1753179857, 12.8872108968, 13.7687167104, 12.8956877336, 12.9946867356, 13.6453425336, 13.3784292948, 13.4947875895, 13.9083540964, 13.5241598976, 10.2856353227, 12.3684000248, 11.3377288203, 12.3340793103, 11.738355674, 11.8330445383, 12.1709729109, 11.8943472237, 12.3072911082, 11.4223499398, 10.5292074654, 12.277737283, 11.8645847915, 12.9167025015, 11.9504568907, 13.3244219587, 12.9049567492, 12.6073037437, 12.482043687, 12.5716552885, 11.8597592146, 13.8862846739, 13.06088824, 13.631926917, 13.9380932838, 14.1710194035, 14.0147249509, 13.7531228662, 14.9751600444, 14.6432890277, 9.9307619698, 11.5956808731, 13.1074742127, 12.0782715789, 11.9914542252, 12.3420615912, 13.1637655832, 13.3622895738, 12.949613585, 13.0547808001, 10.2697245377, 11.6156824319, 12.5916272891, 11.6189197478, 11.8781513649, 12.6976541962, 13.4444198826, 13.2527550787, 12.3848829864, 13.1927641301, 9.7539120124, 12.4159967591, 13.4860575185, 12.5862443295, 13.659069809, 12.4533431905, 12.1622301483, 13.0646762689, 13.9739520585, 13.0620785729, 9.1167649197, 10.6496159411, 10.9652237072, 11.3075358173, 10.4315572207, 10.9971189536, 11.641878485, 12.0171377024, 11.7604774046, 11.8897470475, 9.0567292172, 12.138821237, 12.0060322363, 12.5346910574, 12.0934350391, 12.7050842822, 12.059230478, 13.3511740454, 13.5591247283, 12.5093646332, 11.8069139483, 12.2293876439, 12.3615079345, 12.9820924118, 12.732229964, 13.2021639612, 13.465310825, 13.2181157439, 13.084692582, 13.7610392303, 8.36711935, 10.4933346393, 10.8820015204, 11.0012627077, 11.3860995016, 11.0159638857, 10.3786291688, 11.5307404901, 11.7375871569, 11.8764264436, 9.4298652266, 11.7730729819, 12.1187243987, 11.9681920069, 11.1958729855, 12.522408537, 10.5063495238, 11.2342337181, 11.1655464416, 11.0589440463, 11.4330637815, 12.0909406183, 12.3854616242, 12.2136313576, 13.3874617631, 12.138084637, 12.5727031886, 
12.8090751765, 12.7359139521, 13.6859322741, 10.6380667229, 13.3139059363, 12.7684553401, 13.2434067091, 13.7719898726, 13.2618019639, 13.8550044609, 12.7710358807, 14.1202673135, 13.578728407, 13.4751210153, 15.4776042497, 15.2319711629, 14.6298390444, 15.0566831774, 14.8659693965, 15.9925056125, 15.4200181751, 16.1755684632, 16.4469265992, 9.0070893467, 11.0084984669, 10.2520068494, 10.8950797914, 11.4511215357, 11.1023926436, 10.7802513198, 10.4398387516, 10.8918752917, 11.3820783792, 10.470822944, 12.9138703653, 13.6038608106, 13.5347030735, 13.3897010612, 12.9711547528, 13.8650151, 14.1926964804, 13.3010625113, 14.1671684571, 12.6110400184, 14.1985666939, 14.4190412862, 14.2207827282, 15.229552791, 13.9539915814, 15.2876139082, 14.4690157652, 14.3048272825, 14.3950036576, 8.1371405349, 8.0800150275, 9.3338470525, 10.0645707434, 10.6388293851, 11.3274516936, 11.1131606263, 11.4223764166, 10.8666812473, 11.0275332164, 8.3350614764, 9.9214509692, 11.453756365, 11.3184129524, 12.4220430368, 11.6237826645, 11.7543150955, 10.9685757542, 12.3696964796, 11.5080505584, 10.4639830363, 10.8419843766, 12.6361270099, 13.6662182363, 13.079209797, 13.4969894846, 13.7511046152, 13.2677414421, 13.3559478976, 14.1079684485, 12.3743823759, 11.6724697427, 13.1642130214, 12.9179114072, 14.052789657, 13.6105648592, 13.7471077936, 14.3647365541, 13.9796664819, 14.6569898618, 9.9807277152, 8.9733119125, 10.3569510638, 11.5392279681, 12.4967543602, 11.8231372355, 12.270841356, 12.1340598416, 12.0992195542, 12.9795391781, 12.1182392612, 11.999412083, 14.0947728369, 14.4754008163, 13.9295279776, 14.4239937708, 14.7716495592, 13.6764178667, 14.5088740911, 13.9196645883, 9.3264700177, 10.2705382462, 12.8069406252, 11.7841774481, 11.1338882506, 11.4541243954, 12.4905697068, 12.6687195694, 13.035256047, 13.6283971251, 10.2245913929, 11.6016015566, 12.9732789027, 11.831486709, 13.2345207365, 13.5476727333, 12.4192753528, 13.514440509, 13.3971787519, 12.8613652329, 13.6995119403, 
12.7155346957, 14.2221929991, 13.9983955706, 14.0087033365, 13.742032004, 14.609487986, 15.3658855152, 15.0726545336, 15.635147965, 7.2703938159, 7.190897081, 9.1661583129, 9.0286683006, 8.0228406871, 9.2668461329, 9.3076009545, 9.4701050028, 9.6915096149, 9.5518360097, 8.0601190121, 9.6214840448, 10.9110657021, 10.7696700627, 11.5591748211, 11.4372463072, 11.0972844549, 12.0185835005, 11.708690261, 11.7507043459, 9.3010267453, 8.7913750606, 11.4176183733, 12.2306425098, 11.3331890656, 11.7080694236, 11.5769266847, 11.8019938386, 12.0497392447, 13.0794626327, 8.6031007406, 8.3036448552, 9.9717437912, 10.3753751369, 10.8217687752, 9.8614872149, 9.9611619616, 10.6062040515, 10.797915419, 10.5759170588, 11.4008099704, 11.1330817878, 14.5438215515, 14.5751934729, 13.9640709101, 14.7544498511, 15.8560548194, 15.0784948531, 13.8784918535, 15.5220309566, 10.6687619019, 10.0486778755, 13.3137449088, 13.9475586004, 12.8021331276, 13.5274168994, 12.8533895741, 12.8835211495, 13.8973147698, 12.8720429019, 7.5280623409, 8.9526379909, 10.7839206132, 10.4184773741, 10.9324593009, 11.6670517462, 11.2837438437, 12.0598251288, 11.1025717061, 11.1272617043, 10.5238606834, 11.1019372571, 12.3031931775, 12.5007739514, 13.1358577577, 13.1696453072, 13.276759905, 11.9599618457, 12.6656991273, 12.6802863909, 9.5459363164, 10.7343873083, 12.9555555989, 12.9255705929, 11.2060608563, 13.2492179579, 13.2780065225, 13.415972408, 12.9086869709, 13.014920044, 8.1437577298, 7.9974255093, 10.8890897687, 10.4291291343, 11.6948433835, 11.2084876318, 11.4515579062, 10.9755598151, 10.6591612835, 11.8235071877, 11.7796901228, 12.7574415619, 13.9624766556, 14.7544722823, 14.0758379069, 15.5675350453, 14.1887086048, 15.607964371, 14.8440649893, 15.4925423044, 13.3597718225, 13.7109541544, 13.356238741, 15.9594152533, 14.8842249649, 12.8921684521, 14.5699738492, 14.1497377853, 13.6304388363, 13.5176752636, 8.0995384915, 7.9280371923, 8.672117015, 10.9633496475, 10.5440783562, 8.9546907549, 8.8410625044, 
8.8515166348, 8.6765105872, 10.3063021901, 9.4852055729, 10.5260778518, 9.6044640219, 11.3978416619, 11.0672807652, 9.7652914387, 9.3482903358, 9.9624735893, 10.1469479387, 10.37264874, 7.1503457963, 6.7095777584, 7.0222282349, 8.157903912, 8.6086963384, 6.8323953894, 7.1000418259, 6.9780867592, 7.9840502993, 6.392081336, 6.3528726483, 5.9644001051, 6.8956510952, 8.6607790014, 8.6658428079, 6.6523364556, 7.2189024838, 5.2573184325, 7.1209749389, 7.0549320494, 8.631032289, 8.2422653344, 8.148080349, 10.1420060837, 10.5746857924, 8.8871847308, 8.4311199431, 8.5752995125, 8.7243415023, 9.2119982616, 7.537605502, 7.4111974775, 7.3549605231, 9.2642800005, 9.269738581, 7.7419039962, 6.7628429429, 7.5588427825, 6.8809459014, 8.2259153882, 15.5146755582, 15.343657466, 15.3948561798, 16.8421482211, 17.7147260963, 16.2682029565, 15.9060844269, 16.2044766436, 15.0695125915, 16.4281277775, 8.4468691356, 7.7513970655, 7.3310805345, 10.595103454, 11.1170817681, 8.3912279001, 8.1185241574, 9.0890367501, 8.4706720969, 8.7753488134, 9.4235701661, 8.5333537139, 9.0355927701, 11.3186900579, 12.2770180874, 9.161518886, 9.8687931259, 10.9048784886, 10.0800185804, 11.0391541936, 13.8597313655, 14.2759832909, 14.6389146711, 17.3080022452, 16.7770178738, 14.5057018208, 14.5889750429, 14.5219426904, 15.0898255757, 15.3990036294, 10.5855579301, 10.2188843552, 10.3102084451, 13.0845820873, 12.504599443, 11.1122598471, 10.8685892403, 11.4206089001, 10.829025186, 10.9839467251, 12.6691759885, 12.1168714653, 13.5723050472, 16.2503801089, 14.574945594, 13.1199065482, 12.1744222744, 13.1852052087, 13.1797630308, 13.4407681347, 12.5840310181, 13.0719448405, 12.9066015228, 14.2949374, 14.3956129105, 12.3922102607, 13.733143445, 12.8241943892, 12.5147975747, 13.6947870267, 8.8135752405, 8.2048858661, 8.8179691333, 10.8805276768, 11.1377463406, 9.338009886, 8.4564727558, 10.2491847173, 8.53248438, 9.4872970997, 8.7014920683, 7.6019907239, 7.7839208606, 7.9601707339, 11.1632981439, 10.2518196063, 
8.0050536368, 8.3700908993, 8.4029488093, 8.7722143172, 11.9033904598, 12.1153570312, 11.814776946, 11.4014053883, 14.1504532946, 13.4424687061, 12.5532223473, 12.2461381397, 11.9427545018, 12.1140129088, 8.0007197572, 6.8074471448, 7.5022696187, 8.5576929874, 11.1309956709, 10.2571615547, 8.296539484, 7.894219391, 7.9100480758, 8.9320512253, 11.3827869724, 12.189027842, 11.8225412856, 11.626632655, 13.5322755071, 13.0416099179, 10.9322528541, 11.6985538752, 11.0033840375, 12.2503954216, 12.4296958756, 11.8949898538, 12.3493332703, 12.8482495308, 14.5831200758, 14.7475324104, 12.9852673951, 12.6654411564, 13.1926564043, 11.8692560473, 13.1334826324, 13.5011818863, 13.8570501294, 13.8375205241, 15.7823307197, 15.559887368, 13.8175764046, 14.1374225957, 14.2051713808, 13.699645577, 8.2443298916, 9.0501730483, 7.0598651685, 8.4193041547, 8.9241576397, 10.8531351007, 8.5667761036, 8.8261264646, 9.9990640759, 8.9748758552, 12.9248269448, 13.7128392138, 12.9672133127, 13.6325069114, 15.2621572932, 14.9476296035, 13.7472138236, 13.3305176812, 12.7498793639, 14.0824942583, 9.8960310432, 9.5771785786, 10.2453571174, 10.4093246918, 11.8530112772, 12.0494953535, 9.4431562424, 9.8234736299, 10.0375229714, 10.2480275796, 11.4519773637, 12.1714646759, 11.3810997379, 11.4392371046, 13.4748058525, 13.3723020822, 11.0372506056, 11.4601772679, 11.4274654249, 11.7077007349, 12.311108277, 12.3171108862, 11.7916979552, 13.2724745863, 12.9615480514, 14.2049230361, 12.8351450279, 13.7634177726, 12.7248538138, 12.7377548705, 9.4642633396, 10.2415065395, 9.967865961, 9.5992562706, 10.6953324873, 11.4896489462, 10.5163649411, 9.5280474828, 10.2887040647, 11.100399138, 7.63839329, 7.4384701982, 7.2500215387, 7.8402131224, 7.4533537645, 10.4803420307, 7.5338479763, 7.7799833795, 8.5666927909, 7.8500326155, 6.5861350279, 6.6476303207, 6.7179955669, 6.9807057655, 7.6695173277, 9.3698610145, 6.6771490105, 7.6881725175, 6.6782248026, 7.3330517244, 9.8387599903, 9.5336625068, 9.8380541416, 
9.9722312483, 9.1779616048, 11.6994664344, 9.1328943389, 11.0105960355, 9.9836945341, 10.3876692984, 9.2527239533, 9.9097382424, 9.4741483709, 10.8714188968, 9.1311921027, 11.2373056536, 9.9913375833, 10.0303662902, 10.9876794352, 10.6363162961, 8.0156573956, 8.9621791441, 8.4784193957, 8.9476034038, 8.2461887948, 11.0390151713, 9.4301291724, 9.6618795565, 8.823850281, 9.4264171728, 9.7236112705, 9.9164973037, 9.9211682596, 9.391229065, 10.758435422, 12.5481975737, 10.6888837224, 10.8379730387, 10.9900785111, 11.4125365588, 8.0789866859, 8.2116634085, 8.0244982515, 8.4485952735, 8.4280610828, 11.2866548235, 10.3050665253, 9.0996875174, 8.7035184101, 9.1457307297, 7.0096635839, 7.2289920563, 6.6929337309, 6.8193489848, 8.6987557154, 8.8690318913, 6.7643402734, 6.81758981, 7.1772861503, 8.0186774071, 8.5526741807, 8.1410755969, 8.6613509084, 8.2564238178, 8.0733845476, 8.9952270252, 10.3355292903, 9.0333353653, 10.958433934, 10.5823572827, 11.6841616166, 11.9898571764, 11.6494962887, 12.3336325153, 12.4448063409, 13.096575042, 14.6051710873, 11.7201797558, 15.8223146732, 14.8245156731, 9.9370932998, 9.6715464391, 10.503863346, 10.2151631844, 9.3227605576, 10.8502473083, 12.4470359953, 10.8835624707, 11.8201971326, 12.8572518649, 7.6649981166, 7.5542581823, 6.661661332, 7.5453757669, 7.5780631611, 7.3896221365, 9.8365512872, 7.5058388711, 10.0393285621, 9.8920675348, 8.0966157357, 8.2141033938, 7.802462051, 8.4679078905, 8.3598384242, 9.1693476164, 10.2286119581, 9.2683363638, 11.0539959998, 11.8787354879, 12.9850702136, 12.3614925888, 13.6446278301, 13.5122837315, 13.3890317342, 12.5554827836, 13.5311764711, 13.7359221228, 12.6103995308, 13.7786206443, 10.8124812722, 10.9549809829, 11.5333091999, 11.2766113508, 10.986598538, 11.7932498567, 11.7575602574, 11.373206627, 11.7830730494, 10.8759171652, 9.6962442151, 9.2697537917, 9.210718775, 9.2550032462, 9.6385739944, 10.1566953709, 10.1374840974, 10.4712814249, 10.8552766473, 9.3661435937, 11.4224819582, 10.730761365, 
11.1390817593, 10.9907663151, 11.4236754992, 11.5359626308, 11.8215468016, 11.8118659559, 11.4457721983, 11.6901373177, 12.8622987673, 11.4776139782, 12.0949767886, 12.1392730291, 10.6448561211, 11.9091178671, 11.5030292076, 12.2857587916, 13.1936763918, 13.0864582766, 5.7234658637, 5.3360342552, 5.4699102407, 5.9952872862, 6.3058915639, 7.3256362663, 5.4222902694, 6.0994835628, 6.8886180312, 7.6569937544, 12.0004574822, 10.9745512351, 11.2575120018, 11.9733999674, 11.4803349538, 12.2455441247, 11.864404883, 12.3076848668, 11.5709227533, 12.6214066946, 13.0180901552, 11.4097726388, 12.6635574969, 12.2594245925, 12.5511444295, 12.4037628792, 12.603323765, 12.4870950755, 13.4517062897, 13.1599633627, 14.908414596, 14.898896734, 14.6284725969, 13.8532884932, 15.6105987855, 14.9084406508, 15.6538936787, 15.682238021, 15.1277659147, 16.0774459172, 10.2836166136, 11.8255276554, 11.5681968907, 12.279048902, 11.7071560048, 12.1946766477, 12.5870726024, 12.2054461327, 11.9749236944, 11.1650772593, 11.731782463, 11.4553066708, 11.7432422339, 11.2287544976, 11.9184908969, 12.7970392182, 11.8340235799, 12.7587695401, 11.3840674497, 12.0814455146, 10.8713817289, 9.3738445329, 11.2833066677, 11.3510332734, 10.5498711624, 11.488239423, 11.5018093375, 11.7009266345, 11.3383393637, 12.0262966692, 12.8143843167, 11.2446458583, 11.5496351544, 11.034650351, 12.5572286746, 13.330675972, 11.7334382889, 12.3796878159, 12.1779620359, 13.2862931445, 11.8605022665, 11.1805846358, 11.7114169059, 11.4717152383, 11.1414447171, 11.7407062879, 11.6161422026, 11.8173622025, 11.5852272962, 12.3601880635, 10.5771841918, 11.1155658716, 11.0531598009, 11.14051975, 12.245375016, 11.9586237523, 12.6189655973, 12.1420174584, 12.4633970378, 12.1658020806, 9.5706617168, 9.7058884189, 9.553780573, 10.8975216335, 9.8968621917, 9.6325609215, 10.2013347885, 9.5203819228, 10.3553927604, 10.8260501921, 11.9242713389, 12.70505587, 12.2408810813, 13.0790665952, 11.9633048083, 12.3959498299, 12.4312513872, 
13.0950270615, 12.6412742134, 12.7420633975, 11.1431819177, 11.3736393712, 10.9333299626, 10.8156637192, 11.1137346528, 11.665178483, 10.9003426748, 12.1005638122, 11.066066233, 11.7769514019, 11.6642828208, 13.1031227419, 12.2211336406, 12.39376727, 12.3679152427, 12.8436369946, 12.0902156317, 12.5148717702, 13.3153579669, 12.3613013188, 11.9796790688, 11.7219307257, 12.0027057337, 11.2073093679, 11.8430838446, 10.9346668133, 11.6689856272, 12.2384665985, 11.2604091982, 12.3180413905, 8.84241736, 8.7163485162, 9.2076016189, 9.6689027369, 9.5962104044, 9.634747919, 9.6997302661, 9.2360473832, 9.8509503571, 9.8300015299, 11.784244303, 12.8001365877, 13.6073506559, 12.9494085401, 12.4466465487, 13.0591650467, 12.6850623683, 13.1855025024, 12.9980247513, 13.4178029236, 11.237249231, 11.0919131891, 11.1318123516, 10.7835664134, 11.3460750968, 11.674032566, 12.6916528327, 11.4979558712, 12.4494995082, 11.9645683648, 12.0493423082, 12.1171940759, 12.4125206245, 11.7597168958, 12.0107679517, 12.4107749764, 13.5555233959, 12.6042632433, 12.010104486, 13.1598389969, 16.0642088197, 16.6039863792, 16.336616596, 15.7914747398, 17.7368041145, 17.1629265498, 16.7910341925, 17.2345751233, 17.035287599, 16.9412207902, 10.322232272, 10.7737815673, 12.030091817, 10.9856987952, 11.246051659, 10.9567501701, 11.8120459614, 11.5404567312, 11.552581688, 11.7623102023, 12.2847553646, 11.9068769488, 12.4802237501, 11.9620328901, 12.0271719578, 12.6132945435, 12.0857912682, 11.8753208707, 12.4310406853, 13.4127362589, 13.1820713209, 13.3310787716, 13.8049463206, 13.8550571271, 14.0105740498, 13.8731883697, 12.9707731803, 13.7719344228, 14.5369475155, 14.5065248498, 12.5043792195, 11.7897336815, 11.7806404741, 12.4844959928, 12.1961186842, 11.8690994905, 12.3328176718, 12.4501707999, 12.8465866532, 12.1049107004, 10.3578128723, 10.3416614034, 10.7094363521, 10.7734242437, 10.6890688779, 11.0411029128, 10.9487957854, 10.8606908741, 10.8712083759, 10.7411704968, 13.7295188631, 12.7335532118, 
11.8857396117, 13.4752711661, 12.6061018277, 12.8498299806, 13.8123299604, 14.0355455557, 14.0909493348, 13.5820774575, 12.4278770983, 12.2208825213, 12.1373561712, 13.2364645495, 13.0511688005, 13.5486555786, 13.2717665795, 13.0985727686, 13.9531824531, 13.8886193707, 8.771817542, 9.9301096963, 10.4114255014, 9.746181925, 10.2514205237, 10.1789786736, 9.5992912518, 9.9969367475, 9.713666716, 9.683277209, 9.1400869411, 11.1569279948, 10.446564278, 10.8276233731, 10.4283708279, 10.5962579433, 11.0029984314, 11.0607204803, 10.078636238, 11.4014663403, 13.5803483783, 13.1401628181, 12.9489830388, 15.0450818932, 13.3909970993, 13.569697563, 13.7040057572, 13.7530800112, 14.6883572324, 13.730864121, 13.137306926, 12.7123347779, 12.6197011767, 13.1846291294, 13.116906947, 13.0796111476, 13.0536692697, 13.2013380254, 12.8377700042, 12.5785144013, 12.4222618756, 12.2359464135, 12.9607757119, 12.6840125667, 12.5622028257, 12.5409709303, 13.0917633128, 12.7423574712, 12.7861933184, 12.7877335256, 15.0374146095, 16.387110678, 16.6444963609, 16.7169884298, 16.1560292972, 15.4119759364, 15.0207222101, 17.073923274, 17.0043103114, 16.3487570428, 12.2331535928, 12.7135345001, 12.4191268796, 13.0236003758, 13.8783525272, 14.0333577205, 13.8379026328, 13.4661276686, 12.5501202766, 12.8049896891, 12.2879277608, 12.1096742564, 11.7531820406, 12.6042375152, 12.4085429178, 13.3567916216, 12.7460694057, 12.7887394232, 13.0716614578, 12.3588359364] + }, + "params": { + "pattern": "multi_path_reversible", + "n_groups": 80, + "n_periods": 10, + "seed": 115, + "effects": 3, + "placebo": 2, + "by_path": 3, + "ci_level": 95 + }, + "results": { + "by_path": [ + { + "path": "0,1,1,1", + "frequency_rank": 1, + "horizons": { + "1": { + "effect": 1.8604649879, + "se": 0.11897027614, + "ci_lo": 1.6272875314, + "ci_hi": 2.0936424444, + "n_switchers": 40, + "n_obs": 180 + }, + "2": { + "effect": 1.9354006186, + "se": 0.15166572588, + "ci_lo": 1.6381412582, + "ci_hi": 2.232659979, + "n_switchers": 40, 
+ "n_obs": 145 + }, + "3": { + "effect": 1.8517255016, + "se": 0.13936443967, + "ci_lo": 1.5785762191, + "ci_hi": 2.1248747841, + "n_switchers": 40, + "n_obs": 120 + }, + "-1": { + "effect": -0.3521619408, + "se": 0.2160699505, + "ci_lo": -0.77565126193, + "ci_hi": 0.071327380334, + "n_switchers": 20, + "n_obs": 80 + } + } + }, + { + "path": "0,1,1,0", + "frequency_rank": 2, + "horizons": { + "1": { + "effect": 1.9303917151, + "se": 0.15076599317, + "ci_lo": 1.6348957983, + "ci_hi": 2.2258876318, + "n_switchers": 25, + "n_obs": 105 + }, + "2": { + "effect": 1.6854325961, + "se": 0.16297369726, + "ci_lo": 1.3660100191, + "ci_hi": 2.0048551732, + "n_switchers": 25, + "n_obs": 85 + }, + "3": { + "effect": -0.31907968001, + "se": 0.15633044123, + "ci_lo": -0.62548171452, + "ci_hi": -0.012677645503, + "n_switchers": 25, + "n_obs": 70 + }, + "-1": { + "effect": -0.17337587324, + "se": 0.1358916037, + "ci_lo": -0.43971852229, + "ci_hi": 0.09296677582, + "n_switchers": 25, + "n_obs": 105 + }, + "-2": { + "effect": 0.019470497241, + "se": 0.15692467121, + "ci_lo": -0.28809620662, + "ci_hi": 0.3270372011, + "n_switchers": 25, + "n_obs": 85 + } + } + }, + { + "path": "0,1,0,0", + "frequency_rank": 3, + "horizons": { + "1": { + "effect": 1.4922964207, + "se": 0.32691920294, + "ci_lo": 0.85154655704, + "ci_hi": 2.1330462843, + "n_switchers": 10, + "n_obs": 35 + }, + "2": { + "effect": -0.14167469408, + "se": 0.35902543942, + "ci_lo": -0.84535162488, + "ci_hi": 0.56200223671, + "n_switchers": 10, + "n_obs": 30 + }, + "3": { + "effect": -0.16990752355, + "se": 0.37535869965, + "ci_lo": -0.90559705614, + "ci_hi": 0.56578200905, + "n_switchers": 10, + "n_obs": 30 + }, + "-1": { + "effect": -0.1020816105, + "se": 0.38857615948, + "ci_lo": -0.86367688834, + "ci_hi": 0.65951366734, + "n_switchers": 10, + "n_obs": 35 + }, + "-2": { + "effect": -0.41474344328, + "se": 0.29153036292, + "ci_lo": -0.98613245499, + "ci_hi": 0.15664556844, + "n_switchers": 10, + "n_obs": 30 + } + } + } + ] + 
} } }, "generator": "generate_reversible_did_data v1", diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index ed73f94b..649a48de 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -413,7 +413,7 @@ class ChaisemartinDHaultfoeuille(ChaisemartinDHaultfoeuilleBootstrapMixin): ``survey_design`` (each combination raises ``NotImplementedError`` in the current release). - Compatible with ``n_bootstrap > 0`` — the top-k paths are + Compatible with ``n_bootstrap > 0`` -- the top-k paths are enumerated once on the observed data (paths held fixed across bootstrap draws, matching R ``did_multiplegt_dyn(..., by_path, bootstrap=B)``) and bootstrap SE / percentile CI / percentile @@ -421,6 +421,16 @@ class ChaisemartinDHaultfoeuille(ChaisemartinDHaultfoeuilleBootstrapMixin): in place of the analytical fields. See REGISTRY.md for the full bootstrap contract. + Compatible with ``placebo=True`` -- when both are active, + per-path backward-horizon placebos ``DID^{pl}_{path, l}`` for + ``l = 1..L_max`` are surfaced on + ``results.path_placebo_event_study[path][-l]`` (negative-int + keys mirroring ``placebo_event_study``). The same per-path SE + convention is applied backward (joiners/leavers IF precedent; + cohort-recentered plug-in with path-specific divisor); the + cross-path cohort-sharing deviation from R is inherited from + the analytical event-study path. 
+ SE convention: per-path IF parallels the joiners / leavers construction — the switcher-side contribution is zeroed for groups not in the selected path, and the cohort structure and @@ -1936,6 +1946,7 @@ def fit( # by_path disaggregation by observed treatment trajectory path_effects: Optional[Dict[Tuple[int, ...], Dict[str, Any]]] = None + path_placebos: Optional[Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]] = None if ( self.by_path is not None and L_max is not None @@ -2088,6 +2099,31 @@ def fit( "n_obs": pl_data["N_pl_l"], } + # Per-path backward-horizon placebos under by_path. Sibling + # of the per-path event-study computation above; keyed by + # path tuple -> negative-int lag (-l for lag l) to match + # `placebo_event_study`'s convention. Inherits the cross- + # path cohort-sharing SE deviation from R documented for + # `path_effects` (full-panel cohort-centered plug-in vs + # R's per-path re-run). + if self.by_path is not None and self.placebo and multi_horizon_placebos is not None: + _df_s_bp_pl = _effective_df_survey(resolved_survey, _replicate_n_valid_list) + path_placebos = _compute_path_placebos( + D_mat=D_mat, + Y_mat=Y_mat, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + switch_direction=switch_direction_arr, + T_g=T_g_arr, + L_max=L_max, + by_path=self.by_path, + eligible_mask_var=eligible_mask_var, + multi_horizon_placebos=multi_horizon_placebos, + alpha=self.alpha, + df_inference=_inference_df(_df_s_bp_pl, resolved_survey), + ) + # Normalized effects DID^n_l (suppressed under trends_linear # because event_study_effects holds second-differences DID^{fd}_l, # not level effects - normalizing second-differences is wrong) @@ -2631,6 +2667,36 @@ def fit( path_effects=path_effects, ) + # Sibling collector for per-path backward placebos. 
Mirrors + # the path_bootstrap_inputs gating: only invoke when by_path + # + placebo are both active, multi_horizon_placebos is + # populated, and analytical path_placebos returned a non- + # empty dict. + path_placebo_bootstrap_inputs = None + if ( + self.by_path is not None + and self.placebo + and L_max is not None + and L_max >= 1 + and multi_horizon_placebos is not None + and path_placebos is not None + and len(path_placebos) > 0 + ): + path_placebo_bootstrap_inputs = _collect_path_placebo_bootstrap_inputs( + D_mat=D_mat, + Y_mat=Y_mat, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + switch_direction=switch_direction_arr, + T_g=T_g_arr, + L_max=L_max, + by_path=self.by_path, + eligible_mask_var=eligible_mask_var, + multi_horizon_placebos=multi_horizon_placebos, + path_placebos=path_placebos, + ) + br = self._compute_dcdh_bootstrap( n_groups_for_overall=n_groups_for_overall_var, u_centered_overall=U_centered_overall, @@ -2642,6 +2708,7 @@ def fit( multi_horizon_inputs=mh_boot_inputs, placebo_horizon_inputs=pl_boot_inputs, path_bootstrap_inputs=path_bootstrap_inputs, + path_placebo_bootstrap_inputs=path_placebo_bootstrap_inputs, group_id_to_psu_code=group_id_to_psu_code_bootstrap, eligible_group_ids=eligible_group_ids_bootstrap, u_per_period_overall=U_centered_pp_overall, @@ -2859,17 +2926,66 @@ def fit( path_effects[path_key]["horizons"][l_h]["conf_int"] = ( bs_ci if bs_ci is not None else (np.nan, np.nan) ) - path_effects[path_key]["horizons"][l_h]["t_stat"] = ( - safe_inference(eff_p, bs_se, alpha=self.alpha, df=None)[0] - ) + path_effects[path_key]["horizons"][l_h]["t_stat"] = safe_inference( + eff_p, bs_se, alpha=self.alpha, df=None + )[0] else: path_effects[path_key]["horizons"][l_h]["se"] = np.nan path_effects[path_key]["horizons"][l_h]["p_value"] = np.nan path_effects[path_key]["horizons"][l_h]["conf_int"] = ( - np.nan, np.nan, + np.nan, + np.nan, ) path_effects[path_key]["horizons"][l_h]["t_stat"] = np.nan + # Phase 3: 
propagate bootstrap results to per-path placebos + # (by_path + placebo). Sibling of the path_effects propagation + # block above. Library-wide NaN-on-invalid bootstrap contract: + # non-finite bootstrap SE writes NaN to the full inference + # tuple rather than falling back to the analytical SE -- the + # caller opted into bootstrap by setting n_bootstrap > 0, and + # mixing analytical + bootstrap semantics inside one result + # object is a public-surface inconsistency. + if ( + bootstrap_results is not None + and bootstrap_results.path_placebo_ses + and path_placebos is not None + ): + for path_key, lag_ses in bootstrap_results.path_placebo_ses.items(): + if path_key not in path_placebos: + continue + for lag_l, bs_se_pl in lag_ses.items(): + neg_key = -lag_l + if neg_key not in path_placebos[path_key]: + continue + bs_ci_pl = ( + bootstrap_results.path_placebo_cis.get(path_key, {}).get(lag_l) + if bootstrap_results.path_placebo_cis + else None + ) + bs_p_pl = ( + bootstrap_results.path_placebo_p_values.get(path_key, {}).get(lag_l) + if bootstrap_results.path_placebo_p_values + else None + ) + eff_pl = path_placebos[path_key][neg_key]["effect"] + if bs_se_pl is not None and np.isfinite(bs_se_pl): + path_placebos[path_key][neg_key]["se"] = bs_se_pl + path_placebos[path_key][neg_key]["p_value"] = ( + bs_p_pl if bs_p_pl is not None else np.nan + ) + path_placebos[path_key][neg_key]["conf_int"] = ( + bs_ci_pl if bs_ci_pl is not None else (np.nan, np.nan) + ) + path_placebos[path_key][neg_key]["t_stat"] = safe_inference( + eff_pl, bs_se_pl, alpha=self.alpha, df=None + )[0] + else: + path_placebos[path_key][neg_key]["se"] = np.nan + path_placebos[path_key][neg_key]["p_value"] = np.nan + path_placebos[path_key][neg_key]["conf_int"] = (np.nan, np.nan) + path_placebos[path_key][neg_key]["t_stat"] = np.nan + # When L_max >= 1 and the per-group path is active, sync # overall_* from event_study_effects[1] AFTER bootstrap propagation # so that bootstrap SE/p/CI flow to the 
top-level surface. @@ -3065,7 +3181,8 @@ def fit( placebo_event_study_dict[neg_key]["se"] = np.nan placebo_event_study_dict[neg_key]["p_value"] = np.nan placebo_event_study_dict[neg_key]["conf_int"] = ( - np.nan, np.nan, + np.nan, + np.nan, ) placebo_event_study_dict[neg_key]["t_stat"] = np.nan @@ -3500,6 +3617,7 @@ def fit( else None ), path_effects=path_effects, + path_placebo_event_study=path_placebos, survey_metadata=survey_metadata, _estimator_ref=self, ) @@ -5287,6 +5405,179 @@ def _compute_path_effects( return path_effects +def _compute_path_placebos( + D_mat: np.ndarray, + Y_mat: np.ndarray, + N_mat: np.ndarray, + baselines: np.ndarray, + first_switch_idx: np.ndarray, + switch_direction: np.ndarray, + T_g: np.ndarray, + L_max: int, + by_path: int, + eligible_mask_var: np.ndarray, + multi_horizon_placebos: Dict[int, Dict[str, Any]], + alpha: float, + df_inference: Optional[int] = None, +) -> Optional[Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]]: + """ + Compute per-path backward-horizon placebos ``DID^{pl}_{path, l}``. + + Sibling of ``_compute_path_effects``: walks the same path enumeration + and cohort-id pipeline but loops over backward horizons (lag + ``l = 1..L_max``) using ``_compute_per_group_if_placebo_horizon`` + with the new ``switcher_subset_mask`` parameter to zero out switcher + contributions for groups not in the selected path. SE is the + cohort-recentered plug-in with path-specific divisor + ``N^{pl}_{l, path}`` (joiners/leavers IF precedent applied backward). + + Inner-dict keys are **negative** ints (-l for lag l) to match the + overall ``placebo_event_study`` convention, so a unified + ``{**path_effects[p]["horizons"], **path_placebo_event_study[p]}`` + view is well-formed. + + Returns ``{path: {-l: {effect, se, t_stat, p_value, conf_int, + n_obs}}}`` directly (no ``n_groups`` / ``frequency_rank`` wrapper — + those are already on ``path_effects[path]``; the rendering layer + sorts by that rank). 
Returns ``{}`` when ``by_path`` was requested + but no path has a complete window (mirrors ``_compute_path_effects``); + the empty dict is the "requested but empty" sentinel distinct from + ``None``. + + Inherits the cross-path cohort-sharing SE deviation from R that + PR #360 documented for ``_compute_path_effects`` (full-panel + cohort-centered plug-in vs R's per-path re-run): tracks R within + numerical tolerance on single-path cohort panels; diverges on + cohort-mixed panels. See ``Note (Phase 3 by_path ...)`` in + ``docs/methodology/REGISTRY.md``. + + The ``_enumerate_treatment_paths`` call here is wrapped in + ``warnings.catch_warnings`` to suppress the overflow ``UserWarning`` + duplicate — the analytical event-study pass + (``_compute_path_effects``) has already surfaced that warning to + the caller. + """ + from diff_diff.utils import safe_inference + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + selected_paths, path_to_group_mask, _ = _enumerate_treatment_paths( + D_mat=D_mat, + first_switch_idx=first_switch_idx, + N_mat=N_mat, + L_max=L_max, + by_path=by_path, + ) + + if not selected_paths: + return {} + + n_groups = D_mat.shape[0] + cohort_keys = [ + ( + float(baselines[g]), + int(first_switch_idx[g]), + int(switch_direction[g]), + ) + for g in range(n_groups) + ] + unique_c: Dict[Tuple[float, int, int], int] = {} + cid = np.zeros(n_groups, dtype=int) + for g in range(n_groups): + if not eligible_mask_var[g]: + cid[g] = -1 + continue + key = cohort_keys[g] + if key not in unique_c: + unique_c[key] = len(unique_c) + cid[g] = unique_c[key] + cohort_id_eligible = cid[eligible_mask_var] + + path_placebos: Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]] = {} + + for path in selected_paths: + switcher_mask = path_to_group_mask[path] + + per_path_pl_if = _compute_per_group_if_placebo_horizon( + D_mat=D_mat, + Y_mat=Y_mat, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx, + 
switch_direction=switch_direction, + T_g=T_g, + L_max=L_max, + set_ids=None, + compute_per_period=False, + switcher_subset_mask=switcher_mask, + ) + + horizons: Dict[int, Dict[str, Any]] = {} + for lag_l in range(1, L_max + 1): + U_pl_l_path, _ = per_path_pl_if[lag_l] + + pl_data = multi_horizon_placebos.get(lag_l) + if pl_data is None: + n_pl_l_path = 0 + else: + eligible_mask_pl = pl_data.get("eligible_mask") + if eligible_mask_pl is None: + n_pl_l_path = 0 + else: + n_pl_l_path = int(np.sum(switcher_mask & eligible_mask_pl)) + + if n_pl_l_path == 0: + horizons[-lag_l] = { + "effect": float("nan"), + "se": float("nan"), + "t_stat": float("nan"), + "p_value": float("nan"), + "conf_int": (float("nan"), float("nan")), + "n_obs": 0, + } + continue + + U_pl_l_path_elig = U_pl_l_path[eligible_mask_var] + effect_pl_path = float(U_pl_l_path.sum() / n_pl_l_path) + + U_centered_pl_path = _cohort_recenter(U_pl_l_path_elig, cohort_id_eligible) + se_pl_path = _plugin_se(U_centered=U_centered_pl_path, divisor=n_pl_l_path) + + if np.isnan(se_pl_path) and U_centered_pl_path.size > 0 and n_pl_l_path > 0: + warnings.warn( + f"Cohort-recentered analytical variance is " + f"unidentified for path={path} at placebo lag " + f"l={lag_l}: the path-subset centered placebo " + f"influence function is identically zero (every " + f"variance-eligible path switcher forms its own " + f"(D_{{g,1}}, F_g, S_g) cohort, or the path has a " + f"single contributing group). DID^{{pl}}_{{path,l}} " + f"point estimate is still valid; SE / t_stat / " + f"p_value / conf_int are NaN-consistent. 
Rare paths " + f"with few contributing groups routinely hit this " + f"case at placebo horizons.", + UserWarning, + stacklevel=2, + ) + + t_pl, p_pl, ci_pl = safe_inference( + effect_pl_path, se_pl_path, alpha=alpha, df=df_inference + ) + + horizons[-lag_l] = { + "effect": effect_pl_path, + "se": se_pl_path, + "t_stat": t_pl, + "p_value": p_pl, + "conf_int": ci_pl, + "n_obs": n_pl_l_path, + } + + path_placebos[path] = horizons + + return path_placebos + + def _collect_path_bootstrap_inputs( D_mat: np.ndarray, Y_mat: np.ndarray, @@ -5364,9 +5655,9 @@ def _collect_path_bootstrap_inputs( cid[g] = unique_c[key] cohort_id_eligible = cid[eligible_mask_var] - path_bootstrap_inputs: Dict[ - Tuple[int, ...], Dict[int, Tuple[np.ndarray, int, float, None]] - ] = {} + path_bootstrap_inputs: Dict[Tuple[int, ...], Dict[int, Tuple[np.ndarray, int, float, None]]] = ( + {} + ) for path in selected_paths: switcher_mask = path_to_group_mask[path] @@ -5402,9 +5693,7 @@ def _collect_path_bootstrap_inputs( U_l_path_elig = U_l_path[eligible_mask_var] U_centered_path = _cohort_recenter(U_l_path_elig, cohort_id_eligible) - effect_path = float( - path_analytical["horizons"][l_h]["effect"] - ) + effect_path = float(path_analytical["horizons"][l_h]["effect"]) horizon_inputs[l_h] = (U_centered_path, n_l_path, effect_path, None) if horizon_inputs: @@ -5413,6 +5702,133 @@ def _collect_path_bootstrap_inputs( return path_bootstrap_inputs +def _collect_path_placebo_bootstrap_inputs( + D_mat: np.ndarray, + Y_mat: np.ndarray, + N_mat: np.ndarray, + baselines: np.ndarray, + first_switch_idx: np.ndarray, + switch_direction: np.ndarray, + T_g: np.ndarray, + L_max: int, + by_path: int, + eligible_mask_var: np.ndarray, + multi_horizon_placebos: Dict[int, Dict[str, Any]], + path_placebos: Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]], +) -> Dict[Tuple[int, ...], Dict[int, Tuple[np.ndarray, int, float, None]]]: + """ + Collect per-(path, lag) inputs for the placebo bootstrap mixin + dispatch. 
+ + Sibling of ``_collect_path_bootstrap_inputs``. Walks the same path + enumeration / per-path placebo IF / cohort-recentering pipeline + that ``_compute_path_placebos`` uses, but returns the + ``(U_centered_path, n_pl_l_path, effect_pl_path, None)`` 4-tuples needed + by ``_compute_dcdh_bootstrap``'s per-`(path, lag_l)` placebo + dispatch block. + + Inner dicts are keyed by **positive** lag int (l = 1..L_max), matching + the inner-key convention of ``placebo_horizon_inputs`` already + consumed by the bootstrap mixin. The propagation block in + ``fit()`` translates back to negative-keyed + ``path_placebo_event_study[path][-lag_l]`` post-bootstrap. + + The point estimate per ``(path, lag_l)`` is read from + ``path_placebos[path][-lag_l]["effect"]`` to stay + bit-identical with the analytical pass; the bootstrap distribution + gets centered on this value by ``_bootstrap_one_target`` downstream. + + The ``warnings.catch_warnings`` block suppresses the + re-enumeration overflow ``UserWarning``; the analytical + event-study pass (``_compute_path_effects``) already surfaced that + warning. 
+ """ + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + selected_paths, path_to_group_mask, _ = _enumerate_treatment_paths( + D_mat=D_mat, + first_switch_idx=first_switch_idx, + N_mat=N_mat, + L_max=L_max, + by_path=by_path, + ) + + n_groups = D_mat.shape[0] + cohort_keys = [ + ( + float(baselines[g]), + int(first_switch_idx[g]), + int(switch_direction[g]), + ) + for g in range(n_groups) + ] + unique_c: Dict[Tuple[float, int, int], int] = {} + cid = np.zeros(n_groups, dtype=int) + for g in range(n_groups): + if not eligible_mask_var[g]: + cid[g] = -1 + continue + key = cohort_keys[g] + if key not in unique_c: + unique_c[key] = len(unique_c) + cid[g] = unique_c[key] + cohort_id_eligible = cid[eligible_mask_var] + + path_placebo_bootstrap_inputs: Dict[ + Tuple[int, ...], Dict[int, Tuple[np.ndarray, int, float, None]] + ] = {} + + for path in selected_paths: + switcher_mask = path_to_group_mask[path] + + per_path_pl_if = _compute_per_group_if_placebo_horizon( + D_mat=D_mat, + Y_mat=Y_mat, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx, + switch_direction=switch_direction, + T_g=T_g, + L_max=L_max, + set_ids=None, + compute_per_period=False, + switcher_subset_mask=switcher_mask, + ) + + horizon_inputs: Dict[int, Tuple[np.ndarray, int, float, None]] = {} + path_analytical = path_placebos.get(path) + if path_analytical is None: + continue + + for lag_l in range(1, L_max + 1): + U_pl_l_path, _ = per_path_pl_if[lag_l] + pl_data = multi_horizon_placebos.get(lag_l) + if pl_data is None: + continue + eligible_mask_pl = pl_data.get("eligible_mask") + if eligible_mask_pl is None: + continue + n_pl_l_path = int(np.sum(switcher_mask & eligible_mask_pl)) + if n_pl_l_path == 0: + continue + + U_pl_l_path_elig = U_pl_l_path[eligible_mask_var] + U_centered_pl_path = _cohort_recenter(U_pl_l_path_elig, cohort_id_eligible) + + effect_pl_path = float(path_analytical[-lag_l]["effect"]) + horizon_inputs[lag_l] = ( + 
U_centered_pl_path, + n_pl_l_path, + effect_pl_path, + None, + ) + + if horizon_inputs: + path_placebo_bootstrap_inputs[path] = horizon_inputs + + return path_placebo_bootstrap_inputs + + def _compute_per_group_if_placebo_horizon( D_mat: np.ndarray, Y_mat: np.ndarray, @@ -5424,6 +5840,7 @@ def _compute_per_group_if_placebo_horizon( L_max: int, set_ids: Optional[np.ndarray] = None, compute_per_period: bool = True, + switcher_subset_mask: Optional[np.ndarray] = None, ) -> Dict[int, Tuple[np.ndarray, Optional[np.ndarray]]]: """ Compute per-group influence function for placebo horizons. @@ -5439,6 +5856,18 @@ def _compute_per_group_if_placebo_horizon( observation at ``ref_idx``, ``backward_idx``, AND ``forward_idx`` (the terminal-missingness guard from Phase 2 Round 9). + Parameters + ---------- + switcher_subset_mask : np.ndarray of bool, shape (n_groups,), optional + When supplied, restricts the switcher iteration to groups where + the mask is ``True``. Groups outside the subset contribute as + controls only (their switcher-side contribution is skipped). The + control pool is unchanged. Mirrors the same parameter on + ``_compute_per_group_if_multi_horizon`` and is used by + ``by_path`` placebos to zero out switcher contributions for + groups not in the selected path. Default ``None`` preserves the + legacy behavior of iterating over all switchers. 
+ Returns ------- dict mapping lag l (positive int) -> (U_pl_l, U_per_period_pl_l) tuple @@ -5471,6 +5900,8 @@ def _compute_per_group_if_placebo_horizon( for g in range(n_groups): if not is_switcher[g]: continue + if switcher_subset_mask is not None and not switcher_subset_mask[g]: + continue f_g = first_switch_idx[g] ref_idx = f_g - 1 backward_idx = ref_idx - l diff --git a/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py b/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py index e10ae3fb..5047cd64 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py @@ -94,15 +94,9 @@ def _compute_dcdh_bootstrap( u_centered_overall: np.ndarray, divisor_overall: int, original_overall: float, - joiners_inputs: Optional[ - Tuple[np.ndarray, int, float, Optional[np.ndarray]] - ] = None, - leavers_inputs: Optional[ - Tuple[np.ndarray, int, float, Optional[np.ndarray]] - ] = None, - placebo_inputs: Optional[ - Tuple[np.ndarray, int, float, Optional[np.ndarray]] - ] = None, + joiners_inputs: Optional[Tuple[np.ndarray, int, float, Optional[np.ndarray]]] = None, + leavers_inputs: Optional[Tuple[np.ndarray, int, float, Optional[np.ndarray]]] = None, + placebo_inputs: Optional[Tuple[np.ndarray, int, float, Optional[np.ndarray]]] = None, # --- Phase 2: multi-horizon inputs --- multi_horizon_inputs: Optional[ Dict[int, Tuple[np.ndarray, int, float, Optional[np.ndarray]]] @@ -123,6 +117,19 @@ def _compute_dcdh_bootstrap( Dict[int, Tuple[np.ndarray, int, float, Optional[np.ndarray]]], ] ] = None, + # --- Phase 3: per-path placebo (by_path + placebo) bootstrap inputs --- + # Nested dict keyed by path tuple -> positive lag l (l = 1..L_max) + # -> 4-tuple `(u_centered_pl_path, n_pl_l_path, effect_pl_path, + # None)`. Sibling of `path_bootstrap_inputs` for backward + # placebo horizons; same reason for the 4th-slot `None` + # (survey + by_path + placebo + bootstrap is gated out, no + # cell-level path needed). 
+ path_placebo_bootstrap_inputs: Optional[ + Dict[ + Tuple[int, ...], + Dict[int, Tuple[np.ndarray, int, float, Optional[np.ndarray]]], + ] + ] = None, # --- Survey: PSU-level bootstrap under survey designs --- group_id_to_psu_code: Optional[Dict[Any, int]] = None, eligible_group_ids: Optional[np.ndarray] = None, @@ -333,7 +340,8 @@ def _compute_dcdh_bootstrap( "analytical TSL path." ) u_boot_overall, map_boot_overall = _unroll_target_to_cells( - u_per_period_overall, psu_codes_per_cell, + u_per_period_overall, + psu_codes_per_cell, ) else: u_boot_overall = u_centered_overall @@ -382,12 +390,15 @@ def _compute_dcdh_bootstrap( "when PSU varies within group, got None." ) u_boot_j, map_boot_j = _unroll_target_to_cells( - u_pp_j, psu_codes_per_cell, + u_pp_j, + psu_codes_per_cell, ) else: u_boot_j = u_j map_boot_j = _map_for_target( - u_j.size, group_id_to_psu_code, eligible_group_ids, + u_j.size, + group_id_to_psu_code, + eligible_group_ids, ) se_j, ci_j, p_j, _ = _bootstrap_one_target( u_centered=u_boot_j, @@ -417,12 +428,15 @@ def _compute_dcdh_bootstrap( "when PSU varies within group, got None." 
) u_boot_l, map_boot_l = _unroll_target_to_cells( - u_pp_l, psu_codes_per_cell, + u_pp_l, + psu_codes_per_cell, ) else: u_boot_l = u_l map_boot_l = _map_for_target( - u_l.size, group_id_to_psu_code, eligible_group_ids, + u_l.size, + group_id_to_psu_code, + eligible_group_ids, ) se_l, ci_l, p_l, _ = _bootstrap_one_target( u_centered=u_boot_l, @@ -457,7 +471,9 @@ def _compute_dcdh_bootstrap( context="dCDH placebo DID_M^pl bootstrap", return_distribution=False, group_to_psu_map=_map_for_target( - u_pl.size, group_id_to_psu_code, eligible_group_ids, + u_pl.size, + group_id_to_psu_code, + eligible_group_ids, ), ) results.placebo_se = se_pl @@ -511,7 +527,9 @@ def _compute_dcdh_bootstrap( weight_type=self.bootstrap_weights, rng=rng, group_to_psu_map=_map_for_target( - n_groups_mh, group_id_to_psu_code, eligible_group_ids, + n_groups_mh, + group_id_to_psu_code, + eligible_group_ids, ), ) shared_psu_weights = None @@ -552,7 +570,8 @@ def _compute_dcdh_bootstrap( # Cell-level: unroll this horizon's cells and # broadcast the shared PSU weights. u_cell_h, psu_cell_h = _unroll_target_to_cells( - u_pp_h, psu_codes_per_cell, + u_pp_h, + psu_codes_per_cell, ) if u_cell_h.size == 0: continue @@ -616,14 +635,17 @@ def _compute_dcdh_bootstrap( f"varies within group, got None." 
) u_boot_plh, map_boot_plh = _unroll_target_to_cells( - u_pp_h, psu_codes_per_cell, + u_pp_h, + psu_codes_per_cell, ) if u_boot_plh.size == 0: continue else: u_boot_plh = u_h map_boot_plh = _map_for_target( - u_h.size, group_id_to_psu_code, eligible_group_ids, + u_h.size, + group_id_to_psu_code, + eligible_group_ids, ) se_h, ci_h, p_h, _ = _bootstrap_one_target( u_centered=u_boot_plh, @@ -666,7 +688,9 @@ def _compute_dcdh_bootstrap( for l_h, (u_h, n_h, eff_h, _u_pp_h) in sorted(horizon_inputs.items()): if u_h.size > 0 and n_h > 0: map_boot_ph = _map_for_target( - u_h.size, group_id_to_psu_code, eligible_group_ids, + u_h.size, + group_id_to_psu_code, + eligible_group_ids, ) # np.errstate wrap: an identically-zero # centered IF (degenerate path + horizon) would @@ -702,6 +726,58 @@ def _compute_dcdh_bootstrap( results.path_cis = path_cis results.path_p_values = path_pvals + # --- Phase 3: Per-path placebo (by_path + placebo) bootstrap --- + # Sibling of the per-path event-study block above for backward + # placebo lags. Same independent single-target dispatch per + # (path, lag_l) via `_bootstrap_one_target`; the survey cell- + # level path is unreachable here because + # `by_path + survey_design` is gated out in fit() before + # bootstrap is invoked. The `np.errstate` wrap mirrors the + # event-study block's degenerate-IF stacked-warning suppression. 
+ if path_placebo_bootstrap_inputs is not None: + path_pl_ses: Dict[Tuple[int, ...], Dict[int, float]] = {} + path_pl_cis: Dict[Tuple[int, ...], Dict[int, Tuple[float, float]]] = {} + path_pl_pvals: Dict[Tuple[int, ...], Dict[int, float]] = {} + + for path_key, horizon_inputs in path_placebo_bootstrap_inputs.items(): + path_pl_ses[path_key] = {} + path_pl_cis[path_key] = {} + path_pl_pvals[path_key] = {} + for lag_l, (u_pl, n_pl, eff_pl, _u_pp_pl) in sorted(horizon_inputs.items()): + if u_pl.size > 0 and n_pl > 0: + map_boot_pl_path = _map_for_target( + u_pl.size, + group_id_to_psu_code, + eligible_group_ids, + ) + with np.errstate(invalid="ignore", divide="ignore"): + ( + se_pl_h, + ci_pl_h, + p_pl_h, + _, + ) = _bootstrap_one_target( + u_centered=u_pl, + divisor=n_pl, + original=eff_pl, + n_bootstrap=self.n_bootstrap, + weight_type=self.bootstrap_weights, + alpha=self.alpha, + rng=rng, + context=( + f"dCDH by_path placebo " f"path={path_key} l={lag_l} bootstrap" + ), + return_distribution=False, + group_to_psu_map=map_boot_pl_path, + ) + path_pl_ses[path_key][lag_l] = se_pl_h + path_pl_cis[path_key][lag_l] = ci_pl_h + path_pl_pvals[path_key][lag_l] = p_pl_h + + results.path_placebo_ses = path_pl_ses + results.path_placebo_cis = path_pl_cis + results.path_placebo_p_values = path_pl_pvals + return results @@ -794,9 +870,7 @@ def _unroll_target_to_cells( # that column). Dropping that mass silently would under-cluster the # bootstrap in a supported panel regime. 
sentinel_mass = flat_u[~mask] - if sentinel_mass.size > 0 and bool( - np.any(np.abs(sentinel_mass) > 1e-12) - ): + if sentinel_mass.size > 0 and bool(np.any(np.abs(sentinel_mass) > 1e-12)): raise ValueError( "Cell-level bootstrap cannot be computed on this survey " "panel: cohort-recentered IF mass landed on cells with " @@ -1011,5 +1085,3 @@ def _bootstrap_one_target( ) return se, ci, p_value, (boot_dist if return_distribution else None) - - diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index e17ef944..df6bac8c 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -136,9 +136,7 @@ class DCDHBootstrapResults: # CI + percentile p-value per library Round-10 convention; caller # (fit()) propagates these to path_effects[path]["horizons"][l] # directly and computes a SE-derived t-stat via `safe_inference`. - path_ses: Optional[Dict[Tuple[int, ...], Dict[int, float]]] = field( - default=None, repr=False - ) + path_ses: Optional[Dict[Tuple[int, ...], Dict[int, float]]] = field(default=None, repr=False) path_cis: Optional[Dict[Tuple[int, ...], Dict[int, Tuple[float, float]]]] = field( default=None, repr=False ) @@ -146,6 +144,23 @@ class DCDHBootstrapResults: default=None, repr=False ) + # --- Phase 3: per-path placebo bootstrap (by_path + placebo) --- + # Same shape and library convention as path_ses / path_cis / + # path_p_values, but for backward placebo lags (l = 1..L_max). Keyed + # by **positive** int internally; the propagation block in fit() + # writes them to path_placebo_event_study[path][-l] (negative key) + # to match the placebo_event_study convention. Populated only when + # by_path + placebo + n_bootstrap > 0 is active; `None` otherwise. 
+ path_placebo_ses: Optional[Dict[Tuple[int, ...], Dict[int, float]]] = field( + default=None, repr=False + ) + path_placebo_cis: Optional[Dict[Tuple[int, ...], Dict[int, Tuple[float, float]]]] = field( + default=None, repr=False + ) + path_placebo_p_values: Optional[Dict[Tuple[int, ...], Dict[int, float]]] = field( + default=None, repr=False + ) + @dataclass class ChaisemartinDHaultfoeuilleResults: @@ -462,6 +477,14 @@ class ChaisemartinDHaultfoeuilleResults: heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) design2_effects: Optional[Dict[str, Any]] = field(default=None, repr=False) path_effects: Optional[Dict[Tuple[int, ...], Dict[str, Any]]] = field(default=None, repr=False) + # Per-path backward-horizon placebos. Inner dict keys are NEGATIVE + # ints (-l for lag l) to match `placebo_event_study`'s convention, + # so a unified `{**path_effects[p]["horizons"], + # **path_placebo_event_study[p]}` view is well-formed across both + # forward and backward horizons within a single path. + path_placebo_event_study: Optional[Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]] = field( + default=None, repr=False + ) honest_did_results: Optional["HonestDiDResults"] = field(default=None, repr=False) # --- Repr-suppressed metadata --- @@ -721,26 +744,24 @@ def summary(self, alpha: Optional[float] = None) -> str: # broader predicate, the footer would falsely claim "produced # non-finite SE on every target" while a finite per-path # bootstrap SE sits in the rendered output below. 
- event_study_has_finite_bootstrap_se = ( - self.event_study_effects is not None - and any( - np.isfinite(entry.get("se", np.nan)) - for entry in self.event_study_effects.values() - ) + event_study_has_finite_bootstrap_se = self.event_study_effects is not None and any( + np.isfinite(entry.get("se", np.nan)) for entry in self.event_study_effects.values() ) - joiners_has_finite_bootstrap_se = ( - self.joiners_se is not None and np.isfinite(self.joiners_se) + joiners_has_finite_bootstrap_se = self.joiners_se is not None and np.isfinite( + self.joiners_se ) - leavers_has_finite_bootstrap_se = ( - self.leavers_se is not None and np.isfinite(self.leavers_se) + leavers_has_finite_bootstrap_se = self.leavers_se is not None and np.isfinite( + self.leavers_se ) - path_effects_has_finite_bootstrap_se = ( - self.path_effects is not None - and any( - np.isfinite(h.get("se", np.nan)) - for entry in self.path_effects.values() - for h in entry.get("horizons", {}).values() - ) + path_effects_has_finite_bootstrap_se = self.path_effects is not None and any( + np.isfinite(h.get("se", np.nan)) + for entry in self.path_effects.values() + for h in entry.get("horizons", {}).values() + ) + path_placebo_has_finite_bootstrap_se = self.path_placebo_event_study is not None and any( + np.isfinite(h.get("se", np.nan)) + for entry in self.path_placebo_event_study.values() + for h in entry.values() ) any_finite_bootstrap_inference = ( np.isfinite(self.overall_se) @@ -748,6 +769,7 @@ def summary(self, alpha: Optional[float] = None) -> str: or joiners_has_finite_bootstrap_se or leavers_has_finite_bootstrap_se or path_effects_has_finite_bootstrap_se + or path_placebo_has_finite_bootstrap_se ) if self.bootstrap_results is not None and np.isfinite(self.overall_se) and not is_delta: lines.append("Note: p-value and CI are multiplier-bootstrap percentile inference") @@ -756,9 +778,7 @@ def summary(self, alpha: Optional[float] = None) -> str: f"{self.bootstrap_results.weight_type} weights)." 
) elif ( - self.bootstrap_results is not None - and is_delta - and event_study_has_finite_bootstrap_se + self.bootstrap_results is not None and is_delta and event_study_has_finite_bootstrap_se ): lines.append( f"Note: delta SE is delta-method (normal-theory) from per-horizon " @@ -781,6 +801,8 @@ def summary(self, alpha: Optional[float] = None) -> str: live_targets.append("leavers") if path_effects_has_finite_bootstrap_se: live_targets.append("per-path") + if path_placebo_has_finite_bootstrap_se: + live_targets.append("per-path placebo") lines.append( f"Note: bootstrap ({self.bootstrap_results.n_bootstrap} iterations) " f"produced non-finite SE on the overall/event-study target; " @@ -1146,6 +1168,26 @@ def _render_path_effects_section( ] ) horizons = entry.get("horizons", {}) + # Backward placebo lags first (negative-keyed), then + # positive event-study horizons. Skips silently when + # path_placebo_event_study is None or this path lacks an + # entry. + placebo_horizons = ( + self.path_placebo_event_study.get(path, {}) + if self.path_placebo_event_study is not None + else {} + ) + for lag_key in sorted(placebo_horizons.keys()): + ph = placebo_horizons[lag_key] + lines.append( + _format_inference_row( + f" l={lag_key}", + ph["effect"], + ph["se"], + ph["t_stat"], + ph["p_value"], + ) + ) for l_h in sorted(horizons.keys()): h = horizons[l_h] lines.append( @@ -1503,6 +1545,33 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: rank = entry["frequency_rank"] n_groups = entry["n_groups"] horizons = entry.get("horizons", {}) + # Backward placebo lags first (negative-keyed), then + # positive event-study horizons. Both placebo and + # event-study rows are emitted in a single + # `level="by_path"` table so callers see the full + # forward+backward inference per path. 
+ placebo_horizons = ( + self.path_placebo_event_study.get(path, {}) + if self.path_placebo_event_study is not None + else {} + ) + for lag_key in sorted(placebo_horizons.keys()): + ph_entry = placebo_horizons[lag_key] + rows.append( + { + "path": path, + "frequency_rank": rank, + "n_groups": n_groups, + "horizon": lag_key, + "effect": ph_entry["effect"], + "se": ph_entry["se"], + "t_stat": ph_entry["t_stat"], + "p_value": ph_entry["p_value"], + "conf_int_lower": ph_entry["conf_int"][0], + "conf_int_upper": ph_entry["conf_int"][1], + "n_obs": ph_entry["n_obs"], + } + ) for l_h in sorted(horizons.keys()): h_entry = horizons[l_h] rows.append( diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index fcc56f0b..545ef634 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -638,7 +638,7 @@ The guard is fired by `_survey_se_from_group_if` (analytical and replicate) and - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. -- **Note (Phase 3 `by_path` per-path event-study disaggregation):** Per-path disaggregation of the multi-horizon event study, mirroring R `did_multiplegt_dyn(..., by_path=k)`. 
Activated via `ChaisemartinDHaultfoeuille(by_path=k, drop_larger_lower=False)` where `k` is a positive integer (top-k most common observed paths by switcher-group frequency). **Window convention:** the path tuple for a switcher group `g` is `(D_{g, F_g-1}, D_{g, F_g}, ..., D_{g, F_g-1+L_max})` — length `L_max + 1`, matching R's window `[F_{g-1}, F_{g-1+l}]`. **Ranking:** paths are ranked by descending frequency; ties are broken lexicographically on the path tuple for deterministic ordering, so every selected path has a unique `frequency_rank`. If `by_path` exceeds the number of observed paths, all observed paths are returned with a `UserWarning`. **Per-path SE convention (joiners/leavers precedent):** the per-path influence function follows the joiners-only / leavers-only IF construction at `chaisemartin_dhaultfoeuille.py:5495-5504`: the switcher-side contribution `+S_g * (Y_{g,out} - Y_{g,ref})` is zeroed for groups whose observed trajectory is NOT the selected path; control contributions and the full cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. After applying the singleton-baseline eligible mask and cohort-recentering with the original cohort IDs, the plug-in SE uses the path-specific divisor `N_l_path` (count of path switchers eligible at horizon `l`) — same pattern as `joiners_se` using `joiner_total`. This gives the **within-path mean** estimand `DID_{path,l}` as the within-path average of `DID_{g,l}`. **Degenerate-cohort behavior per path:** when a path's centered IF at some horizon is identically zero (every variance-eligible path switcher forms its own `(D_{g,1}, F_g, S_g)` cohort, or the path has a single contributing group), SE / t_stat / p_value / conf_int are NaN-consistent and a `UserWarning` is emitted scoped to `(path, horizon)`. This mirrors the overall-path degenerate-cohort surface and is common for rare paths with few contributing groups. 
**Empty-state contract:** `results.path_effects` distinguishes "not requested" (`None`) from "requested but empty" (`{}` — all switchers have windows outside the panel or unobserved cells). The empty-dict case emits a `UserWarning` at fit-time and renders as an explicit "no observed paths" notice in `summary()`; `to_dataframe(level="by_path")` returns an empty DataFrame with the canonical column set (mirrors the `linear_trends` pattern when `trends_linear=True` but no horizons survive). **Requirements:** `drop_larger_lower=False` (multi-switch groups are the object of interest; default `True` filters them out) and `L_max >= 1` (path window depends on the horizon). **Scope:** binary treatment only; combinations with `controls`, `trends_linear`, `trends_nonparam`, `heterogeneity`, `design2`, `honest_did`, and `survey_design` remain gated behind explicit `NotImplementedError` (deferred to follow-up wave PRs). `n_bootstrap > 0` is now supported — see the **Bootstrap SE** paragraph below. **Placebos and TWFE diagnostic** remain sample-level summaries (not computed per path) in this release. Results are exposed on `results.path_effects` as `Dict[Tuple[int, ...], Dict[str, Any]]` with nested `horizons` dicts per horizon `l`, and on `results.to_dataframe(level="by_path")` as a long-format table with columns `[path, frequency_rank, n_groups, horizon, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper, n_obs]`. Gated tests live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathGates` / `::TestByPathBehavior` / `::TestByPathEdgeCases`. **R-parity** against `DIDmultiplegtDYN 2.3.3` is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPath` via two scenarios: `mixed_single_switch_by_path` (2 paths, `by_path=2`) and `multi_path_reversible_by_path` (4 paths, `by_path=3`; path-assignment deterministic on `F_g` so each `(D_{g,1}, F_g, S_g)` cohort contains switchers from a single path). 
Per-path point estimates and per-path switcher counts match R exactly; per-path SE matches within the Phase 2 multi-horizon SE envelope (observed rtol ≤ 10.2% on the 2-path mixed scenario, ≤ 4.2% on the 4-path cohort-clean scenario). **Deviation from R (cross-path cohort-sharing SE):** our analytical SE is the marginal variance of the path-contribution estimator cohort-centered on the *full-panel* cohort structure (joiners/leavers precedent — non-path switchers contribute to cohort means via their zeroed switcher row). R's `did_multiplegt_dyn(..., by_path=k)` re-runs the estimator per path, so cohort means are computed over the path's own switchers only. When a cohort `(D_{g,1}, F_g, S_g)` spans multiple observed paths, Python and R SE diverge materially (our empirical probes with random post-window toggling saw rtol > 100%); when every cohort is single-path (scenario 13 by design, scenario 14 by construction), the two approaches coincide up to the documented Phase 2 envelope. Practitioners with cohort structures that mix paths should interpret the per-path SE as a within-full-panel marginal variance, not a per-path conditional variance. **Bootstrap SE:** when `n_bootstrap > 0` is set, the top-k paths are enumerated once on the observed data (R-faithful: matches `did_multiplegt_dyn(..., by_path=k, bootstrap=B)`'s path-stability convention — verified empirically against DIDmultiplegtDYN 2.3.3) and the multiplier bootstrap (`bootstrap_weights ∈ {"rademacher", "mammen", "webb"}`) runs per `(path, horizon)` target via the shared `_bootstrap_one_target` / `compute_effect_bootstrap_stats` helpers. Point estimates are unchanged from the analytical path. 
Bootstrap SE replaces the analytical SE in `path_effects[path]["horizons"][l]["se"]`, and `p_value` / `conf_int` are taken as the **bootstrap percentile** statistics, matching the Round-10 library convention for overall / joiners / leavers / multi-horizon bootstrap (see the `Note (bootstrap inference surface)` elsewhere in this file and the pinned regression `test_bootstrap_p_value_and_ci_propagated_to_top_level`). `t_stat` is SE-derived via `safe_inference` per the anti-pattern rule. Interpretation: inference is *conditional on the observed path set*. **SE inherits the analytical cross-path cohort-sharing deviation:** the bootstrap input is the exact same full-panel cohort-centered path IF that the analytical path computes (`_collect_path_bootstrap_inputs` reuses the same enumeration / cohort IDs / IF construction), so the bootstrap SE is a Monte Carlo analog of the analytical SE — it inherits the same cross-path cohort-sharing deviation from R's per-path re-run convention documented above. On single-path-cohort panels (scenarios 13 and 14 of the R-parity fixture, and any DGP where `(D_{g,1}, F_g, S_g)` cohorts never span multiple observed paths), bootstrap SE tracks analytical SE up to Monte Carlo noise and both coincide with R up to the Phase 2 envelope. On cross-path cohort panels, bootstrap SE inherits the >100% rtol divergence from R that analytical already has. **Deviation from R (CI method):** R's per-path CI is normal-theory around the bootstrap SE (half-width ≈ `1.96·se`); ours is the bootstrap percentile CI, intentionally diverging from R to keep the dCDH inference surface internally consistent across all bootstrap targets. Practitioners who want *unconditional* inference capturing path-selection uncertainty need a pairs-bootstrap (deferred — no R precedent). 
Positive regressions live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathBootstrap` (gated `@pytest.mark.slow`): point-estimate invariance, finite positive SE on non-degenerate panels, SE-within-30%-rtol of analytical on cohort-clean fixtures, degenerate-cohort NaN propagation, Rademacher/Mammen/Webb parity, seed reproducibility, and percentile-vs-normal-theory CI pinning. +- **Note (Phase 3 `by_path` per-path event-study disaggregation):** Per-path disaggregation of the multi-horizon event study, mirroring R `did_multiplegt_dyn(..., by_path=k)`. Activated via `ChaisemartinDHaultfoeuille(by_path=k, drop_larger_lower=False)` where `k` is a positive integer (top-k most common observed paths by switcher-group frequency). **Window convention:** the path tuple for a switcher group `g` is `(D_{g, F_g-1}, D_{g, F_g}, ..., D_{g, F_g-1+L_max})` — length `L_max + 1`, matching R's window `[F_{g-1}, F_{g-1+l}]`. **Ranking:** paths are ranked by descending frequency; ties are broken lexicographically on the path tuple for deterministic ordering, so every selected path has a unique `frequency_rank`. If `by_path` exceeds the number of observed paths, all observed paths are returned with a `UserWarning`. **Per-path SE convention (joiners/leavers precedent):** the per-path influence function follows the joiners-only / leavers-only IF construction at `chaisemartin_dhaultfoeuille.py:5495-5504`: the switcher-side contribution `+S_g * (Y_{g,out} - Y_{g,ref})` is zeroed for groups whose observed trajectory is NOT the selected path; control contributions and the full cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. After applying the singleton-baseline eligible mask and cohort-recentering with the original cohort IDs, the plug-in SE uses the path-specific divisor `N_l_path` (count of path switchers eligible at horizon `l`) — same pattern as `joiners_se` using `joiner_total`. 
This gives the **within-path mean** estimand `DID_{path,l}` as the within-path average of `DID_{g,l}`. **Degenerate-cohort behavior per path:** when a path's centered IF at some horizon is identically zero (every variance-eligible path switcher forms its own `(D_{g,1}, F_g, S_g)` cohort, or the path has a single contributing group), SE / t_stat / p_value / conf_int are NaN-consistent and a `UserWarning` is emitted scoped to `(path, horizon)`. This mirrors the overall-path degenerate-cohort surface and is common for rare paths with few contributing groups. **Empty-state contract:** `results.path_effects` distinguishes "not requested" (`None`) from "requested but empty" (`{}` — all switchers have windows outside the panel or unobserved cells). The empty-dict case emits a `UserWarning` at fit-time and renders as an explicit "no observed paths" notice in `summary()`; `to_dataframe(level="by_path")` returns an empty DataFrame with the canonical column set (mirrors the `linear_trends` pattern when `trends_linear=True` but no horizons survive). **Requirements:** `drop_larger_lower=False` (multi-switch groups are the object of interest; default `True` filters them out) and `L_max >= 1` (path window depends on the horizon). **Scope:** binary treatment only; combinations with `controls`, `trends_linear`, `trends_nonparam`, `heterogeneity`, `design2`, `honest_did`, and `survey_design` remain gated behind explicit `NotImplementedError` (deferred to follow-up wave PRs). `n_bootstrap > 0` is now supported — see the **Bootstrap SE** paragraph below. **Placebos** are additionally computed per path when `placebo=True` is combined with `by_path=k` — see the **Per-path placebos** paragraph below; the **TWFE diagnostic** remains a sample-level summary (not computed per path) in this release. Results are exposed on `results.path_effects` as `Dict[Tuple[int, ...], Dict[str, Any]]` with nested `horizons` dicts per horizon `l`, and on `results.to_dataframe(level="by_path")` as a long-format table with columns `[path, frequency_rank, n_groups, horizon, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper, n_obs]`.
Gated tests live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathGates` / `::TestByPathBehavior` / `::TestByPathEdgeCases`. **R-parity** against `DIDmultiplegtDYN 2.3.3` is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPath` via two scenarios: `mixed_single_switch_by_path` (2 paths, `by_path=2`) and `multi_path_reversible_by_path` (4 paths, `by_path=3`; path-assignment deterministic on `F_g` so each `(D_{g,1}, F_g, S_g)` cohort contains switchers from a single path). Per-path point estimates and per-path switcher counts match R exactly; per-path SE matches within the Phase 2 multi-horizon SE envelope (observed rtol ≤ 10.2% on the 2-path mixed scenario, ≤ 4.2% on the 4-path cohort-clean scenario). **Deviation from R (cross-path cohort-sharing SE):** our analytical SE is the marginal variance of the path-contribution estimator cohort-centered on the *full-panel* cohort structure (joiners/leavers precedent — non-path switchers contribute to cohort means via their zeroed switcher row). R's `did_multiplegt_dyn(..., by_path=k)` re-runs the estimator per path, so cohort means are computed over the path's own switchers only. When a cohort `(D_{g,1}, F_g, S_g)` spans multiple observed paths, Python and R SE diverge materially (our empirical probes with random post-window toggling saw rtol > 100%); when every cohort is single-path (scenario 13 by design, scenario 14 by construction), the two approaches coincide up to the documented Phase 2 envelope. Practitioners with cohort structures that mix paths should interpret the per-path SE as a within-full-panel marginal variance, not a per-path conditional variance. 
**Bootstrap SE:** when `n_bootstrap > 0` is set, the top-k paths are enumerated once on the observed data (R-faithful: matches `did_multiplegt_dyn(..., by_path=k, bootstrap=B)`'s path-stability convention — verified empirically against DIDmultiplegtDYN 2.3.3) and the multiplier bootstrap (`bootstrap_weights ∈ {"rademacher", "mammen", "webb"}`) runs per `(path, horizon)` target via the shared `_bootstrap_one_target` / `compute_effect_bootstrap_stats` helpers. Point estimates are unchanged from the analytical path. Bootstrap SE replaces the analytical SE in `path_effects[path]["horizons"][l]["se"]`, and `p_value` / `conf_int` are taken as the **bootstrap percentile** statistics, matching the Round-10 library convention for overall / joiners / leavers / multi-horizon bootstrap (see the `Note (bootstrap inference surface)` elsewhere in this file and the pinned regression `test_bootstrap_p_value_and_ci_propagated_to_top_level`). `t_stat` is SE-derived via `safe_inference` per the anti-pattern rule. Interpretation: inference is *conditional on the observed path set*. **SE inherits the analytical cross-path cohort-sharing deviation:** the bootstrap input is the exact same full-panel cohort-centered path IF that the analytical path computes (`_collect_path_bootstrap_inputs` reuses the same enumeration / cohort IDs / IF construction), so the bootstrap SE is a Monte Carlo analog of the analytical SE — it inherits the same cross-path cohort-sharing deviation from R's per-path re-run convention documented above. On single-path-cohort panels (scenarios 13 and 14 of the R-parity fixture, and any DGP where `(D_{g,1}, F_g, S_g)` cohorts never span multiple observed paths), bootstrap SE tracks analytical SE up to Monte Carlo noise and both coincide with R up to the Phase 2 envelope. On cross-path cohort panels, bootstrap SE inherits the >100% rtol divergence from R that analytical already has. 
**Deviation from R (CI method):** R's per-path CI is normal-theory around the bootstrap SE (half-width ≈ `1.96·se`); ours is the bootstrap percentile CI, intentionally diverging from R to keep the dCDH inference surface internally consistent across all bootstrap targets. Practitioners who want *unconditional* inference capturing path-selection uncertainty need a pairs-bootstrap (deferred — no R precedent). Positive regressions live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathBootstrap` (gated `@pytest.mark.slow`): point-estimate invariance, finite positive SE on non-degenerate panels, SE-within-30%-rtol of analytical on cohort-clean fixtures, degenerate-cohort NaN propagation, Rademacher/Mammen/Webb parity, seed reproducibility, and percentile-vs-normal-theory CI pinning. **Per-path placebos:** when `placebo=True` (and `L_max >= 1`) is combined with `by_path=k`, per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max` are computed using the same joiners/leavers IF precedent applied to `_compute_per_group_if_placebo_horizon` (with the new `switcher_subset_mask` parameter): switcher contributions are zeroed for groups not in the path; the control pool and the variance-eligible cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. Plug-in SE uses the path-specific divisor `N^{pl}_{l, path}` (count of path switchers eligible at backward lag `l`). Surfaced on `results.path_placebo_event_study[path][-l]` with the same `{effect, se, t_stat, p_value, conf_int, n_obs}` shape as `placebo_event_study` (negative-int inner keys parallel the existing per-path event-study positive-int keys, so a unified forward+backward view is well-formed). **Inherits the cross-path cohort-sharing SE deviation from R** documented above for `path_effects` (same convention applied backward); tracks R within numerical tolerance on single-path-cohort panels and diverges on cohort-mixed panels. 
Multiplier bootstrap (when `n_bootstrap > 0`) runs per `(path, lag)` target via the same `_bootstrap_one_target` dispatch used for the per-path event-study, with the canonical NaN-on-invalid contract. The bootstrap SE is a Monte Carlo analog of the analytical placebo SE — same per-path centered IF input — and inherits the same deviation. Surfaced through `summary()` (negative-keyed rows rendered alongside positive-keyed event-study rows under each path block) and `to_dataframe(level="by_path")` (`horizon` column takes negative ints for placebo rows). R-parity is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` on the `multi_path_reversible_by_path_placebo` scenario; positive analytical + bootstrap invariants live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (with the gated `::TestByPathPlacebo::TestBootstrap` subclass). **Reference implementation(s):** - R: [`DIDmultiplegtDYN`](https://cran.r-project.org/package=DIDmultiplegtDYN) (CRAN, maintained by the paper authors). The Python implementation matches `did_multiplegt_dyn(..., effects=1)` at horizon `l = 1`. Parity tests live in `tests/test_chaisemartin_dhaultfoeuille_parity.py`. diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 7ab855ab..74dee75e 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -4985,3 +4985,365 @@ def test_degenerate_bootstrap_distribution_yields_nan_tuple(self): "Expected at least one (path, horizon) to land in the " "non-finite-SE bootstrap branch with n_bootstrap=1" ) + + +# ============================================================================= +# by_path + placebo (Wave 2 item 3) +# ============================================================================= + + +def _by_path_placebo_data(seed: int = 43) -> pd.DataFrame: + """Hand-checkable panel for by_path + placebo invariants. 
+ + Periods 0..6 (n_periods=7), F_g=3 for switchers (so backward index + F_g - 1 - lag = 2 - lag; lag=1, 2 valid; lag=3 has backward=-1, NaN). + Forward window F_g - 1 + L_max = 2 + 3 = 5 < 7 (in range). + + - Groups 1, 2, 3: path (0,0,0,1,1,1,1) -- single switch, stay on + - Groups 4, 5: path (0,0,0,1,0,0,0) -- single pulse + - Group 6: path (0,0,0,1,1,0,0) -- two on then off + - Groups 7, 8: never-treated controls + """ + rng = np.random.default_rng(seed) + rows = [] + for g in (1, 2, 3): + for t in range(7): + d = 1 if t >= 3 else 0 + y = d * 2.0 + rng.normal(0, 0.1) + rows.append({"group": g, "period": t, "treatment": d, "outcome": y}) + for g in (4, 5): + for t in range(7): + d = 1 if t == 3 else 0 + y = d * 2.0 + rng.normal(0, 0.1) + rows.append({"group": g, "period": t, "treatment": d, "outcome": y}) + for g in (6,): + for t in range(7): + d = 1 if t in (3, 4) else 0 + y = d * 2.0 + rng.normal(0, 0.1) + rows.append({"group": g, "period": t, "treatment": d, "outcome": y}) + for g in (7, 8): + for t in range(7): + y = rng.normal(0, 0.1) + rows.append({"group": g, "period": t, "treatment": 0, "outcome": y}) + return pd.DataFrame(rows) + + +def _fit_by_path_with_placebo( + data: pd.DataFrame, + by_path: int, + L_max: int = 3, + n_bootstrap: int = 0, + seed: int = 42, +): + """Fit with by_path + placebo + optional bootstrap; silence drop_larger_lower.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + est = ChaisemartinDHaultfoeuille( + drop_larger_lower=False, + by_path=by_path, + placebo=True, + n_bootstrap=n_bootstrap, + seed=seed, + twfe_diagnostic=False, + ) + return est, est.fit( + data, + outcome="outcome", + group="group", + time="period", + treatment="treatment", + L_max=L_max, + ) + + +class TestByPathPlacebo: + """``by_path`` combined with ``placebo=True``. 
+ + Per-path backward-horizon placebos ``DID^{pl}_{path, l}`` for + ``l = 1..L_max`` are surfaced on + ``results.path_placebo_event_study[path][-l]`` (negative-int keys). + SE convention parallels per-path event-study (joiners/leavers IF + precedent applied backward; cohort-recentered plug-in with path- + specific divisor); inherits the cross-path cohort-sharing deviation + from R documented for ``path_effects``. + """ + + def test_attr_is_none_when_placebo_false(self): + """``placebo=False`` (with by_path) must leave the new attribute None.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path(data=_by_path_three_path_data(), by_path=3, L_max=3) + assert res.path_placebo_event_study is None + # Sanity: same fixture, placebo=True, attribute is populated + _est2, res2 = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + assert res2.path_placebo_event_study is not None + + def test_attr_keys_match_path_effects(self): + """``path_placebo_event_study`` keys must equal ``path_effects`` keys.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + assert res.path_effects is not None + assert res.path_placebo_event_study is not None + assert set(res.path_placebo_event_study.keys()) == set(res.path_effects.keys()) + # Each path has L_max negative-keyed lags + for path, h in res.path_placebo_event_study.items(): + assert sorted(h.keys()) == [-3, -2, -1] + + def test_path_placebo_point_estimate_within_path_mean(self): + """Per-(path, lag), point estimate equals within-path mean DID^pl.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + # Lag 1, 2 valid; lag 3 has backward index -1 so n_obs=0 + for path, lag_dict in res.path_placebo_event_study.items(): + for lag_key in (-1, -2): + entry = lag_dict[lag_key] + if entry["n_obs"] > 0: + assert np.isfinite(entry["effect"]), ( + f"path={path} lag={lag_key}: expected finite effect" + ) + # lag 3 must be NaN (backward 
index out of range) + entry3 = lag_dict[-3] + assert entry3["n_obs"] == 0 + assert np.isnan(entry3["effect"]) + + def test_path_placebo_se_finite_or_nan(self): + """Every (path, lag) has SE that is NaN (degenerate) or positive finite.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + for path, lag_dict in res.path_placebo_event_study.items(): + for lag_key, entry in lag_dict.items(): + se = entry["se"] + if np.isfinite(se): + assert se > 0, f"path={path} lag={lag_key}: SE={se} not positive" + else: + assert np.isnan(se), ( + f"path={path} lag={lag_key}: SE={se} not NaN-finite" + ) + + def test_switcher_subset_mask_default_preserves_legacy_placebo_if(self): + """``_compute_per_group_if_placebo_horizon(switcher_subset_mask=None)`` + must produce bit-identical IF arrays as the version without the kwarg + (regression for the new param's default branch).""" + from diff_diff.chaisemartin_dhaultfoeuille import ( + _compute_per_group_if_placebo_horizon, + ) + + # Build a small synthetic input + rng = np.random.default_rng(7) + n_groups, n_periods = 8, 7 + D_mat = np.zeros((n_groups, n_periods), dtype=int) + # 3 switchers at F_g=3 (period 3), rest never-treated + for g in range(3): + for t in range(3, 7): + D_mat[g, t] = 1 + Y_mat = rng.normal(0, 1, size=(n_groups, n_periods)) + N_mat = np.ones((n_groups, n_periods), dtype=int) + baselines = np.zeros(n_groups, dtype=float) + first_switch_idx = np.array([3, 3, 3, -1, -1, -1, -1, -1]) + switch_direction = np.array([1, 1, 1, 0, 0, 0, 0, 0]) + T_g = np.full(n_groups, n_periods - 1) + + # Default (no kwarg) + res_default = _compute_per_group_if_placebo_horizon( + D_mat=D_mat, + Y_mat=Y_mat, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx, + switch_direction=switch_direction, + T_g=T_g, + L_max=2, + ) + # Explicit None + res_none = _compute_per_group_if_placebo_horizon( + D_mat=D_mat, + Y_mat=Y_mat, + N_mat=N_mat, + baselines=baselines, + 
first_switch_idx=first_switch_idx, + switch_direction=switch_direction, + T_g=T_g, + L_max=2, + switcher_subset_mask=None, + ) + for lag in (1, 2): + U_default, _ = res_default[lag] + U_none, _ = res_none[lag] + np.testing.assert_array_equal(U_default, U_none) + + def test_path_placebo_t_stat_uses_safe_inference(self): + """t_stat is SE-derived via safe_inference, never inline `effect/se`.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + from diff_diff.utils import safe_inference + + for path, lag_dict in res.path_placebo_event_study.items(): + for lag_key, entry in lag_dict.items(): + if not np.isfinite(entry["se"]): + continue + expected_t = safe_inference( + entry["effect"], entry["se"], alpha=0.05, df=None + )[0] + np.testing.assert_allclose( + entry["t_stat"], + expected_t, + atol=1e-14, + rtol=1e-14, + err_msg=f"path={path} lag={lag_key}: t_stat not safe_inference-derived", + ) + + def test_path_placebo_renders_in_summary(self): + """summary() must include negative-keyed placebo rows under each path block.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + s = res.summary() + # At least one valid placebo row should render with l=-1 + assert "l=-1" in s, "summary() did not render any -l placebo row" + + def test_path_placebo_to_dataframe_emits_negative_horizons(self): + """to_dataframe(level='by_path') must include rows for negative horizons.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + df = res.to_dataframe(level="by_path") + assert (df["horizon"] < 0).any(), ( + "to_dataframe(level='by_path') did not emit any negative-horizon rows" + ) + + @pytest.mark.slow + class TestBootstrap: + """Bootstrap invariants for by_path + placebo + n_bootstrap > 0. 
+ + Bundled with this PR: the per-path placebo bootstrap mirrors the + per-path event-study bootstrap (PR #364) and enforces the same + library-wide NaN-on-invalid contract. + """ + + def test_bootstrap_point_estimates_preserved(self): + """Bootstrap fit leaves analytical point estimates bit-identical.""" + data = _by_path_placebo_data() + _est_a, res_a = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + _est_b, res_b = _fit_by_path_with_placebo( + data, by_path=3, L_max=3, n_bootstrap=100, seed=42 + ) + assert res_a.path_placebo_event_study is not None + assert res_b.path_placebo_event_study is not None + for path, lag_dict_a in res_a.path_placebo_event_study.items(): + lag_dict_b = res_b.path_placebo_event_study[path] + for lag_key, entry_a in lag_dict_a.items(): + entry_b = lag_dict_b[lag_key] + if np.isnan(entry_a["effect"]): + assert np.isnan(entry_b["effect"]) + else: + np.testing.assert_allclose( + entry_b["effect"], + entry_a["effect"], + atol=1e-14, + rtol=1e-14, + err_msg=( + f"path={path} lag={lag_key}: bootstrap " + f"changed point estimate" + ), + ) + + def test_bootstrap_se_finite_or_nan_per_lag(self): + """Every (path, lag) bootstrap SE is NaN or positive finite.""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo( + data, by_path=3, L_max=3, n_bootstrap=200, seed=42 + ) + assert res.path_placebo_event_study is not None + for path, lag_dict in res.path_placebo_event_study.items(): + for lag_key, entry in lag_dict.items(): + se = entry["se"] + if np.isfinite(se): + assert se > 0 + else: + assert np.isnan(se) + + def test_n_bootstrap_1_enforces_full_nan_tuple(self): + """``n_bootstrap=1`` produces non-finite SE; the full inference + tuple must be NaN per the canonical NaN-on-invalid contract. + + Partial-NaN states (SE=NaN but t_stat / p_value / conf_int + populated from analytical) were the regression class that hit + PR #364 three rounds in a row. 
+ """ + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo( + data, by_path=3, L_max=3, n_bootstrap=1, seed=42 + ) + assert res.path_placebo_event_study is not None + br = res.bootstrap_results + assert br is not None + # path_placebo_ses populated by mixin, but every entry should + # be non-finite at n_bootstrap=1 (std of singleton = 0 -> NaN). + for path, lag_dict in res.path_placebo_event_study.items(): + for lag_key, entry in lag_dict.items(): + if entry["n_obs"] == 0: + # Already analytical-NaN — skip + continue + bs_se = ( + br.path_placebo_ses.get(path, {}).get(-lag_key) + if br.path_placebo_ses + else None + ) + if bs_se is not None and np.isfinite(bs_se): + # Bootstrap somehow produced a finite SE — this + # branch shouldn't fire at n_bootstrap=1, but if + # it does, just skip (no contract to enforce). + continue + # Enforce the four-field NaN contract explicitly + assert np.isnan(entry["se"]), ( + f"path={path} lag={lag_key}: SE={entry['se']} " + f"(expected NaN under bootstrap NaN-on-invalid)" + ) + assert np.isnan(entry["t_stat"]) + assert np.isnan(entry["p_value"]) + lo, hi = entry["conf_int"] + assert np.isnan(lo) and np.isnan(hi) + + def test_bootstrap_inference_fields_match_results_directly(self): + """``conf_int`` / ``p_value`` are the percentile statistics from + ``bootstrap_results.path_placebo_*`` (not normal-theory).""" + data = _by_path_placebo_data() + _est, res = _fit_by_path_with_placebo( + data, by_path=3, L_max=3, n_bootstrap=200, seed=42 + ) + br = res.bootstrap_results + assert br is not None and br.path_placebo_cis is not None + for path, lag_dict in res.path_placebo_event_study.items(): + for lag_key, entry in lag_dict.items(): + if not np.isfinite(entry["se"]): + continue + # The mixin keys path_placebo_cis / p_values by + # POSITIVE lag; the result attribute uses negative. 
+ pos_lag = -lag_key + bs_ci = br.path_placebo_cis[path][pos_lag] + bs_p = br.path_placebo_p_values[path][pos_lag] + assert entry["conf_int"] == bs_ci, ( + f"path={path} lag={lag_key}: conf_int " + f"{entry['conf_int']} != bootstrap " + f"path_placebo_cis {bs_ci} (must propagate " + f"percentile, not normal-theory)" + ) + assert entry["p_value"] == bs_p + + def test_bootstrap_seed_reproducibility(self): + """Same seed -> bit-identical bootstrap SE per (path, lag).""" + data = _by_path_placebo_data() + _est_a, res_a = _fit_by_path_with_placebo( + data, by_path=3, L_max=3, n_bootstrap=100, seed=42 + ) + _est_b, res_b = _fit_by_path_with_placebo( + data, by_path=3, L_max=3, n_bootstrap=100, seed=42 + ) + for path, lag_dict_a in res_a.path_placebo_event_study.items(): + lag_dict_b = res_b.path_placebo_event_study[path] + for lag_key, entry_a in lag_dict_a.items(): + entry_b = lag_dict_b[lag_key] + if np.isnan(entry_a["se"]): + assert np.isnan(entry_b["se"]) + else: + assert entry_a["se"] == entry_b["se"], ( + f"path={path} lag={lag_key}: seed-pinned SEs " + f"diverge: {entry_a['se']} vs {entry_b['se']}" + ) diff --git a/tests/test_chaisemartin_dhaultfoeuille_parity.py b/tests/test_chaisemartin_dhaultfoeuille_parity.py index 47d27332..c2081cf2 100644 --- a/tests/test_chaisemartin_dhaultfoeuille_parity.py +++ b/tests/test_chaisemartin_dhaultfoeuille_parity.py @@ -630,3 +630,124 @@ def test_parity_multi_path_reversible_by_path(self, golden_values): point_rtol=self.POINT_RTOL, se_rtol=self.SE_RTOL, ) + + +class TestDCDHDynRParityByPathPlacebo: + """ + Parity tests for ``by_path + placebo`` against R DIDmultiplegtDYN. + + R's ``did_multiplegt_dyn(..., by_path=k, placebo=N)`` re-runs the + estimator per path; each ``res$by_level_i$results$Placebos`` row + holds the per-path backward-horizon placebo. The R generator + captures these under the same ``horizons`` dict on each per-path + entry, with negative-int string keys ("-1", "-2", ...) 
parallel + to the existing positive-keyed event-study horizons. + + Per-path placebos inherit the same cross-path cohort-sharing SE + deviation from R that ``path_effects`` shows (full-panel cohort- + centered plug-in vs R's per-path re-run); the + ``multi_path_reversible_by_path_placebo`` scenario is constructed + on a deterministic, single-path-per-cohort DGP so analytical SE + tracks R within the same Phase-2 envelope used by + ``TestDCDHDynRParityByPath`` for positive horizons. + + R's placebo iteration is conditional on per-path cohort + eligibility: paths whose smallest F_g cohort has backward index + ``F_g - 1 - lag < 0`` produce fewer placebo rows than the user + requested. The test iterates over the rows R actually produced + (negative-string keys present in ``r_path_entry["horizons"]``) + rather than over the requested ``placebo`` parameter. + """ + + POINT_RTOL = 1e-9 + SE_RTOL = 0.12 + + def _path_key_from_r_label(self, r_label: str): + return tuple(int(x) for x in r_label.split(",")) + + def test_parity_multi_path_reversible_by_path_placebo(self, golden_values): + """Per-path placebos R-parity on the cohort-clean deterministic + DGP. Bundle of (path, lag) cells produced by R is compared row- + by-row to Python's ``path_placebo_event_study``. 
+ """ + import math + import warnings + + scenario = golden_values.get("multi_path_reversible_by_path_placebo") + if scenario is None: + pytest.skip( + "scenario 'multi_path_reversible_by_path_placebo' not in golden values" + ) + + df = _golden_to_df(scenario["data"]) + est = ChaisemartinDHaultfoeuille( + drop_larger_lower=False, by_path=3, placebo=True + ) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + results = est.fit( + df, + outcome="outcome", + group="group", + time="period", + treatment="treatment", + L_max=3, + ) + + r_by_path = scenario["results"]["by_path"] + assert results.path_placebo_event_study is not None + + # Same path-set equality check as positive horizons; R-parity + # over a different surface but the path enumeration must match. + py_keys = set(results.path_placebo_event_study.keys()) + r_keys = {self._path_key_from_r_label(e["path"]) for e in r_by_path} + assert py_keys == r_keys, ( + f"Path-set mismatch between Python and R placebos.\n" + f" Python only: {py_keys - r_keys}\n" + f" R only: {r_keys - py_keys}" + ) + + for r_path_entry in r_by_path: + path_key = self._path_key_from_r_label(r_path_entry["path"]) + py_lag_dict = results.path_placebo_event_study[path_key] + + # Iterate over R's negative-keyed horizons. R generator emits + # `as.character(-h)` keys ("-1", "-2", ...); convert via int(). + for h_str, r_h in r_path_entry["horizons"].items(): + h = int(h_str) + if h >= 0: + continue # positive horizons covered by TestDCDHDynRParityByPath + + assert h in py_lag_dict, ( + f"path={path_key}: placebo lag {h} present in R goldens " + f"but missing from Python path_placebo_event_study" + ) + py_h = py_lag_dict[h] + + assert py_h["n_obs"] == int(r_h["n_switchers"]), ( + f"path={path_key} lag={h}: switcher-count mismatch " + f"py={py_h['n_obs']} vs r={int(r_h['n_switchers'])} " + f"- per-path placebo eligibility divergence; investigate " + f"before comparing SE." 
+ ) + + assert py_h["effect"] == pytest.approx( + r_h["effect"], rel=self.POINT_RTOL + ), ( + f"path={path_key} lag={h}: " + f"py={py_h['effect']:.4f} vs r={r_h['effect']:.4f}" + ) + + py_se = py_h["se"] + r_se = r_h["se"] + py_finite_positive = math.isfinite(py_se) and py_se > 0.0 + r_finite_positive = math.isfinite(r_se) and r_se > 0.0 + assert py_finite_positive == r_finite_positive, ( + f"path={path_key} lag={h} placebo SE state mismatch " + f"(py_se={py_se}, r_se={r_se})" + ) + if py_finite_positive and r_finite_positive: + assert py_se == pytest.approx(r_se, rel=self.SE_RTOL), ( + f"path={path_key} lag={h} placebo SE: " + f"py={py_se:.4f} vs r={r_se:.4f}" + ) From e405aaf174a57e338e47d4e8ee5170828ffe94c4 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 25 Apr 2026 09:04:06 -0400 Subject: [PATCH 2/5] Address PR #371 AI review R1: REGISTRY contradiction + surface tightening P3 fixes from CI reviewer round 1: - REGISTRY.md `Note (Phase 3 by_path ...)`: drop "Placebos" from the "remain sample-level summaries" sentence (TWFE diagnostic stays sample-level; placebos are now per-path under the new sub-bullet). Resolves the in-note contradiction the reviewer flagged. - ChaisemartinDHaultfoeuilleResults docstring: add the `path_placebo_event_study` Attributes block entry alongside `path_effects`, documenting the negative-int inner-key convention and the inherited cross-path cohort-sharing deviation. - TestByPathPlacebo.test_attr_is_none_when_placebo_false: use the same `_by_path_placebo_data()` fixture for both placebo-off and placebo-on branches, so the difference is attributable solely to the `placebo` flag (not a fixture swap). - TestByPathPlacebo.test_path_placebo_point_estimate_within_path_mean: replace the finiteness-only check with an explicit recomputation of the within-path-mean DID^pl identity from the raw data, asserting equality at atol=1e-10 / rtol=1e-10. Pins the estimand identity against silent regressions in the per-path IF construction. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../chaisemartin_dhaultfoeuille_results.py | 14 +++ docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 104 +++++++++++++++--- 3 files changed, 105 insertions(+), 15 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index df6bac8c..6c5b3e22 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -374,6 +374,20 @@ class ChaisemartinDHaultfoeuilleResults: ``{"n_groups": int, "frequency_rank": int, "horizons": {l: {"effect", "se", "t_stat", "p_value", "conf_int", "n_obs"}}}`` for ``l = 1..L_max``. + path_placebo_event_study : dict, optional + Per-path backward-horizon placebos ``DID^{pl}_{path, l}`` for + ``l = 1..L_max``, keyed by observed treatment trajectory (tuple + of int). Inner dict keys are **negative** ints (``-l`` for lag + ``l``) to mirror the ``placebo_event_study`` convention so a + unified ``{**path_effects[p]["horizons"], + **path_placebo_event_study[p]}`` view is well-formed across + forward and backward horizons. Each inner entry holds + ``{"effect", "se", "t_stat", "p_value", "conf_int", "n_obs"}``. + Populated when ``by_path`` is a positive int AND + ``placebo=True`` AND ``L_max >= 1``; ``None`` otherwise. + Inherits the cross-path cohort-sharing SE deviation from R + documented for ``path_effects``. See REGISTRY.md + ``Note (Phase 3 by_path ...)`` → "Per-path placebos". honest_did_results : HonestDiDResults, optional HonestDiD sensitivity analysis bounds (Rambachan & Roth 2023). 
Populated when ``honest_did=True`` in ``fit()`` or by calling diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 545ef634..338cea0f 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -638,7 +638,7 @@ The guard is fired by `_survey_se_from_group_if` (analytical and replicate) and - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. -- **Note (Phase 3 `by_path` per-path event-study disaggregation):** Per-path disaggregation of the multi-horizon event study, mirroring R `did_multiplegt_dyn(..., by_path=k)`. Activated via `ChaisemartinDHaultfoeuille(by_path=k, drop_larger_lower=False)` where `k` is a positive integer (top-k most common observed paths by switcher-group frequency). **Window convention:** the path tuple for a switcher group `g` is `(D_{g, F_g-1}, D_{g, F_g}, ..., D_{g, F_g-1+L_max})` — length `L_max + 1`, matching R's window `[F_{g-1}, F_{g-1+l}]`. **Ranking:** paths are ranked by descending frequency; ties are broken lexicographically on the path tuple for deterministic ordering, so every selected path has a unique `frequency_rank`. 
If `by_path` exceeds the number of observed paths, all observed paths are returned with a `UserWarning`. **Per-path SE convention (joiners/leavers precedent):** the per-path influence function follows the joiners-only / leavers-only IF construction at `chaisemartin_dhaultfoeuille.py:5495-5504`: the switcher-side contribution `+S_g * (Y_{g,out} - Y_{g,ref})` is zeroed for groups whose observed trajectory is NOT the selected path; control contributions and the full cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. After applying the singleton-baseline eligible mask and cohort-recentering with the original cohort IDs, the plug-in SE uses the path-specific divisor `N_l_path` (count of path switchers eligible at horizon `l`) — same pattern as `joiners_se` using `joiner_total`. This gives the **within-path mean** estimand `DID_{path,l}` as the within-path average of `DID_{g,l}`. **Degenerate-cohort behavior per path:** when a path's centered IF at some horizon is identically zero (every variance-eligible path switcher forms its own `(D_{g,1}, F_g, S_g)` cohort, or the path has a single contributing group), SE / t_stat / p_value / conf_int are NaN-consistent and a `UserWarning` is emitted scoped to `(path, horizon)`. This mirrors the overall-path degenerate-cohort surface and is common for rare paths with few contributing groups. **Empty-state contract:** `results.path_effects` distinguishes "not requested" (`None`) from "requested but empty" (`{}` — all switchers have windows outside the panel or unobserved cells). The empty-dict case emits a `UserWarning` at fit-time and renders as an explicit "no observed paths" notice in `summary()`; `to_dataframe(level="by_path")` returns an empty DataFrame with the canonical column set (mirrors the `linear_trends` pattern when `trends_linear=True` but no horizons survive). 
**Requirements:** `drop_larger_lower=False` (multi-switch groups are the object of interest; default `True` filters them out) and `L_max >= 1` (path window depends on the horizon). **Scope:** binary treatment only; combinations with `controls`, `trends_linear`, `trends_nonparam`, `heterogeneity`, `design2`, `honest_did`, and `survey_design` remain gated behind explicit `NotImplementedError` (deferred to follow-up wave PRs). `n_bootstrap > 0` is now supported — see the **Bootstrap SE** paragraph below. **Placebos and TWFE diagnostic** remain sample-level summaries (not computed per path) in this release. Results are exposed on `results.path_effects` as `Dict[Tuple[int, ...], Dict[str, Any]]` with nested `horizons` dicts per horizon `l`, and on `results.to_dataframe(level="by_path")` as a long-format table with columns `[path, frequency_rank, n_groups, horizon, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper, n_obs]`. Gated tests live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathGates` / `::TestByPathBehavior` / `::TestByPathEdgeCases`. **R-parity** against `DIDmultiplegtDYN 2.3.3` is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPath` via two scenarios: `mixed_single_switch_by_path` (2 paths, `by_path=2`) and `multi_path_reversible_by_path` (4 paths, `by_path=3`; path-assignment deterministic on `F_g` so each `(D_{g,1}, F_g, S_g)` cohort contains switchers from a single path). Per-path point estimates and per-path switcher counts match R exactly; per-path SE matches within the Phase 2 multi-horizon SE envelope (observed rtol ≤ 10.2% on the 2-path mixed scenario, ≤ 4.2% on the 4-path cohort-clean scenario). **Deviation from R (cross-path cohort-sharing SE):** our analytical SE is the marginal variance of the path-contribution estimator cohort-centered on the *full-panel* cohort structure (joiners/leavers precedent — non-path switchers contribute to cohort means via their zeroed switcher row). 
R's `did_multiplegt_dyn(..., by_path=k)` re-runs the estimator per path, so cohort means are computed over the path's own switchers only. When a cohort `(D_{g,1}, F_g, S_g)` spans multiple observed paths, Python and R SE diverge materially (our empirical probes with random post-window toggling saw rtol > 100%); when every cohort is single-path (scenario 13 by design, scenario 14 by construction), the two approaches coincide up to the documented Phase 2 envelope. Practitioners with cohort structures that mix paths should interpret the per-path SE as a within-full-panel marginal variance, not a per-path conditional variance. **Bootstrap SE:** when `n_bootstrap > 0` is set, the top-k paths are enumerated once on the observed data (R-faithful: matches `did_multiplegt_dyn(..., by_path=k, bootstrap=B)`'s path-stability convention — verified empirically against DIDmultiplegtDYN 2.3.3) and the multiplier bootstrap (`bootstrap_weights ∈ {"rademacher", "mammen", "webb"}`) runs per `(path, horizon)` target via the shared `_bootstrap_one_target` / `compute_effect_bootstrap_stats` helpers. Point estimates are unchanged from the analytical path. Bootstrap SE replaces the analytical SE in `path_effects[path]["horizons"][l]["se"]`, and `p_value` / `conf_int` are taken as the **bootstrap percentile** statistics, matching the Round-10 library convention for overall / joiners / leavers / multi-horizon bootstrap (see the `Note (bootstrap inference surface)` elsewhere in this file and the pinned regression `test_bootstrap_p_value_and_ci_propagated_to_top_level`). `t_stat` is SE-derived via `safe_inference` per the anti-pattern rule. Interpretation: inference is *conditional on the observed path set*. 
**SE inherits the analytical cross-path cohort-sharing deviation:** the bootstrap input is the exact same full-panel cohort-centered path IF that the analytical path computes (`_collect_path_bootstrap_inputs` reuses the same enumeration / cohort IDs / IF construction), so the bootstrap SE is a Monte Carlo analog of the analytical SE — it inherits the same cross-path cohort-sharing deviation from R's per-path re-run convention documented above. On single-path-cohort panels (scenarios 13 and 14 of the R-parity fixture, and any DGP where `(D_{g,1}, F_g, S_g)` cohorts never span multiple observed paths), bootstrap SE tracks analytical SE up to Monte Carlo noise and both coincide with R up to the Phase 2 envelope. On cross-path cohort panels, bootstrap SE inherits the >100% rtol divergence from R that analytical already has. **Deviation from R (CI method):** R's per-path CI is normal-theory around the bootstrap SE (half-width ≈ `1.96·se`); ours is the bootstrap percentile CI, intentionally diverging from R to keep the dCDH inference surface internally consistent across all bootstrap targets. Practitioners who want *unconditional* inference capturing path-selection uncertainty need a pairs-bootstrap (deferred — no R precedent). Positive regressions live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathBootstrap` (gated `@pytest.mark.slow`): point-estimate invariance, finite positive SE on non-degenerate panels, SE-within-30%-rtol of analytical on cohort-clean fixtures, degenerate-cohort NaN propagation, Rademacher/Mammen/Webb parity, seed reproducibility, and percentile-vs-normal-theory CI pinning. 
**Per-path placebos:** when `placebo=True` (and `L_max >= 1`) is combined with `by_path=k`, per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max` are computed using the same joiners/leavers IF precedent applied to `_compute_per_group_if_placebo_horizon` (with the new `switcher_subset_mask` parameter): switcher contributions are zeroed for groups not in the path; the control pool and the variance-eligible cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. Plug-in SE uses the path-specific divisor `N^{pl}_{l, path}` (count of path switchers eligible at backward lag `l`). Surfaced on `results.path_placebo_event_study[path][-l]` with the same `{effect, se, t_stat, p_value, conf_int, n_obs}` shape as `placebo_event_study` (negative-int inner keys parallel the existing per-path event-study positive-int keys, so a unified forward+backward view is well-formed). **Inherits the cross-path cohort-sharing SE deviation from R** documented above for `path_effects` (same convention applied backward); tracks R within numerical tolerance on single-path-cohort panels and diverges on cohort-mixed panels. Multiplier bootstrap (when `n_bootstrap > 0`) runs per `(path, lag)` target via the same `_bootstrap_one_target` dispatch used for the per-path event-study, with the canonical NaN-on-invalid contract. The bootstrap SE is a Monte Carlo analog of the analytical placebo SE — same per-path centered IF input — and inherits the same deviation. Surfaced through `summary()` (negative-keyed rows rendered alongside positive-keyed event-study rows under each path block) and `to_dataframe(level="by_path")` (`horizon` column takes negative ints for placebo rows). 
R-parity is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` on the `multi_path_reversible_by_path_placebo` scenario; positive analytical + bootstrap invariants live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (with the gated `::TestByPathPlacebo::TestBootstrap` subclass). +- **Note (Phase 3 `by_path` per-path event-study disaggregation):** Per-path disaggregation of the multi-horizon event study, mirroring R `did_multiplegt_dyn(..., by_path=k)`. Activated via `ChaisemartinDHaultfoeuille(by_path=k, drop_larger_lower=False)` where `k` is a positive integer (top-k most common observed paths by switcher-group frequency). **Window convention:** the path tuple for a switcher group `g` is `(D_{g, F_g-1}, D_{g, F_g}, ..., D_{g, F_g-1+L_max})` — length `L_max + 1`, matching R's window `[F_g-1, F_g-1+l]` (periods indexed relative to group `g`'s first-switch period `F_g`, with `l = L_max`). **Ranking:** paths are ranked by descending frequency; ties are broken lexicographically on the path tuple for deterministic ordering, so every selected path has a unique `frequency_rank`. If `by_path` exceeds the number of observed paths, all observed paths are returned with a `UserWarning`. **Per-path SE convention (joiners/leavers precedent):** the per-path influence function follows the joiners-only / leavers-only IF construction at `chaisemartin_dhaultfoeuille.py:5495-5504`: the switcher-side contribution `+S_g * (Y_{g,out} - Y_{g,ref})` is zeroed for groups whose observed trajectory is NOT the selected path; control contributions and the full cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. After applying the singleton-baseline eligible mask and cohort-recentering with the original cohort IDs, the plug-in SE uses the path-specific divisor `N_l_path` (count of path switchers eligible at horizon `l`) — same pattern as `joiners_se` using `joiner_total`. This gives the **within-path mean** estimand `DID_{path,l}` as the within-path average of `DID_{g,l}`.
**Degenerate-cohort behavior per path:** when a path's centered IF at some horizon is identically zero (every variance-eligible path switcher forms its own `(D_{g,1}, F_g, S_g)` cohort, or the path has a single contributing group), SE / t_stat / p_value / conf_int are NaN-consistent and a `UserWarning` is emitted scoped to `(path, horizon)`. This mirrors the overall-path degenerate-cohort surface and is common for rare paths with few contributing groups. **Empty-state contract:** `results.path_effects` distinguishes "not requested" (`None`) from "requested but empty" (`{}` — all switchers have windows outside the panel or unobserved cells). The empty-dict case emits a `UserWarning` at fit-time and renders as an explicit "no observed paths" notice in `summary()`; `to_dataframe(level="by_path")` returns an empty DataFrame with the canonical column set (mirrors the `linear_trends` pattern when `trends_linear=True` but no horizons survive). **Requirements:** `drop_larger_lower=False` (multi-switch groups are the object of interest; default `True` filters them out) and `L_max >= 1` (path window depends on the horizon). **Scope:** binary treatment only; combinations with `controls`, `trends_linear`, `trends_nonparam`, `heterogeneity`, `design2`, `honest_did`, and `survey_design` remain gated behind explicit `NotImplementedError` (deferred to follow-up wave PRs). `n_bootstrap > 0` is now supported — see the **Bootstrap SE** paragraph below. `placebo=True` is now supported per-path — see the **Per-path placebos** paragraph below. **TWFE diagnostic** remains a sample-level summary (not computed per path) in this release. Results are exposed on `results.path_effects` as `Dict[Tuple[int, ...], Dict[str, Any]]` with nested `horizons` dicts per horizon `l`, and on `results.to_dataframe(level="by_path")` as a long-format table with columns `[path, frequency_rank, n_groups, horizon, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper, n_obs]`. 
Gated tests live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathGates` / `::TestByPathBehavior` / `::TestByPathEdgeCases`. **R-parity** against `DIDmultiplegtDYN 2.3.3` is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPath` via two scenarios: `mixed_single_switch_by_path` (2 paths, `by_path=2`) and `multi_path_reversible_by_path` (4 paths, `by_path=3`; path-assignment deterministic on `F_g` so each `(D_{g,1}, F_g, S_g)` cohort contains switchers from a single path). Per-path point estimates and per-path switcher counts match R exactly; per-path SE matches within the Phase 2 multi-horizon SE envelope (observed rtol ≤ 10.2% on the 2-path mixed scenario, ≤ 4.2% on the 4-path cohort-clean scenario). **Deviation from R (cross-path cohort-sharing SE):** our analytical SE is the marginal variance of the path-contribution estimator cohort-centered on the *full-panel* cohort structure (joiners/leavers precedent — non-path switchers contribute to cohort means via their zeroed switcher row). R's `did_multiplegt_dyn(..., by_path=k)` re-runs the estimator per path, so cohort means are computed over the path's own switchers only. When a cohort `(D_{g,1}, F_g, S_g)` spans multiple observed paths, Python and R SE diverge materially (our empirical probes with random post-window toggling saw rtol > 100%); when every cohort is single-path (scenario 13 by design, scenario 14 by construction), the two approaches coincide up to the documented Phase 2 envelope. Practitioners with cohort structures that mix paths should interpret the per-path SE as a within-full-panel marginal variance, not a per-path conditional variance. 
**Bootstrap SE:** when `n_bootstrap > 0` is set, the top-k paths are enumerated once on the observed data (R-faithful: matches `did_multiplegt_dyn(..., by_path=k, bootstrap=B)`'s path-stability convention — verified empirically against DIDmultiplegtDYN 2.3.3) and the multiplier bootstrap (`bootstrap_weights ∈ {"rademacher", "mammen", "webb"}`) runs per `(path, horizon)` target via the shared `_bootstrap_one_target` / `compute_effect_bootstrap_stats` helpers. Point estimates are unchanged from the analytical path. Bootstrap SE replaces the analytical SE in `path_effects[path]["horizons"][l]["se"]`, and `p_value` / `conf_int` are taken as the **bootstrap percentile** statistics, matching the Round-10 library convention for overall / joiners / leavers / multi-horizon bootstrap (see the `Note (bootstrap inference surface)` elsewhere in this file and the pinned regression `test_bootstrap_p_value_and_ci_propagated_to_top_level`). `t_stat` is SE-derived via `safe_inference` per the anti-pattern rule. Interpretation: inference is *conditional on the observed path set*. **SE inherits the analytical cross-path cohort-sharing deviation:** the bootstrap input is the exact same full-panel cohort-centered path IF that the analytical path computes (`_collect_path_bootstrap_inputs` reuses the same enumeration / cohort IDs / IF construction), so the bootstrap SE is a Monte Carlo analog of the analytical SE — it inherits the same cross-path cohort-sharing deviation from R's per-path re-run convention documented above. On single-path-cohort panels (scenarios 13 and 14 of the R-parity fixture, and any DGP where `(D_{g,1}, F_g, S_g)` cohorts never span multiple observed paths), bootstrap SE tracks analytical SE up to Monte Carlo noise and both coincide with R up to the Phase 2 envelope. On cross-path cohort panels, bootstrap SE inherits the >100% rtol divergence from R that analytical already has. 
**Deviation from R (CI method):** R's per-path CI is normal-theory around the bootstrap SE (half-width ≈ `1.96·se`); ours is the bootstrap percentile CI, intentionally diverging from R to keep the dCDH inference surface internally consistent across all bootstrap targets. Practitioners who want *unconditional* inference capturing path-selection uncertainty need a pairs-bootstrap (deferred — no R precedent). Positive regressions live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathBootstrap` (gated `@pytest.mark.slow`): point-estimate invariance, finite positive SE on non-degenerate panels, SE-within-30%-rtol of analytical on cohort-clean fixtures, degenerate-cohort NaN propagation, Rademacher/Mammen/Webb parity, seed reproducibility, and percentile-vs-normal-theory CI pinning. **Per-path placebos:** when `placebo=True` (and `L_max >= 1`) is combined with `by_path=k`, per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max` are computed using the same joiners/leavers IF precedent applied to `_compute_per_group_if_placebo_horizon` (with the new `switcher_subset_mask` parameter): switcher contributions are zeroed for groups not in the path; the control pool and the variance-eligible cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. Plug-in SE uses the path-specific divisor `N^{pl}_{l, path}` (count of path switchers eligible at backward lag `l`). Surfaced on `results.path_placebo_event_study[path][-l]` with the same `{effect, se, t_stat, p_value, conf_int, n_obs}` shape as `placebo_event_study` (negative-int inner keys parallel the existing per-path event-study positive-int keys, so a unified forward+backward view is well-formed). **Inherits the cross-path cohort-sharing SE deviation from R** documented above for `path_effects` (same convention applied backward); tracks R within numerical tolerance on single-path-cohort panels and diverges on cohort-mixed panels. 
Multiplier bootstrap (when `n_bootstrap > 0`) runs per `(path, lag)` target via the same `_bootstrap_one_target` dispatch used for the per-path event-study, with the canonical NaN-on-invalid contract. The bootstrap SE is a Monte Carlo analog of the analytical placebo SE — same per-path centered IF input — and inherits the same deviation. Surfaced through `summary()` (negative-keyed rows rendered alongside positive-keyed event-study rows under each path block) and `to_dataframe(level="by_path")` (`horizon` column takes negative ints for placebo rows). R-parity is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` on the `multi_path_reversible_by_path_placebo` scenario; positive analytical + bootstrap invariants live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (with the gated `::TestByPathPlacebo::TestBootstrap` subclass). **Reference implementation(s):** - R: [`DIDmultiplegtDYN`](https://cran.r-project.org/package=DIDmultiplegtDYN) (CRAN, maintained by the paper authors). The Python implementation matches `did_multiplegt_dyn(..., effects=1)` at horizon `l = 1`. Parity tests live in `tests/test_chaisemartin_dhaultfoeuille_parity.py`. diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 74dee75e..7a80959e 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -5069,13 +5069,14 @@ class TestByPathPlacebo: """ def test_attr_is_none_when_placebo_false(self): - """``placebo=False`` (with by_path) must leave the new attribute None.""" + """``placebo=False`` (with by_path) must leave the new attribute None; + ``placebo=True`` populates it. 
Both branches use the SAME fixture so + the difference is attributable solely to the ``placebo`` flag.""" data = _by_path_placebo_data() - _est, res = _fit_by_path(data=_by_path_three_path_data(), by_path=3, L_max=3) - assert res.path_placebo_event_study is None - # Sanity: same fixture, placebo=True, attribute is populated - _est2, res2 = _fit_by_path_with_placebo(data, by_path=3, L_max=3) - assert res2.path_placebo_event_study is not None + _est, res_off = _fit_by_path(data, by_path=3, L_max=3) + assert res_off.path_placebo_event_study is None + _est2, res_on = _fit_by_path_with_placebo(data, by_path=3, L_max=3) + assert res_on.path_placebo_event_study is not None def test_attr_keys_match_path_effects(self): """``path_placebo_event_study`` keys must equal ``path_effects`` keys.""" @@ -5089,18 +5090,93 @@ def test_attr_keys_match_path_effects(self): assert sorted(h.keys()) == [-3, -2, -1] def test_path_placebo_point_estimate_within_path_mean(self): - """Per-(path, lag), point estimate equals within-path mean DID^pl.""" + """Per-(path, lag), the reported ``effect`` must equal the explicit + within-path-mean DID^pl identity ``mean_g(Y_{g, F_g-1-l} - Y_{g, F_g-1}) + - mean_ctrl(Y_{g', F_g-1-l} - Y_{g', F_g-1})`` evaluated on the + path-eligible switcher set, mirroring how + ``_compute_per_group_if_placebo_horizon`` constructs U_pl_l. This + pins the estimand identity, not just finiteness, against silent + regressions in the per-path IF construction.""" data = _by_path_placebo_data() _est, res = _fit_by_path_with_placebo(data, by_path=3, L_max=3) - # Lag 1, 2 valid; lag 3 has backward index -1 so n_obs=0 + + # Recompute the within-path mean DID^pl independently from the raw + # data and assert exact equality at np.testing.assert_allclose tols. 
+ L_max = 3 + n_periods = 7 # set by _by_path_placebo_data + g_to_F_g = {} + for g, grp in data.groupby("group"): + grp = grp.sort_values("period") + treated = grp[grp["treatment"] == 1] + if len(treated): + g_to_F_g[int(g)] = int(treated["period"].iloc[0]) + + outcome_lookup = { + (int(r["group"]), int(r["period"])): float(r["outcome"]) + for _, r in data.iterrows() + } + # Per-group path tuple + g_to_path = {} + for g, F_g in g_to_F_g.items(): + ref = F_g - 1 + if ref < 0 or ref + L_max >= n_periods: + continue + grp = data[data["group"] == g].sort_values("period") + treatment_arr = grp.set_index("period")["treatment"].to_dict() + path_tuple = tuple(int(treatment_arr.get(ref + i, 0)) for i in range(L_max + 1)) + g_to_path[g] = (F_g, path_tuple) + # Never-treated group ids + never_treated = [int(g) for g in data["group"].unique() if int(g) not in g_to_F_g] + for path, lag_dict in res.path_placebo_event_study.items(): - for lag_key in (-1, -2): - entry = lag_dict[lag_key] - if entry["n_obs"] > 0: - assert np.isfinite(entry["effect"]), ( - f"path={path} lag={lag_key}: expected finite effect" + path_groups = {g for g, (_, p) in g_to_path.items() if p == path} + for lag in (-1, -2): + entry = lag_dict[lag] + if entry["n_obs"] == 0: + continue + lag_pos = -lag + contributions = [] + for g in path_groups: + F_g = g_to_F_g[g] + backward = F_g - 1 - lag_pos + forward = F_g - 1 + lag_pos + if backward < 0 or forward >= n_periods: + continue + # Controls: same baseline (D_{g',1}=0; all path + # switchers in this fixture share baseline 0), not + # switched by forward, observed at ref+backward+forward + ctrl_groups = [ + gc + for gc in g_to_F_g + if gc != g and g_to_F_g[gc] > forward + ] + never_treated + if not ctrl_groups: + continue + switcher_change = ( + outcome_lookup[(g, backward)] - outcome_lookup[(g, F_g - 1)] + ) + ctrl_changes = [ + outcome_lookup[(int(gc), backward)] - outcome_lookup[(int(gc), F_g - 1)] + for gc in ctrl_groups + ] + contributions.append( + 
switcher_change - sum(ctrl_changes) / len(ctrl_changes) + ) + if contributions: + expected_mean = sum(contributions) / len(contributions) + np.testing.assert_allclose( + entry["effect"], + expected_mean, + atol=1e-10, + rtol=1e-10, + err_msg=( + f"path={path} lag={lag}: reported effect " + f"{entry['effect']} != within-path mean " + f"identity {expected_mean}" + ), ) - # lag 3 must be NaN (backward index out of range) + # lag -3 is structurally NaN under this fixture (smallest + # F_g=3 means backward = F_g - 1 - 3 = -1, out of range) entry3 = lag_dict[-3] assert entry3["n_obs"] == 0 assert np.isnan(entry3["effect"]) From f70c17f721db2f11f7b9b5dce6800acb6e220e78 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 25 Apr 2026 09:25:42 -0400 Subject: [PATCH 3/5] Address PR #371 AI review R2: scenario 15 metadata accuracy P3 fix: scenario 15's `params.n_groups` was 80 (the switcher cohort allocator input fed into `gen_reversible(n_groups=N_GOLDEN, ...)`) while the realized panel actually contains 120 groups (80 switchers + 20 never-treated + 20 always-treated, appended by `gen_reversible`'s default cohort additions at line 64). Replace with two explicit fields: - `n_switcher_groups = 80`: the load-bearing DGP allocator input - `n_realized_groups = 120`: the actual unique-group count in the serialized data The parity test reads the `data` block directly, not `params`, so it is unaffected by this metadata change. Resolves the misleading-metadata finding the reviewer flagged on R2. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/R/generate_dcdh_dynr_test_values.R | 12 +++++++++++- benchmarks/data/dcdh_dynr_golden_values.json | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/benchmarks/R/generate_dcdh_dynr_test_values.R b/benchmarks/R/generate_dcdh_dynr_test_values.R index 3f62932f..6da79b8d 100644 --- a/benchmarks/R/generate_dcdh_dynr_test_values.R +++ b/benchmarks/R/generate_dcdh_dynr_test_values.R @@ -683,7 +683,17 @@ res15 <- did_multiplegt_dyn( ) scenarios$multi_path_reversible_by_path_placebo <- list( data = export_data(d15), - params = list(pattern = "multi_path_reversible", n_groups = N_GOLDEN, + # n_switcher_groups records the switcher cohort count fed into + # gen_reversible's `counts_per_F_g` allocator (80 = sum c(20, 20, 15, + # 10, 10, 5)); the realized panel has 120 unique groups after the + # default 20 never-treated + 20 always-treated control rows are + # appended (gen_reversible defaults at line 64). Recording both fields + # avoids the metadata-vs-data mismatch the reviewer flagged on + # PR #371 R2: anyone reusing this scenario's metadata sees both the + # switcher count (the load-bearing number for the DGP allocation) and + # the realized panel size. 
+ params = list(pattern = "multi_path_reversible", + n_switcher_groups = N_GOLDEN, n_realized_groups = N_GOLDEN + 40L, n_periods = 10, seed = 115, effects = 3, placebo = 2, by_path = 3, ci_level = 95), results = extract_dcdh_by_path(res15, n_effects = 3, n_placebos = 2) diff --git a/benchmarks/data/dcdh_dynr_golden_values.json b/benchmarks/data/dcdh_dynr_golden_values.json index 05c37f79..0014e0dd 100644 --- a/benchmarks/data/dcdh_dynr_golden_values.json +++ b/benchmarks/data/dcdh_dynr_golden_values.json @@ -766,7 +766,8 @@ }, "params": { "pattern": "multi_path_reversible", - "n_groups": 80, + "n_switcher_groups": 80, + "n_realized_groups": 120, "n_periods": 10, "seed": 115, "effects": 3, From 3ba7f6d228cd62177dc36b49ce4c1c8187983497 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 25 Apr 2026 09:36:50 -0400 Subject: [PATCH 4/5] Address PR #371 AI review R3: docstring access-pattern accuracy P3 fix: `_collect_path_placebo_bootstrap_inputs` docstring described the analytical-results shape as `path_placebos[path]["horizons"][-lag_l]["effect"]` but the actual access is `path_placebos[path][-lag_l]["effect"]` (no `["horizons"]` wrapper -- `_compute_path_placebos` returns the negative-keyed inner dict directly, intentionally diverging from `_compute_path_effects`'s `["horizons"]` nesting). Update the docstring to match the actual access pattern at the implementation site (`:5818-5824`). Harmless at runtime; the fix is to prevent the comment from misleading future maintenance. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 649a48de..ef901ad3 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -5734,9 +5734,12 @@ def _collect_path_placebo_bootstrap_inputs( ``path_placebo_event_study[path][-lag_l]`` post-bootstrap. 
The point estimate per ``(path, lag_l)`` is read from - ``path_placebos[path]["horizons"][-lag_l]["effect"]`` to stay - bit-identical with the analytical pass; the bootstrap distribution - gets centered on this value by ``_bootstrap_one_target`` downstream. + ``path_placebos[path][-lag_l]["effect"]`` (note: no ``["horizons"]`` + wrapper -- ``_compute_path_placebos`` returns the negative-keyed + inner dict directly, unlike ``_compute_path_effects`` which wraps + its horizons under a ``["horizons"]`` key) to stay bit-identical + with the analytical pass; the bootstrap distribution gets centered + on this value by ``_bootstrap_one_target`` downstream. The ``warnings.catch_warnings`` block suppresses the re-enumeration overflow ``UserWarning``; the analytical From 97bc2fad4a5bbf0e970a555e464f387ba16f85dd Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 25 Apr 2026 09:50:42 -0400 Subject: [PATCH 5/5] Address PR #371 AI review R4: empty-state contract on path_placebo_event_study MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P3 fix: the new `path_placebo_event_study` surface implements the None-vs-{} empty-state sentinel (parallel to `path_effects` — `_compute_path_placebos` returns `{}` when no observed path has a complete window) but the contract was not documented or regression- tested on the new sibling surface. - Result class docstring (`ChaisemartinDHaultfoeuilleResults`): add an explicit empty-state paragraph documenting `None` = not requested vs `{}` = requested but empty (mirrors the same contract on `path_effects`). - REGISTRY.md `Note (Phase 3 by_path ...)` per-path placebos paragraph: add an "Empty-state contract" sentence. 
- Add `TestByPathPlacebo::test_empty_path_placebo_surface_when_no_complete_window` mirroring the existing `path_effects` empty-state regression (`TestByPathEdgeCases::test_empty_path_surface_when_no_complete_window`) on the same no-complete-window panel construction (F_g=3, n_periods=4, L_max=3 → window [2, 5] extends past the panel). Asserts `path_placebo_event_study == {}` (NOT None) and confirms `path_effects == {}` parallel state so both sibling surfaces hit the same empty-state branch consistently. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../chaisemartin_dhaultfoeuille_results.py | 12 +++- docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 62 +++++++++++++++++++ 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 6c5b3e22..f7596ecc 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -384,9 +384,15 @@ class ChaisemartinDHaultfoeuilleResults: forward and backward horizons. Each inner entry holds ``{"effect", "se", "t_stat", "p_value", "conf_int", "n_obs"}``. Populated when ``by_path`` is a positive int AND - ``placebo=True`` AND ``L_max >= 1``; ``None`` otherwise. - Inherits the cross-path cohort-sharing SE deviation from R - documented for ``path_effects``. See REGISTRY.md + ``placebo=True`` AND ``L_max >= 1``. Empty-state contract + mirrors ``path_effects``: ``None`` when ``by_path + placebo`` + was not requested; ``{}`` when requested but no observed path + has a complete window ``[F_g-1, F_g-1+L_max]`` within the + panel (the same regime where ``path_effects`` returns ``{}``, + with the same ``UserWarning`` at fit-time). Downstream callers + should distinguish the two states. Inherits the cross-path + cohort-sharing SE deviation from R documented for + ``path_effects``. See REGISTRY.md ``Note (Phase 3 by_path ...)`` → "Per-path placebos". 
honest_did_results : HonestDiDResults, optional HonestDiD sensitivity analysis bounds (Rambachan & Roth 2023). diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 338cea0f..440b132f 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -638,7 +638,7 @@ The guard is fired by `_survey_se_from_group_if` (analytical and replicate) and - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. -- **Note (Phase 3 `by_path` per-path event-study disaggregation):** Per-path disaggregation of the multi-horizon event study, mirroring R `did_multiplegt_dyn(..., by_path=k)`. Activated via `ChaisemartinDHaultfoeuille(by_path=k, drop_larger_lower=False)` where `k` is a positive integer (top-k most common observed paths by switcher-group frequency). **Window convention:** the path tuple for a switcher group `g` is `(D_{g, F_g-1}, D_{g, F_g}, ..., D_{g, F_g-1+L_max})` — length `L_max + 1`, matching R's window `[F_{g-1}, F_{g-1+l}]`. **Ranking:** paths are ranked by descending frequency; ties are broken lexicographically on the path tuple for deterministic ordering, so every selected path has a unique `frequency_rank`. 
If `by_path` exceeds the number of observed paths, all observed paths are returned with a `UserWarning`. **Per-path SE convention (joiners/leavers precedent):** the per-path influence function follows the joiners-only / leavers-only IF construction at `chaisemartin_dhaultfoeuille.py:5495-5504`: the switcher-side contribution `+S_g * (Y_{g,out} - Y_{g,ref})` is zeroed for groups whose observed trajectory is NOT the selected path; control contributions and the full cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. After applying the singleton-baseline eligible mask and cohort-recentering with the original cohort IDs, the plug-in SE uses the path-specific divisor `N_l_path` (count of path switchers eligible at horizon `l`) — same pattern as `joiners_se` using `joiner_total`. This gives the **within-path mean** estimand `DID_{path,l}` as the within-path average of `DID_{g,l}`. **Degenerate-cohort behavior per path:** when a path's centered IF at some horizon is identically zero (every variance-eligible path switcher forms its own `(D_{g,1}, F_g, S_g)` cohort, or the path has a single contributing group), SE / t_stat / p_value / conf_int are NaN-consistent and a `UserWarning` is emitted scoped to `(path, horizon)`. This mirrors the overall-path degenerate-cohort surface and is common for rare paths with few contributing groups. **Empty-state contract:** `results.path_effects` distinguishes "not requested" (`None`) from "requested but empty" (`{}` — all switchers have windows outside the panel or unobserved cells). The empty-dict case emits a `UserWarning` at fit-time and renders as an explicit "no observed paths" notice in `summary()`; `to_dataframe(level="by_path")` returns an empty DataFrame with the canonical column set (mirrors the `linear_trends` pattern when `trends_linear=True` but no horizons survive). 
**Requirements:** `drop_larger_lower=False` (multi-switch groups are the object of interest; default `True` filters them out) and `L_max >= 1` (path window depends on the horizon). **Scope:** binary treatment only; combinations with `controls`, `trends_linear`, `trends_nonparam`, `heterogeneity`, `design2`, `honest_did`, and `survey_design` remain gated behind explicit `NotImplementedError` (deferred to follow-up wave PRs). `n_bootstrap > 0` is now supported — see the **Bootstrap SE** paragraph below. `placebo=True` is now supported per-path — see the **Per-path placebos** paragraph below. **TWFE diagnostic** remains a sample-level summary (not computed per path) in this release. Results are exposed on `results.path_effects` as `Dict[Tuple[int, ...], Dict[str, Any]]` with nested `horizons` dicts per horizon `l`, and on `results.to_dataframe(level="by_path")` as a long-format table with columns `[path, frequency_rank, n_groups, horizon, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper, n_obs]`. Gated tests live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathGates` / `::TestByPathBehavior` / `::TestByPathEdgeCases`. **R-parity** against `DIDmultiplegtDYN 2.3.3` is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPath` via two scenarios: `mixed_single_switch_by_path` (2 paths, `by_path=2`) and `multi_path_reversible_by_path` (4 paths, `by_path=3`; path-assignment deterministic on `F_g` so each `(D_{g,1}, F_g, S_g)` cohort contains switchers from a single path). Per-path point estimates and per-path switcher counts match R exactly; per-path SE matches within the Phase 2 multi-horizon SE envelope (observed rtol ≤ 10.2% on the 2-path mixed scenario, ≤ 4.2% on the 4-path cohort-clean scenario). 
**Deviation from R (cross-path cohort-sharing SE):** our analytical SE is the marginal variance of the path-contribution estimator cohort-centered on the *full-panel* cohort structure (joiners/leavers precedent — non-path switchers contribute to cohort means via their zeroed switcher row). R's `did_multiplegt_dyn(..., by_path=k)` re-runs the estimator per path, so cohort means are computed over the path's own switchers only. When a cohort `(D_{g,1}, F_g, S_g)` spans multiple observed paths, Python and R SE diverge materially (our empirical probes with random post-window toggling saw rtol > 100%); when every cohort is single-path (scenario 13 by design, scenario 14 by construction), the two approaches coincide up to the documented Phase 2 envelope. Practitioners with cohort structures that mix paths should interpret the per-path SE as a within-full-panel marginal variance, not a per-path conditional variance. **Bootstrap SE:** when `n_bootstrap > 0` is set, the top-k paths are enumerated once on the observed data (R-faithful: matches `did_multiplegt_dyn(..., by_path=k, bootstrap=B)`'s path-stability convention — verified empirically against DIDmultiplegtDYN 2.3.3) and the multiplier bootstrap (`bootstrap_weights ∈ {"rademacher", "mammen", "webb"}`) runs per `(path, horizon)` target via the shared `_bootstrap_one_target` / `compute_effect_bootstrap_stats` helpers. Point estimates are unchanged from the analytical path. Bootstrap SE replaces the analytical SE in `path_effects[path]["horizons"][l]["se"]`, and `p_value` / `conf_int` are taken as the **bootstrap percentile** statistics, matching the Round-10 library convention for overall / joiners / leavers / multi-horizon bootstrap (see the `Note (bootstrap inference surface)` elsewhere in this file and the pinned regression `test_bootstrap_p_value_and_ci_propagated_to_top_level`). `t_stat` is SE-derived via `safe_inference` per the anti-pattern rule. Interpretation: inference is *conditional on the observed path set*. 
**SE inherits the analytical cross-path cohort-sharing deviation:** the bootstrap input is the exact same full-panel cohort-centered path IF that the analytical path computes (`_collect_path_bootstrap_inputs` reuses the same enumeration / cohort IDs / IF construction), so the bootstrap SE is a Monte Carlo analog of the analytical SE — it inherits the same cross-path cohort-sharing deviation from R's per-path re-run convention documented above. On single-path-cohort panels (scenarios 13 and 14 of the R-parity fixture, and any DGP where `(D_{g,1}, F_g, S_g)` cohorts never span multiple observed paths), bootstrap SE tracks analytical SE up to Monte Carlo noise and both coincide with R up to the Phase 2 envelope. On cross-path cohort panels, bootstrap SE inherits the >100% rtol divergence from R that analytical already has. **Deviation from R (CI method):** R's per-path CI is normal-theory around the bootstrap SE (half-width ≈ `1.96·se`); ours is the bootstrap percentile CI, intentionally diverging from R to keep the dCDH inference surface internally consistent across all bootstrap targets. Practitioners who want *unconditional* inference capturing path-selection uncertainty need a pairs-bootstrap (deferred — no R precedent). Positive regressions live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathBootstrap` (gated `@pytest.mark.slow`): point-estimate invariance, finite positive SE on non-degenerate panels, SE-within-30%-rtol of analytical on cohort-clean fixtures, degenerate-cohort NaN propagation, Rademacher/Mammen/Webb parity, seed reproducibility, and percentile-vs-normal-theory CI pinning. 
**Per-path placebos:** when `placebo=True` (and `L_max >= 1`) is combined with `by_path=k`, per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max` are computed using the same joiners/leavers IF precedent applied to `_compute_per_group_if_placebo_horizon` (with the new `switcher_subset_mask` parameter): switcher contributions are zeroed for groups not in the path; the control pool and the variance-eligible cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. Plug-in SE uses the path-specific divisor `N^{pl}_{l, path}` (count of path switchers eligible at backward lag `l`). Surfaced on `results.path_placebo_event_study[path][-l]` with the same `{effect, se, t_stat, p_value, conf_int, n_obs}` shape as `placebo_event_study` (negative-int inner keys parallel the existing per-path event-study positive-int keys, so a unified forward+backward view is well-formed). **Inherits the cross-path cohort-sharing SE deviation from R** documented above for `path_effects` (same convention applied backward); tracks R within numerical tolerance on single-path-cohort panels and diverges on cohort-mixed panels. Multiplier bootstrap (when `n_bootstrap > 0`) runs per `(path, lag)` target via the same `_bootstrap_one_target` dispatch used for the per-path event-study, with the canonical NaN-on-invalid contract. The bootstrap SE is a Monte Carlo analog of the analytical placebo SE — same per-path centered IF input — and inherits the same deviation. Surfaced through `summary()` (negative-keyed rows rendered alongside positive-keyed event-study rows under each path block) and `to_dataframe(level="by_path")` (`horizon` column takes negative ints for placebo rows). 
R-parity is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` on the `multi_path_reversible_by_path_placebo` scenario; positive analytical + bootstrap invariants live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (with the gated `::TestByPathPlacebo::TestBootstrap` subclass). +- **Note (Phase 3 `by_path` per-path event-study disaggregation):** Per-path disaggregation of the multi-horizon event study, mirroring R `did_multiplegt_dyn(..., by_path=k)`. Activated via `ChaisemartinDHaultfoeuille(by_path=k, drop_larger_lower=False)` where `k` is a positive integer (top-k most common observed paths by switcher-group frequency). **Window convention:** the path tuple for a switcher group `g` is `(D_{g, F_g-1}, D_{g, F_g}, ..., D_{g, F_g-1+L_max})` — length `L_max + 1`, matching R's window `[F_g-1, F_g-1+l]`. **Ranking:** paths are ranked by descending frequency; ties are broken lexicographically on the path tuple for deterministic ordering, so every selected path has a unique `frequency_rank`. If `by_path` exceeds the number of observed paths, all observed paths are returned with a `UserWarning`. **Per-path SE convention (joiners/leavers precedent):** the per-path influence function follows the joiners-only / leavers-only IF construction at `chaisemartin_dhaultfoeuille.py:5495-5504`: the switcher-side contribution `+S_g * (Y_{g,out} - Y_{g,ref})` is zeroed for groups whose observed trajectory is NOT the selected path; control contributions and the full cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. After applying the singleton-baseline eligible mask and cohort-recentering with the original cohort IDs, the plug-in SE uses the path-specific divisor `N_l_path` (count of path switchers eligible at horizon `l`) — same pattern as `joiners_se` using `joiner_total`. This gives the **within-path mean** estimand `DID_{path,l}` as the within-path average of `DID_{g,l}`. 
**Degenerate-cohort behavior per path:** when a path's centered IF at some horizon is identically zero (every variance-eligible path switcher forms its own `(D_{g,1}, F_g, S_g)` cohort, or the path has a single contributing group), SE / t_stat / p_value / conf_int are NaN-consistent and a `UserWarning` is emitted scoped to `(path, horizon)`. This mirrors the overall-path degenerate-cohort surface and is common for rare paths with few contributing groups. **Empty-state contract:** `results.path_effects` distinguishes "not requested" (`None`) from "requested but empty" (`{}` — all switchers have windows outside the panel or unobserved cells). The empty-dict case emits a `UserWarning` at fit-time and renders as an explicit "no observed paths" notice in `summary()`; `to_dataframe(level="by_path")` returns an empty DataFrame with the canonical column set (mirrors the `linear_trends` pattern when `trends_linear=True` but no horizons survive). **Requirements:** `drop_larger_lower=False` (multi-switch groups are the object of interest; default `True` filters them out) and `L_max >= 1` (path window depends on the horizon). **Scope:** binary treatment only; combinations with `controls`, `trends_linear`, `trends_nonparam`, `heterogeneity`, `design2`, `honest_did`, and `survey_design` remain gated behind explicit `NotImplementedError` (deferred to follow-up wave PRs). `n_bootstrap > 0` is now supported — see the **Bootstrap SE** paragraph below. `placebo=True` is now supported per-path — see the **Per-path placebos** paragraph below. **TWFE diagnostic** remains a sample-level summary (not computed per path) in this release. Results are exposed on `results.path_effects` as `Dict[Tuple[int, ...], Dict[str, Any]]` with nested `horizons` dicts per horizon `l`, and on `results.to_dataframe(level="by_path")` as a long-format table with columns `[path, frequency_rank, n_groups, horizon, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper, n_obs]`. 
Gated tests live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathGates` / `::TestByPathBehavior` / `::TestByPathEdgeCases`. **R-parity** against `DIDmultiplegtDYN 2.3.3` is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPath` via two scenarios: `mixed_single_switch_by_path` (2 paths, `by_path=2`) and `multi_path_reversible_by_path` (4 paths, `by_path=3`; path-assignment deterministic on `F_g` so each `(D_{g,1}, F_g, S_g)` cohort contains switchers from a single path). Per-path point estimates and per-path switcher counts match R exactly; per-path SE matches within the Phase 2 multi-horizon SE envelope (observed rtol ≤ 10.2% on the 2-path mixed scenario, ≤ 4.2% on the 4-path cohort-clean scenario). **Deviation from R (cross-path cohort-sharing SE):** our analytical SE is the marginal variance of the path-contribution estimator cohort-centered on the *full-panel* cohort structure (joiners/leavers precedent — non-path switchers contribute to cohort means via their zeroed switcher row). R's `did_multiplegt_dyn(..., by_path=k)` re-runs the estimator per path, so cohort means are computed over the path's own switchers only. When a cohort `(D_{g,1}, F_g, S_g)` spans multiple observed paths, Python and R SE diverge materially (our empirical probes with random post-window toggling saw rtol > 100%); when every cohort is single-path (scenario 13 by design, scenario 14 by construction), the two approaches coincide up to the documented Phase 2 envelope. Practitioners with cohort structures that mix paths should interpret the per-path SE as a within-full-panel marginal variance, not a per-path conditional variance. 
**Bootstrap SE:** when `n_bootstrap > 0` is set, the top-k paths are enumerated once on the observed data (R-faithful: matches `did_multiplegt_dyn(..., by_path=k, bootstrap=B)`'s path-stability convention — verified empirically against DIDmultiplegtDYN 2.3.3) and the multiplier bootstrap (`bootstrap_weights ∈ {"rademacher", "mammen", "webb"}`) runs per `(path, horizon)` target via the shared `_bootstrap_one_target` / `compute_effect_bootstrap_stats` helpers. Point estimates are unchanged from the analytical (non-bootstrap) fit. Bootstrap SE replaces the analytical SE in `path_effects[path]["horizons"][l]["se"]`, and `p_value` / `conf_int` are taken as the **bootstrap percentile** statistics, matching the Round-10 library convention for overall / joiners / leavers / multi-horizon bootstrap (see the `Note (bootstrap inference surface)` elsewhere in this file and the pinned regression `test_bootstrap_p_value_and_ci_propagated_to_top_level`). `t_stat` is SE-derived via `safe_inference` per the anti-pattern rule. Interpretation: inference is *conditional on the observed path set*. **SE inherits the analytical cross-path cohort-sharing deviation:** the bootstrap input is the exact same full-panel cohort-centered path IF that the analytical SE computation uses (`_collect_path_bootstrap_inputs` reuses the same enumeration / cohort IDs / IF construction), so the bootstrap SE is a Monte Carlo analog of the analytical SE — it inherits the same cross-path cohort-sharing deviation from R's per-path re-run convention documented above. On single-path-cohort panels (scenarios 13 and 14 of the R-parity fixture, and any DGP where `(D_{g,1}, F_g, S_g)` cohorts never span multiple observed paths), bootstrap SE tracks analytical SE up to Monte Carlo noise and both coincide with R up to the Phase 2 envelope. On cross-path cohort panels, bootstrap SE inherits the >100% rtol divergence from R that the analytical SE already has. 
**Deviation from R (CI method):** R's per-path CI is normal-theory around the bootstrap SE (half-width ≈ `1.96·se`); ours is the bootstrap percentile CI, intentionally diverging from R to keep the dCDH inference surface internally consistent across all bootstrap targets. Practitioners who want *unconditional* inference capturing path-selection uncertainty need a pairs-bootstrap (deferred — no R precedent). Positive regressions live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathBootstrap` (gated `@pytest.mark.slow`): point-estimate invariance, finite positive SE on non-degenerate panels, SE-within-30%-rtol of analytical on cohort-clean fixtures, degenerate-cohort NaN propagation, Rademacher/Mammen/Webb parity, seed reproducibility, and percentile-vs-normal-theory CI pinning. **Per-path placebos:** when `placebo=True` (and `L_max >= 1`) is combined with `by_path=k`, per-path backward-horizon placebos `DID^{pl}_{path, l}` for `l = 1..L_max` are computed using the same joiners/leavers IF precedent applied to `_compute_per_group_if_placebo_horizon` (with the new `switcher_subset_mask` parameter): switcher contributions are zeroed for groups not in the path; the control pool and the variance-eligible cohort structure `(D_{g,1}, F_g, S_g)` are unchanged. Plug-in SE uses the path-specific divisor `N^{pl}_{l, path}` (count of path switchers eligible at backward lag `l`). Surfaced on `results.path_placebo_event_study[path][-l]` with the same `{effect, se, t_stat, p_value, conf_int, n_obs}` shape as `placebo_event_study` (negative-int inner keys parallel the existing per-path event-study positive-int keys, so a unified forward+backward view is well-formed). **Inherits the cross-path cohort-sharing SE deviation from R** documented above for `path_effects` (same convention applied backward); tracks R within numerical tolerance on single-path-cohort panels and diverges on cohort-mixed panels. 
Multiplier bootstrap (when `n_bootstrap > 0`) runs per `(path, lag)` target via the same `_bootstrap_one_target` dispatch used for the per-path event-study, with the canonical NaN-on-invalid contract. The bootstrap SE is a Monte Carlo analog of the analytical placebo SE — same per-path centered IF input — and inherits the same deviation. Surfaced through `summary()` (negative-keyed rows rendered alongside positive-keyed event-study rows under each path block) and `to_dataframe(level="by_path")` (`horizon` column takes negative ints for placebo rows). **Empty-state contract:** `results.path_placebo_event_study` mirrors `path_effects` — `None` when `by_path + placebo` was not requested, `{}` when requested but no observed path has a complete window within the panel (same regime that returns `{}` for `path_effects`, with the same fit-time `UserWarning`). R-parity is confirmed at `tests/test_chaisemartin_dhaultfoeuille_parity.py::TestDCDHDynRParityByPathPlacebo` on the `multi_path_reversible_by_path_placebo` scenario; positive analytical + bootstrap invariants live in `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathPlacebo` (with the gated `::TestByPathPlacebo::TestBootstrap` subclass). **Reference implementation(s):** - R: [`DIDmultiplegtDYN`](https://cran.r-project.org/package=DIDmultiplegtDYN) (CRAN, maintained by the paper authors). The Python implementation matches `did_multiplegt_dyn(..., effects=1)` at horizon `l = 1`. Parity tests live in `tests/test_chaisemartin_dhaultfoeuille_parity.py`. 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 7a80959e..cedb90e6 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -5284,6 +5284,68 @@ def test_path_placebo_to_dataframe_emits_negative_horizons(self): "to_dataframe(level='by_path') did not emit any negative-horizon rows" ) + def test_empty_path_placebo_surface_when_no_complete_window(self): + """``path_placebo_event_study`` empty-state contract: ``{}`` (NOT + ``None``) when ``by_path + placebo`` was requested but no observed + path has a complete ``[F_g-1, F_g-1+L_max]`` window within the + panel. Mirrors ``test_empty_path_surface_when_no_complete_window`` + for the placebo sibling so a regression on the empty-state + sentinel can't slip through. + + Switchers have F_g = period 3 with n_periods = 4 and L_max = 3, so + the window [F_g - 1, F_g - 1 + L_max] = [2, 5] extends past the + panel — same construction as the path_effects empty-state test. + """ + rng = np.random.default_rng(0) + rows = [] + for g in (1, 2, 3, 4): + for t in range(4): + d = 1 if t >= 3 else 0 + rows.append( + { + "group": g, + "period": t, + "treatment": d, + "outcome": rng.normal(), + } + ) + for g in (5, 6): + for t in range(4): + rows.append( + { + "group": g, + "period": t, + "treatment": 0, + "outcome": rng.normal(), + } + ) + data = pd.DataFrame(rows) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + est = ChaisemartinDHaultfoeuille( + drop_larger_lower=False, + by_path=3, + placebo=True, + twfe_diagnostic=False, + ) + results = est.fit( + data, + outcome="outcome", + group="group", + time="period", + treatment="treatment", + L_max=3, + ) + + # Empty dict, NOT None — distinguishes "requested but empty" from + # "not requested" on the new placebo sibling surface. 
+ assert results.path_placebo_event_study is not None + assert results.path_placebo_event_study == {} + # path_effects parallel state confirms both surfaces hit the + # same empty-state branch consistently. + assert results.path_effects == {} + @pytest.mark.slow class TestBootstrap: """Bootstrap invariants for by_path + placebo + n_bootstrap > 0.