diff --git a/TODO.md b/TODO.md index 2ef24f36..a27d34a6 100644 --- a/TODO.md +++ b/TODO.md @@ -57,6 +57,7 @@ Deferred items from PR reviews that were not addressed before merge. | Issue | Location | PR | Priority | |-------|----------|----|----------| | dCDH: Phase 1 per-period placebo DID_M^pl has NaN SE (no IF derivation for the per-period aggregation path). Multi-horizon placebos (L_max >= 1) have valid SE. | `chaisemartin_dhaultfoeuille.py` | #294 | Low | +| dCDH: Survey cell-period allocator's post-period attribution is a library convention, not derived from the observation-level survey linearization. MC coverage is empirically close to nominal on the test DGP; a formal derivation (or a covariance-aware two-cell alternative) is deferred. Documented in REGISTRY.md survey IF expansion Note. | `chaisemartin_dhaultfoeuille.py`, `docs/methodology/REGISTRY.md` | PR 2 | Medium | | dCDH: Parity test SE/CI assertions only cover pure-direction scenarios; mixed-direction SE comparison is structurally apples-to-oranges (cell-count vs obs-count weighting). | `test_chaisemartin_dhaultfoeuille_parity.py` | #294 | Low | | CallawaySantAnna: consider materializing NaN entries for non-estimable (g,t) cells in group_time_effects dict (currently omitted with consolidated warning); would require updating downstream consumers (event study, balance_e, aggregation) | `staggered.py` | #256 | Low | | ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails) | diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index cf89c6c3..a8562a33 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -639,23 +639,40 @@ def fit( Survey design specification for design-based inference. 
Supports ``weight_type='pweight'`` with two variance paths: (1) Taylor Series Linearization using strata / PSU / FPC - (analytical) and (2) replicate-weight variance using - BRR / Fay / JK1 / JKn / SDR methods (analytical, - closed-form). Survey weights produce weighted cell means - for the point estimate. Under a survey design without an - explicit ``psu``, ``fit()`` auto-injects ``psu=`` - so the group is the effective sampling unit (strata / PSU - must be within-group-constant; mixed labels within a group - raise ``ValueError``). When ``n_bootstrap > 0`` and a - survey design is supplied, the multiplier bootstrap - operates at the PSU level (Hall-Mammen wild PSU - bootstrap) — under the default auto-inject this collapses - to a group-level clustered bootstrap. **Replicate weights - combined with ``n_bootstrap > 0`` are rejected** with - ``NotImplementedError`` (replicate variance is - closed-form; bootstrap would double-count variance). See - REGISTRY.md ``ChaisemartinDHaultfoeuille`` Notes for the - full contract. + (analytical) via the **cell-period IF allocator** that + attributes per-``(g, t)``-cell mass and aggregates through + Binder (1983), and (2) replicate-weight variance using + BRR / Fay / JK1 / JKn / SDR methods (analytical, closed- + form). Survey weights produce weighted cell means for the + point estimate. Under a survey design without an explicit + ``psu``, ``fit()`` auto-injects ``psu=`` as a + safe default (the group is the effective sampling unit). + **Strata and PSU may vary across cells of a group** but + must be constant within each ``(g, t)`` cell (trivially + true in one-obs-per-cell panels; enforced otherwise with + ``ValueError``). 
Three supported combinations under the + auto-injected ``psu=``: + (1) strata constant within group (any ``nest`` flag works); + (2) strata vary within group **and** ``nest=True`` — the + resolver re-labels the synthesized ``psu`` uniquely within + strata; (3) strata vary within group **and** ``nest=False`` + — rejected up front with a targeted ``ValueError``; pass + ``SurveyDesign(..., nest=True)`` or an explicit + ``psu=`` with globally-unique labels instead. When ``n_bootstrap > 0`` and a survey + design is supplied, the multiplier bootstrap operates at + the PSU level (Hall-Mammen wild PSU bootstrap) — under the + default auto-inject this collapses to a group-level + clustered bootstrap. **Out-of-scope combinations raise + ``NotImplementedError``**: (a) replicate weights with + ``n_bootstrap > 0`` (replicate variance is closed-form; + bootstrap would double-count variance); (b) + ``heterogeneity=`` with PSU/strata that vary within group + (heterogeneity WLS still uses the legacy group-level IF + expansion; follow-up PR extends it); (c) ``n_bootstrap > + 0`` with PSU that varies within group (PSU-level bootstrap + still uses the legacy group-level PSU map; follow-up PR + extends it). See REGISTRY.md + ``ChaisemartinDHaultfoeuille`` Notes for the full contract. Returns ------- @@ -717,6 +734,45 @@ def fit( or resolved_survey.replicate_weights.shape[1] == 0 ) ): + # Pre-auto-inject contract check: the auto-inject path + # synthesizes ``psu=`` and preserves the user's + # ``nest`` flag. Under ``nest=False`` (the default), the + # survey resolver requires globally-unique PSU labels when + # strata are present; if strata varies within group, the + # synthesized PSU column reuses group labels across strata + # and trips the cross-stratum PSU uniqueness check at + # resolution time. Under ``nest=True`` the resolver + # re-labels ``(stratum, psu)`` uniquely within strata + # (``diff_diff/survey.py:299-302``), so varying strata is + # fine — let the auto-inject proceed. 
Only the + # ``nest=False`` + varying-strata + omitted-psu triple + # warrants an up-front targeted error. + if resolved_survey.strata is not None and not getattr( + survey_design, "nest", False + ): + _strata_varies_pre, _ = _strata_psu_vary_within_group( + resolved_survey, data, group, survey_weights, + ) + if _strata_varies_pre: + raise ValueError( + "ChaisemartinDHaultfoeuille survey support: " + "strata that vary across cells of the same " + "group require either an explicit " + "`psu=` (any column whose labels are " + "globally unique within strata) or the " + "original `SurveyDesign(..., nest=True)` " + "flag so the auto-injected `psu=` is " + "re-labeled uniquely within strata by the " + "resolver. The default `nest=False` auto-" + "inject path reuses group labels across " + "strata and trips the cross-stratum PSU " + "uniqueness check in survey resolution. " + "Either (a) set strata constant within each " + "group, (b) pass `SurveyDesign(..., " + "nest=True)`, or (c) pass an explicit " + "`psu=` with globally-unique labels." + ) + from diff_diff.survey import SurveyDesign as _SurveyDesign # Build a synthesized PSU column on a private copy of data @@ -770,15 +826,44 @@ def fit( # precedent: efficient_did.py:989, staggered.py:1869, # two_stage.py:251-253. - # Within-group-constant PSU/strata is required: the IF - # expansion psi_i = U[g] * (w_i / W_g) supports within-group - # variation in WEIGHTS (each obs contributes proportionally), - # but PSU and strata must be constant within group — the - # group is treated as the effective sampling unit for the - # Binder stratified-PSU variance formula. - _validate_group_constant_strata_psu( + # Cell-period IF allocator contract: strata and PSU must be + # constant within each (g, t) cell, a strict relaxation of + # the previous within-group constancy rule. 
Two out-of-scope + # combinations are gated with NotImplementedError until the + # corresponding follow-up PRs extend them: + # - heterogeneity= + within-group-varying PSU/strata + # (PR 3: cell-period allocator for the WLS psi_obs) + # - n_bootstrap > 0 + within-group-varying PSU + # (PR 4: cell-level Hall-Mammen wild bootstrap) + strata_varies, psu_varies = _strata_psu_vary_within_group( resolved_survey, data, group, survey_weights, ) + if strata_varies or psu_varies: + if heterogeneity is not None: + raise NotImplementedError( + "heterogeneity= is not supported under a survey " + "design whose PSU or strata vary within group. " + "The heterogeneity WLS path uses the legacy " + "group-level IF expansion and will be extended " + "to the cell-period allocator in a follow-up " + "PR. For now, either (a) set heterogeneity=None, " + "or (b) collapse PSU/strata to be constant " + "within each group." + ) + if self.n_bootstrap > 0: + raise NotImplementedError( + "n_bootstrap > 0 is not supported under a " + "survey design whose PSU varies within group. " + "The PSU-level Hall-Mammen wild bootstrap uses " + "the legacy group-level PSU map and will be " + "extended to cell-level PSU in a follow-up PR. " + "For now, use n_bootstrap=0 (analytical TSL " + "variance, which fully supports within-group-" + "varying PSU via the cell-period allocator)." + ) + _validate_cell_constant_strata_psu( + resolved_survey, data, group, time, survey_weights, + ) # Design-2 precondition: requires drop_larger_lower=False if design2 and self.drop_larger_lower: @@ -808,12 +893,19 @@ def fit( # Retain observation-level survey info for IF expansion (Step 3 # of survey integration: group-level IF → observation-level psi). + # `time_ids` is per-row; `periods` (the sorted column index used + # by the pivoted U_per_period matrices) is populated below once + # it's known. 
Together they enable cell-level IF expansion + # psi_i = U[g_i, t_i] * (w_i / W_{g_i, t_i}) under the cell- + # period allocator (REGISTRY.md survey IF expansion contract). _obs_survey_info = None if resolved_survey is not None: _obs_survey_info = { "group_ids": data[group].values, + "time_ids": data[time].values, "weights": survey_weights, "resolved": resolved_survey, + "periods": None, } # Replicate-weight variance tracker: each _compute_se call under @@ -1203,6 +1295,12 @@ def fit( Y_mat = y_pivot.to_numpy() N_mat = n_pivot.to_numpy() + # Finalize survey obs-info with the pivot's column index so that + # cell-level IF expansion can map per-row time values to matrix + # column indices in _survey_se_from_group_if. + if _obs_survey_info is not None: + _obs_survey_info["periods"] = np.asarray(all_periods) + # ------------------------------------------------------------------ # Step 7b: Covariate residualization (DID^X) # @@ -1550,7 +1648,7 @@ def fit( # Step 12c: Multi-horizon per-group computation (L_max >= 1) # ------------------------------------------------------------------ multi_horizon_dids: Optional[Dict[int, Dict[str, Any]]] = None - multi_horizon_if: Optional[Dict[int, np.ndarray]] = None + multi_horizon_if: Optional[Dict[int, Tuple[np.ndarray, np.ndarray]]] = None multi_horizon_se: Optional[Dict[int, float]] = None multi_horizon_inference: Optional[Dict[int, Dict[str, Any]]] = None @@ -1599,6 +1697,7 @@ def fit( T_g=T_g_arr, L_max=L_max, set_ids=set_ids_arr, + compute_per_period=_obs_survey_info is not None, ) # Per-horizon analytical SE via cohort recentering. @@ -1615,7 +1714,7 @@ def fit( # so the event_study_effects dict uses a consistent estimand # (per-group DID_{g,l}) across all horizons. for l_h in range(1, L_max + 1): - U_l = multi_horizon_if[l_h] + U_l, U_pp_l = multi_horizon_if[l_h] # Cohort IDs for this horizon: (D_{g,1}, F_g, S_g) triples # are the same as Phase 1 (cohort identity depends on first # switch, not on the horizon). 
Filter to eligible. @@ -1646,6 +1745,16 @@ def fit( U_l_elig = U_l[eligible_mask_var] cid_elig = cid_l[eligible_mask_var] U_centered_l = _cohort_recenter(U_l_elig, cid_elig) + # Only build the cell-level attribution when the IF + # helper actually produced a per-period tensor (i.e., + # a survey design is present). Otherwise the plug-in + # path consumes U_centered_l only. + if U_pp_l is not None: + U_centered_pp_l: Optional[np.ndarray] = _cohort_recenter_per_period( + U_pp_l[eligible_mask_var], cid_elig + ) + else: + U_centered_pp_l = None N_l_h = multi_horizon_dids[l_h]["N_l"] _elig_groups_l = [all_groups[g] for g in range(len(all_groups)) if eligible_mask_var[g]] se_l, n_valid_l = _compute_se( @@ -1653,6 +1762,7 @@ def fit( divisor=N_l_h, obs_survey_info=_obs_survey_info, eligible_groups=_elig_groups_l, + U_centered_per_period=U_centered_pp_l, ) if n_valid_l is not None: _replicate_n_valid_list.append(n_valid_l) @@ -1692,7 +1802,7 @@ def fit( # Phase 2: placebos, normalized effects, cost-benefit delta multi_horizon_placebos: Optional[Dict[int, Dict[str, Any]]] = None - placebo_horizon_if: Optional[Dict[int, np.ndarray]] = None + placebo_horizon_if: Optional[Dict[int, Tuple[np.ndarray, np.ndarray]]] = None placebo_horizon_se: Optional[Dict[int, float]] = None placebo_horizon_inference: Optional[Dict[int, Dict[str, Any]]] = None normalized_effects_dict: Optional[Dict[int, Dict[str, Any]]] = None @@ -1737,6 +1847,7 @@ def fit( switch_direction=switch_direction_arr, T_g=T_g_arr, L_max=L_max, + compute_per_period=_obs_survey_info is not None, set_ids=set_ids_arr, ) # Per-placebo-horizon analytical SE via cohort recentering @@ -1753,7 +1864,7 @@ def fit( if pl_data is None or pl_data["N_pl_l"] == 0: placebo_horizon_se[lag_l] = float("nan") continue - U_pl = placebo_horizon_if[lag_l] + U_pl, U_pp_pl = placebo_horizon_if[lag_l] # Cohort IDs (same as positive horizons) cohort_keys_pl = [ ( @@ -1776,12 +1887,19 @@ def fit( U_pl_elig = U_pl[eligible_mask_pl] cid_elig_pl = 
cid_pl[eligible_mask_pl] U_centered_pl_l = _cohort_recenter(U_pl_elig, cid_elig_pl) + if U_pp_pl is not None: + U_centered_pp_pl_l: Optional[np.ndarray] = _cohort_recenter_per_period( + U_pp_pl[eligible_mask_pl], cid_elig_pl + ) + else: + U_centered_pp_pl_l = None _elig_groups_pl = [all_groups[g] for g in range(len(all_groups)) if eligible_mask_pl[g]] se_pl_l, n_valid_pl_l = _compute_se( U_centered=U_centered_pl_l, divisor=pl_data["N_pl_l"], obs_survey_info=_obs_survey_info, eligible_groups=_elig_groups_pl, + U_centered_per_period=U_centered_pp_pl_l, ) if n_valid_pl_l is not None: _replicate_n_valid_list.append(n_valid_pl_l) @@ -1846,6 +1964,9 @@ def fit( U_centered_joiners, U_centered_leavers, _eligible_group_ids, + U_centered_pp_overall, + U_centered_pp_joiners, + U_centered_pp_leavers, ) = _compute_cohort_recentered_inputs( D_mat=D_mat, # Phase 1 IF uses per-period structure: use raw outcomes @@ -1860,6 +1981,7 @@ def fit( a11_minus_zeroed_arr=a11_minus_zeroed_arr, all_groups=all_groups, singleton_baseline_groups=singleton_baseline_groups, + compute_per_period=_obs_survey_info is not None, ) # Analytical SE for DID_M (survey-aware when survey_design provided) @@ -1868,6 +1990,7 @@ def fit( divisor=N_S, obs_survey_info=_obs_survey_info, eligible_groups=_eligible_group_ids, + U_centered_per_period=U_centered_pp_overall, ) if n_valid_overall is not None: _replicate_n_valid_list.append(n_valid_overall) @@ -1908,6 +2031,7 @@ def fit( divisor=joiner_total, obs_survey_info=_obs_survey_info, eligible_groups=_eligible_group_ids, + U_centered_per_period=U_centered_pp_joiners, ) if n_valid_joiners is not None: _replicate_n_valid_list.append(n_valid_joiners) @@ -1931,6 +2055,7 @@ def fit( divisor=leaver_total, obs_survey_info=_obs_survey_info, eligible_groups=_eligible_group_ids, + U_centered_per_period=U_centered_pp_leavers, ) if n_valid_leavers is not None: _replicate_n_valid_list.append(n_valid_leavers) @@ -2021,9 +2146,10 @@ def fit( pos_mask_warn = obs_ws_warn > 0 
psu_codes_warn = np.asarray(psu_arr_warn) # Collect the PSU label for each variance-eligible - # group (within-group-constant PSU is validated - # upstream, so the first positive-weight label - # represents the whole group). + # group. PSU that varies within group is rejected + # for the bootstrap path upstream (NotImplementedError + # gate), so the first positive-weight label + # represents the whole group here. eligible_psu_labels: List[Any] = [] for gid in _eligible_group_ids: mask_g = (obs_gids_warn == gid) & pos_mask_warn @@ -2085,7 +2211,7 @@ def fit( pl_data = multi_horizon_placebos.get(lag_l) if pl_data is None or pl_data["N_pl_l"] == 0: continue - U_pl_full = placebo_horizon_if[lag_l] + U_pl_full, _ = placebo_horizon_if[lag_l] U_pl_elig_b = U_pl_full[eligible_mask_pl_b] cohort_keys_pl_b = [ ( @@ -2137,7 +2263,7 @@ def fit( h_data = multi_horizon_dids.get(l_h) if h_data is None or h_data["N_l"] == 0: continue - U_l_full = multi_horizon_if[l_h] + U_l_full, _ = multi_horizon_if[l_h] # Full variance-eligible group set (matching # analytical SE path: singleton-baseline only) U_l_elig = U_l_full[eligible_mask_b] @@ -2171,12 +2297,16 @@ def fit( # Under a survey design with PSU information, build a # `group_id_to_psu_code` dict so the bootstrap mixin can # derive each target's PSU map by ID lookup rather than by - # positional reuse. PSU/strata are within-group-constant by - # the _validate_group_constant_strata_psu precondition, so - # each variance-eligible group has exactly one PSU label. - # Under auto-inject psu=group each group has a unique PSU - # code and the bootstrap mixin's identity-map fast path - # reproduces the pre-PSU behavior bit-for-bit. + # positional reuse. 
PSU that varies within group is + # rejected for the bootstrap path by a NotImplementedError + # gate in fit() (the cell-period allocator used by the + # analytical TSL path does NOT require within-group PSU + # constancy, but the Hall-Mammen wild PSU bootstrap still + # uses a group-level PSU map — PR 4 extends it). Each + # variance-eligible group therefore has exactly one PSU + # label here. Under auto-inject psu=group each group has a + # unique PSU code and the bootstrap mixin's identity-map + # fast path reproduces the pre-PSU behavior bit-for-bit. group_id_to_psu_code_bootstrap: Optional[Dict[Any, int]] = None eligible_group_ids_bootstrap: Optional[np.ndarray] = None if ( @@ -4250,7 +4380,8 @@ def _compute_per_group_if_multi_horizon( T_g: np.ndarray, L_max: int, set_ids: Optional[np.ndarray] = None, -) -> Dict[int, np.ndarray]: + compute_per_period: bool = True, +) -> Dict[int, Tuple[np.ndarray, Optional[np.ndarray]]]: """ Compute per-group influence function ``U^G_{g,l}`` for ``l = 1..L_max``. @@ -4275,9 +4406,15 @@ def _compute_per_group_if_multi_horizon( Returns ------- - dict mapping horizon l -> U_g_l array of shape (n_groups,) - NOT cohort-centered. The caller applies ``_cohort_recenter()`` - before computing SE. + dict mapping horizon l -> (U_g_l, U_per_period_l) tuple + - ``U_g_l``: np.ndarray of shape (n_groups,). NOT cohort-centered; + the caller applies ``_cohort_recenter()`` before computing SE. + - ``U_per_period_l``: np.ndarray of shape (n_groups, n_periods). + Per-``(g, t)``-cell contributions attributed to the outcome + period ``t = F_g - 1 + l`` (same column for switcher and its + controls, since they share the outcome period). Satisfies + ``U_per_period_l.sum(axis=1) == U_g_l``. Consumed by the cell- + period IF allocator for within-group-varying PSU designs. 
""" n_groups, n_periods = D_mat.shape is_switcher = first_switch_idx >= 0 @@ -4291,10 +4428,15 @@ def _compute_per_group_if_multi_horizon( baseline_groups[float(d)] = np.where(mask)[0] baseline_f[float(d)] = first_switch_idx[mask] - results: Dict[int, np.ndarray] = {} + results: Dict[int, Tuple[np.ndarray, Optional[np.ndarray]]] = {} for l in range(1, L_max + 1): # noqa: E741 U_l = np.zeros(n_groups, dtype=float) + U_per_period_l: Optional[np.ndarray] = ( + np.zeros((n_groups, n_periods), dtype=float) + if compute_per_period + else None + ) for g in range(n_groups): if not is_switcher[g]: @@ -4331,16 +4473,26 @@ def _compute_per_group_if_multi_horizon( # contribution to U_l is zero, but its count is in N_l. continue - # Switcher contribution: +S_g * (Y_{g, out} - Y_{g, ref}) + # Switcher contribution: +S_g * (Y_{g, out} - Y_{g, ref}). + # Per-cell attribution convention: assign the whole contrast + # to the outcome cell (g, out_idx). See REGISTRY.md's Note + # on survey IF expansion for the rationale behind this + # convention (library choice, not a derived result). switcher_change = Y_mat[g, out_idx] - Y_mat[g, ref_idx] U_l[g] += S_g * switcher_change + if U_per_period_l is not None: + U_per_period_l[g, out_idx] += S_g * switcher_change # Control contributions: each control g' in the pool gets - # -S_g * (1/n_ctrl) * (Y_{g', out} - Y_{g', ref}) + # -S_g * (1/n_ctrl) * (Y_{g', out} - Y_{g', ref}). Same + # post-period attribution as the switcher side. 
ctrl_changes = Y_mat[ctrl_pool, out_idx] - Y_mat[ctrl_pool, ref_idx] - U_l[ctrl_pool] -= (S_g / n_ctrl) * ctrl_changes + ctrl_contrib = (S_g / n_ctrl) * ctrl_changes + U_l[ctrl_pool] -= ctrl_contrib + if U_per_period_l is not None: + U_per_period_l[ctrl_pool, out_idx] -= ctrl_contrib - results[l] = U_l + results[l] = (U_l, U_per_period_l) return results @@ -4355,7 +4507,8 @@ def _compute_per_group_if_placebo_horizon( T_g: np.ndarray, L_max: int, set_ids: Optional[np.ndarray] = None, -) -> Dict[int, np.ndarray]: + compute_per_period: bool = True, +) -> Dict[int, Tuple[np.ndarray, Optional[np.ndarray]]]: """ Compute per-group influence function for placebo horizons. @@ -4372,9 +4525,13 @@ def _compute_per_group_if_placebo_horizon( Returns ------- - dict mapping lag l (positive int) -> U_pl_l array of shape (n_groups,) - NOT cohort-centered. The caller applies ``_cohort_recenter()`` - before computing SE. + dict mapping lag l (positive int) -> (U_pl_l, U_per_period_pl_l) tuple + - ``U_pl_l``: np.ndarray of shape (n_groups,). NOT cohort- + centered; the caller applies ``_cohort_recenter()``. + - ``U_per_period_pl_l``: np.ndarray of shape (n_groups, n_periods). + Per-``(g, t)`` contributions attributed to ``t = backward_idx`` + (the pre-period observation that supports the contrast). + Satisfies ``U_per_period_pl_l.sum(axis=1) == U_pl_l``. 
""" n_groups, n_periods = D_mat.shape is_switcher = first_switch_idx >= 0 @@ -4387,10 +4544,15 @@ def _compute_per_group_if_placebo_horizon( baseline_groups[float(d)] = np.where(mask)[0] baseline_f[float(d)] = first_switch_idx[mask] - results: Dict[int, np.ndarray] = {} + results: Dict[int, Tuple[np.ndarray, Optional[np.ndarray]]] = {} for l in range(1, L_max + 1): # noqa: E741 U_pl = np.zeros(n_groups, dtype=float) + U_per_period_pl: Optional[np.ndarray] = ( + np.zeros((n_groups, n_periods), dtype=float) + if compute_per_period + else None + ) for g in range(n_groups): if not is_switcher[g]: @@ -4431,15 +4593,23 @@ def _compute_per_group_if_placebo_horizon( if n_ctrl == 0: continue - # Switcher contribution: paper convention backward - ref + # Switcher contribution: paper convention backward - ref. + # Attribute the whole contrast to the backward cell + # (mirrors the multi-horizon / DID_M post-period + # attribution convention). switcher_change = Y_mat[g, backward_idx] - Y_mat[g, ref_idx] U_pl[g] += S_g * switcher_change + if U_per_period_pl is not None: + U_per_period_pl[g, backward_idx] += S_g * switcher_change # Control contributions ctrl_changes = Y_mat[ctrl_pool, backward_idx] - Y_mat[ctrl_pool, ref_idx] - U_pl[ctrl_pool] -= (S_g / n_ctrl) * ctrl_changes + ctrl_contrib = (S_g / n_ctrl) * ctrl_changes + U_pl[ctrl_pool] -= ctrl_contrib + if U_per_period_pl is not None: + U_per_period_pl[ctrl_pool, backward_idx] -= ctrl_contrib - results[l] = U_pl + results[l] = (U_pl, U_per_period_pl) return results @@ -4780,7 +4950,8 @@ def _compute_full_per_group_contributions( a11_plus_zeroed_arr: np.ndarray, a11_minus_zeroed_arr: np.ndarray, side: str = "overall", -) -> np.ndarray: + compute_per_period: bool = True, +) -> Tuple[np.ndarray, Optional[np.ndarray]]: """ Compute the per-group influence function ``U^G_g`` for ``DID_M``, ``DID_+``, or ``DID_-`` by summing role-weighted outcome differences @@ -4830,16 +5001,39 @@ def _compute_full_per_group_contributions( U : 
np.ndarray of shape (n_groups,) Per-group contributions. NOT cohort-centered; the caller is responsible for centering before computing the SE. + U_per_period : np.ndarray of shape (n_groups, n_periods) or ``None`` + Per-``(g, t)``-cell contributions, attributed to the post- + period ``t`` of each transition pair. Satisfies + ``U_per_period.sum(axis=1) == U`` exactly. NOT cohort-centered. + Used by the cell-period IF allocator in + ``_survey_se_from_group_if`` so that PSU/strata that vary + across the cells of g can be honored by design-based variance. + ``None`` when ``compute_per_period=False`` (the caller has no + survey design, so the allocator is not needed and building the + dense O(n_groups * n_periods) tensor would be wasted work). """ if side not in ("overall", "joiners", "leavers"): raise ValueError(f"side must be one of overall/joiners/leavers, got {side!r}") n_groups, n_periods = D_mat.shape U = np.zeros(n_groups, dtype=float) + U_per_period: Optional[np.ndarray] = ( + np.zeros((n_groups, n_periods), dtype=float) + if compute_per_period + else None + ) include_joiners_side = side in ("overall", "joiners") include_leavers_side = side in ("overall", "leavers") + # Per-cell attribution convention (not a derivation from the + # observation-level survey linearization — see REGISTRY.md + # ``ChaisemartinDHaultfoeuille`` Note on survey IF expansion): + # attribute each (Y_curr - Y_prev) transition as a single + # difference to its post-period cell (g, t_idx). Preserves the + # row-sum identity U_per_period.sum(axis=1) == U and therefore + # the group-sum invariance that makes the cell expansion + # byte-identical to the pre-allocator convention under PSU=group. 
for t_idx in range(1, n_periods): d_curr = D_mat[:, t_idx] d_prev = D_mat[:, t_idx - 1] @@ -4867,6 +5061,9 @@ def _compute_full_per_group_contributions( ): U[joiner_mask] += y_diff[joiner_mask] U[stable0_mask] -= (n_10_t / n_00_t) * y_diff[stable0_mask] + if U_per_period is not None: + U_per_period[joiner_mask, t_idx] += y_diff[joiner_mask] + U_per_period[stable0_mask, t_idx] -= (n_10_t / n_00_t) * y_diff[stable0_mask] # Leavers side (-y_diff for leavers; +(n_01/n_11)*y_diff for stable_1) if ( @@ -4877,8 +5074,11 @@ def _compute_full_per_group_contributions( ): U[leaver_mask] -= y_diff[leaver_mask] U[stable1_mask] += (n_01_t / n_11_t) * y_diff[stable1_mask] + if U_per_period is not None: + U_per_period[leaver_mask, t_idx] -= y_diff[leaver_mask] + U_per_period[stable1_mask, t_idx] += (n_01_t / n_11_t) * y_diff[stable1_mask] - return U + return U, U_per_period def _cohort_recenter( @@ -4906,6 +5106,44 @@ def _cohort_recenter( return U_centered +def _cohort_recenter_per_period( + U_per_period: np.ndarray, + cohort_ids: np.ndarray, +) -> np.ndarray: + """ + Column-wise cohort recentering of a per-(g, t) IF attribution. + + For each period column t, subtract the cohort-conditional mean of + that column: ``U_centered[g, t] = U[g, t] - mean_{g' in cohort(g)} + U[g', t]``. The row sum identity is preserved: + + sum_t U_centered[g, t] + = sum_t U[g, t] - sum_t cohort_mean[cohort(g), t] + = U[g] - cohort_mean_of_U[cohort(g)] + = U_centered[g] + + Hence the cell-level Binder TSL aggregation telescopes to the same + group-level sum produced by ``_cohort_recenter`` on ``U``, giving + byte-identical variance under within-group-constant PSU (old + validator's accepted input set) and a per-cell attribution under + within-group-varying PSU that follows the library's post-period + convention (see the REGISTRY.md Note on survey IF expansion — a + formal derivation from the observation-level survey linearization + is still open, tracked in ``TODO.md``). 
+ """ + U_centered = U_per_period.astype(float).copy() + if U_per_period.size == 0: + return U_centered + unique_cohorts = np.unique(cohort_ids) + for k in unique_cohorts: + in_cohort = cohort_ids == k + if in_cohort.any(): + # Per-period mean across groups in this cohort + col_means = U_per_period[in_cohort].mean(axis=0) + U_centered[in_cohort] = U_per_period[in_cohort] - col_means[np.newaxis, :] + return U_centered + + def _compute_cohort_recentered_inputs( D_mat: np.ndarray, Y_mat: np.ndarray, @@ -4918,7 +5156,19 @@ def _compute_cohort_recentered_inputs( a11_minus_zeroed_arr: np.ndarray, all_groups: List[Any], singleton_baseline_groups: List[Any], -) -> Tuple[np.ndarray, int, int, int, np.ndarray, np.ndarray]: + compute_per_period: bool = True, +) -> Tuple[ + np.ndarray, # U_centered_overall (n_eligible,) + int, # n_groups_for_overall + int, # n_cohorts + int, # n_groups_dropped_never_switching + np.ndarray, # U_centered_joiners + np.ndarray, # U_centered_leavers + List[Any], # eligible_group_ids + Optional[np.ndarray], # U_centered_per_period_overall (n_eligible, n_periods) or None + Optional[np.ndarray], # U_centered_per_period_joiners or None + Optional[np.ndarray], # U_centered_per_period_leavers or None +]: """ Compute the cohort-centered influence-function vectors for variance. 
@@ -4966,6 +5216,9 @@ def _compute_cohort_recentered_inputs( n_groups, n_periods = D_mat.shape if n_groups == 0: + empty_pp: Optional[np.ndarray] = ( + np.zeros((0, 0), dtype=float) if compute_per_period else None + ) return ( np.array([], dtype=float), 0, @@ -4973,6 +5226,10 @@ def _compute_cohort_recentered_inputs( 0, np.array([], dtype=float), np.array([], dtype=float), + [], + empty_pp, + empty_pp, + empty_pp, ) # Per-group switch metadata via the shared helper (factored out in @@ -5008,8 +5265,10 @@ def _compute_cohort_recentered_inputs( cohort_id[g] = unique_cohorts[key] n_cohorts = len(unique_cohorts) - # Compute the full IF vectors via the new helper - U_overall_full = _compute_full_per_group_contributions( + # Compute the full IF vectors + per-period attributions via the + # helper. Skip the O(n_groups * n_periods) per-period tensor + # allocation when the caller won't use it (no survey design). + U_overall_full, U_per_period_overall_full = _compute_full_per_group_contributions( D_mat=D_mat, Y_mat=Y_mat, N_mat=N_mat, @@ -5020,8 +5279,9 @@ def _compute_cohort_recentered_inputs( a11_plus_zeroed_arr=a11_plus_zeroed_arr, a11_minus_zeroed_arr=a11_minus_zeroed_arr, side="overall", + compute_per_period=compute_per_period, ) - U_joiners_full = _compute_full_per_group_contributions( + U_joiners_full, U_per_period_joiners_full = _compute_full_per_group_contributions( D_mat=D_mat, Y_mat=Y_mat, N_mat=N_mat, @@ -5032,8 +5292,9 @@ def _compute_cohort_recentered_inputs( a11_plus_zeroed_arr=a11_plus_zeroed_arr, a11_minus_zeroed_arr=a11_minus_zeroed_arr, side="joiners", + compute_per_period=compute_per_period, ) - U_leavers_full = _compute_full_per_group_contributions( + U_leavers_full, U_per_period_leavers_full = _compute_full_per_group_contributions( D_mat=D_mat, Y_mat=Y_mat, N_mat=N_mat, @@ -5044,6 +5305,7 @@ def _compute_cohort_recentered_inputs( a11_plus_zeroed_arr=a11_plus_zeroed_arr, a11_minus_zeroed_arr=a11_minus_zeroed_arr, side="leavers", + 
compute_per_period=compute_per_period, ) # Restrict to variance-eligible groups (drop singleton-baseline groups) @@ -5052,11 +5314,39 @@ def _compute_cohort_recentered_inputs( U_leavers = U_leavers_full[eligible_mask] cohort_id_eligible = cohort_id[eligible_mask] - # Cohort-recenter each IF vector + # Cohort-recenter each IF vector (group level for plug-in path) U_centered_overall = _cohort_recenter(U_overall, cohort_id_eligible) U_centered_joiners = _cohort_recenter(U_joiners, cohort_id_eligible) U_centered_leavers = _cohort_recenter(U_leavers, cohort_id_eligible) + # Per-period cohort recentering for the cell-period survey allocator. + # Column-wise centering preserves sum_t U_centered_pp[g, t] = + # U_centered[g], so Binder TSL PSU-level aggregation telescopes to + # the same group-level sum under within-group-constant PSU (byte- + # identical to the old allocator). Under within-group-varying PSU + # the per-cell attribution follows the library's post-period + # convention — a formal derivation from the observation-level + # survey linearization is an open question tracked in TODO.md. + # Skip entirely when the 2D tensor was not computed. 
+ if U_per_period_overall_full is not None: + U_centered_pp_overall: Optional[np.ndarray] = _cohort_recenter_per_period( + U_per_period_overall_full[eligible_mask], cohort_id_eligible + ) + else: + U_centered_pp_overall = None + if U_per_period_joiners_full is not None: + U_centered_pp_joiners: Optional[np.ndarray] = _cohort_recenter_per_period( + U_per_period_joiners_full[eligible_mask], cohort_id_eligible + ) + else: + U_centered_pp_joiners = None + if U_per_period_leavers_full is not None: + U_centered_pp_leavers: Optional[np.ndarray] = _cohort_recenter_per_period( + U_per_period_leavers_full[eligible_mask], cohort_id_eligible + ) + else: + U_centered_pp_leavers = None + # Eligible group IDs for survey IF expansion eligible_group_ids = [all_groups[g] for g in range(n_groups) if eligible_mask[g]] @@ -5068,6 +5358,9 @@ def _compute_cohort_recentered_inputs( U_centered_joiners, U_centered_leavers, eligible_group_ids, + U_centered_pp_overall, + U_centered_pp_joiners, + U_centered_pp_leavers, ) @@ -5112,58 +5405,108 @@ def _plugin_se(U_centered: np.ndarray, divisor: int) -> float: return float(np.sqrt(sigma_hat_sq) / np.sqrt(divisor)) -def _validate_group_constant_strata_psu( +def _strata_psu_vary_within_group( resolved: Any, data: pd.DataFrame, group_col: str, survey_weights: Optional[np.ndarray], +) -> Tuple[bool, bool]: + """Return (strata_varies_within_group, psu_varies_within_group). + + Diagnostic helper used to gate out-of-scope combinations for the + cell-period IF allocator — heterogeneity and ``n_bootstrap > 0`` + currently require within-group constancy because they read + ``obs_survey_info`` through the legacy group-level expansion path. + PR 3 and PR 4 will extend them. Zero-weight rows are excluded from + the check (subpopulation contract). 
+ """ + if resolved is None: + return False, False + pos_mask = np.asarray(survey_weights) > 0 + if not pos_mask.any(): + return False, False + g_eff = np.asarray(data[group_col].values)[pos_mask] + strata_varies = False + psu_varies = False + if resolved.strata is not None: + s_eff = np.asarray(resolved.strata)[pos_mask] + strata_varies = bool( + pd.DataFrame({"g": g_eff, "s": s_eff}).groupby("g")["s"].nunique().gt(1).any() + ) + if resolved.psu is not None: + p_eff = np.asarray(resolved.psu)[pos_mask] + psu_varies = bool( + pd.DataFrame({"g": g_eff, "p": p_eff}).groupby("g")["p"].nunique().gt(1).any() + ) + return strata_varies, psu_varies + + +def _validate_cell_constant_strata_psu( + resolved: Any, + data: pd.DataFrame, + group_col: str, + time_col: str, + survey_weights: Optional[np.ndarray], ) -> None: - """Reject survey designs where strata or PSU vary within group. - - The dCDH IF expansion ``psi_i = U[g] * (w_i / W_g)`` treats the - group as the effective sampling unit for design-based variance. - When strata or PSU vary within group, the expansion silently spreads - horizon-specific IF mass onto observations whose survey stratum or - PSU differs from the rest of the group, contaminating the - stratified-PSU variance. Reject those designs with a clear error. - - Zero-weight rows are excluded from the check (subpopulation - contract — an excluded row with a different stratum/PSU label does - not actually participate in the variance). + """Reject survey designs where strata or PSU vary within a (g, t) cell. + + Under the cell-period IF allocator, ``psi_i`` attributes each + observation's mass to its ``(g, t)`` cell scaled by proportional + weight share: + ``psi_i = U_centered_per_period[g, t] * (w_i / W_{g, t})``. Binder + TSL aggregates at PSU level, so strata / PSU labels within a cell + must be unambiguous. 
In one-obs-per-cell panels (the canonical dCDH + structure) this check is trivially satisfied; in multi-obs-per-cell + panels, a cell split across strata or PSUs is rejected. This is a + strict relaxation of the old within-group constancy rule shipped + before the cell-period allocator (REGISTRY.md + ``ChaisemartinDHaultfoeuille`` Note on survey IF expansion). + + Zero-weight rows are excluded (subpopulation contract). """ if resolved is None: return pos_mask = np.asarray(survey_weights) > 0 + if not pos_mask.any(): + return g_eff = np.asarray(data[group_col].values)[pos_mask] + t_eff = np.asarray(data[time_col].values)[pos_mask] if resolved.strata is not None: s_eff = np.asarray(resolved.strata)[pos_mask] - df_s = pd.DataFrame({"g": g_eff, "s": s_eff}) - varying = df_s.groupby("g")["s"].nunique() + df_cell = pd.DataFrame({"g": g_eff, "t": t_eff, "s": s_eff}) + varying = df_cell.groupby(["g", "t"])["s"].nunique() bad = varying[varying > 1] if len(bad) > 0: raise ValueError( f"ChaisemartinDHaultfoeuille survey support requires " - f"strata to be constant within group, but " - f"{len(bad)} group(s) have multiple strata " - f"(examples: {bad.index.tolist()[:5]}). The IF " - f"expansion psi_i = U[g] * (w_i / W_g) treats the " - f"group as the effective sampling unit for stratified " - f"design-based variance." + f"strata to be constant within each (group, time) cell " + f"under the cell-period IF allocator, but {len(bad)} " + f"cell(s) have multiple strata (examples: " + f"{bad.index.tolist()[:5]}). The cell allocator treats " + f"the (g, t) cell as the effective sampling unit for " + f"stratified Binder variance; within-cell stratum " + f"variation is ambiguous. The allocator DOES support " + f"strata that vary across cells of the same group " + f"(relaxation over the previous within-group constancy " + f"rule shipped in earlier releases)." 
) if resolved.psu is not None: p_eff = np.asarray(resolved.psu)[pos_mask] - df_p = pd.DataFrame({"g": g_eff, "p": p_eff}) - varying = df_p.groupby("g")["p"].nunique() + df_cell = pd.DataFrame({"g": g_eff, "t": t_eff, "p": p_eff}) + varying = df_cell.groupby(["g", "t"])["p"].nunique() bad = varying[varying > 1] if len(bad) > 0: raise ValueError( f"ChaisemartinDHaultfoeuille survey support requires " - f"PSU to be constant within group, but " - f"{len(bad)} group(s) have multiple PSUs " - f"(examples: {bad.index.tolist()[:5]}). The IF " - f"expansion psi_i = U[g] * (w_i / W_g) treats the " - f"group as the effective sampling unit for stratified " - f"design-based variance." + f"PSU to be constant within each (group, time) cell " + f"under the cell-period IF allocator, but {len(bad)} " + f"cell(s) have multiple PSUs (examples: " + f"{bad.index.tolist()[:5]}). The cell allocator treats " + f"the (g, t) cell as the effective sampling unit; " + f"within-cell PSU variation is ambiguous. The allocator " + f"DOES support PSU that varies across cells of the same " + f"group (relaxation over the previous within-group " + f"constancy rule shipped in earlier releases)." ) @@ -5236,6 +5579,7 @@ def _compute_se( divisor: int, obs_survey_info: Optional[dict], eligible_groups: Optional[list] = None, + U_centered_per_period: Optional[np.ndarray] = None, ) -> Tuple[float, Optional[int]]: """Dispatch to plug-in SE or survey-design-aware SE. @@ -5244,6 +5588,17 @@ def _compute_se( to TSL variance (or replicate-weight variance when the resolved design carries replicate weights). + ``U_centered_per_period`` is the cell-period attribution of + ``U_centered``. Each row ``g`` satisfies + ``U_centered_per_period[g, :].sum() == U_centered[g]``. 
When + supplied, the survey-aware path expands influence to observations + using the cell-level allocator + ``psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})``; + when ``None`` the path falls back to the group-level allocator + ``psi_i = U_centered[g_i] * (w_i / W_{g_i})``. Byte-identical + under within-group-constant PSU since both allocators telescope to + the same PSU-level sum (REGISTRY.md survey IF expansion Note). + Returns ``(se, n_valid_replicates)``. ``n_valid_replicates`` is ``None`` under the plug-in / TSL paths, and the number of valid replicate columns under the replicate path. @@ -5258,10 +5613,16 @@ def _compute_se( # compute_survey_if_variance() expects estimator-scale psi. # Scale by 1/divisor to normalize before survey expansion. U_scaled = U_centered / divisor + U_pp_scaled = ( + U_centered_per_period / divisor + if U_centered_per_period is not None + else None + ) return _survey_se_from_group_if( U_centered=U_scaled, eligible_groups=eligible_groups, obs_survey_info=obs_survey_info, + U_centered_per_period=U_pp_scaled, ) @@ -5269,31 +5630,45 @@ def _survey_se_from_group_if( U_centered: np.ndarray, eligible_groups: list, obs_survey_info: dict, + U_centered_per_period: Optional[np.ndarray] = None, ) -> Tuple[float, Optional[int]]: - """Compute survey-design-aware SE from group-level centered IFs. - - Expands group-level influence function values to observation level - using the proportional-weight allocation ``psi_i = U[g] * (w_i / W_g)`` - so that ``sum(psi_i for i in g) = U[g]``, then delegates to either - ``compute_survey_if_variance()`` (TSL / analytical path) or - ``compute_replicate_if_variance()`` (BRR/Fay/JK1/JKn/SDR) depending - on whether the resolved design carries replicate weights. - - This is a library extension not in the dCDH papers (the paper's - plug-in variance assumes iid sampling). The replicate path uses - Rao-Wu weight-ratio rescaling; see REGISTRY.md - ``ChaisemartinDHaultfoeuille`` Note on survey expansion. 
+ """Compute survey-design-aware SE from per-group / per-cell centered IFs. + + Expands influence-function mass to observation level using the + **cell-period allocator** + ``psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})`` + when ``U_centered_per_period`` is supplied, or the legacy group- + level allocator ``psi_i = U_centered[g_i] * (w_i / W_{g_i})`` + otherwise. Both allocators are equivalent at the PSU-level sum + under within-group-constant PSU (per-cell sums telescope to + ``U_centered[g]``), so Binder TSL variance is byte-identical on + inputs the old within-group-constant validator accepted. The cell- + period allocator additionally supports PSU/strata that vary across + cells of g — the lift the allocator buys for Stage 2. Dispatches + to ``compute_survey_if_variance()`` (TSL) or + ``compute_replicate_if_variance()`` (BRR/Fay/JK1/JKn/SDR) based on + the resolved design. See REGISTRY.md ``ChaisemartinDHaultfoeuille`` + Note on survey IF expansion for the contract. Parameters ---------- U_centered : np.ndarray Cohort-recentered per-group IF values (one per eligible group). + Used for degenerate-cohort detection and fallback expansion. eligible_groups : list - Group IDs corresponding to entries of ``U_centered`` - (variance-eligible groups after singleton-baseline exclusion). + Group IDs corresponding to entries of ``U_centered`` / + ``U_centered_per_period`` (variance-eligible groups after + singleton-baseline exclusion). obs_survey_info : dict Observation-level survey data retained from ``fit()`` setup: - ``{"group_ids": ..., "weights": ..., "resolved": ...}``. + ``{"group_ids", "time_ids", "weights", "resolved", "periods"}``. + ``time_ids`` and ``periods`` are required for the cell-level + allocator; when either is absent, the group-level allocator is + used. 
+ U_centered_per_period : np.ndarray of shape (n_eligible, n_periods), optional + Cohort-recentered per-``(g, t)``-cell IF attributions, with + ``U_centered_per_period.sum(axis=1) == U_centered``. When + supplied, the cell allocator is used. Returns ------- @@ -5301,9 +5676,7 @@ def _survey_se_from_group_if( ``(se, n_valid_replicates)``. ``se`` is the survey-design-aware SE, or NaN if degenerate. ``n_valid_replicates`` is the number of valid replicate columns used for variance under the replicate - path, or ``None`` under the TSL (analytical) path. Callers - track the min across sites to set an effective ``df_survey`` - for replicate designs. + path, or ``None`` under the TSL (analytical) path. """ from diff_diff.survey import ( compute_replicate_if_variance, @@ -5325,6 +5698,8 @@ def _survey_se_from_group_if( group_ids = obs_survey_info["group_ids"] weights = obs_survey_info["weights"] resolved = obs_survey_info["resolved"] + time_ids = obs_survey_info.get("time_ids") + periods = obs_survey_info.get("periods") # Zero-weight rows are out-of-sample (SurveyDesign.subpopulation()). 
# Skip them before the group-ID factorization so NaN / non-comparable @@ -5343,20 +5718,65 @@ def _survey_se_from_group_if( gids_eff = np.asarray(group_ids)[pos_mask] w_eff = weights_arr[pos_mask] - # Build group → U_centered lookup (vectorized via factorization) - group_to_u = {gid: U_centered[idx] for idx, gid in enumerate(eligible_groups)} - - # Map group IFs to observation level (effective sample only) - u_obs_eff = np.array([group_to_u.get(gid, 0.0) for gid in gids_eff]) - - # Compute per-group weight totals W_g via bincount on effective sample - unique_gids, inverse = np.unique(gids_eff, return_inverse=True) - w_totals_per_group = np.bincount(inverse, weights=w_eff) - w_obs_total_eff = w_totals_per_group[inverse] + use_cell_allocator = ( + U_centered_per_period is not None + and time_ids is not None + and periods is not None + and U_centered_per_period.size > 0 + ) - # Expand to observation level: psi_i = U[g] * (w_i / W_g) - safe_w = np.where(w_obs_total_eff > 0, w_obs_total_eff, 1.0) - psi[pos_mask] = u_obs_eff * (w_eff / safe_w) + if use_cell_allocator: + tids_eff = np.asarray(time_ids)[pos_mask] + # Map row's group to an index in eligible_groups (−1 when the + # group is ineligible — singleton-baseline exclusion drops it). + eligible_idx_by_gid = {gid: i for i, gid in enumerate(eligible_groups)} + elig_idx_eff = np.array( + [eligible_idx_by_gid.get(gid, -1) for gid in gids_eff], + dtype=np.int64, + ) + # Map row's time to a column index in U_centered_per_period. + # get_indexer returns −1 for values absent from `periods` + # (should be rare in practice — positive-weight rows almost + # always survive cell aggregation; if one slips through we + # treat it like an ineligible row rather than crash). 
+ col_idx_eff = np.asarray( + pd.Index(periods).get_indexer(tids_eff), dtype=np.int64 + ) + valid_cell = (elig_idx_eff >= 0) & (col_idx_eff >= 0) + n_eligible = len(eligible_groups) + n_periods_pp = U_centered_per_period.shape[1] + # Per-(g, t) weight totals W_{g,t} over the effective sample. + W_cell = np.zeros((n_eligible, n_periods_pp), dtype=np.float64) + np.add.at( + W_cell, + (elig_idx_eff[valid_cell], col_idx_eff[valid_cell]), + w_eff[valid_cell], + ) + # Lookup U_centered_per_period and W_cell per row. + u_obs_cell = np.zeros(w_eff.shape[0], dtype=np.float64) + u_obs_cell[valid_cell] = U_centered_per_period[ + elig_idx_eff[valid_cell], col_idx_eff[valid_cell] + ] + w_cell_at_row = np.zeros(w_eff.shape[0], dtype=np.float64) + w_cell_at_row[valid_cell] = W_cell[ + elig_idx_eff[valid_cell], col_idx_eff[valid_cell] + ] + safe_w = np.where(w_cell_at_row > 0, w_cell_at_row, 1.0) + psi[pos_mask] = u_obs_cell * (w_eff / safe_w) + else: + # Legacy group-level allocator (no per-period attribution + # provided, or time/period info unavailable). Preserved for + # paths that haven't threaded per-period attribution through + # yet (e.g., the heterogeneity psi_obs construction in + # _compute_heterogeneity_test — gated to within-group-constant + # PSU in Stage 2 per PR 2 scope). + group_to_u = {gid: U_centered[idx] for idx, gid in enumerate(eligible_groups)} + u_obs_eff = np.array([group_to_u.get(gid, 0.0) for gid in gids_eff]) + unique_gids, inverse = np.unique(gids_eff, return_inverse=True) + w_totals_per_group = np.bincount(inverse, weights=w_eff) + w_obs_total_eff = w_totals_per_group[inverse] + safe_w = np.where(w_obs_total_eff > 0, w_obs_total_eff, 1.0) + psi[pos_mask] = u_obs_eff * (w_eff / safe_w) # Dispatch: replicate variance (BRR/Fay/JK1/JKn/SDR) vs TSL. 
# Mirrors the inline branch in TripleDifference:1206-1238 and diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 64d2e63a..424b0c2e 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -589,7 +589,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note:** When every variance-eligible group forms its own `(D_{g,1}, F_g, S_g)` cohort (a degenerate small-panel case where the cohort framework has zero degrees of freedom), the cohort-recentered plug-in formula is unidentified: cohort recentering subtracts the cohort mean from each group's `U^G_g`, and for singleton cohorts the centered value is exactly zero, so the centered influence function vector collapses to all zeros. The estimator returns `overall_se = NaN` with a `UserWarning` rather than silently collapsing to `0.0` (which would falsely imply infinite precision). The `DID_M` point estimate remains well-defined. The bootstrap path inherits the same degeneracy on these panels — the multiplier weights act on an all-zero vector, so the bootstrap distribution is also degenerate. **Deviation from R `DIDmultiplegtDYN`:** R returns a non-zero SE on the canonical 4-group worked example via small-sample sandwich machinery that Python does not implement. Both responses are valid for a degenerate case; Python's `NaN`+warning is the safer default. To get a non-degenerate SE, include more groups so cohorts have peers (real-world panels typically have `G >> K`). -- **Note (cluster contract):** `ChaisemartinDHaultfoeuille` clusters at the group level by default. The cohort-recentered analytical SE plug-in operates on per-group influence-function values (one `U^G_g` per group); the multiplier bootstrap generates one weight per group. 
The user-facing `cluster=` kwarg is not supported: the constructor accepts `cluster=None` (the default and only supported value); passing any non-`None` value raises `NotImplementedError` at construction time (and the same gate fires from `set_params`) — custom user-specified clustering is reserved for a future phase. **Automatic PSU-level clustering under `survey_design`:** when `survey_design` carries PSUs strictly coarser than the group and `n_bootstrap > 0`, the multiplier bootstrap automatically switches to Hall-Mammen wild PSU clustering (see the Note on survey + bootstrap below). Under the default auto-inject `psu=group`, PSU coincides with group and the group-level and PSU-level paths are identical (bit-for-bit via the identity-map fast path). The matching test for the `cluster=` gate is `test_cluster_parameter_raises_not_implemented` in `tests/test_chaisemartin_dhaultfoeuille.py::TestForwardCompatGates`. +- **Note (cluster contract):** `ChaisemartinDHaultfoeuille` clusters at the group level by default. The analytical SE plug-in operates on per-group influence-function values (one `U^G_g` per group) and, under the cell-period allocator, on their per-cell decomposition `U[g, t]` which telescopes back to `U^G_g` at the PSU-level sum. The multiplier bootstrap generates one weight per group. The user-facing `cluster=` kwarg is not supported: the constructor accepts `cluster=None` (the default and only supported value); passing any non-`None` value raises `NotImplementedError` at construction time (and the same gate fires from `set_params`) — custom user-specified clustering is reserved for a future phase. **Automatic PSU-level clustering under `survey_design`:** the analytical TSL path supports PSU labels that vary across cells of a group (within-cell constancy required); the multiplier bootstrap still uses the legacy group-level PSU map and raises `NotImplementedError` when combined with within-group-varying PSU (PR 4 lifts this). 
Under the default auto-inject `psu=group`, PSU coincides with group and the group-level and cell-level paths are identical at PSU-sum granularity (bit-for-bit via the identity-map fast path). The matching test for the `cluster=` gate is `test_cluster_parameter_raises_not_implemented` in `tests/test_chaisemartin_dhaultfoeuille.py::TestForwardCompatGates`. - **Note (bootstrap inference surface):** When `n_bootstrap > 0`, the top-level `results.overall_p_value` / `results.overall_conf_int` (and joiners/leavers analogues) hold **percentile-based bootstrap inference** computed by the multiplier bootstrap, NOT normal-theory recomputations from the bootstrap SE. The t-stat (`overall_t_stat`, etc.) is computed from the SE via `safe_inference()[0]` to satisfy the project's anti-pattern rule (never compute `t = effect / se` inline) — bootstrap does not define an alternative t-stat semantic for percentile bootstrap, so the SE-based t-stat is the natural choice. `event_study_effects[1]`, `summary()`, `to_dataframe()`, `is_significant`, and `significance_stars` all read from these top-level fields and therefore reflect the bootstrap inference automatically. The library precedent for this propagation is `imputation.py:790-805`, `two_stage.py:778-787`, and `efficient_did.py:1009-1013`. The single-period placebo (`L_max=None`) still has NaN bootstrap fields; multi-horizon placebos (`L_max >= 1`) have valid bootstrap SE/CI/p via `placebo_horizon_ses/cis/p_values` on the bootstrap results object. The matching test is `test_bootstrap_p_value_and_ci_propagated_to_top_level` in `tests/test_chaisemartin_dhaultfoeuille.py::TestBootstrap`. @@ -615,7 +615,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). 
The restriction applies in all four DID/IF paths: `_compute_multi_horizon_dids()`, `_compute_per_group_if_multi_horizon()`, `_compute_multi_horizon_placebos()`, and `_compute_per_group_if_placebo_horizon()`. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). Set membership must be time-invariant per group. **Note on Assumption 14 (common support):** The paper requires a common last-untreated period across sets (`T_u^s` equal for all `s`). This implementation does NOT enforce Assumption 14 up front. Instead, when within-set controls are exhausted at a given horizon (because a set has shorter untreated support than others), the affected switcher/horizon pairs are silently excluded via the existing empty-control-pool mechanism. This means `N_l` may be smaller under `trends_nonparam` than without it, and the effective estimand is trimmed to the within-set support at each horizon. The existing multi-horizon A11 warning fires when exclusions occur. Activated via `trends_nonparam="state_column"` in `fit()`. -- **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). 
Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. **Note (survey support):** Under `survey_design`, heterogeneity uses WLS with per-group weights `W_g = sum of obs-level survey weights in group g`, and the standard error is computed via Taylor Series Linearization of the group-level influence function: `ψ_g[X] = inv(X'WX)[1,:] @ x_g * W_g * r_g`, expanded to observation level as `ψ_i = ψ_g * (w_i / W_g)`, then aggregated through `compute_survey_if_variance` for stratified/PSU/FPC variance. Inference uses the t-distribution with `df_survey` when provided. Under rank deficiency (any regression coefficient dropped by `solve_ols`'s R-style drop), all inference fields return NaN (conservative, matches the NaN-consistent contract). **Library extension (replicate weights):** Under a replicate-weight design (BRR/Fay/JK1/JKn/SDR), the heterogeneity regression dispatches to `compute_replicate_if_variance` (Rao-Wu weight-ratio rescaling) instead of the Binder TSL formula. The effective df is the shared `min(resolved_survey.df_survey, min(n_valid_across_sites) - 1)` used by the rest of the dCDH surfaces; if the base `df_survey` is undefined (QR-rank ≤ 1), heterogeneity inference is NaN regardless of the local `n_valid_het` (matching the dCDH top-level contract — per-site `n_valid` cannot rescue a rank-deficient design). **Library extension:** R `DIDmultiplegtDYN::predict_het` does not natively support survey weights. +- **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). 
**Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. **Note (survey support):** Under `survey_design`, heterogeneity uses WLS with per-group weights `W_g = sum of obs-level survey weights in group g`, and the standard error is computed via Taylor Series Linearization of the group-level influence function: `ψ_g[X] = inv(X'WX)[1,:] @ x_g * W_g * r_g`, expanded to observation level as `ψ_i = ψ_g * (w_i / W_g)`, then aggregated through `compute_survey_if_variance` for stratified/PSU/FPC variance. Inference uses the t-distribution with `df_survey` when provided. Under rank deficiency (any regression coefficient dropped by `solve_ols`'s R-style drop), all inference fields return NaN (conservative, matches the NaN-consistent contract). **Library extension (replicate weights):** Under a replicate-weight design (BRR/Fay/JK1/JKn/SDR), the heterogeneity regression dispatches to `compute_replicate_if_variance` (Rao-Wu weight-ratio rescaling) instead of the Binder TSL formula. The effective df is the shared `min(resolved_survey.df_survey, min(n_valid_across_sites) - 1)` used by the rest of the dCDH surfaces; if the base `df_survey` is undefined (QR-rank ≤ 1), heterogeneity inference is NaN regardless of the local `n_valid_het` (matching the dCDH top-level contract — per-site `n_valid` cannot rescue a rank-deficient design). **Library extension:** R `DIDmultiplegtDYN::predict_het` does not natively support survey weights. 
**Scope note (cell-period allocator):** Heterogeneity still uses the legacy group-level IF expansion `ψ_i = ψ_g * (w_i / W_g)`. Combining `heterogeneity=` with a survey design whose PSU or strata vary within group raises `NotImplementedError`; the cell-period extension of the heterogeneity path is deferred to a follow-up PR. Within-group-constant PSU/strata (including the auto-injected `psu=group`) are fully supported.
- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`, targeting the equal-weight average over all post-treatment horizons (`l_vec=None`). R's HonestDiD defaults to the first post/on-impact effect; use `compute_honest_did(results, ...)` with a custom `l_vec` to match that behavior. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
@@ -649,8 +649,8 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6) - [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface - [x] Survey design support: pweight with strata/PSU/FPC via Taylor Series Linearization (analytical) **or replicate-weight variance (BRR/Fay/JK1/JKn/SDR)**, covering the main ATT surface, covariate adjustment (DID^X), heterogeneity testing, the TWFE diagnostic (fit and standalone `twowayfeweights()` helper), and HonestDiD bounds. Opt-in **PSU-level Hall-Mammen wild bootstrap** is also supported via `n_bootstrap > 0`. -- **Note:** Survey IF expansion (`psi_i = U[g] * (w_i / W_g)`) is a library extension not in the dCDH papers. The paper's plug-in variance assumes iid sampling; the TSL variance accounts for complex survey design by expanding group-level influence functions to observation level proportionally to survey weights, then applying the standard Binder (1983) stratified PSU variance formula. The expansion treats each group as the effective sampling unit; **strata and PSU must therefore be constant within group** (validated in `fit()` — designs with mixed strata or PSU labels within a single group raise `ValueError`). When `survey_design.psu` is not specified, `fit()` auto-injects `psu=` so the TSL variance, `df_survey`, and t-based inference match the per-group structure the IF expansion assumes; users who want a coarser cluster can pass an explicit `psu` that is constant within group. Within-group-varying **weights** are supported (each observation contributes proportionally). Under replicate-weight designs, the same group-level IF expansion is aggregated via Rao-Wu weight-ratio rescaling (`compute_replicate_if_variance` at `diff_diff/survey.py:1681`) rather than the Binder TSL formula. 
All five methods (BRR/Fay/JK1/JKn/SDR) are supported method-agnostically through the unified helper; the effective `df_survey` is reduced to `min(n_valid) - 1` across IF sites when some replicate solves fail (matching `efficient_did.py:1133-1135` and `triple_diff.py:676-686` precedents). Under DID^X, the first-stage residualization coefficient `theta_hat` is computed once on full-sample weights and treated as fixed (FWL plug-in IF convention) — per-replicate refits of `theta_hat` are not performed. -- **Note (survey + bootstrap contract):** When `survey_design` and `n_bootstrap > 0` are both active, the bootstrap uses Hall-Mammen wild multiplier weights (Rademacher/Mammen/Webb) **at the PSU level**. Under the default auto-injected `psu=group`, the PSU coincides with the group so the wild bootstrap is a clean group-level clustered bootstrap (identity-map fast path, bit-identical to the non-survey multiplier bootstrap). When the user passes an explicit strictly-coarser PSU (e.g., `psu=state` with groups at county level), the IF contributions of all groups within a PSU receive the same bootstrap multiplier — the standard Hall-Mammen wild PSU bootstrap. Strata do not participate in the bootstrap randomization (they contribute only through the analytical TSL variance); this is conservative when strata differ substantially in variance. A `UserWarning` fires only when PSU is strictly coarser than group. **Replicate-weight designs and `n_bootstrap > 0` are mutually exclusive** (replicate variance is closed-form; bootstrap would double-count variance) — the combination raises `NotImplementedError`, matching `efficient_did.py:989`, `staggered.py:1869`, `two_stage.py:251-253`. 
For HonestDiD bounds under replicate weights, the replicate-effective `df_survey = min(resolved_survey.df_survey, min(n_valid_across_sites) - 1)` propagates to t-critical values — capped by the design's QR-rank-based df so a rank-deficient replicate matrix never produces a larger effective df than the design supports. When `resolved_survey.df_survey` is undefined (QR-rank ≤ 1), the effective df stays `None` and all inference fields (including HonestDiD bounds) are NaN — per-site `n_valid` cannot rescue a rank-deficient design. +- **Note (Survey IF expansion — library convention):** Survey IF expansion is a library extension not in the dCDH papers (the paper's plug-in variance assumes iid sampling). The library convention builds observation-level `psi_i` by proportionally distributing per-group IF mass within weight share: either at the group level (`psi_i = U_centered[g] * w_i / W_g`, the previous convention) or at the per-`(g, t)` cell level via the cell-period allocator shipped in this release. Cell-level expansion: decompose `U[g]` into per-period attributions `U[g, t]`, cohort-center each column independently, then expand to observation level as `psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})`. Binder (1983) stratified-PSU variance aggregates the resulting `psi` at PSU level. **Post-period attribution convention:** each transition term in the IF sum (of the form `role_weight * (Y_{g, t} - Y_{g, t-1})` for DID_M or `S_g * (Y_{g, out} - Y_{g, ref})` for DID_l) is attributed as a single *difference* to the POST-period cell, not split into a `+Y_post` / `-Y_pre` pair across two cells. 
This is a library *convention*, not a theorem — adopted because it preserves the group-sum, PSU-sum, and cohort-sum identities of the previous group-level expansion (so Binder variance coincides with the group-level variance under the auto-injected `psu=group`) and because Monte Carlo coverage is empirically close to the nominal 95% level on a DGP where PSUs vary across the cells of each group (see `tests/test_dcdh_cell_period_coverage.py`). A covariance-aware two-cell allocator is a plausible alternative and may be worth exploring if future designs motivate an explicit observation-level IF derivation; the method currently in the library is **not derived from the observation-level survey linearization of the contrast** and makes no stronger claim than "coverage is approximately nominal under the tested DGPs and the group-sum identity holds exactly." Under within-group-constant PSU (the pre-allocator accepted input), per-cell sums telescope to `U_centered[g]` and Binder variance is numerically identical (to within single-ULP floating-point noise) to the previous group-level expansion. **Strata and PSU must be constant within each `(g, t)` cell** (trivially satisfied in one-obs-per-cell panels — the canonical dCDH structure); variation **across cells of a group** is supported by the allocator. Within-group-varying **weights** are supported as before. When `survey_design.psu` is not specified, `fit()` auto-injects `psu=group` so the TSL variance, `df_survey`, and t-based inference match the per-group PSU structure. **Strata that vary across cells of a group require either an explicit `psu` column or the original `SurveyDesign(..., nest=True)` flag** — under `nest=True` the resolver combines `(stratum, psu)` into globally-unique labels, so the auto-injected `psu=group` is re-labeled per stratum and the cell allocator proceeds. 
Only the `nest=False` + varying-strata + omitted-psu combination is rejected up front with a targeted `ValueError` at `fit()` time (the synthesized PSU column would reuse group labels across strata and trip the cross-stratum PSU uniqueness check in `SurveyDesign.resolve()`). Under replicate-weight designs, the same cell-level `psi_i` is aggregated via Rao-Wu weight-ratio rescaling (`compute_replicate_if_variance` at `diff_diff/survey.py:1681`) rather than the Binder TSL formula. All five methods (BRR/Fay/JK1/JKn/SDR) are supported method-agnostically through the unified helper; the effective `df_survey` is reduced to `min(n_valid) - 1` across IF sites when some replicate solves fail (matching `efficient_did.py:1133-1135` and `triple_diff.py:676-686` precedents). Under DID^X, the first-stage residualization coefficient `theta_hat` is computed once on full-sample weights and treated as fixed (FWL plug-in IF convention) — per-replicate refits of `theta_hat` are not performed. **Scope limitations (follow-up PRs):** (a) `heterogeneity=` combined with within-group-varying PSU/strata raises `NotImplementedError` — the heterogeneity WLS `psi_obs` still uses the legacy group-level expansion, to be extended in PR 3; (b) `n_bootstrap > 0` combined with within-group-varying PSU raises `NotImplementedError` — the PSU-level Hall-Mammen wild bootstrap still uses the legacy group-level PSU map, to be extended in PR 4. +- **Note (survey + bootstrap contract):** When `survey_design` and `n_bootstrap > 0` are both active, the bootstrap uses Hall-Mammen wild multiplier weights (Rademacher/Mammen/Webb) **at the PSU level**. Under the default auto-injected `psu=group`, the PSU coincides with the group so the wild bootstrap is a clean group-level clustered bootstrap (identity-map fast path, bit-identical to the non-survey multiplier bootstrap). 
When the user passes an explicit strictly-coarser PSU (e.g., `psu=state` with groups at county level), the IF contributions of all groups within a PSU receive the same bootstrap multiplier — the standard Hall-Mammen wild PSU bootstrap. Strata do not participate in the bootstrap randomization (they contribute only through the analytical TSL variance); this is conservative when strata differ substantially in variance. A `UserWarning` fires only when PSU is strictly coarser than group. **Scope note (cell-period allocator):** The PSU-level bootstrap uses a group-level `group_id_to_psu_code` map and therefore requires PSU to be constant within each group. Combining `n_bootstrap > 0` with a PSU that varies within group raises `NotImplementedError`; the cell-level Hall-Mammen extension is deferred to a follow-up PR. The analytical TSL variance fully supports within-group-varying PSU via the cell-period allocator — use `n_bootstrap=0` for those designs. **Replicate-weight designs and `n_bootstrap > 0` are mutually exclusive** (replicate variance is closed-form; bootstrap would double-count variance) — the combination raises `NotImplementedError`, matching `efficient_did.py:989`, `staggered.py:1869`, `two_stage.py:251-253`. For HonestDiD bounds under replicate weights, the replicate-effective `df_survey = min(resolved_survey.df_survey, min(n_valid_across_sites) - 1)` propagates to t-critical values — capped by the design's QR-rank-based df so a rank-deficient replicate matrix never produces a larger effective df than the design supports. When `resolved_survey.df_survey` is undefined (QR-rank ≤ 1), the effective df stays `None` and all inference fields (including HonestDiD bounds) are NaN — per-site `n_valid` cannot rescue a rank-deficient design. 
--- diff --git a/tests/test_dcdh_cell_period_coverage.py b/tests/test_dcdh_cell_period_coverage.py new file mode 100644 index 00000000..b3ba0203 --- /dev/null +++ b/tests/test_dcdh_cell_period_coverage.py @@ -0,0 +1,140 @@ +"""Monte Carlo coverage simulation for the cell-period IF allocator. + +Validates empirical coverage under a DGP with PSU that varies across +cells of the same group — the regime unlocked by PR 2's relaxation of +the within-group constancy rule. Under within-group-constant PSU the +cell allocator reduces to the previous group allocator byte-for-byte, +so the coverage check only exercises the new code path. + +Marked ``slow`` and excluded from the default pytest run. To execute: + + pytest tests/test_dcdh_cell_period_coverage.py -m slow -v +""" + +from __future__ import annotations + +import warnings + +import numpy as np +import pandas as pd +import pytest + +from diff_diff.chaisemartin_dhaultfoeuille import ChaisemartinDHaultfoeuille +from diff_diff.survey import SurveyDesign + + +def _simulate_panel( + n_groups: int, + n_periods: int, + first_treated_period: int, + tau: float, + rng: np.random.Generator, + psu_sigma: float = 0.5, + obs_sigma: float = 1.0, + group_sigma: float = 1.0, +) -> pd.DataFrame: + """Generate a one-obs-per-cell panel with within-group-varying PSU. + + Each group has two PSUs assigned by ``period % 2``. The PSU + contributes a ``(group, psu)``-specific intercept, creating the + within-group residual correlation that design-based variance is + meant to capture. Treatment starts at ``first_treated_period`` for + the first half of groups and never for the rest. + """ + groups = np.arange(n_groups) + treated = groups < n_groups // 2 + group_fe = rng.normal(0.0, group_sigma, size=n_groups) + # (group, psu) effect — constant across time within the (g, psu) + # pair. Drives within-group correlation that varies by cell. 
+ psu_fe = rng.normal(0.0, psu_sigma, size=(n_groups, 2)) + + rows = [] + for g in groups: + for t in range(n_periods): + parity = int(t % 2) + # PSU labels are globally unique per (group, parity) so the + # Binder variance sees 2 * n_groups sampling clusters rather + # than just 2 global PSU codes reused across groups. Each + # PSU collects the cells of a single group's odd or even + # periods; within-cell constancy (one obs per cell) is + # trivially satisfied. + psu_id = int(g) * 2 + parity + d = 1 if (treated[g] and t >= first_treated_period) else 0 + y = ( + group_fe[g] + + 0.1 * t + + tau * d + + psu_fe[g, parity] + + rng.normal(0.0, obs_sigma) + ) + rows.append({ + "group": int(g), + "period": int(t), + "treatment": int(d), + "outcome": float(y), + "psu": psu_id, + "pw": 1.0, + }) + return pd.DataFrame(rows) + + +@pytest.mark.slow +def test_cell_period_allocator_coverage_within_group_varying_psu(): + """Empirical 95% coverage for DID_M under the cell-period allocator + on a DGP with within-group-varying PSU. Tolerance ±2.5pp is ~2 + MC-sigma at n_reps=500 for a true 95% target (sqrt(0.95*0.05/500) + = 0.0098, so 2-sigma = 1.96pp — ±2.5pp is deliberately a touch + looser to absorb small-sample bias without masking a real + under-coverage bug). 
+ """ + n_reps = 500 + n_groups = 40 + n_periods = 6 + first_treated_period = 3 + tau_true = 2.0 + + rng = np.random.default_rng(20260418) + covered = 0 + failed = 0 + + for r in range(n_reps): + df = _simulate_panel( + n_groups=n_groups, + n_periods=n_periods, + first_treated_period=first_treated_period, + tau=tau_true, + rng=rng, + ) + sd = SurveyDesign(weights="pw", psu="psu") + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = ChaisemartinDHaultfoeuille(seed=r + 1).fit( + df, + outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd, L_max=1, + ) + except Exception: + failed += 1 + continue + + ci = res.event_study_effects[1]["conf_int"] + if ci is None or not all(np.isfinite(ci)): + failed += 1 + continue + lo, hi = float(ci[0]), float(ci[1]) + if lo <= tau_true <= hi: + covered += 1 + + completed = n_reps - failed + assert completed >= int(0.95 * n_reps), ( + f"MC simulation had {failed}/{n_reps} fit failures, above " + f"the 5% tolerance." + ) + coverage = covered / completed + assert 0.925 <= coverage <= 0.975, ( + f"Empirical coverage {coverage:.3f} (completed {completed}) " + f"outside [0.925, 0.975]; true tau={tau_true}, " + f"n_groups={n_groups}, n_periods={n_periods}, n_reps={n_reps}." 
+ ) diff --git a/tests/test_methodology_chaisemartin_dhaultfoeuille.py b/tests/test_methodology_chaisemartin_dhaultfoeuille.py index fc62f6ca..06be4fef 100644 --- a/tests/test_methodology_chaisemartin_dhaultfoeuille.py +++ b/tests/test_methodology_chaisemartin_dhaultfoeuille.py @@ -348,7 +348,7 @@ def test_cohort_recentering_not_grand_mean(self): a11_minus_zeroed_arr, ) = _compute_per_period_dids(D_mat=D_mat, Y_mat=Y_mat, N_mat=N_mat, periods=periods) - U_overall = _compute_full_per_group_contributions( + U_overall, _ = _compute_full_per_group_contributions( D_mat=D_mat, Y_mat=Y_mat, N_mat=N_mat, diff --git a/tests/test_survey_dcdh.py b/tests/test_survey_dcdh.py index 1aa9f14c..e400c3eb 100644 --- a/tests/test_survey_dcdh.py +++ b/tests/test_survey_dcdh.py @@ -1072,35 +1072,201 @@ def test_survey_design2_runs(self): class TestSurveyWithinGroupValidation: - """Survey designs with strata or PSU varying within a single group - are rejected because the dCDH IF expansion treats the group as the - effective sampling unit.""" - - def test_rejects_varying_psu_within_group(self, base_data): + """Cell-period IF allocator contract: strata and PSU may vary ACROSS + cells of a group, but must be constant WITHIN each (g, t) cell. In + canonical one-obs-per-cell panels the cell-level constancy check is + trivially satisfied. Out-of-scope combinations (heterogeneity + + within-group-varying PSU; n_bootstrap > 0 + within-group-varying + PSU) raise NotImplementedError with a pointer to the follow-up PR. + """ + + def test_accepts_varying_psu_within_group(self, base_data): + """Under the cell-period allocator, PSU that varies across cells + of a group is a valid design — the allocator attributes IF mass + to each (g, t) cell separately and Binder TSL aggregates at PSU + level with the honest cell-level variance. 
+ """ df_ = base_data.copy() df_["pw"] = 1.0 df_["stratum"] = 0 - # PSU varies within each group (alternates by period) + # PSU varies within each group (alternates by period). Still + # constant within each (g, t) cell because one obs per cell. df_["psu"] = df_["period"] % 2 sd = SurveyDesign(weights="pw", strata="stratum", psu="psu") - with pytest.raises(ValueError, match="PSU to be constant within group"): + result = ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd, L_max=2, + ) + assert np.isfinite(result.overall_att) + assert np.isfinite(result.overall_se) + # And the SE differs from a within-group-constant-PSU baseline, + # because the cell allocator now honors the extra PSU structure. + df_const = base_data.copy() + df_const["pw"] = 1.0 + df_const["stratum"] = 0 + df_const["psu"] = 0 # constant-within-group PSU baseline + sd_const = SurveyDesign(weights="pw", strata="stratum", psu="psu") + r_const = ChaisemartinDHaultfoeuille(seed=1).fit( + df_const, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd_const, L_max=2, + ) + assert result.overall_se != r_const.overall_se, ( + "Cell-period allocator must produce a different SE when PSU " + "actually varies across cells vs. constant-within-group." + ) + + def test_accepts_varying_strata_within_group(self, base_data): + """Strata that vary across cells of a group are supported under + the cell-period allocator, trivially satisfying within-cell + constancy in one-obs-per-cell panels. + """ + df_ = base_data.copy() + df_["pw"] = 1.0 + # Stratum varies within each group across cells + df_["stratum"] = df_["period"] % 2 + # PSU = group, nested inside stratum so the resolver accepts the + # cross-stratum reuse of group labels. 
+ df_["psu"] = df_["group"] + sd = SurveyDesign(weights="pw", strata="stratum", psu="psu", nest=True) + result = ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd, L_max=2, + ) + assert np.isfinite(result.overall_att) + assert np.isfinite(result.overall_se) + + def test_heterogeneity_with_varying_psu_raises(self, base_data): + """heterogeneity= is gated under within-group-varying PSU until + PR 3 ships the cell-period allocator for the WLS psi_obs path. + """ + df_ = base_data.copy() + df_["pw"] = 1.0 + df_["stratum"] = 0 + df_["psu"] = df_["period"] % 2 # varies within group + df_["x_het"] = np.arange(len(df_)) % 3 # categorical covariate + sd = SurveyDesign(weights="pw", strata="stratum", psu="psu") + with pytest.raises(NotImplementedError, match="heterogeneity"): ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + heterogeneity="x_het", L_max=1, + survey_design=sd, + ) + + def test_bootstrap_with_varying_psu_raises(self, base_data): + """n_bootstrap > 0 is gated under within-group-varying PSU until + PR 4 ships the cell-level Hall-Mammen wild bootstrap. 
+ """ + df_ = base_data.copy() + df_["pw"] = 1.0 + df_["stratum"] = 0 + df_["psu"] = df_["period"] % 2 # varies within group + sd = SurveyDesign(weights="pw", strata="stratum", psu="psu") + with pytest.raises(NotImplementedError, match="n_bootstrap"): + ChaisemartinDHaultfoeuille(n_bootstrap=50, seed=1).fit( df_, outcome="outcome", group="group", time="period", treatment="treatment", survey_design=sd, ) - def test_rejects_varying_strata_within_group(self, base_data): + def test_auto_inject_with_varying_strata_nest_true_succeeds(self, base_data): + """When strata varies across cells of a group and the user + passes ``nest=True`` with no explicit ``psu``, the auto-inject + path is valid: ``SurveyDesign.resolve()`` combines + ``(stratum, psu)`` into globally-unique labels via the + nest=True path (``diff_diff/survey.py:299-302``), so the + cross-stratum PSU uniqueness check is satisfied. Byte-check + against the explicit ``SurveyDesign(..., psu="group", + nest=True)`` baseline — both paths resolve to the same design. + """ df_ = base_data.copy() df_["pw"] = 1.0 - # Stratum varies within each group df_["stratum"] = df_["period"] % 2 - # Give each obs a unique PSU label so the SurveyDesign resolver - # doesn't reject on cross-stratum PSU reuse — we want our - # within-group strata check to fire first. 
+ sd_auto = SurveyDesign(weights="pw", strata="stratum", nest=True) + sd_explicit = SurveyDesign( + weights="pw", strata="stratum", psu="group", nest=True, + ) + r_auto = ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd_auto, L_max=2, + ) + r_explicit = ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd_explicit, L_max=2, + ) + assert np.isfinite(r_auto.overall_att) + assert np.isfinite(r_auto.overall_se) + if np.isfinite(r_auto.overall_se) and np.isfinite(r_explicit.overall_se): + assert r_auto.overall_se == pytest.approx( + r_explicit.overall_se, rel=1e-6 + ) + assert r_auto.survey_metadata is not None + assert r_explicit.survey_metadata is not None + assert ( + r_auto.survey_metadata.df_survey + == r_explicit.survey_metadata.df_survey + ) + + def test_auto_inject_with_varying_strata_raises(self, base_data): + """Auto-injected `psu=` with nest=False cannot honor + strata that vary across cells of a group — the synthesized PSU + column would reuse group labels across strata and trip the + cross-stratum PSU uniqueness check. fit() detects that combo + before survey resolution and raises a targeted ValueError + pointing users to the explicit `psu=, nest=True` path. + """ + df_ = base_data.copy() + df_["pw"] = 1.0 + df_["stratum"] = df_["period"] % 2 # varies across cells of each group + sd = SurveyDesign(weights="pw", strata="stratum") + with pytest.raises(ValueError, match=r"psu="): + ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd, + ) + + def test_within_cell_psu_variation_rejected(self, base_data): + """Multiple PSUs inside a single (g, t) cell (a multi-obs-per- + cell panel) remain ambiguous under the cell allocator and must + be rejected. 
+ """ + df_ = base_data.copy() + df_["pw"] = 1.0 + df_["stratum"] = 0 + df_["psu"] = 0 + # Duplicate the first row with a different PSU so that cell + # (group[0], period[0]) has two obs with different PSU labels. + dup = df_.iloc[0].copy() + dup["psu"] = 99 + df_ = pd.concat([df_, pd.DataFrame([dup])], ignore_index=True) + sd = SurveyDesign(weights="pw", strata="stratum", psu="psu") + with pytest.raises(ValueError, match="PSU to be constant within each"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df_, outcome="outcome", group="group", + time="period", treatment="treatment", + survey_design=sd, + ) + + def test_within_cell_strata_variation_rejected(self, base_data): + """Multiple strata inside a single (g, t) cell are ambiguous + under the cell allocator and must be rejected. + """ + df_ = base_data.copy() + df_["pw"] = 1.0 + df_["stratum"] = 0 + # Duplicate the first row with a different stratum. + dup = df_.iloc[0].copy() + dup["stratum"] = 1 + df_ = pd.concat([df_, pd.DataFrame([dup])], ignore_index=True) df_["psu"] = np.arange(len(df_)) sd = SurveyDesign(weights="pw", strata="stratum", psu="psu") - with pytest.raises(ValueError, match="strata to be constant within group"): + with pytest.raises(ValueError, match="strata to be constant within each"): ChaisemartinDHaultfoeuille(seed=1).fit( df_, outcome="outcome", group="group", time="period", treatment="treatment", @@ -1271,17 +1437,103 @@ def test_off_horizon_row_duplication_does_not_change_se(self, base_data): f"{r_dup.overall_se}) — auto-inject psu=group is not active." ) - def test_rejection_excludes_zero_weight_rows(self, base_data): - """A zero-weight row with a different PSU from its group must - not trigger rejection — it is out-of-sample by the - subpopulation contract and does not enter the variance.""" + def test_cell_allocator_row_sum_identity(self): + """Cell-period allocator contract: for every group, the per- + period attribution sums across time to the per-group IF + (before cohort centering). 
This is the invariant that makes + PSU-level Binder aggregation telescope to ``U_centered[g]`` + under within-group-constant PSU and therefore guarantees byte- + identity with the legacy group-level allocator on the old + accepted input set. Hand-computed on a 4-group × 3-period + panel: two never-treated (stable_0) and two joiners switching + at ``t = 2``. + """ + from diff_diff.chaisemartin_dhaultfoeuille import ( + _compute_full_per_group_contributions, + _cohort_recenter, + _cohort_recenter_per_period, + ) + + # D_mat, Y_mat, N_mat shaped (n_groups=4, n_periods=3). + D_mat = np.array( + [ + [0, 0, 0], # G0 never-treated + [0, 0, 0], # G1 never-treated + [0, 0, 1], # G2 joiner at t=2 + [0, 0, 1], # G3 joiner at t=2 + ], + dtype=float, + ) + Y_mat = np.array( + [ + [1.0, 2.0, 3.0], + [2.1, 3.1, 4.2], + [0.5, 1.2, 5.4], + [1.3, 2.4, 6.1], + ], + dtype=float, + ) + N_mat = np.ones_like(D_mat, dtype=int) + # Per-period cell counts aligned to periods[1:] + # t=1: all stable_0 (4 in n_00); t=2: 2 joiners (n_10) + 2 stable_0 (n_00) + n_10_t_arr = np.array([0, 2], dtype=int) + n_00_t_arr = np.array([4, 2], dtype=int) + n_01_t_arr = np.array([0, 0], dtype=int) + n_11_t_arr = np.array([0, 0], dtype=int) + # A11 zeroed at t=1 (no joiners); active at t=2. 
+ a11_plus_zeroed = np.array([True, False], dtype=bool) + a11_minus_zeroed = np.array([True, True], dtype=bool) + + U, U_pp = _compute_full_per_group_contributions( + D_mat=D_mat, Y_mat=Y_mat, N_mat=N_mat, + n_10_t_arr=n_10_t_arr, n_00_t_arr=n_00_t_arr, + n_01_t_arr=n_01_t_arr, n_11_t_arr=n_11_t_arr, + a11_plus_zeroed_arr=a11_plus_zeroed, + a11_minus_zeroed_arr=a11_minus_zeroed, + side="overall", + compute_per_period=True, + ) + assert U_pp is not None + + # Hand computation at t=2 joiner side: + # G0: stable_0, -(2/2) * (3.0 - 2.0) = -1.0 + # G1: stable_0, -(2/2) * (4.2 - 3.1) = -1.1 + # G2: joiner, (5.4 - 1.2) = 4.2 + # G3: joiner, (6.1 - 2.4) = 3.7 + expected_U = np.array([-1.0, -1.1, 4.2, 3.7]) + np.testing.assert_allclose(U, expected_U, atol=1e-12) + + # Row-sum identity: U_per_period.sum(axis=1) == U exactly. + np.testing.assert_allclose(U_pp.sum(axis=1), U, atol=1e-12) + + # Post-period attribution: all mass at t=2 (the transition's + # post cell); t=0 and t=1 columns are zero for every group. + np.testing.assert_array_equal(U_pp[:, 0], np.zeros(4)) + np.testing.assert_array_equal(U_pp[:, 1], np.zeros(4)) + np.testing.assert_allclose(U_pp[:, 2], expected_U, atol=1e-12) + + # Cohort centering preserves the row-sum identity: per-period + # cohort centering and group-level cohort centering produce + # 2D and 1D arrays whose row sums agree to FP precision. + # Cohorts: A = {G0, G1} (never-treated), B = {G2, G3} (joiners). + cohort_ids = np.array([0, 0, 1, 1]) + U_c = _cohort_recenter(U, cohort_ids) + U_pp_c = _cohort_recenter_per_period(U_pp, cohort_ids) + np.testing.assert_allclose(U_pp_c.sum(axis=1), U_c, atol=1e-12) + + def test_within_cell_check_excludes_zero_weight_rows(self, base_data): + """A zero-weight row with a different PSU label from its cell + must not trigger rejection — it is out-of-sample by the + subpopulation contract and does not enter the variance. 
+ """ df_ = base_data.copy() df_["pw"] = 1.0 df_["stratum"] = 0 df_["psu"] = 0 - # Inject a zero-weight row with a different PSU + # Inject a zero-weight row whose PSU would collide with the + # first row's cell if it were counted. sample = df_.iloc[0].copy() - sample["psu"] = 99 # would violate constancy if counted + sample["psu"] = 99 # would violate within-cell constancy if counted sample["pw"] = 0.0 df_ = pd.concat([df_, pd.DataFrame([sample])], ignore_index=True) sd = SurveyDesign(weights="pw", strata="stratum", psu="psu")