diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
index ca8baf07..97976f7c 100644
--- a/diff_diff/synthetic_did.py
+++ b/diff_diff/synthetic_did.py
@@ -416,43 +416,92 @@ def fit(  # type: ignore[override]
             )
         )
 
-        # Compute auto-regularization (or use user overrides)
-        auto_zeta_omega, auto_zeta_lambda = _compute_regularization(
-            Y_pre_control, len(treated_units), len(post_periods)
+        # --- Y normalization ---------------------------------------------
+        # τ is location-invariant and scale-equivariant in Y. Normalizing Y
+        # once before weight optimization, the estimator, and the variance
+        # procedures (and rescaling τ/SE/CI/effects by Y_scale at the end)
+        # is mathematically a no-op but prevents ~6-digit precision loss in
+        # the SDID double-difference when outcomes span millions-to-billions.
+        # Normalization constants come from controls' pre-period only so the
+        # reference is unaffected by treatment. See REGISTRY.md §SyntheticDiD
+        # edge cases and synth-inference/synthdid#71 for R's version.
+        Y_shift = float(np.mean(Y_pre_control))
+        Y_scale_raw = float(np.std(Y_pre_control))
+        # Relative floor: avoid amplifying roundoff when std is tiny but
+        # nonzero (near-constant Y_pre_control). Fall back to 1.0 in that
+        # case and on non-finite std.
+        _scale_floor = 1e-12 * max(abs(Y_shift), 1.0)
+        Y_scale = (
+            Y_scale_raw
+            if np.isfinite(Y_scale_raw) and Y_scale_raw > _scale_floor
+            else 1.0
+        )
+        Y_pre_control_n = (Y_pre_control - Y_shift) / Y_scale
+        Y_post_control_n = (Y_post_control - Y_shift) / Y_scale
+        Y_pre_treated_n = (Y_pre_treated - Y_shift) / Y_scale
+        Y_post_treated_n = (Y_post_treated - Y_shift) / Y_scale
+
+        # Auto-regularization on normalized Y. FW's argmin is invariant under
+        # (Y, ζ) -> (Y/s, ζ/s); auto-zetas computed on Y_n are already on the
+        # normalized scale. User-supplied zetas are in original-Y units and
+        # are divided by Y_scale for internal FW use. Original-scale values
+        # are stored on results_ / self so diagnostic methods (in_time_placebo,
+        # sensitivity_to_zeta_omega) — which operate on the stored original-
+        # scale fit snapshot — see the same zeta the user specified.
+        auto_zeta_omega_n, auto_zeta_lambda_n = _compute_regularization(
+            Y_pre_control_n, len(treated_units), len(post_periods)
+        )
+        zeta_omega_n = (
+            self.zeta_omega / Y_scale
+            if self.zeta_omega is not None
+            else auto_zeta_omega_n
+        )
+        zeta_lambda_n = (
+            self.zeta_lambda / Y_scale
+            if self.zeta_lambda is not None
+            else auto_zeta_lambda_n
+        )
+        # Report the user-supplied value exactly (no roundoff from /*Y_scale
+        # roundtrip); report auto-zeta rescaled to original Y units.
+        zeta_omega = (
+            self.zeta_omega if self.zeta_omega is not None else auto_zeta_omega_n * Y_scale
+        )
+        zeta_lambda = (
+            self.zeta_lambda if self.zeta_lambda is not None else auto_zeta_lambda_n * Y_scale
         )
-        zeta_omega = self.zeta_omega if self.zeta_omega is not None else auto_zeta_omega
-        zeta_lambda = self.zeta_lambda if self.zeta_lambda is not None else auto_zeta_lambda
 
-        # Store noise level for diagnostics
+        # Store noise level for diagnostics (reported on original Y scale).
         from diff_diff.utils import _compute_noise_level
 
-        noise_level = _compute_noise_level(Y_pre_control)
+        noise_level_n = _compute_noise_level(Y_pre_control_n)
+        noise_level = noise_level_n * Y_scale
 
-        # Data-dependent convergence threshold (matches R's 1e-5 * noise.level).
-        # Floor of 1e-5 when noise_level == 0: R would use 0.0, causing FW to
-        # run all max_iter iterations.  The result is equivalent (zero-noise
-        # data has no variation to optimize), but the floor enables early stop.
-        min_decrease = 1e-5 * noise_level if noise_level > 0 else 1e-5
+        # Data-dependent convergence threshold (matches R's 1e-5 * noise.level),
+        # evaluated on normalized Y since FW operates on normalized Y. Floor of
+        # 1e-5 when noise is zero: R would use 0.0, causing FW to run all
+        # max_iter iterations; the floor enables early stop on zero-variation
+        # inputs without changing the optimum.
+        min_decrease = 1e-5 * noise_level_n if noise_level_n > 0 else 1e-5
 
-        # Compute unit weights (Frank-Wolfe with sparsification)
-        # Survey weights enter via the treated mean target
+        # Compute unit weights (Frank-Wolfe with sparsification) on normalized Y.
+        # Survey weights enter via the treated mean target.
         if w_treated is not None:
-            Y_pre_treated_mean = np.average(Y_pre_treated, axis=1, weights=w_treated)
+            Y_pre_treated_mean_n = np.average(Y_pre_treated_n, axis=1, weights=w_treated)
         else:
-            Y_pre_treated_mean = np.mean(Y_pre_treated, axis=1)
+            Y_pre_treated_mean_n = np.mean(Y_pre_treated_n, axis=1)
 
         unit_weights = compute_sdid_unit_weights(
-            Y_pre_control,
-            Y_pre_treated_mean,
-            zeta_omega=zeta_omega,
+            Y_pre_control_n,
+            Y_pre_treated_mean_n,
+            zeta_omega=zeta_omega_n,
             min_decrease=min_decrease,
         )
 
-        # Compute time weights (Frank-Wolfe on collapsed form)
+        # Compute time weights (Frank-Wolfe on collapsed form) on normalized Y.
         time_weights = compute_time_weights(
-            Y_pre_control,
-            Y_post_control,
-            zeta_lambda=zeta_lambda,
+            Y_pre_control_n,
+            Y_post_control_n,
+            zeta_lambda=zeta_lambda_n,
             min_decrease=min_decrease,
         )
 
@@ -465,23 +514,32 @@ def fit(  # type: ignore[override]
         else:
             omega_eff = unit_weights
 
-        # Compute SDID estimate
+        # Compute SDID estimate on normalized Y, then rescale to original units.
         if w_treated is not None:
-            Y_post_treated_mean = np.average(Y_post_treated, axis=1, weights=w_treated)
+            Y_post_treated_mean_n = np.average(
+                Y_post_treated_n, axis=1, weights=w_treated
+            )
         else:
-            Y_post_treated_mean = np.mean(Y_post_treated, axis=1)
+            Y_post_treated_mean_n = np.mean(Y_post_treated_n, axis=1)
 
-        att = compute_sdid_estimator(
-            Y_pre_control,
-            Y_post_control,
-            Y_pre_treated_mean,
-            Y_post_treated_mean,
+        att_n = compute_sdid_estimator(
+            Y_pre_control_n,
+            Y_post_control_n,
+            Y_pre_treated_mean_n,
+            Y_post_treated_mean_n,
             omega_eff,
             time_weights,
         )
+        att = att_n * Y_scale
 
-        # Compute pre-treatment fit (RMSE) using composed weights.
-        # Retain trajectories on results for plotting / custom diagnostics.
+        # Recover original-scale treated means for diagnostics / trajectories.
+        Y_pre_treated_mean = Y_pre_treated_mean_n * Y_scale + Y_shift
+        Y_post_treated_mean = Y_post_treated_mean_n * Y_scale + Y_shift
+
+        # Compute pre-treatment fit (RMSE) using composed weights on the
+        # original Y (user-visible scale). omega_eff is a simplex — applies
+        # cleanly to any linear rescale of Y — so trajectories live on the
+        # original outcome scale for plotting and the poor-fit warning.
         synthetic_pre_trajectory = Y_pre_control @ omega_eff
         synthetic_post_trajectory = Y_post_control @ omega_eff
         pre_fit_rmse = np.sqrt(
@@ -510,49 +568,57 @@ def fit(  # type: ignore[override]
         treated_pre_trajectory = Y_pre_treated_mean
         treated_post_trajectory = Y_post_treated_mean
 
-        # Compute standard errors based on variance_method
+        # Compute standard errors on normalized Y, rescale to original units.
+        # Variance procedures resample / permute indices (independent of Y
+        # values) so RNG streams stay aligned across scales.
         if self.variance_method == "bootstrap":
-            se, bootstrap_estimates = self._bootstrap_se(
-                Y_pre_control,
-                Y_post_control,
-                Y_pre_treated,
-                Y_post_treated,
+            se_n, bootstrap_estimates_n = self._bootstrap_se(
+                Y_pre_control_n,
+                Y_post_control_n,
+                Y_pre_treated_n,
+                Y_post_treated_n,
                 unit_weights,
                 time_weights,
                 w_treated=w_treated,
                 w_control=w_control,
                 resolved_survey=_unit_resolved_survey,
             )
-            placebo_effects = bootstrap_estimates
+            se = se_n * Y_scale
+            placebo_effects = np.asarray(bootstrap_estimates_n) * Y_scale
             inference_method = "bootstrap"
         elif self.variance_method == "jackknife":
             # Fixed-weight jackknife (R's synthdid Algorithm 3)
-            se, jackknife_estimates = self._jackknife_se(
-                Y_pre_control,
-                Y_post_control,
-                Y_pre_treated,
-                Y_post_treated,
+            se_n, jackknife_estimates_n = self._jackknife_se(
+                Y_pre_control_n,
+                Y_post_control_n,
+                Y_pre_treated_n,
+                Y_post_treated_n,
                 unit_weights,
                 time_weights,
                 w_treated=w_treated,
                 w_control=w_control,
             )
-            placebo_effects = jackknife_estimates
+            se = se_n * Y_scale
+            placebo_effects = np.asarray(jackknife_estimates_n) * Y_scale
             inference_method = "jackknife"
         else:
-            # Use placebo-based variance (R's synthdid Algorithm 4)
-            se, placebo_effects = self._placebo_variance_se(
-                Y_pre_control,
-                Y_post_control,
-                Y_pre_treated_mean,
-                Y_post_treated_mean,
+            # Use placebo-based variance (R's synthdid Algorithm 4).
+            # Placebo re-estimates ω, λ inside the loop; it must receive the
+            # normalized zetas and operate on normalized Y.
+            se_n, placebo_effects_n = self._placebo_variance_se(
+                Y_pre_control_n,
+                Y_post_control_n,
+                Y_pre_treated_mean_n,
+                Y_post_treated_mean_n,
                 n_treated=len(treated_units),
-                zeta_omega=zeta_omega,
-                zeta_lambda=zeta_lambda,
+                zeta_omega=zeta_omega_n,
+                zeta_lambda=zeta_lambda_n,
                 min_decrease=min_decrease,
                 replications=self.n_bootstrap,
                 w_control=w_control,
             )
+            se = se_n * Y_scale
+            placebo_effects = np.asarray(placebo_effects_n) * Y_scale
             inference_method = "placebo"
 
         # Compute test statistics
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 0bf3c5de..5c0cd045 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -1515,6 +1515,7 @@ Convergence criterion: stop when objective decrease < min_decrease² (default mi
 - **Jackknife with non-finite LOO estimate**: Returns NaN SE. Unlike bootstrap/placebo, jackknife is deterministic and cannot skip failed iterations; NaN propagates through `var()` (matches R behavior).
 - **Jackknife with survey weights**: Guards on effective positive support (omega * w_control > 0 and w_treated > 0) after composition, not raw FW counts. Returns NaN SE if fewer than 2 effective controls or 2 positive-weight treated units. Per-iteration zero-sum guards return NaN for individual LOO iterations when remaining composed weights sum to zero.
 - **Note:** Survey support: weights, strata, PSU, and FPC are all supported. Full-design surveys use Rao-Wu rescaled bootstrap (Phase 6); non-bootstrap variance methods (`variance_method="placebo"` or `"jackknife"`) require weights-only (strata/PSU/FPC require bootstrap). Both sides weighted per WLS regression interpretation: treated-side means are survey-weighted (Frank-Wolfe target and ATT formula); control-side synthetic weights are composed with survey weights post-optimization (ω_eff = ω * w_co, renormalized). Frank-Wolfe optimization itself is unweighted — survey importance enters after trajectory-matching. Covariate residualization uses WLS with survey weights. Placebo, jackknife, and bootstrap SE preserve survey weights on both sides.
+- **Note:** Internal Y normalization. Before weight optimization, the estimator, and variance procedures, `fit()` centers Y by `mean(Y_pre_control)` and scales by `std(Y_pre_control)`; `Y_scale` falls back to `1.0` when std is non-finite or below `1e-12 * max(|mean|, 1)`. Auto-regularization and `noise_level` are computed on normalized Y; user-supplied `zeta_omega` / `zeta_lambda` are divided by `Y_scale` internally for Frank-Wolfe. τ, SE, CI, the placebo/bootstrap/jackknife effect vectors, `results_.noise_level`, and `results_.zeta_omega` / `results_.zeta_lambda` are all reported on the user's original outcome scale (user-supplied zetas are echoed back exactly to avoid float roundoff). Mathematically a no-op — τ is location-invariant and scale-equivariant, and FW weights are invariant under `(Y, ζ) → (Y/s, ζ/s)` — but prevents catastrophic cancellation in the SDID double-difference when outcomes span millions-to-billions (see synth-inference/synthdid#71 for the R-package version of this issue). Normalization constants are derived from controls' pre-period only so the reference is unaffected by treatment. Scope: `in_time_placebo()` and `sensitivity_to_zeta_omega()` continue to run on the stored original-scale snapshot with original-scale zetas (preserving pre-fix behavior); extending normalization into those diagnostic paths is tracked separately.
 
 *Validation diagnostics (post-fit methods on `SyntheticDiDResults`):*
 
diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py
index 37ad91f6..36b8162a 100644
--- a/tests/test_methodology_sdid.py
+++ b/tests/test_methodology_sdid.py
@@ -2100,3 +2100,146 @@ def test_asdict_excludes_internal_diagnostic_state(self):
         assert "_fit_snapshot" not in d
         assert "_loo_unit_ids" not in d
         assert "_loo_roles" not in d
+
+
+# =============================================================================
+# Scale equivariance — internal Y normalization prevents catastrophic
+# cancellation when outcomes are on extreme scales (millions-to-billions).
+# τ is location-invariant and scale-equivariant, so fitting on ``a*Y + b``
+# should produce τ/SE that scale exactly with ``a`` and leave p-values
+# unchanged. On normally-scaled data the fix must be a numerical no-op.
+# =============================================================================
+
+
+class TestScaleEquivariance:
+    """SDID is location-invariant and scale-equivariant in Y."""
+
+    # Hard-coded baselines captured pre-fix on a well-scaled panel. If these
+    # drift the fix is not a true no-op on normal data and review is warranted.
+    _BASELINE = {
+        "placebo":   (4.603349837478791,   0.29385822261006445, 0.004975124378109453,   200),
+        "bootstrap": (4.603349837478791,   0.1589472532935243,  0.4869109947643979,     191),
+        "jackknife": (4.603349837478791,   0.19908075946622925, 2.716551077849484e-118,  23),
+    }
+
+    # (a, b) pairs. Includes extreme scales where pre-fix SDID loses
+    # ~6 mantissa digits in the double-difference subtraction.
+    _SCALES = [(1e-6, 0.0), (1.0, 0.0), (1e6, 1e9), (1e9, -1e6), (1.0, 1e9)]
+
+    @staticmethod
+    def _rescale(df, a, b):
+        out = df.copy()
+        out["outcome"] = a * out["outcome"] + b
+        return out
+
+    @staticmethod
+    def _fit(data, variance_method, *, seed=1, n_bootstrap=200):
+        return SyntheticDiD(
+            variance_method=variance_method, n_bootstrap=n_bootstrap, seed=seed
+        ).fit(
+            data,
+            outcome="outcome",
+            treatment="treated",
+            unit="unit",
+            time="period",
+            post_periods=[5, 6, 7],
+        )
+
+    @pytest.mark.parametrize("variance_method", ["placebo", "bootstrap", "jackknife"])
+    def test_baseline_parity_small_scale(self, variance_method):
+        """Existing-fixture results match pre-fix literals — guards against
+        drift; a true no-op should hit float epsilon relative to baseline."""
+        att0, se0, p0, n0 = self._BASELINE[variance_method]
+        data = _make_panel(seed=42)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            r = self._fit(data, variance_method)
+        assert r.att == pytest.approx(att0, rel=1e-8)
+        assert r.se == pytest.approx(se0, rel=1e-8)
+        assert r.p_value == pytest.approx(p0, rel=1e-8)
+        assert len(r.placebo_effects) == n0
+
+    @pytest.mark.parametrize("variance_method", ["placebo", "bootstrap", "jackknife"])
+    def test_scale_equivariance(self, variance_method):
+        """τ/a, SE/|a|, p-value, and n_successful must be invariant under
+        (Y → a*Y + b) across ~15 orders of magnitude."""
+        data = _make_panel(seed=42)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            r0 = self._fit(data, variance_method)
+        att0, se0, p0 = r0.att, r0.se, r0.p_value
+        n0 = len(r0.placebo_effects)
+        noise0 = r0.noise_level
+        zeta_omega0 = r0.zeta_omega
+
+        for a, b in self._SCALES:
+            scaled = self._rescale(data, a, b)
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", UserWarning)
+                r = self._fit(scaled, variance_method)
+            # Variance-method success count must be identical; divergence
+            # would shift the empirical p-value floor 1/(n+1).
+            assert len(r.placebo_effects) == n0, (
+                f"(a={a}, b={b}) yielded {len(r.placebo_effects)} effects, "
+                f"baseline had {n0}"
+            )
+            assert r.att / a == pytest.approx(att0, rel=1e-8), f"att failed at a={a}, b={b}"
+            assert r.se / abs(a) == pytest.approx(se0, rel=1e-6), f"se failed at a={a}, b={b}"
+            assert r.p_value == pytest.approx(p0, rel=1e-6), f"p failed at a={a}, b={b}"
+            # Reported diagnostics scale with Y just like SE (noise_level,
+            # zeta_omega are both in outcome units).
+            assert r.noise_level / abs(a) == pytest.approx(noise0, rel=1e-8)
+            assert r.zeta_omega / abs(a) == pytest.approx(zeta_omega0, rel=1e-8)
+
+    @pytest.mark.parametrize("variance_method", ["placebo", "bootstrap", "jackknife"])
+    def test_detects_true_effect_at_extreme_scale(self, variance_method):
+        """Pre-fix regression: catastrophic cancellation at Y~1e9 degraded
+        SEs so p-values clustered near 0.5 regardless of true effect. Here
+        the true ATT is 0.5% of baseline — below the pre-fix precision
+        floor — and must still produce a detectable, correctly-scaled τ."""
+        rng = np.random.default_rng(0)
+        n_control, n_treated, n_pre, n_post = 25, 3, 6, 4
+        baseline_level = 1e9  # deliberately extreme
+        true_att = 5e6  # 0.5% of baseline — lost in 6-digit cancellation pre-fix
+        rows = []
+        for unit in range(n_control + n_treated):
+            is_treated = unit >= n_control
+            unit_fe = rng.normal(0, 2e6)
+            for t in range(n_pre + n_post):
+                y = baseline_level + unit_fe + t * 3e5 + rng.normal(0, 5e5)
+                if is_treated and t >= n_pre:
+                    y += true_att
+                rows.append({
+                    "unit": unit, "period": t,
+                    "treated": int(is_treated), "outcome": y,
+                })
+        data = pd.DataFrame(rows)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            r = SyntheticDiD(
+                variance_method=variance_method, n_bootstrap=200, seed=7
+            ).fit(
+                data, outcome="outcome", treatment="treated",
+                unit="unit", time="period",
+                post_periods=list(range(n_pre, n_pre + n_post)),
+            )
+
+        # τ must land near the true effect (within ~3 SE); SE must be
+        # positive, finite, and small enough that the effect is significant
+        # by z-statistic. For placebo/jackknife the empirical p-value is a
+        # proper null-distribution test; for bootstrap the empirical p is
+        # not a null test (draws center on τ̂), so check z directly.
+        assert np.isfinite(r.att) and np.isfinite(r.se)
+        assert r.se > 0
+        assert abs(r.att - true_att) < max(3 * r.se, 0.1 * true_att)
+        z = abs(r.att / r.se)
+        assert z > 3, (
+            f"Effect at Y~1e9 must be detectable by z-stat; att={r.att}, "
+            f"se={r.se}, z={z} (variance_method={variance_method})"
+        )
+        if variance_method != "bootstrap":
+            assert r.p_value < 0.05, (
+                f"Effect at Y~1e9 must reject null; p_value={r.p_value} "
+                f"(variance_method={variance_method})"
+            )