igerber · igerber · Apr 20, 2026 · Apr 19, 2026 · Apr 19, 2026 · Apr 20, 2026
diff --git a/diff_diff/business_report.py b/diff_diff/business_report.py
@@ -1854,6 +1854,48 @@ def _significance_phrase(p: Optional[float], alpha: float) -> str:
     return "the confidence interval includes zero; the data are consistent with no effect"
 
 
+def _smallest_failing_grid_m(sens: Dict[str, Any]) -> Optional[float]:
+    """Return the lowest M evaluated on the HonestDiD sensitivity grid
+    when the robust CI at that point already includes zero.
+
+    Returns ``None`` when no grid entry carries a numeric ``M`` or when
+    the lowest evaluated point is still robust; in the latter case
+    ``breakdown_M`` is an interpolated threshold between grid points,
+    not a statement about the lowest grid point itself.
+
+    Twin of ``diagnostic_report._smallest_failing_grid_m_dr``; keep in sync.
+    """
+    candidates = [
+        pt
+        for pt in sens.get("grid") or []
+        if isinstance(pt.get("M"), (int, float))
+    ]
+    if not candidates:
+        return None
+    candidates.sort(key=lambda pt: pt["M"])
+    lowest = candidates[0]
+    still_robust = lowest.get("robust_to_zero", True)
+    return None if still_robust else float(lowest["M"])
+
+
+def _sentence_first_upper(text: str) -> str:
+    """Return ``text`` with its first character uppercased and the
+    rest untouched; falsy input is returned as-is. ``str.capitalize()``
+    would lowercase the tail and flatten acronyms and proper nouns.
+
+    Examples
+    --------
+    >>> _sentence_first_upper("the NJ minimum-wage increase")
+    'The NJ minimum-wage increase'
+    >>> _sentence_first_upper("Castle Doctrine law adoption")
+    'Castle Doctrine law adoption'
+    """
+    if text:
+        first, rest = text[0], text[1:]
+        return first.upper() + rest
+    return text
+
+
 def _direction_verb(effect: float, outcome_direction: Optional[str]) -> str:
     """Return a direction-aware verb for the headline sentence.
 
@@ -1929,7 +1971,16 @@ def _render_headline_sentence(schema: Dict[str, Any]) -> str:
         # is not actually available.
         ci_str = " (inference unavailable: confidence interval is undefined for this fit)"
     by_clause = f" by {magnitude}" if effect != 0 else ""
-    return f"{treatment.capitalize()} {verb} {outcome}{by_clause}{ci_str}."
+    # Round-1 BR/DR canonical-validation (2026-04-19): Python's
+    # ``str.capitalize()`` lowercases everything except the first
+    # character, so ``"the NJ minimum-wage increase".capitalize()``
+    # returns ``"The nj minimum-wage increase"`` — flattening the
+    # ``NJ`` abbreviation. Real canonical datasets (Card-Krueger,
+    # Castle Doctrine) carry proper-noun / acronym tokens in the
+    # user-supplied ``treatment_label``, so preserve user casing and
+    # only ensure the first character is uppercase.
+    treatment_sentence = _sentence_first_upper(treatment)
+    return f"{treatment_sentence} {verb} {outcome}{by_clause}{ci_str}."
 
 
 def _render_summary(schema: Dict[str, Any]) -> str:
@@ -2088,11 +2139,33 @@ def _render_summary(schema: Dict[str, Any]) -> str:
                 f"pre-period variation."
             )
         elif isinstance(bkd, (int, float)):
-            sentences.append(
-                f"HonestDiD: the result is fragile — the confidence interval "
-                f"includes zero once violations reach {bkd:.2g}x the "
-                f"pre-period variation."
-            )
+            # Round-1 BR/DR canonical-validation (2026-04-19) then
+            # tightened per CI review on PR #341 R1:
+            # ``breakdown_M`` is the smallest M at which the robust
+            # CI includes zero (interpolated between grid points) —
+            # not a claim about any specific grid point. Earlier fix
+            # keyed off ``bkd <= 0.05`` which incorrectly asserted
+            # "smallest grid point fails" even for grids that start
+            # at M=0 where the smallest evaluated point is still
+            # robust (e.g., grid=[0, 0.25, ...] with bkd=0.03). The
+            # "smallest grid point" wording is only accurate when
+            # the smallest evaluated M on the grid itself fails
+            # (``robust_to_zero == False``); otherwise fall through
+            # to the numeric multiplier.
+            smallest_failed_m = _smallest_failing_grid_m(sens)
+            if smallest_failed_m is not None:
+                sentences.append(
+                    "HonestDiD: the result is fragile — the confidence "
+                    "interval includes zero even at the smallest M "
+                    f"evaluated on the sensitivity grid (M = "
+                    f"{smallest_failed_m:.2g})."
+                )
+            else:
+                sentences.append(
+                    f"HonestDiD: the result is fragile — the confidence "
+                    f"interval includes zero once violations reach {bkd:.2g}x "
+                    f"the pre-period variation."
+                )
 
     # Sample sentence. For fits with a dynamic comparison set (CS /
     # ContinuousDiD / StaggeredTripleDiff / EfficientDiD /

diff --git a/diff_diff/diagnostic_report.py b/diff_diff/diagnostic_report.py
@@ -2780,6 +2780,32 @@ def _collect_pre_period_coefs(
     return results_list, n_dropped_undefined
 
 
+def _smallest_failing_grid_m_dr(sens: Dict[str, Any]) -> Optional[float]:
+    """Return the lowest M evaluated on the HonestDiD sensitivity grid
+    when the robust CI there already includes zero, else ``None``.
+    Twin of ``business_report._smallest_failing_grid_m`` — both helpers
+    must stay in sync for cross-surface parity. See PR #341 R1 review.
+
+    ``breakdown_M`` is an interpolated threshold between grid points,
+    so "the smallest grid point fails" is a valid claim only when the
+    lowest evaluated M has a falsy ``robust_to_zero``. When that point
+    is still robust (e.g. on a grid starting at M=0), the breakdown
+    value describes what happens between grid points, not at the
+    lowest one.
+    """
+    candidates = [
+        pt
+        for pt in sens.get("grid") or []
+        if isinstance(pt.get("M"), (int, float))
+    ]
+    if not candidates:
+        return None
+    candidates.sort(key=lambda pt: pt["M"])
+    lowest = candidates[0]
+    still_robust = lowest.get("robust_to_zero", True)
+    return None if still_robust else float(lowest["M"])
+
+
 def _pt_verdict(p: Optional[float]) -> str:
     """Map a pre-trends joint p-value to the three-bin verdict enum.
 
@@ -3118,13 +3144,33 @@ def _render_overall_interpretation(schema: Dict[str, Any], labels: Dict[str, str
                 f"pre-period variation."
             )
         else:
-            sentences.append(
-                f"HonestDiD sensitivity: the result is fragile — the "
-                f"confidence interval includes zero once violations reach "
-                f"{bkd:.2g}x the pre-period variation."
-                if isinstance(bkd, (int, float))
-                else ""
-            )
+            # Round-1 BR/DR canonical-validation (2026-04-19) then
+            # tightened per CI review on PR #341 R1: the "smallest
+            # grid point" wording is only semantically correct when
+            # the smallest M actually evaluated on the sensitivity
+            # grid has ``robust_to_zero == False``. ``breakdown_M``
+            # is the interpolated threshold between grid points, so
+            # a small breakdown value on a grid starting at M=0
+            # (where the smallest evaluated point is still robust)
+            # would previously have been narrated as "smallest grid
+            # point fails" — stronger than the evaluated grid
+            # supports. Mirror BR's fix: check the grid directly.
+            if isinstance(bkd, (int, float)):
+                smallest_failed_m = _smallest_failing_grid_m_dr(sens)
+                if smallest_failed_m is not None:
+                    sentences.append(
+                        "HonestDiD sensitivity: the result is fragile — "
+                        "the confidence interval includes zero even at "
+                        "the smallest M evaluated on the sensitivity "
+                        f"grid (M = {smallest_failed_m:.2g})."
+                    )
+                else:
+                    sentences.append(
+                        f"HonestDiD sensitivity: the result is fragile — "
+                        f"the confidence interval includes zero once "
+                        f"violations reach {bkd:.2g}x the pre-period "
+                        f"variation."
+                    )
 
     # Sentence 4: one secondary caveat if present.
     bacon = schema.get("bacon") or {}