Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 79 additions & 6 deletions diff_diff/business_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -1854,6 +1854,48 @@ def _significance_phrase(p: Optional[float], alpha: float) -> str:
return "the confidence interval includes zero; the data are consistent with no effect"


def _smallest_failing_grid_m(sens: Dict[str, Any]) -> Optional[float]:
"""If the smallest evaluated M on the HonestDiD sensitivity grid
already has the robust CI including zero, return that M. Returns
``None`` when the grid is missing or when the smallest evaluated
point is still robust — in the latter case ``breakdown_M`` is an
interpolated threshold between grid points, not a statement about
the smallest grid point itself.

Matches the twin helper in ``diagnostic_report.py``; keep the two
in sync for cross-surface parity.
"""
grid_points = sens.get("grid") or []
sorted_grid = sorted(
(p for p in grid_points if isinstance(p.get("M"), (int, float))),
key=lambda p: p["M"],
)
if not sorted_grid:
return None
smallest = sorted_grid[0]
if not smallest.get("robust_to_zero", True):
return float(smallest["M"])
return None


def _sentence_first_upper(text: str) -> str:
"""Uppercase only the first character of ``text``, preserving all
other casing. Unlike ``str.capitalize()``, which lowercases every
character after the first, this keeps user-supplied abbreviations
and proper nouns intact.

Examples
--------
>>> _sentence_first_upper("the NJ minimum-wage increase")
'The NJ minimum-wage increase'
>>> _sentence_first_upper("Castle Doctrine law adoption")
'Castle Doctrine law adoption'
"""
if not text:
return text
return text[0].upper() + text[1:]


def _direction_verb(effect: float, outcome_direction: Optional[str]) -> str:
"""Return a direction-aware verb for the headline sentence.

Expand Down Expand Up @@ -1929,7 +1971,16 @@ def _render_headline_sentence(schema: Dict[str, Any]) -> str:
# is not actually available.
ci_str = " (inference unavailable: confidence interval is undefined for this fit)"
by_clause = f" by {magnitude}" if effect != 0 else ""
return f"{treatment.capitalize()} {verb} {outcome}{by_clause}{ci_str}."
# Round-1 BR/DR canonical-validation (2026-04-19): Python's
# ``str.capitalize()`` lowercases everything except the first
# character, so ``"the NJ minimum-wage increase".capitalize()``
# returns ``"The nj minimum-wage increase"`` — flattening the
# ``NJ`` abbreviation. Real canonical datasets (Card-Krueger,
# Castle Doctrine) carry proper-noun / acronym tokens in the
# user-supplied ``treatment_label``, so preserve user casing and
# only ensure the first character is uppercase.
treatment_sentence = _sentence_first_upper(treatment)
return f"{treatment_sentence} {verb} {outcome}{by_clause}{ci_str}."


def _render_summary(schema: Dict[str, Any]) -> str:
Expand Down Expand Up @@ -2088,11 +2139,33 @@ def _render_summary(schema: Dict[str, Any]) -> str:
f"pre-period variation."
)
elif isinstance(bkd, (int, float)):
sentences.append(
f"HonestDiD: the result is fragile — the confidence interval "
f"includes zero once violations reach {bkd:.2g}x the "
f"pre-period variation."
)
# Round-1 BR/DR canonical-validation (2026-04-19) then
# tightened per CI review on PR #341 R1:
# ``breakdown_M`` is the smallest M at which the robust
# CI includes zero (interpolated between grid points) —
# not a claim about any specific grid point. Earlier fix
# keyed off ``bkd <= 0.05`` which incorrectly asserted
# "smallest grid point fails" even for grids that start
# at M=0 where the smallest evaluated point is still
# robust (e.g., grid=[0, 0.25, ...] with bkd=0.03). The
# "smallest grid point" wording is only accurate when
# the smallest evaluated M on the grid itself fails
# (``robust_to_zero == False``); otherwise fall through
# to the numeric multiplier.
smallest_failed_m = _smallest_failing_grid_m(sens)
if smallest_failed_m is not None:
sentences.append(
"HonestDiD: the result is fragile — the confidence "
"interval includes zero even at the smallest M "
f"evaluated on the sensitivity grid (M = "
f"{smallest_failed_m:.2g})."
)
else:
sentences.append(
f"HonestDiD: the result is fragile — the confidence "
f"interval includes zero once violations reach {bkd:.2g}x "
f"the pre-period variation."
)

# Sample sentence. For fits with a dynamic comparison set (CS /
# ContinuousDiD / StaggeredTripleDiff / EfficientDiD /
Expand Down
60 changes: 53 additions & 7 deletions diff_diff/diagnostic_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2780,6 +2780,32 @@ def _collect_pre_period_coefs(
return results_list, n_dropped_undefined


def _smallest_failing_grid_m_dr(sens: Dict[str, Any]) -> Optional[float]:
"""Return the smallest evaluated M on the HonestDiD sensitivity
grid if it already has the robust CI including zero, else ``None``.
Matches ``business_report._smallest_failing_grid_m`` — both helpers
must stay in sync for cross-surface parity. See PR #341 R1 review.

``breakdown_M`` is an interpolated threshold between grid points,
so "the smallest grid point fails" is only a valid claim when the
smallest actually-evaluated M has ``robust_to_zero == False``. On
a grid that starts at M=0 where the smallest evaluated point is
still robust, the breakdown value is information about what
happens between grid points — not at the smallest grid point.
"""
grid_points = sens.get("grid") or []
sorted_grid = sorted(
(p for p in grid_points if isinstance(p.get("M"), (int, float))),
key=lambda p: p["M"],
)
if not sorted_grid:
return None
smallest = sorted_grid[0]
if not smallest.get("robust_to_zero", True):
return float(smallest["M"])
return None


def _pt_verdict(p: Optional[float]) -> str:
"""Map a pre-trends joint p-value to the three-bin verdict enum.

Expand Down Expand Up @@ -3118,13 +3144,33 @@ def _render_overall_interpretation(schema: Dict[str, Any], labels: Dict[str, str
f"pre-period variation."
)
else:
sentences.append(
f"HonestDiD sensitivity: the result is fragile — the "
f"confidence interval includes zero once violations reach "
f"{bkd:.2g}x the pre-period variation."
if isinstance(bkd, (int, float))
else ""
)
# Round-1 BR/DR canonical-validation (2026-04-19) then
# tightened per CI review on PR #341 R1: the "smallest
# grid point" wording is only semantically correct when
# the smallest M actually evaluated on the sensitivity
# grid has ``robust_to_zero == False``. ``breakdown_M``
# is the interpolated threshold between grid points, so
# a small breakdown value on a grid starting at M=0
# (where the smallest evaluated point is still robust)
# would previously have been narrated as "smallest grid
# point fails" — stronger than the evaluated grid
# supports. Mirror BR's fix: check the grid directly.
if isinstance(bkd, (int, float)):
smallest_failed_m = _smallest_failing_grid_m_dr(sens)
if smallest_failed_m is not None:
sentences.append(
"HonestDiD sensitivity: the result is fragile — "
"the confidence interval includes zero even at "
"the smallest M evaluated on the sensitivity "
f"grid (M = {smallest_failed_m:.2g})."
)
else:
sentences.append(
f"HonestDiD sensitivity: the result is fragile — "
f"the confidence interval includes zero once "
f"violations reach {bkd:.2g}x the pre-period "
f"variation."
)

# Sentence 4: one secondary caveat if present.
bacon = schema.get("bacon") or {}
Expand Down
Loading
Loading