54 changes: 54 additions & 0 deletions tests/test_validation_parity_consistency.py
@@ -0,0 +1,54 @@
from __future__ import annotations

import json
from pathlib import Path

import pytest


def _strict_failure_topics(strict_summary: dict[str, object]) -> set[str]:
    """Extract topic names from strict-checker failure strings of the form "topic: detail"."""
    failures = strict_summary.get("failures", [])
out: set[str] = set()
if not isinstance(failures, list):
return out
for item in failures:
text = str(item).strip()
if not text:
continue
topic = text.split(":", 1)[0].strip()
if topic:
out.add(topic)
return out


def _pdf_failure_topics(pdf_summary: dict[str, object]) -> set[str]:
    """Collect topics that the PDF summary marks as explicit parity failures."""
    rows = pdf_summary.get("topics", [])
out: set[str] = set()
if not isinstance(rows, list):
return out
for row in rows:
if not isinstance(row, dict):
continue
topic = str(row.get("topic", "")).strip()
parity_pass = row.get("parity_pass")
if topic and parity_pass is False:
out.add(topic)
return out


def test_pdf_image_mode_parity_matches_strict_ordinal_summary_when_artifacts_present() -> None:
    """Image-mode PDF parity verdicts must agree with the strict ordinal checker."""
    repo_root = Path(__file__).resolve().parents[1]
strict_path = repo_root / "output" / "pdf" / "image_mode_parity" / "summary_full.json"
pdf_path = repo_root / "output" / "pdf" / "validation_report_latest.json"

if not strict_path.exists() or not pdf_path.exists():
pytest.skip("Validation parity artifacts not present locally")

strict_summary = json.loads(strict_path.read_text(encoding="utf-8"))
pdf_summary = json.loads(pdf_path.read_text(encoding="utf-8"))

strict_topics = _strict_failure_topics(strict_summary)
pdf_topics = _pdf_failure_topics(pdf_summary)

assert strict_topics == pdf_topics
assert int(pdf_summary.get("parity_failures", -1)) == len(pdf_topics)
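For reference, a minimal sketch of the two payload shapes these helpers consume — the topic names are illustrative, not taken from the real artifacts:

# Hypothetical payloads; only the fields the helpers actually read are shown.
strict_summary = {"failures": ["TopicA: ordinal count mismatch", "TopicB: pair score below threshold"]}
pdf_summary = {
    "parity_failures": 2,
    "topics": [
        {"topic": "TopicA", "parity_pass": False},
        {"topic": "TopicB", "parity_pass": False},
        {"topic": "TopicC", "parity_pass": True},
    ],
}
assert _strict_failure_topics(strict_summary) == {"TopicA", "TopicB"}
assert _pdf_failure_topics(pdf_summary) == {"TopicA", "TopicB"}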
8 changes: 7 additions & 1 deletion tools/reports/check_validation_visuals.py
@@ -19,6 +19,11 @@
import yaml
from PIL import Image

DEFAULT_NO_FIGURE_TOPICS = {
"publish_all_helpfiles",
"FitResultReference",
}


def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
@@ -74,14 +79,15 @@ def _image_fingerprint(path: Path) -> str:

def _load_no_figure_topics(manifest_path: Path) -> set[str]:
if not manifest_path.exists():
return set()
return set(DEFAULT_NO_FIGURE_TOPICS)
payload = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) or {}
rows = payload.get("topics", [])
out: set[str] = set()
for row in rows:
expected_figs = int(row.get("expected_figure_count", 0) or 0)
if bool(row.get("no_figure_utility", False)) or expected_figs <= 0:
out.add(str(row.get("topic", "")).strip())
out.update(DEFAULT_NO_FIGURE_TOPICS)
return {topic for topic in out if topic}
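With this change the two default utility topics are exempt whether or not the manifest exists. A small sketch of the merge behavior, using hypothetical manifest rows and the inline equivalent of the loop above:

# Hypothetical rows: only the first qualifies on its own; the defaults merge in unconditionally.
rows = [
    {"topic": "some_utility", "no_figure_utility": True, "expected_figure_count": 0},
    {"topic": "some_plotter", "no_figure_utility": False, "expected_figure_count": 3},
]
out = {str(r["topic"]) for r in rows if r["no_figure_utility"] or int(r["expected_figure_count"]) <= 0}
out |= DEFAULT_NO_FIGURE_TOPICS
assert out == {"some_utility", "publish_all_helpfiles", "FitResultReference"}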


175 changes: 171 additions & 4 deletions tools/reports/generate_validation_pdf.py
@@ -17,6 +17,7 @@
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path

import nbformat
import numpy as np
@@ -75,6 +76,17 @@ class NotebookReport:
parity_metrics: dict[str, object] | None


def _to_path(value: str | Path | None) -> Path | None:
    """Coerce a str or Path value to Path; None and blank strings map to None."""
    if value is None:
return None
if isinstance(value, Path):
return value
text = str(value).strip()
if not text:
return None
return Path(text)


def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
@@ -122,7 +134,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--parity-threshold",
type=float,
default=0.80,
default=0.70,
help="Minimum image similarity score in [0,1] for Python-vs-MATLAB pass.",
)
parser.add_argument(
@@ -164,6 +176,24 @@
default=REPO_ROOT / "parity" / "line_by_line_review_report.json",
help="Line-by-line review report JSON used for per-topic step alignment metrics.",
)
parser.add_argument(
"--ordinal-parity-manifest",
type=Path,
default=REPO_ROOT / "parity" / "help_source_manifest.yml",
help="Manifest used by strict ordinal image parity checker.",
)
parser.add_argument(
"--python-image-root",
type=Path,
default=REPO_ROOT / "output" / "notebook_images",
help="Root with Python fig_### images for strict ordinal parity checks.",
)
parser.add_argument(
"--matlab-image-root",
type=Path,
default=REPO_ROOT / "output" / "matlab_help_images",
help="Root with MATLAB fig_### reference images for strict ordinal parity checks.",
)
parser.add_argument(
"--skip-command-tests",
action="store_true",
@@ -700,6 +730,86 @@ def _cross_topic_duplicate_stats(reports: list[NotebookReport]) -> dict[str, int]:
}


def _run_strict_ordinal_parity(
repo_root: Path,
*,
manifest: Path,
python_image_root: Path,
matlab_image_root: Path,
threshold: float,
out_json: Path,
diff_root: Path,
) -> dict[str, object]:
    """Run the strict ordinal image parity checker in a subprocess and load its JSON summary."""
    cmd = [
"python",
"tools/reports/check_helpfile_ordinal_image_parity.py",
"--manifest",
str(manifest),
"--python-image-root",
str(python_image_root),
"--matlab-image-root",
str(matlab_image_root),
"--ssim-threshold",
f"{threshold:.6f}",
"--out-json",
str(out_json),
"--diff-root",
str(diff_root),
]
result = subprocess.run(cmd, cwd=repo_root, capture_output=True, text=True, check=False)
if out_json.exists():
return json.loads(out_json.read_text(encoding="utf-8"))
raise RuntimeError(
"Strict ordinal parity checker did not produce summary JSON. "
f"stdout={result.stdout[-400:]} stderr={result.stderr[-400:]}"
)
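Called with the argparse defaults above, the wrapper amounts to the following sketch (the output locations under output/pdf are illustrative; the caller below picks them per run):

# Standalone invocation using the defaults wired into parse_args.
summary = _run_strict_ordinal_parity(
    REPO_ROOT,
    manifest=REPO_ROOT / "parity" / "help_source_manifest.yml",
    python_image_root=REPO_ROOT / "output" / "notebook_images",
    matlab_image_root=REPO_ROOT / "output" / "matlab_help_images",
    threshold=0.70,
    out_json=REPO_ROOT / "output" / "pdf" / "image_mode_parity" / "summary_pdf_mode.json",
    diff_root=REPO_ROOT / "output" / "pdf" / "image_mode_parity" / "diffs_pdf_mode",
)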


def _apply_strict_ordinal_summary_to_reports(
reports: list[NotebookReport],
summary: dict[str, object],
*,
threshold: float,
) -> None:
    """Overwrite each report's parity fields with the strict checker's per-topic verdicts."""
    rows = summary.get("topics", [])
by_topic = {str(row.get("topic", "")).strip(): row for row in rows if isinstance(row, dict)}

for report in reports:
topic_row = by_topic.get(report.topic)
if topic_row is None:
report.parity_pass = None
report.similarity_score = None
report.matched_python_image = None
report.matched_matlab_image = None
continue

no_figure_utility = bool(topic_row.get("no_figure_utility", False))
pairs = topic_row.get("pairs", [])
if not isinstance(pairs, list):
pairs = []

if pairs:
scores = [float(pair.get("score", 0.0)) for pair in pairs]
report.similarity_score = min(scores)
first_pair = pairs[0]
report.matched_python_image = _to_path(first_pair.get("python_image"))
report.matched_matlab_image = _to_path(first_pair.get("matlab_image"))
else:
report.similarity_score = None
report.matched_python_image = None
report.matched_matlab_image = None

if no_figure_utility:
report.parity_pass = True
continue

produced = int(topic_row.get("produced_figures", 0) or 0)
reference = int(topic_row.get("reference_figures", 0) or 0)
counts_ok = produced == reference
pairs_ok = all(float(pair.get("score", 0.0)) >= threshold for pair in pairs)
report.parity_pass = bool(counts_ok and pairs_ok)
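The pass rule is strict: a topic passes only when it produced exactly as many figures as the MATLAB reference and every ordinal pair meets the threshold; no-figure utilities pass unconditionally. This hypothetical row fails on the count check even though both pair scores clear a 0.70 threshold:

# Both scores pass, but 2 produced vs 3 reference figures makes counts_ok
# False, so parity_pass comes out False for this topic.
topic_row = {
    "topic": "ExampleTopic",
    "no_figure_utility": False,
    "produced_figures": 2,
    "reference_figures": 3,
    "pairs": [{"score": 0.91}, {"score": 0.84}],
}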


def _draw_wrapped_lines(
pdf: canvas.Canvas,
x: float,
@@ -1385,7 +1495,10 @@ def generate_pdf_report(
example_output_spec: Path,
numeric_drift_report: Path,
line_review_report: Path,
) -> tuple[Path, list[NotebookReport], list[CommandResult], Path | None]:
ordinal_parity_manifest: Path,
python_image_root: Path,
matlab_image_root: Path,
) -> tuple[Path, list[NotebookReport], list[CommandResult], Path | None, Path | None]:
output_pdf.parent.mkdir(parents=True, exist_ok=True)
tmp_dir.mkdir(parents=True, exist_ok=True)

@@ -1430,6 +1543,24 @@
)
)

strict_ordinal_summary_path: Path | None = None
if parity_mode == "image" and not skip_parity_check:
strict_ordinal_summary_path = tmp_dir / "image_mode_parity" / "summary_pdf_mode.json"
strict_ordinal_summary = _run_strict_ordinal_parity(
repo_root=repo_root,
manifest=ordinal_parity_manifest,
python_image_root=python_image_root,
matlab_image_root=matlab_image_root,
threshold=parity_threshold,
out_json=strict_ordinal_summary_path,
diff_root=tmp_dir / "image_mode_parity" / "diffs_pdf_mode",
)
_apply_strict_ordinal_summary_to_reports(
reports=reports,
summary=strict_ordinal_summary,
threshold=parity_threshold,
)

commit = (
subprocess.run(["git", "rev-parse", "--short", "HEAD"], cwd=repo_root, capture_output=True, text=True)
.stdout.strip()
@@ -1466,15 +1597,17 @@
draw_example_comparison_page(pdf=pdf, report=report, index=index, total=total)

pdf.save()
return output_pdf, reports, command_results, resolved_matlab_help_root
return output_pdf, reports, command_results, resolved_matlab_help_root, strict_ordinal_summary_path


def main() -> int:
args = parse_args()
stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_pdf = args.output_dir / f"nstat_python_validation_report_{stamp}.pdf"
output_json = args.output_dir / f"nstat_python_validation_report_{stamp}.json"
latest_json = args.output_dir / "validation_report_latest.json"

report_path, reports, command_results, matlab_help_root = generate_pdf_report(
report_path, reports, command_results, matlab_help_root, strict_ordinal_summary_path = generate_pdf_report(
repo_root=args.repo_root,
manifest_path=args.manifest,
output_pdf=output_pdf,
@@ -1490,6 +1623,9 @@ def main() -> int:
example_output_spec=args.example_output_spec,
numeric_drift_report=args.numeric_drift_report,
line_review_report=args.line_review_report,
ordinal_parity_manifest=args.ordinal_parity_manifest,
python_image_root=args.python_image_root,
matlab_image_root=args.matlab_image_root,
)

executed = sum(1 for report in reports if report.executed)
@@ -1521,6 +1657,37 @@
print(f"Numeric drift topic results: checked={numeric_checked} failures={numeric_failures}")
print(f"Command checks: total={len(command_results)} failed={command_failures}")

summary_payload = {
"generated_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
"report_pdf": str(report_path),
"parity_mode": args.parity_mode,
"parity_threshold": float(args.parity_threshold),
"strict_ordinal_summary": str(strict_ordinal_summary_path) if strict_ordinal_summary_path else None,
"notebooks_total": len(reports),
"notebooks_executed": executed,
"notebooks_failed": exec_failures,
"parity_checked": parity_checked,
"parity_failures": parity_failures,
"command_checks_total": len(command_results),
"command_checks_failed": command_failures,
"topics": [
{
"topic": report.topic,
"executed": report.executed,
"parity_pass": report.parity_pass,
"similarity_score": report.similarity_score,
"image_count": report.image_count,
"unique_image_count": report.unique_image_count,
"matched_python_image": str(report.matched_python_image) if report.matched_python_image else None,
"matched_matlab_image": str(report.matched_matlab_image) if report.matched_matlab_image else None,
}
for report in reports
],
}
output_json.parent.mkdir(parents=True, exist_ok=True)
output_json.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")
latest_json.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")

return 0 if exec_failures == 0 and command_failures == 0 and parity_failures == 0 else 1
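Note that validation_report_latest.json written here is exactly what tests/test_validation_parity_consistency.py reads; a self-consistency check mirroring that test's second assertion might look like:

# Sketch: parity_failures must equal the number of topics whose parity_pass
# is explicitly False, matching the assertion in the new test above.
payload = json.loads(latest_json.read_text(encoding="utf-8"))
explicit_failures = sum(1 for row in payload["topics"] if row["parity_pass"] is False)
assert payload["parity_failures"] == explicit_failures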

