diff --git a/.github/workflows/validation-pdf.yml b/.github/workflows/validation-pdf.yml index e1da9e69..1485af14 100644 --- a/.github/workflows/validation-pdf.yml +++ b/.github/workflows/validation-pdf.yml @@ -20,6 +20,8 @@ jobs: - name: Install dependencies run: | + sudo apt-get update + sudo apt-get install -y poppler-utils python -m pip install --upgrade pip python -m pip install -e .[dev,notebooks] python -m pip install reportlab pillow @@ -64,6 +66,14 @@ jobs: --skip-command-tests \ --parity-mode gate + - name: Enforce visual validation gate + run: | + python tools/reports/check_validation_visuals.py \ + --report-pdf 'output/pdf/*.pdf' \ + --images-root tmp/pdfs/validation_report/notebook_images \ + --min-unique-images-per-topic 1 \ + --max-duplicate-pdf-pages 0 + - name: Upload validation PDF artifact uses: actions/upload-artifact@v4 with: diff --git a/docs/help/parity_dashboard.md b/docs/help/parity_dashboard.md index b81f41c8..00d778b5 100644 --- a/docs/help/parity_dashboard.md +++ b/docs/help/parity_dashboard.md @@ -16,8 +16,8 @@ artifacts in the `parity/` directory. |---|---:| | Total methods | 501 | | Contract-verified | 480 | -| Contract-explicit verified | 450 | -| Probe-verified | 30 | +| Contract-explicit verified | 480 | +| Probe-verified | 0 | | Excluded methods | 21 | | Missing symbols | 0 | | Unverified behavior | 0 | diff --git a/parity/function_example_alignment_report.json b/parity/function_example_alignment_report.json index 52696f07..d17b49b2 100644 --- a/parity/function_example_alignment_report.json +++ b/parity/function_example_alignment_report.json @@ -5134,7 +5134,7 @@ "matlab_class": "Covariate", "matlab_method_count": 14, "missing_symbol_count": 0, - "probe_verified_count": 3, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5146,7 +5146,7 @@ "matlab_class": "ConfidenceInterval", "matlab_method_count": 5, "missing_symbol_count": 0, - "probe_verified_count": 1, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5158,7 +5158,7 @@ "matlab_class": "Events", "matlab_method_count": 5, "missing_symbol_count": 0, - "probe_verified_count": 1, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5170,7 +5170,7 @@ "matlab_class": "History", "matlab_method_count": 8, "missing_symbol_count": 0, - "probe_verified_count": 1, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5194,7 +5194,7 @@ "matlab_class": "nstColl", "matlab_method_count": 53, "missing_symbol_count": 0, - "probe_verified_count": 1, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5254,7 +5254,7 @@ "matlab_class": "CIF", "matlab_method_count": 21, "missing_symbol_count": 0, - "probe_verified_count": 2, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5266,7 +5266,7 @@ "matlab_class": "Analysis", "matlab_method_count": 22, "missing_symbol_count": 0, - "probe_verified_count": 7, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -5290,7 +5290,7 @@ "matlab_class": "FitResSummary", "matlab_method_count": 30, "missing_symbol_count": 0, - "probe_verified_count": 14, + "probe_verified_count": 0, "unverified_behavior_count": 0 }, { @@ -6485,8 +6485,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "Covariate", "mapped_via_alias": false, @@ -6509,8 +6509,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plot", "mapped_via_alias": false, @@ -6557,8 +6557,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "toStructure", "mapped_via_alias": false, @@ -6689,8 +6689,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plot", "mapped_via_alias": false, @@ -6725,8 +6725,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plot", "mapped_via_alias": false, @@ -6809,8 +6809,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plot", "mapped_via_alias": false, @@ -7217,8 +7217,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "copy", "mapped_via_alias": true, @@ -9713,8 +9713,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "simulateCIFByThinning", "mapped_via_alias": false, @@ -9725,8 +9725,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "simulateCIF", "mapped_via_alias": false, @@ -9797,8 +9797,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotInvGausTrans", "mapped_via_alias": false, @@ -9809,8 +9809,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotFitResidual", "mapped_via_alias": false, @@ -9821,8 +9821,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "KSPlot", "mapped_via_alias": false, @@ -9845,8 +9845,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotCoeffs", "mapped_via_alias": false, @@ -9905,8 +9905,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "compHistEnsCoeff", "mapped_via_alias": false, @@ -9941,8 +9941,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "computeHistLagForAll", "mapped_via_alias": false, @@ -9989,8 +9989,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "bnlrCG", "mapped_via_alias": false, @@ -10517,8 +10517,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotIC", "mapped_via_alias": false, @@ -10529,8 +10529,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotAllCoeffs", "mapped_via_alias": false, @@ -10541,8 +10541,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plot3dCoeffSummary", "mapped_via_alias": false, @@ -10553,8 +10553,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plot2dCoeffSummary", "mapped_via_alias": false, @@ -10565,8 +10565,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotKSSummary", "mapped_via_alias": false, @@ -10577,8 +10577,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotAIC", "mapped_via_alias": false, @@ -10589,8 +10589,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotBIC", "mapped_via_alias": false, @@ -10601,8 +10601,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotlogLL", "mapped_via_alias": false, @@ -10613,8 +10613,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotResidualSummary", "mapped_via_alias": false, @@ -10625,8 +10625,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotSummary", "mapped_via_alias": false, @@ -10637,8 +10637,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "boxPlot", "mapped_via_alias": false, @@ -10673,8 +10673,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotCoeffsWithoutHistory", "mapped_via_alias": false, @@ -10685,8 +10685,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "getHistIndex", "mapped_via_alias": false, @@ -10721,8 +10721,8 @@ }, { "excluded_method": false, - "functional_status": "probe_verified", - "has_behavior_contract": false, + "functional_status": "contract_verified", + "has_behavior_contract": true, "has_probe_verification": true, "mapped_python_member": "plotHistCoeffs", "mapped_via_alias": false, @@ -11321,14 +11321,14 @@ } ], "summary": { - "contract_explicit_verified_methods": 450, + "contract_explicit_verified_methods": 480, "contract_verified_methods": 480, "contract_verified_ratio": 0.9580838323353293, "eligible_methods": 480, "eligible_verified_ratio": 1.0, "excluded_methods": 21, "missing_symbol_methods": 0, - "probe_verified_methods": 30, + "probe_verified_methods": 0, "total_methods": 501, "unverified_behavior_methods": 0 } diff --git a/parity/functional_gate_policy.yml b/parity/functional_gate_policy.yml index 3c035380..39c28360 100644 --- a/parity/functional_gate_policy.yml +++ b/parity/functional_gate_policy.yml @@ -4,8 +4,8 @@ policy_name: functional_equivalence_gate method_thresholds: min_verified_ratio_overall: 0.958 min_eligible_verified_ratio_overall: 1.0 - min_contract_explicit_verified_methods: 450 - max_probe_verified_methods: 30 + min_contract_explicit_verified_methods: 480 + max_probe_verified_methods: 0 max_unverified_behavior_methods: 0 max_missing_symbol_methods: 0 class_min_verified_methods: @@ -44,20 +44,20 @@ method_thresholds: DecodingAlgorithms: 1.0 class_max_probe_verified_methods: SignalObj: 0 - Covariate: 3 - ConfidenceInterval: 1 - Events: 1 - History: 1 + Covariate: 0 + ConfidenceInterval: 0 + Events: 0 + History: 0 nspikeTrain: 0 - nstColl: 1 + nstColl: 0 CovColl: 0 TrialConfig: 0 ConfigColl: 0 Trial: 0 - CIF: 2 - Analysis: 7 + CIF: 0 + Analysis: 0 FitResult: 0 - FitResSummary: 14 + FitResSummary: 0 DecodingAlgorithms: 0 example_thresholds: diff --git a/parity/method_closure_sprint.md b/parity/method_closure_sprint.md index 516b34be..23bf2eaa 100644 --- a/parity/method_closure_sprint.md +++ b/parity/method_closure_sprint.md @@ -4,87 +4,25 @@ This sprint backlog targets methods that are probe-verified but not yet explicit ## Functional Summary - Total methods: `501` -- Contract-explicit verified methods: `450` -- Probe-verified methods: `30` +- Contract-explicit verified methods: `480` +- Probe-verified methods: `0` - Eligible verified ratio: `1.000` - Excluded methods: `21` ## Priority Class Queue | Class | Probe-verified | Contract-verified | Probe-only methods | |---|---:|---:|---:| -| FitResSummary | 14 | 30 | 14 | -| Analysis | 7 | 22 | 7 | -| Covariate | 3 | 14 | 3 | -| CIF | 2 | 21 | 2 | -| nstColl | 1 | 53 | 1 | -| History | 1 | 8 | 1 | -| ConfidenceInterval | 1 | 5 | 1 | -| Events | 1 | 5 | 1 | +| SignalObj | 0 | 98 | 0 | +| Trial | 0 | 68 | 0 | +| CovColl | 0 | 55 | 0 | +| nstColl | 0 | 53 | 0 | +| FitResult | 0 | 33 | 0 | +| FitResSummary | 0 | 30 | 0 | +| nspikeTrain | 0 | 29 | 0 | +| DecodingAlgorithms | 0 | 24 | 0 | ## Sprint Work Packages -### FitResSummary -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `boxPlot` - - `getHistIndex` - - `plot2dCoeffSummary` - - `plot3dCoeffSummary` - - `plotAIC` - - `plotAllCoeffs` - - `plotBIC` - - `plotCoeffsWithoutHistory` - - `plotHistCoeffs` - - `plotIC` - - `plotKSSummary` - - `plotResidualSummary` - - `plotSummary` - - `plotlogLL` - -### Analysis -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `KSPlot` - - `bnlrCG` - - `compHistEnsCoeff` - - `computeHistLagForAll` - - `plotCoeffs` - - `plotFitResidual` - - `plotInvGausTrans` - -### Covariate -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `Covariate` - - `plot` - - `toStructure` - -### CIF -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `simulateCIF` - - `simulateCIFByThinning` - -### nstColl -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `nstColl` - -### History -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `plot` - -### ConfidenceInterval -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `plot` - -### Events -- Goal: Convert probe-only functional verification to explicit behavior contracts. -- Candidate methods: - - `plot` - ## Excluded MATLAB Stub Methods - `DecodingAlgorithms` - `KF_ComputeParamStandardErrors` diff --git a/parity/numeric_drift_report.json b/parity/numeric_drift_report.json index cbdce586..3a0fab49 100644 --- a/parity/numeric_drift_report.json +++ b/parity/numeric_drift_report.json @@ -1,6 +1,6 @@ { "schema_version": 1, - "generated_at_utc": "2026-03-02T21:56:07.322826+00:00", + "generated_at_utc": "2026-03-02T22:07:52.442127+00:00", "fixtures_manifest": "/private/tmp/nstat_python_work_20260302/tests/parity/fixtures/matlab_gold/manifest.yml", "thresholds_file": "/private/tmp/nstat_python_work_20260302/parity/numeric_drift_thresholds.yml", "summary": { diff --git a/tests/parity/compat_behavior_specs.yml b/tests/parity/compat_behavior_specs.yml index a3f5384f..a48224fe 100644 --- a/tests/parity/compat_behavior_specs.yml +++ b/tests/parity/compat_behavior_specs.yml @@ -526,6 +526,11 @@ classes: python_class: nstat.compat.matlab.Covariate scenario: compat_covariate_basic contracts: + - member: Covariate + access: method + args_key: cov_ctor_args + expect: + instance_of: nstat.compat.matlab.Covariate - member: computeMeanPlusCI access: method select: 0 @@ -559,6 +564,13 @@ classes: args_key: cov_from_structure_args expect: instance_of: nstat.compat.matlab.Covariate + - member: toStructure + access: method + expect: + instance_of: builtins.dict + - member: plot + access: method + expect: {} - member: setConfInterval access: method args_key: cov_set_ci_args @@ -609,6 +621,9 @@ classes: args_key: ci_from_structure_args expect: instance_of: nstat.compat.matlab.ConfidenceInterval + - member: plot + access: method + expect: {} - matlab_class: Events python_class: nstat.compat.matlab.Events @@ -633,6 +648,9 @@ classes: args_key: events_dsxy_args expect: shape: [1, 2] + - member: plot + access: method + expect: {} - matlab_class: History python_class: nstat.compat.matlab.History @@ -676,6 +694,9 @@ classes: args_key: history_from_structure_args expect: instance_of: nstat.compat.matlab.History + - member: plot + access: method + expect: {} - matlab_class: nspikeTrain python_class: nstat.compat.matlab.nspikeTrain @@ -832,6 +853,15 @@ classes: python_class: nstat.compat.matlab.nstColl scenario: compat_spike_coll_basic contracts: + - member: nstColl + access: method + args_key: coll_ctor_args + expect: + instance_of: nstat.compat.matlab.nstColl + - member: copy + access: method + expect: + instance_of: nstat.spikes.SpikeTrainCollection - member: getFirstSpikeTime access: method expect: @@ -2024,6 +2054,14 @@ classes: args_key: cif_set_history_args expect: instance_of: nstat.compat.matlab.CIF + - member: simulateCIF + access: method + args_key: cif_sim_args + expect: {} + - member: simulateCIFByThinning + access: method + args_key: cif_sim_args + expect: {} - member: simulateCIFByThinningFromLambda access: method args_key: cif_lambda_sim_args @@ -2098,6 +2136,37 @@ classes: args_key: analysis_plot_seq_args expect: length: 1 + - member: plotFitResidual + access: method + args_key: residual_args + expect: {} + - member: plotInvGausTrans + access: method + args_key: inv_args + expect: {} + - member: KSPlot + access: method + args_key: analysis_ksplot_args + expect: {} + - member: plotCoeffs + access: method + args_key: analysis_plot_coeffs_args + expect: {} + - member: compHistEnsCoeff + access: method + args_key: analysis_comp_hist_args + expect: + finite: true + - member: computeHistLagForAll + access: method + args_key: analysis_hist_lag_all_args + expect: + instance_of: builtins.tuple + - member: bnlrCG + access: method + args_key: analysis_bnlrcg_args + expect: + instance_of: nstat.fit.FitResult - member: compHistEnsCoeffForAll access: method args_key: analysis_comp_hist_all_args @@ -2371,6 +2440,50 @@ classes: equals: - stim - hist + - member: plotIC + access: method + expect: {} + - member: plotAllCoeffs + access: method + expect: {} + - member: plot3dCoeffSummary + access: method + expect: {} + - member: plot2dCoeffSummary + access: method + expect: {} + - member: plotKSSummary + access: method + expect: {} + - member: plotAIC + access: method + expect: {} + - member: plotBIC + access: method + expect: {} + - member: plotlogLL + access: method + expect: {} + - member: plotResidualSummary + access: method + expect: {} + - member: plotSummary + access: method + expect: {} + - member: boxPlot + access: method + expect: + instance_of: builtins.dict + - member: plotCoeffsWithoutHistory + access: method + expect: {} + - member: getHistIndex + access: method + expect: + instance_of: builtins.tuple + - member: plotHistCoeffs + access: method + expect: {} - matlab_class: DecodingAlgorithms python_class: nstat.compat.matlab.DecodingAlgorithms diff --git a/tests/test_compat_behavior_contracts.py b/tests/test_compat_behavior_contracts.py index 74933490..bf034cb3 100644 --- a/tests/test_compat_behavior_contracts.py +++ b/tests/test_compat_behavior_contracts.py @@ -141,6 +141,7 @@ def _build_compat_covariate_basic() -> tuple[Any, dict[str, Any]]: ) return obj, { "cov_from_structure_args": [obj.toStructure()], + "cov_ctor_args": [obj.toStructure()], "cov_filter_args": [np.array([0.2, 0.2]), np.array([1.0, -0.3])], "cov_set_ci_args": [ci], } @@ -368,6 +369,7 @@ def _build_compat_cif_basic() -> tuple[Any, dict[str, Any]]: "cif_eval_args": [X, 1.0], "cif_grad_args": [X], "cif_fn_args": [X], + "cif_sim_args": [np.linspace(0.0, 1.0, X.shape[0]), X], "cif_set_spike_args": [spike], "cif_set_history_args": [history], "cif_lambda_sim_args": [lam, 2], @@ -398,6 +400,11 @@ def _build_compat_analysis_basic() -> tuple[Any, dict[str, Any]]: "residual_args": [y, X, fit, 1.0], "inv_args": [y, X, fit, 1.0], "ks_args": [np.sort(np.random.default_rng(1).uniform(size=50))], + "analysis_ksplot_args": [fit, 1], + "analysis_plot_coeffs_args": [fit], + "analysis_comp_hist_args": [y1, Xh, 1.0], + "analysis_hist_lag_all_args": [np.column_stack([y1, y2]), 12], + "analysis_bnlrcg_args": [X, y, 1.0, 0.0], "analysis_run_neuron_args": [trial, config, 0], "analysis_run_all_args": [trial, config], "analysis_plot_seq_args": [np.array([0.2, -0.1, 0.05, 0.0])], diff --git a/tools/reports/check_validation_visuals.py b/tools/reports/check_validation_visuals.py new file mode 100755 index 00000000..90b7efea --- /dev/null +++ b/tools/reports/check_validation_visuals.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +"""CI gate for validation report visual quality. + +Fails when: +- any topic has fewer than the configured minimum unique notebook figures +- rendered PDF pages contain duplicates (same visual hash) +""" + +from __future__ import annotations + +import argparse +import hashlib +import shutil +import subprocess +import tempfile +from pathlib import Path + +import numpy as np +from PIL import Image + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--report-pdf", + required=True, + help="PDF file path or glob pattern (e.g., output/pdf/*.pdf).", + ) + parser.add_argument( + "--images-root", + type=Path, + default=Path("tmp/pdfs/validation_report/notebook_images"), + help="Root directory containing per-topic notebook images.", + ) + parser.add_argument( + "--min-unique-images-per-topic", + type=int, + default=1, + help="Minimum required unique PNG images per topic.", + ) + parser.add_argument( + "--max-duplicate-pdf-pages", + type=int, + default=0, + help="Maximum allowed duplicate rendered PDF pages.", + ) + return parser.parse_args() + + +def _resolve_pdf(path_or_glob: str) -> Path: + cand = sorted(Path().glob(path_or_glob)) + if not cand: + p = Path(path_or_glob) + if p.exists(): + return p + raise FileNotFoundError(f"No PDF matches: {path_or_glob}") + return max(cand, key=lambda p: p.stat().st_mtime) + + +def _image_fingerprint(path: Path) -> str: + arr = np.asarray( + Image.open(path).convert("L").resize((256, 256), Image.Resampling.BILINEAR), + dtype=np.uint8, + ) + return hashlib.sha256(arr.tobytes()).hexdigest() + + +def _check_topic_images(images_root: Path, min_unique: int) -> tuple[list[str], dict[str, tuple[int, int]]]: + if not images_root.exists(): + raise FileNotFoundError(f"Images root not found: {images_root}") + + failures: list[str] = [] + stats: dict[str, tuple[int, int]] = {} + topic_dirs = sorted([p for p in images_root.iterdir() if p.is_dir()]) + if not topic_dirs: + failures.append("no topic image directories found") + return failures, stats + + for topic_dir in topic_dirs: + pngs = sorted(topic_dir.glob("*.png")) + hashes = [_image_fingerprint(p) for p in pngs] + unique = len(set(hashes)) + stats[topic_dir.name] = (len(pngs), unique) + if unique < min_unique: + failures.append( + f"topic={topic_dir.name}: unique_images={unique} < min_required={min_unique}" + ) + return failures, stats + + +def _check_pdf_page_duplicates(pdf_path: Path, max_dupes: int) -> tuple[list[str], int, int]: + if shutil.which("pdftoppm") is None: + raise RuntimeError("pdftoppm is required for PDF visual gate but was not found in PATH") + + with tempfile.TemporaryDirectory(prefix="nstat_pdf_gate_") as tmp: + out_prefix = Path(tmp) / "page" + subprocess.run( + ["pdftoppm", "-png", str(pdf_path), str(out_prefix)], + check=True, + capture_output=True, + text=True, + ) + page_pngs = sorted(Path(tmp).glob("page-*.png")) + if not page_pngs: + return ["pdf rendered to zero pages"], 0, 0 + + hashes = [hashlib.sha256(p.read_bytes()).hexdigest() for p in page_pngs] + total = len(hashes) + unique = len(set(hashes)) + dupes = total - unique + failures = [] + if dupes > max_dupes: + failures.append(f"duplicate_pdf_pages={dupes} > max_allowed={max_dupes}") + return failures, total, dupes + + +def main() -> int: + args = parse_args() + pdf_path = _resolve_pdf(args.report_pdf) + + image_failures, topic_stats = _check_topic_images( + images_root=args.images_root, + min_unique=args.min_unique_images_per_topic, + ) + pdf_failures, total_pages, duplicate_pages = _check_pdf_page_duplicates( + pdf_path=pdf_path, + max_dupes=args.max_duplicate_pdf_pages, + ) + + print(f"Validation PDF gate: {pdf_path}") + print(f"Topic coverage: {len(topic_stats)} topics") + for topic, (total, unique) in sorted(topic_stats.items()): + print(f" - {topic}: total_images={total} unique_images={unique}") + print(f"PDF pages: total={total_pages} duplicate_pages={duplicate_pages}") + + failures = image_failures + pdf_failures + if failures: + print("Visual gate failures:") + for row in failures: + print(f" - {row}") + return 1 + + print("Visual validation gate passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())