diff --git a/desloppify/app/cli_support/parser.py b/desloppify/app/cli_support/parser.py index 5da8348b..8612fffd 100644 --- a/desloppify/app/cli_support/parser.py +++ b/desloppify/app/cli_support/parser.py @@ -112,6 +112,15 @@ def create_parser(*, langs: list[str], detector_names: list[str]) -> argparse.Ar action="version", version=_cli_version_string(), ) + parser.add_argument( + "--allow-unsafe-coerce", + action="store_true", + default=False, + help=( + "Allow unsafe persistence coercion/re-save for recovery workflows. " + "Use only when explicitly repairing corrupted state/plan payloads." + ), + ) sub = parser.add_subparsers( dest="command", parser_class=_NoAbbrevArgumentParser, diff --git a/desloppify/base/exception_sets.py b/desloppify/base/exception_sets.py index f9a6451e..bd7f9553 100644 --- a/desloppify/base/exception_sets.py +++ b/desloppify/base/exception_sets.py @@ -30,6 +30,10 @@ class TriageValidationError(CommandError): """Raised for invalid triage-stage attestation or workflow inputs.""" +class PersistenceSafetyError(CommandError): + """Raised when plan/state persistence safety checks fail.""" + + PLAN_LOAD_EXCEPTIONS = ( ImportError, AttributeError, @@ -42,6 +46,7 @@ class TriageValidationError(CommandError): __all__ = [ "CommandError", "PLAN_LOAD_EXCEPTIONS", + "PersistenceSafetyError", "PacketValidationError", "RunnerTimeoutError", "TriageValidationError", diff --git a/desloppify/base/runtime_state.py b/desloppify/base/runtime_state.py index 3ab0fce8..904fdfb1 100644 --- a/desloppify/base/runtime_state.py +++ b/desloppify/base/runtime_state.py @@ -67,6 +67,7 @@ class RuntimeContext: source_file_cache: SourceFileCache = field( default_factory=lambda: SourceFileCache(max_entries=16) ) + allow_unsafe_coerce: bool = False _PROCESS_RUNTIME_CONTEXT = RuntimeContext() diff --git a/desloppify/cli.py b/desloppify/cli.py index bb418d55..2f521c5b 100644 --- a/desloppify/cli.py +++ b/desloppify/cli.py @@ -19,6 +19,7 @@ from desloppify.base.discovery.paths import get_default_path, get_project_root from desloppify.base.registry import detector_names, on_detector_registered from desloppify.base.runtime_state import runtime_scope +from desloppify.base.runtime_state import current_runtime_context from desloppify.languages import available_langs from desloppify.state import load_state @@ -169,6 +170,9 @@ def main() -> None: try: with runtime_scope(): + current_runtime_context().allow_unsafe_coerce = bool( + getattr(args, "allow_unsafe_coerce", False) + ) _resolve_default_path(args) _load_shared_runtime(args) diff --git a/desloppify/engine/_plan/persistence.py b/desloppify/engine/_plan/persistence.py index c156772c..6c37f180 100644 --- a/desloppify/engine/_plan/persistence.py +++ b/desloppify/engine/_plan/persistence.py @@ -9,7 +9,9 @@ from pathlib import Path from desloppify.base.discovery.file_paths import safe_write_text +from desloppify.base.exception_sets import PersistenceSafetyError from desloppify.base.output.fallbacks import log_best_effort_failure +from desloppify.base.runtime_state import current_runtime_context from desloppify.engine._plan.schema import ( PLAN_VERSION, PlanModel, @@ -22,61 +24,179 @@ logger = logging.getLogger(__name__) PLAN_FILE = STATE_DIR / "plan.json" +_UNSAFE_MARKER_KEY = "_unsafe_load_reasons" +_QUARANTINE_KEY = "_load_quarantine" +_SAFE_ERROR_PREFIX = "DLP_PERSISTENCE_PLAN" -def load_plan(path: Path | None = None) -> PlanModel: - """Load plan from disk, or return empty plan on missing/corruption.""" +def _allow_unsafe_coerce(allow_unsafe_coerce: bool | None) -> bool: + if allow_unsafe_coerce is not None: + return bool(allow_unsafe_coerce) + return bool(current_runtime_context().allow_unsafe_coerce) + + +def _quarantine_path(plan_path: Path) -> Path: + stamp = utc_now().replace(":", "-") + return plan_path.with_name(f"{plan_path.stem}.quarantine.{stamp}{plan_path.suffix}") + + +def _write_quarantine_snapshot(plan_path: Path, *, raw_text: str, reason: str) -> Path | None: + quarantine_path = _quarantine_path(plan_path) + payload = { + "source_path": str(plan_path), + "reason": reason, + "captured_at": utc_now(), + "raw_text": raw_text, + } + try: + safe_write_text(quarantine_path, json.dumps(payload, indent=2, default=json_default) + "\n") + except OSError as exc: + log_best_effort_failure(logger, "write plan quarantine snapshot", exc) + return None + return quarantine_path + + +def _raise_plan_safety_error(*, code: str, detail: str, quarantine_path: Path | None = None) -> None: + message = f"[{_SAFE_ERROR_PREFIX}_{code}] {detail}" + if quarantine_path is not None: + message += f" Recovery snapshot: {quarantine_path}" + raise PersistenceSafetyError(message, exit_code=2) + + +def load_plan( + path: Path | None = None, + *, + allow_unsafe_coerce: bool | None = None, +) -> PlanModel: + """Load plan from disk with explicit safety checks.""" plan_path = path or PLAN_FILE + unsafe_allowed = _allow_unsafe_coerce(allow_unsafe_coerce) if not plan_path.exists(): return empty_plan() + raw_primary = "" try: - data = json.loads(plan_path.read_text()) + raw_primary = plan_path.read_text() + data = json.loads(raw_primary) except (json.JSONDecodeError, UnicodeDecodeError, OSError) as ex: # Try backup before giving up backup = plan_path.with_suffix(".json.bak") if backup.exists(): try: - data = json.loads(backup.read_text()) + raw_backup = backup.read_text() + data = json.loads(raw_backup) logger.warning("Plan file corrupted (%s), loaded from backup.", ex) print(f" Warning: Plan file corrupted ({ex}), loaded from backup.", file=sys.stderr) # Fall through to validation below except (json.JSONDecodeError, UnicodeDecodeError, OSError) as backup_ex: logger.warning("Plan file and backup both corrupted: %s / %s", ex, backup_ex) - print(f" Warning: Plan file corrupted ({ex}). Starting fresh.", file=sys.stderr) - return empty_plan() + quarantine_path = _write_quarantine_snapshot( + plan_path, + raw_text=raw_primary, + reason=f"primary parse error: {ex}; backup parse error: {backup_ex}", + ) + _raise_plan_safety_error( + code="PARSE_FAILED", + detail="Plan file and backup are unreadable.", + quarantine_path=quarantine_path, + ) else: logger.warning("Plan file corrupted (%s). Starting fresh.", ex) - print(f" Warning: Plan file corrupted ({ex}). Starting fresh.", file=sys.stderr) - return empty_plan() + quarantine_path = _write_quarantine_snapshot( + plan_path, + raw_text=raw_primary, + reason=f"primary parse error: {ex}", + ) + _raise_plan_safety_error( + code="PARSE_FAILED", + detail="Plan file is unreadable and no backup is available.", + quarantine_path=quarantine_path, + ) if not isinstance(data, dict): logger.warning("Plan file root is not a JSON object. Starting fresh.") - print(" Warning: Plan file root must be a JSON object. Starting fresh.", file=sys.stderr) - return empty_plan() + quarantine_path = _write_quarantine_snapshot( + plan_path, + raw_text=raw_primary, + reason="plan root is not a JSON object", + ) + _raise_plan_safety_error( + code="ROOT_NOT_OBJECT", + detail="Plan file root must be a JSON object.", + quarantine_path=quarantine_path, + ) version = data.get("version", 1) if version > PLAN_VERSION: - logger.warning("Plan file version %d > supported %d.", version, PLAN_VERSION) - print( - f" Warning: Plan file version {version} is newer than supported " - f"({PLAN_VERSION}). Some features may not work correctly.", - file=sys.stderr, + if not unsafe_allowed: + _raise_plan_safety_error( + code="FUTURE_VERSION", + detail=( + f"Plan schema version {version} is newer than supported ({PLAN_VERSION}). " + "Re-run with --allow-unsafe-coerce only for manual recovery." + ), + ) + logger.warning( + "Unsafe plan coercion enabled for future schema version %s (supported=%s).", + version, + PLAN_VERSION, ) ensure_plan_defaults(data) try: validate_plan(data) except ValueError as ex: - logger.warning("Plan invariants invalid (%s). Starting fresh.", ex) - print(f" Warning: Plan invariants invalid ({ex}). Starting fresh.", file=sys.stderr) - return empty_plan() + quarantine_path = _write_quarantine_snapshot( + plan_path, + raw_text=raw_primary, + reason=f"plan invariants invalid: {ex}", + ) + _raise_plan_safety_error( + code="INVALID_INVARIANTS", + detail=f"Plan invariants invalid: {ex}", + quarantine_path=quarantine_path, + ) + + reasons: list[str] = [] + if version > PLAN_VERSION: + reasons.append("future_schema_version") + quarantine_payload = data.get(_QUARANTINE_KEY) + if isinstance(quarantine_payload, dict) and quarantine_payload: + reasons.append("normalized_malformed_sections") + if reasons: + data[_UNSAFE_MARKER_KEY] = reasons return data # type: ignore[return-value] -def save_plan(plan: PlanModel | dict, path: Path | None = None) -> None: +def _assert_safe_to_save( + plan: PlanModel | dict[str, object], + *, + allow_unsafe_coerce: bool | None, +) -> None: + unsafe_allowed = _allow_unsafe_coerce(allow_unsafe_coerce) + reasons = plan.get(_UNSAFE_MARKER_KEY) + if unsafe_allowed: + return + if isinstance(reasons, list) and reasons: + _raise_plan_safety_error( + code="UNSAFE_SAVE_BLOCKED", + detail=( + "Plan payload contains unsafe normalization markers " + f"({', '.join(str(item) for item in reasons)}). " + "Use --allow-unsafe-coerce only after manual verification." + ), + ) + + +def save_plan( + plan: PlanModel | dict, + path: Path | None = None, + *, + allow_unsafe_coerce: bool | None = None, +) -> None: """Validate and save plan to disk atomically.""" + _assert_safe_to_save(plan, allow_unsafe_coerce=allow_unsafe_coerce) ensure_plan_defaults(plan) plan["updated"] = utc_now() validate_plan(plan) @@ -84,7 +204,9 @@ def save_plan(plan: PlanModel | dict, path: Path | None = None) -> None: plan_path = path or PLAN_FILE plan_path.parent.mkdir(parents=True, exist_ok=True) - content = json.dumps(plan, indent=2, default=json_default) + "\n" + serializable_plan = dict(plan) + serializable_plan.pop(_UNSAFE_MARKER_KEY, None) + content = json.dumps(serializable_plan, indent=2, default=json_default) + "\n" if plan_path.exists(): backup = plan_path.with_suffix(".json.bak") diff --git a/desloppify/engine/_plan/schema.py b/desloppify/engine/_plan/schema.py index ac7691cb..85db153a 100644 --- a/desloppify/engine/_plan/schema.py +++ b/desloppify/engine/_plan/schema.py @@ -196,6 +196,43 @@ def ensure_plan_defaults(plan: dict[str, Any]) -> None: for key, value in defaults.items(): plan.setdefault(key, value) _upgrade_plan_to_v7(plan) + _normalize_skipped_entries(plan) + + +def _normalize_skipped_entries(plan: dict[str, Any]) -> None: + """Normalize malformed skipped entries without dropping original payloads.""" + skipped = plan.get("skipped") + if not isinstance(skipped, dict): + return + + quarantine = plan.setdefault("_load_quarantine", {}) + if not isinstance(quarantine, dict): + quarantine = {} + plan["_load_quarantine"] = quarantine + + quarantined_entries = quarantine.setdefault("invalid_skipped_entries", {}) + if not isinstance(quarantined_entries, dict): + quarantined_entries = {} + quarantine["invalid_skipped_entries"] = quarantined_entries + + for issue_id, entry in list(skipped.items()): + if not isinstance(entry, dict): + quarantined_entries[issue_id] = entry + skipped[issue_id] = { + "issue_id": issue_id, + "kind": "temporary", + "reason": "Recovered malformed skipped entry", + } + continue + + kind = entry.get("kind") + if kind in VALID_SKIP_KINDS: + continue + + quarantined_entries[issue_id] = dict(entry) + entry["kind"] = "temporary" + entry.setdefault("issue_id", issue_id) + entry.setdefault("reason", "Recovered invalid skip kind") def triage_clusters(plan: dict[str, Any]) -> dict[str, Cluster]: diff --git a/desloppify/engine/_plan/schema_migrations.py b/desloppify/engine/_plan/schema_migrations.py index cc10c5bb..c5d941a0 100644 --- a/desloppify/engine/_plan/schema_migrations.py +++ b/desloppify/engine/_plan/schema_migrations.py @@ -21,12 +21,22 @@ def _ensure_container( key: str, expected_type: type[list] | type[dict], default_factory, + *, + quarantine: dict[str, Any] | None = None, ) -> None: - if not isinstance(plan.get(key), expected_type): - plan[key] = default_factory() + current = plan.get(key) + if isinstance(current, expected_type): + return + if quarantine is not None and key in plan: + quarantine.setdefault("container_type_mismatches", {})[key] = current + plan[key] = default_factory() def ensure_container_types(plan: dict[str, Any]) -> None: + quarantine = plan.setdefault("_load_quarantine", {}) + if not isinstance(quarantine, dict): + quarantine = {} + plan["_load_quarantine"] = quarantine for key, expected_type, default_factory in ( ("queue_order", list, list), ("deferred", list, list), @@ -39,11 +49,17 @@ def ensure_container_types(plan: dict[str, Any]) -> None: ("execution_log", list, list), ("epic_triage_meta", dict, dict), ): - _ensure_container(plan, key, expected_type, default_factory) + _ensure_container( + plan, + key, + expected_type, + default_factory, + quarantine=quarantine, + ) _rename_key(plan["epic_triage_meta"], "finding_snapshot_hash", "issue_snapshot_hash") - _ensure_container(plan, "commit_log", list, list) + _ensure_container(plan, "commit_log", list, list, quarantine=quarantine) _rename_key(plan, "uncommitted_findings", "uncommitted_issues") - _ensure_container(plan, "uncommitted_issues", list, list) + _ensure_container(plan, "uncommitted_issues", list, list, quarantine=quarantine) if "commit_tracking_branch" not in plan: plan["commit_tracking_branch"] = None diff --git a/desloppify/engine/_state/persistence.py b/desloppify/engine/_state/persistence.py index 5c3bf187..99750bd2 100644 --- a/desloppify/engine/_state/persistence.py +++ b/desloppify/engine/_state/persistence.py @@ -15,6 +15,8 @@ ] from desloppify.base.discovery.file_paths import safe_write_text +from desloppify.base.exception_sets import PersistenceSafetyError +from desloppify.base.runtime_state import current_runtime_context from desloppify.base.text_utils import is_numeric from desloppify.engine._state.schema import ( CURRENT_VERSION, @@ -23,6 +25,7 @@ empty_state, ensure_state_defaults, json_default, + utc_now, validate_state_invariants, ) @@ -31,6 +34,48 @@ from desloppify.engine._state import _recompute_stats +_UNSAFE_MARKER_KEY = "_unsafe_load_reasons" +_QUARANTINE_KEY = "_load_quarantine" +_SAFE_ERROR_PREFIX = "DLP_PERSISTENCE_STATE" + + +def _allow_unsafe_coerce(allow_unsafe_coerce: bool | None) -> bool: + if allow_unsafe_coerce is not None: + return bool(allow_unsafe_coerce) + return bool(current_runtime_context().allow_unsafe_coerce) + + +def _quarantine_path(state_path: Path) -> Path: + ts = utc_now().replace(":", "-") + return state_path.with_name(f"{state_path.stem}.quarantine.{ts}{state_path.suffix}") + + +def _write_quarantine_snapshot( + state_path: Path, + *, + raw_text: str, + reason: str, +) -> Path | None: + quarantine_path = _quarantine_path(state_path) + payload = { + "source_path": str(state_path), + "reason": reason, + "raw_text": raw_text, + } + try: + safe_write_text(quarantine_path, json.dumps(payload, indent=2, default=json_default) + "\n") + except OSError as exc: + logger.debug("Failed writing state quarantine snapshot for %s: %s", state_path, exc) + return None + return quarantine_path + + +def _raise_state_safety_error(*, code: str, detail: str, quarantine_path: Path | None = None) -> None: + message = f"[{_SAFE_ERROR_PREFIX}_{code}] {detail}" + if quarantine_path is not None: + message += f" Recovery snapshot: {quarantine_path}" + raise PersistenceSafetyError(message, exit_code=2) + def _load_json(path: Path) -> dict[str, object]: data = json.loads(path.read_text()) @@ -48,14 +93,23 @@ def _normalize_loaded_state(data: object) -> dict[str, object]: return normalized -def load_state(path: Path | None = None) -> StateModel: - """Load state from disk, or return empty state on missing/corruption.""" +def load_state( + path: Path | None = None, + *, + allow_unsafe_coerce: bool | None = None, +) -> StateModel: + """Load state from disk with explicit safety checks.""" state_path = path or STATE_FILE + unsafe_allowed = _allow_unsafe_coerce(allow_unsafe_coerce) if not state_path.exists(): return empty_state() + raw_primary = "" try: - data = _load_json(state_path) + raw_primary = state_path.read_text() + data = json.loads(raw_primary) + if not isinstance(data, dict): + raise ValueError("state file root must be a JSON object") except (json.JSONDecodeError, UnicodeDecodeError, OSError, ValueError) as ex: backup = state_path.with_suffix(".json.bak") if backup.exists(): @@ -76,7 +130,10 @@ def load_state(path: Path | None = None) -> StateModel: f" ⚠ State file corrupted ({ex}), loaded from backup.", file=sys.stderr, ) - return _normalize_loaded_state(backup_data) + normalized_backup = _normalize_loaded_state(backup_data) + if isinstance(normalized_backup.get(_QUARANTINE_KEY), dict) and normalized_backup.get(_QUARANTINE_KEY): + normalized_backup[_UNSAFE_MARKER_KEY] = ["normalized_malformed_sections"] + return normalized_backup except ( json.JSONDecodeError, UnicodeDecodeError, @@ -93,49 +150,55 @@ def load_state(path: Path | None = None) -> StateModel: ) logger.debug("Backup state load failed from %s: %s", backup, backup_ex) - logger.warning( - "State file load failed for %s and backup recovery was unavailable. " - "Falling back to empty state: %s", + quarantine_path = _write_quarantine_snapshot( state_path, - ex, + raw_text=raw_primary, + reason=f"primary parse error: {ex}", + ) + _raise_state_safety_error( + code="PARSE_FAILED", + detail="State file is unreadable and backup recovery failed.", + quarantine_path=quarantine_path, ) - print(f" ⚠ State file corrupted ({ex}). Starting fresh.", file=sys.stderr) - rename_failed = False - try: - state_path.rename(state_path.with_suffix(".json.corrupted")) - except OSError as rename_ex: - rename_failed = True - logger.debug( - "Failed to rename corrupted state file %s: %s", state_path, rename_ex - ) - if rename_failed: - logger.debug( - "Corrupted state file retained at original path: %s", state_path - ) - return empty_state() version = data.get("version", 1) if version > CURRENT_VERSION: - print( - " ⚠ State file version " - f"{version} is newer than supported ({CURRENT_VERSION}). " - "Some features may not work correctly.", - file=sys.stderr, + if not unsafe_allowed: + _raise_state_safety_error( + code="FUTURE_VERSION", + detail=( + f"State schema version {version} is newer than supported ({CURRENT_VERSION}). " + "Re-run with --allow-unsafe-coerce only for manual recovery." + ), + ) + logger.warning( + "Unsafe state coercion enabled for future schema version %s (supported=%s).", + version, + CURRENT_VERSION, ) try: - return _normalize_loaded_state(data) + normalized = _normalize_loaded_state(data) except (ValueError, TypeError, AttributeError) as normalize_ex: - logger.warning( - "State invariants invalid for %s; falling back to empty state: %s", + quarantine_path = _write_quarantine_snapshot( state_path, - normalize_ex, + raw_text=raw_primary, + reason=f"state invariants invalid: {normalize_ex}", ) - print( - f" ⚠ State invariants invalid ({normalize_ex}). Starting fresh.", - file=sys.stderr, + _raise_state_safety_error( + code="INVALID_INVARIANTS", + detail=f"State invariants invalid: {normalize_ex}", + quarantine_path=quarantine_path, ) - return empty_state() + + reasons: list[str] = [] + if version > CURRENT_VERSION: + reasons.append("future_schema_version") + if isinstance(normalized.get(_QUARANTINE_KEY), dict) and normalized.get(_QUARANTINE_KEY): + reasons.append("normalized_malformed_sections") + if reasons: + normalized[_UNSAFE_MARKER_KEY] = reasons + return normalized def _coerce_integrity_target(value: object) -> float | None: @@ -163,8 +226,21 @@ def save_state( path: Path | None = None, *, subjective_integrity_target: float | None = None, + allow_unsafe_coerce: bool | None = None, ) -> None: """Recompute stats/score and save to disk atomically.""" + unsafe_allowed = _allow_unsafe_coerce(allow_unsafe_coerce) + unsafe_reasons = state.get(_UNSAFE_MARKER_KEY) + if not unsafe_allowed and isinstance(unsafe_reasons, list) and unsafe_reasons: + _raise_state_safety_error( + code="UNSAFE_SAVE_BLOCKED", + detail=( + "State payload contains unsafe normalization markers " + f"({', '.join(str(item) for item in unsafe_reasons)}). " + "Use --allow-unsafe-coerce only after manual verification." + ), + ) + ensure_state_defaults(state) _recompute_stats( state, @@ -179,7 +255,9 @@ def save_state( state_path = path or STATE_FILE state_path.parent.mkdir(parents=True, exist_ok=True) - content = json.dumps(state, indent=2, default=json_default) + "\n" + serializable_state = dict(state) + serializable_state.pop(_UNSAFE_MARKER_KEY, None) + content = json.dumps(serializable_state, indent=2, default=json_default) + "\n" if state_path.exists(): backup = state_path.with_suffix(".json.bak") diff --git a/desloppify/engine/_state/schema.py b/desloppify/engine/_state/schema.py index 85faf2c2..4cd2d5b0 100644 --- a/desloppify/engine/_state/schema.py +++ b/desloppify/engine/_state/schema.py @@ -290,6 +290,8 @@ class StateModel(TypedDict, total=False): attestation_log: list[AttestationLogEntry] concern_dismissals: dict[str, ConcernDismissal] _plan_start_scores_for_reveal: dict[str, Any] + _load_quarantine: dict[str, Any] + _unsafe_load_reasons: list[str] class ScanDiff(TypedDict): @@ -384,23 +386,47 @@ def ensure_state_defaults(state: StateModel | dict) -> None: for key, value in empty_state().items(): mutable_state.setdefault(key, value) + if not isinstance(state.get("_load_quarantine"), dict): + state["_load_quarantine"] = {} + + quarantine = state["_load_quarantine"] + if not isinstance(quarantine, dict): + quarantine = {} + state["_load_quarantine"] = quarantine + + container_mismatches = quarantine.setdefault("container_type_mismatches", {}) + if not isinstance(container_mismatches, dict): + container_mismatches = {} + quarantine["container_type_mismatches"] = container_mismatches + if not isinstance(state.get("issues"), dict): + container_mismatches["issues"] = state.get("issues") state["issues"] = {} if not isinstance(state.get("stats"), dict): + container_mismatches["stats"] = state.get("stats") state["stats"] = {} if not isinstance(state.get("scan_history"), list): + container_mismatches["scan_history"] = state.get("scan_history") state["scan_history"] = [] if not isinstance(state.get("scan_coverage"), dict): + container_mismatches["scan_coverage"] = state.get("scan_coverage") state["scan_coverage"] = {} if not isinstance(state.get("score_confidence"), dict): + container_mismatches["score_confidence"] = state.get("score_confidence") state["score_confidence"] = {} if not isinstance(state.get("subjective_integrity"), dict): + container_mismatches["subjective_integrity"] = state.get("subjective_integrity") state["subjective_integrity"] = {} all_issues = state["issues"] + invalid_issues = quarantine.setdefault("invalid_issues", {}) + if not isinstance(invalid_issues, dict): + invalid_issues = {} + quarantine["invalid_issues"] = invalid_issues to_remove: list[str] = [] for issue_id, issue in all_issues.items(): if not isinstance(issue, dict): + invalid_issues[issue_id] = issue to_remove.append(issue_id) continue diff --git a/desloppify/tests/plan/test_persistence_safety.py b/desloppify/tests/plan/test_persistence_safety.py new file mode 100644 index 00000000..ac960368 --- /dev/null +++ b/desloppify/tests/plan/test_persistence_safety.py @@ -0,0 +1,85 @@ +"""Safety-focused tests for plan persistence behavior.""" + +from __future__ import annotations + +import json + +import pytest + +from desloppify.base.exception_sets import PersistenceSafetyError +from desloppify.engine.plan import empty_plan, load_plan, save_plan + + +class TestPlanLoadSafety: + def test_missing_file_returns_empty_plan(self, tmp_path): + loaded = load_plan(tmp_path / "missing-plan.json") + assert loaded["queue_order"] == [] + assert loaded["version"] == 7 + + def test_corrupt_plan_raises_and_writes_quarantine_snapshot(self, tmp_path): + p = tmp_path / "plan.json" + p.write_text("{not json") + + with pytest.raises(PersistenceSafetyError) as exc_info: + load_plan(p) + assert "DLP_PERSISTENCE_PLAN_PARSE_FAILED" in str(exc_info.value) + assert list(tmp_path.glob("plan.quarantine.*.json")) + + def test_future_version_requires_explicit_unsafe_override(self, tmp_path): + p = tmp_path / "plan.json" + p.write_text( + json.dumps( + { + "version": 999, + "created": "2026-01-01T00:00:00+00:00", + "updated": "2026-01-01T00:00:00+00:00", + "queue_order": [], + "skipped": {}, + } + ) + ) + + with pytest.raises(PersistenceSafetyError) as exc_info: + load_plan(p) + assert "DLP_PERSISTENCE_PLAN_FUTURE_VERSION" in str(exc_info.value) + + loaded = load_plan(p, allow_unsafe_coerce=True) + assert loaded["version"] == 7 + assert "future_schema_version" in loaded.get("_unsafe_load_reasons", []) + + def test_container_mismatch_is_preserved_in_quarantine_bucket(self, tmp_path): + p = tmp_path / "plan.json" + p.write_text( + json.dumps( + { + "version": 7, + "created": "2026-01-01T00:00:00+00:00", + "updated": "2026-01-01T00:00:00+00:00", + "queue_order": {"not": "a-list"}, + "skipped": {}, + } + ) + ) + loaded = load_plan(p) + assert loaded["queue_order"] == [] + mismatch = loaded.get("_load_quarantine", {}).get("container_type_mismatches", {}) + assert mismatch.get("queue_order") == {"not": "a-list"} + assert "normalized_malformed_sections" in loaded.get("_unsafe_load_reasons", []) + + +class TestPlanSaveSafety: + def test_save_blocks_unsafe_payload_without_override(self, tmp_path): + p = tmp_path / "plan.json" + plan = empty_plan() + plan["_unsafe_load_reasons"] = ["future_schema_version"] + with pytest.raises(PersistenceSafetyError) as exc_info: + save_plan(plan, p) + assert "DLP_PERSISTENCE_PLAN_UNSAFE_SAVE_BLOCKED" in str(exc_info.value) + + def test_save_allows_unsafe_payload_with_override(self, tmp_path): + p = tmp_path / "plan.json" + plan = empty_plan() + plan["_unsafe_load_reasons"] = ["future_schema_version"] + save_plan(plan, p, allow_unsafe_coerce=True) + loaded = json.loads(p.read_text()) + assert "_unsafe_load_reasons" not in loaded diff --git a/desloppify/tests/state/test_state.py b/desloppify/tests/state/test_state.py index 2b29ee47..12643bfa 100644 --- a/desloppify/tests/state/test_state.py +++ b/desloppify/tests/state/test_state.py @@ -5,6 +5,7 @@ import pytest +from desloppify.base.exception_sets import PersistenceSafetyError from desloppify.engine._state import filtering as state_query_mod from desloppify.state import ( MergeScanOptions, @@ -307,15 +308,18 @@ def test_corrupt_json_tries_backup(self, tmp_path): def test_corrupt_json_no_backup_returns_empty(self, tmp_path): p = tmp_path / "state.json" p.write_text("{bad json!!") - s = load_state(p) - assert s["version"] == 1 - assert s["issues"] == {} + with pytest.raises(PersistenceSafetyError) as exc_info: + load_state(p) + assert "DLP_PERSISTENCE_STATE_PARSE_FAILED" in str(exc_info.value) + quarantine_files = list(tmp_path.glob("state.quarantine.*.json")) + assert quarantine_files - def test_corrupt_json_renames_file(self, tmp_path): + def test_corrupt_json_writes_quarantine_snapshot(self, tmp_path): p = tmp_path / "state.json" p.write_text("{bad json!!") - load_state(p) - assert (tmp_path / "state.json.corrupted").exists() + with pytest.raises(PersistenceSafetyError): + load_state(p) + assert list(tmp_path.glob("state.quarantine.*.json")) def test_corrupt_json_and_corrupt_backup_returns_empty(self, tmp_path): p = tmp_path / "state.json" @@ -323,9 +327,20 @@ def test_corrupt_json_and_corrupt_backup_returns_empty(self, tmp_path): backup = tmp_path / "state.json.bak" backup.write_text("{also bad") - s = load_state(p) - assert s["version"] == 1 - assert s["issues"] == {} + with pytest.raises(PersistenceSafetyError) as exc_info: + load_state(p) + assert "DLP_PERSISTENCE_STATE_PARSE_FAILED" in str(exc_info.value) + + def test_future_version_requires_explicit_unsafe_override(self, tmp_path): + p = tmp_path / "state.json" + p.write_text(json.dumps({"version": 999, "issues": {}})) + with pytest.raises(PersistenceSafetyError) as exc_info: + load_state(p) + assert "DLP_PERSISTENCE_STATE_FUTURE_VERSION" in str(exc_info.value) + + loaded = load_state(p, allow_unsafe_coerce=True) + assert loaded["version"] == 999 + assert "future_schema_version" in loaded.get("_unsafe_load_reasons", []) # --------------------------------------------------------------------------- @@ -382,6 +397,22 @@ def test_invalid_status_gets_normalized_before_save(self, tmp_path): loaded = json.loads(p.read_text()) assert loaded["issues"]["x"]["status"] == "open" + def test_save_state_blocks_unsafe_payload_without_override(self, tmp_path): + p = tmp_path / "state.json" + st = empty_state() + st["_unsafe_load_reasons"] = ["future_schema_version"] + with pytest.raises(PersistenceSafetyError) as exc_info: + save_state(st, p) + assert "DLP_PERSISTENCE_STATE_UNSAFE_SAVE_BLOCKED" in str(exc_info.value) + + def test_save_state_allows_unsafe_payload_with_override(self, tmp_path): + p = tmp_path / "state.json" + st = empty_state() + st["_unsafe_load_reasons"] = ["future_schema_version"] + save_state(st, p, allow_unsafe_coerce=True) + loaded = json.loads(p.read_text()) + assert "_unsafe_load_reasons" not in loaded + # --------------------------------------------------------------------------- # _upsert_issues