From db787abdd1694438f8b0049dd2694667f6979b2a Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Sun, 3 May 2026 21:16:02 +0200 Subject: [PATCH] fix(tests): make cache-lockfile-parity test resilient to leftover deploy artifacts and timestamp drift The regression-trap test added in #1116 was failing on main, blocking the v0.12.1 release CI on macOS arm64 build-validate and all 3 OSes' integration tests jobs. Two fixture-level bugs caused false positives: 1. Stale deployed-files state across regimes _reset_install_state removed apm_modules/ + apm.lock.yaml between runs but left the prior run's integration outputs (.github/agents/, .agents/ skills/, ...) on disk. With the lockfile gone, the integrate phase had an empty managed_files set; the leftover files looked like user-authored collisions to BaseIntegrator.check_collision(); they were skipped and never appended to target_paths -- so the new lockfile recorded only the single freshly-written file, not the package's full deployed_files list. Cold-cache (run A, fresh disk) recorded 5 files; warm-cache (B) and no-cache (C) recorded 1. Fix: clone the project tree into a fresh subdir per regime via _clone_project(). Each run starts from pristine filesystem state, which is what 'same input, same output' actually means for this invariant. No hand-rolled cleanup list to keep in sync with new target deploy roots. 2. generated_at always drifts between independent runs The lockfile's generated_at is captured at write time; with no existing lockfile to dedup against (each cloned project starts empty), every run produces a fresh timestamp. The byte-identical assertion was unsatisfiable by construction. Fix: hash the lockfile excluding the generated_at line. The parity contract is about resolution outcome (resolved_commit, content_hash, deployed_files, package_type, ...), not the wall-clock write timestamp. Verified locally with GITHUB_TOKEN=$(gh auth token): uv run --extra dev pytest tests/integration/test_cache_lockfile_parity.py -> 1 passed in 12.73s Lint: uv run --extra dev ruff check src/ tests/ -> All checks passed uv run --extra dev ruff format --check src/ tests/ -> 668 files already formatted No product code changed -- the underlying cache layer was always deterministic; only the fixture was wrong. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../integration/test_cache_lockfile_parity.py | 73 ++++++++++++------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/tests/integration/test_cache_lockfile_parity.py b/tests/integration/test_cache_lockfile_parity.py index 3d5ac8f4..548bed50 100644 --- a/tests/integration/test_cache_lockfile_parity.py +++ b/tests/integration/test_cache_lockfile_parity.py @@ -1,16 +1,16 @@ """Lockfile-determinism integration test under the persistent cache. Regression-trap for the worst silent failure the cache layer could -introduce: byte-level lockfile drift between cached and non-cached -runs. If ``apm install`` produces a different ``apm.lock.yaml`` when -``APM_NO_CACHE=1`` is set vs. when the cache is hot, a CI run that -ships with a stale cache would commit a lockfile that disagrees with -the reproducible-from-scratch baseline -- and downstream installs -would diverge. +introduce: lockfile drift between cached and non-cached runs. If +``apm install`` produces a different ``apm.lock.yaml`` (modulo the +``generated_at`` write-timestamp) when ``APM_NO_CACHE=1`` is set vs. +when the cache is hot, a CI run that ships with a stale cache would +commit a lockfile that disagrees with the reproducible-from-scratch +baseline -- and downstream installs would diverge. The contract: ``apm install`` from the same ``apm.yml`` MUST produce -a byte-identical lockfile regardless of cache state. This test -asserts it across three regimes: +a content-identical lockfile (excluding ``generated_at``) regardless +of cache state. This test asserts it across three regimes: Run A: cold cache (cache empty) Run B: cache hot (warm reuse, no network for unchanged deps) @@ -83,18 +83,36 @@ def _run_install(apm: str, project: Path, *, env_overrides: dict[str, str]) -> N def _lockfile_sha(project: Path) -> str: + """Hash the lockfile excluding the `generated_at` line. + + `generated_at` is a wall-clock timestamp captured at write time, so it + necessarily differs between independent runs. The parity invariant is + about resolution outcome (resolved_commit, content_hash, deployed_files, + package_type, ...), not the write timestamp. + """ lock = project / "apm.lock.yaml" assert lock.is_file(), "apm.lock.yaml not produced by install" - return hashlib.sha256(lock.read_bytes()).hexdigest() + canonical = "\n".join( + line + for line in lock.read_text(encoding="utf-8").splitlines() + if not line.startswith("generated_at:") + ) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() -def _reset_install_state(project: Path) -> None: - """Remove install artifacts but keep apm.yml so the next run is identical input.""" - for child in (project / "apm_modules", project / "apm.lock.yaml"): - if child.is_dir(): - shutil.rmtree(child, ignore_errors=True) - elif child.is_file(): - child.unlink() +def _clone_project(template: Path, dest: Path) -> Path: + """Clone *template* into *dest* so each regime runs against pristine state. + + Reusing the same project dir across regimes would leave previously-deployed + integration outputs (`.github/agents/`, `.agents/skills/`, ...) on disk. + With the lockfile deleted between runs, those orphaned files look like + user-authored collisions to the integrators (`check_collision()` returns + True), so they are skipped and never recorded in the new lockfile's + `deployed_files`. That is a fixture artifact, not a cache-layer drift, so + we sidestep it by giving each regime its own copy of the project tree. + """ + shutil.copytree(template, dest) + return dest def test_lockfile_byte_identical_across_cache_regimes( @@ -102,7 +120,7 @@ def test_lockfile_byte_identical_across_cache_regimes( project_with_apm: Path, tmp_path: Path, ) -> None: - """A, B, C must produce byte-identical apm.lock.yaml. + """A, B, C must produce content-identical apm.lock.yaml (modulo `generated_at`). A: cold cache (fresh APM_CACHE_DIR pointing at empty dir) B: warm cache (same dir, second run reuses entries) @@ -112,30 +130,31 @@ def test_lockfile_byte_identical_across_cache_regimes( cache_dir.mkdir() # Run A: cold cache + project_a = _clone_project(project_with_apm, tmp_path / "run-a") _run_install( apm_command, - project_with_apm, + project_a, env_overrides={"APM_CACHE_DIR": str(cache_dir), "CI": "1"}, ) - sha_a = _lockfile_sha(project_with_apm) + sha_a = _lockfile_sha(project_a) - # Run B: warm cache (same APM_CACHE_DIR retained) - _reset_install_state(project_with_apm) + # Run B: warm cache (same APM_CACHE_DIR retained, fresh project tree) + project_b = _clone_project(project_with_apm, tmp_path / "run-b") _run_install( apm_command, - project_with_apm, + project_b, env_overrides={"APM_CACHE_DIR": str(cache_dir), "CI": "1"}, ) - sha_b = _lockfile_sha(project_with_apm) + sha_b = _lockfile_sha(project_b) - # Run C: cache disabled - _reset_install_state(project_with_apm) + # Run C: cache disabled (fresh project tree) + project_c = _clone_project(project_with_apm, tmp_path / "run-c") _run_install( apm_command, - project_with_apm, + project_c, env_overrides={"APM_NO_CACHE": "1", "CI": "1"}, ) - sha_c = _lockfile_sha(project_with_apm) + sha_c = _lockfile_sha(project_c) assert sha_a == sha_b, ( "Lockfile drifted between cold-cache and warm-cache runs -- "