From a34b9b87750fcfef2ed3540afc0b8aaafe1dd697 Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Tue, 5 May 2026 13:29:45 +0200 Subject: [PATCH 1/2] fix(install): exclude .apm-pin marker from package content hash Re-installing any package previously installed by APM 0.12.2 erased its apm_modules/ directory: the .apm-pin cache marker (added by #1137 for drift-replay) is written AFTER hash recording, so the on-disk hash diverged from the lockfile hash on every subsequent install, falsely tripping the supply-chain content-hash mismatch check and rmtree'ing the cache. Surfaced by tests/integration/test_guardrailing_hero_e2e.py, which runs 'apm install ' twice via run_command(show_output= True) and asserts apm_modules/github/ still exists -- the v0.12.2 release tag failed in CI on every platform on this test. Fix: exclude .apm-pin from compute_package_hash via a new _EXCLUDED_FILES set, mirroring _EXCLUDED_DIRS. Add regression test test_skips_apm_pin_marker. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 1 + src/apm_cli/utils/content_hash.py | 9 +++++++++ tests/unit/test_content_hash.py | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f246767cd..15fb9e8ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **Parallel subdir install race.** `apm install` no longer intermittently fails with `RuntimeError: Subdirectory '' not found in repository` when multiple dependencies (including ADO sub-path deps) resolve to different subdirectories of the same `repo@ref`. The shared clone cache now stores subdir-agnostic bare clones and each consumer materializes its own working tree (mirrors the WS3 `GitCache` pattern). (#1135, fixes #1126, fixes #1140) +- **Re-installing the same package no longer rmtree's it.** `compute_package_hash` now excludes the `.apm-pin` cache marker (introduced by #1137) so the supply-chain content-hash check sees a stable hash across installs instead of falsely tripping and deleting `apm_modules///`. (#1142, regression from #1137) ### Changed diff --git a/src/apm_cli/utils/content_hash.py b/src/apm_cli/utils/content_hash.py index 380c693b0..3babbfacc 100644 --- a/src/apm_cli/utils/content_hash.py +++ b/src/apm_cli/utils/content_hash.py @@ -7,6 +7,13 @@ # Directories excluded from hashing (not relevant to package content) _EXCLUDED_DIRS = {".git", "__pycache__"} +# Files excluded from hashing. ``.apm-pin`` is the cache-pin marker +# (see :mod:`apm_cli.install.cache_pin`) written AFTER hash recording +# during install; including it would make the on-disk hash diverge +# from the lockfile-recorded hash on every subsequent install, +# falsely tripping the supply-chain content-hash mismatch check. +_EXCLUDED_FILES = {".apm-pin"} + # Well-known hash for empty/missing packages _EMPTY_HASH = "sha256:" + hashlib.sha256(b"").hexdigest() @@ -41,6 +48,8 @@ def compute_package_hash(package_path: Path) -> str: if any(part in _EXCLUDED_DIRS for part in rel.parts): continue if item.is_file(): + if rel.name in _EXCLUDED_FILES: + continue regular_files.append(rel) # Sort lexicographically by POSIX path for determinism diff --git a/tests/unit/test_content_hash.py b/tests/unit/test_content_hash.py index dd072daa8..d7f4913be 100644 --- a/tests/unit/test_content_hash.py +++ b/tests/unit/test_content_hash.py @@ -72,6 +72,27 @@ def test_skips_pycache(self, tmp_path): assert hash_before == hash_after + def test_skips_apm_pin_marker(self, tmp_path): + """``.apm-pin`` cache-pin marker is excluded from hashing. + + Regression test for the v0.12.2 release-blocking bug: the + ``.apm-pin`` marker (introduced in PR #1137 for drift-replay + cache verification) is written to the package root AFTER the + install-time hash is recorded in the lockfile. Including it in + :func:`compute_package_hash` made every subsequent ``apm + install`` of the same package observe a hash mismatch against + the lockfile, falsely tripping the supply-chain content-hash + check in ``FreshDependencySource.acquire`` and + ``safe_rmtree``-ing the package directory. + """ + (tmp_path / "apm.yml").write_text("name: x\n") + hash_before = compute_package_hash(tmp_path) + + (tmp_path / ".apm-pin").write_text('{"schema_version": 1, "resolved_commit": "deadbeef"}') + hash_after = compute_package_hash(tmp_path) + + assert hash_before == hash_after + def test_empty_directory(self, tmp_path): """Empty directory returns a well-known hash.""" empty = tmp_path / "empty" From 9cf5452f05b498094a9b851ac5a3473bb5024a1d Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Tue, 5 May 2026 13:36:53 +0200 Subject: [PATCH 2/2] fix(install): scope .apm-pin exclusion to package root Address Copilot review feedback on PR #1142: a basename match would exclude any .apm-pin file at any depth, so a malicious package could hide bytes under subdir/.apm-pin to bypass the integrity hash. The cache-pin marker is only ever written to the package root by cache_pin.write_marker, so scope the exclusion accordingly. Extend the regression test to assert nested .apm-pin files are still hashed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/apm_cli/utils/content_hash.py | 16 +++++++++------- tests/unit/test_content_hash.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/apm_cli/utils/content_hash.py b/src/apm_cli/utils/content_hash.py index 3babbfacc..4a637df26 100644 --- a/src/apm_cli/utils/content_hash.py +++ b/src/apm_cli/utils/content_hash.py @@ -7,12 +7,14 @@ # Directories excluded from hashing (not relevant to package content) _EXCLUDED_DIRS = {".git", "__pycache__"} -# Files excluded from hashing. ``.apm-pin`` is the cache-pin marker -# (see :mod:`apm_cli.install.cache_pin`) written AFTER hash recording -# during install; including it would make the on-disk hash diverge -# from the lockfile-recorded hash on every subsequent install, -# falsely tripping the supply-chain content-hash mismatch check. -_EXCLUDED_FILES = {".apm-pin"} +# Files at the package root excluded from hashing. ``.apm-pin`` is the +# cache-pin marker (see :mod:`apm_cli.install.cache_pin`) written AFTER +# hash recording during install; including it would make the on-disk +# hash diverge from the lockfile-recorded hash on every subsequent +# install, falsely tripping the supply-chain content-hash mismatch +# check. Scoped to root paths only so a package cannot slip a +# ``subdir/.apm-pin`` past the integrity hash. +_EXCLUDED_ROOT_FILES = {".apm-pin"} # Well-known hash for empty/missing packages _EMPTY_HASH = "sha256:" + hashlib.sha256(b"").hexdigest() @@ -48,7 +50,7 @@ def compute_package_hash(package_path: Path) -> str: if any(part in _EXCLUDED_DIRS for part in rel.parts): continue if item.is_file(): - if rel.name in _EXCLUDED_FILES: + if len(rel.parts) == 1 and rel.name in _EXCLUDED_ROOT_FILES: continue regular_files.append(rel) diff --git a/tests/unit/test_content_hash.py b/tests/unit/test_content_hash.py index d7f4913be..d5efd9ab1 100644 --- a/tests/unit/test_content_hash.py +++ b/tests/unit/test_content_hash.py @@ -84,6 +84,12 @@ def test_skips_apm_pin_marker(self, tmp_path): the lockfile, falsely tripping the supply-chain content-hash check in ``FreshDependencySource.acquire`` and ``safe_rmtree``-ing the package directory. + + Exclusion is scoped to the package root: a nested + ``subdir/.apm-pin`` (which the install pipeline never writes) + MUST still be hashed so a malicious package cannot smuggle + bytes past the integrity check by burying them under that + name. """ (tmp_path / "apm.yml").write_text("name: x\n") hash_before = compute_package_hash(tmp_path) @@ -93,6 +99,16 @@ def test_skips_apm_pin_marker(self, tmp_path): assert hash_before == hash_after + # A nested .apm-pin (never written by the install pipeline) is + # NOT excluded -- defense against using the marker name as a + # blind spot in the integrity hash. + nested = tmp_path / "subdir" + nested.mkdir() + (nested / ".apm-pin").write_text("smuggled bytes") + hash_with_nested = compute_package_hash(tmp_path) + + assert hash_with_nested != hash_after + def test_empty_directory(self, tmp_path): """Empty directory returns a well-known hash.""" empty = tmp_path / "empty"