diff --git a/CHANGELOG.md b/CHANGELOG.md index f246767c..15fb9e8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **Parallel subdir install race.** `apm install` no longer intermittently fails with `RuntimeError: Subdirectory '' not found in repository` when multiple dependencies (including ADO sub-path deps) resolve to different subdirectories of the same `repo@ref`. The shared clone cache now stores subdir-agnostic bare clones and each consumer materializes its own working tree (mirrors the WS3 `GitCache` pattern). (#1135, fixes #1126, fixes #1140) +- **Re-installing the same package no longer rmtree's it.** `compute_package_hash` now excludes the `.apm-pin` cache marker (introduced by #1137) so the supply-chain content-hash check sees a stable hash across installs instead of falsely tripping and deleting `apm_modules/{owner}/{repo}/`. (#1142, regression from #1137) ### Changed diff --git a/src/apm_cli/utils/content_hash.py b/src/apm_cli/utils/content_hash.py index 380c693b..4a637df2 100644 --- a/src/apm_cli/utils/content_hash.py +++ b/src/apm_cli/utils/content_hash.py @@ -7,6 +7,15 @@ # Directories excluded from hashing (not relevant to package content) _EXCLUDED_DIRS = {".git", "__pycache__"} +# Files at the package root excluded from hashing. ``.apm-pin`` is the +# cache-pin marker (see :mod:`apm_cli.install.cache_pin`) written AFTER +# hash recording during install; including it would make the on-disk +# hash diverge from the lockfile-recorded hash on every subsequent +# install, falsely tripping the supply-chain content-hash mismatch +# check. Scoped to root paths only so a package cannot slip a +# ``subdir/.apm-pin`` past the integrity hash. 
+_EXCLUDED_ROOT_FILES = {".apm-pin"} + # Well-known hash for empty/missing packages _EMPTY_HASH = "sha256:" + hashlib.sha256(b"").hexdigest() @@ -41,6 +50,8 @@ def compute_package_hash(package_path: Path) -> str: if any(part in _EXCLUDED_DIRS for part in rel.parts): continue if item.is_file(): + if len(rel.parts) == 1 and rel.name in _EXCLUDED_ROOT_FILES: + continue regular_files.append(rel) # Sort lexicographically by POSIX path for determinism diff --git a/tests/unit/test_content_hash.py b/tests/unit/test_content_hash.py index dd072daa..d5efd9ab 100644 --- a/tests/unit/test_content_hash.py +++ b/tests/unit/test_content_hash.py @@ -72,6 +72,43 @@ def test_skips_pycache(self, tmp_path): assert hash_before == hash_after + def test_skips_apm_pin_marker(self, tmp_path): + """``.apm-pin`` cache-pin marker is excluded from hashing. + + Regression test for the v0.12.2 release-blocking bug: the + ``.apm-pin`` marker (introduced in PR #1137 for drift-replay + cache verification) is written to the package root AFTER the + install-time hash is recorded in the lockfile. Including it in + :func:`compute_package_hash` made every subsequent ``apm + install`` of the same package observe a hash mismatch against + the lockfile, falsely tripping the supply-chain content-hash + check in ``FreshDependencySource.acquire`` and + ``safe_rmtree``-ing the package directory. + + Exclusion is scoped to the package root: a nested + ``subdir/.apm-pin`` (which the install pipeline never writes) + MUST still be hashed so a malicious package cannot smuggle + bytes past the integrity check by burying them under that + name. 
+ """ + (tmp_path / "apm.yml").write_text("name: x\n") + hash_before = compute_package_hash(tmp_path) + + (tmp_path / ".apm-pin").write_text('{"schema_version": 1, "resolved_commit": "deadbeef"}') + hash_after = compute_package_hash(tmp_path) + + assert hash_before == hash_after + + # A nested .apm-pin (never written by the install pipeline) is + # NOT excluded -- defense against using the marker name as a + # blind spot in the integrity hash. + nested = tmp_path / "subdir" + nested.mkdir() + (nested / ".apm-pin").write_text("smuggled bytes") + hash_with_nested = compute_package_hash(tmp_path) + + assert hash_with_nested != hash_after + def test_empty_directory(self, tmp_path): """Empty directory returns a well-known hash.""" empty = tmp_path / "empty"