diff --git a/CHANGELOG.md b/CHANGELOG.md index 15fb9e8ac..f71ed74d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **Marketplace upstreams: curated pass-through with allow-list governance.** Selectively re-expose plugins from an external marketplace under your own, with build-time commit pinning in `apm.lock.yaml` and Anthropic-conformant emission (no `metadata.apm.*` keys injected). New `upstreams:` block in `apm.yml -> marketplace:`; new `apm marketplace upstream add/list/remove` CLI; `packages[]` entries pick between `source:` (direct) and `upstream:` + `plugin:` (pass-through). APM does **not** re-host content; consumer installs always fetch from the upstream git host. See the [Marketplace upstreams guide](https://microsoft.github.io/apm/guides/marketplace-upstreams/). (#1136) + ## [0.12.2] - 2026-05-05 ### Added diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs index 3a8229bd8..ad8f06b94 100644 --- a/docs/astro.config.mjs +++ b/docs/astro.config.mjs @@ -77,6 +77,7 @@ export default defineConfig({ { label: 'Org-Wide Packages', slug: 'guides/org-packages' }, { label: 'Marketplaces', slug: 'guides/marketplaces' }, { label: 'Marketplace Authoring', slug: 'guides/marketplace-authoring' }, + { label: 'Marketplace upstreams', slug: 'guides/marketplace-upstreams' }, { label: 'CI Policy Enforcement', slug: 'guides/ci-policy-setup' }, { label: 'Agent Workflows (Experimental)', slug: 'guides/agent-workflows' }, ], diff --git a/docs/src/content/docs/enterprise/security.md b/docs/src/content/docs/enterprise/security.md index ccb3ad803..f0ab3f36f 100644 --- a/docs/src/content/docs/enterprise/security.md +++ b/docs/src/content/docs/enterprise/security.md @@ -37,7 +37,9 @@ APM has no runtime footprint. Once `apm install` or `apm compile` completes, the ## Dependency provenance -APM resolves dependencies directly from git repositories. 
There is no intermediary registry, proxy, or mirror. +APM resolves dependencies directly from git repositories. APM does not run an artifact registry or proxy server, and does not re-host third-party content. + +Curators MAY publish a marketplace that exposes plugins from external upstream marketplaces via the [marketplace upstreams](../../guides/marketplace-upstreams/) feature. Upstreams are a curated allow-list with build-time commit pinning -- not a binary mirror. Consumer installs always fetch plugin content from the original git host; APM never proxies or re-hosts that content. See [Marketplace upstreams: trust model](../../guides/marketplace-upstreams/#trust-model) for the full contract. ### Exact commit pinning diff --git a/docs/src/content/docs/guides/marketplace-authoring.md b/docs/src/content/docs/guides/marketplace-authoring.md index 9d0a10167..acb913e5e 100644 --- a/docs/src/content/docs/guides/marketplace-authoring.md +++ b/docs/src/content/docs/guides/marketplace-authoring.md @@ -481,6 +481,7 @@ Not in the first release. `apm marketplace publish` uses the `gh` CLI and assume ## Related reading - [Marketplaces guide](../marketplaces/) -- consumer-side: registering and installing from a marketplace. +- [Marketplace upstreams](../marketplace-upstreams/) -- expose plugins from external marketplaces with allow-list governance and immutable commit pinning. - [CLI command reference](../../reference/cli-commands/) -- authoritative options for `apm pack` and every `apm marketplace` subcommand. - [Manifest schema](../../reference/manifest-schema/) -- the `apm.yml` shape including the `marketplace:` block. - [Plugins guide](../plugins/) -- what a plugin is and how consumers install one. 
diff --git a/docs/src/content/docs/guides/marketplace-upstreams.md b/docs/src/content/docs/guides/marketplace-upstreams.md new file mode 100644 index 000000000..42abbed98 --- /dev/null +++ b/docs/src/content/docs/guides/marketplace-upstreams.md @@ -0,0 +1,151 @@ +--- +title: "Marketplace upstreams" +description: Selectively expose plugins from external marketplaces with allow-list governance and immutable commit pinning. +sidebar: + order: 7 +--- + +Upstreams are APM's equivalent of Artifactory remote repositories -- they let your internal marketplace selectively expose plugins from external sources, with allow-list governance and immutable commit pinning, without running an artifact server. + +This guide is for **marketplace curators** who want to re-expose plugins from a third-party marketplace (for example, a public Claude Code marketplace) inside their own marketplace, with control over which plugins are exposed and at what version. If you are authoring a marketplace from scratch, start with the [Authoring a marketplace](../marketplace-authoring/) guide first. + +## Quick start + +Register an external marketplace under a local alias, then expose one of its plugins: + +```bash +# 1. Register the upstream marketplace, pinned to an immutable commit. +# Use a real 40-char SHA (preferred over tags -- tags can be re-pointed). +# Get the current SHA with: git ls-remote https://github.com/owner/repo HEAD +apm marketplace upstream add owner/repo \ + --alias myupstream \ + --ref 0000000000000000000000000000000000000001 + +# 2. Confirm the upstream is registered in apm.yml +apm marketplace upstream list + +# 3. Expose one plugin under your own display name +apm marketplace package add \ + --upstream myupstream \ + --plugin original-plugin-name \ + --name my-plugin + +# 4. Build your marketplace.json +apm pack +``` + +The emitted `marketplace.json` is byte-for-byte Anthropic-conformant -- it does **not** carry any APM-specific keys. 
Provenance (manifest SHA, resolved plugin SHA, canonical owner) is recorded only in your `apm.lock.yaml` under the `upstreams:` section. + +## Schema + +In `apm.yml`: + +```yaml +marketplace: + upstreams: + - alias: myupstream + repo: owner/repo + path: .claude-plugin/marketplace.json # default + ref: # required for reproducibility + branch: main # used only with allow_head + host: github.com # default + allow_head: false # default; opt-in to mutable refs + packages: + # Direct package + - name: my-skill + source: owner/repo + version: ">=1.0.0" + + # Upstream-sourced package + - name: my-plugin # display name in your marketplace + upstream: myupstream # references upstreams[].alias + plugin: original-plugin-name # name in the upstream marketplace + version: ">=1.0.0" # optional curator override +``` + +`upstream` and `source` are mutually exclusive on a single `packages[]` entry. + +## CLI reference + +| Command | Purpose | +|---|---| +| `apm marketplace upstream add --alias --ref ` | Register an upstream marketplace pinned to a 40-char SHA (recommended) | +| `apm marketplace upstream add --alias --ref v1.2.3` | Pin to an annotated tag (acceptable for stable upstreams; SHA still preferred) | +| `apm marketplace upstream add --alias --branch main --allow-head` | Track a mutable branch -- requires explicit `--allow-head` opt-in (warned every build) | +| `apm marketplace upstream list` | List registered upstreams | +| `apm marketplace upstream remove [--yes]` | Remove an upstream (rejects if any package still references it) | +| `apm marketplace package add --upstream --plugin [--name ...]` | Expose an upstream plugin in your `packages[]` | + +### Tag vs SHA: when to use which + +- **Always prefer a 40-char SHA.** It is content-addressed: even if the upstream force-pushes the branch the tag points at, your build keeps resolving the original tree. 
+- **Tags are acceptable** when the upstream maintainer has a strong stable-tag discipline (annotated, signed, never moved). APM still resolves the tag to its current SHA at build time and writes the resolved SHA to `apm.lock.yaml` -- so reproducibility holds for that lockfile, but a fresh `add` after the tag moves will resolve to a new SHA. +- **Branches** (`--branch main --allow-head`) are explicitly opt-in. Every build emits a warning, and enterprise policy can reject HEAD-tracking entries entirely. + +## Reproducibility + +Every build pins: + +- The **upstream `marketplace.json` commit SHA** (so the manifest itself can't change under you). +- Each **upstream plugin's resolved commit SHA** (so the plugin source code can't change under you). + +These pins are written to `apm.lock.yaml` under `upstreams:`. Subsequent rebuilds replay from the lock and produce byte-identical output. + +:::note[Planned] +`apm marketplace upstream refresh` is not yet implemented. To advance the pins today, re-run `apm marketplace upstream add` with a new `--ref` value. A dedicated `refresh` command that shows an old-SHA-to-new-SHA diff before committing is planned for a future release. +::: + +## Failure modes + +The builder fails closed on every upstream-resolution problem (exit code `2`) rather than silently skipping. You will see one of these named errors: + +```text +[x] upstream alias 'myupstream' is not declared in marketplace.upstreams +``` + +The `--upstream` value on a `packages[]` entry must reference a declared `upstreams[].alias`. Add the upstream first, or fix the typo in the package entry. + +```text +[x] upstream 'myupstream' canonical name has changed: declared 'old-owner/Repo' but GitHub returns 'new-owner/Repo' (possible repo rename or takeover) +``` + +The repo at the configured owner/repo path no longer matches what your lockfile recorded the last time the upstream was refreshed. 
Investigate before advancing the pin -- this is the same signal package-confusion attacks produce. + +```text +[x] upstream 'myupstream' resolves to ref 'main' which is a moving branch; pass --allow-head to opt in or pin --ref to a SHA / tag +``` + +You attempted to register or build an upstream against a branch without explicit `--allow-head`. Either pin to an immutable SHA / tag (recommended) or opt in to HEAD-tracking with `--allow-head` and accept the per-build warning. + +## Trust model + +Upstreams are a **curated pass-through**, not a binary mirror. + +| Concern | v1 status | +|---|---| +| Allow-list governance (curator picks which plugins are exposed) | Yes | +| Build-time commit pinning (manifest SHA + plugin SHA in lockfile) | Yes | +| Reproducible curator builds (rebuild from lock = byte-identical output) | Yes | +| Defence against upstream repo rename / takeover | Yes (canonical-owner check) | +| Consumer-side artifact custody | No -- consumer clones from upstream git host at install | +| Resilience to upstream takedown / force-push | No -- consumer install fails if upstream rewrites history | + +Air-gapped re-hosting is **out of scope for v1** and is tracked separately as a future `distribution: rehost` mode. + +## What is NOT supported in v1 + +- **Re-hosting / artifact custody.** Consumer installs always fetch plugin content from the upstream git host. APM never proxies or stores that content. +- **Transitive upstreams.** If marketplace B upstreams marketplace C, you cannot upstream B and inherit C transitively. +- **Cross-host upstreams.** The upstream and its referenced plugins must live on the same git host family (for example, both on github.com). +- **Search.** `apm marketplace upstream list` covers discovery in v1. + +:::note[Planned] +`apm doctor` will gain upstream health checks in a future release: reachability testing, pinned-vs-HEAD drift reporting, and manifest-SHA round-trip verification against the lockfile. 
+::: + +## Related + +- [Authoring a marketplace](../marketplace-authoring/) -- start here if your `apm.yml` has no `marketplace:` block yet. +- [Marketplaces (consumer)](../marketplaces/) -- registering and consuming external marketplaces from a project. +- [Security and trust](../../enterprise/security/) -- the full APM security model. + diff --git a/docs/src/content/docs/reference/cli-commands.md b/docs/src/content/docs/reference/cli-commands.md index 2360a9a95..5c41a7638 100644 --- a/docs/src/content/docs/reference/cli-commands.md +++ b/docs/src/content/docs/reference/cli-commands.md @@ -1489,18 +1489,22 @@ apm marketplace package add SOURCE [OPTIONS] ``` **Arguments:** -- `SOURCE` - GitHub `owner/repo` reference +- `SOURCE` - GitHub `owner/repo` reference (omit when `--upstream` is used) **Options:** - `--version TEXT` - Semver range constraint (e.g. `">=1.0.0"`) - `--ref TEXT` - Pin to a git ref (SHA, tag, or HEAD). Mutable refs are auto-resolved to SHA - `-d`, `--description TEXT` - Short description for the entry - `-s`, `--subdir TEXT` - Subdirectory inside source repo +- `--upstream ALIAS` - Reference a registered upstream marketplace (mutually exclusive with `SOURCE`/`--subdir`). See [Marketplace Upstreams](../../guides/marketplace-upstreams/). +- `--plugin NAME` - Plugin name in the upstream marketplace (required when `--upstream` is set) +- `--name NAME` - Override the displayed package name in your marketplace +- `--allow-head` - Permit upstream-sourced entries that resolve to a moving branch HEAD (warned) - `--include-prerelease` - Include pre-release versions - `--no-verify` - Skip remote repository verification - `--verbose` - Enable verbose output -`--version` and `--ref` are mutually exclusive. When neither is provided, the current `HEAD` SHA is pinned automatically. +`--version` and `--ref` are mutually exclusive. `SOURCE` and `--upstream` are mutually exclusive. 
When neither `--version` nor `--ref` is provided, the current `HEAD` SHA is pinned automatically. **Examples:** ```bash @@ -1516,6 +1520,45 @@ apm marketplace package add acme/code-review # Add with description and skip verification (requires explicit --ref SHA) apm marketplace package add acme/code-review --ref abc123...40chars \ --description "Code review skill" --no-verify + +# Add a package sourced from a registered upstream marketplace +apm marketplace package add --upstream gitnexus --plugin gitnexus \ + --name acme-gitnexus +``` + +#### `apm marketplace upstream` - Manage upstream marketplaces + +Curate plugins from external Claude Code marketplaces under your own +allow-list. Subcommands edit `apm.yml -> marketplace.upstreams`. See +the [Marketplace Upstreams guide](../../guides/marketplace-upstreams/) +for the trust model and end-to-end example. + +```bash +apm marketplace upstream add OWNER/REPO --alias ALIAS [OPTIONS] +apm marketplace upstream list [--verbose] +apm marketplace upstream remove ALIAS [--yes] +``` + +**`add` options:** +- `--alias TEXT` (required) - Local alias used to reference the upstream from `packages[]` +- `--ref TEXT` - Pin the upstream `marketplace.json` to a SHA or tag (mutable refs auto-resolve to SHA) +- `--branch TEXT` - Track a mutable branch (requires `--allow-head`) +- `--path TEXT` - Path to the upstream `marketplace.json` (default: `.claude-plugin/marketplace.json`) +- `--host TEXT` - Git host (default: `github.com`; supports GHE/GHES) +- `--allow-head` - Permit branch HEAD tracking (warned at build time) + +**Examples:** +```bash +# Register a public upstream pinned to a SHA +apm marketplace upstream add abhigyanpatwari/GitNexus \ + --alias gitnexus \ + --ref abc1234...40chars + +# List registered upstreams +apm marketplace upstream list + +# Remove an upstream (rejects if any package still references it) +apm marketplace upstream remove gitnexus --yes ``` #### `apm marketplace package set` - Update a package entry diff 
--git a/packages/apm-guide/.apm/skills/apm-usage/commands.md b/packages/apm-guide/.apm/skills/apm-usage/commands.md index 74e769106..2d97bb1f8 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/commands.md +++ b/packages/apm-guide/.apm/skills/apm-usage/commands.md @@ -83,8 +83,12 @@ | `apm marketplace doctor` | Diagnose git, network, auth, and marketplace config readiness | `-v` | | `apm marketplace publish` | Open PRs on consumer repos from `consumer-targets.yml` | `--targets PATH`, `--dry-run`, `--no-pr`, `--draft`, `--allow-downgrade`, `--allow-ref-change`, `--parallel N`, `-y` | | `apm marketplace package add ` | Add a plugin entry to `marketplace.plugins` (source accepts `owner/repo` or `./path`) | `--name`, `--version`, `--ref` (mutable refs auto-resolved to SHA), `-d`/`--description`, `-s`/`--subdir`, `--tag-pattern`, `--tags`, `--include-prerelease`, `--no-verify` | +| `apm marketplace package add --upstream --plugin ` | Expose an upstream marketplace's plugin (mutually exclusive with positional ``) | `--name`, `--version`, `--ref`, `--tag-pattern`, `--tags`, `--include-prerelease`, `--allow-head` | | `apm marketplace package set ` | Update fields on an existing plugin entry | `--version`, `--ref` (mutable refs auto-resolved to SHA), `--description`, `--subdir`, `--tag-pattern`, `--tags`, `--include-prerelease` | | `apm marketplace package remove ` | Remove a plugin entry from `marketplace.plugins` | `--yes` | +| `apm marketplace upstream add --alias ` | Register an upstream marketplace (allow-list governance, immutable commit pinning, no re-hosting) | `--ref` (immutable; mutable refs auto-resolved), `--branch` (requires `--allow-head`), `--path`, `--host`, `--allow-head`, `--no-verify` | +| `apm marketplace upstream list` | List registered upstream marketplaces | `-v` | +| `apm marketplace upstream remove ` | Remove an upstream (rejects when any package still references the alias) | | To build the marketplace, run `apm pack` (it reads `apm.yml` and 
writes `.claude-plugin/marketplace.json` whenever the `marketplace:` block is present). `apm init --marketplace` is the equivalent shortcut at project-creation time -- it seeds a fresh `apm.yml` with the `marketplace:` block already in place. diff --git a/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md b/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md index cf29ba144..adccb7895 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md +++ b/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md @@ -284,8 +284,41 @@ marketplace: description: Plugin shipped alongside this repo source: ./plugins/local-tool # local path (no remote fetch) version: 0.1.0 + + upstreams: # optional; expose external-marketplace plugins + - alias: gitnexus # local handle + repo: abhigyanpatwari/GitNexus # upstream marketplace repo + ref: <40-char-sha> # required for reproducibility + # branch: main # alternative to ref; requires allow_head: true + # path: .claude-plugin/marketplace.json # default + # host: github.com # default + # allow_head: false # default; opt-in to mutable refs ``` +Add `packages[]` entries that reference `upstreams[].alias` to expose +specific plugins from an upstream: + +```yaml +marketplace: + upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: <40-char-sha> + packages: + - name: acme-gitnexus # display name in your marketplace + upstream: gitnexus # references upstreams[].alias + plugin: gitnexus # name in the upstream marketplace + version: ">=1.0.0" # optional curator override +``` + +`source` and `upstream` are mutually exclusive on a single entry. +Upstreams are a curated allow-list with build-time commit pinning; +APM does NOT re-host upstream content -- consumer installs always +fetch plugin source from the upstream git host. Provenance (manifest +SHA, resolved plugin SHA, canonical owner) is recorded only in +`apm.lock.yaml`; the emitted `marketplace.json` stays +Anthropic-conformant. 
+ Schema rules: - `owner.name` is required. `name`, `description`, `version` are optional inside the block (inherited from apm.yml top level). diff --git a/scripts/test-integration.sh b/scripts/test-integration.sh index bf2be3274..69cf65415 100755 --- a/scripts/test-integration.sh +++ b/scripts/test-integration.sh @@ -552,6 +552,16 @@ run_e2e_tests() { exit 1 fi + log_info "Running marketplace upstream build integration tests..." + echo "Command: pytest tests/integration/marketplace/test_upstream_build_integration.py -v --tb=short" + + if pytest tests/integration/marketplace/test_upstream_build_integration.py -v --tb=short; then + log_success "Marketplace upstream build integration tests passed!" + else + log_error "Marketplace upstream build integration tests failed!" + exit 1 + fi + log_success "All integration test suites completed successfully!" diff --git a/src/apm_cli/commands/marketplace/__init__.py b/src/apm_cli/commands/marketplace/__init__.py index 39d039178..4a40717f5 100644 --- a/src/apm_cli/commands/marketplace/__init__.py +++ b/src/apm_cli/commands/marketplace/__init__.py @@ -87,6 +87,7 @@ class MarketplaceGroup(click.Group): "doctor", "publish", "package", + "upstream", "migrate", ] @@ -206,8 +207,10 @@ def marketplace(ctx): from .plugin import package # noqa: E402 +from .upstream import upstream # noqa: E402 marketplace.add_command(package) +marketplace.add_command(upstream) def _check_gitignore_for_marketplace_json(logger): diff --git a/src/apm_cli/commands/marketplace/plugin/__init__.py b/src/apm_cli/commands/marketplace/plugin/__init__.py index fdbfeca6c..188a64d28 100644 --- a/src/apm_cli/commands/marketplace/plugin/__init__.py +++ b/src/apm_cli/commands/marketplace/plugin/__init__.py @@ -2,7 +2,6 @@ from __future__ import annotations -import re import sys from pathlib import Path @@ -15,10 +14,9 @@ MarketplaceYmlError, # noqa: F401 OfflineMissError, ) +from ....marketplace.ref_resolver import FULL_SHA_RE as _SHA_RE from ..._helpers import 
_is_interactive # noqa: F401 -_SHA_RE = re.compile(r"^[0-9a-f]{40}$") - def _yml_path() -> Path: """Return the active marketplace authoring config path in CWD.""" diff --git a/src/apm_cli/commands/marketplace/plugin/add.py b/src/apm_cli/commands/marketplace/plugin/add.py index 0a2eb58a9..6f20644c5 100644 --- a/src/apm_cli/commands/marketplace/plugin/add.py +++ b/src/apm_cli/commands/marketplace/plugin/add.py @@ -18,8 +18,8 @@ @package.command(help="Add a package to marketplace authoring config") -@click.argument("source") -@click.option("--name", default=None, help="Package name (default: repo name)") +@click.argument("source", required=False) +@click.option("--name", default=None, help="Package name (default: repo name or plugin name)") @click.option("--version", default=None, help="Semver range (e.g. '>=1.0.0')") @click.option( "--ref", @@ -30,6 +30,17 @@ @click.option("--tag-pattern", default=None, help="Tag pattern (e.g. 'v{version}')") @click.option("--tags", default=None, help="Comma-separated tags") @click.option("--include-prerelease", is_flag=True, help="Include prerelease versions") +@click.option( + "--upstream", + default=None, + help="Expose a plugin from a registered upstream (mutually exclusive with SOURCE)", +) +@click.option( + "--plugin", + default=None, + help="Plugin name in the upstream marketplace (required when --upstream is set)", +) +@click.option("--allow-head", is_flag=True, help="Allow upstream plugin to track a mutable ref") @click.option("--no-verify", is_flag=True, help="Skip remote reachability check") @click.option("--verbose", "-v", is_flag=True, help="Show detailed output") def add( @@ -41,16 +52,42 @@ def add( tag_pattern, tags, include_prerelease, + upstream, + plugin, + allow_head, no_verify, verbose, ): """Add a package entry to marketplace authoring config.""" - from ....marketplace.yml_editor import add_plugin_entry + from ....marketplace.yml_editor import add_plugin_entry, add_upstream_package_entry logger = 
CommandLogger("marketplace-package-add", verbose=verbose) yml = _ensure_yml_exists(logger) - # --version and --ref are mutually exclusive. + # SOURCE xor --upstream. + if upstream and source: + raise click.UsageError( + "SOURCE and --upstream are mutually exclusive. " + "Use SOURCE for direct git packages, or --upstream for " + "plugins from a registered upstream marketplace." + ) + if not upstream and not source: + raise click.UsageError( + "Provide either a SOURCE (e.g. 'owner/repo') or '--upstream '." + ) + if source and (plugin or allow_head): + raise click.UsageError( + "--plugin and --allow-head only apply to upstream packages (use with --upstream)." + ) + if upstream and plugin is None: + raise click.UsageError( + "--plugin is required when --upstream is set. " + "Specify the plugin name as it appears in the upstream marketplace." + ) + if subdir and upstream: + raise click.UsageError("--subdir only applies to direct packages (not --upstream).") + + # --version and --ref are mutually exclusive on either path. if version and ref: raise click.UsageError( "--version and --ref are mutually exclusive. " @@ -59,11 +96,34 @@ def add( parsed_tags = _parse_tags(tags) - # Verify source reachability unless skipped. + if upstream: + try: + resolved_name = add_upstream_package_entry( + yml, + upstream=upstream, + plugin=plugin, + name=name, + version=version, + ref=ref, + tag_pattern=tag_pattern, + tags=parsed_tags, + include_prerelease=include_prerelease, + allow_head=allow_head, + ) + except MarketplaceYmlError as exc: + logger.error(str(exc), symbol="error") + sys.exit(2) + logger.success( + f"Added package '{resolved_name}' from upstream '{upstream}'", + symbol="check", + ) + logger.info("Next: 'apm pack' to build marketplace.json", symbol="info") + return + + # Direct package path (existing behaviour, unchanged). if not no_verify: _verify_source(logger, source) - # Resolve mutable refs to concrete SHAs. 
ref = _resolve_ref(logger, source, ref, version, no_verify) try: diff --git a/src/apm_cli/commands/marketplace/upstream.py b/src/apm_cli/commands/marketplace/upstream.py new file mode 100644 index 000000000..8e9ef98d2 --- /dev/null +++ b/src/apm_cli/commands/marketplace/upstream.py @@ -0,0 +1,248 @@ +"""Marketplace upstream subgroup -- click wiring + helpers.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import click + +from ...core.command_logger import CommandLogger +from ...marketplace.errors import GitLsRemoteError, MarketplaceYmlError, OfflineMissError +from ...marketplace.yml_editor import ( + add_upstream_entry, + list_upstream_entries, + remove_upstream_entry, +) +from .plugin import _SHA_RE, _ensure_yml_exists + + +def _build_resolver(repo: str, host: str | None): + """Build a ``RefResolver`` configured for the upstream's host + auth. + + Resolves the per-host token via ``AuthResolver`` so private upstream + repos and GHE/GHES hosts work transparently. Owner is derived from + *repo* (``owner/name``) so org-scoped credentials match. + """ + from ...core.auth import AuthResolver + from ...marketplace.ref_resolver import RefResolver + + target_host = host or "github.com" + org = repo.split("/", 1)[0] if "/" in repo else None + try: + ctx = AuthResolver().resolve(target_host, org=org) + token = ctx.token + except Exception: + token = None + return RefResolver(host=target_host, token=token) + + +@click.group(help="Manage upstream marketplaces in authoring config") +def upstream(): + """Add, list, or remove upstream marketplaces in apm.yml.""" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _verify_upstream_repo(logger: CommandLogger, repo: str, host: str | None) -> None: + """Verify *repo* is reachable via ``git ls-remote``. + + Soft-warns on offline; hard-errors on definitive miss. 
+ """ + resolver = _build_resolver(repo, host) + try: + resolver.list_remote_refs(repo) + except GitLsRemoteError as exc: + logger.error( + f"Upstream '{repo}' is not reachable: {exc}", + symbol="error", + ) + sys.exit(2) + except OfflineMissError: + logger.warning( + f"Cannot verify upstream '{repo}' (offline / no cache).", + symbol="warning", + ) + + +def _resolve_upstream_ref_to_sha( + logger: CommandLogger, + repo: str, + ref: str, + host: str | None, +) -> str: + """Resolve a mutable ref (tag / branch) to a SHA via ls-remote. + + Returns *ref* unchanged when it already looks like a 40-char SHA or + when offline. Hard-errors when ls-remote returns no matching ref. + """ + if _SHA_RE.match(ref): + return ref + + resolver = _build_resolver(repo, host) + try: + refs = resolver.list_remote_refs(repo) + except OfflineMissError: + logger.warning( + f"Offline: cannot resolve ref '{ref}' for {repo}; storing as-is.", + symbol="warning", + ) + return ref + except GitLsRemoteError as exc: + logger.error( + f"Failed to resolve ref '{ref}' for {repo}: {exc}", + symbol="error", + ) + sys.exit(2) + + for entry in refs: + if entry.name in ( + f"refs/tags/{ref}", + f"refs/heads/{ref}", + ref, + ): + return entry.sha + logger.error( + f"Ref '{ref}' not found in {repo}.", + symbol="error", + ) + sys.exit(2) + + +# --------------------------------------------------------------------------- +# upstream add +# --------------------------------------------------------------------------- + + +@upstream.command(help="Register an upstream marketplace") +@click.argument("repo", required=True) +@click.option( + "--alias", required=True, help="Local alias for the upstream (used to reference plugins)" +) +@click.option( + "--ref", + default=None, + help="Pin to an immutable ref (40-char SHA or tag). 
Mutable refs are auto-resolved.", +) +@click.option("--branch", default=None, help="Track a mutable branch (requires --allow-head)") +@click.option( + "--path", + default=None, + help="Manifest path inside the upstream repo (default: .claude-plugin/marketplace.json)", +) +@click.option("--host", default=None, help="Git host FQDN (default: github.com)") +@click.option("--allow-head", is_flag=True, help="Permit a mutable branch (HEAD-tracking)") +@click.option("--no-verify", is_flag=True, help="Skip remote reachability check") +@click.option("--verbose", "-v", is_flag=True, help="Show detailed output") +def add(repo, alias, ref, branch, path, host, allow_head, no_verify, verbose): + """Add an upstream marketplace entry to authoring config.""" + logger = CommandLogger("marketplace-upstream-add", verbose=verbose) + yml = _ensure_yml_exists(logger) + + if ref and branch: + raise click.UsageError("--ref and --branch are mutually exclusive.") + if not ref and not branch: + raise click.UsageError("Specify either --ref (immutable) or --branch (with --allow-head).") + if branch and not allow_head: + raise click.UsageError( + "--branch requires --allow-head to acknowledge the mutable-pin trade-off." 
+ ) + + if not no_verify: + _verify_upstream_repo(logger, repo, host) + + if ref is not None: + ref = _resolve_upstream_ref_to_sha(logger, repo, ref, host) + + try: + add_upstream_entry( + Path(yml), + alias=alias, + repo=repo, + ref=ref, + branch=branch, + path=path, + host=host, + allow_head=allow_head, + ) + except MarketplaceYmlError as exc: + logger.error(str(exc), symbol="error") + sys.exit(2) + + logger.success( + f"Registered upstream '{alias}' -> {repo}", + symbol="check", + ) + logger.info( + f"Next: 'apm marketplace package add --upstream {alias} --plugin '", + symbol="info", + ) + + +# --------------------------------------------------------------------------- +# upstream list +# --------------------------------------------------------------------------- + + +@upstream.command("list", help="List registered upstream marketplaces") +@click.option("--verbose", "-v", is_flag=True, help="Show detailed output") +def list_cmd(verbose): + """List upstream marketplaces.""" + logger = CommandLogger("marketplace-upstream-list", verbose=verbose) + yml = _ensure_yml_exists(logger) + entries = list_upstream_entries(Path(yml)) + + if not entries: + logger.info( + "No upstream marketplaces registered. 
" + "Run 'apm marketplace upstream add --alias --ref ' to add one.", + symbol="info", + ) + return + + noun = "upstream" if len(entries) == 1 else "upstreams" + logger.info(f"{len(entries)} {noun} registered:", symbol="info") + for entry in entries: + alias = entry.get("alias", "") + repo = entry.get("repo", "") + host = entry.get("host", "github.com") + pin = entry.get("ref") or entry.get("branch", "") + head = " (HEAD-tracking)" if entry.get("allow_head") else "" + logger.info(f" {alias} -> {host}/{repo} @ {pin}{head}", symbol="info") + + +# --------------------------------------------------------------------------- +# upstream remove +# --------------------------------------------------------------------------- + + +@upstream.command(help="Remove an upstream marketplace") +@click.argument("alias", required=True) +@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt") +@click.option("--verbose", "-v", is_flag=True, help="Show detailed output") +def remove(alias, yes, verbose): + """Remove an upstream marketplace from authoring config.""" + logger = CommandLogger("marketplace-upstream-remove", verbose=verbose) + yml = _ensure_yml_exists(logger) + # Validate the alias exists before prompting -- unknown alias always + # exits 2 without a confirm prompt (prevents misleading UX). 
+ try: + remove_upstream_entry(Path(yml), alias, dry_run=True) + except MarketplaceYmlError as exc: + logger.error(str(exc), symbol="error") + sys.exit(2) + if not yes: + click.confirm(f"Remove upstream '{alias}'?", abort=True) + try: + remove_upstream_entry(Path(yml), alias) + except MarketplaceYmlError as exc: + logger.error(str(exc), symbol="error") + sys.exit(2) + + logger.success(f"Removed upstream '{alias}'") + + +__all__ = ["add", "list_cmd", "remove", "upstream"] diff --git a/src/apm_cli/commands/pack.py b/src/apm_cli/commands/pack.py index cdf543884..bb70aca31 100644 --- a/src/apm_cli/commands/pack.py +++ b/src/apm_cli/commands/pack.py @@ -213,15 +213,19 @@ def _render_marketplace_result(logger, report, dry_run, extra_warnings=None): logger.warning(warn_msg) for warn_msg in report.warnings: logger.warning(warn_msg) + total_packages = len(report.resolved) + len(report.upstream_resolved) if dry_run or report.dry_run: logger.dry_run_notice( - f"Would write marketplace.json ({len(report.resolved)} package(s)) " - f"-> {report.output_path}" + f"Would write marketplace.json ({total_packages} package(s)) -> {report.output_path}" ) return - logger.success( - f"Built marketplace.json ({len(report.resolved)} package(s)) -> {report.output_path}" - ) + direct_count = len(report.resolved) + upstream_count = len(report.upstream_resolved) + if upstream_count > 0: + count_str = f"{direct_count} direct + {upstream_count} upstream" + else: + count_str = f"{total_packages} package(s)" + logger.success(f"Built marketplace.json ({count_str}) -> {report.output_path}") @click.command( diff --git a/src/apm_cli/deps/lockfile.py b/src/apm_cli/deps/lockfile.py index e2c20333c..90fb8d63f 100644 --- a/src/apm_cli/deps/lockfile.py +++ b/src/apm_cli/deps/lockfile.py @@ -225,6 +225,95 @@ def to_dependency_ref(self) -> DependencyReference: ) +@dataclass +class LockedUpstream: + """Provenance record for an upstream-sourced marketplace package. 
+ + Captures the full chain needed to verify a curator-controlled + pass-through emission: which external marketplace was queried, at + which immutable manifest commit, and which plugin commit was + selected. Stored under :attr:`LockFile.upstreams` keyed by alias. + """ + + alias: str + host: str + owner: str + repo: str + path: str + manifest_sha: str + canonical_full_name: str | None = None + refreshed_at: str | None = None + # plugins[upstream_name] = LockedUpstreamPlugin + plugins: dict[str, "LockedUpstreamPlugin"] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "host": self.host, + "owner": self.owner, + "repo": self.repo, + "path": self.path, + "manifest_sha": self.manifest_sha, + } + if self.canonical_full_name: + result["canonical_full_name"] = self.canonical_full_name + if self.refreshed_at: + result["refreshed_at"] = self.refreshed_at + if self.plugins: + result["plugins"] = { + name: plugin.to_dict() for name, plugin in sorted(self.plugins.items()) + } + return result + + @classmethod + def from_dict(cls, alias: str, data: dict[str, Any]) -> "LockedUpstream": + plugins_raw = data.get("plugins") or {} + plugins = { + name: LockedUpstreamPlugin.from_dict(name, body) for name, body in plugins_raw.items() + } + return cls( + alias=alias, + host=data.get("host", ""), + owner=data.get("owner", ""), + repo=data.get("repo", ""), + path=data.get("path", ""), + manifest_sha=data.get("manifest_sha", ""), + canonical_full_name=data.get("canonical_full_name"), + refreshed_at=data.get("refreshed_at"), + plugins=plugins, + ) + + +@dataclass +class LockedUpstreamPlugin: + """A single curator-exposed plugin pinned from an upstream marketplace.""" + + upstream_name: str # plugin name as it appears in the upstream manifest + emitted_as: str # display name in the curator's marketplace.json + # ``None`` is a valid pin only when ``pin_source == "branch-head"`` + # (allow_head opt-in flow). 
The lockfile round-trips it as ``null``; + # ``from_dict`` preserves that semantics rather than coercing to ``""``. + resolved_sha: str | None + resolved_source: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "emitted_as": self.emitted_as, + "resolved_sha": self.resolved_sha, + } + if self.resolved_source: + result["resolved_source"] = dict(sorted(self.resolved_source.items())) + return result + + @classmethod + def from_dict(cls, upstream_name: str, data: dict[str, Any]) -> "LockedUpstreamPlugin": + return cls( + upstream_name=upstream_name, + emitted_as=data.get("emitted_as", upstream_name), + resolved_sha=data.get("resolved_sha"), + resolved_source=dict(data.get("resolved_source") or {}), + ) + + @dataclass class LockFile: """APM lock file for reproducible dependency resolution.""" @@ -237,6 +326,7 @@ class LockFile: mcp_configs: dict[str, dict] = field(default_factory=dict) local_deployed_files: list[str] = field(default_factory=list) local_deployed_file_hashes: dict[str, str] = field(default_factory=dict) + upstreams: dict[str, LockedUpstream] = field(default_factory=dict) def add_dependency(self, dep: LockedDependency) -> None: """Add a dependency to the lock file.""" @@ -283,6 +373,10 @@ def to_yaml(self) -> str: data["local_deployed_file_hashes"] = dict( sorted(self.local_deployed_file_hashes.items()) ) + if self.upstreams: + data["upstreams"] = { + alias: upstream.to_dict() for alias, upstream in sorted(self.upstreams.items()) + } from ..utils.yaml_io import yaml_to_str return yaml_to_str(data) @@ -309,6 +403,13 @@ def from_yaml(cls, yaml_str: str) -> "LockFile": lock.mcp_configs = dict(data.get("mcp_configs") or {}) lock.local_deployed_files = list(data.get("local_deployed_files", [])) lock.local_deployed_file_hashes = dict(data.get("local_deployed_file_hashes") or {}) + upstreams_raw = data.get("upstreams") or {} + if isinstance(upstreams_raw, dict): + lock.upstreams = { + alias: 
LockedUpstream.from_dict(alias, body) + for alias, body in upstreams_raw.items() + if isinstance(body, dict) + } # Synthesize a virtual self-entry representing the project's own # local content. This unifies traversal across "real" dependencies # and the local package, without changing the on-disk YAML shape. diff --git a/src/apm_cli/marketplace/builder.py b/src/apm_cli/marketplace/builder.py index 2d411ac3f..37b877333 100644 --- a/src/apm_cli/marketplace/builder.py +++ b/src/apm_cli/marketplace/builder.py @@ -19,7 +19,6 @@ import json import logging -import re import urllib.error import urllib.request from collections import OrderedDict @@ -43,10 +42,22 @@ OfflineMissError, # noqa: F401 RefNotFoundError, ) +from .models import parse_marketplace_json from .ref_resolver import RefResolver, RemoteRef # noqa: F401 from .semver import SemVer, parse_semver, satisfies_range from .tag_pattern import build_tag_regex, render_tag # noqa: F401 -from .yml_schema import MarketplaceYml, PackageEntry, load_marketplace_yml +from .upstream_cache import UpstreamCache +from .upstream_resolver import ( + ResolvedUpstreamPackage, + UpstreamResolver, + UpstreamResolverDiagnostic, +) +from .yml_schema import ( + MarketplaceYml, + PackageEntry, + UpstreamPackageEntry, + load_marketplace_yml, +) logger = logging.getLogger(__name__) @@ -66,10 +77,17 @@ @dataclass(frozen=True) class BuildDiagnostic: - """Structured diagnostic emitted during marketplace.json composition.""" + """Structured diagnostic emitted during marketplace.json composition. - level: str # "warning" | "verbose" + Levels: ``"verbose"`` (info-only), ``"warning"`` (non-fatal), + ``"error"`` (fatal -- ``build()`` will raise BuildError before + writing). Optional ``code`` is a stable identifier for upstream + diagnostics so callers can match without parsing message text. 
+ """ + + level: str # "verbose" | "warning" | "error" message: str + code: str = "" @dataclass(frozen=True) @@ -92,6 +110,8 @@ class ResolveResult: entries: tuple[ResolvedPackage, ...] errors: tuple[tuple[str, str], ...] # (package name, error message) pairs + upstream_entries: tuple[ResolvedUpstreamPackage, ...] = () + upstream_diagnostics: tuple[UpstreamResolverDiagnostic, ...] = () @property def ok(self) -> bool: @@ -113,6 +133,8 @@ class BuildReport: removed_count: int = 0 output_path: Path = field(default_factory=lambda: Path(".")) dry_run: bool = False + upstream_resolved: tuple[ResolvedUpstreamPackage, ...] = () + upstream_diagnostics: tuple[UpstreamResolverDiagnostic, ...] = () @dataclass @@ -133,8 +155,9 @@ class BuildOptions: # Builder # --------------------------------------------------------------------------- -# 40-char hex SHA pattern -_SHA40_RE = re.compile(r"^[0-9a-f]{40}$") +# 40-char hex SHA pattern -- shared across marketplace modules; see +# ``marketplace/ref_resolver.FULL_SHA_RE``. +from .ref_resolver import FULL_SHA_RE as _SHA40_RE # noqa: E402 # Version range indicators -- if a version string starts with any of these # or contains spaces, it's a resolution constraint, not a display override. @@ -195,6 +218,148 @@ def _subtract_plugin_root(source: str, plugin_root: str) -> str: return "./" + result +class _UpstreamResolverFactory: + """Build an :class:`UpstreamResolver` wired to a builder context. + + Extracted from :meth:`MarketplaceBuilder._build_upstream_resolver` + to flatten what was previously a 100+ line closure stack. Owns: + + - the per-host :class:`RefResolver` cache (lazy, never inherits the + curator's auth context across hosts); + - the ``ref_to_sha`` resolution path (SHA short-circuit, offline + guard, ls-remote-backed tag/branch resolution); + - the ``version_range`` semver-pattern resolution path. + + Lifetime is one build invocation. 
Holding the cache as instance + state (rather than a closure cell) makes the dependencies between + the three resolver functions explicit. + """ + + def __init__(self, builder: MarketplaceBuilder, yml: MarketplaceYml) -> None: + self._builder = builder + self._yml = yml + self._host_resolvers: dict[str, RefResolver] = {} + # Populated by _build_upstream_resolver from the existing lockfile. + # Maps ``owner/repo`` to the previously resolved manifest SHA so that + # offline rebuilds (``BuildOptions.offline=True``) can replay the + # cached SHA without a network round-trip to re-resolve a tag/branch. + self._lockfile_shas_by_repo: dict[str, str] = {} + + # -- internal: per-host RefResolver cache ----------------------------------- + + def _resolver_for_host(self, host: str) -> RefResolver: + if host not in self._host_resolvers: + if host == self._builder._host: + self._host_resolvers[host] = self._builder._get_resolver() + else: + # v1: only github.com is supported. Construct an + # unauthenticated RefResolver so we never leak the + # curator's PAT to a foreign host. + self._host_resolvers[host] = RefResolver( + timeout_seconds=self._builder._options.timeout_seconds, + offline=self._builder._options.offline, + host=host, + token=None, + ) + return self._host_resolvers[host] + + # -- ref resolution --------------------------------------------------------- + + def ref_to_sha(self, host: str, owner: str, repo: str, ref_or_branch: str) -> str: + # SHA short-circuit -- offline-safe, no network. + if _SHA40_RE.match(ref_or_branch): + return ref_or_branch + owner_repo = f"{owner}/{repo}" + # Lockfile replay: when offline and we have a previously resolved SHA + # for this repo (loaded at build start), return it without a network + # call. Enables ``--offline`` rebuilds for tag/branch refs. 
+ if self._builder._options.offline: + known = self._lockfile_shas_by_repo.get(owner_repo) + if known: + return known + raise BuildError(f"cannot resolve ref '{ref_or_branch}' offline for {owner}/{repo}") + resolver = self._resolver_for_host(host) + refs = resolver.list_remote_refs(owner_repo) + for remote in refs: + if remote.name == f"refs/tags/{ref_or_branch}": + return remote.sha + if remote.name == f"refs/heads/{ref_or_branch}": + # B1: branch refs are mutable. Reject unless allow_head is + # enabled at the build level; callers that need to track HEAD + # explicitly set BuildOptions.allow_head=True. + if not self._builder._options.allow_head: + raise BuildError( + f"ref '{ref_or_branch}' resolves to a mutable branch HEAD; " + f"set allow_head: true or pin a SHA/tag for reproducible builds." + ) + return remote.sha + raise BuildError(f"ref '{ref_or_branch}' not found in {owner}/{repo}") + + # -- version-range resolution ---------------------------------------------- + + def version_range( + self, + host: str, + owner: str, + repo: str, + semver_range: str, + *, + tag_pattern: str | None = None, + include_prerelease: bool = False, + ) -> tuple[str, str]: + resolver = self._resolver_for_host(host) + owner_repo = f"{owner}/{repo}" + pattern = tag_pattern or self._yml.build.tag_pattern + tag_rx = build_tag_regex(pattern) + refs = resolver.list_remote_refs(owner_repo) + candidates: list[tuple[SemVer, str, str]] = [] + for remote in refs: + if not remote.name.startswith("refs/tags/"): + continue + tag_name = remote.name[len("refs/tags/") :] + m = tag_rx.match(tag_name) + if not m: + continue + version_str = m.group("version") + sv = parse_semver(version_str) + if sv is None: + continue + if sv.is_prerelease and not ( + include_prerelease or self._builder._options.include_prerelease + ): + continue + if satisfies_range(sv, semver_range): + candidates.append((sv, tag_name, remote.sha)) + if not candidates: + raise BuildError( + f"no version of {owner}/{repo} matches 
'{semver_range}' (pattern='{pattern}')" + ) + candidates.sort(key=lambda c: c[0], reverse=True) + _, best_tag, best_sha = candidates[0] + return best_tag, best_sha + + # -- assembly --------------------------------------------------------------- + + def build(self) -> UpstreamResolver: + upstreams_by_alias = {u.alias: u for u in self._yml.upstreams} + # ``canonical_full_name`` is intentionally ``None`` in v1. + # The rename-guard (checking that the fetched repo full_name still + # matches the lockfile's recorded name) requires a GitHub Contents API + # call for which no existing v1 helper exists. The lockfile already + # records the field for forward-compatibility; the guard fires in v2 + # once the helper is wired. This deferral is documented in the trust + # table in docs/guides/marketplace-upstreams.md. + return UpstreamResolver( + upstreams=upstreams_by_alias, + cache=UpstreamCache(), + ref_to_sha=self.ref_to_sha, + canonical_full_name=None, + version_range_resolver=self.version_range, + auth_resolver=self._builder._auth_resolver, + offline=self._builder._options.offline, + ) + + class MarketplaceBuilder: """Load marketplace.yml, resolve refs, compose and write marketplace.json. @@ -486,60 +651,150 @@ def _resolve_version_range( def resolve(self) -> ResolveResult: """Resolve every entry concurrently. + Direct (:class:`PackageEntry`) and upstream-sourced + (:class:`UpstreamPackageEntry`) entries are partitioned by + ``isinstance``. Direct entries follow the existing concurrent + ``git ls-remote`` path; upstream entries are resolved through + :class:`UpstreamResolver`, which owns its own cache, atomic-fetch + invariant, and rename guard. + Returns ------- ResolveResult - Contains resolved entries and any errors encountered. + Contains resolved direct entries, resolved upstream entries, + any per-package errors, and any structured upstream + diagnostics. Raises ------ BuildError - On any resolution failure (unless ``continue_on_error``). 
+ On any direct-resolution failure (unless ``continue_on_error``). + Upstream failures are always collected as diagnostics and + error rows; ``build()`` raises ``BuildError`` before writing + output when any error diagnostics are present. """ yml = self._load_yml() - entries = yml.packages - if not entries: + all_entries = yml.packages + if not all_entries: return ResolveResult(entries=(), errors=()) + direct_entries: list[tuple[int, PackageEntry]] = [] + upstream_entries: list[tuple[int, UpstreamPackageEntry]] = [] + for idx, entry in enumerate(all_entries): + if isinstance(entry, UpstreamPackageEntry): + upstream_entries.append((idx, entry)) + else: + direct_entries.append((idx, entry)) + results: dict[int, ResolvedPackage] = {} errors: list[tuple[str, str]] = [] - # Eagerly resolve auth + create the shared RefResolver before - # spawning workers -- avoids a race on _ensure_auth() and - # matches the pattern used in _prefetch_metadata(). - self._get_resolver() - - with ThreadPoolExecutor(max_workers=min(self._options.concurrency, len(entries))) as pool: - future_to_index = { - pool.submit(self._resolve_entry, entry): idx for idx, entry in enumerate(entries) - } - for future in as_completed(future_to_index): - idx = future_to_index[future] - entry = entries[idx] - try: - resolved = future.result(timeout=self._options.timeout_seconds) - results[idx] = resolved - except BuildError as exc: - if self._options.continue_on_error: - errors.append((entry.name, str(exc))) - else: - raise - except Exception as exc: - logger.debug("Unexpected error resolving '%s'", entry.name, exc_info=True) - if self._options.continue_on_error: - errors.append((entry.name, str(exc))) - else: - raise BuildError( - f"Unexpected error resolving '{entry.name}': {exc}", - package=entry.name, - ) from exc - - # Return in yml order - ordered: list[ResolvedPackage] = [] - for idx in range(len(entries)): + # -- direct path (existing concurrent ls-remote flow) --------------- + if direct_entries: + # 
Eagerly resolve auth + create the shared RefResolver before + # spawning workers -- avoids a race on _ensure_auth() and + # matches the pattern used in _prefetch_metadata(). + self._get_resolver() + + with ThreadPoolExecutor( + max_workers=min(self._options.concurrency, len(direct_entries)) + ) as pool: + future_to_index = { + pool.submit(self._resolve_entry, entry): idx for idx, entry in direct_entries + } + for future in as_completed(future_to_index): + idx = future_to_index[future] + entry = all_entries[idx] + try: + resolved = future.result(timeout=self._options.timeout_seconds) + results[idx] = resolved + except BuildError as exc: + if self._options.continue_on_error: + errors.append((entry.name, str(exc))) + else: + raise + except Exception as exc: + logger.debug("Unexpected error resolving '%s'", entry.name, exc_info=True) + if self._options.continue_on_error: + errors.append((entry.name, str(exc))) + else: + raise BuildError( + f"Unexpected error resolving '{entry.name}': {exc}", + package=entry.name, + ) from exc + + # -- direct results ordered by yml index --------------------------- + ordered_direct: list[ResolvedPackage] = [] + for idx in range(len(all_entries)): if idx in results: - ordered.append(results[idx]) - return ResolveResult(entries=tuple(ordered), errors=tuple(errors)) + ordered_direct.append(results[idx]) + + # -- upstream path ------------------------------------------------- + upstream_resolved: list[ResolvedUpstreamPackage] = [] + upstream_diagnostics: list[UpstreamResolverDiagnostic] = [] + if upstream_entries: + resolver = self._build_upstream_resolver(yml) + entries_only = [e for _, e in upstream_entries] + upstream_resolved, upstream_diagnostics = resolver.resolve_all(entries_only) + # Lift error-level upstream diagnostics into the errors list + # so the orchestrator and ``build()`` can react uniformly. 
+ for diag in upstream_diagnostics: + if diag.level == "error": + label = diag.plugin_name or diag.upstream_alias or "upstream" + errors.append((label, diag.message)) + + return ResolveResult( + entries=tuple(ordered_direct), + errors=tuple(errors), + upstream_entries=tuple(upstream_resolved), + upstream_diagnostics=tuple(upstream_diagnostics), + ) + + # -- upstream resolver wiring ------------------------------------------- + + def _build_upstream_resolver(self, yml: MarketplaceYml) -> UpstreamResolver: + """Construct an :class:`UpstreamResolver` for this build. + + v1 constraint: only ``github.com`` upstreams are wired; entries + targeting any other host yield an error diagnostic from + :meth:`UpstreamResolver.resolve_all`. Cross-host fan-out is + slated for v2. + + Implementation note: the per-host ``RefResolver`` cache and the + ref/version resolver functions used to live inside this method + as a 100+ line closure stack. They are now factored into + :class:`_UpstreamResolverFactory` for readability and + testability; this method remains the single assembly point. + """ + factory = _UpstreamResolverFactory(self, yml) + # Seed the factory with previously resolved manifest SHAs from the + # lockfile. This allows offline rebuilds to replay known SHAs for + # tag/branch refs without making network calls. + factory._lockfile_shas_by_repo.update(self._load_lockfile_upstream_shas()) + return factory.build() + + def _load_lockfile_upstream_shas(self) -> dict[str, str]: + """Return ``{owner/repo: manifest_sha}`` from the current lockfile. + + Best-effort: any read/parse error returns an empty dict, falling + back to online resolution. Only 40-char SHAs are returned (tags and + branch names stored in old lockfiles are skipped). 
+ """ + try: + from ..deps.lockfile import LockFile, get_lockfile_path + + lockfile_path = get_lockfile_path(self._project_root) + if not lockfile_path.exists(): + return {} + lock = LockFile.load_or_create(lockfile_path) + shas: dict[str, str] = {} + for lu in lock.upstreams.values(): + owner_repo = f"{lu.owner}/{lu.repo}" + if lu.manifest_sha and _SHA40_RE.match(lu.manifest_sha): + shas[owner_repo] = lu.manifest_sha + return shas + except Exception: + return {} # -- remote description fetcher ----------------------------------------- @@ -694,7 +949,12 @@ def _prefetch_metadata(self, resolved: list[ResolvedPackage]) -> dict[str, dict[ # -- composition -------------------------------------------------------- - def compose_marketplace_json(self, resolved: list[ResolvedPackage]) -> dict[str, Any]: + def compose_marketplace_json( + self, + resolved: list[ResolvedPackage], + *, + upstream_resolved: list[ResolvedUpstreamPackage] | None = None, + ) -> dict[str, Any]: """Produce an Anthropic-compliant marketplace.json dict. All APM-only fields are stripped. Key order follows the Anthropic @@ -703,7 +963,12 @@ def compose_marketplace_json(self, resolved: list[ResolvedPackage]) -> dict[str, Parameters ---------- resolved: - List of resolved packages (from ``resolve()``). + List of resolved direct packages (from ``resolve()``). + upstream_resolved: + Optional list of resolved upstream-sourced packages. Emitted + after ``resolved`` in the output ``plugins`` array. v1 does + not interleave upstream and direct entries; consumers see + direct emissions first, then upstream pass-throughs. Returns ------- @@ -717,7 +982,9 @@ def compose_marketplace_json(self, resolved: list[ResolvedPackage]) -> dict[str, # Build a name -> entry map so we can reach back for local-package # description / homepage that came from the yml itself. 
- entry_by_name: dict[str, PackageEntry] = {e.name: e for e in yml.packages} + entry_by_name: dict[str, PackageEntry] = { + e.name: e for e in yml.packages if not isinstance(e, UpstreamPackageEntry) + } doc: dict[str, Any] = OrderedDict() doc["name"] = yml.name @@ -874,6 +1141,11 @@ def compose_marketplace_json(self, resolved: list[ResolvedPackage]) -> dict[str, plugins.append(plugin) + # -- upstream-sourced plugins -- + if upstream_resolved: + for rup in upstream_resolved: + plugins.append(self._emit_upstream_plugin(rup)) + # Verbose summary line summary_parts: list[str] = [] if plugin_root and strip_count > 0: @@ -918,6 +1190,76 @@ def compose_marketplace_json(self, resolved: list[ResolvedPackage]) -> dict[str, doc["plugins"] = plugins return doc + # -- upstream emission -------------------------------------------------- + + def _emit_upstream_plugin(self, rup: ResolvedUpstreamPackage) -> dict[str, Any]: + """Emit a single upstream-sourced plugin entry. + + Hard rules: + + * Output is byte-for-byte Anthropic-conformant; **no APM-specific + keys** (no ``metadata.apm.*``). Provenance lives in + ``apm.lock.yaml`` only. + * Curator overrides on :class:`UpstreamPackageEntry` win over + upstream values for ``description``, ``version``, and ``tags``. + ``author``/``license``/``repository``/``homepage`` are + curator-only (the strict upstream parser does not surface + these fields, so there is nothing to fall back to). + * Source emission shape matches the direct-package emit shape so + ``parse_marketplace_json`` round-trips cleanly: outer + ``source`` key with inner ``source`` discriminator + (``"github"`` or ``"git-subdir"``), ``url`` for git-subdir. 
+ """ + entry = rup.entry + plugin: dict[str, Any] = OrderedDict() + plugin["name"] = entry.name + + # description: curator override > upstream plugin's value + description = entry.description if entry.description else rup.plugin.description + if description: + plugin["description"] = description + + # version: curator override (if a display version) > upstream value + if entry.version and _is_display_version(entry.version): + plugin["version"] = entry.version + elif rup.plugin.version: + plugin["version"] = rup.plugin.version + + # author / license / repository (curator-only -- StrictPlugin + # does not carry these). Mirrors direct-emit behaviour. + if entry.author: + plugin["author"] = dict(entry.author) + if entry.license: + plugin["license"] = entry.license + if entry.repository: + plugin["repository"] = entry.repository + + # tags: curator override > upstream plugin tags + tags = entry.tags or rup.plugin.tags + if tags: + plugin["tags"] = list(tags) + + if entry.homepage: + plugin["homepage"] = entry.homepage + + # source: shape matches direct emission so consumers using + # ``parse_marketplace_json`` see no difference. + source_obj: dict[str, Any] = OrderedDict() + if rup.plugin_subdir: + source_obj["source"] = "git-subdir" + source_obj["url"] = rup.plugin_repo + source_obj["path"] = rup.plugin_subdir + else: + source_obj["source"] = "github" + source_obj["repo"] = rup.plugin_repo + if rup.plugin_ref: + source_obj["ref"] = rup.plugin_ref + if rup.plugin_sha: + source_obj["sha"] = rup.plugin_sha + plugin["source"] = source_obj + + return plugin + # -- diff --------------------------------------------------------------- @staticmethod @@ -1001,22 +1343,98 @@ def _load_existing_json(self, path: Path) -> dict[str, Any] | None: # -- full pipeline ------------------------------------------------------ def build(self) -> BuildReport: - """Full pipeline: load -> resolve -> compose -> write. + """Full pipeline: load -> resolve -> compose -> validate -> write. 
Returns ------- BuildReport - Summary including diff statistics. + Summary including diff statistics and (when present) upstream + resolution diagnostics. + + Raises + ------ + BuildError + When any error-level diagnostic is present (resolution + failure, upstream rejection, round-trip mismatch). No + ``marketplace.json`` is written when an error is raised -- + ``continue_on_error`` only governs whether multiple errors + are collected before raising; it never permits writing + broken output. """ result = self.resolve() resolved = list(result.entries) + upstream_resolved = list(result.upstream_entries) errors = result.errors - new_json = self.compose_marketplace_json(resolved) + new_json = self.compose_marketplace_json( + resolved, + upstream_resolved=upstream_resolved, + ) build_warnings = getattr(self, "_compose_warnings", ()) - build_diagnostics = getattr(self, "_compose_diagnostics", ()) + build_diagnostics = list(getattr(self, "_compose_diagnostics", ())) + + # Lift upstream resolver diagnostics into the structured + # diagnostics so existing CLI rendering picks them up. Error and + # warning levels survive; codes are preserved. Warning-level + # entries are also lifted into ``build_warnings`` so the existing + # ``logger.warning()`` rendering path in ``pack.py`` surfaces + # them. Status symbols are NOT baked into the message; the + # rendering layer adds them via ``CommandLogger`` based on level. 
+ warning_messages: list[str] = list(build_warnings) + for diag in result.upstream_diagnostics: + label_parts: list[str] = [] + if diag.upstream_alias: + label_parts.append(f"upstream '{diag.upstream_alias}'") + if diag.plugin_name: + label_parts.append(f"plugin '{diag.plugin_name}'") + label = " / ".join(label_parts) + message = f"{label}: {diag.message}" if label else diag.message + build_diagnostics.append( + BuildDiagnostic(level=diag.level, message=message, code=diag.code) + ) + if diag.level == "warning": + warning_messages.append(message) + build_warnings = warning_messages + output_path = self._output_path() + # -- fail-closed gate before writing ------------------------------- + # Upstream resolution failures must NEVER produce a published + # marketplace.json (they signal supply-chain or governance + # breaks: missing alias, rename detected, unsupported source + # shape). Direct-package errors continue to honour the existing + # ``continue_on_error`` semantics (skip the failed entry, emit + # the rest). + upstream_errors = [d for d in build_diagnostics if d.level == "error"] + if upstream_errors: + if self._resolver is not None: + self._resolver.close() + headline = upstream_errors[0].message + extra = len(upstream_errors) - 1 + summary = headline if extra == 0 else f"{headline} (and {extra} more)" + raise BuildError(f"Build failed: {summary}") + + # -- round-trip validation invariant ------------------------------- + # Every emitted plugin must survive the lenient consumer parser + # used by ``apm browse``/``apm install`` resolution. Catches + # silent-skip discrepancies between the strict emission path and + # the consumer parser before consumers ever see the output. 
+ try: + roundtrip = parse_marketplace_json(new_json) + except Exception as exc: + if self._resolver is not None: + self._resolver.close() + raise BuildError(f"Round-trip parse of emitted marketplace.json failed: {exc}") from exc + emitted_plugin_count = len(new_json.get("plugins", [])) + roundtrip_count = len(roundtrip.plugins) + if roundtrip_count != emitted_plugin_count: + if self._resolver is not None: + self._resolver.close() + raise BuildError( + f"Round-trip parse dropped plugins: emitted " + f"{emitted_plugin_count}, parsed {roundtrip_count}" + ) + # Load existing for diff old_json = self._load_existing_json(output_path) unchanged, added, updated, removed = self._compute_diff(old_json, new_json) @@ -1026,6 +1444,7 @@ def build(self) -> BuildReport: output_path.parent.mkdir(parents=True, exist_ok=True) content = self._serialize_json(new_json) self._atomic_write(output_path, content) + self._write_upstream_lockfile(upstream_resolved) # Cleanup resolver if self._resolver is not None: @@ -1042,8 +1461,80 @@ def build(self) -> BuildReport: removed_count=removed, output_path=output_path, dry_run=self._options.dry_run, + upstream_resolved=tuple(upstream_resolved), + upstream_diagnostics=result.upstream_diagnostics, ) + def _write_upstream_lockfile(self, upstream_resolved: list[ResolvedUpstreamPackage]) -> None: + """Persist upstream provenance into ``apm.lock.yaml``. + + For every successfully resolved upstream package, record the + upstream marketplace coordinates (host/owner/repo/path/manifest + SHA) and the per-plugin pin (resolved SHA + emitted display + name + resolved source dict). The lockfile is the **only** + place provenance lives -- ``marketplace.json`` stays + Anthropic-conformant with no APM-specific keys. + + Reads the existing lockfile (if any), preserves all unrelated + sections, replaces the ``upstreams`` block, and writes back. 
+ """ + from datetime import datetime, timezone + + from ..deps.lockfile import ( + LockedUpstream, + LockedUpstreamPlugin, + LockFile, + get_lockfile_path, + ) + + lockfile_path = get_lockfile_path(self._project_root) + lock = LockFile.load_or_create(lockfile_path) + + if not upstream_resolved: + # No upstreams in this build; if the existing lockfile has + # an ``upstreams`` block from a previous build, preserve it + # only when the apm.yml still declares those upstreams. + # For v1 simplicity, leave the existing block untouched + # when there's nothing new to write. + return + + refreshed_at = datetime.now(timezone.utc).isoformat() + new_upstreams: dict[str, LockedUpstream] = {} + for rup in upstream_resolved: + alias = rup.upstream.alias + if alias not in new_upstreams: + new_upstreams[alias] = LockedUpstream( + alias=alias, + host=rup.upstream.host, + owner=rup.upstream.repo.split("/", 1)[0] + if "/" in rup.upstream.repo + else rup.upstream.repo, + repo=rup.upstream.repo.split("/", 1)[1] if "/" in rup.upstream.repo else "", + path=rup.upstream.path, + manifest_sha=rup.upstream_manifest_sha, + canonical_full_name=rup.upstream_canonical_full_name, + refreshed_at=refreshed_at, + ) + resolved_source = { + "host": rup.plugin_host, + "repo": rup.plugin_repo, + "sha": rup.plugin_sha, + } + if rup.plugin_subdir: + resolved_source["path"] = rup.plugin_subdir + if rup.plugin_ref: + resolved_source["ref"] = rup.plugin_ref + new_upstreams[alias].plugins[rup.plugin.name] = LockedUpstreamPlugin( + upstream_name=rup.plugin.name, + emitted_as=rup.entry.name, + resolved_sha=rup.plugin_sha, + resolved_source=resolved_source, + ) + + lock.upstreams = new_upstreams + lockfile_path.parent.mkdir(parents=True, exist_ok=True) + lock.write(lockfile_path) + # --------------------------------------------------------------------------- # Helpers diff --git a/src/apm_cli/marketplace/ref_resolver.py b/src/apm_cli/marketplace/ref_resolver.py index dcfaf2f1a..fb59c0c56 100644 --- 
a/src/apm_cli/marketplace/ref_resolver.py +++ b/src/apm_cli/marketplace/ref_resolver.py @@ -30,6 +30,10 @@ from .git_stderr import translate_git_stderr __all__ = [ + "FULL_SHA_RE", + "OWNER_REPO_RE", + "GitLsRemoteError", + "OfflineMissError", "RefCache", "RefResolver", "RemoteRef", @@ -39,7 +43,17 @@ # Dataclass # --------------------------------------------------------------------------- -_SHA_RE = re.compile(r"^[0-9a-f]{40}$") +# Shared 40-char lowercase-hex commit-SHA pattern. Imported by all +# marketplace + plugin-CLI modules that need to discriminate a SHA from +# a tag/branch ref. Kept here because ``ref_resolver`` is the natural +# home for git-ref concepts. ``_SHA_RE`` is a backwards-compatible alias. +FULL_SHA_RE = re.compile(r"^[0-9a-f]{40}$") +_SHA_RE = FULL_SHA_RE + +# Shared ``owner/repo`` shape validator. Rejects leading dot or slash to +# keep cache-key composites stable. Imported by upstream_cache, +# upstream_parser, and yml_schema to avoid triplication. +OWNER_REPO_RE = re.compile(r"^[^./\s][^/\s]*/[^/\s]+$") @dataclass(frozen=True) diff --git a/src/apm_cli/marketplace/upstream_cache.py b/src/apm_cli/marketplace/upstream_cache.py new file mode 100644 index 000000000..21e040bb0 --- /dev/null +++ b/src/apm_cli/marketplace/upstream_cache.py @@ -0,0 +1,544 @@ +"""Content-addressed cache for upstream ``marketplace.json`` manifests. + +Where :mod:`apm_cli.marketplace.client` is the *consumer-side* cache +(TTL-keyed, browse-oriented, scoped to the curator's own +marketplaces.json), this module is the *curator-side* cache used by +:class:`UpstreamResolver` when building a marketplace whose packages +are sourced from an external marketplace. + +Design contract +--------------- + +- **Content-addressed by manifest SHA.** Each cache entry is keyed by + the immutable git commit SHA of the upstream repo at fetch time. + Entries are never invalidated by TTL; once a SHA-pinned entry is + written, all subsequent reads at the same SHA serve the same bytes. 
+ +- **Windows-safe key.** The on-disk cache key uses the ``__`` (double + underscore) delimiter. Colons (``:``) are illegal in NTFS file + names; using them silently breaks Windows curators who run + ``apm marketplace upstream refresh`` (test-coverage panel item 5). + +- **Hashed composite + delimiter rejection.** The composite key + ``upstream__________`` is + hashed (SHA-256, hex-truncated to 16 chars) so neither path length + nor exotic characters in inputs can cause filesystem failures. + All inputs are also rejected if they contain the ``__`` delimiter + (defence-in-depth), and path-derived inputs route through + :func:`apm_cli.utils.path_security.validate_path_segments`. + +- **Per-upstream-host auth.** Tokens are resolved against the + *upstream* host (``upstream.host``, ``upstream.owner``), never + inherited from the curator's marketplace-source auth context. We + pass ``unauth_first=True`` so public upstream repos do NOT have + the curator's ``repo``-scoped PAT attached -- that PAT belongs to + the curator's repos, not the upstream's. + +- **Defence-in-depth integrity check.** Every cache hit re-verifies + that the on-disk recorded SHA matches the SHA the caller requested. + A poisoned cache file with a different SHA in its sidecar manifest + is treated as a miss and re-fetched. + +The resolver layer (:mod:`upstream_resolver`) is responsible for +turning refs/branches/tags into immutable SHAs and for the +canonical-``full_name`` repo-rename guard. This module assumes the +caller already pinned an explicit SHA. 
+""" + +from __future__ import annotations + +import hashlib +import json +import logging +import os +import re +from collections.abc import Callable +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from apm_cli.utils.path_security import ( + PathTraversalError, + ensure_path_within, + validate_path_segments, +) + +logger = logging.getLogger(__name__) + + +__all__ = [ + "DEFAULT_UPSTREAM_CACHE_DIRNAME", + "UpstreamCache", + "UpstreamCacheError", + "UpstreamCacheKey", + "compute_cache_key", +] + + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +DEFAULT_UPSTREAM_CACHE_DIRNAME = os.path.join("cache", "upstream") + +# Cache-key delimiter -- ``__`` is Windows-safe (no colons), and we +# reject the delimiter in user-supplied inputs to keep the composite +# unambiguously parseable (defence-in-depth: even though the composite +# is hashed, we don't want forge-able collisions). +_KEY_DELIM = "__" + +# Truncated SHA-256 length used in the on-disk cache directory name. +# 16 hex chars = 64 bits of entropy, enough to make collision-by-typo +# astronomically unlikely while keeping cache paths short on +# Windows where MAX_PATH still bites legacy tooling. +_KEY_HASH_LEN = 16 + +# 40-char hex git SHA. The cache is content-addressed; non-SHA refs +# (branches, tags) MUST be resolved upstream by the resolver layer +# before reaching this cache. Aliased to the shared canonical pattern. +from .ref_resolver import FULL_SHA_RE as _FULL_SHA_RE # noqa: E402 +from .ref_resolver import OWNER_REPO_RE as _REMOTE_SOURCE_RE # noqa: E402 + +# Conservative host shape -- defence-in-depth on top of the regex in +# yml_schema.py. The cache layer never trusts that the caller already validated. 
+_HOST_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9.\-]{0,253}$") + + +# --------------------------------------------------------------------------- +# Errors +# --------------------------------------------------------------------------- + + +class UpstreamCacheError(Exception): + """Raised when an upstream cache operation cannot be completed safely. + + Distinct from :class:`MarketplaceFetchError` so callers can branch + on cache-specific failures (poisoned entry, key validation, + on-disk corruption) without catching network errors. + """ + + +# --------------------------------------------------------------------------- +# Public dataclasses +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class UpstreamCacheKey: + """Validated, fully-resolved cache-key inputs. + + Constructed exclusively by :func:`compute_cache_key`. Once an + instance exists, callers can rely on every field being: + - non-empty + - free of the ``__`` delimiter + - free of path-traversal sequences (for ``path``) + - shape-validated (host, owner/repo, full SHA) + """ + + host: str + owner: str + repo: str + sha: str + path: str + + @property + def composite(self) -> str: + """Plain-text composite -- for diagnostics, NOT for filesystem.""" + return _KEY_DELIM.join(["upstream", self.host, self.owner, self.repo, self.sha, self.path]) + + @property + def fingerprint(self) -> str: + """Short SHA-256 fingerprint of the composite (filesystem-safe).""" + digest = hashlib.sha256(self.composite.encode("utf-8")).hexdigest() + return digest[:_KEY_HASH_LEN] + + @property + def directory_name(self) -> str: + """On-disk directory name for this cache entry. + + Format: ``upstream__________`` + + Embeds a human-readable prefix so curators can grep ``~/.apm/cache`` + for an upstream they recognise, while the trailing hash makes the + directory uniquely keyed. 
+ """ + sha_prefix = self.sha[:8] + # Replace ``/`` in repo (only the slash between owner/repo would + # appear) with ``-`` so the on-disk name stays a single segment. + repo_safe = self.repo.replace("/", "-") + owner_safe = self.owner + return _KEY_DELIM.join( + [ + "upstream", + _sanitise_for_dirname(self.host), + _sanitise_for_dirname(owner_safe), + _sanitise_for_dirname(repo_safe), + sha_prefix, + self.fingerprint, + ] + ) + + +# --------------------------------------------------------------------------- +# Public functions +# --------------------------------------------------------------------------- + + +def compute_cache_key( + *, + host: str, + owner: str, + repo: str, + sha: str, + path: str, +) -> UpstreamCacheKey: + """Validate raw inputs and return an :class:`UpstreamCacheKey`. + + Raises + ------ + UpstreamCacheError + If any input is empty, contains the ``__`` delimiter, contains + path-traversal sequences, or fails its shape regex. + """ + _check_no_delimiter("host", host) + _check_no_delimiter("owner", owner) + _check_no_delimiter("repo", repo) + _check_no_delimiter("sha", sha) + _check_no_delimiter("path", path) + + if not _HOST_RE.match(host): + raise UpstreamCacheError(f"invalid upstream host: {host!r}") + + repo_combined = f"{owner}/{repo}" + if not _REMOTE_SOURCE_RE.match(repo_combined): + raise UpstreamCacheError( + f"invalid upstream owner/repo: {repo_combined!r} " + f"(must match '/' shape, no leading dot)" + ) + + if not _FULL_SHA_RE.match(sha): + raise UpstreamCacheError( + f"invalid upstream sha: {sha!r}; cache keys require a full 40-char hex SHA" + ) + + if not path or path.startswith("/"): + raise UpstreamCacheError(f"invalid upstream path: {path!r}; must be non-empty and relative") + + try: + validate_path_segments(path, context="upstream cache path") + except PathTraversalError as exc: + raise UpstreamCacheError(f"invalid upstream path {path!r}: {type(exc).__name__}") from exc + + return UpstreamCacheKey( + host=host, + owner=owner, + 
repo=repo, + sha=sha, + path=path, + ) + + +# --------------------------------------------------------------------------- +# UpstreamCache class +# --------------------------------------------------------------------------- + + +class UpstreamCache: + """Filesystem-backed, SHA-keyed cache for upstream marketplace JSON. + + Two-file layout per entry inside ``//``:: + + manifest.json -- raw bytes the upstream returned (decoded JSON) + meta.json -- {"sha": "...", "host": "...", ...} + + The ``meta.json`` exists solely for the defence-in-depth integrity + check on every read: a cached file whose meta SHA does not match + the requested SHA is treated as a miss and silently re-fetched. + + The class is intentionally side-effect-light to make injection + straightforward in tests: + + - ``base_dir`` is overridable at construction time. + - ``fetch_callback`` is the single I/O boundary; the default + implementation uses :class:`AuthResolver` per upstream host but + tests can pass a stub. + """ + + def __init__( + self, + *, + base_dir: Path | None = None, + fetch_callback: Callable[[UpstreamCacheKey, Any], Any] | None = None, + ) -> None: + if base_dir is None: + from apm_cli.config import CONFIG_DIR + + base_dir = Path(CONFIG_DIR) / DEFAULT_UPSTREAM_CACHE_DIRNAME + self._base_dir = Path(base_dir) + self._fetch_callback = fetch_callback or _default_fetch_via_github_api + + # -- Public API --------------------------------------------------------- + + @property + def base_dir(self) -> Path: + """Cache root directory (read-only).""" + return self._base_dir + + def get(self, key: UpstreamCacheKey) -> dict[str, Any] | None: + """Return cached JSON for *key* if present and integrity-valid.""" + entry_dir = self._entry_dir(key) + manifest_path = entry_dir / "manifest.json" + meta_path = entry_dir / "meta.json" + if not manifest_path.exists() or not meta_path.exists(): + return None + try: + meta = json.loads(meta_path.read_text(encoding="utf-8")) + except (OSError, 
json.JSONDecodeError) as exc:
            logger.debug(
                "Upstream cache meta unreadable for %s: %s", key.directory_name, type(exc).__name__
            )
            return None
        recorded_sha = meta.get("sha")
        if recorded_sha != key.sha:
            # Poisoned or stale entry: treat as miss. We do NOT delete
            # because deletion could mask a legitimate concurrent
            # writer; the next put() will overwrite.
            logger.warning(
                "Upstream cache integrity miss for %s: meta sha %r != requested sha %r",
                key.directory_name,
                recorded_sha,
                key.sha,
            )
            return None
        try:
            raw_bytes = manifest_path.read_bytes()
        except OSError as exc:
            logger.debug(
                "Upstream cache manifest unreadable for %s: %s",
                key.directory_name,
                type(exc).__name__,
            )
            return None
        # Optional second integrity layer: entries written by put()
        # record a SHA-256 of the manifest bytes; verify it when present.
        recorded_content_sha256 = meta.get("content_sha256")
        if recorded_content_sha256 is not None:
            actual_sha256 = hashlib.sha256(raw_bytes).hexdigest()
            if actual_sha256 != recorded_content_sha256:
                logger.warning(
                    "Upstream cache content integrity miss for %s: expected %s, got %s",
                    key.directory_name,
                    recorded_content_sha256,
                    actual_sha256,
                )
                return None
        try:
            return json.loads(raw_bytes.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError) as exc:
            logger.debug(
                "Upstream cache manifest parse error for %s: %s",
                key.directory_name,
                type(exc).__name__,
            )
            return None

    def put(self, key: UpstreamCacheKey, manifest: dict[str, Any]) -> None:
        """Write a fetched manifest into the cache (best-effort; NOT atomic).

        Writes go directly to the final paths -- there is no
        temp-file-plus-rename step -- so a crash mid-write can leave a
        partial entry on disk. ``get()`` tolerates this: it requires
        both ``manifest.json`` and ``meta.json`` to exist, checks the
        recorded SHA, and verifies the content hash before serving a
        hit, so an incomplete or corrupt entry degrades to a miss.

        Raises ``UpstreamCacheError`` when the filesystem write fails.
        """
        entry_dir = self._entry_dir(key)
        entry_dir.mkdir(parents=True, exist_ok=True)

        # Order: manifest first, then meta. ``get()`` requires BOTH
        # files to exist and validates ``meta.json`` before trusting
        # ``manifest.json``, so if a crash interleaves these writes the
        # incomplete entry (manifest without meta) is treated as a miss
        # rather than served stale.
+ manifest_path = entry_dir / "manifest.json" + meta_path = entry_dir / "meta.json" + + manifest_bytes = json.dumps(manifest, indent=2, sort_keys=True).encode("utf-8") + content_sha256 = hashlib.sha256(manifest_bytes).hexdigest() + try: + manifest_path.write_bytes(manifest_bytes) + meta_path.write_text( + json.dumps( + { + "content_sha256": content_sha256, + "host": key.host, + "owner": key.owner, + "path": key.path, + "repo": key.repo, + "sha": key.sha, + }, + sort_keys=True, + ), + encoding="utf-8", + ) + except OSError as exc: + raise UpstreamCacheError( + f"failed to write upstream cache for {key.composite}: {type(exc).__name__}" + ) from exc + + def get_or_fetch( + self, + key: UpstreamCacheKey, + *, + auth_resolver: Any = None, + offline: bool = False, + ) -> dict[str, Any]: + """Return cached manifest, fetching on miss. + + Parameters + ---------- + key + Validated cache key. + auth_resolver + Optional :class:`apm_cli.core.auth.AuthResolver`. Threaded + through to the fetch callback so tests can inject a mock. + offline + If *True*, raise :class:`UpstreamCacheError` on miss + instead of attempting a network fetch. Wired to the + curator-build ``--offline`` flag. + + Returns + ------- + dict + Decoded JSON of the upstream ``marketplace.json`` at the + pinned SHA. + """ + cached = self.get(key) + if cached is not None: + logger.debug("Upstream cache hit for %s", key.directory_name) + return cached + + if offline: + raise UpstreamCacheError( + f"offline mode: no cached upstream entry for " + f"{key.host}/{key.owner}/{key.repo}@{key.sha} (path={key.path}). " + f"Run a build online first." 
+ ) + + logger.debug("Upstream cache miss for %s", key.directory_name) + manifest = self._fetch_callback(key, auth_resolver) + if not isinstance(manifest, dict): + raise UpstreamCacheError( + f"upstream fetch returned non-JSON-object content for {key.composite}" + ) + self.put(key, manifest) + return manifest + + # -- Internal ----------------------------------------------------------- + + def _entry_dir(self, key: UpstreamCacheKey) -> Path: + """Resolve the on-disk directory for *key*, with containment guard.""" + candidate = self._base_dir / key.directory_name + # Belt-and-braces: ensure the resolved directory is still + # under the cache base, even if a future contributor changes + # the directory_name format. + try: + ensure_path_within(candidate, self._base_dir) + except PathTraversalError as exc: + raise UpstreamCacheError(f"upstream cache directory escapes base: {exc}") from exc + return candidate + + +# --------------------------------------------------------------------------- +# Default fetch callback (network) +# --------------------------------------------------------------------------- + + +def _default_fetch_via_github_api( + key: UpstreamCacheKey, + auth_resolver: Any, +) -> dict[str, Any]: + """Fetch ``manifest.json`` from GitHub Contents API at ``key.sha``. + + Uses :class:`AuthResolver.try_with_fallback` with + ``unauth_first=True``: the upstream is typically a public repo, + and we MUST NOT attach the curator's PAT to a request that does + not need it (supply-chain panel item 3 -- never leak curator + credentials to upstream-host endpoints). + + The host classification refuses non-GitHub hosts in v1, mirroring + the strict parser's host allow-list. 
+ """ + import requests + + from apm_cli.core.auth import AuthResolver + + if auth_resolver is None: + auth_resolver = AuthResolver() + + host_info = AuthResolver.classify_host(key.host) + if host_info.kind not in ("github", "ghe_cloud", "ghes"): + raise UpstreamCacheError( + f"upstream host {key.host!r} is not a supported GitHub variant; " + f"refusing to fetch to avoid forwarding GitHub credentials elsewhere" + ) + + api_base = host_info.api_base + url = f"{api_base}/repos/{key.owner}/{key.repo}/contents/{key.path}?ref={key.sha}" + + def _do_fetch(token: str | None, _git_env: dict[str, str]) -> dict[str, Any]: + headers = { + "Accept": "application/vnd.github.v3.raw", + "User-Agent": "apm-cli (upstream-cache)", + } + if token and host_info.kind in ("github", "ghe_cloud", "ghes"): + headers["Authorization"] = f"token {token}" + resp = requests.get(url, headers=headers, timeout=30) + if resp.status_code == 404: + raise UpstreamCacheError( + f"upstream marketplace.json not found: " + f"{key.owner}/{key.repo}@{key.sha[:8]} {key.path}" + ) + resp.raise_for_status() + return resp.json() + + try: + return auth_resolver.try_with_fallback( + key.host, + _do_fetch, + org=key.owner, + unauth_first=True, + ) + except UpstreamCacheError: + raise + except requests.HTTPError as exc: + # Drop the response object from the exception chain. ``exc.response`` + # carries the original ``request.headers`` dict which includes the + # ``Authorization: token `` header set in ``_do_fetch``. Logging + # frameworks that walk ``__cause__`` could otherwise leak the + # curator's PAT. Re-raise with status + URL only. + status = exc.response.status_code if exc.response is not None else "?" 
+ raise UpstreamCacheError( + f"failed to fetch upstream manifest for " + f"{key.host}/{key.owner}/{key.repo}@{key.sha[:8]} {key.path}: " + f"HTTP {status}" + ) from None + except Exception as exc: + raise UpstreamCacheError( + f"failed to fetch upstream manifest for " + f"{key.host}/{key.owner}/{key.repo}@{key.sha[:8]} {key.path}: {exc}" + ) from exc + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _check_no_delimiter(field: str, value: Any) -> None: + if not isinstance(value, str) or not value: + raise UpstreamCacheError(f"upstream cache {field} must be a non-empty string") + if _KEY_DELIM in value: + raise UpstreamCacheError( + f"upstream cache {field} must not contain the cache delimiter '{_KEY_DELIM}': {value!r}" + ) + + +def _sanitise_for_dirname(value: str) -> str: + """Strip / replace any character that could trip filesystem rules. + + The composite is already hashed so collision risk is negligible; + this purely keeps the human-readable prefix visually clean and + POSIX/Windows safe. + """ + return re.sub(r"[^A-Za-z0-9._\-]", "-", value) diff --git a/src/apm_cli/marketplace/upstream_parser.py b/src/apm_cli/marketplace/upstream_parser.py new file mode 100644 index 000000000..1aabf4bb8 --- /dev/null +++ b/src/apm_cli/marketplace/upstream_parser.py @@ -0,0 +1,773 @@ +"""Strict parser for upstream ``marketplace.json`` manifests. + +Counterpart to :func:`apm_cli.marketplace.models.parse_marketplace_json`, +which is *lenient*: it silently skips unrecognised entries to maximise +compatibility with both Copilot CLI and Claude Code formats. That +behaviour is correct for the consumer-side ``apm marketplace browse`` +flow, but wrong for the curator-side ``upstream`` resolution where every +silent skip is a build-time foot-gun (an entry the curator thinks is +exposed but is not). 
+
+This module provides the strict alternative used by
+:class:`UpstreamResolver`:
+
+- Every supported source shape is parsed into a fully-resolved
+  :class:`StrictPluginSource` -- no ambiguity for downstream resolvers.
+- Every unsupported entry produces a named :class:`StrictRejection`
+  instead of being dropped. The builder maps these to
+  ``BuildDiagnostic(level="error")`` so curators see exactly which
+  entry was refused and why.
+- String-shorthand sources (``./foo`` / ``foo``) are resolved as
+  subdirectories of the upstream marketplace's repo, gated by
+  ``metadata.pluginRoot`` and validated against path traversal via
+  :func:`apm_cli.utils.path_security.validate_path_segments`.
+
+Contract
+--------
+
+The strict parser only accepts entries it is sure how to resolve to an
+immutable, single-host, git-backed coordinate. v1 supports the
+``github`` host family and the following source shapes:
+
+* ``repository: "owner/repo"`` (Copilot CLI shape) plus optional
+  ``ref``/``sha``.
+* ``source: {type: "github", repo: "owner/repo", ref?, sha?}``.
+* ``source: {type: "git-subdir", repo|url: "owner/repo", path,
+  ref?, sha?}`` -- subdir is preserved verbatim and validated.
+* ``source: "./foo"`` or ``"foo"`` (string shorthand) -- resolved as a
+  subdirectory of the upstream repo. The resolved subdir must live
+  under ``metadata.pluginRoot`` (or under the repo root if
+  ``pluginRoot`` is unset). ``..`` segments are rejected.
+
+Anything else (``npm``, arbitrary URLs, non-github hosts, malformed
+``owner/repo``) is a :class:`StrictRejection`. Rejections do **not**
+raise; they are returned in :class:`StrictManifest.rejections` so the
+caller can decide whether a single bad entry should fail the entire
+build or merely surface as a diagnostic.
+
+Cross-references
+----------------
+
+* Guide: the Marketplace upstreams documentation
+  (``docs/src/content/docs/guides/marketplace-upstreams.md``) -- builder
+  behaviour (atomic, deterministic, strict) and the source-shape
+  support matrix.
+* Schema: :mod:`apm_cli.marketplace.yml_schema` -- + :class:`UpstreamPackageEntry` references the alias which carries the + upstream coordinates this parser needs. +""" + +from __future__ import annotations + +import logging +import re +from dataclasses import dataclass +from pathlib import PurePosixPath +from typing import Any + +from apm_cli.utils.path_security import PathTraversalError, validate_path_segments + +logger = logging.getLogger(__name__) + + +__all__ = [ + "REJECTION_REASONS", + "StrictManifest", + "StrictPlugin", + "StrictPluginSource", + "StrictRejection", + "parse_marketplace_strict", +] + + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +# ``owner/repo`` shape. Reuses the shared regex from ref_resolver with the +# same leading-dot rejection so ``./local`` cannot pose as a remote source. +from .ref_resolver import FULL_SHA_RE as _FULL_SHA_RE # noqa: E402 +from .ref_resolver import OWNER_REPO_RE as _REMOTE_SOURCE_RE # noqa: E402 + +# Tag / ref names: conservative subset. Disallows whitespace, tilde, +# caret, and other characters that ``git ls-remote`` would refuse, but +# permits the dots/slashes/dashes that real release tags use. +_REF_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._/\-+]{0,254}$") + +# Host allow-list for documentation purposes only. Actual host validation +# is performed by the resolver layer (``_UpstreamResolverFactory``) where +# the network call happens. The parser does NOT gate on this constant so +# that GHE upstreams (e.g. ``github.example.com``) are not rejected at +# parse time before the resolver has a chance to attempt auth resolution. +_SUPPORTED_HOSTS: frozenset[str] = frozenset({"github.com"}) + +# Strict per-shape key sets. 
Anything outside these is a ``unknown-key`` +# rejection so curators learn about typos at strict-parse time rather +# than seeing the entry silently dropped by the lenient consumer +# parser. +_GITHUB_SOURCE_KEYS: frozenset[str] = frozenset({"type", "host", "repo", "ref", "sha", "branch"}) +_GIT_SUBDIR_SOURCE_KEYS: frozenset[str] = frozenset( + {"type", "host", "repo", "url", "path", "ref", "sha", "branch"} +) + +# Top-level plugin entry keys we recognise. The lenient parser accepts +# arbitrary metadata; the strict parser tolerates display-only fields +# (``description``, ``version``, ``tags``, ``homepage``, ``license``, +# ``author``, ``authors``, ``keywords``, ``category``, ``categories``) +# but rejects anything not on this list to surface typos like +# ``soruce``. +_PLUGIN_ENTRY_KEYS: frozenset[str] = frozenset( + { + "name", + "source", + "repository", + "ref", + "sha", + "description", + "version", + "tags", + "homepage", + "license", + "author", + "authors", + "keywords", + "category", + "categories", + } +) + + +# Public catalogue of rejection reasons. Documented for callers (CLI +# diagnostics, tests) so the named-reason contract is part of the +# module API. +REJECTION_REASONS: frozenset[str] = frozenset( + { + "missing-name", + "duplicate-name", + "missing-source", + "ambiguous-source", + "invalid-source-shape", + "unknown-source-key", + "unknown-plugin-key", + "unsupported-source-type", + "npm-source", + "invalid-repo", + "unsupported-host", + "missing-subdir", + "relative-source-out-of-root", + "path-traversal", + "invalid-ref", + "invalid-sha", + } +) + + +# --------------------------------------------------------------------------- +# Public dataclasses +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class StrictPluginSource: + """Fully-resolved upstream plugin source coordinates. 
+ + Always carries enough information for :class:`RefResolver` to + produce an immutable commit SHA without re-parsing the upstream + manifest. ``ref`` and ``sha`` are mutually optional but at least + one is set when the upstream pinned its plugin; if both are unset + the resolver MUST treat the entry as unpinned and apply the + precedence ladder (curator override, then upstream-marketplace + pinned ref). + """ + + type: str # "github" | "git-subdir" + host: str # always in _SUPPORTED_HOSTS + repo: str # "owner/repo" + ref: str | None = None + sha: str | None = None + subdir: str | None = None # subdirectory within the repo, if any + + +@dataclass(frozen=True) +class StrictPlugin: + """A strictly-parsed upstream plugin entry.""" + + name: str + source: StrictPluginSource + description: str = "" + version: str = "" + tags: tuple[str, ...] = () + + +@dataclass(frozen=True) +class StrictRejection: + """A named rejection of a single upstream plugin entry. + + ``reason`` is one of :data:`REJECTION_REASONS` -- callers may + pattern-match for diagnostics. ``detail`` is a human-readable + explanation suitable for surfacing in CLI output. + """ + + plugin_name: str # may be "" when name is missing/blank + reason: str + detail: str + + +@dataclass(frozen=True) +class StrictManifest: + """Result of strict-parsing an upstream ``marketplace.json``.""" + + name: str + plugins: tuple[StrictPlugin, ...] = () + rejections: tuple[StrictRejection, ...] = () + plugin_root: str = "" + owner_name: str = "" + description: str = "" + + def find_plugin(self, plugin_name: str) -> StrictPlugin | None: + """Look up a plugin by exact name (case-sensitive). + + Strict lookup intentionally avoids the lenient parser's + case-insensitive match: the curator declared ``plugin: `` + verbatim and a typo should fail loudly. 
+ """ + for p in self.plugins: + if p.name == plugin_name: + return p + return None + + +# --------------------------------------------------------------------------- +# Public entry point +# --------------------------------------------------------------------------- + + +def parse_marketplace_strict( + data: dict[str, Any], + *, + upstream_owner_repo: str, + upstream_host: str = "github.com", +) -> StrictManifest: + """Strict-parse a fetched upstream ``marketplace.json``. + + Parameters + ---------- + data + The decoded JSON content of the upstream manifest. + upstream_owner_repo + ``owner/repo`` of the repository hosting the upstream + marketplace. Used to resolve string-shorthand sources whose + repo is implicit. + upstream_host + Host of the upstream repository. v1 only accepts hosts in + :data:`_SUPPORTED_HOSTS`. + + Returns + ------- + StrictManifest + Parsed manifest with ``plugins`` (accepted entries) and + ``rejections`` (named per-entry refusals). The caller decides + whether any rejection should fail the build. + """ + if not isinstance(data, dict): + raise TypeError( + f"upstream marketplace.json root must be a JSON object, got {type(data).__name__}" + ) + + if not _REMOTE_SOURCE_RE.match(upstream_owner_repo): + raise ValueError( + f"upstream_owner_repo must be in 'owner/repo' shape, got '{upstream_owner_repo}'" + ) + + manifest_name = str(data.get("name", "")).strip() + description = str(data.get("description", "")) + + # ``owner`` may be a string or a {name, email} dict in real + # marketplaces (Claude Code uses the dict shape). Tolerate both. 
+ raw_owner = data.get("owner", "") + if isinstance(raw_owner, dict): + owner_name = str(raw_owner.get("name", "")) + elif isinstance(raw_owner, str): + owner_name = raw_owner + else: + owner_name = "" + + metadata = data.get("metadata", {}) + plugin_root = "" + if isinstance(metadata, dict): + raw_root = metadata.get("pluginRoot", "") + if isinstance(raw_root, str): + plugin_root = raw_root.strip().lstrip("./") + + raw_plugins = data.get("plugins", []) + if not isinstance(raw_plugins, list): + return StrictManifest( + name=manifest_name, + owner_name=owner_name, + description=description, + plugin_root=plugin_root, + rejections=( + StrictRejection( + plugin_name="", + reason="invalid-source-shape", + detail="upstream marketplace.json 'plugins' must be a list", + ), + ), + ) + + plugins: list[StrictPlugin] = [] + rejections: list[StrictRejection] = [] + seen_names: set[str] = set() + + for entry in raw_plugins: + if not isinstance(entry, dict): + rejections.append( + StrictRejection( + plugin_name="", + reason="invalid-source-shape", + detail=f"plugin entry must be a JSON object, got {type(entry).__name__}", + ) + ) + continue + result = _parse_strict_entry( + entry, + upstream_owner_repo=upstream_owner_repo, + upstream_host=upstream_host, + plugin_root=plugin_root, + ) + if isinstance(result, StrictRejection): + rejections.append(result) + continue + if result.name in seen_names: + rejections.append( + StrictRejection( + plugin_name=result.name, + reason="duplicate-name", + detail=( + f"plugin name '{result.name}' appears more than once " + f"in upstream marketplace" + ), + ) + ) + continue + seen_names.add(result.name) + plugins.append(result) + + return StrictManifest( + name=manifest_name, + plugins=tuple(plugins), + rejections=tuple(rejections), + plugin_root=plugin_root, + owner_name=owner_name, + description=description, + ) + + +# --------------------------------------------------------------------------- +# Internal helpers +# 
# ---------------------------------------------------------------------------


def _parse_strict_entry(
    entry: dict[str, Any],
    *,
    upstream_owner_repo: str,
    upstream_host: str,
    plugin_root: str,
) -> StrictPlugin | StrictRejection:
    """Parse a single plugin entry, returning either a plugin or a rejection."""
    name = str(entry.get("name", "")).strip()
    if not name:
        return StrictRejection(
            plugin_name="",
            reason="missing-name",
            detail="plugin entry has no 'name' field",
        )

    unknown = set(entry.keys()) - _PLUGIN_ENTRY_KEYS
    if unknown:
        return StrictRejection(
            plugin_name=name,
            reason="unknown-plugin-key",
            detail=(
                f"plugin '{name}' has unknown key(s): {sorted(unknown)}. "
                f"Allowed: {sorted(_PLUGIN_ENTRY_KEYS)}"
            ),
        )

    has_source = "source" in entry
    has_repository = "repository" in entry
    if has_source and has_repository:
        return StrictRejection(
            plugin_name=name,
            reason="ambiguous-source",
            detail=(f"plugin '{name}' declares both 'source' and 'repository'; use exactly one"),
        )
    if not has_source and not has_repository:
        return StrictRejection(
            plugin_name=name,
            reason="missing-source",
            detail=f"plugin '{name}' has neither 'source' nor 'repository'",
        )

    description = str(entry.get("description", ""))
    version = str(entry.get("version", ""))
    raw_tags = entry.get("tags", [])
    tags: tuple[str, ...] = ()
    if isinstance(raw_tags, list):
        # Non-string tags are silently dropped rather than rejected.
        tags = tuple(str(t) for t in raw_tags if isinstance(t, str))

    if has_repository:
        source_or_rejection = _resolve_repository_shape(
            name=name,
            entry=entry,
            upstream_host=upstream_host,
        )
    else:
        source_or_rejection = _resolve_source_field(
            name=name,
            raw=entry["source"],
            entry=entry,
            upstream_owner_repo=upstream_owner_repo,
            upstream_host=upstream_host,
            plugin_root=plugin_root,
        )

    if isinstance(source_or_rejection, StrictRejection):
        return source_or_rejection

    return StrictPlugin(
        name=name,
        source=source_or_rejection,
        description=description,
        version=version,
        tags=tags,
    )


def _resolve_repository_shape(
    *,
    name: str,
    entry: dict[str, Any],
    upstream_host: str,
) -> StrictPluginSource | StrictRejection:
    """Resolve the Copilot-CLI ``repository: owner/repo`` shape."""
    repo = entry.get("repository")
    if not isinstance(repo, str) or not _REMOTE_SOURCE_RE.match(repo):
        return StrictRejection(
            plugin_name=name,
            reason="invalid-repo",
            detail=f"plugin '{name}' has invalid 'repository': {repo!r}",
        )

    ref_value = entry.get("ref")
    sha_value = entry.get("sha")
    ref = _validate_ref(name, ref_value)
    if isinstance(ref, StrictRejection):
        return ref
    sha = _validate_sha(name, sha_value)
    if isinstance(sha, StrictRejection):
        return sha

    return StrictPluginSource(
        type="github",
        host=upstream_host,
        repo=repo,
        ref=ref,
        sha=sha,
    )


def _resolve_source_field(
    *,
    name: str,
    raw: Any,
    entry: dict[str, Any],
    upstream_owner_repo: str,
    upstream_host: str,
    plugin_root: str,
) -> StrictPluginSource | StrictRejection:
    """Resolve the Claude-shape ``source`` field (string or dict)."""
    if isinstance(raw, str):
        return _resolve_string_source(
            name=name,
            raw=raw,
            entry=entry,
            upstream_owner_repo=upstream_owner_repo,
            upstream_host=upstream_host,
            plugin_root=plugin_root,
        )
    if isinstance(raw, dict):
        return _resolve_dict_source(
            name=name,
            raw=raw,
            entry=entry,
            upstream_host=upstream_host,
        )
    return StrictRejection(
        plugin_name=name,
        reason="invalid-source-shape",
        detail=(
            f"plugin '{name}' has 'source' of unsupported type "
            f"{type(raw).__name__}; expected string or object"
        ),
    )


def _resolve_string_source(
    *,
    name: str,
    raw: str,
    entry: dict[str, Any],
    upstream_owner_repo: str,
    upstream_host: str,
    plugin_root: str,
) -> StrictPluginSource | StrictRejection:
    """Resolve a string-shorthand source as a subdir of the upstream repo.

    The shorthand is resolved as a subdirectory of the upstream
    marketplace's repository; ``metadata.pluginRoot`` (when set) acts
    as the containment base. ``..`` segments are rejected outright,
    and the resolved subdir is required to remain under
    ``pluginRoot``.
    """
    cleaned = raw.strip()
    if not cleaned:
        return StrictRejection(
            plugin_name=name,
            reason="invalid-source-shape",
            detail=f"plugin '{name}' has empty 'source' string",
        )

    # Normalise leading ``./`` BEFORE traversal validation so the
    # idiomatic Claude-shorthand form is accepted; ``..`` segments
    # remain rejected.
    while cleaned.startswith("./"):
        cleaned = cleaned[2:]

    if cleaned.startswith("/"):
        return StrictRejection(
            plugin_name=name,
            reason="path-traversal",
            detail=f"plugin '{name}' has absolute 'source' path: {raw!r}",
        )

    try:
        validate_path_segments(cleaned, context=f"plugin '{name}' source")
    except PathTraversalError as exc:
        return StrictRejection(
            plugin_name=name,
            reason="path-traversal",
            detail=str(exc),
        )

    rel = cleaned

    if plugin_root:
        # Compose subdir as ``<pluginRoot>/<source>`` and verify the
        # resulting parts contain no ``..``. PurePosixPath collapses
        # ``./`` segments deterministically.
        composed = PurePosixPath(plugin_root) / rel
        composed_str = str(composed)
        try:
            validate_path_segments(composed_str, context=f"plugin '{name}' resolved source")
        except PathTraversalError as exc:
            return StrictRejection(
                plugin_name=name,
                reason="path-traversal",
                detail=str(exc),
            )
        # Defence-in-depth: composed path must still start with
        # plugin_root (PurePosixPath should preserve this; we assert
        # to catch any future contributor mistakes).
        composed_parts = composed.parts
        root_parts = PurePosixPath(plugin_root).parts
        if composed_parts[: len(root_parts)] != root_parts:
            return StrictRejection(
                plugin_name=name,
                reason="relative-source-out-of-root",
                detail=(f"plugin '{name}' source {raw!r} escapes pluginRoot '{plugin_root}'"),
            )
        subdir = composed_str
    else:
        subdir = rel

    ref_value = entry.get("ref")
    sha_value = entry.get("sha")
    ref = _validate_ref(name, ref_value)
    if isinstance(ref, StrictRejection):
        return ref
    sha = _validate_sha(name, sha_value)
    if isinstance(sha, StrictRejection):
        return sha

    return StrictPluginSource(
        type="git-subdir",
        host=upstream_host,
        repo=upstream_owner_repo,
        ref=ref,
        sha=sha,
        subdir=subdir or None,
    )


def _resolve_dict_source(
    *,
    name: str,
    raw: dict[str, Any],
    entry: dict[str, Any],
    upstream_host: str,
) -> StrictPluginSource | StrictRejection:
    """Resolve a dict ``source`` field (Claude format)."""
    # Claude Code uses ``type``; the lenient parser also tolerates an
    # alternate ``source`` key inside the dict. Strict parsing requires
    # ``type`` to be set explicitly so that ``unsupported-source-type``
    # rejections name the actual offending value.
    # Exception: the short-form ``{"repo": "owner/repo", "ref": "..."}``
    # that some APM and Anthropic marketplaces emit without an explicit
    # ``type`` key is tolerated -- we infer ``github`` when ``repo`` is
    # present. Only hard-reject when ``type`` is explicitly set to an
    # unrecognised value.
    source_type = raw.get("type", "")
    if not isinstance(source_type, str) or not source_type:
        if "repo" in raw:
            source_type = "github"
        else:
            return StrictRejection(
                plugin_name=name,
                reason="invalid-source-shape",
                detail=f"plugin '{name}' source object missing 'type' discriminator and 'repo' key",
            )

    if source_type == "npm":
        return StrictRejection(
            plugin_name=name,
            reason="npm-source",
            detail=(
                f"plugin '{name}' uses 'npm' source type which is not "
                f"supported by APM upstreams (v1)"
            ),
        )

    if source_type not in {"github", "git-subdir"}:
        return StrictRejection(
            plugin_name=name,
            reason="unsupported-source-type",
            detail=(
                f"plugin '{name}' uses source.type='{source_type}', "
                f"expected one of {{github, git-subdir}}"
            ),
        )

    allowed_keys = _GITHUB_SOURCE_KEYS if source_type == "github" else _GIT_SUBDIR_SOURCE_KEYS
    unknown = set(raw.keys()) - allowed_keys
    if unknown:
        return StrictRejection(
            plugin_name=name,
            reason="unknown-source-key",
            detail=(
                f"plugin '{name}' source has unknown key(s): {sorted(unknown)}. "
                f"Allowed for type='{source_type}': {sorted(allowed_keys)}"
            ),
        )

    # Host: defaults to upstream host. Host validation is performed by the
    # resolver layer when the actual API call is made; the parser does not
    # gate on host to keep GHE upstreams working.
    host = raw.get("host", upstream_host)
    if not isinstance(host, str) or not host:
        host = upstream_host

    # ``repo`` may be expressed as ``repo: owner/repo`` or as a full
    # ``url:`` (for git-subdir). Only the former is supported in v1 to
    # avoid URL-shape ambiguity.
    repo = raw.get("repo")
    if "url" in raw and not repo:
        return StrictRejection(
            plugin_name=name,
            reason="invalid-repo",
            detail=(
                f"plugin '{name}' uses source.url; APM upstreams require "
                f"source.repo='owner/repo' in v1"
            ),
        )
    if not isinstance(repo, str) or not _REMOTE_SOURCE_RE.match(repo):
        return StrictRejection(
            plugin_name=name,
            reason="invalid-repo",
            detail=f"plugin '{name}' has invalid source.repo: {repo!r}",
        )

    subdir: str | None = None
    if source_type == "git-subdir":
        path = raw.get("path")
        if not isinstance(path, str) or not path.strip():
            return StrictRejection(
                plugin_name=name,
                reason="missing-subdir",
                detail=f"plugin '{name}' git-subdir source missing 'path'",
            )
        try:
            validate_path_segments(path, context=f"plugin '{name}' source.path")
        except PathTraversalError as exc:
            return StrictRejection(
                plugin_name=name,
                reason="path-traversal",
                detail=str(exc),
            )
        cleaned = path.strip()
        # BUGFIX: the absolute-path check must run before prefix
        # normalisation. The previous ``lstrip("./")`` stripped any run
        # of '.' and '/' characters (mangling paths like '.config/x')
        # and consumed leading slashes, so the startswith("/") check
        # below it could never fire.
        if cleaned.startswith("/"):
            return StrictRejection(
                plugin_name=name,
                reason="path-traversal",
                detail=f"plugin '{name}' has absolute source.path: {path!r}",
            )
        # Remove only literal ``./`` prefixes, mirroring
        # _resolve_string_source.
        while cleaned.startswith("./"):
            cleaned = cleaned[2:]
        subdir = cleaned

    ref_value = raw.get("ref") or entry.get("ref")
    sha_value = raw.get("sha") or entry.get("sha")
    ref = _validate_ref(name, ref_value)
    if isinstance(ref, StrictRejection):
        return ref
    sha = _validate_sha(name, sha_value)
    if isinstance(sha, StrictRejection):
        return sha

    return StrictPluginSource(
        type=source_type,
        host=host,
        repo=repo,
        ref=ref,
        sha=sha,
        subdir=subdir,
    )


def _validate_ref(name: str, value: Any) -> str | None | StrictRejection:
    """Validate a ref value or return a rejection.

    Empty / missing -> ``None`` (the resolver applies the precedence
    ladder). Non-string or shape-violating ref -> rejection.
    """
    if value is None or value == "":
        return None
    if not isinstance(value, str) or not _REF_RE.match(value):
        return StrictRejection(
            plugin_name=name,
            reason="invalid-ref",
            detail=f"plugin '{name}' has invalid ref: {value!r}",
        )
    return value


def _validate_sha(name: str, value: Any) -> str | None | StrictRejection:
    """Validate a 40-char git SHA or return a rejection."""
    if value is None or value == "":
        return None
    if not isinstance(value, str) or not _FULL_SHA_RE.match(value):
        return StrictRejection(
            plugin_name=name,
            reason="invalid-sha",
            detail=(
                f"plugin '{name}' has invalid sha: {value!r}; "
                f"strict parser requires a full 40-char hex SHA"
            ),
        )
    return value
**Precedence ladder.** Curator-supplied ``ref`` > curator-supplied + ``version`` (semver range) > upstream plugin's pinned ``ref`` > + upstream registration ``ref`` > ``branch`` HEAD when ``allow_head`` + is opted in. Anything else is an unpinned-build error so the + builder fails loudly rather than emitting a non-reproducible + ``marketplace.json``. + +The resolver is wired into the builder via dependency injection: cache, +ref-resolution, repo-identity, and version-range resolution are all +callables passed at construction time so unit tests never touch the +network. +""" + +from __future__ import annotations + +import logging +from collections.abc import Callable, Mapping +from dataclasses import dataclass +from typing import Any + +from .upstream_cache import UpstreamCache, UpstreamCacheError, compute_cache_key +from .upstream_parser import ( + StrictManifest, + StrictPlugin, + StrictRejection, + parse_marketplace_strict, +) +from .yml_schema import Upstream, UpstreamPackageEntry + +logger = logging.getLogger(__name__) + + +__all__ = [ + "RepoRenameError", + "ResolvedUpstreamPackage", + "UpstreamResolutionError", + "UpstreamResolver", + "UpstreamResolverDiagnostic", +] + + +# --------------------------------------------------------------------------- +# Module constants +# --------------------------------------------------------------------------- + +# Shared 40-char SHA pattern. +from .ref_resolver import FULL_SHA_RE as _FULL_SHA_RE # noqa: E402 + +# --------------------------------------------------------------------------- +# Errors +# --------------------------------------------------------------------------- + + +class UpstreamResolutionError(Exception): + """Raised when an upstream-sourced package cannot be resolved. + + Carries a stable ``code`` so the builder can map resolver failures + to ``BuildDiagnostic`` rows without parsing message text. 
+ """ + + def __init__(self, code: str, message: str) -> None: + super().__init__(message) + self.code = code + + +class RepoRenameError(UpstreamResolutionError): + """The upstream's reported ``full_name`` does not match the registration.""" + + def __init__(self, configured: str, reported: str) -> None: + super().__init__( + "repo-rename-detected", + ( + f"upstream repo identity mismatch: configured " + f"{configured!r} but GitHub reports {reported!r}. " + f"Refusing to fetch -- update upstream registration to " + f"the canonical name or remove the upstream." + ), + ) + self.configured = configured + self.reported = reported + + +# --------------------------------------------------------------------------- +# Diagnostic + result types +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class UpstreamResolverDiagnostic: + """A non-fatal diagnostic emitted during resolution. + + The builder lifts these into :class:`BuildDiagnostic` rows. Levels + parallel the builder's vocabulary: ``"warning"`` for unpinned-but- + allowed entries; ``"error"`` for the rejections collected during + strict parsing. + """ + + level: str # "warning" | "error" + code: str + message: str + upstream_alias: str = "" + plugin_name: str = "" + + +@dataclass(frozen=True) +class ResolvedUpstreamPackage: + """A fully-resolved upstream-sourced package, ready for emission. + + Attributes mirror the data the builder needs to: + 1. Emit a vanilla Anthropic-conformant entry in the curator's + ``marketplace.json`` (no ``metadata.apm.*`` keys). + 2. Record full provenance in the lockfile's ``upstreams:`` block. + """ + + entry: UpstreamPackageEntry + upstream: Upstream + plugin: StrictPlugin + # Final resolved coordinates of the *plugin*. ``ref`` is the user- + # facing pin (sha or tag); ``sha`` is the immutable commit SHA + # the lockfile records. ``ref`` and ``sha`` MAY be equal (when the + # curator pinned a SHA directly). 
+ plugin_host: str + plugin_repo: str + plugin_subdir: str | None + plugin_ref: str | None # may be None for HEAD-tracking entries + plugin_sha: str | None # may be None when ref-resolution deferred + # The upstream-registration provenance we need in the lockfile. + upstream_manifest_sha: str + upstream_canonical_full_name: str + # Source of the resolved ref, for diagnostics + lockfile metadata. + # Values: "curator-ref" | "curator-version" | "upstream-pin" | + # "upstream-registration-ref" | "branch-head". + pin_source: str + + +# --------------------------------------------------------------------------- +# Callable types for dependency injection +# --------------------------------------------------------------------------- + +# (host, owner, repo, ref-or-branch) -> 40-char SHA. Used to collapse +# branches/tags to immutable SHAs before they reach the cache key. +RefToShaResolver = Callable[[str, str, str, str], str] + +# (host, owner, repo) -> canonical "owner/repo" as reported by the +# git host's API. Empty string means "unknown -- skip rename check". +CanonicalFullNameResolver = Callable[[str, str, str], str] + +# (host, owner, repo, semver_range, *, tag_pattern, include_prerelease) +# -> (resolved_ref, resolved_sha). Used for curator-supplied +# ``version:`` semver ranges on UpstreamPackageEntry. +VersionRangeResolver = Callable[..., tuple[str, str]] + + +# --------------------------------------------------------------------------- +# Resolver +# --------------------------------------------------------------------------- + + +@dataclass +class _UpstreamFetchRecord: + """Internal cache of a per-upstream fetch result. + + Keyed by upstream alias to enforce the atomic-fetch invariant. + """ + + manifest: StrictManifest + manifest_sha: str # the resolved SHA used as the cache key + canonical_full_name: str + fetched: bool = True + + +class UpstreamResolver: + """Resolves :class:`UpstreamPackageEntry` values into emission-ready records. 
+ + Construction parameters + ----------------------- + upstreams + The full :class:`Upstream` collection from + :class:`MarketplaceConfig`, indexed by ``alias`` for O(1) + lookup. + cache + :class:`UpstreamCache` instance used for upstream + ``marketplace.json`` storage. Tests inject one rooted at a + ``tmp_path``. + ref_to_sha + Callable that resolves a ref/branch to a 40-char SHA against + the *upstream marketplace's* repo. Required so the cache key + is always SHA-keyed even when the curator pinned a tag/branch. + canonical_full_name + Callable that returns the upstream host's canonical + ``owner/repo`` for the registration. Used for the rename + guard. May return empty to skip the check (offline rebuilds). + version_range_resolver + Callable used to turn a curator-supplied ``version:`` semver + range into a concrete ref+SHA against the *upstream plugin's* + repo (NOT the upstream marketplace's repo). Optional: when + omitted, version ranges raise ``UpstreamResolutionError``. + auth_resolver + Optional :class:`AuthResolver` instance threaded into cache + fetches. + offline + When true, cache misses raise instead of fetching. Maps to + the builder's ``--offline`` flag (e.g. lockfile rebuild + reproducibility check). 
+ """ + + def __init__( + self, + upstreams: Mapping[str, Upstream], + *, + cache: UpstreamCache, + ref_to_sha: RefToShaResolver, + canonical_full_name: CanonicalFullNameResolver | None = None, + version_range_resolver: VersionRangeResolver | None = None, + auth_resolver: Any = None, + offline: bool = False, + ) -> None: + self._upstreams: dict[str, Upstream] = dict(upstreams) + self._cache = cache + self._ref_to_sha = ref_to_sha + self._canonical_full_name = canonical_full_name + self._version_range_resolver = version_range_resolver + self._auth_resolver = auth_resolver + self._offline = offline + + self._fetched: dict[str, _UpstreamFetchRecord] = {} + self._diagnostics: list[UpstreamResolverDiagnostic] = [] + + # -- Public API --------------------------------------------------------- + + @property + def diagnostics(self) -> tuple[UpstreamResolverDiagnostic, ...]: + """Diagnostics collected during resolution (immutable view).""" + return tuple(self._diagnostics) + + def resolve_all( + self, + entries: list[UpstreamPackageEntry], + ) -> tuple[list[ResolvedUpstreamPackage], list[UpstreamResolverDiagnostic]]: + """Resolve a batch of upstream-sourced packages. + + Continues on per-package failures so the builder can report + every error at once instead of bailing on the first. Critical + upstream-level failures (rename, missing alias, fetch error) + propagate to the upstream's *every* dependent package as + per-package diagnostics so the curator sees the cascade. + + Resolution is intentionally sequential. Build-time concurrency + was considered and deferred: the per-upstream cache layer is + the bottleneck, manifest fetches dominate wall-clock, and + most curator marketplaces have a small number of upstreams + (single digits). Threading would obscure error attribution + without a measurable speedup at the current scale and is + revisited only if a curator profile produces evidence + otherwise. 
+ """ + resolved: list[ResolvedUpstreamPackage] = [] + for entry in entries: + try: + resolved.append(self.resolve_package(entry)) + except UpstreamResolutionError as exc: + self._diagnostics.append( + UpstreamResolverDiagnostic( + level="error", + code=exc.code, + message=str(exc), + upstream_alias=entry.upstream_alias, + plugin_name=entry.plugin or entry.name, + ) + ) + return resolved, list(self._diagnostics) + + def resolve_package(self, entry: UpstreamPackageEntry) -> ResolvedUpstreamPackage: + """Resolve a single upstream-sourced package.""" + upstream = self._upstreams.get(entry.upstream_alias) + if upstream is None: + raise UpstreamResolutionError( + "unknown-upstream-alias", + ( + f"package {entry.name!r} references upstream " + f"alias {entry.upstream_alias!r} which is not " + f"registered. Run 'apm marketplace upstream add' " + f"or fix the alias." + ), + ) + + record = self._get_or_fetch_upstream(upstream) + + plugin_name = entry.plugin or entry.name + plugin = record.manifest.find_plugin(plugin_name) + if plugin is None: + available = ", ".join(p.name for p in record.manifest.plugins) or "" + raise UpstreamResolutionError( + "missing-plugin", + ( + f"upstream {upstream.alias!r} does not contain a " + f"plugin named {plugin_name!r}. Available: {available}." 
+ ), + ) + + resolved_ref, resolved_sha, pin_source = self._apply_precedence_ladder( + entry=entry, + plugin=plugin, + upstream=upstream, + ) + + plugin_host = plugin.source.host + plugin_repo = plugin.source.repo + plugin_subdir = plugin.source.subdir + + return ResolvedUpstreamPackage( + entry=entry, + upstream=upstream, + plugin=plugin, + plugin_host=plugin_host, + plugin_repo=plugin_repo, + plugin_subdir=plugin_subdir, + plugin_ref=resolved_ref, + plugin_sha=resolved_sha, + upstream_manifest_sha=record.manifest_sha, + upstream_canonical_full_name=record.canonical_full_name, + pin_source=pin_source, + ) + + # -- Internal: upstream fetch ------------------------------------------ + + def _get_or_fetch_upstream(self, upstream: Upstream) -> _UpstreamFetchRecord: + """Fetch + strict-parse an upstream once per build (atomic invariant).""" + cached = self._fetched.get(upstream.alias) + if cached is not None: + return cached + + owner, repo = upstream.repo.split("/", 1) + + canonical = "" + if self._canonical_full_name is not None: + try: + canonical = self._canonical_full_name(upstream.host, owner, repo) + except Exception as exc: + # Identity check is part of the supply-chain guard; if it + # raises (network error, 5xx), refuse to proceed rather + # than silently downgrade to "unknown identity". + raise UpstreamResolutionError( + "canonical-name-unavailable", + ( + f"could not verify identity of upstream " + f"{upstream.repo!r}: {exc}. Refusing to fetch." + ), + ) from exc + + if canonical and canonical.lower() != upstream.repo.lower(): + raise RepoRenameError(configured=upstream.repo, reported=canonical) + + # Resolve the registration's ref / branch to an immutable SHA so + # the cache is always SHA-keyed. 
+ target_ref = upstream.ref or upstream.branch + if upstream.ref is None and not upstream.allow_head: + raise UpstreamResolutionError( + "upstream-unpinned", + ( + f"upstream {upstream.alias!r} has no pinned ref and " + f"allow_head=false; declare 'ref:' or set allow_head: true." + ), + ) + if upstream.ref is None and upstream.allow_head: + self._diagnostics.append( + UpstreamResolverDiagnostic( + level="warning", + code="upstream-tracks-head", + message=( + f"upstream {upstream.alias!r} tracks branch " + f"{upstream.branch!r} HEAD; lockfile records the " + f"resolved SHA but rebuilds may drift." + ), + upstream_alias=upstream.alias, + ) + ) + + try: + manifest_sha = self._normalise_to_sha( + self._ref_to_sha(upstream.host, owner, repo, target_ref) + ) + except UpstreamResolutionError: + raise + except Exception as exc: + raise UpstreamResolutionError( + "ref-resolution-failed", + ( + f"could not resolve upstream {upstream.alias!r} " + f"ref {target_ref!r} to a commit SHA: {exc}." + ), + ) from exc + + try: + key = compute_cache_key( + host=upstream.host, + owner=owner, + repo=repo, + sha=manifest_sha, + path=upstream.path, + ) + except UpstreamCacheError as exc: + raise UpstreamResolutionError("invalid-cache-key", str(exc)) from exc + + try: + raw = self._cache.get_or_fetch( + key, + auth_resolver=self._auth_resolver, + offline=self._offline, + ) + except UpstreamCacheError as exc: + raise UpstreamResolutionError("upstream-fetch-failed", str(exc)) from exc + + manifest = parse_marketplace_strict( + raw, + upstream_owner_repo=upstream.repo, + upstream_host=upstream.host, + ) + + # Hoist strict-parser rejections into the diagnostic stream as + # build errors. The builder treats any non-empty error list as + # a hard failure (exit code 2). 
+ for rej in manifest.rejections: + self._record_strict_rejection(upstream.alias, rej) + + record = _UpstreamFetchRecord( + manifest=manifest, + manifest_sha=manifest_sha, + canonical_full_name=canonical or upstream.repo, + ) + self._fetched[upstream.alias] = record + return record + + def _record_strict_rejection(self, upstream_alias: str, rejection: StrictRejection) -> None: + self._diagnostics.append( + UpstreamResolverDiagnostic( + level="error", + code=f"upstream-rejection:{rejection.reason}", + message=( + f"upstream {upstream_alias!r} plugin " + f"{rejection.plugin_name!r}: {rejection.detail}" + ), + upstream_alias=upstream_alias, + plugin_name=rejection.plugin_name, + ) + ) + + # -- Internal: precedence ladder --------------------------------------- + + def _apply_precedence_ladder( + self, + *, + entry: UpstreamPackageEntry, + plugin: StrictPlugin, + upstream: Upstream, + ) -> tuple[str | None, str | None, str]: + """Determine final (ref, sha, pin_source) for the package. + + Order: + 1. Curator ``entry.ref`` (sha or tag). + 2. Curator ``entry.version`` (semver range, requires + ``version_range_resolver``). + 3. Upstream plugin's pinned ``ref`` / ``sha``. + 4. ``allow_head`` HEAD (deferred resolution). + 5. Otherwise: hard fail (unpinned). + """ + if entry.ref is not None: + ref = entry.ref + sha = self._maybe_sha(ref) + if sha is None: + # ref is a tag or branch name -- resolve to an immutable SHA so + # the lockfile records a content-addressed pin, not a movable ref. + owner, repo = plugin.source.repo.split("/", 1) + sha = self._normalise_to_sha(self._ref_to_sha(plugin.source.host, owner, repo, ref)) + return ref, sha, "curator-ref" + + if entry.version is not None: + if self._version_range_resolver is None: + raise UpstreamResolutionError( + "version-resolver-missing", + ( + f"package {entry.name!r} pins a semver range " + f"({entry.version!r}) but no version_range_resolver " + f"was provided to the UpstreamResolver." 
+ ), + ) + owner, repo = plugin.source.repo.split("/", 1) + try: + ref, sha = self._version_range_resolver( + plugin.source.host, + owner, + repo, + entry.version, + tag_pattern=entry.tag_pattern, + include_prerelease=entry.include_prerelease, + ) + except Exception as exc: + raise UpstreamResolutionError( + "version-resolution-failed", + ( + f"could not resolve semver range " + f"{entry.version!r} for {entry.name!r}: {exc}." + ), + ) from exc + return ref, self._normalise_to_sha(sha), "curator-version" + + # 3. Upstream plugin's pin (sha preferred, else tag-style ref). + if plugin.source.sha is not None: + return plugin.source.sha, plugin.source.sha, "upstream-pin" + if plugin.source.ref is not None: + ref = plugin.source.ref + sha = self._maybe_sha(ref) + if sha is None: + # ref is a tag or branch name -- resolve to an immutable SHA. + # This prevents a force-pushed tag from silently changing what + # was pinned at build time. + owner, repo = plugin.source.repo.split("/", 1) + sha = self._normalise_to_sha(self._ref_to_sha(plugin.source.host, owner, repo, ref)) + return ref, sha, "upstream-pin" + + # 4. Same-repo fallback (deterministic). When the upstream + # plugin lives in the same repo as the upstream marketplace + # itself -- the typical single-repo Claude marketplace shape + # (e.g. abhigyanpatwari/GitNexus) -- the upstream-registration + # SHA we just resolved IS the plugin SHA. This branch + # reproducibly pins without any opt-in. + fetched = self._fetched.get(upstream.alias) + if fetched is not None and plugin.source.repo.lower() == upstream.repo.lower(): + return ( + fetched.manifest_sha, + fetched.manifest_sha, + "upstream-registration-ref", + ) + + # 5. HEAD-tracking opt-in: defer resolution. Lockfile writer + # records what the builder ultimately resolved; rebuilds rely + # on the lockfile SHA, not the network. 
+ if entry.allow_head or upstream.allow_head: + self._diagnostics.append( + UpstreamResolverDiagnostic( + level="warning", + code="package-tracks-upstream-head", + message=( + f"package {entry.name!r} tracks upstream " + f"{upstream.alias!r} HEAD; rebuilds may drift " + f"unless lockfile is honoured." + ), + upstream_alias=upstream.alias, + plugin_name=entry.name, + ) + ) + return None, None, "branch-head" + + # 6. Hard fail. + raise UpstreamResolutionError( + "package-unpinned", + ( + f"package {entry.name!r} (upstream {upstream.alias!r}) is " + f"unpinned: no curator ref/version, upstream plugin has " + f"no ref/sha, and the plugin source repo " + f"({plugin.source.repo!r}) does not match the upstream " + f"registration repo ({upstream.repo!r}). Set 'ref:' or " + f"'version:' on the package." + ), + ) + + # -- Internal: helpers -------------------------------------------------- + + @staticmethod + def _maybe_sha(ref: str) -> str | None: + """Return ref if it is already a 40-char SHA, else None.""" + return ref if _FULL_SHA_RE.match(ref) else None + + @staticmethod + def _normalise_to_sha(value: str) -> str: + """Validate that *value* is a full 40-char hex SHA.""" + if not isinstance(value, str) or not _FULL_SHA_RE.match(value): + raise UpstreamResolutionError( + "invalid-sha", + f"expected a 40-char hex SHA, got {value!r}", + ) + return value diff --git a/src/apm_cli/marketplace/yml_editor.py b/src/apm_cli/marketplace/yml_editor.py index d1dca6b9c..b5f3f9e70 100644 --- a/src/apm_cli/marketplace/yml_editor.py +++ b/src/apm_cli/marketplace/yml_editor.py @@ -13,10 +13,9 @@ from __future__ import annotations -import re # noqa: F401 +import re from io import StringIO from pathlib import Path -from typing import List, Optional # noqa: F401, UP035 from ruamel.yaml import YAML @@ -31,7 +30,11 @@ __all__ = [ "add_plugin_entry", + "add_upstream_entry", + "add_upstream_package_entry", + "list_upstream_entries", "remove_plugin_entry", + "remove_upstream_entry", 
# -------------------------------------------------------------------
# Upstream entries
# -------------------------------------------------------------------


_UPSTREAM_ALIAS_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$")
_UPSTREAM_REPO_RE = re.compile(r"^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$")


def _validate_upstream_alias(alias: str) -> None:
    """Raise ``MarketplaceYmlError`` unless *alias* matches the allowed shape."""
    if not _UPSTREAM_ALIAS_RE.match(alias):
        raise MarketplaceYmlError(
            f"Upstream alias '{alias}' must start with a letter or digit and contain "
            f"only letters, digits, '_' or '-' (max 64 chars)."
        )


def _validate_upstream_repo(repo: str) -> None:
    """Raise ``MarketplaceYmlError`` unless *repo* is 'owner/repo' shaped."""
    if not _UPSTREAM_REPO_RE.match(repo):
        # BUGFIX: message previously read "must match '/' shape" -- the
        # '<owner>/<repo>' placeholder had been stripped from the text.
        raise MarketplaceYmlError(
            f"Upstream 'repo' must match '<owner>/<repo>' shape, got '{repo}'"
        )


def _find_upstream_index(upstreams, alias: str) -> int:
    """Return the index of the upstream whose ``alias`` matches.

    Aliases are case-sensitive (unlike package names) because they are
    used as primary keys throughout the upstream resolution and
    lockfile pipeline. Raises ``MarketplaceYmlError`` if not found.
    """
    for idx, entry in enumerate(upstreams):
        # Hand-edited YAML may contain scalar list items; skip them
        # instead of crashing on ``.get``.
        if not isinstance(entry, dict):
            continue
        entry_alias = entry.get("alias", "")
        if isinstance(entry_alias, str) and entry_alias == alias:
            return idx
    raise MarketplaceYmlError(f"Upstream '{alias}' not found")


def add_upstream_entry(
    yml_path: Path,
    *,
    alias: str,
    repo: str,
    ref: str | None = None,
    branch: str | None = None,
    path: str | None = None,
    host: str | None = None,
    allow_head: bool = False,
) -> None:
    """Append a new entry to ``upstreams[]``.

    Either ``ref`` (commit SHA, tag) or ``branch`` (with ``allow_head``
    when mutable) must be provided -- mirroring the strict-parser
    contract. Raises ``MarketplaceYmlError`` on validation failure or
    duplicate alias.
    """
    _validate_upstream_alias(alias)
    _validate_upstream_repo(repo)

    if ref is None and branch is None:
        raise MarketplaceYmlError("Upstream must specify either 'ref' (commit/tag) or 'branch'.")
    if path is not None:
        _validate_subdir(path)

    data, original_text = _load_rt(yml_path)
    container = _get_marketplace_container(data)
    upstreams = container.get("upstreams")
    if upstreams is None:
        from ruamel.yaml.comments import CommentedSeq

        upstreams = CommentedSeq()
        container["upstreams"] = upstreams

    # Duplicate-alias check (skip malformed non-mapping items).
    for entry in upstreams:
        if not isinstance(entry, dict):
            continue
        entry_alias = entry.get("alias", "")
        if isinstance(entry_alias, str) and entry_alias == alias:
            raise MarketplaceYmlError(f"Upstream '{alias}' already exists")

    from ruamel.yaml.comments import CommentedMap

    new_entry = CommentedMap()
    new_entry["alias"] = alias
    new_entry["repo"] = repo
    if ref is not None:
        new_entry["ref"] = ref
    if branch is not None:
        new_entry["branch"] = branch
    if path is not None:
        new_entry["path"] = path
    if host is not None:
        new_entry["host"] = host
    if allow_head:
        new_entry["allow_head"] = True

    upstreams.append(new_entry)
    _write_and_validate(yml_path, data, original_text)


def remove_upstream_entry(yml_path: Path, alias: str, *, dry_run: bool = False) -> None:
    """Remove an ``upstreams[]`` entry by alias.

    Raises ``MarketplaceYmlError`` when the alias is in use by any
    ``packages[]`` entry -- removing it would leave the manifest with
    dangling references.

    When *dry_run* is True, all validation is performed but the file is
    not written. Useful for pre-validating before prompting the user.
    """
    data, original_text = _load_rt(yml_path)
    container = _get_marketplace_container(data)
    upstreams = container.get("upstreams")
    if upstreams is None:
        raise MarketplaceYmlError(f"Upstream '{alias}' not found")

    # Reject removal if any package still references this alias.
    packages = container.get("packages") or []
    for entry in packages:
        if not isinstance(entry, dict):
            # Malformed package items are the schema validator's
            # problem; don't crash the removal path on them.
            continue
        entry_alias = entry.get("upstream", None)
        if isinstance(entry_alias, str) and entry_alias == alias:
            entry_name = entry.get("name", "")
            raise MarketplaceYmlError(
                f"Upstream '{alias}' is still referenced by package "
                f"'{entry_name}'. Remove the package first."
            )

    idx = _find_upstream_index(upstreams, alias)
    if dry_run:
        return
    del upstreams[idx]
    _write_and_validate(yml_path, data, original_text)


def list_upstream_entries(yml_path: Path) -> list[dict]:
    """Return the list of upstream entries as plain dicts.

    The values are read-only snapshots; modifying them does not write
    back to disk. Returns an empty list when no ``upstreams:`` block is
    present.
    """
    data, _ = _load_rt(yml_path)
    container = _get_marketplace_container(data)
    upstreams = container.get("upstreams") or []
    result: list[dict] = []
    for entry in upstreams:
        if not isinstance(entry, dict):
            continue
        # Materialise to a plain dict (drop ruamel CommentedMap wrapping).
        result.append({k: v for k, v in entry.items()})
    return result


def add_upstream_package_entry(
    yml_path: Path,
    *,
    upstream: str,
    plugin: str | None = None,
    name: str | None = None,
    version: str | None = None,
    ref: str | None = None,
    tag_pattern: str | None = None,
    tags: list[str] | None = None,
    include_prerelease: bool = False,
    allow_head: bool = False,
    description: str | None = None,
) -> str:
    """Append an upstream-sourced ``packages[]`` entry.

    Distinct from :func:`add_plugin_entry`: this writes ``upstream`` and
    ``plugin`` keys instead of ``source``. The referenced upstream alias
    must already exist in the ``upstreams:`` block.

    Returns the resolved package name (``name`` or ``plugin``).
    """
+ """ + if version is not None and ref is not None: + raise MarketplaceYmlError("Cannot specify both 'version' and 'ref' -- pick one") + + if not isinstance(upstream, str) or not upstream: + raise MarketplaceYmlError("'upstream' alias is required") + + effective_plugin = plugin if plugin is not None else name + if not effective_plugin: + raise MarketplaceYmlError( + "Either 'plugin' or 'name' must be provided so the upstream plugin can be located" + ) + + if name is None: + name = effective_plugin + + data, original_text = _load_rt(yml_path) + container = _get_marketplace_container(data) + + # Verify the upstream alias is registered. + upstreams = container.get("upstreams") or [] + alias_known = any(isinstance(u, dict) and u.get("alias") == upstream for u in upstreams) + if not alias_known: + raise MarketplaceYmlError( + f"Upstream alias '{upstream}' is not registered. " + f"Run 'apm marketplace upstream add --alias {upstream} ...' first." + ) + + packages = container.get("packages") + if packages is None: + from ruamel.yaml.comments import CommentedSeq + + packages = CommentedSeq() + container["packages"] = packages + + # Cross-shape duplicate-name check (case-insensitive). + lower = name.lower() + for entry in packages: + entry_name = entry.get("name", "") + if isinstance(entry_name, str) and entry_name.lower() == lower: + raise MarketplaceYmlError(f"Package '{name}' already exists") + + # (upstream, plugin) tuple uniqueness check. 
+ for entry in packages: + if ( + isinstance(entry, dict) + and entry.get("upstream") == upstream + and (entry.get("plugin") or entry.get("name")) == effective_plugin + ): + raise MarketplaceYmlError( + f"Plugin '{effective_plugin}' from upstream '{upstream}' is already exposed" + ) + + from ruamel.yaml.comments import CommentedMap + + new_entry = CommentedMap() + new_entry["name"] = name + new_entry["upstream"] = upstream + if plugin is not None and plugin != name: + new_entry["plugin"] = plugin + + if version is not None: + new_entry["version"] = version + if ref is not None: + new_entry["ref"] = ref + if tag_pattern is not None: + new_entry["tag_pattern"] = tag_pattern + if include_prerelease: + new_entry["include_prerelease"] = True + if allow_head: + new_entry["allow_head"] = True + if tags is not None and len(tags) > 0: + new_entry["tags"] = tags + if description is not None: + new_entry["description"] = description + + packages.append(new_entry) + _write_and_validate(yml_path, data, original_text) + return name diff --git a/src/apm_cli/marketplace/yml_schema.py b/src/apm_cli/marketplace/yml_schema.py index dd568386b..e7a606721 100644 --- a/src/apm_cli/marketplace/yml_schema.py +++ b/src/apm_cli/marketplace/yml_schema.py @@ -44,12 +44,16 @@ __all__ = [ "LOCAL_SOURCE_RE", "SOURCE_RE", + "DirectPackageEntry", "MarketplaceBuild", "MarketplaceConfig", "MarketplaceOwner", + "MarketplacePackage", "MarketplaceYml", # backwards-compat alias "MarketplaceYmlError", "PackageEntry", + "Upstream", + "UpstreamPackageEntry", "load_marketplace_from_apm_yml", "load_marketplace_from_legacy_yml", "load_marketplace_yml", @@ -70,6 +74,18 @@ # source field validation. SOURCE_RE = re.compile(r"^(?:[^/]+/[^/]+|\./.*)$") LOCAL_SOURCE_RE = re.compile(r"^\./") +# Remote-only ``owner/repo`` shape -- used by upstream registration +# validation, where local-path shorthand is not meaningful. Disallows a +# leading ``.`` so ``./local`` is rejected, matching curator intent. 
+# Shared with upstream_cache and upstream_parser via ref_resolver. +from .ref_resolver import OWNER_REPO_RE as _REMOTE_SOURCE_RE # noqa: E402 + +# Upstream alias: identifier-like, used as a directory-safe key in cache +# paths and lockfile keys. Conservative character set keeps cache keys +# Windows-safe and avoids collisions with the ``__`` cache delimiter. +_UPSTREAM_ALIAS_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$") +# Hostname validation -- minimal sanity check; refers to git-host domain. +_UPSTREAM_HOST_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9.\-]{0,253}$") # Placeholder tokens accepted in ``tag_pattern`` / ``build.tagPattern``. _TAG_PLACEHOLDERS = ("{version}", "{name}") @@ -103,6 +119,49 @@ } ) +# Alias for the direct-package shape; used when explicitly distinguishing +# from the upstream-package shape in builder dispatch and tests. +_DIRECT_PACKAGE_KEYS = _PACKAGE_ENTRY_KEYS + +# Strict key set for upstream-sourced package entries. NOTE: ``source`` +# and ``subdir`` are deliberately excluded -- presence of either alongside +# ``upstream`` is an authoring error caught at parse time. +_UPSTREAM_PACKAGE_KEYS = frozenset( + { + "name", + "upstream", + "plugin", + "version", + "ref", + "tag_pattern", + "include_prerelease", + "allow_head", + # Anthropic pass-through overrides (curator may override + # individual display fields without touching the upstream + # plugin's content). + "description", + "homepage", + "tags", + "keywords", + "author", + "license", + "repository", + } +) + +# Strict key set for entries inside the ``upstreams:`` block. +_UPSTREAM_REGISTRATION_KEYS = frozenset( + { + "alias", + "repo", + "path", + "ref", + "branch", + "host", + "allow_head", + } +) + # Limits for keywords/tags array to prevent DoS via oversized manifests (S4). 
_MAX_TAGS_COUNT = 50 _MAX_TAG_LENGTH = 100 @@ -166,6 +225,7 @@ def _parse_author(raw: Any, index: int) -> dict[str, str] | None: "metadata", "build", "packages", + "upstreams", } ) # --------------------------------------------------------------------------- @@ -226,6 +286,116 @@ class PackageEntry: is_local: bool = False +# Alias for explicit naming in dispatch / tests; identical to PackageEntry. +DirectPackageEntry = PackageEntry + + +@dataclass(frozen=True) +class Upstream: + """A registered external marketplace pointer. + + Declared in ``apm.yml -> marketplace.upstreams[]``. Each entry is + addressed by its ``alias`` from upstream-sourced ``packages[]`` + entries (via the ``upstream:`` field). + + Attributes + ---------- + alias : str + Curator-chosen identifier; must match + ``[A-Za-z0-9][A-Za-z0-9_-]{0,63}`` so it is safe to use as a + directory-segment in cache paths and as a lockfile key. + repo : str + ``owner/repo`` of the upstream marketplace. + path : str + Path to the upstream ``marketplace.json`` within ``repo``. + Defaults to ``.claude-plugin/marketplace.json``. + ref : str | None + Pinned commit SHA or tag for the upstream manifest. Required + for reproducible builds; if absent, ``allow_head`` must be + ``True`` and ``branch`` HEAD is used (with a build warning). + branch : str + Branch tracked when ``ref`` is absent and ``allow_head`` is + ``True``. Defaults to ``main``. + host : str + Git host. Defaults to ``github.com``. + allow_head : bool + Curator opt-in to track a moving branch HEAD. Default ``False``. + Governance can forbid via the + ``marketplace.upstream.allow_unpinned_refs`` policy key. + """ + + alias: str + repo: str + path: str = ".claude-plugin/marketplace.json" + ref: str | None = None + branch: str = "main" + host: str = "github.com" + allow_head: bool = False + + +@dataclass(frozen=True) +class UpstreamPackageEntry: + """A ``packages[]`` entry sourced from a registered upstream. 
+ + Distinguished from :class:`PackageEntry` (the direct shape) by the + presence of the ``upstream`` field and the absence of ``source``. + The builder dispatches on ``isinstance``; mixing both shapes in the + same ``packages:`` list is supported. + + Attributes + ---------- + name : str + Display name in the curator's emitted ``marketplace.json``. May + differ from ``plugin`` when the curator renames. + upstream_alias : str + References ``upstreams[].alias``. The YAML key is ``upstream``; + the internal attribute is renamed because ``upstream`` would + clash with future API method names and to make the alias-vs- + plugin-name distinction explicit at use sites. + plugin : str | None + Name of the plugin in the upstream marketplace. Defaults to + ``name`` when absent. + version : str | None + Optional curator-supplied semver range (mutually exclusive with + ``ref``). Resolved against the upstream plugin's repo tags. + ref : str | None + Optional curator-supplied commit SHA or tag (overrides the + upstream plugin's pinned ref). + tag_pattern : str | None + Optional curator override of ``build.tagPattern`` for this + package's version resolution. + include_prerelease : bool + Curator override; default ``False``. + allow_head : bool + Per-entry opt-in to mutable refs. Default ``False``. + description, homepage, tags, author, license, repository : + Optional Anthropic pass-through overrides; if absent, the + upstream plugin's values pass through verbatim. + """ + + name: str + upstream_alias: str + plugin: str | None = None + # APM-only fields (overrides; mutually exclusive `version`/`ref`) + version: str | None = None + ref: str | None = None + tag_pattern: str | None = None + include_prerelease: bool = False + allow_head: bool = False + # Anthropic pass-through overrides + description: str | None = None + homepage: str | None = None + tags: tuple[str, ...] 
= () + author: Mapping[str, str] | None = None + license: str | None = None + repository: str | None = None + + +# Public union alias for the heterogeneous ``packages`` collection. The +# builder dispatches on ``isinstance`` to pick the correct emit path. +MarketplacePackage = PackageEntry | UpstreamPackageEntry + + @dataclass(frozen=True) class MarketplaceConfig: """Parsed marketplace configuration. @@ -251,7 +421,8 @@ class MarketplaceConfig: output: str = ".claude-plugin/marketplace.json" metadata: dict[str, Any] = field(default_factory=dict) build: MarketplaceBuild = field(default_factory=MarketplaceBuild) - packages: tuple[PackageEntry, ...] = () + packages: tuple[MarketplacePackage, ...] = () + upstreams: tuple[Upstream, ...] = () # Origin tracking + override-detection metadata source_path: Path | None = None is_legacy: bool = False @@ -373,13 +544,40 @@ def _parse_build(raw: Any) -> MarketplaceBuild: return MarketplaceBuild(tag_pattern=tag_pattern) -def _parse_package_entry(raw: Any, index: int) -> PackageEntry: - """Parse and validate a single ``packages`` entry.""" +def _parse_package_entry(raw: Any, index: int) -> MarketplacePackage: + """Dispatch to the direct-package or upstream-package parser. + + Discriminates on the presence of ``source`` vs ``upstream``. Both + keys present, or neither, is a hard error caught here so the + individual shape parsers can apply their own strict key sets. 
+ """ if not isinstance(raw, dict): raise MarketplaceYmlError(f"packages[{index}] must be a mapping") + has_source = "source" in raw and raw["source"] is not None + has_upstream = "upstream" in raw and raw["upstream"] is not None + + if has_source and has_upstream: + raise MarketplaceYmlError( + f"packages[{index}]: 'source' and 'upstream' are mutually " + f"exclusive (a package is either a direct git source or " + f"sourced from a registered upstream, never both)" + ) + if not has_source and not has_upstream: + raise MarketplaceYmlError( + f"packages[{index}]: requires exactly one of 'source' " + f"(direct) or 'upstream' (upstream-sourced)" + ) + + if has_upstream: + return _parse_upstream_package_entry(raw, index) + return _parse_direct_package_entry(raw, index) + + +def _parse_direct_package_entry(raw: dict[str, Any], index: int) -> PackageEntry: + """Parse and validate a single direct ``packages`` entry.""" # -- strict key check -- - _check_unknown_keys(raw, _PACKAGE_ENTRY_KEYS, context=f"packages[{index}]") + _check_unknown_keys(raw, _DIRECT_PACKAGE_KEYS, context=f"packages[{index}]") name = _require_str(raw, "name", context=f"packages[{index}]") source = _require_str(raw, "source", context=f"packages[{index}]") @@ -530,6 +728,241 @@ def _parse_package_entry(raw: Any, index: int) -> PackageEntry: ) +def _parse_upstream_package_entry(raw: dict[str, Any], index: int) -> UpstreamPackageEntry: + """Parse and validate a single upstream-sourced ``packages`` entry. + + The strict key set forbids ``source`` and ``subdir`` -- their + presence on an upstream entry is an authoring error caught here. + Cross-validation that ``upstream`` references a declared alias + happens later in :func:`_build_config` once the ``upstreams:`` + block has been parsed. 
+ """ + ctx = f"packages[{index}]" + _check_unknown_keys(raw, _UPSTREAM_PACKAGE_KEYS, context=ctx) + + name = _require_str(raw, "name", context=ctx) + upstream_alias = _require_str(raw, "upstream", context=ctx) + if not _UPSTREAM_ALIAS_RE.match(upstream_alias): + raise MarketplaceYmlError( + f"'{ctx}.upstream' '{upstream_alias}' is not a valid alias " + f"(allowed: leading alphanumeric, then alphanumerics/'_'/'-', " + f"max 64 chars)" + ) + + plugin: str | None = raw.get("plugin") + if plugin is not None: + if not isinstance(plugin, str) or not plugin.strip(): + raise MarketplaceYmlError(f"'{ctx}.plugin' must be a non-empty string") + plugin = plugin.strip() + + # APM-only: version (semver range, mutually exclusive with ref) + version: str | None = raw.get("version") + if version is not None: + version = str(version).strip() + if not version: + raise MarketplaceYmlError(f"'{ctx}.version' must be a non-empty string") + + # APM-only: ref (commit SHA or tag) + ref: str | None = raw.get("ref") + if ref is not None: + ref = str(ref).strip() + if not ref: + raise MarketplaceYmlError(f"'{ctx}.ref' must be a non-empty string") + + if version is not None and ref is not None: + raise MarketplaceYmlError( + f"'{ctx}': 'version' and 'ref' are mutually exclusive " + f"(precedence: explicit ref wins; specify only one)" + ) + + # APM-only: tag_pattern + tag_pattern: str | None = raw.get("tag_pattern") + if tag_pattern is not None: + if not isinstance(tag_pattern, str) or not tag_pattern.strip(): + raise MarketplaceYmlError(f"'{ctx}.tag_pattern' must be a non-empty string") + tag_pattern = tag_pattern.strip() + _validate_tag_pattern(tag_pattern, context=f"{ctx}.tag_pattern") + + # APM-only: include_prerelease + include_prerelease = raw.get("include_prerelease", False) + if not isinstance(include_prerelease, bool): + raise MarketplaceYmlError(f"'{ctx}.include_prerelease' must be a boolean") + + # APM-only: allow_head (per-entry opt-in to mutable refs) + allow_head = 
raw.get("allow_head", False) + if not isinstance(allow_head, bool): + raise MarketplaceYmlError(f"'{ctx}.allow_head' must be a boolean") + + # Anthropic pass-through overrides (all optional) + description: str | None = raw.get("description") + if description is not None: + if not isinstance(description, str) or not description.strip(): + raise MarketplaceYmlError(f"'{ctx}.description' must be a non-empty string") + description = description.strip() + + homepage: str | None = raw.get("homepage") + if homepage is not None: + if not isinstance(homepage, str) or not homepage.strip(): + raise MarketplaceYmlError(f"'{ctx}.homepage' must be a non-empty string") + homepage = homepage.strip() + + raw_tags = raw.get("tags") + tags: tuple[str, ...] = () + if raw_tags is not None: + if not isinstance(raw_tags, list): + raise MarketplaceYmlError(f"'{ctx}.tags' must be a list of strings") + for i, item in enumerate(raw_tags): + if not isinstance(item, str): + raise MarketplaceYmlError( + f"'{ctx}.tags[{i}]' must be a string, got {type(item).__name__}" + ) + tags = tuple(str(t) for t in raw_tags) + + raw_keywords = raw.get("keywords") + if raw_keywords is not None: + if not isinstance(raw_keywords, list): + raise MarketplaceYmlError(f"'{ctx}.keywords' must be a list of strings") + for i, item in enumerate(raw_keywords): + if not isinstance(item, str): + raise MarketplaceYmlError( + f"'{ctx}.keywords[{i}]' must be a string, got {type(item).__name__}" + ) + seen = set(tags) + merged = list(tags) + for kw in raw_keywords: + if kw not in seen: + seen.add(kw) + merged.append(kw) + tags = tuple(merged) + + # S4: cap tags array length and item length (mirror direct shape). 
+    if len(tags) > _MAX_TAGS_COUNT:
+        import logging as _logging
+
+        _logging.getLogger(__name__).warning(
+            "packages[%d] ('%s'): tags truncated from %d to %d items",
+            index,
+            name,
+            len(tags),
+            _MAX_TAGS_COUNT,
+        )
+        tags = tags[:_MAX_TAGS_COUNT]
+    tags = tuple(t[:_MAX_TAG_LENGTH] for t in tags)
+
+    author = _parse_author(raw.get("author"), index)
+
+    license_val: str | None = raw.get("license")
+    if license_val is not None:
+        if not isinstance(license_val, str) or not license_val.strip():
+            raise MarketplaceYmlError(f"'{ctx}.license' must be a non-empty string")
+        license_val = license_val.strip()
+
+    repository: str | None = raw.get("repository")
+    if repository is not None:
+        if not isinstance(repository, str) or not repository.strip():
+            raise MarketplaceYmlError(f"'{ctx}.repository' must be a non-empty string")
+        repository = repository.strip()
+
+    return UpstreamPackageEntry(
+        name=name,
+        upstream_alias=upstream_alias,
+        plugin=plugin,
+        version=version,
+        ref=ref,
+        tag_pattern=tag_pattern,
+        include_prerelease=include_prerelease,
+        allow_head=allow_head,
+        description=description,
+        homepage=homepage,
+        tags=tags,
+        author=author,
+        license=license_val,
+        repository=repository,
+    )
+
+
+def _parse_upstream_registration(raw: Any, index: int) -> Upstream:
+    """Parse and validate a single ``upstreams[]`` block entry."""
+    ctx = f"upstreams[{index}]"
+    if not isinstance(raw, dict):
+        raise MarketplaceYmlError(f"{ctx} must be a mapping")
+
+    _check_unknown_keys(raw, _UPSTREAM_REGISTRATION_KEYS, context=ctx)
+
+    alias = _require_str(raw, "alias", context=ctx)
+    if not _UPSTREAM_ALIAS_RE.match(alias):
+        raise MarketplaceYmlError(
+            f"'{ctx}.alias' '{alias}' is not a valid alias "
+            f"(allowed: leading alphanumeric, then alphanumerics/'_'/'-', "
+            f"max 64 chars)"
+        )
+
+    repo = _require_str(raw, "repo", context=ctx)
+    if not _REMOTE_SOURCE_RE.match(repo):
+        raise MarketplaceYmlError(f"'{ctx}.repo' must match 'owner/repo' shape, got '{repo}'")
+
+    path = raw.get("path")
+    if path is None:
+        path = ".claude-plugin/marketplace.json"
+    if not isinstance(path, str) or not path.strip():
+        raise MarketplaceYmlError(f"'{ctx}.path' must be a non-empty string")
+    path = path.strip()
+    try:
+        validate_path_segments(path, context=f"{ctx}.path")
+    except PathTraversalError as exc:
+        raise MarketplaceYmlError(str(exc)) from exc
+
+    ref: str | None = raw.get("ref")
+    if ref is not None:
+        ref = str(ref).strip()
+        if not ref:
+            raise MarketplaceYmlError(f"'{ctx}.ref' must be a non-empty string")
+
+    branch = raw.get("branch")
+    if branch is None:
+        branch = "main"
+    if not isinstance(branch, str) or not branch.strip():
+        raise MarketplaceYmlError(f"'{ctx}.branch' must be a non-empty string")
+    branch = branch.strip()
+
+    host = raw.get("host")
+    if host is None:
+        host = "github.com"
+    if not isinstance(host, str) or not host.strip():
+        raise MarketplaceYmlError(f"'{ctx}.host' must be a non-empty string")
+    host = host.strip()
+    if not _UPSTREAM_HOST_RE.match(host):
+        raise MarketplaceYmlError(f"'{ctx}.host' '{host}' is not a valid hostname")
+
+    allow_head = raw.get("allow_head", False)
+    if not isinstance(allow_head, bool):
+        raise MarketplaceYmlError(f"'{ctx}.allow_head' must be a boolean")
+
+    # Reproducibility guard: a missing ref is only acceptable if the
+    # author explicitly opts in via ``allow_head: true``. The builder
+    # additionally emits a ``BuildDiagnostic(level="warn")`` whenever
+    # an upstream is resolved against branch HEAD; governance can
+    # forbid it via the ``marketplace.upstream.allow_unpinned_refs``
+    # policy key.
+    if ref is None and not allow_head:
+        raise MarketplaceYmlError(
+            f"'{ctx}': 'ref' is required for reproducible builds. "
+            f"Set ref to a commit SHA or tag, or set "
+            f"'allow_head: true' to opt in to tracking branch HEAD "
+            f"(builds will warn)."
+ ) + + return Upstream( + alias=alias, + repo=repo, + path=path, + ref=ref, + branch=branch, + host=host, + allow_head=allow_head, + ) + + # --------------------------------------------------------------------------- # Public loader # --------------------------------------------------------------------------- @@ -768,6 +1201,29 @@ def _build_config( # -- build -- build = _parse_build(marketplace_dict.get("build")) + # -- upstreams (optional) -- parse FIRST so packages cross-validation + # can verify each upstream-sourced entry references a declared alias. + raw_upstreams = marketplace_dict.get("upstreams") + if raw_upstreams is None: + raw_upstreams = [] + if not isinstance(raw_upstreams, list): + raise MarketplaceYmlError("'upstreams' must be a list") + + upstream_entries: list[Upstream] = [] + seen_aliases: dict[str, int] = {} + for u_idx, raw_upstream in enumerate(raw_upstreams): + upstream = _parse_upstream_registration(raw_upstream, u_idx) + if upstream.alias in seen_aliases: + raise MarketplaceYmlError( + f"Duplicate upstream alias '{upstream.alias}' " + f"(upstreams[{seen_aliases[upstream.alias]}] and " + f"upstreams[{u_idx}])" + ) + seen_aliases[upstream.alias] = u_idx + upstream_entries.append(upstream) + + declared_aliases = {u.alias for u in upstream_entries} + # -- packages -- raw_packages = marketplace_dict.get("packages") if raw_packages is None: @@ -775,10 +1231,13 @@ def _build_config( if not isinstance(raw_packages, list): raise MarketplaceYmlError("'packages' must be a list") - entries: list[PackageEntry] = [] + entries: list[MarketplacePackage] = [] seen_names: dict[str, int] = {} + seen_upstream_pairs: dict[tuple[str, str], int] = {} for idx, raw_entry in enumerate(raw_packages): entry = _parse_package_entry(raw_entry, idx) + # Cross-shape `name` uniqueness (panel: prevents dependency- + # confusion / name shadowing across direct + upstream). 
lower_name = entry.name.lower() if lower_name in seen_names: raise MarketplaceYmlError( @@ -786,6 +1245,30 @@ def _build_config( f"(packages[{seen_names[lower_name]}] and packages[{idx}])" ) seen_names[lower_name] = idx + + if isinstance(entry, UpstreamPackageEntry): + # Cross-validate against declared upstreams. + if entry.upstream_alias not in declared_aliases: + known = ", ".join(sorted(declared_aliases)) or "(none declared)" + raise MarketplaceYmlError( + f"packages[{idx}] ('{entry.name}'): " + f"upstream '{entry.upstream_alias}' is not declared " + f"in marketplace.upstreams (known aliases: {known})" + ) + # Reject duplicate (upstream_alias, plugin) pairs -- the + # same upstream plugin cannot be exposed twice under + # different display names without explicit operator intent. + plugin_key = entry.plugin or entry.name + pair = (entry.upstream_alias, plugin_key) + if pair in seen_upstream_pairs: + prev = seen_upstream_pairs[pair] + raise MarketplaceYmlError( + f"Duplicate upstream package " + f"({entry.upstream_alias}/{plugin_key}) " + f"(packages[{prev}] and packages[{idx}])" + ) + seen_upstream_pairs[pair] = idx + entries.append(entry) return MarketplaceConfig( @@ -797,6 +1280,7 @@ def _build_config( metadata=metadata, build=build, packages=tuple(entries), + upstreams=tuple(upstream_entries), source_path=source_path, is_legacy=is_legacy, name_overridden=name_overridden, diff --git a/tests/integration/marketplace/test_upstream_build_integration.py b/tests/integration/marketplace/test_upstream_build_integration.py new file mode 100644 index 000000000..30186f500 --- /dev/null +++ b/tests/integration/marketplace/test_upstream_build_integration.py @@ -0,0 +1,231 @@ +"""Integration-with-fixtures tests for the upstream build pipeline. + +These tests exercise the full MarketplaceBuilder end-to-end with a +pre-populated UpstreamCache (no network calls) to validate: + +1. 
A mixed direct+upstream apm.yml produces the expected + marketplace.json and apm.lock.yaml entries. +2. Rebuilding with BuildOptions(offline=True) from the populated lockfile + produces byte-identical output (reproducibility invariant). + +No real git ls-remote or HTTP calls are made. Both the direct package +and the upstream plugin use pinned SHA refs (no tag resolution needed); +the upstream manifest is served from a fixture dict injected into +UpstreamCache before the build. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import yaml + +from apm_cli.marketplace.builder import BuildOptions, MarketplaceBuilder +from apm_cli.marketplace.upstream_cache import UpstreamCache, UpstreamCacheKey, compute_cache_key + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +_SHA_MANIFEST = "a" * 40 +_SHA_PLUGIN = "b" * 40 +_SHA_DIRECT = "c" * 40 + +_UPSTREAM_MANIFEST: dict = { + "name": "fixture-upstream", + "owner": {"name": "fixture-org"}, + "plugins": [ + { + "name": "upstream-plugin", + "description": "A plugin from the upstream fixture.", + "source": { + "type": "github", + "repo": "fixture-org/upstream-plugin", + "ref": "main", + "sha": _SHA_PLUGIN, + }, + } + ], +} + +_MIXED_YML = f"""\ +name: fixture-marketplace +description: Integration fixture with direct + upstream packages +version: 1.0.0 +marketplace: + owner: + name: Fixture Org + email: fixture@example.com + url: https://example.com + metadata: + pluginRoot: plugins + category: testing + upstreams: + - alias: fixture-upstream + repo: fixture-org/fixture-upstream + path: .apm/marketplace.json + ref: {_SHA_MANIFEST} + packages: + - name: direct-pkg + description: A direct package. + source: fixture-org/direct-pkg + ref: {_SHA_DIRECT} + - name: upstream-pkg + description: Curated from upstream. 
+ upstream: fixture-upstream + plugin: upstream-plugin +""" + + +def _write_yml(tmp_path: Path, content: str) -> Path: + p = tmp_path / "apm.yml" + p.write_text(content, encoding="utf-8") + return p + + +def _pre_populate_cache(cache_dir: Path, manifest: dict) -> None: + """Write the fixture manifest into UpstreamCache so no network fetch + is needed during the build.""" + cache = UpstreamCache(base_dir=cache_dir, fetch_callback=lambda k, a: manifest) + key: UpstreamCacheKey = compute_cache_key( + host="github.com", + owner="fixture-org", + repo="fixture-upstream", + sha=_SHA_MANIFEST, + path=".apm/marketplace.json", + ) + cache.put(key, manifest) + + +def _make_builder(yml_path: Path, cache_dir: Path, *, offline: bool = False) -> MarketplaceBuilder: + """Build a MarketplaceBuilder with the upstream cache injected.""" + opts = BuildOptions(dry_run=False, offline=offline) + builder = MarketplaceBuilder(yml_path, options=opts) + # Inject the pre-populated cache so upstream resolution never hits the + # network. We replace the factory's ``build()`` return value. + upstream_manifest = _UPSTREAM_MANIFEST + + original_build_resolver = builder._build_upstream_resolver + + def _patched_build_resolver(yml): + resolver = original_build_resolver(yml) + # Swap the cache to one backed by our fixture directory. 
+ resolver._cache = UpstreamCache( + base_dir=cache_dir, + fetch_callback=lambda k, a: upstream_manifest, + ) + return resolver + + builder._build_upstream_resolver = _patched_build_resolver + return builder + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestUpstreamBuildIntegration: + """Full-pipeline integration tests for upstream resolution.""" + + def test_mixed_build_emits_both_entries(self, tmp_path: Path) -> None: + """A mixed direct+upstream apm.yml must produce a marketplace.json + that contains BOTH plugin entries, with no ``metadata.apm.*`` keys + injected.""" + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + _pre_populate_cache(cache_dir, _UPSTREAM_MANIFEST) + yml_path = _write_yml(tmp_path, _MIXED_YML) + + builder = _make_builder(yml_path, cache_dir) + report = builder.build() + + assert not report.errors, f"Build had errors: {report.errors}" + + out_path = tmp_path / ".claude-plugin" / "marketplace.json" + assert out_path.exists(), "marketplace.json was not produced" + + marketplace = json.loads(out_path.read_text(encoding="utf-8")) + plugins = marketplace.get("plugins", []) + plugin_names = {p["name"] for p in plugins} + + assert "direct-pkg" in plugin_names, f"direct-pkg missing from {plugin_names}" + assert "upstream-pkg" in plugin_names, f"upstream-pkg missing from {plugin_names}" + + # No APM-specific metadata keys in the emitted JSON (pass-through + # contract: emitted marketplace.json must be Anthropic-conformant). 
+ raw_text = out_path.read_text(encoding="utf-8") + assert "apm" not in raw_text.split('"metadata"', 1)[-1].split('"plugins"')[0], ( + "APM-specific metadata keys found in emitted marketplace.json" + ) + + def test_lockfile_records_upstream_provenance(self, tmp_path: Path) -> None: + """After a successful build the lockfile must record upstream + provenance: manifest_sha and the resolved plugin sha.""" + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + _pre_populate_cache(cache_dir, _UPSTREAM_MANIFEST) + yml_path = _write_yml(tmp_path, _MIXED_YML) + + builder = _make_builder(yml_path, cache_dir) + builder.build() + + lock_path = tmp_path / "apm.lock.yaml" + assert lock_path.exists(), "apm.lock.yaml was not produced" + + lock = yaml.safe_load(lock_path.read_text(encoding="utf-8")) + upstreams = lock.get("upstreams", {}) + assert "fixture-upstream" in upstreams, ( + f"upstream alias not in lockfile. Keys: {list(upstreams)}" + ) + alias_entry = upstreams["fixture-upstream"] + assert alias_entry.get("manifest_sha") == _SHA_MANIFEST, ( + f"manifest_sha mismatch: {alias_entry.get('manifest_sha')}" + ) + plugins_lock = alias_entry.get("plugins", {}) + assert "upstream-plugin" in plugins_lock, ( + f"upstream-plugin not in lockfile plugins: {list(plugins_lock)}" + ) + assert plugins_lock["upstream-plugin"].get("resolved_sha") == _SHA_PLUGIN, ( + f"resolved_sha mismatch: {plugins_lock['upstream-plugin'].get('resolved_sha')}" + ) + + def test_offline_rebuild_is_byte_identical(self, tmp_path: Path) -> None: + """Rebuilding with offline=True after an initial build must produce + byte-identical marketplace.json output (reproducibility invariant). + + The initial build populates apm.lock.yaml with pinned SHAs. 
The + offline rebuild reads those SHAs from the lock instead of calling + the network, and must produce the same emitted JSON.""" + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + _pre_populate_cache(cache_dir, _UPSTREAM_MANIFEST) + yml_path = _write_yml(tmp_path, _MIXED_YML) + + # ---- first build ------------------------------------------------------- + builder = _make_builder(yml_path, cache_dir) + report1 = builder.build() + + assert not report1.errors, f"First build had errors: {report1.errors}" + out_path = tmp_path / ".claude-plugin" / "marketplace.json" + first_output = out_path.read_bytes() + + # ---- offline rebuild --------------------------------------------------- + # Both direct (pinned SHA) and upstream (locked manifest SHA) bypass + # the network entirely in offline mode. + builder2 = _make_builder(yml_path, cache_dir, offline=True) + report2 = builder2.build() + assert not report2.errors, f"Offline rebuild had errors: {report2.errors}" + + second_output = out_path.read_bytes() + assert first_output == second_output, ( + "Offline rebuild produced different marketplace.json output.\n" + f"First: {first_output[:200]}\n" + f"Second: {second_output[:200]}" + ) diff --git a/tests/unit/commands/test_marketplace_upstream.py b/tests/unit/commands/test_marketplace_upstream.py new file mode 100644 index 000000000..bd63b6ba8 --- /dev/null +++ b/tests/unit/commands/test_marketplace_upstream.py @@ -0,0 +1,364 @@ +"""Tests for ``apm marketplace upstream {add,list,remove}`` CLI commands.""" + +from __future__ import annotations + +import textwrap +from pathlib import Path + +import pytest +import yaml +from click.testing import CliRunner + +from apm_cli.commands.marketplace import marketplace + +SHA40 = "c" * 40 + + +def _write_yml(tmp_path: Path, content: str | None = None) -> Path: + if content is None: + content = textwrap.dedent("""\ + name: acme-marketplace + description: ACME curated marketplace + version: 1.0.0 + owner: + name: ACME Corp + packages: [] 
+ """) + p = tmp_path / "marketplace.yml" + p.write_text(content, encoding="utf-8") + return p + + +@pytest.fixture +def runner(): + return CliRunner() + + +class TestUpstreamAdd: + def test_happy_with_ref_and_no_verify(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + [ + "upstream", + "add", + "abhigyanpatwari/GitNexus", + "--alias", + "gitnexus", + "--ref", + SHA40, + "--no-verify", + ], + ) + assert result.exit_code == 0, result.output + assert "gitnexus" in result.output + data = yaml.safe_load((tmp_path / "marketplace.yml").read_text()) + assert data["upstreams"][0]["alias"] == "gitnexus" + assert data["upstreams"][0]["ref"] == SHA40 + + def test_branch_requires_allow_head(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + [ + "upstream", + "add", + "a/b", + "--alias", + "ok", + "--branch", + "main", + "--no-verify", + ], + ) + assert result.exit_code != 0 + assert "allow-head" in result.output.lower() + + def test_ref_xor_branch(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + [ + "upstream", + "add", + "a/b", + "--alias", + "ok", + "--ref", + SHA40, + "--branch", + "main", + "--no-verify", + ], + ) + assert result.exit_code != 0 + assert "mutually exclusive" in result.output.lower() + + def test_neither_ref_nor_branch(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + ["upstream", "add", "a/b", "--alias", "ok", "--no-verify"], + ) + assert result.exit_code != 0 + assert "either --ref" in result.output.lower() or "specify either" in result.output.lower() + + def test_invalid_alias_exits_2(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + [ + 
"upstream", + "add", + "a/b", + "--alias", + "-bad-alias", + "--ref", + SHA40, + "--no-verify", + ], + ) + assert result.exit_code == 2 + assert "alias" in result.output.lower() + + def test_duplicate_alias_exits_2(self, runner, tmp_path, monkeypatch): + # Re-adding the same alias must hard-fail at exit code 2 from + # the CLI layer (not silently overwrite). The editor layer + # raises ``MarketplaceYmlError``; this test pins the CLI + # contract that surfaces it. + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + first = runner.invoke( + marketplace, + [ + "upstream", + "add", + "abhigyanpatwari/GitNexus", + "--alias", + "gitnexus", + "--ref", + SHA40, + "--no-verify", + ], + ) + assert first.exit_code == 0, first.output + + second = runner.invoke( + marketplace, + [ + "upstream", + "add", + "other/Repo", + "--alias", + "gitnexus", + "--ref", + SHA40, + "--no-verify", + ], + ) + assert second.exit_code == 2, second.output + assert "gitnexus" in second.output + # Original entry must remain untouched. 
+ data = yaml.safe_load((tmp_path / "marketplace.yml").read_text()) + assert len(data["upstreams"]) == 1 + assert data["upstreams"][0]["repo"] == "abhigyanpatwari/GitNexus" + + +class TestUpstreamList: + def test_empty(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke(marketplace, ["upstream", "list"]) + assert result.exit_code == 0 + assert "no upstream" in result.output.lower() + + def test_lists_after_add(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + runner.invoke( + marketplace, + [ + "upstream", + "add", + "abhigyanpatwari/GitNexus", + "--alias", + "gitnexus", + "--ref", + SHA40, + "--no-verify", + ], + ) + result = runner.invoke(marketplace, ["upstream", "list"]) + assert result.exit_code == 0 + assert "gitnexus" in result.output + assert "GitNexus" in result.output + + +class TestUpstreamRemove: + def test_removes_existing(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + runner.invoke( + marketplace, + [ + "upstream", + "add", + "a/b", + "--alias", + "tobedeleted", + "--ref", + SHA40, + "--no-verify", + ], + ) + result = runner.invoke(marketplace, ["upstream", "remove", "tobedeleted", "--yes"]) + assert result.exit_code == 0 + assert "tobedeleted" in result.output + data = yaml.safe_load((tmp_path / "marketplace.yml").read_text()) + assert not data.get("upstreams") + + def test_unknown_alias_exits_2(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke(marketplace, ["upstream", "remove", "ghost", "--yes"]) + assert result.exit_code == 2 + assert "not found" in result.output.lower() + + def test_blocked_when_referenced(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml( + tmp_path, + textwrap.dedent("""\ + name: acme-marketplace + description: ACME + version: 1.0.0 + owner: + name: ACME Corp + upstreams: + - alias: 
gitnexus + repo: a/b + ref: cccccccccccccccccccccccccccccccccccccccc + packages: + - name: my-skill + upstream: gitnexus + plugin: gitnexus + """), + ) + result = runner.invoke(marketplace, ["upstream", "remove", "gitnexus", "--yes"]) + assert result.exit_code == 2 + assert "still referenced" in result.output.lower() + + +# --------------------------------------------------------------------------- +# package add --upstream +# --------------------------------------------------------------------------- + + +class TestPackageAddUpstream: + def _add_upstream(self, runner, alias="gitnexus", repo="abhigyanpatwari/GitNexus"): + return runner.invoke( + marketplace, + [ + "upstream", + "add", + repo, + "--alias", + alias, + "--ref", + SHA40, + "--no-verify", + ], + ) + + def test_happy_with_upstream(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + assert self._add_upstream(runner).exit_code == 0 + result = runner.invoke( + marketplace, + [ + "package", + "add", + "--upstream", + "gitnexus", + "--plugin", + "gitnexus", + "--name", + "acme-gitnexus", + ], + ) + assert result.exit_code == 0, result.output + data = yaml.safe_load((tmp_path / "marketplace.yml").read_text()) + pkg = data["packages"][0] + assert pkg["name"] == "acme-gitnexus" + assert pkg["upstream"] == "gitnexus" + assert pkg["plugin"] == "gitnexus" + assert "source" not in pkg + + def test_source_xor_upstream(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + [ + "package", + "add", + "acme/foo", + "--upstream", + "gitnexus", + "--no-verify", + ], + ) + assert result.exit_code != 0 + assert "mutually exclusive" in result.output.lower() + + def test_neither_source_nor_upstream(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke(marketplace, ["package", "add"]) + assert result.exit_code != 0 + assert "either a source" in 
result.output.lower() + + def test_unknown_upstream_alias_exits_2(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + result = runner.invoke( + marketplace, + [ + "package", + "add", + "--upstream", + "ghost", + "--plugin", + "x", + "--name", + "x", + ], + ) + assert result.exit_code == 2 + assert "not registered" in result.output.lower() + + def test_subdir_rejected_with_upstream(self, runner, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _write_yml(tmp_path) + assert self._add_upstream(runner).exit_code == 0 + result = runner.invoke( + marketplace, + [ + "package", + "add", + "--upstream", + "gitnexus", + "--plugin", + "gitnexus", + "--subdir", + "src", + ], + ) + assert result.exit_code != 0 + assert "subdir only applies" in result.output.lower() diff --git a/tests/unit/commands/test_pack_upstream_count.py b/tests/unit/commands/test_pack_upstream_count.py new file mode 100644 index 000000000..de093f22c --- /dev/null +++ b/tests/unit/commands/test_pack_upstream_count.py @@ -0,0 +1,94 @@ +"""Regression-trap tests for the ``apm pack`` package-count rendering. + +Bug fix at ``src/apm_cli/commands/pack.py``: the success and dry-run +messages reported ``len(report.resolved)`` only, undercounting builds +that emit upstream-sourced packages. This test pins the fix by asserting +the rendered message reflects the **total** of direct + upstream entries +for every output path the builder can take. + +If the count regresses (e.g. reverts to ``len(resolved)``), these tests +fail before the build finishes -- giving the regression a hard failure +rather than a silent UX drift. 
+""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from apm_cli.commands.pack import _render_marketplace_result +from apm_cli.marketplace.builder import BuildReport + + +def _make_report( + *, + resolved_count: int, + upstream_count: int, + dry_run: bool = False, +) -> BuildReport: + """Build a minimal ``BuildReport`` with the requested counts. + + Only ``resolved`` and ``upstream_resolved`` lengths are exercised + by the renderer; payloads can be opaque sentinels. + """ + return BuildReport( + resolved=tuple(object() for _ in range(resolved_count)), + errors=(), + warnings=(), + unchanged_count=0, + added_count=resolved_count + upstream_count, + updated_count=0, + removed_count=0, + output_path=Path("/tmp/marketplace.json"), + dry_run=dry_run, + upstream_resolved=tuple(object() for _ in range(upstream_count)), + ) + + +def test_success_message_counts_direct_plus_upstream() -> None: + """Success path: ``Built marketplace.json (N direct + M upstream)`` when + upstream packages are present.""" + logger = MagicMock() + report = _make_report(resolved_count=2, upstream_count=3) + + _render_marketplace_result(logger, report, dry_run=False) + + logger.success.assert_called_once() + msg = logger.success.call_args[0][0] + assert "(2 direct + 3 upstream)" in msg, msg + assert "Built marketplace.json" in msg + + +def test_success_message_with_only_upstream_packages() -> None: + """Upstream-only build shows ``0 direct + N upstream`` breakdown.""" + logger = MagicMock() + report = _make_report(resolved_count=0, upstream_count=4) + + _render_marketplace_result(logger, report, dry_run=False) + + msg = logger.success.call_args[0][0] + assert "(0 direct + 4 upstream)" in msg, msg + + +def test_dry_run_message_counts_direct_plus_upstream() -> None: + """Dry-run path renders the same total via ``dry_run_notice``.""" + logger = MagicMock() + report = _make_report(resolved_count=1, upstream_count=2, dry_run=True) + + 
_render_marketplace_result(logger, report, dry_run=True) + + logger.dry_run_notice.assert_called_once() + msg = logger.dry_run_notice.call_args[0][0] + assert "(3 package(s))" in msg, msg + assert "Would write marketplace.json" in msg + + +def test_no_packages_renders_zero_count() -> None: + """Empty build -- count is 0, no off-by-one.""" + logger = MagicMock() + report = _make_report(resolved_count=0, upstream_count=0) + + _render_marketplace_result(logger, report, dry_run=False) + + msg = logger.success.call_args[0][0] + assert "(0 package(s))" in msg, msg diff --git a/tests/unit/marketplace/test_builder_upstream_integration.py b/tests/unit/marketplace/test_builder_upstream_integration.py new file mode 100644 index 000000000..c6f70ccb1 --- /dev/null +++ b/tests/unit/marketplace/test_builder_upstream_integration.py @@ -0,0 +1,456 @@ +"""Integration tests for upstream-sourced packages in MarketplaceBuilder. + +These tests exercise the full build pipeline end-to-end with both direct +and upstream-sourced ``packages[]`` entries. The upstream cache is wired +to an in-memory ``fetch_callback`` so no network access is required. + +Coverage targets: + +- Mixed-shape build: one direct + one upstream package both emit, with + upstream entries appearing after direct entries (v1 emit order). +- Upstream-only build emits a valid marketplace.json that round-trips + through ``parse_marketplace_json``. +- No ``metadata.apm.*`` keys are injected into the output. +- Curator overrides on description/version/tags win over upstream + values; ``author``/``license``/``repository``/``homepage`` are + curator-only (no upstream fallback). +- Upstream resolution errors raise ``BuildError`` BEFORE writing. +- Round-trip parse via ``parse_marketplace_json`` succeeds for every + emitted plugin. 
+""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from apm_cli.marketplace.builder import ( + BuildOptions, + MarketplaceBuilder, +) +from apm_cli.marketplace.errors import BuildError +from apm_cli.marketplace.models import parse_marketplace_json +from apm_cli.marketplace.ref_resolver import RemoteRef +from apm_cli.marketplace.upstream_cache import UpstreamCache +from apm_cli.marketplace.upstream_resolver import UpstreamResolver + +SHA_DIRECT = "a" * 40 +SHA_UPSTREAM_MANIFEST = "b" * 40 +SHA_UPSTREAM_PLUGIN = "c" * 40 + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +class _MockRefResolver: + """In-process mock for RefResolver -- no subprocess calls.""" + + def __init__(self, refs_by_remote: dict[str, list[RemoteRef]] | None = None) -> None: + self._refs = refs_by_remote or {} + + def list_remote_refs(self, owner_repo: str) -> list[RemoteRef]: + if owner_repo not in self._refs: + from apm_cli.marketplace.errors import GitLsRemoteError + + raise GitLsRemoteError( + package="", + summary=f"Remote '{owner_repo}' not found.", + hint="Check the source.", + ) + return self._refs[owner_repo] + + def close(self) -> None: + pass + + +def _gitnexus_manifest() -> dict: + """Minimal upstream marketplace.json modelled on GitNexus.""" + return { + "name": "gitnexus-marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "description": "Upstream-supplied description", + "version": "1.0.0", + "tags": ["upstream-tag"], + "source": { + "type": "git-subdir", + "repo": "abhigyanpatwari/GitNexus", + "path": "gitnexus-claude-plugin", + "sha": SHA_UPSTREAM_PLUGIN, + }, + } + ], + } + + +def _write_yml(tmp_path: Path, body: str) -> Path: + yml_path = tmp_path / "apm.yml" + yml_path.write_text(body, encoding="utf-8") + return yml_path + + +def 
_patch_resolver_factory( + builder: MarketplaceBuilder, + *, + cache: UpstreamCache, + ref_to_sha_value: str = SHA_UPSTREAM_MANIFEST, +) -> None: + """Replace ``_build_upstream_resolver`` with a test-controlled factory. + + Bypasses the network-touching default helpers (``ref_to_sha`` and + ``canonical_full_name``). The provided cache supplies the upstream + manifest in-memory. + """ + + def _factory(yml): # type: ignore[no-untyped-def] + upstreams_by_alias = {u.alias: u for u in yml.upstreams} + + def _ref_to_sha(host: str, owner: str, repo: str, ref: str) -> str: + return ref_to_sha_value + + return UpstreamResolver( + upstreams=upstreams_by_alias, + cache=cache, + ref_to_sha=_ref_to_sha, + canonical_full_name=None, + ) + + builder._build_upstream_resolver = _factory # type: ignore[assignment] + + +# --------------------------------------------------------------------------- +# Mixed-shape and upstream-only builds +# --------------------------------------------------------------------------- + + +_MIXED_YML = """\ +name: acme-marketplace +description: ACME curated marketplace +version: 0.1.0 +marketplace: + owner: + name: ACME Corp + upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + packages: + - name: direct-tool + source: acme/direct-tool + version: "^1.0.0" + - name: acme-gitnexus + upstream: gitnexus + plugin: gitnexus + description: ACME-curated GitNexus + tags: + - acme + - approved +""" + + +def test_mixed_shape_build_emits_both_plugins(tmp_path: Path) -> None: + """Direct + upstream packages both emit; upstream after direct.""" + yml_path = _write_yml(tmp_path, _MIXED_YML) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": 
[RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + + assert report.errors == () + assert len(report.resolved) == 1 + assert len(report.upstream_resolved) == 1 + + # Output marketplace.json has both plugins; direct first, upstream second. + output_path = report.output_path + import json + + doc = json.loads(output_path.read_text(encoding="utf-8")) + plugin_names = [p["name"] for p in doc["plugins"]] + assert plugin_names == ["direct-tool", "acme-gitnexus"] + + +def test_upstream_emission_has_no_apm_metadata(tmp_path: Path) -> None: + """Hard rule: no ``metadata.apm.*`` keys in emitted marketplace.json.""" + yml_path = _write_yml(tmp_path, _MIXED_YML) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + import json + + doc = json.loads(report.output_path.read_text(encoding="utf-8")) + + # No top-level apm key, no per-plugin apm metadata. 
+ assert "apm" not in doc + metadata = doc.get("metadata", {}) + assert "apm" not in metadata + for plugin in doc["plugins"]: + assert "apm" not in plugin + plugin_meta = plugin.get("metadata", {}) + if isinstance(plugin_meta, dict): + assert "apm" not in plugin_meta + + +def test_upstream_emission_curator_override_wins_for_description_and_tags( + tmp_path: Path, +) -> None: + """Curator overrides win over upstream values for description/tags.""" + yml_path = _write_yml(tmp_path, _MIXED_YML) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + import json + + doc = json.loads(report.output_path.read_text(encoding="utf-8")) + upstream_plugin = next(p for p in doc["plugins"] if p["name"] == "acme-gitnexus") + assert upstream_plugin["description"] == "ACME-curated GitNexus" + assert upstream_plugin["tags"] == ["acme", "approved"] + # Version was not overridden -- falls back to upstream value. 
+ assert upstream_plugin["version"] == "1.0.0" + + +def test_upstream_emission_uses_git_subdir_shape_when_subdir_present( + tmp_path: Path, +) -> None: + """Upstream plugin source shape matches the direct-emit contract.""" + yml_path = _write_yml(tmp_path, _MIXED_YML) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + import json + + doc = json.loads(report.output_path.read_text(encoding="utf-8")) + upstream_plugin = next(p for p in doc["plugins"] if p["name"] == "acme-gitnexus") + source = upstream_plugin["source"] + # Matches direct-emit: outer "source" key with inner "source" + # discriminator + ``url`` field for git-subdir. 
+ assert source["source"] == "git-subdir" + assert source["url"] == "abhigyanpatwari/GitNexus" + assert source["path"] == "gitnexus-claude-plugin" + assert source["sha"] == SHA_UPSTREAM_PLUGIN + + +def test_round_trip_via_parse_marketplace_json(tmp_path: Path) -> None: + """Every emitted plugin survives the lenient consumer parser.""" + yml_path = _write_yml(tmp_path, _MIXED_YML) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + import json + + doc = json.loads(report.output_path.read_text(encoding="utf-8")) + manifest = parse_marketplace_json(doc) + parsed_names = sorted(p.name for p in manifest.plugins) + assert parsed_names == ["acme-gitnexus", "direct-tool"] + + +# --------------------------------------------------------------------------- +# Failure modes +# --------------------------------------------------------------------------- + + +_UPSTREAM_ONLY_YML = """\ +name: acme-marketplace +description: ACME curated marketplace +version: 0.1.0 +marketplace: + owner: + name: ACME Corp + upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + packages: + - name: acme-gitnexus + upstream: gitnexus + plugin: gitnexus +""" + + +def test_upstream_resolution_error_raises_build_error_before_writing( + tmp_path: Path, +) -> None: + """Unknown upstream alias must raise BuildError; no marketplace.json written.""" + yml = _UPSTREAM_ONLY_YML.replace("plugin: gitnexus", "plugin: does-not-exist") + yml_path = _write_yml(tmp_path, yml) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + 
fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True, continue_on_error=True) + builder = MarketplaceBuilder(yml_path, options) + _patch_resolver_factory(builder, cache=cache) + + with pytest.raises(BuildError): + builder.build() + + # Output path must not exist -- fail-closed gate prevented write. + output_path = tmp_path / ".claude-plugin" / "marketplace.json" + assert not output_path.exists() + + +def test_upstream_only_build_emits_valid_marketplace(tmp_path: Path) -> None: + """Build with only upstream packages produces a valid marketplace.json.""" + yml_path = _write_yml(tmp_path, _UPSTREAM_ONLY_YML) + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + assert report.errors == () + assert len(report.upstream_resolved) == 1 + + import json + + doc = json.loads(report.output_path.read_text(encoding="utf-8")) + assert [p["name"] for p in doc["plugins"]] == ["acme-gitnexus"] + # Round-trip parse must succeed. + parsed = parse_marketplace_json(doc) + assert len(parsed.plugins) == 1 + + +# --------------------------------------------------------------------------- +# Reproducibility + regression-trap (test-coverage panel recommendations) +# --------------------------------------------------------------------------- + + +def test_byte_identical_rebuild_produces_same_output(tmp_path: Path) -> None: + """Rebuilding from the same lock+inputs yields byte-identical bytes. + + Regression trap for the reproducibility contract: two consecutive + builds against an identical mixed (direct + upstream) ``apm.yml`` + with the same upstream manifest fixture must emit the same + ``marketplace.json`` byte stream. 
Any non-determinism (timestamp + leak, dict-iteration order, formatter drift) breaks this. + """ + fixture_manifest = _gitnexus_manifest() + + def _build_once(work_dir: Path) -> bytes: + work_dir.mkdir(parents=True, exist_ok=True) + yml_path = _write_yml(work_dir, _MIXED_YML) + cache = UpstreamCache( + base_dir=work_dir / ".cache", + fetch_callback=MagicMock(return_value=fixture_manifest), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(builder, cache=cache) + report = builder.build() + assert report.errors == () + return report.output_path.read_bytes() + + first = _build_once(tmp_path / "build-a") + second = _build_once(tmp_path / "build-b") + assert first == second + + +def test_upstream_does_not_mutate_direct_package_emission(tmp_path: Path) -> None: + """Adding an upstream entry does not change the direct subset's bytes. + + Regression trap: the direct-package emission produced by a mixed + (direct + upstream) build must match the direct-only emission of + the same direct entry, plugin-by-plugin. If upstream bookkeeping + ever leaks into the direct emission path, this fails. + """ + import json + + direct_only_yml = """\ +name: acme-marketplace +description: ACME curated marketplace +version: 0.1.0 +marketplace: + owner: + name: ACME Corp + packages: + - name: direct-tool + source: acme/direct-tool + version: "^1.0.0" +""" + + # Direct-only build. 
+ direct_dir = tmp_path / "direct-only" + direct_dir.mkdir(parents=True, exist_ok=True) + direct_yml = _write_yml(direct_dir, direct_only_yml) + direct_builder = MarketplaceBuilder(direct_yml, BuildOptions(offline=True)) + direct_builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + direct_report = direct_builder.build() + direct_doc = json.loads(direct_report.output_path.read_text(encoding="utf-8")) + direct_plugin = next(p for p in direct_doc["plugins"] if p["name"] == "direct-tool") + + # Mixed build. + mixed_dir = tmp_path / "mixed" + mixed_dir.mkdir(parents=True, exist_ok=True) + mixed_yml = _write_yml(mixed_dir, _MIXED_YML) + cache = UpstreamCache( + base_dir=mixed_dir / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + mixed_builder = MarketplaceBuilder(mixed_yml, BuildOptions(offline=True)) + mixed_builder._resolver = _MockRefResolver( # type: ignore[assignment] + {"acme/direct-tool": [RemoteRef(name="refs/tags/v1.0.0", sha=SHA_DIRECT)]} + ) + _patch_resolver_factory(mixed_builder, cache=cache) + mixed_report = mixed_builder.build() + mixed_doc = json.loads(mixed_report.output_path.read_text(encoding="utf-8")) + mixed_direct_plugin = next(p for p in mixed_doc["plugins"] if p["name"] == "direct-tool") + + # The direct plugin's emitted dict must match across builds. + assert direct_plugin == mixed_direct_plugin diff --git a/tests/unit/marketplace/test_lockfile_upstreams.py b/tests/unit/marketplace/test_lockfile_upstreams.py new file mode 100644 index 000000000..899fccb3f --- /dev/null +++ b/tests/unit/marketplace/test_lockfile_upstreams.py @@ -0,0 +1,294 @@ +"""Tests for ``apm.lock.yaml`` upstream provenance writing. + +Covers the LockedUpstream / LockedUpstreamPlugin dataclasses and the +end-to-end path where ``MarketplaceBuilder.build()`` persists upstream +provenance into ``apm.lock.yaml`` after a successful build. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import yaml + +from apm_cli.deps.lockfile import ( + LockedUpstream, + LockedUpstreamPlugin, + LockFile, + get_lockfile_path, +) +from apm_cli.marketplace.builder import BuildOptions, MarketplaceBuilder +from apm_cli.marketplace.ref_resolver import RemoteRef +from apm_cli.marketplace.upstream_cache import UpstreamCache +from apm_cli.marketplace.upstream_resolver import UpstreamResolver + +SHA_DIRECT = "a" * 40 +SHA_UPSTREAM_MANIFEST = "b" * 40 +SHA_UPSTREAM_PLUGIN = "c" * 40 + + +# --------------------------------------------------------------------------- +# Dataclass round-trips +# --------------------------------------------------------------------------- + + +class TestLockedUpstreamRoundTrip: + def test_to_dict_includes_required_fields(self) -> None: + up = LockedUpstream( + alias="gitnexus", + host="github.com", + owner="abhigyanpatwari", + repo="GitNexus", + path=".claude-plugin/marketplace.json", + manifest_sha=SHA_UPSTREAM_MANIFEST, + canonical_full_name="abhigyanpatwari/GitNexus", + refreshed_at="2024-01-01T00:00:00+00:00", + ) + data = up.to_dict() + assert data["host"] == "github.com" + assert data["owner"] == "abhigyanpatwari" + assert data["repo"] == "GitNexus" + assert data["path"] == ".claude-plugin/marketplace.json" + assert data["manifest_sha"] == SHA_UPSTREAM_MANIFEST + assert data["canonical_full_name"] == "abhigyanpatwari/GitNexus" + assert data["refreshed_at"] == "2024-01-01T00:00:00+00:00" + # Plugins absent from output when empty. 
+ assert "plugins" not in data + + def test_to_dict_with_plugins_sorted_by_upstream_name(self) -> None: + up = LockedUpstream( + alias="gitnexus", + host="github.com", + owner="abhigyanpatwari", + repo="GitNexus", + path=".claude-plugin/marketplace.json", + manifest_sha=SHA_UPSTREAM_MANIFEST, + plugins={ + "zebra": LockedUpstreamPlugin( + upstream_name="zebra", + emitted_as="acme-zebra", + resolved_sha=SHA_UPSTREAM_PLUGIN, + resolved_source={"sha": SHA_UPSTREAM_PLUGIN}, + ), + "alpha": LockedUpstreamPlugin( + upstream_name="alpha", + emitted_as="acme-alpha", + resolved_sha=SHA_DIRECT, + resolved_source={"sha": SHA_DIRECT}, + ), + }, + ) + data = up.to_dict() + # Keys sorted alphabetically for deterministic output. + assert list(data["plugins"].keys()) == ["alpha", "zebra"] + + def test_round_trip_preserves_data(self) -> None: + original = LockedUpstream( + alias="gitnexus", + host="github.com", + owner="abhigyanpatwari", + repo="GitNexus", + path=".claude-plugin/marketplace.json", + manifest_sha=SHA_UPSTREAM_MANIFEST, + canonical_full_name="abhigyanpatwari/GitNexus", + plugins={ + "gitnexus": LockedUpstreamPlugin( + upstream_name="gitnexus", + emitted_as="acme-gitnexus", + resolved_sha=SHA_UPSTREAM_PLUGIN, + resolved_source={ + "host": "github.com", + "repo": "abhigyanpatwari/GitNexus", + "sha": SHA_UPSTREAM_PLUGIN, + }, + ), + }, + ) + data = original.to_dict() + restored = LockedUpstream.from_dict("gitnexus", data) + assert restored.host == original.host + assert restored.owner == original.owner + assert restored.repo == original.repo + assert restored.manifest_sha == original.manifest_sha + assert restored.canonical_full_name == original.canonical_full_name + assert "gitnexus" in restored.plugins + plugin = restored.plugins["gitnexus"] + assert plugin.emitted_as == "acme-gitnexus" + assert plugin.resolved_sha == SHA_UPSTREAM_PLUGIN + assert plugin.resolved_source["sha"] == SHA_UPSTREAM_PLUGIN + + +# 
--------------------------------------------------------------------------- +# LockFile YAML round-trip with upstreams +# --------------------------------------------------------------------------- + + +class TestLockFileUpstreams: + def test_empty_upstreams_omitted_from_yaml(self) -> None: + lock = LockFile() + out = lock.to_yaml() + assert "upstreams:" not in out + + def test_yaml_round_trip_preserves_upstreams(self) -> None: + lock = LockFile() + lock.upstreams["gitnexus"] = LockedUpstream( + alias="gitnexus", + host="github.com", + owner="abhigyanpatwari", + repo="GitNexus", + path=".claude-plugin/marketplace.json", + manifest_sha=SHA_UPSTREAM_MANIFEST, + plugins={ + "gitnexus": LockedUpstreamPlugin( + upstream_name="gitnexus", + emitted_as="acme-gitnexus", + resolved_sha=SHA_UPSTREAM_PLUGIN, + resolved_source={"sha": SHA_UPSTREAM_PLUGIN}, + ), + }, + ) + yaml_str = lock.to_yaml() + assert "upstreams:" in yaml_str + assert "gitnexus:" in yaml_str + + restored = LockFile.from_yaml(yaml_str) + assert "gitnexus" in restored.upstreams + gx = restored.upstreams["gitnexus"] + assert gx.host == "github.com" + assert gx.manifest_sha == SHA_UPSTREAM_MANIFEST + assert "gitnexus" in gx.plugins + + +# --------------------------------------------------------------------------- +# Builder integration: writes upstream provenance after build +# --------------------------------------------------------------------------- + + +_YML = """\ +name: acme-marketplace +description: ACME curated marketplace +version: 0.1.0 +marketplace: + owner: + name: ACME Corp + upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + packages: + - name: acme-gitnexus + upstream: gitnexus + plugin: gitnexus +""" + + +def _gitnexus_manifest() -> dict: + return { + "name": "gitnexus-marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "description": "GitNexus plugin", + "version": "1.0.0", + "source": { 
+ "type": "git-subdir", + "repo": "abhigyanpatwari/GitNexus", + "path": "gitnexus-claude-plugin", + "sha": SHA_UPSTREAM_PLUGIN, + }, + } + ], + } + + +class _MockRefResolver: + def __init__(self) -> None: + self._refs: dict[str, list[RemoteRef]] = {} + + def list_remote_refs(self, owner_repo: str) -> list[RemoteRef]: + from apm_cli.marketplace.errors import GitLsRemoteError + + raise GitLsRemoteError(package="", summary="Not used", hint="") + + def close(self) -> None: + pass + + +def _patch_resolver_factory(builder: MarketplaceBuilder, *, cache: UpstreamCache) -> None: + def _factory(yml): # type: ignore[no-untyped-def] + upstreams_by_alias = {u.alias: u for u in yml.upstreams} + + def _ref_to_sha(host: str, owner: str, repo: str, ref: str) -> str: + return SHA_UPSTREAM_MANIFEST + + return UpstreamResolver( + upstreams=upstreams_by_alias, + cache=cache, + ref_to_sha=_ref_to_sha, + canonical_full_name=None, + ) + + builder._build_upstream_resolver = _factory # type: ignore[assignment] + + +def test_build_writes_upstream_section_to_lockfile(tmp_path: Path) -> None: + """A successful build with upstream packages writes provenance to apm.lock.yaml.""" + yml_path = tmp_path / "apm.yml" + yml_path.write_text(_YML, encoding="utf-8") + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver() # type: ignore[assignment] + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + assert report.errors == () + + lockfile_path = get_lockfile_path(tmp_path) + assert lockfile_path.exists() + + data = yaml.safe_load(lockfile_path.read_text(encoding="utf-8")) + assert "upstreams" in data + assert "gitnexus" in data["upstreams"] + upstream = data["upstreams"]["gitnexus"] + assert upstream["host"] == "github.com" + assert upstream["owner"] == "abhigyanpatwari" + assert 
upstream["repo"] == "GitNexus" + assert upstream["manifest_sha"] == SHA_UPSTREAM_MANIFEST + assert "plugins" in upstream + plugin = upstream["plugins"]["gitnexus"] + assert plugin["emitted_as"] == "acme-gitnexus" + assert plugin["resolved_sha"] == SHA_UPSTREAM_PLUGIN + assert plugin["resolved_source"]["sha"] == SHA_UPSTREAM_PLUGIN + + +def test_build_does_not_inject_apm_metadata_when_lockfile_has_upstreams( + tmp_path: Path, +) -> None: + """marketplace.json must remain Anthropic-conformant even when lock has upstreams.""" + yml_path = tmp_path / "apm.yml" + yml_path.write_text(_YML, encoding="utf-8") + cache = UpstreamCache( + base_dir=tmp_path / ".cache", + fetch_callback=MagicMock(return_value=_gitnexus_manifest()), + ) + options = BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, options) + builder._resolver = _MockRefResolver() # type: ignore[assignment] + _patch_resolver_factory(builder, cache=cache) + + report = builder.build() + doc = json.loads(report.output_path.read_text(encoding="utf-8")) + # No top-level apm key, no per-plugin apm key, no per-plugin metadata.apm. + assert "apm" not in doc + for plugin in doc["plugins"]: + assert "apm" not in plugin + meta = plugin.get("metadata", {}) + if isinstance(meta, dict): + assert "apm" not in meta diff --git a/tests/unit/marketplace/test_upstream_cache.py b/tests/unit/marketplace/test_upstream_cache.py new file mode 100644 index 000000000..8fc8f2184 --- /dev/null +++ b/tests/unit/marketplace/test_upstream_cache.py @@ -0,0 +1,383 @@ +"""Unit tests for ``apm_cli.marketplace.upstream_cache``. + +Locks the cache-key contract (Windows-safe, hashed, delimiter-rejecting), +the integrity-check semantics (poisoned entries treated as miss), +the per-upstream-host auth path (no curator-PAT leakage), and the +classify-host defence-in-depth guard. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from apm_cli.marketplace.upstream_cache import ( + UpstreamCache, + UpstreamCacheError, + UpstreamCacheKey, + compute_cache_key, +) + +# Reusable, valid sample inputs. 40-char hex SHA. +GOOD_SHA = "a" * 40 +GOOD_SHA_2 = "b" * 40 +GOOD_INPUTS = { + "host": "github.com", + "owner": "abhigyanpatwari", + "repo": "GitNexus", + "sha": GOOD_SHA, + "path": ".claude-plugin/marketplace.json", +} + + +# --------------------------------------------------------------------------- +# compute_cache_key validation +# --------------------------------------------------------------------------- + + +class TestComputeCacheKey: + def test_happy_path(self): + key = compute_cache_key(**GOOD_INPUTS) + assert isinstance(key, UpstreamCacheKey) + assert key.host == "github.com" + assert key.owner == "abhigyanpatwari" + assert key.repo == "GitNexus" + assert key.sha == GOOD_SHA + assert key.path == ".claude-plugin/marketplace.json" + + def test_composite_uses_double_underscore_delim(self): + key = compute_cache_key(**GOOD_INPUTS) + # ``__`` between every segment, exactly 5 occurrences: + # upstream__host__owner__repo__sha__path + assert key.composite.count("__") == 5 + assert key.composite.startswith("upstream__") + # Single underscores from internal names must survive. + assert "abhigyanpatwari" in key.composite + + def test_directory_name_is_windows_safe(self): + """The on-disk dir name must contain ZERO colons (NTFS-illegal).""" + key = compute_cache_key(**GOOD_INPUTS) + assert ":" not in key.directory_name + # And must still be a single path segment (no slashes). 
+ assert "/" not in key.directory_name + assert "\\" not in key.directory_name + + def test_fingerprint_is_16_hex_chars(self): + key = compute_cache_key(**GOOD_INPUTS) + assert len(key.fingerprint) == 16 + int(key.fingerprint, 16) # must parse as hex + + def test_fingerprint_stable_across_calls(self): + k1 = compute_cache_key(**GOOD_INPUTS) + k2 = compute_cache_key(**GOOD_INPUTS) + assert k1.fingerprint == k2.fingerprint + assert k1.directory_name == k2.directory_name + + def test_different_sha_produces_different_fingerprint(self): + k1 = compute_cache_key(**GOOD_INPUTS) + k2 = compute_cache_key(**{**GOOD_INPUTS, "sha": GOOD_SHA_2}) + assert k1.fingerprint != k2.fingerprint + + def test_different_path_produces_different_fingerprint(self): + k1 = compute_cache_key(**GOOD_INPUTS) + k2 = compute_cache_key(**{**GOOD_INPUTS, "path": "claude-plugin/marketplace.json"}) + assert k1.fingerprint != k2.fingerprint + + @pytest.mark.parametrize( + "field", + ["host", "owner", "repo", "sha", "path"], + ) + def test_rejects_double_underscore_in_input(self, field): + bad = {**GOOD_INPUTS, field: GOOD_INPUTS[field][:-1] + "__bad"} + with pytest.raises(UpstreamCacheError, match="cache delimiter"): + compute_cache_key(**bad) + + @pytest.mark.parametrize("field", list(GOOD_INPUTS.keys())) + def test_rejects_empty_input(self, field): + bad = {**GOOD_INPUTS, field: ""} + with pytest.raises(UpstreamCacheError): + compute_cache_key(**bad) + + @pytest.mark.parametrize("field", list(GOOD_INPUTS.keys())) + def test_rejects_none_input(self, field): + bad = {**GOOD_INPUTS, field: None} + with pytest.raises(UpstreamCacheError): + compute_cache_key(**bad) + + def test_rejects_short_sha(self): + with pytest.raises(UpstreamCacheError, match="full 40-char hex SHA"): + compute_cache_key(**{**GOOD_INPUTS, "sha": "abc1234"}) + + def test_rejects_uppercase_sha(self): + with pytest.raises(UpstreamCacheError, match="full 40-char hex SHA"): + compute_cache_key(**{**GOOD_INPUTS, "sha": "A" * 40}) + + def 
test_rejects_absolute_path(self): + with pytest.raises(UpstreamCacheError, match="must be non-empty and relative"): + compute_cache_key(**{**GOOD_INPUTS, "path": "/etc/passwd"}) + + def test_rejects_traversal_path(self): + with pytest.raises(UpstreamCacheError): + compute_cache_key(**{**GOOD_INPUTS, "path": "../../etc/passwd"}) + + def test_rejects_invalid_owner_repo_with_leading_dot(self): + with pytest.raises(UpstreamCacheError, match="invalid upstream owner/repo"): + compute_cache_key(**{**GOOD_INPUTS, "owner": ".secret"}) + + def test_rejects_invalid_host(self): + with pytest.raises(UpstreamCacheError, match="invalid upstream host"): + compute_cache_key(**{**GOOD_INPUTS, "host": "not a host"}) + + +# --------------------------------------------------------------------------- +# UpstreamCache get / put / integrity check +# --------------------------------------------------------------------------- + + +class TestUpstreamCacheReadWrite: + def test_miss_returns_none(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + assert cache.get(key) is None + + def test_put_then_get_roundtrip(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + manifest = {"name": "gitnexus", "plugins": [{"name": "gitnexus"}]} + cache.put(key, manifest) + assert cache.get(key) == manifest + + def test_put_writes_manifest_and_meta_files(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + cache.put(key, {"name": "x"}) + entry_dir = tmp_path / key.directory_name + assert (entry_dir / "manifest.json").exists() + assert (entry_dir / "meta.json").exists() + meta = json.loads((entry_dir / "meta.json").read_text(encoding="utf-8")) + assert meta["sha"] == key.sha + assert meta["host"] == key.host + assert meta["owner"] == key.owner + assert meta["repo"] == key.repo + assert meta["path"] == key.path + + def 
test_integrity_mismatch_treated_as_miss(self, tmp_path: Path): + """A poisoned meta.json with the wrong SHA is treated as cache miss.""" + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + cache.put(key, {"name": "x"}) + # Tamper with meta to simulate a poisoned entry / disk + # corruption / older code that wrote an inconsistent record. + meta_path = tmp_path / key.directory_name / "meta.json" + meta_path.write_text(json.dumps({"sha": GOOD_SHA_2, "host": key.host}), encoding="utf-8") + # Cache miss -- the recorded SHA does not match the requested SHA. + assert cache.get(key) is None + + def test_corrupt_manifest_treated_as_miss(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + cache.put(key, {"name": "x"}) + manifest_path = tmp_path / key.directory_name / "manifest.json" + manifest_path.write_text("not json {{{", encoding="utf-8") + assert cache.get(key) is None + + def test_corrupt_meta_treated_as_miss(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + cache.put(key, {"name": "x"}) + meta_path = tmp_path / key.directory_name / "meta.json" + meta_path.write_text("not json {{{", encoding="utf-8") + assert cache.get(key) is None + + def test_different_keys_have_separate_directories(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path) + k1 = compute_cache_key(**GOOD_INPUTS) + k2 = compute_cache_key(**{**GOOD_INPUTS, "sha": GOOD_SHA_2}) + cache.put(k1, {"v": 1}) + cache.put(k2, {"v": 2}) + assert cache.get(k1) == {"v": 1} + assert cache.get(k2) == {"v": 2} + + +# --------------------------------------------------------------------------- +# get_or_fetch behaviour +# --------------------------------------------------------------------------- + + +class TestGetOrFetch: + def test_hit_does_not_call_fetch(self, tmp_path: Path): + fetch = MagicMock() + cache = UpstreamCache(base_dir=tmp_path, 
fetch_callback=fetch) + key = compute_cache_key(**GOOD_INPUTS) + cache.put(key, {"cached": True}) + result = cache.get_or_fetch(key) + assert result == {"cached": True} + fetch.assert_not_called() + + def test_miss_calls_fetch_and_caches(self, tmp_path: Path): + fetch = MagicMock(return_value={"fetched": True}) + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=fetch) + key = compute_cache_key(**GOOD_INPUTS) + result = cache.get_or_fetch(key) + assert result == {"fetched": True} + fetch.assert_called_once() + # Second call hits cache. + cache.get_or_fetch(key) + fetch.assert_called_once() + + def test_offline_with_miss_raises(self, tmp_path: Path): + fetch = MagicMock() + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=fetch) + key = compute_cache_key(**GOOD_INPUTS) + with pytest.raises(UpstreamCacheError, match="offline mode"): + cache.get_or_fetch(key, offline=True) + fetch.assert_not_called() + + def test_offline_with_hit_returns_cached(self, tmp_path: Path): + fetch = MagicMock() + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=fetch) + key = compute_cache_key(**GOOD_INPUTS) + cache.put(key, {"cached": True}) + result = cache.get_or_fetch(key, offline=True) + assert result == {"cached": True} + fetch.assert_not_called() + + def test_fetch_returning_non_dict_raises(self, tmp_path: Path): + fetch = MagicMock(return_value=["not", "a", "dict"]) + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=fetch) + key = compute_cache_key(**GOOD_INPUTS) + with pytest.raises(UpstreamCacheError, match="non-JSON-object"): + cache.get_or_fetch(key) + + +# --------------------------------------------------------------------------- +# Default fetch callback: classify-host guard + auth flow +# --------------------------------------------------------------------------- + + +class TestDefaultFetchAuthFlow: + def test_non_github_host_refused_before_token_resolution(self, tmp_path: Path): + """Defence-in-depth: never forward GitHub creds to a non-GitHub 
host.""" + # Use a host shape the validator accepts but classify_host rejects. + # gitlab.com classifies as ``generic``. + bad_inputs = {**GOOD_INPUTS, "host": "gitlab.com"} + key = compute_cache_key(**bad_inputs) + cache = UpstreamCache(base_dir=tmp_path) + # Inject a sentinel auth_resolver -- the guard must trip BEFORE + # any auth call is made. + sentinel_auth = MagicMock() + with pytest.raises(UpstreamCacheError, match="not a supported GitHub variant"): + cache.get_or_fetch(key, auth_resolver=sentinel_auth) + sentinel_auth.try_with_fallback.assert_not_called() + sentinel_auth.resolve.assert_not_called() + + def test_github_fetch_uses_unauth_first(self, tmp_path: Path, monkeypatch): + """Curator PAT must NOT be attached to a public-repo upstream fetch.""" + captured: dict = {} + + class FakeAuth: + def try_with_fallback(self, host, op, *, org=None, port=None, unauth_first=False, **kw): + captured["host"] = host + captured["org"] = org + captured["unauth_first"] = unauth_first + # Simulate the unauth path: token=None, empty git env. + return op(None, {}) + + # Stub requests.get so the operation completes. + class FakeResp: + status_code = 200 + + def json(self): + return {"name": "x"} + + def raise_for_status(self): + pass + + def fake_get(url, headers=None, timeout=None): + captured["url"] = url + captured["headers"] = headers or {} + return FakeResp() + + monkeypatch.setattr( + "apm_cli.marketplace.upstream_cache.requests", + type("R", (), {"get": staticmethod(fake_get)}), + raising=False, + ) + # Patch via the import path used inside the function. + import requests + + monkeypatch.setattr(requests, "get", fake_get) + + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + result = cache.get_or_fetch(key, auth_resolver=FakeAuth()) + assert result == {"name": "x"} + # unauth_first must be True (no curator PAT on public upstreams). 
+ assert captured["unauth_first"] is True + # org passes the upstream owner, NOT the curator's repo owner. + assert captured["org"] == "abhigyanpatwari" + # No Authorization header attached when token is None. + assert "Authorization" not in captured["headers"] + # URL targets the GitHub Contents API at the pinned SHA. + assert "/repos/abhigyanpatwari/GitNexus/contents/" in captured["url"] + assert f"ref={GOOD_SHA}" in captured["url"] + + def test_github_fetch_attaches_token_when_provided(self, tmp_path: Path, monkeypatch): + """When AuthResolver returns a token (e.g. private upstream, EMU), attach it.""" + captured: dict = {} + + class FakeAuth: + def try_with_fallback(self, host, op, *, org=None, port=None, unauth_first=False, **kw): + # Simulate the auth path: token attached. + return op("ghp_secret_token", {"GIT_TERMINAL_PROMPT": "0"}) + + class FakeResp: + status_code = 200 + + def json(self): + return {"name": "x"} + + def raise_for_status(self): + pass + + def fake_get(url, headers=None, timeout=None): + captured["headers"] = headers or {} + return FakeResp() + + import requests + + monkeypatch.setattr(requests, "get", fake_get) + + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + cache.get_or_fetch(key, auth_resolver=FakeAuth()) + assert captured["headers"].get("Authorization") == "token ghp_secret_token" + + def test_404_raises_named_error(self, tmp_path: Path, monkeypatch): + class FakeAuth: + def try_with_fallback(self, host, op, **kw): + return op(None, {}) + + class FakeResp: + status_code = 404 + + def raise_for_status(self): + raise AssertionError("must not call raise_for_status on 404") + + def json(self): + return {} + + def fake_get(url, headers=None, timeout=None): + return FakeResp() + + import requests + + monkeypatch.setattr(requests, "get", fake_get) + + cache = UpstreamCache(base_dir=tmp_path) + key = compute_cache_key(**GOOD_INPUTS) + with pytest.raises(UpstreamCacheError, match="not found"): + 
cache.get_or_fetch(key, auth_resolver=FakeAuth()) diff --git a/tests/unit/marketplace/test_upstream_parser.py b/tests/unit/marketplace/test_upstream_parser.py new file mode 100644 index 000000000..06cf26912 --- /dev/null +++ b/tests/unit/marketplace/test_upstream_parser.py @@ -0,0 +1,511 @@ +"""Tests for the strict upstream marketplace.json parser. + +Counterpart to ``tests/unit/marketplace/test_models.py`` (which covers +the lenient consumer parser). The strict parser is the line of +defence for curator-side upstream resolution -- every rejection must be +named so curators see exactly which entry failed and why. +""" + +from __future__ import annotations + +import pytest + +from apm_cli.marketplace.upstream_parser import ( + REJECTION_REASONS, + StrictManifest, + StrictPlugin, + StrictPluginSource, + StrictRejection, + parse_marketplace_strict, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _parse( + plugins: list[dict], + *, + metadata: dict | None = None, + upstream_owner_repo: str = "abhigyanpatwari/GitNexus", + upstream_host: str = "github.com", +) -> StrictManifest: + data: dict = { + "name": "gitnexus-marketplace", + "owner": {"name": "GitNexus", "email": "x@example.com"}, + "plugins": plugins, + } + if metadata is not None: + data["metadata"] = metadata + return parse_marketplace_strict( + data, + upstream_owner_repo=upstream_owner_repo, + upstream_host=upstream_host, + ) + + +def _only_rejection(manifest: StrictManifest) -> StrictRejection: + """Assert exactly one rejection and return it.""" + assert len(manifest.plugins) == 0, ( + f"expected no accepted plugins, got {[p.name for p in manifest.plugins]}" + ) + assert len(manifest.rejections) == 1, ( + f"expected exactly one rejection, got {len(manifest.rejections)}" + ) + return manifest.rejections[0] + + +# 
--------------------------------------------------------------------------- +# Manifest-level shape +# --------------------------------------------------------------------------- + + +class TestManifestRoot: + def test_root_must_be_object(self): + with pytest.raises(TypeError, match="root must be a JSON object"): + parse_marketplace_strict( + ["not", "an", "object"], # type: ignore[arg-type] + upstream_owner_repo="o/r", + ) + + def test_unsupported_host_parsed_without_rejection(self): + # B4: host validation is performed by the resolver layer, NOT the + # parser. The parser must accept non-github.com upstream hosts so + # that GHE upstreams work without a parse-time gate. + manifest = parse_marketplace_strict( + {"name": "x", "plugins": []}, + upstream_owner_repo="o/r", + upstream_host="gitlab.com", + ) + assert manifest.rejections == () + assert manifest.plugins == () + + def test_invalid_owner_repo_raises(self): + # Wrong-shape upstream coordinates are a programming error + # (the schema already validated them), so we raise rather + # than emitting a per-entry rejection. 
+ with pytest.raises(ValueError, match="owner/repo"): + parse_marketplace_strict( + {"name": "x", "plugins": []}, + upstream_owner_repo="bad-shape", + ) + + def test_plugins_not_list_yields_rejection(self): + manifest = parse_marketplace_strict( + {"name": "x", "plugins": "not-a-list"}, + upstream_owner_repo="o/r", + ) + assert manifest.plugins == () + assert len(manifest.rejections) == 1 + assert manifest.rejections[0].reason == "invalid-source-shape" + + def test_owner_dict_extracted(self): + manifest = _parse([]) + assert manifest.owner_name == "GitNexus" + + def test_owner_string_tolerated(self): + data = {"name": "x", "owner": "ACME", "plugins": []} + manifest = parse_marketplace_strict(data, upstream_owner_repo="o/r") + assert manifest.owner_name == "ACME" + + def test_plugin_root_normalised(self): + manifest = _parse([], metadata={"pluginRoot": "./plugins"}) + assert manifest.plugin_root == "plugins" + + def test_known_rejection_reasons_form_closed_set(self): + # The published REJECTION_REASONS set must stay closed -- if a + # contributor adds a new reason without listing it here, the + # CLI rejection-reason mapping will silently miss it. 
+ assert "missing-source" in REJECTION_REASONS + assert "invalid-repo" in REJECTION_REASONS + assert "npm-source" in REJECTION_REASONS + + +# --------------------------------------------------------------------------- +# Source-shape support matrix -- happy paths +# --------------------------------------------------------------------------- + + +class TestSourceShapesHappy: + def test_repository_shape_minimal(self): + manifest = _parse( + [ + { + "name": "p", + "repository": "owner/repo", + } + ] + ) + assert manifest.rejections == () + assert len(manifest.plugins) == 1 + plugin = manifest.plugins[0] + assert isinstance(plugin, StrictPlugin) + assert plugin.source == StrictPluginSource( + type="github", + host="github.com", + repo="owner/repo", + ) + + def test_repository_shape_with_ref_and_sha(self): + sha = "a" * 40 + manifest = _parse( + [ + { + "name": "p", + "repository": "owner/repo", + "ref": "v1.2.3", + "sha": sha, + } + ] + ) + assert manifest.rejections == () + plugin = manifest.plugins[0] + assert plugin.source.ref == "v1.2.3" + assert plugin.source.sha == sha + + def test_github_dict_shape(self): + manifest = _parse( + [ + { + "name": "p", + "source": { + "type": "github", + "repo": "owner/repo", + "ref": "main", + }, + } + ] + ) + plugin = manifest.plugins[0] + assert plugin.source.type == "github" + assert plugin.source.repo == "owner/repo" + assert plugin.source.ref == "main" + assert plugin.source.subdir is None + + def test_git_subdir_dict_shape(self): + manifest = _parse( + [ + { + "name": "p", + "source": { + "type": "git-subdir", + "repo": "owner/repo", + "path": "subpath/here", + "ref": "v1.0.0", + }, + } + ] + ) + plugin = manifest.plugins[0] + assert plugin.source.type == "git-subdir" + assert plugin.source.subdir == "subpath/here" + + def test_string_shorthand_with_plugin_root(self): + # GitNexus uses this shape exactly. 
+ manifest = _parse( + [ + { + "name": "gitnexus", + "source": "./gitnexus-claude-plugin", + } + ], + metadata={"pluginRoot": "."}, + ) + plugin = manifest.plugins[0] + assert plugin.source.type == "git-subdir" + assert plugin.source.repo == "abhigyanpatwari/GitNexus" + assert plugin.source.subdir == "gitnexus-claude-plugin" + + def test_string_shorthand_with_explicit_plugin_root(self): + manifest = _parse( + [{"name": "p", "source": "./foo"}], + metadata={"pluginRoot": "plugins"}, + ) + plugin = manifest.plugins[0] + assert plugin.source.subdir == "plugins/foo" + + def test_bare_name_string_source(self): + # ``foo`` (no ``./``) is also acceptable. + manifest = _parse([{"name": "p", "source": "foo"}]) + plugin = manifest.plugins[0] + assert plugin.source.type == "git-subdir" + assert plugin.source.subdir == "foo" + + +# --------------------------------------------------------------------------- +# Per-entry rejections (named-reason contract) +# --------------------------------------------------------------------------- + + +class TestEntryRejections: + def test_missing_name_rejected(self): + rej = _only_rejection(_parse([{"repository": "o/r"}])) + assert rej.reason == "missing-name" + assert rej.plugin_name == "" + + def test_blank_name_rejected(self): + rej = _only_rejection(_parse([{"name": " ", "repository": "o/r"}])) + assert rej.reason == "missing-name" + + def test_unknown_plugin_key_rejected(self): + rej = _only_rejection(_parse([{"name": "p", "repository": "o/r", "soruce": "typo"}])) + assert rej.reason == "unknown-plugin-key" + assert "soruce" in rej.detail + + def test_missing_source_and_repository(self): + rej = _only_rejection(_parse([{"name": "p"}])) + assert rej.reason == "missing-source" + + def test_both_source_and_repository_ambiguous(self): + rej = _only_rejection( + _parse( + [ + { + "name": "p", + "repository": "o/r", + "source": {"type": "github", "repo": "o/r"}, + } + ] + ) + ) + assert rej.reason == "ambiguous-source" + + def 
test_invalid_repository_string(self): + rej = _only_rejection(_parse([{"name": "p", "repository": "no-slash"}])) + assert rej.reason == "invalid-repo" + + def test_npm_source_type_rejected(self): + rej = _only_rejection( + _parse( + [ + { + "name": "p", + "source": {"type": "npm", "package": "@foo/bar"}, + } + ] + ) + ) + assert rej.reason == "npm-source" + + def test_unsupported_source_type(self): + rej = _only_rejection(_parse([{"name": "p", "source": {"type": "tarball", "url": "x"}}])) + assert rej.reason == "unsupported-source-type" + + def test_unknown_source_key(self): + rej = _only_rejection( + _parse( + [ + { + "name": "p", + "source": { + "type": "github", + "repo": "o/r", + "extraneous": "junk", + }, + } + ] + ) + ) + assert rej.reason == "unknown-source-key" + + def test_cross_host_source_accepted_by_parser(self): + # B4: host validation is performed by the resolver layer. The parser + # accepts cross-host source objects (e.g. a plugin hosted on an + # enterprise GHE instance) and stores the host so the resolver can + # apply its own per-host auth logic. 
+ manifest = _parse( + [ + { + "name": "p", + "source": { + "type": "github", + "repo": "o/r", + "host": "gitlab.com", + }, + } + ] + ) + assert manifest.rejections == () + assert len(manifest.plugins) == 1 + assert manifest.plugins[0].source.host == "gitlab.com" + + def test_url_form_rejected(self): + rej = _only_rejection( + _parse( + [ + { + "name": "p", + "source": { + "type": "git-subdir", + "url": "https://github.com/o/r", + "path": "p", + }, + } + ] + ) + ) + assert rej.reason == "invalid-repo" + + def test_invalid_repo_in_dict_source(self): + rej = _only_rejection( + _parse([{"name": "p", "source": {"type": "github", "repo": "no-slash"}}]) + ) + assert rej.reason == "invalid-repo" + + def test_git_subdir_missing_path(self): + rej = _only_rejection( + _parse([{"name": "p", "source": {"type": "git-subdir", "repo": "o/r"}}]) + ) + assert rej.reason == "missing-subdir" + + def test_path_traversal_in_dict_source(self): + rej = _only_rejection( + _parse( + [ + { + "name": "p", + "source": { + "type": "git-subdir", + "repo": "o/r", + "path": "../escape", + }, + } + ] + ) + ) + assert rej.reason == "path-traversal" + + def test_path_traversal_in_string_source(self): + rej = _only_rejection(_parse([{"name": "p", "source": "../escape"}])) + assert rej.reason == "path-traversal" + + def test_absolute_path_in_string_source(self): + rej = _only_rejection(_parse([{"name": "p", "source": "/etc/passwd"}])) + assert rej.reason == "path-traversal" + + def test_url_encoded_traversal_in_string_source(self): + rej = _only_rejection(_parse([{"name": "p", "source": "%2e%2e/secret"}])) + assert rej.reason == "path-traversal" + + def test_invalid_ref_shape(self): + rej = _only_rejection(_parse([{"name": "p", "repository": "o/r", "ref": "has space"}])) + assert rej.reason == "invalid-ref" + + def test_invalid_sha_truncated(self): + rej = _only_rejection(_parse([{"name": "p", "repository": "o/r", "sha": "abcd1234"}])) + assert rej.reason == "invalid-sha" + + def 
test_invalid_source_shape_non_string_non_dict(self): + rej = _only_rejection(_parse([{"name": "p", "source": 42}])) + assert rej.reason == "invalid-source-shape" + + def test_empty_string_source(self): + # Empty source string is structurally present but unusable; + # the parser surfaces this as ``invalid-source-shape`` rather + # than ``missing-source`` so the curator sees a different + # error than for a truly absent ``source`` key. + rej = _only_rejection(_parse([{"name": "p", "source": ""}])) + assert rej.reason == "invalid-source-shape" + + def test_dict_source_missing_type_with_repo_infers_github(self): + # B3: the short-form ``{"repo": "owner/repo", "ref": "..."}`` that + # some APM and Anthropic marketplaces emit without an explicit ``type`` + # key is now accepted by inferring ``type=github`` when ``repo`` is + # present. Hard-reject only when ``type`` is present but unrecognised. + manifest = _parse([{"name": "p", "source": {"repo": "o/r"}}]) + assert manifest.rejections == () + assert len(manifest.plugins) == 1 + assert manifest.plugins[0].source.type == "github" + + def test_dict_source_missing_type_and_repo_yields_rejection(self): + # Still a rejection when both ``type`` and ``repo`` are absent -- + # the parser cannot infer a source shape. 
+ rej = _only_rejection(_parse([{"name": "p", "source": {}}])) + assert rej.reason == "invalid-source-shape" + + def test_non_dict_plugin_entry(self): + manifest = _parse(["not-a-dict"]) # type: ignore[list-item] + rej = _only_rejection(manifest) + assert rej.reason == "invalid-source-shape" + + +# --------------------------------------------------------------------------- +# Multi-entry behaviour +# --------------------------------------------------------------------------- + + +class TestMultiEntry: + def test_partial_success_does_not_drop_good_entries(self): + manifest = _parse( + [ + {"name": "good", "repository": "o/r"}, + {"name": "bad", "source": {"type": "npm"}}, + {"name": "also-good", "repository": "o2/r2"}, + ] + ) + names = [p.name for p in manifest.plugins] + assert names == ["good", "also-good"] + assert len(manifest.rejections) == 1 + assert manifest.rejections[0].plugin_name == "bad" + + def test_duplicate_name_rejected(self): + manifest = _parse( + [ + {"name": "p", "repository": "o/r"}, + {"name": "p", "repository": "o/r2"}, + ] + ) + # First wins, second rejected. + assert len(manifest.plugins) == 1 + assert len(manifest.rejections) == 1 + assert manifest.rejections[0].reason == "duplicate-name" + + def test_find_plugin_is_case_sensitive(self): + manifest = _parse([{"name": "GitNexus", "repository": "o/r"}]) + assert manifest.find_plugin("GitNexus") is not None + # Strict lookup: lowercase mismatch is a miss. + assert manifest.find_plugin("gitnexus") is None + + +# --------------------------------------------------------------------------- +# Real-world fixture: GitNexus marketplace.json shape +# --------------------------------------------------------------------------- + + +class TestGitNexusFixture: + """Locks the contract against the real upstream marketplace shape. 
+ + Mirrors ``https://raw.githubusercontent.com/abhigyanpatwari/GitNexus/main/.claude-plugin/marketplace.json`` + -- if a future schema change to GitNexus breaks this test, the + upstream-parser support matrix needs to be revisited. + """ + + def test_gitnexus_top_level_fixture_parses_cleanly(self): + data = { + "name": "gitnexus-marketplace", + "owner": {"name": "GitNexus", "email": "nico@gitnexus.dev"}, + "metadata": { + "description": "Code intelligence powered by a knowledge graph", + "homepage": "https://github.com/nicosxt/gitnexus", + }, + "plugins": [ + { + "name": "gitnexus", + "version": "1.3.3", + "source": "./gitnexus-claude-plugin", + "description": "Code intelligence", + } + ], + } + manifest = parse_marketplace_strict(data, upstream_owner_repo="abhigyanpatwari/GitNexus") + assert manifest.rejections == () + assert manifest.name == "gitnexus-marketplace" + assert manifest.owner_name == "GitNexus" + assert len(manifest.plugins) == 1 + plugin = manifest.plugins[0] + assert plugin.name == "gitnexus" + assert plugin.version == "1.3.3" + assert plugin.source.type == "git-subdir" + assert plugin.source.repo == "abhigyanpatwari/GitNexus" + assert plugin.source.subdir == "gitnexus-claude-plugin" diff --git a/tests/unit/marketplace/test_upstream_resolver.py b/tests/unit/marketplace/test_upstream_resolver.py new file mode 100644 index 000000000..fe431d76c --- /dev/null +++ b/tests/unit/marketplace/test_upstream_resolver.py @@ -0,0 +1,555 @@ +"""Unit tests for ``apm_cli.marketplace.upstream_resolver``. + +Covers the three core invariants: +- Atomic-fetch (each upstream fetched once even with many packages). +- Repo-rename guard (canonical full_name mismatch fails closed). +- Precedence ladder for ref resolution. + +Strict-parser rejections are also routed through the resolver into +the diagnostic stream as build errors. 
+""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from apm_cli.marketplace.upstream_cache import UpstreamCache +from apm_cli.marketplace.upstream_resolver import ( + RepoRenameError, + ResolvedUpstreamPackage, + UpstreamResolutionError, + UpstreamResolver, +) +from apm_cli.marketplace.yml_schema import Upstream, UpstreamPackageEntry + +SHA_A = "a" * 40 +SHA_B = "b" * 40 +SHA_PLUGIN = "c" * 40 + + +def make_upstream( + *, + alias: str = "gitnexus", + repo: str = "abhigyanpatwari/GitNexus", + ref: str | None = SHA_A, + branch: str = "main", + allow_head: bool = False, + path: str = ".claude-plugin/marketplace.json", + host: str = "github.com", +) -> Upstream: + return Upstream( + alias=alias, + repo=repo, + path=path, + ref=ref, + branch=branch, + host=host, + allow_head=allow_head, + ) + + +def make_entry( + *, + name: str = "gitnexus", + upstream_alias: str = "gitnexus", + plugin: str | None = None, + ref: str | None = None, + version: str | None = None, + allow_head: bool = False, + tag_pattern: str | None = None, + include_prerelease: bool = False, +) -> UpstreamPackageEntry: + return UpstreamPackageEntry( + name=name, + upstream_alias=upstream_alias, + plugin=plugin, + ref=ref, + version=version, + allow_head=allow_head, + tag_pattern=tag_pattern, + include_prerelease=include_prerelease, + ) + + +def gitnexus_manifest() -> dict: + """A minimal upstream marketplace.json shaped like GitNexus.""" + return { + "name": "gitnexus-marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "source": { + "type": "git-subdir", + "repo": "abhigyanpatwari/GitNexus", + "path": "gitnexus-claude-plugin", + }, + "description": "GitNexus plugin", + } + ], + } + + +def manifest_with_pinned_plugin(*, plugin_sha: str = SHA_PLUGIN) -> dict: + return { + "name": "marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "source": { 
+ "type": "git-subdir", + "repo": "abhigyanpatwari/GitNexus", + "path": "gitnexus-claude-plugin", + "sha": plugin_sha, + }, + } + ], + } + + +def ref_to_sha_static(value: str) -> RefToSha: + """Return a ref_to_sha callable that always returns *value*.""" + + def _resolver(host: str, owner: str, repo: str, ref: str) -> str: + return value + + return _resolver + + +# Type alias for clarity in test code +RefToSha = object + + +# --------------------------------------------------------------------------- +# Atomic-fetch invariant +# --------------------------------------------------------------------------- + + +class TestAtomicFetch: + def test_single_upstream_fetched_once_for_many_packages(self, tmp_path: Path): + """Multiple packages on the same upstream must trigger only one fetch.""" + cache = UpstreamCache( + base_dir=tmp_path, + fetch_callback=MagicMock(return_value=gitnexus_manifest()), + ) + # Track how many times the fetch_callback was called. + fetch_callback = cache._fetch_callback # MagicMock + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + entries = [ + make_entry(name="acme-gitnexus", plugin="gitnexus"), + make_entry(name="acme-gitnexus-2", plugin="gitnexus"), + make_entry(name="acme-gitnexus-3", plugin="gitnexus"), + ] + resolved, _ = resolver.resolve_all(entries) + # All three resolved successfully. + assert len(resolved) == 3 + # And the upstream JSON was fetched exactly once. 
+ assert fetch_callback.call_count == 1 + + def test_separate_upstreams_each_fetched_once(self, tmp_path: Path): + """Distinct upstreams each fetch independently.""" + manifest_a = gitnexus_manifest() + manifest_b = { + "name": "other-marketplace", + "owner": {"name": "other-owner"}, + "plugins": [ + { + "name": "other", + "source": { + "type": "git-subdir", + "repo": "other-owner/other-repo", + "path": "plugin", + }, + } + ], + } + + # Fetch returns different content based on the cache key host/owner + def fetch(key, _auth): + if key.repo == "GitNexus": + return manifest_a + return manifest_b + + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=fetch) + upstreams = { + "gitnexus": make_upstream(), + "other": make_upstream(alias="other", repo="other-owner/other-repo", ref=SHA_B), + } + resolver = UpstreamResolver( + upstreams=upstreams, + cache=cache, + ref_to_sha=lambda host, owner, repo, ref: SHA_A if repo == "GitNexus" else SHA_B, + ) + entries = [ + make_entry(name="acme-gitnexus", upstream_alias="gitnexus", plugin="gitnexus"), + make_entry(name="acme-other", upstream_alias="other", plugin="other"), + ] + resolved, _ = resolver.resolve_all(entries) + assert len(resolved) == 2 + + +# --------------------------------------------------------------------------- +# Repo-rename guard +# --------------------------------------------------------------------------- + + +class TestRepoRenameGuard: + def test_rename_detected_raises(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream(repo="abhigyanpatwari/GitNexus") + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + canonical_full_name=lambda h, o, r: "evil-actor/GitNexus", + ) + entry = make_entry() + with pytest.raises(RepoRenameError, match="repo identity mismatch"): + resolver.resolve_package(entry) + + def test_rename_is_case_insensitive_match(self, 
tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream(repo="abhigyanpatwari/GitNexus") + # GitHub may report different casing; allow it. + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + canonical_full_name=lambda h, o, r: "ABHIGYANPATWARI/gitnexus", + ) + # Should NOT raise. + result = resolver.resolve_package(make_entry()) + assert isinstance(result, ResolvedUpstreamPackage) + + def test_canonical_resolver_failure_fails_closed(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream() + + def boom(h, o, r): + raise RuntimeError("api 503") + + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + canonical_full_name=boom, + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry()) + assert exc_info.value.code == "canonical-name-unavailable" + + def test_canonical_resolver_returning_empty_skips_check(self, tmp_path: Path): + """Empty canonical means 'unknown'; allowed for offline rebuilds.""" + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + canonical_full_name=lambda h, o, r: "", + ) + result = resolver.resolve_package(make_entry()) + assert result.upstream_canonical_full_name == "abhigyanpatwari/GitNexus" + + +# --------------------------------------------------------------------------- +# Precedence ladder +# --------------------------------------------------------------------------- + + +class TestPrecedenceLadder: + def test_curator_ref_wins(self, tmp_path: Path): + cache = UpstreamCache( + base_dir=tmp_path, 
fetch_callback=lambda k, a: manifest_with_pinned_plugin() + ) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + result = resolver.resolve_package(make_entry(ref=SHA_B)) + assert result.plugin_ref == SHA_B + assert result.plugin_sha == SHA_B + assert result.pin_source == "curator-ref" + + def test_curator_version_uses_resolver(self, tmp_path: Path): + cache = UpstreamCache( + base_dir=tmp_path, fetch_callback=lambda k, a: manifest_with_pinned_plugin() + ) + upstream = make_upstream() + + def version_resolver(host, owner, repo, range_, *, tag_pattern, include_prerelease): + assert range_ == "^1.0.0" + return "v1.2.3", SHA_B + + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + version_range_resolver=version_resolver, + ) + result = resolver.resolve_package(make_entry(version="^1.0.0")) + assert result.plugin_ref == "v1.2.3" + assert result.plugin_sha == SHA_B + assert result.pin_source == "curator-version" + + def test_curator_version_without_resolver_raises(self, tmp_path: Path): + cache = UpstreamCache( + base_dir=tmp_path, fetch_callback=lambda k, a: manifest_with_pinned_plugin() + ) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry(version="^1.0.0")) + assert exc_info.value.code == "version-resolver-missing" + + def test_upstream_pinned_sha(self, tmp_path: Path): + cache = UpstreamCache( + base_dir=tmp_path, + fetch_callback=lambda k, a: manifest_with_pinned_plugin(plugin_sha=SHA_PLUGIN), + ) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + result = 
resolver.resolve_package(make_entry()) + assert result.plugin_sha == SHA_PLUGIN + assert result.pin_source == "upstream-pin" + + def test_same_repo_fallback_uses_manifest_sha(self, tmp_path: Path): + """GitNexus shape: plugin lives in same repo as marketplace, no plugin pin.""" + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream(ref=SHA_A) + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + result = resolver.resolve_package(make_entry()) + assert result.plugin_sha == SHA_A + assert result.pin_source == "upstream-registration-ref" + assert result.upstream_manifest_sha == SHA_A + + def test_unpinned_no_fallback_raises(self, tmp_path: Path): + """Plugin in different repo, no curator pin, no plugin pin -> fail.""" + manifest = { + "name": "marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "source": { + "type": "github", + "repo": "different-owner/different-repo", + }, + } + ], + } + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: manifest) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry()) + assert exc_info.value.code == "package-unpinned" + + def test_allow_head_emits_warning_returns_none_ref(self, tmp_path: Path): + manifest = { + "name": "marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "source": { + "type": "github", + "repo": "different-owner/different-repo", + }, + } + ], + } + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: manifest) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + 
ref_to_sha=ref_to_sha_static(SHA_A), + ) + result = resolver.resolve_package(make_entry(allow_head=True)) + assert result.plugin_ref is None + assert result.plugin_sha is None + assert result.pin_source == "branch-head" + codes = [d.code for d in resolver.diagnostics] + assert "package-tracks-upstream-head" in codes + + +# --------------------------------------------------------------------------- +# Upstream-level errors and diagnostics +# --------------------------------------------------------------------------- + + +class TestUpstreamLevelErrors: + def test_unknown_alias(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + resolver = UpstreamResolver( + upstreams={}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry(upstream_alias="nope")) + assert exc_info.value.code == "unknown-upstream-alias" + + def test_missing_plugin_in_upstream(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry(plugin="does-not-exist")) + assert exc_info.value.code == "missing-plugin" + + def test_unpinned_upstream_without_allow_head(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream(ref=None, allow_head=False) + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry()) + assert exc_info.value.code == "upstream-unpinned" + + def 
test_unpinned_upstream_with_allow_head_emits_warning(self, tmp_path: Path): + cache = UpstreamCache( + base_dir=tmp_path, + fetch_callback=lambda k, a: manifest_with_pinned_plugin(), + ) + upstream = make_upstream(ref=None, allow_head=True) + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + result = resolver.resolve_package(make_entry()) + assert result.plugin_sha == SHA_PLUGIN + codes = [d.code for d in resolver.diagnostics] + assert "upstream-tracks-head" in codes + + def test_strict_rejection_surfaces_as_error_diagnostic(self, tmp_path: Path): + """Strict-parser rejections become resolver-level error diagnostics.""" + manifest = { + "name": "marketplace", + "owner": {"name": "abhigyanpatwari"}, + "plugins": [ + { + "name": "gitnexus", + "source": { + "type": "git-subdir", + "repo": "abhigyanpatwari/GitNexus", + "path": "gitnexus-claude-plugin", + }, + }, + { + "name": "broken", + "source": {"type": "npm"}, # unsupported-source-type + }, + ], + } + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: manifest) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + # Resolving the well-formed entry succeeds; the rejection is + # still surfaced through the diagnostic stream. 
+ result = resolver.resolve_package(make_entry()) + assert result is not None + codes = [d.code for d in resolver.diagnostics] + assert any(c.startswith("upstream-rejection:") for c in codes) + assert any(d.level == "error" for d in resolver.diagnostics) + + +# --------------------------------------------------------------------------- +# resolve_all error aggregation +# --------------------------------------------------------------------------- + + +class TestResolveAll: + def test_continues_on_per_package_failure(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=ref_to_sha_static(SHA_A), + ) + entries = [ + make_entry(name="ok", plugin="gitnexus"), + make_entry(name="bad", plugin="does-not-exist"), + make_entry(name="ok2", plugin="gitnexus"), + ] + resolved, diagnostics = resolver.resolve_all(entries) + assert len(resolved) == 2 + # The failure becomes a diagnostic, not an exception. + assert any(d.code == "missing-plugin" for d in diagnostics) + + +# --------------------------------------------------------------------------- +# Ref-to-SHA validation +# --------------------------------------------------------------------------- + + +class TestRefToShaValidation: + def test_invalid_sha_returned_by_callback_raises(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream() + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + # Returns a short string, not a 40-char SHA. 
+ ref_to_sha=ref_to_sha_static("abc123"), + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry()) + assert exc_info.value.code == "invalid-sha" + + def test_ref_resolution_failure_reports_named_error(self, tmp_path: Path): + cache = UpstreamCache(base_dir=tmp_path, fetch_callback=lambda k, a: gitnexus_manifest()) + upstream = make_upstream() + + def boom(host, owner, repo, ref): + raise RuntimeError("network down") + + resolver = UpstreamResolver( + upstreams={upstream.alias: upstream}, + cache=cache, + ref_to_sha=boom, + ) + with pytest.raises(UpstreamResolutionError) as exc_info: + resolver.resolve_package(make_entry()) + assert exc_info.value.code == "ref-resolution-failed" diff --git a/tests/unit/marketplace/test_yml_editor_upstreams.py b/tests/unit/marketplace/test_yml_editor_upstreams.py new file mode 100644 index 000000000..1f9a91678 --- /dev/null +++ b/tests/unit/marketplace/test_yml_editor_upstreams.py @@ -0,0 +1,246 @@ +"""Tests for the upstream editor helpers in ``yml_editor``.""" + +from __future__ import annotations + +import textwrap +from pathlib import Path + +import pytest +import yaml + +from apm_cli.marketplace.errors import MarketplaceYmlError +from apm_cli.marketplace.yml_editor import ( + add_plugin_entry, + add_upstream_entry, + list_upstream_entries, + remove_upstream_entry, +) + +SHA40 = "b" * 40 + + +def _write_yml(tmp_path: Path, content: str) -> Path: + p = tmp_path / "marketplace.yml" + p.write_text(textwrap.dedent(content), encoding="utf-8") + return p + + +_BASE_YML = """\ +name: acme-marketplace +description: ACME curated marketplace +version: 1.0.0 +owner: + name: ACME Corp +packages: [] +""" + + +# --------------------------------------------------------------------------- +# add_upstream_entry +# --------------------------------------------------------------------------- + + +class TestAddUpstreamHappy: + def test_add_with_ref_creates_block(self, tmp_path: Path) -> None: + yml = 
_write_yml(tmp_path, _BASE_YML) + add_upstream_entry( + yml, + alias="gitnexus", + repo="abhigyanpatwari/GitNexus", + ref=SHA40, + ) + data = yaml.safe_load(yml.read_text(encoding="utf-8")) + assert "upstreams" in data + assert len(data["upstreams"]) == 1 + entry = data["upstreams"][0] + assert entry["alias"] == "gitnexus" + assert entry["repo"] == "abhigyanpatwari/GitNexus" + assert entry["ref"] == SHA40 + + def test_add_with_branch_and_allow_head(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry( + yml, + alias="gitnexus", + repo="abhigyanpatwari/GitNexus", + branch="main", + allow_head=True, + ) + data = yaml.safe_load(yml.read_text(encoding="utf-8")) + entry = data["upstreams"][0] + assert entry["branch"] == "main" + assert entry["allow_head"] is True + + def test_add_with_optional_path_and_host(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry( + yml, + alias="gitlab-mirror", + repo="example/repo", + ref=SHA40, + path=".claude-plugin/marketplace.json", + host="gitlab.com", + ) + data = yaml.safe_load(yml.read_text(encoding="utf-8")) + entry = data["upstreams"][0] + assert entry["path"] == ".claude-plugin/marketplace.json" + assert entry["host"] == "gitlab.com" + + def test_add_appends_when_block_exists(self, tmp_path: Path) -> None: + existing = textwrap.dedent("""\ + name: acme-marketplace + description: ACME + version: 1.0.0 + owner: + name: ACME Corp + upstreams: + - alias: first + repo: acme/first + ref: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + packages: [] + """) + yml = _write_yml(tmp_path, existing) + add_upstream_entry( + yml, + alias="second", + repo="acme/second", + ref=SHA40, + ) + data = yaml.safe_load(yml.read_text(encoding="utf-8")) + aliases = [u["alias"] for u in data["upstreams"]] + assert aliases == ["first", "second"] + + +class TestAddUpstreamErrors: + def test_invalid_alias_rejected(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + 
# B5: the regex is ``^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$``. + # Aliases starting with '-' are still invalid; digit-leading ones are now valid. + with pytest.raises(MarketplaceYmlError, match="Upstream alias"): + add_upstream_entry(yml, alias="-bad-alias", repo="a/b", ref=SHA40) + + def test_digit_leading_alias_accepted(self, tmp_path: Path) -> None: + # B5: digit-leading aliases are now valid (schema and editor aligned). + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry(yml, alias="2fa-marketplace", repo="a/b", ref=SHA40) + data = yaml.safe_load(yml.read_text()) + assert data["upstreams"][0]["alias"] == "2fa-marketplace" + + def test_invalid_repo_rejected(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + with pytest.raises(MarketplaceYmlError, match="repo"): + add_upstream_entry(yml, alias="ok", repo="not-a-repo", ref=SHA40) + + def test_missing_ref_and_branch_rejected(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + with pytest.raises(MarketplaceYmlError, match=r"ref.*branch"): + add_upstream_entry(yml, alias="ok", repo="a/b") + + def test_duplicate_alias_rejected(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry(yml, alias="dup", repo="a/b", ref=SHA40) + with pytest.raises(MarketplaceYmlError, match=r"already exists"): + add_upstream_entry(yml, alias="dup", repo="a/b", ref=SHA40) + + +# --------------------------------------------------------------------------- +# remove_upstream_entry +# --------------------------------------------------------------------------- + + +class TestRemoveUpstream: + def test_remove_existing_alias(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry(yml, alias="gitnexus", repo="a/b", ref=SHA40) + remove_upstream_entry(yml, "gitnexus") + data = yaml.safe_load(yml.read_text(encoding="utf-8")) + # Block may exist as empty list or be omitted; both acceptable. 
+ assert not data.get("upstreams") + + def test_remove_unknown_alias_errors(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + with pytest.raises(MarketplaceYmlError, match="not found"): + remove_upstream_entry(yml, "ghost") + + def test_remove_blocked_when_referenced(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry(yml, alias="gitnexus", repo="a/b", ref=SHA40) + + # Manually inject a package that references the upstream. + text = yml.read_text(encoding="utf-8") + text = text.replace( + "packages: []", + textwrap.dedent("""\ + packages: + - name: acme-gitnexus + upstream: gitnexus + plugin: gitnexus + """), + ) + yml.write_text(text, encoding="utf-8") + + with pytest.raises(MarketplaceYmlError, match=r"still referenced"): + remove_upstream_entry(yml, "gitnexus") + + +# --------------------------------------------------------------------------- +# list_upstream_entries +# --------------------------------------------------------------------------- + + +class TestListUpstreams: + def test_list_empty_when_no_block(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + assert list_upstream_entries(yml) == [] + + def test_list_returns_plain_dicts(self, tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry(yml, alias="a", repo="x/y", ref=SHA40) + add_upstream_entry(yml, alias="b", repo="x/z", branch="main", allow_head=True) + entries = list_upstream_entries(yml) + assert len(entries) == 2 + aliases = [e["alias"] for e in entries] + assert aliases == ["a", "b"] + # Plain dicts (not CommentedMap). 
+ assert isinstance(entries[0], dict) + + +# --------------------------------------------------------------------------- +# Roundtrip preserves comments +# --------------------------------------------------------------------------- + + +def test_add_upstream_preserves_existing_comments(tmp_path: Path) -> None: + """Round-trip mode preserves leading and trailing comments around edits.""" + yml = _write_yml( + tmp_path, + textwrap.dedent("""\ + # Top-level marketplace + name: acme-marketplace + description: ACME + version: 1.0.0 + owner: + name: ACME Corp + # End of file + """), + ) + add_upstream_entry(yml, alias="gitnexus", repo="a/b", ref=SHA40) + text = yml.read_text(encoding="utf-8") + assert "# Top-level marketplace" in text + assert "# End of file" in text + assert "alias: gitnexus" in text + + +# --------------------------------------------------------------------------- +# Integration: package add with upstream coexists with direct entries +# --------------------------------------------------------------------------- + + +def test_upstream_block_does_not_break_direct_package_add(tmp_path: Path) -> None: + yml = _write_yml(tmp_path, _BASE_YML) + add_upstream_entry(yml, alias="gitnexus", repo="a/b", ref=SHA40) + add_plugin_entry(yml, source="acme/direct", version=">=1.0.0") + data = yaml.safe_load(yml.read_text(encoding="utf-8")) + assert len(data["upstreams"]) == 1 + assert len(data["packages"]) == 1 + assert data["packages"][0]["name"] == "direct" diff --git a/tests/unit/marketplace/test_yml_schema_upstreams.py b/tests/unit/marketplace/test_yml_schema_upstreams.py new file mode 100644 index 000000000..aa119ed16 --- /dev/null +++ b/tests/unit/marketplace/test_yml_schema_upstreams.py @@ -0,0 +1,466 @@ +"""Schema tests for the marketplace ``upstreams:`` block and the +upstream-sourced ``packages[]`` shape (issue #1136). 
+ +These tests cover the type-level discriminated union introduced for +upstream marketplace pass-through: parsing both shapes side-by-side, +strict per-shape key validation, alias/host/repo validation, and +cross-validation between ``packages`` and ``upstreams`` blocks. +""" + +from __future__ import annotations + +import textwrap +from pathlib import Path + +import pytest + +from apm_cli.marketplace.errors import MarketplaceYmlError +from apm_cli.marketplace.yml_schema import ( + MarketplacePackage, # noqa: F401 -- re-exported public alias + PackageEntry, + Upstream, + UpstreamPackageEntry, + load_marketplace_yml, +) + + +def _write_yml(tmp_path: Path, content: str) -> Path: + p = tmp_path / "marketplace.yml" + p.write_text(textwrap.dedent(content), encoding="utf-8") + return p + + +_BASE = """\ +name: acme-tools +description: Acme marketplace +version: 1.0.0 +owner: + name: Acme Corp +""" + + +class TestUpstreamRegistration: + def test_minimal_with_ref(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: 0123456789abcdef0123456789abcdef01234567 +packages: + - name: tool-a + source: acme/tool-a + version: ">=1.0.0" +""", + ) + result = load_marketplace_yml(yml) + assert len(result.upstreams) == 1 + u = result.upstreams[0] + assert u.alias == "gitnexus" + assert u.repo == "abhigyanpatwari/GitNexus" + assert u.path == ".claude-plugin/marketplace.json" + assert u.branch == "main" + assert u.host == "github.com" + assert u.allow_head is False + assert u.ref is not None + + def test_allow_head_without_ref_is_permitted(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: dev + repo: org/dev-marketplace + allow_head: true + branch: develop +packages: + - name: tool-a + source: acme/tool-a + version: ">=1.0.0" +""", + ) + result = load_marketplace_yml(yml) + u = result.upstreams[0] + assert u.allow_head is True + assert u.branch == 
"develop" + assert u.ref is None + + def test_missing_ref_without_allow_head_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: floats + repo: org/floats +packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError, match=r"'ref' is required"): + load_marketplace_yml(yml) + + def test_invalid_alias_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: "bad alias!" + repo: org/repo + ref: abc123 +packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError, match="not a valid alias"): + load_marketplace_yml(yml) + + def test_invalid_repo_shape_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: "./local" + ref: abc +packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError, match="/"): + load_marketplace_yml(yml) + + def test_path_traversal_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc + path: "../escape/marketplace.json" +packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError): + load_marketplace_yml(yml) + + def test_invalid_host_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc + host: "not a host" +packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError, match="not a valid hostname"): + load_marketplace_yml(yml) + + def test_unknown_key_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc + typo_key: value +packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError, match="Unknown key"): + load_marketplace_yml(yml) + + def test_duplicate_alias_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/a + ref: abc + - alias: u + repo: org/b + ref: def 
+packages: [] +""", + ) + with pytest.raises(MarketplaceYmlError, match="Duplicate upstream alias"): + load_marketplace_yml(yml) + + +class TestUpstreamPackageEntry: + def test_minimal_upstream_package(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: abc123 +packages: + - name: gitnexus + upstream: gitnexus + version: ">=1.0.0" +""", + ) + result = load_marketplace_yml(yml) + assert len(result.packages) == 1 + e = result.packages[0] + assert isinstance(e, UpstreamPackageEntry) + assert e.name == "gitnexus" + assert e.upstream_alias == "gitnexus" + assert e.plugin is None + assert e.allow_head is False + + def test_upstream_package_with_overrides(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: abc123 +packages: + - name: my-renamed + upstream: gitnexus + plugin: original + ref: feedface + description: "ACME-curated" + tags: [acme, approved] +""", + ) + result = load_marketplace_yml(yml) + e = result.packages[0] + assert isinstance(e, UpstreamPackageEntry) + assert e.name == "my-renamed" + assert e.plugin == "original" + assert e.ref == "feedface" + assert e.description == "ACME-curated" + assert e.tags == ("acme", "approved") + + def test_source_and_upstream_mutex(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc +packages: + - name: confused + source: acme/x + upstream: u +""", + ) + with pytest.raises(MarketplaceYmlError, match="mutually exclusive"): + load_marketplace_yml(yml) + + def test_neither_source_nor_upstream_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +packages: + - name: orphan + version: ">=1.0.0" +""", + ) + with pytest.raises(MarketplaceYmlError, match="exactly one of"): + load_marketplace_yml(yml) + + def test_subdir_on_upstream_rejected(self, 
tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc +packages: + - name: bad + upstream: u + subdir: nested +""", + ) + with pytest.raises(MarketplaceYmlError, match="Unknown key"): + load_marketplace_yml(yml) + + def test_version_and_ref_mutex(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc +packages: + - name: bad + upstream: u + version: ">=1.0.0" + ref: feedface +""", + ) + with pytest.raises(MarketplaceYmlError, match="mutually exclusive"): + load_marketplace_yml(yml) + + def test_unknown_alias_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: known + repo: org/repo + ref: abc +packages: + - name: x + upstream: undeclared + version: ">=1.0.0" +""", + ) + with pytest.raises(MarketplaceYmlError, match="not declared"): + load_marketplace_yml(yml) + + def test_invalid_alias_in_package_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc +packages: + - name: x + upstream: "bad alias!" 
+ version: ">=1.0.0" +""", + ) + with pytest.raises(MarketplaceYmlError, match="not a valid alias"): + load_marketplace_yml(yml) + + +class TestMixedPackages: + def test_direct_and_upstream_coexist(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: gitnexus + repo: abhigyanpatwari/GitNexus + ref: abc123 +packages: + - name: direct-tool + source: acme/direct-tool + version: ">=1.0.0" + - name: upstream-tool + upstream: gitnexus + plugin: gitnexus + version: ">=2.0.0" +""", + ) + result = load_marketplace_yml(yml) + assert len(result.packages) == 2 + direct, upstream = result.packages + assert isinstance(direct, PackageEntry) + assert direct.source == "acme/direct-tool" + assert isinstance(upstream, UpstreamPackageEntry) + assert upstream.plugin == "gitnexus" + + def test_cross_shape_name_uniqueness(self, tmp_path: Path): + """Direct and upstream entries cannot share a name (panel: + prevents dependency-confusion / shadowing).""" + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc +packages: + - name: shared + source: acme/local + version: ">=1.0.0" + - name: shared + upstream: u + version: ">=1.0.0" +""", + ) + with pytest.raises(MarketplaceYmlError, match="Duplicate package name"): + load_marketplace_yml(yml) + + def test_duplicate_upstream_pair_rejected(self, tmp_path: Path): + yml = _write_yml( + tmp_path, + _BASE + + """\ +upstreams: + - alias: u + repo: org/repo + ref: abc +packages: + - name: alpha + upstream: u + plugin: shared + version: ">=1.0.0" + - name: beta + upstream: u + plugin: shared + version: ">=1.0.0" +""", + ) + with pytest.raises(MarketplaceYmlError, match="Duplicate upstream package"): + load_marketplace_yml(yml) + + def test_no_upstreams_no_breakage(self, tmp_path: Path): + """A marketplace.yml without ``upstreams:`` and without any + upstream-shape package still parses unchanged -- proves the + feature is purely additive for existing curators.""" + 
yml = _write_yml( + tmp_path, + _BASE + + """\ +packages: + - name: tool-a + source: acme/tool-a + version: ">=1.0.0" +""", + ) + result = load_marketplace_yml(yml) + assert result.upstreams == () + assert len(result.packages) == 1 + assert isinstance(result.packages[0], PackageEntry) + + +class TestUpstreamDataclassDefaults: + def test_upstream_defaults(self): + u = Upstream(alias="x", repo="o/r", ref="abc") + assert u.path == ".claude-plugin/marketplace.json" + assert u.branch == "main" + assert u.host == "github.com" + assert u.allow_head is False + + def test_upstream_package_defaults(self): + e = UpstreamPackageEntry(name="x", upstream_alias="y") + assert e.plugin is None + assert e.allow_head is False + assert e.tags == () + assert e.include_prerelease is False