From c42a29df7fa73991497b1051c4792872b74bf183 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 2 Mar 2026 13:09:19 +0100 Subject: [PATCH 01/13] Add snapshot_test workflow with auto shoot/diff support Introduce a new scripts.snapshot_test command that can create reduced registry/channel snapshots and compare them with a line-based diff. The default mode performs the standard workflow: create/overwrite snapshot.yml when no base exists, otherwise write a timestamped snapshot (.yml) and compare it against the base. The implementation captures stdout/stderr from generate_registry, crawl, and generate_channel into a snapshot.log file inside the temp directory. It keeps the temp directory on failure for inspection, and removes it on success. Also add snapshot.toml with a selected package set. --- README.md | 19 ++ scripts/snapshot_test.py | 500 ++++++++++++++++++++++++++++++++++++ snapshot.toml | 15 ++ tests/test_snapshot_test.py | 120 +++++++++ 4 files changed, 654 insertions(+) create mode 100644 scripts/snapshot_test.py create mode 100644 snapshot.toml create mode 100644 tests/test_snapshot_test.py diff --git a/README.md b/README.md index 9a7bf996a..b27633e6d 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,25 @@ The command above reuses the same layout as [CI](https://github.com/packagecontr - You can choose another directory via `--restore-from ` if you want to stage the backup elsewhere. - Try this locally first, then commit and push to actually replace/update/restore the GitHub action cache +--- + +### `snapshot_test.py` + +Creates a compact, single-file snapshot for regression testing (`registry + channel`) from a reduced package set. 
+ +```bash +uv run -m scripts.snapshot_test +uv run -m scripts.snapshot_test --base snapshot.yml --conf snapshot.toml +uv run -m scripts.snapshot_test shoot +uv run -m scripts.snapshot_test diff snapshot-2026-03-02-1210-abcd123 +``` + +- Default mode (no subcommand): + - if base exists, writes a new `snapshot--.yml` and prints a line-based diff vs base + - if base does not exist, writes/creates the base snapshot, using `shoot`. +- `shoot` explicitly creates/overwrites a target snapshot (default: `snapshot.yml`). +- Noise is sent to a temporary folder (`tmp---`), which is removed on success. + ## Tests We use `pytest`. Execute everything via uv so dependencies come from `pyproject.toml`/`uv.lock`: diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py new file mode 100644 index 000000000..479955074 --- /dev/null +++ b/scripts/snapshot_test.py @@ -0,0 +1,500 @@ +from __future__ import annotations + +import argparse +from dataclasses import dataclass +from datetime import datetime +import difflib +import json +from pathlib import Path +import shutil +import subprocess +import sys +import tomllib +from typing import TextIO + +from rich.console import Console + + +DEFAULT_BASE = "snapshot.yml" +DEFAULT_CONF = "snapshot.toml" +DEFAULT_LOG = "snapshot.log" + +RED_ON_BLACK = "\x1b[31;40m" +YELLOW_ON_BLACK = "\x1b[33;40m" +RESET = "\x1b[0m" + +CONSOLE = Console(stderr=True) + + +@dataclass +class ShootContext: + now: datetime + commit_hash: str + commit_subject: str + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + raw_argv = list(sys.argv[1:] if argv is None else argv) + + if raw_argv and raw_argv[0] == "auto": + return parse_auto_args(raw_argv[1:]) + + if is_auto_mode_argv(raw_argv): + return parse_auto_args(raw_argv) + + parser = build_main_parser() + normalized_argv = normalize_argv(raw_argv) + return parser.parse_args(normalized_argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + if args.command == 
"shoot": + return run_shoot(args) + if args.command == "diff": + return run_diff(args) + return run_auto(args) + + +def run_auto(args: argparse.Namespace) -> int: + ctx = collect_shoot_context() + base_path = Path(args.base) + output_path = resolve_auto_output_path(base_path, args.output, ctx) + + create_snapshot_with_spinner(output_path, Path(args.conf), ctx) + print(f"Created snapshot at {output_path}") + + if base_path.exists() and base_path.resolve() != output_path.resolve(): + print(f"Comparing {base_path} to {output_path}") + print_snapshot_diff(base_path, output_path) + + return 0 + + +def run_shoot(args: argparse.Namespace) -> int: + ctx = collect_shoot_context() + output_path = Path(args.filename) + + create_snapshot_with_spinner(output_path, Path(args.conf), ctx) + print(f"Created snapshot at {output_path}") + return 0 + + +def run_diff(args: argparse.Namespace) -> int: + files = args.files + if len(files) > 2: + raise SystemExit("diff accepts at most two snapshot files") + + if not files: + snapshots = sorted(Path.cwd().glob("snapshot-*")) + if not snapshots: + print("No snapshots found matching 'snapshot-*'.") + return 0 + for path in snapshots: + print(path.name) + return 0 + + if len(files) == 1: + left = Path(args.base) + right = Path(files[0]) + else: + left = Path(files[0]) + right = Path(files[1]) + + print(f"Comparing {left} to {right}") + print_snapshot_diff(left, right) + return 0 + + +def create_snapshot_with_spinner( + output_path: Path, + conf_path: Path, + ctx: ShootContext, +) -> None: + with CONSOLE.status("Creating snapshot", spinner="dots"): + create_snapshot(output_path, conf_path, ctx) + + +def normalize_argv(argv: list[str]) -> list[str]: + if not argv: + return argv + if argv[0] in {"-h", "--help", "shoot", "diff"}: + return argv + return ["shoot", *argv] + + +class SubcommandHelpFormatter(argparse.HelpFormatter): + def _format_action(self, action: argparse.Action) -> str: + if isinstance(action, argparse._SubParsersAction): + 
return "".join(self._format_action(choice) for choice in action._get_subactions()) + return super()._format_action(action) + + +def build_main_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Create and diff crawler snapshots.", + formatter_class=SubcommandHelpFormatter, + ) + add_auto_mode_arguments(parser) + + subparsers = parser.add_subparsers( + title="subcommands", + dest="command", + required=True, + ) + + shoot = subparsers.add_parser("shoot", help="Create or overwrite a snapshot") + shoot.add_argument( + "filename", + nargs="?", + default=DEFAULT_BASE, + help=f"Output snapshot path (default: {DEFAULT_BASE})", + ) + shoot.add_argument( + "--conf", + default=DEFAULT_CONF, + help=f"Snapshot config file (default: {DEFAULT_CONF})", + ) + + diff = subparsers.add_parser("diff", help="Diff one or two snapshots") + diff.add_argument( + "--base", + default=DEFAULT_BASE, + help=f"Default base snapshot if one file is provided (default: {DEFAULT_BASE})", + ) + diff.add_argument("files", nargs="*", help="Snapshot file(s) to diff") + + return parser + + +def parse_auto_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="snapshot_test.py", + description="Create and diff crawler snapshots.", + ) + add_auto_mode_arguments(parser) + args = parser.parse_args(argv) + args.command = "auto" + return args + + +def add_auto_mode_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--base", + default=DEFAULT_BASE, + help=f"Base snapshot used for diffing (default: {DEFAULT_BASE})", + ) + parser.add_argument( + "--conf", + default=DEFAULT_CONF, + help=f"Snapshot config file (default: {DEFAULT_CONF})", + ) + parser.add_argument( + "--output", + "-o", + default=None, + help=( + "Output snapshot path. " + "Default: when base exists use snapshot--.yml, " + "otherwise write to --base." 
+ ), + ) + + +def is_auto_mode_argv(argv: list[str]) -> bool: + if not argv: + return True + if argv[0] in {"shoot", "diff", "-h", "--help"}: + return False + return argv[0].startswith("-") + + +def collect_shoot_context() -> ShootContext: + now = datetime.now() + short_hash = run_capture(["git", "rev-parse", "--short", "HEAD"]) + commit_subject = run_capture(["git", "log", "-1", "--pretty=%s"]) + return ShootContext(now=now, commit_hash=short_hash, commit_subject=commit_subject) + + +def resolve_auto_output_path( + base_path: Path, + output_arg: str | None, + ctx: ShootContext, +) -> Path: + if output_arg: + return Path(output_arg) + if not base_path.exists(): + return base_path + + stamp = ctx.now.strftime("%Y-%m-%d-%H%M") + return Path(f"snapshot-{stamp}-{ctx.commit_hash}.yml") + + +def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> None: + names = load_snapshot_packages(conf_path) + if not names: + raise ValueError(f"{conf_path} does not contain any snapshot packages.") + + temp_dir = resolve_temp_dir(ctx) + log_path = temp_dir / DEFAULT_LOG + + cleanup = False + try: + with log_path.open("w", encoding="utf-8") as log_file: + write_log(log_file, f"date: {ctx.now.isoformat()}") + write_log(log_file, f"commit: {ctx.commit_hash} {ctx.commit_subject}") + write_log(log_file, f"temp_dir: {temp_dir}") + write_log(log_file, f"output: {output_path}") + + reduced_registry, channel = build_snapshot_payload(temp_dir, names, log_file) + snapshot_text = render_snapshot(ctx, names, reduced_registry, channel) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(snapshot_text, encoding="utf-8") + cleanup = True + finally: + if cleanup: + shutil.rmtree(temp_dir, ignore_errors=True) + else: + print(f"Snapshot failed. 
Kept temp dir for introspection: {temp_dir}", file=sys.stderr) + if log_path.exists(): + print(f"Log: {log_path}", file=sys.stderr) + + +def resolve_temp_dir(ctx: ShootContext) -> Path: + stamp = ctx.now.strftime("%Y-%m-%d-%H%M") + stem = f"tmp--{stamp}-{ctx.commit_hash}" + candidate = Path.cwd() / stem + index = 1 + while candidate.exists(): + candidate = Path.cwd() / f"{stem}-{index}" + index += 1 + candidate.mkdir(parents=True, exist_ok=False) + return candidate + + +def build_snapshot_payload( + temp_dir: Path, + names: list[str], + log_file: TextIO, +) -> tuple[dict, dict]: + full_registry = temp_dir / "registry-full.json" + reduced_registry_path = temp_dir / "registry.json" + workspace_path = temp_dir / "workspace.json" + channel_path = temp_dir / "channel.json" + + run_step([ + sys.executable, + "-m", + "scripts.generate_registry", + "--output", + str(full_registry), + ], log_file) + + write_log(log_file, f"Reducing registry to {len(names)} configured packages") + reduced_registry = write_reduced_registry(full_registry, reduced_registry_path, names) + + run_step([ + sys.executable, + "-m", + "scripts.crawl", + "--registry", + str(reduced_registry_path), + "--workspace", + str(workspace_path), + "--limit", + str(max(len(names), 1)), + ], log_file) + + run_step([ + sys.executable, + "-m", + "scripts.generate_channel", + "--registry", + str(reduced_registry_path), + "--workspace", + str(workspace_path), + "--output", + str(channel_path), + ], log_file) + + channel = read_json(channel_path) + return reduced_registry, channel + + +def write_reduced_registry( + full_registry_path: Path, + reduced_registry_path: Path, + names: list[str], +) -> dict: + full_registry = read_json(full_registry_path) + wanted = set(names) + + selected_packages = [ + package + for package in full_registry.get("packages", []) + if package.get("name") in wanted + ] + + found_names = {package["name"] for package in selected_packages if package.get("name")} + missing = [name for name in 
names if name not in found_names] + if missing: + joined = ", ".join(missing) + raise ValueError(f"Packages listed in config but missing from generated registry: {joined}") + + selected_sources = { + package["source"] + for package in selected_packages + if package.get("source") + } + repositories = [ + repo + for repo in full_registry.get("repositories", []) + if repo in selected_sources + ] + + reduced_registry = { + "repositories": repositories, + "packages": selected_packages, + "libraries": [], + } + reduced_registry_path.write_text( + json.dumps(reduced_registry, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + return reduced_registry + + +def render_snapshot( + ctx: ShootContext, + names: list[str], + reduced_registry: dict, + channel: dict, +) -> str: + package_lines = [f" - {name}" for name in names] + header_lines = [ + f"date: {format_snapshot_date(ctx.now)}", + f"commit: {ctx.commit_hash} {ctx.commit_subject}", + "packages:", + *package_lines, + ] + + registry_json = json.dumps(reduced_registry, indent=2, ensure_ascii=False) + channel_json = json.dumps(channel, indent=2, ensure_ascii=False) + + return "\n".join([ + *header_lines, + "---", + registry_json, + "---", + channel_json, + "", + ]) + + +def format_snapshot_date(now: datetime) -> str: + return f"{now.strftime('%B %Y')}, {ordinal(now.day)} {now.strftime('%H:%M')}" + + +def ordinal(day: int) -> str: + if 10 <= day % 100 <= 20: + suffix = "th" + else: + suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th") + return f"{day}{suffix}" + + +def print_snapshot_diff(left: Path, right: Path) -> None: + if not left.exists(): + raise FileNotFoundError(f"Base snapshot not found: {left}") + if not right.exists(): + raise FileNotFoundError(f"Snapshot not found: {right}") + + left_lines = left.read_text(encoding="utf-8").splitlines() + right_lines = right.read_text(encoding="utf-8").splitlines() + + diff_lines = list(difflib.unified_diff( + left_lines, + right_lines, + fromfile=str(left), + 
tofile=str(right), + lineterm="", + )) + + if not diff_lines: + print("No differences.") + return + + for line in diff_lines: + if line.startswith("--- ") or line.startswith("+++ "): + continue + if line.startswith("-"): + print(f"{RED_ON_BLACK}{line}{RESET}") + elif line.startswith("+"): + print(f"{YELLOW_ON_BLACK}{line}{RESET}") + else: + print(line) + + +def load_snapshot_packages(conf_path: Path) -> list[str]: + if not conf_path.exists(): + raise FileNotFoundError(f"Snapshot config not found: {conf_path}") + + text = conf_path.read_text(encoding="utf-8") + try: + data = tomllib.loads(text) + except tomllib.TOMLDecodeError as exc: + raise ValueError(f"Invalid TOML in {conf_path}: {exc}") from exc + + packages = data.get("snapshot", {}).get("packages", []) + if not isinstance(packages, list): + raise ValueError(f"Invalid config in {conf_path}: snapshot.packages must be a list") + return [str(package) for package in packages if str(package).strip()] + + +def read_json(path: Path) -> dict: + with path.open("r", encoding="utf-8") as handle: + data = json.load(handle) + if not isinstance(data, dict): + raise ValueError(f"Expected JSON object in {path}") + return data + + +def run_capture(command: list[str]) -> str: + completed = subprocess.run(command, check=True, capture_output=True, text=True) + return completed.stdout.strip() + + +def run_step(command: list[str], log_file: TextIO) -> None: + printable = " ".join(command) + write_log(log_file, f"$ {printable}") + + try: + completed = subprocess.run(command, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as exc: + append_command_output(log_file, exc.stdout, exc.stderr) + write_log(log_file, f"Command failed with exit code {exc.returncode}") + raise + + append_command_output(log_file, completed.stdout, completed.stderr) + + +def append_command_output(log_file: TextIO, stdout: str | None, stderr: str | None) -> None: + if stdout: + log_file.write(stdout) + if not 
stdout.endswith("\n"): + log_file.write("\n") + if stderr: + log_file.write(stderr) + if not stderr.endswith("\n"): + log_file.write("\n") + log_file.flush() + + +def write_log(log_file: TextIO, message: str) -> None: + log_file.write(f"{message}\n") + log_file.flush() + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/snapshot.toml b/snapshot.toml new file mode 100644 index 000000000..dd3ef743a --- /dev/null +++ b/snapshot.toml @@ -0,0 +1,15 @@ +[snapshot] +packages = [ + "ABNF-sublime-syntax", + "Affixify", + "Bison", + "4GL", + "LSP", + "SublimeLinter", + "GitSavvy", + "Package Control", + "SFTP", + "Theme - Monokai Pro", + "CodeIgniter 3 Snippets", + "AlignComment", +] diff --git a/tests/test_snapshot_test.py b/tests/test_snapshot_test.py new file mode 100644 index 000000000..9af27ab21 --- /dev/null +++ b/tests/test_snapshot_test.py @@ -0,0 +1,120 @@ +from datetime import datetime +from pathlib import Path +import sys + +from scripts.snapshot_test import ( + ShootContext, + load_snapshot_packages, + normalize_argv, + ordinal, + parse_args, + print_snapshot_diff, + resolve_auto_output_path, + run_step, +) + + +def test_normalize_argv_keeps_empty_argv_unchanged() -> None: + assert normalize_argv([]) == [] + + +def test_parse_args_treats_positional_as_shoot_filename() -> None: + args = parse_args(["snapshot-next.yml"]) + + assert args.command == "shoot" + assert args.filename == "snapshot-next.yml" + + +def test_parse_args_treats_options_without_command_as_auto() -> None: + args = parse_args(["--base", "snapshot.yml"]) + + assert args.command == "auto" + assert args.base == "snapshot.yml" + + +def test_resolve_auto_output_path_appends_yml_extension_for_new_snapshot( + tmp_path: Path, +) -> None: + ctx = ShootContext( + now=datetime(2026, 3, 2, 12, 53), + commit_hash="abc1234", + commit_subject="subject", + ) + base_path = tmp_path / "snapshot.yml" + base_path.write_text("base", encoding="utf-8") + + output = resolve_auto_output_path(base_path, 
None, ctx) + + assert output.name == "snapshot-2026-03-02-1253-abc1234.yml" + + +def test_load_snapshot_packages_from_toml(tmp_path: Path) -> None: + config = tmp_path / "snapshot.toml" + config.write_text( + "[snapshot]\n" + "packages = [\"foo\", \"bar\"]\n", + encoding="utf-8", + ) + + assert load_snapshot_packages(config) == ["foo", "bar"] + + +def test_load_snapshot_packages_rejects_non_toml_config(tmp_path: Path) -> None: + config = tmp_path / "snapshot.toml" + config.write_text( + "[snapshot]\n" + "packages =\n" + " foo\n" + " bar\n", + encoding="utf-8", + ) + + try: + load_snapshot_packages(config) + except ValueError as exc: + assert "Invalid TOML" in str(exc) + else: + raise AssertionError("Expected ValueError for invalid TOML") + + +def test_run_step_writes_stdout_and_stderr_to_log(tmp_path: Path) -> None: + log_path = tmp_path / "snapshot.log" + with log_path.open("w", encoding="utf-8") as log_file: + run_step( + [ + sys.executable, + "-c", + "import sys; print('hello out'); print('hello err', file=sys.stderr)", + ], + log_file, + ) + + text = log_path.read_text(encoding="utf-8") + assert "hello out" in text + assert "hello err" in text + + +def test_print_snapshot_diff_hides_unified_file_headers( + tmp_path: Path, + capsys, +) -> None: + left = tmp_path / "left.yml" + right = tmp_path / "right.yml" + left.write_text("date: one\nvalue: old\n", encoding="utf-8") + right.write_text("date: one\nvalue: new\n", encoding="utf-8") + + print_snapshot_diff(left, right) + lines = capsys.readouterr().out.splitlines() + + assert not any(line.startswith("--- ") for line in lines) + assert not any(line.startswith("+++ ") for line in lines) + assert any(line.startswith("@@") for line in lines) + + +def test_ordinal_suffixes() -> None: + assert ordinal(1) == "1st" + assert ordinal(2) == "2nd" + assert ordinal(3) == "3rd" + assert ordinal(4) == "4th" + assert ordinal(11) == "11th" + assert ordinal(23) == "23rd" From 6e2688b46bab410896afbefb86f71eb4e45ed714 Mon Sep 17 
00:00:00 2001 From: herr kaste Date: Mon, 2 Mar 2026 13:28:02 +0100 Subject: [PATCH 02/13] Add interactive snapshot picker for diff mode --- README.md | 3 +- pyproject.toml | 1 + scripts/snapshot_test.py | 120 +++++++++++++++++++++++++++++++++++- tests/test_snapshot_test.py | 32 ++++++++++ uv.lock | 11 ++++ 5 files changed, 163 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b27633e6d..36223e0de 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,8 @@ Creates a compact, single-file snapshot for regression testing (`registry + chan uv run -m scripts.snapshot_test uv run -m scripts.snapshot_test --base snapshot.yml --conf snapshot.toml uv run -m scripts.snapshot_test shoot -uv run -m scripts.snapshot_test diff snapshot-2026-03-02-1210-abcd123 +uv run -m scripts.snapshot_test diff +uv run -m scripts.snapshot_test diff snapshot-2026-03-02-1210-abcd123.yml ``` - Default mode (no subcommand): diff --git a/pyproject.toml b/pyproject.toml index 1cd1e9033..7dabd65e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ dependencies = [ "aiohttp>=3.11.18", "inflect>=7.5.0", "packaging>=24.2", + "readchar>=4.2.1", "rich>=13.7", "tzdata; platform_system == 'Windows'", ] diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py index 479955074..08f9afd79 100644 --- a/scripts/snapshot_test.py +++ b/scripts/snapshot_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +from collections.abc import Callable from dataclasses import dataclass from datetime import datetime import difflib @@ -12,7 +13,12 @@ import tomllib from typing import TextIO +from readchar import key as readchar_key +from readchar import readkey from rich.console import Console +from rich.live import Live +from rich.panel import Panel +from rich.text import Text DEFAULT_BASE = "snapshot.yml" @@ -23,7 +29,8 @@ YELLOW_ON_BLACK = "\x1b[33;40m" RESET = "\x1b[0m" -CONSOLE = Console(stderr=True) +STDOUT_CONSOLE = Console() +STDERR_CONSOLE = 
Console(stderr=True) @dataclass @@ -86,10 +93,28 @@ def run_diff(args: argparse.Namespace) -> int: raise SystemExit("diff accepts at most two snapshot files") if not files: - snapshots = sorted(Path.cwd().glob("snapshot-*")) + snapshots = list_available_snapshots() if not snapshots: print("No snapshots found matching 'snapshot-*'.") return 0 + + if len(snapshots) == 1: + left = Path(args.base) + right = snapshots[0] + print(f"Comparing {left} to {right}") + print_snapshot_diff(left, right) + return 0 + + if is_interactive_terminal(): + selected = select_snapshot_interactively(snapshots) + if selected is None: + return 0 + left = Path(args.base) + right = selected + print(f"Comparing {left} to {right}") + print_snapshot_diff(left, right) + return 0 + for path in snapshots: print(path.name) return 0 @@ -106,12 +131,101 @@ def run_diff(args: argparse.Namespace) -> int: return 0 +def list_available_snapshots() -> list[Path]: + candidates = { + path + for pattern in ("snapshot-*.yml", "snapshot-*") + for path in Path.cwd().glob(pattern) + if path.is_file() + } + return sorted(candidates, key=lambda path: path.name) + + +def is_interactive_terminal() -> bool: + return sys.stdin.isatty() and sys.stdout.isatty() + + +def select_snapshot_interactively( + snapshots: list[Path], + key_reader: Callable[[], str] | None = None, + console: Console | None = None, +) -> Path | None: + if not snapshots: + return None + + selected = 0 + key_reader = read_key_action if key_reader is None else key_reader + console = STDOUT_CONSOLE if console is None else console + + with Live( + render_snapshot_selector(snapshots, selected), + console=console, + transient=True, + auto_refresh=False, + ) as live: + while True: + action = key_reader() + + if action == "enter": + return snapshots[selected] + if action in {"q", "esc", "ctrl_c"}: + return None + + next_selected = move_selection(selected, len(snapshots), action) + if next_selected != selected: + selected = next_selected + 
live.update(render_snapshot_selector(snapshots, selected), refresh=True) + + +def render_snapshot_selector(snapshots: list[Path], selected: int) -> Panel: + body = Text("Use ↑/↓ to choose a snapshot, Enter to diff, q to cancel\n\n") + for index, path in enumerate(snapshots): + prefix = "❯" if index == selected else " " + style = "bold cyan" if index == selected else "" + body.append(f"{prefix} {path.name}\n", style=style) + return Panel(body, title="Available snapshots") + + +def move_selection(current: int, total: int, key: str) -> int: + if total <= 0: + return 0 + if key == "up": + return (current - 1) % total + if key == "down": + return (current + 1) % total + return current + + +def read_key_action() -> str: + try: + pressed = readkey() + except KeyboardInterrupt: + return "ctrl_c" + return normalize_key_press(pressed) + + +def normalize_key_press(pressed: str) -> str: + if pressed == readchar_key.UP: + return "up" + if pressed == readchar_key.DOWN: + return "down" + if pressed in {readchar_key.ENTER, "\r", "\n"}: + return "enter" + if pressed == readchar_key.ESC: + return "esc" + if pressed == readchar_key.CTRL_C: + return "ctrl_c" + if pressed.lower() == "q": + return "q" + return "other" + + def create_snapshot_with_spinner( output_path: Path, conf_path: Path, ctx: ShootContext, ) -> None: - with CONSOLE.status("Creating snapshot", spinner="dots"): + with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): create_snapshot(output_path, conf_path, ctx) diff --git a/tests/test_snapshot_test.py b/tests/test_snapshot_test.py index 9af27ab21..b4b6a57f8 100644 --- a/tests/test_snapshot_test.py +++ b/tests/test_snapshot_test.py @@ -1,3 +1,4 @@ +import argparse from datetime import datetime from pathlib import Path import sys @@ -5,11 +6,13 @@ from scripts.snapshot_test import ( ShootContext, load_snapshot_packages, + move_selection, normalize_argv, ordinal, parse_args, print_snapshot_diff, resolve_auto_output_path, + run_diff, run_step, ) @@ -111,6 +114,35 
@@ def test_print_snapshot_diff_hides_unified_file_headers( assert any(line.startswith("@@") for line in lines) +def test_run_diff_with_single_candidate_without_files_shows_diff( + tmp_path: Path, + monkeypatch, + capsys, +) -> None: + base = tmp_path / "snapshot.yml" + candidate = tmp_path / "snapshot-2026-03-02-1253-abc1234.yml" + base.write_text("value: old\n", encoding="utf-8") + candidate.write_text("value: new\n", encoding="utf-8") + monkeypatch.chdir(tmp_path) + + result = run_diff(argparse.Namespace(files=[], base=str(base))) + + out = capsys.readouterr().out + assert result == 0 + assert "Comparing" in out + assert candidate.name in out + assert "@@" in out + + +def test_move_selection_wraps_for_up_and_down() -> None: + assert move_selection(0, 3, "up") == 2 + assert move_selection(2, 3, "down") == 0 + + +def test_move_selection_ignores_unknown_keys() -> None: + assert move_selection(1, 3, "x") == 1 + + def test_ordinal_suffixes() -> None: assert ordinal(1) == "1st" assert ordinal(2) == "2nd" diff --git a/uv.lock b/uv.lock index 04625eec4..be4ba001b 100644 --- a/uv.lock +++ b/uv.lock @@ -402,6 +402,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694, upload-time = "2025-03-25T06:22:27.807Z" }, ] +[[package]] +name = "readchar" +version = "4.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dd/f8/8657b8cbb4ebeabfbdf991ac40eca8a1d1bd012011bd44ad1ed10f5cb494/readchar-4.2.1.tar.gz", hash = "sha256:91ce3faf07688de14d800592951e5575e9c7a3213738ed01d394dcc949b79adb", size = 9685, upload-time = "2024-11-04T18:28:07.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/10/e4b1e0e5b6b6745c8098c275b69bc9d73e9542d5c7da4f137542b499ed44/readchar-4.2.1-py3-none-any.whl", hash 
= "sha256:a769305cd3994bb5fa2764aa4073452dc105a4ec39068ffe6efd3c20c60acc77", size = 9350, upload-time = "2024-11-04T18:28:02.859Z" }, +] + [[package]] name = "rich" version = "14.2.0" @@ -449,6 +458,7 @@ dependencies = [ { name = "aiohttp" }, { name = "inflect" }, { name = "packaging" }, + { name = "readchar" }, { name = "rich" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] @@ -466,6 +476,7 @@ requires-dist = [ { name = "aiohttp", specifier = ">=3.11.18" }, { name = "inflect", specifier = ">=7.5.0" }, { name = "packaging", specifier = ">=24.2" }, + { name = "readchar", specifier = ">=4.2.1" }, { name = "rich", specifier = ">=13.7" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] From 3183944adb57c1866d15d23053d2c783953f70dd Mon Sep 17 00:00:00 2001 From: herr kaste Date: Tue, 3 Mar 2026 12:50:17 +0100 Subject: [PATCH 03/13] Reorder functions --- scripts/snapshot_test.py | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py index 08f9afd79..503ce0ea0 100644 --- a/scripts/snapshot_test.py +++ b/scripts/snapshot_test.py @@ -40,20 +40,6 @@ class ShootContext: commit_subject: str -def parse_args(argv: list[str] | None = None) -> argparse.Namespace: - raw_argv = list(sys.argv[1:] if argv is None else argv) - - if raw_argv and raw_argv[0] == "auto": - return parse_auto_args(raw_argv[1:]) - - if is_auto_mode_argv(raw_argv): - return parse_auto_args(raw_argv) - - parser = build_main_parser() - normalized_argv = normalize_argv(raw_argv) - return parser.parse_args(normalized_argv) - - def main(argv: list[str] | None = None) -> int: args = parse_args(argv) if args.command == "shoot": @@ -220,13 +206,18 @@ def normalize_key_press(pressed: str) -> str: return "other" -def create_snapshot_with_spinner( - output_path: Path, - conf_path: Path, - ctx: ShootContext, -) -> None: - with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): - 
create_snapshot(output_path, conf_path, ctx) +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + raw_argv = list(sys.argv[1:] if argv is None else argv) + + if raw_argv and raw_argv[0] == "auto": + return parse_auto_args(raw_argv[1:]) + + if is_auto_mode_argv(raw_argv): + return parse_auto_args(raw_argv) + + parser = build_main_parser() + normalized_argv = normalize_argv(raw_argv) + return parser.parse_args(normalized_argv) def normalize_argv(argv: list[str]) -> list[str]: @@ -344,6 +335,15 @@ def resolve_auto_output_path( return Path(f"snapshot-{stamp}-{ctx.commit_hash}.yml") +def create_snapshot_with_spinner( + output_path: Path, + conf_path: Path, + ctx: ShootContext, +) -> None: + with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): + create_snapshot(output_path, conf_path, ctx) + + def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> None: names = load_snapshot_packages(conf_path) if not names: From 4f19623c2ca4683e67868b45df2cc5afbd96f72a Mon Sep 17 00:00:00 2001 From: herr kaste Date: Tue, 3 Mar 2026 13:00:34 +0100 Subject: [PATCH 04/13] Better status updates when creating a snapshot --- scripts/snapshot_test.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py index 503ce0ea0..81396c086 100644 --- a/scripts/snapshot_test.py +++ b/scripts/snapshot_test.py @@ -340,11 +340,21 @@ def create_snapshot_with_spinner( conf_path: Path, ctx: ShootContext, ) -> None: - with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): - create_snapshot(output_path, conf_path, ctx) + with STDERR_CONSOLE.status("Creating snapshot", spinner="dots") as status: + create_snapshot( + output_path, + conf_path, + ctx, + update_status=status.update, + ) -def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> None: +def create_snapshot( + output_path: Path, + conf_path: Path, + ctx: ShootContext, + 
update_status: Callable[[str], None], +) -> None: names = load_snapshot_packages(conf_path) if not names: raise ValueError(f"{conf_path} does not contain any snapshot packages.") @@ -360,7 +370,12 @@ def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> No write_log(log_file, f"temp_dir: {temp_dir}") write_log(log_file, f"output: {output_path}") - reduced_registry, channel = build_snapshot_payload(temp_dir, names, log_file) + reduced_registry, channel = build_snapshot_payload( + temp_dir, + names, + log_file, + update_status=update_status, + ) snapshot_text = render_snapshot(ctx, names, reduced_registry, channel) output_path.parent.mkdir(parents=True, exist_ok=True) @@ -391,12 +406,14 @@ def build_snapshot_payload( temp_dir: Path, names: list[str], log_file: TextIO, + update_status: Callable[[str], None], ) -> tuple[dict, dict]: full_registry = temp_dir / "registry-full.json" reduced_registry_path = temp_dir / "registry.json" workspace_path = temp_dir / "workspace.json" channel_path = temp_dir / "channel.json" + update_status("Generating registry") run_step([ sys.executable, "-m", @@ -408,6 +425,7 @@ def build_snapshot_payload( write_log(log_file, f"Reducing registry to {len(names)} configured packages") reduced_registry = write_reduced_registry(full_registry, reduced_registry_path, names) + update_status("Crawling packages") run_step([ sys.executable, "-m", @@ -420,6 +438,7 @@ def build_snapshot_payload( str(max(len(names), 1)), ], log_file) + update_status("Generating final channel.json") run_step([ sys.executable, "-m", From c3baaee82b550666faf43433503eb6d9c9f8e211 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 12:38:28 +0100 Subject: [PATCH 05/13] Add `-` shorthand to crawl.py --- scripts/crawl.py | 32 +++++++++++++++++++++++++++++--- tests/crawl/test_cli_args.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 tests/crawl/test_cli_args.py diff --git 
a/scripts/crawl.py b/scripts/crawl.py index 719035c2c..34332104a 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -837,8 +837,11 @@ def which_hub(url: str) -> str: return "unknown" -def parse_args(): - parser = argparse.ArgumentParser(description="Crawl the registry and update the workspace.") +def parse_args(argv: list[str] | None = None): + parser = argparse.ArgumentParser( + description="Crawl the registry and update the workspace.", + epilog="Numeric shorthand: - sets crawl limit, e.g. -1000 == --limit 1000.", + ) parser.add_argument( "--registry", type=str, @@ -876,7 +879,30 @@ def parse_args(): default=".", help="Working directory to resolve file paths (default: .)" ) - return parser.parse_args() + normalized_argv = normalize_limit_argv(sys.argv[1:] if argv is None else argv) + if count_limit_occurrences(normalized_argv) > 1: + parser.error("--limit/-n can only be specified once") + return parser.parse_args(normalized_argv) + + +def normalize_limit_argv(argv: list[str]) -> list[str]: + normalized = [] + for arg in argv: + if re.fullmatch(r"-\d+", arg): + normalized.extend(["--limit", arg[1:]]) + continue + normalized.append(arg) + return normalized + + +def count_limit_occurrences(argv: list[str]) -> int: + count = 0 + for arg in argv: + if arg in {"--limit", "-n"}: + count += 1 + elif arg.startswith("--limit="): + count += 1 + return count def env_flag(name: str, default: bool = False) -> bool: diff --git a/tests/crawl/test_cli_args.py b/tests/crawl/test_cli_args.py new file mode 100644 index 000000000..be3b92651 --- /dev/null +++ b/tests/crawl/test_cli_args.py @@ -0,0 +1,36 @@ +import pytest + +from scripts.crawl import normalize_limit_argv, parse_args + + +def test_normalize_limit_argv_rewrites_numeric_shorthand() -> None: + assert normalize_limit_argv(["--presto", "-1000", "--name", "Example"]) == [ + "--presto", + "--limit", + "1000", + "--name", + "Example", + ] + + +def test_parse_args_accepts_numeric_shorthand_limit() -> None: + args = 
parse_args(["-1000"]) + + assert args.limit == 1000 + + +def test_parse_args_still_accepts_explicit_limit() -> None: + args = parse_args(["--limit", "75"]) + + assert args.limit == 75 + + +def test_parse_args_rejects_duplicate_limit_flags() -> None: + with pytest.raises(SystemExit): + parse_args(["-n", "100", "-n", "75"]) + + + +def test_parse_args_rejects_shorthand_plus_limit_flag() -> None: + with pytest.raises(SystemExit): + parse_args(["-100", "--limit", "24"]) From 2f760de56c4a7a9de46733084aa4f293927d8a78 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 13:00:43 +0100 Subject: [PATCH 06/13] Allow omitting standard info for package releases * Allow dismissing "releases" completely => allow all builds and set tags:true * Omitting "tags" and "branch" and "asset" => set tags: true --- scripts/crawl.py | 9 +++ tests/crawl/test_basic.py | 130 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/scripts/crawl.py b/scripts/crawl.py index 34332104a..1a7a2e239 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -714,6 +714,12 @@ def normalize_release_definition( repo_url: str, details: str | None = None ): + if not releases: + releases.append({ + "sublime_text": "*", + "tags": True + }) + for r in releases[:]: r.setdefault("platforms", ["*"]) if isinstance(r["platforms"], str): @@ -725,6 +731,9 @@ def normalize_release_definition( err(f"sublime_text as a list is only valid in conjunction with 'asset', {repo_url}") releases.remove(r) + if r.keys().isdisjoint({"url", "asset", "branch", "tags"}): + r["tags"] = True + if base := r.get("base", details): r["base"] = resolve_url(repo_url, base) diff --git a/tests/crawl/test_basic.py b/tests/crawl/test_basic.py index d1d3ff84c..fad436e4f 100644 --- a/tests/crawl/test_basic.py +++ b/tests/crawl/test_basic.py @@ -299,6 +299,136 @@ async def test_accept_stylized_dates_for_static_releases( assert release["date"] == date_expected +@pytest.mark.asyncio +async def 
test_missing_release_definitions_default_to_tags(set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "ImplicitRelease", + "details": "https://github.com/example/implicit-release", + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_implicitrelease", + "name": "ImplicitRelease", + "description": "Fixture package with implicit release definition", + "homepage": "https://github.com/example/implicit-release", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/implicit-release/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "1.2.3", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/implicit-release/zip/1.2.3" + } + ], + "branches": [ + { + "name": "main", + "version": "2024.05.11.12.00.00", + "sha": "def456", + "date": "2024-05-11T12:00:00Z", + "url": "https://codeload.github.com/example/implicit-release/zip/main" + } + ] + } + + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("ImplicitRelease") + assert package is not None + + releases = package.get("releases", []) + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "*" + assert releases[0]["platforms"] == ["*"] + assert releases[0]["version"] == "1.2.3" + assert releases[0]["url"].endswith("/1.2.3") + + +@pytest.mark.asyncio +async def test_release_without_asset_or_branch_defaults_to_tags(set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + 
"name": "AutoTags", + "details": "https://github.com/example/auto-tags", + "releases": [ + { + "sublime_text": "*" + } + ], + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_autotags", + "name": "AutoTags", + "description": "Fixture package with implicit tags", + "homepage": "https://github.com/example/auto-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/auto-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "2.0.0", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/auto-tags/zip/2.0.0" + } + ], + "branches": [ + { + "name": "main", + "version": "2024.05.11.12.00.00", + "sha": "def456", + "date": "2024-05-11T12:00:00Z", + "url": "https://codeload.github.com/example/auto-tags/zip/main" + } + ] + } + + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("AutoTags") + assert package is not None + + release = package["releases"][0] + assert release["version"] == "2.0.0" + assert release["url"].endswith("/2.0.0") + + @pytest.mark.asyncio async def test_prerelease_tag_does_not_use_branch_fallback(set_now, set_github_info, capsys): registry = { From ee741f2672bd217d6aeb60c4aa59368b49ff065a Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 13:00:57 +0100 Subject: [PATCH 07/13] Add regression tests for `normalize_release_definition` --- .../test_normalize_release_definition.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 tests/crawl/test_normalize_release_definition.py diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py new file mode 100644 index 
000000000..c907449f2 --- /dev/null +++ b/tests/crawl/test_normalize_release_definition.py @@ -0,0 +1,141 @@ +import pytest + +from scripts.crawl import normalize_release_definition + + +REPO_URL = "https://raw.githubusercontent.com/example/channel/main/repository.json" + + +def test_adds_synthetic_release_when_missing() -> None: + releases: list[dict] = [] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/implicit-release", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "*" + assert releases[0]["platforms"] == ["*"] + assert releases[0]["tags"] is True + assert releases[0]["base"] == "https://github.com/example/implicit-release" + + +def test_autofills_tags_when_asset_and_branch_missing() -> None: + releases = [{"sublime_text": "*"}] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/auto-tags", + ) + + assert releases[0]["tags"] is True + assert releases[0]["base"] == "https://github.com/example/auto-tags" + + +def test_does_not_autofill_tags_for_static_url_release() -> None: + releases = [{ + "sublime_text": "*", + "url": "https://example.com/pkg.zip", + "version": "1.2.3", + "date": "2024-05-10T12:00:00Z", + }] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/static-release", + ) + + assert "tags" not in releases[0] + assert releases[0]["url"] == "https://example.com/pkg.zip" + + +@pytest.mark.parametrize("field", ["asset", "branch", "tags"]) +def test_does_not_overwrite_existing_release_source(field: str) -> None: + releases = [{field: True}] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/source-is-already-defined", + ) + + assert "tags" not in releases[0] or field == "tags" + + +def test_normalizes_platforms_string_to_list() -> None: + releases = [{"platforms": "linux", "tags": True}] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + assert 
releases[0]["platforms"] == ["linux"] + assert releases[0]["sublime_text"] == "*" + + +def test_removes_invalid_sublime_text_list_without_asset(capsys) -> None: + releases = [{"sublime_text": ["*"], "tags": True}] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + err = capsys.readouterr().err + assert "sublime_text as a list is only valid in conjunction with 'asset'" in err + assert releases == [] + + +def test_resolves_relative_base_url() -> None: + releases = [{"base": "./repo", "tags": True}] + + normalize_release_definition(releases, REPO_URL) + + assert releases[0]["base"] == "https://raw.githubusercontent.com/example/channel/main/repo" + + +def test_resolves_and_updates_download_url() -> None: + releases = [{ + "url": "https://nodeload.github.com/example/pkg/zipball/main", + "version": "1.2.3", + "date": "2024-05-10T12:00:00Z", + }] + + normalize_release_definition(releases, REPO_URL) + + assert releases[0]["url"] == "https://codeload.github.com/example/pkg/zip/main" + + +@pytest.mark.parametrize( + ("date_input", "date_expected"), + [ + ("2024-05-10 12:00", "2024-05-10T12:00:00Z"), + ("2024-05-10", "2024-05-10T00:00:00Z"), + ], +) +def test_normalizes_stylized_dates( + date_input: str, + date_expected: str, +) -> None: + releases = [{ + "url": "https://example.com/pkg.zip", + "version": "1.2.3", + "date": date_input, + }] + + normalize_release_definition(releases, REPO_URL) + + assert releases[0]["date"] == date_expected + + +def test_removes_release_with_invalid_date(capsys) -> None: + releases = [{ + "url": "https://example.com/pkg.zip", + "version": "1.2.3", + "date": "May 10, 2024", + }] + + normalize_release_definition(releases, REPO_URL) + + err = capsys.readouterr().err + assert "date May 10, 2024 is not formatted correctly" in err + assert releases == [] From 0467c841440bbf6e165d4a9d5ba034fefa6a85f8 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:11:16 +0100 Subject: [PATCH 08/13] Support 
constrained tag releases Add version-constraint support for tag-based release resolution by using normalize_version_spec and SpecifierSet when a version constraint is present. For unconstrained tags, keep the legacy strict semver parsing path. If the user uses version constraints, maybe synthesize an automatic open-ended tags release. This is so that the user only has to configure frozen, left-behind version/st_build pairs while keeping the standard newest tag for newest build semantic. E.g. ``` "releases": [ { "sublime_text": "<4000", "version": "2.5.*" } ], ``` In the example, the package defines an old version for older sublime builds. Here we would add an open-ended release definition, e.g. ``` { "sublime_text": ">=4000", "tags": true } ``` --- scripts/crawl.py | 204 ++++++++++++++---- tests/crawl/test_basic.py | 203 +++++++++++++++++ .../test_normalize_release_definition.py | 126 ++++++++++- 3 files changed, 485 insertions(+), 48 deletions(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index 1a7a2e239..b3453c05d 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -4,14 +4,16 @@ from collections import defaultdict from copy import deepcopy from datetime import datetime, timedelta, timezone +from functools import partial from itertools import product import json import os import re import sys from typing import Literal, Mapping, NotRequired, Required, TypedDict -from packaging.specifiers import SpecifierSet +import packaging +from packaging.specifiers import SpecifierSet from .bitbucket import fetch_bitbucket_info, RepoInfo as BitbucketRepoInfo from .generate_registry import Registry, PackageEntry as RegistryEntry @@ -26,7 +28,10 @@ normalize_st_build, normalize_version_spec, ) -from ._utils import next_run, parse_version, resolve_url, update_url, write_json, pl, pick +from ._utils import ( + next_run, parse_version, resolve_url, update_url, write_json, pl, pick, + VersionInfo +) import traceback @@ -494,6 +499,11 @@ async def resolve_tags( return [], 
None tag_prefix = "" if tag_definition is True else tag_definition + version_set = None + if version_spec := definition.get("version"): + normalized_spec = normalize_version_spec(version_spec) + version_set = SpecifierSet(normalized_spec) if normalized_spec else None + resolved_releases: list[Release] = [] now = datetime.now(timezone.utc) cutoff = now - timedelta(weeks=53) @@ -504,32 +514,50 @@ async def resolve_tags( prerelease_found: str | None = None found_final = False async for tag in info["tags"]: - if ( - tag["name"].startswith(tag_prefix) - and (version_string := ( - tag["name"].removeprefix(tag_prefix) + tag_name = tag["name"] + + is_prerelease = False + if version_set: + # For constrained tags, use packaging.Version against SpecifierSet. + tag_match = match_tag_version(tag_name, tag_prefix) + if not tag_match: + continue + version: packaging.version.Version + version, version_string = tag_match + if not version_set.contains(version, prereleases=True): + continue + is_final_version = not (version.is_prerelease or version.is_devrelease) + is_prerelease = version.is_prerelease + else: + # Standard tag semantics use our custom, strict semver parser. 
+ if not tag_name.startswith(tag_prefix): + continue + version_string = ( + tag_name.removeprefix(tag_prefix) if tag_prefix - else tag["name"].removeprefix("v") - )) - and (version := parse_version(version_string)) - ): - tag_date = datetime.strptime(tag["date"], UTC_FORMAT).replace(tzinfo=timezone.utc) - if tag_date < cutoff and found_final: - break + else tag_name.removeprefix("v") + ) + version_: VersionInfo | None + version_ = parse_version(version_string) + if not version_: + continue + is_final_version = version_.is_final + is_prerelease = version_.is_prerelease - if tag_date >= cutoff or ( - version.is_final or - (version.is_prerelease and not prerelease_found) - ): - r_ = deepcopy(definition) - r_.pop("tags") - r_ |= pick(("url", "date"), tag) - r_ |= {"version": version_string} - resolved_releases.append(r_) # type: ignore[arg-type] - if version.is_final: - found_final = True - elif version.is_prerelease: - prerelease_found = version_string + tag_date = datetime.strptime(tag["date"], UTC_FORMAT).replace(tzinfo=timezone.utc) + if tag_date < cutoff and found_final: + break + + if tag_date >= cutoff or (is_final_version or (is_prerelease and not prerelease_found)): + r_ = deepcopy(definition) + r_.pop("tags") + r_ |= pick(("url", "date"), tag) + r_ |= {"version": version_string} + resolved_releases.append(r_) # type: ignore[arg-type] + if is_final_version: + found_final = True + elif is_prerelease: + prerelease_found = version_string if found_final: return resolved_releases, None @@ -714,38 +742,120 @@ def normalize_release_definition( repo_url: str, details: str | None = None ): + normalize_ = partial(normalize_release_entry, releases, repo_url, details) + if not releases: releases.append({ "sublime_text": "*", "tags": True }) - for r in releases[:]: - r.setdefault("platforms", ["*"]) - if isinstance(r["platforms"], str): - r["platforms"] = [r["platforms"]] + auto_release = maybe_make_auto_open_ended_tags_release(releases) - r.setdefault("sublime_text", "*") 
- if isinstance(r["sublime_text"], list): - if "asset" not in r: - err(f"sublime_text as a list is only valid in conjunction with 'asset', {repo_url}") - releases.remove(r) + for release in releases[:]: + normalize_(release) - if r.keys().isdisjoint({"url", "asset", "branch", "tags"}): - r["tags"] = True + if auto_release: + normalize_(auto_release) + releases.append(auto_release) - if base := r.get("base", details): - r["base"] = resolve_url(repo_url, base) - if "url" in r: - r["url"] = update_url(resolve_url(repo_url, r["url"])) +def normalize_release_entry( + releases: list[ReleaseDescription], + repo_url: str, + details: str | None, + release: ReleaseDescription, +) -> None: + release.setdefault("platforms", ["*"]) + if isinstance(release["platforms"], str): + release["platforms"] = [release["platforms"]] + + release.setdefault("sublime_text", "*") + if isinstance(release["sublime_text"], list) and "asset" not in release: + err(f"sublime_text as a list is only valid in conjunction with 'asset', {repo_url}") + releases.remove(release) + return - if "date" in r: - try: - r["date"] = normalize_datetime_str(r["date"]) - except ValueError: - err(f"date {r['date']} is not formatted correctly, {repo_url}") - releases.remove(r) + if release.keys().isdisjoint({"url", "asset", "branch", "tags"}): + release["tags"] = True + + if base := release.get("base", details): + release["base"] = resolve_url(repo_url, base) + + if "url" in release: + release["url"] = update_url(resolve_url(repo_url, release["url"])) + + if "date" in release: + try: + release["date"] = normalize_datetime_str(release["date"]) + except ValueError: + err(f"date {release['date']} is not formatted correctly, {repo_url}") + releases.remove(release) + + +def maybe_make_auto_open_ended_tags_release( + releases: list[ReleaseDescription], +) -> ReleaseDescription | None: + max_build = -1 + + for release in releases: + if not release.get("version"): + return None + + if "url" in release or "asset" in release or 
"branch" in release: + return None + + if release.get("tags") is True: + return None + + st_max = parse_sublime_text_max(release.get("sublime_text")) + if st_max == float("inf"): + return None + + max_build = max(max_build, int(st_max)) + + if max_build < 0: + return None + + return { + "sublime_text": f">{max_build}", + "tags": True, + } + + +def parse_sublime_text_max(selector) -> float: + if not isinstance(selector, str): + return float("inf") + + s = re.sub(r"\s+", "", selector) + if s in ("", "*"): + return float("inf") + + range_index = s.find("-") + if range_index != -1: + right = s[range_index + 1:] + n = parse_int_prefix(right) + return float(n) if n is not None else float("inf") + + if s.startswith("<="): + n = parse_int_prefix(s[2:]) + return float(n) if n is not None else float("inf") + + if s.startswith("<"): + n = parse_int_prefix(s[1:]) + return float(max(0, n - 1)) if n is not None else float("inf") + + if s.startswith(">=") or s.startswith(">"): + return float("inf") + + n = parse_int_prefix(s) + return float(n) if n is not None else float("inf") + + +def parse_int_prefix(text: str) -> int | None: + if match := re.match(r"^\d+", text): + return int(match.group(0)) + return None def compile_release_asset_pattern( diff --git a/tests/crawl/test_basic.py b/tests/crawl/test_basic.py index fad436e4f..ed2b7f074 100644 --- a/tests/crawl/test_basic.py +++ b/tests/crawl/test_basic.py @@ -429,6 +429,209 @@ async def test_release_without_asset_or_branch_defaults_to_tags(set_github_info) assert release["url"].endswith("/2.0.0") +@pytest.mark.asyncio +async def test_version_constrained_tags_and_auto_open_ended_release(set_now, set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "ConstrainedTags", + "details": "https://github.com/example/constrained-tags", + "releases": [ + { + "sublime_text": "<4000", + "version": "<3.0.0" + } 
+ ], + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_constrainedtags", + "name": "ConstrainedTags", + "description": "Fixture package with constrained tags", + "homepage": "https://github.com/example/constrained-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/constrained-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "3.1.0", + "sha": "sha310", + "date": "2025-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-tags/zip/3.1.0" + }, + { + "name": "2.9.9", + "sha": "sha299", + "date": "2024-01-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-tags/zip/2.9.9" + } + ], + "branches": [ + { + "name": "main", + "version": "2025.05.10.12.00.00", + "sha": "def456", + "date": "2025-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-tags/zip/main" + } + ] + } + + set_now("2025-08-13T21:44:16Z") + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("ConstrainedTags") + assert package is not None + + by_st = {release["sublime_text"]: release for release in package["releases"]} + assert by_st["<4000"]["version"] == "2.9.9" + assert by_st[">3999"]["version"] == "3.1.0" + + +@pytest.mark.asyncio +async def test_unconstrained_tags_keep_legacy_semver_parsing(set_now, set_github_info, capsys): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "LegacySemverTags", + "details": "https://github.com/example/legacy-semver-tags", + "releases": [ + { + "sublime_text": "*", + "tags": True + } + ], + "source": 
"https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_legacysemvertags", + "name": "LegacySemverTags", + "description": "Fixture package for legacy semver tags", + "homepage": "https://github.com/example/legacy-semver-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/legacy-semver-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "1.0rc1", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/legacy-semver-tags/zip/1.0rc1" + } + ], + "branches": [ + { + "name": "main", + "version": "2024.05.10.12.00.00", + "sha": "def456", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/legacy-semver-tags/zip/main" + } + ] + } + + set_now("2024-05-11T00:00:00Z") + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + err = capsys.readouterr().err + assert ( + "No valid version found for https://github.com/example/legacy-semver-tags. " + "Falling back to tip of main." 
+ ) in err + + package = workspace["packages"].get("LegacySemverTags") + assert package is not None + assert package["releases"][0]["version"] == "2024.05.10.12.00.00" + + +@pytest.mark.asyncio +async def test_constrained_tags_use_packaging_version_parsing(set_now, set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "ConstrainedPep440Tags", + "details": "https://github.com/example/constrained-pep440-tags", + "releases": [ + { + "sublime_text": "*", + "tags": True, + "version": ">=1.0rc1" + } + ], + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_constrainedpep440tags", + "name": "ConstrainedPep440Tags", + "description": "Fixture package for constrained pep440 tags", + "homepage": "https://github.com/example/constrained-pep440-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/constrained-pep440-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "1.0rc1", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-pep440-tags/zip/1.0rc1" + } + ], + "branches": [] + } + + set_now("2024-05-11T00:00:00Z") + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("ConstrainedPep440Tags") + assert package is not None + assert package["releases"][0]["version"] == "1.0rc1" + + @pytest.mark.asyncio async def test_prerelease_tag_does_not_use_branch_fallback(set_now, set_github_info, capsys): registry = { diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py index 
c907449f2..b318d5118 100644 --- a/tests/crawl/test_normalize_release_definition.py +++ b/tests/crawl/test_normalize_release_definition.py @@ -1,6 +1,6 @@ import pytest -from scripts.crawl import normalize_release_definition +from scripts.crawl import normalize_release_definition, parse_sublime_text_max REPO_URL = "https://raw.githubusercontent.com/example/channel/main/repository.json" @@ -66,6 +66,130 @@ def test_does_not_overwrite_existing_release_source(field: str) -> None: assert "tags" not in releases[0] or field == "tags" +@pytest.mark.parametrize( + ("selector", "expected"), + [ + (None, float("inf")), + ("", float("inf")), + ("*", float("inf")), + (" * ", float("inf")), + ("3092", 3092), + ("3092 - 4000", 4000), + ("3092-4000", 4000), + ("<3092", 3091), + ("<=3092", 3092), + (">3092", float("inf")), + (">=3092", float("inf")), + (" >= 4075 ", float("inf")), + ("> 4075", float("inf")), + ("n/a", float("inf")), + ], +) +def test_parse_sublime_text_max(selector, expected: float) -> None: + assert parse_sublime_text_max(selector) == expected + + +def test_adds_open_ended_tags_release_for_version_constrained_tags() -> None: + releases = [{ + "sublime_text": "<4000", + "version": "<3.0.0", + }] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 2 + first, second = releases + assert first["tags"] is True + assert first["sublime_text"] == "<4000" + assert second["tags"] is True + assert second["sublime_text"] == ">3999" + + +def test_does_not_add_open_ended_when_unconstrained_tags_exists() -> None: + releases = [ + {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, + {"sublime_text": ">4000", "tags": True}, + ] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 2 + + +def test_does_not_add_open_ended_without_any_version_key() -> None: + releases = [{"sublime_text": "3000 - 4000"}] + + 
normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "3000 - 4000" + assert releases[0]["tags"] is True + + +def test_does_not_add_open_ended_for_empty_version() -> None: + releases = [{"sublime_text": "3000 - 4000", "version": ""}] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "3000 - 4000" + + +def test_does_not_add_open_ended_when_any_release_lacks_version() -> None: + releases = [ + {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, + {"sublime_text": "4001 - 4200"}, + ] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 2 + + +def test_does_not_add_open_ended_when_branch_or_asset_exists() -> None: + releases = [ + {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, + {"sublime_text": "*", "branch": True}, + ] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + assert len(releases) == 2 + + +def test_does_not_add_open_ended_for_static_releases() -> None: + releases = [{ + "sublime_text": "*", + "version": "1.2.3", + "url": "https://example.com/pkg.zip", + "date": "2024-05-10T12:00:00Z", + }] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + assert len(releases) == 1 + + def test_normalizes_platforms_string_to_list() -> None: releases = [{"platforms": "linux", "tags": True}] From 43f467514cb46da66405a356d0e9ad019950824d Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:19:07 +0100 Subject: [PATCH 09/13] Normalize dynamic version specs early Move normalize_version_spec into normalize_release_entry for dynamic release definitions, so constraints are canonicalized once during the normalize phase. 
This allows treating "*" as non-constraining ("") as per docs. --- scripts/crawl.py | 9 +++++---- tests/crawl/test_normalize_release_definition.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index b3453c05d..7aa6c2770 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -501,8 +501,7 @@ async def resolve_tags( tag_prefix = "" if tag_definition is True else tag_definition version_set = None if version_spec := definition.get("version"): - normalized_spec = normalize_version_spec(version_spec) - version_set = SpecifierSet(normalized_spec) if normalized_spec else None + version_set = SpecifierSet(version_spec) resolved_releases: list[Release] = [] now = datetime.now(timezone.utc) @@ -621,8 +620,7 @@ async def resolve_assets( spec_set = None if version_spec := definition.get("version"): - normalized_spec = normalize_version_spec(version_spec) - spec_set = SpecifierSet(normalized_spec) if normalized_spec else None + spec_set = SpecifierSet(version_spec) resolved_releases: list[Release] = [] async for release in info["releases"]: # type: ignore[typeddict-item] @@ -779,6 +777,9 @@ def normalize_release_entry( if release.keys().isdisjoint({"url", "asset", "branch", "tags"}): release["tags"] = True + if "url" not in release and (version_spec := release.get("version")): + release["version"] = normalize_version_spec(version_spec) + if base := release.get("base", details): release["base"] = resolve_url(repo_url, base) diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py index b318d5118..d1f623c4f 100644 --- a/tests/crawl/test_normalize_release_definition.py +++ b/tests/crawl/test_normalize_release_definition.py @@ -51,6 +51,22 @@ def test_does_not_autofill_tags_for_static_url_release() -> None: assert "tags" not in releases[0] assert releases[0]["url"] == "https://example.com/pkg.zip" + assert releases[0]["version"] == "1.2.3" + + +def 
test_normalizes_version_constraint_for_dynamic_release() -> None: + releases = [{ + "sublime_text": "<4000", + "version": "2.5.*", + }] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/version-spec", + ) + + assert releases[0]["version"] == "==2.5.*" @pytest.mark.parametrize("field", ["asset", "branch", "tags"]) From 4fafc1afe04743a537c98c340bf693475e80489d Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:22:26 +0100 Subject: [PATCH 10/13] Treat wildcard version as unconstrained for auto open-ended synthesis When deciding whether to synthesize an automatic open-ended tags release, handle version="*" the same as an empty/missing version constraint. This avoids generating a synthetic >max release from definitions that are not actually version-constrained before normalization runs. --- scripts/crawl.py | 2 +- tests/crawl/test_normalize_release_definition.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index 7aa6c2770..f9da2da04 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -800,7 +800,7 @@ def maybe_make_auto_open_ended_tags_release( max_build = -1 for release in releases: - if not release.get("version"): + if release.get("version", "") in ("*", ""): return None if "url" in release or "asset" in release or "branch" in release: diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py index d1f623c4f..74689f2f7 100644 --- a/tests/crawl/test_normalize_release_definition.py +++ b/tests/crawl/test_normalize_release_definition.py @@ -167,6 +167,19 @@ def test_does_not_add_open_ended_for_empty_version() -> None: assert releases[0]["sublime_text"] == "3000 - 4000" +def test_does_not_add_open_ended_for_star_version() -> None: + releases = [{"sublime_text": "3000 - 4000", "version": "*"}] + + normalize_release_definition( + releases, + REPO_URL, + 
"https://github.com/example/constrained-tags", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "3000 - 4000" + + def test_does_not_add_open_ended_when_any_release_lacks_version() -> None: releases = [ {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, From f231a273907e9e24f5c657a0f5fc62d949becfc9 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:37:47 +0100 Subject: [PATCH 11/13] Add pack-spec.md for package release definition behavior --- pack-spec.md | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 pack-spec.md diff --git a/pack-spec.md b/pack-spec.md new file mode 100644 index 000000000..1c3ab26c4 --- /dev/null +++ b/pack-spec.md @@ -0,0 +1,311 @@ +# repository.json spec + +This document describes the user-facing `repository.json` format used to define +packages and their release sources. + +It intentionally documents what this crawler supports today. +`example-repository.json` from Package Control is good inspiration, but broader. + +## Top-level structure + +`repository.json` is a JSON object with these fields: + +```json +{ + "$schema": "sublime://packagecontrol.io/schemas/repository", + "schema_version": "4.0.0", + "packages": [ ... ], + "libraries": [] +} +``` + +Only `packages` is relevant for this document. + +## Package metadata + +Each package entry contains metadata and a list of release definitions: + +```json +{ + "name": "GitSavvy", + "details": "https://github.com/timbrel/GitSavvy", + "labels": ["vcs"], + "releases": [ ... ] +} +``` + +Common fields: +- `name` (string, usually required): Package name shown in Package Control. + If omitted, name is derived from `details` where possible. +- `details` (string, optional but common): Repository URL used to fetch metadata. +- `description`, `author`, `homepage`, `readme`, `issues`, `donate`, `buy` + (optional): Explicit metadata overrides. +- `labels` (list, optional): Search/category labels. 
+- `previous_names` (list, optional): Old package names for rename migration. +- `releases` (list, optional): One or more release definitions. + +If `releases` is missing, a synthetic default release is created: + +```json +{ + "sublime_text": "*", + "tags": true +} +``` + +A minimal definition the crawler understands is: + +```json +{ + "details": "https://github.com/timbrel/GitSavvy", +} +``` + +However, adding a name really helps in readability. + +## Release definitions + +For packages, there are three dynamic release modes: + +1. `tags` (normal/default) +2. `branch` +3. `asset` + +There is also static/manual form (`url` + `version` + `date`) which is treated +as already fulfilled and passed through. + +--- + +### 1) Tags mode (default) + +Minimal explicit tags release: + +```json +{ + "sublime_text": "*", + "tags": true +} +``` + +If a release has none of `url`, `asset`, `branch`, `tags`, we autofill +`"tags": true`. + +`tags` values: +- `true`: all tags are considered, a possible prefix "v" is stripped automatically +- string prefix: only tags starting with that prefix are considered + +Examples: + +```json +{ "sublime_text": "<4000", "tags": "st3-" } +``` + +```json +{ "sublime_text": ">=4000", "tags": true } +``` + +#### Tag parsing behavior + +Without a `version` constraint, tags use strict semver parsing +(`major.minor.patch`, optional `-prerelease`, optional `+build`). + +With a `version` constraint, parsing uses PEP 440 (`packaging.Version`), which +allows versions like `1.0rc1`. See below for examples. + +Example: + +```json +[ + { + "sublime_text": "<4000", + "version": "2.5.*" + } +] +``` + +#### Rolling window behavior + +Tags mode keeps all matching tags from the recent rolling window +(about 53 weeks). +If that window does not contain a final release, we still keep a leading +prerelease when present. + +If no valid tag can be resolved, tags mode reports an error and can fall back +to branch mode (see below). 
+ +--- + +### 2) Branch mode + +Branch-based release definitions: + +```json +{ + "sublime_text": "*", + "branch": true +} +``` + +```json +{ + "sublime_text": "*", + "branch": "dev" +} +``` + +`branch` values: +- `true`: use repository default branch (`default_branch`, fallback `master`) +- string: use that branch name + +Resolved branch releases get their version synthesized from branch date, +e.g. `2024-05-10T12:00:00Z` -> `2024.05.10.12.00.00`. + +#### Tags -> branch fallback + +If a release definition is tag-based and no valid tag is found, +the crawler tries branch resolution for the same definition. + +- If `branch` is set, that branch is used for fallback. +- If `branch` is not set, fallback goes to default branch (`branch: true`). + +This fallback is per release definition. + +--- + +### 3) Asset mode + +Asset mode resolves downloadable artifacts from hosted release assets +(currently GitHub release assets in practice). + +Minimal asset release: + +```json +{ + "asset": "A File Icon.sublime-package" +} +``` + +Commonly with wildcards: + +```json +{ + "asset": "*.sublime-package" +} +``` + +Asset patterns are glob-like strings: +- `*` matches any sequence +- `?` matches a single character + +Supported placeholders in package asset patterns: +- `${version}`: resolved tag version +- `${st_build}`: normalized `sublime_text` build marker +- `${platform}`: platform token (`*` becomes `any`) + +Example with placeholders: + +```json +{ + "asset": "Less-${version}-st${st_build}.sublime-package", + "sublime_text": ["4107 - 4148", ">=4149"] +} +``` + +Asset mode resolves a target matrix of `platforms x sublime_text` and picks the +first matching asset per target (newest releases first). +Different targets may resolve to different versions if needed. + +If targets remain unresolved, the crawler logs which `(platform, st_build)` +combinations are missing. + +> Note: asset mode does **not** fall back to branch mode when assets are missing. 
+ +--- + +### Static/manual releases + +A release containing `url`, `version`, and `date` is considered fulfilled: + +```json +{ + "sublime_text": "*", + "platforms": ["*"], + "version": "1.2.3", + "url": "https://example.com/my-package.zip", + "date": "2024-05-10T12:00:00Z" +} +``` + +Accepted `date` input formats are normalized to UTC form: +- `YYYY-MM-DDTHH:MM:SSZ` +- `YYYY-MM-DD HH:MM:SS` +- `YYYY-MM-DD HH:MM` +- `YYYY-MM-DD` + +## Constraints and defaults + +Common release fields: +- `sublime_text` (string, optional): default `"*"`. +- `platforms` (string or list, optional): default `["*"]`. +- `base` (string, optional): release source URL; defaults to package `details`. +- `tags` (bool or string, optional): tag mode and optional prefix. +- `branch` (bool or string, optional): branch mode. +- `asset` (string, optional): asset mode pattern. +- `version` (string, optional): version filter for dynamic modes. +- `url`, `date` (string, static mode): explicit resolved release. + +`version` normalization: +- `"*"` or empty -> no constraint +- bare versions/prefixes are normalized with `==` + - `"2.5.*" -> "==2.5.*"` + - `"1.2.3" -> "==1.2.3"` +- full specifiers are kept, e.g. `">=2,<3"` + +Supported `sublime_text` selectors include: +- `"*"` +- exact build (`"4147"`) +- comparisons (`">4147"`, `">=4147"`, `"<4147"`, `"<=4147"`) +- inclusive ranges (`"3154 - 4069"`) + +`sublime_text` as a list is only valid for `asset` releases. + +Common `platforms` values seen in package definitions: +- `*`, `windows`, `osx`, `linux` +- and architecture-specific forms like + `windows-x64`, `windows-x32`, `osx-x64`, `osx-arm64`, `linux-x64`, `linux-arm64` + +## Automatic open-ended tags release (for constrained tags) + +When all release definitions are version-constrained tag releases with bounded +`sublime_text` ranges, the crawler may append a synthetic open-ended tags +release. 
+ +Example input: + +```json +[ + { + "sublime_text": "<4000", + "version": "<3.0.0" + } +] +``` + +becomes effectively: + +```json +[ + { + "sublime_text": "<4000", + "version": "<3.0.0", + "tags": true + }, + { + "sublime_text": ">3999", + "tags": true + } +] +``` + +This avoids leaving newer Sublime builds without a release definition +and effectively keeps the releases section short. From a4f966143ccb75f8dd042be645379faa4405fb13 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 16:21:06 +0100 Subject: [PATCH 12/13] Add `crawl --explain` sub-command --- README.md | 2 + pack-spec.md | 3 + scripts/_explain_package.py | 135 +++++++++++++++++++++++++++++++++++ scripts/crawl.py | 80 +++++++++++++++++---- tests/crawl/test_cli_args.py | 12 +++- 5 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 scripts/_explain_package.py diff --git a/README.md b/README.md index 36223e0de..4213fd3df 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ stores it in a workspace file (`workspace.json`). Supports crawling all packages, or a single package via the `--name` option. Use `--presto` (or set `PRESTO_PRESTO_CRAWL=1`) to bypass `next_crawl` scheduling and fast-forward the workspace by crawling up to `--limit` packages. +Use `--explain` to show the normalized package entry for a package - Integrates with GitHub, GitLab, and Bitbucket APIs to fetch detailed info and releases. - Requires a valid `GITHUB_TOKEN` in your environment for GitHub API access because GitHub's GraphQl @@ -88,6 +89,7 @@ and fast-forward the workspace by crawling up to `--limit` packages. 
```bash $ GITHUB_TOKEN=ghp_yourgithubtokenhere uv run -m scripts.crawl $ uv run -m scripts.crawl --name GitSavvy +$ uv run -m scripts.crawl --explain GitSavvy ``` --- diff --git a/pack-spec.md b/pack-spec.md index 1c3ab26c4..fe61210c1 100644 --- a/pack-spec.md +++ b/pack-spec.md @@ -63,6 +63,9 @@ A minimal definition the crawler understands is: However, adding a name really helps in readability. +Tip: Use `$ uv run -m scripts.crawl --explain GitSavvy` to show the + normalized/expanded package definition. + ## Release definitions For packages, there are three dynamic release modes: diff --git a/scripts/_explain_package.py b/scripts/_explain_package.py new file mode 100644 index 000000000..51b7cfdd0 --- /dev/null +++ b/scripts/_explain_package.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from difflib import SequenceMatcher +import json +from typing import Any + +from rich import box +from rich.console import Console +from rich.table import Table +from rich.text import Text + + +ADDED_STYLE = "yellow on black" +REMOVED_STYLE = "red on black" + + +def print_package_explain( + name: str, + original: dict[str, Any], + normalized: dict[str, Any], + console: Console | None = None, +) -> None: + console = console or Console() + console.print() + console.rule("Left: registry entry | Right: normalized entry") + + _render_json_diff_table( + title="", + left_obj=original, + right_obj=normalized, + console=console, + ) + + +def _render_json_diff_table( + title: str, + left_obj: dict[str, Any] | list[Any], + right_obj: dict[str, Any] | list[Any], + console: Console, +) -> None: + table = Table( + title=title, + box=box.SIMPLE_HEAD, + expand=True, + show_header=False, + show_lines=False, + ) + + for left_line, right_line in _side_by_side_json_diff_rows(left_obj, right_obj): + table.add_row(left_line, right_line) + + console.print(table) + + +def _side_by_side_json_diff_rows( + left_obj: dict[str, Any] | list[Any], + right_obj: dict[str, Any] | list[Any], +) -> 
list[tuple[Text, Text]]: + left_line: str | None + right_line: str | None + left_lines = _to_json_lines(left_obj) + right_lines = _to_json_lines(right_obj) + + rows: list[tuple[Text, Text]] = [] + matcher = SequenceMatcher(a=left_lines, b=right_lines) + + left_no = 1 + right_no = 1 + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag == "equal": + for left_line, right_line in zip(left_lines[i1:i2], right_lines[j1:j2], strict=True): + rows.append(( + _make_line(left_no, " ", left_line), + _make_line(right_no, " ", right_line), + )) + left_no += 1 + right_no += 1 + continue + + if tag == "replace": + left_chunk = left_lines[i1:i2] + right_chunk = right_lines[j1:j2] + for idx in range(max(len(left_chunk), len(right_chunk))): + left_line = left_chunk[idx] if idx < len(left_chunk) else None + right_line = right_chunk[idx] if idx < len(right_chunk) else None + left_text = ( + _make_line(left_no, "- ", left_line, REMOVED_STYLE) + if left_line is not None + else Text("") + ) + right_text = ( + _make_line(right_no, "+ ", right_line, ADDED_STYLE) + if right_line is not None + else Text("") + ) + rows.append((left_text, right_text)) + if left_line is not None: + left_no += 1 + if right_line is not None: + right_no += 1 + continue + + if tag == "delete": + for left_line in left_lines[i1:i2]: + rows.append(( + _make_line(left_no, "- ", left_line, REMOVED_STYLE), + Text(""), + )) + left_no += 1 + continue + + if tag == "insert": + for right_line in right_lines[j1:j2]: + rows.append(( + Text(""), + _make_line(right_no, "+ ", right_line, ADDED_STYLE), + )) + right_no += 1 + + if not rows: + rows.append((Text("(empty)"), Text("(empty)"))) + return rows + + +def _make_line(number: int, marker: str, content: str, style: str = "") -> Text: + line = Text() + line.append(f"{number:>4} ", style="dim") + line.append(marker, style=style) + line.append(content, style=style) + return line + + +def _to_json_lines(obj: dict[str, Any] | list[Any]) -> list[str]: + dumped = 
json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) + return dumped.splitlines() diff --git a/scripts/crawl.py b/scripts/crawl.py index f9da2da04..549c72beb 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -32,6 +32,7 @@ next_run, parse_version, resolve_url, update_url, write_json, pl, pick, VersionInfo ) +from ._explain_package import print_package_explain import traceback @@ -110,6 +111,28 @@ def err(*args, **kwargs) -> None: print(*args, **kwargs, file=sys.stderr) +def explain_main(registry: str, name: str) -> int: + if not os.path.exists(registry): + err(f"FATAL: Registry file '{registry}' does not exist.") + return 1 + + try: + with open(registry, "r") as reg_file: + registry_data = json.load(reg_file) + except Exception as e: + err(f"FATAL: Could not read registry file '{registry}': {e}") + return 1 + + package = find_registry_package(registry_data, name) + if not package: + err(f"Package '{name}' not found in registry.") + return 1 + + normalized = normalize_registry_entry(deepcopy(package)) + print_package_explain(name, package, normalized) # type: ignore[arg-type] + return 0 + + async def main( registry: str, workspace: str, @@ -148,13 +171,11 @@ async def main_( ) -> None: name_requested = bool(name) if name: - for entry in registry["packages"]: - if entry.get("name") == name: - tocrawl = [entry] - break - else: + package = find_registry_package(registry, name) + if not package: err(f"Package '{name}' not found in registry.") return + tocrawl = [package] else: maintenance(registry, workspace) tocrawl = next_packages_to_crawl(registry, workspace, limit=limit, presto=presto) @@ -371,16 +392,10 @@ async def crawl_package( maybe_skip_crawling(entry, existing, now) ensure_secure_source(entry, existing) - out: WorkspaceEntry = {**entry} # type: ignore[typeddict-item] - if "readme" in out: - out["readme"] = update_url( # type: ignore[typeddict-unknown-key] - resolve_url(out["source"], out["readme"]) # type: ignore[typeddict-item] - ) + out = 
normalize_registry_entry(entry) details = out.get("details") release_definitions: list[ReleaseDescription] = \ out.get("releases", []) # type: ignore[assignment] - migrate_release_definitions_from_v2(release_definitions) - normalize_release_definition(release_definitions, out["source"], details) releases: list[Release] = [] @@ -490,6 +505,28 @@ def extend(new_releases: list[Release]): return out +def find_registry_package(registry: Registry, name: str) -> RegistryEntry | None: + for entry in registry.get("packages", []): + if entry.get("name") == name: + return entry + return None + + +def normalize_registry_entry(entry: RegistryEntry) -> WorkspaceEntry: + out: WorkspaceEntry = {**entry} # type: ignore[typeddict-item] + if "readme" in out: + out["readme"] = update_url( # type: ignore[typeddict-unknown-key] + resolve_url(out["source"], out["readme"]) # type: ignore[typeddict-item] + ) + + details = out.get("details") + release_definitions: list[ReleaseDescription] = \ + out.setdefault("releases", []) # type: ignore[assignment] + migrate_release_definitions_from_v2(release_definitions) + normalize_release_definition(release_definitions, out["source"], details) + return out + + async def resolve_tags( info: HubRepoInfo, definition: ReleaseDescription, @@ -979,6 +1016,15 @@ def parse_args(argv: list[str] | None = None): help=( "Optional name of a package to crawl. " "If not provided, all packages will be crawled.")) + parser.add_argument( + "--explain", + type=str, + default=None, + help=( + "Show the normalized package entry for the named package and " + "exit without writing the workspace." 
+ ), + ) parser.add_argument( "--limit", "-n", type=int, @@ -1002,7 +1048,11 @@ def parse_args(argv: list[str] | None = None): normalized_argv = normalize_limit_argv(sys.argv[1:] if argv is None else argv) if count_limit_occurrences(normalized_argv) > 1: parser.error("--limit/-n can only be specified once") - return parser.parse_args(normalized_argv) + + args = parser.parse_args(normalized_argv) + if args.name and args.explain: + parser.error("Use either --name or --explain, not both") + return args def normalize_limit_argv(argv: list[str]) -> list[str]: @@ -1038,4 +1088,8 @@ def env_flag(name: str, default: bool = False) -> bool: os.makedirs(wd, exist_ok=True) args.registry = os.path.normpath(os.path.join(wd, args.registry)) args.workspace = os.path.normpath(os.path.join(wd, args.workspace)) + + if args.explain: + raise SystemExit(explain_main(args.registry, args.explain)) + asyncio.run(main(args.registry, args.workspace, args.name, args.limit, args.presto)) diff --git a/tests/crawl/test_cli_args.py b/tests/crawl/test_cli_args.py index be3b92651..d284fc597 100644 --- a/tests/crawl/test_cli_args.py +++ b/tests/crawl/test_cli_args.py @@ -30,7 +30,17 @@ def test_parse_args_rejects_duplicate_limit_flags() -> None: parse_args(["-n", "100", "-n", "75"]) - def test_parse_args_rejects_shorthand_plus_limit_flag() -> None: with pytest.raises(SystemExit): parse_args(["-100", "--limit", "24"]) + + +def test_parse_args_accepts_explain_mode() -> None: + args = parse_args(["--explain", "Example"]) + + assert args.explain == "Example" + + +def test_parse_args_rejects_name_and_explain_together() -> None: + with pytest.raises(SystemExit): + parse_args(["--name", "Foo", "--explain", "Foo"]) From ec408247cb018ab76679301e3d6ac36a6afdb504 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 22:49:52 +0100 Subject: [PATCH 13/13] Enhance `crawl_library --explain` --- scripts/_explain_package.py | 60 +++++++++++++++++++++++++++++- scripts/_resolve_lib.py | 14 ++++--- 
scripts/crawl_libraries.py | 7 +++- tests/library_crawler/test_main.py | 56 ++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 9 deletions(-) diff --git a/scripts/_explain_package.py b/scripts/_explain_package.py index 51b7cfdd0..69fdc26f5 100644 --- a/scripts/_explain_package.py +++ b/scripts/_explain_package.py @@ -32,6 +32,59 @@ def print_package_explain( ) +def print_library_explain( + name: str, + rows: list[tuple[dict[str, Any], list[dict[str, Any]]]], + metadata: dict[str, Any] | None = None, + console: Console | None = None, +) -> None: + console = console or Console() + + if metadata is not None: + console.print(_to_pretty_json(metadata)) + console.print() + + console.rule(f"{name}: input release definitions and normalized variations") + + table = Table( + box=box.SIMPLE_HEAD, + expand=True, + show_header=True, + show_lines=False, + ) + table.add_column("#", style="yellow", no_wrap=True) + table.add_column("Input definition", ratio=1, overflow="fold") + table.add_column("Normalized variation", ratio=1, overflow="fold") + + if not rows: + table.add_row("-", "(empty)", "(empty)") + else: + for release_no, (left, right_variations) in enumerate(rows, start=1): + if release_no > 1: + table.add_row("", "", "") + + if not right_variations: + table.add_row(str(release_no), _to_pretty_json(left), "(empty)") + continue + + if len(right_variations) == 1: + table.add_row( + str(release_no), + _to_pretty_json(left), + _to_pretty_json(right_variations[0]), + ) + continue + + for variation_no, right in enumerate(right_variations, start=1): + table.add_row( + f"{release_no}-{variation_no}", + _to_pretty_json(left) if variation_no == 1 else "", + _to_pretty_json(right), + ) + + console.print(table) + + def _render_json_diff_table( title: str, left_obj: dict[str, Any] | list[Any], @@ -130,6 +183,9 @@ def _make_line(number: int, marker: str, content: str, style: str = "") -> Text: return line +def _to_pretty_json(obj: dict[str, Any] | list[Any]) -> str: + 
return json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) + + def _to_json_lines(obj: dict[str, Any] | list[Any]) -> list[str]: - dumped = json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) - return dumped.splitlines() + return _to_pretty_json(obj).splitlines() diff --git a/scripts/_resolve_lib.py b/scripts/_resolve_lib.py index 22298605a..3b8749b33 100644 --- a/scripts/_resolve_lib.py +++ b/scripts/_resolve_lib.py @@ -384,14 +384,17 @@ def normalize_st_build(st_specifier: str) -> str: return st_specifier[2:] -def explain_library(library: RegistryEntry) -> list[dict]: - releases = list(map(normalize_release_def, library.get("releases", []))) - output: list[dict] = [] - for release in releases: +def explain_library(library: RegistryEntry) -> list[tuple[dict, list[dict]]]: + raw_libraries = library.get("releases", []) + normalized = list(map(normalize_release_def, library.get("releases", []))) + output: list[tuple[dict, list[dict]]] = [] + for left, release in zip(raw_libraries, normalized): if "url" in release: + output.append((left, [release])) # type: ignore[arg-type, list-item] continue base = release.get("base") auto_assets = "pypi.org/project/" in base + right = [] for concrete in spell_out_constraint_variations(release, auto_assets=auto_assets): entry: dict[str, object] = { "base": base, @@ -402,7 +405,8 @@ def explain_library(library: RegistryEntry) -> list[dict]: "version": release["version"] or "*", "tag_prefix": release["tag_prefix"] or "v?" 
} - output.append(entry) + right.append(entry) + output.append((left, right)) # type: ignore[arg-type] return output diff --git a/scripts/crawl_libraries.py b/scripts/crawl_libraries.py index f6d690875..ad414121c 100644 --- a/scripts/crawl_libraries.py +++ b/scripts/crawl_libraries.py @@ -23,6 +23,7 @@ resolve_library, ) from ._utils import err, write_json +from ._explain_package import print_library_explain DEFAULT_REGISTRY = "./registry.json" @@ -347,8 +348,10 @@ async def handle_explain(name: str, args: Args) -> int: raise ValueError( f'Library "{name}" not found in {args.registry.name}.' ) - concrete_defs = explain_library(library) - print(json.dumps(concrete_defs, indent=2, ensure_ascii=False)) + + explain_rows = explain_library(library) + metadata = {key: value for key, value in library.items() if key != "releases"} + print_library_explain(name, explain_rows, metadata=metadata) return 0 diff --git a/tests/library_crawler/test_main.py b/tests/library_crawler/test_main.py index 1176b5fd3..de4664763 100644 --- a/tests/library_crawler/test_main.py +++ b/tests/library_crawler/test_main.py @@ -511,6 +511,62 @@ async def test_name_and_explain_reject_removed_library(monkeypatch, tmp_path): await crawl_libraries.run(args) +@pytest.mark.asyncio +async def test_handle_explain_renders_release_variation_rows(monkeypatch, tmp_path): + repo_path = tmp_path / "registry.json" + release_defs = [{"base": "https://pypi.org/project/example", "version": "*"}] + write_json(repo_path, {"libraries": [{"name": "alpha", "releases": release_defs}]}) + output_path = tmp_path / "libraries.json" + args = make_args(tmp_path, repo_path, output_path, explain="alpha") + + explain_rows = [ + ( + release_defs[0], + [ + { + "base": "https://pypi.org/project/example", + "asset": ["example-win-py38-${version}.zip"], + "platform": "windows", + "python_version": "3.8", + "sublime_text": "*", + "version": "*", + "tag_prefix": "v?", + }, + { + "base": "https://pypi.org/project/example", + "asset": 
["example-win-py33-${version}.zip"], + "platform": "windows", + "python_version": "3.3", + "sublime_text": "*", + "version": "*", + "tag_prefix": "v?", + }, + ], + ), + ] + captured = {} + + monkeypatch.setattr(crawl_libraries, "explain_library", lambda _: explain_rows) + + def fake_print_library_explain(name, rows, metadata=None): + captured["name"] = name + captured["rows"] = rows + captured["metadata"] = metadata + + monkeypatch.setattr( + crawl_libraries, + "print_library_explain", + fake_print_library_explain, + ) + + result = await crawl_libraries.run(args) + + assert result == 0 + assert captured["name"] == "alpha" + assert captured["rows"] == explain_rows + assert captured["metadata"] == {"name": "alpha"} + + @pytest.mark.parametrize( ("names", "expected"), [