From c42a29df7fa73991497b1051c4792872b74bf183 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 2 Mar 2026 13:09:19 +0100 Subject: [PATCH 01/13] Add snapshot_test workflow with auto shoot/diff support Introduce a new scripts.snapshot_test command that can create reduced registry/channel snapshots and compare them with a line-based diff. The default mode performs the standard workflow: create/overwrite snapshot.yml when no base exists, otherwise write a timestamped snapshot (.yml) and compare it against the base. The implementation captures stdout/stderr from generate_registry, crawl, and generate_channel into a snapshot.log file inside the temp directory. It keeps the temp directory on failure for inspection, and removes it on success. Also add snapshot.toml with a selected package set. --- README.md | 19 ++ scripts/snapshot_test.py | 500 ++++++++++++++++++++++++++++++++++++ snapshot.toml | 15 ++ tests/test_snapshot_test.py | 120 +++++++++ 4 files changed, 654 insertions(+) create mode 100644 scripts/snapshot_test.py create mode 100644 snapshot.toml create mode 100644 tests/test_snapshot_test.py diff --git a/README.md b/README.md index 9a7bf996a..b27633e6d 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,25 @@ The command above reuses the same layout as [CI](https://github.com/packagecontr - You can choose another directory via `--restore-from ` if you want to stage the backup elsewhere. - Try this locally first, then commit and push to actually replace/update/restore the GitHub action cache +--- + +### `snapshot_test.py` + +Creates a compact, single-file snapshot for regression testing (`registry + channel`) from a reduced package set. 
+ +```bash +uv run -m scripts.snapshot_test +uv run -m scripts.snapshot_test --base snapshot.yml --conf snapshot.toml +uv run -m scripts.snapshot_test shoot +uv run -m scripts.snapshot_test diff snapshot-2026-03-02-1210-abcd123 +``` + +- Default mode (no subcommand): + - if base exists, writes a new `snapshot--.yml` and prints a line-based diff vs base + - if base does not exist, writes/creates the base snapshot, using `shoot`. +- `shoot` explicitly creates/overwrites a target snapshot (default: `snapshot.yml`). +- Noise is sent to a temporary folder (`tmp---`), which is removed on success. + ## Tests We use `pytest`. Execute everything via uv so dependencies come from `pyproject.toml`/`uv.lock`: diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py new file mode 100644 index 000000000..479955074 --- /dev/null +++ b/scripts/snapshot_test.py @@ -0,0 +1,500 @@ +from __future__ import annotations + +import argparse +from dataclasses import dataclass +from datetime import datetime +import difflib +import json +from pathlib import Path +import shutil +import subprocess +import sys +import tomllib +from typing import TextIO + +from rich.console import Console + + +DEFAULT_BASE = "snapshot.yml" +DEFAULT_CONF = "snapshot.toml" +DEFAULT_LOG = "snapshot.log" + +RED_ON_BLACK = "\x1b[31;40m" +YELLOW_ON_BLACK = "\x1b[33;40m" +RESET = "\x1b[0m" + +CONSOLE = Console(stderr=True) + + +@dataclass +class ShootContext: + now: datetime + commit_hash: str + commit_subject: str + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + raw_argv = list(sys.argv[1:] if argv is None else argv) + + if raw_argv and raw_argv[0] == "auto": + return parse_auto_args(raw_argv[1:]) + + if is_auto_mode_argv(raw_argv): + return parse_auto_args(raw_argv) + + parser = build_main_parser() + normalized_argv = normalize_argv(raw_argv) + return parser.parse_args(normalized_argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + if args.command == 
"shoot": + return run_shoot(args) + if args.command == "diff": + return run_diff(args) + return run_auto(args) + + +def run_auto(args: argparse.Namespace) -> int: + ctx = collect_shoot_context() + base_path = Path(args.base) + output_path = resolve_auto_output_path(base_path, args.output, ctx) + + create_snapshot_with_spinner(output_path, Path(args.conf), ctx) + print(f"Created snapshot at {output_path}") + + if base_path.exists() and base_path.resolve() != output_path.resolve(): + print(f"Comparing {base_path} to {output_path}") + print_snapshot_diff(base_path, output_path) + + return 0 + + +def run_shoot(args: argparse.Namespace) -> int: + ctx = collect_shoot_context() + output_path = Path(args.filename) + + create_snapshot_with_spinner(output_path, Path(args.conf), ctx) + print(f"Created snapshot at {output_path}") + return 0 + + +def run_diff(args: argparse.Namespace) -> int: + files = args.files + if len(files) > 2: + raise SystemExit("diff accepts at most two snapshot files") + + if not files: + snapshots = sorted(Path.cwd().glob("snapshot-*")) + if not snapshots: + print("No snapshots found matching 'snapshot-*'.") + return 0 + for path in snapshots: + print(path.name) + return 0 + + if len(files) == 1: + left = Path(args.base) + right = Path(files[0]) + else: + left = Path(files[0]) + right = Path(files[1]) + + print(f"Comparing {left} to {right}") + print_snapshot_diff(left, right) + return 0 + + +def create_snapshot_with_spinner( + output_path: Path, + conf_path: Path, + ctx: ShootContext, +) -> None: + with CONSOLE.status("Creating snapshot", spinner="dots"): + create_snapshot(output_path, conf_path, ctx) + + +def normalize_argv(argv: list[str]) -> list[str]: + if not argv: + return argv + if argv[0] in {"-h", "--help", "shoot", "diff"}: + return argv + return ["shoot", *argv] + + +class SubcommandHelpFormatter(argparse.HelpFormatter): + def _format_action(self, action: argparse.Action) -> str: + if isinstance(action, argparse._SubParsersAction): + 
return "".join(self._format_action(choice) for choice in action._get_subactions()) + return super()._format_action(action) + + +def build_main_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Create and diff crawler snapshots.", + formatter_class=SubcommandHelpFormatter, + ) + add_auto_mode_arguments(parser) + + subparsers = parser.add_subparsers( + title="subcommands", + dest="command", + required=True, + ) + + shoot = subparsers.add_parser("shoot", help="Create or overwrite a snapshot") + shoot.add_argument( + "filename", + nargs="?", + default=DEFAULT_BASE, + help=f"Output snapshot path (default: {DEFAULT_BASE})", + ) + shoot.add_argument( + "--conf", + default=DEFAULT_CONF, + help=f"Snapshot config file (default: {DEFAULT_CONF})", + ) + + diff = subparsers.add_parser("diff", help="Diff one or two snapshots") + diff.add_argument( + "--base", + default=DEFAULT_BASE, + help=f"Default base snapshot if one file is provided (default: {DEFAULT_BASE})", + ) + diff.add_argument("files", nargs="*", help="Snapshot file(s) to diff") + + return parser + + +def parse_auto_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="snapshot_test.py", + description="Create and diff crawler snapshots.", + ) + add_auto_mode_arguments(parser) + args = parser.parse_args(argv) + args.command = "auto" + return args + + +def add_auto_mode_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--base", + default=DEFAULT_BASE, + help=f"Base snapshot used for diffing (default: {DEFAULT_BASE})", + ) + parser.add_argument( + "--conf", + default=DEFAULT_CONF, + help=f"Snapshot config file (default: {DEFAULT_CONF})", + ) + parser.add_argument( + "--output", + "-o", + default=None, + help=( + "Output snapshot path. " + "Default: when base exists use snapshot--.yml, " + "otherwise write to --base." 
+ ), + ) + + +def is_auto_mode_argv(argv: list[str]) -> bool: + if not argv: + return True + if argv[0] in {"shoot", "diff", "-h", "--help"}: + return False + return argv[0].startswith("-") + + +def collect_shoot_context() -> ShootContext: + now = datetime.now() + short_hash = run_capture(["git", "rev-parse", "--short", "HEAD"]) + commit_subject = run_capture(["git", "log", "-1", "--pretty=%s"]) + return ShootContext(now=now, commit_hash=short_hash, commit_subject=commit_subject) + + +def resolve_auto_output_path( + base_path: Path, + output_arg: str | None, + ctx: ShootContext, +) -> Path: + if output_arg: + return Path(output_arg) + if not base_path.exists(): + return base_path + + stamp = ctx.now.strftime("%Y-%m-%d-%H%M") + return Path(f"snapshot-{stamp}-{ctx.commit_hash}.yml") + + +def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> None: + names = load_snapshot_packages(conf_path) + if not names: + raise ValueError(f"{conf_path} does not contain any snapshot packages.") + + temp_dir = resolve_temp_dir(ctx) + log_path = temp_dir / DEFAULT_LOG + + cleanup = False + try: + with log_path.open("w", encoding="utf-8") as log_file: + write_log(log_file, f"date: {ctx.now.isoformat()}") + write_log(log_file, f"commit: {ctx.commit_hash} {ctx.commit_subject}") + write_log(log_file, f"temp_dir: {temp_dir}") + write_log(log_file, f"output: {output_path}") + + reduced_registry, channel = build_snapshot_payload(temp_dir, names, log_file) + snapshot_text = render_snapshot(ctx, names, reduced_registry, channel) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(snapshot_text, encoding="utf-8") + cleanup = True + finally: + if cleanup: + shutil.rmtree(temp_dir, ignore_errors=True) + else: + print(f"Snapshot failed. 
Kept temp dir for introspection: {temp_dir}", file=sys.stderr) + if log_path.exists(): + print(f"Log: {log_path}", file=sys.stderr) + + +def resolve_temp_dir(ctx: ShootContext) -> Path: + stamp = ctx.now.strftime("%Y-%m-%d-%H%M") + stem = f"tmp--{stamp}-{ctx.commit_hash}" + candidate = Path.cwd() / stem + index = 1 + while candidate.exists(): + candidate = Path.cwd() / f"{stem}-{index}" + index += 1 + candidate.mkdir(parents=True, exist_ok=False) + return candidate + + +def build_snapshot_payload( + temp_dir: Path, + names: list[str], + log_file: TextIO, +) -> tuple[dict, dict]: + full_registry = temp_dir / "registry-full.json" + reduced_registry_path = temp_dir / "registry.json" + workspace_path = temp_dir / "workspace.json" + channel_path = temp_dir / "channel.json" + + run_step([ + sys.executable, + "-m", + "scripts.generate_registry", + "--output", + str(full_registry), + ], log_file) + + write_log(log_file, f"Reducing registry to {len(names)} configured packages") + reduced_registry = write_reduced_registry(full_registry, reduced_registry_path, names) + + run_step([ + sys.executable, + "-m", + "scripts.crawl", + "--registry", + str(reduced_registry_path), + "--workspace", + str(workspace_path), + "--limit", + str(max(len(names), 1)), + ], log_file) + + run_step([ + sys.executable, + "-m", + "scripts.generate_channel", + "--registry", + str(reduced_registry_path), + "--workspace", + str(workspace_path), + "--output", + str(channel_path), + ], log_file) + + channel = read_json(channel_path) + return reduced_registry, channel + + +def write_reduced_registry( + full_registry_path: Path, + reduced_registry_path: Path, + names: list[str], +) -> dict: + full_registry = read_json(full_registry_path) + wanted = set(names) + + selected_packages = [ + package + for package in full_registry.get("packages", []) + if package.get("name") in wanted + ] + + found_names = {package["name"] for package in selected_packages if package.get("name")} + missing = [name for name in 
names if name not in found_names] + if missing: + joined = ", ".join(missing) + raise ValueError(f"Packages listed in config but missing from generated registry: {joined}") + + selected_sources = { + package["source"] + for package in selected_packages + if package.get("source") + } + repositories = [ + repo + for repo in full_registry.get("repositories", []) + if repo in selected_sources + ] + + reduced_registry = { + "repositories": repositories, + "packages": selected_packages, + "libraries": [], + } + reduced_registry_path.write_text( + json.dumps(reduced_registry, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + return reduced_registry + + +def render_snapshot( + ctx: ShootContext, + names: list[str], + reduced_registry: dict, + channel: dict, +) -> str: + package_lines = [f" - {name}" for name in names] + header_lines = [ + f"date: {format_snapshot_date(ctx.now)}", + f"commit: {ctx.commit_hash} {ctx.commit_subject}", + "packages:", + *package_lines, + ] + + registry_json = json.dumps(reduced_registry, indent=2, ensure_ascii=False) + channel_json = json.dumps(channel, indent=2, ensure_ascii=False) + + return "\n".join([ + *header_lines, + "---", + registry_json, + "---", + channel_json, + "", + ]) + + +def format_snapshot_date(now: datetime) -> str: + return f"{now.strftime('%B %Y')}, {ordinal(now.day)} {now.strftime('%H:%M')}" + + +def ordinal(day: int) -> str: + if 10 <= day % 100 <= 20: + suffix = "th" + else: + suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th") + return f"{day}{suffix}" + + +def print_snapshot_diff(left: Path, right: Path) -> None: + if not left.exists(): + raise FileNotFoundError(f"Base snapshot not found: {left}") + if not right.exists(): + raise FileNotFoundError(f"Snapshot not found: {right}") + + left_lines = left.read_text(encoding="utf-8").splitlines() + right_lines = right.read_text(encoding="utf-8").splitlines() + + diff_lines = list(difflib.unified_diff( + left_lines, + right_lines, + fromfile=str(left), + 
tofile=str(right), + lineterm="", + )) + + if not diff_lines: + print("No differences.") + return + + for line in diff_lines: + if line.startswith("--- ") or line.startswith("+++ "): + continue + if line.startswith("-"): + print(f"{RED_ON_BLACK}{line}{RESET}") + elif line.startswith("+"): + print(f"{YELLOW_ON_BLACK}{line}{RESET}") + else: + print(line) + + +def load_snapshot_packages(conf_path: Path) -> list[str]: + if not conf_path.exists(): + raise FileNotFoundError(f"Snapshot config not found: {conf_path}") + + text = conf_path.read_text(encoding="utf-8") + try: + data = tomllib.loads(text) + except tomllib.TOMLDecodeError as exc: + raise ValueError(f"Invalid TOML in {conf_path}: {exc}") from exc + + packages = data.get("snapshot", {}).get("packages", []) + if not isinstance(packages, list): + raise ValueError(f"Invalid config in {conf_path}: snapshot.packages must be a list") + return [str(package) for package in packages if str(package).strip()] + + +def read_json(path: Path) -> dict: + with path.open("r", encoding="utf-8") as handle: + data = json.load(handle) + if not isinstance(data, dict): + raise ValueError(f"Expected JSON object in {path}") + return data + + +def run_capture(command: list[str]) -> str: + completed = subprocess.run(command, check=True, capture_output=True, text=True) + return completed.stdout.strip() + + +def run_step(command: list[str], log_file: TextIO) -> None: + printable = " ".join(command) + write_log(log_file, f"$ {printable}") + + try: + completed = subprocess.run(command, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as exc: + append_command_output(log_file, exc.stdout, exc.stderr) + write_log(log_file, f"Command failed with exit code {exc.returncode}") + raise + + append_command_output(log_file, completed.stdout, completed.stderr) + + +def append_command_output(log_file: TextIO, stdout: str | None, stderr: str | None) -> None: + if stdout: + log_file.write(stdout) + if not 
stdout.endswith("\n"): + log_file.write("\n") + if stderr: + log_file.write(stderr) + if not stderr.endswith("\n"): + log_file.write("\n") + log_file.flush() + + +def write_log(log_file: TextIO, message: str) -> None: + log_file.write(f"{message}\n") + log_file.flush() + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/snapshot.toml b/snapshot.toml new file mode 100644 index 000000000..dd3ef743a --- /dev/null +++ b/snapshot.toml @@ -0,0 +1,15 @@ +[snapshot] +packages = [ + "ABNF-sublime-syntax", + "Affixify", + "Bison", + "4GL", + "LSP", + "SublimeLinter", + "GitSavvy", + "Package Control", + "SFTP", + "Theme - Monokai Pro", + "CodeIgniter 3 Snippets", + "AlignComment", +] diff --git a/tests/test_snapshot_test.py b/tests/test_snapshot_test.py new file mode 100644 index 000000000..9af27ab21 --- /dev/null +++ b/tests/test_snapshot_test.py @@ -0,0 +1,120 @@ +from datetime import datetime +from pathlib import Path +import sys + +from scripts.snapshot_test import ( + ShootContext, + load_snapshot_packages, + normalize_argv, + ordinal, + parse_args, + print_snapshot_diff, + resolve_auto_output_path, + run_step, +) + + +def test_normalize_argv_keeps_empty_argv_unchanged() -> None: + assert normalize_argv([]) == [] + + +def test_parse_args_treats_positional_as_shoot_filename() -> None: + args = parse_args(["snapshot-next.yml"]) + + assert args.command == "shoot" + assert args.filename == "snapshot-next.yml" + + +def test_parse_args_treats_options_without_command_as_auto() -> None: + args = parse_args(["--base", "snapshot.yml"]) + + assert args.command == "auto" + assert args.base == "snapshot.yml" + + +def test_resolve_auto_output_path_appends_yml_extension_for_new_snapshot( + tmp_path: Path, +) -> None: + ctx = ShootContext( + now=datetime(2026, 3, 2, 12, 53), + commit_hash="abc1234", + commit_subject="subject", + ) + base_path = tmp_path / "snapshot.yml" + base_path.write_text("base", encoding="utf-8") + + output = resolve_auto_output_path(base_path, 
None, ctx) + + assert output.name == "snapshot-2026-03-02-1253-abc1234.yml" + + +def test_load_snapshot_packages_from_toml(tmp_path: Path) -> None: + config = tmp_path / "snapshot.toml" + config.write_text( + "[snapshot]\n" + "packages = [\"foo\", \"bar\"]\n", + encoding="utf-8", + ) + + assert load_snapshot_packages(config) == ["foo", "bar"] + + +def test_load_snapshot_packages_rejects_non_toml_config(tmp_path: Path) -> None: + config = tmp_path / "snapshot.toml" + config.write_text( + "[snapshot]\n" + "packages =\n" + " foo\n" + " bar\n", + encoding="utf-8", + ) + + try: + load_snapshot_packages(config) + except ValueError as exc: + assert "Invalid TOML" in str(exc) + else: + raise AssertionError("Expected ValueError for invalid TOML") + + +def test_run_step_writes_stdout_and_stderr_to_log(tmp_path: Path) -> None: + log_path = tmp_path / "snapshot.log" + with log_path.open("w", encoding="utf-8") as log_file: + run_step( + [ + sys.executable, + "-c", + "import sys; print('hello out'); print('hello err', file=sys.stderr)", + ], + log_file, + ) + + text = log_path.read_text(encoding="utf-8") + assert "hello out" in text + assert "hello err" in text + + +def test_print_snapshot_diff_hides_unified_file_headers( + tmp_path: Path, + capsys, +) -> None: + left = tmp_path / "left.yml" + right = tmp_path / "right.yml" + left.write_text("date: one\nvalue: old\n", encoding="utf-8") + right.write_text("date: one\nvalue: new\n", encoding="utf-8") + + print_snapshot_diff(left, right) + lines = capsys.readouterr().out.splitlines() + + assert not any(line.startswith("--- ") for line in lines) + assert not any(line.startswith("+++ ") for line in lines) + assert any(line.startswith("@@") for line in lines) + + +def test_ordinal_suffixes() -> None: + assert ordinal(1) == "1st" + assert ordinal(2) == "2nd" + assert ordinal(3) == "3rd" + assert ordinal(4) == "4th" + assert ordinal(11) == "11th" + assert ordinal(23) == "23rd" From 6e2688b46bab410896afbefb86f71eb4e45ed714 Mon Sep 17 
00:00:00 2001 From: herr kaste Date: Mon, 2 Mar 2026 13:28:02 +0100 Subject: [PATCH 02/13] Add interactive snapshot picker for diff mode --- README.md | 3 +- pyproject.toml | 1 + scripts/snapshot_test.py | 120 +++++++++++++++++++++++++++++++++++- tests/test_snapshot_test.py | 32 ++++++++++ uv.lock | 11 ++++ 5 files changed, 163 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b27633e6d..36223e0de 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,8 @@ Creates a compact, single-file snapshot for regression testing (`registry + chan uv run -m scripts.snapshot_test uv run -m scripts.snapshot_test --base snapshot.yml --conf snapshot.toml uv run -m scripts.snapshot_test shoot -uv run -m scripts.snapshot_test diff snapshot-2026-03-02-1210-abcd123 +uv run -m scripts.snapshot_test diff +uv run -m scripts.snapshot_test diff snapshot-2026-03-02-1210-abcd123.yml ``` - Default mode (no subcommand): diff --git a/pyproject.toml b/pyproject.toml index 1cd1e9033..7dabd65e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ dependencies = [ "aiohttp>=3.11.18", "inflect>=7.5.0", "packaging>=24.2", + "readchar>=4.2.1", "rich>=13.7", "tzdata; platform_system == 'Windows'", ] diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py index 479955074..08f9afd79 100644 --- a/scripts/snapshot_test.py +++ b/scripts/snapshot_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +from collections.abc import Callable from dataclasses import dataclass from datetime import datetime import difflib @@ -12,7 +13,12 @@ import tomllib from typing import TextIO +from readchar import key as readchar_key +from readchar import readkey from rich.console import Console +from rich.live import Live +from rich.panel import Panel +from rich.text import Text DEFAULT_BASE = "snapshot.yml" @@ -23,7 +29,8 @@ YELLOW_ON_BLACK = "\x1b[33;40m" RESET = "\x1b[0m" -CONSOLE = Console(stderr=True) +STDOUT_CONSOLE = Console() +STDERR_CONSOLE = 
Console(stderr=True) @dataclass @@ -86,10 +93,28 @@ def run_diff(args: argparse.Namespace) -> int: raise SystemExit("diff accepts at most two snapshot files") if not files: - snapshots = sorted(Path.cwd().glob("snapshot-*")) + snapshots = list_available_snapshots() if not snapshots: print("No snapshots found matching 'snapshot-*'.") return 0 + + if len(snapshots) == 1: + left = Path(args.base) + right = snapshots[0] + print(f"Comparing {left} to {right}") + print_snapshot_diff(left, right) + return 0 + + if is_interactive_terminal(): + selected = select_snapshot_interactively(snapshots) + if selected is None: + return 0 + left = Path(args.base) + right = selected + print(f"Comparing {left} to {right}") + print_snapshot_diff(left, right) + return 0 + for path in snapshots: print(path.name) return 0 @@ -106,12 +131,101 @@ def run_diff(args: argparse.Namespace) -> int: return 0 +def list_available_snapshots() -> list[Path]: + candidates = { + path + for pattern in ("snapshot-*.yml", "snapshot-*") + for path in Path.cwd().glob(pattern) + if path.is_file() + } + return sorted(candidates, key=lambda path: path.name) + + +def is_interactive_terminal() -> bool: + return sys.stdin.isatty() and sys.stdout.isatty() + + +def select_snapshot_interactively( + snapshots: list[Path], + key_reader: Callable[[], str] | None = None, + console: Console | None = None, +) -> Path | None: + if not snapshots: + return None + + selected = 0 + key_reader = read_key_action if key_reader is None else key_reader + console = STDOUT_CONSOLE if console is None else console + + with Live( + render_snapshot_selector(snapshots, selected), + console=console, + transient=True, + auto_refresh=False, + ) as live: + while True: + action = key_reader() + + if action == "enter": + return snapshots[selected] + if action in {"q", "esc", "ctrl_c"}: + return None + + next_selected = move_selection(selected, len(snapshots), action) + if next_selected != selected: + selected = next_selected + 
live.update(render_snapshot_selector(snapshots, selected), refresh=True) + + +def render_snapshot_selector(snapshots: list[Path], selected: int) -> Panel: + body = Text("Use ↑/↓ to choose a snapshot, Enter to diff, q to cancel\n\n") + for index, path in enumerate(snapshots): + prefix = "❯" if index == selected else " " + style = "bold cyan" if index == selected else "" + body.append(f"{prefix} {path.name}\n", style=style) + return Panel(body, title="Available snapshots") + + +def move_selection(current: int, total: int, key: str) -> int: + if total <= 0: + return 0 + if key == "up": + return (current - 1) % total + if key == "down": + return (current + 1) % total + return current + + +def read_key_action() -> str: + try: + pressed = readkey() + except KeyboardInterrupt: + return "ctrl_c" + return normalize_key_press(pressed) + + +def normalize_key_press(pressed: str) -> str: + if pressed == readchar_key.UP: + return "up" + if pressed == readchar_key.DOWN: + return "down" + if pressed in {readchar_key.ENTER, "\r", "\n"}: + return "enter" + if pressed == readchar_key.ESC: + return "esc" + if pressed == readchar_key.CTRL_C: + return "ctrl_c" + if pressed.lower() == "q": + return "q" + return "other" + + def create_snapshot_with_spinner( output_path: Path, conf_path: Path, ctx: ShootContext, ) -> None: - with CONSOLE.status("Creating snapshot", spinner="dots"): + with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): create_snapshot(output_path, conf_path, ctx) diff --git a/tests/test_snapshot_test.py b/tests/test_snapshot_test.py index 9af27ab21..b4b6a57f8 100644 --- a/tests/test_snapshot_test.py +++ b/tests/test_snapshot_test.py @@ -1,3 +1,4 @@ +import argparse from datetime import datetime from pathlib import Path import sys @@ -5,11 +6,13 @@ from scripts.snapshot_test import ( ShootContext, load_snapshot_packages, + move_selection, normalize_argv, ordinal, parse_args, print_snapshot_diff, resolve_auto_output_path, + run_diff, run_step, ) @@ -111,6 +114,35 
@@ def test_print_snapshot_diff_hides_unified_file_headers( assert any(line.startswith("@@") for line in lines) +def test_run_diff_with_single_candidate_without_files_shows_diff( + tmp_path: Path, + monkeypatch, + capsys, +) -> None: + base = tmp_path / "snapshot.yml" + candidate = tmp_path / "snapshot-2026-03-02-1253-abc1234.yml" + base.write_text("value: old\n", encoding="utf-8") + candidate.write_text("value: new\n", encoding="utf-8") + monkeypatch.chdir(tmp_path) + + result = run_diff(argparse.Namespace(files=[], base=str(base))) + + out = capsys.readouterr().out + assert result == 0 + assert "Comparing" in out + assert candidate.name in out + assert "@@" in out + + +def test_move_selection_wraps_for_up_and_down() -> None: + assert move_selection(0, 3, "up") == 2 + assert move_selection(2, 3, "down") == 0 + + +def test_move_selection_ignores_unknown_keys() -> None: + assert move_selection(1, 3, "x") == 1 + + def test_ordinal_suffixes() -> None: assert ordinal(1) == "1st" assert ordinal(2) == "2nd" diff --git a/uv.lock b/uv.lock index 04625eec4..be4ba001b 100644 --- a/uv.lock +++ b/uv.lock @@ -402,6 +402,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694, upload-time = "2025-03-25T06:22:27.807Z" }, ] +[[package]] +name = "readchar" +version = "4.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dd/f8/8657b8cbb4ebeabfbdf991ac40eca8a1d1bd012011bd44ad1ed10f5cb494/readchar-4.2.1.tar.gz", hash = "sha256:91ce3faf07688de14d800592951e5575e9c7a3213738ed01d394dcc949b79adb", size = 9685, upload-time = "2024-11-04T18:28:07.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/10/e4b1e0e5b6b6745c8098c275b69bc9d73e9542d5c7da4f137542b499ed44/readchar-4.2.1-py3-none-any.whl", hash 
= "sha256:a769305cd3994bb5fa2764aa4073452dc105a4ec39068ffe6efd3c20c60acc77", size = 9350, upload-time = "2024-11-04T18:28:02.859Z" }, +] + [[package]] name = "rich" version = "14.2.0" @@ -449,6 +458,7 @@ dependencies = [ { name = "aiohttp" }, { name = "inflect" }, { name = "packaging" }, + { name = "readchar" }, { name = "rich" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] @@ -466,6 +476,7 @@ requires-dist = [ { name = "aiohttp", specifier = ">=3.11.18" }, { name = "inflect", specifier = ">=7.5.0" }, { name = "packaging", specifier = ">=24.2" }, + { name = "readchar", specifier = ">=4.2.1" }, { name = "rich", specifier = ">=13.7" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] From 3183944adb57c1866d15d23053d2c783953f70dd Mon Sep 17 00:00:00 2001 From: herr kaste Date: Tue, 3 Mar 2026 12:50:17 +0100 Subject: [PATCH 03/13] Reorder functions --- scripts/snapshot_test.py | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py index 08f9afd79..503ce0ea0 100644 --- a/scripts/snapshot_test.py +++ b/scripts/snapshot_test.py @@ -40,20 +40,6 @@ class ShootContext: commit_subject: str -def parse_args(argv: list[str] | None = None) -> argparse.Namespace: - raw_argv = list(sys.argv[1:] if argv is None else argv) - - if raw_argv and raw_argv[0] == "auto": - return parse_auto_args(raw_argv[1:]) - - if is_auto_mode_argv(raw_argv): - return parse_auto_args(raw_argv) - - parser = build_main_parser() - normalized_argv = normalize_argv(raw_argv) - return parser.parse_args(normalized_argv) - - def main(argv: list[str] | None = None) -> int: args = parse_args(argv) if args.command == "shoot": @@ -220,13 +206,18 @@ def normalize_key_press(pressed: str) -> str: return "other" -def create_snapshot_with_spinner( - output_path: Path, - conf_path: Path, - ctx: ShootContext, -) -> None: - with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): - 
create_snapshot(output_path, conf_path, ctx) +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + raw_argv = list(sys.argv[1:] if argv is None else argv) + + if raw_argv and raw_argv[0] == "auto": + return parse_auto_args(raw_argv[1:]) + + if is_auto_mode_argv(raw_argv): + return parse_auto_args(raw_argv) + + parser = build_main_parser() + normalized_argv = normalize_argv(raw_argv) + return parser.parse_args(normalized_argv) def normalize_argv(argv: list[str]) -> list[str]: @@ -344,6 +335,15 @@ def resolve_auto_output_path( return Path(f"snapshot-{stamp}-{ctx.commit_hash}.yml") +def create_snapshot_with_spinner( + output_path: Path, + conf_path: Path, + ctx: ShootContext, +) -> None: + with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): + create_snapshot(output_path, conf_path, ctx) + + def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> None: names = load_snapshot_packages(conf_path) if not names: From 4f19623c2ca4683e67868b45df2cc5afbd96f72a Mon Sep 17 00:00:00 2001 From: herr kaste Date: Tue, 3 Mar 2026 13:00:34 +0100 Subject: [PATCH 04/13] Better status updates when creating a snapshot --- scripts/snapshot_test.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/scripts/snapshot_test.py b/scripts/snapshot_test.py index 503ce0ea0..81396c086 100644 --- a/scripts/snapshot_test.py +++ b/scripts/snapshot_test.py @@ -340,11 +340,21 @@ def create_snapshot_with_spinner( conf_path: Path, ctx: ShootContext, ) -> None: - with STDERR_CONSOLE.status("Creating snapshot", spinner="dots"): - create_snapshot(output_path, conf_path, ctx) + with STDERR_CONSOLE.status("Creating snapshot", spinner="dots") as status: + create_snapshot( + output_path, + conf_path, + ctx, + update_status=status.update, + ) -def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> None: +def create_snapshot( + output_path: Path, + conf_path: Path, + ctx: ShootContext, + 
update_status: Callable[[str], None], +) -> None: names = load_snapshot_packages(conf_path) if not names: raise ValueError(f"{conf_path} does not contain any snapshot packages.") @@ -360,7 +370,12 @@ def create_snapshot(output_path: Path, conf_path: Path, ctx: ShootContext) -> No write_log(log_file, f"temp_dir: {temp_dir}") write_log(log_file, f"output: {output_path}") - reduced_registry, channel = build_snapshot_payload(temp_dir, names, log_file) + reduced_registry, channel = build_snapshot_payload( + temp_dir, + names, + log_file, + update_status=update_status, + ) snapshot_text = render_snapshot(ctx, names, reduced_registry, channel) output_path.parent.mkdir(parents=True, exist_ok=True) @@ -391,12 +406,14 @@ def build_snapshot_payload( temp_dir: Path, names: list[str], log_file: TextIO, + update_status: Callable[[str], None], ) -> tuple[dict, dict]: full_registry = temp_dir / "registry-full.json" reduced_registry_path = temp_dir / "registry.json" workspace_path = temp_dir / "workspace.json" channel_path = temp_dir / "channel.json" + update_status("Generating registry") run_step([ sys.executable, "-m", @@ -408,6 +425,7 @@ def build_snapshot_payload( write_log(log_file, f"Reducing registry to {len(names)} configured packages") reduced_registry = write_reduced_registry(full_registry, reduced_registry_path, names) + update_status("Crawling packages") run_step([ sys.executable, "-m", @@ -420,6 +438,7 @@ def build_snapshot_payload( str(max(len(names), 1)), ], log_file) + update_status("Generating final channel.json") run_step([ sys.executable, "-m", From c3baaee82b550666faf43433503eb6d9c9f8e211 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 12:38:28 +0100 Subject: [PATCH 05/13] Add `-` shorthand to crawl.py --- scripts/crawl.py | 32 +++++++++++++++++++++++++++++--- tests/crawl/test_cli_args.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 tests/crawl/test_cli_args.py diff --git 
a/scripts/crawl.py b/scripts/crawl.py index 719035c2c..34332104a 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -837,8 +837,11 @@ def which_hub(url: str) -> str: return "unknown" -def parse_args(): - parser = argparse.ArgumentParser(description="Crawl the registry and update the workspace.") +def parse_args(argv: list[str] | None = None): + parser = argparse.ArgumentParser( + description="Crawl the registry and update the workspace.", + epilog="Numeric shorthand: - sets crawl limit, e.g. -1000 == --limit 1000.", + ) parser.add_argument( "--registry", type=str, @@ -876,7 +879,30 @@ def parse_args(): default=".", help="Working directory to resolve file paths (default: .)" ) - return parser.parse_args() + normalized_argv = normalize_limit_argv(sys.argv[1:] if argv is None else argv) + if count_limit_occurrences(normalized_argv) > 1: + parser.error("--limit/-n can only be specified once") + return parser.parse_args(normalized_argv) + + +def normalize_limit_argv(argv: list[str]) -> list[str]: + normalized = [] + for arg in argv: + if re.fullmatch(r"-\d+", arg): + normalized.extend(["--limit", arg[1:]]) + continue + normalized.append(arg) + return normalized + + +def count_limit_occurrences(argv: list[str]) -> int: + count = 0 + for arg in argv: + if arg in {"--limit", "-n"}: + count += 1 + elif arg.startswith("--limit="): + count += 1 + return count def env_flag(name: str, default: bool = False) -> bool: diff --git a/tests/crawl/test_cli_args.py b/tests/crawl/test_cli_args.py new file mode 100644 index 000000000..be3b92651 --- /dev/null +++ b/tests/crawl/test_cli_args.py @@ -0,0 +1,36 @@ +import pytest + +from scripts.crawl import normalize_limit_argv, parse_args + + +def test_normalize_limit_argv_rewrites_numeric_shorthand() -> None: + assert normalize_limit_argv(["--presto", "-1000", "--name", "Example"]) == [ + "--presto", + "--limit", + "1000", + "--name", + "Example", + ] + + +def test_parse_args_accepts_numeric_shorthand_limit() -> None: + args = 
parse_args(["-1000"]) + + assert args.limit == 1000 + + +def test_parse_args_still_accepts_explicit_limit() -> None: + args = parse_args(["--limit", "75"]) + + assert args.limit == 75 + + +def test_parse_args_rejects_duplicate_limit_flags() -> None: + with pytest.raises(SystemExit): + parse_args(["-n", "100", "-n", "75"]) + + + +def test_parse_args_rejects_shorthand_plus_limit_flag() -> None: + with pytest.raises(SystemExit): + parse_args(["-100", "--limit", "24"]) From 2f760de56c4a7a9de46733084aa4f293927d8a78 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 13:00:43 +0100 Subject: [PATCH 06/13] Allow omitting standard info for package releases * Allow dismissing "releases" completely => allow all builds and set tags:true * Omitting "tags" and "branch" and "asset" => set tags: true --- scripts/crawl.py | 9 +++ tests/crawl/test_basic.py | 130 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/scripts/crawl.py b/scripts/crawl.py index 34332104a..1a7a2e239 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -714,6 +714,12 @@ def normalize_release_definition( repo_url: str, details: str | None = None ): + if not releases: + releases.append({ + "sublime_text": "*", + "tags": True + }) + for r in releases[:]: r.setdefault("platforms", ["*"]) if isinstance(r["platforms"], str): @@ -725,6 +731,9 @@ def normalize_release_definition( err(f"sublime_text as a list is only valid in conjunction with 'asset', {repo_url}") releases.remove(r) + if r.keys().isdisjoint({"url", "asset", "branch", "tags"}): + r["tags"] = True + if base := r.get("base", details): r["base"] = resolve_url(repo_url, base) diff --git a/tests/crawl/test_basic.py b/tests/crawl/test_basic.py index d1d3ff84c..fad436e4f 100644 --- a/tests/crawl/test_basic.py +++ b/tests/crawl/test_basic.py @@ -299,6 +299,136 @@ async def test_accept_stylized_dates_for_static_releases( assert release["date"] == date_expected +@pytest.mark.asyncio +async def 
test_missing_release_definitions_default_to_tags(set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "ImplicitRelease", + "details": "https://github.com/example/implicit-release", + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_implicitrelease", + "name": "ImplicitRelease", + "description": "Fixture package with implicit release definition", + "homepage": "https://github.com/example/implicit-release", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/implicit-release/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "1.2.3", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/implicit-release/zip/1.2.3" + } + ], + "branches": [ + { + "name": "main", + "version": "2024.05.11.12.00.00", + "sha": "def456", + "date": "2024-05-11T12:00:00Z", + "url": "https://codeload.github.com/example/implicit-release/zip/main" + } + ] + } + + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("ImplicitRelease") + assert package is not None + + releases = package.get("releases", []) + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "*" + assert releases[0]["platforms"] == ["*"] + assert releases[0]["version"] == "1.2.3" + assert releases[0]["url"].endswith("/1.2.3") + + +@pytest.mark.asyncio +async def test_release_without_asset_or_branch_defaults_to_tags(set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + 
"name": "AutoTags", + "details": "https://github.com/example/auto-tags", + "releases": [ + { + "sublime_text": "*" + } + ], + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_autotags", + "name": "AutoTags", + "description": "Fixture package with implicit tags", + "homepage": "https://github.com/example/auto-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/auto-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "2.0.0", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/auto-tags/zip/2.0.0" + } + ], + "branches": [ + { + "name": "main", + "version": "2024.05.11.12.00.00", + "sha": "def456", + "date": "2024-05-11T12:00:00Z", + "url": "https://codeload.github.com/example/auto-tags/zip/main" + } + ] + } + + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("AutoTags") + assert package is not None + + release = package["releases"][0] + assert release["version"] == "2.0.0" + assert release["url"].endswith("/2.0.0") + + @pytest.mark.asyncio async def test_prerelease_tag_does_not_use_branch_fallback(set_now, set_github_info, capsys): registry = { From ee741f2672bd217d6aeb60c4aa59368b49ff065a Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 13:00:57 +0100 Subject: [PATCH 07/13] Add regression tests for `normalize_release_definition` --- .../test_normalize_release_definition.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 tests/crawl/test_normalize_release_definition.py diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py new file mode 100644 index 
000000000..c907449f2 --- /dev/null +++ b/tests/crawl/test_normalize_release_definition.py @@ -0,0 +1,141 @@ +import pytest + +from scripts.crawl import normalize_release_definition + + +REPO_URL = "https://raw.githubusercontent.com/example/channel/main/repository.json" + + +def test_adds_synthetic_release_when_missing() -> None: + releases: list[dict] = [] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/implicit-release", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "*" + assert releases[0]["platforms"] == ["*"] + assert releases[0]["tags"] is True + assert releases[0]["base"] == "https://github.com/example/implicit-release" + + +def test_autofills_tags_when_asset_and_branch_missing() -> None: + releases = [{"sublime_text": "*"}] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/auto-tags", + ) + + assert releases[0]["tags"] is True + assert releases[0]["base"] == "https://github.com/example/auto-tags" + + +def test_does_not_autofill_tags_for_static_url_release() -> None: + releases = [{ + "sublime_text": "*", + "url": "https://example.com/pkg.zip", + "version": "1.2.3", + "date": "2024-05-10T12:00:00Z", + }] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/static-release", + ) + + assert "tags" not in releases[0] + assert releases[0]["url"] == "https://example.com/pkg.zip" + + +@pytest.mark.parametrize("field", ["asset", "branch", "tags"]) +def test_does_not_overwrite_existing_release_source(field: str) -> None: + releases = [{field: True}] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/source-is-already-defined", + ) + + assert "tags" not in releases[0] or field == "tags" + + +def test_normalizes_platforms_string_to_list() -> None: + releases = [{"platforms": "linux", "tags": True}] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + assert 
releases[0]["platforms"] == ["linux"] + assert releases[0]["sublime_text"] == "*" + + +def test_removes_invalid_sublime_text_list_without_asset(capsys) -> None: + releases = [{"sublime_text": ["*"], "tags": True}] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + err = capsys.readouterr().err + assert "sublime_text as a list is only valid in conjunction with 'asset'" in err + assert releases == [] + + +def test_resolves_relative_base_url() -> None: + releases = [{"base": "./repo", "tags": True}] + + normalize_release_definition(releases, REPO_URL) + + assert releases[0]["base"] == "https://raw.githubusercontent.com/example/channel/main/repo" + + +def test_resolves_and_updates_download_url() -> None: + releases = [{ + "url": "https://nodeload.github.com/example/pkg/zipball/main", + "version": "1.2.3", + "date": "2024-05-10T12:00:00Z", + }] + + normalize_release_definition(releases, REPO_URL) + + assert releases[0]["url"] == "https://codeload.github.com/example/pkg/zip/main" + + +@pytest.mark.parametrize( + ("date_input", "date_expected"), + [ + ("2024-05-10 12:00", "2024-05-10T12:00:00Z"), + ("2024-05-10", "2024-05-10T00:00:00Z"), + ], +) +def test_normalizes_stylized_dates( + date_input: str, + date_expected: str, +) -> None: + releases = [{ + "url": "https://example.com/pkg.zip", + "version": "1.2.3", + "date": date_input, + }] + + normalize_release_definition(releases, REPO_URL) + + assert releases[0]["date"] == date_expected + + +def test_removes_release_with_invalid_date(capsys) -> None: + releases = [{ + "url": "https://example.com/pkg.zip", + "version": "1.2.3", + "date": "May 10, 2024", + }] + + normalize_release_definition(releases, REPO_URL) + + err = capsys.readouterr().err + assert "date May 10, 2024 is not formatted correctly" in err + assert releases == [] From 0467c841440bbf6e165d4a9d5ba034fefa6a85f8 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:11:16 +0100 Subject: [PATCH 08/13] Support 
constrained tag releases Add version-constraint support for tag-based release resolution by using normalize_version_spec and SpecifierSet when a version constraint is present. For unconstrained tags, keep the legacy strict semver parsing path. If the user uses version constraints, maybe synthesize an automatic open-ended tags release. This is so that the user only has to configure frozen, left-behind version/st_build pairs while keeping the standard newest tag for newest build semantic. E.g. ``` "releases": [ { "sublime_text": "<4000", "version": "2.5.*" } ], ``` In the example, the package defines an old version for older sublime builds. Here we would add an open-ended release definition, e.g. ``` { "sublime_text": ">=4000", "tags": true } ``` --- scripts/crawl.py | 204 ++++++++++++++---- tests/crawl/test_basic.py | 203 +++++++++++++++++ .../test_normalize_release_definition.py | 126 ++++++++++- 3 files changed, 485 insertions(+), 48 deletions(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index 1a7a2e239..b3453c05d 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -4,14 +4,16 @@ from collections import defaultdict from copy import deepcopy from datetime import datetime, timedelta, timezone +from functools import partial from itertools import product import json import os import re import sys from typing import Literal, Mapping, NotRequired, Required, TypedDict -from packaging.specifiers import SpecifierSet +import packaging +from packaging.specifiers import SpecifierSet from .bitbucket import fetch_bitbucket_info, RepoInfo as BitbucketRepoInfo from .generate_registry import Registry, PackageEntry as RegistryEntry @@ -26,7 +28,10 @@ normalize_st_build, normalize_version_spec, ) -from ._utils import next_run, parse_version, resolve_url, update_url, write_json, pl, pick +from ._utils import ( + next_run, parse_version, resolve_url, update_url, write_json, pl, pick, + VersionInfo +) import traceback @@ -494,6 +499,11 @@ async def resolve_tags( return [], 
None tag_prefix = "" if tag_definition is True else tag_definition + version_set = None + if version_spec := definition.get("version"): + normalized_spec = normalize_version_spec(version_spec) + version_set = SpecifierSet(normalized_spec) if normalized_spec else None + resolved_releases: list[Release] = [] now = datetime.now(timezone.utc) cutoff = now - timedelta(weeks=53) @@ -504,32 +514,50 @@ async def resolve_tags( prerelease_found: str | None = None found_final = False async for tag in info["tags"]: - if ( - tag["name"].startswith(tag_prefix) - and (version_string := ( - tag["name"].removeprefix(tag_prefix) + tag_name = tag["name"] + + is_prerelease = False + if version_set: + # For constrained tags, use packaging.Version against SpecifierSet. + tag_match = match_tag_version(tag_name, tag_prefix) + if not tag_match: + continue + version: packaging.version.Version + version, version_string = tag_match + if not version_set.contains(version, prereleases=True): + continue + is_final_version = not (version.is_prerelease or version.is_devrelease) + is_prerelease = version.is_prerelease + else: + # Standard tag semantics use our custom, strict semver parser. 
+ if not tag_name.startswith(tag_prefix): + continue + version_string = ( + tag_name.removeprefix(tag_prefix) if tag_prefix - else tag["name"].removeprefix("v") - )) - and (version := parse_version(version_string)) - ): - tag_date = datetime.strptime(tag["date"], UTC_FORMAT).replace(tzinfo=timezone.utc) - if tag_date < cutoff and found_final: - break + else tag_name.removeprefix("v") + ) + version_: VersionInfo | None + version_ = parse_version(version_string) + if not version_: + continue + is_final_version = version_.is_final + is_prerelease = version_.is_prerelease - if tag_date >= cutoff or ( - version.is_final or - (version.is_prerelease and not prerelease_found) - ): - r_ = deepcopy(definition) - r_.pop("tags") - r_ |= pick(("url", "date"), tag) - r_ |= {"version": version_string} - resolved_releases.append(r_) # type: ignore[arg-type] - if version.is_final: - found_final = True - elif version.is_prerelease: - prerelease_found = version_string + tag_date = datetime.strptime(tag["date"], UTC_FORMAT).replace(tzinfo=timezone.utc) + if tag_date < cutoff and found_final: + break + + if tag_date >= cutoff or (is_final_version or (is_prerelease and not prerelease_found)): + r_ = deepcopy(definition) + r_.pop("tags") + r_ |= pick(("url", "date"), tag) + r_ |= {"version": version_string} + resolved_releases.append(r_) # type: ignore[arg-type] + if is_final_version: + found_final = True + elif is_prerelease: + prerelease_found = version_string if found_final: return resolved_releases, None @@ -714,38 +742,120 @@ def normalize_release_definition( repo_url: str, details: str | None = None ): + normalize_ = partial(normalize_release_entry, releases, repo_url, details) + if not releases: releases.append({ "sublime_text": "*", "tags": True }) - for r in releases[:]: - r.setdefault("platforms", ["*"]) - if isinstance(r["platforms"], str): - r["platforms"] = [r["platforms"]] + auto_release = maybe_make_auto_open_ended_tags_release(releases) - r.setdefault("sublime_text", "*") 
- if isinstance(r["sublime_text"], list): - if "asset" not in r: - err(f"sublime_text as a list is only valid in conjunction with 'asset', {repo_url}") - releases.remove(r) + for release in releases[:]: + normalize_(release) - if r.keys().isdisjoint({"url", "asset", "branch", "tags"}): - r["tags"] = True + if auto_release: + normalize_(auto_release) + releases.append(auto_release) - if base := r.get("base", details): - r["base"] = resolve_url(repo_url, base) - if "url" in r: - r["url"] = update_url(resolve_url(repo_url, r["url"])) +def normalize_release_entry( + releases: list[ReleaseDescription], + repo_url: str, + details: str | None, + release: ReleaseDescription, +) -> None: + release.setdefault("platforms", ["*"]) + if isinstance(release["platforms"], str): + release["platforms"] = [release["platforms"]] + + release.setdefault("sublime_text", "*") + if isinstance(release["sublime_text"], list) and "asset" not in release: + err(f"sublime_text as a list is only valid in conjunction with 'asset', {repo_url}") + releases.remove(release) + return - if "date" in r: - try: - r["date"] = normalize_datetime_str(r["date"]) - except ValueError: - err(f"date {r['date']} is not formatted correctly, {repo_url}") - releases.remove(r) + if release.keys().isdisjoint({"url", "asset", "branch", "tags"}): + release["tags"] = True + + if base := release.get("base", details): + release["base"] = resolve_url(repo_url, base) + + if "url" in release: + release["url"] = update_url(resolve_url(repo_url, release["url"])) + + if "date" in release: + try: + release["date"] = normalize_datetime_str(release["date"]) + except ValueError: + err(f"date {release['date']} is not formatted correctly, {repo_url}") + releases.remove(release) + + +def maybe_make_auto_open_ended_tags_release( + releases: list[ReleaseDescription], +) -> ReleaseDescription | None: + max_build = -1 + + for release in releases: + if not release.get("version"): + return None + + if "url" in release or "asset" in release or 
"branch" in release: + return None + + if release.get("tags") is True: + return None + + st_max = parse_sublime_text_max(release.get("sublime_text")) + if st_max == float("inf"): + return None + + max_build = max(max_build, int(st_max)) + + if max_build < 0: + return None + + return { + "sublime_text": f">{max_build}", + "tags": True, + } + + +def parse_sublime_text_max(selector) -> float: + if not isinstance(selector, str): + return float("inf") + + s = re.sub(r"\s+", "", selector) + if s in ("", "*"): + return float("inf") + + range_index = s.find("-") + if range_index != -1: + right = s[range_index + 1:] + n = parse_int_prefix(right) + return float(n) if n is not None else float("inf") + + if s.startswith("<="): + n = parse_int_prefix(s[2:]) + return float(n) if n is not None else float("inf") + + if s.startswith("<"): + n = parse_int_prefix(s[1:]) + return float(max(0, n - 1)) if n is not None else float("inf") + + if s.startswith(">=") or s.startswith(">"): + return float("inf") + + n = parse_int_prefix(s) + return float(n) if n is not None else float("inf") + + +def parse_int_prefix(text: str) -> int | None: + if match := re.match(r"^\d+", text): + return int(match.group(0)) + return None def compile_release_asset_pattern( diff --git a/tests/crawl/test_basic.py b/tests/crawl/test_basic.py index fad436e4f..ed2b7f074 100644 --- a/tests/crawl/test_basic.py +++ b/tests/crawl/test_basic.py @@ -429,6 +429,209 @@ async def test_release_without_asset_or_branch_defaults_to_tags(set_github_info) assert release["url"].endswith("/2.0.0") +@pytest.mark.asyncio +async def test_version_constrained_tags_and_auto_open_ended_release(set_now, set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "ConstrainedTags", + "details": "https://github.com/example/constrained-tags", + "releases": [ + { + "sublime_text": "<4000", + "version": "<3.0.0" + } 
+ ], + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_constrainedtags", + "name": "ConstrainedTags", + "description": "Fixture package with constrained tags", + "homepage": "https://github.com/example/constrained-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/constrained-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "3.1.0", + "sha": "sha310", + "date": "2025-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-tags/zip/3.1.0" + }, + { + "name": "2.9.9", + "sha": "sha299", + "date": "2024-01-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-tags/zip/2.9.9" + } + ], + "branches": [ + { + "name": "main", + "version": "2025.05.10.12.00.00", + "sha": "def456", + "date": "2025-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-tags/zip/main" + } + ] + } + + set_now("2025-08-13T21:44:16Z") + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("ConstrainedTags") + assert package is not None + + by_st = {release["sublime_text"]: release for release in package["releases"]} + assert by_st["<4000"]["version"] == "2.9.9" + assert by_st[">3999"]["version"] == "3.1.0" + + +@pytest.mark.asyncio +async def test_unconstrained_tags_keep_legacy_semver_parsing(set_now, set_github_info, capsys): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "LegacySemverTags", + "details": "https://github.com/example/legacy-semver-tags", + "releases": [ + { + "sublime_text": "*", + "tags": True + } + ], + "source": 
"https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_legacysemvertags", + "name": "LegacySemverTags", + "description": "Fixture package for legacy semver tags", + "homepage": "https://github.com/example/legacy-semver-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/legacy-semver-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "1.0rc1", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/legacy-semver-tags/zip/1.0rc1" + } + ], + "branches": [ + { + "name": "main", + "version": "2024.05.10.12.00.00", + "sha": "def456", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/legacy-semver-tags/zip/main" + } + ] + } + + set_now("2024-05-11T00:00:00Z") + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + err = capsys.readouterr().err + assert ( + "No valid version found for https://github.com/example/legacy-semver-tags. " + "Falling back to tip of main." 
+ ) in err + + package = workspace["packages"].get("LegacySemverTags") + assert package is not None + assert package["releases"][0]["version"] == "2024.05.10.12.00.00" + + +@pytest.mark.asyncio +async def test_constrained_tags_use_packaging_version_parsing(set_now, set_github_info): + registry = { + "repositories": [ + "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json" + ], + "packages": [ + { + "name": "ConstrainedPep440Tags", + "details": "https://github.com/example/constrained-pep440-tags", + "releases": [ + { + "sublime_text": "*", + "tags": True, + "version": ">=1.0rc1" + } + ], + "source": "https://raw.githubusercontent.com/wbond/package_control_channel/refs/heads/master/repository.json", + "schema_version": "3.0.0" + } + ] + } + + workspace = {"packages": {}} + + github_info = { + "metadata": { + "id": "R_constrainedpep440tags", + "name": "ConstrainedPep440Tags", + "description": "Fixture package for constrained pep440 tags", + "homepage": "https://github.com/example/constrained-pep440-tags", + "author": "example", + "readme": "https://raw.githubusercontent.com/example/constrained-pep440-tags/main/README.md", + "default_branch": "main", + "stars": 0, + "created_at": "2024-01-01T00:00:00Z" + }, + "tags": [ + { + "name": "1.0rc1", + "sha": "abc123", + "date": "2024-05-10T12:00:00Z", + "url": "https://codeload.github.com/example/constrained-pep440-tags/zip/1.0rc1" + } + ], + "branches": [] + } + + set_now("2024-05-11T00:00:00Z") + set_github_info(github_info) + + await main_(registry, workspace, None, 100) + + package = workspace["packages"].get("ConstrainedPep440Tags") + assert package is not None + assert package["releases"][0]["version"] == "1.0rc1" + + @pytest.mark.asyncio async def test_prerelease_tag_does_not_use_branch_fallback(set_now, set_github_info, capsys): registry = { diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py index 
c907449f2..b318d5118 100644 --- a/tests/crawl/test_normalize_release_definition.py +++ b/tests/crawl/test_normalize_release_definition.py @@ -1,6 +1,6 @@ import pytest -from scripts.crawl import normalize_release_definition +from scripts.crawl import normalize_release_definition, parse_sublime_text_max REPO_URL = "https://raw.githubusercontent.com/example/channel/main/repository.json" @@ -66,6 +66,130 @@ def test_does_not_overwrite_existing_release_source(field: str) -> None: assert "tags" not in releases[0] or field == "tags" +@pytest.mark.parametrize( + ("selector", "expected"), + [ + (None, float("inf")), + ("", float("inf")), + ("*", float("inf")), + (" * ", float("inf")), + ("3092", 3092), + ("3092 - 4000", 4000), + ("3092-4000", 4000), + ("<3092", 3091), + ("<=3092", 3092), + (">3092", float("inf")), + (">=3092", float("inf")), + (" >= 4075 ", float("inf")), + ("> 4075", float("inf")), + ("n/a", float("inf")), + ], +) +def test_parse_sublime_text_max(selector, expected: float) -> None: + assert parse_sublime_text_max(selector) == expected + + +def test_adds_open_ended_tags_release_for_version_constrained_tags() -> None: + releases = [{ + "sublime_text": "<4000", + "version": "<3.0.0", + }] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 2 + first, second = releases + assert first["tags"] is True + assert first["sublime_text"] == "<4000" + assert second["tags"] is True + assert second["sublime_text"] == ">3999" + + +def test_does_not_add_open_ended_when_unconstrained_tags_exists() -> None: + releases = [ + {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, + {"sublime_text": ">4000", "tags": True}, + ] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 2 + + +def test_does_not_add_open_ended_without_any_version_key() -> None: + releases = [{"sublime_text": "3000 - 4000"}] + + 
normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "3000 - 4000" + assert releases[0]["tags"] is True + + +def test_does_not_add_open_ended_for_empty_version() -> None: + releases = [{"sublime_text": "3000 - 4000", "version": ""}] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "3000 - 4000" + + +def test_does_not_add_open_ended_when_any_release_lacks_version() -> None: + releases = [ + {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, + {"sublime_text": "4001 - 4200"}, + ] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/constrained-tags", + ) + + assert len(releases) == 2 + + +def test_does_not_add_open_ended_when_branch_or_asset_exists() -> None: + releases = [ + {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, + {"sublime_text": "*", "branch": True}, + ] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + assert len(releases) == 2 + + +def test_does_not_add_open_ended_for_static_releases() -> None: + releases = [{ + "sublime_text": "*", + "version": "1.2.3", + "url": "https://example.com/pkg.zip", + "date": "2024-05-10T12:00:00Z", + }] + + normalize_release_definition(releases, REPO_URL, "https://github.com/example/pkg") + + assert len(releases) == 1 + + def test_normalizes_platforms_string_to_list() -> None: releases = [{"platforms": "linux", "tags": True}] From 43f467514cb46da66405a356d0e9ad019950824d Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:19:07 +0100 Subject: [PATCH 09/13] Normalize dynamic version specs early Move normalize_version_spec into normalize_release_entry for dynamic release definitions, so constraints are canonicalized once during the normalize phase. 
This allows treating "*" as non-constraining ("") as per docs. --- scripts/crawl.py | 9 +++++---- tests/crawl/test_normalize_release_definition.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index b3453c05d..7aa6c2770 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -501,8 +501,7 @@ async def resolve_tags( tag_prefix = "" if tag_definition is True else tag_definition version_set = None if version_spec := definition.get("version"): - normalized_spec = normalize_version_spec(version_spec) - version_set = SpecifierSet(normalized_spec) if normalized_spec else None + version_set = SpecifierSet(version_spec) resolved_releases: list[Release] = [] now = datetime.now(timezone.utc) @@ -621,8 +620,7 @@ async def resolve_assets( spec_set = None if version_spec := definition.get("version"): - normalized_spec = normalize_version_spec(version_spec) - spec_set = SpecifierSet(normalized_spec) if normalized_spec else None + spec_set = SpecifierSet(version_spec) resolved_releases: list[Release] = [] async for release in info["releases"]: # type: ignore[typeddict-item] @@ -779,6 +777,9 @@ def normalize_release_entry( if release.keys().isdisjoint({"url", "asset", "branch", "tags"}): release["tags"] = True + if "url" not in release and (version_spec := release.get("version")): + release["version"] = normalize_version_spec(version_spec) + if base := release.get("base", details): release["base"] = resolve_url(repo_url, base) diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py index b318d5118..d1f623c4f 100644 --- a/tests/crawl/test_normalize_release_definition.py +++ b/tests/crawl/test_normalize_release_definition.py @@ -51,6 +51,22 @@ def test_does_not_autofill_tags_for_static_url_release() -> None: assert "tags" not in releases[0] assert releases[0]["url"] == "https://example.com/pkg.zip" + assert releases[0]["version"] == "1.2.3" + + +def 
test_normalizes_version_constraint_for_dynamic_release() -> None: + releases = [{ + "sublime_text": "<4000", + "version": "2.5.*", + }] + + normalize_release_definition( + releases, + REPO_URL, + "https://github.com/example/version-spec", + ) + + assert releases[0]["version"] == "==2.5.*" @pytest.mark.parametrize("field", ["asset", "branch", "tags"]) From 4fafc1afe04743a537c98c340bf693475e80489d Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:22:26 +0100 Subject: [PATCH 10/13] Treat wildcard version as unconstrained for auto open-ended synthesis When deciding whether to synthesize an automatic open-ended tags release, handle version="*" the same as an empty/missing version constraint. This avoids generating a synthetic >max release from definitions that are not actually version-constrained before normalization runs. --- scripts/crawl.py | 2 +- tests/crawl/test_normalize_release_definition.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index 7aa6c2770..f9da2da04 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -800,7 +800,7 @@ def maybe_make_auto_open_ended_tags_release( max_build = -1 for release in releases: - if not release.get("version"): + if release.get("version", "") in ("*", ""): return None if "url" in release or "asset" in release or "branch" in release: diff --git a/tests/crawl/test_normalize_release_definition.py b/tests/crawl/test_normalize_release_definition.py index d1f623c4f..74689f2f7 100644 --- a/tests/crawl/test_normalize_release_definition.py +++ b/tests/crawl/test_normalize_release_definition.py @@ -167,6 +167,19 @@ def test_does_not_add_open_ended_for_empty_version() -> None: assert releases[0]["sublime_text"] == "3000 - 4000" +def test_does_not_add_open_ended_for_star_version() -> None: + releases = [{"sublime_text": "3000 - 4000", "version": "*"}] + + normalize_release_definition( + releases, + REPO_URL, + 
"https://github.com/example/constrained-tags", + ) + + assert len(releases) == 1 + assert releases[0]["sublime_text"] == "3000 - 4000" + + def test_does_not_add_open_ended_when_any_release_lacks_version() -> None: releases = [ {"sublime_text": "3000 - 4000", "version": "<3.0.0"}, From f231a273907e9e24f5c657a0f5fc62d949becfc9 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 14:37:47 +0100 Subject: [PATCH 11/13] Add pack-spec.md for package release definition behavior --- pack-spec.md | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 pack-spec.md diff --git a/pack-spec.md b/pack-spec.md new file mode 100644 index 000000000..1c3ab26c4 --- /dev/null +++ b/pack-spec.md @@ -0,0 +1,311 @@ +# repository.json spec + +This document describes the user-facing `repository.json` format used to define +packages and their release sources. + +It intentionally documents what this crawler supports today. +`example-repository.json` from Package Control is good inspiration, but broader. + +## Top-level structure + +`repository.json` is a JSON object with these fields: + +```json +{ + "$schema": "sublime://packagecontrol.io/schemas/repository", + "schema_version": "4.0.0", + "packages": [ ... ], + "libraries": [] +} +``` + +Only `packages` is relevant for this document. + +## Package metadata + +Each package entry contains metadata and a list of release definitions: + +```json +{ + "name": "GitSavvy", + "details": "https://github.com/timbrel/GitSavvy", + "labels": ["vcs"], + "releases": [ ... ] +} +``` + +Common fields: +- `name` (string, usually required): Package name shown in Package Control. + If omitted, name is derived from `details` where possible. +- `details` (string, optional but common): Repository URL used to fetch metadata. +- `description`, `author`, `homepage`, `readme`, `issues`, `donate`, `buy` + (optional): Explicit metadata overrides. +- `labels` (list, optional): Search/category labels. 
+- `previous_names` (list, optional): Old package names for rename migration. +- `releases` (list, optional): One or more release definitions. + +If `releases` is missing, a synthetic default release is created: + +```json +{ + "sublime_text": "*", + "tags": true +} +``` + +A minimal definition the crawler understands is: + +```json +{ + "details": "https://github.com/timbrel/GitSavvy", +} +``` + +However, adding a name really helps in readability. + +## Release definitions + +For packages, there are three dynamic release modes: + +1. `tags` (normal/default) +2. `branch` +3. `asset` + +There is also static/manual form (`url` + `version` + `date`) which is treated +as already fulfilled and passed through. + +--- + +### 1) Tags mode (default) + +Minimal explicit tags release: + +```json +{ + "sublime_text": "*", + "tags": true +} +``` + +If a release has none of `url`, `asset`, `branch`, `tags`, we autofill +`"tags": true`. + +`tags` values: +- `true`: all tags are considered, a possible prefix "v" is stripped automatically +- string prefix: only tags starting with that prefix are considered + +Examples: + +```json +{ "sublime_text": "<4000", "tags": "st3-" } +``` + +```json +{ "sublime_text": ">=4000", "tags": true } +``` + +#### Tag parsing behavior + +Without a `version` constraint, tags use strict semver parsing +(`major.minor.patch`, optional `-prerelease`, optional `+build`). + +With a `version` constraint, parsing uses PEP 440 (`packaging.Version`), which +allows versions like `1.0rc1`. See below for examples. + +Example: + +```json +[ + { + "sublime_text": "<4000", + "version": "2.5.*" + } +] +``` + +#### Rolling window behavior + +Tags mode keeps all matching tags from the recent rolling window +(about 53 weeks). +If that window does not contain a final release, we still keep a leading +prerelease when present. + +If no valid tag can be resolved, tags mode reports an error and can fall back +to branch mode (see below). 
+ +--- + +### 2) Branch mode + +Branch-based release definitions: + +```json +{ + "sublime_text": "*", + "branch": true +} +``` + +```json +{ + "sublime_text": "*", + "branch": "dev" +} +``` + +`branch` values: +- `true`: use repository default branch (`default_branch`, fallback `master`) +- string: use that branch name + +Resolved branch releases get their version synthesized from branch date, +e.g. `2024-05-10T12:00:00Z` -> `2024.05.10.12.00.00`. + +#### Tags -> branch fallback + +If a release definition is tag-based and no valid tag is found, +the crawler tries branch resolution for the same definition. + +- If `branch` is set, that branch is used for fallback. +- If `branch` is not set, fallback goes to default branch (`branch: true`). + +This fallback is per release definition. + +--- + +### 3) Asset mode + +Asset mode resolves downloadable artifacts from hosted release assets +(currently GitHub release assets in practice). + +Minimal asset release: + +```json +{ + "asset": "A File Icon.sublime-package" +} +``` + +Commonly with wildcards: + +```json +{ + "asset": "*.sublime-package" +} +``` + +Asset patterns are glob-like strings: +- `*` matches any sequence +- `?` matches a single character + +Supported placeholders in package asset patterns: +- `${version}`: resolved tag version +- `${st_build}`: normalized `sublime_text` build marker +- `${platform}`: platform token (`*` becomes `any`) + +Example with placeholders: + +```json +{ + "asset": "Less-${version}-st${st_build}.sublime-package", + "sublime_text": ["4107 - 4148", ">=4149"] +} +``` + +Asset mode resolves a target matrix of `platforms x sublime_text` and picks the +first matching asset per target (newest releases first). +Different targets may resolve to different versions if needed. + +If targets remain unresolved, the crawler logs which `(platform, st_build)` +combinations are missing. + +> Note: asset mode does **not** fall back to branch mode when assets are missing. 
+ +--- + +### Static/manual releases + +A release containing `url`, `version`, and `date` is considered fulfilled: + +```json +{ + "sublime_text": "*", + "platforms": ["*"], + "version": "1.2.3", + "url": "https://example.com/my-package.zip", + "date": "2024-05-10T12:00:00Z" +} +``` + +Accepted `date` input formats are normalized to UTC form: +- `YYYY-MM-DDTHH:MM:SSZ` +- `YYYY-MM-DD HH:MM:SS` +- `YYYY-MM-DD HH:MM` +- `YYYY-MM-DD` + +## Constraints and defaults + +Common release fields: +- `sublime_text` (string, optional): default `"*"`. +- `platforms` (string or list, optional): default `["*"]`. +- `base` (string, optional): release source URL; defaults to package `details`. +- `tags` (bool or string, optional): tag mode and optional prefix. +- `branch` (bool or string, optional): branch mode. +- `asset` (string, optional): asset mode pattern. +- `version` (string, optional): version filter for dynamic modes. +- `url`, `date` (string, static mode): explicit resolved release. + +`version` normalization: +- `"*"` or empty -> no constraint +- bare versions/prefixes are normalized with `==` + - `"2.5.*" -> "==2.5.*"` + - `"1.2.3" -> "==1.2.3"` +- full specifiers are kept, e.g. `">=2,<3"` + +Supported `sublime_text` selectors include: +- `"*"` +- exact build (`"4147"`) +- comparisons (`">4147"`, `">=4147"`, `"<4147"`, `"<=4147"`) +- inclusive ranges (`"3154 - 4069"`) + +`sublime_text` as a list is only valid for `asset` releases. + +Common `platforms` values seen in package definitions: +- `*`, `windows`, `osx`, `linux` +- and architecture-specific forms like + `windows-x64`, `windows-x32`, `osx-x64`, `osx-arm64`, `linux-x64`, `linux-arm64` + +## Automatic open-ended tags release (for constrained tags) + +When all release definitions are version-constrained tag releases with bounded +`sublime_text` ranges, the crawler may append a synthetic open-ended tags +release. 
+ +Example input: + +```json +[ + { + "sublime_text": "<4000", + "version": "<3.0.0" + } +] +``` + +becomes effectively: + +```json +[ + { + "sublime_text": "<4000", + "version": "<3.0.0", + "tags": true + }, + { + "sublime_text": ">3999", + "tags": true + } +] +``` + +This avoids leaving newer Sublime builds without a release definition +and effectively keeps the releases section short. From a4f966143ccb75f8dd042be645379faa4405fb13 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 16:21:06 +0100 Subject: [PATCH 12/13] Add `crawl --explain` sub-command --- README.md | 2 + pack-spec.md | 3 + scripts/_explain_package.py | 135 +++++++++++++++++++++++++++++++++++ scripts/crawl.py | 80 +++++++++++++++++---- tests/crawl/test_cli_args.py | 12 +++- 5 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 scripts/_explain_package.py diff --git a/README.md b/README.md index 36223e0de..4213fd3df 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ stores it in a workspace file (`workspace.json`). Supports crawling all packages, or a single package via the `--name` option. Use `--presto` (or set `PRESTO_PRESTO_CRAWL=1`) to bypass `next_crawl` scheduling and fast-forward the workspace by crawling up to `--limit` packages. +Use `--explain` to show the normalized package entry for a package - Integrates with GitHub, GitLab, and Bitbucket APIs to fetch detailed info and releases. - Requires a valid `GITHUB_TOKEN` in your environment for GitHub API access because GitHub's GraphQl @@ -88,6 +89,7 @@ and fast-forward the workspace by crawling up to `--limit` packages. 
```bash $ GITHUB_TOKEN=ghp_yourgithubtokenhere uv run -m scripts.crawl $ uv run -m scripts.crawl --name GitSavvy +$ uv run -m scripts.crawl --explain GitSavvy ``` --- diff --git a/pack-spec.md b/pack-spec.md index 1c3ab26c4..fe61210c1 100644 --- a/pack-spec.md +++ b/pack-spec.md @@ -63,6 +63,9 @@ A minimal definition the crawler understands is: However, adding a name really helps in readability. +Tip: Use `$ uv run -m scripts.crawl --explain GitSavvy` to show the + normalized/expanded package definition. + ## Release definitions For packages, there are three dynamic release modes: diff --git a/scripts/_explain_package.py b/scripts/_explain_package.py new file mode 100644 index 000000000..51b7cfdd0 --- /dev/null +++ b/scripts/_explain_package.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from difflib import SequenceMatcher +import json +from typing import Any + +from rich import box +from rich.console import Console +from rich.table import Table +from rich.text import Text + + +ADDED_STYLE = "yellow on black" +REMOVED_STYLE = "red on black" + + +def print_package_explain( + name: str, + original: dict[str, Any], + normalized: dict[str, Any], + console: Console | None = None, +) -> None: + console = console or Console() + console.print() + console.rule("Left: registry entry | Right: normalized entry") + + _render_json_diff_table( + title="", + left_obj=original, + right_obj=normalized, + console=console, + ) + + +def _render_json_diff_table( + title: str, + left_obj: dict[str, Any] | list[Any], + right_obj: dict[str, Any] | list[Any], + console: Console, +) -> None: + table = Table( + title=title, + box=box.SIMPLE_HEAD, + expand=True, + show_header=False, + show_lines=False, + ) + + for left_line, right_line in _side_by_side_json_diff_rows(left_obj, right_obj): + table.add_row(left_line, right_line) + + console.print(table) + + +def _side_by_side_json_diff_rows( + left_obj: dict[str, Any] | list[Any], + right_obj: dict[str, Any] | list[Any], +) -> 
list[tuple[Text, Text]]: + left_line: str | None + right_line: str | None + left_lines = _to_json_lines(left_obj) + right_lines = _to_json_lines(right_obj) + + rows: list[tuple[Text, Text]] = [] + matcher = SequenceMatcher(a=left_lines, b=right_lines) + + left_no = 1 + right_no = 1 + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag == "equal": + for left_line, right_line in zip(left_lines[i1:i2], right_lines[j1:j2], strict=True): + rows.append(( + _make_line(left_no, " ", left_line), + _make_line(right_no, " ", right_line), + )) + left_no += 1 + right_no += 1 + continue + + if tag == "replace": + left_chunk = left_lines[i1:i2] + right_chunk = right_lines[j1:j2] + for idx in range(max(len(left_chunk), len(right_chunk))): + left_line = left_chunk[idx] if idx < len(left_chunk) else None + right_line = right_chunk[idx] if idx < len(right_chunk) else None + left_text = ( + _make_line(left_no, "- ", left_line, REMOVED_STYLE) + if left_line is not None + else Text("") + ) + right_text = ( + _make_line(right_no, "+ ", right_line, ADDED_STYLE) + if right_line is not None + else Text("") + ) + rows.append((left_text, right_text)) + if left_line is not None: + left_no += 1 + if right_line is not None: + right_no += 1 + continue + + if tag == "delete": + for left_line in left_lines[i1:i2]: + rows.append(( + _make_line(left_no, "- ", left_line, REMOVED_STYLE), + Text(""), + )) + left_no += 1 + continue + + if tag == "insert": + for right_line in right_lines[j1:j2]: + rows.append(( + Text(""), + _make_line(right_no, "+ ", right_line, ADDED_STYLE), + )) + right_no += 1 + + if not rows: + rows.append((Text("(empty)"), Text("(empty)"))) + return rows + + +def _make_line(number: int, marker: str, content: str, style: str = "") -> Text: + line = Text() + line.append(f"{number:>4} ", style="dim") + line.append(marker, style=style) + line.append(content, style=style) + return line + + +def _to_json_lines(obj: dict[str, Any] | list[Any]) -> list[str]: + dumped = 
json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) + return dumped.splitlines() diff --git a/scripts/crawl.py b/scripts/crawl.py index f9da2da04..549c72beb 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -32,6 +32,7 @@ next_run, parse_version, resolve_url, update_url, write_json, pl, pick, VersionInfo ) +from ._explain_package import print_package_explain import traceback @@ -110,6 +111,28 @@ def err(*args, **kwargs) -> None: print(*args, **kwargs, file=sys.stderr) +def explain_main(registry: str, name: str) -> int: + if not os.path.exists(registry): + err(f"FATAL: Registry file '{registry}' does not exist.") + return 1 + + try: + with open(registry, "r") as reg_file: + registry_data = json.load(reg_file) + except Exception as e: + err(f"FATAL: Could not read registry file '{registry}': {e}") + return 1 + + package = find_registry_package(registry_data, name) + if not package: + err(f"Package '{name}' not found in registry.") + return 1 + + normalized = normalize_registry_entry(deepcopy(package)) + print_package_explain(name, package, normalized) # type: ignore[arg-type] + return 0 + + async def main( registry: str, workspace: str, @@ -148,13 +171,11 @@ async def main_( ) -> None: name_requested = bool(name) if name: - for entry in registry["packages"]: - if entry.get("name") == name: - tocrawl = [entry] - break - else: + package = find_registry_package(registry, name) + if not package: err(f"Package '{name}' not found in registry.") return + tocrawl = [package] else: maintenance(registry, workspace) tocrawl = next_packages_to_crawl(registry, workspace, limit=limit, presto=presto) @@ -371,16 +392,10 @@ async def crawl_package( maybe_skip_crawling(entry, existing, now) ensure_secure_source(entry, existing) - out: WorkspaceEntry = {**entry} # type: ignore[typeddict-item] - if "readme" in out: - out["readme"] = update_url( # type: ignore[typeddict-unknown-key] - resolve_url(out["source"], out["readme"]) # type: ignore[typeddict-item] - ) + out = 
normalize_registry_entry(entry) details = out.get("details") release_definitions: list[ReleaseDescription] = \ out.get("releases", []) # type: ignore[assignment] - migrate_release_definitions_from_v2(release_definitions) - normalize_release_definition(release_definitions, out["source"], details) releases: list[Release] = [] @@ -490,6 +505,28 @@ def extend(new_releases: list[Release]): return out +def find_registry_package(registry: Registry, name: str) -> RegistryEntry | None: + for entry in registry.get("packages", []): + if entry.get("name") == name: + return entry + return None + + +def normalize_registry_entry(entry: RegistryEntry) -> WorkspaceEntry: + out: WorkspaceEntry = {**entry} # type: ignore[typeddict-item] + if "readme" in out: + out["readme"] = update_url( # type: ignore[typeddict-unknown-key] + resolve_url(out["source"], out["readme"]) # type: ignore[typeddict-item] + ) + + details = out.get("details") + release_definitions: list[ReleaseDescription] = \ + out.setdefault("releases", []) # type: ignore[assignment] + migrate_release_definitions_from_v2(release_definitions) + normalize_release_definition(release_definitions, out["source"], details) + return out + + async def resolve_tags( info: HubRepoInfo, definition: ReleaseDescription, @@ -979,6 +1016,15 @@ def parse_args(argv: list[str] | None = None): help=( "Optional name of a package to crawl. " "If not provided, all packages will be crawled.")) + parser.add_argument( + "--explain", + type=str, + default=None, + help=( + "Show the normalized package entry for the named package and " + "exit without writing the workspace." 
+ ), + ) parser.add_argument( "--limit", "-n", type=int, @@ -1002,7 +1048,11 @@ def parse_args(argv: list[str] | None = None): normalized_argv = normalize_limit_argv(sys.argv[1:] if argv is None else argv) if count_limit_occurrences(normalized_argv) > 1: parser.error("--limit/-n can only be specified once") - return parser.parse_args(normalized_argv) + + args = parser.parse_args(normalized_argv) + if args.name and args.explain: + parser.error("Use either --name or --explain, not both") + return args def normalize_limit_argv(argv: list[str]) -> list[str]: @@ -1038,4 +1088,8 @@ def env_flag(name: str, default: bool = False) -> bool: os.makedirs(wd, exist_ok=True) args.registry = os.path.normpath(os.path.join(wd, args.registry)) args.workspace = os.path.normpath(os.path.join(wd, args.workspace)) + + if args.explain: + raise SystemExit(explain_main(args.registry, args.explain)) + asyncio.run(main(args.registry, args.workspace, args.name, args.limit, args.presto)) diff --git a/tests/crawl/test_cli_args.py b/tests/crawl/test_cli_args.py index be3b92651..d284fc597 100644 --- a/tests/crawl/test_cli_args.py +++ b/tests/crawl/test_cli_args.py @@ -30,7 +30,17 @@ def test_parse_args_rejects_duplicate_limit_flags() -> None: parse_args(["-n", "100", "-n", "75"]) - def test_parse_args_rejects_shorthand_plus_limit_flag() -> None: with pytest.raises(SystemExit): parse_args(["-100", "--limit", "24"]) + + +def test_parse_args_accepts_explain_mode() -> None: + args = parse_args(["--explain", "Example"]) + + assert args.explain == "Example" + + +def test_parse_args_rejects_name_and_explain_together() -> None: + with pytest.raises(SystemExit): + parse_args(["--name", "Foo", "--explain", "Foo"]) From ec408247cb018ab76679301e3d6ac36a6afdb504 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Fri, 27 Feb 2026 22:49:52 +0100 Subject: [PATCH 13/13] Enhance `crawl_library --explain` --- scripts/_explain_package.py | 60 +++++++++++++++++++++++++++++- scripts/_resolve_lib.py | 14 ++++--- 
scripts/crawl_libraries.py | 7 +++- tests/library_crawler/test_main.py | 56 ++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 9 deletions(-) diff --git a/scripts/_explain_package.py b/scripts/_explain_package.py index 51b7cfdd0..69fdc26f5 100644 --- a/scripts/_explain_package.py +++ b/scripts/_explain_package.py @@ -32,6 +32,59 @@ def print_package_explain( ) +def print_library_explain( + name: str, + rows: list[tuple[dict[str, Any], list[dict[str, Any]]]], + metadata: dict[str, Any] | None = None, + console: Console | None = None, +) -> None: + console = console or Console() + + if metadata is not None: + console.print(_to_pretty_json(metadata)) + console.print() + + console.rule(f"{name}: input release definitions and normalized variations") + + table = Table( + box=box.SIMPLE_HEAD, + expand=True, + show_header=True, + show_lines=False, + ) + table.add_column("#", style="yellow", no_wrap=True) + table.add_column("Input definition", ratio=1, overflow="fold") + table.add_column("Normalized variation", ratio=1, overflow="fold") + + if not rows: + table.add_row("-", "(empty)", "(empty)") + else: + for release_no, (left, right_variations) in enumerate(rows, start=1): + if release_no > 1: + table.add_row("", "", "") + + if not right_variations: + table.add_row(str(release_no), _to_pretty_json(left), "(empty)") + continue + + if len(right_variations) == 1: + table.add_row( + str(release_no), + _to_pretty_json(left), + _to_pretty_json(right_variations[0]), + ) + continue + + for variation_no, right in enumerate(right_variations, start=1): + table.add_row( + f"{release_no}-{variation_no}", + _to_pretty_json(left) if variation_no == 1 else "", + _to_pretty_json(right), + ) + + console.print(table) + + def _render_json_diff_table( title: str, left_obj: dict[str, Any] | list[Any], @@ -130,6 +183,9 @@ def _make_line(number: int, marker: str, content: str, style: str = "") -> Text: return line +def _to_pretty_json(obj: dict[str, Any] | list[Any]) -> str: + 
return json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) + + def _to_json_lines(obj: dict[str, Any] | list[Any]) -> list[str]: - dumped = json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) - return dumped.splitlines() + return _to_pretty_json(obj).splitlines() diff --git a/scripts/_resolve_lib.py b/scripts/_resolve_lib.py index 22298605a..3b8749b33 100644 --- a/scripts/_resolve_lib.py +++ b/scripts/_resolve_lib.py @@ -384,14 +384,17 @@ def normalize_st_build(st_specifier: str) -> str: return st_specifier[2:] -def explain_library(library: RegistryEntry) -> list[dict]: - releases = list(map(normalize_release_def, library.get("releases", []))) - output: list[dict] = [] - for release in releases: +def explain_library(library: RegistryEntry) -> list[tuple[dict, list[dict]]]: + raw_libraries = library.get("releases", []) + normalized = list(map(normalize_release_def, library.get("releases", []))) + output: list[tuple[dict, list[dict]]] = [] + for left, release in zip(raw_libraries, normalized): if "url" in release: + output.append((left, [release])) # type: ignore[arg-type, list-item] continue base = release.get("base") auto_assets = "pypi.org/project/" in base + right = [] for concrete in spell_out_constraint_variations(release, auto_assets=auto_assets): entry: dict[str, object] = { "base": base, @@ -402,7 +405,8 @@ def explain_library(library: RegistryEntry) -> list[dict]: "version": release["version"] or "*", "tag_prefix": release["tag_prefix"] or "v?" 
} - output.append(entry) + right.append(entry) + output.append((left, right)) # type: ignore[arg-type] return output diff --git a/scripts/crawl_libraries.py b/scripts/crawl_libraries.py index f6d690875..ad414121c 100644 --- a/scripts/crawl_libraries.py +++ b/scripts/crawl_libraries.py @@ -23,6 +23,7 @@ resolve_library, ) from ._utils import err, write_json +from ._explain_package import print_library_explain DEFAULT_REGISTRY = "./registry.json" @@ -347,8 +348,10 @@ async def handle_explain(name: str, args: Args) -> int: raise ValueError( f'Library "{name}" not found in {args.registry.name}.' ) - concrete_defs = explain_library(library) - print(json.dumps(concrete_defs, indent=2, ensure_ascii=False)) + + explain_rows = explain_library(library) + metadata = {key: value for key, value in library.items() if key != "releases"} + print_library_explain(name, explain_rows, metadata=metadata) return 0 diff --git a/tests/library_crawler/test_main.py b/tests/library_crawler/test_main.py index 1176b5fd3..de4664763 100644 --- a/tests/library_crawler/test_main.py +++ b/tests/library_crawler/test_main.py @@ -511,6 +511,62 @@ async def test_name_and_explain_reject_removed_library(monkeypatch, tmp_path): await crawl_libraries.run(args) +@pytest.mark.asyncio +async def test_handle_explain_renders_release_variation_rows(monkeypatch, tmp_path): + repo_path = tmp_path / "registry.json" + release_defs = [{"base": "https://pypi.org/project/example", "version": "*"}] + write_json(repo_path, {"libraries": [{"name": "alpha", "releases": release_defs}]}) + output_path = tmp_path / "libraries.json" + args = make_args(tmp_path, repo_path, output_path, explain="alpha") + + explain_rows = [ + ( + release_defs[0], + [ + { + "base": "https://pypi.org/project/example", + "asset": ["example-win-py38-${version}.zip"], + "platform": "windows", + "python_version": "3.8", + "sublime_text": "*", + "version": "*", + "tag_prefix": "v?", + }, + { + "base": "https://pypi.org/project/example", + "asset": 
["example-win-py33-${version}.zip"], + "platform": "windows", + "python_version": "3.3", + "sublime_text": "*", + "version": "*", + "tag_prefix": "v?", + }, + ], + ), + ] + captured = {} + + monkeypatch.setattr(crawl_libraries, "explain_library", lambda _: explain_rows) + + def fake_print_library_explain(name, rows, metadata=None): + captured["name"] = name + captured["rows"] = rows + captured["metadata"] = metadata + + monkeypatch.setattr( + crawl_libraries, + "print_library_explain", + fake_print_library_explain, + ) + + result = await crawl_libraries.run(args) + + assert result == 0 + assert captured["name"] == "alpha" + assert captured["rows"] == explain_rows + assert captured["metadata"] == {"name": "alpha"} + + @pytest.mark.parametrize( ("names", "expected"), [