Dev #348 (merged)

28 changes: 12 additions & 16 deletions .github/workflows/publish.yml
@@ -10,6 +10,9 @@ on:
types:
- completed
workflow_dispatch:
push:
tags:
- '*'
schedule:
- cron: '22 8 * * *'

@@ -21,7 +24,7 @@ permissions:
jobs:
build-st4-channel:
runs-on: ubuntu-latest
if: github.event_name != 'schedule'
if: github.event_name == 'workflow_run' || github.event_name == 'workflow_dispatch'
outputs:
update_needed: ${{ steps.check.outputs.update_needed }}
env:
@@ -133,7 +136,7 @@ jobs:

build-st3-channel:
runs-on: ubuntu-latest
if: github.event_name != 'schedule'
if: github.event_name == 'workflow_run' || github.event_name == 'workflow_dispatch'
env:
RELEASE_TAG: the-st3-channel
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -276,18 +279,11 @@ jobs:
restore-keys: |
wrk-cache-

- name: Fetch recent workflow runs
- name: Fetch workflow metadata for logs enrichment
run: |
SINCE=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)
gh api \
repos/${{ github.repository }}/actions/workflows/${{ env.WORKFLOW_ID }}/runs \
--method GET \
-f per_page=100 \
-f status=completed \
-f created=">=$SINCE" \
--paginate \
--jq '[.workflow_runs[] | {id, conclusion, run_started_at}]' \
> "$RUNNER_TEMP/workflow_runs.json"
uv run -m scripts.fetch_logs_metadata \
--runs-output "$RUNNER_TEMP/workflow_runs.json" \
--artifacts-output "$RUNNER_TEMP/workflow_artifacts.json"

- name: Enrich logs.json
run: |
@@ -299,8 +295,8 @@

uv run -m scripts.enrich_logs \
--runs "$RUNNER_TEMP/workflow_runs.json" \
-i ./wrk/logs.json \
-o ./wrk/logs.json
--artifacts "$RUNNER_TEMP/workflow_artifacts.json" \
./wrk/logs.json

retry_5() {
local n=1
@@ -355,7 +351,7 @@ jobs:
gh-pages-nightly:
runs-on: ubuntu-latest
needs: enrich_logs
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/'))
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
43 changes: 43 additions & 0 deletions README.md
@@ -174,6 +174,49 @@ The command above reuses the same layout as [CI](https://github.com/packagecontr

---

### Logs handling

#### `collect_logs.py`

`scripts/collect_logs.py` appends the current run's `notes.txt` to `logs.json` (rolling history),
keyed by run id and trimmed to a retention window (`--history-days`, default 32).

If you pass a `--workspace`, the entry also includes a `found_updates` list of the packages detected in that run.

```bash
uv run -m scripts.collect_logs --output ./logs.json --workspace ./workspace.json ./notes.txt
```
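
The trim itself amounts to a date cutoff. The sketch below is an illustration only, not the actual implementation in `scripts/collect_logs.py`; it assumes entries carry the ISO-8601 `date` field that `enrich_logs.py` also relies on:

```python
# Illustrative sketch: drop entries older than the retention window.
# HISTORY_DAYS mirrors the --history-days flag (default 32).
from datetime import datetime, timedelta, timezone

HISTORY_DAYS = 32

def trim_history(entries: list[dict]) -> list[dict]:
    cutoff = datetime.now(timezone.utc) - timedelta(days=HISTORY_DAYS)
    return [
        entry for entry in entries
        if datetime.fromisoformat(entry["date"].replace("Z", "+00:00")) >= cutoff
    ]
```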

This is a purely mechanical step performed in `crawl.yml`; after that job is done, we enrich
the logs in `publish.yml`:

#### `refresh_logs.py` (plus lower-level helpers)

Use this when you want to reproduce or update `logs.json` locally with GitHub Actions metadata.
The `gh` CLI is required for the ad-hoc API queries.

```bash
# one-shot: download logs (if missing), fetch metadata, enrich logs
uv run -m scripts.refresh_logs --pretty
```

Defaults are tuned for local use:
- repo: inferred from `GITHUB_REPOSITORY` or local `git origin` (see the sketch after this list)
- workflow id: inferred from `crawl.yml` if not set
- since window: `--since-hours 24`
- metadata files: `./workflow_runs.json`, `./workflow_artifacts.json`
- artifact scan cap: `--artifacts-max-pages 10`
- logs path: `./logs.json`
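
The repo inference from the first bullet can be pictured like this; the function name and regex are illustrative assumptions, not the actual code in `scripts/refresh_logs.py`:

```python
# Sketch of the inference order described above: env var first,
# then "owner/name" parsed from the local git origin URL.
import os
import re
import subprocess

def infer_repo() -> str | None:
    if repo := os.environ.get("GITHUB_REPOSITORY"):
        return repo
    url = subprocess.run(
        ["git", "remote", "get-url", "origin"],
        capture_output=True, text=True, check=True,
    ).stdout.strip()
    # Handles git@github.com:owner/name.git as well as https URLs.
    if match := re.search(r"github\.com[:/]([^/]+/[^/]+?)(?:\.git)?$", url):
        return match.group(1)
    return None
```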

If you want explicit control, run the two low-level commands yourself; this is what `publish.yml` does:

```bash
uv run -m scripts.fetch_logs_metadata --pretty
uv run -m scripts.enrich_logs --pretty
```
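
For orientation, an enriched `logs.json` entry might end up looking like this; the field names mirror the `TypedDict`s in `scripts/enrich_logs.py`, while every value below is an invented placeholder:

```python
# Hypothetical entry; values are placeholders, shapes follow
# RuntimeArtifact/ArtifactMetadata in scripts/enrich_logs.py.
entry = {
    "date": "2024-05-01T08:22:11Z",  # run_started_at of the workflow run
    "run_id": "9012345678",          # stringified GitHub Actions run id
    "conclusion": "success",         # backfilled from workflow_runs.json
    "artifacts": [                   # attached from workflow_artifacts.json
        {"id": 123456, "name": "logs", "size": 2048, "url": "https://..."},
    ],
}
```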

---

### `snapshot_test.py`

Creates a compact, single-file snapshot for regression testing (`registry + channel`) from a reduced package set.
114 changes: 96 additions & 18 deletions scripts/enrich_logs.py
@@ -2,9 +2,10 @@

import argparse
import json
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import Any, TypedDict

from ._utils import write_json

@@ -14,35 +15,68 @@ class Args:
input: str
output: str
runs: str
artifacts: str | None
pretty: bool


type RunId = str


class RuntimeArtifact(TypedDict):
run_id: RunId
id: int
name: str
size: int
url: str


class ArtifactMetadata(TypedDict):
id: int
name: str
size: int
url: str


def main():
args = parse_args()
update_logs(args)


def parse_args() -> Args:
parser = argparse.ArgumentParser(
description="Enrich logs.json with workflow run metadata and fill missing runs."
description=(
"Enrich logs.json with workflow run metadata, artifacts metadata, "
"and fill missing runs."
)
)
parser.add_argument(
"-i",
"--input",
"input",
nargs="?",
default="logs.json",
help="Input logs file (default: logs.json).",
)
parser.add_argument(
"-o",
"--output",
default="logs.json",
help="Output logs file (default: logs.json).",
default=None,
help="Output logs file (defaults to INPUT).",
)
parser.add_argument(
"-r",
"--runs",
required=True,
help="Path to workflow_runs.json fetched from the GitHub API.",
default="./workflow_runs.json",
help=(
"Path to workflow_runs.json fetched from the GitHub API "
"(default: ./workflow_runs.json)."
),
)
parser.add_argument(
"--artifacts",
default="./workflow_artifacts.json",
help=(
"Optional path to workflow_artifacts.json fetched from the GitHub API "
"(default: ./workflow_artifacts.json)."
),
)
parser.add_argument(
"--pretty",
@@ -52,59 +86,103 @@ def parse_args() -> Args:
ns = parser.parse_args()
return Args(
input=ns.input,
output=ns.output,
output=ns.output or ns.input,
runs=ns.runs,
artifacts=ns.artifacts,
pretty=ns.pretty,
)


def update_logs(args: Args):
entries = load_json(Path(args.input)) or []
runs = load_json(Path(args.runs)) or []
artifacts: list[RuntimeArtifact] = []
if args.artifacts:
artifacts = load_json(Path(args.artifacts)) or []

if not isinstance(entries, list):
raise SystemExit(f"enrich_logs: input must be a JSON array: {args.input}")
if not isinstance(runs, list):
raise SystemExit(f"enrich_logs: runs must be a JSON array: {args.runs}")
if not isinstance(artifacts, list):
raise SystemExit(f"enrich_logs: artifacts must be a JSON array: {args.artifacts}")

enriched = 0
created = 0
artifacts_attached = 0

runs_index = {
str(run.get("id")): {
run_id: {
"conclusion": run.get("conclusion"),
"run_started_at": run.get("run_started_at"),
}
for run in runs
if "id" in run
if (run_id := str(run.get("id", "")))
}
artifacts_index = build_artifacts_index(artifacts)

seen = set()
for entry in entries:
run_id = str(entry.get("run_id", ""))
if not run_id:
continue
run_id = entry["run_id"]
seen.add(run_id)

info = runs_index.get(run_id)
if not info:
continue
if not entry.get("conclusion") and info.get("conclusion"):
if info and not entry.get("conclusion") and info.get("conclusion"):
entry["conclusion"] = info["conclusion"]
enriched += 1

run_artifacts = artifacts_index.get(run_id)
if run_artifacts and entry.get("artifacts") != run_artifacts:
entry["artifacts"] = run_artifacts
artifacts_attached += 1

for run_id, info in runs_index.items():
if run_id in seen:
continue

date = info.get("run_started_at")
if not date:
continue

new_entry = {
"date": date,
"run_id": run_id,
}
if conclusion := info.get("conclusion"):
new_entry["conclusion"] = conclusion
if run_artifacts := artifacts_index.get(run_id):
new_entry["artifacts"] = run_artifacts
artifacts_attached += 1

entries.append(new_entry)
created += 1

entries.sort(key=lambda entry: entry.get("date", ""), reverse=True)
write_json(args.output, entries, pretty=args.pretty, ensure_ascii=True)
print(f"Enriched entries: {enriched}, added missing runs: {created}")
print(
"Enriched entries: "
f"{enriched}, "
f"added missing runs: {created}, "
f"attached artifacts on entries: {artifacts_attached}"
)


def build_artifacts_index(artifacts: list[RuntimeArtifact]) -> dict[RunId, list[ArtifactMetadata]]:
artifacts_by_run: defaultdict[RunId, list[ArtifactMetadata]] = defaultdict(list)

for artifact in artifacts:
run_id = artifact["run_id"]
artifacts_by_run[run_id].append({
"id": artifact["id"],
"name": artifact["name"],
"size": artifact["size"],
"url": artifact["url"],
})

for run_artifacts in artifacts_by_run.values():
run_artifacts.sort(key=lambda item: (item["name"].casefold(), str(item["id"])))

return dict(artifacts_by_run)


def load_json(path: Path) -> Any: