Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 31 additions & 11 deletions .github/workflows/crawl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ jobs:
PRESTO_PRESTO_CRAWL: ${{ vars.PRESTO_PRESTO_CRAWL }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.13'

Expand All @@ -39,8 +39,29 @@ jobs:
- name: Ensure wrk directory exists
run: mkdir -p ./wrk

# --------------------------------------------------------------------
# Freeze one run-level timestamp for the entire crawl job.
#
# Why:
# - We want all artifacts and logs from a single workflow run to agree
# on one exact point in time.
# - This avoids subtle drift where separate `date` calls differ by
# seconds and make later analysis harder.
#
# Consumers of this frozen timestamp:
# - scripts/crawl.py (run timestamp for crawl/update detection)
# - scripts/collect_logs.py (log entry timestamp fallback via NOW_TS)
#
# Notes:
# - We export via $GITHUB_ENV so NOW_TS is available to subsequent steps
# in this job.
# - Use epoch seconds (`date +%s`) to stay timezone-agnostic.
# --------------------------------------------------------------------
- name: Freeze run timestamp
run: echo "NOW_TS=$(date +%s)" >> "$GITHUB_ENV"

- name: Restore wrk cache
uses: actions/cache@v4
uses: actions/cache@v5
with:
path: |
./wrk
Expand Down Expand Up @@ -130,8 +151,7 @@ jobs:
gh release upload ${{ env.RELEASE_TAG }} ./wrk/registry.json --clobber
gh release upload ${{ env.RELEASE_TAG }} ./wrk/workspace.json --clobber

DATE_TS=$(date +%s)
DATE=$(TZ=Europe/Berlin date -d "@$DATE_TS" +"%B %d, %Y, %H:%M GMT%:::z" | sed -E 's/([+-])0/\1/')
DATE=$(TZ=Europe/Berlin date -d "@$NOW_TS" +"%B %d, %Y, %H:%M GMT%:::z" | sed -E 's/([+-])0/\1/')
REPO_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Build new notes
{
Expand Down Expand Up @@ -165,13 +185,13 @@ jobs:

uv run -m scripts.collect_logs \
--run-id "${{ github.run_id }}" \
--timestamp "$DATE_TS" \
--workspace ./wrk/workspace.json \
-o ./wrk/logs.json \
notes.txt

- name: Upload wrk backup
id: crawl-backup-step
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v7
with:
name: crawl-backup
path: wrk/
Expand All @@ -185,10 +205,10 @@ jobs:
GITHUB_TOKEN: ${{ github.token }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.13'

Expand All @@ -199,7 +219,7 @@ jobs:
run: mkdir -p ./wrk

- name: Restore wrk cache
uses: actions/cache@v4
uses: actions/cache@v5
with:
path: ./wrk
key: stats-cache-${{ github.run_id }}
Expand All @@ -217,7 +237,7 @@ jobs:

- name: Upload wrk backup
id: stats-backup-step
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v7
with:
name: stats-backup
path: wrk/
Expand Down
8 changes: 8 additions & 0 deletions scripts/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ def pl(count: int, word: str) -> str:
return f"{count} {_INFLECT.plural(singular, count)}"


def format_name_list(names: list[str]) -> str:
    """Join names into a human-readable English list.

    Examples:
        ["a"]           -> "a"
        ["a", "b"]      -> "a and b"
        ["a", "b", "c"] -> "a, b, and c"   (Oxford comma)

    Fix: the original raised IndexError on an empty list (fell through to
    ``names[-1]``); an empty list now yields "".
    """
    if not names:
        return ""
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    return f"{', '.join(names[:-1])}, and {names[-1]}"


def pipe(v, *fns):
for fn in fns:
v = fn(v)
Expand Down
94 changes: 77 additions & 17 deletions scripts/collect_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,28 @@
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
from typing import Any, NotRequired, TypedDict

from ._utils import write_json

DEFAULT_OUTPUT = "logs.json"
HISTORY_DAYS = 32


class LogEntry(TypedDict):
# Canonical log entry shape written by collect_logs.
date: str
run_id: str
notes: str
found_updates: NotRequired[list[FoundUpdateEntry]]


class FoundUpdateEntry(TypedDict):
name: str
detected_at: str
published_at: str


def main():
args = parse_args()
try:
Expand All @@ -30,6 +44,7 @@ class Args:
notes: str
run_id: str | None
timestamp: float | None
workspace: str | None
history_days: int
pretty: bool

Expand All @@ -55,6 +70,14 @@ def parse_args() -> Args:
default=None,
help="Unix timestamp (seconds) when the notes were produced.",
)
parser.add_argument(
"--workspace",
default=None,
help=(
"Optional workspace JSON path. When provided, "
"collect found_updates from matching package entries."
),
)
parser.add_argument(
"--history-days",
type=int,
Expand All @@ -76,6 +99,7 @@ def parse_args() -> Args:
notes=ns.notes,
run_id=ns.run_id,
timestamp=ns.timestamp,
workspace=ns.workspace,
history_days=ns.history_days,
pretty=ns.pretty,
)
Expand All @@ -85,34 +109,43 @@ def update_logs(args: Args):
run_id = args.run_id or os.environ.get("GITHUB_RUN_ID")
if not run_id:
raise SystemExit("collect_logs: missing --run-id or GITHUB_RUN_ID")
if args.timestamp is None:
raise SystemExit("collect_logs: missing --timestamp")

notes_path = Path(args.notes)
if not notes_path.is_file():
raise SystemExit(f"collect_logs: notes file not found: {notes_path}")

notes_text = notes_path.read_text(encoding="utf-8")
forced_timestamp = datetime.fromtimestamp(args.timestamp, tz=timezone.utc)
timestamp = args.timestamp
if timestamp is None:
env_now_ts = os.environ.get("NOW_TS")
if env_now_ts is None:
raise SystemExit("collect_logs: missing --timestamp")
timestamp = float(env_now_ts.strip())

runtime_ts = datetime.fromtimestamp(timestamp, tz=timezone.utc)
run_timestamp_iso = runtime_ts.strftime("%Y-%m-%dT%H:%M:%SZ")

output_path = Path(args.output).expanduser().resolve()
output_dir = output_path.parent
if output_dir and not output_dir.exists():
output_dir.mkdir(parents=True, exist_ok=True)

entries = load_logs(output_path)
entries: list[LogEntry] = load_json(output_path)
run_id_str = str(run_id)
entries = [entry for entry in entries if entry.get("run_id") != run_id_str]

entries.append({
"date": forced_timestamp.isoformat(),
entry: LogEntry = {
"date": runtime_ts.isoformat(),
"run_id": run_id_str,
"notes": notes_text,
})
}
if args.workspace:
entry["found_updates"] = derive_found_updates(args.workspace, run_timestamp_iso)
entries.append(entry)

entries.sort(key=lambda entry: entry["date"], reverse=True)

cutoff = retention_cutoff(args.history_days, reference=now_utc())
cutoff = now_ts() - timedelta(days=args.history_days)
kept_entries = [
entry for entry in entries
if datetime.fromisoformat(entry["date"]) >= cutoff
Expand All @@ -121,18 +154,45 @@ def update_logs(args: Args):
write_json(output_path, kept_entries, pretty=args.pretty, ensure_ascii=True)


def now_utc() -> datetime:
return datetime.now(timezone.utc)
def derive_found_updates(workspace_path: str, run_timestamp_iso: str) -> list[FoundUpdateEntry]:
    """Return updates that were detected during this exact run.

    A package counts as "found" when its ``update_detected`` timestamp
    equals the frozen run timestamp. The result is ordered
    case-insensitively by package name.
    """
    updates: list[FoundUpdateEntry] = [
        {
            "name": pkg["name"],
            "detected_at": run_timestamp_iso,
            "published_at": pkg["last_modified"],
        }
        for pkg in load_workspace_packages(workspace_path).values()
        if pkg.get("update_detected") == run_timestamp_iso
    ]
    return sorted(updates, key=lambda item: item["name"].casefold())


def load_workspace_packages(path: str) -> dict[str, dict]:
    """Load the ``packages`` mapping from a workspace JSON file.

    Exits with a descriptive message when the file is missing or when the
    document (or its ``packages`` member) is not a JSON object.

    Note: the diff rendering interleaved stale removed lines of the old
    ``retention_cutoff`` helper into this span; this is the clean
    new-side definition.
    """
    workspace_path = Path(path)
    if not workspace_path.is_file():
        raise SystemExit(f"collect_logs: workspace file not found: {workspace_path}")

    workspace = load_json(workspace_path)
    if not isinstance(workspace, dict):
        raise SystemExit(f"collect_logs: workspace must be a JSON object: {workspace_path}")

    packages: dict[str, dict] = workspace.get("packages", {})
    if not isinstance(packages, dict):
        raise SystemExit(f"collect_logs: workspace packages must be an object: {workspace_path}")

    return packages


def now_ts() -> datetime:
    """Current UTC time, honoring a frozen NOW_TS override.

    When the NOW_TS environment variable is set to a non-empty value
    (epoch seconds, exported by the crawl workflow), that instant is
    returned so every step of a run agrees on one timestamp; otherwise
    the live wall clock is used.
    """
    raw = os.getenv("NOW_TS")
    if not raw:
        return datetime.now(timezone.utc)
    return datetime.fromtimestamp(float(raw.strip()), tz=timezone.utc)


def load_logs(path: Path) -> list[dict[str, Any]]:
def load_json(path: Path) -> Any:
try:
return json.loads(path.read_text(encoding="utf-8"))
except FileNotFoundError:
Expand Down
33 changes: 26 additions & 7 deletions scripts/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
normalize_version_spec,
)
from ._utils import (
next_run, parse_version, resolve_url, update_url, write_json, pl, pick,
VersionInfo
format_name_list, next_run, parse_version, resolve_url, update_url, write_json, pl, pick,
VersionInfo,
)
from ._explain_package import print_package_explain
import traceback
Expand Down Expand Up @@ -81,6 +81,7 @@ class WorkspaceEntry(TypedDict, total=False):
last_seen: IsoTimestamp
next_crawl: IsoTimestamp
last_modified: IsoTimestamp
update_detected: IsoTimestamp
failing_since: IsoTimestamp
fail_reason: str

Expand Down Expand Up @@ -252,6 +253,7 @@ async def main_(
maintenance(registry, workspace)
tocrawl = next_packages_to_crawl(registry, workspace, limit=limit, presto=presto)

updated_packages: list[str] = []
async with aiohttp.ClientSession() as session:
tasks = [
crawl(
Expand All @@ -265,6 +267,8 @@ async def main_(
results = await asyncio.gather(*tasks)
for new_entry in results:
workspace["packages"][new_entry["name"]] = new_entry
if "update_detected" in new_entry:
updated_packages.append(new_entry["name"])
if name_requested:
print(json.dumps(new_entry, indent=2, ensure_ascii=False))

Expand All @@ -275,6 +279,11 @@ async def main_(
f"in db."
)

updated_packages = sorted(updated_packages)
if updated_packages:
s = "" if len(updated_packages) == 1 else "s"
print(f"Found update{s} for {format_name_list(updated_packages)}.")

if len(tocrawl) > 0:
print("GitHub", rate_limit_info)

Expand All @@ -286,7 +295,7 @@ def next_packages_to_crawl(
Returns a list of packages to crawl, sorted by next_crawl timestamp.
If next_crawl is not set, it defaults to the current time.
"""
now = datetime.now(timezone.utc)
now = now_ts()
now_string = now.strftime(UTC_FORMAT)
packages = registry["packages"]
packages_to_crawl = [
Expand Down Expand Up @@ -355,7 +364,7 @@ def next_packages_to_crawl(
def maintenance(registry: Registry, workspace: Workspace) -> None:
# lookup all packages in workspace and mark them as `removed`
# if they have been removed from the registry
now = datetime.now(timezone.utc)
now = now_ts()
now_string = now.strftime(UTC_FORMAT)
current_package_names = {entry["name"] for entry in registry["packages"]}
packages = workspace["packages"]
Expand All @@ -369,7 +378,7 @@ async def crawl(
existing: WorkspaceEntry
) -> WorkspaceEntry:
out: WorkspaceEntry
now = datetime.now(timezone.utc)
now = now_ts()
now_string = now.strftime(UTC_FORMAT)

try:
Expand Down Expand Up @@ -433,6 +442,10 @@ async def crawl(
else:
out["last_modified"] = max((r["date"] for r in releases))

previous_last_modified = existing.get("last_modified")
if previous_last_modified and out["last_modified"] != previous_last_modified:
out["update_detected"] = now_string

# Determine next_crawl interval
last_modified_dt = (
datetime
Expand Down Expand Up @@ -460,7 +473,7 @@ async def crawl_package(
entry: RegistryEntry,
existing: WorkspaceEntry
) -> WorkspaceEntry:
now = datetime.now(timezone.utc)
now = now_ts()
maybe_skip_crawling(entry, existing, now)
ensure_secure_source(entry, existing)

Expand Down Expand Up @@ -613,7 +626,7 @@ async def resolve_tags(
version_set = SpecifierSet(version_spec)

resolved_releases: list[Release] = []
now = datetime.now(timezone.utc)
now = now_ts()
cutoff = now - timedelta(weeks=53)

# We take all releases from the current (rolling) year, but if there
Expand Down Expand Up @@ -1148,6 +1161,12 @@ def count_limit_occurrences(argv: list[str]) -> int:
return count


def now_ts() -> datetime:
    """Return the run's frozen timestamp, falling back to the real clock.

    NOW_TS (epoch seconds) is exported by the crawl workflow so that all
    scripts in one run observe the same instant; when it is unset or
    empty we use live UTC time.
    """
    override = os.getenv("NOW_TS")
    return (
        datetime.fromtimestamp(float(override.strip()), tz=timezone.utc)
        if override
        else datetime.now(timezone.utc)
    )


def env_flag(name: str, default: bool = False) -> bool:
value = os.getenv(name)
if value is None:
Expand Down
Loading
Loading