From 3c030e82f1cb7a63f7262a33b71e1184fecfaf8c Mon Sep 17 00:00:00 2001 From: herr kaste Date: Sat, 21 Mar 2026 13:17:24 +0100 Subject: [PATCH 01/15] Track update detection timestamps on packages Add `update_detected` to workspace package entries and set it when a package's `last_modified` value changes across successful crawls. - `update_detected` is not set on first discovery - `update_detected` is unset on the next run --- scripts/crawl.py | 5 ++ tests/crawl/test_update_detection.py | 73 ++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 tests/crawl/test_update_detection.py diff --git a/scripts/crawl.py b/scripts/crawl.py index 73545174f..97f1c304f 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -81,6 +81,7 @@ class WorkspaceEntry(TypedDict, total=False): last_seen: IsoTimestamp next_crawl: IsoTimestamp last_modified: IsoTimestamp + update_detected: IsoTimestamp failing_since: IsoTimestamp fail_reason: str @@ -433,6 +434,10 @@ async def crawl( else: out["last_modified"] = max((r["date"] for r in releases)) + previous_last_modified = existing.get("last_modified") + if previous_last_modified and out["last_modified"] != previous_last_modified: + out["update_detected"] = now_string + # Determine next_crawl interval last_modified_dt = ( datetime diff --git a/tests/crawl/test_update_detection.py b/tests/crawl/test_update_detection.py new file mode 100644 index 000000000..4772cebfa --- /dev/null +++ b/tests/crawl/test_update_detection.py @@ -0,0 +1,73 @@ +import pytest + +from scripts.crawl import crawl + + +@pytest.mark.asyncio +async def test_sets_update_detected_when_last_modified_changes(set_now, monkeypatch): + package = {"name": "Example"} + existing = { + "name": "Example", + "last_modified": "2024-05-01T00:00:00Z", + "update_detected": "2024-05-05T00:00:00Z", + } + + async def stub(*args, **kwargs): + return { + "name": "Example", + "releases": [{"date": "2024-05-31T00:00:00Z"}], + } + + set_now("2024-06-01T00:00:00Z") + 
monkeypatch.setattr("scripts.crawl.crawl_package", stub) + + result = await crawl(object(), package, existing) + + assert result["last_modified"] == "2024-05-31T00:00:00Z" + assert result["update_detected"] == "2024-06-01T00:00:00Z" + + +@pytest.mark.asyncio +async def test_does_not_set_update_detected_for_first_seen_package(set_now, monkeypatch): + package = {"name": "Example"} + existing = {"name": "Example"} + + async def stub(*args, **kwargs): + return { + "name": "Example", + "releases": [{"date": "2024-05-31T00:00:00Z"}], + } + + set_now("2024-06-01T00:00:00Z") + monkeypatch.setattr("scripts.crawl.crawl_package", stub) + + result = await crawl(object(), package, existing) + + assert result["last_modified"] == "2024-05-31T00:00:00Z" + assert "update_detected" not in result + + +@pytest.mark.asyncio +async def test_drops_previous_update_detected_when_last_modified_is_unchanged( + set_now, + monkeypatch, +): + package = {"name": "Example"} + existing = { + "name": "Example", + "last_modified": "2024-05-31T00:00:00Z", + "update_detected": "2024-05-15T00:00:00Z", + } + + async def stub(*args, **kwargs): + return { + "name": "Example", + "releases": [{"date": "2024-05-31T00:00:00Z"}], + } + + set_now("2024-06-01T00:00:00Z") + monkeypatch.setattr("scripts.crawl.crawl_package", stub) + + result = await crawl(object(), package, existing) + + assert "update_detected" not in result From 661d91e541545018231145971376cfabf5ab9093 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Sat, 21 Mar 2026 13:20:19 +0100 Subject: [PATCH 02/15] Print "Found update(s)..." After writing results, collect packages whose `update_detected` matches the run timestamp and print a readable summary line in the crawl output. Also move Oxford-list name formatting into `scripts._utils` and reuse it from both `crawl.py` and `crawl_libraries.py`. 
--- scripts/_utils.py | 8 ++++ scripts/crawl.py | 12 ++++- scripts/crawl_libraries.py | 10 +--- tests/crawl/test_update_detection.py | 68 +++++++++++++++++++++++++++- tests/test_utils.py | 13 ++++++ 5 files changed, 99 insertions(+), 12 deletions(-) diff --git a/scripts/_utils.py b/scripts/_utils.py index 38cf6b95c..979497940 100644 --- a/scripts/_utils.py +++ b/scripts/_utils.py @@ -57,6 +57,14 @@ def pl(count: int, word: str) -> str: return f"{count} {_INFLECT.plural(singular, count)}" +def format_name_list(names: list[str]) -> str: + if len(names) == 1: + return names[0] + if len(names) == 2: + return f"{names[0]} and {names[1]}" + return f"{', '.join(names[:-1])}, and {names[-1]}" + + def pipe(v, *fns): for fn in fns: v = fn(v) diff --git a/scripts/crawl.py b/scripts/crawl.py index 97f1c304f..745ba5335 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -29,8 +29,8 @@ normalize_version_spec, ) from ._utils import ( - next_run, parse_version, resolve_url, update_url, write_json, pl, pick, - VersionInfo + format_name_list, next_run, parse_version, resolve_url, update_url, write_json, pl, pick, + VersionInfo, ) from ._explain_package import print_package_explain import traceback @@ -253,6 +253,7 @@ async def main_( maintenance(registry, workspace) tocrawl = next_packages_to_crawl(registry, workspace, limit=limit, presto=presto) + updated_packages: list[str] = [] async with aiohttp.ClientSession() as session: tasks = [ crawl( @@ -266,6 +267,8 @@ async def main_( results = await asyncio.gather(*tasks) for new_entry in results: workspace["packages"][new_entry["name"]] = new_entry + if "update_detected" in new_entry: + updated_packages.append(new_entry["name"]) if name_requested: print(json.dumps(new_entry, indent=2, ensure_ascii=False)) @@ -276,6 +279,11 @@ async def main_( f"in db." 
) + updated_packages = sorted(updated_packages) + if updated_packages: + s = "" if len(updated_packages) == 1 else "s" + print(f"Found update{s} for {format_name_list(updated_packages)}.") + if len(tocrawl) > 0: print("GitHub", rate_limit_info) diff --git a/scripts/crawl_libraries.py b/scripts/crawl_libraries.py index ad414121c..4bd9c40b7 100644 --- a/scripts/crawl_libraries.py +++ b/scripts/crawl_libraries.py @@ -22,7 +22,7 @@ load_json, resolve_library, ) -from ._utils import err, write_json +from ._utils import err, format_name_list, write_json from ._explain_package import print_library_explain @@ -465,13 +465,5 @@ def format_updated_message(names: list[str]) -> str: return f"{format_name_list(names)} have been updated." -def format_name_list(names: list[str]) -> str: - if len(names) == 1: - return names[0] - if len(names) == 2: - return f"{names[0]} and {names[1]}" - return f"{', '.join(names[:-1])}, and {names[-1]}" - - if __name__ == "__main__": main() diff --git a/tests/crawl/test_update_detection.py b/tests/crawl/test_update_detection.py index 4772cebfa..d2fe454ba 100644 --- a/tests/crawl/test_update_detection.py +++ b/tests/crawl/test_update_detection.py @@ -1,6 +1,6 @@ import pytest -from scripts.crawl import crawl +from scripts.crawl import crawl, main_ @pytest.mark.asyncio @@ -71,3 +71,69 @@ async def stub(*args, **kwargs): result = await crawl(object(), package, existing) assert "update_detected" not in result + + +@pytest.mark.asyncio +async def test_main_prints_sorted_oxford_list_for_updates(set_now, monkeypatch, capsys): + registry = { + "packages": [ + {"name": "gamma"}, + {"name": "alpha"}, + {"name": "beta"}, + ] + } + workspace = {"packages": {}} + + async def stub_crawl(session, package, existing): + return {"name": package["name"], "update_detected": "2024-06-01T00:00:00Z"} + + set_now("2024-06-01T00:00:00Z") + monkeypatch.setattr("scripts.crawl.crawl", stub_crawl) + + await main_(registry, workspace, None, 100) + + out = 
capsys.readouterr().out + assert "Found updates for alpha, beta, and gamma." in out + + +@pytest.mark.asyncio +async def test_main_prints_singular_update_summary(set_now, monkeypatch, capsys): + registry = {"packages": [{"name": "alpha"}, {"name": "beta"}]} + workspace = {"packages": {}} + + async def stub_crawl(session, package, existing): + if package["name"] == "alpha": + return {"name": package["name"], "update_detected": "2024-06-01T00:00:00Z"} + return {"name": package["name"]} + + set_now("2024-06-01T00:00:00Z") + monkeypatch.setattr("scripts.crawl.crawl", stub_crawl) + + await main_(registry, workspace, None, 100) + + out = capsys.readouterr().out + assert "Found update for alpha." in out + + +@pytest.mark.asyncio +async def test_main_does_not_report_first_seen_as_update( + set_now, + monkeypatch, + capsys, +): + registry = {"packages": [{"name": "alpha"}]} + workspace = {"packages": {}} + + async def stub_crawl_package(session, package, existing): + return { + "name": package["name"], + "releases": [{"date": "2024-05-31T00:00:00Z"}], + } + + set_now("2024-06-01T00:00:00Z") + monkeypatch.setattr("scripts.crawl.crawl_package", stub_crawl_package) + + await main_(registry, workspace, None, 100) + + out = capsys.readouterr().out + assert "Found update" not in out diff --git a/tests/test_utils.py b/tests/test_utils.py index 1716e2f5f..61016b4ff 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,6 +5,7 @@ is_semver, normalize_tz_aware_datetime, unique_values_preserving_order, + format_name_list, pl, ) @@ -107,6 +108,18 @@ def test_unique_values_preserving_order_iterable(): assert unique_values_preserving_order(values) == ["a", "b", "c"] +@pytest.mark.parametrize( + ("names", "expected"), + [ + (["alpha"], "alpha"), + (["alpha", "beta"], "alpha and beta"), + (["alpha", "beta", "gamma"], "alpha, beta, and gamma"), + ], +) +def test_format_name_list(names, expected): + assert format_name_list(names) == expected + + @pytest.mark.parametrize( ("count", 
"word", "expected"), [ From cbed7a29d70922406aa3ea7b5e5602dd0ed33adc Mon Sep 17 00:00:00 2001 From: herr kaste Date: Sat, 21 Mar 2026 13:21:12 +0100 Subject: [PATCH 03/15] Freeze crawl job timestamp in workflow Add an early `NOW_TS` export in the crawl workflow job and reuse that frozen value when building notes and invoking `scripts.collect_logs`. This aligns run artifacts and log timestamps to one shared run marker, and prepares `scripts.crawl` to read the same `NOW_TS` value. --- .github/workflows/crawl.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/crawl.yml b/.github/workflows/crawl.yml index 69a2bf795..55a217c6d 100644 --- a/.github/workflows/crawl.yml +++ b/.github/workflows/crawl.yml @@ -39,6 +39,9 @@ jobs: - name: Ensure wrk directory exists run: mkdir -p ./wrk + - name: Freeze run timestamp + run: echo "NOW_TS=$(date +%s)" >> "$GITHUB_ENV" + - name: Restore wrk cache uses: actions/cache@v4 with: @@ -130,8 +133,7 @@ jobs: gh release upload ${{ env.RELEASE_TAG }} ./wrk/registry.json --clobber gh release upload ${{ env.RELEASE_TAG }} ./wrk/workspace.json --clobber - DATE_TS=$(date +%s) - DATE=$(TZ=Europe/Berlin date -d "@$DATE_TS" +"%B %d, %Y, %H:%M GMT%:::z" | sed -E 's/([+-])0/\1/') + DATE=$(TZ=Europe/Berlin date -d "@$NOW_TS" +"%B %d, %Y, %H:%M GMT%:::z" | sed -E 's/([+-])0/\1/') REPO_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" # Build new notes { @@ -165,7 +167,7 @@ jobs: uv run -m scripts.collect_logs \ --run-id "${{ github.run_id }}" \ - --timestamp "$DATE_TS" \ + --timestamp "$NOW_TS" \ -o ./wrk/logs.json \ notes.txt From f4843bd5f4cc07dc95c12e761cc92e13c2989cf8 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Sat, 21 Mar 2026 13:21:58 +0100 Subject: [PATCH 04/15] Allow NOW_TS env variable to set the time for crawl Update `scripts.crawl` to honor `NOW_TS` first to freeze the time during runtime. `NOW_TS` is accepted as Unix epoch seconds. 
--- scripts/crawl.py | 16 +++++++++++----- tests/crawl/test_update_detection.py | 15 ++++++++++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/scripts/crawl.py b/scripts/crawl.py index 745ba5335..6b9ceef71 100644 --- a/scripts/crawl.py +++ b/scripts/crawl.py @@ -295,7 +295,7 @@ def next_packages_to_crawl( Returns a list of packages to crawl, sorted by next_crawl timestamp. If next_crawl is not set, it defaults to the current time. """ - now = datetime.now(timezone.utc) + now = now_ts() now_string = now.strftime(UTC_FORMAT) packages = registry["packages"] packages_to_crawl = [ @@ -364,7 +364,7 @@ def next_packages_to_crawl( def maintenance(registry: Registry, workspace: Workspace) -> None: # lookup all packages in workspace and mark them as `removed` # if they have been removed from the registry - now = datetime.now(timezone.utc) + now = now_ts() now_string = now.strftime(UTC_FORMAT) current_package_names = {entry["name"] for entry in registry["packages"]} packages = workspace["packages"] @@ -378,7 +378,7 @@ async def crawl( existing: WorkspaceEntry ) -> WorkspaceEntry: out: WorkspaceEntry - now = datetime.now(timezone.utc) + now = now_ts() now_string = now.strftime(UTC_FORMAT) try: @@ -473,7 +473,7 @@ async def crawl_package( entry: RegistryEntry, existing: WorkspaceEntry ) -> WorkspaceEntry: - now = datetime.now(timezone.utc) + now = now_ts() maybe_skip_crawling(entry, existing, now) ensure_secure_source(entry, existing) @@ -626,7 +626,7 @@ async def resolve_tags( version_set = SpecifierSet(version_spec) resolved_releases: list[Release] = [] - now = datetime.now(timezone.utc) + now = now_ts() cutoff = now - timedelta(weeks=53) # We take all releases from the current (rolling) year, but if there @@ -1161,6 +1161,12 @@ def count_limit_occurrences(argv: list[str]) -> int: return count +def now_ts() -> datetime: + if value := os.getenv("NOW_TS"): + return datetime.fromtimestamp(float(value.strip()), tz=timezone.utc) + return 
datetime.now(timezone.utc) + + def env_flag(name: str, default: bool = False) -> bool: value = os.getenv(name) if value is None: diff --git a/tests/crawl/test_update_detection.py b/tests/crawl/test_update_detection.py index d2fe454ba..9fffdcc98 100644 --- a/tests/crawl/test_update_detection.py +++ b/tests/crawl/test_update_detection.py @@ -1,6 +1,7 @@ +from datetime import datetime, timezone import pytest -from scripts.crawl import crawl, main_ +from scripts.crawl import crawl, main_, now_ts @pytest.mark.asyncio @@ -137,3 +138,15 @@ async def stub_crawl_package(session, package, existing): out = capsys.readouterr().out assert "Found update" not in out + + +def test_now_ts_prefers_now_ts_env(monkeypatch, set_now): + set_now("2024-06-01T00:00:00Z") + monkeypatch.setenv("NOW_TS", "1717286400") + + expected = ( + datetime + .strptime("2024-06-02T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ") + .replace(tzinfo=timezone.utc) + ) + assert now_ts() == expected From 4fcc27ec077d49a982706320c26b8b226590a463 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 10:58:01 +0100 Subject: [PATCH 05/15] Allow collect_logs to read NOW_TS by default Make `--timestamp` optional for collect_logs. Timestamp precedence is now: 1) explicit `--timestamp` 2) `NOW_TS` environment variable If neither is available, keep the previous failure behavior and raise `collect_logs: missing --timestamp`. Add tests for NOW_TS fallback, explicit-arg precedence over NOW_TS, and missing-timestamp failure. 
--- scripts/collect_logs.py | 11 ++++-- tests/test_collect_logs.py | 76 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index f7182dc1e..bfc2a89c6 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -85,15 +85,20 @@ def update_logs(args: Args): run_id = args.run_id or os.environ.get("GITHUB_RUN_ID") if not run_id: raise SystemExit("collect_logs: missing --run-id or GITHUB_RUN_ID") - if args.timestamp is None: - raise SystemExit("collect_logs: missing --timestamp") notes_path = Path(args.notes) if not notes_path.is_file(): raise SystemExit(f"collect_logs: notes file not found: {notes_path}") notes_text = notes_path.read_text(encoding="utf-8") - forced_timestamp = datetime.fromtimestamp(args.timestamp, tz=timezone.utc) + timestamp = args.timestamp + if timestamp is None: + now_ts = os.environ.get("NOW_TS") + if now_ts is None: + raise SystemExit("collect_logs: missing --timestamp") + timestamp = float(now_ts.strip()) + + forced_timestamp = datetime.fromtimestamp(timestamp, tz=timezone.utc) output_path = Path(args.output).expanduser().resolve() output_dir = output_path.parent diff --git a/tests/test_collect_logs.py b/tests/test_collect_logs.py index 5056e1ecf..98c280051 100644 --- a/tests/test_collect_logs.py +++ b/tests/test_collect_logs.py @@ -2,6 +2,8 @@ import sys from datetime import datetime, timezone +import pytest + import scripts.collect_logs as collect_logs @@ -97,3 +99,77 @@ def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch): assert all(entry["run_id"] != "1" for entry in entries) # Ordering should keep the newest entry first assert entries[0]["run_id"] == "99" + + +def test_collect_logs_uses_now_ts_when_timestamp_is_missing(tmp_path, monkeypatch): + notes_path = tmp_path / "notes.txt" + notes_path.write_text("line\n", encoding="utf-8") + + ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp() + 
monkeypatch.setenv("NOW_TS", str(ts)) + monkeypatch.setattr( + collect_logs, + "now_utc", + lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), + ) + + logs_path = tmp_path / "logs.json" + args = collect_logs.Args( + output=str(logs_path), + notes=str(notes_path), + run_id="42", + timestamp=None, + history_days=collect_logs.HISTORY_DAYS, + pretty=False, + ) + + collect_logs.update_logs(args) + + entries = json.loads(logs_path.read_text(encoding="utf-8")) + assert entries[0]["date"] == "2024-10-05T09:30:00+00:00" + + +def test_collect_logs_timestamp_arg_wins_over_now_ts(tmp_path, monkeypatch): + notes_path = tmp_path / "notes.txt" + notes_path.write_text("line\n", encoding="utf-8") + + arg_ts = datetime(2024, 10, 5, 10, 0, tzinfo=timezone.utc).timestamp() + env_ts = datetime(2024, 10, 5, 11, 0, tzinfo=timezone.utc).timestamp() + monkeypatch.setenv("NOW_TS", str(env_ts)) + monkeypatch.setattr( + collect_logs, + "now_utc", + lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), + ) + + logs_path = tmp_path / "logs.json" + args = collect_logs.Args( + output=str(logs_path), + notes=str(notes_path), + run_id="43", + timestamp=arg_ts, + history_days=collect_logs.HISTORY_DAYS, + pretty=False, + ) + + collect_logs.update_logs(args) + + entries = json.loads(logs_path.read_text(encoding="utf-8")) + assert entries[0]["date"] == "2024-10-05T10:00:00+00:00" + + +def test_collect_logs_raises_same_error_when_no_timestamp_available(tmp_path): + notes_path = tmp_path / "notes.txt" + notes_path.write_text("line\n", encoding="utf-8") + + args = collect_logs.Args( + output=str(tmp_path / "logs.json"), + notes=str(notes_path), + run_id="44", + timestamp=None, + history_days=collect_logs.HISTORY_DAYS, + pretty=False, + ) + + with pytest.raises(SystemExit, match="collect_logs: missing --timestamp"): + collect_logs.update_logs(args) From 21a3af8d4a86041d12b8c29668b773f3e09cdb3e Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 10:59:22 +0100 Subject: [PATCH 06/15] 
Rename collect_logs runtime timestamp variable Rename `forced_timestamp` to `runtime_ts` in collect_logs for clearer naming aligned with run-level timestamp semantics. --- scripts/collect_logs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index bfc2a89c6..0b3eb4ecc 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -98,7 +98,7 @@ def update_logs(args: Args): raise SystemExit("collect_logs: missing --timestamp") timestamp = float(now_ts.strip()) - forced_timestamp = datetime.fromtimestamp(timestamp, tz=timezone.utc) + runtime_ts = datetime.fromtimestamp(timestamp, tz=timezone.utc) output_path = Path(args.output).expanduser().resolve() output_dir = output_path.parent @@ -110,7 +110,7 @@ def update_logs(args: Args): entries = [entry for entry in entries if entry.get("run_id") != run_id_str] entries.append({ - "date": forced_timestamp.isoformat(), + "date": runtime_ts.isoformat(), "run_id": run_id_str, "notes": notes_text, }) From a4d6d62d17b1a3f167a9ff42f5f81765dba9fa3d Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 10:59:43 +0100 Subject: [PATCH 07/15] Drop explicit collect_logs timestamp in workflow Remove `--timestamp` from the collect_logs invocation in crawl.yml. collect_logs now resolves the run timestamp from `NOW_TS` when no explicit timestamp argument is provided. 
--- .github/workflows/crawl.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/crawl.yml b/.github/workflows/crawl.yml index 55a217c6d..130a51e22 100644 --- a/.github/workflows/crawl.yml +++ b/.github/workflows/crawl.yml @@ -167,7 +167,6 @@ jobs: uv run -m scripts.collect_logs \ --run-id "${{ github.run_id }}" \ - --timestamp "$NOW_TS" \ -o ./wrk/logs.json \ notes.txt From cc8ea107c30ae539f62f4b440e1c2fba8d0e3e93 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:00:32 +0100 Subject: [PATCH 08/15] Document frozen NOW_TS usage in crawl workflow Add an expanded comment block above the NOW_TS export step in crawl.yml. The comment explains why the timestamp is frozen once per run and calls out that both scripts/crawl.py and scripts/collect_logs.py consume the same run-level timestamp. --- .github/workflows/crawl.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/crawl.yml b/.github/workflows/crawl.yml index 130a51e22..1b94106f7 100644 --- a/.github/workflows/crawl.yml +++ b/.github/workflows/crawl.yml @@ -39,6 +39,24 @@ jobs: - name: Ensure wrk directory exists run: mkdir -p ./wrk + # -------------------------------------------------------------------- + # Freeze one run-level timestamp for the entire crawl job. + # + # Why: + # - We want all artifacts and logs from a single workflow run to agree + # on one exact point in time. + # - This avoids subtle drift where separate `date` calls differ by + # seconds and make later analysis harder. + # + # Consumers of this frozen timestamp: + # - scripts/crawl.py (run timestamp for crawl/update detection) + # - scripts/collect_logs.py (log entry timestamp fallback via NOW_TS) + # + # Notes: + # - We export via $GITHUB_ENV so NOW_TS is available to subsequent steps + # in this job. + # - Use epoch seconds (`date +%s`) to stay timezone-agnostic. 
+ # -------------------------------------------------------------------- - name: Freeze run timestamp run: echo "NOW_TS=$(date +%s)" >> "$GITHUB_ENV" From 013e9e5656aa9facb83c5b40cd872b2867765ea9 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:07:25 +0100 Subject: [PATCH 09/15] Add structured found_updates to collected logs Extend collect_logs with optional --workspace support so each new log entry can include structured package update detections for the run. When a workspace path is provided, collect_logs now derives `found_updates` by matching package `update_detected` timestamps against the frozen run timestamp, emits deterministic name ordering, and keeps `published_at` optional when `last_modified` is missing. Also wire the crawl workflow to pass --workspace so production logs include found_updates, and add focused tests for matching, ordering, empty lists, dedupe behavior, and timestamp precedence. --- .github/workflows/crawl.yml | 1 + scripts/collect_logs.py | 54 +++++++++- tests/test_collect_logs.py | 209 ++++++++++++++++++++++++++++++++++++ 3 files changed, 260 insertions(+), 4 deletions(-) diff --git a/.github/workflows/crawl.yml b/.github/workflows/crawl.yml index 1b94106f7..393d46947 100644 --- a/.github/workflows/crawl.yml +++ b/.github/workflows/crawl.yml @@ -185,6 +185,7 @@ jobs: uv run -m scripts.collect_logs \ --run-id "${{ github.run_id }}" \ + --workspace ./wrk/workspace.json \ -o ./wrk/logs.json \ notes.txt diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index 0b3eb4ecc..9b89b36c8 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -30,6 +30,7 @@ class Args: notes: str run_id: str | None timestamp: float | None + workspace: str | None history_days: int pretty: bool @@ -55,6 +56,14 @@ def parse_args() -> Args: default=None, help="Unix timestamp (seconds) when the notes were produced.", ) + parser.add_argument( + "--workspace", + default=None, + help=( + "Optional workspace JSON path. 
When provided, " + "collect found_updates from matching package entries." + ), + ) parser.add_argument( "--history-days", type=int, @@ -76,6 +85,7 @@ def parse_args() -> Args: notes=ns.notes, run_id=ns.run_id, timestamp=ns.timestamp, + workspace=ns.workspace, history_days=ns.history_days, pretty=ns.pretty, ) @@ -99,21 +109,25 @@ def update_logs(args: Args): timestamp = float(now_ts.strip()) runtime_ts = datetime.fromtimestamp(timestamp, tz=timezone.utc) + run_timestamp_iso = runtime_ts.strftime("%Y-%m-%dT%H:%M:%SZ") output_path = Path(args.output).expanduser().resolve() output_dir = output_path.parent if output_dir and not output_dir.exists(): output_dir.mkdir(parents=True, exist_ok=True) - entries = load_logs(output_path) + entries: list[dict[str, Any]] = load_json(output_path) run_id_str = str(run_id) entries = [entry for entry in entries if entry.get("run_id") != run_id_str] - entries.append({ + entry: dict[str, Any] = { "date": runtime_ts.isoformat(), "run_id": run_id_str, "notes": notes_text, - }) + } + if args.workspace: + entry["found_updates"] = derive_found_updates(args.workspace, run_timestamp_iso) + entries.append(entry) entries.sort(key=lambda entry: entry["date"], reverse=True) @@ -126,6 +140,38 @@ def update_logs(args: Args): write_json(output_path, kept_entries, pretty=args.pretty, ensure_ascii=True) +def derive_found_updates(workspace_path: str, run_timestamp_iso: str) -> list[dict[str, Any]]: + packages = load_workspace_packages(workspace_path) + found_updates = [] + for entry in packages.values(): + detected_at = entry.get("update_detected") + if detected_at == run_timestamp_iso: + found_updates.append({ + "name": entry["name"], + "detected_at": detected_at, + "published_at": entry.get("last_modified"), + }) + + found_updates.sort(key=lambda item: item["name"].casefold()) + return found_updates + + +def load_workspace_packages(path: str) -> dict[str, dict]: + workspace_path = Path(path) + if not workspace_path.is_file(): + raise 
SystemExit(f"collect_logs: workspace file not found: {workspace_path}") + + workspace = load_json(workspace_path) + if not isinstance(workspace, dict): + raise SystemExit(f"collect_logs: workspace must be a JSON object: {workspace_path}") + + packages: dict[str, dict] = workspace.get("packages", {}) + if not isinstance(packages, dict): + raise SystemExit(f"collect_logs: workspace packages must be an object: {workspace_path}") + + return packages + + def now_utc() -> datetime: return datetime.now(timezone.utc) @@ -137,7 +183,7 @@ def retention_cutoff(keep_days: int, *, reference: datetime | None = None) -> da return reference - timedelta(days=keep_days) -def load_logs(path: Path) -> list[dict[str, Any]]: +def load_json(path: Path) -> Any: try: return json.loads(path.read_text(encoding="utf-8")) except FileNotFoundError: diff --git a/tests/test_collect_logs.py b/tests/test_collect_logs.py index 98c280051..1387d4b4f 100644 --- a/tests/test_collect_logs.py +++ b/tests/test_collect_logs.py @@ -60,6 +60,211 @@ def test_collect_logs_deduplicates_run_id(tmp_path, monkeypatch): assert entries[0]["date"] == "2024-10-05T08:20:00+00:00" +def test_collect_logs_adds_found_updates_from_workspace(tmp_path, monkeypatch): + notes_path = tmp_path / "notes.txt" + notes_path.write_text("line\n", encoding="utf-8") + + ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp() + monkeypatch.setattr( + collect_logs, + "now_utc", + lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), + ) + + workspace_path = tmp_path / "workspace.json" + workspace_path.write_text( + json.dumps( + { + "packages": { + "zeta": { + "name": "Zeta", + "update_detected": "2024-10-05T09:30:00Z", + "last_modified": "2024-10-04T00:00:00Z", + }, + "alpha": { + "name": "alpha", + "update_detected": "2024-10-05T09:30:00Z", + }, + "foo": { + "name": "Foo and Bar", + "update_detected": "2024-10-05T09:30:00Z", + "last_modified": "2024-10-03T00:00:00Z", + }, + "no-match": { + "name": "No Match", + 
"update_detected": "2024-10-05T09:31:00Z", + "last_modified": "2024-10-01T00:00:00Z", + }, + } + } + ), + encoding="utf-8", + ) + + logs_path = tmp_path / "logs.json" + args = collect_logs.Args( + output=str(logs_path), + notes=str(notes_path), + run_id="200", + timestamp=ts, + workspace=str(workspace_path), + history_days=collect_logs.HISTORY_DAYS, + pretty=False, + ) + + collect_logs.update_logs(args) + + entries = json.loads(logs_path.read_text(encoding="utf-8")) + assert entries[0]["found_updates"] == [ + { + "name": "alpha", + "detected_at": "2024-10-05T09:30:00Z", + "published_at": None, + }, + { + "name": "Foo and Bar", + "detected_at": "2024-10-05T09:30:00Z", + "published_at": "2024-10-03T00:00:00Z", + }, + { + "name": "Zeta", + "detected_at": "2024-10-05T09:30:00Z", + "published_at": "2024-10-04T00:00:00Z", + }, + ] + + +def test_collect_logs_writes_empty_found_updates_list_when_no_matches(tmp_path, monkeypatch): + notes_path = tmp_path / "notes.txt" + notes_path.write_text("line\n", encoding="utf-8") + + ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp() + monkeypatch.setattr( + collect_logs, + "now_utc", + lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), + ) + + workspace_path = tmp_path / "workspace.json" + workspace_path.write_text( + json.dumps( + { + "packages": { + "alpha": { + "name": "alpha", + "update_detected": "2024-10-05T09:31:00Z", + "last_modified": "2024-10-01T00:00:00Z", + } + } + } + ), + encoding="utf-8", + ) + + logs_path = tmp_path / "logs.json" + args = collect_logs.Args( + output=str(logs_path), + notes=str(notes_path), + run_id="201", + timestamp=ts, + workspace=str(workspace_path), + history_days=collect_logs.HISTORY_DAYS, + pretty=False, + ) + + collect_logs.update_logs(args) + + entries = json.loads(logs_path.read_text(encoding="utf-8")) + assert entries[0]["found_updates"] == [] + + +def test_collect_logs_dedupes_run_id_with_found_updates(tmp_path, monkeypatch): + notes_path = tmp_path / "notes.txt" + 
notes_path.write_text("first\n", encoding="utf-8") + + monkeypatch.setenv("GITHUB_RUN_ID", "500") + monkeypatch.setattr( + collect_logs, + "now_utc", + lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), + ) + + workspace_path = tmp_path / "workspace.json" + workspace_path.write_text( + json.dumps( + { + "packages": { + "alpha": { + "name": "alpha", + "update_detected": "2024-10-05T09:30:00Z", + "last_modified": "2024-10-01T00:00:00Z", + } + } + } + ), + encoding="utf-8", + ) + + logs_path = tmp_path / "logs.json" + monkeypatch.setattr( + sys, + "argv", + [ + "collect-logs", + "-o", + str(logs_path), + "--timestamp", + str(datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp()), + "--workspace", + str(workspace_path), + str(notes_path), + ], + ) + collect_logs.main() + + notes_path.write_text("second\n", encoding="utf-8") + workspace_path.write_text( + json.dumps( + { + "packages": { + "beta": { + "name": "beta", + "update_detected": "2024-10-05T10:00:00Z", + "last_modified": "2024-10-02T00:00:00Z", + } + } + } + ), + encoding="utf-8", + ) + monkeypatch.setattr( + sys, + "argv", + [ + "collect-logs", + "-o", + str(logs_path), + "--timestamp", + str(datetime(2024, 10, 5, 10, 0, tzinfo=timezone.utc).timestamp()), + "--workspace", + str(workspace_path), + str(notes_path), + ], + ) + collect_logs.main() + + entries = json.loads(logs_path.read_text(encoding="utf-8")) + assert len(entries) == 1 + assert entries[0]["notes"] == "second\n" + assert entries[0]["found_updates"] == [ + { + "name": "beta", + "detected_at": "2024-10-05T10:00:00Z", + "published_at": "2024-10-02T00:00:00Z", + } + ] + + def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch): logs_path = tmp_path / "logs.json" old_entry = { @@ -88,6 +293,7 @@ def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch): notes=str(notes_path), run_id="99", timestamp=fixed_now.timestamp(), + workspace=None, history_days=collect_logs.HISTORY_DAYS, pretty=True, ) @@ 
-119,6 +325,7 @@ def test_collect_logs_uses_now_ts_when_timestamp_is_missing(tmp_path, monkeypatc notes=str(notes_path), run_id="42", timestamp=None, + workspace=None, history_days=collect_logs.HISTORY_DAYS, pretty=False, ) @@ -148,6 +355,7 @@ def test_collect_logs_timestamp_arg_wins_over_now_ts(tmp_path, monkeypatch): notes=str(notes_path), run_id="43", timestamp=arg_ts, + workspace=None, history_days=collect_logs.HISTORY_DAYS, pretty=False, ) @@ -167,6 +375,7 @@ def test_collect_logs_raises_same_error_when_no_timestamp_available(tmp_path): notes=str(notes_path), run_id="44", timestamp=None, + workspace=None, history_days=collect_logs.HISTORY_DAYS, pretty=False, ) From dc365d5b497d6487fb4c36792b658750137e0226 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:16:31 +0100 Subject: [PATCH 10/15] Require explicit reference in retention_cutoff Make collect_logs.retention_cutoff require a reference datetime instead of accepting an optional argument. The helper is only called with an explicit reference at its sole call site, so the optional branch was unnecessary. 
--- scripts/collect_logs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index 9b89b36c8..baa2d0f14 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -176,10 +176,8 @@ def now_utc() -> datetime: return datetime.now(timezone.utc) -def retention_cutoff(keep_days: int, *, reference: datetime | None = None) -> datetime: +def retention_cutoff(keep_days: int, *, reference: datetime) -> datetime: """Compute the earliest UTC timestamp we must retain.""" - if reference is None: - reference = now_utc() return reference - timedelta(days=keep_days) From ffd183fa052682a90a32cfc1db60ac7a4e854ecb Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:17:38 +0100 Subject: [PATCH 11/15] Inline retention cutoff in collect_logs Inline the retention cutoff calculation in update_logs since the helper only wrapped a single trivial expression. This removes one indirection while keeping behavior unchanged. 
--- scripts/collect_logs.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index baa2d0f14..e5bc99b6f 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -131,7 +131,7 @@ def update_logs(args: Args): entries.sort(key=lambda entry: entry["date"], reverse=True) - cutoff = retention_cutoff(args.history_days, reference=now_utc()) + cutoff = now_utc() - timedelta(days=args.history_days) kept_entries = [ entry for entry in entries if datetime.fromisoformat(entry["date"]) >= cutoff @@ -176,11 +176,6 @@ def now_utc() -> datetime: return datetime.now(timezone.utc) -def retention_cutoff(keep_days: int, *, reference: datetime) -> datetime: - """Compute the earliest UTC timestamp we must retain.""" - return reference - timedelta(days=keep_days) - - def load_json(path: Path) -> Any: try: return json.loads(path.read_text(encoding="utf-8")) From fd7ed1985594bb91e23713f190ea0403942fd152 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:19:40 +0100 Subject: [PATCH 12/15] Use now_ts semantics in collect_logs pruning clock Replace collect_logs' `now_utc()` helper with a `now_ts()` helper that matches crawl's NOW_TS-aware behavior. This keeps retention pruning aligned with the same frozen run timestamp mechanism when NOW_TS is exported. Also update collect_logs tests to patch `now_ts` instead of `now_utc`. 
--- scripts/collect_logs.py | 12 +++++++----- tests/test_collect_logs.py | 14 +++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index e5bc99b6f..22c86881d 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -103,10 +103,10 @@ def update_logs(args: Args): notes_text = notes_path.read_text(encoding="utf-8") timestamp = args.timestamp if timestamp is None: - now_ts = os.environ.get("NOW_TS") - if now_ts is None: + env_now_ts = os.environ.get("NOW_TS") + if env_now_ts is None: raise SystemExit("collect_logs: missing --timestamp") - timestamp = float(now_ts.strip()) + timestamp = float(env_now_ts.strip()) runtime_ts = datetime.fromtimestamp(timestamp, tz=timezone.utc) run_timestamp_iso = runtime_ts.strftime("%Y-%m-%dT%H:%M:%SZ") @@ -131,7 +131,7 @@ def update_logs(args: Args): entries.sort(key=lambda entry: entry["date"], reverse=True) - cutoff = now_utc() - timedelta(days=args.history_days) + cutoff = now_ts() - timedelta(days=args.history_days) kept_entries = [ entry for entry in entries if datetime.fromisoformat(entry["date"]) >= cutoff @@ -172,7 +172,9 @@ def load_workspace_packages(path: str) -> dict[str, dict]: return packages -def now_utc() -> datetime: +def now_ts() -> datetime: + if value := os.getenv("NOW_TS"): + return datetime.fromtimestamp(float(value.strip()), tz=timezone.utc) return datetime.now(timezone.utc) diff --git a/tests/test_collect_logs.py b/tests/test_collect_logs.py index 1387d4b4f..7fda9d328 100644 --- a/tests/test_collect_logs.py +++ b/tests/test_collect_logs.py @@ -15,7 +15,7 @@ def test_collect_logs_deduplicates_run_id(tmp_path, monkeypatch): ) fixed_now = datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc) - monkeypatch.setattr(collect_logs, "now_utc", lambda: fixed_now) + monkeypatch.setattr(collect_logs, "now_ts", lambda: fixed_now) monkeypatch.setenv("GITHUB_RUN_ID", "12345") logs_path = tmp_path / "logs.json" @@ -67,7 +67,7 @@ def 
test_collect_logs_adds_found_updates_from_workspace(tmp_path, monkeypatch): ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp() monkeypatch.setattr( collect_logs, - "now_utc", + "now_ts", lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), ) @@ -141,7 +141,7 @@ def test_collect_logs_writes_empty_found_updates_list_when_no_matches(tmp_path, ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp() monkeypatch.setattr( collect_logs, - "now_utc", + "now_ts", lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), ) @@ -185,7 +185,7 @@ def test_collect_logs_dedupes_run_id_with_found_updates(tmp_path, monkeypatch): monkeypatch.setenv("GITHUB_RUN_ID", "500") monkeypatch.setattr( collect_logs, - "now_utc", + "now_ts", lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), ) @@ -286,7 +286,7 @@ def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch): ) fixed_now = datetime(2024, 10, 5, 12, 0, tzinfo=timezone.utc) - monkeypatch.setattr(collect_logs, "now_utc", lambda: fixed_now) + monkeypatch.setattr(collect_logs, "now_ts", lambda: fixed_now) args = collect_logs.Args( output=str(logs_path), @@ -315,7 +315,7 @@ def test_collect_logs_uses_now_ts_when_timestamp_is_missing(tmp_path, monkeypatc monkeypatch.setenv("NOW_TS", str(ts)) monkeypatch.setattr( collect_logs, - "now_utc", + "now_ts", lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), ) @@ -345,7 +345,7 @@ def test_collect_logs_timestamp_arg_wins_over_now_ts(tmp_path, monkeypatch): monkeypatch.setenv("NOW_TS", str(env_ts)) monkeypatch.setattr( collect_logs, - "now_utc", + "now_ts", lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc), ) From cc9270d1cdf2276ab6248553d3a68bdd15bc1e82 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:22:26 +0100 Subject: [PATCH 13/15] Document logs.json output shape with TypedDicts Add typed structures for collect_logs output entries: - LogEntry - FoundUpdateEntry This makes the logs.json schema explicit in 
code and clarifies the optional found_updates payload shape.
---
 scripts/collect_logs.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py
index 22c86881d..d902822d9 100644
--- a/scripts/collect_logs.py
+++ b/scripts/collect_logs.py
@@ -7,7 +7,7 @@
 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, NotRequired, TypedDict
 
 from ._utils import write_json
 
@@ -15,6 +15,20 @@
 HISTORY_DAYS = 32
 
 
+class LogEntry(TypedDict):
+    # Canonical log entry shape written by collect_logs.
+    date: str
+    run_id: str
+    notes: str
+    found_updates: NotRequired[list["FoundUpdateEntry"]]
+
+
+class FoundUpdateEntry(TypedDict):
+    name: str
+    detected_at: str
+    published_at: str | None
+
+
 def main():
     args = parse_args()
     try:
@@ -116,11 +130,11 @@ def update_logs(args: Args):
     if output_dir and not output_dir.exists():
         output_dir.mkdir(parents=True, exist_ok=True)
 
-    entries: list[dict[str, Any]] = load_json(output_path)
+    entries: list[LogEntry] = load_json(output_path)
     run_id_str = str(run_id)
     entries = [entry for entry in entries if entry.get("run_id") != run_id_str]
 
-    entry: dict[str, Any] = {
+    entry: LogEntry = {
         "date": runtime_ts.isoformat(),
         "run_id": run_id_str,
         "notes": notes_text,
@@ -140,9 +154,9 @@ def update_logs(args: Args):
     write_json(output_path, kept_entries, pretty=args.pretty, ensure_ascii=True)
 
 
-def derive_found_updates(workspace_path: str, run_timestamp_iso: str) -> list[dict[str, Any]]:
+def derive_found_updates(workspace_path: str, run_timestamp_iso: str) -> list[FoundUpdateEntry]:
     packages = load_workspace_packages(workspace_path)
-    found_updates = []
+    found_updates: list[FoundUpdateEntry] = []
     for entry in packages.values():
         detected_at = entry.get("update_detected")
         if detected_at == run_timestamp_iso:

From 777f14e6910c08c860c253b7f33ff15616145355 Mon Sep 17 00:00:00 2001
From: herr kaste Date: Mon, 23 Mar 2026 11:26:23 +0100 Subject: [PATCH 14/15] Enforce last_modified for found_updates entries Align collect_logs with crawl invariants: if `update_detected` matches this run, `last_modified` must be present too. `found_updates.published_at` is hence a required string. --- scripts/collect_logs.py | 4 ++-- tests/test_collect_logs.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py index d902822d9..6fa9bd479 100644 --- a/scripts/collect_logs.py +++ b/scripts/collect_logs.py @@ -26,7 +26,7 @@ class LogEntry(TypedDict): class FoundUpdateEntry(TypedDict): name: str detected_at: str - published_at: str | None + published_at: str def main(): @@ -163,7 +163,7 @@ def derive_found_updates(workspace_path: str, run_timestamp_iso: str) -> list[Fo found_updates.append({ "name": entry["name"], "detected_at": detected_at, - "published_at": entry.get("last_modified"), + "published_at": entry["last_modified"], }) found_updates.sort(key=lambda item: item["name"].casefold()) diff --git a/tests/test_collect_logs.py b/tests/test_collect_logs.py index 7fda9d328..df9f5389e 100644 --- a/tests/test_collect_logs.py +++ b/tests/test_collect_logs.py @@ -84,6 +84,7 @@ def test_collect_logs_adds_found_updates_from_workspace(tmp_path, monkeypatch): "alpha": { "name": "alpha", "update_detected": "2024-10-05T09:30:00Z", + "last_modified": "2024-10-05T08:00:00Z", }, "foo": { "name": "Foo and Bar", @@ -119,7 +120,7 @@ def test_collect_logs_adds_found_updates_from_workspace(tmp_path, monkeypatch): { "name": "alpha", "detected_at": "2024-10-05T09:30:00Z", - "published_at": None, + "published_at": "2024-10-05T08:00:00Z", }, { "name": "Foo and Bar", From f732e948201613114cde03ac479e84a81a3365d7 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Mon, 23 Mar 2026 11:31:45 +0100 Subject: [PATCH 15/15] Bump workflow actions - actions/cache v4 -> v5 - actions/checkout v4 -> v6 - actions/setup-python v5 -> v6 - 
actions/upload-artifact v4 -> v7 --- .github/workflows/crawl.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/crawl.yml b/.github/workflows/crawl.yml index 393d46947..b80a26c39 100644 --- a/.github/workflows/crawl.yml +++ b/.github/workflows/crawl.yml @@ -26,10 +26,10 @@ jobs: PRESTO_PRESTO_CRAWL: ${{ vars.PRESTO_PRESTO_CRAWL }} steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.13' @@ -61,7 +61,7 @@ jobs: run: echo "NOW_TS=$(date +%s)" >> "$GITHUB_ENV" - name: Restore wrk cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: | ./wrk @@ -191,7 +191,7 @@ jobs: - name: Upload wrk backup id: crawl-backup-step - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: crawl-backup path: wrk/ @@ -205,10 +205,10 @@ jobs: GITHUB_TOKEN: ${{ github.token }} steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.13' @@ -219,7 +219,7 @@ jobs: run: mkdir -p ./wrk - name: Restore wrk cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ./wrk key: stats-cache-${{ github.run_id }} @@ -237,7 +237,7 @@ jobs: - name: Upload wrk backup id: stats-backup-step - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: stats-backup path: wrk/