diff --git a/.github/workflows/crawl.yml b/.github/workflows/crawl.yml
index 69a2bf795..b80a26c39 100644
--- a/.github/workflows/crawl.yml
+++ b/.github/workflows/crawl.yml
@@ -26,10 +26,10 @@ jobs:
       PRESTO_PRESTO_CRAWL: ${{ vars.PRESTO_PRESTO_CRAWL }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
          python-version: '3.13'
 
@@ -39,8 +39,29 @@ jobs:
       - name: Ensure wrk directory exists
         run: mkdir -p ./wrk
 
+      # --------------------------------------------------------------------
+      # Freeze one run-level timestamp for the entire crawl job.
+      #
+      # Why:
+      # - We want all artifacts and logs from a single workflow run to agree
+      #   on one exact point in time.
+      # - This avoids subtle drift where separate `date` calls differ by
+      #   seconds and make later analysis harder.
+      #
+      # Consumers of this frozen timestamp:
+      # - scripts/crawl.py (run timestamp for crawl/update detection)
+      # - scripts/collect_logs.py (log entry timestamp fallback via NOW_TS)
+      #
+      # Notes:
+      # - We export via $GITHUB_ENV so NOW_TS is available to subsequent steps
+      #   in this job.
+      # - Use epoch seconds (`date +%s`) to stay timezone-agnostic.
+      # --------------------------------------------------------------------
+      - name: Freeze run timestamp
+        run: echo "NOW_TS=$(date +%s)" >> "$GITHUB_ENV"
+
       - name: Restore wrk cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: |
             ./wrk
@@ -130,8 +151,7 @@ jobs:
           gh release upload ${{ env.RELEASE_TAG }} ./wrk/registry.json --clobber
           gh release upload ${{ env.RELEASE_TAG }} ./wrk/workspace.json --clobber
 
-          DATE_TS=$(date +%s)
-          DATE=$(TZ=Europe/Berlin date -d "@$DATE_TS" +"%B %d, %Y, %H:%M GMT%:::z" | sed -E 's/([+-])0/\1/')
+          DATE=$(TZ=Europe/Berlin date -d "@$NOW_TS" +"%B %d, %Y, %H:%M GMT%:::z" | sed -E 's/([+-])0/\1/')
           REPO_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
           # Build new notes
           {
@@ -165,13 +185,13 @@ jobs:
           uv run -m scripts.collect_logs \
             --run-id "${{ github.run_id }}" \
-            --timestamp "$DATE_TS" \
+            --workspace ./wrk/workspace.json \
             -o ./wrk/logs.json \
             notes.txt
 
       - name: Upload wrk backup
         id: crawl-backup-step
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: crawl-backup
           path: wrk/
@@ -185,10 +205,10 @@ jobs:
       GITHUB_TOKEN: ${{ github.token }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.13'
 
@@ -199,7 +219,7 @@
         run: mkdir -p ./wrk
 
       - name: Restore wrk cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: ./wrk
           key: stats-cache-${{ github.run_id }}
@@ -217,7 +237,7 @@
 
       - name: Upload wrk backup
         id: stats-backup-step
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: stats-backup
           path: wrk/
diff --git a/scripts/_utils.py b/scripts/_utils.py
index 38cf6b95c..979497940 100644
--- a/scripts/_utils.py
+++ b/scripts/_utils.py
@@ -57,6 +57,14 @@ def pl(count: int, word: str) -> str:
     return f"{count} {_INFLECT.plural(singular, count)}"
 
 
+def format_name_list(names: list[str]) -> str:
+    if len(names) == 1:
+        return names[0]
+    if len(names) == 2:
+        return f"{names[0]} and {names[1]}"
+    return f"{', '.join(names[:-1])}, and {names[-1]}"
+
+
 def pipe(v, *fns):
     for fn in fns:
         v = fn(v)
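# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the Oxford-comma behaviour of
# the format_name_list helper moved into scripts/_utils.py above. Assumes the
# repository's dependencies are installed and the repo root is importable.
# Note the helper expects a non-empty list (an empty list would raise
# IndexError); callers such as main_ in scripts/crawl.py guard with a
# truthiness check first.
# ---------------------------------------------------------------------------
from scripts._utils import format_name_list

assert format_name_list(["alpha"]) == "alpha"
assert format_name_list(["alpha", "beta"]) == "alpha and beta"
assert format_name_list(["alpha", "beta", "gamma"]) == "alpha, beta, and gamma"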
diff --git a/scripts/collect_logs.py b/scripts/collect_logs.py
index f7182dc1e..6fa9bd479 100644
--- a/scripts/collect_logs.py
+++ b/scripts/collect_logs.py
@@ -7,7 +7,7 @@
 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, NotRequired, TypedDict
 
 from ._utils import write_json
 
@@ -15,6 +15,20 @@
 HISTORY_DAYS = 32
 
 
+class FoundUpdateEntry(TypedDict):
+    name: str
+    detected_at: str
+    published_at: str
+
+
+class LogEntry(TypedDict):
+    # Canonical log entry shape written by collect_logs.
+    date: str
+    run_id: str
+    notes: str
+    found_updates: NotRequired[list[FoundUpdateEntry]]
+
+
 def main():
     args = parse_args()
     try:
@@ -30,6 +44,7 @@ class Args:
     notes: str
     run_id: str | None
     timestamp: float | None
+    workspace: str | None
     history_days: int
     pretty: bool
 
@@ -55,6 +70,14 @@ def parse_args() -> Args:
         default=None,
         help="Unix timestamp (seconds) when the notes were produced.",
     )
+    parser.add_argument(
+        "--workspace",
+        default=None,
+        help=(
+            "Optional workspace JSON path. When provided, "
+            "collect found_updates from matching package entries."
+        ),
+    )
     parser.add_argument(
         "--history-days",
         type=int,
@@ -76,6 +99,7 @@
         notes=ns.notes,
         run_id=ns.run_id,
         timestamp=ns.timestamp,
+        workspace=ns.workspace,
         history_days=ns.history_days,
         pretty=ns.pretty,
     )
@@ -85,34 +109,43 @@
 def update_logs(args: Args):
     run_id = args.run_id or os.environ.get("GITHUB_RUN_ID")
     if not run_id:
         raise SystemExit("collect_logs: missing --run-id or GITHUB_RUN_ID")
-    if args.timestamp is None:
-        raise SystemExit("collect_logs: missing --timestamp")
 
     notes_path = Path(args.notes)
     if not notes_path.is_file():
         raise SystemExit(f"collect_logs: notes file not found: {notes_path}")
     notes_text = notes_path.read_text(encoding="utf-8")
 
-    forced_timestamp = datetime.fromtimestamp(args.timestamp, tz=timezone.utc)
+    timestamp = args.timestamp
+    if timestamp is None:
+        env_now_ts = os.environ.get("NOW_TS")
+        if env_now_ts is None:
+            raise SystemExit("collect_logs: missing --timestamp")
+        timestamp = float(env_now_ts.strip())
+
+    runtime_ts = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+    run_timestamp_iso = runtime_ts.strftime("%Y-%m-%dT%H:%M:%SZ")
 
     output_path = Path(args.output).expanduser().resolve()
     output_dir = output_path.parent
     if output_dir and not output_dir.exists():
         output_dir.mkdir(parents=True, exist_ok=True)
 
-    entries = load_logs(output_path)
+    entries: list[LogEntry] = load_json(output_path)
     run_id_str = str(run_id)
     entries = [entry for entry in entries if entry.get("run_id") != run_id_str]
-    entries.append({
-        "date": forced_timestamp.isoformat(),
+    entry: LogEntry = {
+        "date": runtime_ts.isoformat(),
         "run_id": run_id_str,
         "notes": notes_text,
-    })
+    }
+    if args.workspace:
+        entry["found_updates"] = derive_found_updates(args.workspace, run_timestamp_iso)
+    entries.append(entry)
 
     entries.sort(key=lambda entry: entry["date"], reverse=True)
-    cutoff = retention_cutoff(args.history_days, reference=now_utc())
+    cutoff = now_ts() - timedelta(days=args.history_days)
     kept_entries = [
         entry
         for entry in entries
         if datetime.fromisoformat(entry["date"]) >= cutoff
@@ -121,18 +154,45 @@
     write_json(output_path, kept_entries, pretty=args.pretty, ensure_ascii=True)
 
 
-def now_utc() -> datetime:
-    return datetime.now(timezone.utc)
+def derive_found_updates(workspace_path: str, run_timestamp_iso: str) -> list[FoundUpdateEntry]:
+    packages = load_workspace_packages(workspace_path)
+    found_updates: list[FoundUpdateEntry] = []
+    for entry in packages.values():
+        detected_at = entry.get("update_detected")
+        if detected_at == run_timestamp_iso:
+            found_updates.append({
+                "name": entry["name"],
+                "detected_at": detected_at,
+                "published_at": entry["last_modified"],
+            })
+
+    found_updates.sort(key=lambda item: item["name"].casefold())
+    return found_updates
+
+
+def load_workspace_packages(path: str) -> dict[str, dict]:
+    workspace_path = Path(path)
+    if not workspace_path.is_file():
+        raise SystemExit(f"collect_logs: workspace file not found: {workspace_path}")
+    workspace = load_json(workspace_path)
+    if not isinstance(workspace, dict):
+        raise SystemExit(f"collect_logs: workspace must be a JSON object: {workspace_path}")
 
-def retention_cutoff(keep_days: int, *, reference: datetime | None = None) -> datetime:
-    """Compute the earliest UTC timestamp we must retain."""
-    if reference is None:
-        reference = now_utc()
-    return reference - timedelta(days=keep_days)
+    packages: dict[str, dict] = workspace.get("packages", {})
+    if not isinstance(packages, dict):
+        raise SystemExit(f"collect_logs: workspace packages must be an object: {workspace_path}")
+
+    return packages
+
+
+def now_ts() -> datetime:
+    if value := os.getenv("NOW_TS"):
+        return datetime.fromtimestamp(float(value.strip()), tz=timezone.utc)
+    return datetime.now(timezone.utc)
 
 
-def load_logs(path: Path) -> list[dict[str, Any]]:
+def load_json(path: Path) -> Any:
     try:
         return json.loads(path.read_text(encoding="utf-8"))
     except FileNotFoundError:
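# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the timestamp precedence that
# update_logs now implements. An explicit --timestamp wins; otherwise the
# NOW_TS environment variable (frozen once per workflow run by the
# "Freeze run timestamp" step above) is used; otherwise the script exits with
# the same error message as before. This mirrors the real code rather than
# importing it.
# ---------------------------------------------------------------------------
import os


def resolve_timestamp(cli_timestamp: float | None) -> float:
    if cli_timestamp is not None:
        return cli_timestamp  # --timestamp always wins over the environment
    env_now_ts = os.environ.get("NOW_TS")
    if env_now_ts is None:
        raise SystemExit("collect_logs: missing --timestamp")
    return float(env_now_ts.strip())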
diff --git a/scripts/crawl.py b/scripts/crawl.py
index 73545174f..6b9ceef71 100644
--- a/scripts/crawl.py
+++ b/scripts/crawl.py
@@ -29,8 +29,8 @@
     normalize_version_spec,
 )
 from ._utils import (
-    next_run, parse_version, resolve_url, update_url, write_json, pl, pick,
-    VersionInfo
+    format_name_list, next_run, parse_version, resolve_url, update_url, write_json, pl, pick,
+    VersionInfo,
 )
 from ._explain_package import print_package_explain
 import traceback
@@ -81,6 +81,7 @@ class WorkspaceEntry(TypedDict, total=False):
     last_seen: IsoTimestamp
     next_crawl: IsoTimestamp
     last_modified: IsoTimestamp
+    update_detected: IsoTimestamp
     failing_since: IsoTimestamp
     fail_reason: str
 
@@ -252,6 +253,7 @@ async def main_(
     maintenance(registry, workspace)
     tocrawl = next_packages_to_crawl(registry, workspace, limit=limit, presto=presto)
 
+    updated_packages: list[str] = []
     async with aiohttp.ClientSession() as session:
         tasks = [
             crawl(
@@ -265,6 +267,8 @@ async def main_(
         results = await asyncio.gather(*tasks)
         for new_entry in results:
             workspace["packages"][new_entry["name"]] = new_entry
+            if "update_detected" in new_entry:
+                updated_packages.append(new_entry["name"])
             if name_requested:
                 print(json.dumps(new_entry, indent=2, ensure_ascii=False))
 
@@ -275,6 +279,11 @@ async def main_(
             f"in db."
         )
 
+    updated_packages = sorted(updated_packages)
+    if updated_packages:
+        s = "" if len(updated_packages) == 1 else "s"
+        print(f"Found update{s} for {format_name_list(updated_packages)}.")
+
     if len(tocrawl) > 0:
         print("GitHub", rate_limit_info)
 
@@ -286,7 +295,7 @@ def next_packages_to_crawl(
     """
     Returns a list of packages to crawl, sorted by next_crawl timestamp.
     If next_crawl is not set, it defaults to the current time.
     """
-    now = datetime.now(timezone.utc)
+    now = now_ts()
     now_string = now.strftime(UTC_FORMAT)
     packages = registry["packages"]
     packages_to_crawl = [
@@ -355,7 +364,7 @@ def next_packages_to_crawl(
 def maintenance(registry: Registry, workspace: Workspace) -> None:
     # lookup all packages in workspace and mark them as `removed`
     # if they have been removed from the registry
-    now = datetime.now(timezone.utc)
+    now = now_ts()
     now_string = now.strftime(UTC_FORMAT)
     current_package_names = {entry["name"] for entry in registry["packages"]}
     packages = workspace["packages"]
@@ -369,7 +378,7 @@ async def crawl(
     existing: WorkspaceEntry
 ) -> WorkspaceEntry:
     out: WorkspaceEntry
-    now = datetime.now(timezone.utc)
+    now = now_ts()
     now_string = now.strftime(UTC_FORMAT)
 
     try:
@@ -433,6 +442,10 @@ async def crawl(
     else:
         out["last_modified"] = max((r["date"] for r in releases))
 
+    previous_last_modified = existing.get("last_modified")
+    if previous_last_modified and out["last_modified"] != previous_last_modified:
+        out["update_detected"] = now_string
+
     # Determine next_crawl interval
     last_modified_dt = (
         datetime
@@ -460,7 +473,7 @@ async def crawl_package(
     entry: RegistryEntry,
     existing: WorkspaceEntry
 ) -> WorkspaceEntry:
-    now = datetime.now(timezone.utc)
+    now = now_ts()
 
     maybe_skip_crawling(entry, existing, now)
     ensure_secure_source(entry, existing)
@@ -613,7 +626,7 @@ async def resolve_tags(
     version_set = SpecifierSet(version_spec)
 
     resolved_releases: list[Release] = []
-    now = datetime.now(timezone.utc)
+    now = now_ts()
     cutoff = now - timedelta(weeks=53)
 
     # We take all releases from the current (rolling) year, but if there
@@ -1148,6 +1161,12 @@ def count_limit_occurrences(argv: list[str]) -> int:
     return count
 
 
+def now_ts() -> datetime:
+    if value := os.getenv("NOW_TS"):
+        return datetime.fromtimestamp(float(value.strip()), tz=timezone.utc)
+    return datetime.now(timezone.utc)
+
+
 def env_flag(name: str, default: bool = False) -> bool:
     value = os.getenv(name)
     if value is None:
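# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the update-detection rule the
# crawl() change above implements. update_detected is set only when a
# previously recorded last_modified changes; because the fresh entry replaces
# the old one wholesale, a stale update_detected is dropped on the next
# unchanged crawl, so each update is reported by at most one run. The
# timestamps below are hypothetical.
# ---------------------------------------------------------------------------
existing = {"name": "Example", "last_modified": "2024-05-01T00:00:00Z"}
out = {"name": "Example", "last_modified": "2024-05-31T00:00:00Z"}
now_string = "2024-06-01T00:00:00Z"  # the frozen now_ts() value for this run

previous_last_modified = existing.get("last_modified")
if previous_last_modified and out["last_modified"] != previous_last_modified:
    out["update_detected"] = now_string

assert out["update_detected"] == "2024-06-01T00:00:00Z"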
""" - now = datetime.now(timezone.utc) + now = now_ts() now_string = now.strftime(UTC_FORMAT) packages = registry["packages"] packages_to_crawl = [ @@ -355,7 +364,7 @@ def next_packages_to_crawl( def maintenance(registry: Registry, workspace: Workspace) -> None: # lookup all packages in workspace and mark them as `removed` # if they have been removed from the registry - now = datetime.now(timezone.utc) + now = now_ts() now_string = now.strftime(UTC_FORMAT) current_package_names = {entry["name"] for entry in registry["packages"]} packages = workspace["packages"] @@ -369,7 +378,7 @@ async def crawl( existing: WorkspaceEntry ) -> WorkspaceEntry: out: WorkspaceEntry - now = datetime.now(timezone.utc) + now = now_ts() now_string = now.strftime(UTC_FORMAT) try: @@ -433,6 +442,10 @@ async def crawl( else: out["last_modified"] = max((r["date"] for r in releases)) + previous_last_modified = existing.get("last_modified") + if previous_last_modified and out["last_modified"] != previous_last_modified: + out["update_detected"] = now_string + # Determine next_crawl interval last_modified_dt = ( datetime @@ -460,7 +473,7 @@ async def crawl_package( entry: RegistryEntry, existing: WorkspaceEntry ) -> WorkspaceEntry: - now = datetime.now(timezone.utc) + now = now_ts() maybe_skip_crawling(entry, existing, now) ensure_secure_source(entry, existing) @@ -613,7 +626,7 @@ async def resolve_tags( version_set = SpecifierSet(version_spec) resolved_releases: list[Release] = [] - now = datetime.now(timezone.utc) + now = now_ts() cutoff = now - timedelta(weeks=53) # We take all releases from the current (rolling) year, but if there @@ -1148,6 +1161,12 @@ def count_limit_occurrences(argv: list[str]) -> int: return count +def now_ts() -> datetime: + if value := os.getenv("NOW_TS"): + return datetime.fromtimestamp(float(value.strip()), tz=timezone.utc) + return datetime.now(timezone.utc) + + def env_flag(name: str, default: bool = False) -> bool: value = os.getenv(name) if value is None: diff --git a/scripts/crawl_libraries.py b/scripts/crawl_libraries.py index ad414121c..4bd9c40b7 100644 --- a/scripts/crawl_libraries.py +++ b/scripts/crawl_libraries.py @@ -22,7 +22,7 @@ load_json, resolve_library, ) -from ._utils import err, write_json +from ._utils import err, format_name_list, write_json from ._explain_package import print_library_explain @@ -465,13 +465,5 @@ def format_updated_message(names: list[str]) -> str: return f"{format_name_list(names)} have been updated." 
diff --git a/tests/crawl/test_update_detection.py b/tests/crawl/test_update_detection.py
new file mode 100644
index 000000000..9fffdcc98
--- /dev/null
+++ b/tests/crawl/test_update_detection.py
@@ -0,0 +1,152 @@
+from datetime import datetime, timezone
+import pytest
+
+from scripts.crawl import crawl, main_, now_ts
+
+
+@pytest.mark.asyncio
+async def test_sets_update_detected_when_last_modified_changes(set_now, monkeypatch):
+    package = {"name": "Example"}
+    existing = {
+        "name": "Example",
+        "last_modified": "2024-05-01T00:00:00Z",
+        "update_detected": "2024-05-05T00:00:00Z",
+    }
+
+    async def stub(*args, **kwargs):
+        return {
+            "name": "Example",
+            "releases": [{"date": "2024-05-31T00:00:00Z"}],
+        }
+
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setattr("scripts.crawl.crawl_package", stub)
+
+    result = await crawl(object(), package, existing)
+
+    assert result["last_modified"] == "2024-05-31T00:00:00Z"
+    assert result["update_detected"] == "2024-06-01T00:00:00Z"
+
+
+@pytest.mark.asyncio
+async def test_does_not_set_update_detected_for_first_seen_package(set_now, monkeypatch):
+    package = {"name": "Example"}
+    existing = {"name": "Example"}
+
+    async def stub(*args, **kwargs):
+        return {
+            "name": "Example",
+            "releases": [{"date": "2024-05-31T00:00:00Z"}],
+        }
+
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setattr("scripts.crawl.crawl_package", stub)
+
+    result = await crawl(object(), package, existing)
+
+    assert result["last_modified"] == "2024-05-31T00:00:00Z"
+    assert "update_detected" not in result
+
+
+@pytest.mark.asyncio
+async def test_drops_previous_update_detected_when_last_modified_is_unchanged(
+    set_now,
+    monkeypatch,
+):
+    package = {"name": "Example"}
+    existing = {
+        "name": "Example",
+        "last_modified": "2024-05-31T00:00:00Z",
+        "update_detected": "2024-05-15T00:00:00Z",
+    }
+
+    async def stub(*args, **kwargs):
+        return {
+            "name": "Example",
+            "releases": [{"date": "2024-05-31T00:00:00Z"}],
+        }
+
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setattr("scripts.crawl.crawl_package", stub)
+
+    result = await crawl(object(), package, existing)
+
+    assert "update_detected" not in result
+
+
+@pytest.mark.asyncio
+async def test_main_prints_sorted_oxford_list_for_updates(set_now, monkeypatch, capsys):
+    registry = {
+        "packages": [
+            {"name": "gamma"},
+            {"name": "alpha"},
+            {"name": "beta"},
+        ]
+    }
+    workspace = {"packages": {}}
+
+    async def stub_crawl(session, package, existing):
+        return {"name": package["name"], "update_detected": "2024-06-01T00:00:00Z"}
+
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setattr("scripts.crawl.crawl", stub_crawl)
+
+    await main_(registry, workspace, None, 100)
+
+    out = capsys.readouterr().out
+    assert "Found updates for alpha, beta, and gamma." in out
+
+
+@pytest.mark.asyncio
+async def test_main_prints_singular_update_summary(set_now, monkeypatch, capsys):
+    registry = {"packages": [{"name": "alpha"}, {"name": "beta"}]}
+    workspace = {"packages": {}}
+
+    async def stub_crawl(session, package, existing):
+        if package["name"] == "alpha":
+            return {"name": package["name"], "update_detected": "2024-06-01T00:00:00Z"}
+        return {"name": package["name"]}
+
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setattr("scripts.crawl.crawl", stub_crawl)
+
+    await main_(registry, workspace, None, 100)
+
+    out = capsys.readouterr().out
+    assert "Found update for alpha." in out
+
+
+@pytest.mark.asyncio
+async def test_main_does_not_report_first_seen_as_update(
+    set_now,
+    monkeypatch,
+    capsys,
+):
+    registry = {"packages": [{"name": "alpha"}]}
+    workspace = {"packages": {}}
+
+    async def stub_crawl_package(session, package, existing):
+        return {
+            "name": package["name"],
+            "releases": [{"date": "2024-05-31T00:00:00Z"}],
+        }
+
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setattr("scripts.crawl.crawl_package", stub_crawl_package)
+
+    await main_(registry, workspace, None, 100)
+
+    out = capsys.readouterr().out
+    assert "Found update" not in out
+
+
+def test_now_ts_prefers_now_ts_env(monkeypatch, set_now):
+    set_now("2024-06-01T00:00:00Z")
+    monkeypatch.setenv("NOW_TS", "1717286400")
+
+    expected = (
+        datetime
+        .strptime("2024-06-02T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ")
+        .replace(tzinfo=timezone.utc)
+    )
+    assert now_ts() == expected
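# ---------------------------------------------------------------------------
# Arithmetic behind test_now_ts_prefers_now_ts_env above: 2024-06-01T00:00:00Z
# is 1717200000 seconds after the Unix epoch, so the pinned NOW_TS of
# 1717286400 (exactly one day, 86400 seconds, later) must decode to
# 2024-06-02T00:00:00Z.
# ---------------------------------------------------------------------------
from datetime import datetime, timezone

assert datetime.fromtimestamp(1717286400, tz=timezone.utc) == datetime(
    2024, 6, 2, tzinfo=timezone.utc
)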
diff --git a/tests/test_collect_logs.py b/tests/test_collect_logs.py
index 5056e1ecf..df9f5389e 100644
--- a/tests/test_collect_logs.py
+++ b/tests/test_collect_logs.py
@@ -2,6 +2,8 @@
 import sys
 from datetime import datetime, timezone
 
+import pytest
+
 import scripts.collect_logs as collect_logs
 
 
@@ -13,7 +15,7 @@ def test_collect_logs_deduplicates_run_id(tmp_path, monkeypatch):
     )
 
     fixed_now = datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc)
-    monkeypatch.setattr(collect_logs, "now_utc", lambda: fixed_now)
+    monkeypatch.setattr(collect_logs, "now_ts", lambda: fixed_now)
     monkeypatch.setenv("GITHUB_RUN_ID", "12345")
 
     logs_path = tmp_path / "logs.json"
@@ -58,6 +60,212 @@ def test_collect_logs_deduplicates_run_id(tmp_path, monkeypatch):
     assert entries[0]["date"] == "2024-10-05T08:20:00+00:00"
 
 
+def test_collect_logs_adds_found_updates_from_workspace(tmp_path, monkeypatch):
+    notes_path = tmp_path / "notes.txt"
+    notes_path.write_text("line\n", encoding="utf-8")
+
+    ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp()
+    monkeypatch.setattr(
+        collect_logs,
+        "now_ts",
+        lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc),
+    )
+
+    workspace_path = tmp_path / "workspace.json"
+    workspace_path.write_text(
+        json.dumps(
+            {
+                "packages": {
+                    "zeta": {
+                        "name": "Zeta",
+                        "update_detected": "2024-10-05T09:30:00Z",
+                        "last_modified": "2024-10-04T00:00:00Z",
+                    },
+                    "alpha": {
+                        "name": "alpha",
+                        "update_detected": "2024-10-05T09:30:00Z",
+                        "last_modified": "2024-10-05T08:00:00Z",
+                    },
+                    "foo": {
+                        "name": "Foo and Bar",
+                        "update_detected": "2024-10-05T09:30:00Z",
+                        "last_modified": "2024-10-03T00:00:00Z",
+                    },
+                    "no-match": {
+                        "name": "No Match",
+                        "update_detected": "2024-10-05T09:31:00Z",
+                        "last_modified": "2024-10-01T00:00:00Z",
+                    },
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    logs_path = tmp_path / "logs.json"
+    args = collect_logs.Args(
+        output=str(logs_path),
+        notes=str(notes_path),
+        run_id="200",
+        timestamp=ts,
+        workspace=str(workspace_path),
+        history_days=collect_logs.HISTORY_DAYS,
+        pretty=False,
+    )
+
+    collect_logs.update_logs(args)
+
+    entries = json.loads(logs_path.read_text(encoding="utf-8"))
+    assert entries[0]["found_updates"] == [
+        {
+            "name": "alpha",
+            "detected_at": "2024-10-05T09:30:00Z",
+            "published_at": "2024-10-05T08:00:00Z",
+        },
+        {
+            "name": "Foo and Bar",
+            "detected_at": "2024-10-05T09:30:00Z",
+            "published_at": "2024-10-03T00:00:00Z",
+        },
+        {
+            "name": "Zeta",
+            "detected_at": "2024-10-05T09:30:00Z",
+            "published_at": "2024-10-04T00:00:00Z",
+        },
+    ]
+
+
+def test_collect_logs_writes_empty_found_updates_list_when_no_matches(tmp_path, monkeypatch):
+    notes_path = tmp_path / "notes.txt"
+    notes_path.write_text("line\n", encoding="utf-8")
+
+    ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp()
+    monkeypatch.setattr(
+        collect_logs,
+        "now_ts",
+        lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc),
+    )
+
+    workspace_path = tmp_path / "workspace.json"
+    workspace_path.write_text(
+        json.dumps(
+            {
+                "packages": {
+                    "alpha": {
+                        "name": "alpha",
+                        "update_detected": "2024-10-05T09:31:00Z",
+                        "last_modified": "2024-10-01T00:00:00Z",
+                    }
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    logs_path = tmp_path / "logs.json"
+    args = collect_logs.Args(
+        output=str(logs_path),
+        notes=str(notes_path),
+        run_id="201",
+        timestamp=ts,
+        workspace=str(workspace_path),
+        history_days=collect_logs.HISTORY_DAYS,
+        pretty=False,
+    )
+
+    collect_logs.update_logs(args)
+
+    entries = json.loads(logs_path.read_text(encoding="utf-8"))
+    assert entries[0]["found_updates"] == []
+
+
+def test_collect_logs_dedupes_run_id_with_found_updates(tmp_path, monkeypatch):
+    notes_path = tmp_path / "notes.txt"
+    notes_path.write_text("first\n", encoding="utf-8")
+
+    monkeypatch.setenv("GITHUB_RUN_ID", "500")
+    monkeypatch.setattr(
+        collect_logs,
+        "now_ts",
+        lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc),
+    )
+
+    workspace_path = tmp_path / "workspace.json"
+    workspace_path.write_text(
+        json.dumps(
+            {
+                "packages": {
+                    "alpha": {
+                        "name": "alpha",
+                        "update_detected": "2024-10-05T09:30:00Z",
+                        "last_modified": "2024-10-01T00:00:00Z",
+                    }
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    logs_path = tmp_path / "logs.json"
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        [
+            "collect-logs",
+            "-o",
+            str(logs_path),
+            "--timestamp",
+            str(datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp()),
+            "--workspace",
+            str(workspace_path),
+            str(notes_path),
+        ],
+    )
+    collect_logs.main()
+
+    notes_path.write_text("second\n", encoding="utf-8")
+    workspace_path.write_text(
+        json.dumps(
+            {
+                "packages": {
+                    "beta": {
+                        "name": "beta",
+                        "update_detected": "2024-10-05T10:00:00Z",
+                        "last_modified": "2024-10-02T00:00:00Z",
+                    }
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        [
+            "collect-logs",
+            "-o",
+            str(logs_path),
+            "--timestamp",
+            str(datetime(2024, 10, 5, 10, 0, tzinfo=timezone.utc).timestamp()),
+            "--workspace",
+            str(workspace_path),
+            str(notes_path),
+        ],
+    )
+    collect_logs.main()
+
+    entries = json.loads(logs_path.read_text(encoding="utf-8"))
+    assert len(entries) == 1
+    assert entries[0]["notes"] == "second\n"
+    assert entries[0]["found_updates"] == [
+        {
+            "name": "beta",
+            "detected_at": "2024-10-05T10:00:00Z",
+            "published_at": "2024-10-02T00:00:00Z",
+        }
+    ]
+
+
 def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch):
     logs_path = tmp_path / "logs.json"
     old_entry = {
@@ -79,13 +287,14 @@ def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch):
     )
 
     fixed_now = datetime(2024, 10, 5, 12, 0, tzinfo=timezone.utc)
-    monkeypatch.setattr(collect_logs, "now_utc", lambda: fixed_now)
+    monkeypatch.setattr(collect_logs, "now_ts", lambda: fixed_now)
 
     args = collect_logs.Args(
         output=str(logs_path),
         notes=str(notes_path),
         run_id="99",
         timestamp=fixed_now.timestamp(),
+        workspace=None,
         history_days=collect_logs.HISTORY_DAYS,
         pretty=True,
     )
@@ -97,3 +306,81 @@ def test_collect_logs_prunes_entries_outside_retention(tmp_path, monkeypatch):
     assert all(entry["run_id"] != "1" for entry in entries)
     # Ordering should keep the newest entry first
     assert entries[0]["run_id"] == "99"
+
+
+def test_collect_logs_uses_now_ts_when_timestamp_is_missing(tmp_path, monkeypatch):
+    notes_path = tmp_path / "notes.txt"
+    notes_path.write_text("line\n", encoding="utf-8")
+
+    ts = datetime(2024, 10, 5, 9, 30, tzinfo=timezone.utc).timestamp()
+    monkeypatch.setenv("NOW_TS", str(ts))
+    monkeypatch.setattr(
+        collect_logs,
+        "now_ts",
+        lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc),
+    )
+
+    logs_path = tmp_path / "logs.json"
+    args = collect_logs.Args(
+        output=str(logs_path),
+        notes=str(notes_path),
+        run_id="42",
+        timestamp=None,
+        workspace=None,
+        history_days=collect_logs.HISTORY_DAYS,
+        pretty=False,
+    )
+
+    collect_logs.update_logs(args)
+
+    entries = json.loads(logs_path.read_text(encoding="utf-8"))
+    assert entries[0]["date"] == "2024-10-05T09:30:00+00:00"
+
+
+def test_collect_logs_timestamp_arg_wins_over_now_ts(tmp_path, monkeypatch):
+    notes_path = tmp_path / "notes.txt"
+    notes_path.write_text("line\n", encoding="utf-8")
+
+    arg_ts = datetime(2024, 10, 5, 10, 0, tzinfo=timezone.utc).timestamp()
+    env_ts = datetime(2024, 10, 5, 11, 0, tzinfo=timezone.utc).timestamp()
+    monkeypatch.setenv("NOW_TS", str(env_ts))
+    monkeypatch.setattr(
+        collect_logs,
+        "now_ts",
+        lambda: datetime(2024, 10, 6, 0, 0, tzinfo=timezone.utc),
+    )
+
+    logs_path = tmp_path / "logs.json"
+    args = collect_logs.Args(
+        output=str(logs_path),
+        notes=str(notes_path),
+        run_id="43",
+        timestamp=arg_ts,
+        workspace=None,
+        history_days=collect_logs.HISTORY_DAYS,
+        pretty=False,
+    )
+
+    collect_logs.update_logs(args)
+
+    entries = json.loads(logs_path.read_text(encoding="utf-8"))
+    assert entries[0]["date"] == "2024-10-05T10:00:00+00:00"
+
+
+def test_collect_logs_raises_same_error_when_no_timestamp_available(tmp_path, monkeypatch):
+    monkeypatch.delenv("NOW_TS", raising=False)
+    notes_path = tmp_path / "notes.txt"
+    notes_path.write_text("line\n", encoding="utf-8")
+
+    args = collect_logs.Args(
+        output=str(tmp_path / "logs.json"),
+        notes=str(notes_path),
+        run_id="44",
+        timestamp=None,
+        workspace=None,
+        history_days=collect_logs.HISTORY_DAYS,
+        pretty=False,
+    )
+
+    with pytest.raises(SystemExit, match="collect_logs: missing --timestamp"):
+        collect_logs.update_logs(args)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1716e2f5f..61016b4ff 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -5,6 +5,7 @@
     is_semver,
     normalize_tz_aware_datetime,
     unique_values_preserving_order,
+    format_name_list,
     pl,
 )
 
@@ -107,6 +108,18 @@ def test_unique_values_preserving_order_iterable():
     assert unique_values_preserving_order(values) == ["a", "b", "c"]
 
 
+@pytest.mark.parametrize(
+    ("names", "expected"),
+    [
+        (["alpha"], "alpha"),
+        (["alpha", "beta"], "alpha and beta"),
+        (["alpha", "beta", "gamma"], "alpha, beta, and gamma"),
+    ],
+)
+def test_format_name_list(names, expected):
+    assert format_name_list(names) == expected
+
+
 @pytest.mark.parametrize(
     ("count", "word", "expected"),
     [