Dev #348 (merged)

28 changes: 12 additions & 16 deletions .github/workflows/publish.yml
@@ -10,6 +10,9 @@ on:
types:
- completed
workflow_dispatch:
push:
tags:
- '*'
schedule:
- cron: '22 8 * * *'

@@ -21,7 +24,7 @@ permissions:
jobs:
build-st4-channel:
runs-on: ubuntu-latest
if: github.event_name != 'schedule'
if: github.event_name == 'workflow_run' || github.event_name == 'workflow_dispatch'
outputs:
update_needed: ${{ steps.check.outputs.update_needed }}
env:
@@ -133,7 +136,7 @@ jobs:

build-st3-channel:
runs-on: ubuntu-latest
if: github.event_name != 'schedule'
if: github.event_name == 'workflow_run' || github.event_name == 'workflow_dispatch'
env:
RELEASE_TAG: the-st3-channel
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -276,18 +279,11 @@ jobs:
restore-keys: |
wrk-cache-

- name: Fetch recent workflow runs
- name: Fetch workflow metadata for logs enrichment
run: |
SINCE=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)
gh api \
repos/${{ github.repository }}/actions/workflows/${{ env.WORKFLOW_ID }}/runs \
--method GET \
-f per_page=100 \
-f status=completed \
-f created=">=$SINCE" \
--paginate \
--jq '[.workflow_runs[] | {id, conclusion, run_started_at}]' \
> "$RUNNER_TEMP/workflow_runs.json"
uv run -m scripts.fetch_logs_metadata \
--runs-output "$RUNNER_TEMP/workflow_runs.json" \
--artifacts-output "$RUNNER_TEMP/workflow_artifacts.json"

- name: Enrich logs.json
run: |
@@ -299,8 +295,8 @@

uv run -m scripts.enrich_logs \
--runs "$RUNNER_TEMP/workflow_runs.json" \
-i ./wrk/logs.json \
-o ./wrk/logs.json
--artifacts "$RUNNER_TEMP/workflow_artifacts.json" \
./wrk/logs.json

retry_5() {
local n=1
@@ -355,7 +351,7 @@ jobs:
gh-pages-nightly:
runs-on: ubuntu-latest
needs: enrich_logs
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/'))
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
43 changes: 43 additions & 0 deletions README.md
@@ -174,6 +174,49 @@ The command above reuses the same layout as [CI](https://github.com/packagecontr

---

### Logs handling

#### `collect_logs.py`

`scripts/collect_logs.py` appends the current run's `notes.txt` to `logs.json` (rolling history),
keyed by run id and trimmed to a retention window (`--history-days`, default 32).

If you pass a `--workspace`, the entry also includes a `found_updates` list of the packages detected in that run.

```bash
uv run -m scripts.collect_logs --output ./logs.json --workspace ./workspace.json ./notes.txt
```
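
The trim itself amounts to a date cutoff. The sketch below is an illustration only, not the actual implementation in `scripts/collect_logs.py`; it assumes entries carry the ISO-8601 `date` field that `enrich_logs.py` also relies on:

```python
# Illustrative sketch: drop entries older than the retention window.
# HISTORY_DAYS mirrors the --history-days flag (default 32).
from datetime import datetime, timedelta, timezone

HISTORY_DAYS = 32

def trim_history(entries: list[dict]) -> list[dict]:
    cutoff = datetime.now(timezone.utc) - timedelta(days=HISTORY_DAYS)
    return [
        entry for entry in entries
        if datetime.fromisoformat(entry["date"].replace("Z", "+00:00")) >= cutoff
    ]
```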

This is a purely mechanical step performed in `crawl.yml`; after that job is done, we enrich
the logs in `publish.yml`:

#### `refresh_logs.py` (plus lower-level helpers)

Use this when you want to reproduce or update `logs.json` locally with GitHub Actions metadata.
The `gh` CLI is required for the ad-hoc API queries.

```bash
# one-shot: download logs (if missing), fetch metadata, enrich logs
uv run -m scripts.refresh_logs --pretty
```

Defaults are tuned for local use:
- repo: inferred from `GITHUB_REPOSITORY` or local `git origin` (see the sketch after this list)
- workflow id: inferred from `crawl.yml` if not set
- since window: `--since-hours 24`
- metadata files: `./workflow_runs.json`, `./workflow_artifacts.json`
- artifact scan cap: `--artifacts-max-pages 10`
- logs path: `./logs.json`
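
The repo inference from the first bullet can be pictured like this; the function name and regex are illustrative assumptions, not the actual code in `scripts/refresh_logs.py`:

```python
# Sketch of the inference order described above: env var first,
# then "owner/name" parsed from the local git origin URL.
import os
import re
import subprocess

def infer_repo() -> str | None:
    if repo := os.environ.get("GITHUB_REPOSITORY"):
        return repo
    url = subprocess.run(
        ["git", "remote", "get-url", "origin"],
        capture_output=True, text=True, check=True,
    ).stdout.strip()
    # Handles git@github.com:owner/name.git as well as https URLs.
    if match := re.search(r"github\.com[:/]([^/]+/[^/]+?)(?:\.git)?$", url):
        return match.group(1)
    return None
```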

If you want explicit control, run the two low-level commands yourself; this is what `publish.yml` does:

```bash
uv run -m scripts.fetch_logs_metadata --pretty
uv run -m scripts.enrich_logs --pretty
```
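
For orientation, an enriched `logs.json` entry might end up looking like this; the field names mirror the `TypedDict`s in `scripts/enrich_logs.py`, while every value below is an invented placeholder:

```python
# Hypothetical entry; values are placeholders, shapes follow
# RuntimeArtifact/ArtifactMetadata in scripts/enrich_logs.py.
entry = {
    "date": "2024-05-01T08:22:11Z",  # run_started_at of the workflow run
    "run_id": "9012345678",          # stringified GitHub Actions run id
    "conclusion": "success",         # backfilled from workflow_runs.json
    "artifacts": [                   # attached from workflow_artifacts.json
        {"id": 123456, "name": "logs", "size": 2048, "url": "https://..."},
    ],
}
```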

---

### `snapshot_test.py`

Creates a compact, single-file snapshot for regression testing (`registry + channel`) from a reduced package set.
114 changes: 96 additions & 18 deletions scripts/enrich_logs.py
@@ -2,9 +2,10 @@

import argparse
import json
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import Any, TypedDict

from ._utils import write_json

@@ -14,35 +15,68 @@ class Args:
input: str
output: str
runs: str
artifacts: str | None
pretty: bool


type RunId = str


class RuntimeArtifact(TypedDict):
run_id: RunId
id: int
name: str
size: int
url: str


class ArtifactMetadata(TypedDict):
id: int
name: str
size: int
url: str


def main():
args = parse_args()
update_logs(args)


def parse_args() -> Args:
parser = argparse.ArgumentParser(
description="Enrich logs.json with workflow run metadata and fill missing runs."
description=(
"Enrich logs.json with workflow run metadata, artifacts metadata, "
"and fill missing runs."
)
)
parser.add_argument(
"-i",
"--input",
"input",
nargs="?",
default="logs.json",
help="Input logs file (default: logs.json).",
)
parser.add_argument(
"-o",
"--output",
default="logs.json",
help="Output logs file (default: logs.json).",
default=None,
help="Output logs file (defaults to INPUT).",
)
parser.add_argument(
"-r",
"--runs",
required=True,
help="Path to workflow_runs.json fetched from the GitHub API.",
default="./workflow_runs.json",
help=(
"Path to workflow_runs.json fetched from the GitHub API "
"(default: ./workflow_runs.json)."
),
)
parser.add_argument(
"--artifacts",
default="./workflow_artifacts.json",
help=(
"Optional path to workflow_artifacts.json fetched from the GitHub API "
"(default: ./workflow_artifacts.json)."
),
)
parser.add_argument(
"--pretty",
@@ -52,59 +86,103 @@ def parse_args() -> Args:
ns = parser.parse_args()
return Args(
input=ns.input,
output=ns.output,
output=ns.output or ns.input,
runs=ns.runs,
artifacts=ns.artifacts,
pretty=ns.pretty,
)


def update_logs(args: Args):
entries = load_json(Path(args.input)) or []
runs = load_json(Path(args.runs)) or []
artifacts: list[RuntimeArtifact] = []
if args.artifacts:
artifacts = load_json(Path(args.artifacts)) or []

if not isinstance(entries, list):
raise SystemExit(f"enrich_logs: input must be a JSON array: {args.input}")
if not isinstance(runs, list):
raise SystemExit(f"enrich_logs: runs must be a JSON array: {args.runs}")
if not isinstance(artifacts, list):
raise SystemExit(f"enrich_logs: artifacts must be a JSON array: {args.artifacts}")

enriched = 0
created = 0
artifacts_attached = 0

runs_index = {
str(run.get("id")): {
run_id: {
"conclusion": run.get("conclusion"),
"run_started_at": run.get("run_started_at"),
}
for run in runs
if "id" in run
if (run_id := str(run.get("id", "")))
}
artifacts_index = build_artifacts_index(artifacts)

seen = set()
for entry in entries:
run_id = str(entry.get("run_id", ""))
if not run_id:
continue
run_id = entry["run_id"]
seen.add(run_id)

info = runs_index.get(run_id)
if not info:
continue
if not entry.get("conclusion") and info.get("conclusion"):
if info and not entry.get("conclusion") and info.get("conclusion"):
entry["conclusion"] = info["conclusion"]
enriched += 1

run_artifacts = artifacts_index.get(run_id)
if run_artifacts and entry.get("artifacts") != run_artifacts:
entry["artifacts"] = run_artifacts
artifacts_attached += 1

for run_id, info in runs_index.items():
if run_id in seen:
continue

date = info.get("run_started_at")
if not date:
continue

new_entry = {
"date": date,
"run_id": run_id,
}
if conclusion := info.get("conclusion"):
new_entry["conclusion"] = conclusion
if run_artifacts := artifacts_index.get(run_id):
new_entry["artifacts"] = run_artifacts
artifacts_attached += 1

entries.append(new_entry)
created += 1

entries.sort(key=lambda entry: entry.get("date", ""), reverse=True)
write_json(args.output, entries, pretty=args.pretty, ensure_ascii=True)
print(f"Enriched entries: {enriched}, added missing runs: {created}")
print(
"Enriched entries: "
f"{enriched}, "
f"added missing runs: {created}, "
f"attached artifacts on entries: {artifacts_attached}"
)


def build_artifacts_index(artifacts: list[RuntimeArtifact]) -> dict[RunId, list[ArtifactMetadata]]:
artifacts_by_run: defaultdict[RunId, list[ArtifactMetadata]] = defaultdict(list)

for artifact in artifacts:
run_id = artifact["run_id"]
artifacts_by_run[run_id].append({
"id": artifact["id"],
"name": artifact["name"],
"size": artifact["size"],
"url": artifact["url"],
})

for run_artifacts in artifacts_by_run.values():
run_artifacts.sort(key=lambda item: (item["name"].casefold(), str(item["id"])))

return dict(artifacts_by_run)


def load_json(path: Path) -> Any: