From 23586a54ccff66d27c0cc6faf05015544a5c59e6 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sun, 3 May 2026 16:45:29 +0200 Subject: [PATCH 1/4] ci: migrate 16 of 21 ci.yml jobs to smithy self-hosted runners MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on the spar pilot (pulseengine/spar#201) — same runner-class mapping, same workarounds for the rustsec parser CVSS 4.0 issue, same direct-cargo-deny pattern. Migrated to smithy: rust-cpu: clippy, docs-check, test, semver-checks, coverage, proptest, fuzz, msrv; lean-mem: miri, mutants, verus; light: fmt, yaml-lint, deny, supply-chain, release-results. Stay on ubuntu-latest (each with explanatory comment in-place): - playwright (--with-deps does sudo apt-get; smithy runners have no sudo) - vscode-extension (xvfb-run + downloaded VS Code Test setup) - audit (cargo-audit 0.21 rustsec parser rejects CVSS 4.0) - kani (kani-verifier bundles CBMC, ~100 MB install) - rocq (Coq install, not on smithy yet) Two non-trivial fixes inside migrated jobs: - test: actionlint install moved from `sudo mv /tmp/actionlint /usr/local/bin` to `mv /tmp/actionlint $HOME/.local/bin` plus GITHUB_PATH update. Smithy runners have no sudo; same binary, different writable location. - deny: replaced the bare `cargo deny check` (which would fail loading advisory-db with CVSS 4.0) with `cargo deny check bans licenses sources`. The audit job (still on hosted) covers vulnerability matching meanwhile. Expected improvement: spar's broad migration showed ~470x end-to-end speedup on clippy (~470 min → 1 min) thanks to queue elimination. Rivet should see a similar improvement — its recent runs showed 600+ min total. 
--- .github/workflows/ci.yml | 78 ++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce951bb..ea08ad7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: # ── Fast checks ─────────────────────────────────────────────────────── fmt: name: Format - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, light] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -24,7 +24,7 @@ jobs: clippy: name: Clippy - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -35,7 +35,7 @@ jobs: yaml-lint: name: YAML Lint - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, light] steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v5 @@ -48,7 +48,7 @@ jobs: # A real gate — no continue-on-error. Budget <1 minute on cached runs. docs-check: name: Docs Check - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -67,7 +67,7 @@ jobs: # ── Tests ───────────────────────────────────────────────────────────── test: name: Test - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] env: RIVET_ACTIONLINT: "1" steps: @@ -79,11 +79,16 @@ jobs: ACTIONLINT_VERSION: "1.7.7" run: | set -euo pipefail + mkdir -p "$HOME/.local/bin" curl -fsSL -o /tmp/actionlint.tgz \ "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz" tar -xzf /tmp/actionlint.tgz -C /tmp actionlint - sudo mv /tmp/actionlint /usr/local/bin/actionlint - actionlint --version + # smithy: install to $HOME/.local/bin (writable) instead of + # /usr/local/bin (needs sudo, which the runner user doesn't have). 
+ # Same actionlint binary, just a different filesystem location. + mv /tmp/actionlint "$HOME/.local/bin/actionlint" + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + "$HOME/.local/bin/actionlint" --version - name: Run tests (JUnit XML output) run: | cargo install cargo-nextest --locked 2>/dev/null || true @@ -104,6 +109,9 @@ jobs: playwright: name: Playwright E2E needs: [test] + # Stays on ubuntu-latest: `npx playwright install --with-deps` runs + # `apt-get install` (sudo needed) and pulls in Chromium + a slate + # of system libraries. Smithy runners have no sudo. runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -136,6 +144,8 @@ jobs: vscode-extension: name: VS Code Extension needs: [test] + # Stays on ubuntu-latest: `xvfb-run` headless display + downloaded + # VS Code Test environment expects sudo apt-get for system libs. runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -170,6 +180,11 @@ jobs: # ── Security audits ────────────────────────────────────────────────── audit: name: Security Audit (RustSec) + # Stays on ubuntu-latest: smithy ships cargo-audit v0.21.2 whose + # bundled rustsec parser rejects RUSTSEC-2026-0037 ("unsupported + # CVSS version: 4.0"). v0.22.1 fixes it but the install trips on + # smithy's sccache-on-cc setup. Move back once smithy bumps + # cargo-audit (tracked). runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -199,15 +214,25 @@ jobs: --ignore RUSTSEC-2026-0114 deny: - name: Cargo Deny (licenses, bans, sources, advisories) - runs-on: ubuntu-latest + # Renamed: skipping advisories until smithy ships an upgraded + # rustsec parser (see audit job comment). Same workaround as spar. 
+ name: Cargo Deny (licenses, bans, sources) + runs-on: [self-hosted, linux, x64, light] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - - name: Install cargo-deny - run: cargo install cargo-deny --locked - - name: Run cargo-deny - run: cargo deny check + # cargo-deny v0.16.4 is pre-installed on smithy via the toolchains + # role; the `cargo install` line below becomes a no-op when the + # same version is already present. Kept here so the workflow + # still works on hosted if the runner pool changes. + - name: Install cargo-deny (no-op when smithy version matches) + run: cargo install cargo-deny --locked --version 0.16.4 || true + # Skip `advisories` because the rustsec parser shared with + # cargo-audit rejects CVSS 4.0 advisories (RUSTSEC-2026-0037). + # bans / licenses / sources still gate. The audit job (on + # ubuntu-latest) covers vulnerability matching meanwhile. + - name: Run cargo-deny (bans + licenses + sources) + run: cargo deny check bans licenses sources # ── Public API stability (semver drift gate) ──────────────────────── # Runs only on pull_request so main can move freely between tags. @@ -218,7 +243,7 @@ jobs: semver-checks: name: Semver Checks (rivet-core public API) if: github.event_name == 'pull_request' - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -233,7 +258,7 @@ jobs: coverage: name: Code Coverage needs: [test] - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@nightly @@ -265,7 +290,8 @@ jobs: # ── Miri (undefined behavior, pointer provenance) ─────────────────── miri: name: Miri - runs-on: ubuntu-latest + # lean-mem: Miri allocates aggressively; benefits from 24G ceiling. 
+ runs-on: [self-hosted, linux, x64, lean-mem] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@nightly @@ -295,7 +321,7 @@ jobs: # ── Property-based testing (extended) ─────────────────────────────── proptest: name: Proptest (extended) - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -316,7 +342,8 @@ jobs: mutants: name: Mutation Testing (${{ matrix.crate }}) needs: [test] - runs-on: ubuntu-latest + # lean-mem: parallel cargo invocations under -j; RAM-aggressive. + runs-on: [self-hosted, linux, x64, lean-mem] timeout-minutes: 45 # Hard gate only for rivet-cli; rivet-core is still surfacing real # coverage gaps that need tests written. Flip to `false` once killed. @@ -383,7 +410,7 @@ jobs: fuzz: name: Fuzz Testing if: github.event_name == 'push' && github.ref == 'refs/heads/main' - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] continue-on-error: true steps: - uses: actions/checkout@v6 @@ -414,7 +441,7 @@ jobs: # ── Supply chain verification ─────────────────────────────────────── supply-chain: name: Supply Chain (cargo-vet) - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, light] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -439,6 +466,8 @@ jobs: kani: name: Kani Proofs needs: [test] + # Stays on ubuntu-latest: kani-verifier bundles CBMC (~100 MB), + # not pre-installed on smithy. Move once toolchains role ships kani. runs-on: ubuntu-latest continue-on-error: true timeout-minutes: 45 @@ -467,7 +496,8 @@ jobs: verus: name: Verus Proofs needs: [test] - runs-on: ubuntu-latest + # lean-mem: Verus solver work benefits from RAM headroom. + runs-on: [self-hosted, linux, x64, lean-mem] continue-on-error: true timeout-minutes: 20 steps: @@ -509,6 +539,8 @@ jobs: rocq: name: Rocq Proofs needs: [test] + # Stays on ubuntu-latest: Rocq (Coq) install is heavy and + # not pre-provisioned on smithy. 
Migrate once toolchains role ships it. runs-on: ubuntu-latest continue-on-error: true timeout-minutes: 20 @@ -530,7 +562,7 @@ jobs: # ── MSRV check ────────────────────────────────────────────────────── msrv: name: MSRV (1.89) - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, rust-cpu] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@1.89.0 @@ -546,7 +578,7 @@ jobs: name: Publish Test Results on Release if: startsWith(github.ref, 'refs/tags/v') needs: [test, coverage, miri, proptest, playwright] - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, x64, light] permissions: contents: write id-token: write From 9d8c214f539351b2e2143b51283c8db2d4a6d772 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sun, 3 May 2026 17:05:13 +0200 Subject: [PATCH 2/4] ci(miri): bump timeout-minutes 15->30 after smithy run hit limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First migration run timed out exactly at 15:00 with tests still progressing (last printed test at ~11:00). Smithy's lean-mem class appears to run the slow tail tests slower than the previous hosted runner did — could be cgroup memory pressure (24G MemoryHigh under Miri's shadow allocations) or just longer tail test perf. Bumping the budget conservatively; revisit once we have a few green runs to dial it back closer to actual. Semver Checks is also failing on this PR — upstream issue ('unsupported rustdoc format v57', the action ships a too-old cargo-semver-checks). NOT a smithy-migration issue; would fail on hosted too. Tracked as a separate followup; doesn't block this PR. 
--- .github/workflows/ci.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea08ad7..3369a6a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -314,7 +314,13 @@ jobs: # s-expr parsing and hit the same cursor deallocation UB as # yaml_cst/feature_model (pulseengine/rowan#211). run: cargo miri test -p rivet-core --lib -- --skip bazel --skip db --skip externals --skip export --skip providers --skip test_scanner --skip yaml_edit --skip markdown --skip parse_actual_hazards --skip stpa_hazard --skip yaml_hir --skip feature_model --skip doc_check --skip sexpr_eval --skip query_embed --skip parse_query --skip execute_sexpr - timeout-minutes: 15 + # Bumped 15→30 during smithy migration: first run timed out at + # 15 min with the last printed test at the 11-min mark (i.e., + # the slow tests at the tail just ran past the budget on + # smithy's lean-mem class). Hosted may have been fine because + # of different tail-test perf characteristics. Revisit once + # we have a few green runs to set the budget closer to actual. + timeout-minutes: 30 env: MIRIFLAGS: "-Zmiri-disable-isolation -Zmiri-tree-borrows" From 62ebeac27f45ba0f8555dee511f33a46ca9c9bc4 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sun, 3 May 2026 17:30:25 +0200 Subject: [PATCH 3/4] ci: retrigger after smithy TMPDIR fix Smithy main now points TMPDIR / TMP / TEMP at the per-runner /var/lib/runners/runnerN/_tmp on lv_runners (500 G), instead of the host's /tmp on lv_root (80 G). Previous run hit 'no space left on device' when the rivet HTML-export test ran out of root FS budget. Runners restarted; this commit triggers a fresh CI. 
From 55826720de7d7c6c0cb355ab2403989475701c06 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sun, 3 May 2026 18:17:48 +0200 Subject: [PATCH 4/4] ci(semver-checks): replace stale wrapper action with direct cargo install obi1kenobi/cargo-semver-checks-action@v2 bundles an older cargo-semver-checks that doesn't recognise rustdoc JSON v57 (the format current stable rustdoc emits). Every PR run failed with 'unsupported rustdoc format v57 for file: rivet_core.json'. Going direct: install the latest cargo-semver-checks at job time and invoke it. Slightly slower on cold cache but tracks the upstream rustdoc format. Same end-effect as the wrapper. Caught during the rivet broad-CI smithy migration (PR #262); not related to self-hosted vs hosted. --- .github/workflows/ci.yml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3369a6a..49eb15d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -248,11 +248,17 @@ jobs: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - - name: Run cargo-semver-checks - uses: obi1kenobi/cargo-semver-checks-action@v2 - with: - package: rivet-core - feature-group: default-features + # The previous obi1kenobi/cargo-semver-checks-action@v2 wrapper + # bundled an older cargo-semver-checks that didn't recognise the + # rustdoc JSON v57 format that current stable rustdoc emits, so it + # failed every run with `unsupported rustdoc format v57`. Going + # direct: install the latest cargo-semver-checks at runtime and + # invoke it. Slightly slower (compile cost on a cold cache) but + # tracks the rustdoc format upstream emits. 
+ - name: Install cargo-semver-checks + run: cargo install --locked cargo-semver-checks + - name: Check rivet-core public API + run: cargo semver-checks check-release -p rivet-core --default-features # ── Code coverage (Rust nightly for source-based instrumentation) ─── coverage: