From 23586a54ccff66d27c0cc6faf05015544a5c59e6 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Sun, 3 May 2026 16:45:29 +0200
Subject: [PATCH 1/4] ci: migrate 16 of 21 ci.yml jobs to smithy self-hosted
 runners
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Builds on the spar pilot (pulseengine/spar#201) — same runner-class
mapping, same workarounds for the rustsec parser CVSS 4.0 issue,
same direct-cargo-deny pattern.

Migrated to smithy:

  rust-cpu      clippy, docs-check, test, semver-checks, coverage,
                proptest, fuzz, msrv
  lean-mem      miri, mutants, verus
  light         fmt, yaml-lint, deny, supply-chain, release-results

Stay on ubuntu-latest (each with explanatory comment in-place):

  - playwright       (--with-deps runs sudo apt-get; smithy has no sudo)
  - vscode-extension (xvfb-run + VS Code test download need sudo apt-get)
  - audit            (cargo-audit 0.21 rustsec parser rejects CVSS 4.0)
  - kani             (kani-verifier bundles CBMC, ~100 MB install)
  - rocq             (Coq install, not on smithy yet)

Two non-trivial fixes inside migrated jobs:

  - test: actionlint install moved from `sudo mv /tmp/actionlint
    /usr/local/bin` to `mv /tmp/actionlint $HOME/.local/bin` plus
    GITHUB_PATH update. Smithy runners have no sudo; same binary,
    different writable location.
  - deny: replaced `cargo deny check` (which would fail while loading
    the advisory-db because of its CVSS 4.0 entries) with
    `cargo deny check bans licenses sources`. The audit job (still on
    hosted runners) covers vulnerability matching in the meantime.

Expected improvement: spar's broad migration showed a ~470x end-to-end
speedup on clippy (~470 min → 1 min) thanks to queue elimination.
Rivet should see a similar gain — its recent runs took 600+ min total.
---
 .github/workflows/ci.yml | 78 ++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ce951bb..ea08ad7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
   # ── Fast checks ───────────────────────────────────────────────────────
   fmt:
     name: Format
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
@@ -24,7 +24,7 @@ jobs:
 
   clippy:
     name: Clippy
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
@@ -35,7 +35,7 @@ jobs:
 
   yaml-lint:
     name: YAML Lint
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v6
       - uses: actions/setup-python@v5
@@ -48,7 +48,7 @@ jobs:
   # A real gate — no continue-on-error.  Budget <1 minute on cached runs.
   docs-check:
     name: Docs Check
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
@@ -67,7 +67,7 @@ jobs:
   # ── Tests ─────────────────────────────────────────────────────────────
   test:
     name: Test
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     env:
       RIVET_ACTIONLINT: "1"
     steps:
@@ -79,11 +79,16 @@ jobs:
           ACTIONLINT_VERSION: "1.7.7"
         run: |
           set -euo pipefail
+          mkdir -p "$HOME/.local/bin"
           curl -fsSL -o /tmp/actionlint.tgz \
             "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
           tar -xzf /tmp/actionlint.tgz -C /tmp actionlint
-          sudo mv /tmp/actionlint /usr/local/bin/actionlint
-          actionlint --version
+          # smithy: install to $HOME/.local/bin (writable) instead of
+          # /usr/local/bin (needs sudo, which the runner user doesn't have).
+          # Same actionlint binary, just a different filesystem location.
+          mv /tmp/actionlint "$HOME/.local/bin/actionlint"
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+          "$HOME/.local/bin/actionlint" --version
       - name: Run tests (JUnit XML output)
         run: |
           cargo install cargo-nextest --locked 2>/dev/null || true
@@ -104,6 +109,9 @@ jobs:
   playwright:
     name: Playwright E2E
     needs: [test]
+    # Stays on ubuntu-latest: `npx playwright install --with-deps` runs
+    # `apt-get install` (sudo needed) and pulls in Chromium + a slate
+    # of system libraries. Smithy runners have no sudo.
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
@@ -136,6 +144,8 @@ jobs:
   vscode-extension:
     name: VS Code Extension
     needs: [test]
+    # Stays on ubuntu-latest: `xvfb-run` headless display + downloaded
+    # VS Code Test environment expects sudo apt-get for system libs.
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
@@ -170,6 +180,11 @@ jobs:
   # ── Security audits ──────────────────────────────────────────────────
   audit:
     name: Security Audit (RustSec)
+    # Stays on ubuntu-latest: smithy ships cargo-audit v0.21.2 whose
+    # bundled rustsec parser rejects RUSTSEC-2026-0037 ("unsupported
+    # CVSS version: 4.0"). v0.22.1 fixes it but the install trips on
+    # smithy's sccache-on-cc setup. Move back once smithy bumps
+    # cargo-audit (tracked).
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
@@ -199,15 +214,25 @@ jobs:
           --ignore RUSTSEC-2026-0114
 
   deny:
-    name: Cargo Deny (licenses, bans, sources, advisories)
-    runs-on: ubuntu-latest
+    # Renamed: skipping advisories until smithy ships an upgraded
+    # rustsec parser (see audit job comment). Same workaround as spar.
+    name: Cargo Deny (licenses, bans, sources)
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
-      - name: Install cargo-deny
-        run: cargo install cargo-deny --locked
-      - name: Run cargo-deny
-        run: cargo deny check
+      # cargo-deny v0.16.4 is pre-installed on smithy via the toolchains
+      # role, so `cargo install` is a no-op there; it is kept so the
+      # workflow still works on hosted. If the install fails, fall back
+      # to an existing binary, but fail this step when none is present.
+      - name: Install cargo-deny (no-op when smithy version matches)
+        run: cargo install cargo-deny --locked --version 0.16.4 || cargo deny --version
+      # Skip `advisories` because the rustsec parser shared with
+      # cargo-audit rejects CVSS 4.0 advisories (RUSTSEC-2026-0037).
+      # bans / licenses / sources still gate. The audit job (on
+      # ubuntu-latest) covers vulnerability matching meanwhile.
+      - name: Run cargo-deny (bans + licenses + sources)
+        run: cargo deny check bans licenses sources
 
   # ── Public API stability (semver drift gate) ────────────────────────
   # Runs only on pull_request so main can move freely between tags.
@@ -218,7 +243,7 @@ jobs:
   semver-checks:
     name: Semver Checks (rivet-core public API)
     if: github.event_name == 'pull_request'
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
@@ -233,7 +258,7 @@ jobs:
   coverage:
     name: Code Coverage
     needs: [test]
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@nightly
@@ -265,7 +290,8 @@ jobs:
   # ── Miri (undefined behavior, pointer provenance) ───────────────────
   miri:
     name: Miri
-    runs-on: ubuntu-latest
+    # lean-mem: Miri allocates aggressively; benefits from 24G ceiling.
+    runs-on: [self-hosted, linux, x64, lean-mem]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@nightly
@@ -295,7 +321,7 @@ jobs:
   # ── Property-based testing (extended) ───────────────────────────────
   proptest:
     name: Proptest (extended)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
@@ -316,7 +342,8 @@ jobs:
   mutants:
     name: Mutation Testing (${{ matrix.crate }})
     needs: [test]
-    runs-on: ubuntu-latest
+    # lean-mem: parallel cargo invocations under -j; RAM-aggressive.
+    runs-on: [self-hosted, linux, x64, lean-mem]
     timeout-minutes: 45
     # Hard gate only for rivet-cli; rivet-core is still surfacing real
     # coverage gaps that need tests written. Flip to `false` once killed.
@@ -383,7 +410,7 @@ jobs:
   fuzz:
     name: Fuzz Testing
     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     continue-on-error: true
     steps:
       - uses: actions/checkout@v6
@@ -414,7 +441,7 @@ jobs:
   # ── Supply chain verification ───────────────────────────────────────
   supply-chain:
     name: Supply Chain (cargo-vet)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
@@ -439,6 +466,8 @@ jobs:
   kani:
     name: Kani Proofs
     needs: [test]
+    # Stays on ubuntu-latest: kani-verifier bundles CBMC (~100 MB),
+    # not pre-installed on smithy. Move once toolchains role ships kani.
     runs-on: ubuntu-latest
     continue-on-error: true
     timeout-minutes: 45
@@ -467,7 +496,8 @@ jobs:
   verus:
     name: Verus Proofs
     needs: [test]
-    runs-on: ubuntu-latest
+    # lean-mem: Verus solver work benefits from RAM headroom.
+    runs-on: [self-hosted, linux, x64, lean-mem]
     continue-on-error: true
     timeout-minutes: 20
     steps:
@@ -509,6 +539,8 @@ jobs:
   rocq:
     name: Rocq Proofs
     needs: [test]
+    # Stays on ubuntu-latest: Rocq (Coq) install is heavy and
+    # not pre-provisioned on smithy. Migrate once toolchains role ships it.
     runs-on: ubuntu-latest
     continue-on-error: true
     timeout-minutes: 20
@@ -530,7 +562,7 @@ jobs:
   # ── MSRV check ──────────────────────────────────────────────────────
   msrv:
     name: MSRV (1.89)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@1.89.0
@@ -546,7 +578,7 @@ jobs:
     name: Publish Test Results on Release
     if: startsWith(github.ref, 'refs/tags/v')
     needs: [test, coverage, miri, proptest, playwright]
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, light]
     permissions:
       contents: write
       id-token: write

From 9d8c214f539351b2e2143b51283c8db2d4a6d772 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Sun, 3 May 2026 17:05:13 +0200
Subject: [PATCH 2/4] ci(miri): bump timeout-minutes 15->30 after smithy run
 hit limit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First migration run timed out exactly at 15:00 with tests still
progressing (last printed test at ~11:00). Smithy's lean-mem class
appears to run the slow tail tests more slowly than the previous
hosted runner did — this could be cgroup memory pressure (24G
MemoryHigh under Miri's shadow allocations) or simply slower
tail-test performance. Bumping the budget conservatively; revisit
once a few green runs let us dial it back closer to the actual time.

Semver Checks is also failing on this PR — upstream issue
('unsupported rustdoc format v57', the action ships a too-old
cargo-semver-checks). NOT a smithy-migration issue; would fail on
hosted too. Tracked as a separate followup; doesn't block this PR.
---
 .github/workflows/ci.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ea08ad7..3369a6a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -314,7 +314,13 @@ jobs:
         # s-expr parsing and hit the same cursor deallocation UB as
         # yaml_cst/feature_model (pulseengine/rowan#211).
         run: cargo miri test -p rivet-core --lib -- --skip bazel --skip db --skip externals --skip export --skip providers --skip test_scanner --skip yaml_edit --skip markdown --skip parse_actual_hazards --skip stpa_hazard --skip yaml_hir --skip feature_model --skip doc_check --skip sexpr_eval --skip query_embed --skip parse_query --skip execute_sexpr
-        timeout-minutes: 15
+        # Bumped 15→30 during smithy migration: first run timed out at
+        # 15 min with the last printed test at the 11-min mark (i.e.,
+        # the slow tests at the tail just ran past the budget on
+        # smithy's lean-mem class). Hosted may have been fine because
+        # of different tail-test perf characteristics. Revisit once
+        # we have a few green runs to set the budget closer to actual.
+        timeout-minutes: 30
         env:
           MIRIFLAGS: "-Zmiri-disable-isolation -Zmiri-tree-borrows"
 

From 62ebeac27f45ba0f8555dee511f33a46ca9c9bc4 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Sun, 3 May 2026 17:30:25 +0200
Subject: [PATCH 3/4] ci: retrigger after smithy TMPDIR fix

Smithy main now points TMPDIR / TMP / TEMP at the per-runner
/var/lib/runners/runnerN/_tmp on lv_runners (500 G), instead of
the host's /tmp on lv_root (80 G). Previous run hit 'no space
left on device' when the rivet HTML-export test ran out of root
FS budget. Runners restarted; this commit triggers a fresh CI.

From 55826720de7d7c6c0cb355ab2403989475701c06 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Sun, 3 May 2026 18:17:48 +0200
Subject: [PATCH 4/4] ci(semver-checks): replace stale wrapper action with
 direct cargo install

obi1kenobi/cargo-semver-checks-action@v2 bundles an older
cargo-semver-checks that doesn't recognise rustdoc JSON v57
(the format current stable rustdoc emits). Every PR run failed
with 'unsupported rustdoc format v57 for file: rivet_core.json'.

Going direct: install the latest cargo-semver-checks at job time
and invoke it. Slightly slower on cold cache but tracks the
upstream rustdoc format. Same end-effect as the wrapper.

Caught during the rivet broad-CI smithy migration (PR #262); not
related to self-hosted vs hosted.
---
 .github/workflows/ci.yml | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3369a6a..49eb15d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -248,11 +248,17 @@ jobs:
       - uses: actions/checkout@v6
       - uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
-      - name: Run cargo-semver-checks
-        uses: obi1kenobi/cargo-semver-checks-action@v2
-        with:
-          package: rivet-core
-          feature-group: default-features
+      # The previous obi1kenobi/cargo-semver-checks-action@v2 wrapper
+      # bundled an older cargo-semver-checks that didn't recognise the
+      # rustdoc JSON v57 format that current stable rustdoc emits, so it
+      # failed every run with `unsupported rustdoc format v57`. Going
+      # direct: install the latest cargo-semver-checks at runtime and
+      # invoke it. Slightly slower (compile cost on a cold cache) but
+      # tracks the rustdoc format upstream emits.
+      - name: Install cargo-semver-checks
+        run: cargo install --locked cargo-semver-checks
+      - name: Check rivet-core public API
+        run: cargo semver-checks check-release -p rivet-core --default-features
 
   # ── Code coverage (Rust nightly for source-based instrumentation) ───
   coverage: