diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e3c89fb..7c477bf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
   # ── Fast checks ───────────────────────────────────────────────────────
   fmt:
     name: Format
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -31,7 +31,7 @@ jobs:
 
   clippy:
     name: Clippy
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -43,7 +43,7 @@ jobs:
   # ── Tests ─────────────────────────────────────────────────────────────
   test:
     name: Test
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -65,7 +65,7 @@ jobs:
   # ── Bench compile smoke (fast regression gate) ──────────────────────
   bench-smoke:
     name: Bench compile smoke
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -79,6 +79,13 @@ jobs:
   # ── Security audits ──────────────────────────────────────────────────
   audit:
     name: Security Audit (RustSec)
+    # Stays on ubuntu-latest temporarily: smithy ships cargo-audit
+    # v0.21.2 whose bundled rustsec parser still rejects RUSTSEC-2026-0037
+    # ("unsupported CVSS version: 4.0"). v0.22.1 fixes it but the
+    # `cargo install --locked cargo-audit` build trips over our
+    # sccache-on-cc setup (aws-lc-sys C compile fails through sccache).
+    # Follow-up: drop sccache from ralf's profile before bumping the
+    # toolchains-role version, then move this back to [self-hosted, light].
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -88,16 +95,29 @@ jobs:
 
   deny:
     name: Cargo Deny
-    runs-on: ubuntu-latest
+    # Smithy has cargo-deny installed (toolchains role, v0.16.4).
+    # We avoid EmbarkStudios/cargo-deny-action@v2 because it launches
+    # a rootless container, which fails on our hardened systemd unit:
+    # newuidmap is setuid but NoNewPrivileges=true blocks the escalation
+    # (newuidmap: write to uid_map failed: Operation not permitted).
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v4
-      - uses: EmbarkStudios/cargo-deny-action@v2
+      - uses: dtolnay/rust-toolchain@stable
+      - run: cargo deny --version
+      # Skip the `advisories` check because both cargo-deny and
+      # cargo-audit share the same rustsec parser, which currently
+      # rejects RUSTSEC-2026-0037 ("unsupported CVSS version: 4.0").
+      # bans / licenses / sources still gate. Re-add `advisories`
+      # once smithy ships an upgraded rustsec; the audit job (still
+      # on ubuntu-latest) covers vulnerability matching meanwhile.
+      - run: cargo deny check bans licenses sources
 
   # ── Code coverage ────────────────────────────────────────────────────
   coverage:
     name: Code Coverage
     needs: [test]
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -129,7 +149,9 @@ jobs:
   # ── Miri (undefined behavior, pointer provenance) ───────────────────
   miri:
     name: Miri
-    runs-on: ubuntu-latest
+    # lean-mem class — Miri allocates aggressively and benefits from the 24G
+    # MemoryHigh ceiling on smithy lean-mem runners over the 12G rust-cpu cap.
+    runs-on: [self-hosted, linux, x64, lean-mem]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -152,7 +174,7 @@ jobs:
   # parser/scheduler invariants get exercised on every change.
   proptest:
     name: Proptest (extended)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -166,7 +188,10 @@ jobs:
   mutants:
     name: Mutation Testing
     needs: [test]
-    runs-on: ubuntu-latest
+    # lean-mem — many parallel cargo invocations, RAM pressure under -j 4.
+    # The full-workspace exhaustive run lives in mutants-weekly.yml; this
+    # gating job stays narrow (spar-analysis) with a survivor ratchet.
+    runs-on: [self-hosted, linux, x64, lean-mem]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@nightly
@@ -203,7 +228,7 @@ jobs:
   # ── Fuzz smoke (60s per target on PRs) ──────────────────────────────
   fuzz-smoke:
     name: Fuzz smoke (60s/target)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     # Only run on PRs — pushes to main hit the nightly workflow instead.
     if: github.event_name == 'pull_request'
     steps:
@@ -229,7 +254,7 @@ jobs:
   # ── Supply chain verification ───────────────────────────────────────
   supply-chain:
     name: Supply Chain (cargo-vet)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, light]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
@@ -246,7 +271,7 @@ jobs:
   # in artifacts/, safety/stpa/, and rivet.yaml.
   rivet-validate:
     name: Rivet validate (artifacts)
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, x64, rust-cpu]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
@@ -272,6 +297,9 @@ jobs:
   # Time budget: cold cache ≤30 min, warm ≤5 min (per #135).
   bazel-test:
     name: Bazel test (//...)
+    # Stays on ubuntu-latest until Bazel is installed on the smithy host.
+    # Tracked as a follow-up: smithy/group_vars/all.yml could add a
+    # bazel apt-installable. Until then, hosted handles this.
     runs-on: ubuntu-latest
     continue-on-error: true
     timeout-minutes: 35
@@ -318,6 +346,10 @@ jobs:
   # 3. At that point, extend MAX_TASKS from 4 to 8 and re-tune unwinds.
   kani:
     name: Kani Bounded Model Checking
+    # Stays on ubuntu-latest because kani-verifier bundles CBMC (~100 MB)
+    # which we don't pre-install on smithy. Once smithy ships Kani as a
+    # toolchain, switch to rust-cpu (the verification is RAM-modest but
+    # CPU-bound; CBMC is single-threaded per harness).
     runs-on: ubuntu-latest
     continue-on-error: true
     steps:
diff --git a/.github/workflows/mutants-weekly.yml b/.github/workflows/mutants-weekly.yml
new file mode 100644
index 0000000..ffe0646
--- /dev/null
+++ b/.github/workflows/mutants-weekly.yml
@@ -0,0 +1,171 @@
+name: Mutants Weekly
+
+# Heavy-quality counterpart to the gating `mutants:` job in ci.yml.
+# That one runs on every PR with a survivor-count ratchet against
+# spar-analysis only. THIS one runs across the whole workspace on a
+# weekly cadence (and on demand) — no gating, just a long-form
+# quality signal you read async.
+#
+# Resource posture (DD: see smithy/artifacts/design-decisions.yaml):
+#   - lean-mem runners (24 G MemoryHigh, 24 G usable per job)
+#   - 12 h timeout cap
+#   - never cancel an in-flight run; let it finish even when overlapping refs land
+#   - results land in the run's GITHUB_STEP_SUMMARY + an uploaded artefact
+#     (90-day retention) — no PR red lights, no Issue auto-filing yet
+#     (that's a future iteration once the report shape stabilises)
+
+on:
+  schedule:
+    - cron: "0 2 * * 0"  # 02:00 UTC every Sunday
+  push:
+    # TEMPORARY: trigger on push when this file changes, so the workflow
+    # can be exercised from a PR branch before it lands on main (GitHub's
+    # workflow_dispatch and schedule both require the file to exist on
+    # the default branch first). REMOVE this `push:` block before merging
+    # the PR to main.
+    paths:
+      - .github/workflows/mutants-weekly.yml
+  workflow_dispatch:
+    inputs:
+      shard:
+        description: "Mutant shard, e.g. '0/8' (default), '1/8', or 'all' for the full workspace pass (~hours)."
+        required: false
+        default: "0/8"
+      packages:
+        description: "Cargo packages to mutate (space-separated -p list, empty = whole workspace)."
+        required: false
+        default: ""
+
+concurrency:
+  group: mutants-weekly
+  # Quality jobs don't cancel; an interrupted mutation report is worse
+  # than a delayed one. Every run shares this one group, so an overlapping
+  # run waits as pending instead of executing alongside the in-flight one
+  # (GitHub keeps at most one pending run per group; a newer one replaces it).
+  cancel-in-progress: false
+
+jobs:
+  mutants:
+    name: cargo-mutants shard ${{ github.event.inputs.shard || '0/8' }}
+    runs-on: [self-hosted, linux, x64, lean-mem]
+    timeout-minutes: 720
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: dtolnay/rust-toolchain@nightly
+
+      - uses: Swatinem/rust-cache@v2
+        with:
+          # Distinct cache key from the gating mutants — different mutation
+          # set, different sccache hit profile.
+          shared-key: mutants-weekly
+
+      - name: Install cargo-mutants
+        uses: taiki-e/install-action@v2
+        with:
+          tool: cargo-mutants
+
+      - name: Resolve inputs
+        id: cfg
+        # Dispatch inputs arrive via the environment instead of being
+        # expanded into the script text, so they cannot be parsed as shell
+        # syntax. They are validated here because later steps splice the
+        # resulting outputs straight into a command line.
+        env:
+          SHARD_INPUT: ${{ github.event.inputs.shard }}
+          PACKAGES_INPUT: ${{ github.event.inputs.packages }}
+        run: |
+          fail() { echo "::error::$1"; exit 1; }
+          # Scheduled and push runs carry no inputs and fall back to 0/8.
+          # NOTE(review): that is one shard of eight, while the header
+          # describes a whole-workspace weekly pass — confirm the intent.
+          SHARD="${SHARD_INPUT:-0/8}"
+          SHARD_FLAG=""
+          case "$SHARD" in
+            all) ;;
+            *[!0-9/]* | */*/* | /* | */) fail "shard must be 'all' or K/N, e.g. 0/8" ;;
+            */*) SHARD_FLAG="--shard $SHARD" ;;
+            *) fail "shard must be 'all' or K/N, e.g. 0/8" ;;
+          esac
+          PKGS_FLAG=""
+          for p in $PACKAGES_INPUT; do
+            case "$p" in
+              -* | *[!A-Za-z0-9_.@-]*) fail "package names may only contain [A-Za-z0-9_.@-]" ;;
+            esac
+            PKGS_FLAG="$PKGS_FLAG -p $p"
+          done
+          {
+            echo "shard=$SHARD"
+            echo "shard_flag=$SHARD_FLAG"
+            echo "pkgs_flag=$PKGS_FLAG"
+          } >> "$GITHUB_OUTPUT"
+          echo "Effective: cargo mutants ${PKGS_FLAG:-(workspace)} $SHARD_FLAG --timeout 180 --jobs 8 --output mutants-out --no-shuffle -- --lib"
+
+      - name: Run cargo-mutants
+        id: run
+        # Exit codes 2 (missed mutants) and 3 (timeouts) are expected outcomes
+        # of a non-gating quality run and are tolerated; anything else (usage
+        # error, failing baseline) fails the job. The summary and upload
+        # steps run regardless because they are `if: always()`.
+        run: |
+          set -o pipefail
+          status=0
+          cargo mutants \
+            ${{ steps.cfg.outputs.pkgs_flag }} \
+            ${{ steps.cfg.outputs.shard_flag }} \
+            --timeout 180 \
+            --jobs 8 \
+            --output mutants-out \
+            --no-shuffle \
+            -- --lib \
+            || status=$?
+          case "$status" in
+            0) ;;
+            2 | 3) echo "cargo-mutants exited non-zero (survivors expected; see report)" ;;
+            *)
+              echo "::error::cargo-mutants failed with exit code $status"
+              exit "$status"
+              ;;
+          esac
+
+      - name: Summarise to job summary
+        if: always()
+        run: |
+          MISSED=0
+          [ -f mutants-out/missed.txt ] && MISSED=$(wc -l < mutants-out/missed.txt | tr -d ' ')
+          CAUGHT=0
+          [ -f mutants-out/caught.txt ] && CAUGHT=$(wc -l < mutants-out/caught.txt | tr -d ' ')
+          UNVIABLE=0
+          [ -f mutants-out/unviable.txt ] && UNVIABLE=$(wc -l < mutants-out/unviable.txt | tr -d ' ')
+          TIMEOUT=0
+          [ -f mutants-out/timeout.txt ] && TIMEOUT=$(wc -l < mutants-out/timeout.txt | tr -d ' ')
+          {
+            echo "## cargo-mutants weekly — ${{ steps.cfg.outputs.shard }}"
+            echo
+            echo "Runner: \`$(hostname)\` (${SMITHY_RUNNER_CLASS:-unknown class})"
+            echo
+            echo "| Outcome | Count |"
+            echo "|---------|------:|"
+            echo "| 🟥 Missed (test suite did not catch) | $MISSED |"
+            echo "| 🟩 Caught (test suite caught) | $CAUGHT |"
+            echo "| ⏱ Timeout | $TIMEOUT |"
+            echo "| ⚪ Unviable (build failed) | $UNVIABLE |"
+            echo
+            if [ "$MISSED" -gt 0 ] && [ -f mutants-out/missed.txt ]; then
+              echo "<details><summary>First 50 missed mutants</summary>"
+              echo
+              echo '```'
+              head -50 mutants-out/missed.txt
+              echo '```'
+              echo "</details>"
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload mutants report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: mutants-out-${{ github.run_id }}
+          path: mutants-out/
+          retention-days: 90
diff --git a/deny.toml b/deny.toml
index 41f7cb6..84b49aa 100644
--- a/deny.toml
+++ b/deny.toml
@@ -1,5 +1,8 @@
 [advisories]
-unmaintained = "workspace"
+# `unmaintained` was removed from this section in cargo-deny PR #611
+# (the unmaintained-crate signal is now handled differently). CI runs
+# `cargo deny check bans licenses sources`, so [advisories] does not gate
+# until that check is re-enabled; `yanked` is warn-only even then.
 yanked = "warn"
 
 [licenses]