tempoxyz · joshieDo · May 5, 2026 · May 5, 2026 · May 5, 2026 · May 7, 2026
diff --git a/.github/workflows/bench-e2e.yml b/.github/workflows/bench-e2e.yml
@@ -1,7 +1,7 @@
 # E2E benchmark job.
 #
-# Called by bench.yml when mode=e2e. Runs `nu tempo.nu bench` for an
-# interleaved B-F-F-B comparison using synthetic transactions.
+# Called by bench.yml when mode=e2e. Runs `nu bench-e2e.nu e2e`
+# for an interleaved baseline-feature-feature-baseline comparison using two local validators.
 
 name: bench-e2e
 
@@ -29,10 +29,10 @@ on:
         required: true
         default: "300"
       bloat:
-        description: State bloat size in MiB.
+        description: State bloat snapshot size (1g, 10g, 100g).
         type: string
         required: true
-        default: "100000"
+        default: "100g"
       tps:
         description: Target transactions per second.
         type: string
@@ -42,10 +42,10 @@ on:
         description: Benchmark backend.
         type: choice
         required: true
-        default: tempo-bench
+        default: txgen
         options:
-          - tempo-bench
           - txgen
+          - tempo-bench
       txgen-ref:
         description: Optional ref to pin in tempoxyz/txgen.
         type: string
@@ -237,17 +237,19 @@ env:
   RUSTC_WRAPPER: "sccache"
 
 permissions:
+  actions: read
   contents: read
   pull-requests: write
 
 jobs:
   bench-e2e:
     name: bench-e2e
-    runs-on: [self-hosted, Linux, X64, bare-metal]
+    runs-on: [self-hosted, Linux, X64, bare-metal-dual-schelk]
     timeout-minutes: 300
     env:
       BENCH_PR: ${{ inputs.pr }}
       BENCH_ACTOR: ${{ inputs.actor || github.actor }}
+      BENCH_MODE: ${{ inputs.mode || 'e2e' }}
       BENCH_PRESET: ${{ inputs.preset }}
       BENCH_DURATION: ${{ inputs.duration }}
       BENCH_BLOAT: ${{ inputs.bloat }}
@@ -364,7 +366,7 @@ jobs:
             const bHf2 = process.env.BENCH_BASELINE_HARDFORK || '';
             const fHf2 = process.env.BENCH_FEATURE_HARDFORK || '';
             const hfNote2 = bHf2 ? `, baseline-hardfork: \`${bHf2}\`, feature-hardfork: \`${fHf2}\`` : '';
-            core.exportVariable('BENCH_CONFIG', `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat} MiB\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${hfNote2}`);
+            core.exportVariable('BENCH_CONFIG', `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat}\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${hfNote2}`);
 
             const { buildBody } = require('./.github/scripts/bench-update-status.js');
             await github.rest.issues.updateComment({
@@ -378,6 +380,13 @@ jobs:
       - uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9
         continue-on-error: true
 
+      - name: Validate e2e options
+        run: |
+          if [ -n "$BENCH_BASELINE_HARDFORK" ] || [ -n "$BENCH_FEATURE_HARDFORK" ]; then
+            echo "::error::mode=e2e hardfork comparison is not wired for the single-runner local harness yet."
+            exit 1
+          fi
+
       - name: Install txgen backend
         if: env.BENCH_BACKEND == 'txgen'
         env:
@@ -510,36 +519,33 @@ jobs:
             const s = require('./.github/scripts/bench-update-status.js');
             await s({github, context, status: 'Running benchmark...'});
 
-      - name: Run benchmark
+      - name: Run e2e benchmark
         id: bench
         env:
           BASELINE_REF: ${{ steps.refs.outputs.baseline-ref }}
           FEATURE_REF: ${{ steps.refs.outputs.feature-ref }}
+          # Passed via env (like the refs above) so ref-derived names are never spliced into the shell script text.
+          BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }}
+          FEATURE_NAME: ${{ steps.refs.outputs.feature-name }}
         run: |
-          if [ "$BENCH_BACKEND" = "txgen" ]; then
-            cmd=(nu contrib/bench/bench-txgen.nu run)
-          else
-            cmd=(nu tempo.nu bench)
-          fi
+          cmd=(nu bench-e2e.nu e2e)
           cmd+=(
             --preset "$BENCH_PRESET"
-            --mode dev
             --bloat "$BENCH_BLOAT"
+            --backend "$BENCH_BACKEND"
             --duration "$BENCH_DURATION"
             --tps "$BENCH_TPS"
-            --no-infra
             --baseline "$BASELINE_REF"
             --feature "$FEATURE_REF"
-            --bench-datadir "/reth-bench/tempo_${BENCH_BLOAT}mb"
+            --baseline-name "$BASELINE_NAME"
+            --feature-name "$FEATURE_NAME"
             --tune
-            --gas-limit 1000000000000
           )
+          [ "$BENCH_FORCE_BLOAT" = "true" ] && cmd+=(--force-bloat)
           [ "$BENCH_SAMPLY" = "true" ] && cmd+=(--samply)
           [ "$BENCH_TRACY" != "off" ] && cmd+=(--tracy "$BENCH_TRACY" --tracy-seconds "$BENCH_TRACY_SECONDS" --tracy-offset "$BENCH_TRACY_OFFSET")
           [ -n "$BENCH_BASELINE_ARGS" ] && cmd+=(--baseline-args="$BENCH_BASELINE_ARGS")
           [ -n "$BENCH_FEATURE_ARGS" ] && cmd+=(--feature-args="$BENCH_FEATURE_ARGS")
-          [ -n "$BENCH_BASELINE_HARDFORK" ] && cmd+=(--baseline-hardfork "$BENCH_BASELINE_HARDFORK" --feature-hardfork "$BENCH_FEATURE_HARDFORK")
-          [ "$BENCH_FORCE_BLOAT" = "true" ] && cmd+=(--force)
           [ -n "$BENCH_BENCH_ARGS" ] && cmd+=(--bench-args="$BENCH_BENCH_ARGS")
           [ -n "$BENCH_BENCH_ENV" ] && cmd+=(--bench-env="$BENCH_BENCH_ENV")
           [ -n "$BENCH_BASELINE_ENV" ] && cmd+=(--baseline-env="$BENCH_BASELINE_ENV")
@@ -559,7 +562,7 @@ jobs:
           echo "Results directory: $RESULTS_DIR"
 
       - name: Upload results
-        if: "!cancelled()"
+        if: ${{ !cancelled() }}
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: tempo-bench-results
@@ -649,31 +652,34 @@ jobs:
               }
             } catch (e) {}
 
+            const runs = ['baseline-1', 'feature-1', 'feature-2', 'baseline-2'];
+
             // Samply profile links (URLs produced by tempo.nu upload-samply-profile)
             let samplySection = '';
             if (process.env.BENCH_SAMPLY === 'true') {
-              const runs = ['baseline-1', 'feature-1', 'feature-2', 'baseline-2'];
               const links = [];
               for (const run of runs) {
-                try {
-                  const url = fs.readFileSync(`${resultsDir}/profile-${run}-url.txt`, 'utf8').trim();
-                  if (url) links.push(`- **${run}**: [Firefox Profiler](${url})`);
-                } catch (e) {}
+                for (const role of ['a', 'b']) {
+                  try {
+                    const url = fs.readFileSync(`${resultsDir}/profile-${run}-${role}-url.txt`, 'utf8').trim();
+                    if (url) links.push(`- **${run} / ${role}**: [Firefox Profiler](${url})`);
+                  } catch (e) {}
+                }
               }
               if (links.length > 0) {
                 samplySection = `\n\n### Samply Profiles\n\n${links.join('\n')}\n`;
               }
             }
 
-            // Tracy profile links (URLs produced by tempo.nu upload-tracy-profile)
+            // Tracy profile links (URLs produced by tempo.nu upload-tracy-profile).
+            // Single-runner e2e captures both local validators in one phase-level file.
             let tracySection = '';
             if (process.env.BENCH_TRACY && process.env.BENCH_TRACY !== 'off') {
-              const runs = ['baseline-1', 'feature-1', 'feature-2', 'baseline-2'];
               const links = [];
               for (const run of runs) {
                 try {
                   const url = fs.readFileSync(`${resultsDir}/tracy-${run}-url.txt`, 'utf8').trim();
-                  if (url) links.push(`- **${run}**: [Tracy Viewer](${url})`);
+                  if (url) links.push(`- **${run} / local validators**: [Tracy Viewer](${url})`);
                 } catch (e) {}
               }
               if (links.length > 0) {

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -1,7 +1,7 @@
 # Runs tempo benchmarks.
 #
-# Benchmarks use `nu tempo.nu bench` which runs an interleaved B-F-F-B
-# comparison against a schelk-managed snapshot on a self-hosted runner.
+# E2E benchmarks run `nu bench-e2e.nu e2e` against the dual-schelk runner.
+# Replay benchmarks use `nu tempo.nu bench-replay`.
 #
 # Trigger via PR comment (`@decofe bench` or `derek bench`).
 
@@ -91,11 +91,12 @@ jobs:
             actor = context.payload.comment.user.login;
 
             const body = context.payload.comment.body.trim();
-            const intArgs = new Set(['duration', 'bloat', 'tps', 'tracy-seconds', 'tracy-offset', 'blocks', 'warmup']);
+            const intArgs = new Set(['duration', 'tps', 'tracy-seconds', 'tracy-offset', 'blocks', 'warmup']);
             const refArgs = new Set(['baseline', 'feature', 'txgen-ref']);
             const stringArgs = new Set(['mode', 'preset', 'backend', 'tracy', 'baseline-args', 'feature-args', 'baseline-hardfork', 'feature-hardfork', 'bench-args', 'bench-env', 'baseline-env', 'feature-env', 'chain']);
             const boolArgs = new Set(['samply', 'force-bloat', 'no-slack', 'no-existing-recipients']);
-            const defaults = { mode: 'e2e', preset: 'tip20', duration: '300', bloat: '100', tps: '10000', baseline: '', feature: '', backend: 'tempo-bench', 'txgen-ref': '', samply: 'false', tracy: 'off', 'tracy-seconds': '30', 'tracy-offset': '120', 'baseline-args': '', 'feature-args': '', 'baseline-hardfork': '', 'feature-hardfork': '', 'force-bloat': 'false', 'no-slack': 'false', 'no-existing-recipients': 'false', 'bench-args': '', 'bench-env': '', 'baseline-env': '', 'feature-env': '', blocks: '5000', warmup: '1000', chain: 'mainnet' };
+            const bloatValues = new Set(['1g', '10g', '100g']);
+            const defaults = { mode: 'e2e', preset: 'tip20', duration: '300', bloat: '100g', tps: '10000', baseline: '', feature: '', backend: 'txgen', 'txgen-ref': '', samply: 'false', tracy: 'off', 'tracy-seconds': '30', 'tracy-offset': '120', 'baseline-args': '', 'feature-args': '', 'baseline-hardfork': '', 'feature-hardfork': '', 'force-bloat': 'false', 'no-slack': 'false', 'no-existing-recipients': 'false', 'bench-args': '', 'bench-env': '', 'baseline-env': '', 'feature-env': '', blocks: '5000', warmup: '1000', chain: 'mainnet' };
             const unknown = [];
             const invalid = [];
             const args = body.replace(/^(?:@decofe|derek) bench\s*/, '');
@@ -140,6 +141,13 @@ jobs:
                 } else {
                   invalid.push(`\`${key}=${value}\` (must be true or false)`);
                 }
+              } else if (key === 'bloat') {
+                const normalized = String(value || '').toLowerCase(); // coerce first: a bare `bloat` token may carry no value
+                if (!bloatValues.has(normalized)) {
+                  invalid.push(`\`${key}=${value || ''}\` (must be one of: 1g, 10g, 100g)`);
+                } else {
+                  defaults[key] = normalized;
+                }
               } else if (stringArgs.has(key)) {
                 if (!value) {
                   invalid.push(`\`${key}=\` (must not be empty)`);
@@ -154,7 +162,7 @@ jobs:
             if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``);
             if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`);
             if (errors.length) {
-              const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=N] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]\``;
+              const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=1g|10g|100g] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]\``;
               await github.rest.issues.createComment({
                 owner: context.repo.owner,
                 repo: context.repo.repo,
@@ -167,7 +175,7 @@ jobs:
             mode = defaults.mode;
             preset = defaults.preset;
             duration = defaults.duration;
-            bloat = String(parseInt(defaults.bloat, 10) * 1000);
+            bloat = defaults.bloat;
             tps = defaults.tps;
             baseline = defaults.baseline;
             feature = defaults.feature;
@@ -196,7 +204,7 @@ jobs:
             const erFlag = `--existing-recipients=${existingRecipients}`;
             benchArgs = benchArgs ? `${benchArgs} ${erFlag}` : erFlag;
 
-            const usageStr = '**Usage:** `@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=N] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]`';
+            const usageStr = '**Usage:** `@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=1g|10g|100g] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]`';
 
             // Validate chain value
             if (!['mainnet', 'testnet'].includes(chain)) {
@@ -392,7 +400,7 @@ jobs:
             const bHf = process.env.ACK_BASELINE_HARDFORK || '';
             const fHf = process.env.ACK_FEATURE_HARDFORK || '';
             const hfNote = bHf ? `, baseline-hardfork: \`${bHf}\`, feature-hardfork: \`${fHf}\`` : '';
-            const config = `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat} MiB\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${naNote}${hfNote}`;
+            const config = `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat}\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${naNote}${hfNote}`;
 
             const { data: comment } = await github.rest.issues.createComment({
               owner: context.repo.owner,