Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 35 additions & 29 deletions .github/workflows/bench-e2e.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# E2E benchmark job.
#
# Called by bench.yml when mode=e2e. Runs `nu tempo.nu bench` for an
# interleaved B-F-F-B comparison using synthetic transactions.
# Called by bench.yml when mode=e2e. Runs `nu bench-e2e.nu e2e`
# for an interleaved baseline-feature-feature-baseline comparison using two local validators.

name: bench-e2e

Expand Down Expand Up @@ -29,10 +29,10 @@ on:
required: true
default: "300"
bloat:
description: State bloat size in MiB.
description: State bloat snapshot size (1g, 10g, 100g).
type: string
required: true
default: "100000"
default: "100g"
tps:
description: Target transactions per second.
type: string
Expand All @@ -42,10 +42,10 @@ on:
description: Benchmark backend.
type: choice
required: true
default: tempo-bench
default: txgen
options:
- tempo-bench
- txgen
- tempo-bench
txgen-ref:
description: Optional ref to pin in tempoxyz/txgen.
type: string
Expand Down Expand Up @@ -237,17 +237,19 @@ env:
RUSTC_WRAPPER: "sccache"

permissions:
actions: read
contents: read
pull-requests: write

jobs:
bench-e2e:
name: bench-e2e
runs-on: [self-hosted, Linux, X64, bare-metal]
runs-on: [self-hosted, Linux, X64, bare-metal-dual-schelk]
timeout-minutes: 300
env:
BENCH_PR: ${{ inputs.pr }}
BENCH_ACTOR: ${{ inputs.actor || github.actor }}
BENCH_MODE: ${{ inputs.mode || 'e2e' }}
BENCH_PRESET: ${{ inputs.preset }}
BENCH_DURATION: ${{ inputs.duration }}
BENCH_BLOAT: ${{ inputs.bloat }}
Expand Down Expand Up @@ -364,7 +366,7 @@ jobs:
const bHf2 = process.env.BENCH_BASELINE_HARDFORK || '';
const fHf2 = process.env.BENCH_FEATURE_HARDFORK || '';
const hfNote2 = bHf2 ? `, baseline-hardfork: \`${bHf2}\`, feature-hardfork: \`${fHf2}\`` : '';
core.exportVariable('BENCH_CONFIG', `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat} MiB\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${hfNote2}`);
core.exportVariable('BENCH_CONFIG', `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat}\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${hfNote2}`);

const { buildBody } = require('./.github/scripts/bench-update-status.js');
await github.rest.issues.updateComment({
Expand All @@ -378,6 +380,13 @@ jobs:
- uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9
continue-on-error: true

- name: Validate e2e options
run: |
if [ -n "$BENCH_BASELINE_HARDFORK" ] || [ -n "$BENCH_FEATURE_HARDFORK" ]; then
echo "::error::mode=e2e hardfork comparison is not wired for the single-runner local harness yet."
exit 1
fi

- name: Install txgen backend
if: env.BENCH_BACKEND == 'txgen'
env:
Expand Down Expand Up @@ -510,36 +519,30 @@ jobs:
const s = require('./.github/scripts/bench-update-status.js');
await s({github, context, status: 'Running benchmark...'});

- name: Run benchmark
- name: Run e2e benchmark
id: bench
env:
BASELINE_REF: ${{ steps.refs.outputs.baseline-ref }}
FEATURE_REF: ${{ steps.refs.outputs.feature-ref }}
run: |
if [ "$BENCH_BACKEND" = "txgen" ]; then
cmd=(nu contrib/bench/bench-txgen.nu run)
else
cmd=(nu tempo.nu bench)
fi
cmd=(nu bench-e2e.nu e2e)
cmd+=(
--preset "$BENCH_PRESET"
--mode dev
--bloat "$BENCH_BLOAT"
--backend "$BENCH_BACKEND"
--duration "$BENCH_DURATION"
--tps "$BENCH_TPS"
--no-infra
--baseline "$BASELINE_REF"
--feature "$FEATURE_REF"
--bench-datadir "/reth-bench/tempo_${BENCH_BLOAT}mb"
--baseline-name "${{ steps.refs.outputs.baseline-name }}"
--feature-name "${{ steps.refs.outputs.feature-name }}"
--tune
--gas-limit 1000000000000
)
[ "$BENCH_FORCE_BLOAT" = "true" ] && cmd+=(--force-bloat)
[ "$BENCH_SAMPLY" = "true" ] && cmd+=(--samply)
[ "$BENCH_TRACY" != "off" ] && cmd+=(--tracy "$BENCH_TRACY" --tracy-seconds "$BENCH_TRACY_SECONDS" --tracy-offset "$BENCH_TRACY_OFFSET")
[ -n "$BENCH_BASELINE_ARGS" ] && cmd+=(--baseline-args="$BENCH_BASELINE_ARGS")
[ -n "$BENCH_FEATURE_ARGS" ] && cmd+=(--feature-args="$BENCH_FEATURE_ARGS")
[ -n "$BENCH_BASELINE_HARDFORK" ] && cmd+=(--baseline-hardfork "$BENCH_BASELINE_HARDFORK" --feature-hardfork "$BENCH_FEATURE_HARDFORK")
[ "$BENCH_FORCE_BLOAT" = "true" ] && cmd+=(--force)
[ -n "$BENCH_BENCH_ARGS" ] && cmd+=(--bench-args="$BENCH_BENCH_ARGS")
[ -n "$BENCH_BENCH_ENV" ] && cmd+=(--bench-env="$BENCH_BENCH_ENV")
[ -n "$BENCH_BASELINE_ENV" ] && cmd+=(--baseline-env="$BENCH_BASELINE_ENV")
Expand All @@ -559,7 +562,7 @@ jobs:
echo "Results directory: $RESULTS_DIR"

- name: Upload results
if: "!cancelled()"
if: ${{ !cancelled() }}
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: tempo-bench-results
Expand Down Expand Up @@ -649,31 +652,34 @@ jobs:
}
} catch (e) {}

const runs = ['baseline-1', 'feature-1', 'feature-2', 'baseline-2'];

// Samply profile links (URLs produced by tempo.nu upload-samply-profile)
let samplySection = '';
if (process.env.BENCH_SAMPLY === 'true') {
const runs = ['baseline-1', 'feature-1', 'feature-2', 'baseline-2'];
const links = [];
for (const run of runs) {
try {
const url = fs.readFileSync(`${resultsDir}/profile-${run}-url.txt`, 'utf8').trim();
if (url) links.push(`- **${run}**: [Firefox Profiler](${url})`);
} catch (e) {}
for (const role of ['a', 'b']) {
try {
const url = fs.readFileSync(`${resultsDir}/profile-${run}-${role}-url.txt`, 'utf8').trim();
if (url) links.push(`- **${run} / ${role}**: [Firefox Profiler](${url})`);
} catch (e) {}
}
}
if (links.length > 0) {
samplySection = `\n\n### Samply Profiles\n\n${links.join('\n')}\n`;
}
}

// Tracy profile links (URLs produced by tempo.nu upload-tracy-profile)
// Tracy profile links (URLs produced by tempo.nu upload-tracy-profile).
// Single-runner e2e captures both local validators in one phase-level file.
let tracySection = '';
if (process.env.BENCH_TRACY && process.env.BENCH_TRACY !== 'off') {
const runs = ['baseline-1', 'feature-1', 'feature-2', 'baseline-2'];
const links = [];
for (const run of runs) {
try {
const url = fs.readFileSync(`${resultsDir}/tracy-${run}-url.txt`, 'utf8').trim();
if (url) links.push(`- **${run}**: [Tracy Viewer](${url})`);
if (url) links.push(`- **${run} / local validators**: [Tracy Viewer](${url})`);
} catch (e) {}
}
if (links.length > 0) {
Expand Down
24 changes: 16 additions & 8 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Runs tempo benchmarks.
#
# Benchmarks use `nu tempo.nu bench` which runs an interleaved B-F-F-B
# comparison against a schelk-managed snapshot on a self-hosted runner.
# E2E benchmarks run `nu bench-e2e.nu e2e` against the dual-schelk runner.
# Replay benchmarks use `nu tempo.nu bench-replay`.
#
# Trigger via PR comment (`@decofe bench` or `derek bench`).

Expand Down Expand Up @@ -91,11 +91,12 @@ jobs:
actor = context.payload.comment.user.login;

const body = context.payload.comment.body.trim();
const intArgs = new Set(['duration', 'bloat', 'tps', 'tracy-seconds', 'tracy-offset', 'blocks', 'warmup']);
const intArgs = new Set(['duration', 'tps', 'tracy-seconds', 'tracy-offset', 'blocks', 'warmup']);
const refArgs = new Set(['baseline', 'feature', 'txgen-ref']);
const stringArgs = new Set(['mode', 'preset', 'backend', 'tracy', 'baseline-args', 'feature-args', 'baseline-hardfork', 'feature-hardfork', 'bench-args', 'bench-env', 'baseline-env', 'feature-env', 'chain']);
const boolArgs = new Set(['samply', 'force-bloat', 'no-slack', 'no-existing-recipients']);
const defaults = { mode: 'e2e', preset: 'tip20', duration: '300', bloat: '100', tps: '10000', baseline: '', feature: '', backend: 'tempo-bench', 'txgen-ref': '', samply: 'false', tracy: 'off', 'tracy-seconds': '30', 'tracy-offset': '120', 'baseline-args': '', 'feature-args': '', 'baseline-hardfork': '', 'feature-hardfork': '', 'force-bloat': 'false', 'no-slack': 'false', 'no-existing-recipients': 'false', 'bench-args': '', 'bench-env': '', 'baseline-env': '', 'feature-env': '', blocks: '5000', warmup: '1000', chain: 'mainnet' };
const bloatValues = new Set(['1g', '10g', '100g']);
const defaults = { mode: 'e2e', preset: 'tip20', duration: '300', bloat: '100g', tps: '10000', baseline: '', feature: '', backend: 'txgen', 'txgen-ref': '', samply: 'false', tracy: 'off', 'tracy-seconds': '30', 'tracy-offset': '120', 'baseline-args': '', 'feature-args': '', 'baseline-hardfork': '', 'feature-hardfork': '', 'force-bloat': 'false', 'no-slack': 'false', 'no-existing-recipients': 'false', 'bench-args': '', 'bench-env': '', 'baseline-env': '', 'feature-env': '', blocks: '5000', warmup: '1000', chain: 'mainnet' };
const unknown = [];
const invalid = [];
const args = body.replace(/^(?:@decofe|derek) bench\s*/, '');
Expand Down Expand Up @@ -140,6 +141,13 @@ jobs:
} else {
invalid.push(`\`${key}=${value}\` (must be true or false)`);
}
} else if (key === 'bloat') {
const normalized = value.toLowerCase();
if (!bloatValues.has(normalized)) {
invalid.push(`\`${key}=${value}\` (must be one of: 1g, 10g, 100g)`);
} else {
defaults[key] = normalized;
}
} else if (stringArgs.has(key)) {
if (!value) {
invalid.push(`\`${key}=\` (must not be empty)`);
Expand All @@ -154,7 +162,7 @@ jobs:
if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``);
if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`);
if (errors.length) {
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=N] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]\``;
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=1g|10g|100g] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]\``;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
Expand All @@ -167,7 +175,7 @@ jobs:
mode = defaults.mode;
preset = defaults.preset;
duration = defaults.duration;
bloat = String(parseInt(defaults.bloat, 10) * 1000);
bloat = defaults.bloat;
tps = defaults.tps;
baseline = defaults.baseline;
feature = defaults.feature;
Expand Down Expand Up @@ -196,7 +204,7 @@ jobs:
const erFlag = `--existing-recipients=${existingRecipients}`;
benchArgs = benchArgs ? `${benchArgs} ${erFlag}` : erFlag;

const usageStr = '**Usage:** `@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=N] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]`';
const usageStr = '**Usage:** `@decofe bench [mode=MODE] [chain=mainnet|testnet] [blocks=N] [warmup=N] [preset=NAME] [duration=N] [bloat=1g|10g|100g] [tps=N] [baseline=REF] [feature=REF] [backend=NAME] [txgen-ref=REF] [samply] [force-bloat] [no-slack] [existing-recipients=BOOL] [tracy=MODE] [tracy-seconds=N] [tracy-offset=N] [baseline-args="ARGS"] [feature-args="ARGS"] [baseline-hardfork=FORK] [feature-hardfork=FORK] [bench-args="ARGS"] [bench-env="VARS"] [baseline-env="VARS"] [feature-env="VARS"]`';

// Validate chain value
if (!['mainnet', 'testnet'].includes(chain)) {
Expand Down Expand Up @@ -392,7 +400,7 @@ jobs:
const bHf = process.env.ACK_BASELINE_HARDFORK || '';
const fHf = process.env.ACK_FEATURE_HARDFORK || '';
const hfNote = bHf ? `, baseline-hardfork: \`${bHf}\`, feature-hardfork: \`${fHf}\`` : '';
const config = `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat} MiB\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${naNote}${hfNote}`;
const config = `**Config:** mode: \`${mode}\`, preset: \`${preset}\`, duration: \`${duration}s\`, bloat: \`${bloat}\`, tps: \`${tps}\`, baseline: \`${baseline}\`, feature: \`${feature}\`, backend: \`${backend}\`, txgen-ref: \`${txgenRef}\`${samplyNote}${tracyNote}${naNote}${hfNote}`;

const { data: comment } = await github.rest.issues.createComment({
owner: context.repo.owner,
Expand Down
Loading
Loading