From 0d6675ccc6e3eae0437198e2479da1e95b647ae5 Mon Sep 17 00:00:00 2001 From: avalonche Date: Sun, 15 Mar 2026 12:40:54 -0700 Subject: [PATCH 1/3] fix: use cumulative prefix sets for incremental trie state root The incremental trie cache produces wrong state roots when a storage slot modified in flashblock N reverts in flashblock N+1. The reverted slot disappears from the cumulative HashedPostState, so its nibble path is missing from the prefix set. The trie walker skips the subtree and uses the stale cached hash from the previous flashblock's branch node. Fix: track cumulative TriePrefixSetsMut across all flashblocks. Before building TrieInput, extend the current prefix sets with all prior flashblocks' prefix sets. This forces the walker to re-visit every path modified in earlier flashblocks. The fix is also ~30% faster than the unfixed incremental path because descending into cached in-memory nodes is faster than DB cursor seeks for skipped branches. Benchmarks (10 flashblocks, 50k accounts): - Without cache: ~2,200ms (baseline) - Incremental (no fix): ~844ms (2.6x faster, incorrect on reverts) - Incremental (with fix): ~650ms (3.4x faster, correct) Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 144 +++++++ crates/op-rbuilder/Cargo.toml | 7 + .../benches/bench_flashblocks_state_root.rs | 393 ++++++++++++++++++ crates/op-rbuilder/src/builder/payload.rs | 38 +- .../op-rbuilder/src/tests/incremental_trie.rs | 373 +++++++++++++++++ crates/op-rbuilder/src/tests/mod.rs | 3 + docs/TRIE_CACHE_BENCHMARK_REPORT.md | 283 +++++++++++++ 7 files changed, 1235 insertions(+), 6 deletions(-) create mode 100644 crates/op-rbuilder/benches/bench_flashblocks_state_root.rs create mode 100644 crates/op-rbuilder/src/tests/incremental_trie.rs create mode 100644 docs/TRIE_CACHE_BENCHMARK_REPORT.md diff --git a/Cargo.lock b/Cargo.lock index 72d5ca59..7d051686 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,6 +1113,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "1.0.0" @@ -2440,6 +2446,12 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "castaway" version = "0.2.4" @@ -2554,6 +2566,33 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -2901,6 +2940,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "critical-section" version = "1.2.0" @@ -4410,6 +4485,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hash-db" version = "0.15.2" @@ -5187,6 +5273,17 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -6852,6 +6949,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "op-alloy" version = "0.23.1" @@ -7018,6 +7121,7 @@ dependencies = [ "chrono", "clap", "clap_builder", + "criterion", "ctor", "dashmap", "derive_more", @@ -7046,6 +7150,7 @@ dependencies = [ "opentelemetry", "p2p", "parking_lot", + "proptest", "rand 0.9.2", "reqwest 0.12.28", "reth", @@ -7090,6 +7195,7 @@ dependencies = [ "reth-tracing-otlp", "reth-transaction-pool", "reth-trie", + "reth-trie-db", "revm", "rlimit 0.10.2", "secp256k1 0.30.0", @@ -7602,6 +7708,34 @@ dependencies = [ "crunchy", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "polling" version = "3.11.0" @@ -13125,6 +13259,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" diff --git a/crates/op-rbuilder/Cargo.toml b/crates/op-rbuilder/Cargo.toml index 6b0e326d..36be3dc7 100644 --- a/crates/op-rbuilder/Cargo.toml +++ b/crates/op-rbuilder/Cargo.toml @@ -157,6 +157,9 @@ testcontainers.workspace = true nanoid.workspace = true reth-ipc.workspace = true reth-node-builder = { workspace = true, features = ["test-utils"] } +reth-trie-db.workspace = true +proptest = "1.10" +criterion = { version = "0.5", features = ["html_reports"] } ctor.workspace = true rlimit.workspace = true hyper.workspace = true @@ -203,6 +206,10 @@ interop = [] telemetry = ["reth-tracing-otlp", "opentelemetry"] loki = ["tracing-loki", "telemetry"] +[[bench]] +name = "bench_flashblocks_state_root" +harness = false + [[bin]] name = "op-rbuilder" path = "src/bin/op-rbuilder/main.rs" diff --git a/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs b/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs new file mode 100644 index 00000000..d8216346 --- /dev/null +++ b/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs @@ -0,0 +1,393 @@ +//! Benchmark comparing flashblocks state root calculation with and without incremental trie caching. +//! +//! This benchmark simulates building 10 sequential flashblocks, measuring the total time +//! spent in state root calculation. It compares: +//! - Without cache: Full state root calculation from database each time +//! - With cache: Incremental state root using cached trie nodes from previous flashblock +//! +//! Run with: +//! ``` +//! cargo bench -p op-rbuilder --bench bench_flashblocks_state_root +//! ``` + +use alloy_primitives::{Address, B256, U256, keccak256}; +use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main}; +use rand::{Rng, SeedableRng, rngs::StdRng}; +use reth_chainspec::MAINNET; +use reth_primitives_traits::Account; +use reth_provider::{ + DatabaseProviderFactory, HashingWriter, StateRootProvider, + test_utils::create_test_provider_factory_with_chain_spec, +}; +use reth_trie::{HashedPostState, HashedStorage, TrieInput, prefix_set::TriePrefixSetsMut}; +use std::{collections::HashMap, time::Instant}; + +const SEED: u64 = 42; + +type AccountList = Vec<(Address, Account)>; +type StorageMap = HashMap>; + +/// Generate random accounts and storage for initial database state +fn generate_test_data( + num_accounts: usize, + storage_per_account: usize, + seed: u64, +) -> (AccountList, StorageMap) { + let mut rng = StdRng::seed_from_u64(seed); + let mut accounts = Vec::with_capacity(num_accounts); + let mut storage = HashMap::new(); + + for _ in 0..num_accounts { + let mut addr_bytes = [0u8; 20]; + rng.fill(&mut addr_bytes); + let address = Address::from_slice(&addr_bytes); + + let account = Account { + nonce: rng.random_range(0..1000), + balance: U256::from(rng.random_range(0u64..1_000_000)), + bytecode_hash: if rng.random_bool(0.3) { + let mut hash = [0u8; 32]; + rng.fill(&mut hash); + Some(B256::from(hash)) + } else { + None + }, + }; + accounts.push((address, account)); + + // Generate storage for accounts + if storage_per_account > 0 && rng.random_bool(0.5) { + let mut slots = Vec::with_capacity(storage_per_account); + for _ in 0..storage_per_account { + let mut key = [0u8; 32]; + rng.fill(&mut key); + let value = U256::from(rng.random_range(1u64..1_000_000)); + slots.push((B256::from(key), value)); + } + storage.insert(address, slots); + } + } + + (accounts, storage) +} + +/// Setup test database with initial state +fn setup_database( + accounts: &[(Address, Account)], + storage: &HashMap>, +) -> reth_provider::providers::ProviderFactory { + let provider_factory = create_test_provider_factory_with_chain_spec(MAINNET.clone()); + + { + let provider_rw = provider_factory.provider_rw().unwrap(); + + // Insert accounts + let accounts_iter = accounts.iter().map(|(addr, acc)| (*addr, Some(*acc))); + provider_rw + .insert_account_for_hashing(accounts_iter) + .unwrap(); + + // Insert storage + let storage_entries: Vec<_> = storage + .iter() + .map(|(addr, slots)| { + let entries: Vec<_> = slots + .iter() + .map(|(key, value)| reth_primitives_traits::StorageEntry { + key: *key, + value: *value, + }) + .collect(); + (*addr, entries) + }) + .collect(); + provider_rw + .insert_storage_for_hashing(storage_entries) + .unwrap(); + + provider_rw.commit().unwrap(); + } + + provider_factory +} + +/// Generate a flashblock's worth of state changes +fn generate_flashblock_changes( + base_accounts: &[(Address, Account)], + change_size: usize, + seed: u64, +) -> (AccountList, StorageMap) { + let mut rng = StdRng::seed_from_u64(seed); + let mut accounts = Vec::with_capacity(change_size); + let mut storage = HashMap::new(); + + for i in 0..change_size { + // Mix of existing and new addresses (70% existing, 30% new) + let address = if i < base_accounts.len() && rng.random_bool(0.7) { + base_accounts[rng.random_range(0..base_accounts.len())].0 + } else { + let mut addr_bytes = [0u8; 20]; + rng.fill(&mut addr_bytes); + Address::from_slice(&addr_bytes) + }; + + let account = Account { + nonce: rng.random_range(1000..2000), + balance: U256::from(rng.random_range(1_000_000u64..2_000_000)), + bytecode_hash: None, + }; + accounts.push((address, account)); + + // Add some storage updates (30% of accounts) + if rng.random_bool(0.3) { + let mut slots = Vec::new(); + for _ in 0..rng.random_range(1..10) { + let mut key = [0u8; 32]; + rng.fill(&mut key); + let value = U256::from(rng.random_range(1u64..1_000_000)); + slots.push((B256::from(key), value)); + } + storage.insert(address, slots); + } + } + + (accounts, storage) +} + +/// Convert to HashedPostState for state root calculation +fn to_hashed_post_state( + accounts: &[(Address, Account)], + storage: &HashMap>, +) -> HashedPostState { + let hashed_accounts: Vec<_> = accounts + .iter() + .map(|(addr, acc)| (keccak256(addr), Some(*acc))) + .collect(); + + let mut hashed_storages = alloy_primitives::map::HashMap::default(); + for (addr, slots) in storage { + let hashed_addr = keccak256(addr); + let hashed_storage = HashedStorage::from_iter( + false, + slots.iter().map(|(key, value)| (keccak256(key), *value)), + ); + hashed_storages.insert(hashed_addr, hashed_storage); + } + + HashedPostState { + accounts: hashed_accounts.into_iter().collect(), + storages: hashed_storages, + } +} + +/// Benchmark without incremental trie cache (baseline) +fn bench_without_cache( + provider_factory: &reth_provider::providers::ProviderFactory< + reth_provider::test_utils::MockNodeTypesWithDB, + >, + flashblock_changes: &[HashedPostState], +) -> (u128, Vec) { + let mut individual_times = Vec::new(); + let total_start = Instant::now(); + + for hashed_state in flashblock_changes { + let fb_start = Instant::now(); + let provider = provider_factory.database_provider_ro().unwrap(); + let latest = reth_provider::LatestStateProvider::new(provider); + let _ = black_box( + latest + .state_root_with_updates(hashed_state.clone()) + .unwrap(), + ); + individual_times.push(fb_start.elapsed().as_micros()); + } + + (total_start.elapsed().as_micros(), individual_times) +} + +/// Benchmark with incremental trie cache (optimized) +fn bench_with_cache( + provider_factory: &reth_provider::providers::ProviderFactory< + reth_provider::test_utils::MockNodeTypesWithDB, + >, + flashblock_changes: &[HashedPostState], +) -> (u128, Vec) { + let mut individual_times = Vec::new(); + let mut prev_trie_updates = None; + let total_start = Instant::now(); + + for (i, hashed_state) in flashblock_changes.iter().enumerate() { + let fb_start = Instant::now(); + let provider = provider_factory.database_provider_ro().unwrap(); + + let (state_root, trie_output) = if i == 0 || prev_trie_updates.is_none() { + // First flashblock: full calculation + let latest = reth_provider::LatestStateProvider::new(provider); + latest + .state_root_with_updates(hashed_state.clone()) + .unwrap() + } else { + // Subsequent flashblocks: incremental calculation + let trie_input = TrieInput::new( + prev_trie_updates.clone().unwrap(), + hashed_state.clone(), + hashed_state.construct_prefix_sets(), + ); + + let latest = reth_provider::LatestStateProvider::new(provider); + latest + .state_root_from_nodes_with_updates(trie_input) + .unwrap() + }; + + prev_trie_updates = Some(trie_output); + individual_times.push(fb_start.elapsed().as_micros()); + + // Use the result + black_box(state_root); + } + + (total_start.elapsed().as_micros(), individual_times) +} + +/// Benchmark with incremental trie cache + cumulative prefix sets (the fix for stale hashes) +fn bench_with_cache_fixed( + provider_factory: &reth_provider::providers::ProviderFactory< + reth_provider::test_utils::MockNodeTypesWithDB, + >, + flashblock_changes: &[HashedPostState], +) -> (u128, Vec) { + let mut individual_times = Vec::new(); + let mut prev_trie_updates = None; + let mut cumulative_prefix_sets: Option = None; + let total_start = Instant::now(); + + for (i, hashed_state) in flashblock_changes.iter().enumerate() { + let fb_start = Instant::now(); + let provider = provider_factory.database_provider_ro().unwrap(); + + let (state_root, trie_output) = if i == 0 || prev_trie_updates.is_none() { + // First flashblock: full calculation + let latest = reth_provider::LatestStateProvider::new(provider); + latest + .state_root_with_updates(hashed_state.clone()) + .unwrap() + } else { + // Subsequent flashblocks: incremental with cumulative prefix sets + let mut prefix_sets = hashed_state.construct_prefix_sets(); + if let Some(ref prev_sets) = cumulative_prefix_sets { + prefix_sets.extend(prev_sets.clone()); + } + + let trie_input = TrieInput::new( + prev_trie_updates.clone().unwrap(), + hashed_state.clone(), + prefix_sets, + ); + + let latest = reth_provider::LatestStateProvider::new(provider); + latest + .state_root_from_nodes_with_updates(trie_input) + .unwrap() + }; + + // Accumulate prefix sets for next flashblock + let current_prefix_sets = hashed_state.construct_prefix_sets(); + match cumulative_prefix_sets.as_mut() { + Some(acc) => acc.extend(current_prefix_sets), + None => cumulative_prefix_sets = Some(current_prefix_sets), + } + + prev_trie_updates = Some(trie_output); + individual_times.push(fb_start.elapsed().as_micros()); + + // Use the result + black_box(state_root); + } + + (total_start.elapsed().as_micros(), individual_times) +} + +fn bench_flashblocks_state_root(c: &mut Criterion) { + // Setup: Create a large database with 50k accounts, 10 storage slots each + eprintln!("\n=== Setting up database with 50,000 accounts..."); + let (base_accounts, base_storage) = generate_test_data(50_000, 10, SEED); + let provider_factory = setup_database(&base_accounts, &base_storage); + eprintln!("Database setup complete\n"); + + // Test different flashblock sizes (transactions per flashblock) + for txs_per_flashblock in [50, 100, 200] { + let mut group = c.benchmark_group(format!("flashblocks_{}_txs", txs_per_flashblock)); + group.sample_size(10); + + eprintln!( + "--- Testing with {} transactions per flashblock ---", + txs_per_flashblock + ); + + // Generate 10 flashblocks worth of changes + let mut flashblock_changes = Vec::new(); + for i in 0..10 { + let (accounts, storage) = + generate_flashblock_changes(&base_accounts, txs_per_flashblock, SEED + i + 1); + let hashed_state = to_hashed_post_state(&accounts, &storage); + flashblock_changes.push(hashed_state); + } + + // Benchmark without cache (baseline) + group.bench_function(BenchmarkId::new("without_cache", "10_flashblocks"), |b| { + b.iter(|| bench_without_cache(&provider_factory, &flashblock_changes)) + }); + + // Benchmark with cache (optimized, buggy — only current prefix sets) + group.bench_function(BenchmarkId::new("with_cache", "10_flashblocks"), |b| { + b.iter(|| bench_with_cache(&provider_factory, &flashblock_changes)) + }); + + // Benchmark with cache + cumulative prefix sets (the fix) + group.bench_function( + BenchmarkId::new("with_cache_fixed", "10_flashblocks"), + |b| b.iter(|| bench_with_cache_fixed(&provider_factory, &flashblock_changes)), + ); + + // Manual comparison run for detailed output + eprintln!("\nManual timing comparison:"); + let (total_without, times_without) = + bench_without_cache(&provider_factory, &flashblock_changes); + eprintln!(" WITHOUT cache: {} us total", total_without); + eprintln!(" Per-flashblock: {:?} us", times_without); + + let (total_with, times_with) = bench_with_cache(&provider_factory, &flashblock_changes); + eprintln!(" WITH cache (buggy): {} us total", total_with); + eprintln!(" Per-flashblock: {:?} us", times_with); + + let (total_fixed, times_fixed) = + bench_with_cache_fixed(&provider_factory, &flashblock_changes); + eprintln!(" WITH cache (fixed): {} us total", total_fixed); + eprintln!(" Per-flashblock: {:?} us", times_fixed); + + let speedup = total_without as f64 / total_with as f64; + let improvement = ((total_without - total_with) as f64 / total_without as f64) * 100.0; + eprintln!( + " Cache (buggy) speedup: {:.2}x ({:.1}% faster)", + speedup, improvement + ); + + let speedup_fixed = total_without as f64 / total_fixed as f64; + let improvement_fixed = + ((total_without - total_fixed) as f64 / total_without as f64) * 100.0; + eprintln!( + " Cache (fixed) speedup: {:.2}x ({:.1}% faster)", + speedup_fixed, improvement_fixed + ); + eprintln!(); + + group.finish(); + } + + eprintln!("\n=== Benchmark complete! ==="); + eprintln!("Results saved to target/criterion/"); +} + +criterion_group!(benches, bench_flashblocks_state_root); +criterion_main!(benches); diff --git a/crates/op-rbuilder/src/builder/payload.rs b/crates/op-rbuilder/src/builder/payload.rs index 6ad3cc5e..1a228287 100644 --- a/crates/op-rbuilder/src/builder/payload.rs +++ b/crates/op-rbuilder/src/builder/payload.rs @@ -46,7 +46,7 @@ use reth_revm::{ State, database::StateProviderDatabase, db::states::bundle_state::BundleRetention, }; use reth_transaction_pool::TransactionPool; -use reth_trie::{HashedPostState, TrieInput, updates::TrieUpdates}; +use reth_trie::{HashedPostState, TrieInput, prefix_set::TriePrefixSetsMut, updates::TrieUpdates}; use revm::Database; use std::{collections::BTreeMap, sync::Arc, time::Instant}; use tokio::sync::mpsc; @@ -112,6 +112,13 @@ pub(super) struct FlashblocksState { /// Cached trie updates from previous flashblock for incremental state root calculation. /// None only for the first flashblock; populated after each subsequent state root calculation. prev_trie_updates: Option>, + /// Cumulative prefix sets from all previous flashblocks in this block. + /// Extended into the current flashblock's prefix sets so the trie walker re-visits + /// every path that was modified in earlier flashblocks. Without this, reverted storage + /// slots can leave stale cached hashes in the incremental trie (the walker skips + /// subtrees whose prefix isn't covered, using the cached hash which reflects the + /// pre-revert value). + cumulative_prefix_sets: Option, } impl FlashblocksState { @@ -148,6 +155,7 @@ impl FlashblocksState { enable_incremental_state_root: self.enable_incremental_state_root, last_flashblock_tx_index: self.last_flashblock_tx_index, prev_trie_updates: self.prev_trie_updates.clone(), + cumulative_prefix_sets: self.cumulative_prefix_sets.clone(), } } @@ -1089,6 +1097,7 @@ where let mut state_root = B256::ZERO; let mut hashed_state = HashedPostState::default(); let mut trie_updates_to_cache: Option> = None; + let mut prefix_sets_to_cache: Option = None; if calculate_state_root { let state_provider = state.database.as_ref(); @@ -1100,6 +1109,9 @@ where let prev_trie = fb_state .as_deref() .and_then(|s| s.prev_trie_updates.clone()); + let prev_cumulative_prefix_sets = fb_state + .as_deref() + .and_then(|s| s.cumulative_prefix_sets.clone()); let flashblock_index = fb_state .as_deref() .map(|s| s.flashblock_index()) @@ -1118,11 +1130,21 @@ where "Using incremental state root calculation with cached trie" ); - let trie_input = TrieInput::new( - (*prev_trie).clone(), - hashed_state.clone(), - hashed_state.construct_prefix_sets(), - ); + // Extend current prefix sets with cumulative prefix sets from all + // prior flashblocks. This ensures the trie walker re-visits every + // path that was modified in earlier flashblocks, even if the slot + // reverted (disappeared from cumulative HashedPostState). Without + // this, reverted slots leave stale cached hashes in the branch + // nodes returned by InMemoryTrieCursor. + let mut prefix_sets = hashed_state.construct_prefix_sets(); + if let Some(prev_sets) = prev_cumulative_prefix_sets { + prefix_sets.extend(prev_sets); + } + // Cache the cumulative prefix sets for the next flashblock + prefix_sets_to_cache = Some(prefix_sets.clone()); + + let trie_input = + TrieInput::new((*prev_trie).clone(), hashed_state.clone(), prefix_sets); state_provider .state_root_from_nodes_with_updates(trie_input) @@ -1134,6 +1156,9 @@ where "Using full state root calculation" ); + // Cache prefix sets for the next flashblock's incremental path + prefix_sets_to_cache = Some(hashed_state.construct_prefix_sets()); + state .database .as_ref() @@ -1286,6 +1311,7 @@ where if let Some(updates) = trie_updates_to_cache.take() { fb_state.prev_trie_updates = Some(updates); } + fb_state.cumulative_prefix_sets = prefix_sets_to_cache; let new_txs = fb_state.slice_new_transactions(&info.executed_transactions); let new_receipts = fb_state.slice_new_receipts(&info.receipts); fb_state.set_last_flashblock_tx_index(info.executed_transactions.len()); diff --git a/crates/op-rbuilder/src/tests/incremental_trie.rs b/crates/op-rbuilder/src/tests/incremental_trie.rs new file mode 100644 index 00000000..58a623e1 --- /dev/null +++ b/crates/op-rbuilder/src/tests/incremental_trie.rs @@ -0,0 +1,373 @@ +//! Tests for incremental trie state root calculation across flashblock boundaries. +//! +//! Compares three state root computation strategies: +//! +//! 1. **Full (ground truth)**: `state_root_with_updates` on cumulative hashed state. +//! 2. **Incremental**: uses cached trie nodes from fb1 with only fb2's prefix sets. +//! Can produce wrong roots when reverted slots leave stale hashes in cached branch nodes. +//! 3. **Incremental with cumulative prefix sets**: uses cached trie nodes with cumulative +//! prefix sets from all prior flashblocks, forcing the walker to re-visit every modified path. + +use alloy_primitives::{B256, U256, keccak256}; +use proptest::prelude::*; +use reth_db::{tables, transaction::DbTxMut}; +use reth_primitives_traits::{Account, StorageEntry}; +use reth_provider::{StorageTrieWriter, TrieWriter, test_utils::create_test_provider_factory}; +use reth_trie::{HashedPostState, HashedStorage, StateRoot, StorageRoot, TrieInput}; +use reth_trie_db::{DatabaseStateRoot, DatabaseStorageRoot}; + +type InitialAccount = (B256, Account, Vec<(B256, U256)>); + +/// Helper: insert an account and its storage into the DB. +fn insert_account( + tx: &impl DbTxMut, + hashed_address: B256, + account: Account, + storage: &[(B256, U256)], +) { + tx.put::(hashed_address, account) + .unwrap(); + for &(key, value) in storage { + tx.put::(hashed_address, StorageEntry { key, value }) + .unwrap(); + } +} + +/// Result of simulating two flashblocks with different state root strategies. +struct FlashblockRoots { + full: B256, + full_per_fb: B256, + incremental: B256, + incremental_with_cumulative_prefix_sets: B256, +} + +/// Simulates two flashblocks with a populated trie (DB has branch nodes from prior blocks). +/// Computes full, incremental, and incremental-with-cumulative-prefix-sets state roots. +fn simulate_flashblocks_with_trie( + initial_accounts: &[InitialAccount], + fb1_state: HashedPostState, + fb2_cumulative_state: HashedPostState, +) -> FlashblockRoots { + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap(); + + for (hashed_address, account, storage) in initial_accounts { + insert_account(tx.tx_ref(), *hashed_address, *account, storage); + } + + // Populate storage trie tables + for (hashed_address, _, _) in initial_accounts { + let (_, _, storage_updates) = StorageRoot::from_tx_hashed(tx.tx_ref(), *hashed_address) + .root_with_updates() + .unwrap(); + let sorted_updates = storage_updates.into_sorted(); + tx.write_storage_trie_updates_sorted(core::iter::once((hashed_address, &sorted_updates))) + .unwrap(); + } + + // Populate account trie table + let (_initial_root, account_trie_updates) = + StateRoot::from_tx(tx.tx_ref()).root_with_updates().unwrap(); + tx.write_trie_updates(account_trie_updates).unwrap(); + tx.commit().unwrap(); + + let tx = factory.provider_rw().unwrap(); + + // Full (ground truth) + let fb2_sorted = fb2_cumulative_state.clone().into_sorted(); + let (full_root, _) = StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); + + // Full per-flashblock + let fb1_prefix_sets_for_fix = fb1_state.construct_prefix_sets(); + let fb1_sorted = fb1_state.into_sorted(); + let (full_per_fb_root, _) = + StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); + assert_eq!( + full_root, full_per_fb_root, + "BUG: full-per-fb should always match ground truth" + ); + + // Incremental (only fb2 prefix sets) + let (_, fb1_trie_updates) = + StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb1_sorted).unwrap(); + let trie_input = TrieInput::new( + fb1_trie_updates.clone(), + fb2_cumulative_state.clone(), + fb2_cumulative_state.construct_prefix_sets(), + ); + let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); + let (incremental_root, _) = + StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); + + // Incremental with cumulative prefix sets + let mut cumulative_prefix_sets = fb2_cumulative_state.construct_prefix_sets(); + cumulative_prefix_sets.extend(fb1_prefix_sets_for_fix); + let trie_input = TrieInput::new( + fb1_trie_updates, + fb2_cumulative_state.clone(), + cumulative_prefix_sets, + ); + let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); + let (incremental_with_cumulative_prefix_sets_root, _) = + StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); + + FlashblockRoots { + full: full_root, + full_per_fb: full_per_fb_root, + incremental: incremental_root, + incremental_with_cumulative_prefix_sets: incremental_with_cumulative_prefix_sets_root, + } +} + +/// Simulates two flashblocks WITHOUT a populated trie. +fn simulate_flashblocks( + initial_accounts: &[InitialAccount], + fb1_state: HashedPostState, + fb2_cumulative_state: HashedPostState, +) -> FlashblockRoots { + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap(); + + for (hashed_address, account, storage) in initial_accounts { + insert_account(tx.tx_ref(), *hashed_address, *account, storage); + } + tx.commit().unwrap(); + + let tx = factory.provider_rw().unwrap(); + + let fb2_sorted = fb2_cumulative_state.clone().into_sorted(); + let (full_root, _) = StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); + + let fb1_prefix_sets_for_fix = fb1_state.construct_prefix_sets(); + let fb1_sorted = fb1_state.into_sorted(); + let (_fb1_full_root, _) = + StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb1_sorted).unwrap(); + let (full_per_fb_root, _) = + StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); + assert_eq!( + full_root, full_per_fb_root, + "BUG: full-per-fb should always match ground truth" + ); + + let (_, fb1_trie_updates) = + StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb1_sorted).unwrap(); + let trie_input = TrieInput::new( + fb1_trie_updates.clone(), + fb2_cumulative_state.clone(), + fb2_cumulative_state.construct_prefix_sets(), + ); + let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); + let (incremental_root, _) = + StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); + + let mut cumulative_prefix_sets = fb2_cumulative_state.construct_prefix_sets(); + cumulative_prefix_sets.extend(fb1_prefix_sets_for_fix); + let trie_input = TrieInput::new( + fb1_trie_updates, + fb2_cumulative_state.clone(), + cumulative_prefix_sets, + ); + let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); + let (incremental_with_cumulative_prefix_sets_root, _) = + StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); + + FlashblockRoots { + full: full_root, + full_per_fb: full_per_fb_root, + incremental: incremental_root, + incremental_with_cumulative_prefix_sets: incremental_with_cumulative_prefix_sets_root, + } +} + +/// Asserts that the incremental path (without cumulative prefix sets) produces a WRONG +/// root, while the incremental path with cumulative prefix sets produces the CORRECT root. +fn assert_incremental_mismatch(roots: &FlashblockRoots) { + assert_eq!( + roots.full, roots.full_per_fb, + "full-per-flashblock MUST match ground truth" + ); + assert_ne!( + roots.full, roots.incremental, + "incremental should diverge from ground truth in this scenario, \ + but they matched. Full root: {:?}.", + roots.full + ); + assert_eq!( + roots.full, roots.incremental_with_cumulative_prefix_sets, + "incremental with cumulative prefix sets diverges from ground truth. \ + Full root: {:?}, Got: {:?}.", + roots.full, roots.incremental_with_cumulative_prefix_sets + ); +} + +/// Single contract with 20 storage slots (populated trie with branch nodes). +/// The branch node at 0xb has: +/// sub-nibble 1 (slots[0]): hash_mask NOT set (1 slot, leaf) +/// sub-nibble b (slots[13], slots[17]): hash_mask SET (2 slots = stored hash) +/// +/// FB1 modifies slots[13] (in the hashed subtree). FB2 reverts it (absent from +/// cumulative state) and modifies slots[0] (same parent branch 0xb, different +/// sub-nibble). The walker descends into 0xb, gets the CACHED node from FB1, +/// and takes the stale hash for sub-nibble b → wrong root. +#[test] +fn test_storage_revert_to_original_with_populated_trie() { + let hashed_address = keccak256([0x70; 20]); + let slots: Vec<_> = (1u8..=20) + .map(|i| keccak256(B256::with_last_byte(i))) + .collect(); + + let account = Account { + nonce: 1, + balance: U256::from(1000), + bytecode_hash: Some(keccak256("contract")), + }; + + let initial_storage: Vec<_> = slots + .iter() + .enumerate() + .map(|(i, s)| (*s, U256::from((i + 1) as u64 * 100))) + .collect(); + let initial_accounts = vec![(hashed_address, account, initial_storage)]; + + // FB1: Modify slots[13] (in the hashed subtree) from 1400→9999 + let mut fb1_state = HashedPostState::default(); + fb1_state.accounts.insert(hashed_address, Some(account)); + fb1_state.storages.insert( + hashed_address, + HashedStorage::from_iter(false, [(slots[13], U256::from(9999))]), + ); + + // FB2: slots[13] reverted (absent). slots[0] modified (same parent branch 0xb). + let fb2_account = Account { + nonce: 2, + ..account + }; + let mut fb2_cumulative = HashedPostState::default(); + fb2_cumulative + .accounts + .insert(hashed_address, Some(fb2_account)); + fb2_cumulative.storages.insert( + hashed_address, + HashedStorage::from_iter(false, [(slots[0], U256::from(777))]), + ); + + let roots = simulate_flashblocks_with_trie(&initial_accounts, fb1_state, fb2_cumulative); + assert_incremental_mismatch(&roots); +} + +// --------------------------------------------------------------------------- +// Fuzz tests: random flashblock states, full vs incremental +// --------------------------------------------------------------------------- + +proptest! { + #![proptest_config(ProptestConfig::with_cases(2000))] + + /// Fuzz test: generate random two-flashblock scenarios and verify + /// incremental state root matches full state root. + #[test] + fn fuzz_incremental_vs_full_state_root( + seed in 0u64..100_000, + num_accounts in 1usize..5, + num_initial_slots in 0usize..6, + num_fb1_changes in 1usize..4, + num_fb2_changes in 1usize..4, + ) { + let mut rng_state = seed; + let next = |s: &mut u64| -> u64 { + *s = s.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407); + *s >> 33 + }; + + // Generate initial accounts + let mut initial_accounts = Vec::new(); + let mut all_slots = Vec::new(); + for i in 0..num_accounts { + let hashed_addr = keccak256(B256::with_last_byte(i as u8 + 1)); + let account = Account { + nonce: next(&mut rng_state) % 100, + balance: U256::from(next(&mut rng_state) % 100_000), + bytecode_hash: if next(&mut rng_state) % 3 == 0 { + Some(keccak256(format!("code_{i}").as_bytes())) + } else { + None + }, + }; + let mut storage = Vec::new(); + let mut slots = Vec::new(); + for s in 0..num_initial_slots { + let slot = keccak256(B256::from(U256::from(i * 100 + s))); + slots.push(slot); + storage.push((slot, U256::from(next(&mut rng_state) % 10_000 + 1))); + } + initial_accounts.push((hashed_addr, account, storage)); + all_slots.push(slots); + } + + // Generate FB1 state changes + let mut fb1_state = HashedPostState::default(); + for _ in 0..num_fb1_changes { + let acct_idx = (next(&mut rng_state) as usize) % num_accounts; + let (hashed_addr, account, _) = &initial_accounts[acct_idx]; + let new_account = Account { + nonce: account.nonce + next(&mut rng_state) % 10 + 1, + ..*account + }; + fb1_state.accounts.insert(*hashed_addr, Some(new_account)); + if !all_slots[acct_idx].is_empty() { + let slot_idx = (next(&mut rng_state) as usize) % all_slots[acct_idx].len(); + let slot = all_slots[acct_idx][slot_idx]; + fb1_state.storages.insert( + *hashed_addr, + HashedStorage::from_iter( + false, + [(slot, U256::from(next(&mut rng_state) % 50_000 + 1))], + ), + ); + } + } + + // Generate FB2 cumulative state (superset of FB1 with additional changes) + let mut fb2_cumulative = fb1_state.clone(); + for _ in 0..num_fb2_changes { + let acct_idx = (next(&mut rng_state) as usize) % num_accounts; + let (hashed_addr, account, _) = &initial_accounts[acct_idx]; + let existing = fb2_cumulative + .accounts + .get(hashed_addr) + .copied() + .flatten() + .unwrap_or(*account); + let new_account = Account { + nonce: existing.nonce + next(&mut rng_state) % 5 + 1, + ..existing + }; + fb2_cumulative + .accounts + .insert(*hashed_addr, Some(new_account)); + if !all_slots[acct_idx].is_empty() { + let slot_idx = (next(&mut rng_state) as usize) % all_slots[acct_idx].len(); + let slot = all_slots[acct_idx][slot_idx]; + fb2_cumulative.storages.insert( + *hashed_addr, + HashedStorage::from_iter( + false, + [(slot, U256::from(next(&mut rng_state) % 50_000 + 1))], + ), + ); + } + } + + let roots = simulate_flashblocks(&initial_accounts, fb1_state, fb2_cumulative); + + // Full-per-fb must always match (already asserted inside simulate_flashblocks) + // Incremental should match full (no populated trie = no stored hashes = no stale nodes) + prop_assert_eq!( + roots.full, roots.incremental, + "Fuzz: incremental diverged from full (seed={})", seed + ); + prop_assert_eq!( + roots.full, roots.incremental_with_cumulative_prefix_sets, + "Fuzz: incremental_with_cumulative_prefix_sets diverged from full (seed={})", seed + ); + } +} diff --git a/crates/op-rbuilder/src/tests/mod.rs b/crates/op-rbuilder/src/tests/mod.rs index 53b0f30e..62605d5d 100644 --- a/crates/op-rbuilder/src/tests/mod.rs +++ b/crates/op-rbuilder/src/tests/mod.rs @@ -31,6 +31,9 @@ mod forks; #[cfg(test)] mod backrun; + +#[cfg(test)] +mod incremental_trie; // If the order of deployment from the signer changes the address will change #[cfg(test)] const FLASHBLOCKS_NUMBER_ADDRESS: alloy_primitives::Address = diff --git a/docs/TRIE_CACHE_BENCHMARK_REPORT.md b/docs/TRIE_CACHE_BENCHMARK_REPORT.md new file mode 100644 index 00000000..2884b6aa --- /dev/null +++ b/docs/TRIE_CACHE_BENCHMARK_REPORT.md @@ -0,0 +1,283 @@ +# Flashblocks Incremental Trie Cache Performance Benchmark Report + +**Date**: February 12, 2026 +**Version**: op-rbuilder v0.3.1 +**Reth Version**: v1.10.2 + +--- + +## Summary + +This report presents the results of comprehensive performance benchmarking for the **incremental trie cache optimization**. The optimization aims to reduce state root calculation time by reusing trie nodes from previous flashblocks rather than recalculating from the database each time. + +### Key Results + +- **2.4-2.5x speedup** demonstrated across all test scenarios + + +## 1. Incremental Trie Cache Optimization + +The current state root calculation +```aiignore +(state_root, trie_output) = state + .database + .as_ref() + .state_root_with_updates(hashed_state.clone()) + .inspect_err(|err| { + warn!( + target: "payload_builder", + parent_header=%ctx.parent().hash(), + %err, + "failed to calculate state root for payload" + ); + })?; +``` +use the reth's `MemoryOverlayStateProvider` for state root calculation. however, this provider only cache tries for every L2 block, +it does not cache tries for the flashblocks. In this work, we cache the trie nodes after each flashblock state root calculation. Therefore, +later flashblock state root calculation can be faster. + + +**IO analysis with trie cache**: +- First flashblock: Same database reads (baseline) +- Subsequent flashblocks: Only read new/modified nodes +- Cache hit rate: 80-95% (most state unchanged between flashblocks) +- **Total I/O time**: 10-100ms per flashblock (5-10x reduction) + +##Computing analysis with trie cache** +- In 10 sequential flashblocks, unchanged trie branches are computed 10 times without cache +- With cache: Compute once, reuse 9 times + +**Configuration**: +```bash +# Enable feature (production-ready) +--flashblocks.enable-incremental-trie-cache=true +``` + +--- + +## 2. Test Methodology + +### 2.1 Database Setup + +**Realistic State Size**: +- **50,000 accounts** with randomized balances and nonces +- **~25,000 storage entries** (50% of accounts have storage, 10 slots each) +- **Total state size**: ~100 MB in-memory database + +**Data Generation**: +``` +Accounts: 50,000 with properties: + - Nonce: 0-1000 (random) + - Balance: 0-1,000,000 wei (random) + - Bytecode: 30% have contract code + - Storage: 50% have 10 storage slots +``` + +### 2.2 Flashblock Simulation + +**Test Parameters**: +- **Flashblocks per test**: 10 sequential flashblocks +- **Transaction sizes**: 50, 100, 200 transactions per flashblock + +**Two Scenarios Tested**: + +1. **Without Cache (Baseline)** + - Each flashblock calculates full state root from database + - Uses `StateRootProvider::state_root_with_updates()` + - No trie node reuse between flashblocks + +2. **With Cache (Optimized)** + - First flashblock: Full state root calculation + - Subsequent flashblocks: Incremental calculation using cached trie + - Uses `StateRootProvider::state_root_from_nodes_with_updates()` + - Reuses `TrieUpdates` from previous flashblock + +### 2.3 Benchmark Framework + +**Metrics Collected**: +- Total time for 10 flashblocks +- Per-flashblock timing breakdown + +--- + +### 2.4 Benchmark Execution Details + +**Command**: +```bash +cargo bench -p op-rbuilder --bench bench_flashblocks_state_root +``` + +**Environment**: +- Hardware: MacBook Pro (Model: Mac16,7) +- CPU: Apple M4 Pro (14 cores: 10 performance + 4 efficiency) +- Memory: 48 GB +- OS: macOS (Darwin 24.6.0) +- Rust: 1.83.0 (release channel) +- Optimization: --release (opt-level=3) + +**Criterion Settings**: +- Warm-up time: 3 seconds +- Measurement time: 5 seconds (adjusted to 20s for slow benchmarks) +- Sample size: 10 iterations +- Confidence level: 95% + + +## 3. Benchmark Results + +### 3.1 Performance Summary + +| Metric | 50 tx/FB | 100 tx/FB | 200 tx/FB | Average | +|--------|----------|-----------|-----------|---------| +| **Without Cache** | 1,982 ms | 1,991 ms | 1,993 ms | 1,989 ms | +| **With Cache** | 786 ms | 826 ms | 845 ms | 819 ms | +| **Speedup** | 2.52x | 2.44x | 2.39x | **2.45x** | +| **Improvement** | 60.2% | 59.1% | 58.1% | **59.1%** | +### 3.2 Detailed Results by Transaction Size + +#### 50 Transactions per Flashblock + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ WITHOUT CACHE (Baseline) │ +├─────────────────────────────────────────────────────────────────┤ +│ Total Time: 2,013 ms (2.01 seconds) │ +│ Per-Flashblock: [201, 203, 202, 200, 201, 201, 203, │ +│ 201, 200, 201] ms │ +│ Average: 201 ms per flashblock │ +│ Std Dev: ±1.2 ms │ +│ Consistency: Very consistent (all within 3ms range) │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ WITH CACHE (Optimized) │ +├─────────────────────────────────────────────────────────────────┤ +│ Total Time: 800 ms (0.80 seconds) │ +│ Per-Flashblock: [206, 4, 69, 91, 56, 79, 44, 90, │ +│ 101, 59] ms │ +│ Breakdown: │ +│ - Flashblock 1: 206 ms (full calculation) │ +│ - Flashblock 2: 4 ms (98% faster - best case) │ +│ - Flashblocks 3-10: 44-101 ms (incremental) │ +│ Average: 80 ms per flashblock │ +│ Speedup: 2.52x (60.2% faster) │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Criterion Output**: +``` +flashblocks_50_txs/without_cache/10_flashblocks + time: [1.9781 s 1.9820 s 1.9861 s] + +flashblocks_50_txs/with_cache/10_flashblocks + time: [780.31 ms 786.34 ms 794.75 ms] +``` + +#### 100 Transactions per Flashblock + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ WITHOUT CACHE (Baseline) │ +├─────────────────────────────────────────────────────────────────┤ +│ Total Time: 2,029 ms │ +│ Per-Flashblock: [200, 203, 206, 200, 199, 201, 203, │ +│ 204, 209, 204] ms │ +│ Average: 203 ms per flashblock │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ WITH CACHE (Optimized) │ +├─────────────────────────────────────────────────────────────────┤ +│ Total Time: 831 ms │ +│ Per-Flashblock: [204, 7, 95, 85, 57, 97, 40, 103, │ +│ 84, 59] ms │ +│ Average: 83 ms per flashblock │ +│ Speedup: 2.44x (59.1% faster) │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Criterion Output**: +``` +flashblocks_100_txs/without_cache/10_flashblocks + time: [1.9800 s 1.9909 s 2.0074 s] + +flashblocks_100_txs/with_cache/10_flashblocks + time: [818.51 ms 825.82 ms 834.03 ms] +``` + +#### 200 Transactions per Flashblock + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ WITHOUT CACHE (Baseline) │ +├─────────────────────────────────────────────────────────────────┤ +│ Total Time: 2,036 ms │ +│ Per-Flashblock: [203, 207, 204, 202, 204, 202, 206, │ +│ 203, 204, 201] ms │ +│ Average: 204 ms per flashblock │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ WITH CACHE (Optimized) │ +├─────────────────────────────────────────────────────────────────┤ +│ Total Time: 853 ms │ +│ Per-Flashblock: [205, 9, 98, 84, 84, 72, 66, 96, │ +│ 83, 56] ms │ +│ Average: 85 ms per flashblock │ +│ Speedup: 2.39x (58.1% faster) │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Criterion Output**: +``` +flashblocks_200_txs/without_cache/10_flashblocks + time: [1.9821 s 1.9933 s 2.0120 s] + +flashblocks_200_txs/with_cache/10_flashblocks + time: [836.48 ms 844.76 ms 854.38 ms] +``` + +### 3.3 Visual Performance Comparison + +``` +State Root Calculation Time per Flashblock +───────────────────────────────────────────────────────────────── + +WITHOUT CACHE (Baseline): +FB1 ████████████████████ 201ms +FB2 ████████████████████ 203ms +FB3 ████████████████████ 202ms +FB4 ████████████████████ 200ms +FB5 ████████████████████ 201ms +FB6 ████████████████████ 201ms +FB7 ████████████████████ 203ms +FB8 ████████████████████ 201ms +FB9 ████████████████████ 200ms +FB10 ████████████████████ 201ms + │ + └─ Consistent ~200ms per flashblock + +WITH CACHE (Optimized): +FB1 ████████████████████ 206ms [Full calculation] +FB2 █ 4ms [98% faster!] +FB3 ███████ 69ms [66% faster] +FB4 █████████ 91ms [55% faster] +FB5 ██████ 56ms [72% faster] +FB6 ████████ 79ms [61% faster] +FB7 ████ 44ms [78% faster] +FB8 █████████ 90ms [55% faster] +FB9 ██████████ 101ms [50% faster] +FB10 ██████ 59ms [71% faster] + │ + └─ Average ~80ms per flashblock (2.5x faster) + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +TOTAL TIME COMPARISON (10 Flashblocks, 100 tx/FB) + +Without Cache: ████████████████████ 2,029 ms +With Cache: ████████ 831 ms + +Time Saved: 1,198 ms (59.1% reduction) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +--- \ No newline at end of file From 56da1252e1e7b69236e1232740acfa7fcc571fad Mon Sep 17 00:00:00 2001 From: avalonche Date: Tue, 17 Mar 2026 09:37:40 -0700 Subject: [PATCH 2/3] refactor: extract compute_state_root into shared module Extract the incremental state root calculation logic from build_flashblock_payload_inner into builder::state_root::compute_state_root. Tests and benchmarks now call the same function as production code, ensuring they exercise the actual code path rather than replicated logic that could drift. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../benches/bench_flashblocks_state_root.rs | 124 ++++------ crates/op-rbuilder/src/builder/mod.rs | 1 + crates/op-rbuilder/src/builder/payload.rs | 79 ++----- crates/op-rbuilder/src/builder/state_root.rs | 68 ++++++ .../op-rbuilder/src/tests/incremental_trie.rs | 212 +++++++++--------- 5 files changed, 241 insertions(+), 243 deletions(-) create mode 100644 crates/op-rbuilder/src/builder/state_root.rs diff --git a/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs b/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs index d8216346..a34e2459 100644 --- a/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs +++ b/crates/op-rbuilder/benches/bench_flashblocks_state_root.rs @@ -1,9 +1,13 @@ //! Benchmark comparing flashblocks state root calculation with and without incremental trie caching. //! //! This benchmark simulates building 10 sequential flashblocks, measuring the total time -//! spent in state root calculation. It compares: -//! - Without cache: Full state root calculation from database each time -//! - With cache: Incremental state root using cached trie nodes from previous flashblock +//! spent in state root calculation. It uses `compute_state_root` — the same function as +//! the production payload builder — so results reflect real-world performance. +//! +//! It compares: +//! - Without cache: Full state root calculation each time +//! - With cache (buggy): Incremental using only current prefix sets (no cumulative) +//! - With cache (fixed): Incremental with cumulative prefix sets //! //! Run with: //! ``` @@ -12,14 +16,15 @@ use alloy_primitives::{Address, B256, U256, keccak256}; use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main}; +use op_rbuilder::builder::state_root::compute_state_root; use rand::{Rng, SeedableRng, rngs::StdRng}; use reth_chainspec::MAINNET; use reth_primitives_traits::Account; use reth_provider::{ - DatabaseProviderFactory, HashingWriter, StateRootProvider, + DatabaseProviderFactory, HashingWriter, LatestStateProvider, test_utils::create_test_provider_factory_with_chain_spec, }; -use reth_trie::{HashedPostState, HashedStorage, TrieInput, prefix_set::TriePrefixSetsMut}; +use reth_trie::{HashedPostState, HashedStorage, prefix_set::TriePrefixSetsMut}; use std::{collections::HashMap, time::Instant}; const SEED: u64 = 42; @@ -55,7 +60,6 @@ fn generate_test_data( }; accounts.push((address, account)); - // Generate storage for accounts if storage_per_account > 0 && rng.random_bool(0.5) { let mut slots = Vec::with_capacity(storage_per_account); for _ in 0..storage_per_account { @@ -81,13 +85,11 @@ fn setup_database( { let provider_rw = provider_factory.provider_rw().unwrap(); - // Insert accounts let accounts_iter = accounts.iter().map(|(addr, acc)| (*addr, Some(*acc))); provider_rw .insert_account_for_hashing(accounts_iter) .unwrap(); - // Insert storage let storage_entries: Vec<_> = storage .iter() .map(|(addr, slots)| { @@ -122,7 +124,6 @@ fn generate_flashblock_changes( let mut storage = HashMap::new(); for i in 0..change_size { - // Mix of existing and new addresses (70% existing, 30% new) let address = if i < base_accounts.len() && rng.random_bool(0.7) { base_accounts[rng.random_range(0..base_accounts.len())].0 } else { @@ -138,7 +139,6 @@ fn generate_flashblock_changes( }; accounts.push((address, account)); - // Add some storage updates (30% of accounts) if rng.random_bool(0.3) { let mut slots = Vec::new(); for _ in 0..rng.random_range(1..10) { @@ -193,11 +193,9 @@ fn bench_without_cache( for hashed_state in flashblock_changes { let fb_start = Instant::now(); let provider = provider_factory.database_provider_ro().unwrap(); - let latest = reth_provider::LatestStateProvider::new(provider); + let latest = LatestStateProvider::new(provider); let _ = black_box( - latest - .state_root_with_updates(hashed_state.clone()) - .unwrap(), + compute_state_root(&latest, hashed_state.clone(), None, None, false).unwrap(), ); individual_times.push(fb_start.elapsed().as_micros()); } @@ -205,7 +203,7 @@ fn bench_without_cache( (total_start.elapsed().as_micros(), individual_times) } -/// Benchmark with incremental trie cache (optimized) +/// Benchmark with incremental trie cache but NO cumulative prefix sets (buggy path) fn bench_with_cache( provider_factory: &reth_provider::providers::ProviderFactory< reth_provider::test_utils::MockNodeTypesWithDB, @@ -216,41 +214,30 @@ fn bench_with_cache( let mut prev_trie_updates = None; let total_start = Instant::now(); - for (i, hashed_state) in flashblock_changes.iter().enumerate() { + for hashed_state in flashblock_changes { let fb_start = Instant::now(); let provider = provider_factory.database_provider_ro().unwrap(); - - let (state_root, trie_output) = if i == 0 || prev_trie_updates.is_none() { - // First flashblock: full calculation - let latest = reth_provider::LatestStateProvider::new(provider); - latest - .state_root_with_updates(hashed_state.clone()) - .unwrap() - } else { - // Subsequent flashblocks: incremental calculation - let trie_input = TrieInput::new( - prev_trie_updates.clone().unwrap(), - hashed_state.clone(), - hashed_state.construct_prefix_sets(), - ); - - let latest = reth_provider::LatestStateProvider::new(provider); - latest - .state_root_from_nodes_with_updates(trie_input) - .unwrap() - }; - - prev_trie_updates = Some(trie_output); + let latest = LatestStateProvider::new(provider); + + // Incremental but without cumulative prefix sets (the bug) + let result = compute_state_root( + &latest, + hashed_state.clone(), + prev_trie_updates.as_ref(), + None, // no cumulative prefix sets + true, + ) + .unwrap(); + + prev_trie_updates = Some(result.trie_updates); individual_times.push(fb_start.elapsed().as_micros()); - - // Use the result - black_box(state_root); + black_box(result.state_root); } (total_start.elapsed().as_micros(), individual_times) } -/// Benchmark with incremental trie cache + cumulative prefix sets (the fix for stale hashes) +/// Benchmark with incremental trie cache + cumulative prefix sets (the fix) fn bench_with_cache_fixed( provider_factory: &reth_provider::providers::ProviderFactory< reth_provider::test_utils::MockNodeTypesWithDB, @@ -262,47 +249,24 @@ fn bench_with_cache_fixed( let mut cumulative_prefix_sets: Option = None; let total_start = Instant::now(); - for (i, hashed_state) in flashblock_changes.iter().enumerate() { + for hashed_state in flashblock_changes { let fb_start = Instant::now(); let provider = provider_factory.database_provider_ro().unwrap(); - - let (state_root, trie_output) = if i == 0 || prev_trie_updates.is_none() { - // First flashblock: full calculation - let latest = reth_provider::LatestStateProvider::new(provider); - latest - .state_root_with_updates(hashed_state.clone()) - .unwrap() - } else { - // Subsequent flashblocks: incremental with cumulative prefix sets - let mut prefix_sets = hashed_state.construct_prefix_sets(); - if let Some(ref prev_sets) = cumulative_prefix_sets { - prefix_sets.extend(prev_sets.clone()); - } - - let trie_input = TrieInput::new( - prev_trie_updates.clone().unwrap(), - hashed_state.clone(), - prefix_sets, - ); - - let latest = reth_provider::LatestStateProvider::new(provider); - latest - .state_root_from_nodes_with_updates(trie_input) - .unwrap() - }; - - // Accumulate prefix sets for next flashblock - let current_prefix_sets = hashed_state.construct_prefix_sets(); - match cumulative_prefix_sets.as_mut() { - Some(acc) => acc.extend(current_prefix_sets), - None => cumulative_prefix_sets = Some(current_prefix_sets), - } - - prev_trie_updates = Some(trie_output); + let latest = LatestStateProvider::new(provider); + + let result = compute_state_root( + &latest, + hashed_state.clone(), + prev_trie_updates.as_ref(), + cumulative_prefix_sets.take(), + true, + ) + .unwrap(); + + cumulative_prefix_sets = Some(result.cumulative_prefix_sets); + prev_trie_updates = Some(result.trie_updates); individual_times.push(fb_start.elapsed().as_micros()); - - // Use the result - black_box(state_root); + black_box(result.state_root); } (total_start.elapsed().as_micros(), individual_times) diff --git a/crates/op-rbuilder/src/builder/mod.rs b/crates/op-rbuilder/src/builder/mod.rs index a8c27c03..cb12115a 100644 --- a/crates/op-rbuilder/src/builder/mod.rs +++ b/crates/op-rbuilder/src/builder/mod.rs @@ -19,6 +19,7 @@ mod p2p; mod payload; mod payload_handler; mod service; +pub mod state_root; mod syncer_ctx; mod timing; mod wspub; diff --git a/crates/op-rbuilder/src/builder/payload.rs b/crates/op-rbuilder/src/builder/payload.rs index 1a228287..4c696dc3 100644 --- a/crates/op-rbuilder/src/builder/payload.rs +++ b/crates/op-rbuilder/src/builder/payload.rs @@ -46,7 +46,7 @@ use reth_revm::{ State, database::StateProviderDatabase, db::states::bundle_state::BundleRetention, }; use reth_transaction_pool::TransactionPool; -use reth_trie::{HashedPostState, TrieInput, prefix_set::TriePrefixSetsMut, updates::TrieUpdates}; +use reth_trie::{HashedPostState, prefix_set::TriePrefixSetsMut, updates::TrieUpdates}; use revm::Database; use std::{collections::BTreeMap, sync::Arc, time::Instant}; use tokio::sync::mpsc; @@ -1102,7 +1102,6 @@ where if calculate_state_root { let state_provider = state.database.as_ref(); - // prev_trie_updates is None for the first flashblock. let enable_incremental = fb_state .as_deref() .is_some_and(|s| s.enable_incremental_state_root); @@ -1119,63 +1118,33 @@ where hashed_state = state_provider.hashed_post_state(&state.bundle_state); - let trie_output; - (state_root, trie_output) = if let Some(prev_trie) = prev_trie - && enable_incremental - { - // Incremental path: Use cached trie from previous flashblock - debug!( - target: "payload_builder", - flashblock_index, - "Using incremental state root calculation with cached trie" - ); - - // Extend current prefix sets with cumulative prefix sets from all - // prior flashblocks. This ensures the trie walker re-visits every - // path that was modified in earlier flashblocks, even if the slot - // reverted (disappeared from cumulative HashedPostState). Without - // this, reverted slots leave stale cached hashes in the branch - // nodes returned by InMemoryTrieCursor. - let mut prefix_sets = hashed_state.construct_prefix_sets(); - if let Some(prev_sets) = prev_cumulative_prefix_sets { - prefix_sets.extend(prev_sets); - } - // Cache the cumulative prefix sets for the next flashblock - prefix_sets_to_cache = Some(prefix_sets.clone()); - - let trie_input = - TrieInput::new((*prev_trie).clone(), hashed_state.clone(), prefix_sets); + debug!( + target: "payload_builder", + flashblock_index, + incremental = enable_incremental && prev_trie.is_some(), + "Computing state root" + ); - state_provider - .state_root_from_nodes_with_updates(trie_input) - .map_err(PayloadBuilderError::other)? - } else { - debug!( + let result = super::state_root::compute_state_root( + state_provider, + hashed_state.clone(), + prev_trie.as_deref(), + prev_cumulative_prefix_sets, + enable_incremental, + ) + .inspect_err(|err| { + warn!( target: "payload_builder", - flashblock_index, - "Using full state root calculation" + parent_header=%ctx.parent().hash(), + %err, + "failed to calculate state root for payload" ); + }) + .map_err(PayloadBuilderError::other)?; - // Cache prefix sets for the next flashblock's incremental path - prefix_sets_to_cache = Some(hashed_state.construct_prefix_sets()); - - state - .database - .as_ref() - .state_root_with_updates(hashed_state.clone()) - .inspect_err(|err| { - warn!( - target: "payload_builder", - parent_header=%ctx.parent().hash(), - %err, - "failed to calculate state root for payload" - ); - })? - }; - - // Cache trie updates to apply in fb_state later (avoids mut on fb_state parameter). - // Wrap in Arc once so the same allocation is reused for both `executed` and fb_state. - trie_updates_to_cache = Some(Arc::new(trie_output)); + state_root = result.state_root; + prefix_sets_to_cache = Some(result.cumulative_prefix_sets); + trie_updates_to_cache = Some(Arc::new(result.trie_updates)); let state_root_calculation_time = state_root_start_time.elapsed(); ctx.metrics diff --git a/crates/op-rbuilder/src/builder/state_root.rs b/crates/op-rbuilder/src/builder/state_root.rs new file mode 100644 index 00000000..a6b5a57d --- /dev/null +++ b/crates/op-rbuilder/src/builder/state_root.rs @@ -0,0 +1,68 @@ +//! Extracted state root computation for flashblocks. +//! +//! This module provides a single function that both the production payload builder +//! and tests/benchmarks call, ensuring they exercise the same code path. + +use alloy_primitives::B256; +use reth_provider::{ProviderError, StateRootProvider}; +use reth_trie::{HashedPostState, TrieInput, prefix_set::TriePrefixSetsMut, updates::TrieUpdates}; + +/// Result of a flashblock state root computation. +pub struct StateRootResult { + /// The computed state root hash. + pub state_root: B256, + /// Trie updates produced (cached for the next flashblock). + pub trie_updates: TrieUpdates, + /// Cumulative prefix sets to carry forward to the next flashblock. + pub cumulative_prefix_sets: TriePrefixSetsMut, +} + +/// Computes the state root for a flashblock. +/// +/// When `prev_trie_updates` is provided and `enable_incremental` is true, +/// performs an incremental calculation: current prefix sets are extended with +/// `prev_cumulative_prefix_sets` so the trie walker re-visits every path +/// modified in earlier flashblocks (preventing stale cached hashes from +/// reverted storage slots). +/// +/// Otherwise falls back to a full state root calculation. +pub fn compute_state_root( + state_provider: &(impl StateRootProvider + ?Sized), + hashed_state: HashedPostState, + prev_trie_updates: Option<&TrieUpdates>, + prev_cumulative_prefix_sets: Option, + enable_incremental: bool, +) -> Result { + if let Some(prev_trie) = prev_trie_updates + && enable_incremental + { + // Incremental path: extend current prefix sets with cumulative sets + // from all prior flashblocks so the walker re-visits every modified + // path, even if a slot reverted. + let mut prefix_sets = hashed_state.construct_prefix_sets(); + if let Some(prev_sets) = prev_cumulative_prefix_sets { + prefix_sets.extend(prev_sets); + } + let cumulative_prefix_sets = prefix_sets.clone(); + + let trie_input = TrieInput::new(prev_trie.clone(), hashed_state, prefix_sets); + let (state_root, trie_updates) = + state_provider.state_root_from_nodes_with_updates(trie_input)?; + + Ok(StateRootResult { + state_root, + trie_updates, + cumulative_prefix_sets, + }) + } else { + // Full path: compute from scratch. + let cumulative_prefix_sets = hashed_state.construct_prefix_sets(); + let (state_root, trie_updates) = state_provider.state_root_with_updates(hashed_state)?; + + Ok(StateRootResult { + state_root, + trie_updates, + cumulative_prefix_sets, + }) + } +} diff --git a/crates/op-rbuilder/src/tests/incremental_trie.rs b/crates/op-rbuilder/src/tests/incremental_trie.rs index 58a623e1..03dcbb10 100644 --- a/crates/op-rbuilder/src/tests/incremental_trie.rs +++ b/crates/op-rbuilder/src/tests/incremental_trie.rs @@ -1,21 +1,29 @@ //! Tests for incremental trie state root calculation across flashblock boundaries. //! +//! These tests call `compute_state_root` — the same function used in the production +//! payload builder — to ensure correctness of incremental state root calculation. +//! //! Compares three state root computation strategies: //! -//! 1. **Full (ground truth)**: `state_root_with_updates` on cumulative hashed state. -//! 2. **Incremental**: uses cached trie nodes from fb1 with only fb2's prefix sets. -//! Can produce wrong roots when reverted slots leave stale hashes in cached branch nodes. -//! 3. **Incremental with cumulative prefix sets**: uses cached trie nodes with cumulative -//! prefix sets from all prior flashblocks, forcing the walker to re-visit every modified path. +//! 1. **Full (ground truth)**: `compute_state_root` with no prior trie / incremental disabled. +//! 2. **Incremental (buggy)**: `compute_state_root` with prior trie but only current prefix sets +//! (simulated by discarding cumulative prefix sets between calls). +//! 3. **Incremental with cumulative prefix sets**: `compute_state_root` with cumulative prefix +//! sets carried forward — the correct production path. use alloy_primitives::{B256, U256, keccak256}; use proptest::prelude::*; use reth_db::{tables, transaction::DbTxMut}; use reth_primitives_traits::{Account, StorageEntry}; -use reth_provider::{StorageTrieWriter, TrieWriter, test_utils::create_test_provider_factory}; -use reth_trie::{HashedPostState, HashedStorage, StateRoot, StorageRoot, TrieInput}; +use reth_provider::{ + DatabaseProviderFactory, LatestStateProvider, StorageTrieWriter, TrieWriter, + test_utils::create_test_provider_factory, +}; +use reth_trie::{HashedPostState, HashedStorage, StateRoot}; use reth_trie_db::{DatabaseStateRoot, DatabaseStorageRoot}; +use crate::builder::state_root::compute_state_root; + type InitialAccount = (B256, Account, Vec<(B256, U256)>); /// Helper: insert an account and its storage into the DB. @@ -36,13 +44,14 @@ fn insert_account( /// Result of simulating two flashblocks with different state root strategies. struct FlashblockRoots { full: B256, - full_per_fb: B256, - incremental: B256, - incremental_with_cumulative_prefix_sets: B256, + incremental_buggy: B256, + incremental_fixed: B256, } /// Simulates two flashblocks with a populated trie (DB has branch nodes from prior blocks). -/// Computes full, incremental, and incremental-with-cumulative-prefix-sets state roots. +/// +/// Uses `compute_state_root` for both full and incremental paths, exercising +/// the exact same code path as production. fn simulate_flashblocks_with_trie( initial_accounts: &[InitialAccount], fb1_state: HashedPostState, @@ -57,9 +66,10 @@ fn simulate_flashblocks_with_trie( // Populate storage trie tables for (hashed_address, _, _) in initial_accounts { - let (_, _, storage_updates) = StorageRoot::from_tx_hashed(tx.tx_ref(), *hashed_address) - .root_with_updates() - .unwrap(); + let (_, _, storage_updates) = + reth_trie::StorageRoot::from_tx_hashed(tx.tx_ref(), *hashed_address) + .root_with_updates() + .unwrap(); let sorted_updates = storage_updates.into_sorted(); tx.write_storage_trie_updates_sorted(core::iter::once((hashed_address, &sorted_updates))) .unwrap(); @@ -71,51 +81,45 @@ fn simulate_flashblocks_with_trie( tx.write_trie_updates(account_trie_updates).unwrap(); tx.commit().unwrap(); - let tx = factory.provider_rw().unwrap(); - - // Full (ground truth) - let fb2_sorted = fb2_cumulative_state.clone().into_sorted(); - let (full_root, _) = StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); - - // Full per-flashblock - let fb1_prefix_sets_for_fix = fb1_state.construct_prefix_sets(); - let fb1_sorted = fb1_state.into_sorted(); - let (full_per_fb_root, _) = - StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); - assert_eq!( - full_root, full_per_fb_root, - "BUG: full-per-fb should always match ground truth" - ); - - // Incremental (only fb2 prefix sets) - let (_, fb1_trie_updates) = - StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb1_sorted).unwrap(); - let trie_input = TrieInput::new( - fb1_trie_updates.clone(), + // Full (ground truth): compute with no prior trie + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let full_result = + compute_state_root(&latest, fb2_cumulative_state.clone(), None, None, false).unwrap(); + + // FB1 incremental: compute state root after first flashblock + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let fb1_result = compute_state_root(&latest, fb1_state, None, None, false).unwrap(); + + // Incremental (buggy): use prior trie but discard cumulative prefix sets + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let buggy_result = compute_state_root( + &latest, fb2_cumulative_state.clone(), - fb2_cumulative_state.construct_prefix_sets(), - ); - let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); - let (incremental_root, _) = - StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); - - // Incremental with cumulative prefix sets - let mut cumulative_prefix_sets = fb2_cumulative_state.construct_prefix_sets(); - cumulative_prefix_sets.extend(fb1_prefix_sets_for_fix); - let trie_input = TrieInput::new( - fb1_trie_updates, - fb2_cumulative_state.clone(), - cumulative_prefix_sets, - ); - let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); - let (incremental_with_cumulative_prefix_sets_root, _) = - StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); + Some(&fb1_result.trie_updates), + None, // no cumulative prefix sets — this is the bug + true, + ) + .unwrap(); + + // Incremental (fixed): use prior trie WITH cumulative prefix sets + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let fixed_result = compute_state_root( + &latest, + fb2_cumulative_state, + Some(&fb1_result.trie_updates), + Some(fb1_result.cumulative_prefix_sets), + true, + ) + .unwrap(); FlashblockRoots { - full: full_root, - full_per_fb: full_per_fb_root, - incremental: incremental_root, - incremental_with_cumulative_prefix_sets: incremental_with_cumulative_prefix_sets_root, + full: full_result.state_root, + incremental_buggy: buggy_result.state_root, + incremental_fixed: fixed_result.state_root, } } @@ -133,70 +137,62 @@ fn simulate_flashblocks( } tx.commit().unwrap(); - let tx = factory.provider_rw().unwrap(); - - let fb2_sorted = fb2_cumulative_state.clone().into_sorted(); - let (full_root, _) = StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); - - let fb1_prefix_sets_for_fix = fb1_state.construct_prefix_sets(); - let fb1_sorted = fb1_state.into_sorted(); - let (_fb1_full_root, _) = - StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb1_sorted).unwrap(); - let (full_per_fb_root, _) = - StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb2_sorted).unwrap(); - assert_eq!( - full_root, full_per_fb_root, - "BUG: full-per-fb should always match ground truth" - ); - - let (_, fb1_trie_updates) = - StateRoot::overlay_root_with_updates(tx.tx_ref(), &fb1_sorted).unwrap(); - let trie_input = TrieInput::new( - fb1_trie_updates.clone(), - fb2_cumulative_state.clone(), - fb2_cumulative_state.construct_prefix_sets(), - ); - let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); - let (incremental_root, _) = - StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); - - let mut cumulative_prefix_sets = fb2_cumulative_state.construct_prefix_sets(); - cumulative_prefix_sets.extend(fb1_prefix_sets_for_fix); - let trie_input = TrieInput::new( - fb1_trie_updates, + // Full (ground truth) + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let full_result = + compute_state_root(&latest, fb2_cumulative_state.clone(), None, None, false).unwrap(); + + // FB1 + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let fb1_result = compute_state_root(&latest, fb1_state, None, None, false).unwrap(); + + // Incremental (buggy) + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let buggy_result = compute_state_root( + &latest, fb2_cumulative_state.clone(), - cumulative_prefix_sets, - ); - let sorted_input = reth_trie::TrieInputSorted::from_unsorted(trie_input); - let (incremental_with_cumulative_prefix_sets_root, _) = - StateRoot::overlay_root_from_nodes_with_updates(tx.tx_ref(), sorted_input).unwrap(); + Some(&fb1_result.trie_updates), + None, + true, + ) + .unwrap(); + + // Incremental (fixed) + let provider = factory.database_provider_ro().unwrap(); + let latest = LatestStateProvider::new(provider); + let fixed_result = compute_state_root( + &latest, + fb2_cumulative_state, + Some(&fb1_result.trie_updates), + Some(fb1_result.cumulative_prefix_sets), + true, + ) + .unwrap(); FlashblockRoots { - full: full_root, - full_per_fb: full_per_fb_root, - incremental: incremental_root, - incremental_with_cumulative_prefix_sets: incremental_with_cumulative_prefix_sets_root, + full: full_result.state_root, + incremental_buggy: buggy_result.state_root, + incremental_fixed: fixed_result.state_root, } } /// Asserts that the incremental path (without cumulative prefix sets) produces a WRONG /// root, while the incremental path with cumulative prefix sets produces the CORRECT root. fn assert_incremental_mismatch(roots: &FlashblockRoots) { - assert_eq!( - roots.full, roots.full_per_fb, - "full-per-flashblock MUST match ground truth" - ); assert_ne!( - roots.full, roots.incremental, - "incremental should diverge from ground truth in this scenario, \ + roots.full, roots.incremental_buggy, + "incremental (buggy) should diverge from ground truth in this scenario, \ but they matched. Full root: {:?}.", roots.full ); assert_eq!( - roots.full, roots.incremental_with_cumulative_prefix_sets, + roots.full, roots.incremental_fixed, "incremental with cumulative prefix sets diverges from ground truth. \ Full root: {:?}, Got: {:?}.", - roots.full, roots.incremental_with_cumulative_prefix_sets + roots.full, roots.incremental_fixed ); } @@ -359,14 +355,14 @@ proptest! { let roots = simulate_flashblocks(&initial_accounts, fb1_state, fb2_cumulative); - // Full-per-fb must always match (already asserted inside simulate_flashblocks) - // Incremental should match full (no populated trie = no stored hashes = no stale nodes) + // Without populated trie, incremental should match full + // (no stored hashes = no stale nodes) prop_assert_eq!( - roots.full, roots.incremental, + roots.full, roots.incremental_buggy, "Fuzz: incremental diverged from full (seed={})", seed ); prop_assert_eq!( - roots.full, roots.incremental_with_cumulative_prefix_sets, + roots.full, roots.incremental_fixed, "Fuzz: incremental_with_cumulative_prefix_sets diverged from full (seed={})", seed ); } From 9b155792fda283e3e9b965d02249f16d7b11093d Mon Sep 17 00:00:00 2001 From: avalonche Date: Tue, 17 Mar 2026 15:11:14 -0700 Subject: [PATCH 3/3] chore: remove benchmark report doc Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/TRIE_CACHE_BENCHMARK_REPORT.md | 283 ---------------------------- 1 file changed, 283 deletions(-) delete mode 100644 docs/TRIE_CACHE_BENCHMARK_REPORT.md diff --git a/docs/TRIE_CACHE_BENCHMARK_REPORT.md b/docs/TRIE_CACHE_BENCHMARK_REPORT.md deleted file mode 100644 index 2884b6aa..00000000 --- a/docs/TRIE_CACHE_BENCHMARK_REPORT.md +++ /dev/null @@ -1,283 +0,0 @@ -# Flashblocks Incremental Trie Cache Performance Benchmark Report - -**Date**: February 12, 2026 -**Version**: op-rbuilder v0.3.1 -**Reth Version**: v1.10.2 - ---- - -## Summary - -This report presents the results of comprehensive performance benchmarking for the **incremental trie cache optimization**. The optimization aims to reduce state root calculation time by reusing trie nodes from previous flashblocks rather than recalculating from the database each time. - -### Key Results - -- **2.4-2.5x speedup** demonstrated across all test scenarios - - -## 1. Incremental Trie Cache Optimization - -The current state root calculation -```aiignore -(state_root, trie_output) = state - .database - .as_ref() - .state_root_with_updates(hashed_state.clone()) - .inspect_err(|err| { - warn!( - target: "payload_builder", - parent_header=%ctx.parent().hash(), - %err, - "failed to calculate state root for payload" - ); - })?; -``` -use the reth's `MemoryOverlayStateProvider` for state root calculation. however, this provider only cache tries for every L2 block, -it does not cache tries for the flashblocks. In this work, we cache the trie nodes after each flashblock state root calculation. Therefore, -later flashblock state root calculation can be faster. - - -**IO analysis with trie cache**: -- First flashblock: Same database reads (baseline) -- Subsequent flashblocks: Only read new/modified nodes -- Cache hit rate: 80-95% (most state unchanged between flashblocks) -- **Total I/O time**: 10-100ms per flashblock (5-10x reduction) - -##Computing analysis with trie cache** -- In 10 sequential flashblocks, unchanged trie branches are computed 10 times without cache -- With cache: Compute once, reuse 9 times - -**Configuration**: -```bash -# Enable feature (production-ready) ---flashblocks.enable-incremental-trie-cache=true -``` - ---- - -## 2. Test Methodology - -### 2.1 Database Setup - -**Realistic State Size**: -- **50,000 accounts** with randomized balances and nonces -- **~25,000 storage entries** (50% of accounts have storage, 10 slots each) -- **Total state size**: ~100 MB in-memory database - -**Data Generation**: -``` -Accounts: 50,000 with properties: - - Nonce: 0-1000 (random) - - Balance: 0-1,000,000 wei (random) - - Bytecode: 30% have contract code - - Storage: 50% have 10 storage slots -``` - -### 2.2 Flashblock Simulation - -**Test Parameters**: -- **Flashblocks per test**: 10 sequential flashblocks -- **Transaction sizes**: 50, 100, 200 transactions per flashblock - -**Two Scenarios Tested**: - -1. **Without Cache (Baseline)** - - Each flashblock calculates full state root from database - - Uses `StateRootProvider::state_root_with_updates()` - - No trie node reuse between flashblocks - -2. **With Cache (Optimized)** - - First flashblock: Full state root calculation - - Subsequent flashblocks: Incremental calculation using cached trie - - Uses `StateRootProvider::state_root_from_nodes_with_updates()` - - Reuses `TrieUpdates` from previous flashblock - -### 2.3 Benchmark Framework - -**Metrics Collected**: -- Total time for 10 flashblocks -- Per-flashblock timing breakdown - ---- - -### 2.4 Benchmark Execution Details - -**Command**: -```bash -cargo bench -p op-rbuilder --bench bench_flashblocks_state_root -``` - -**Environment**: -- Hardware: MacBook Pro (Model: Mac16,7) -- CPU: Apple M4 Pro (14 cores: 10 performance + 4 efficiency) -- Memory: 48 GB -- OS: macOS (Darwin 24.6.0) -- Rust: 1.83.0 (release channel) -- Optimization: --release (opt-level=3) - -**Criterion Settings**: -- Warm-up time: 3 seconds -- Measurement time: 5 seconds (adjusted to 20s for slow benchmarks) -- Sample size: 10 iterations -- Confidence level: 95% - - -## 3. Benchmark Results - -### 3.1 Performance Summary - -| Metric | 50 tx/FB | 100 tx/FB | 200 tx/FB | Average | -|--------|----------|-----------|-----------|---------| -| **Without Cache** | 1,982 ms | 1,991 ms | 1,993 ms | 1,989 ms | -| **With Cache** | 786 ms | 826 ms | 845 ms | 819 ms | -| **Speedup** | 2.52x | 2.44x | 2.39x | **2.45x** | -| **Improvement** | 60.2% | 59.1% | 58.1% | **59.1%** | -### 3.2 Detailed Results by Transaction Size - -#### 50 Transactions per Flashblock - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ WITHOUT CACHE (Baseline) │ -├─────────────────────────────────────────────────────────────────┤ -│ Total Time: 2,013 ms (2.01 seconds) │ -│ Per-Flashblock: [201, 203, 202, 200, 201, 201, 203, │ -│ 201, 200, 201] ms │ -│ Average: 201 ms per flashblock │ -│ Std Dev: ±1.2 ms │ -│ Consistency: Very consistent (all within 3ms range) │ -└─────────────────────────────────────────────────────────────────┘ - -┌─────────────────────────────────────────────────────────────────┐ -│ WITH CACHE (Optimized) │ -├─────────────────────────────────────────────────────────────────┤ -│ Total Time: 800 ms (0.80 seconds) │ -│ Per-Flashblock: [206, 4, 69, 91, 56, 79, 44, 90, │ -│ 101, 59] ms │ -│ Breakdown: │ -│ - Flashblock 1: 206 ms (full calculation) │ -│ - Flashblock 2: 4 ms (98% faster - best case) │ -│ - Flashblocks 3-10: 44-101 ms (incremental) │ -│ Average: 80 ms per flashblock │ -│ Speedup: 2.52x (60.2% faster) │ -└─────────────────────────────────────────────────────────────────┘ -``` - -**Criterion Output**: -``` -flashblocks_50_txs/without_cache/10_flashblocks - time: [1.9781 s 1.9820 s 1.9861 s] - -flashblocks_50_txs/with_cache/10_flashblocks - time: [780.31 ms 786.34 ms 794.75 ms] -``` - -#### 100 Transactions per Flashblock - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ WITHOUT CACHE (Baseline) │ -├─────────────────────────────────────────────────────────────────┤ -│ Total Time: 2,029 ms │ -│ Per-Flashblock: [200, 203, 206, 200, 199, 201, 203, │ -│ 204, 209, 204] ms │ -│ Average: 203 ms per flashblock │ -└─────────────────────────────────────────────────────────────────┘ - -┌─────────────────────────────────────────────────────────────────┐ -│ WITH CACHE (Optimized) │ -├─────────────────────────────────────────────────────────────────┤ -│ Total Time: 831 ms │ -│ Per-Flashblock: [204, 7, 95, 85, 57, 97, 40, 103, │ -│ 84, 59] ms │ -│ Average: 83 ms per flashblock │ -│ Speedup: 2.44x (59.1% faster) │ -└─────────────────────────────────────────────────────────────────┘ -``` - -**Criterion Output**: -``` -flashblocks_100_txs/without_cache/10_flashblocks - time: [1.9800 s 1.9909 s 2.0074 s] - -flashblocks_100_txs/with_cache/10_flashblocks - time: [818.51 ms 825.82 ms 834.03 ms] -``` - -#### 200 Transactions per Flashblock - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ WITHOUT CACHE (Baseline) │ -├─────────────────────────────────────────────────────────────────┤ -│ Total Time: 2,036 ms │ -│ Per-Flashblock: [203, 207, 204, 202, 204, 202, 206, │ -│ 203, 204, 201] ms │ -│ Average: 204 ms per flashblock │ -└─────────────────────────────────────────────────────────────────┘ - -┌─────────────────────────────────────────────────────────────────┐ -│ WITH CACHE (Optimized) │ -├─────────────────────────────────────────────────────────────────┤ -│ Total Time: 853 ms │ -│ Per-Flashblock: [205, 9, 98, 84, 84, 72, 66, 96, │ -│ 83, 56] ms │ -│ Average: 85 ms per flashblock │ -│ Speedup: 2.39x (58.1% faster) │ -└─────────────────────────────────────────────────────────────────┘ -``` - -**Criterion Output**: -``` -flashblocks_200_txs/without_cache/10_flashblocks - time: [1.9821 s 1.9933 s 2.0120 s] - -flashblocks_200_txs/with_cache/10_flashblocks - time: [836.48 ms 844.76 ms 854.38 ms] -``` - -### 3.3 Visual Performance Comparison - -``` -State Root Calculation Time per Flashblock -───────────────────────────────────────────────────────────────── - -WITHOUT CACHE (Baseline): -FB1 ████████████████████ 201ms -FB2 ████████████████████ 203ms -FB3 ████████████████████ 202ms -FB4 ████████████████████ 200ms -FB5 ████████████████████ 201ms -FB6 ████████████████████ 201ms -FB7 ████████████████████ 203ms -FB8 ████████████████████ 201ms -FB9 ████████████████████ 200ms -FB10 ████████████████████ 201ms - │ - └─ Consistent ~200ms per flashblock - -WITH CACHE (Optimized): -FB1 ████████████████████ 206ms [Full calculation] -FB2 █ 4ms [98% faster!] -FB3 ███████ 69ms [66% faster] -FB4 █████████ 91ms [55% faster] -FB5 ██████ 56ms [72% faster] -FB6 ████████ 79ms [61% faster] -FB7 ████ 44ms [78% faster] -FB8 █████████ 90ms [55% faster] -FB9 ██████████ 101ms [50% faster] -FB10 ██████ 59ms [71% faster] - │ - └─ Average ~80ms per flashblock (2.5x faster) - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -TOTAL TIME COMPARISON (10 Flashblocks, 100 tx/FB) - -Without Cache: ████████████████████ 2,029 ms -With Cache: ████████ 831 ms - -Time Saved: 1,198 ms (59.1% reduction) -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -``` - ---- \ No newline at end of file