diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ac1395be7b..982524904b 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -69,6 +69,8 @@ jobs:
           path: ~/.jolt
       - name: Install Jolt RISC-V Rust toolchain
         run: cargo run install-toolchain
+      - name: Clear Dory URS cache
+        run: rm -rf ~/.cache/dory
       - name: Install nextest
         uses: taiki-e/install-action@nextest
       - name: Run jolt-core tests
diff --git a/.gitignore b/.gitignore
index 6c88a867c6..fc6d03d695 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,3 +65,4 @@ jolt-sdk/tests/fib_io_device_bytes.rs
 jolt-sdk/tests/fib_proof_bytes.rs
 jolt-sdk/tests/jolt_verifier_preprocessing_bytes.rs
 
+bytecode-commitment-progress.md
diff --git a/book/src/usage/guests_hosts/hosts.md b/book/src/usage/guests_hosts/hosts.md
index 5c05bb9dda..5c1f2fae1f 100644
--- a/book/src/usage/guests_hosts/hosts.md
+++ b/book/src/usage/guests_hosts/hosts.md
@@ -5,7 +5,7 @@ Hosts are where we can invoke the Jolt prover to prove functions defined within
 The host imports the guest package, and will have automatically generated functions to build each of the Jolt functions. For the SHA3 example we looked at in the [guest](./guests.md) section, the `jolt::provable` procedural macro generates several functions that can be invoked from the host (shown below):
 
 - `compile_sha3(target_dir)` to compile the SHA3 guest to RISC-V
-- `preprocess_prover_sha3` and `verifier_preprocessing_from_prover_sha3` to generate the prover and verifier preprocessing. Note that the preprocessing only needs to be generated once for a given guest program, and can subsequently be reused to prove multiple invocations of the guest.
+- `preprocess_sha3` to generate the prover preprocessing, and `verifier_preprocessing_from_prover_sha3` to derive the verifier preprocessing from it. Note that the preprocessing only needs to be generated once for a given guest program, and can subsequently be reused to prove multiple invocations of the guest.
 - `build_prover_sha3` returns a closure for the prover, which takes in the same input types as the original function and modifies the output to additionally include a proof.
 - `build_verifier_sha3` returns a closure for the verifier, which verifies the proof. The verifier closure's parameters comprise of the program input, the claimed output, a `bool` value claiming whether the guest panicked, and the proof.
 
@@ -14,7 +14,7 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_sha3(target_dir);
 
-    let prover_preprocessing = guest::preprocess_prover_sha3(&mut program);
+    let prover_preprocessing = guest::preprocess_sha3(&mut program);
     let verifier_preprocessing =
         guest::verifier_preprocessing_from_prover_sha3(&prover_preprocessing);
 
diff --git a/examples/alloc/src/main.rs b/examples/alloc/src/main.rs
index 1afd790d20..8845e61aaf 100644
--- a/examples/alloc/src/main.rs
+++ b/examples/alloc/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_alloc(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_alloc(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_alloc(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_alloc(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_alloc(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_alloc(&prover_preprocessing);
 
     let prove_alloc = guest::build_prover_alloc(program, prover_preprocessing);
     let verify_alloc = guest::build_verifier_alloc(verifier_preprocessing);
diff --git a/examples/btreemap/host/src/main.rs b/examples/btreemap/host/src/main.rs
index 011f502489..5bfb3ef5b5 100644
--- a/examples/btreemap/host/src/main.rs
+++ b/examples/btreemap/host/src/main.rs
@@ -17,19 +17,12 @@ pub fn btreemap() {
         guest::compile_btreemap(target_dir)
     });
 
-    let shared_preprocessing = step!("Preprocessing shared", {
-        guest::preprocess_shared_btreemap(&mut program)
-    });
-
     let prover_preprocessing = step!("Preprocessing prover", {
-        guest::preprocess_prover_btreemap(shared_preprocessing.clone())
+        guest::preprocess_btreemap(&mut program)
     });
 
     let verifier_preprocessing = step!("Preprocessing verifier", {
-        guest::preprocess_verifier_btreemap(
-            shared_preprocessing,
-            prover_preprocessing.generators.to_verifier_setup(),
-        )
+        guest::verifier_preprocessing_from_prover_btreemap(&prover_preprocessing)
     });
 
     let prove = step!("Building prover", {
diff --git a/examples/collatz/src/main.rs b/examples/collatz/src/main.rs
index c91450547d..1ea0415512 100644
--- a/examples/collatz/src/main.rs
+++ b/examples/collatz/src/main.rs
@@ -8,12 +8,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_collatz_convergence(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_collatz_convergence(&mut program);
-    let prover_preprocessing =
-        guest::preprocess_prover_collatz_convergence(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = guest::preprocess_collatz_convergence(&mut program);
     let verifier_preprocessing =
-        guest::preprocess_verifier_collatz_convergence(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_collatz_convergence(&prover_preprocessing);
 
     let prove_collatz_single =
         guest::build_prover_collatz_convergence(program, prover_preprocessing);
@@ -31,12 +28,9 @@ pub fn main() {
     // Prove/verify convergence for a range of numbers:
     let mut program = guest::compile_collatz_convergence_range(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_collatz_convergence_range(&mut program);
-    let prover_preprocessing =
-        guest::preprocess_prover_collatz_convergence_range(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = guest::preprocess_collatz_convergence_range(&mut program);
     let verifier_preprocessing =
-        guest::preprocess_verifier_collatz_convergence_range(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_collatz_convergence_range(&prover_preprocessing);
 
     let prove_collatz_convergence =
         guest::build_prover_collatz_convergence_range(program, prover_preprocessing);
diff --git a/examples/fibonacci/src/main.rs b/examples/fibonacci/src/main.rs
index ac2b755cad..58bfd5e05f 100644
--- a/examples/fibonacci/src/main.rs
+++ b/examples/fibonacci/src/main.rs
@@ -6,16 +6,18 @@ pub fn main() {
     tracing_subscriber::fmt::init();
 
     let save_to_disk = std::env::args().any(|arg| arg == "--save");
+    let committed_bytecode = std::env::args().any(|arg| arg == "--committed-bytecode");
 
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_fib(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_fib(&mut program);
-
-    let prover_preprocessing = guest::preprocess_prover_fib(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = if committed_bytecode {
+        guest::preprocess_committed_fib(&mut program)
+    } else {
+        guest::preprocess_fib(&mut program)
+    };
     let verifier_preprocessing =
-        guest::preprocess_verifier_fib(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_fib(&prover_preprocessing);
 
     if save_to_disk {
         serialize_and_print_size(
@@ -26,7 +28,6 @@ pub fn main() {
         .expect("Could not serialize preprocessing.");
     }
 
-    let prove_fib = guest::build_prover_fib(program, prover_preprocessing);
     let verify_fib = guest::build_verifier_fib(verifier_preprocessing);
 
     let program_summary = guest::analyze_fib(10);
@@ -39,8 +40,22 @@ pub fn main() {
     info!("Trace file written to: {trace_file}.");
 
     let now = Instant::now();
-    let (output, proof, io_device) = prove_fib(50);
+    let (output, proof, io_device) = if committed_bytecode {
+        let prove_fib = guest::build_prover_committed_fib(program, prover_preprocessing);
+        prove_fib(50)
+    } else {
+        let prove_fib = guest::build_prover_fib(program, prover_preprocessing);
+        prove_fib(50)
+    };
     info!("Prover runtime: {} s", now.elapsed().as_secs_f64());
+    info!(
+        "bytecode mode: {}",
+        if committed_bytecode {
+            "Committed"
+        } else {
+            "Full"
+        }
+    );
 
     if save_to_disk {
         serialize_and_print_size("Proof", "/tmp/fib_proof.bin", &proof)
diff --git a/examples/hash-bench/src/main.rs b/examples/hash-bench/src/main.rs
index 181ec912c9..8c498ab3f2 100644
--- a/examples/hash-bench/src/main.rs
+++ b/examples/hash-bench/src/main.rs
@@ -6,11 +6,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_hashbench(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_hashbench(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_hashbench(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = guest::preprocess_hashbench(&mut program);
     let verifier_preprocessing =
-        guest::preprocess_verifier_hashbench(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_hashbench(&prover_preprocessing);
 
     let prove_hashbench = guest::build_prover_hashbench(program, prover_preprocessing);
     let verify_hashbench = guest::build_verifier_hashbench(verifier_preprocessing);
diff --git a/examples/malloc/src/main.rs b/examples/malloc/src/main.rs
index d28e99d067..39b3b955d4 100644
--- a/examples/malloc/src/main.rs
+++ b/examples/malloc/src/main.rs
@@ -4,12 +4,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_alloc(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_alloc(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_alloc(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_alloc(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_alloc(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_alloc(&prover_preprocessing);
 
     let prove = guest::build_prover_alloc(program, prover_preprocessing);
     let verify = guest::build_verifier_alloc(verifier_preprocessing);
diff --git a/examples/memory-ops/src/main.rs b/examples/memory-ops/src/main.rs
index a95af60aa0..3516b6144c 100644
--- a/examples/memory-ops/src/main.rs
+++ b/examples/memory-ops/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_memory_ops(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_memory_ops(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_memory_ops(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_memory_ops(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_memory_ops(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_memory_ops(&prover_preprocessing);
 
     let prove = guest::build_prover_memory_ops(program, prover_preprocessing);
     let verify = guest::build_verifier_memory_ops(verifier_preprocessing);
diff --git a/examples/merkle-tree/src/main.rs b/examples/merkle-tree/src/main.rs
index c31353402c..4a89261071 100644
--- a/examples/merkle-tree/src/main.rs
+++ b/examples/merkle-tree/src/main.rs
@@ -8,12 +8,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_merkle_tree(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_merkle_tree(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_merkle_tree(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_merkle_tree(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_merkle_tree(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_merkle_tree(&prover_preprocessing);
 
     let leaf1: &[u8] = &[5u8; 32];
     let leaf2 = [6u8; 32];
diff --git a/examples/muldiv/src/main.rs b/examples/muldiv/src/main.rs
index 7a3680e5dc..5cc95530db 100644
--- a/examples/muldiv/src/main.rs
+++ b/examples/muldiv/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_muldiv(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_muldiv(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_muldiv(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_muldiv(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_muldiv(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_muldiv(&prover_preprocessing);
 
     let prove = guest::build_prover_muldiv(program, prover_preprocessing);
     let verify = guest::build_verifier_muldiv(verifier_preprocessing);
diff --git a/examples/multi-function/src/main.rs b/examples/multi-function/src/main.rs
index 6d9f9da9f8..c12c081bbd 100644
--- a/examples/multi-function/src/main.rs
+++ b/examples/multi-function/src/main.rs
@@ -8,11 +8,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_add(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_add(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_add(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = guest::preprocess_add(&mut program);
     let verifier_preprocessing =
-        guest::preprocess_verifier_add(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_add(&prover_preprocessing);
 
     let prove_add = guest::build_prover_add(program, prover_preprocessing);
     let verify_add = guest::build_verifier_add(verifier_preprocessing);
@@ -21,12 +19,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_mul(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_mul(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_mul(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_mul(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_mul(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_mul(&prover_preprocessing);
 
     let prove_mul = guest::build_prover_mul(program, prover_preprocessing);
     let verify_mul = guest::build_verifier_mul(verifier_preprocessing);
diff --git a/examples/overflow/src/main.rs b/examples/overflow/src/main.rs
index 4a17575e70..a677dc4537 100644
--- a/examples/overflow/src/main.rs
+++ b/examples/overflow/src/main.rs
@@ -9,9 +9,7 @@ pub fn main() {
     // An overflowing stack should fail to prove.
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_overflow_stack(target_dir);
-    let shared_preprocessing = guest::preprocess_shared_overflow_stack(&mut program);
-    let prover_preprocessing =
-        guest::preprocess_prover_overflow_stack(shared_preprocessing.clone());
+    let prover_preprocessing = guest::preprocess_overflow_stack(&mut program);
     let prove_overflow_stack = guest::build_prover_overflow_stack(program, prover_preprocessing);
 
     let res = panic::catch_unwind(|| {
@@ -23,8 +21,7 @@ pub fn main() {
 
     // now lets try to overflow the heap, should also panic
     let mut program = guest::compile_overflow_heap(target_dir);
-    let shared_preprocessing = guest::preprocess_shared_overflow_heap(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_overflow_heap(shared_preprocessing.clone());
+    let prover_preprocessing = guest::preprocess_overflow_heap(&mut program);
     let prove_overflow_heap = guest::build_prover_overflow_heap(program, prover_preprocessing);
 
     let res = panic::catch_unwind(|| {
@@ -35,15 +32,11 @@ pub fn main() {
     // valid case for stack allocation, calls overflow_stack() under the hood
     // but with stack_size=8192
     let mut program = guest::compile_allocate_stack_with_increased_size(target_dir);
-
-    let shared_preprocessing =
-        guest::preprocess_shared_allocate_stack_with_increased_size(&mut program);
-    let prover_preprocessing =
-        guest::preprocess_prover_allocate_stack_with_increased_size(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_allocate_stack_with_increased_size(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_allocate_stack_with_increased_size(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_allocate_stack_with_increased_size(
+            &prover_preprocessing,
+        );
 
     let prove_allocate_stack_with_increased_size =
         guest::build_prover_allocate_stack_with_increased_size(program, prover_preprocessing);
diff --git a/examples/random/src/main.rs b/examples/random/src/main.rs
index e4456db259..0379c49bd0 100644
--- a/examples/random/src/main.rs
+++ b/examples/random/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_rand(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_rand(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_rand(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_rand(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_rand(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_rand(&prover_preprocessing);
 
     let prove = guest::build_prover_rand(program, prover_preprocessing);
     let verify = guest::build_verifier_rand(verifier_preprocessing);
diff --git a/examples/recover-ecdsa/src/main.rs b/examples/recover-ecdsa/src/main.rs
index 038a5c1fa7..512a59ca22 100644
--- a/examples/recover-ecdsa/src/main.rs
+++ b/examples/recover-ecdsa/src/main.rs
@@ -31,12 +31,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_recover(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_recover(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_recover(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_recover(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_recover(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_recover(&prover_preprocessing);
 
     if save_to_disk {
         serialize_and_print_size(
diff --git a/examples/secp256k1-ecdsa-verify/src/main.rs b/examples/secp256k1-ecdsa-verify/src/main.rs
index dfe38f6da8..4ebc61bcec 100644
--- a/examples/secp256k1-ecdsa-verify/src/main.rs
+++ b/examples/secp256k1-ecdsa-verify/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_secp256k1_ecdsa_verify(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_secp256k1_ecdsa_verify(&mut program);
-    let prover_preprocessing =
-        guest::preprocess_prover_secp256k1_ecdsa_verify(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = guest::preprocess_secp256k1_ecdsa_verify(&mut program);
     let verifier_preprocessing =
-        guest::preprocess_verifier_secp256k1_ecdsa_verify(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_secp256k1_ecdsa_verify(&prover_preprocessing);
 
     let prove_secp256k1_ecdsa_verify =
         guest::build_prover_secp256k1_ecdsa_verify(program, prover_preprocessing);
diff --git a/examples/sha2-chain/src/main.rs b/examples/sha2-chain/src/main.rs
index 94114c0414..f7f1ccbd60 100644
--- a/examples/sha2-chain/src/main.rs
+++ b/examples/sha2-chain/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_sha2_chain(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_sha2_chain(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_sha2_chain(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_sha2_chain(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_sha2_chain(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_sha2_chain(&prover_preprocessing);
 
     let prove_sha2_chain = guest::build_prover_sha2_chain(program, prover_preprocessing);
     let verify_sha2_chain = guest::build_verifier_sha2_chain(verifier_preprocessing);
diff --git a/examples/sha2-ex/src/main.rs b/examples/sha2-ex/src/main.rs
index 4bce837fb8..2d86050f25 100644
--- a/examples/sha2-ex/src/main.rs
+++ b/examples/sha2-ex/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_sha2(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_sha2(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_sha2(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_sha2(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_sha2(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_sha2(&prover_preprocessing);
 
     let prove_sha2 = guest::build_prover_sha2(program, prover_preprocessing);
     let verify_sha2 = guest::build_verifier_sha2(verifier_preprocessing);
diff --git a/examples/sha3-chain/src/main.rs b/examples/sha3-chain/src/main.rs
index 97e223467b..cae32b0148 100644
--- a/examples/sha3-chain/src/main.rs
+++ b/examples/sha3-chain/src/main.rs
@@ -6,12 +6,9 @@ pub fn main() {
 
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_sha3_chain(target_dir);
-    let shared_preprocessing = guest::preprocess_shared_sha3_chain(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_sha3_chain(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_sha3_chain(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_sha3_chain(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_sha3_chain(&prover_preprocessing);
 
     let prove_sha3_chain = guest::build_prover_sha3_chain(program, prover_preprocessing);
     let verify_sha3_chain = guest::build_verifier_sha3_chain(verifier_preprocessing);
diff --git a/examples/sha3-ex/src/main.rs b/examples/sha3-ex/src/main.rs
index 1b49530258..69467d6f4e 100644
--- a/examples/sha3-ex/src/main.rs
+++ b/examples/sha3-ex/src/main.rs
@@ -6,12 +6,9 @@ pub fn main() {
 
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_sha3(target_dir);
-    let shared_preprocessing = guest::preprocess_shared_sha3(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_sha3(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_sha3(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_sha3(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_sha3(&prover_preprocessing);
 
     let prove_sha3 = guest::build_prover_sha3(program, prover_preprocessing);
     let verify_sha3 = guest::build_verifier_sha3(verifier_preprocessing);
diff --git a/examples/stdlib/src/main.rs b/examples/stdlib/src/main.rs
index 8edd0fed21..8b84b31743 100644
--- a/examples/stdlib/src/main.rs
+++ b/examples/stdlib/src/main.rs
@@ -7,12 +7,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_int_to_string(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_int_to_string(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_int_to_string(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_int_to_string(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_int_to_string(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_int_to_string(&prover_preprocessing);
 
     let prove = guest::build_prover_int_to_string(program, prover_preprocessing);
     let verify = guest::build_verifier_int_to_string(verifier_preprocessing);
@@ -24,12 +21,9 @@ pub fn main() {
 
     let mut program = guest::compile_string_concat(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_string_concat(&mut program);
-    let prover_preprocessing = guest::preprocess_prover_string_concat(shared_preprocessing.clone());
-    let verifier_preprocessing = guest::preprocess_verifier_string_concat(
-        shared_preprocessing,
-        prover_preprocessing.generators.to_verifier_setup(),
-    );
+    let prover_preprocessing = guest::preprocess_string_concat(&mut program);
+    let verifier_preprocessing =
+        guest::verifier_preprocessing_from_prover_string_concat(&prover_preprocessing);
 
     let prove = guest::build_prover_string_concat(program, prover_preprocessing);
     let verify = guest::build_verifier_string_concat(verifier_preprocessing);
diff --git a/jolt-core/benches/e2e_profiling.rs b/jolt-core/benches/e2e_profiling.rs
index cf5cb3b65d..b171c452ef 100644
--- a/jolt-core/benches/e2e_profiling.rs
+++ b/jolt-core/benches/e2e_profiling.rs
@@ -1,5 +1,8 @@
+use std::sync::Arc;
+
 use ark_serialize::CanonicalSerialize;
 use jolt_core::host;
+use jolt_core::zkvm::bytecode::BytecodePreprocessing;
 use jolt_core::zkvm::prover::JoltProverPreprocessing;
 use jolt_core::zkvm::verifier::{JoltSharedPreprocessing, JoltVerifierPreprocessing};
 use jolt_core::zkvm::{RV64IMACProver, RV64IMACVerifier};
@@ -201,19 +204,22 @@ fn prove_example(
 ) -> Vec<(tracing::Span, Box<dyn FnOnce()>)> {
     let mut tasks = Vec::new();
     let mut program = host::Program::new(example_name);
-    let (bytecode, init_memory_state, _) = program.decode();
+    let (instructions, init_memory_state, _) = program.decode();
     let (_lazy_trace, trace, _, program_io) = program.trace(&serialized_input, &[], &[]);
     let padded_trace_len = (trace.len() + 1).next_power_of_two();
     drop(trace);
 
     let task = move || {
+        let bytecode: Arc<BytecodePreprocessing> =
+            BytecodePreprocessing::preprocess(instructions).into();
         let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode,
+            &bytecode,
             program_io.memory_layout.clone(),
             init_memory_state,
             padded_trace_len,
         );
-        let preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
+        let preprocessing =
+            JoltProverPreprocessing::new(shared_preprocessing.clone(), Arc::clone(&bytecode));
 
         let elf_contents_opt = program.get_elf_contents();
         let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
@@ -229,9 +235,10 @@ fn prove_example(
         let program_io = prover.program_io.clone();
         let (jolt_proof, _) = prover.prove();
 
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
+        let verifier_preprocessing = JoltVerifierPreprocessing::new_full(
             shared_preprocessing,
             preprocessing.generators.to_verifier_setup(),
+            Arc::clone(&preprocessing.bytecode),
         );
         let verifier =
             RV64IMACVerifier::new(&verifier_preprocessing, jolt_proof, program_io, None, None)
@@ -255,7 +262,7 @@ fn prove_example_with_trace(
     _scale: usize,
 ) -> (std::time::Duration, usize, usize, usize) {
     let mut program = host::Program::new(example_name);
-    let (bytecode, init_memory_state, _) = program.decode();
+    let (instructions, init_memory_state, _) = program.decode();
     let (_, trace, _, program_io) = program.trace(&serialized_input, &[], &[]);
 
     assert!(
@@ -263,13 +270,15 @@ fn prove_example_with_trace(
         "Trace is longer than expected"
     );
 
+    let bytecode: Arc<BytecodePreprocessing> =
+        BytecodePreprocessing::preprocess(instructions).into();
     let shared_preprocessing = JoltSharedPreprocessing::new(
-        bytecode.clone(),
+        &bytecode,
         program_io.memory_layout.clone(),
         init_memory_state,
         trace.len().next_power_of_two(),
     );
-    let preprocessing = JoltProverPreprocessing::new(shared_preprocessing);
+    let preprocessing = JoltProverPreprocessing::new(shared_preprocessing, Arc::clone(&bytecode));
 
     let elf_contents_opt = program.get_elf_contents();
     let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
diff --git a/jolt-core/src/guest/prover.rs b/jolt-core/src/guest/prover.rs
index a20023fed7..9df31cc5b2 100644
--- a/jolt-core/src/guest/prover.rs
+++ b/jolt-core/src/guest/prover.rs
@@ -16,16 +16,20 @@ pub fn preprocess(
     guest: &Program,
     max_trace_length: usize,
 ) -> JoltProverPreprocessing<ark_bn254::Fr, DoryCommitmentScheme> {
+    use crate::zkvm::bytecode::BytecodePreprocessing;
     use crate::zkvm::verifier::JoltSharedPreprocessing;
+    use std::sync::Arc;
 
-    let (bytecode, memory_init, program_size) = guest.decode();
+    let (instructions, memory_init, program_size) = guest.decode();
 
     let mut memory_config = guest.memory_config;
     memory_config.program_size = Some(program_size);
     let memory_layout = MemoryLayout::new(&memory_config);
+
+    let bytecode = Arc::new(BytecodePreprocessing::preprocess(instructions));
     let shared_preprocessing =
-        JoltSharedPreprocessing::new(bytecode, memory_layout, memory_init, max_trace_length);
-    JoltProverPreprocessing::new(shared_preprocessing)
+        JoltSharedPreprocessing::new(&bytecode, memory_layout, memory_init, max_trace_length);
+    JoltProverPreprocessing::new(shared_preprocessing, bytecode)
 }
 
 #[allow(clippy::type_complexity, clippy::too_many_arguments)]
diff --git a/jolt-core/src/guest/verifier.rs b/jolt-core/src/guest/verifier.rs
index 5c2a92904d..c642c9f525 100644
--- a/jolt-core/src/guest/verifier.rs
+++ b/jolt-core/src/guest/verifier.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::field::JoltField;
 use crate::poly::commitment::commitment_scheme::CommitmentScheme;
 use crate::poly::commitment::commitment_scheme::StreamingCommitmentScheme;
@@ -6,6 +8,7 @@ use crate::guest::program::Program;
 use crate::poly::commitment::dory::DoryCommitmentScheme;
 use crate::transcripts::Transcript;
 use crate::utils::errors::ProofVerifyError;
+use crate::zkvm::bytecode::BytecodePreprocessing;
 use crate::zkvm::proof_serialization::JoltProof;
 use crate::zkvm::verifier::JoltSharedPreprocessing;
 use crate::zkvm::verifier::JoltVerifier;
@@ -18,14 +21,17 @@ pub fn preprocess(
     max_trace_length: usize,
     verifier_setup: <DoryCommitmentScheme as CommitmentScheme>::VerifierSetup,
 ) -> JoltVerifierPreprocessing<ark_bn254::Fr, DoryCommitmentScheme> {
-    let (bytecode, memory_init, program_size) = guest.decode();
+    let (bytecode_instructions, memory_init, program_size) = guest.decode();
 
     let mut memory_config = guest.memory_config;
     memory_config.program_size = Some(program_size);
     let memory_layout = MemoryLayout::new(&memory_config);
+
+    let bytecode: Arc<BytecodePreprocessing> =
+        BytecodePreprocessing::preprocess(bytecode_instructions).into();
     let shared =
-        JoltSharedPreprocessing::new(bytecode, memory_layout, memory_init, max_trace_length);
-    JoltVerifierPreprocessing::new(shared, verifier_setup)
+        JoltSharedPreprocessing::new(&bytecode, memory_layout, memory_init, max_trace_length);
+    JoltVerifierPreprocessing::new_full(shared, verifier_setup, bytecode)
 }
 
 pub fn verify<F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, FS: Transcript>(
diff --git a/jolt-core/src/poly/commitment/commitment_scheme.rs b/jolt-core/src/poly/commitment/commitment_scheme.rs
index 6debe3b519..7e1a2faa43 100644
--- a/jolt-core/src/poly/commitment/commitment_scheme.rs
+++ b/jolt-core/src/poly/commitment/commitment_scheme.rs
@@ -27,7 +27,13 @@ pub trait CommitmentScheme: Clone + Sync + Send + 'static {
     /// A hint that helps the prover compute an opening proof. Typically some byproduct of
     /// the commitment computation, e.g. for Dory the Pedersen commitments to the rows can be
     /// used as a hint for the opening proof.
-    type OpeningProofHint: Sync + Send + Clone + Debug + PartialEq;
+    type OpeningProofHint: Sync
+        + Send
+        + Clone
+        + Debug
+        + PartialEq
+        + CanonicalSerialize
+        + CanonicalDeserialize;
 
     /// Generates the prover setup for this PCS. `max_num_vars` is the maximum number of
     /// variables of any polynomial that will be committed using this setup.
diff --git a/jolt-core/src/poly/commitment/dory/dory_globals.rs b/jolt-core/src/poly/commitment/dory/dory_globals.rs
index c4c2ebe421..5f78157184 100644
--- a/jolt-core/src/poly/commitment/dory/dory_globals.rs
+++ b/jolt-core/src/poly/commitment/dory/dory_globals.rs
@@ -151,7 +151,12 @@ static mut UNTRUSTED_ADVICE_T: OnceLock<usize> = OnceLock::new();
 static mut UNTRUSTED_ADVICE_MAX_NUM_ROWS: OnceLock<usize> = OnceLock::new();
 static mut UNTRUSTED_ADVICE_NUM_COLUMNS: OnceLock<usize> = OnceLock::new();
 
-// Context tracking: 0=Main, 1=TrustedAdvice, 2=UntrustedAdvice
+// Bytecode globals
+static mut BYTECODE_T: OnceLock<usize> = OnceLock::new();
+static mut BYTECODE_MAX_NUM_ROWS: OnceLock<usize> = OnceLock::new();
+static mut BYTECODE_NUM_COLUMNS: OnceLock<usize> = OnceLock::new();
+
+// Context tracking: 0=Main, 1=TrustedAdvice, 2=UntrustedAdvice, 3=Bytecode
 static CURRENT_CONTEXT: AtomicU8 = AtomicU8::new(0);
 
 // Layout tracking: 0=CycleMajor, 1=AddressMajor
@@ -163,6 +168,7 @@ pub enum DoryContext {
     Main = 0,
     TrustedAdvice = 1,
     UntrustedAdvice = 2,
+    Bytecode = 3,
 }
 
 impl From<u8> for DoryContext {
@@ -171,6 +177,7 @@ impl From<u8> for DoryContext {
             0 => DoryContext::Main,
             1 => DoryContext::TrustedAdvice,
             2 => DoryContext::UntrustedAdvice,
+            3 => DoryContext::Bytecode,
             _ => panic!("Invalid DoryContext value: {value}"),
         }
     }
@@ -190,6 +197,48 @@ impl Drop for DoryContextGuard {
 pub struct DoryGlobals;
 
 impl DoryGlobals {
+    /// Initialize the Bytecode context so its `num_columns` equals Main's `2^sigma_main`.
+    ///
+    /// Needed for committed-bytecode Stage 8 folding when `sigma_main > sigma_bytecode`: chunk
+    /// polynomials are committed at the Main matrix width so they embed as a top block of rows in
+    /// the Main matrix when extra cycle variables are fixed to 0. Always returns `Some(())`;
+    /// panics on a non-dividing width or on a conflicting earlier initialization.
+    pub fn initialize_bytecode_context_for_main_sigma(
+        k_chunk: usize,
+        bytecode_len: usize,
+        log_k_chunk: usize,
+        log_t: usize,
+    ) -> Option<()> {
+        let (sigma_main, _) = Self::main_sigma_nu(log_k_chunk, log_t);
+        let num_columns = 1usize << sigma_main;
+        let total_size = k_chunk * bytecode_len;
+
+        assert!(
+            total_size % num_columns == 0,
+            "bytecode matrix width {num_columns} must divide total_size {total_size}"
+        );
+        let num_rows = total_size / num_columns;
+
+        // Setters ignore `OnceLock::set` failures, so any global already set must match ours.
+        // SAFETY: read-only access; assumes no concurrent `reset()` (TODO confirm with callers).
+        #[allow(static_mut_refs)]
+        let existing = unsafe {
+            [
+                ("num_columns", BYTECODE_NUM_COLUMNS.get().copied(), num_columns),
+                ("max_num_rows", BYTECODE_MAX_NUM_ROWS.get().copied(), num_rows),
+                ("T", BYTECODE_T.get().copied(), bytecode_len),
+            ]
+        };
+        for (name, old, expected) in existing {
+            assert_eq!(old.unwrap_or(expected), expected, "bytecode context {name} mismatch");
+        }
+
+        Self::set_num_columns_for_context(num_columns, DoryContext::Bytecode);
+        Self::set_T_for_context(bytecode_len, DoryContext::Bytecode);
+        Self::set_max_num_rows_for_context(num_rows, DoryContext::Bytecode);
+        Some(())
+    }
+
     /// Split `total_vars` into a *balanced* pair `(sigma, nu)` where:
     /// - **sigma** is the number of **column** variables
     /// - **nu** is the number of **row** variables
@@ -251,7 +300,6 @@ impl DoryGlobals {
     /// Set the Dory matrix layout directly (test-only).
     ///
     /// In production code, prefer passing the layout to `initialize_context` instead.
-    #[cfg(test)]
     pub fn set_layout(layout: DoryLayout) {
         CURRENT_LAYOUT.store(layout as u8, Ordering::SeqCst);
     }
@@ -305,6 +353,9 @@ impl DoryGlobals {
                 DoryContext::UntrustedAdvice => {
                     let _ = UNTRUSTED_ADVICE_MAX_NUM_ROWS.set(max_num_rows);
                 }
+                DoryContext::Bytecode => {
+                    let _ = BYTECODE_MAX_NUM_ROWS.set(max_num_rows);
+                }
             }
         }
     }
@@ -321,6 +372,9 @@ impl DoryGlobals {
                 DoryContext::UntrustedAdvice => *UNTRUSTED_ADVICE_MAX_NUM_ROWS
                     .get()
                     .expect("untrusted_advice max_num_rows not initialized"),
+                DoryContext::Bytecode => *BYTECODE_MAX_NUM_ROWS
+                    .get()
+                    .expect("bytecode max_num_rows not initialized"),
             }
         }
     }
@@ -338,6 +392,9 @@ impl DoryGlobals {
                 DoryContext::UntrustedAdvice => {
                     let _ = UNTRUSTED_ADVICE_NUM_COLUMNS.set(num_columns);
                 }
+                DoryContext::Bytecode => {
+                    let _ = BYTECODE_NUM_COLUMNS.set(num_columns);
+                }
             }
         }
     }
@@ -354,6 +411,9 @@ impl DoryGlobals {
                 DoryContext::UntrustedAdvice => *UNTRUSTED_ADVICE_NUM_COLUMNS
                     .get()
                     .expect("untrusted_advice num_columns not initialized"),
+                DoryContext::Bytecode => *BYTECODE_NUM_COLUMNS
+                    .get()
+                    .expect("bytecode num_columns not initialized"),
             }
         }
     }
@@ -371,6 +431,9 @@ impl DoryGlobals {
                 DoryContext::UntrustedAdvice => {
                     let _ = UNTRUSTED_ADVICE_T.set(t);
                 }
+                DoryContext::Bytecode => {
+                    let _ = BYTECODE_T.set(t);
+                }
             }
         }
     }
@@ -387,6 +450,7 @@ impl DoryGlobals {
                 DoryContext::UntrustedAdvice => *UNTRUSTED_ADVICE_T
                     .get()
                     .expect("untrusted_advice t not initialized"),
+                DoryContext::Bytecode => *BYTECODE_T.get().expect("bytecode t not initialized"),
             }
         }
     }
@@ -414,7 +478,7 @@ impl DoryGlobals {
     /// # Arguments
     /// * `K` - Maximum address space size (K in OneHot polynomials)
     /// * `T` - Maximum trace length (cycle count)
-    /// * `context` - The Dory context to initialize (Main, TrustedAdvice, or UntrustedAdvice)
+    /// * `context` - The Dory context to initialize (Main, TrustedAdvice, UntrustedAdvice, Bytecode)
     /// * `layout` - Optional layout for the Dory matrix. Only applies to Main context.
     ///   If `Some(layout)`, sets the layout. If `None`, leaves the existing layout
     ///   unchanged (defaults to `CycleMajor` after `reset()`). Ignored for advice contexts.
@@ -466,6 +530,11 @@ impl DoryGlobals {
             let _ = UNTRUSTED_ADVICE_T.take();
             let _ = UNTRUSTED_ADVICE_MAX_NUM_ROWS.take();
             let _ = UNTRUSTED_ADVICE_NUM_COLUMNS.take();
+
+            // Reset bytecode globals
+            let _ = BYTECODE_T.take();
+            let _ = BYTECODE_MAX_NUM_ROWS.take();
+            let _ = BYTECODE_NUM_COLUMNS.take();
         }
 
         // Reset context to Main
diff --git a/jolt-core/src/poly/commitment/dory/wrappers.rs b/jolt-core/src/poly/commitment/dory/wrappers.rs
index 431387d7c2..a4c3fa5eb9 100644
--- a/jolt-core/src/poly/commitment/dory/wrappers.rs
+++ b/jolt-core/src/poly/commitment/dory/wrappers.rs
@@ -227,28 +227,50 @@ where
     let dory_layout = DoryGlobals::get_layout();
 
     // Dense polynomials (all scalar variants except OneHot/RLC) are committed row-wise.
-    // Under AddressMajor, dense coefficients occupy evenly-spaced columns, so each row
-    // commitment uses `cycles_per_row` bases (one per occupied column).
-    let (dense_affine_bases, dense_chunk_size): (Vec<_>, usize) = match (dory_context, dory_layout)
+    //
+    // In `Main` + `AddressMajor`, we have two *representations* in this repo:
+    // - **Trace-dense**: length == T (e.g., `RdInc`, `RamInc`). These are embedded into the
+    //   main matrix by occupying evenly-spaced columns, so each row commitment uses
+    //   `cycles_per_row` bases (one per occupied column).
+    // - **Matrix-dense**: length == K*T (e.g., bytecode chunk polynomials). These occupy the
+    //   full matrix and must use the full `row_len` bases.
+    let is_trace_dense = match poly {
+        MultilinearPolynomial::LargeScalars(p) => p.Z.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::BoolScalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::U8Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::U16Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::U32Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::U64Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::U128Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::I64Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::I128Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::S128Scalars(p) => p.coeffs.len() == DoryGlobals::get_T(),
+        MultilinearPolynomial::OneHot(_) | MultilinearPolynomial::RLC(_) => false,
+    };
+
+    let is_trace_dense_main_addr_major = dory_context == DoryContext::Main
+        && dory_layout == DoryLayout::AddressMajor
+        && is_trace_dense;
+
+    let (dense_affine_bases, dense_chunk_size): (Vec<_>, usize) = if is_trace_dense_main_addr_major
     {
-        (DoryContext::Main, DoryLayout::AddressMajor) => {
-            let cycles_per_row = DoryGlobals::address_major_cycles_per_row();
-            let bases: Vec<_> = g1_slice
-                .par_iter()
-                .take(row_len)
-                .step_by(row_len / cycles_per_row)
-                .map(|g| g.0.into_affine())
-                .collect();
-            (bases, cycles_per_row)
-        }
-        _ => (
+        let cycles_per_row = DoryGlobals::address_major_cycles_per_row();
+        let bases: Vec<_> = g1_slice
+            .par_iter()
+            .take(row_len)
+            .step_by(row_len / cycles_per_row)
+            .map(|g| g.0.into_affine())
+            .collect();
+        (bases, cycles_per_row)
+    } else {
+        (
             g1_slice
                 .par_iter()
                 .take(row_len)
                 .map(|g| g.0.into_affine())
                 .collect(),
             row_len,
-        ),
+        )
     };
 
     let result: Vec<ArkG1> = match poly {
diff --git a/jolt-core/src/poly/opening_proof.rs b/jolt-core/src/poly/opening_proof.rs
index 3b3f93553d..5f1316d717 100644
--- a/jolt-core/src/poly/opening_proof.rs
+++ b/jolt-core/src/poly/opening_proof.rs
@@ -152,10 +152,14 @@ pub enum SumcheckId {
     RegistersClaimReduction,
     RegistersReadWriteChecking,
     RegistersValEvaluation,
+    BytecodeReadRafAddressPhase,
     BytecodeReadRaf,
+    BooleanityAddressPhase,
     Booleanity,
     AdviceClaimReductionCyclePhase,
     AdviceClaimReduction,
+    BytecodeClaimReductionCyclePhase,
+    BytecodeClaimReduction,
     IncClaimReduction,
     HammingWeightClaimReduction,
 }
diff --git a/jolt-core/src/poly/rlc_polynomial.rs b/jolt-core/src/poly/rlc_polynomial.rs
index 47a68c231e..3785dae52b 100644
--- a/jolt-core/src/poly/rlc_polynomial.rs
+++ b/jolt-core/src/poly/rlc_polynomial.rs
@@ -4,8 +4,10 @@ use crate::poly::multilinear_polynomial::MultilinearPolynomial;
 use crate::utils::accumulation::Acc6S;
 use crate::utils::math::{s64_from_diff_u64s, Math};
 use crate::utils::thread::unsafe_allocate_zero_vec;
+use crate::zkvm::bytecode::chunks::{lane_value, total_lanes};
 use crate::zkvm::config::OneHotParams;
-use crate::zkvm::instruction::LookupQuery;
+use crate::zkvm::instruction::{Flags, InstructionLookup, LookupQuery};
+use crate::zkvm::lookup_table::LookupTables;
 use crate::zkvm::ram::remap_address;
 use crate::zkvm::{bytecode::BytecodePreprocessing, witness::CommittedPolynomial};
 use allocative::Allocative;
@@ -16,7 +18,7 @@ use rayon::prelude::*;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tracer::ChunksIterator;
-use tracer::{instruction::Cycle, LazyTraceIterator};
+use tracer::{instruction::Cycle, instruction::Instruction, LazyTraceIterator};
 
 #[derive(Clone, Debug)]
 pub struct RLCStreamingData {
@@ -24,6 +26,96 @@ pub struct RLCStreamingData {
     pub memory_layout: MemoryLayout,
 }
 
+/// Accumulates the bytecode chunk polynomials' contribution to a vector-matrix product.
+///
+/// This is a standalone version of the bytecode VMP computation that can be used
+/// by external callers (e.g., GPU prover) without needing a full `StreamingRLCContext`.
+/// Each instruction is decoded once and shared by all chunks; rows beyond `left_vec` are skipped.
+///
+/// # Arguments
+/// * `result` - Output buffer to accumulate contributions into
+/// * `left_vec` - Left vector for the vector-matrix product (length >= num_rows)
+/// * `num_columns` - Number of columns in the Dory matrix
+/// * `bytecode_polys` - List of (chunk_index, coefficient) pairs for the RLC
+/// * `bytecode` - Bytecode preprocessing data
+/// * `one_hot_params` - One-hot parameters (contains k_chunk)
+pub fn compute_bytecode_vmp_contribution<F: JoltField>(
+    result: &mut [F],
+    left_vec: &[F],
+    num_columns: usize,
+    bytecode_polys: &[(usize, F)],
+    bytecode: &BytecodePreprocessing,
+    one_hot_params: &OneHotParams,
+) {
+    if bytecode_polys.is_empty() {
+        return;
+    }
+
+    let layout = DoryGlobals::get_layout();
+    let k_chunk = one_hot_params.k_chunk;
+    let bytecode_len = bytecode.bytecode.len();
+    let total = total_lanes();
+
+    debug_assert!(
+        k_chunk * bytecode_len >= num_columns,
+        "bytecode_len*k_chunk must cover at least one full row: (k_chunk*bytecode_len)={} < num_columns={}",
+        k_chunk * bytecode_len,
+        num_columns
+    );
+
+    for (cycle, instr) in bytecode.bytecode.iter().enumerate() {
+        // Decoding depends only on the instruction, so do it once rather than once per chunk.
+        let normalized = instr.normalize();
+        let circuit_flags = <Instruction as Flags>::circuit_flags(instr);
+        let instr_flags = <Instruction as Flags>::instruction_flags(instr);
+        let lookup_idx = <Instruction as InstructionLookup<XLEN>>::lookup_table(instr)
+            .map(|t| LookupTables::<XLEN>::enum_index(&t));
+        let raf_flag = !crate::zkvm::instruction::InterleavedBitsMarker::is_interleaved_operands(
+            &circuit_flags,
+        );
+
+        let unexpanded_pc = F::from_u64(normalized.address as u64);
+        let imm = F::from_i128(normalized.operands.imm);
+        let rs1 = normalized.operands.rs1;
+        let rs2 = normalized.operands.rs2;
+        let rd = normalized.operands.rd;
+
+        for (chunk_idx, coeff) in bytecode_polys.iter() {
+            if coeff.is_zero() {
+                continue;
+            }
+            for lane in 0..k_chunk {
+                let global_lane = chunk_idx * k_chunk + lane;
+                if global_lane >= total {
+                    break;
+                }
+                let value = lane_value::<F>(
+                    global_lane,
+                    rs1,
+                    rs2,
+                    rd,
+                    unexpanded_pc,
+                    imm,
+                    &circuit_flags,
+                    &instr_flags,
+                    lookup_idx,
+                    raf_flag,
+                );
+                if value.is_zero() {
+                    continue;
+                }
+                let global_index =
+                    layout.address_cycle_to_index(lane, cycle, k_chunk, bytecode_len);
+                let row_index = global_index / num_columns;
+                let col_index = global_index % num_columns;
+                if row_index < left_vec.len() {
+                    result[col_index] += left_vec[row_index] * (*coeff) * value;
+                }
+            }
+        }
+    }
+}
+
 /// Source of trace data for streaming VMV computation.
 #[derive(Clone, Debug)]
 pub enum TraceSource {
@@ -56,6 +148,8 @@ impl TraceSource {
 pub struct StreamingRLCContext<F: JoltField> {
     pub dense_polys: Vec<(CommittedPolynomial, F)>,
     pub onehot_polys: Vec<(CommittedPolynomial, F)>,
+    /// Bytecode chunk polynomials with their RLC coefficients.
+    pub bytecode_polys: Vec<(usize, F)>,
     /// Advice polynomials with their RLC coefficients.
     /// These are NOT streamed from trace - they're passed in directly.
     pub advice_polys: Vec<(F, MultilinearPolynomial<F>)>,
@@ -179,6 +273,7 @@ impl<F: JoltField> RLCPolynomial<F> {
 
         let mut dense_polys = Vec::new();
         let mut onehot_polys = Vec::new();
+        let mut bytecode_polys = Vec::new();
         let mut advice_polys = Vec::new();
 
         for (poly_id, coeff) in poly_ids.iter().zip(coefficients.iter()) {
@@ -191,6 +286,11 @@ impl<F: JoltField> RLCPolynomial<F> {
                 | CommittedPolynomial::RamRa(_) => {
                     onehot_polys.push((*poly_id, *coeff));
                 }
+                CommittedPolynomial::BytecodeChunk(_) => {
+                    if let CommittedPolynomial::BytecodeChunk(idx) = poly_id {
+                        bytecode_polys.push((*idx, *coeff));
+                    }
+                }
                 CommittedPolynomial::TrustedAdvice | CommittedPolynomial::UntrustedAdvice => {
                     // Advice polynomials are passed in directly (not streamed from trace)
                     if advice_poly_map.contains_key(poly_id) {
@@ -206,6 +306,7 @@ impl<F: JoltField> RLCPolynomial<F> {
             streaming_context: Some(Arc::new(StreamingRLCContext {
                 dense_polys,
                 onehot_polys,
+                bytecode_polys,
                 advice_polys,
                 trace_source,
                 preprocessing,
@@ -399,6 +500,26 @@ guardrail in gen_from_trace should ensure sigma_main >= sigma_a."
             });
     }
 
+    /// Adds the bytecode chunk polynomial contribution to the vector-matrix-vector product result.
+    ///
+    /// Unpacks the chunk coefficients, bytecode preprocessing and one-hot parameters from `ctx`
+    /// and forwards them to `compute_bytecode_vmp_contribution`, which accumulates into `result`.
+    ///
+    /// Bytecode chunk polynomials are embedded in the top-left block by fixing the extra cycle
+    /// variables to 0, so only cycles in `[0, bytecode_len)` are visited.
+    fn vmp_bytecode_contribution(
+        result: &mut [F],
+        left_vec: &[F],
+        num_columns: usize,
+        ctx: &StreamingRLCContext<F>,
+    ) {
+        let polys = ctx.bytecode_polys.as_slice();
+        let bytecode = &ctx.preprocessing.bytecode;
+        let params = &ctx.one_hot_params;
+
+        compute_bytecode_vmp_contribution(result, left_vec, num_columns, polys, bytecode, params);
+    }
+
     /// Streaming VMP implementation that generates rows on-demand from trace.
     /// Achieves O(sqrt(n)) space complexity by lazily generating the witness.
     /// Single pass through trace for both dense and one-hot polynomials.
@@ -450,6 +571,7 @@ guardrail in gen_from_trace should ensure sigma_main >= sigma_a."
         let mut result = materialized.vector_matrix_product(left_vec);
 
         Self::vmp_advice_contribution(&mut result, left_vec, num_columns, ctx);
+        Self::vmp_bytecode_contribution(&mut result, left_vec, num_columns, ctx);
 
         result
     }
@@ -573,6 +695,7 @@ guardrail in gen_from_trace should ensure sigma_main >= sigma_a."
 
         // Advice contribution is small and independent of the trace; add it after the streamed pass.
         Self::vmp_advice_contribution(&mut result, left_vec, num_columns, ctx);
+        Self::vmp_bytecode_contribution(&mut result, left_vec, num_columns, ctx);
         result
     }
 
@@ -627,6 +750,7 @@ guardrail in gen_from_trace should ensure sigma_main >= sigma_a."
 
         // Advice contribution is small and independent of the trace; add it after the streamed pass.
         Self::vmp_advice_contribution(&mut result, left_vec, num_columns, ctx);
+        Self::vmp_bytecode_contribution(&mut result, left_vec, num_columns, ctx);
         result
     }
 }
diff --git a/jolt-core/src/subprotocols/booleanity.rs b/jolt-core/src/subprotocols/booleanity.rs
index ed6d58a0a0..9dd057eff8 100644
--- a/jolt-core/src/subprotocols/booleanity.rs
+++ b/jolt-core/src/subprotocols/booleanity.rs
@@ -36,7 +36,10 @@ use crate::{
             OpeningAccumulator, OpeningPoint, ProverOpeningAccumulator, SumcheckId,
             VerifierOpeningAccumulator, BIG_ENDIAN,
         },
-        shared_ra_polys::{compute_all_G_and_ra_indices, RaIndices, SharedRaPolynomials},
+        shared_ra_polys::{
+            compute_all_G, compute_all_G_and_ra_indices, compute_ra_indices, RaIndices,
+            SharedRaPolynomials,
+        },
         split_eq_poly::GruenSplitEqPolynomial,
         unipoly::UniPoly,
     },
@@ -388,6 +391,53 @@ impl<F: JoltField> BooleanitySumcheckProver<F> {
 
         gruen_poly * self.eq_r_r
     }
+
+    /// Address-phase update: binds `B` and updates `F` with the round challenge `r_j`.
+    ///
+    /// On the final address round (`round == log_k_chunk - 1`) this also stores `eq_r_r`,
+    /// builds `H` from per-polynomial scaled copies of the `F` table, and releases `G`.
+    fn ingest_address_challenge(&mut self, r_j: F::Challenge, round: usize) {
+        self.B.bind(r_j);
+        self.F.update(r_j);
+
+        // Only the last address round performs the hand-off to the cycle phase.
+        if round != self.params.log_k_chunk - 1 {
+            return;
+        }
+        self.eq_r_r = self.B.get_current_scalar();
+
+        // Take F and the RA indices out of `self`, leaving their defaults behind.
+        let eq_table = std::mem::take(&mut self.F);
+        let indices = std::mem::take(&mut self.ra_indices);
+        let unscaled = eq_table.clone_values();
+        let poly_count = self.params.polynomial_types.len();
+        debug_assert!(
+            poly_count == self.gamma_powers.len(),
+            "gamma_powers length mismatch: got {}, expected {}",
+            self.gamma_powers.len(),
+            poly_count
+        );
+        // Table i is the eq table with every entry multiplied by gamma_powers[i].
+        let scaled_tables: Vec<Vec<F>> = (0..poly_count)
+            .into_par_iter()
+            .map(|i| {
+                let scale = self.gamma_powers[i];
+                unscaled.iter().map(|v| scale * *v).collect()
+            })
+            .collect();
+        let one_hot = self.params.one_hot_params.clone();
+        self.H = Some(SharedRaPolynomials::new(scaled_tables, indices, one_hot));
+        // G is no longer needed; hand it to `drop_in_background_thread`.
+        drop_in_background_thread(std::mem::take(&mut self.G));
+    }
+
+    /// Phase 2 (cycle rounds): bind `D`, and `H` when it has been built, to `r_j`.
+    fn ingest_cycle_challenge(&mut self, r_j: F::Challenge) {
+        self.D.bind(r_j);
+        if let Some(shared_ra) = self.H.as_mut() {
+            shared_ra.bind_in_place(r_j, BindingOrder::LowToHigh);
+        }
+    }
 }
 
 impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T> for BooleanitySumcheckProver<F> {
@@ -407,48 +457,9 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T> for BooleanitySum
     #[tracing::instrument(skip_all, name = "BooleanitySumcheckProver::ingest_challenge")]
     fn ingest_challenge(&mut self, r_j: F::Challenge, round: usize) {
         if round < self.params.log_k_chunk {
-            // Phase 1: Bind B and update F
-            self.B.bind(r_j);
-            self.F.update(r_j);
-
-            // Transition to phase 2
-            if round == self.params.log_k_chunk - 1 {
-                self.eq_r_r = self.B.get_current_scalar();
-
-                // Initialize SharedRaPolynomials with per-poly pre-scaled eq tables (by rho_i)
-                let F_table = std::mem::take(&mut self.F);
-                let ra_indices = std::mem::take(&mut self.ra_indices);
-                let base_eq = F_table.clone_values();
-                let num_polys = self.params.polynomial_types.len();
-                debug_assert!(
-                    num_polys == self.gamma_powers.len(),
-                    "gamma_powers length mismatch: got {}, expected {}",
-                    self.gamma_powers.len(),
-                    num_polys
-                );
-                let tables: Vec<Vec<F>> = (0..num_polys)
-                    .into_par_iter()
-                    .map(|i| {
-                        let rho = self.gamma_powers[i];
-                        base_eq.iter().map(|v| rho * *v).collect()
-                    })
-                    .collect();
-                self.H = Some(SharedRaPolynomials::new(
-                    tables,
-                    ra_indices,
-                    self.params.one_hot_params.clone(),
-                ));
-
-                // Drop G arrays
-                let g = std::mem::take(&mut self.G);
-                drop_in_background_thread(g);
-            }
+            self.ingest_address_challenge(r_j, round);
         } else {
-            // Phase 2: Bind D and H
-            self.D.bind(r_j);
-            if let Some(ref mut h) = self.H {
-                h.bind_in_place(r_j, BindingOrder::LowToHigh);
-            }
+            self.ingest_cycle_challenge(r_j);
         }
     }
 
@@ -483,6 +494,393 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T> for BooleanitySum
     }
 }
 
+/// Booleanity address-phase sumcheck prover.
+///
+/// Runs only the first `log_k_chunk` sumcheck rounds, i.e. the rounds that bind the address variables.
+/// The remaining cycle rounds are run by [`BooleanityCycleSumcheckProver`], which is built separately from the witness and the opening accumulator.
+#[derive(Allocative)]
+pub struct BooleanityAddressSumcheckProver<F: JoltField> {
+    /// B: split-eq polynomial over `params.r_address`, bound LowToHigh with each round's challenge.
+    B: GruenSplitEqPolynomial<F>,
+    /// G[i][k] = Σ_j eq(r_cycle, j) · ra_i(k, j), one row per RA polynomial.
+    G: Vec<Vec<F>>,
+    /// F: expanding table created for `2^log_k_chunk` entries, updated with each round's challenge.
+    F: ExpandingTable<F>,
+    /// Most recent round polynomial, kept so `ingest_challenge` can evaluate it at that round's challenge.
+    last_round_poly: Option<UniPoly<F>>,
+    /// Evaluation of the final address-round polynomial at its challenge; `None` until that round is ingested.
+    address_claim: Option<F>,
+    /// Sumcheck parameters (the cycle-phase prover is constructed with the same parameter type).
+    pub params: BooleanitySumcheckParams<F>,
+}
+
+impl<F: JoltField> BooleanityAddressSumcheckProver<F> {
+    /// Builds the address-phase prover state from `params` and the execution trace.
+    ///
+    /// Only the G polynomials are computed here (via `compute_all_G`); this constructor does not build RA indices.
+    #[tracing::instrument(skip_all, name = "BooleanityAddressSumcheckProver::initialize")]
+    pub fn initialize(
+        params: BooleanitySumcheckParams<F>,
+        trace: &[Cycle],
+        bytecode: &BytecodePreprocessing,
+        memory_layout: &MemoryLayout,
+    ) -> Self {
+        // Compute G from the trace (witness-dependent), using the one-hot parameters and `r_cycle`.
+        let G = compute_all_G::<F>(
+            trace,
+            bytecode,
+            memory_layout,
+            &params.one_hot_params,
+            &params.r_cycle,
+        );
+
+        // Initialize the split-eq polynomial over `r_address`, to be bound LowToHigh.
+        let B = GruenSplitEqPolynomial::new(&params.r_address, BindingOrder::LowToHigh);
+
+        // Initialize the expanding table for the address phase, reset to the single value one.
+        let k_chunk = 1 << params.log_k_chunk;
+        let mut F_table = ExpandingTable::new(k_chunk, BindingOrder::LowToHigh);
+        F_table.reset(F::one());
+
+        Self {
+            B,
+            G,
+            F: F_table,
+            last_round_poly: None,
+            address_claim: None,
+            params,
+        }
+    }
+
+    fn compute_message_impl(&self, round: usize, previous_claim: F) -> UniPoly<F> {
+        let m = round + 1; // 1-based round number; each `k_prime` below owns 2^m consecutive entries of G_i
+        let B = &self.B;
+        let N = self.params.polynomial_types.len();
+
+        // Fold over the split-eq polynomial B to obtain the two coefficients that `gruen_poly_deg_3` consumes below.
+        let quadratic_coeffs: [F; DEGREE_BOUND - 1] = B
+            .par_fold_out_in_unreduced::<9, { DEGREE_BOUND - 1 }>(&|k_prime| {
+                let coeffs = (0..N)
+                    .into_par_iter()
+                    .map(|i| {
+                        let G_i = &self.G[i];
+                        let inner_sum = G_i[k_prime << m..(k_prime + 1) << m]
+                            .par_iter()
+                            .enumerate()
+                            .map(|(k, &G_k)| {
+                                let k_m = k >> (m - 1); // top bit of the m-bit in-block index k
+                                let F_k = self.F[k & ((1 << (m - 1)) - 1)]; // F entry selected by the low m-1 bits of k
+                                let G_times_F = G_k * F_k;
+
+                                let eval_infty = G_times_F * F_k; // G·F²
+                                let eval_0 = if k_m == 0 {
+                                    eval_infty - G_times_F // G·F·(F − 1)
+                                } else {
+                                    F::zero()
+                                };
+                                [eval_0, eval_infty]
+                            })
+                            .fold_with(
+                                [F::Unreduced::<5>::zero(); DEGREE_BOUND - 1],
+                                |running, new| {
+                                    [
+                                        running[0] + new[0].as_unreduced_ref(),
+                                        running[1] + new[1].as_unreduced_ref(),
+                                    ]
+                                },
+                            )
+                            .reduce(
+                                || [F::Unreduced::zero(); DEGREE_BOUND - 1],
+                                |running, new| [running[0] + new[0], running[1] + new[1]],
+                            );
+
+                        let gamma_2i = self.params.gamma_powers_square[i]; // batching weight for polynomial i
+                        [
+                            gamma_2i * F::from_barrett_reduce(inner_sum[0]),
+                            gamma_2i * F::from_barrett_reduce(inner_sum[1]),
+                        ]
+                    })
+                    .reduce(
+                        || [F::zero(); DEGREE_BOUND - 1],
+                        |running, new| [running[0] + new[0], running[1] + new[1]],
+                    );
+                coeffs
+            });
+
+        B.gruen_poly_deg_3(quadratic_coeffs[0], quadratic_coeffs[1], previous_claim)
+    }
+
+    fn ingest_challenge_impl(&mut self, r_j: F::Challenge) {
+        self.B.bind(r_j);
+        self.F.update(r_j);
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
+    for BooleanityAddressSumcheckProver<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_k_chunk
+    }
+
+    fn input_claim(&self, _accumulator: &ProverOpeningAccumulator<F>) -> F {
+        self.params.input_claim(_accumulator)
+    }
+
+    fn compute_message(&mut self, round: usize, previous_claim: F) -> UniPoly<F> {
+        let poly = self.compute_message_impl(round, previous_claim);
+        self.last_round_poly = Some(poly.clone());
+        poly
+    }
+
+    /// Stores the final address-round claim (on the last round only), then binds `r_j` into B and F.
+    fn ingest_challenge(&mut self, r_j: F::Challenge, round: usize) {
+        // `round + 1 == n` instead of `round == n - 1`: no usize underflow when `log_k_chunk` is 0.
+        let is_last_round = round + 1 == self.params.log_k_chunk;
+        // The stored polynomial is always consumed, but it is evaluated only when its value is kept.
+        if let Some(poly) = self.last_round_poly.take().filter(|_| is_last_round) {
+            self.address_claim = Some(poly.evaluate(&r_j));
+        }
+        self.ingest_challenge_impl(r_j);
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut ProverOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let mut r_address = sumcheck_challenges.to_vec();
+        r_address.reverse();
+        let opening_point = OpeningPoint::<BIG_ENDIAN, F>::new(r_address);
+        let address_claim = self
+            .address_claim
+            .expect("Booleanity address-phase claim missing");
+        accumulator.append_virtual(
+            transcript,
+            VirtualPolynomial::BooleanityAddrClaim,
+            SumcheckId::BooleanityAddressPhase,
+            opening_point,
+            address_claim,
+        );
+    }
+
+    #[cfg(feature = "allocative")]
+    fn update_flamegraph(&self, flamegraph: &mut FlameGraphBuilder) {
+        flamegraph.visit_root(self);
+    }
+}
+
+/// Booleanity cycle-phase sumcheck prover.
+///
+/// Runs the remaining `log_t` sumcheck rounds, i.e. the rounds that bind the cycle variables.
+/// All of its state is rebuilt by [`BooleanityCycleSumcheckProver::initialize`]; nothing is moved over from the address-phase prover.
+#[derive(Allocative)]
+pub struct BooleanityCycleSumcheckProver<F: JoltField> {
+    /// D: split-eq polynomial over `params.r_cycle`, bound LowToHigh with each round's challenge.
+    D: GruenSplitEqPolynomial<F>,
+    /// Shared H polynomials: RA polynomials with address variables bound, polynomial i pre-scaled by `gamma_powers[i]`.
+    H: SharedRaPolynomials<F>,
+    /// eq(`params.r_address`, address-phase challenges): the scalar left after binding every address challenge.
+    eq_r_r: F,
+    /// Per-polynomial powers γ^i (in the base field).
+    gamma_powers: Vec<F>,
+    /// Per-polynomial inverse powers γ^{-i} (in the base field), used to unscale the final claims of H.
+    gamma_powers_inv: Vec<F>,
+    /// Sumcheck parameters (`log_t`, `log_k_chunk`, `r_address`, `r_cycle`, `polynomial_types`, ...).
+    pub params: BooleanitySumcheckParams<F>,
+}
+
+impl<F: JoltField> BooleanityCycleSumcheckProver<F> {
+    /// Initialize the cycle-phase prover from scratch (Option B).
+    ///
+    /// Reconstructs all cycle-phase state from:
+    /// - `params` (sampled in Stage 6a, must match verifier)
+    /// - witness inputs (`trace`, `bytecode`, `memory_layout`)
+    /// - Stage 6a address challenges (read from `accumulator`); panics if gamma has no inverse
+    #[tracing::instrument(skip_all, name = "BooleanityCycleSumcheckProver::initialize")]
+    pub fn initialize(
+        params: BooleanitySumcheckParams<F>,
+        trace: &[Cycle],
+        bytecode: &BytecodePreprocessing,
+        memory_layout: &MemoryLayout,
+        accumulator: &ProverOpeningAccumulator<F>,
+    ) -> Self {
+        // Recover Stage 6a address challenges from the accumulator.
+        // These were stored as BIG_ENDIAN (MSB-first) by the address-phase cache_openings.
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BooleanityAddrClaim,
+            SumcheckId::BooleanityAddressPhase,
+        );
+        let mut r_address_low_to_high = r_address_point.r;
+        r_address_low_to_high.reverse();
+
+        // Recompute eq_r_r = eq(params.r_address, r_address_challenges) using the same binding
+        // progression as the address prover.
+        let mut B = GruenSplitEqPolynomial::new(&params.r_address, BindingOrder::LowToHigh);
+        for r_j in r_address_low_to_high.iter().cloned() {
+            B.bind(r_j);
+        }
+        let eq_r_r = B.get_current_scalar();
+
+        // Recompute base eq table over k_chunk addresses from the address challenges.
+        let k_chunk = 1 << params.log_k_chunk;
+        let mut F_table = ExpandingTable::new(k_chunk, BindingOrder::LowToHigh);
+        F_table.reset(F::one());
+        for r_j in r_address_low_to_high.iter().cloned() {
+            F_table.update(r_j);
+        }
+        let base_eq = F_table.clone_values();
+
+        // Compute RA indices from witness (unfused with G computation).
+        let ra_indices = compute_ra_indices(trace, bytecode, memory_layout, &params.one_hot_params);
+
+        // Compute prover-only batching coefficients rho_i = gamma^i and inverses.
+        // gamma is inverted once; gamma^{-i} is then accumulated by multiplication, so the loop
+        // performs no field inversions.
+        let num_polys = params.polynomial_types.len();
+        let gamma_f: F = params.gamma.into();
+        let gamma_inv = gamma_f.inverse().expect("gamma must be nonzero");
+        let mut gamma_powers = Vec::with_capacity(num_polys);
+        let mut gamma_powers_inv = Vec::with_capacity(num_polys);
+        let (mut rho_i, mut rho_inv_i) = (F::one(), F::one());
+        for _ in 0..num_polys {
+            gamma_powers.push(rho_i);
+            gamma_powers_inv.push(rho_inv_i);
+            rho_i *= gamma_f;
+            rho_inv_i *= gamma_inv;
+        }
+
+        // Initialize SharedRaPolynomials with per-poly pre-scaled eq tables (by rho_i).
+        let tables: Vec<Vec<F>> = (0..num_polys)
+            .into_par_iter()
+            .map(|i| {
+                let rho = gamma_powers[i];
+                base_eq.iter().map(|v| rho * *v).collect()
+            })
+            .collect();
+        let H = SharedRaPolynomials::new(tables, ra_indices, params.one_hot_params.clone());
+
+        // Cycle split-eq polynomial over r_cycle.
+        let D = GruenSplitEqPolynomial::new(&params.r_cycle, BindingOrder::LowToHigh);
+
+        Self {
+            D,
+            H,
+            eq_r_r,
+            gamma_powers,
+            gamma_powers_inv,
+            params,
+        }
+    }
+
+    fn compute_message_impl(&self, previous_claim: F) -> UniPoly<F> {
+        let D = &self.D;
+        let H = &self.H;
+        let num_polys = H.num_polys();
+
+        // Fold over the split-eq polynomial D; per index pair (2j', 2j'+1), sum over all polynomials without reducing per term.
+        let quadratic_coeffs: [F; DEGREE_BOUND - 1] = D
+            .par_fold_out_in_unreduced::<9, { DEGREE_BOUND - 1 }>(&|j_prime| {
+                let mut acc_c = F::Unreduced::<9>::zero(); // accumulates Σ_i h_0 · (h_0 − ρ_i)
+                let mut acc_e = F::Unreduced::<9>::zero(); // accumulates Σ_i (h_1 − h_0)²
+                for i in 0..num_polys {
+                    let h_0 = H.get_bound_coeff(i, 2 * j_prime);
+                    let h_1 = H.get_bound_coeff(i, 2 * j_prime + 1);
+                    let b = h_1 - h_0;
+
+                    let rho = self.gamma_powers[i]; // H is pre-scaled by ρ_i, so ρ_i takes the place of 1 in h·(h − 1)
+                    acc_c += h_0.mul_unreduced::<9>(h_0 - rho);
+                    acc_e += b.mul_unreduced::<9>(b);
+                }
+                [
+                    F::from_montgomery_reduce::<9>(acc_c),
+                    F::from_montgomery_reduce::<9>(acc_e),
+                ]
+            });
+
+        // Divide eq_r_r out of the running claim for `gruen_poly_deg_3`, then scale the result back; `unwrap` panics if eq_r_r has no inverse.
+        let adjusted_claim = previous_claim * self.eq_r_r.inverse().unwrap();
+        let gruen_poly =
+            D.gruen_poly_deg_3(quadratic_coeffs[0], quadratic_coeffs[1], adjusted_claim);
+
+        gruen_poly * self.eq_r_r
+    }
+
+    fn ingest_challenge_impl(&mut self, r_j: F::Challenge) {
+        self.D.bind(r_j);
+        self.H.bind_in_place(r_j, BindingOrder::LowToHigh);
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
+    for BooleanityCycleSumcheckProver<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_t
+    }
+
+    fn input_claim(&self, accumulator: &ProverOpeningAccumulator<F>) -> F {
+        accumulator
+            .get_virtual_polynomial_opening(
+                VirtualPolynomial::BooleanityAddrClaim,
+                SumcheckId::BooleanityAddressPhase,
+            )
+            .1
+    }
+
+    fn compute_message(&mut self, _round: usize, previous_claim: F) -> UniPoly<F> {
+        self.compute_message_impl(previous_claim)
+    }
+
+    fn ingest_challenge(&mut self, r_j: F::Challenge, _round: usize) {
+        self.ingest_challenge_impl(r_j)
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut ProverOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BooleanityAddrClaim,
+            SumcheckId::BooleanityAddressPhase,
+        );
+        let mut r_address_le = r_address_point.r;
+        r_address_le.reverse();
+        let mut full_challenges = r_address_le;
+        full_challenges.extend_from_slice(sumcheck_challenges);
+        let opening_point = self.params.normalize_opening_point(&full_challenges);
+
+        // H is scaled by rho_i; unscale so cached openings match the committed polynomials.
+        let claims: Vec<F> = (0..self.H.num_polys())
+            .map(|i| self.H.final_sumcheck_claim(i) * self.gamma_powers_inv[i])
+            .collect();
+
+        accumulator.append_sparse(
+            transcript,
+            self.params.polynomial_types.clone(),
+            SumcheckId::Booleanity,
+            opening_point.r[..self.params.log_k_chunk].to_vec(),
+            opening_point.r[self.params.log_k_chunk..].to_vec(),
+            claims,
+        );
+    }
+
+    #[cfg(feature = "allocative")]
+    fn update_flamegraph(&self, flamegraph: &mut FlameGraphBuilder) {
+        flamegraph.visit_root(self);
+    }
+}
+
 /// Booleanity Sumcheck Verifier.
 pub struct BooleanitySumcheckVerifier<F: JoltField> {
     params: BooleanitySumcheckParams<F>,
@@ -545,3 +943,163 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T> for BooleanityS
         );
     }
 }
+
+pub struct BooleanityAddressSumcheckVerifier<F: JoltField> {
+    params: BooleanitySumcheckParams<F>,
+}
+
+impl<F: JoltField> BooleanityAddressSumcheckVerifier<F> {
+    pub fn new(params: BooleanitySumcheckParams<F>) -> Self {
+        Self { params }
+    }
+
+    /// Consume this verifier and return the underlying parameters (for Option B orchestration).
+    pub fn into_params(self) -> BooleanitySumcheckParams<F> {
+        self.params
+    }
+
+    pub fn into_cycle_verifier(self) -> BooleanityCycleSumcheckVerifier<F> {
+        BooleanityCycleSumcheckVerifier {
+            params: self.params,
+        }
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
+    for BooleanityAddressSumcheckVerifier<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_k_chunk
+    }
+
+    fn input_claim(&self, accumulator: &VerifierOpeningAccumulator<F>) -> F {
+        self.params.input_claim(accumulator)
+    }
+
+    fn expected_output_claim(
+        &self,
+        accumulator: &VerifierOpeningAccumulator<F>,
+        _sumcheck_challenges: &[F::Challenge],
+    ) -> F {
+        accumulator
+            .get_virtual_polynomial_opening(
+                VirtualPolynomial::BooleanityAddrClaim,
+                SumcheckId::BooleanityAddressPhase,
+            )
+            .1
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut VerifierOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let mut r_address = sumcheck_challenges.to_vec();
+        r_address.reverse();
+        accumulator.append_virtual(
+            transcript,
+            VirtualPolynomial::BooleanityAddrClaim,
+            SumcheckId::BooleanityAddressPhase,
+            OpeningPoint::<BIG_ENDIAN, F>::new(r_address),
+        );
+    }
+}
+
+pub struct BooleanityCycleSumcheckVerifier<F: JoltField> {
+    params: BooleanitySumcheckParams<F>,
+}
+
+impl<F: JoltField> BooleanityCycleSumcheckVerifier<F> {
+    pub fn new(params: BooleanitySumcheckParams<F>) -> Self {
+        Self { params }
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
+    for BooleanityCycleSumcheckVerifier<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_t
+    }
+
+    fn input_claim(&self, accumulator: &VerifierOpeningAccumulator<F>) -> F {
+        accumulator
+            .get_virtual_polynomial_opening(
+                VirtualPolynomial::BooleanityAddrClaim,
+                SumcheckId::BooleanityAddressPhase,
+            )
+            .1
+    }
+
+    fn expected_output_claim(
+        &self,
+        accumulator: &VerifierOpeningAccumulator<F>,
+        sumcheck_challenges: &[F::Challenge],
+    ) -> F {
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BooleanityAddrClaim,
+            SumcheckId::BooleanityAddressPhase,
+        );
+        let mut r_address_le = r_address_point.r;
+        r_address_le.reverse();
+        let mut full_challenges = r_address_le;
+        full_challenges.extend_from_slice(sumcheck_challenges);
+
+        let ra_claims: Vec<F> = self
+            .params
+            .polynomial_types
+            .iter()
+            .map(|poly_type| {
+                accumulator
+                    .get_committed_polynomial_opening(*poly_type, SumcheckId::Booleanity)
+                    .1
+            })
+            .collect();
+
+        let combined_r: Vec<F::Challenge> = self
+            .params
+            .r_address
+            .iter()
+            .cloned()
+            .rev()
+            .chain(self.params.r_cycle.iter().cloned().rev())
+            .collect();
+
+        EqPolynomial::<F>::mle(&full_challenges, &combined_r)
+            * zip(&self.params.gamma_powers_square, ra_claims)
+                .map(|(gamma_2i, ra)| (ra.square() - ra) * gamma_2i)
+                .sum::<F>()
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut VerifierOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BooleanityAddrClaim,
+            SumcheckId::BooleanityAddressPhase,
+        );
+        let mut r_address_le = r_address_point.r;
+        r_address_le.reverse();
+        let mut full_challenges = r_address_le;
+        full_challenges.extend_from_slice(sumcheck_challenges);
+        let opening_point = self.params.normalize_opening_point(&full_challenges);
+        accumulator.append_sparse(
+            transcript,
+            self.params.polynomial_types.clone(),
+            SumcheckId::Booleanity,
+            opening_point.r,
+        );
+    }
+}
diff --git a/jolt-core/src/utils/errors.rs b/jolt-core/src/utils/errors.rs
index a9e8b12909..b3800e13eb 100644
--- a/jolt-core/src/utils/errors.rs
+++ b/jolt-core/src/utils/errors.rs
@@ -28,10 +28,14 @@ pub enum ProofVerifyError {
     InvalidReadWriteConfig(String),
     #[error("Invalid one-hot configuration: {0}")]
     InvalidOneHotConfig(String),
+    #[error("Invalid bytecode commitment configuration: {0}")]
+    InvalidBytecodeConfig(String),
     #[error("Dory proof verification failed: {0}")]
     DoryError(String),
     #[error("Sumcheck verification failed")]
     SumcheckVerificationError,
     #[error("Univariate-skip round verification failed")]
     UniSkipVerificationError,
+    #[error("Bytecode type mismatch: {0}")]
+    BytecodeTypeMismatch(String),
 }
diff --git a/jolt-core/src/zkvm/bytecode/chunks.rs b/jolt-core/src/zkvm/bytecode/chunks.rs
new file mode 100644
index 0000000000..991818edbf
--- /dev/null
+++ b/jolt-core/src/zkvm/bytecode/chunks.rs
@@ -0,0 +1,147 @@
+use crate::field::JoltField;
+use crate::poly::commitment::dory::DoryGlobals;
+use crate::poly::multilinear_polynomial::MultilinearPolynomial;
+use crate::utils::thread::unsafe_allocate_zero_vec;
+use crate::zkvm::bytecode::BytecodePreprocessing;
+use crate::zkvm::instruction::{
+    Flags, InstructionLookup, NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS,
+};
+use crate::zkvm::lookup_table::LookupTables;
+use common::constants::{REGISTER_COUNT, XLEN};
+use rayon::prelude::*;
+use tracer::instruction::Instruction;
+
+/// Total number of "lanes" (per-instruction field slots) committed for bytecode; the order of the terms mirrors the lane layout in `lane_value`.
+pub const fn total_lanes() -> usize {
+    3 * (REGISTER_COUNT as usize) // rs1, rs2, rd one-hot lanes
+        + 2 // unexpanded_pc, imm
+        + NUM_CIRCUIT_FLAGS // one lane per circuit flag
+        + NUM_INSTRUCTION_FLAGS // one lane per instruction flag
+        + <LookupTables<XLEN> as strum::EnumCount>::COUNT // one-hot lookup-table selector
+        + 1 // raf flag
+}
+
+#[allow(clippy::too_many_arguments)] // one argument per decoded instruction field; returns the field value stored in lane `global_lane`
+#[inline(always)]
+pub fn lane_value<F: JoltField>(
+    global_lane: usize,
+    rs1: Option<u8>,
+    rs2: Option<u8>,
+    rd: Option<u8>,
+    unexpanded_pc: F,
+    imm: F,
+    circuit_flags: &[bool; NUM_CIRCUIT_FLAGS],
+    instr_flags: &[bool; NUM_INSTRUCTION_FLAGS],
+    lookup_idx: Option<usize>,
+    raf_flag: bool,
+) -> F {
+    let reg_count = REGISTER_COUNT as usize;
+    let rs1_start = 0usize; // lane layout: [rs1 | rs2 | rd | unexpanded_pc | imm | circuit flags | instruction flags | lookup table | raf]
+    let rs2_start = rs1_start + reg_count;
+    let rd_start = rs2_start + reg_count;
+    let unexp_pc_idx = rd_start + reg_count;
+    let imm_idx = unexp_pc_idx + 1;
+    let circuit_start = imm_idx + 1;
+    let instr_start = circuit_start + NUM_CIRCUIT_FLAGS;
+    let lookup_start = instr_start + NUM_INSTRUCTION_FLAGS;
+    let raf_flag_idx = lookup_start + <LookupTables<XLEN> as strum::EnumCount>::COUNT;
+
+    if global_lane < rs2_start {
+        // rs1 one-hot: one iff this lane's register index equals rs1 (zero when rs1 is None)
+        let r = global_lane as u8; // narrowing cast: assumes the register index fits in u8 — TODO confirm REGISTER_COUNT <= 256
+        return F::from_bool(rs1 == Some(r));
+    }
+    if global_lane < rd_start {
+        // rs2 one-hot: same encoding, lane index taken relative to the rs2 block
+        let r = (global_lane - rs2_start) as u8;
+        return F::from_bool(rs2 == Some(r));
+    }
+    if global_lane < unexp_pc_idx {
+        // rd one-hot: same encoding, lane index taken relative to the rd block
+        let r = (global_lane - rd_start) as u8;
+        return F::from_bool(rd == Some(r));
+    }
+    if global_lane == unexp_pc_idx {
+        return unexpanded_pc;
+    }
+    if global_lane == imm_idx {
+        return imm;
+    }
+    if global_lane < instr_start {
+        let flag_idx = global_lane - circuit_start; // index into `circuit_flags`
+        return F::from_bool(circuit_flags[flag_idx]);
+    }
+    if global_lane < lookup_start {
+        let flag_idx = global_lane - instr_start; // index into `instr_flags`
+        return F::from_bool(instr_flags[flag_idx]);
+    }
+    if global_lane < raf_flag_idx {
+        let table_idx = global_lane - lookup_start; // one-hot over lookup-table indices
+        return F::from_bool(lookup_idx == Some(table_idx));
+    }
+    debug_assert_eq!(global_lane, raf_flag_idx); // checked in debug builds only; release builds treat any larger lane as the raf lane
+    F::from_bool(raf_flag)
+}
+
+/// Builds the bytecode polynomials to commit, one per chunk of `2^log_k_chunk` lanes.
+///
+/// Each polynomial has `k_chunk * bytecode_len` coefficients. The coefficient for `(lane, k)` is
+/// `lane_value` of instruction `k` at global lane `chunk_idx * k_chunk + lane`, written at the
+/// index chosen by the current Dory layout. Lanes past `total_lanes()` in the last chunk are never written.
+#[tracing::instrument(skip_all, name = "bytecode::build_bytecode_chunks")]
+pub fn build_bytecode_chunks<F: JoltField>(
+    bytecode: &BytecodePreprocessing,
+    log_k_chunk: usize,
+) -> Vec<MultilinearPolynomial<F>> {
+    let k_chunk = 1usize << log_k_chunk;
+    let bytecode_len = bytecode.bytecode.len();
+    let total = total_lanes();
+    let num_chunks = total.div_ceil(k_chunk);
+
+    (0..num_chunks)
+        .into_par_iter()
+        .map(|chunk_idx| {
+            // Loop-invariant per chunk: the layout lookup and the number of real lanes in this chunk.
+            let layout = DoryGlobals::get_layout();
+            let lane_base = chunk_idx * k_chunk;
+            let lanes_in_chunk = k_chunk.min(total - lane_base);
+            let mut coeffs = unsafe_allocate_zero_vec(k_chunk * bytecode_len);
+            for (k, instr) in bytecode.bytecode.iter().enumerate() {
+                let normalized = instr.normalize();
+                let circuit_flags = <Instruction as Flags>::circuit_flags(instr);
+                let instr_flags = <Instruction as Flags>::instruction_flags(instr);
+                let lookup_idx = <Instruction as InstructionLookup<XLEN>>::lookup_table(instr)
+                    .map(|t| LookupTables::<XLEN>::enum_index(&t));
+                let raf_flag =
+                    !crate::zkvm::instruction::InterleavedBitsMarker::is_interleaved_operands(
+                        &circuit_flags,
+                    );
+
+                let unexpanded_pc = F::from_u64(normalized.address as u64);
+                let imm = F::from_i128(normalized.operands.imm);
+                let rs1 = normalized.operands.rs1;
+                let rs2 = normalized.operands.rs2;
+                let rd = normalized.operands.rd;
+
+                // Only real lanes are visited; padding lanes keep the value they were allocated with.
+                for lane in 0..lanes_in_chunk {
+                    let value = lane_value::<F>(
+                        lane_base + lane,
+                        rs1,
+                        rs2,
+                        rd,
+                        unexpanded_pc,
+                        imm,
+                        &circuit_flags,
+                        &instr_flags,
+                        lookup_idx,
+                        raf_flag,
+                    );
+                    let idx = layout.address_cycle_to_index(lane, k, k_chunk, bytecode_len);
+                    coeffs[idx] = value;
+                }
+            }
+            MultilinearPolynomial::from(coeffs)
+        })
+        .collect()
+}
diff --git a/jolt-core/src/zkvm/bytecode/mod.rs b/jolt-core/src/zkvm/bytecode/mod.rs
index 82f6fb62ab..6744c16944 100644
--- a/jolt-core/src/zkvm/bytecode/mod.rs
+++ b/jolt-core/src/zkvm/bytecode/mod.rs
@@ -1,12 +1,212 @@
-use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
+use std::io::{Read, Write};
+use std::sync::Arc;
+
+use ark_serialize::{
+    CanonicalDeserialize, CanonicalSerialize, Compress, SerializationError, Valid, Validate,
+};
 use common::constants::{ALIGNMENT_FACTOR_BYTECODE, RAM_START_ADDRESS};
 use tracer::instruction::{Cycle, Instruction};
 
+use crate::poly::commitment::commitment_scheme::CommitmentScheme;
+use crate::poly::commitment::dory::{DoryContext, DoryGlobals};
+use crate::utils::math::Math;
+use crate::utils::errors::ProofVerifyError;
+use crate::zkvm::bytecode::chunks::{build_bytecode_chunks, total_lanes};
+use rayon::prelude::*;
+
+pub(crate) mod chunks;
 pub mod read_raf_checking;
 
+/// Bytecode commitments that are assumed to have been derived from actual bytecode.
+///
+/// The type name marks, by convention only, that the commitments came from honest
+/// preprocessing of full bytecode. The canonical constructor is `derive()`,
+/// which takes full bytecode and computes commitments.
+///
+/// # Trust Model
+/// - Create via `derive()` from full bytecode (offline preprocessing)
+/// - Or deserialize from a trusted source (assumes honest origin)
+/// - Pass to verifier preprocessing for succinct (online) verification
+///
+/// # Security Warning
+/// All fields are `pub`, so nothing stops construction with arbitrary commitments (bypassing `derive()`);
+/// verification is unsound in that case. Only use `derive()` or trusted deserialization.
+#[derive(Clone, Debug, PartialEq, CanonicalSerialize, CanonicalDeserialize)]
+pub struct TrustedBytecodeCommitments<PCS: CommitmentScheme> {
+    /// The bytecode chunk commitments, one per lane chunk.
+    /// The field is public, so trust is by convention: create via `derive()` or deserialize from a trusted source.
+    pub commitments: Vec<PCS::Commitment>,
+    /// log2(k_chunk) used for lane chunking.
+    pub log_k_chunk: u8,
+    /// Bytecode length as recorded by `derive()` (`bytecode.bytecode.len()`, which preprocessing pads to a power of two).
+    pub bytecode_len: usize,
+}
+
+impl<PCS: CommitmentScheme> TrustedBytecodeCommitments<PCS> {
+    /// Derive commitments from full bytecode (the canonical constructor).
+    ///
+    /// Builds one polynomial per lane chunk with `build_bytecode_chunks` and commits to each with `PCS::commit`.
+    /// Returns the commitments (with `log_k_chunk` and `bytecode_len` recorded) plus one opening-proof hint per chunk.
+    #[tracing::instrument(skip_all, name = "TrustedBytecodeCommitments::derive")]
+    pub fn derive(
+        bytecode: &BytecodePreprocessing,
+        generators: &PCS::ProverSetup,
+        log_k_chunk: usize,
+        max_trace_len: usize,
+    ) -> (Self, Vec<PCS::OpeningProofHint>) {
+        let k_chunk = 1usize << log_k_chunk;
+        let bytecode_len = bytecode.bytecode.len();
+        let num_chunks = total_lanes().div_ceil(k_chunk); // only used by the debug assertion below
+
+        let log_t = max_trace_len.log_2();
+        let _guard = DoryGlobals::initialize_bytecode_context_for_main_sigma(
+            k_chunk,
+            bytecode_len,
+            log_k_chunk,
+            log_t,
+        );
+        let _ctx = DoryGlobals::with_context(DoryContext::Bytecode); // NOTE(review): `_guard` and `_ctx` presumably scope Dory global state until return — confirm
+
+        let bytecode_chunks = build_bytecode_chunks::<PCS::Field>(bytecode, log_k_chunk);
+        debug_assert_eq!(bytecode_chunks.len(), num_chunks);
+
+        let (commitments, hints): (Vec<_>, Vec<_>) = bytecode_chunks
+            .par_iter()
+            .map(|poly| PCS::commit(poly, generators))
+            .unzip();
+
+        (
+            Self {
+                commitments,
+                log_k_chunk: log_k_chunk as u8, // narrowing cast: silently truncates if log_k_chunk > 255
+                bytecode_len,
+            },
+            hints,
+        )
+    }
+}
+
+/// Bytecode information available to the verifier.
+///
+/// In `Full` mode, the verifier has access to the complete bytecode preprocessing
+/// and can materialize bytecode-dependent polynomials (O(K) work).
+///
+/// In `Committed` mode, the verifier only sees commitments to the bytecode polynomials,
+/// enabling succinct verification via claim reductions.
+///
+/// **Note**: The bytecode size K is stored in `JoltSharedPreprocessing.bytecode_size`,
+/// NOT in this enum. Use `shared.bytecode_size` to get the size.
+#[derive(Debug, Clone)]
+pub enum VerifierBytecode<PCS: CommitmentScheme> {
+    /// Full bytecode available (Full mode) — verifier can materialize polynomials.
+    Full(Arc<BytecodePreprocessing>),
+    /// Only trusted commitments available (Committed mode) — verifier uses claim reductions.
+    /// Size K is in `JoltSharedPreprocessing.bytecode_size`.
+    Committed(TrustedBytecodeCommitments<PCS>),
+}
+
+impl<PCS: CommitmentScheme> VerifierBytecode<PCS> {
+    /// Returns the full bytecode preprocessing, or an error if in Committed mode.
+    pub fn as_full(&self) -> Result<&Arc<BytecodePreprocessing>, ProofVerifyError> {
+        match self {
+            VerifierBytecode::Full(bp) => Ok(bp),
+            VerifierBytecode::Committed(_) => Err(ProofVerifyError::BytecodeTypeMismatch(
+                "expected Full, got Committed".to_string(),
+            )),
+        }
+    }
+
+    /// Returns true if this is Full mode.
+    pub fn is_full(&self) -> bool {
+        matches!(self, VerifierBytecode::Full(_))
+    }
+
+    /// Returns true if this is Committed mode.
+    pub fn is_committed(&self) -> bool {
+        matches!(self, VerifierBytecode::Committed(_))
+    }
+
+    /// Returns the trusted commitments, or an error if in Full mode.
+    pub fn as_committed(&self) -> Result<&TrustedBytecodeCommitments<PCS>, ProofVerifyError> {
+        match self {
+            VerifierBytecode::Committed(trusted) => Ok(trusted),
+            VerifierBytecode::Full(_) => Err(ProofVerifyError::BytecodeTypeMismatch(
+                "expected Committed, got Full".to_string(),
+            )),
+        }
+    }
+}
+
+// Manual serialization for VerifierBytecode.
+// Format: a u8 tag (0 = Full, 1 = Committed) followed by the variant's payload; must stay in sync with `deserialize_with_mode`.
+impl<PCS: CommitmentScheme> CanonicalSerialize for VerifierBytecode<PCS> {
+    fn serialize_with_mode<W: Write>(
+        &self,
+        mut writer: W,
+        compress: Compress,
+    ) -> Result<(), SerializationError> {
+        match self {
+            VerifierBytecode::Full(bp) => {
+                0u8.serialize_with_mode(&mut writer, compress)?; // tag 0: full bytecode preprocessing follows
+                bp.as_ref().serialize_with_mode(&mut writer, compress)?;
+            }
+            VerifierBytecode::Committed(trusted) => {
+                1u8.serialize_with_mode(&mut writer, compress)?; // tag 1: trusted commitments follow
+                trusted.serialize_with_mode(&mut writer, compress)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn serialized_size(&self, compress: Compress) -> usize {
+        1 + match self { // the leading 1 accounts for the tag
+            VerifierBytecode::Full(bp) => bp.serialized_size(compress),
+            VerifierBytecode::Committed(trusted) => trusted.serialized_size(compress),
+        }
+    }
+}
+
+impl<PCS: CommitmentScheme> Valid for VerifierBytecode<PCS> {
+    fn check(&self) -> Result<(), SerializationError> {
+        match self {
+            VerifierBytecode::Full(bp) => bp.check(),
+            VerifierBytecode::Committed(trusted) => trusted.check(),
+        }
+    }
+}
+
+impl<PCS: CommitmentScheme> CanonicalDeserialize for VerifierBytecode<PCS> {
+    fn deserialize_with_mode<R: Read>(
+        mut reader: R,
+        compress: Compress,
+        validate: Validate,
+    ) -> Result<Self, SerializationError> {
+        let tag = u8::deserialize_with_mode(&mut reader, compress, validate)?; // variant tag written by `serialize_with_mode`
+        match tag {
+            0 => {
+                let bp =
+                    BytecodePreprocessing::deserialize_with_mode(&mut reader, compress, validate)?;
+                Ok(VerifierBytecode::Full(Arc::new(bp)))
+            }
+            1 => {
+                let trusted = TrustedBytecodeCommitments::<PCS>::deserialize_with_mode(
+                    &mut reader,
+                    compress,
+                    validate,
+                )?;
+                Ok(VerifierBytecode::Committed(trusted))
+            }
+            _ => Err(SerializationError::InvalidData), // any other tag value is rejected
+        }
+    }
+}
+
+/// Bytecode preprocessing data (O(K)).
+///
+/// **Note**: The bytecode size K is stored in `JoltSharedPreprocessing.bytecode_size`,
+/// NOT in this struct. Use `shared.bytecode_size` to get the size.
 #[derive(Default, Debug, Clone, CanonicalSerialize, CanonicalDeserialize)]
 pub struct BytecodePreprocessing {
-    pub code_size: usize,
     pub bytecode: Vec<Instruction>,
     /// Maps the memory address of each instruction in the bytecode to its "virtual" address.
     /// See Section 6.1 of the Jolt paper, "Reflecting the program counter". The virtual address
@@ -21,18 +221,15 @@ impl BytecodePreprocessing {
         bytecode.insert(0, Instruction::NoOp);
         let pc_map = BytecodePCMapper::new(&bytecode);
 
-        let code_size = bytecode.len().next_power_of_two().max(2);
+        let bytecode_size = bytecode.len().next_power_of_two().max(2);
 
         // Bytecode: Pad to nearest power of 2
-        bytecode.resize(code_size, Instruction::NoOp);
+        bytecode.resize(bytecode_size, Instruction::NoOp);
 
-        Self {
-            code_size,
-            bytecode,
-            pc_map,
-        }
+        Self { bytecode, pc_map }
     }
 
+    #[inline(always)]
     pub fn get_pc(&self, cycle: &Cycle) -> usize {
         if matches!(cycle, tracer::instruction::Cycle::NoOp) {
             return 0;
@@ -56,13 +253,17 @@ impl BytecodePCMapper {
         let mut indices: Vec<Option<(usize, u16)>> = {
             // For read-raf tests we simulate bytecode being empty
             #[cfg(test)]
-            if bytecode.len() == 1 {
-                vec![None; 1]
-            } else {
-                vec![None; Self::get_index(bytecode.last().unwrap().normalize().address) + 1]
+            {
+                if bytecode.len() == 1 {
+                    vec![None; 1]
+                } else {
+                    vec![None; Self::get_index(bytecode.last().unwrap().normalize().address) + 1]
+                }
             }
             #[cfg(not(test))]
-            vec![None; Self::get_index(bytecode.last().unwrap().normalize().address) + 1]
+            {
+                vec![None; Self::get_index(bytecode.last().unwrap().normalize().address) + 1]
+            }
         };
         let mut last_pc = 0;
         // Push the initial noop instruction
@@ -89,6 +290,7 @@ impl BytecodePCMapper {
         Self { indices }
     }
 
+    #[inline(always)]
     pub fn get_pc(&self, address: usize, virtual_sequence_remaining: u16) -> usize {
         let (base_pc, max_inline_seq) = self
             .indices
@@ -98,6 +300,7 @@ impl BytecodePCMapper {
         base_pc + (max_inline_seq - virtual_sequence_remaining) as usize
     }
 
+    #[inline(always)]
     pub const fn get_index(address: usize) -> usize {
         assert!(address >= RAM_START_ADDRESS as usize);
         assert!(address.is_multiple_of(ALIGNMENT_FACTOR_BYTECODE));
diff --git a/jolt-core/src/zkvm/bytecode/read_raf_checking.rs b/jolt-core/src/zkvm/bytecode/read_raf_checking.rs
index 223a6feaef..cc2af56021 100644
--- a/jolt-core/src/zkvm/bytecode/read_raf_checking.rs
+++ b/jolt-core/src/zkvm/bytecode/read_raf_checking.rs
@@ -24,10 +24,13 @@ use crate::{
         sumcheck_verifier::{SumcheckInstanceParams, SumcheckInstanceVerifier},
     },
     transcripts::Transcript,
-    utils::{math::Math, small_scalar::SmallScalar, thread::unsafe_allocate_zero_vec},
+    utils::{
+        errors::ProofVerifyError, math::Math, small_scalar::SmallScalar,
+        thread::unsafe_allocate_zero_vec,
+    },
     zkvm::{
         bytecode::BytecodePreprocessing,
-        config::OneHotParams,
+        config::{BytecodeMode, OneHotParams},
         instruction::{
             CircuitFlags, Flags, InstructionFlags, InstructionLookup, InterleavedBitsMarker,
             NUM_CIRCUIT_FLAGS,
@@ -371,17 +374,8 @@ impl<F: JoltField> BytecodeReadRafSumcheckProver<F> {
         // Drop trace and preprocessing - no longer needed after this
         self.trace = Arc::new(Vec::new());
     }
-}
-
-impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
-    for BytecodeReadRafSumcheckProver<F>
-{
-    fn get_params(&self) -> &dyn SumcheckInstanceParams<F> {
-        &self.params
-    }
 
-    #[tracing::instrument(skip_all, name = "BytecodeReadRafSumcheckProver::compute_message")]
-    fn compute_message(&mut self, round: usize, _previous_claim: F) -> UniPoly<F> {
+    fn compute_message_internal(&mut self, round: usize, _previous_claim: F) -> UniPoly<F> {
         if round < self.params.log_K {
             const DEGREE: usize = 2;
 
@@ -394,7 +388,8 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
                     });
 
                     let int_evals =
-                        self.params.int_poly
+                        self.params
+                            .int_poly
                             .sumcheck_evals(i, DEGREE, BindingOrder::LowToHigh);
 
                     // We have a separate Val polynomial for each stage
@@ -408,13 +403,20 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
                     // Which matches with the input claim:
                     // rv_1 + gamma * rv_2 + gamma^2 * rv_3 + gamma^3 * rv_4 + gamma^4 * rv_5 + gamma^5 * raf_1 + gamma^6 * raf_3
                     let mut val_evals = self
-                        .params.val_polys
+                        .params
+                        .val_polys
                         .iter()
                         // Val polynomials
                         .map(|val| val.sumcheck_evals_array::<DEGREE>(i, BindingOrder::LowToHigh))
                         // Here are the RAF polynomials and their powers
                         .zip([Some(&int_evals), None, Some(&int_evals), None, None])
-                        .zip([Some(self.params.gamma_powers[5]), None, Some(self.params.gamma_powers[4]), None, None])
+                        .zip([
+                            Some(self.params.gamma_powers[5]),
+                            None,
+                            Some(self.params.gamma_powers[4]),
+                            None,
+                            None,
+                        ])
                         .map(|((val_evals, int_evals), gamma)| {
                             std::array::from_fn::<F, DEGREE, _>(|j| {
                                 val_evals[j]
@@ -450,7 +452,7 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
 
             agg_round_poly
         } else {
-            let degree = <Self as SumcheckInstanceProver<F, T>>::degree(self);
+            let degree = self.params.degree();
 
             let out_len = self.gruen_eq_polys[0].E_out_current().len();
             let in_len = self.gruen_eq_polys[0].E_in_current().len();
@@ -520,8 +522,7 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
         }
     }
 
-    #[tracing::instrument(skip_all, name = "BytecodeReadRafSumcheckProver::ingest_challenge")]
-    fn ingest_challenge(&mut self, r_j: F::Challenge, round: usize) {
+    fn ingest_challenge_internal(&mut self, r_j: F::Challenge, round: usize) {
         if let Some(prev_round_polys) = self.prev_round_polys.take() {
             self.prev_round_claims = prev_round_polys.map(|poly| poly.evaluate(&r_j));
         }
@@ -550,6 +551,24 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
                 .for_each(|poly| poly.bind(r_j));
         }
     }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
+    for BytecodeReadRafSumcheckProver<F>
+{
+    fn get_params(&self) -> &dyn SumcheckInstanceParams<F> {
+        &self.params
+    }
+
+    #[tracing::instrument(skip_all, name = "BytecodeReadRafSumcheckProver::compute_message")]
+    fn compute_message(&mut self, round: usize, previous_claim: F) -> UniPoly<F> {
+        self.compute_message_internal(round, previous_claim)
+    }
+
+    #[tracing::instrument(skip_all, name = "BytecodeReadRafSumcheckProver::ingest_challenge")]
+    fn ingest_challenge(&mut self, r_j: F::Challenge, round: usize) {
+        self.ingest_challenge_internal(r_j, round)
+    }
 
     fn cache_openings(
         &self,
@@ -584,6 +603,548 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
     }
 }
 
+/// Bytecode Read+RAF Address-Phase Sumcheck Prover.
+///
+/// This prover handles only the first `log_K` rounds (address variables).
+/// The cycle-phase prover is built separately from the trace and the accumulator ("Option B").
+#[derive(Allocative)]
+pub struct BytecodeReadRafAddressSumcheckProver<F: JoltField> {
+    /// Per-stage address MLEs `F_i(k) = Σ_{c: PC(c)=k} eq(r_cycle_i, c)`, indexed by PC `k`.
+    F: [MultilinearPolynomial<F>; N_STAGES],
+    /// Address challenges ingested so far, one per round, in binding order.
+    r_address_prime: Vec<F::Challenge>,
+    /// Per-stage running claims `s_i(0) + s_i(1)`; seeded from the rv/raf input claims.
+    prev_round_claims: [F; N_STAGES],
+    /// Per-stage round polynomials from the latest message; evaluated at the next challenge.
+    prev_round_polys: Option<[UniPoly<F>; N_STAGES]>,
+    /// Sumcheck parameters; their `val_polys` and `int_poly` are bound in place every round.
+    pub params: BytecodeReadRafSumcheckParams<F>,
+}
+
+impl<F: JoltField> BytecodeReadRafAddressSumcheckProver<F> {
+    /// Initialize a BytecodeReadRafAddressSumcheckProver.
+    #[tracing::instrument(skip_all, name = "BytecodeReadRafAddressSumcheckProver::initialize")]
+    pub fn initialize(
+        params: BytecodeReadRafSumcheckParams<F>,
+        trace: Arc<Vec<Cycle>>,
+        bytecode_preprocessing: Arc<BytecodePreprocessing>,
+    ) -> Self {
+        let claim_per_stage = [
+            params.rv_claims[0] + params.gamma_powers[5] * params.raf_claim,
+            params.rv_claims[1],
+            params.rv_claims[2] + params.gamma_powers[4] * params.raf_shift_claim,
+            params.rv_claims[3],
+            params.rv_claims[4],
+        ];
+
+        // Two-table split-eq optimization for computing F[stage][k] = Σ_{c: PC(c)=k} eq(r_cycle, c).
+        let T = trace.len();
+        let K = params.K;
+        let log_T = params.log_T;
+
+        let lo_bits = log_T / 2;
+        let hi_bits = log_T - lo_bits;
+        let in_len: usize = 1 << lo_bits;
+        let out_len: usize = 1 << hi_bits;
+
+        let (E_hi, E_lo): ([Vec<F>; N_STAGES], [Vec<F>; N_STAGES]) = rayon::join(
+            || {
+                params
+                    .r_cycles
+                    .each_ref()
+                    .map(|r_cycle| EqPolynomial::evals(&r_cycle[..hi_bits]))
+            },
+            || {
+                params
+                    .r_cycles
+                    .each_ref()
+                    .map(|r_cycle| EqPolynomial::evals(&r_cycle[hi_bits..]))
+            },
+        );
+
+        let num_threads = rayon::current_num_threads();
+        let chunk_size = out_len.div_ceil(num_threads);
+
+        let F_polys: [Vec<F>; N_STAGES] = E_hi[0]
+            .par_chunks(chunk_size)
+            .enumerate()
+            .map(|(chunk_idx, chunk)| {
+                let mut partial: [Vec<F>; N_STAGES] =
+                    array::from_fn(|_| unsafe_allocate_zero_vec(K));
+                let mut inner: [Vec<F>; N_STAGES] = array::from_fn(|_| unsafe_allocate_zero_vec(K));
+                let mut touched = Vec::with_capacity(in_len);
+
+                let chunk_start = chunk_idx * chunk_size;
+                for (local_idx, _) in chunk.iter().enumerate() {
+                    let c_hi = chunk_start + local_idx;
+                    let c_hi_base = c_hi * in_len;
+
+                    for &k in &touched {
+                        for stage in 0..N_STAGES {
+                            inner[stage][k] = F::zero();
+                        }
+                    }
+                    touched.clear();
+
+                    for c_lo in 0..in_len {
+                        let c = c_hi_base + c_lo;
+                        if c >= T {
+                            break;
+                        }
+
+                        let pc = bytecode_preprocessing.get_pc(&trace[c]);
+                        if inner[0][pc].is_zero() {
+                            touched.push(pc);
+                        }
+                        for stage in 0..N_STAGES {
+                            inner[stage][pc] += E_lo[stage][c_lo];
+                        }
+                    }
+
+                    for &k in &touched {
+                        for stage in 0..N_STAGES {
+                            partial[stage][k] += E_hi[stage][c_hi] * inner[stage][k];
+                        }
+                    }
+                }
+                partial
+            })
+            .reduce(
+                || array::from_fn(|_| unsafe_allocate_zero_vec(K)),
+                |mut a, b| {
+                    for stage in 0..N_STAGES {
+                        a[stage]
+                            .par_iter_mut()
+                            .zip(b[stage].par_iter())
+                            .for_each(|(a, b)| *a += *b);
+                    }
+                    a
+                },
+            );
+
+        let F = F_polys.map(MultilinearPolynomial::from);
+
+        Self {
+            F,
+            r_address_prime: Vec::with_capacity(params.log_K),
+            prev_round_claims: claim_per_stage,
+            prev_round_polys: None,
+            params,
+        }
+    }
+
+    fn compute_message_impl(&mut self, _previous_claim: F) -> UniPoly<F> {
+        const DEGREE: usize = 2;
+
+        let eval_per_stage: [[F; DEGREE]; N_STAGES] = (0..self.params.val_polys[0].len() / 2)
+            .into_par_iter()
+            .map(|i| {
+                let ra_evals = self
+                    .F
+                    .each_ref()
+                    .map(|poly| poly.sumcheck_evals_array::<DEGREE>(i, BindingOrder::LowToHigh));
+
+                let int_evals =
+                    self.params
+                        .int_poly
+                        .sumcheck_evals(i, DEGREE, BindingOrder::LowToHigh);
+
+                let mut val_evals = self
+                    .params
+                    .val_polys
+                    .iter()
+                    .map(|val| val.sumcheck_evals_array::<DEGREE>(i, BindingOrder::LowToHigh))
+                    .zip([Some(&int_evals), None, Some(&int_evals), None, None])
+                    .zip([
+                        Some(self.params.gamma_powers[5]),
+                        None,
+                        Some(self.params.gamma_powers[4]),
+                        None,
+                        None,
+                    ])
+                    .map(|((val_evals, int_evals), gamma)| {
+                        std::array::from_fn::<F, DEGREE, _>(|j| {
+                            val_evals[j]
+                                + int_evals
+                                    .map_or(F::zero(), |int_evals| int_evals[j] * gamma.unwrap())
+                        })
+                    });
+
+                array::from_fn(|stage| {
+                    let [ra_at_0, ra_at_2] = ra_evals[stage];
+                    let [val_at_0, val_at_2] = val_evals.next().unwrap();
+                    [ra_at_0 * val_at_0, ra_at_2 * val_at_2]
+                })
+            })
+            .reduce(
+                || [[F::zero(); DEGREE]; N_STAGES],
+                |a, b| array::from_fn(|i| array::from_fn(|j| a[i][j] + b[i][j])),
+            );
+
+        let mut round_polys: [_; N_STAGES] = array::from_fn(|_| UniPoly::zero());
+        let mut agg_round_poly = UniPoly::zero();
+
+        for (stage, evals) in eval_per_stage.into_iter().enumerate() {
+            let [eval_at_0, eval_at_2] = evals;
+            let eval_at_1 = self.prev_round_claims[stage] - eval_at_0;
+            let round_poly = UniPoly::from_evals(&[eval_at_0, eval_at_1, eval_at_2]);
+            agg_round_poly += &(&round_poly * self.params.gamma_powers[stage]);
+            round_polys[stage] = round_poly;
+        }
+
+        self.prev_round_polys = Some(round_polys);
+        agg_round_poly
+    }
+
+    fn ingest_challenge_impl(&mut self, r_j: F::Challenge) {
+        if let Some(prev_round_polys) = self.prev_round_polys.take() {
+            self.prev_round_claims = prev_round_polys.map(|poly| poly.evaluate(&r_j));
+        }
+
+        self.params
+            .val_polys
+            .iter_mut()
+            .for_each(|poly| poly.bind_parallel(r_j, BindingOrder::LowToHigh));
+        self.params
+            .int_poly
+            .bind_parallel(r_j, BindingOrder::LowToHigh);
+        self.F
+            .iter_mut()
+            .for_each(|poly| poly.bind_parallel(r_j, BindingOrder::LowToHigh));
+        self.r_address_prime.push(r_j);
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
+    for BytecodeReadRafAddressSumcheckProver<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_K
+    }
+
+    fn input_claim(&self, accumulator: &ProverOpeningAccumulator<F>) -> F {
+        self.params.input_claim(accumulator)
+    }
+
+    fn compute_message(&mut self, _round: usize, previous_claim: F) -> UniPoly<F> {
+        self.compute_message_impl(previous_claim)
+    }
+
+    fn ingest_challenge(&mut self, r_j: F::Challenge, _round: usize) {
+        self.ingest_challenge_impl(r_j)
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut ProverOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let mut r_address = sumcheck_challenges.to_vec();
+        r_address.reverse();
+        let opening_point = OpeningPoint::<BIG_ENDIAN, F>::new(r_address);
+        let address_claim: F = self
+            .prev_round_claims
+            .iter()
+            .zip(self.params.gamma_powers.iter())
+            .take(N_STAGES)
+            .map(|(claim, gamma)| *claim * *gamma)
+            .sum();
+        accumulator.append_virtual(
+            transcript,
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+            opening_point.clone(),
+            address_claim,
+        );
+
+        // Emit Val-only claims at the Stage 6a boundary only when the staged-Val/claim-reduction
+        // path is enabled.
+        if self.params.use_staged_val_claims {
+            for stage in 0..N_STAGES {
+                let claim = self.params.val_polys[stage].final_sumcheck_claim();
+                accumulator.append_virtual(
+                    transcript,
+                    VirtualPolynomial::BytecodeValStage(stage),
+                    SumcheckId::BytecodeReadRafAddressPhase,
+                    opening_point.clone(),
+                    claim,
+                );
+            }
+        }
+    }
+
+    #[cfg(feature = "allocative")]
+    fn update_flamegraph(&self, flamegraph: &mut FlameGraphBuilder) {
+        flamegraph.visit_root(self);
+    }
+}
+
+/// Bytecode Read+RAF Cycle-Phase Sumcheck Prover.
+///
+/// This prover handles the remaining `log_T` rounds (cycle variables).
+/// It is constructed from scratch via [`BytecodeReadRafCycleSumcheckProver::initialize`].
+#[derive(Allocative)]
+pub struct BytecodeReadRafCycleSumcheckProver<F: JoltField> {
+    /// Chunked RA polynomials over address variables.
+    ra: Vec<RaPolynomial<u8, F>>,
+    /// Per-stage Gruen-split eq polynomials over cycle vars.
+    gruen_eq_polys: [GruenSplitEqPolynomial<F>; N_STAGES],
+    /// Previous-round claims s_i(0)+s_i(1) per stage.
+    prev_round_claims: [F; N_STAGES],
+    /// Round polynomials per stage.
+    prev_round_polys: Option<[UniPoly<F>; N_STAGES]>,
+    /// Final sumcheck claims of stage Val polynomials (with RAF Int folded).
+    bound_val_evals: [F; N_STAGES],
+    /// Parameters.
+    pub params: BytecodeReadRafSumcheckParams<F>,
+}
+
+impl<F: JoltField> BytecodeReadRafCycleSumcheckProver<F> {
+    /// Initialize the cycle-phase prover from scratch (Option B).
+    ///
+    /// This recomputes the address-phase internal state (per-stage claims and bound value
+    /// evaluations) by replaying the address binding using the Stage 6a challenges from the
+    /// accumulator. This avoids passing prover state across stages at the cost of extra work.
+    #[tracing::instrument(skip_all, name = "BytecodeReadRafCycleSumcheckProver::initialize")]
+    pub fn initialize(
+        params: BytecodeReadRafSumcheckParams<F>,
+        trace: Arc<Vec<Cycle>>,
+        bytecode_preprocessing: Arc<BytecodePreprocessing>,
+        accumulator: &ProverOpeningAccumulator<F>,
+    ) -> Self {
+        // Recover Stage 6a address challenges from the accumulator.
+        // Address-phase cache_openings stored them as BIG_ENDIAN (MSB-first).
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+        );
+
+        // Sumcheck challenges were generated LowToHigh; recover that order for replay.
+        let mut r_address_low_to_high = r_address_point.r.clone();
+        r_address_low_to_high.reverse();
+
+        // Re-run the address prover deterministically (no transcript) to recover:
+        // - per-stage claims after binding all address variables
+        // - bound value evaluations (Val + RAF Int folds) as scalars
+        let mut addr = BytecodeReadRafAddressSumcheckProver::initialize(
+            params.clone(),
+            Arc::clone(&trace),
+            Arc::clone(&bytecode_preprocessing),
+        );
+        // Replay is round-agnostic, and `previous_claim` is ignored by this instance (it uses
+        // internal per-stage state), so a dummy zero claim is passed.
+        for r_j in r_address_low_to_high {
+            let _ = addr.compute_message_impl(F::zero());
+            addr.ingest_challenge_impl(r_j);
+        }
+
+        // Compute bound_val_evals from the now-fully-bound val_polys and int_poly.
+        let int_poly = addr.params.int_poly.final_sumcheck_claim();
+        let bound_val_evals: [F; N_STAGES] = addr
+            .params
+            .val_polys
+            .iter()
+            .zip([
+                int_poly * addr.params.gamma_powers[5],
+                F::zero(),
+                int_poly * addr.params.gamma_powers[4],
+                F::zero(),
+                F::zero(),
+            ])
+            .map(|(poly, int_term)| poly.final_sumcheck_claim() + int_term)
+            .collect::<Vec<F>>()
+            .try_into()
+            .unwrap();
+
+        // Build RA polynomials from witness using MSB-first address challenges.
+        let r_address_chunks = params
+            .one_hot_params
+            .compute_r_address_chunks::<F>(&r_address_point.r);
+        let ra: Vec<RaPolynomial<u8, F>> = r_address_chunks
+            .iter()
+            .enumerate()
+            .map(|(i, r_address_chunk)| {
+                let ra_i: Vec<Option<u8>> = trace
+                    .par_iter()
+                    .map(|cycle| {
+                        let pc = bytecode_preprocessing.get_pc(cycle);
+                        Some(params.one_hot_params.bytecode_pc_chunk(pc, i))
+                    })
+                    .collect();
+                RaPolynomial::new(Arc::new(ra_i), EqPolynomial::evals(r_address_chunk))
+            })
+            .collect();
+
+        let gruen_eq_polys = params
+            .r_cycles
+            .each_ref()
+            .map(|r_cycle| GruenSplitEqPolynomial::new(r_cycle, BindingOrder::LowToHigh));
+
+        Self {
+            ra,
+            gruen_eq_polys,
+            prev_round_claims: addr.prev_round_claims,
+            prev_round_polys: None,
+            bound_val_evals,
+            params,
+        }
+    }
+
+    fn compute_message_impl(&mut self, _previous_claim: F) -> UniPoly<F> {
+        let degree = self.params.degree();
+
+        let out_len = self.gruen_eq_polys[0].E_out_current().len();
+        let in_len = self.gruen_eq_polys[0].E_in_current().len();
+        let in_n_vars = in_len.log_2();
+
+        let mut evals_per_stage: [Vec<F>; N_STAGES] = (0..out_len)
+            .into_par_iter()
+            .map(|j_hi| {
+                let mut ra_eval_pairs = vec![(F::zero(), F::zero()); self.ra.len()];
+                let mut ra_prod_evals = vec![F::zero(); degree - 1];
+                let mut evals_per_stage: [_; N_STAGES] =
+                    array::from_fn(|_| vec![F::Unreduced::zero(); degree - 1]);
+
+                for j_lo in 0..in_len {
+                    let j = j_lo + (j_hi << in_n_vars);
+
+                    for (i, ra_i) in self.ra.iter().enumerate() {
+                        let ra_i_eval_at_j_0 = ra_i.get_bound_coeff(j * 2);
+                        let ra_i_eval_at_j_1 = ra_i.get_bound_coeff(j * 2 + 1);
+                        ra_eval_pairs[i] = (ra_i_eval_at_j_0, ra_i_eval_at_j_1);
+                    }
+                    eval_linear_prod_assign(&ra_eval_pairs, &mut ra_prod_evals);
+
+                    for stage in 0..N_STAGES {
+                        let eq_in_eval = self.gruen_eq_polys[stage].E_in_current()[j_lo];
+                        for i in 0..degree - 1 {
+                            evals_per_stage[stage][i] +=
+                                eq_in_eval.mul_unreduced::<9>(ra_prod_evals[i]);
+                        }
+                    }
+                }
+
+                array::from_fn(|stage| {
+                    let eq_out_eval = self.gruen_eq_polys[stage].E_out_current()[j_hi];
+                    evals_per_stage[stage]
+                        .iter()
+                        .map(|v| eq_out_eval * F::from_montgomery_reduce(*v))
+                        .collect()
+                })
+            })
+            .reduce(
+                || array::from_fn(|_| vec![F::zero(); degree - 1]),
+                |a, b| array::from_fn(|i| zip_eq(&a[i], &b[i]).map(|(a, b)| *a + *b).collect()),
+            );
+
+        // Multiply by bound values
+        for (stage, evals) in evals_per_stage.iter_mut().enumerate() {
+            evals
+                .iter_mut()
+                .for_each(|v| *v *= self.bound_val_evals[stage]);
+        }
+
+        let mut round_polys: [_; N_STAGES] = array::from_fn(|_| UniPoly::zero());
+        let mut agg_round_poly = UniPoly::zero();
+
+        for (stage, evals) in evals_per_stage.iter().enumerate() {
+            let claim = self.prev_round_claims[stage];
+            let round_poly = self.gruen_eq_polys[stage].gruen_poly_from_evals(evals, claim);
+            agg_round_poly += &(&round_poly * self.params.gamma_powers[stage]);
+            round_polys[stage] = round_poly;
+        }
+
+        self.prev_round_polys = Some(round_polys);
+        agg_round_poly
+    }
+
+    fn ingest_challenge_impl(&mut self, r_j: F::Challenge) {
+        if let Some(prev_round_polys) = self.prev_round_polys.take() {
+            self.prev_round_claims = prev_round_polys.map(|poly| poly.evaluate(&r_j));
+        }
+
+        self.ra
+            .iter_mut()
+            .for_each(|ra| ra.bind_parallel(r_j, BindingOrder::LowToHigh));
+        self.gruen_eq_polys
+            .iter_mut()
+            .for_each(|poly| poly.bind(r_j));
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T>
+    for BytecodeReadRafCycleSumcheckProver<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_T
+    }
+
+    fn input_claim(&self, accumulator: &ProverOpeningAccumulator<F>) -> F {
+        accumulator
+            .get_virtual_polynomial_opening(
+                VirtualPolynomial::BytecodeReadRafAddrClaim,
+                SumcheckId::BytecodeReadRafAddressPhase,
+            )
+            .1
+    }
+
+    fn compute_message(&mut self, _round: usize, previous_claim: F) -> UniPoly<F> {
+        self.compute_message_impl(previous_claim)
+    }
+
+    fn ingest_challenge(&mut self, r_j: F::Challenge, _round: usize) {
+        self.ingest_challenge_impl(r_j)
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut ProverOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+        );
+        let mut r_address_le = r_address_point.r;
+        r_address_le.reverse();
+        let mut full_challenges = r_address_le;
+        full_challenges.extend_from_slice(sumcheck_challenges);
+        let opening_point = self.params.normalize_opening_point(&full_challenges);
+        let (r_address, r_cycle) = opening_point.split_at(self.params.log_K);
+
+        let r_address_chunks = self
+            .params
+            .one_hot_params
+            .compute_r_address_chunks::<F>(&r_address.r);
+
+        for i in 0..self.params.d {
+            accumulator.append_sparse(
+                transcript,
+                vec![CommittedPolynomial::BytecodeRa(i)],
+                SumcheckId::BytecodeReadRaf,
+                r_address_chunks[i].clone(),
+                r_cycle.clone().into(),
+                vec![self.ra[i].final_sumcheck_claim()],
+            );
+        }
+    }
+
+    #[cfg(feature = "allocative")]
+    fn update_flamegraph(&self, flamegraph: &mut FlameGraphBuilder) {
+        flamegraph.visit_root(self);
+    }
+}
+
 pub struct BytecodeReadRafSumcheckVerifier<F: JoltField> {
     params: BytecodeReadRafSumcheckParams<F>,
 }
@@ -695,6 +1256,252 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
     }
 }
 
+pub struct BytecodeReadRafAddressSumcheckVerifier<F: JoltField> {
+    params: BytecodeReadRafSumcheckParams<F>, // moved out by `into_params`/`into_cycle_verifier`
+}
+
+impl<F: JoltField> BytecodeReadRafAddressSumcheckVerifier<F> {
+    pub fn new(
+        bytecode_preprocessing: Option<&BytecodePreprocessing>, // only read in Full mode
+        n_cycle_vars: usize,
+        one_hot_params: &OneHotParams,
+        opening_accumulator: &VerifierOpeningAccumulator<F>,
+        transcript: &mut impl Transcript,
+        bytecode_mode: BytecodeMode,
+    ) -> Result<Self, ProofVerifyError> {
+        let mut params = match bytecode_mode {
+            // Commitment mode: verifier MUST avoid O(K_bytecode) work here, and later stages will
+            // relate staged Val claims to committed bytecode.
+            BytecodeMode::Committed => BytecodeReadRafSumcheckParams::gen_verifier(
+                n_cycle_vars,
+                one_hot_params,
+                opening_accumulator,
+                transcript,
+            ),
+            // Full mode: verifier materializes/evaluates bytecode-dependent polynomials (O(K_bytecode)).
+            BytecodeMode::Full => BytecodeReadRafSumcheckParams::gen(
+                bytecode_preprocessing.ok_or_else(|| {
+                    ProofVerifyError::BytecodeTypeMismatch(
+                        "expected Full bytecode preprocessing, got Committed".to_string(),
+                    )
+                })?, // `None` in Full mode is reported as an error, not a panic
+                n_cycle_vars,
+                one_hot_params,
+                opening_accumulator,
+                transcript,
+            ),
+        };
+        params.use_staged_val_claims = bytecode_mode == BytecodeMode::Committed;
+        Ok(Self { params })
+    }
+
+    /// Consume this verifier and return the underlying parameters (for Option B orchestration).
+    pub fn into_params(self) -> BytecodeReadRafSumcheckParams<F> {
+        self.params
+    }
+
+    pub fn into_cycle_verifier(self) -> BytecodeReadRafCycleSumcheckVerifier<F> {
+        BytecodeReadRafCycleSumcheckVerifier {
+            params: self.params, // the cycle phase reuses the address-phase params unchanged
+        }
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
+    for BytecodeReadRafAddressSumcheckVerifier<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_K // address phase only; the cycle-phase verifier runs log_T rounds
+    }
+
+    fn input_claim(&self, accumulator: &VerifierOpeningAccumulator<F>) -> F {
+        self.params.input_claim(accumulator)
+    }
+
+    fn expected_output_claim(
+        &self,
+        accumulator: &VerifierOpeningAccumulator<F>,
+        _sumcheck_challenges: &[F::Challenge], // unused: the claim is read from the accumulator
+    ) -> F {
+        accumulator
+            .get_virtual_polynomial_opening(
+                VirtualPolynomial::BytecodeReadRafAddrClaim,
+                SumcheckId::BytecodeReadRafAddressPhase,
+            )
+            .1 // claim half of the (point, claim) pair
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut VerifierOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let mut r_address = sumcheck_challenges.to_vec();
+        r_address.reverse(); // reversed so the point can be tagged BIG_ENDIAN below
+        let opening_point = OpeningPoint::<BIG_ENDIAN, F>::new(r_address);
+        accumulator.append_virtual(
+            transcript,
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+            opening_point.clone(),
+        );
+
+        // Populate opening points for the Val-only bytecode stage claims emitted in Stage 6a,
+        // but only when the staged-Val/claim-reduction path is enabled.
+        if self.params.use_staged_val_claims {
+            for stage in 0..N_STAGES {
+                accumulator.append_virtual(
+                    transcript,
+                    VirtualPolynomial::BytecodeValStage(stage),
+                    SumcheckId::BytecodeReadRafAddressPhase,
+                    opening_point.clone(), // every stage shares the same address point
+                );
+            }
+        }
+    }
+}
+
+pub struct BytecodeReadRafCycleSumcheckVerifier<F: JoltField> {
+    params: BytecodeReadRafSumcheckParams<F>, // same parameter type as the address verifier
+}
+
+impl<F: JoltField> BytecodeReadRafCycleSumcheckVerifier<F> {
+    pub fn new(params: BytecodeReadRafSumcheckParams<F>) -> Self {
+        Self { params } // takes ownership; no transcript or accumulator access happens here
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
+    for BytecodeReadRafCycleSumcheckVerifier<F>
+{
+    fn degree(&self) -> usize {
+        self.params.degree()
+    }
+
+    fn num_rounds(&self) -> usize {
+        self.params.log_T // cycle phase only; the address-phase verifier runs log_K rounds
+    }
+
+    fn input_claim(&self, accumulator: &VerifierOpeningAccumulator<F>) -> F {
+        accumulator
+            .get_virtual_polynomial_opening(
+                VirtualPolynomial::BytecodeReadRafAddrClaim,
+                SumcheckId::BytecodeReadRafAddressPhase,
+            )
+            .1 // the address phase's expected output claim is this phase's input
+    }
+
+    fn expected_output_claim(
+        &self,
+        accumulator: &VerifierOpeningAccumulator<F>,
+        sumcheck_challenges: &[F::Challenge],
+    ) -> F {
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+        );
+        let mut r_address_le = r_address_point.r;
+        r_address_le.reverse(); // undo the reversal applied when the address phase cached it
+        let mut full_challenges = r_address_le;
+        full_challenges.extend_from_slice(sumcheck_challenges); // address part, then cycle part
+        let opening_point = self.params.normalize_opening_point(&full_challenges);
+        let (r_address_prime, r_cycle_prime) = opening_point.split_at(self.params.log_K);
+
+        let int_poly = self.params.int_poly.evaluate(&r_address_prime.r);
+
+        let ra_claims = (0..self.params.d).map(|i| {
+            accumulator
+                .get_committed_polynomial_opening(
+                    CommittedPolynomial::BytecodeRa(i),
+                    SumcheckId::BytecodeReadRaf,
+                )
+                .1
+        });
+
+        let int_terms = [
+            int_poly * self.params.gamma_powers[5], // RAF for Stage1
+            F::zero(),                              // There's no raf for Stage2
+            int_poly * self.params.gamma_powers[4], // RAF for Stage3
+            F::zero(),                              // There's no raf for Stage4
+            F::zero(),                              // There's no raf for Stage5
+        ];
+        let val = if self.params.use_staged_val_claims {
+            // Fast verifier path: consume Val_s(r_bc) claims emitted at the Stage 6a boundary,
+            // rather than re-evaluating `val_polys` (O(K_bytecode)).
+            (0..N_STAGES)
+                .zip(self.params.r_cycles.iter())
+                .zip(self.params.gamma_powers.iter())
+                .zip(int_terms)
+                .map(|(((stage, r_cycle), gamma), int_term)| {
+                    let val_claim = accumulator
+                        .get_virtual_polynomial_opening(
+                            VirtualPolynomial::BytecodeValStage(stage),
+                            SumcheckId::BytecodeReadRafAddressPhase,
+                        )
+                        .1;
+                    (val_claim + int_term)
+                        * EqPolynomial::<F>::mle(r_cycle, &r_cycle_prime.r)
+                        * *gamma
+                })
+                .sum::<F>()
+        } else {
+            // Legacy verifier path: directly evaluate Val polynomials at r_bc (O(K_bytecode)).
+            self.params
+                .val_polys
+                .iter()
+                .zip(&self.params.r_cycles)
+                .zip(&self.params.gamma_powers)
+                .zip(int_terms)
+                .map(|(((val, r_cycle), gamma), int_term)| {
+                    (val.evaluate(&r_address_prime.r) + int_term)
+                        * EqPolynomial::<F>::mle(r_cycle, &r_cycle_prime.r)
+                        * *gamma
+                })
+                .sum::<F>()
+        };
+
+        ra_claims.fold(val, |running, ra_claim| running * ra_claim) // val times every ra claim
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut VerifierOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let (r_address_point, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+        );
+        let mut r_address_le = r_address_point.r;
+        r_address_le.reverse(); // same point reconstruction as in `expected_output_claim`
+        let mut full_challenges = r_address_le;
+        full_challenges.extend_from_slice(sumcheck_challenges);
+        let opening_point = self.params.normalize_opening_point(&full_challenges);
+        let (r_address, r_cycle) = opening_point.split_at(self.params.log_K);
+
+        let r_address_chunks = self
+            .params
+            .one_hot_params
+            .compute_r_address_chunks::<F>(&r_address.r);
+
+        (0..self.params.d).for_each(|i| {
+            let opening_point = [&r_address_chunks[i][..], &r_cycle.r].concat(); // chunk || cycle
+            accumulator.append_sparse(
+                transcript,
+                vec![CommittedPolynomial::BytecodeRa(i)],
+                SumcheckId::BytecodeReadRaf,
+                opening_point,
+            );
+        });
+    }
+}
+
 #[derive(Allocative, Clone)]
 pub struct BytecodeReadRafSumcheckParams<F: JoltField> {
     /// Index `i` stores `gamma^i`.
@@ -708,6 +1515,9 @@ pub struct BytecodeReadRafSumcheckParams<F: JoltField> {
     /// log2(K) and log2(T) used to determine round counts.
     pub log_K: usize,
     pub log_T: usize,
+    /// If true, Stage 6a emits `Val_s(r_bc)` as virtual openings and Stage 6b consumes them
+    /// (instead of verifier re-materializing/evaluating `val_polys`).
+    pub use_staged_val_claims: bool,
     /// Number of address chunks (and RA polynomials in the product).
     pub d: usize,
     /// Stage Val polynomials evaluated over address vars.
@@ -719,6 +1529,13 @@ pub struct BytecodeReadRafSumcheckParams<F: JoltField> {
     /// Identity polynomial over address vars used to inject RAF contributions.
     pub int_poly: IdentityPolynomial<F>,
     pub r_cycles: [Vec<F::Challenge>; N_STAGES],
+    /// Stage-specific batching gammas used to define Val(k) polynomials.
+    /// Stored so later claim reductions can reconstruct lane weights without resampling the transcript.
+    pub stage1_gammas: Vec<F>,
+    pub stage2_gammas: Vec<F>,
+    pub stage3_gammas: Vec<F>,
+    pub stage4_gammas: Vec<F>,
+    pub stage5_gammas: Vec<F>,
 }
 
 impl<F: JoltField> BytecodeReadRafSumcheckParams<F> {
@@ -730,9 +1547,44 @@ impl<F: JoltField> BytecodeReadRafSumcheckParams<F> {
         opening_accumulator: &dyn OpeningAccumulator<F>,
         transcript: &mut impl Transcript,
     ) -> Self {
-        let gamma_powers = transcript.challenge_scalar_powers(7);
+        Self::gen_impl(
+            Some(bytecode_preprocessing),
+            n_cycle_vars,
+            one_hot_params,
+            opening_accumulator,
+            transcript,
+            true,
+        )
+    }
+
+    /// Verifier-side generator: avoids materializing Val(k) polynomials (O(K_bytecode)).
+    #[tracing::instrument(skip_all, name = "BytecodeReadRafSumcheckParams::gen_verifier")]
+    pub fn gen_verifier(
+        n_cycle_vars: usize,
+        one_hot_params: &OneHotParams,
+        opening_accumulator: &dyn OpeningAccumulator<F>,
+        transcript: &mut impl Transcript,
+    ) -> Self {
+        Self::gen_impl(
+            None,
+            n_cycle_vars,
+            one_hot_params,
+            opening_accumulator,
+            transcript,
+            false,
+        )
+    }
 
-        let bytecode = &bytecode_preprocessing.bytecode;
+    #[allow(clippy::too_many_arguments)]
+    fn gen_impl(
+        bytecode_preprocessing: Option<&BytecodePreprocessing>,
+        n_cycle_vars: usize,
+        one_hot_params: &OneHotParams,
+        opening_accumulator: &dyn OpeningAccumulator<F>,
+        transcript: &mut impl Transcript,
+        compute_val_polys: bool,
+    ) -> Self {
+        let gamma_powers = transcript.challenge_scalar_powers(7);
 
         // Generate all stage-specific gamma powers upfront (order must match verifier)
         let stage1_gammas: Vec<F> = transcript.challenge_scalar_powers(2 + NUM_CIRCUIT_FLAGS);
@@ -749,38 +1601,46 @@ impl<F: JoltField> BytecodeReadRafSumcheckParams<F> {
         let rv_claim_5 = Self::compute_rv_claim_5(opening_accumulator, &stage5_gammas);
         let rv_claims = [rv_claim_1, rv_claim_2, rv_claim_3, rv_claim_4, rv_claim_5];
 
-        // Pre-compute eq_r_register for stages 4 and 5 (they use different r_register points)
-        let r_register_4 = opening_accumulator
-            .get_virtual_polynomial_opening(
-                VirtualPolynomial::RdWa,
-                SumcheckId::RegistersReadWriteChecking,
-            )
-            .0
-            .r;
-        let eq_r_register_4 =
-            EqPolynomial::<F>::evals(&r_register_4[..(REGISTER_COUNT as usize).log_2()]);
-
-        let r_register_5 = opening_accumulator
-            .get_virtual_polynomial_opening(
-                VirtualPolynomial::RdWa,
-                SumcheckId::RegistersValEvaluation,
+        let val_polys = if compute_val_polys {
+            let bytecode = &bytecode_preprocessing
+                .expect("compute_val_polys requires bytecode preprocessing")
+                .bytecode;
+            // Pre-compute eq_r_register for stages 4 and 5 (they use different r_register points)
+            let r_register_4 = opening_accumulator
+                .get_virtual_polynomial_opening(
+                    VirtualPolynomial::RdWa,
+                    SumcheckId::RegistersReadWriteChecking,
+                )
+                .0
+                .r;
+            let eq_r_register_4 =
+                EqPolynomial::<F>::evals(&r_register_4[..(REGISTER_COUNT as usize).log_2()]);
+
+            let r_register_5 = opening_accumulator
+                .get_virtual_polynomial_opening(
+                    VirtualPolynomial::RdWa,
+                    SumcheckId::RegistersValEvaluation,
+                )
+                .0
+                .r;
+            let eq_r_register_5 =
+                EqPolynomial::<F>::evals(&r_register_5[..(REGISTER_COUNT as usize).log_2()]);
+
+            // Fused pass: compute all val polynomials in a single parallel iteration
+            Self::compute_val_polys(
+                bytecode,
+                &eq_r_register_4,
+                &eq_r_register_5,
+                &stage1_gammas,
+                &stage2_gammas,
+                &stage3_gammas,
+                &stage4_gammas,
+                &stage5_gammas,
             )
-            .0
-            .r;
-        let eq_r_register_5 =
-            EqPolynomial::<F>::evals(&r_register_5[..(REGISTER_COUNT as usize).log_2()]);
-
-        // Fused pass: compute all val polynomials in a single parallel iteration
-        let val_polys = Self::compute_val_polys(
-            bytecode,
-            &eq_r_register_4,
-            &eq_r_register_5,
-            &stage1_gammas,
-            &stage2_gammas,
-            &stage3_gammas,
-            &stage4_gammas,
-            &stage5_gammas,
-        );
+        } else {
+            // Verifier doesn't need these (and must not iterate over bytecode).
+            array::from_fn(|_| MultilinearPolynomial::default())
+        };
 
         let int_poly = IdentityPolynomial::new(one_hot_params.bytecode_k.log_2());
 
@@ -840,12 +1700,18 @@ impl<F: JoltField> BytecodeReadRafSumcheckParams<F> {
             log_K: one_hot_params.bytecode_k.log_2(),
             d: one_hot_params.bytecode_d,
             log_T: n_cycle_vars,
+            use_staged_val_claims: false,
             val_polys,
             rv_claims,
             raf_claim,
             raf_shift_claim,
             int_poly,
             r_cycles,
+            stage1_gammas,
+            stage2_gammas,
+            stage3_gammas,
+            stage4_gammas,
+            stage5_gammas,
         }
     }
 
diff --git a/jolt-core/src/zkvm/claim_reductions/advice.rs b/jolt-core/src/zkvm/claim_reductions/advice.rs
index 6ec3ddd049..cb972c2e25 100644
--- a/jolt-core/src/zkvm/claim_reductions/advice.rs
+++ b/jolt-core/src/zkvm/claim_reductions/advice.rs
@@ -510,11 +510,8 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T> for AdviceClaimRe
     fn round_offset(&self, max_num_rounds: usize) -> usize {
         match self.params.phase {
             ReductionPhase::CycleVariables => {
-                // Align to the *start* of Booleanity's cycle segment, so local rounds correspond
-                // to low Dory column bits in the unified point ordering.
-                let booleanity_rounds = self.params.log_k_chunk + self.params.log_t;
-                let booleanity_offset = max_num_rounds - booleanity_rounds;
-                booleanity_offset + self.params.log_k_chunk
+                // Stage 6b only spans cycle variables; align to the start of the cycle segment.
+                max_num_rounds.saturating_sub(self.params.log_t)
             }
             ReductionPhase::AddressVariables => 0,
         }
@@ -656,11 +653,7 @@ impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
     fn round_offset(&self, max_num_rounds: usize) -> usize {
         let params = self.params.borrow();
         match params.phase {
-            ReductionPhase::CycleVariables => {
-                let booleanity_rounds = params.log_k_chunk + params.log_t;
-                let booleanity_offset = max_num_rounds - booleanity_rounds;
-                booleanity_offset + params.log_k_chunk
-            }
+            ReductionPhase::CycleVariables => max_num_rounds.saturating_sub(params.log_t),
             ReductionPhase::AddressVariables => 0,
         }
     }
diff --git a/jolt-core/src/zkvm/claim_reductions/bytecode.rs b/jolt-core/src/zkvm/claim_reductions/bytecode.rs
new file mode 100644
index 0000000000..0cebaee937
--- /dev/null
+++ b/jolt-core/src/zkvm/claim_reductions/bytecode.rs
@@ -0,0 +1,624 @@
+//! Two-phase Bytecode claim reduction (Stage 6b cycle → Stage 7 lane/address).
+//!
+//! This reduction batches the 5 bytecode Val-stage claims emitted at the Stage 6a boundary:
+//! `Val_s(r_bc)` for `s = 0..5` (val-only; RAF terms excluded).
+//!
+//! High level:
+//! - Sample `η` and form `C_in = Σ_s η^s · Val_s(r_bc)`.
+//! - Define a canonical set of bytecode "lanes" (448 total) and a lane weight function
+//!   `W_η(lane) = Σ_s η^s · w_s(lane)` derived from the same stage-specific gammas used to
+//!   define `Val_s`.
+//! - Prove, via a two-phase sumcheck, that `C_in` equals a single linear functional of the
+//!   (eventual) committed bytecode chunk polynomials.
+//!
+//! NOTE: This module wires the reduction logic and emits openings for bytecode chunk polynomials.
+//! Commitment + Stage 8 batching integration is handled separately (see `bytecode-commitment-progress.md`).
+
+use std::cell::RefCell;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+
+use allocative::Allocative;
+use itertools::Itertools;
+use rayon::prelude::*;
+
+use crate::field::JoltField;
+use crate::poly::commitment::dory::{DoryGlobals, DoryLayout};
+use crate::poly::eq_poly::EqPolynomial;
+use crate::poly::multilinear_polynomial::{
+    BindingOrder, MultilinearPolynomial, PolynomialBinding, PolynomialEvaluation,
+};
+use crate::poly::opening_proof::{
+    OpeningAccumulator, OpeningPoint, ProverOpeningAccumulator, SumcheckId,
+    VerifierOpeningAccumulator, BIG_ENDIAN, LITTLE_ENDIAN,
+};
+use crate::poly::unipoly::UniPoly;
+use crate::subprotocols::sumcheck_prover::SumcheckInstanceProver;
+use crate::subprotocols::sumcheck_verifier::{SumcheckInstanceParams, SumcheckInstanceVerifier};
+use crate::transcripts::Transcript;
+use crate::utils::math::Math;
+use crate::utils::thread::unsafe_allocate_zero_vec;
+use crate::zkvm::bytecode::chunks::{build_bytecode_chunks, total_lanes};
+use crate::zkvm::bytecode::read_raf_checking::BytecodeReadRafSumcheckParams;
+use crate::zkvm::bytecode::BytecodePreprocessing;
+use crate::zkvm::instruction::{
+    CircuitFlags, InstructionFlags, NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS,
+};
+use crate::zkvm::lookup_table::LookupTables;
+use crate::zkvm::witness::{CommittedPolynomial, VirtualPolynomial};
+use common::constants::{REGISTER_COUNT, XLEN};
+use strum::EnumCount;
+
+const DEGREE_BOUND: usize = 2; // each round sums products of two chunk polynomials (B * W)
+const NUM_VAL_STAGES: usize = 5; // number of `Val_s` stage claims batched with powers of eta
+
+/// For `DoryLayout::AddressMajor`, committed bytecode chunks are stored in "cycle-major" index order
+/// (cycle*K + address), which makes `BindingOrder::LowToHigh` bind **lane** bits first.
+///
+/// The claim reduction sumcheck needs to bind **cycle** bits first in Stage 6b, so we permute
+/// dense coefficient vectors into the `DoryLayout::CycleMajor` order (address*T + cycle) when
+/// running the reduction. This is a pure index permutation, i.e. a variable renaming, and the
+/// resulting evaluations match the committed polynomial when the opening point is interpreted in
+/// the unified `[lane || cycle]` order.
+fn permute_address_major_to_cycle_major<F: JoltField>(
+    coeffs: Vec<F>, // input in AddressMajor order; expected length is k_chunk * t_size
+    k_chunk: usize, // `K` in the index formulas below (lanes per chunk)
+    t_size: usize, // `T` in the index formulas below
+) -> Vec<F> {
+    debug_assert_eq!(coeffs.len(), k_chunk * t_size); // length is checked in debug builds only
+    let mut out: Vec<F> = unsafe_allocate_zero_vec(k_chunk * t_size);
+    for lane in 0..k_chunk {
+        for k in 0..t_size {
+            // AddressMajor: idx = cycle * K + address
+            let idx_in = k * k_chunk + lane;
+            // CycleMajor: idx = address * T + cycle
+            let idx_out = lane * t_size + k;
+            out[idx_out] = coeffs[idx_in]; // every output slot is written exactly once
+        }
+    }
+    out
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Allocative)]
+pub enum BytecodeReductionPhase {
+    CycleVariables, // first phase (Stage 6b per the module docs); runs `log_k` rounds
+    LaneVariables, // second phase (Stage 7 per the module docs); runs `log_k_chunk` rounds
+}
+
+#[derive(Clone, Allocative)]
+pub struct BytecodeClaimReductionParams<F: JoltField> {
+    pub phase: BytecodeReductionPhase, // `new` starts in `CycleVariables`
+    pub eta: F, // batching challenge drawn from the transcript in `new`
+    pub eta_powers: [F; NUM_VAL_STAGES], // eta_powers[s] = eta^s, with eta_powers[0] = 1
+    pub log_k: usize, // copied from the read-raf params' `log_K`
+    pub log_k_chunk: usize, // copied from the read-raf params' `one_hot_params.log_k_chunk`
+    pub num_chunks: usize, // total_lanes().div_ceil(1 << log_k_chunk)
+    /// Bytecode address point (log_K bits, big-endian).
+    pub r_bc: OpeningPoint<BIG_ENDIAN, F>,
+    /// Per-chunk lane weight tables (length = k_chunk) for `W_eta`.
+    pub chunk_lane_weights: Vec<Vec<F>>,
+    /// (little-endian) challenges used in the cycle phase.
+    pub cycle_var_challenges: Vec<F::Challenge>,
+}
+
+impl<F: JoltField> BytecodeClaimReductionParams<F> {
+    pub fn new(
+        bytecode_read_raf_params: &BytecodeReadRafSumcheckParams<F>,
+        accumulator: &dyn OpeningAccumulator<F>,
+        transcript: &mut impl Transcript, // one challenge scalar (eta) is drawn from it here
+    ) -> Self {
+        let log_k = bytecode_read_raf_params.log_K;
+
+        let eta: F = transcript.challenge_scalar();
+        let mut eta_powers = [F::one(); NUM_VAL_STAGES];
+        for i in 1..NUM_VAL_STAGES {
+            eta_powers[i] = eta_powers[i - 1] * eta; // eta_powers[i] = eta^i
+        }
+
+        // r_bc comes from the Stage 6a BytecodeReadRaf address phase.
+        let (r_bc, _) = accumulator.get_virtual_polynomial_opening(
+            VirtualPolynomial::BytecodeReadRafAddrClaim,
+            SumcheckId::BytecodeReadRafAddressPhase,
+        );
+
+        let log_k_chunk = bytecode_read_raf_params.one_hot_params.log_k_chunk;
+        let k_chunk = 1 << log_k_chunk;
+        let num_chunks = total_lanes().div_ceil(k_chunk); // rounds up to whole chunks
+
+        let chunk_lane_weights = compute_chunk_lane_weights(
+            bytecode_read_raf_params,
+            accumulator,
+            &eta_powers,
+            num_chunks,
+            k_chunk,
+        );
+
+        Self {
+            phase: BytecodeReductionPhase::CycleVariables, // construction always starts here
+            eta,
+            eta_powers,
+            log_k,
+            log_k_chunk,
+            num_chunks,
+            r_bc,
+            chunk_lane_weights,
+            cycle_var_challenges: vec![], // the prover pushes challenges during the cycle phase
+        }
+    }
+}
+
+impl<F: JoltField> SumcheckInstanceParams<F> for BytecodeClaimReductionParams<F> {
+    fn input_claim(&self, accumulator: &dyn OpeningAccumulator<F>) -> F {
+        match self.phase {
+            BytecodeReductionPhase::CycleVariables => (0..NUM_VAL_STAGES)
+                .map(|stage| {
+                    let (_, val_claim) = accumulator.get_virtual_polynomial_opening(
+                        VirtualPolynomial::BytecodeValStage(stage),
+                        SumcheckId::BytecodeReadRafAddressPhase,
+                    );
+                    self.eta_powers[stage] * val_claim
+                })
+                .sum(), // sum over stages s of eta^s * (stage-s Val claim)
+            BytecodeReductionPhase::LaneVariables => {
+                accumulator
+                    .get_virtual_polynomial_opening(
+                        VirtualPolynomial::BytecodeClaimReductionIntermediate,
+                        SumcheckId::BytecodeClaimReductionCyclePhase,
+                    )
+                    .1 // intermediate claim cached by the prover at the end of the cycle phase
+            }
+        }
+    }
+
+    fn degree(&self) -> usize {
+        DEGREE_BOUND
+    }
+
+    fn num_rounds(&self) -> usize {
+        match self.phase {
+            BytecodeReductionPhase::CycleVariables => self.log_k, // read-raf `log_K`
+            BytecodeReductionPhase::LaneVariables => self.log_k_chunk,
+        }
+    }
+
+    fn normalize_opening_point(
+        &self,
+        challenges: &[<F as JoltField>::Challenge],
+    ) -> OpeningPoint<BIG_ENDIAN, F> {
+        match self.phase {
+            BytecodeReductionPhase::CycleVariables => {
+                OpeningPoint::<LITTLE_ENDIAN, F>::new(challenges.to_vec()).match_endianness()
+            }
+            BytecodeReductionPhase::LaneVariables => {
+                // Full point: [lane || cycle] in big-endian.
+                let full_le: Vec<F::Challenge> =
+                    [self.cycle_var_challenges.as_slice(), challenges].concat();
+                OpeningPoint::<LITTLE_ENDIAN, F>::new(full_le).match_endianness()
+            }
+        }
+    }
+}
+
+#[derive(Allocative)]
+pub struct BytecodeClaimReductionProver<F: JoltField> {
+    pub params: BytecodeClaimReductionParams<F>, // collects cycle challenges as rounds run
+    /// Chunk polynomials B_i(lane, k) (eventually committed).
+    bytecode_chunks: Vec<MultilinearPolynomial<F>>,
+    /// Weight polynomials W_i(lane, k) = W_eta(lane) * eq(r_bc, k) (multilinear).
+    weight_chunks: Vec<MultilinearPolynomial<F>>,
+    /// Batched-sumcheck scaling for trailing dummy rounds (see `round_offset`).
+    #[allocative(skip)]
+    batch_dummy_rounds: AtomicUsize, // set by round_offset, read by compute_message_impl
+}
+
+impl<F: JoltField> BytecodeClaimReductionProver<F> {
+    #[tracing::instrument(skip_all, name = "BytecodeClaimReductionProver::initialize")]
+    pub fn initialize(
+        params: BytecodeClaimReductionParams<F>,
+        bytecode: Arc<BytecodePreprocessing>,
+    ) -> Self {
+        let log_k = params.log_k;
+        let t_size = 1 << log_k; // debug-asserted below to equal the bytecode length
+        let k_chunk = 1 << params.log_k_chunk;
+        let layout = DoryGlobals::get_layout();
+
+        // Eq table over the bytecode address point.
+        let eq_r_bc = EqPolynomial::<F>::evals(&params.r_bc.r);
+        debug_assert_eq!(eq_r_bc.len(), t_size);
+
+        // Build per-chunk weight polynomials as an outer product (lane_weight ⊗ eq_r_bc).
+        let weight_chunks: Vec<MultilinearPolynomial<F>> = (0..params.num_chunks)
+            .into_par_iter()
+            .map(|chunk_idx| {
+                let lane_weights = &params.chunk_lane_weights[chunk_idx];
+                debug_assert_eq!(lane_weights.len(), k_chunk);
+                let mut coeffs: Vec<F> = unsafe_allocate_zero_vec(k_chunk * t_size);
+                for lane in 0..k_chunk {
+                    let w = lane_weights[lane];
+                    for k in 0..t_size {
+                        // Claim reduction always uses CycleMajor ordering so that
+                        // `BindingOrder::LowToHigh` binds cycle bits first in Stage 6b.
+                        let idx =
+                            DoryLayout::CycleMajor.address_cycle_to_index(lane, k, k_chunk, t_size);
+                        coeffs[idx] = w * eq_r_bc[k];
+                    }
+                }
+                MultilinearPolynomial::from(coeffs)
+            })
+            .collect();
+
+        // Build per-chunk bytecode polynomials B_i(lane, k).
+        let bytecode_len = bytecode.bytecode.len();
+        debug_assert_eq!(bytecode_len, t_size); // checked in debug builds only
+        let mut bytecode_chunks = build_bytecode_chunks::<F>(&bytecode, params.log_k_chunk);
+        if layout == DoryLayout::AddressMajor {
+            // Permute committed AddressMajor coefficient order into CycleMajor for the reduction.
+            for poly in bytecode_chunks.iter_mut() {
+                if let MultilinearPolynomial::LargeScalars(p) = poly {
+                    let old = std::mem::take(&mut p.Z); // move the coefficients out, no clone
+                    p.Z = permute_address_major_to_cycle_major(old, k_chunk, t_size);
+                } else {
+                    unreachable!("bytecode chunks are dense field polynomials");
+                }
+            }
+        }
+
+        debug_assert_eq!(bytecode_chunks.len(), params.num_chunks);
+        debug_assert_eq!(weight_chunks.len(), params.num_chunks);
+
+        Self {
+            params,
+            bytecode_chunks,
+            weight_chunks,
+            batch_dummy_rounds: AtomicUsize::new(0), // overwritten by `round_offset`
+        }
+    }
+
+    fn compute_message_impl(&self, previous_claim: F) -> UniPoly<F> {
+        let half = self.bytecode_chunks[0].len() / 2; // indexing panics if there are no chunks
+        let mut evals: [F; DEGREE_BOUND] = (0..half)
+            .into_par_iter()
+            .map(|j| {
+                let mut out = [F::zero(); DEGREE_BOUND];
+                for (b, w) in self.bytecode_chunks.iter().zip(self.weight_chunks.iter()) {
+                    let b_evals =
+                        b.sumcheck_evals_array::<DEGREE_BOUND>(j, BindingOrder::LowToHigh);
+                    let w_evals =
+                        w.sumcheck_evals_array::<DEGREE_BOUND>(j, BindingOrder::LowToHigh);
+                    for i in 0..DEGREE_BOUND {
+                        out[i] += b_evals[i] * w_evals[i]; // pointwise product, summed over chunks
+                    }
+                }
+                out
+            })
+            .reduce(
+                || [F::zero(); DEGREE_BOUND],
+                |mut acc, arr| {
+                    acc.iter_mut().zip(arr.iter()).for_each(|(a, b)| *a += *b);
+                    acc
+                },
+            );
+
+        // If this instance is back-loaded in a batched sumcheck (i.e., it has trailing dummy
+        // rounds), then `previous_claim` is scaled by 2^{dummy_rounds}. The per-round univariate
+        // evaluations must be scaled by the same factor to satisfy the sumcheck consistency check.
+        let dummy_rounds = self.batch_dummy_rounds.load(Ordering::Relaxed);
+        if dummy_rounds != 0 {
+            let scale = F::one().mul_pow_2(dummy_rounds);
+            for e in evals.iter_mut() {
+                *e *= scale;
+            }
+        }
+        UniPoly::from_evals_and_hint(previous_claim, &evals)
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceProver<F, T> for BytecodeClaimReductionProver<F> {
+    fn get_params(&self) -> &dyn SumcheckInstanceParams<F> {
+        &self.params // borrowed, so callers see the current phase and collected challenges
+    }
+
+    fn round_offset(&self, max_num_rounds: usize) -> usize {
+        // Bytecode claim reduction's cycle-phase rounds must align to the *start* of the
+        // batched cycle challenge vector so that its (log_K) point is the suffix (LSB side)
+        // of the full (log_T) cycle point used by other Stage 6b instances. This is required
+        // for Stage 8's committed-bytecode embedding when log_T > log_K.
+        //
+        // This deviates from the default "front-loaded" batching offset, so we record the number
+        // of trailing dummy rounds and scale univariate evaluations accordingly.
+        let dummy_rounds = max_num_rounds.saturating_sub(self.params.num_rounds());
+        self.batch_dummy_rounds
+            .store(dummy_rounds, Ordering::Relaxed); // side effect: read by compute_message_impl
+        0 // offset 0: this instance's rounds start at the first batched round
+    }
+
+    fn compute_message(&mut self, _round: usize, previous_claim: F) -> UniPoly<F> {
+        self.compute_message_impl(previous_claim)
+    }
+
+    fn ingest_challenge(&mut self, r_j: F::Challenge, _round: usize) {
+        if self.params.phase == BytecodeReductionPhase::CycleVariables {
+            self.params.cycle_var_challenges.push(r_j);
+        }
+        self.bytecode_chunks
+            .iter_mut()
+            .for_each(|p| p.bind_parallel(r_j, BindingOrder::LowToHigh));
+        self.weight_chunks
+            .iter_mut()
+            .for_each(|p| p.bind_parallel(r_j, BindingOrder::LowToHigh));
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut ProverOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        match self.params.phase {
+            BytecodeReductionPhase::CycleVariables => {
+                // Cache intermediate claim for Stage 7.
+                let opening_point = self.params.normalize_opening_point(sumcheck_challenges);
+
+                let mut sum = F::zero();
+                for (b, w) in self.bytecode_chunks.iter().zip(self.weight_chunks.iter()) {
+                    debug_assert_eq!(b.len(), w.len());
+                    for i in 0..b.len() {
+                        sum += b.get_bound_coeff(i) * w.get_bound_coeff(i);
+                    }
+                }
+
+                accumulator.append_virtual(
+                    transcript,
+                    VirtualPolynomial::BytecodeClaimReductionIntermediate,
+                    SumcheckId::BytecodeClaimReductionCyclePhase,
+                    opening_point,
+                    sum,
+                );
+            }
+            BytecodeReductionPhase::LaneVariables => {
+                // Cache final openings of the bytecode chunk polynomials at the full point.
+                let opening_point = self.params.normalize_opening_point(sumcheck_challenges);
+                let (r_lane, r_cycle) = opening_point.split_at(self.params.log_k_chunk);
+
+                let polynomial_types: Vec<CommittedPolynomial> = (0..self.params.num_chunks)
+                    .map(CommittedPolynomial::BytecodeChunk)
+                    .collect();
+                let claims: Vec<F> = self
+                    .bytecode_chunks
+                    .iter()
+                    .map(|p| p.final_sumcheck_claim())
+                    .collect();
+
+                accumulator.append_sparse(
+                    transcript,
+                    polynomial_types,
+                    SumcheckId::BytecodeClaimReduction,
+                    r_lane.r,
+                    r_cycle.r,
+                    claims,
+                );
+            }
+        }
+    }
+
+    #[cfg(feature = "allocative")]
+    fn update_flamegraph(&self, flamegraph: &mut allocative::FlameGraphBuilder) {
+        flamegraph.visit_root(self);
+    }
+}
+
+/// Verifier-side state for the bytecode claim reduction: its params behind a `RefCell`.
+pub struct BytecodeClaimReductionVerifier<F: JoltField> {
+    pub params: RefCell<BytecodeClaimReductionParams<F>>,
+}
+
+impl<F: JoltField> BytecodeClaimReductionVerifier<F> {
+    pub fn new(params: BytecodeClaimReductionParams<F>) -> Self {
+        let params = RefCell::new(params);
+        Self { params }
+    }
+}
+
+impl<F: JoltField, T: Transcript> SumcheckInstanceVerifier<F, T>
+    for BytecodeClaimReductionVerifier<F>
+{
+    fn get_params(&self) -> &dyn SumcheckInstanceParams<F> {
+        // SAFETY(review): this hands out a reference that bypasses the `RefCell` borrow flag;
+        // it must not still be alive when `cache_openings` calls `borrow_mut` — TODO confirm.
+        unsafe { &*self.params.as_ptr() }
+    }
+
+    fn round_offset(&self, _max_num_rounds: usize) -> usize {
+        // Must mirror the prover: align this instance to the start of the batched challenge vector.
+        0
+    }
+
+    fn expected_output_claim(
+        &self,
+        accumulator: &VerifierOpeningAccumulator<F>,
+        sumcheck_challenges: &[F::Challenge],
+    ) -> F {
+        let params = self.params.borrow();
+        match params.phase {
+            BytecodeReductionPhase::CycleVariables => {
+                // The cycle phase is expected to end on the cached intermediate claim.
+                let (_, intermediate) = accumulator.get_virtual_polynomial_opening(
+                    VirtualPolynomial::BytecodeClaimReductionIntermediate,
+                    SumcheckId::BytecodeClaimReductionCyclePhase,
+                );
+                intermediate
+            }
+            BytecodeReductionPhase::LaneVariables => {
+                let opening_point = params.normalize_opening_point(sumcheck_challenges);
+                let (r_lane, r_cycle) = opening_point.split_at(params.log_k_chunk);
+                let eq_eval = EqPolynomial::<F>::mle(&r_cycle.r, &params.r_bc.r);
+
+                // Pair each chunk's opening with its lane-weight polynomial evaluated at r_lane.
+                let mut weighted_openings = F::zero();
+                for chunk_idx in 0..params.num_chunks {
+                    let (_, chunk_opening) = accumulator.get_committed_polynomial_opening(
+                        CommittedPolynomial::BytecodeChunk(chunk_idx),
+                        SumcheckId::BytecodeClaimReduction,
+                    );
+                    let lane_weights = params.chunk_lane_weights[chunk_idx].clone();
+                    let w_eval = MultilinearPolynomial::from(lane_weights).evaluate(&r_lane.r);
+                    weighted_openings += chunk_opening * w_eval;
+                }
+
+                weighted_openings * eq_eval
+            }
+        }
+    }
+
+    fn cache_openings(
+        &self,
+        accumulator: &mut VerifierOpeningAccumulator<F>,
+        transcript: &mut T,
+        sumcheck_challenges: &[F::Challenge],
+    ) {
+        let mut params = self.params.borrow_mut();
+        // Both phases append an opening at the normalized form of this phase's challenges.
+        let opening_point = params.normalize_opening_point(sumcheck_challenges);
+        match params.phase {
+            BytecodeReductionPhase::CycleVariables => {
+                accumulator.append_virtual(
+                    transcript,
+                    VirtualPolynomial::BytecodeClaimReductionIntermediate,
+                    SumcheckId::BytecodeClaimReductionCyclePhase,
+                    opening_point,
+                );
+                // Record LE challenges for phase 2 normalization.
+                params.cycle_var_challenges = sumcheck_challenges.to_vec();
+            }
+            BytecodeReductionPhase::LaneVariables => {
+                let chunk_ids: Vec<CommittedPolynomial> = (0..params.num_chunks)
+                    .map(CommittedPolynomial::BytecodeChunk)
+                    .collect();
+                accumulator.append_sparse(
+                    transcript,
+                    chunk_ids,
+                    SumcheckId::BytecodeClaimReduction,
+                    opening_point.r,
+                );
+            }
+        }
+    }
+}
+
+/// Builds the lane weights for the bytecode claim reduction, split into chunks.
+///
+/// One weight per lane (`total_lanes()` entries) is accumulated from the five value stages:
+/// each stage adds its gammas, scaled by that stage's entry of `eta_powers`, onto the lanes
+/// listed in its section below. Stages 4 and 5 further scale register lanes by eq tables
+/// built from the `RdWa` opening points read from `accumulator`.
+///
+/// Returns `num_chunks` vectors of `k_chunk` weights each, in lane order; positions past
+/// `total_lanes()` hold zero.
+fn compute_chunk_lane_weights<F: JoltField>(
+    bytecode_read_raf_params: &BytecodeReadRafSumcheckParams<F>,
+    accumulator: &dyn OpeningAccumulator<F>,
+    eta_powers: &[F; NUM_VAL_STAGES],
+    num_chunks: usize,
+    k_chunk: usize,
+) -> Vec<Vec<F>> {
+    let params = bytecode_read_raf_params;
+    let reg_count = REGISTER_COUNT as usize;
+    let total = total_lanes();
+
+    // Offsets (canonical lane ordering):
+    // rs1 | rs2 | rd | unexp_pc | imm | circuit flags | instr flags | lookup tables | raf flag
+    let rs1_start = 0usize;
+    let rs2_start = rs1_start + reg_count;
+    let rd_start = rs2_start + reg_count;
+    let unexp_pc_idx = rd_start + reg_count;
+    let imm_idx = unexp_pc_idx + 1;
+    let circuit_start = imm_idx + 1;
+    let instr_start = circuit_start + NUM_CIRCUIT_FLAGS;
+    let lookup_start = instr_start + NUM_INSTRUCTION_FLAGS;
+    let raf_flag_idx = lookup_start + LookupTables::<XLEN>::COUNT;
+    debug_assert_eq!(raf_flag_idx + 1, total);
+
+    // Lane index of a single circuit / instruction flag.
+    let circuit_lane = |flag: CircuitFlags| circuit_start + (flag as usize);
+    let instr_lane = |flag: InstructionFlags| instr_start + (flag as usize);
+
+    // Eq tables for stage4/stage5 register selection weights.
+    let log_reg = reg_count.log_2();
+    let (rd_wa_point_4, _) = accumulator.get_virtual_polynomial_opening(
+        VirtualPolynomial::RdWa,
+        SumcheckId::RegistersReadWriteChecking,
+    );
+    let eq_r_register_4 = EqPolynomial::<F>::evals(&rd_wa_point_4.r[..log_reg]);
+
+    let (rd_wa_point_5, _) = accumulator.get_virtual_polynomial_opening(
+        VirtualPolynomial::RdWa,
+        SumcheckId::RegistersValEvaluation,
+    );
+    let eq_r_register_5 = EqPolynomial::<F>::evals(&rd_wa_point_5.r[..log_reg]);
+
+    let mut weights = vec![F::zero(); total];
+
+    // Stage 1: unexp_pc, imm, then one gamma per circuit flag.
+    let c1 = eta_powers[0];
+    let g1 = &params.stage1_gammas;
+    weights[unexp_pc_idx] += c1 * g1[0];
+    weights[imm_idx] += c1 * g1[1];
+    for flag in 0..NUM_CIRCUIT_FLAGS {
+        weights[circuit_start + flag] += c1 * g1[2 + flag];
+    }
+
+    // Stage 2
+    let c2 = eta_powers[1];
+    let g2 = &params.stage2_gammas;
+    weights[circuit_lane(CircuitFlags::Jump)] += c2 * g2[0];
+    weights[instr_lane(InstructionFlags::Branch)] += c2 * g2[1];
+    weights[instr_lane(InstructionFlags::IsRdNotZero)] += c2 * g2[2];
+    weights[circuit_lane(CircuitFlags::WriteLookupOutputToRD)] += c2 * g2[3];
+
+    // Stage 3
+    let c3 = eta_powers[2];
+    let g3 = &params.stage3_gammas;
+    weights[imm_idx] += c3 * g3[0];
+    weights[unexp_pc_idx] += c3 * g3[1];
+    weights[instr_lane(InstructionFlags::LeftOperandIsRs1Value)] += c3 * g3[2];
+    weights[instr_lane(InstructionFlags::LeftOperandIsPC)] += c3 * g3[3];
+    weights[instr_lane(InstructionFlags::RightOperandIsRs2Value)] += c3 * g3[4];
+    weights[instr_lane(InstructionFlags::RightOperandIsImm)] += c3 * g3[5];
+    weights[instr_lane(InstructionFlags::IsNoop)] += c3 * g3[6];
+    weights[circuit_lane(CircuitFlags::VirtualInstruction)] += c3 * g3[7];
+    weights[circuit_lane(CircuitFlags::IsFirstInSequence)] += c3 * g3[8];
+
+    // Stage 4: rd / rs1 / rs2 lanes, one gamma per operand kind,
+    // each register's weight scaled by its Stage 4 eq-table entry.
+    let c4 = eta_powers[3];
+    let g4 = &params.stage4_gammas;
+    for r in 0..reg_count {
+        let eq = eq_r_register_4[r];
+        weights[rd_start + r] += c4 * g4[0] * eq;
+        weights[rs1_start + r] += c4 * g4[1] * eq;
+        weights[rs2_start + r] += c4 * g4[2] * eq;
+    }
+
+    // Stage 5: rd lanes (scaled by the Stage 5 eq table), the raf flag,
+    // and one gamma per lookup table.
+    let c5 = eta_powers[4];
+    let g5 = &params.stage5_gammas;
+    for r in 0..reg_count {
+        weights[rd_start + r] += c5 * g5[0] * eq_r_register_5[r];
+    }
+    weights[raf_flag_idx] += c5 * g5[1];
+    for table in 0..LookupTables::<XLEN>::COUNT {
+        weights[lookup_start + table] += c5 * g5[2 + table];
+    }
+
+    // Chunk into k_chunk-sized blocks. `weights` has exactly `total` entries, so a failed
+    // `get` means the lane lies past the last real lane and is padded with zero.
+    (0..num_chunks)
+        .map(|chunk_idx| {
+            let first_lane = chunk_idx * k_chunk;
+            (first_lane..first_lane + k_chunk)
+                .map(|lane| weights.get(lane).copied().unwrap_or_else(F::zero))
+                .collect_vec()
+        })
+        .collect_vec()
+}
diff --git a/jolt-core/src/zkvm/claim_reductions/hamming_weight.rs b/jolt-core/src/zkvm/claim_reductions/hamming_weight.rs
index d40860f35a..266287f80c 100644
--- a/jolt-core/src/zkvm/claim_reductions/hamming_weight.rs
+++ b/jolt-core/src/zkvm/claim_reductions/hamming_weight.rs
@@ -98,6 +98,7 @@ use crate::subprotocols::{
 };
 use crate::transcripts::Transcript;
 use crate::zkvm::{
+    bytecode::BytecodePreprocessing,
     config::OneHotParams,
     verifier::JoltSharedPreprocessing,
     witness::{CommittedPolynomial, VirtualPolynomial},
@@ -309,13 +310,14 @@ impl<F: JoltField> HammingWeightClaimReductionProver<F> {
         params: HammingWeightClaimReductionParams<F>,
         trace: &[Cycle],
         preprocessing: &JoltSharedPreprocessing,
+        bytecode: &BytecodePreprocessing,
         one_hot_params: &OneHotParams,
     ) -> Self {
         // Compute all G_i polynomials via streaming.
         // `params.r_cycle` is in BIG_ENDIAN (OpeningPoint) convention.
         let G_vecs = compute_all_G::<F>(
             trace,
-            &preprocessing.bytecode,
+            bytecode,
             &preprocessing.memory_layout,
             one_hot_params,
             &params.r_cycle,
diff --git a/jolt-core/src/zkvm/claim_reductions/mod.rs b/jolt-core/src/zkvm/claim_reductions/mod.rs
index 5d19f993a1..d208bff0f9 100644
--- a/jolt-core/src/zkvm/claim_reductions/mod.rs
+++ b/jolt-core/src/zkvm/claim_reductions/mod.rs
@@ -1,4 +1,5 @@
 pub mod advice;
+pub mod bytecode;
 pub mod hamming_weight;
 pub mod increments;
 pub mod instruction_lookups;
@@ -9,6 +10,10 @@ pub use advice::{
     AdviceClaimReductionParams, AdviceClaimReductionProver, AdviceClaimReductionVerifier,
     AdviceKind,
 };
+pub use bytecode::{
+    BytecodeClaimReductionParams, BytecodeClaimReductionProver, BytecodeClaimReductionVerifier,
+    BytecodeReductionPhase,
+};
 pub use hamming_weight::{
     HammingWeightClaimReductionParams, HammingWeightClaimReductionProver,
     HammingWeightClaimReductionVerifier,
diff --git a/jolt-core/src/zkvm/config.rs b/jolt-core/src/zkvm/config.rs
index c7846b1347..64e792e7ac 100644
--- a/jolt-core/src/zkvm/config.rs
+++ b/jolt-core/src/zkvm/config.rs
@@ -1,5 +1,8 @@
 use allocative::Allocative;
-use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
+use ark_serialize::{
+    CanonicalDeserialize, CanonicalSerialize, Compress, SerializationError, Valid, Validate,
+};
+use std::io::{Read, Write};
 
 use crate::field::JoltField;
 use crate::utils::math::Math;
@@ -20,6 +23,60 @@ pub fn get_instruction_sumcheck_phases(log_t: usize) -> usize {
     }
 }
 
+/// Controls whether the prover/verifier use the **full** bytecode path (verifier may do O(K))
+/// or the **committed** bytecode path (staged Val claims + claim reduction + folded Stage 8
+/// opening for bytecode chunk commitments).
+///
+/// The `u8` discriminants double as the serialized encoding (see the `CanonicalSerialize` /
+/// `CanonicalDeserialize` impls), so they must not be renumbered.
+///
+/// `Full` is the default mode.
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Allocative)]
+pub enum BytecodeMode {
+    /// Full mode: verifier may materialize bytecode-dependent polynomials (O(K_bytecode)).
+    #[default]
+    Full = 0,
+    /// Committed mode: use staged Val claims + `BytecodeClaimReduction`, and fold committed
+    /// bytecode chunk openings into the joint Stage 8 opening (Bytecode context embedding).
+    Committed = 1,
+}
+
+impl CanonicalSerialize for BytecodeMode {
+    fn serialize_with_mode<W: Write>(
+        &self,
+        writer: W,
+        compress: Compress,
+    ) -> Result<(), SerializationError> {
+        let tag = *self as u8;
+        tag.serialize_with_mode(writer, compress)
+    }
+
+    fn serialized_size(&self, compress: Compress) -> usize {
+        let tag = *self as u8;
+        tag.serialized_size(compress)
+    }
+}
+
+impl Valid for BytecodeMode {
+    fn check(&self) -> Result<(), SerializationError> {
+        Ok(())
+    }
+}
+
+impl CanonicalDeserialize for BytecodeMode {
+    fn deserialize_with_mode<R: Read>(
+        reader: R,
+        compress: Compress,
+        validate: Validate,
+    ) -> Result<Self, SerializationError> {
+        let tag = u8::deserialize_with_mode(reader, compress, validate)?;
+        let known = [Self::Full, Self::Committed];
+        let found = known.into_iter().find(|mode| *mode as u8 == tag);
+        found.ok_or(SerializationError::InvalidData)
+    }
+}
+
 /// Configuration for read-write checking sumchecks.
 ///
 /// Contains parameters that control phase structure for RAM and register
@@ -150,6 +207,22 @@ impl OneHotConfig {
         }
     }
 
+    /// Builds a `OneHotConfig` from an explicit `log_k_chunk`.
+    ///
+    /// Only 4 and 8 are expected (checked in debug builds). The input is truncated to `u8`
+    /// before it is compared or stored. `lookups_ra_virtual_log_k_chunk` is `LOG_K / 8` when
+    /// the truncated value is 4 and `LOG_K / 4` otherwise.
+    pub fn from_log_k_chunk(log_k_chunk: usize) -> Self {
+        debug_assert!(log_k_chunk == 4 || log_k_chunk == 8);
+        let chunk_bits = log_k_chunk as u8;
+        let virtual_divisor = if chunk_bits == 4 { 8 } else { 4 };
+
+        Self {
+            log_k_chunk: chunk_bits,
+            lookups_ra_virtual_log_k_chunk: (LOG_K / virtual_divisor) as u8,
+        }
+    }
+
     /// Validates that the one-hot configuration is valid.
     ///
     /// This is called by the verifier to ensure the prover hasn't provided
diff --git a/jolt-core/src/zkvm/mod.rs b/jolt-core/src/zkvm/mod.rs
index 82117f6b76..fe5ebf6d2c 100644
--- a/jolt-core/src/zkvm/mod.rs
+++ b/jolt-core/src/zkvm/mod.rs
@@ -36,6 +36,9 @@ pub mod spartan;
 pub mod verifier;
 pub mod witness;
 
+#[cfg(test)]
+mod tests;
+
 // Scoped CPU profiler for performance analysis. Feature-gated by "pprof".
 // Usage: let _guard = pprof_scope!("label");
 //
diff --git a/jolt-core/src/zkvm/proof_serialization.rs b/jolt-core/src/zkvm/proof_serialization.rs
index 9712bd7717..c03e027598 100644
--- a/jolt-core/src/zkvm/proof_serialization.rs
+++ b/jolt-core/src/zkvm/proof_serialization.rs
@@ -19,7 +19,7 @@ use crate::{
     subprotocols::sumcheck::SumcheckInstanceProof,
     transcripts::Transcript,
     zkvm::{
-        config::{OneHotConfig, ReadWriteConfig},
+        config::{BytecodeMode, OneHotConfig, ReadWriteConfig},
         instruction::{CircuitFlags, InstructionFlags},
         witness::{CommittedPolynomial, VirtualPolynomial},
     },
@@ -36,13 +36,15 @@ pub struct JoltProof<F: JoltField, PCS: CommitmentScheme<Field = F>, FS: Transcr
     pub stage3_sumcheck_proof: SumcheckInstanceProof<F, FS>,
     pub stage4_sumcheck_proof: SumcheckInstanceProof<F, FS>,
     pub stage5_sumcheck_proof: SumcheckInstanceProof<F, FS>,
-    pub stage6_sumcheck_proof: SumcheckInstanceProof<F, FS>,
+    pub stage6a_sumcheck_proof: SumcheckInstanceProof<F, FS>,
+    pub stage6b_sumcheck_proof: SumcheckInstanceProof<F, FS>,
     pub stage7_sumcheck_proof: SumcheckInstanceProof<F, FS>,
     pub joint_opening_proof: PCS::Proof,
     pub untrusted_advice_commitment: Option<PCS::Commitment>,
     pub trace_length: usize,
     pub ram_K: usize,
     pub bytecode_K: usize,
+    pub bytecode_mode: BytecodeMode,
     pub rw_config: ReadWriteConfig,
     pub one_hot_config: OneHotConfig,
     pub dory_layout: DoryLayout,
@@ -252,6 +254,10 @@ impl CanonicalSerialize for CommittedPolynomial {
                 3u8.serialize_with_mode(&mut writer, compress)?;
                 (u8::try_from(*i).unwrap()).serialize_with_mode(writer, compress)
             }
+            Self::BytecodeChunk(i) => {
+                7u8.serialize_with_mode(&mut writer, compress)?;
+                (u8::try_from(*i).unwrap()).serialize_with_mode(writer, compress)
+            }
             Self::RamRa(i) => {
                 4u8.serialize_with_mode(&mut writer, compress)?;
                 (u8::try_from(*i).unwrap()).serialize_with_mode(writer, compress)
@@ -264,7 +270,10 @@ impl CanonicalSerialize for CommittedPolynomial {
     fn serialized_size(&self, _compress: Compress) -> usize {
         match self {
             Self::RdInc | Self::RamInc | Self::TrustedAdvice | Self::UntrustedAdvice => 1,
-            Self::InstructionRa(_) | Self::BytecodeRa(_) | Self::RamRa(_) => 2,
+            Self::InstructionRa(_)
+            | Self::BytecodeRa(_)
+            | Self::BytecodeChunk(_)
+            | Self::RamRa(_) => 2,
         }
     }
 }
@@ -299,6 +308,10 @@ impl CanonicalDeserialize for CommittedPolynomial {
                 }
                 5 => Self::TrustedAdvice,
                 6 => Self::UntrustedAdvice,
+                7 => {
+                    let i = u8::deserialize_with_mode(reader, compress, validate)?;
+                    Self::BytecodeChunk(i as usize)
+                }
                 _ => return Err(SerializationError::InvalidData),
             },
         )
@@ -365,6 +378,15 @@ impl CanonicalSerialize for VirtualPolynomial {
                 40u8.serialize_with_mode(&mut writer, compress)?;
                 (u8::try_from(*flag).unwrap()).serialize_with_mode(&mut writer, compress)
             }
+            Self::BytecodeValStage(stage) => {
+                41u8.serialize_with_mode(&mut writer, compress)?;
+                (u8::try_from(*stage).unwrap()).serialize_with_mode(&mut writer, compress)
+            }
+            Self::BytecodeReadRafAddrClaim => 42u8.serialize_with_mode(&mut writer, compress),
+            Self::BooleanityAddrClaim => 43u8.serialize_with_mode(&mut writer, compress),
+            Self::BytecodeClaimReductionIntermediate => {
+                44u8.serialize_with_mode(&mut writer, compress)
+            }
         }
     }
 
@@ -406,11 +428,15 @@ impl CanonicalSerialize for VirtualPolynomial {
             | Self::RamValInit
             | Self::RamValFinal
             | Self::RamHammingWeight
-            | Self::UnivariateSkip => 1,
+            | Self::UnivariateSkip
+            | Self::BytecodeReadRafAddrClaim
+            | Self::BooleanityAddrClaim
+            | Self::BytecodeClaimReductionIntermediate => 1,
             Self::InstructionRa(_)
             | Self::OpFlags(_)
             | Self::InstructionFlags(_)
-            | Self::LookupTableFlag(_) => 2,
+            | Self::LookupTableFlag(_)
+            | Self::BytecodeValStage(_) => 2,
         }
     }
 }
@@ -486,6 +512,13 @@ impl CanonicalDeserialize for VirtualPolynomial {
                     let flag = u8::deserialize_with_mode(&mut reader, compress, validate)?;
                     Self::LookupTableFlag(flag as usize)
                 }
+                41 => {
+                    let stage = u8::deserialize_with_mode(&mut reader, compress, validate)?;
+                    Self::BytecodeValStage(stage as usize)
+                }
+                42 => Self::BytecodeReadRafAddrClaim,
+                43 => Self::BooleanityAddrClaim,
+                44 => Self::BytecodeClaimReductionIntermediate,
                 _ => return Err(SerializationError::InvalidData),
             },
         )
diff --git a/jolt-core/src/zkvm/prover.rs b/jolt-core/src/zkvm/prover.rs
index 814ff22bbb..3d9cf4226a 100644
--- a/jolt-core/src/zkvm/prover.rs
+++ b/jolt-core/src/zkvm/prover.rs
@@ -16,7 +16,9 @@ use std::{
 use crate::poly::commitment::dory::DoryContext;
 use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
 
-use crate::zkvm::config::ReadWriteConfig;
+use crate::zkvm::bytecode::chunks::total_lanes;
+use crate::zkvm::bytecode::{BytecodePreprocessing, TrustedBytecodeCommitments};
+use crate::zkvm::config::{BytecodeMode, ReadWriteConfig};
 use crate::zkvm::verifier::JoltSharedPreprocessing;
 use crate::zkvm::Serializable;
 
@@ -41,7 +43,10 @@ use crate::{
     },
     pprof_scope,
     subprotocols::{
-        booleanity::{BooleanitySumcheckParams, BooleanitySumcheckProver},
+        booleanity::{
+            BooleanityAddressSumcheckProver, BooleanityCycleSumcheckProver,
+            BooleanitySumcheckParams,
+        },
         sumcheck::{BatchedSumcheck, SumcheckInstanceProof},
         sumcheck_prover::SumcheckInstanceProver,
         univariate_skip::{prove_uniskip_round, UniSkipFirstRoundProof},
@@ -52,6 +57,7 @@ use crate::{
         bytecode::read_raf_checking::BytecodeReadRafSumcheckParams,
         claim_reductions::{
             AdviceClaimReductionParams, AdviceClaimReductionProver, AdviceKind,
+            BytecodeClaimReductionParams, BytecodeClaimReductionProver, BytecodeReductionPhase,
             HammingWeightClaimReductionParams, HammingWeightClaimReductionProver,
             IncClaimReductionSumcheckParams, IncClaimReductionSumcheckProver,
             InstructionLookupsClaimReductionSumcheckParams,
@@ -96,7 +102,9 @@ use crate::{
 use crate::{
     poly::commitment::commitment_scheme::CommitmentScheme,
     zkvm::{
-        bytecode::read_raf_checking::BytecodeReadRafSumcheckProver,
+        bytecode::read_raf_checking::{
+            BytecodeReadRafAddressSumcheckProver, BytecodeReadRafCycleSumcheckProver,
+        },
         fiat_shamir_preamble,
         instruction_lookups::{
             ra_virtual::InstructionRaSumcheckProver as LookupsRaSumcheckProver,
@@ -153,6 +161,9 @@ pub struct JoltCpuProver<
     /// The advice claim reduction sumcheck effectively spans two stages (6 and 7).
     /// Cache the prover state here between stages.
     advice_reduction_prover_untrusted: Option<AdviceClaimReductionProver<F>>,
+    /// The bytecode claim reduction sumcheck effectively spans two stages (6b and 7).
+    /// Cache the prover state here between stages.
+    bytecode_reduction_prover: Option<BytecodeClaimReductionProver<F>>,
     pub unpadded_trace_len: usize,
     pub padded_trace_len: usize,
     pub transcript: ProofTranscript,
@@ -162,6 +173,8 @@ pub struct JoltCpuProver<
     pub final_ram_state: Vec<u64>,
     pub one_hot_params: OneHotParams,
     pub rw_config: ReadWriteConfig,
+    /// First-class selection of full vs committed bytecode mode.
+    pub bytecode_mode: BytecodeMode,
 }
 impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscript: Transcript>
     JoltCpuProver<'a, F, PCS, ProofTranscript>
@@ -174,6 +187,29 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         trusted_advice: &[u8],
         trusted_advice_commitment: Option<PCS::Commitment>,
         trusted_advice_hint: Option<PCS::OpeningProofHint>,
+    ) -> Self {
+        Self::gen_from_elf_with_bytecode_mode(
+            preprocessing,
+            elf_contents,
+            inputs,
+            untrusted_advice,
+            trusted_advice,
+            trusted_advice_commitment,
+            trusted_advice_hint,
+            BytecodeMode::Full,
+        )
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn gen_from_elf_with_bytecode_mode(
+        preprocessing: &'a JoltProverPreprocessing<F, PCS>,
+        elf_contents: &[u8],
+        inputs: &[u8],
+        untrusted_advice: &[u8],
+        trusted_advice: &[u8],
+        trusted_advice_commitment: Option<PCS::Commitment>,
+        trusted_advice_hint: Option<PCS::OpeningProofHint>,
+        bytecode_mode: BytecodeMode,
     ) -> Self {
         let memory_config = MemoryConfig {
             max_untrusted_advice_size: preprocessing.shared.memory_layout.max_untrusted_advice_size,
@@ -219,7 +255,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             trace.len(),
         );
 
-        Self::gen_from_trace(
+        Self::gen_from_trace_with_bytecode_mode(
             preprocessing,
             lazy_trace,
             trace,
@@ -227,6 +263,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             trusted_advice_commitment,
             trusted_advice_hint,
             final_memory_state,
+            bytecode_mode,
         )
     }
 
@@ -308,6 +345,28 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
     }
 
     pub fn gen_from_trace(
+        preprocessing: &'a JoltProverPreprocessing<F, PCS>,
+        lazy_trace: LazyTraceIterator,
+        trace: Vec<Cycle>,
+        program_io: JoltDevice,
+        trusted_advice_commitment: Option<PCS::Commitment>,
+        trusted_advice_hint: Option<PCS::OpeningProofHint>,
+        final_memory_state: Memory,
+    ) -> Self {
+        Self::gen_from_trace_with_bytecode_mode(
+            preprocessing,
+            lazy_trace,
+            trace,
+            program_io,
+            trusted_advice_commitment,
+            trusted_advice_hint,
+            final_memory_state,
+            BytecodeMode::Full,
+        )
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn gen_from_trace_with_bytecode_mode(
         preprocessing: &'a JoltProverPreprocessing<F, PCS>,
         lazy_trace: LazyTraceIterator,
         mut trace: Vec<Cycle>,
@@ -315,6 +374,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         trusted_advice_commitment: Option<PCS::Commitment>,
         trusted_advice_hint: Option<PCS::OpeningProofHint>,
         final_memory_state: Memory,
+        bytecode_mode: BytecodeMode,
     ) -> Self {
         // truncate trailing zeros on device outputs
         program_io.outputs.truncate(
@@ -332,6 +392,15 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         } else {
             (trace.len() + 1).next_power_of_two()
         };
+
+        // In Committed mode, Stage 8 folds bytecode chunk openings into the *joint* opening.
+        // That folding currently requires log_T >= log_K_bytecode, so we ensure the padded trace
+        // length is at least the (power-of-two padded) bytecode size.
+        let padded_trace_len = if bytecode_mode == BytecodeMode::Committed {
+            padded_trace_len.max(preprocessing.shared.bytecode_size)
+        } else {
+            padded_trace_len
+        };
         // We may need extra padding so the main Dory matrix has enough (row, col) variables
         // to embed advice commitments committed in their own preprocessing-only contexts.
         let has_trusted_advice = !program_io.trusted_advice.is_empty();
@@ -385,8 +454,16 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let log_T = trace.len().log_2();
         let ram_log_K = ram_K.log_2();
         let rw_config = ReadWriteConfig::new(log_T, ram_log_K);
-        let one_hot_params =
-            OneHotParams::new(log_T, preprocessing.shared.bytecode.code_size, ram_K);
+        let one_hot_params = if bytecode_mode == BytecodeMode::Committed {
+            let committed = preprocessing
+                .bytecode_commitments
+                .as_ref()
+                .expect("bytecode commitments missing in committed mode");
+            let config = OneHotConfig::from_log_k_chunk(committed.log_k_chunk as usize);
+            OneHotParams::from_config(&config, preprocessing.shared.bytecode_size, ram_K)
+        } else {
+            OneHotParams::new(log_T, preprocessing.shared.bytecode_size, ram_K)
+        };
 
         Self {
             preprocessing,
@@ -402,6 +479,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             },
             advice_reduction_prover_trusted: None,
             advice_reduction_prover_untrusted: None,
+            bytecode_reduction_prover: None,
             unpadded_trace_len,
             padded_trace_len,
             transcript,
@@ -411,6 +489,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             final_ram_state,
             one_hot_params,
             rw_config,
+            bytecode_mode,
         }
     }
 
@@ -432,15 +511,20 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             &mut self.transcript,
         );
 
-        tracing::info!(
-            "bytecode size: {}",
-            self.preprocessing.shared.bytecode.code_size
-        );
+        tracing::info!("bytecode size: {}", self.preprocessing.shared.bytecode_size);
 
         let (commitments, mut opening_proof_hints) = self.generate_and_commit_witness_polynomials();
         let untrusted_advice_commitment = self.generate_and_commit_untrusted_advice();
         self.generate_and_commit_trusted_advice();
 
+        if self.bytecode_mode == BytecodeMode::Committed {
+            if let Some(trusted) = &self.preprocessing.bytecode_commitments {
+                for commitment in &trusted.commitments {
+                    self.transcript.append_serializable(commitment);
+                }
+            }
+        }
+
         // Add advice hints for batched Stage 8 opening
         if let Some(hint) = self.advice.trusted_advice_hint.take() {
             opening_proof_hints.insert(CommittedPolynomial::TrustedAdvice, hint);
@@ -448,13 +532,24 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         if let Some(hint) = self.advice.untrusted_advice_hint.take() {
             opening_proof_hints.insert(CommittedPolynomial::UntrustedAdvice, hint);
         }
+        if self.bytecode_mode == BytecodeMode::Committed {
+            if let Some(hints) = self.preprocessing.bytecode_commitment_hints.as_ref() {
+                for (idx, hint) in hints.iter().enumerate() {
+                    opening_proof_hints
+                        .insert(CommittedPolynomial::BytecodeChunk(idx), hint.clone());
+                }
+            }
+        }
 
         let (stage1_uni_skip_first_round_proof, stage1_sumcheck_proof) = self.prove_stage1();
         let (stage2_uni_skip_first_round_proof, stage2_sumcheck_proof) = self.prove_stage2();
         let stage3_sumcheck_proof = self.prove_stage3();
         let stage4_sumcheck_proof = self.prove_stage4();
         let stage5_sumcheck_proof = self.prove_stage5();
-        let stage6_sumcheck_proof = self.prove_stage6();
+        let (stage6a_sumcheck_proof, bytecode_read_raf_params, booleanity_params) =
+            self.prove_stage6a();
+        let stage6b_sumcheck_proof =
+            self.prove_stage6b(bytecode_read_raf_params, booleanity_params);
         let stage7_sumcheck_proof = self.prove_stage7();
 
         let joint_opening_proof = self.prove_stage8(opening_proof_hints);
@@ -489,12 +584,14 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             stage3_sumcheck_proof,
             stage4_sumcheck_proof,
             stage5_sumcheck_proof,
-            stage6_sumcheck_proof,
+            stage6a_sumcheck_proof,
+            stage6b_sumcheck_proof,
             stage7_sumcheck_proof,
             joint_opening_proof,
             trace_length: self.trace.len(),
             ram_K: self.one_hot_params.ram_k,
             bytecode_K: self.one_hot_params.bytecode_k,
+            bytecode_mode: self.bytecode_mode,
             rw_config: self.rw_config.clone(),
             one_hot_config: self.one_hot_params.to_config(),
             dory_layout: DoryGlobals::get_layout(),
@@ -548,7 +645,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
                 .par_iter()
                 .map(|poly_id| {
                     let witness: MultilinearPolynomial<F> = poly_id.generate_witness(
-                        &self.preprocessing.shared.bytecode,
+                        &self.preprocessing.bytecode,
                         &self.preprocessing.shared.memory_layout,
                         &trace,
                         Some(&self.one_hot_params),
@@ -588,6 +685,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
                             poly.stream_witness_and_commit_rows::<_, PCS>(
                                 &self.preprocessing.generators,
                                 &self.preprocessing.shared,
+                                &self.preprocessing.bytecode,
                                 &chunk,
                                 &self.one_hot_params,
                             )
@@ -702,7 +800,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let mut uni_skip = OuterUniSkipProver::initialize(
             uni_skip_params.clone(),
             &self.trace,
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
         );
         let first_round_proof = prove_uniskip_round(
             &mut uni_skip,
@@ -718,7 +816,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let schedule = LinearOnlySchedule::new(uni_skip_params.tau.len() - 1);
         let shared = OuterSharedState::new(
             Arc::clone(&self.trace),
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &uni_skip_params,
             &self.opening_accumulator,
         );
@@ -798,7 +896,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let ram_read_write_checking = RamReadWriteCheckingProver::initialize(
             ram_read_write_checking_params,
             &self.trace,
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &self.program_io.memory_layout,
             &self.initial_ram_state,
         );
@@ -875,7 +973,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let spartan_shift = ShiftSumcheckProver::initialize(
             spartan_shift_params,
             Arc::clone(&self.trace),
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
         );
         let spartan_instruction_input = InstructionInputSumcheckProver::initialize(
             spartan_instruction_input_params,
@@ -955,19 +1053,19 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let registers_read_write_checking = RegistersReadWriteCheckingProver::initialize(
             registers_read_write_checking_params,
             self.trace.clone(),
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &self.program_io.memory_layout,
         );
         let ram_val_evaluation = RamValEvaluationSumcheckProver::initialize(
             ram_val_evaluation_params,
             &self.trace,
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &self.program_io.memory_layout,
         );
         let ram_val_final = ValFinalSumcheckProver::initialize(
             ram_val_final_params,
             &self.trace,
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &self.program_io.memory_layout,
         );
 
@@ -1024,7 +1122,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         let registers_val_evaluation = RegistersValEvaluationSumcheckProver::initialize(
             registers_val_evaluation_params,
             &self.trace,
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &self.program_io.memory_layout,
         );
         let ram_ra_reduction = RamRaClaimReductionSumcheckProver::initialize(
@@ -1070,20 +1168,25 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
     }
 
     #[tracing::instrument(skip_all)]
-    fn prove_stage6(&mut self) -> SumcheckInstanceProof<F, ProofTranscript> {
+    fn prove_stage6a(
+        &mut self,
+    ) -> (
+        SumcheckInstanceProof<F, ProofTranscript>,
+        BytecodeReadRafSumcheckParams<F>,
+        BooleanitySumcheckParams<F>,
+    ) {
         #[cfg(not(target_arch = "wasm32"))]
-        print_current_memory_usage("Stage 6 baseline");
+        print_current_memory_usage("Stage 6a baseline");
 
-        let bytecode_read_raf_params = BytecodeReadRafSumcheckParams::gen(
-            &self.preprocessing.shared.bytecode,
+        let mut bytecode_read_raf_params = BytecodeReadRafSumcheckParams::gen(
+            &self.preprocessing.bytecode,
             self.trace.len().log_2(),
             &self.one_hot_params,
             &self.opening_accumulator,
             &mut self.transcript,
         );
-
-        let ram_hamming_booleanity_params =
-            HammingBooleanitySumcheckParams::new(&self.opening_accumulator);
+        bytecode_read_raf_params.use_staged_val_claims =
+            self.bytecode_mode == BytecodeMode::Committed;
 
         let booleanity_params = BooleanitySumcheckParams::new(
             self.trace.len().log_2(),
@@ -1092,6 +1195,56 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             &mut self.transcript,
         );
 
+        let mut bytecode_read_raf = BytecodeReadRafAddressSumcheckProver::initialize(
+            bytecode_read_raf_params.clone(),
+            Arc::clone(&self.trace),
+            Arc::clone(&self.preprocessing.bytecode),
+        );
+        let mut booleanity = BooleanityAddressSumcheckProver::initialize(
+            booleanity_params.clone(),
+            &self.trace,
+            &self.preprocessing.bytecode,
+            &self.program_io.memory_layout,
+        );
+
+        #[cfg(feature = "allocative")]
+        {
+            print_data_structure_heap_usage(
+                "BytecodeReadRafAddressSumcheckProver",
+                &bytecode_read_raf,
+            );
+            print_data_structure_heap_usage("BooleanityAddressSumcheckProver", &booleanity);
+        }
+
+        let mut instances: Vec<&mut dyn SumcheckInstanceProver<_, _>> =
+            vec![&mut bytecode_read_raf, &mut booleanity];
+
+        #[cfg(feature = "allocative")]
+        write_instance_flamegraph_svg(&instances, "stage6a_start_flamechart.svg");
+        tracing::info!("Stage 6a proving");
+        let (sumcheck_proof, _r_stage6a) = BatchedSumcheck::prove(
+            instances.iter_mut().map(|v| &mut **v as _).collect(),
+            &mut self.opening_accumulator,
+            &mut self.transcript,
+        );
+        #[cfg(feature = "allocative")]
+        write_instance_flamegraph_svg(&instances, "stage6a_end_flamechart.svg");
+
+        (sumcheck_proof, bytecode_read_raf_params, booleanity_params)
+    }
+
+    #[tracing::instrument(skip_all)]
+    fn prove_stage6b(
+        &mut self,
+        bytecode_read_raf_params: BytecodeReadRafSumcheckParams<F>,
+        booleanity_params: BooleanitySumcheckParams<F>,
+    ) -> SumcheckInstanceProof<F, ProofTranscript> {
+        #[cfg(not(target_arch = "wasm32"))]
+        print_current_memory_usage("Stage 6b baseline");
+
+        let ram_hamming_booleanity_params =
+            HammingBooleanitySumcheckParams::new(&self.opening_accumulator);
+
         let ram_ra_virtual_params = RamRaVirtualParams::new(
             self.trace.len(),
             &self.one_hot_params,
@@ -1108,7 +1261,24 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             &mut self.transcript,
         );
 
-        // Advice claim reduction (Phase 1 in Stage 6): trusted and untrusted are separate instances.
+        // Bytecode claim reduction (Phase 1 in Stage 6b): consumes Val_s(r_bc) from Stage 6a and
+        // caches an intermediate claim for Stage 7.
+        if self.bytecode_mode == BytecodeMode::Committed {
+            let bytecode_reduction_params = BytecodeClaimReductionParams::new(
+                &bytecode_read_raf_params,
+                &self.opening_accumulator,
+                &mut self.transcript,
+            );
+            self.bytecode_reduction_prover = Some(BytecodeClaimReductionProver::initialize(
+                bytecode_reduction_params,
+                Arc::clone(&self.preprocessing.bytecode),
+            ));
+        } else {
+            // Legacy mode: do not run the bytecode claim reduction.
+            self.bytecode_reduction_prover = None;
+        }
+
+        // Advice claim reduction (Phase 1 in Stage 6b): trusted and untrusted are separate instances.
         if self.advice.trusted_advice_polynomial.is_some() {
             let trusted_advice_params = AdviceClaimReductionParams::new(
                 AdviceKind::Trusted,
@@ -1159,20 +1329,22 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             };
         }
 
-        let mut bytecode_read_raf = BytecodeReadRafSumcheckProver::initialize(
+        // Initialize Stage 6b cycle provers from scratch (Option B).
+        let mut bytecode_read_raf = BytecodeReadRafCycleSumcheckProver::initialize(
             bytecode_read_raf_params,
             Arc::clone(&self.trace),
-            Arc::clone(&self.preprocessing.shared.bytecode),
+            Arc::clone(&self.preprocessing.bytecode),
+            &self.opening_accumulator,
         );
-        let mut ram_hamming_booleanity =
-            HammingBooleanitySumcheckProver::initialize(ram_hamming_booleanity_params, &self.trace);
-
-        let mut booleanity = BooleanitySumcheckProver::initialize(
+        let mut booleanity = BooleanityCycleSumcheckProver::initialize(
             booleanity_params,
             &self.trace,
-            &self.preprocessing.shared.bytecode,
+            &self.preprocessing.bytecode,
             &self.program_io.memory_layout,
+            &self.opening_accumulator,
         );
+        let mut ram_hamming_booleanity =
+            HammingBooleanitySumcheckProver::initialize(ram_hamming_booleanity_params, &self.trace);
 
         let mut ram_ra_virtual = RamRaVirtualSumcheckProver::initialize(
             ram_ra_virtual_params,
@@ -1187,12 +1359,15 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
 
         #[cfg(feature = "allocative")]
         {
-            print_data_structure_heap_usage("BytecodeReadRafSumcheckProver", &bytecode_read_raf);
+            print_data_structure_heap_usage(
+                "BytecodeReadRafCycleSumcheckProver",
+                &bytecode_read_raf,
+            );
             print_data_structure_heap_usage(
                 "ram HammingBooleanitySumcheckProver",
                 &ram_hamming_booleanity,
             );
-            print_data_structure_heap_usage("BooleanitySumcheckProver", &booleanity);
+            print_data_structure_heap_usage("BooleanityCycleSumcheckProver", &booleanity);
             print_data_structure_heap_usage("RamRaSumcheckProver", &ram_ra_virtual);
             print_data_structure_heap_usage("LookupsRaSumcheckProver", &lookups_ra_virtual);
             print_data_structure_heap_usage("IncClaimReductionSumcheckProver", &inc_reduction);
@@ -1212,6 +1387,9 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             &mut lookups_ra_virtual,
             &mut inc_reduction,
         ];
+        if let Some(bytecode) = self.bytecode_reduction_prover.as_mut() {
+            instances.push(bytecode);
+        }
         if let Some(advice) = self.advice_reduction_prover_trusted.as_mut() {
             instances.push(advice);
         }
@@ -1220,15 +1398,16 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         }
 
         #[cfg(feature = "allocative")]
-        write_instance_flamegraph_svg(&instances, "stage6_start_flamechart.svg");
-        tracing::info!("Stage 6 proving");
-        let (sumcheck_proof, _r_stage6) = BatchedSumcheck::prove(
+        write_instance_flamegraph_svg(&instances, "stage6b_start_flamechart.svg");
+        tracing::info!("Stage 6b proving");
+
+        let (sumcheck_proof, _r_stage6b) = BatchedSumcheck::prove(
             instances.iter_mut().map(|v| &mut **v as _).collect(),
             &mut self.opening_accumulator,
             &mut self.transcript,
         );
         #[cfg(feature = "allocative")]
-        write_instance_flamegraph_svg(&instances, "stage6_end_flamechart.svg");
+        write_instance_flamegraph_svg(&instances, "stage6b_end_flamechart.svg");
         drop_in_background_thread(bytecode_read_raf);
         drop_in_background_thread(ram_hamming_booleanity);
         drop_in_background_thread(booleanity);
@@ -1253,6 +1432,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             hw_params,
             &self.trace,
             &self.preprocessing.shared,
+            &self.preprocessing.bytecode,
             &self.one_hot_params,
         );
 
@@ -1260,10 +1440,15 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         print_data_structure_heap_usage("HammingWeightClaimReductionProver", &hw_prover);
 
         // Run Stage 7 batched sumcheck (address rounds only).
-        // Includes HammingWeightClaimReduction plus address phase of advice reduction instances (if needed).
+        // Includes HammingWeightClaimReduction plus lane/address-phase reductions (if needed).
         let mut instances: Vec<Box<dyn SumcheckInstanceProver<F, ProofTranscript>>> =
             vec![Box::new(hw_prover)];
 
+        if let Some(mut bytecode_reduction_prover) = self.bytecode_reduction_prover.take() {
+            bytecode_reduction_prover.params.phase = BytecodeReductionPhase::LaneVariables;
+            instances.push(Box::new(bytecode_reduction_prover));
+        }
+
         if let Some(mut advice_reduction_prover_trusted) =
             self.advice_reduction_prover_trusted.take()
         {
@@ -1435,6 +1620,49 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
             ));
         }
 
+        // Bytecode chunk polynomials: committed in Bytecode context and embedded into the
+        // main opening point by fixing the extra cycle variables to 0.
+        if self.bytecode_mode == BytecodeMode::Committed {
+            let (bytecode_point, _) = self.opening_accumulator.get_committed_polynomial_opening(
+                CommittedPolynomial::BytecodeChunk(0),
+                SumcheckId::BytecodeClaimReduction,
+            );
+            let log_t = opening_point.r.len() - log_k_chunk;
+            let log_k = bytecode_point.r.len() - log_k_chunk;
+            assert!(
+                log_k <= log_t,
+                "bytecode folding requires log_T >= log_K (got log_T={log_t}, log_K={log_k})"
+            );
+            #[cfg(test)]
+            {
+                if log_k == log_t {
+                    assert_eq!(
+                        bytecode_point.r, opening_point.r,
+                        "BytecodeChunk opening point must equal unified opening point when log_K == log_T"
+                    );
+                } else {
+                    let (r_lane_main, r_cycle_main) = opening_point.split_at(log_k_chunk);
+                    let (r_lane_bc, r_cycle_bc) = bytecode_point.split_at(log_k_chunk);
+                    debug_assert_eq!(r_lane_main.r, r_lane_bc.r);
+                    debug_assert_eq!(&r_cycle_main.r[(log_t - log_k)..], r_cycle_bc.r.as_slice());
+                }
+            }
+            let lagrange_factor =
+                compute_advice_lagrange_factor::<F>(&opening_point.r, &bytecode_point.r);
+
+            let num_chunks = total_lanes().div_ceil(self.one_hot_params.k_chunk);
+            for i in 0..num_chunks {
+                let (_, claim) = self.opening_accumulator.get_committed_polynomial_opening(
+                    CommittedPolynomial::BytecodeChunk(i),
+                    SumcheckId::BytecodeClaimReduction,
+                );
+                polynomial_claims.push((
+                    CommittedPolynomial::BytecodeChunk(i),
+                    claim * lagrange_factor,
+                ));
+            }
+        }
+
         // 2. Sample gamma and compute powers for RLC
         let claims: Vec<F> = polynomial_claims.iter().map(|(_, c)| *c).collect();
         self.transcript.append_scalars(&claims);
@@ -1448,7 +1676,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme<Field = F>, ProofTranscrip
         };
 
         let streaming_data = Arc::new(RLCStreamingData {
-            bytecode: Arc::clone(&self.preprocessing.shared.bytecode),
+            bytecode: Arc::clone(&self.preprocessing.bytecode),
             memory_layout: self.preprocessing.shared.memory_layout.clone(),
         });
 
@@ -1519,6 +1747,17 @@ fn write_instance_flamegraph_svg(
 pub struct JoltProverPreprocessing<F: JoltField, PCS: CommitmentScheme<Field = F>> {
     pub generators: PCS::ProverSetup,
     pub shared: JoltSharedPreprocessing,
+    /// Full bytecode preprocessing, held by the prover in both modes (used for witness generation).
+    pub bytecode: Arc<BytecodePreprocessing>,
+    /// Trusted commitments to the bytecode chunk polynomials.
+    ///
+    /// `None` when built with `new` (Full mode: the verifier has the full bytecode).
+    /// `Some` when built with `new_committed` (Committed mode).
+    pub bytecode_commitments: Option<TrustedBytecodeCommitments<PCS>>,
+    /// Opening proof hints for the bytecode commitments, e.g., Dory tier-1 data (only in Committed mode).
+    ///
+    /// One hint per commitment; the prover registers hint `i` as `CommittedPolynomial::BytecodeChunk(i)`.
+    pub bytecode_commitment_hints: Option<Vec<PCS::OpeningProofHint>>,
 }
 
 impl<F, PCS> JoltProverPreprocessing<F, PCS>
@@ -1526,11 +1765,8 @@ where
     F: JoltField,
     PCS: CommitmentScheme<Field = F>,
 {
-    #[tracing::instrument(skip_all, name = "JoltProverPreprocessing::gen")]
-    pub fn new(
-        shared: JoltSharedPreprocessing,
-        // max_trace_length: usize,
-    ) -> JoltProverPreprocessing<F, PCS> {
+    /// Setup generators based on trace length (Main context).
+    fn setup_generators(shared: &JoltSharedPreprocessing) -> PCS::ProverSetup {
         use common::constants::ONEHOT_CHUNK_THRESHOLD_LOG_T;
         let max_T: usize = shared.max_padded_trace_length.next_power_of_two();
         let max_log_T = max_T.log_2();
@@ -1540,8 +1776,79 @@ where
         } else {
             8
         };
-        let generators = PCS::setup_prover(max_log_k_chunk + max_log_T);
-        JoltProverPreprocessing { generators, shared }
+        PCS::setup_prover(max_log_k_chunk + max_log_T)
+    }
+
+    /// Builds the prover setup used in Committed mode.
+    ///
+    /// Sized for the larger of `max_padded_trace_length` (Main context) and `bytecode_size`
+    /// (Bytecode context), rounded up to a power of two.
+    fn setup_generators_committed(shared: &JoltSharedPreprocessing) -> PCS::ProverSetup {
+        use common::constants::ONEHOT_CHUNK_THRESHOLD_LOG_T;
+        // Both contexts share one setup, so size it for whichever is larger.
+        let largest_len = std::cmp::max(shared.max_padded_trace_length, shared.bytecode_size);
+        let log_len = largest_len.next_power_of_two().log_2();
+        // log2 of the chunk size: 4 below the threshold, 8 at or above it.
+        let log_chunk = if log_len >= ONEHOT_CHUNK_THRESHOLD_LOG_T {
+            8
+        } else {
+            4
+        };
+        PCS::setup_prover(log_chunk + log_len)
+    }
+
+    /// Builds Full-mode prover preprocessing: generators come from `setup_generators`,
+    /// and no bytecode commitments or hints are attached.
+    /// Intended for setups where the verifier will have access to the full bytecode.
+    #[tracing::instrument(skip_all, name = "JoltProverPreprocessing::new")]
+    pub fn new(
+        shared: JoltSharedPreprocessing,
+        bytecode: Arc<BytecodePreprocessing>,
+    ) -> JoltProverPreprocessing<F, PCS> {
+        // `generators` is listed first so `shared` is borrowed before it is moved in below.
+        Self {
+            generators: Self::setup_generators(&shared),
+            bytecode_commitments: None,
+            bytecode_commitment_hints: None,
+            shared,
+            bytecode,
+        }
+    }
+
+    /// Builds Committed-mode prover preprocessing.
+    ///
+    /// Intended for succinct verification, where the verifier only receives bytecode commitments.
+    /// Commitments and opening hints for the bytecode chunk polynomials are derived here.
+    #[tracing::instrument(skip_all, name = "JoltProverPreprocessing::new_committed")]
+    pub fn new_committed(
+        shared: JoltSharedPreprocessing,
+        bytecode: Arc<BytecodePreprocessing>,
+    ) -> JoltProverPreprocessing<F, PCS> {
+        use common::constants::ONEHOT_CHUNK_THRESHOLD_LOG_T;
+        // Mirrors the sizing in `setup_generators_committed`; the two must stay in agreement.
+        let padded_len = std::cmp::max(shared.max_padded_trace_length, shared.bytecode_size)
+            .next_power_of_two();
+        let log_k_chunk = if padded_len.log_2() >= ONEHOT_CHUNK_THRESHOLD_LOG_T {
+            8
+        } else {
+            4
+        };
+        let generators = Self::setup_generators_committed(&shared);
+        // Derive the trusted commitments together with their opening hints.
+        let (commitments, hints) =
+            TrustedBytecodeCommitments::derive(&bytecode, &generators, log_k_chunk, padded_len);
+        Self {
+            generators,
+            shared,
+            bytecode,
+            bytecode_commitments: Some(commitments),
+            bytecode_commitment_hints: Some(hints),
+        }
+    }
+
+    /// Returns `true` if bytecode commitments are present (Committed mode), `false` otherwise.
+    pub fn is_committed_mode(&self) -> bool {
+        self.bytecode_commitments.is_some()
     }
 
     pub fn save_to_target_dir(&self, target_dir: &str) -> std::io::Result<()> {
@@ -1566,891 +1873,3 @@ impl<F: JoltField, PCS: CommitmentScheme<Field = F>> Serializable
     for JoltProverPreprocessing<F, PCS>
 {
 }
-
-#[cfg(test)]
-mod tests {
-    use ark_bn254::Fr;
-    use serial_test::serial;
-
-    use crate::host;
-    use crate::poly::commitment::dory::{DoryGlobals, DoryLayout};
-    use crate::poly::{
-        commitment::{
-            commitment_scheme::CommitmentScheme,
-            dory::{DoryCommitmentScheme, DoryContext},
-        },
-        multilinear_polynomial::MultilinearPolynomial,
-        opening_proof::{OpeningAccumulator, SumcheckId},
-    };
-    use crate::zkvm::claim_reductions::AdviceKind;
-    use crate::zkvm::verifier::JoltSharedPreprocessing;
-    use crate::zkvm::witness::CommittedPolynomial;
-    use crate::zkvm::{
-        prover::JoltProverPreprocessing,
-        ram::populate_memory_states,
-        verifier::{JoltVerifier, JoltVerifierPreprocessing},
-        RV64IMACProver, RV64IMACVerifier,
-    };
-
-    fn commit_trusted_advice_preprocessing_only(
-        preprocessing: &JoltProverPreprocessing<Fr, DoryCommitmentScheme>,
-        trusted_advice_bytes: &[u8],
-    ) -> (
-        <DoryCommitmentScheme as CommitmentScheme>::Commitment,
-        <DoryCommitmentScheme as CommitmentScheme>::OpeningProofHint,
-    ) {
-        let max_trusted_advice_size = preprocessing.shared.memory_layout.max_trusted_advice_size;
-        let mut trusted_advice_words = vec![0u64; (max_trusted_advice_size as usize) / 8];
-        populate_memory_states(
-            0,
-            trusted_advice_bytes,
-            Some(&mut trusted_advice_words),
-            None,
-        );
-
-        let poly = MultilinearPolynomial::<Fr>::from(trusted_advice_words);
-        let advice_len = poly.len().next_power_of_two().max(1);
-
-        let _guard =
-            DoryGlobals::initialize_context(1, advice_len, DoryContext::TrustedAdvice, None);
-        let (commitment, hint) = {
-            let _ctx = DoryGlobals::with_context(DoryContext::TrustedAdvice);
-            DoryCommitmentScheme::commit(&poly, &preprocessing.generators)
-        };
-        (commitment, hint)
-    }
-
-    #[test]
-    #[serial]
-    fn fib_e2e_dory() {
-        DoryGlobals::reset();
-        let mut program = host::Program::new("fibonacci-guest");
-        let inputs = postcard::to_stdvec(&100u32).unwrap();
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &inputs,
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            shared_preprocessing,
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-    }
-
-    #[test]
-    #[serial]
-    fn small_trace_e2e_dory() {
-        DoryGlobals::reset();
-        let mut program = host::Program::new("fibonacci-guest");
-        let inputs = postcard::to_stdvec(&5u32).unwrap();
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            256,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let log_chunk = 8; // Use default log_chunk for tests
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &inputs,
-            &[],
-            &[],
-            None,
-            None,
-        );
-
-        assert!(
-            prover.padded_trace_len <= (1 << log_chunk),
-            "Test requires T <= chunk_size ({}), got T = {}",
-            1 << log_chunk,
-            prover.padded_trace_len
-        );
-
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-    }
-
-    #[test]
-    #[serial]
-    fn sha3_e2e_dory() {
-        DoryGlobals::reset();
-        // Ensure SHA3 inline library is linked and auto-registered
-        #[cfg(feature = "host")]
-        use jolt_inlines_keccak256 as _;
-        // SHA3 inlines are automatically registered via #[ctor::ctor]
-        // when the jolt-inlines-keccak256 crate is linked (see lib.rs)
-
-        let mut program = host::Program::new("sha3-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let inputs = postcard::to_stdvec(&[5u8; 32]).unwrap();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &inputs,
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device.clone(),
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-        assert_eq!(
-            io_device.inputs, inputs,
-            "Inputs mismatch: expected {:?}, got {:?}",
-            inputs, io_device.inputs
-        );
-        let expected_output = &[
-            0xd0, 0x3, 0x5c, 0x96, 0x86, 0x6e, 0xe2, 0x2e, 0x81, 0xf5, 0xc4, 0xef, 0xbd, 0x88,
-            0x33, 0xc1, 0x7e, 0xa1, 0x61, 0x10, 0x81, 0xfc, 0xd7, 0xa3, 0xdd, 0xce, 0xce, 0x7f,
-            0x44, 0x72, 0x4, 0x66,
-        ];
-        assert_eq!(io_device.outputs, expected_output, "Outputs mismatch",);
-    }
-
-    #[test]
-    #[serial]
-    fn sha2_e2e_dory() {
-        DoryGlobals::reset();
-        // Ensure SHA2 inline library is linked and auto-registered
-        #[cfg(feature = "host")]
-        use jolt_inlines_sha2 as _;
-        // SHA2 inlines are automatically registered via #[ctor::ctor]
-        // when the jolt-inlines-sha2 crate is linked (see lib.rs)
-        let mut program = host::Program::new("sha2-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let inputs = postcard::to_stdvec(&[5u8; 32]).unwrap();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &inputs,
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device.clone(),
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-        let expected_output = &[
-            0x28, 0x9b, 0xdf, 0x82, 0x9b, 0x4a, 0x30, 0x26, 0x7, 0x9a, 0x3e, 0xa0, 0x89, 0x73,
-            0xb1, 0x97, 0x2d, 0x12, 0x4e, 0x7e, 0xaf, 0x22, 0x33, 0xc6, 0x3, 0x14, 0x3d, 0xc6,
-            0x3b, 0x50, 0xd2, 0x57,
-        ];
-        assert_eq!(
-            io_device.outputs, expected_output,
-            "Outputs mismatch: expected {:?}, got {:?}",
-            expected_output, io_device.outputs
-        );
-    }
-
-    #[test]
-    #[serial]
-    fn sha2_e2e_dory_with_unused_advice() {
-        DoryGlobals::reset();
-        // SHA2 guest does not consume advice, but providing both trusted and untrusted advice
-        // should still work correctly through the full pipeline:
-        // - Trusted: commit in preprocessing-only context, reduce in Stage 6, batch in Stage 8
-        // - Untrusted: commit at prove time, reduce in Stage 6, batch in Stage 8
-        let mut program = host::Program::new("sha2-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let inputs = postcard::to_stdvec(&[5u8; 32]).unwrap();
-        let trusted_advice = postcard::to_stdvec(&[7u8; 32]).unwrap();
-        let untrusted_advice = postcard::to_stdvec(&[9u8; 32]).unwrap();
-
-        let (_, _, _, io_device) = program.trace(&inputs, &untrusted_advice, &trusted_advice);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents = program.get_elf_contents().expect("elf contents is None");
-
-        let (trusted_commitment, trusted_hint) =
-            commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
-
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            &elf_contents,
-            &inputs,
-            &untrusted_advice,
-            &trusted_advice,
-            Some(trusted_commitment),
-            Some(trusted_hint),
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
-        RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device.clone(),
-            Some(trusted_commitment),
-            debug_info,
-        )
-        .expect("Failed to create verifier")
-        .verify()
-        .expect("Failed to verify proof");
-
-        // Verify output is correct (advice should not affect sha2 output)
-        let expected_output = &[
-            0x28, 0x9b, 0xdf, 0x82, 0x9b, 0x4a, 0x30, 0x26, 0x7, 0x9a, 0x3e, 0xa0, 0x89, 0x73,
-            0xb1, 0x97, 0x2d, 0x12, 0x4e, 0x7e, 0xaf, 0x22, 0x33, 0xc6, 0x3, 0x14, 0x3d, 0xc6,
-            0x3b, 0x50, 0xd2, 0x57,
-        ];
-        assert_eq!(io_device.outputs, expected_output);
-    }
-
-    #[test]
-    #[serial]
-    fn max_advice_with_small_trace() {
-        DoryGlobals::reset();
-        // Tests that max-sized advice (4KB = 512 words) works with a minimal trace.
-        // With balanced dims (sigma_a=5, nu_a=4 for 512 words), the minimum padded trace
-        // (256 cycles -> total_vars=12) is sufficient to embed advice.
-        let mut program = host::Program::new("fibonacci-guest");
-        let inputs = postcard::to_stdvec(&5u32).unwrap();
-        let trusted_advice = vec![7u8; 4096];
-        let untrusted_advice = vec![9u8; 4096];
-
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (lazy_trace, trace, final_memory_state, io_device) =
-            program.trace(&inputs, &untrusted_advice, &trusted_advice);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            256,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        tracing::info!(
-            "preprocessing.memory_layout.max_trusted_advice_size: {}",
-            shared_preprocessing.memory_layout.max_trusted_advice_size
-        );
-
-        let (trusted_commitment, trusted_hint) =
-            commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
-
-        let prover = RV64IMACProver::gen_from_trace(
-            &prover_preprocessing,
-            lazy_trace,
-            trace,
-            io_device,
-            Some(trusted_commitment),
-            Some(trusted_hint),
-            final_memory_state,
-        );
-
-        // Trace is tiny but advice is max-sized
-        assert!(prover.unpadded_trace_len < 512);
-        assert_eq!(prover.padded_trace_len, 256);
-
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
-        RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            Some(trusted_commitment),
-            debug_info,
-        )
-        .expect("Failed to create verifier")
-        .verify()
-        .expect("Verification failed");
-    }
-
-    #[test]
-    #[serial]
-    fn advice_e2e_dory() {
-        DoryGlobals::reset();
-        // Tests a guest (merkle-tree) that actually consumes both trusted and untrusted advice.
-        let mut program = host::Program::new("merkle-tree-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-
-        // Merkle tree with 4 leaves: input=leaf1, trusted=[leaf2, leaf3], untrusted=leaf4
-        let inputs = postcard::to_stdvec(&[5u8; 32].as_slice()).unwrap();
-        let untrusted_advice = postcard::to_stdvec(&[8u8; 32]).unwrap();
-        let mut trusted_advice = postcard::to_stdvec(&[6u8; 32]).unwrap();
-        trusted_advice.extend(postcard::to_stdvec(&[7u8; 32]).unwrap());
-
-        let (_, _, _, io_device) = program.trace(&inputs, &untrusted_advice, &trusted_advice);
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents = program.get_elf_contents().expect("elf contents is None");
-
-        let (trusted_commitment, trusted_hint) =
-            commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
-
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            &elf_contents,
-            &inputs,
-            &untrusted_advice,
-            &trusted_advice,
-            Some(trusted_commitment),
-            Some(trusted_hint),
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
-        RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device.clone(),
-            Some(trusted_commitment),
-            debug_info,
-        )
-        .expect("Failed to create verifier")
-        .verify()
-        .expect("Verification failed");
-
-        // Expected merkle root for leaves [5;32], [6;32], [7;32], [8;32]
-        let expected_output = &[
-            0xb4, 0x37, 0x0f, 0x3a, 0xb, 0x3d, 0x38, 0xa8, 0x7a, 0x6c, 0x4c, 0x46, 0x9, 0xe7, 0x83,
-            0xb3, 0xcc, 0xb7, 0x1c, 0x30, 0x1f, 0xf8, 0x54, 0xd, 0xf7, 0xdd, 0xc8, 0x42, 0x32,
-            0xbb, 0x16, 0xd7,
-        ];
-        assert_eq!(io_device.outputs, expected_output);
-    }
-
-    #[test]
-    #[serial]
-    fn advice_opening_point_derives_from_unified_point() {
-        DoryGlobals::reset();
-        // Tests that advice opening points are correctly derived from the unified main opening
-        // point using Dory's balanced dimension policy.
-        //
-        // For a small trace (256 cycles), the advice row coordinates span both Stage 6 (cycle)
-        // and Stage 7 (address) challenges, verifying the two-phase reduction works correctly.
-        let mut program = host::Program::new("fibonacci-guest");
-        let inputs = postcard::to_stdvec(&5u32).unwrap();
-        let trusted_advice = postcard::to_stdvec(&[7u8; 32]).unwrap();
-        let untrusted_advice = postcard::to_stdvec(&[9u8; 32]).unwrap();
-
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (lazy_trace, trace, final_memory_state, io_device) =
-            program.trace(&inputs, &untrusted_advice, &trusted_advice);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let (trusted_commitment, trusted_hint) =
-            commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
-
-        let prover = RV64IMACProver::gen_from_trace(
-            &prover_preprocessing,
-            lazy_trace,
-            trace,
-            io_device,
-            Some(trusted_commitment),
-            Some(trusted_hint),
-            final_memory_state,
-        );
-
-        assert_eq!(prover.padded_trace_len, 256, "test expects small trace");
-
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-        let debug_info = debug_info.expect("expected debug_info in tests");
-
-        // Get unified opening point and derive expected advice point
-        let (opening_point, _) = debug_info
-            .opening_accumulator
-            .get_committed_polynomial_opening(
-                CommittedPolynomial::InstructionRa(0),
-                SumcheckId::HammingWeightClaimReduction,
-            );
-        let mut point_dory_le = opening_point.r.clone();
-        point_dory_le.reverse();
-
-        let total_vars = point_dory_le.len();
-        let (sigma_main, _nu_main) = DoryGlobals::balanced_sigma_nu(total_vars);
-        let (sigma_a, nu_a) = DoryGlobals::advice_sigma_nu_from_max_bytes(
-            prover_preprocessing
-                .shared
-                .memory_layout
-                .max_trusted_advice_size as usize,
-        );
-
-        // Build expected advice point: [col_bits[0..sigma_a] || row_bits[0..nu_a]]
-        let mut expected_advice_le: Vec<_> = point_dory_le[0..sigma_a].to_vec();
-        expected_advice_le.extend_from_slice(&point_dory_le[sigma_main..sigma_main + nu_a]);
-
-        // Verify both advice types derive the same opening point
-        for (name, kind) in [
-            ("trusted", AdviceKind::Trusted),
-            ("untrusted", AdviceKind::Untrusted),
-        ] {
-            let get_fn = debug_info
-                .opening_accumulator
-                .get_advice_opening(kind, SumcheckId::AdviceClaimReduction);
-            assert!(
-                get_fn.is_some(),
-                "{name} advice opening missing for AdviceClaimReductionPhase2"
-            );
-            let (point_be, _) = get_fn.unwrap();
-            let mut point_le = point_be.r.clone();
-            point_le.reverse();
-            assert_eq!(point_le, expected_advice_le, "{name} advice point mismatch");
-        }
-
-        // Verify end-to-end
-        let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
-        RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            Some(trusted_commitment),
-            Some(debug_info),
-        )
-        .expect("Failed to create verifier")
-        .verify()
-        .expect("Verification failed");
-    }
-
-    #[test]
-    #[serial]
-    fn memory_ops_e2e_dory() {
-        DoryGlobals::reset();
-        let mut program = host::Program::new("memory-ops-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (_, _, _, io_device) = program.trace(&[], &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &[],
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-    }
-
-    #[test]
-    #[serial]
-    fn btreemap_e2e_dory() {
-        DoryGlobals::reset();
-        let mut program = host::Program::new("btreemap-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let inputs = postcard::to_stdvec(&50u32).unwrap();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &inputs,
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-    }
-
-    #[test]
-    #[serial]
-    fn muldiv_e2e_dory() {
-        DoryGlobals::reset();
-        let mut program = host::Program::new("muldiv-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let inputs = postcard::to_stdvec(&[9u32, 5u32, 3u32]).unwrap();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents_opt = program.get_elf_contents();
-        let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            elf_contents,
-            &[50],
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier = RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device,
-            None,
-            debug_info,
-        )
-        .expect("Failed to create verifier");
-        verifier.verify().expect("Failed to verify proof");
-    }
-
-    #[test]
-    #[serial]
-    #[should_panic]
-    fn truncated_trace() {
-        let mut program = host::Program::new("fibonacci-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-        let inputs = postcard::to_stdvec(&9u8).unwrap();
-        let (lazy_trace, mut trace, final_memory_state, mut program_io) =
-            program.trace(&inputs, &[], &[]);
-        trace.truncate(100);
-        program_io.outputs[0] = 0; // change the output to 0
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            program_io.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-
-        let prover = RV64IMACProver::gen_from_trace(
-            &prover_preprocessing,
-            lazy_trace,
-            trace,
-            program_io.clone(),
-            None,
-            None,
-            final_memory_state,
-        );
-
-        let (proof, _) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier =
-            RV64IMACVerifier::new(&verifier_preprocessing, proof, program_io, None, None).unwrap();
-        verifier.verify().unwrap();
-    }
-
-    #[test]
-    #[serial]
-    #[should_panic]
-    fn malicious_trace() {
-        let mut program = host::Program::new("fibonacci-guest");
-        let inputs = postcard::to_stdvec(&1u8).unwrap();
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (lazy_trace, trace, final_memory_state, mut program_io) =
-            program.trace(&inputs, &[], &[]);
-
-        // Since the preprocessing is done with the original memory layout, the verifier should fail
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            program_io.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-
-        // change memory address of output & termination bit to the same address as input
-        // changes here should not be able to spoof the verifier result
-        program_io.memory_layout.output_start = program_io.memory_layout.input_start;
-        program_io.memory_layout.output_end = program_io.memory_layout.input_end;
-        program_io.memory_layout.termination = program_io.memory_layout.input_start;
-
-        let prover = RV64IMACProver::gen_from_trace(
-            &prover_preprocessing,
-            lazy_trace,
-            trace,
-            program_io.clone(),
-            None,
-            None,
-            final_memory_state,
-        );
-        let (proof, _) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            prover_preprocessing.shared.clone(),
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-        let verifier =
-            JoltVerifier::new(&verifier_preprocessing, proof, program_io, None, None).unwrap();
-        verifier.verify().unwrap();
-    }
-
-    #[test]
-    #[serial]
-    fn fib_e2e_dory_address_major() {
-        DoryGlobals::reset();
-        DoryGlobals::set_layout(DoryLayout::AddressMajor);
-
-        let mut program = host::Program::new("fibonacci-guest");
-        let inputs = postcard::to_stdvec(&50u32).unwrap();
-        let (bytecode, init_memory_state, _) = program.decode();
-        let (_, _, _, io_device) = program.trace(&inputs, &[], &[]);
-
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents = program.get_elf_contents().expect("elf contents is None");
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            &elf_contents,
-            &inputs,
-            &[],
-            &[],
-            None,
-            None,
-        );
-        let io_device = prover.program_io.clone();
-        let (proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::new(
-            shared_preprocessing,
-            prover_preprocessing.generators.to_verifier_setup(),
-        );
-
-        // DoryGlobals is now initialized inside the verifier's verify_stage8
-        RV64IMACVerifier::new(&verifier_preprocessing, proof, io_device, None, debug_info)
-            .expect("verifier creation failed")
-            .verify()
-            .expect("verification failed");
-    }
-
-    #[test]
-    #[serial]
-    fn advice_e2e_dory_address_major() {
-        DoryGlobals::reset();
-        DoryGlobals::set_layout(DoryLayout::AddressMajor);
-
-        // Tests a guest (merkle-tree) that actually consumes both trusted and untrusted advice.
-        let mut program = host::Program::new("merkle-tree-guest");
-        let (bytecode, init_memory_state, _) = program.decode();
-
-        // Merkle tree with 4 leaves: input=leaf1, trusted=[leaf2, leaf3], untrusted=leaf4
-        let inputs = postcard::to_stdvec(&[5u8; 32].as_slice()).unwrap();
-        let untrusted_advice = postcard::to_stdvec(&[8u8; 32]).unwrap();
-        let mut trusted_advice = postcard::to_stdvec(&[6u8; 32]).unwrap();
-        trusted_advice.extend(postcard::to_stdvec(&[7u8; 32]).unwrap());
-
-        let (_, _, _, io_device) = program.trace(&inputs, &untrusted_advice, &trusted_advice);
-        let shared_preprocessing = JoltSharedPreprocessing::new(
-            bytecode.clone(),
-            io_device.memory_layout.clone(),
-            init_memory_state,
-            1 << 16,
-        );
-        let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone());
-        let elf_contents = program.get_elf_contents().expect("elf contents is None");
-
-        let (trusted_commitment, trusted_hint) =
-            commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
-
-        let prover = RV64IMACProver::gen_from_elf(
-            &prover_preprocessing,
-            &elf_contents,
-            &inputs,
-            &untrusted_advice,
-            &trusted_advice,
-            Some(trusted_commitment),
-            Some(trusted_hint),
-        );
-        let io_device = prover.program_io.clone();
-        let (jolt_proof, debug_info) = prover.prove();
-
-        let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
-        RV64IMACVerifier::new(
-            &verifier_preprocessing,
-            jolt_proof,
-            io_device.clone(),
-            Some(trusted_commitment),
-            debug_info,
-        )
-        .expect("Failed to create verifier")
-        .verify()
-        .expect("Verification failed");
-
-        // Expected merkle root for leaves [5;32], [6;32], [7;32], [8;32]
-        let expected_output = &[
-            0xb4, 0x37, 0x0f, 0x3a, 0xb, 0x3d, 0x38, 0xa8, 0x7a, 0x6c, 0x4c, 0x46, 0x9, 0xe7, 0x83,
-            0xb3, 0xcc, 0xb7, 0x1c, 0x30, 0x1f, 0xf8, 0x54, 0xd, 0xf7, 0xdd, 0xc8, 0x42, 0x32,
-            0xbb, 0x16, 0xd7,
-        ];
-        assert_eq!(io_device.outputs, expected_output);
-    }
-}
diff --git a/jolt-core/src/zkvm/tests.rs b/jolt-core/src/zkvm/tests.rs
new file mode 100644
index 0000000000..1f165b9584
--- /dev/null
+++ b/jolt-core/src/zkvm/tests.rs
@@ -0,0 +1,792 @@
+//! End-to-end test infrastructure for Jolt ZKVM.
+//!
+//! This module provides a unified test runner that reduces boilerplate across e2e tests.
+//! Tests can be configured via `E2ETestConfig` to vary:
+//! - Program (fibonacci, sha2, etc.)
+//! - BytecodeMode (Full vs Committed)
+//! - DoryLayout (CycleMajor vs AddressMajor)
+//! - Trace size
+//! - Advice (trusted/untrusted)
+
+use std::sync::Arc;
+
+use ark_bn254::Fr;
+use serial_test::serial;
+
+use crate::host;
+use crate::poly::commitment::commitment_scheme::CommitmentScheme;
+use crate::poly::commitment::dory::{DoryCommitmentScheme, DoryContext, DoryGlobals, DoryLayout};
+use crate::poly::multilinear_polynomial::MultilinearPolynomial;
+use crate::poly::opening_proof::{OpeningAccumulator, SumcheckId};
+use crate::zkvm::bytecode::chunks::total_lanes;
+use crate::zkvm::bytecode::BytecodePreprocessing;
+use crate::zkvm::claim_reductions::AdviceKind;
+use crate::zkvm::config::BytecodeMode;
+use crate::zkvm::prover::JoltProverPreprocessing;
+use crate::zkvm::ram::populate_memory_states;
+use crate::zkvm::verifier::{JoltSharedPreprocessing, JoltVerifier, JoltVerifierPreprocessing};
+use crate::zkvm::witness::CommittedPolynomial;
+use crate::zkvm::{RV64IMACProver, RV64IMACVerifier};
+
+/// Configuration for an end-to-end test.
+#[derive(Clone)]
+pub struct E2ETestConfig {
+    /// Guest program name (e.g., "fibonacci-guest", "sha2-guest")
+    pub program_name: &'static str,
+    /// Serialized inputs to pass to the guest
+    pub inputs: Vec<u8>,
+    /// Maximum padded trace length (must be power of 2)
+    pub max_trace_length: usize,
+    /// Whether to use Committed bytecode mode (vs Full)
+    pub committed_bytecode: bool,
+    /// Dory layout override (None = use default CycleMajor)
+    pub dory_layout: Option<DoryLayout>,
+    /// Trusted advice bytes
+    pub trusted_advice: Vec<u8>,
+    /// Untrusted advice bytes
+    pub untrusted_advice: Vec<u8>,
+    /// Expected output bytes (None = don't verify output)
+    pub expected_output: Option<Vec<u8>>,
+}
+
+impl Default for E2ETestConfig {
+    fn default() -> Self {
+        Self {
+            program_name: "fibonacci-guest",
+            inputs: postcard::to_stdvec(&100u32).unwrap(),
+            max_trace_length: 1 << 16,
+            committed_bytecode: false,
+            dory_layout: None,
+            trusted_advice: vec![],
+            untrusted_advice: vec![],
+            expected_output: None,
+        }
+    }
+}
+
+impl E2ETestConfig {
+    // ========================================================================
+    // Program Constructors
+    // ========================================================================
+
+    /// Create config for fibonacci with custom input.
+    pub fn fibonacci(n: u32) -> Self {
+        Self {
+            inputs: postcard::to_stdvec(&n).unwrap(),
+            ..Default::default()
+        }
+    }
+
+    /// Create config for sha2 (with default 32-byte input).
+    pub fn sha2() -> Self {
+        Self {
+            program_name: "sha2-guest",
+            inputs: postcard::to_stdvec(&[5u8; 32]).unwrap(),
+            expected_output: Some(vec![
+                0x28, 0x9b, 0xdf, 0x82, 0x9b, 0x4a, 0x30, 0x26, 0x7, 0x9a, 0x3e, 0xa0, 0x89, 0x73,
+                0xb1, 0x97, 0x2d, 0x12, 0x4e, 0x7e, 0xaf, 0x22, 0x33, 0xc6, 0x3, 0x14, 0x3d, 0xc6,
+                0x3b, 0x50, 0xd2, 0x57,
+            ]),
+            ..Default::default()
+        }
+    }
+
+    /// Create config for sha3 (with default 32-byte input).
+    pub fn sha3() -> Self {
+        Self {
+            program_name: "sha3-guest",
+            inputs: postcard::to_stdvec(&[5u8; 32]).unwrap(),
+            expected_output: Some(vec![
+                0xd0, 0x3, 0x5c, 0x96, 0x86, 0x6e, 0xe2, 0x2e, 0x81, 0xf5, 0xc4, 0xef, 0xbd, 0x88,
+                0x33, 0xc1, 0x7e, 0xa1, 0x61, 0x10, 0x81, 0xfc, 0xd7, 0xa3, 0xdd, 0xce, 0xce, 0x7f,
+                0x44, 0x72, 0x4, 0x66,
+            ]),
+            ..Default::default()
+        }
+    }
+
+    /// Create config for merkle-tree guest.
+    /// Default: 4 leaves with input=[5;32], trusted=[6;32,7;32], untrusted=[8;32]
+    pub fn merkle_tree() -> Self {
+        let inputs = postcard::to_stdvec(&[5u8; 32].as_slice()).unwrap();
+        let untrusted_advice = postcard::to_stdvec(&[8u8; 32]).unwrap();
+        let mut trusted_advice = postcard::to_stdvec(&[6u8; 32]).unwrap();
+        trusted_advice.extend(postcard::to_stdvec(&[7u8; 32]).unwrap());
+
+        Self {
+            program_name: "merkle-tree-guest",
+            inputs,
+            trusted_advice,
+            untrusted_advice,
+            expected_output: Some(vec![
+                0xb4, 0x37, 0x0f, 0x3a, 0xb, 0x3d, 0x38, 0xa8, 0x7a, 0x6c, 0x4c, 0x46, 0x9, 0xe7,
+                0x83, 0xb3, 0xcc, 0xb7, 0x1c, 0x30, 0x1f, 0xf8, 0x54, 0xd, 0xf7, 0xdd, 0xc8, 0x42,
+                0x32, 0xbb, 0x16, 0xd7,
+            ]),
+            ..Default::default()
+        }
+    }
+
+    /// Create config for memory-ops guest (no inputs).
+    pub fn memory_ops() -> Self {
+        Self {
+            program_name: "memory-ops-guest",
+            inputs: vec![],
+            ..Default::default()
+        }
+    }
+
+    /// Create config for btreemap guest.
+    pub fn btreemap(n: u32) -> Self {
+        Self {
+            program_name: "btreemap-guest",
+            inputs: postcard::to_stdvec(&n).unwrap(),
+            ..Default::default()
+        }
+    }
+
+    /// Create config for muldiv guest.
+    pub fn muldiv(a: u32, b: u32, c: u32) -> Self {
+        Self {
+            program_name: "muldiv-guest",
+            inputs: postcard::to_stdvec(&[a, b, c]).unwrap(),
+            ..Default::default()
+        }
+    }
+
+    // ========================================================================
+    // Builder Methods
+    // ========================================================================
+
+    /// Set committed bytecode mode.
+    pub fn with_committed_bytecode(mut self) -> Self {
+        self.committed_bytecode = true;
+        self
+    }
+
+    /// Set Dory layout.
+    pub fn with_dory_layout(mut self, layout: DoryLayout) -> Self {
+        self.dory_layout = Some(layout);
+        self
+    }
+
+    /// Set small trace (256 cycles).
+    pub fn with_small_trace(mut self) -> Self {
+        self.max_trace_length = 256;
+        self
+    }
+
+    /// Set custom max trace length.
+    #[allow(dead_code)] // API for future tests
+    pub fn with_max_trace_length(mut self, len: usize) -> Self {
+        self.max_trace_length = len;
+        self
+    }
+
+    /// Set trusted advice bytes.
+    pub fn with_trusted_advice(mut self, advice: Vec<u8>) -> Self {
+        self.trusted_advice = advice;
+        self
+    }
+
+    /// Set untrusted advice bytes.
+    pub fn with_untrusted_advice(mut self, advice: Vec<u8>) -> Self {
+        self.untrusted_advice = advice;
+        self
+    }
+
+    /// Set expected output for verification.
+    #[allow(dead_code)] // API for future tests
+    pub fn expecting_output(mut self, output: Vec<u8>) -> Self {
+        self.expected_output = Some(output);
+        self
+    }
+
+    /// Clear expected output (don't verify).
+    #[allow(dead_code)] // API for future tests
+    pub fn without_output_check(mut self) -> Self {
+        self.expected_output = None;
+        self
+    }
+}
+
+/// Run an end-to-end test with the given configuration.
+///
+/// This handles all axes of variation:
+/// - Program selection
+/// - Bytecode mode (Full vs Committed)
+/// - Dory layout (CycleMajor vs AddressMajor)
+/// - Trusted/untrusted advice (computes commitment if non-empty)
+/// - Maximum padded trace length
+pub fn run_e2e_test(config: E2ETestConfig) {
+    // Setup Dory globals
+    DoryGlobals::reset();
+    if let Some(layout) = config.dory_layout {
+        DoryGlobals::set_layout(layout);
+    }
+
+    // Decode and trace program
+    let mut program = host::Program::new(config.program_name);
+    let (instructions, init_memory_state, _) = program.decode();
+    let (_, _, _, io_device) = program.trace(
+        &config.inputs,
+        &config.untrusted_advice,
+        &config.trusted_advice,
+    );
+
+    // Preprocess bytecode
+    let bytecode = Arc::new(BytecodePreprocessing::preprocess(instructions));
+    let shared_preprocessing = JoltSharedPreprocessing::new(
+        &bytecode,
+        io_device.memory_layout.clone(),
+        init_memory_state,
+        config.max_trace_length,
+    );
+
+    // Create prover preprocessing (mode-dependent)
+    let prover_preprocessing = if config.committed_bytecode {
+        JoltProverPreprocessing::new_committed(shared_preprocessing.clone(), Arc::clone(&bytecode))
+    } else {
+        JoltProverPreprocessing::new(shared_preprocessing.clone(), Arc::clone(&bytecode))
+    };
+
+    // Verify mode is correct
+    assert_eq!(
+        prover_preprocessing.is_committed_mode(),
+        config.committed_bytecode,
+        "Prover mode mismatch"
+    );
+
+    // Compute trusted advice commitment if advice is provided
+    let (trusted_commitment, trusted_hint) = if !config.trusted_advice.is_empty() {
+        let (c, h) =
+            commit_trusted_advice_preprocessing_only(&prover_preprocessing, &config.trusted_advice);
+        (Some(c), Some(h))
+    } else {
+        (None, None)
+    };
+
+    // Create prover and prove
+    let elf_contents = program.get_elf_contents().expect("elf contents is None");
+    let bytecode_mode = if config.committed_bytecode {
+        BytecodeMode::Committed
+    } else {
+        BytecodeMode::Full
+    };
+    let prover = RV64IMACProver::gen_from_elf_with_bytecode_mode(
+        &prover_preprocessing,
+        &elf_contents,
+        &config.inputs,
+        &config.untrusted_advice,
+        &config.trusted_advice,
+        trusted_commitment,
+        trusted_hint,
+        bytecode_mode,
+    );
+    let io_device = prover.program_io.clone();
+    let (jolt_proof, debug_info) = prover.prove();
+    assert_eq!(jolt_proof.bytecode_mode, bytecode_mode);
+
+    // Create verifier preprocessing from prover (respects mode)
+    let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
+
+    // Verify mode propagated correctly
+    assert_eq!(
+        verifier_preprocessing.bytecode.is_committed(),
+        config.committed_bytecode,
+        "Verifier mode mismatch"
+    );
+
+    // Verify
+    let verifier = RV64IMACVerifier::new(
+        &verifier_preprocessing,
+        jolt_proof,
+        io_device.clone(),
+        trusted_commitment,
+        debug_info,
+    )
+    .expect("Failed to create verifier");
+    verifier.verify().expect("Verification failed");
+
+    // Check expected output if specified
+    if let Some(expected) = config.expected_output {
+        assert_eq!(
+            io_device.outputs, expected,
+            "Output mismatch for program '{}'",
+            config.program_name
+        );
+    }
+}
+
+/// Test helper: commits the trusted advice bytes under `DoryContext::TrustedAdvice`.
+fn commit_trusted_advice_preprocessing_only(
+    preprocessing: &JoltProverPreprocessing<Fr, DoryCommitmentScheme>,
+    trusted_advice_bytes: &[u8],
+) -> (
+    <DoryCommitmentScheme as CommitmentScheme>::Commitment,
+    <DoryCommitmentScheme as CommitmentScheme>::OpeningProofHint,
+) {
+    let max_trusted_advice_size = preprocessing.shared.memory_layout.max_trusted_advice_size;
+    let mut trusted_advice_words = vec![0u64; (max_trusted_advice_size as usize) / 8];
+    populate_memory_states(
+        0,
+        trusted_advice_bytes,
+        Some(&mut trusted_advice_words),
+        None,
+    );
+
+    let poly = MultilinearPolynomial::<Fr>::from(trusted_advice_words);
+    let advice_len = poly.len().next_power_of_two().max(1);
+
+    let _guard = DoryGlobals::initialize_context(1, advice_len, DoryContext::TrustedAdvice, None);
+    let (commitment, hint) = {
+        let _ctx = DoryGlobals::with_context(DoryContext::TrustedAdvice);
+        DoryCommitmentScheme::commit(&poly, &preprocessing.generators)
+    };
+    (commitment, hint)
+}
+
+#[test]
+#[serial]
+fn fib_e2e() {
+    run_e2e_test(E2ETestConfig::default());
+}
+
+#[test]
+#[serial]
+fn fib_e2e_small_trace() {
+    run_e2e_test(E2ETestConfig::fibonacci(5).with_small_trace());
+}
+
+#[test]
+#[serial]
+fn sha2_e2e() {
+    #[cfg(feature = "host")]
+    use jolt_inlines_sha2 as _;
+    run_e2e_test(E2ETestConfig::sha2());
+}
+
+#[test]
+#[serial]
+fn sha3_e2e() {
+    #[cfg(feature = "host")]
+    use jolt_inlines_keccak256 as _;
+    run_e2e_test(E2ETestConfig::sha3());
+}
+
+#[test]
+#[serial]
+fn sha2_with_unused_advice_e2e() {
+    // SHA2 guest does not consume advice, but providing both trusted and untrusted advice
+    // should still work correctly through the full pipeline.
+    #[cfg(feature = "host")]
+    use jolt_inlines_sha2 as _;
+
+    run_e2e_test(
+        E2ETestConfig::sha2()
+            .with_trusted_advice(postcard::to_stdvec(&[7u8; 32]).unwrap())
+            .with_untrusted_advice(postcard::to_stdvec(&[9u8; 32]).unwrap()),
+    );
+}
+
+#[test]
+#[serial]
+fn advice_merkle_tree_e2e() {
+    run_e2e_test(E2ETestConfig::merkle_tree());
+}
+
+#[test]
+#[serial]
+fn memory_ops_e2e() {
+    run_e2e_test(E2ETestConfig::memory_ops());
+}
+
+#[test]
+#[serial]
+fn btreemap_e2e() {
+    run_e2e_test(E2ETestConfig::btreemap(50));
+}
+
+#[test]
+#[serial]
+fn muldiv_e2e() {
+    run_e2e_test(E2ETestConfig::muldiv(9, 5, 3));
+}
+
+#[test]
+#[serial]
+fn fib_e2e_address_major() {
+    run_e2e_test(E2ETestConfig::default().with_dory_layout(DoryLayout::AddressMajor));
+}
+
+#[test]
+#[serial]
+fn advice_merkle_tree_e2e_address_major() {
+    run_e2e_test(E2ETestConfig::merkle_tree().with_dory_layout(DoryLayout::AddressMajor));
+}
+
+// ============================================================================
+// New Tests - Committed Bytecode Mode
+//
+// These tests exercise the end-to-end committed bytecode path.
+// ============================================================================
+
+#[test]
+#[serial]
+fn fib_e2e_committed_bytecode() {
+    run_e2e_test(E2ETestConfig::default().with_committed_bytecode());
+}
+
+#[test]
+#[serial]
+fn fib_e2e_committed_bytecode_address_major() {
+    run_e2e_test(
+        E2ETestConfig::default()
+            .with_committed_bytecode()
+            .with_dory_layout(DoryLayout::AddressMajor),
+    );
+}
+
+// ============================================================================
+// New Tests - Bytecode Lane Ordering / Chunking
+// ============================================================================
+
+#[test]
+fn bytecode_lane_chunking_counts() {
+    // Pins `total_lanes()`; spec restated here (bytecode-commitment-progress.md is git-ignored):
+    // 3*REGISTER_COUNT (rs1/rs2/rd) + 2 scalars + 13 circuit flags + 7 instr flags
+    // + 41 lookup selector + 1 raf flag = 448 (with REGISTER_COUNT=128).
+    assert_eq!(total_lanes(), 448);
+    assert_eq!(total_lanes().div_ceil(16), 28);
+    assert_eq!(total_lanes().div_ceil(256), 2);
+}
+
+// ============================================================================
+// New Tests - Bytecode Mode Detection
+// ============================================================================
+
+#[test]
+#[serial]
+fn bytecode_mode_detection_full() {
+    DoryGlobals::reset();
+    let mut program = host::Program::new("fibonacci-guest");
+    let (instructions, init_memory_state, _) = program.decode();
+    let (_, _, _, io_device) = program.trace(&[], &[], &[]);
+
+    let bytecode = Arc::new(BytecodePreprocessing::preprocess(instructions));
+    let shared = JoltSharedPreprocessing::new(
+        &bytecode,
+        io_device.memory_layout.clone(),
+        init_memory_state,
+        1 << 16,
+    );
+
+    // Full mode
+    let prover_full: JoltProverPreprocessing<Fr, DoryCommitmentScheme> =
+        JoltProverPreprocessing::new(shared.clone(), Arc::clone(&bytecode));
+    assert!(!prover_full.is_committed_mode());
+    assert!(prover_full.bytecode_commitments.is_none());
+
+    let verifier_full = JoltVerifierPreprocessing::from(&prover_full);
+    assert!(verifier_full.bytecode.is_full());
+    assert!(!verifier_full.bytecode.is_committed());
+    assert!(verifier_full.bytecode.as_full().is_ok());
+    assert!(verifier_full.bytecode.as_committed().is_err());
+}
+
+#[test]
+#[serial]
+fn bytecode_mode_detection_committed() {
+    DoryGlobals::reset();
+    let mut program = host::Program::new("fibonacci-guest");
+    let (instructions, init_memory_state, _) = program.decode();
+    let (_, _, _, io_device) = program.trace(&[], &[], &[]);
+
+    let bytecode = Arc::new(BytecodePreprocessing::preprocess(instructions));
+    let shared = JoltSharedPreprocessing::new(
+        &bytecode,
+        io_device.memory_layout.clone(),
+        init_memory_state,
+        1 << 16,
+    );
+
+    // Committed mode
+    let prover_committed: JoltProverPreprocessing<Fr, DoryCommitmentScheme> =
+        JoltProverPreprocessing::new_committed(shared.clone(), Arc::clone(&bytecode));
+    assert!(prover_committed.is_committed_mode());
+    assert!(prover_committed.bytecode_commitments.is_some());
+
+    let verifier_committed = JoltVerifierPreprocessing::from(&prover_committed);
+    assert!(!verifier_committed.bytecode.is_full());
+    assert!(verifier_committed.bytecode.is_committed());
+    assert!(verifier_committed.bytecode.as_full().is_err());
+    assert!(verifier_committed.bytecode.as_committed().is_ok());
+}
+
+// ============================================================================
+// Internal and Security Tests
+//
+// These tests require access to prover internals or manipulate trace/io
+// directly for security testing. They cannot use E2ETestConfig.
+// ============================================================================
+
+#[test]
+#[serial]
+fn max_advice_with_small_trace() {
+    DoryGlobals::reset();
+    // Tests that max-sized advice (4KB = 512 words) works with a minimal trace.
+    // With balanced dims (sigma_a=5, nu_a=4 for 512 words), the minimum padded trace
+    // (256 cycles -> total_vars=12) is sufficient to embed advice.
+    let mut program = host::Program::new("fibonacci-guest");
+    let inputs = postcard::to_stdvec(&5u32).unwrap();
+    let trusted_advice = vec![7u8; 4096];
+    let untrusted_advice = vec![9u8; 4096];
+
+    let (instructions, init_memory_state, _) = program.decode();
+    let (lazy_trace, trace, final_memory_state, io_device) =
+        program.trace(&inputs, &untrusted_advice, &trusted_advice);
+
+    let bytecode: Arc<BytecodePreprocessing> =
+        BytecodePreprocessing::preprocess(instructions).into();
+    let shared_preprocessing = JoltSharedPreprocessing::new(
+        &bytecode,
+        io_device.memory_layout.clone(),
+        init_memory_state,
+        256,
+    );
+    let prover_preprocessing: JoltProverPreprocessing<Fr, DoryCommitmentScheme> =
+        JoltProverPreprocessing::new(shared_preprocessing.clone(), Arc::clone(&bytecode));
+    tracing::info!(
+        "preprocessing.memory_layout.max_trusted_advice_size: {}",
+        shared_preprocessing.memory_layout.max_trusted_advice_size
+    );
+
+    let (trusted_commitment, trusted_hint) =
+        commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
+
+    let prover = RV64IMACProver::gen_from_trace(
+        &prover_preprocessing,
+        lazy_trace,
+        trace,
+        io_device,
+        Some(trusted_commitment),
+        Some(trusted_hint),
+        final_memory_state,
+    );
+
+    // Trace is tiny but advice is max-sized
+    assert!(prover.unpadded_trace_len < 512);
+    assert_eq!(prover.padded_trace_len, 256);
+
+    let io_device = prover.program_io.clone();
+    let (jolt_proof, debug_info) = prover.prove();
+
+    let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
+    RV64IMACVerifier::new(
+        &verifier_preprocessing,
+        jolt_proof,
+        io_device,
+        Some(trusted_commitment),
+        debug_info,
+    )
+    .expect("Failed to create verifier")
+    .verify()
+    .expect("Verification failed");
+}
+
+#[test]
+#[serial]
+fn advice_opening_point_derives_from_unified_point() {
+    DoryGlobals::reset();
+    // Tests that advice opening points are correctly derived from the unified main opening
+    // point using Dory's balanced dimension policy.
+    //
+    // For a small trace (256 cycles), the advice row coordinates span both Stage 6 (cycle)
+    // and Stage 7 (address) challenges, verifying the two-phase reduction works correctly.
+    let mut program = host::Program::new("fibonacci-guest");
+    let inputs = postcard::to_stdvec(&5u32).unwrap();
+    let trusted_advice = postcard::to_stdvec(&[7u8; 32]).unwrap();
+    let untrusted_advice = postcard::to_stdvec(&[9u8; 32]).unwrap();
+
+    let (instructions, init_memory_state, _) = program.decode();
+    let (lazy_trace, trace, final_memory_state, io_device) =
+        program.trace(&inputs, &untrusted_advice, &trusted_advice);
+
+    let bytecode: Arc<BytecodePreprocessing> =
+        BytecodePreprocessing::preprocess(instructions).into();
+    let shared_preprocessing = JoltSharedPreprocessing::new(
+        &bytecode,
+        io_device.memory_layout.clone(),
+        init_memory_state,
+        1 << 16,
+    );
+    let prover_preprocessing: JoltProverPreprocessing<Fr, DoryCommitmentScheme> =
+        JoltProverPreprocessing::new(shared_preprocessing.clone(), Arc::clone(&bytecode));
+    let (trusted_commitment, trusted_hint) =
+        commit_trusted_advice_preprocessing_only(&prover_preprocessing, &trusted_advice);
+
+    let prover = RV64IMACProver::gen_from_trace(
+        &prover_preprocessing,
+        lazy_trace,
+        trace,
+        io_device,
+        Some(trusted_commitment),
+        Some(trusted_hint),
+        final_memory_state,
+    );
+
+    assert_eq!(prover.padded_trace_len, 256, "test expects small trace");
+
+    let io_device = prover.program_io.clone();
+    let (jolt_proof, debug_info) = prover.prove();
+    let debug_info = debug_info.expect("expected debug_info in tests");
+
+    // Get unified opening point and derive expected advice point
+    let (opening_point, _) = debug_info
+        .opening_accumulator
+        .get_committed_polynomial_opening(
+            CommittedPolynomial::InstructionRa(0),
+            SumcheckId::HammingWeightClaimReduction,
+        );
+    let mut point_dory_le = opening_point.r.clone();
+    point_dory_le.reverse();
+
+    let total_vars = point_dory_le.len();
+    let (sigma_main, _nu_main) = DoryGlobals::balanced_sigma_nu(total_vars);
+    let (sigma_a, nu_a) = DoryGlobals::advice_sigma_nu_from_max_bytes(
+        prover_preprocessing
+            .shared
+            .memory_layout
+            .max_trusted_advice_size as usize,
+    );
+
+    // Build expected advice point: [col_bits[0..sigma_a] || row_bits[0..nu_a]]
+    let mut expected_advice_le: Vec<_> = point_dory_le[0..sigma_a].to_vec();
+    expected_advice_le.extend_from_slice(&point_dory_le[sigma_main..sigma_main + nu_a]);
+
+    // Verify both advice types derive the same opening point
+    for (name, kind) in [
+        ("trusted", AdviceKind::Trusted),
+        ("untrusted", AdviceKind::Untrusted),
+    ] {
+        let get_fn = debug_info
+            .opening_accumulator
+            .get_advice_opening(kind, SumcheckId::AdviceClaimReduction);
+        assert!(
+            get_fn.is_some(),
+            "{name} advice opening missing for AdviceClaimReduction"
+        );
+        let (point_be, _) = get_fn.unwrap();
+        let mut point_le = point_be.r.clone();
+        point_le.reverse();
+        assert_eq!(point_le, expected_advice_le, "{name} advice point mismatch");
+    }
+
+    // Verify end-to-end
+    let verifier_preprocessing = JoltVerifierPreprocessing::from(&prover_preprocessing);
+    RV64IMACVerifier::new(
+        &verifier_preprocessing,
+        jolt_proof,
+        io_device,
+        Some(trusted_commitment),
+        Some(debug_info),
+    )
+    .expect("Failed to create verifier")
+    .verify()
+    .expect("Verification failed");
+}
+
+#[test]
+#[serial]
+#[should_panic]
+fn truncated_trace() {
+    DoryGlobals::reset(); // clear Dory state left behind by earlier serial tests
+    let mut program = host::Program::new("fibonacci-guest");
+    let (instructions, init_memory_state, _) = program.decode();
+    let inputs = postcard::to_stdvec(&9u8).unwrap();
+    let (lazy_trace, mut trace, final_memory_state, mut program_io) =
+        program.trace(&inputs, &[], &[]);
+    trace.truncate(100);
+    program_io.outputs[0] = 0; // change the output to 0
+
+    let bytecode: Arc<BytecodePreprocessing> =
+        BytecodePreprocessing::preprocess(instructions).into();
+    let shared_preprocessing = JoltSharedPreprocessing::new(
+        &bytecode,
+        program_io.memory_layout.clone(),
+        init_memory_state,
+        1 << 16,
+    );
+
+    let prover_preprocessing: JoltProverPreprocessing<Fr, DoryCommitmentScheme> =
+        JoltProverPreprocessing::new(shared_preprocessing.clone(), Arc::clone(&bytecode));
+
+    let prover = RV64IMACProver::gen_from_trace(
+        &prover_preprocessing,
+        lazy_trace,
+        trace,
+        program_io.clone(),
+        None,
+        None,
+        final_memory_state,
+    );
+    let (proof, _) = prover.prove();
+
+    let verifier_preprocessing = JoltVerifierPreprocessing::new_full(
+        prover_preprocessing.shared.clone(),
+        prover_preprocessing.generators.to_verifier_setup(),
+        Arc::clone(&prover_preprocessing.bytecode),
+    );
+    let verifier =
+        RV64IMACVerifier::new(&verifier_preprocessing, proof, program_io, None, None).unwrap();
+    verifier.verify().unwrap();
+}
+
+#[test]
+#[serial]
+#[should_panic]
+fn malicious_trace() {
+    DoryGlobals::reset(); // clear Dory state left behind by earlier serial tests
+    let mut program = host::Program::new("fibonacci-guest");
+    let inputs = postcard::to_stdvec(&1u8).unwrap();
+    let (instructions, init_memory_state, _) = program.decode();
+    let (lazy_trace, trace, final_memory_state, mut program_io) = program.trace(&inputs, &[], &[]);
+    let bytecode: Arc<BytecodePreprocessing> =
+        BytecodePreprocessing::preprocess(instructions).into();
+
+    // Since the preprocessing is done with the original memory layout, the verifier should fail
+    let shared_preprocessing = JoltSharedPreprocessing::new(
+        &bytecode,
+        program_io.memory_layout.clone(),
+        init_memory_state,
+        1 << 16,
+    );
+    let prover_preprocessing: JoltProverPreprocessing<Fr, DoryCommitmentScheme> =
+        JoltProverPreprocessing::new(shared_preprocessing.clone(), Arc::clone(&bytecode));
+
+    // change memory address of output & termination bit to the same address as input
+    // changes here should not be able to spoof the verifier result
+    program_io.memory_layout.output_start = program_io.memory_layout.input_start;
+    program_io.memory_layout.output_end = program_io.memory_layout.input_end;
+    program_io.memory_layout.termination = program_io.memory_layout.input_start;
+
+    let prover = RV64IMACProver::gen_from_trace(
+        &prover_preprocessing,
+        lazy_trace,
+        trace,
+        program_io.clone(),
+        None,
+        None,
+        final_memory_state,
+    );
+    let (proof, _) = prover.prove();
+
+    let verifier_preprocessing = JoltVerifierPreprocessing::new_full(
+        prover_preprocessing.shared.clone(),
+        prover_preprocessing.generators.to_verifier_setup(),
+        Arc::clone(&prover_preprocessing.bytecode),
+    );
+    let verifier =
+        JoltVerifier::new(&verifier_preprocessing, proof, program_io, None, None).unwrap();
+    verifier.verify().unwrap();
+}
diff --git a/jolt-core/src/zkvm/verifier.rs b/jolt-core/src/zkvm/verifier.rs
index 1f7e0cd6e8..347873ed2d 100644
--- a/jolt-core/src/zkvm/verifier.rs
+++ b/jolt-core/src/zkvm/verifier.rs
@@ -7,9 +7,11 @@ use std::sync::Arc;
 use crate::poly::commitment::commitment_scheme::CommitmentScheme;
 use crate::poly::commitment::dory::{DoryContext, DoryGlobals};
 use crate::subprotocols::sumcheck::BatchedSumcheck;
-use crate::zkvm::bytecode::BytecodePreprocessing;
+use crate::zkvm::bytecode::chunks::total_lanes;
+use crate::zkvm::bytecode::{BytecodePreprocessing, TrustedBytecodeCommitments, VerifierBytecode};
 use crate::zkvm::claim_reductions::advice::ReductionPhase;
 use crate::zkvm::claim_reductions::RegistersClaimReductionSumcheckVerifier;
+use crate::zkvm::config::BytecodeMode;
 use crate::zkvm::config::OneHotParams;
 #[cfg(feature = "prover")]
 use crate::zkvm::prover::JoltProverPreprocessing;
@@ -18,11 +20,15 @@ use crate::zkvm::ram::RAMPreprocessing;
 use crate::zkvm::witness::all_committed_polynomials;
 use crate::zkvm::Serializable;
 use crate::zkvm::{
-    bytecode::read_raf_checking::BytecodeReadRafSumcheckVerifier,
+    bytecode::read_raf_checking::{
+        BytecodeReadRafAddressSumcheckVerifier, BytecodeReadRafCycleSumcheckVerifier,
+        BytecodeReadRafSumcheckParams,
+    },
     claim_reductions::{
-        AdviceClaimReductionVerifier, AdviceKind, HammingWeightClaimReductionVerifier,
-        IncClaimReductionSumcheckVerifier, InstructionLookupsClaimReductionSumcheckVerifier,
-        RamRaClaimReductionSumcheckVerifier,
+        AdviceClaimReductionVerifier, AdviceKind, BytecodeClaimReductionParams,
+        BytecodeClaimReductionVerifier, BytecodeReductionPhase,
+        HammingWeightClaimReductionVerifier, IncClaimReductionSumcheckVerifier,
+        InstructionLookupsClaimReductionSumcheckVerifier, RamRaClaimReductionSumcheckVerifier,
     },
     fiat_shamir_preamble,
     instruction_lookups::{
@@ -58,7 +64,10 @@ use crate::{
     },
     pprof_scope,
     subprotocols::{
-        booleanity::{BooleanitySumcheckParams, BooleanitySumcheckVerifier},
+        booleanity::{
+            BooleanityAddressSumcheckVerifier, BooleanityCycleSumcheckVerifier,
+            BooleanitySumcheckParams,
+        },
         sumcheck_verifier::SumcheckInstanceVerifier,
     },
     transcripts::Transcript,
@@ -69,7 +78,6 @@ use anyhow::Context;
 use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
 use common::jolt_device::MemoryLayout;
 use itertools::Itertools;
-use tracer::instruction::Instruction;
 use tracer::JoltDevice;
 
 pub struct JoltVerifier<
@@ -90,6 +98,9 @@ pub struct JoltVerifier<
     /// The advice claim reduction sumcheck effectively spans two stages (6 and 7).
     /// Cache the verifier state here between stages.
     advice_reduction_verifier_untrusted: Option<AdviceClaimReductionVerifier<F>>,
+    /// The bytecode claim reduction sumcheck effectively spans two stages (6b and 7).
+    /// Cache the verifier state here between stages.
+    bytecode_reduction_verifier: Option<BytecodeClaimReductionVerifier<F>>,
     pub spartan_key: UniformSpartanKey<F>,
     pub one_hot_params: OneHotParams,
 }
@@ -162,6 +173,30 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
         let one_hot_params =
             OneHotParams::from_config(&proof.one_hot_config, proof.bytecode_K, proof.ram_K);
 
+        if proof.bytecode_mode == BytecodeMode::Committed {
+            let committed = preprocessing.bytecode.as_committed()?;
+            if committed.log_k_chunk != proof.one_hot_config.log_k_chunk {
+                return Err(ProofVerifyError::InvalidBytecodeConfig(format!(
+                    "bytecode log_k_chunk mismatch: commitments={}, proof={}",
+                    committed.log_k_chunk, proof.one_hot_config.log_k_chunk
+                )));
+            }
+            if committed.bytecode_len != preprocessing.shared.bytecode_size {
+                return Err(ProofVerifyError::InvalidBytecodeConfig(format!(
+                    "bytecode length mismatch: commitments={}, shared={}",
+                    committed.bytecode_len, preprocessing.shared.bytecode_size
+                )));
+            }
+            let k_chunk = 1usize << (committed.log_k_chunk as usize);
+            let expected_chunks = total_lanes().div_ceil(k_chunk);
+            if committed.commitments.len() != expected_chunks {
+                return Err(ProofVerifyError::InvalidBytecodeConfig(format!(
+                    "expected {expected_chunks} bytecode commitments, got {}",
+                    committed.commitments.len()
+                )));
+            }
+        }
+
         Ok(Self {
             trusted_advice_commitment,
             program_io,
@@ -171,6 +206,7 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             opening_accumulator,
             advice_reduction_verifier_trusted: None,
             advice_reduction_verifier_untrusted: None,
+            bytecode_reduction_verifier: None,
             spartan_key,
             one_hot_params,
         })
@@ -201,13 +237,20 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             self.transcript
                 .append_serializable(trusted_advice_commitment);
         }
+        if self.proof.bytecode_mode == BytecodeMode::Committed {
+            let trusted = self.preprocessing.bytecode.as_committed()?;
+            for commitment in &trusted.commitments {
+                self.transcript.append_serializable(commitment);
+            }
+        }
 
         self.verify_stage1()?;
         self.verify_stage2()?;
         self.verify_stage3()?;
         self.verify_stage4()?;
         self.verify_stage5()?;
-        self.verify_stage6()?;
+        let (bytecode_read_raf_params, booleanity_params) = self.verify_stage6a()?;
+        self.verify_stage6b(bytecode_read_raf_params, booleanity_params)?;
         self.verify_stage7()?;
         self.verify_stage8()?;
 
@@ -401,26 +444,62 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
         Ok(())
     }
 
-    fn verify_stage6(&mut self) -> Result<(), anyhow::Error> {
+    fn verify_stage6a(
+        &mut self,
+    ) -> Result<
+        (
+            BytecodeReadRafSumcheckParams<F>,
+            BooleanitySumcheckParams<F>,
+        ),
+        anyhow::Error,
+    > {
         let n_cycle_vars = self.proof.trace_length.log_2();
-        let bytecode_read_raf = BytecodeReadRafSumcheckVerifier::gen(
-            &self.preprocessing.shared.bytecode,
+        let bytecode_preprocessing = match self.proof.bytecode_mode {
+            BytecodeMode::Committed => {
+                // Committed mode: fail early unless the preprocessing holds bytecode commitments.
+                let _ = self.preprocessing.bytecode.as_committed()?;
+                None
+            }
+            BytecodeMode::Full => Some(self.preprocessing.bytecode.as_full()?.as_ref()),
+        };
+        let bytecode_read_raf = BytecodeReadRafAddressSumcheckVerifier::new(
+            bytecode_preprocessing,
             n_cycle_vars,
             &self.one_hot_params,
             &self.opening_accumulator,
             &mut self.transcript,
-        );
-
-        let ram_hamming_booleanity =
-            HammingBooleanitySumcheckVerifier::new(&self.opening_accumulator);
+            self.proof.bytecode_mode,
+        )?;
         let booleanity_params = BooleanitySumcheckParams::new(
             n_cycle_vars,
             &self.one_hot_params,
             &self.opening_accumulator,
             &mut self.transcript,
         );
+        let booleanity = BooleanityAddressSumcheckVerifier::new(booleanity_params);
 
-        let booleanity = BooleanitySumcheckVerifier::new(booleanity_params);
+        let instances: Vec<&dyn SumcheckInstanceVerifier<F, ProofTranscript>> =
+            vec![&bytecode_read_raf, &booleanity];
+
+        let _r_stage6a = BatchedSumcheck::verify(
+            &self.proof.stage6a_sumcheck_proof,
+            instances,
+            &mut self.opening_accumulator,
+            &mut self.transcript,
+        )
+        .context("Stage 6a")?;
+        Ok((bytecode_read_raf.into_params(), booleanity.into_params()))
+    }
+
+    fn verify_stage6b(
+        &mut self,
+        bytecode_read_raf_params: BytecodeReadRafSumcheckParams<F>,
+        booleanity_params: BooleanitySumcheckParams<F>,
+    ) -> Result<(), anyhow::Error> {
+        // Initialize Stage 6b cycle verifiers from scratch (Option B).
+        let booleanity = BooleanityCycleSumcheckVerifier::new(booleanity_params);
+        let ram_hamming_booleanity =
+            HammingBooleanitySumcheckVerifier::new(&self.opening_accumulator);
         let ram_ra_virtual = RamRaVirtualSumcheckVerifier::new(
             self.proof.trace_length,
             &self.one_hot_params,
@@ -438,7 +517,26 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             &mut self.transcript,
         );
 
-        // Advice claim reduction (Phase 1 in Stage 6): trusted and untrusted are separate instances.
+        // Bytecode claim reduction (Phase 1 in Stage 6b): consumes Val_s(r_bc) from Stage 6a and
+        // caches an intermediate claim for Stage 7.
+        //
+        // IMPORTANT: This must be sampled *after* other Stage 6b params (e.g. lookup/inc gammas),
+        // to match the prover's transcript order.
+        if self.proof.bytecode_mode == BytecodeMode::Committed {
+            let bytecode_reduction_params = BytecodeClaimReductionParams::new(
+                &bytecode_read_raf_params,
+                &self.opening_accumulator,
+                &mut self.transcript,
+            );
+            self.bytecode_reduction_verifier = Some(BytecodeClaimReductionVerifier::new(
+                bytecode_reduction_params,
+            ));
+        } else {
+            // Full (legacy) mode: do not run the bytecode claim reduction.
+            self.bytecode_reduction_verifier = None;
+        }
+
+        // Advice claim reduction (Phase 1 in Stage 6b): trusted and untrusted are separate instances.
         if self.trusted_advice_commitment.is_some() {
             self.advice_reduction_verifier_trusted = Some(AdviceClaimReductionVerifier::new(
                 AdviceKind::Trusted,
@@ -464,6 +562,8 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             ));
         }
 
+        let bytecode_read_raf = BytecodeReadRafCycleSumcheckVerifier::new(bytecode_read_raf_params);
+
         let mut instances: Vec<&dyn SumcheckInstanceVerifier<F, ProofTranscript>> = vec![
             &bytecode_read_raf,
             &ram_hamming_booleanity,
@@ -472,6 +572,9 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             &lookups_ra_virtual,
             &inc_reduction,
         ];
+        if let Some(ref bytecode) = self.bytecode_reduction_verifier {
+            instances.push(bytecode);
+        }
         if let Some(ref advice) = self.advice_reduction_verifier_trusted {
             instances.push(advice);
         }
@@ -479,13 +582,13 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             instances.push(advice);
         }
 
-        let _r_stage6 = BatchedSumcheck::verify(
-            &self.proof.stage6_sumcheck_proof,
+        let _r_stage6b = BatchedSumcheck::verify(
+            &self.proof.stage6b_sumcheck_proof,
             instances,
             &mut self.opening_accumulator,
             &mut self.transcript,
         )
-        .context("Stage 6")?;
+        .context("Stage 6b")?;
 
         Ok(())
     }
@@ -502,6 +605,12 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
 
         let mut instances: Vec<&dyn SumcheckInstanceVerifier<F, ProofTranscript>> =
             vec![&hw_verifier];
+
+        if let Some(bytecode_reduction_verifier) = self.bytecode_reduction_verifier.as_mut() {
+            bytecode_reduction_verifier.params.borrow_mut().phase =
+                BytecodeReductionPhase::LaneVariables;
+            instances.push(bytecode_reduction_verifier);
+        }
         if let Some(advice_reduction_verifier_trusted) =
             self.advice_reduction_verifier_trusted.as_mut()
         {
@@ -624,6 +733,51 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             ));
         }
 
+        // Bytecode chunk polynomials: committed in Bytecode context and embedded into the
+        // main opening point by fixing the extra cycle variables to 0.
+        if self.proof.bytecode_mode == BytecodeMode::Committed {
+            let (bytecode_point, _) = self.opening_accumulator.get_committed_polynomial_opening(
+                CommittedPolynomial::BytecodeChunk(0),
+                SumcheckId::BytecodeClaimReduction,
+            );
+            let log_t = opening_point.r.len() - log_k_chunk;
+            let log_k = bytecode_point.r.len() - log_k_chunk;
+            if log_k > log_t {
+                return Err(ProofVerifyError::InvalidBytecodeConfig(format!(
+                    "bytecode folding requires log_T >= log_K (got log_T={log_t}, log_K={log_k})"
+                ))
+                .into());
+            }
+            #[cfg(test)]
+            {
+                if log_k == log_t {
+                    assert_eq!(
+                        bytecode_point.r, opening_point.r,
+                        "BytecodeChunk opening point must equal unified opening point when log_K == log_T"
+                    );
+                } else {
+                    let (r_lane_main, r_cycle_main) = opening_point.split_at(log_k_chunk);
+                    let (r_lane_bc, r_cycle_bc) = bytecode_point.split_at(log_k_chunk);
+                    debug_assert_eq!(r_lane_main.r, r_lane_bc.r);
+                    debug_assert_eq!(&r_cycle_main.r[(log_t - log_k)..], r_cycle_bc.r.as_slice());
+                }
+            }
+            let lagrange_factor =
+                compute_advice_lagrange_factor::<F>(&opening_point.r, &bytecode_point.r);
+
+            let num_chunks = total_lanes().div_ceil(self.one_hot_params.k_chunk);
+            for i in 0..num_chunks {
+                let (_, claim) = self.opening_accumulator.get_committed_polynomial_opening(
+                    CommittedPolynomial::BytecodeChunk(i),
+                    SumcheckId::BytecodeClaimReduction,
+                );
+                polynomial_claims.push((
+                    CommittedPolynomial::BytecodeChunk(i),
+                    claim * lagrange_factor,
+                ));
+            }
+        }
+
         // 2. Sample gamma and compute powers for RLC
         let claims: Vec<F> = polynomial_claims.iter().map(|(_, c)| *c).collect();
         self.transcript.append_scalars(&claims);
@@ -665,6 +819,15 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
             }
         }
 
+        if self.proof.bytecode_mode == BytecodeMode::Committed {
+            let committed = self.preprocessing.bytecode.as_committed()?;
+            for (idx, commitment) in committed.commitments.iter().enumerate() {
+                commitments_map
+                    .entry(CommittedPolynomial::BytecodeChunk(idx))
+                    .or_insert_with(|| commitment.clone());
+            }
+        }
+
         // Compute joint commitment: Σ γ_i · C_i
         let joint_commitment = self.compute_joint_commitment(&mut commitments_map, &state);
 
@@ -712,81 +875,35 @@ impl<'a, F: JoltField, PCS: CommitmentScheme<Field = F>, ProofTranscript: Transc
     }
 }
 
-#[derive(Debug, Clone)]
+/// Shared preprocessing between prover and verifier.
+///
+/// **Note**: This struct does NOT contain the full bytecode data.
+/// - Bytecode size K is stored here as the single source of truth.
+/// - Full bytecode data is in `JoltProverPreprocessing.bytecode`.
+/// - Verifier bytecode (Full or Committed) is in `JoltVerifierPreprocessing.bytecode`.
+#[derive(Debug, Clone, CanonicalSerialize, CanonicalDeserialize)]
 pub struct JoltSharedPreprocessing {
-    pub bytecode: Arc<BytecodePreprocessing>,
+    pub bytecode_size: usize,
     pub ram: RAMPreprocessing,
     pub memory_layout: MemoryLayout,
     pub max_padded_trace_length: usize,
 }
 
-impl CanonicalSerialize for JoltSharedPreprocessing {
-    fn serialize_with_mode<W: std::io::Write>(
-        &self,
-        mut writer: W,
-        compress: ark_serialize::Compress,
-    ) -> Result<(), ark_serialize::SerializationError> {
-        // Serialize the inner BytecodePreprocessing (not the Arc wrapper)
-        self.bytecode
-            .as_ref()
-            .serialize_with_mode(&mut writer, compress)?;
-        self.ram.serialize_with_mode(&mut writer, compress)?;
-        self.memory_layout
-            .serialize_with_mode(&mut writer, compress)?;
-        self.max_padded_trace_length
-            .serialize_with_mode(&mut writer, compress)?;
-        Ok(())
-    }
-
-    fn serialized_size(&self, compress: ark_serialize::Compress) -> usize {
-        self.bytecode.serialized_size(compress)
-            + self.ram.serialized_size(compress)
-            + self.memory_layout.serialized_size(compress)
-            + self.max_padded_trace_length.serialized_size(compress)
-    }
-}
-
-impl CanonicalDeserialize for JoltSharedPreprocessing {
-    fn deserialize_with_mode<R: std::io::Read>(
-        mut reader: R,
-        compress: ark_serialize::Compress,
-        validate: ark_serialize::Validate,
-    ) -> Result<Self, ark_serialize::SerializationError> {
-        let bytecode =
-            BytecodePreprocessing::deserialize_with_mode(&mut reader, compress, validate)?;
-        let ram = RAMPreprocessing::deserialize_with_mode(&mut reader, compress, validate)?;
-        let memory_layout = MemoryLayout::deserialize_with_mode(&mut reader, compress, validate)?;
-        let max_padded_trace_length =
-            usize::deserialize_with_mode(&mut reader, compress, validate)?;
-        Ok(Self {
-            bytecode: Arc::new(bytecode),
-            ram,
-            memory_layout,
-            max_padded_trace_length,
-        })
-    }
-}
-
-impl ark_serialize::Valid for JoltSharedPreprocessing {
-    fn check(&self) -> Result<(), ark_serialize::SerializationError> {
-        self.bytecode.check()?;
-        self.ram.check()?;
-        self.memory_layout.check()
-    }
-}
-
 impl JoltSharedPreprocessing {
+    /// Create shared preprocessing from bytecode.
+    ///
+    /// Bytecode size K is derived from `bytecode.bytecode.len()` (already padded).
+    /// The caller wraps the bytecode in an `Arc` and passes it to the prover/verifier.
     #[tracing::instrument(skip_all, name = "JoltSharedPreprocessing::new")]
     pub fn new(
-        bytecode: Vec<Instruction>,
+        bytecode: &BytecodePreprocessing,
         memory_layout: MemoryLayout,
         memory_init: Vec<(u64, u8)>,
         max_padded_trace_length: usize,
     ) -> JoltSharedPreprocessing {
-        let bytecode = Arc::new(BytecodePreprocessing::preprocess(bytecode));
         let ram = RAMPreprocessing::preprocess(memory_init);
         Self {
-            bytecode,
+            bytecode_size: bytecode.bytecode.len(),
             ram,
             memory_layout,
             max_padded_trace_length,
@@ -794,7 +911,7 @@ impl JoltSharedPreprocessing {
     }
 }
 
-#[derive(Debug, Clone, CanonicalSerialize, CanonicalDeserialize)]
+#[derive(Debug, Clone)]
 pub struct JoltVerifierPreprocessing<F, PCS>
 where
     F: JoltField,
@@ -802,6 +919,69 @@ where
 {
     pub generators: PCS::VerifierSetup,
     pub shared: JoltSharedPreprocessing,
+    /// Bytecode information for verification.
+    ///
+    /// In Full mode: contains full bytecode preprocessing (O(K) data).
+    /// In Committed mode: contains only commitments (succinct).
+    pub bytecode: VerifierBytecode<PCS>,
+}
+
+impl<F, PCS> CanonicalSerialize for JoltVerifierPreprocessing<F, PCS>
+where
+    F: JoltField,
+    PCS: CommitmentScheme<Field = F>,
+{
+    fn serialize_with_mode<W: std::io::Write>(
+        &self,
+        mut writer: W,
+        compress: ark_serialize::Compress,
+    ) -> Result<(), ark_serialize::SerializationError> {
+        self.generators.serialize_with_mode(&mut writer, compress)?;
+        self.shared.serialize_with_mode(&mut writer, compress)?;
+        self.bytecode.serialize_with_mode(&mut writer, compress)?;
+        Ok(())
+    }
+
+    fn serialized_size(&self, compress: ark_serialize::Compress) -> usize {
+        self.generators.serialized_size(compress)
+            + self.shared.serialized_size(compress)
+            + self.bytecode.serialized_size(compress)
+    }
+}
+
+impl<F, PCS> ark_serialize::Valid for JoltVerifierPreprocessing<F, PCS>
+where
+    F: JoltField,
+    PCS: CommitmentScheme<Field = F>,
+{
+    fn check(&self) -> Result<(), ark_serialize::SerializationError> {
+        self.generators.check()?;
+        self.shared.check()?;
+        self.bytecode.check()
+    }
+}
+
+impl<F, PCS> CanonicalDeserialize for JoltVerifierPreprocessing<F, PCS>
+where
+    F: JoltField,
+    PCS: CommitmentScheme<Field = F>,
+{
+    fn deserialize_with_mode<R: std::io::Read>(
+        mut reader: R,
+        compress: ark_serialize::Compress,
+        validate: ark_serialize::Validate,
+    ) -> Result<Self, ark_serialize::SerializationError> {
+        let generators =
+            PCS::VerifierSetup::deserialize_with_mode(&mut reader, compress, validate)?;
+        let shared =
+            JoltSharedPreprocessing::deserialize_with_mode(&mut reader, compress, validate)?;
+        let bytecode = VerifierBytecode::deserialize_with_mode(&mut reader, compress, validate)?;
+        Ok(Self {
+            generators,
+            shared,
+            bytecode,
+        })
+    }
 }
 
 impl<F, PCS> Serializable for JoltVerifierPreprocessing<F, PCS>
@@ -835,14 +1015,39 @@ where
 }
 
 impl<F: JoltField, PCS: CommitmentScheme<Field = F>> JoltVerifierPreprocessing<F, PCS> {
-    #[tracing::instrument(skip_all, name = "JoltVerifierPreprocessing::new")]
-    pub fn new(
+    /// Create verifier preprocessing in Full mode (verifier has full bytecode).
+    #[tracing::instrument(skip_all, name = "JoltVerifierPreprocessing::new_full")]
+    pub fn new_full(
+        shared: JoltSharedPreprocessing,
+        generators: PCS::VerifierSetup,
+        bytecode: Arc<BytecodePreprocessing>,
+    ) -> JoltVerifierPreprocessing<F, PCS> {
+        Self {
+            generators,
+            shared,
+            bytecode: VerifierBytecode::Full(bytecode),
+        }
+    }
+
+    /// Create verifier preprocessing in Committed mode with trusted commitments.
+    ///
+    /// This is the "fast path" for online verification. The `TrustedBytecodeCommitments`
+    /// type marks these commitments as having been derived from actual bytecode via
+    /// `TrustedBytecodeCommitments::derive()`; it does not by itself prove their provenance.
+    ///
+    /// # Trust Model
+    /// The caller must ensure the commitments were honestly derived (e.g., loaded from
+    /// a trusted file or received from trusted preprocessing).
+    #[tracing::instrument(skip_all, name = "JoltVerifierPreprocessing::new_committed")]
+    pub fn new_committed(
         shared: JoltSharedPreprocessing,
         generators: PCS::VerifierSetup,
+        bytecode_commitments: TrustedBytecodeCommitments<PCS>,
     ) -> JoltVerifierPreprocessing<F, PCS> {
         Self {
             generators,
-            shared: shared.clone(),
+            shared,
+            bytecode: VerifierBytecode::Committed(bytecode_commitments),
         }
     }
 }
@@ -853,9 +1058,15 @@ impl<F: JoltField, PCS: CommitmentScheme<Field = F>> From<&JoltProverPreprocessi
 {
     fn from(prover_preprocessing: &JoltProverPreprocessing<F, PCS>) -> Self {
         let generators = PCS::setup_verifier(&prover_preprocessing.generators);
+        // Choose VerifierBytecode variant based on whether prover has bytecode commitments
+        let bytecode = match &prover_preprocessing.bytecode_commitments {
+            Some(commitments) => VerifierBytecode::Committed(commitments.clone()),
+            None => VerifierBytecode::Full(Arc::clone(&prover_preprocessing.bytecode)),
+        };
         Self {
             generators,
             shared: prover_preprocessing.shared.clone(),
+            bytecode,
         }
     }
 }
diff --git a/jolt-core/src/zkvm/witness.rs b/jolt-core/src/zkvm/witness.rs
index efcef73652..e4011002f5 100644
--- a/jolt-core/src/zkvm/witness.rs
+++ b/jolt-core/src/zkvm/witness.rs
@@ -31,6 +31,9 @@ pub enum CommittedPolynomial {
     InstructionRa(usize),
     /// One-hot ra polynomial for the bytecode instance of Shout
     BytecodeRa(usize),
+    /// Packed bytecode commitment chunk polynomial (lane chunk i).
+    /// Used by BytecodeClaimReduction; streaming commitment and witness generation are not yet supported.
+    BytecodeChunk(usize),
     /// One-hot ra/wa polynomial for the RAM instance of Twist
     /// Note that for RAM, ra and wa are the same polynomial because
     /// there is at most one load or store per cycle.
@@ -64,6 +67,7 @@ impl CommittedPolynomial {
         &self,
         setup: &PCS::ProverSetup,
         preprocessing: &JoltSharedPreprocessing,
+        bytecode: &BytecodePreprocessing,
         row_cycles: &[tracer::instruction::Cycle],
         one_hot_params: &OneHotParams,
     ) -> <PCS as StreamingCommitmentScheme>::ChunkState
@@ -108,12 +112,15 @@ impl CommittedPolynomial {
                 let row: Vec<Option<usize>> = row_cycles
                     .iter()
                     .map(|cycle| {
-                        let pc = preprocessing.bytecode.get_pc(cycle);
+                        let pc = bytecode.get_pc(cycle);
                         Some(one_hot_params.bytecode_pc_chunk(pc, *idx) as usize)
                     })
                     .collect();
                 PCS::process_chunk_onehot(setup, one_hot_params.k_chunk, &row)
             }
+            CommittedPolynomial::BytecodeChunk(_) => {
+                panic!("Bytecode chunk polynomials are not stream-committed yet")
+            }
             CommittedPolynomial::RamRa(idx) => {
                 let row: Vec<Option<usize>> = row_cycles
                     .iter()
@@ -159,6 +166,9 @@ impl CommittedPolynomial {
                     one_hot_params.k_chunk,
                 ))
             }
+            CommittedPolynomial::BytecodeChunk(_) => {
+                panic!("Bytecode chunk polynomials are not supported by generate_witness yet")
+            }
             CommittedPolynomial::RamRa(i) => {
                 let one_hot_params = one_hot_params.unwrap();
                 let addresses: Vec<_> = trace
@@ -271,4 +281,8 @@ pub enum VirtualPolynomial {
     OpFlags(CircuitFlags),
     InstructionFlags(InstructionFlags),
     LookupTableFlag(usize),
+    BytecodeValStage(usize),
+    BytecodeReadRafAddrClaim,
+    BooleanityAddrClaim,
+    BytecodeClaimReductionIntermediate,
 }
diff --git a/jolt-inlines/bigint/src/multiplication/mod.rs b/jolt-inlines/bigint/src/multiplication/mod.rs
index ec327f0fad..3aac420c7b 100644
--- a/jolt-inlines/bigint/src/multiplication/mod.rs
+++ b/jolt-inlines/bigint/src/multiplication/mod.rs
@@ -10,7 +10,6 @@ const OUTPUT_LIMBS: usize = 2 * INPUT_LIMBS;
 pub mod sdk;
 pub use sdk::*;
 
-#[cfg(feature = "host")]
 pub mod exec;
 #[cfg(feature = "host")]
 pub mod sequence_builder;
diff --git a/jolt-inlines/bigint/src/multiplication/sdk.rs b/jolt-inlines/bigint/src/multiplication/sdk.rs
index f927a4fb27..11ca6a8b75 100644
--- a/jolt-inlines/bigint/src/multiplication/sdk.rs
+++ b/jolt-inlines/bigint/src/multiplication/sdk.rs
@@ -33,7 +33,10 @@ pub fn bigint256_mul(lhs: [u64; INPUT_LIMBS], rhs: [u64; INPUT_LIMBS]) -> [u64;
 /// - `a` and `b` must point to at least 32 bytes of readable memory
 /// - `result` must point to at least 64 bytes of writable memory
 /// - The memory regions may overlap (result can be the same as a or b)
-#[cfg(not(feature = "host"))]
+#[cfg(all(
+    not(feature = "host"),
+    any(target_arch = "riscv32", target_arch = "riscv64")
+))]
 pub unsafe fn bigint256_mul_inline(a: *const u64, b: *const u64, result: *mut u64) {
     use super::{BIGINT256_MUL_FUNCT3, BIGINT256_MUL_FUNCT7, INLINE_OPCODE};
     core::arch::asm!(
@@ -59,7 +62,10 @@ pub unsafe fn bigint256_mul_inline(a: *const u64, b: *const u64, result: *mut u6
 /// - All pointers must be valid and properly aligned for u64 access (8-byte alignment)
 /// - `a` and `b` must point to at least 32 bytes of readable memory
 /// - `result` must point to at least 64 bytes of writable memory
-#[cfg(feature = "host")]
+#[cfg(any(
+    feature = "host",
+    not(any(target_arch = "riscv32", target_arch = "riscv64"))
+))]
 pub unsafe fn bigint256_mul_inline(a: *const u64, b: *const u64, result: *mut u64) {
     use crate::multiplication::exec;
 
diff --git a/jolt-sdk/macros/src/lib.rs b/jolt-sdk/macros/src/lib.rs
index 58ab22c7ec..0b292af8eb 100644
--- a/jolt-sdk/macros/src/lib.rs
+++ b/jolt-sdk/macros/src/lib.rs
@@ -66,16 +66,18 @@ impl MacroBuilder {
     fn build(&mut self) -> TokenStream {
         let memory_config_fn = self.make_memory_config_fn();
         let build_prover_fn = self.make_build_prover_fn();
+        let build_prover_committed_fn = self.make_build_prover_committed_fn();
         let build_verifier_fn = self.make_build_verifier_fn();
         let analyze_fn = self.make_analyze_function();
         let trace_to_file_fn = self.make_trace_to_file_func();
         let compile_fn = self.make_compile_func();
+        let preprocess_fn = self.make_preprocess_func();
+        let preprocess_committed_fn = self.make_preprocess_committed_func();
         let preprocess_shared_fn = self.make_preprocess_shared_func();
-        let preprocess_prover_fn = self.make_preprocess_prover_func();
-        let preprocess_verifier_fn = self.make_preprocess_verifier_func();
         let verifier_preprocess_from_prover_fn = self.make_preprocess_from_prover_func();
         let commit_trusted_advice_fn = self.make_commit_trusted_advice_func();
         let prove_fn = self.make_prove_func();
+        let prove_committed_fn = self.make_prove_committed_func();
 
         let attributes = parse_attributes(&self.attr);
         let mut execute_fn = quote! {};
@@ -96,17 +98,19 @@ impl MacroBuilder {
         quote! {
             #memory_config_fn
             #build_prover_fn
+            #build_prover_committed_fn
             #build_verifier_fn
             #execute_fn
             #analyze_fn
             #trace_to_file_fn
             #compile_fn
+            #preprocess_fn
+            #preprocess_committed_fn
             #preprocess_shared_fn
-            #preprocess_prover_fn
-            #preprocess_verifier_fn
             #verifier_preprocess_from_prover_fn
             #commit_trusted_advice_fn
             #prove_fn
+            #prove_committed_fn
             #main_fn
         }
         .into()
@@ -206,6 +210,69 @@ impl MacroBuilder {
         }
     }
 
+    fn make_build_prover_committed_fn(&self) -> TokenStream2 {
+        let fn_name = self.get_func_name();
+        let build_prover_fn_name =
+            Ident::new(&format!("build_prover_committed_{fn_name}"), fn_name.span());
+        let prove_output_ty = self.get_prove_output_type();
+
+        // Include public, trusted_advice, and untrusted_advice arguments for the prover
+        let ordered_func_args = self.get_all_func_args_in_order();
+        let all_names: Vec<_> = ordered_func_args.iter().map(|(name, _)| name).collect();
+        let all_types: Vec<_> = ordered_func_args.iter().map(|(_, ty)| ty).collect();
+
+        let inputs_vec: Vec<_> = self.func.sig.inputs.iter().collect();
+        let inputs = quote! { #(#inputs_vec),* };
+        let prove_fn_name = Ident::new(&format!("prove_committed_{fn_name}"), fn_name.span());
+        let imports = self.make_imports();
+
+        let has_trusted_advice = !self.trusted_func_args.is_empty();
+
+        let commitment_param_in_closure = if has_trusted_advice {
+            quote! { , trusted_advice_commitment: Option<<jolt::PCS as jolt::CommitmentScheme>::Commitment>,
+            trusted_advice_hint: Option<<jolt::PCS as jolt::CommitmentScheme>::OpeningProofHint> }
+        } else {
+            quote! {}
+        };
+
+        let commitment_arg_in_call = if has_trusted_advice {
+            quote! { , trusted_advice_commitment, trusted_advice_hint }
+        } else {
+            quote! {}
+        };
+
+        let return_type = if has_trusted_advice {
+            quote! {
+                impl Fn(#(#all_types),*, Option<<jolt::PCS as jolt::CommitmentScheme>::Commitment>, Option<<jolt::PCS as jolt::CommitmentScheme>::OpeningProofHint>) -> #prove_output_ty + Sync + Send
+            }
+        } else {
+            quote! {
+                impl Fn(#(#all_types),*) -> #prove_output_ty + Sync + Send
+            }
+        };
+
+        quote! {
+            #[cfg(all(not(target_arch = "wasm32"), not(feature = "guest")))]
+            pub fn #build_prover_fn_name(
+                program: jolt::host::Program,
+                preprocessing: jolt::JoltProverPreprocessing<jolt::F, jolt::PCS>,
+            ) -> #return_type
+            {
+                #imports
+                let program = std::sync::Arc::new(program);
+                let preprocessing = std::sync::Arc::new(preprocessing);
+
+                let prove_closure = move |#inputs #commitment_param_in_closure| {
+                    let program = (*program).clone();
+                    let preprocessing = (*preprocessing).clone();
+                    #prove_fn_name(program, preprocessing, #(#all_names),* #commitment_arg_in_call)
+                };
+
+                prove_closure
+            }
+        }
+    }
+
     fn make_build_verifier_fn(&self) -> TokenStream2 {
         let fn_name = self.get_func_name();
         let build_verifier_fn_name =
@@ -427,7 +494,7 @@ impl MacroBuilder {
         }
     }
 
-    fn make_preprocess_shared_func(&self) -> TokenStream2 {
+    fn make_preprocess_func(&self) -> TokenStream2 {
         let attributes = parse_attributes(&self.attr);
         let max_trace_length = proc_macro2::Literal::u64_unsuffixed(attributes.max_trace_length);
         let max_input_size = proc_macro2::Literal::u64_unsuffixed(attributes.max_input_size);
@@ -441,16 +508,15 @@ impl MacroBuilder {
         let imports = self.make_imports();
 
         let fn_name = self.get_func_name();
-        let preprocess_shared_fn_name =
-            Ident::new(&format!("preprocess_shared_{fn_name}"), fn_name.span());
+        let preprocess_fn_name = Ident::new(&format!("preprocess_{fn_name}"), fn_name.span());
         quote! {
             #[cfg(all(not(target_arch = "wasm32"), not(feature = "guest")))]
-            pub fn #preprocess_shared_fn_name(program: &mut jolt::host::Program)
-                -> jolt::JoltSharedPreprocessing
+            pub fn #preprocess_fn_name(program: &mut jolt::host::Program)
+                -> jolt::JoltProverPreprocessing<jolt::F, jolt::PCS>
             {
                 #imports
 
-                let (bytecode, memory_init, program_size) = program.decode();
+                let (instructions, memory_init, program_size) = program.decode();
                 let memory_config = MemoryConfig {
                     max_input_size: #max_input_size,
                     max_output_size: #max_output_size,
@@ -462,55 +528,106 @@ impl MacroBuilder {
                 };
                 let memory_layout = MemoryLayout::new(&memory_config);
 
-                let preprocessing = JoltSharedPreprocessing::new(
-                    bytecode,
+                let bytecode = BytecodePreprocessing::preprocess(instructions);
+                let shared = JoltSharedPreprocessing::new(
+                    &bytecode,
                     memory_layout,
                     memory_init,
                     #max_trace_length,
                 );
-
-                preprocessing
+                JoltProverPreprocessing::new(shared, std::sync::Arc::new(bytecode))
             }
         }
     }
 
-    fn make_preprocess_prover_func(&self) -> TokenStream2 {
+    fn make_preprocess_committed_func(&self) -> TokenStream2 { // emits `preprocess_committed_<fn>`
+        let attributes = parse_attributes(&self.attr); // limits below come from the macro attrs
+        let max_trace_length = proc_macro2::Literal::u64_unsuffixed(attributes.max_trace_length);
+        let max_input_size = proc_macro2::Literal::u64_unsuffixed(attributes.max_input_size);
+        let max_output_size = proc_macro2::Literal::u64_unsuffixed(attributes.max_output_size);
+        let max_untrusted_advice_size = // each limit becomes an unsuffixed u64 literal
+            proc_macro2::Literal::u64_unsuffixed(attributes.max_untrusted_advice_size);
+        let max_trusted_advice_size =
+            proc_macro2::Literal::u64_unsuffixed(attributes.max_trusted_advice_size);
+        let stack_size = proc_macro2::Literal::u64_unsuffixed(attributes.stack_size);
+        let memory_size = proc_macro2::Literal::u64_unsuffixed(attributes.memory_size);
         let imports = self.make_imports();
 
         let fn_name = self.get_func_name();
-        let preprocess_prover_fn_name =
-            Ident::new(&format!("preprocess_prover_{fn_name}"), fn_name.span());
+        let preprocess_fn_name = // generated name: preprocess_committed_<fn>
+            Ident::new(&format!("preprocess_committed_{fn_name}"), fn_name.span());
         quote! {
             #[cfg(all(not(target_arch = "wasm32"), not(feature = "guest")))]
-            pub fn #preprocess_prover_fn_name(shared_preprocessing: jolt::JoltSharedPreprocessing)
+            pub fn #preprocess_fn_name(program: &mut jolt::host::Program)
                 -> jolt::JoltProverPreprocessing<jolt::F, jolt::PCS>
             {
                 #imports
-                let prover_preprocessing = JoltProverPreprocessing::new(
-                    shared_preprocessing,
-                );
 
-                prover_preprocessing
+                let (instructions, memory_init, program_size) = program.decode();
+                let memory_config = MemoryConfig {
+                    max_input_size: #max_input_size,
+                    max_output_size: #max_output_size,
+                    max_untrusted_advice_size: #max_untrusted_advice_size,
+                    max_trusted_advice_size: #max_trusted_advice_size,
+                    stack_size: #stack_size,
+                    memory_size: #memory_size,
+                    program_size: Some(program_size), // size returned by program.decode()
+                };
+                let memory_layout = MemoryLayout::new(&memory_config);
+
+                let bytecode = BytecodePreprocessing::preprocess(instructions);
+                let shared = JoltSharedPreprocessing::new(
+                    &bytecode, // borrowed here; moved into the Arc below
+                    memory_layout,
+                    memory_init,
+                    #max_trace_length,
+                );
+                JoltProverPreprocessing::new_committed(shared, std::sync::Arc::new(bytecode))
             }
         }
     }
 
-    fn make_preprocess_verifier_func(&self) -> TokenStream2 {
+    fn make_preprocess_shared_func(&self) -> TokenStream2 { // emits `preprocess_shared_<fn>`
+        let attributes = parse_attributes(&self.attr); // limits below come from the macro attrs
+        let max_trace_length = proc_macro2::Literal::u64_unsuffixed(attributes.max_trace_length);
+        let max_input_size = proc_macro2::Literal::u64_unsuffixed(attributes.max_input_size);
+        let max_output_size = proc_macro2::Literal::u64_unsuffixed(attributes.max_output_size);
+        let max_untrusted_advice_size = // each limit becomes an unsuffixed u64 literal
+            proc_macro2::Literal::u64_unsuffixed(attributes.max_untrusted_advice_size);
+        let max_trusted_advice_size =
+            proc_macro2::Literal::u64_unsuffixed(attributes.max_trusted_advice_size);
+        let stack_size = proc_macro2::Literal::u64_unsuffixed(attributes.stack_size);
+        let memory_size = proc_macro2::Literal::u64_unsuffixed(attributes.memory_size);
         let imports = self.make_imports();
 
         let fn_name = self.get_func_name();
-        let preprocess_verifier_fn_name =
-            Ident::new(&format!("preprocess_verifier_{fn_name}"), fn_name.span());
+        let preprocess_shared_fn_name = // generated name: preprocess_shared_<fn>
+            Ident::new(&format!("preprocess_shared_{fn_name}"), fn_name.span());
         quote! {
             #[cfg(all(not(target_arch = "wasm32"), not(feature = "guest")))]
-            pub fn #preprocess_verifier_fn_name(
-                shared_preprocess: jolt::JoltSharedPreprocessing,
-                generators: <jolt::PCS as jolt::CommitmentScheme>::VerifierSetup,
-            ) -> jolt::JoltVerifierPreprocessing<jolt::F, jolt::PCS>
+            pub fn #preprocess_shared_fn_name(program: &mut jolt::host::Program)
+                -> (jolt::JoltSharedPreprocessing, jolt::BytecodePreprocessing)
             {
                 #imports
-                let preprocessing = JoltVerifierPreprocessing::new(shared_preprocess, generators);
-                preprocessing
+                let (instructions, memory_init, program_size) = program.decode();
+                let memory_config = MemoryConfig {
+                    max_input_size: #max_input_size,
+                    max_output_size: #max_output_size,
+                    max_untrusted_advice_size: #max_untrusted_advice_size,
+                    max_trusted_advice_size: #max_trusted_advice_size,
+                    stack_size: #stack_size,
+                    memory_size: #memory_size,
+                    program_size: Some(program_size), // size returned by program.decode()
+                };
+                let memory_layout = MemoryLayout::new(&memory_config);
+                let bytecode = BytecodePreprocessing::preprocess(instructions);
+                let preprocessing = JoltSharedPreprocessing::new(
+                    &bytecode, // borrowed so `bytecode` can still be returned by value
+                    memory_layout,
+                    memory_init,
+                    #max_trace_length,
+                );
+                (preprocessing, bytecode) // shared preprocessing plus the bytecode it was built from
             }
         }
     }
@@ -687,12 +804,110 @@ impl MacroBuilder {
 
                 let elf_contents_opt = program.get_elf_contents();
                 let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
-                let prover = RV64IMACProver::gen_from_elf(&preprocessing,
+                let prover = RV64IMACProver::gen_from_elf_with_bytecode_mode(&preprocessing,
+                    &elf_contents,
+                    &input_bytes,
+                    &untrusted_advice_bytes,
+                    &trusted_advice_bytes,
+                    #commitment_arg,
+                    jolt::BytecodeMode::Full,
+                );
+                let io_device = prover.program_io.clone();
+                let (jolt_proof, _) = prover.prove();
+
+                #handle_return
+
+                (ret_val, jolt_proof, io_device)
+            }
+        }
+    }
+
+    fn make_prove_committed_func(&self) -> TokenStream2 {
+        let prove_output_ty = self.get_prove_output_type();
+
+        let handle_return = match &self.func.sig.output {
+            ReturnType::Default => quote! {
+                let ret_val = ();
+            },
+            ReturnType::Type(_, ty) => quote! {
+                let mut outputs = io_device.outputs.clone();
+                outputs.resize(preprocessing.shared.memory_layout.max_output_size as usize, 0);
+                let ret_val = jolt::postcard::from_bytes::<#ty>(&outputs).unwrap();
+            },
+        };
+
+        let set_program_args = self.pub_func_args.iter().map(|(name, _)| {
+            quote! {
+                input_bytes.append(&mut jolt::postcard::to_stdvec(&#name).unwrap())
+            }
+        });
+        let set_program_untrusted_advice_args = self.untrusted_func_args.iter().map(|(name, _)| {
+            quote! {
+                untrusted_advice_bytes.append(&mut jolt::postcard::to_stdvec(&#name).unwrap())
+            }
+        });
+        let set_program_trusted_advice_args = self.trusted_func_args.iter().map(|(name, _)| {
+            quote! {
+                trusted_advice_bytes.append(&mut jolt::postcard::to_stdvec(&#name).unwrap())
+            }
+        });
+
+        let fn_name = self.get_func_name();
+        let inputs_vec: Vec<_> = self.func.sig.inputs.iter().collect();
+        let inputs = quote! { #(#inputs_vec),* };
+        let imports = self.make_imports();
+
+        let prove_fn_name = syn::Ident::new(&format!("prove_committed_{fn_name}"), fn_name.span());
+
+        let has_trusted_advice = !self.trusted_func_args.is_empty();
+
+        let commitment_param = if has_trusted_advice {
+            quote! { , trusted_advice_commitment: Option<<jolt::PCS as jolt::CommitmentScheme>::Commitment>,
+            trusted_advice_hint: Option<<jolt::PCS as jolt::CommitmentScheme>::OpeningProofHint> }
+        } else {
+            quote! {}
+        };
+
+        let commitment_arg = if has_trusted_advice {
+            quote! { trusted_advice_commitment, trusted_advice_hint }
+        } else {
+            quote! { None, None }
+        };
+
+        quote! {
+            #[cfg(all(not(target_arch = "wasm32"), not(feature = "guest")))]
+            #[allow(clippy::too_many_arguments)]
+            pub fn #prove_fn_name(
+                mut program: jolt::host::Program,
+                preprocessing: jolt::JoltProverPreprocessing<jolt::F, jolt::PCS>,
+                #inputs
+                #commitment_param
+            ) -> #prove_output_ty {
+                #imports
+
+                if !preprocessing.is_committed_mode() {
+                    panic!(
+                        "Committed bytecode proving requires committed preprocessing. \
+                        Use `preprocess_committed_*` / `JoltProverPreprocessing::new_committed`."
+                    );
+                }
+
+                let mut input_bytes = vec![];
+                #(#set_program_args;)*
+                let mut untrusted_advice_bytes = vec![];
+                #(#set_program_untrusted_advice_args;)*
+                let mut trusted_advice_bytes = vec![];
+                #(#set_program_trusted_advice_args;)*
+
+                let elf_contents_opt = program.get_elf_contents();
+                let elf_contents = elf_contents_opt.as_deref().expect("elf contents is None");
+                let prover = RV64IMACProver::gen_from_elf_with_bytecode_mode(&preprocessing,
                     &elf_contents,
                     &input_bytes,
                     &untrusted_advice_bytes,
                     &trusted_advice_bytes,
                     #commitment_arg,
+                    jolt::BytecodeMode::Committed,
                 );
                 let io_device = prover.program_io.clone();
                 let (jolt_proof, _) = prover.prove();
@@ -890,6 +1105,7 @@ impl MacroBuilder {
                 RV64IMACVerifier,
                 RV64IMACProof,
                 host::Program,
+                BytecodePreprocessing,
                 JoltProverPreprocessing,
                 MemoryConfig,
                 MemoryLayout,
diff --git a/jolt-sdk/src/host_utils.rs b/jolt-sdk/src/host_utils.rs
index af6c8192a6..4b9c3cea93 100644
--- a/jolt-sdk/src/host_utils.rs
+++ b/jolt-sdk/src/host_utils.rs
@@ -10,6 +10,8 @@ pub use jolt_core::ark_bn254::Fr as F;
 pub use jolt_core::field::JoltField;
 pub use jolt_core::guest;
 pub use jolt_core::poly::commitment::dory::DoryCommitmentScheme as PCS;
+pub use jolt_core::zkvm::bytecode::BytecodePreprocessing;
+pub use jolt_core::zkvm::config::BytecodeMode;
 pub use jolt_core::zkvm::{
     proof_serialization::JoltProof, verifier::JoltSharedPreprocessing,
     verifier::JoltVerifierPreprocessing, RV64IMACProof, RV64IMACVerifier, Serializable,
diff --git a/src/main.rs b/src/main.rs
index 771806164e..84f4aded53 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -222,12 +222,9 @@ pub fn main() {
     let target_dir = "/tmp/jolt-guest-targets";
     let mut program = guest::compile_fib(target_dir);
 
-    let shared_preprocessing = guest::preprocess_shared_fib(&mut program);
-
-    let prover_preprocessing = guest::preprocess_prover_fib(shared_preprocessing.clone());
-    let verifier_setup = prover_preprocessing.generators.to_verifier_setup();
+    let prover_preprocessing = guest::preprocess_fib(&mut program);
     let verifier_preprocessing =
-        guest::preprocess_verifier_fib(shared_preprocessing, verifier_setup);
+        guest::verifier_preprocessing_from_prover_fib(&prover_preprocessing);
 
     let prove_fib = guest::build_prover_fib(program, prover_preprocessing);
     let verify_fib = guest::build_verifier_fib(verifier_preprocessing);