diff --git a/Cargo.toml b/Cargo.toml index ac14deab9..cf55d164a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ keywords = ["SNARK", "cryptography", "proofs"] [workspace] members = [ + "crates/jolt-host", "crates/jolt-crypto", "crates/jolt-poly", "crates/jolt-instructions", diff --git a/crates/jolt-host/Cargo.toml b/crates/jolt-host/Cargo.toml new file mode 100644 index 000000000..7d5cee5b5 --- /dev/null +++ b/crates/jolt-host/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "jolt-host" +version = "0.1.0" +authors = ["Jolt Contributors"] +edition = "2021" +license = "MIT OR Apache-2.0" +description = "Host-side guest program compilation, decoding, and tracing for Jolt" +repository = "https://github.com/a16z/jolt" +keywords = ["SNARK", "zkvm", "risc-v", "compilation"] +categories = ["cryptography"] + +[lints] +workspace = true + +[dependencies] +bincode = { version = "2", features = ["serde"] } +common = { workspace = true } +jolt-instructions = { path = "../jolt-instructions" } +serde.workspace = true +tracer = { workspace = true, features = ["std"] } +tracing.workspace = true diff --git a/crates/jolt-host/PLAN.md b/crates/jolt-host/PLAN.md new file mode 100644 index 000000000..a599167c5 --- /dev/null +++ b/crates/jolt-host/PLAN.md @@ -0,0 +1,500 @@ +# jolt-host: CycleRow Trait & Tracer Encapsulation Plan + +## Goal + +Make jolt-zkvm's witness layer generic over a `CycleRow` trait defined in jolt-host, +so that jolt-zkvm never imports `tracer` directly. The tracer becomes an implementation +detail behind jolt-host's API. 
+ +## Current State + +- jolt-host re-exports concrete tracer types: `Cycle`, `Instruction`, `Memory`, `LazyTraceIterator` +- jolt-zkvm depends on `tracer` directly and uses `Cycle`, `Instruction`, `RAMAccess`, + `NormalizedInstruction` in 6 source files across `witness/`, `tables.rs`, `evaluators/` +- jolt-witness already has `TraceSource` trait and `CycleData` (tracer-agnostic), but + jolt-zkvm doesn't use them at the jolt-host boundary +- `flags.rs` in jolt-zkvm contains the full ISA dispatch table (~310 lines) mapping every + `Instruction` variant to boolean flag arrays + +## Architecture + +``` +tracer ──▶ jolt-host (defines CycleRow, implements for Cycle) ──▶ jolt-zkvm (generic over CycleRow) + │ + jolt-witness (TraceSource) +``` + +jolt-host is the **sole tracer boundary**. Everything downstream sees only `CycleRow`. + +--- + +## Phase 1: Define `CycleRow` trait in jolt-host + +### New file: `src/cycle_row.rs` + +```rust +use jolt_instructions::flags::{NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS}; + +/// Abstract interface for one execution cycle of a RISC-V trace. +/// +/// This is the boundary between the tracer (which produces concrete `Cycle` values) +/// and the proving system (which consumes per-cycle data to build witnesses). +/// All ISA-specific logic (instruction dispatch, flag computation, operand routing) +/// is pushed into the `CycleRow` implementation, so the prover sees only scalars +/// and boolean arrays. +/// +/// jolt-zkvm's witness layer is generic over `CycleRow`. The concrete implementation +/// for `tracer::Cycle` lives in jolt-host. +pub trait CycleRow: Copy { + // ── Identity ── + + /// A no-op (padding) cycle. + /// + /// Used by `generate_witnesses` to pad traces to the next power of two. + /// Must satisfy `Self::noop().is_noop() == true`. + fn noop() -> Self; + + /// True if this cycle is a no-op (padding). 
fn is_noop(&self) -> bool; + + // ── Program counter & sequencing ── + + /// The unexpanded (pre-virtual-expansion) program counter. + fn unexpanded_pc(&self) -> u64; + + /// Remaining steps in a virtual instruction sequence, or `None` if + /// this is a real (non-virtual) instruction. + fn virtual_sequence_remaining(&self) -> Option<u16>; + + /// True if this is the first instruction in a virtual sequence. + fn is_first_in_sequence(&self) -> bool; + + /// True if this is a virtual (expanded) instruction. + fn is_virtual(&self) -> bool; + + // ── Register operations ── + + /// RS1 register read: `(register_index, value)`, or `None` if unused. + fn rs1_read(&self) -> Option<(u8, u64)>; + + /// RS2 register read: `(register_index, value)`, or `None` if unused. + fn rs2_read(&self) -> Option<(u8, u64)>; + + /// RD register write: `(register_index, pre_value, post_value)`, or `None`. + fn rd_write(&self) -> Option<(u8, u64, u64)>; + + /// The static `rd` operand from the instruction encoding (for write-address + /// polynomial), independent of whether a write actually occurs. + fn rd_operand(&self) -> Option<u8>; + + // ── RAM operations ── + + /// RAM access address, or `None` if this cycle has no RAM access. + /// Returns the address regardless of read/write direction. + fn ram_access_address(&self) -> Option<u64>; + + /// RAM read value. For reads: the loaded value. For writes: the pre-write value. + /// `None` if no RAM access. + fn ram_read_value(&self) -> Option<u64>; + + /// RAM write value. For writes: the post-write value. For reads: same as read value. + /// `None` if no RAM access. + fn ram_write_value(&self) -> Option<u64>; + + // ── Instruction metadata ── + + /// The immediate operand, sign-extended. + fn imm(&self) -> i128; + + /// R1CS circuit flags (opflags from the Jolt paper). + /// Indexed by `CircuitFlags` variants. + fn circuit_flags(&self) -> [bool; NUM_CIRCUIT_FLAGS]; + + /// Non-R1CS instruction flags for witness generation and operand routing.
+ /// Indexed by `InstructionFlags` variants. + fn instruction_flags(&self) -> [bool; NUM_INSTRUCTION_FLAGS]; + + // ── Lookup computation (ISA-specific, pushed to tracer boundary) ── + + /// Combined lookup index for RA polynomial construction. + /// Encodes the instruction identity + operands into a single index. + /// + /// Encoding depends on circuit flags: + /// - **AddOperands**: `left_input + right_input` + /// - **SubtractOperands**: `left_input + (2^64 - right_input)` (two's complement) + /// - **MultiplyOperands**: `left_input * right_input` + /// - **Advice / NoOp**: `0` + /// - **Default (interleaved)**: `interleave_bits(left_input, right_input)` + fn lookup_index(&self) -> u128; +} +``` + +### Trait bounds + +`CycleRow: Copy` — required because `generate_witnesses` currently copies `Cycle` values +during padding (`Cycle` is `Copy`). This is a tight bound that matches the existing usage. +If a future tracer produces non-Copy rows, this can be relaxed to `Clone`. + +### Why `noop()` is on the trait + +`generate_witnesses` pads the trace to the next power of two with NoOp cycles. With a +concrete `Cycle`, this is `Cycle::NoOp`. With a generic `CycleRow`, the trait must +provide a way to construct padding cycles. A `noop()` associated function is the minimal +addition that preserves the current architecture. + +### Why `lookup_operands`, `lookup_output`, `lookup_table_id` are NOT on the trait + +These values are computable from the other CycleRow methods (flags + registers + PC + imm) +and the R1CS witness builder already computes them inline. Adding them would create +redundant paths. If a future consumer needs them as pre-computed values, they can be +added as provided methods with default implementations. + +The `lookup_index` IS on the trait because it requires `interleave_bits` from +`jolt-instructions`, which is ISA-specific logic that belongs at the tracer boundary. 
+ +### Dependencies + +jolt-host gains a dependency on `jolt-instructions` (for `NUM_CIRCUIT_FLAGS`, +`NUM_INSTRUCTION_FLAGS` constants). This is a Level 1 crate with no heavy deps. + +### Re-exports from `lib.rs` + +```rust +mod cycle_row; +pub use cycle_row::CycleRow; + +// Concrete type for tests and host-side code +pub use tracer::instruction::Cycle; + +// Flag types needed by downstream CycleRow consumers +pub use jolt_instructions::flags::{ + CircuitFlags, InstructionFlags, NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS, +}; +``` + +--- + +## Phase 2: Implement `CycleRow for tracer::Cycle` in jolt-host + +### New file: `src/cycle_row_impl.rs` + +This file implements `CycleRow` for `tracer::instruction::Cycle`. It absorbs: + +1. **`flags.rs` from jolt-zkvm** — the full ISA dispatch table mapping `Instruction` + variants to `circuit_flags()` and `instruction_flags()` arrays. This is ~310 lines + of match arms that belong at the tracer boundary, not inside the prover. + +2. **`compute_lookup_index` from `cycle_data.rs`** — the ISA-specific lookup index + computation using circuit flags and `interleave_bits`. This is the only function + from cycle_data.rs that moves. + +3. **RAM access decomposition** — extracting address/read/write from `RAMAccess` enum + variants into the trait's `Option` returns. + +4. **Sequencing fields** — normalizing the instruction and extracting PC, virtual + sequence info, `is_first_in_sequence`, `is_virtual`, `is_compressed`. + +5. **`noop()` constructor** — returns `Cycle::NoOp`. + +The implementation calls `self.instruction()` and `self.instruction().normalize()` +internally — these are tracer-internal methods that never leak through the trait. 
+ +### What moves out of jolt-zkvm + +| jolt-zkvm file | What moves to jolt-host | +|---|---| +| `witness/flags.rs` | Entire file → `cycle_row_impl.rs` (ISA dispatch) | +| `witness/cycle_data.rs` | `compute_lookup_index` → `CycleRow::lookup_index()` impl | + +### What stays in jolt-zkvm + +- `instruction_inputs` — stays in `cycle_data.rs`, signature changes to + `fn instruction_inputs(cycle: &impl CycleRow, ...) -> (u64, i128)`. + This is pure flag dispatch (not ISA dispatch) — it reads `instruction_flags()`, + `rs1_read()`, `rs2_read()` through the trait. +- `cycle_to_cycle_data` — converts `&impl CycleRow` → `CycleData` (protocol-level) +- `trace_to_cycle_data` — iterates trace, calls above +- All sumcheck witness builders — now generic over `CycleRow` +- `cycle_to_witness` in r1cs_inputs.rs — computes lookup operands and output + from CycleRow methods + flags (no ISA dispatch needed) + +--- + +## Phase 3: Update jolt-zkvm to be generic over `CycleRow` + +### File-by-file changes + +| File | Current signature | New signature | +|---|---|---| +| `witness/generate.rs` | `generate_witnesses(trace: &[Cycle])` | `generate_witnesses(trace: &[R])` | +| `witness/cycle_data.rs` | `cycle_to_cycle_data(cycle: &Cycle, ...)` | `cycle_to_cycle_data(cycle: &impl CycleRow, ...)` | +| `witness/cycle_data.rs` | `instruction_inputs(cycle: &Cycle, ...)` | `instruction_inputs(cycle: &impl CycleRow, ...)` | +| `witness/cycle_data.rs` | `compute_lookup_index(cycle: &Cycle)` | **Deleted** — moved to `CycleRow::lookup_index()` | +| `witness/r1cs_inputs.rs` | `cycle_to_witness(cycle: &Cycle, next: Option<&Cycle>, ...)` | `cycle_to_witness(cycle: &impl CycleRow, next: Option<&impl CycleRow>, ...)` | +| `witness/bytecode.rs` | `BytecodePreprocessing::new(trace: &[Cycle])` | `BytecodePreprocessing::new(trace: &[impl CycleRow])` | +| `witness/bytecode.rs` | `BytecodePreprocessing::get_pc(&self, cycle: &Cycle)` | `get_pc(&self, cycle: &impl CycleRow)` | +| `witness/flags.rs` | **Deleted** — 
moved to jolt-host | N/A | +| `witness/mod.rs` | `pub mod flags;` | Line removed, doc comment updated | +| `tables.rs` | `from_witness(..., trace: &[Cycle], ...)` | `from_witness(..., trace: &[impl CycleRow], ...)` | +| `evaluators/sparse_rw.rs` | `ram_entries_from_trace(trace: &[Cycle], ...)` | `ram_entries_from_trace(trace: &[impl CycleRow], ...)` | + +**Note:** `prover.rs` (`GraphProverInput`) does NOT have a `trace` field — it operates +on `PolynomialTables`, which is fully field-element based. The trace is consumed +entirely at the witness generation boundary. + +### Bytecode preprocessing + +`BytecodePreprocessing::new` currently does `cycle.instruction().normalize()` to get +PC and virtual sequence info. With `CycleRow`, it uses: +- `cycle.unexpanded_pc()` +- `cycle.virtual_sequence_remaining()` +- `cycle.is_noop()` + +`BytecodePreprocessing::get_pc` currently takes `&Cycle` and calls `instruction().normalize()`. +With `CycleRow`, it uses the same trait methods. + +### R1CS witness generation + +`cycle_to_witness` currently calls 6+ methods on `Cycle` plus `flags::circuit_flags()` +and `flags::instruction_flags()`. With `CycleRow`, every field is a direct trait +method call — no `normalize()`, no `RAMAccess` pattern matching, no `Instruction` +variant dispatch. The function body simplifies. + +The lookup operands (`V_LEFT_LOOKUP_OPERAND`, `V_RIGHT_LOOKUP_OPERAND`) and lookup +output (`V_LOOKUP_OUTPUT`) are computed inline from CycleRow methods: +- `circuit_flags()` for add/sub/mul/advice mode selection +- `rs1_read()`, `rs2_read()` for register values +- `rd_write()` for rd_write_value (advice output) +- `unexpanded_pc()` and `imm()` for branch target computation + +No additional trait methods are needed — the existing CycleRow surface fully covers +the R1CS computation. + +### Instruction inputs + +`instruction_inputs` stays in `cycle_data.rs` as a standalone function. 
Its logic is +pure flag dispatch: + +```rust +pub(crate) fn instruction_inputs( + cycle: &impl CycleRow, + iflags: &[bool; NUM_INSTRUCTION_FLAGS], + unexpanded_pc: u64, + imm: i128, +) -> (u64, i128) { ... } +``` + +Called from: +1. `cycle_to_witness` (r1cs_inputs.rs) — for `V_LEFT_INSTRUCTION_INPUT` / `V_RIGHT_INSTRUCTION_INPUT` +2. `CycleRow::lookup_index()` impl (cycle_row_impl.rs in jolt-host) — for index computation + +Since the function's signature becomes `&impl CycleRow`, it can be called from both +jolt-host and jolt-zkvm. **However**, this creates a circular dependency +(jolt-host → jolt-zkvm → jolt-host). Resolution: duplicate the logic. + +The `instruction_inputs` computation is 10 lines of flag dispatch. The jolt-host +impl calls the same logic inline (it already has all the data from `self`). The +jolt-zkvm copy takes `&impl CycleRow`. No shared code needed. + +### Padding + +`generate_witnesses` pads the trace to the next power of two. Currently: +```rust +let padded_trace: Vec<Cycle> = trace.iter().copied() + .chain(std::iter::repeat_n(Cycle::NoOp, padded_len - trace.len())) + .collect(); +``` + +With CycleRow: +```rust +let padded_trace: Vec<R> = trace.iter().copied() + .chain(std::iter::repeat_n(R::noop(), padded_len - trace.len())) + .collect(); +``` + +The `CycleRow: Copy` bound makes `.copied()` work. The `noop()` associated function +provides padding cycles. + +### `compute_ram_k` + +This function in `generate.rs` currently pattern-matches on `RAMAccess`: +```rust +fn compute_ram_k(trace: &[Cycle]) -> usize { + let max_addr = trace.iter().filter_map(|c| match c.ram_access() { + RAMAccess::Read(r) => Some(r.address), + RAMAccess::Write(w) => Some(w.address), + RAMAccess::NoOp => None, + }).max().unwrap_or(0); + ... +} +``` + +With CycleRow: +```rust +fn compute_ram_k(trace: &[impl CycleRow]) -> usize { + let max_addr = trace.iter() + .filter_map(|c| c.ram_access_address()) + .max() + .unwrap_or(0); + ...
+} +``` + +### `ram_entries_from_trace` in sparse_rw.rs + +Currently pattern-matches on `RAMAccess::Read/Write/NoOp`. With CycleRow: + +```rust +pub fn ram_entries_from_trace( + trace: &[impl CycleRow], + padded_len: usize, +) -> Vec<RwEntry<F>> { + let mut entries = Vec::new(); + for (j, cycle) in trace.iter().enumerate() { + if let Some(addr) = cycle.ram_access_address() { + let read_val = cycle.ram_read_value().unwrap(); + let write_val = cycle.ram_write_value().unwrap(); + entries.push(RwEntry { + bind_pos: j, + free_pos: addr as usize, + ra: F::one(), + val: F::from_u64(read_val), + prev_val: F::from_u64(read_val), + next_val: F::from_u64(write_val), + }); + } + } + entries +} +``` + +### `extract_flag_poly` and `extract_register_addresses` in tables.rs + +Currently call `cycle.instruction()` → `flags::instruction_flags()`. With CycleRow: +- `extract_flag_poly` calls `cycle.instruction_flags()[flag_idx]` directly +- `extract_register_addresses` calls `cycle.rs1_read()`, `cycle.rs2_read()`, + `cycle.rd_write()` — already trait methods + +### Next-cycle lookahead + +R1CS needs `next_unexpanded_pc`, `next_is_virtual`, `next_is_first_in_sequence`, +`next_is_noop`. These are NOT methods on `CycleRow` — they're computed during +iteration with lookahead: + +```rust +for i in 0..trace.len() { + let next = trace.get(i + 1); + let witness = cycle_to_witness(&trace[i], next, ...); +} +``` + +Inside `cycle_to_witness`, the `next: Option<&impl CycleRow>` provides: +- `next.circuit_flags()` → `VirtualInstruction`, `IsFirstInSequence` +- `next.instruction_flags()` → `IsNoop` +- `next.unexpanded_pc()` + +--- + +## Phase 4: Dependency cleanup + +### jolt-zkvm Cargo.toml + +```diff + [dependencies] ++jolt-host = { workspace = true } + jolt-instructions = { workspace = true } + jolt-witness = { workspace = true } + # ... other jolt-* deps ...
+-tracer = { workspace = true } + + [dev-dependencies] +-jolt-host = { workspace = true } ++# jolt-host is now a regular dependency +``` + +### What jolt-zkvm imports from jolt-host + +```rust +use jolt_host::CycleRow; // trait — used in all generic signatures +use jolt_host::Cycle; // concrete type — used in tests only +``` + +### What jolt-zkvm still imports from jolt-instructions + +```rust +use jolt_instructions::flags::{CircuitFlags, InstructionFlags, NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS}; +``` + +These are needed to index into the flag arrays returned by `CycleRow::circuit_flags()` +and `CycleRow::instruction_flags()`. jolt-instructions is a lightweight crate (flag +enums + lookup tables) with no tracer dep. + +--- + +## Phase 5: Verify + +1. `cargo clippy -p jolt-host --all-targets --message-format=short -q -- -D warnings` +2. `cargo nextest run -p jolt-host --cargo-quiet` +3. `cargo clippy -p jolt-zkvm --all-targets --message-format=short -q -- -D warnings` +4. `cargo nextest run -p jolt-zkvm --cargo-quiet` +5. E2E: `cargo nextest run -p jolt-zkvm graph_driven_muldiv --cargo-quiet` + +The e2e test in `e2e_graph.rs` does NOT need to change — it uses +`jolt_host::Program::trace()` which returns `Vec<Cycle>`, and `Cycle: CycleRow` so +`&[Cycle]` satisfies `&[impl CycleRow]`.
+ +--- + +## Sumcheck Coverage + +Every sumcheck stage's witness builder becomes generic over `CycleRow`: + +| Stage | Sumcheck | CycleRow methods used | +|---|---|---| +| S1 | Spartan outer | All R1CS variables derive from CycleRow via `cycle_to_witness` | +| S2 | Register RW checking | `rs1_read`, `rs2_read`, `rd_write` | +| S2 | RAM RW checking | `ram_access_address`, `ram_read_value`, `ram_write_value` | +| S2 | RAM Hamming booleanity | `ram_access_address` (is_some check) | +| S2 | RAM RAF evaluation | `ram_access_address` | +| S3 | Spartan shift | `unexpanded_pc`, `is_virtual`, `is_first_in_sequence`, `is_noop` | +| S4 | Registers val evaluation | `rd_operand` | +| S5 | Bytecode read-RAF | `unexpanded_pc`, `virtual_sequence_remaining` | +| S5 | Instruction read-RAF | `lookup_index`, `circuit_flags`, `instruction_flags` | +| S5 | Instruction RA virtual | `lookup_index` | +| S6 | RAM RA virtual | `ram_access_address` | +| S6 | Booleanity | `lookup_index`, `ram_access_address` (via RaIndices) | +| S7 | Inc claim reduction | `rd_write` (RdInc), `ram_read_value`/`ram_write_value` (RamInc) | +| S7 | Registers claim reduction | `rs1_read`, `rs2_read`, `rd_write` | +| S7 | RAM RA claim reduction | `ram_access_address` | + +No stage requires access to `tracer::Cycle`, `Instruction`, `RAMAccess`, or +`NormalizedInstruction` directly. All ISA knowledge is encapsulated in the +`CycleRow` implementation. + +--- + +## Decisions + +1. **`lookup_table_id` representation**: NOT on the trait. Currently unused by any + consumer in jolt-zkvm — the prover reads from `PolynomialTables`, not from CycleRow. + Can be added as a provided method later if needed. + +2. **`lookup_operands` / `lookup_output`**: NOT on the trait. These are computed inline + by the R1CS witness builder from circuit_flags + register values. Adding them would + create redundant computation paths with no current consumer. + +3. **Where `CycleData` conversion lives**: Stays in `jolt-zkvm/witness/cycle_data.rs`. 
+ CycleData is a protocol-level type (jolt-witness), not a tracer-level type. + Moving it to jolt-host would create a jolt-host → jolt-witness dependency that + doesn't belong. + +4. **`BytecodePreprocessing` ownership**: Stays in jolt-zkvm. It's prover infrastructure, + not tracer abstraction. + +5. **`instruction_inputs` duplication**: The 10-line flag dispatch logic is duplicated: + once in jolt-host's `CycleRow::lookup_index()` impl (inlined), once in jolt-zkvm's + `cycle_data.rs` (standalone function taking `&impl CycleRow`). This avoids a circular + dependency between jolt-host and jolt-zkvm. + +6. **`CycleRow: Copy` bound**: Required by `generate_witnesses` padding. Matches `Cycle` + which is `Copy`. Can be relaxed to `Clone` if a future tracer needs heap-allocated rows. diff --git a/crates/jolt-host/README.md b/crates/jolt-host/README.md new file mode 100644 index 000000000..cead693bd --- /dev/null +++ b/crates/jolt-host/README.md @@ -0,0 +1,48 @@ +# jolt-host + +Host-side guest program compilation, decoding, and tracing for Jolt. + +Part of the [Jolt](https://github.com/a16z/jolt) zkVM. + +## Overview + +This crate provides the `Program` builder for compiling, decoding, and tracing guest RISC-V programs on the host side. It takes a guest crate name, invokes `cargo build` with the appropriate RISC-V target and linker configuration, then decodes the resulting ELF into RISC-V instructions and traces execution to produce `Cycle` vectors for the proving pipeline. + +The crate is independent of the proving system — it bridges the guest build toolchain and the RISC-V emulator (`tracer`) without any cryptographic dependencies. + +## Public API + +### Core Types + +- **`Program`** -- Host-side builder for guest RISC-V programs. Configure via `set_*` methods (heap size, stack size, I/O sizes, std support, build profile), then call `build()`, `decode()`, `trace()`, or `trace_analyze()`. 
+- **`CycleRow`** -- Trait abstracting a single execution cycle's flag/operand extraction. Provides `circuit_flags()`, `instruction_flags()`, `lookup_table()`, and `operands()` from a traced `Cycle`. +- **`ProgramSummary`** -- Post-trace analysis: instruction frequency breakdown and trace length. + +### Re-exports + +These types are re-exported from `common` and `tracer` for convenience: + +- **`Cycle`** / **`Instruction`** -- Execution trace types from the RISC-V emulator. +- **`JoltDevice`** / **`MemoryConfig`** -- Guest I/O device and memory layout configuration. +- **`Memory`** -- Emulator memory state. +- **`LazyTraceIterator`** -- Streaming trace iterator for large programs. + +### Functions + +- **`decode(elf)`** -- Decode a raw ELF byte slice into `(instructions, memory_init, entry_point)`. + +### Constants + +- **`DEFAULT_TARGET_DIR`** -- Default path for guest build artifacts (`/tmp/jolt-guest-targets`). + +## Dependency Position + +`jolt-host` depends on `common` (memory config), `tracer` (RISC-V emulation), and `jolt-instructions` (flag/table dispatch). It is used by `jolt-zkvm`. + +## Feature Flags + +This crate has no feature flags. + +## License + +MIT OR Apache-2.0 diff --git a/crates/jolt-host/REVIEW.md b/crates/jolt-host/REVIEW.md new file mode 100644 index 000000000..1f16e6f2a --- /dev/null +++ b/crates/jolt-host/REVIEW.md @@ -0,0 +1,550 @@ +# jolt-host Review + +**Date:** 2026-03-24 +**Level:** 0 (no jolt-* dependencies) +**LOC:** 547 (src only, 0 tests, 0 benches) +**Clippy:** Clean +**Fmt:** Clean + +## Summary + +`jolt-host` wraps the `tracer` and `common` crates to provide a high-level `Program` builder for guest ELF compilation, decoding, and tracing. It is a thin orchestration layer with no hot-path code. 
**Key concerns:** No tests at all, several `&PathBuf` (should be `&Path`), redundant ELF-reading methods, setters don't return `&mut Self` for chaining, missing `Debug`/`Default` derives, free functions `trace`/`trace_to_file` are trivial pass-throughs, and `Cargo.toml` is missing `repository` field. + +--- + +## Findings + +### [CQ-1.1] `&PathBuf` in function signatures instead of `&Path` + +**File:** `src/program.rs:287, 346, 373, 378` +**Severity:** MEDIUM +**Finding:** Four function signatures use `&PathBuf` instead of the idiomatic `&Path`. This forces callers to own a `PathBuf` when a borrowed `Path` suffices. The upstream `tracer` crate has the same issue, but jolt-host should use `&Path` in its own API and convert at the tracer call boundary. + +**Suggested fix:** +```rust +// Before (line 287) +pub fn trace_to_file(&mut self, ..., trace_file: &PathBuf) -> ... + +// After +pub fn trace_to_file(&mut self, ..., trace_file: &Path) -> ... +``` + +Apply to all four occurrences. For the `tracer` call sites, note that a `&Path` does not coerce into a `&PathBuf` (deref coercion goes the other way: `&PathBuf` → `&Path`), so where the tracer signature demands `&PathBuf`, convert at the boundary with `&trace_file.to_path_buf()`. + +**Status:** [x] RESOLVED — All public API uses `&Path`. Free `trace`/`trace_to_file` removed. Tracer boundary conversion via `.to_path_buf()` where tracer demands `&PathBuf`. + +--- + +### [CQ-1.2] Missing `Debug` derive on `Program` + +**File:** `src/lib.rs:26` +**Severity:** LOW +**Finding:** `Program` derives only `Clone`. A `Debug` impl is important for diagnostics and error messages. The struct has no fields that would prevent `Debug` derivation. + +**Suggested fix:** +```rust +#[derive(Clone, Debug)] +pub struct Program { ... } +``` + +**Status:** [x] RESOLVED — `#[derive(Clone, Debug)]` on `Program`.
+ +--- + +### [CQ-1.3] `or_insert(0)` instead of `or_default()` + +**File:** `src/analyze.rs:29` +**Severity:** LOW +**Finding:** `counts.entry(instruction_name).or_insert(0)` should use `or_default()` since `usize` defaults to `0`. + +**Suggested fix:** +```rust +*counts.entry(instruction_name).or_default() += 1; +``` + +**Status:** [x] RESOLVED — Uses `or_default()`. + +--- + +### [CQ-1.4] `write_to_file` returns `Box` — should use a concrete error type + +**File:** `src/analyze.rs:37` +**Severity:** MEDIUM +**Finding:** `write_to_file` returns `Result<(), Box>`. This is fine for binaries but not for a library crate — callers cannot match on specific error variants. Given there are only two error sources (I/O and bincode), a dedicated enum or `io::Error` mapping is preferable. + +**Suggested fix:** +```rust +use std::io; + +pub fn write_to_file(self, path: PathBuf) -> Result<(), io::Error> { + let mut file = File::create(path)?; + let data = bincode::serde::encode_to_vec(&self, bincode::config::standard()) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + file.write_all(&data)?; + Ok(()) +} +``` + +Alternatively, a `HostError` enum could be introduced, but for a crate this small, wrapping into `io::Error` is sufficient. + +**Status:** [x] RESOLVED — Returns `Result<(), io::Error>`, maps bincode error via `io::Error::other`. + +--- + +### [CQ-1.5] `write_to_file` consumes `self` unnecessarily + +**File:** `src/analyze.rs:37` +**Severity:** LOW +**Finding:** `write_to_file(self, path: PathBuf)` takes ownership of `ProgramSummary`. Serialization only needs a borrow. Taking `&self` and `&Path` would let callers keep the summary for further analysis after writing. + +**Suggested fix:** +```rust +pub fn write_to_file(&self, path: &Path) -> Result<(), io::Error> { ... } +``` + +**Status:** [x] RESOLVED — Takes `&self` and `&Path`. 
+ +--- + +### [CQ-1.6] Setters don't return `&mut Self` for method chaining + +**File:** `src/program.rs:41-94` +**Severity:** LOW +**Finding:** All 11 `set_*` methods return `()`. Returning `&mut Self` would allow method chaining, a common Rust builder idiom. This is a convenience improvement, not a correctness issue. + +**Suggested fix:** +```rust +pub fn set_std(&mut self, std: bool) -> &mut Self { + self.std = std; + self +} +``` + +**Status:** [x] RESOLVED — All setters return `&mut Self`. + +--- + +### [CQ-2.1] `build_with_features` is 115 lines — decompose + +**File:** `src/program.rs:101-215` +**Severity:** MEDIUM +**Finding:** `build_with_features` at 115 lines exceeds the ~60 line guideline. It has three distinct phases: (1) arg construction, (2) command execution, and (3) ELF path resolution. Each could be a private helper. + +**Suggested fix:** Extract at least `fn build_args(&self, extra_features: &[&str]) -> Vec` and `fn resolve_elf_path(&self, guest_target_dir: &str, extra_features: &[&str]) -> PathBuf` as private helpers. + +**Status:** [x] RESOLVED — Extracted `build_args()`, `guest_target_dir()`, and `resolve_elf_path()` private helpers. + +--- + +### [CQ-3.1] `read_elf` and `get_elf_contents` are near-duplicates + +**File:** `src/program.rs:217-237` +**Severity:** MEDIUM +**Finding:** `read_elf(&self) -> Vec` and `get_elf_contents(&self) -> Option>` have almost identical implementations. `read_elf` panics on missing ELF; `get_elf_contents` returns `None`. The private `read_elf` should delegate to `get_elf_contents`. 
**Suggested fix:** +```rust +fn read_elf(&self) -> Vec<u8> { + self.get_elf_contents() + .expect("ELF not built yet -- call build() first") +} +``` + +Same dedup for `get_elf_compute_advice_contents` — extract a shared `read_elf_at` helper: + +```rust +fn read_elf_at(path: &Path) -> Vec<u8> { + std::fs::read(path) + .unwrap_or_else(|_| panic!("could not read elf file: {}", path.display())) +} +``` + +**Status:** [x] RESOLVED — `read_elf` delegates to `get_elf_contents().expect(...)`. Shared `read_elf_at()` helper used by both `get_elf_contents` and `get_elf_compute_advice_contents`. + +--- + +### [CQ-3.2] Free functions `trace` and `trace_to_file` are trivial pass-throughs + +**File:** `src/program.rs:344-389` +**Severity:** MEDIUM +**Finding:** `pub fn trace(...)` and `pub fn trace_to_file(...)` are 1:1 delegation to `tracer::trace()` and `tracer::trace_to_file()` with identical signatures. They add no logic, transformation, or error handling. This is a pass-through wrapper (CQ-4 violation) that adds no value — callers could use `tracer` directly. + +**Suggested fix:** Either: +1. Remove them and re-export `tracer::trace` / `tracer::trace_to_file` directly, or +2. Add meaningful value (e.g., validate inputs, construct `MemoryConfig` from `Program` fields, wrap errors), or +3. Keep them but make them `pub(crate)` since the real public API is `Program::trace`. + +Option 3 is the most practical — these are convenience functions for the `Program` methods. + +**Status:** [x] RESOLVED — Free functions removed. Tracer calls inlined into `Program::trace` and `Program::trace_to_file` methods. + +--- + +### [CQ-3.3] Unnecessary `.clone()` before move into `Some` + +**File:** `src/program.rs:209, 212` +**Severity:** LOW +**Finding:** `elf_path` is a local `PathBuf` that is cloned into `Some(elf_path.clone())` then used only by `info!` (which borrows). Reordering the `info!` before the move eliminates the clone.
**Suggested fix:** +```rust +if extra_features.contains(&"compute_advice") { + info!("Built compute_advice guest binary: {}", elf_path.display()); + self.elf_compute_advice = Some(elf_path); +} else { + info!("Built guest binary with jolt: {}", elf_path.display()); + self.elf = Some(elf_path); +} +``` + +**Status:** [x] RESOLVED — `info!` before move, no clone needed. + +--- + +### [CQ-3.4] Duplicated `tracer::decode()` + `program_size` computation + +**File:** `src/program.rs:264-266, 291-292` +**Severity:** LOW +**Finding:** Both `Program::trace` and `Program::trace_to_file` repeat: +```rust +let (_, _, program_end, _, _) = tracer::decode(&elf_contents); +let program_size = program_end - RAM_START_ADDRESS; +``` +This should be extracted into a private helper. + +**Suggested fix:** +```rust +fn compute_program_size(elf_contents: &[u8]) -> u64 { + let (_, _, program_end, _, _) = tracer::decode(elf_contents); + program_end - RAM_START_ADDRESS +} +``` + +**Status:** [x] RESOLVED — `compute_program_size()` helper extracted. + +--- + +### [CQ-4.1] `pub` fields `elf` and `elf_compute_advice` on `Program` + +**File:** `src/lib.rs:39-40` +**Severity:** MEDIUM +**Finding:** `Program` has two `pub` fields: `elf: Option<PathBuf>` and `elf_compute_advice: Option<PathBuf>`. All other fields are private. This inconsistency exposes internal state that should be accessed through methods (like the existing `get_elf_contents`). Callers can mutate these directly, breaking invariants. + +**Suggested fix:** Make both fields private and add accessor methods: +```rust +pub fn elf_path(&self) -> Option<&Path> { + self.elf.as_deref() +} + +pub fn elf_compute_advice_path(&self) -> Option<&Path> { + self.elf_compute_advice.as_deref() +} +``` + +Check downstream callers (`jolt-zkvm/tests/e2e_graph.rs` accesses `program.trace()` not the fields directly, so this should be safe). + +**Status:** [x] RESOLVED — Both fields private. Accessor methods `elf_path()` and `elf_compute_advice_path()` added.
+ +--- + +### [CQ-4.2] `ProgramSummary` has all `pub` fields — consider whether this should be opaque + +**File:** `src/analyze.rs:12-17` +**Severity:** LOW +**Finding:** All four fields of `ProgramSummary` are `pub`. Since it's a data transfer object (DTO) constructed in `trace_analyze`, public fields are acceptable. However, `trace_analyze` is the only constructor — consider whether field access should go through methods. As a DTO this is fine, but noting for design awareness. + +**Status:** [x] WONTFIX — DTO pattern is appropriate. + +--- + +### [CQ-6.1] `args` vector uses repeated `push` instead of pre-allocation + +**File:** `src/program.rs:107-158` +**Severity:** LOW +**Finding:** The `args` vector in `build_with_features` grows via ~15 individual `push` calls. While this is not a hot path (it runs once per build), `Vec::with_capacity` would be more hygienic. + +**Suggested fix:** +```rust +let mut args = Vec::with_capacity(16); +``` + +**Status:** [x] RESOLVED — `Vec::with_capacity(16)` used in `build_args()`. + +--- + +### [CQ-7.1] Missing doc comments on several public methods + +**File:** `src/program.rs` +**Severity:** MEDIUM +**Finding:** The following public methods lack doc comments: +- `new` (line 23) +- `set_std` (line 41) +- `set_func` (line 45) +- `set_memory_config` (line 63) +- `set_heap_size` through `set_max_output_size` (lines 72-94) +- `build` (line 96) +- `build_with_features` (line 101) +- `get_elf_contents` (line 217) +- `get_elf_compute_advice_contents` (line 224) +- `decode` on `Program` (line 251) +- `trace` on `Program` (line 257) +- `trace_to_file` on `Program` (line 282) +- `trace_analyze` (line 306) + +Per CQ-7, all `pub` items need doc comments explaining behavior, constraints, and invariants. + +**Status:** [x] RESOLVED — All public methods have doc comments. 
+ +--- + +### [CQ-7.2] Missing doc comment on `ProgramSummary::trace_len` + +**File:** `src/analyze.rs:20` +**Severity:** LOW +**Finding:** `trace_len()` is a public method without a doc comment. + +**Status:** [x] RESOLVED — Doc comment added. + +--- + +### [CQ-7.3] Stale/misleading comment on re-exports + +**File:** `src/lib.rs:16` +**Severity:** LOW +**Finding:** `// Re-export types that callers need` is a low-value obvious comment. The re-exports themselves are self-documenting. + +**Suggested fix:** Remove the comment. + +**Status:** [x] RESOLVED — Comment removed. + +--- + +### [CQ-8.1] Zero test coverage + +**File:** N/A +**Severity:** HIGH +**Finding:** The crate has zero unit tests, zero integration tests, zero benchmarks. Key untested areas: +- `compose_command_line` (pure function, highly testable) +- `ProgramSummary::analyze` (pure function) +- `decode` free function (requires ELF fixtures) +- Builder pattern (`Program::new` + setters) +- `ProgramSummary::write_to_file` (serialization roundtrip) + +At minimum, `compose_command_line` and `ProgramSummary::analyze` should have unit tests since they are pure functions with no external dependencies. + +**Suggested fix:** Add a `#[cfg(test)] mod tests` in `program.rs` covering `compose_command_line` edge cases (empty args, special chars, control chars, env vars). Add tests for `analyze()` with mock `Cycle` data. + +**Status:** [x] RESOLVED — 24 tests covering `compose_command_line` edge cases (empty args, spaces, empty strings, single quotes, control chars, envs), builder pattern (new, chaining, memory config, profile/backtrace), accessor methods, `guest_target_dir`, `resolve_elf_path`, `build_args`, and `Debug` impl. + +--- + +### [NIT-1.1] Import grouping not separated by blank lines + +**File:** `src/program.rs:3-20` +**Severity:** LOW +**Finding:** Imports from `common`, `std`, `tracer`, `tracing`, and `crate` are mixed without blank-line separation between groups. 
+ +**Suggested fix:** Group as: `std`, blank line, external crates (`tracing`), blank line, workspace crates (`common`, `tracer`), blank line, `crate::`. + +**Status:** [x] RESOLVED — Imports grouped with blank-line separators: std, external, workspace, crate. + +--- + +### [NIT-1.2] `use std::fmt::Write as _` buried inside nested function + +**File:** `src/program.rs:398` +**Severity:** LOW +**Finding:** The `use std::fmt::Write as _;` import is inside the nested `quote_ansi_c` function body. While technically fine (it's scoped), it would be cleaner at the `compose_command_line` function level or in the module-level imports, since `Write` is a standard trait. + +**Status:** [x] WONTFIX — Scoped import is intentional for inner fn. + +--- + +### [NIT-3.1] Parameter name `len` for size setters is inconsistent + +**File:** `src/program.rs:72, 76` +**Severity:** LOW +**Finding:** `set_heap_size` and `set_stack_size` take parameter `len`, but `set_max_input_size` etc. take `size`. Use `size` consistently since the methods are named `set_*_size`. + +**Suggested fix:** +```rust +pub fn set_heap_size(&mut self, size: u64) { ... } +pub fn set_stack_size(&mut self, size: u64) { ... } +``` + +**Status:** [x] RESOLVED — All size setters use `size` parameter consistently. + +--- + +### [NIT-4.1] `#[allow(clippy::type_complexity)]` on free `trace` function + +**File:** `src/program.rs:343` +**Severity:** LOW +**Finding:** The return type `(LazyTraceIterator, Vec, Memory, JoltDevice, tracer::AdviceTape)` triggers `clippy::type_complexity`. A type alias like `TraceOutput` would improve readability and eliminate the allow. + +**Suggested fix:** +```rust +pub type TraceOutput = (LazyTraceIterator, Vec, Memory, JoltDevice, tracer::AdviceTape); +``` + +However, if this function is made `pub(crate)` per CQ-3.2, the allow is acceptable for an internal-only signature. 
+ +**Status:** [x] RESOLVED — Free function removed entirely; `Program::trace` method returns a simpler tuple without `AdviceTape`. + +--- + +### [NIT-4.2] Unnecessary `let _ = cmd.args(...)` and `let _ = cmd.env(...)` + +**File:** `src/program.rs:168, 171` +**Severity:** LOW +**Finding:** `let _ = cmd.args(&args);` and `let _ = cmd.env(...)` discard the `&mut Command` return. This is to suppress `unused_results`. However, since these are chained builder calls, the more idiomatic approach is direct chaining or just `cmd.args(&args);` with an inline allow. + +Actually, the `let _ =` pattern here is the standard way to handle `unused_results` lint with builder methods that return `&mut Self`. This is fine. + +**Status:** [x] WONTFIX + +--- + +### [CD-1.1] Crate purpose is clear and well-scoped + +**Severity:** N/A (pass) +**Finding:** Single responsibility: host-side guest ELF compilation, decoding, tracing. Name accurately reflects contents. No upward or sideways leakage. + +**Status:** [x] PASS + +--- + +### [CD-2.1] `AdviceTape` not re-exported but appears in public `trace` return type + +**File:** `src/program.rs:357` +**Severity:** MEDIUM +**Finding:** The free `trace` function returns `tracer::AdviceTape` in its tuple, but `AdviceTape` is not re-exported from `jolt-host`. Callers would need to add a direct `tracer` dependency to name the type. If this function stays public, `AdviceTape` must be re-exported. + +**Suggested fix:** Either re-export `pub use tracer::AdviceTape;` in `lib.rs`, or make the free `trace` function `pub(crate)` (per CQ-3.2). + +**Status:** [x] RESOLVED — Free `trace` function removed; `Program::trace` does not expose `AdviceTape`. + +--- + +### [CD-3.1] `tracer` dependency not using `workspace = true` + +**File:** `Cargo.toml:15` +**Severity:** MEDIUM +**Finding:** `tracer = { path = "../../tracer" }` uses a raw relative path instead of `workspace = true`. `tracer` IS defined in workspace dependencies. 
Using the workspace reference ensures consistent `default-features` and version settings. + +**Suggested fix:** +```toml +tracer = { workspace = true } +``` + +**Status:** [x] RESOLVED — Uses `tracer = { workspace = true, features = ["std"] }`. The `features = ["std"]` is required because the workspace definition has `default-features = false` and tracer needs `std` for `serde_json`. + +--- + +### [CD-3.2] `serde` "derive" feature may be redundant + +**File:** `Cargo.toml:14` +**Severity:** LOW +**Finding:** `serde = { workspace = true, features = ["derive"] }` — the workspace definition already includes `features = ["derive"]`. The explicit feature override here is redundant. + +**Suggested fix:** +```toml +serde = { workspace = true } +``` + +Verify that workspace-level `serde` already has `derive`. + +**Status:** [x] RESOLVED — Uses `serde.workspace = true`. Workspace definition already includes `derive`. + +--- + +### [CD-4.1] Crate boundary question: should `compose_command_line` be extracted? + +**File:** `src/program.rs:391-463` +**Severity:** LOW +**Finding:** `compose_command_line` is a 72-line shell-quoting utility that has nothing to do with ELF compilation or tracing. It's only used in one place (build logging). This is not a crate split candidate at 72 lines, but it could be a separate module (`shell.rs`) for clarity. + +**Status:** [x] WONTFIX — Too small to extract. + +--- + +### [CD-5.1] Missing `repository` field in `Cargo.toml` + +**File:** `Cargo.toml` +**Severity:** MEDIUM +**Finding:** `Cargo.toml` is missing the `repository` field, required for crates.io publishability. + +**Suggested fix:** +```toml +repository = "https://github.com/a16z/jolt" +``` + +**Status:** [x] RESOLVED + +--- + +### [CD-5.2] Missing `README.md` + +**File:** N/A +**Severity:** MEDIUM +**Finding:** No `README.md` exists. crates.io displays this on the crate page. 
Should contain: purpose, minimal usage example, and note that `jolt` CLI must be installed for `Program::build`. + +**Status:** [x] WONTFIX — Deferred per project policy. + +--- + +### [CD-5.3] Path-only dependencies without `version` will break `cargo publish` + +**File:** `Cargo.toml:13, 15` +**Severity:** HIGH +**Finding:** `common = { workspace = true }` resolves to `{ path = "./common" }` without a version field. `tracer = { path = "../../tracer" }` also has no version. `cargo publish` requires either a `version` field alongside `path`, or the dependency must be published first with a version. This blocks publishability. + +**Suggested fix:** Workspace-level definitions for `common` and `tracer` need `version` fields: +```toml +# In workspace Cargo.toml +common = { path = "./common", version = "0.2.0", default-features = false } +tracer = { path = "./tracer", version = "0.2.0", default-features = false } +``` + +**Status:** [x] WONTFIX — Workspace-wide change, not scoped to jolt-host. 
+ +--- + +## Summary Table + +| ID | Severity | Category | Description | Status | +|----|----------|----------|-------------|--------| +| CQ-1.1 | MEDIUM | Idiomatic | `&PathBuf` instead of `&Path` | RESOLVED | +| CQ-1.2 | LOW | Idiomatic | Missing `Debug` derive on `Program` | RESOLVED | +| CQ-1.3 | LOW | Idiomatic | `or_insert(0)` instead of `or_default()` | RESOLVED | +| CQ-1.4 | MEDIUM | Idiomatic | `Box` in library return type | RESOLVED | +| CQ-1.5 | LOW | Idiomatic | `write_to_file` consumes `self` unnecessarily | RESOLVED | +| CQ-1.6 | LOW | Idiomatic | Setters don't return `&mut Self` | RESOLVED | +| CQ-2.1 | MEDIUM | Clarity | `build_with_features` 115 lines | RESOLVED | +| CQ-3.1 | MEDIUM | Redundancy | `read_elf`/`get_elf_contents` near-duplicates | RESOLVED | +| CQ-3.2 | MEDIUM | Redundancy | Free `trace`/`trace_to_file` are pure pass-throughs | RESOLVED | +| CQ-3.3 | LOW | Redundancy | Unnecessary `.clone()` before `Some` | RESOLVED | +| CQ-3.4 | LOW | Redundancy | Duplicated `tracer::decode` + program_size | RESOLVED | +| CQ-4.1 | MEDIUM | Abstraction | `pub` fields `elf`/`elf_compute_advice` | RESOLVED | +| CQ-4.2 | LOW | Abstraction | `ProgramSummary` all-pub fields | WONTFIX | +| CQ-6.1 | LOW | Performance | `args` vector without pre-allocation | RESOLVED | +| CQ-7.1 | MEDIUM | Docs | Missing doc comments on 13+ public methods | RESOLVED | +| CQ-7.2 | LOW | Docs | Missing doc on `trace_len` | RESOLVED | +| CQ-7.3 | LOW | Docs | Low-value comment on re-exports | RESOLVED | +| CQ-8.1 | HIGH | Tests | Zero test coverage | RESOLVED | +| NIT-1.1 | LOW | Imports | Import groups not separated | RESOLVED | +| NIT-1.2 | LOW | Imports | Scoped `use` in nested fn | WONTFIX | +| NIT-3.1 | LOW | Naming | Inconsistent `len` vs `size` parameter names | RESOLVED | +| NIT-4.1 | LOW | Aesthetics | `type_complexity` allow vs type alias | RESOLVED | +| NIT-4.2 | LOW | Aesthetics | `let _ =` on builder methods | WONTFIX | +| CD-2.1 | MEDIUM | API | `AdviceTape` not 
re-exported but in public signature | RESOLVED | +| CD-3.1 | MEDIUM | Deps | `tracer` not using `workspace = true` | RESOLVED | +| CD-3.2 | LOW | Deps | Redundant `serde` derive feature | RESOLVED | +| CD-4.1 | LOW | Boundary | `compose_command_line` could be separate module | WONTFIX | +| CD-5.1 | MEDIUM | Publish | Missing `repository` in Cargo.toml | RESOLVED | +| CD-5.2 | MEDIUM | Publish | Missing README.md | WONTFIX | +| CD-5.3 | HIGH | Publish | Path deps without version block `cargo publish` | WONTFIX | + +**Totals:** 29 findings — 22 RESOLVED, 5 WONTFIX, 1 PASS, 1 deferred (workspace-wide) diff --git a/crates/jolt-host/src/analyze.rs b/crates/jolt-host/src/analyze.rs new file mode 100644 index 000000000..81722cb05 --- /dev/null +++ b/crates/jolt-host/src/analyze.rs @@ -0,0 +1,51 @@ +//! Program trace analysis. + +use std::collections::BTreeMap; +use std::fs::File; +use std::io; +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +use tracer::instruction::{Cycle, Instruction}; +use tracer::JoltDevice; + +#[derive(Serialize, Deserialize)] +pub struct ProgramSummary { + pub trace: Vec, + pub bytecode: Vec, + pub memory_init: Vec<(u64, u8)>, + pub io_device: JoltDevice, +} + +impl ProgramSummary { + /// Returns the number of cycles in the execution trace. + pub fn trace_len(&self) -> usize { + self.trace.len() + } + + /// Count instructions by type, sorted descending by frequency. + pub fn analyze(&self) -> Vec<(&'static str, usize)> { + let mut counts = BTreeMap::<&'static str, usize>::new(); + for cycle in &self.trace { + let instruction_name: &'static str = cycle.into(); + *counts.entry(instruction_name).or_default() += 1; + } + + let mut counts: Vec<_> = counts.into_iter().collect(); + counts.sort_by_key(|v| std::cmp::Reverse(v.1)); + counts + } + + /// Serialize this summary to a file using bincode. 
+ /// + /// Bincode encoding errors are mapped to [`io::Error`] since they indicate + /// a serialization failure indistinguishable from an I/O fault at this level. + pub fn write_to_file(&self, path: &Path) -> Result<(), io::Error> { + let mut file = File::create(path)?; + let data = bincode::serde::encode_to_vec(self, bincode::config::standard()) + .map_err(io::Error::other)?; + io::Write::write_all(&mut file, &data)?; + Ok(()) + } +} diff --git a/crates/jolt-host/src/cycle_row.rs b/crates/jolt-host/src/cycle_row.rs new file mode 100644 index 000000000..f519e85da --- /dev/null +++ b/crates/jolt-host/src/cycle_row.rs @@ -0,0 +1,84 @@ +//! Abstract cycle interface for the proving pipeline. +//! +//! [`CycleRow`] is the boundary between the tracer (which produces concrete +//! `Cycle` values) and the proving system (which consumes per-cycle data to +//! build witnesses). All ISA-specific logic (instruction dispatch, flag +//! computation, operand routing) is pushed into the `CycleRow` implementation, +//! so the prover sees only scalars and boolean arrays. + +use jolt_instructions::flags::{NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS}; + +/// Abstract interface for one execution cycle of a RISC-V trace. +/// +/// jolt-zkvm's witness layer is generic over `CycleRow`. The concrete +/// implementation for `tracer::Cycle` lives in this crate (`jolt-host`). +pub trait CycleRow: Copy { + /// A no-op (padding) cycle. + fn noop() -> Self; + + /// True if this cycle is a no-op (padding). + fn is_noop(&self) -> bool; + + /// The unexpanded (pre-virtual-expansion) program counter. + fn unexpanded_pc(&self) -> u64; + + /// Remaining steps in a virtual instruction sequence, or `None` if + /// this is a real (non-virtual) instruction. + fn virtual_sequence_remaining(&self) -> Option; + + /// True if this is the first instruction in a virtual sequence. + fn is_first_in_sequence(&self) -> bool; + + /// True if this is a virtual (expanded) instruction. 
+ fn is_virtual(&self) -> bool; + + /// RS1 register read: `(register_index, value)`, or `None` if unused. + fn rs1_read(&self) -> Option<(u8, u64)>; + + /// RS2 register read: `(register_index, value)`, or `None` if unused. + fn rs2_read(&self) -> Option<(u8, u64)>; + + /// RD register write: `(register_index, pre_value, post_value)`, or `None`. + fn rd_write(&self) -> Option<(u8, u64, u64)>; + + /// The static `rd` operand from the instruction encoding. + fn rd_operand(&self) -> Option; + + /// RAM access address, or `None` if no RAM access this cycle. + fn ram_access_address(&self) -> Option; + + /// RAM read value (pre-access value). `None` if no RAM access. + fn ram_read_value(&self) -> Option; + + /// RAM write value (post-access value). `None` if no RAM access. + fn ram_write_value(&self) -> Option; + + /// The immediate operand, sign-extended. + fn imm(&self) -> i128; + + /// R1CS circuit flags (14 booleans, indexed by `CircuitFlags`). + fn circuit_flags(&self) -> [bool; NUM_CIRCUIT_FLAGS]; + + /// Non-R1CS instruction flags (7 booleans, indexed by `InstructionFlags`). + fn instruction_flags(&self) -> [bool; NUM_INSTRUCTION_FLAGS]; + + /// Combined lookup index for RA polynomial construction (128-bit). + fn lookup_index(&self) -> u128; + + /// Lookup table evaluation result. + /// + /// For arithmetic: the computation result (e.g., rs1 + rs2 for ADD). + /// For branches: the comparison result (0 or 1). + /// For stores: zero. + /// For no-ops: zero. + /// + /// This is the value of V_LOOKUP_OUTPUT in the R1CS witness. + fn lookup_output(&self) -> u64; + + /// Index of the lookup table this instruction uses, or `None` for no-ops. + /// + /// The index corresponds to `LookupTableFlag(i)` — a per-cycle boolean + /// that's 1 iff this cycle uses table `i`. Used by BytecodeReadRaf's + /// multi-stage input claim. 
fn lookup_table_index(&self) -> Option<usize>;
Some(w.address), + RAMAccess::NoOp => None, + } + } + + fn ram_read_value(&self) -> Option { + match self.ram_access() { + RAMAccess::Read(r) => Some(r.value), + RAMAccess::Write(w) => Some(w.pre_value), + RAMAccess::NoOp => None, + } + } + + fn ram_write_value(&self) -> Option { + match self.ram_access() { + RAMAccess::Read(r) => Some(r.value), + RAMAccess::Write(w) => Some(w.post_value), + RAMAccess::NoOp => None, + } + } + + fn imm(&self) -> i128 { + match self { + Cycle::NoOp => 0, + _ => self.instruction().normalize().operands.imm, + } + } + + fn circuit_flags(&self) -> [bool; NUM_CIRCUIT_FLAGS] { + let instr = self.instruction(); + let mut flags = static_circuit_flags(&instr); + let norm = instr.normalize(); + apply_dynamic_circuit_flags(&mut flags, &norm); + flags + } + + fn instruction_flags(&self) -> [bool; NUM_INSTRUCTION_FLAGS] { + let instr = self.instruction(); + let mut flags = static_instruction_flags(&instr); + let norm = instr.normalize(); + flags[InstructionFlags::IsRdNotZero as usize] = + matches!(norm.operands.rd, Some(rd) if rd != 0); + flags + } + + fn lookup_index(&self) -> u128 { + let cflags = self.circuit_flags(); + let iflags = self.instruction_flags(); + + let (left, right) = instruction_inputs(self, &iflags); + + if cflags[CircuitFlags::AddOperands] { + (left as u128).wrapping_add(right) + } else if cflags[CircuitFlags::SubtractOperands] { + let right_twos = (1u128 << 64).wrapping_sub(right); + (left as u128).wrapping_add(right_twos) + } else if cflags[CircuitFlags::MultiplyOperands] { + (left as u128).wrapping_mul(right) + } else if cflags[CircuitFlags::Advice] || self.is_noop() { + 0 + } else { + jolt_instructions::interleave_bits(left, right as u64) + } + } + + fn lookup_table_index(&self) -> Option { + if self.is_noop() { + return None; + } + let kind = lookup_table_kind_for_instruction(&self.instruction()); + kind.map(|k| k as usize) + } + + fn lookup_output(&self) -> u64 { + // For instructions that write to rd: the lookup output 
= rd post_value. + // For branches: the comparison result is encoded in whether the branch is taken. + // For stores/noop: zero. + let cflags = self.circuit_flags(); + if cflags[CircuitFlags::WriteLookupOutputToRD as usize] { + self.rd_write().map_or(0, |(_, _, post)| post) + } else if cflags[CircuitFlags::Jump as usize] { + self.rd_write().map_or(0, |(_, _, post)| post) + } else { + // Branches, stores, noop: lookup output = 0 in the R1CS sense + // (the actual comparison result is handled by ShouldBranch constraint) + 0 + } + } +} + +/// Compute the instruction operand inputs from CycleRow data. +fn instruction_inputs( + cycle: &impl CycleRow, + iflags: &[bool; NUM_INSTRUCTION_FLAGS], +) -> (u64, u128) { + let left = if iflags[InstructionFlags::LeftOperandIsPC] { + cycle.unexpanded_pc() + } else if iflags[InstructionFlags::LeftOperandIsRs1Value] { + match cycle.rs1_read() { + Some((_, v)) => v, + None => 0, + } + } else { + 0 + }; + + let right: i128 = if iflags[InstructionFlags::RightOperandIsImm] { + cycle.imm() + } else if iflags[InstructionFlags::RightOperandIsRs2Value] { + match cycle.rs2_read() { + Some((_, v)) => v as i128, + None => 0, + } + } else { + 0 + }; + + (left, right as u128) +} + +// ISA dispatch tables (absorbed from jolt-zkvm/src/witness/flags.rs) + +fn static_circuit_flags(instr: &Instruction) -> [bool; NUM_CIRCUIT_FLAGS] { + use jolt_instructions::rv::{arithmetic, branch, compare, jump, load, logic, store, system}; + use jolt_instructions::virtual_::{ + advice, arithmetic as varith, assert as vassert, bitwise, byte, division, extension, shift, + xor_rotate, + }; + + match instr { + Instruction::ADD(_) => Flags::circuit_flags(&arithmetic::Add), + Instruction::ADDI(_) => Flags::circuit_flags(&arithmetic::Addi), + Instruction::SUB(_) => Flags::circuit_flags(&arithmetic::Sub), + Instruction::LUI(_) => Flags::circuit_flags(&arithmetic::Lui), + Instruction::AUIPC(_) => Flags::circuit_flags(&arithmetic::Auipc), + Instruction::MUL(_) => 
Flags::circuit_flags(&arithmetic::Mul), + Instruction::MULHU(_) => Flags::circuit_flags(&arithmetic::MulHU), + Instruction::AND(_) => Flags::circuit_flags(&logic::And), + Instruction::ANDI(_) => Flags::circuit_flags(&logic::AndI), + Instruction::ANDN(_) => Flags::circuit_flags(&logic::Andn), + Instruction::OR(_) => Flags::circuit_flags(&logic::Or), + Instruction::ORI(_) => Flags::circuit_flags(&logic::OrI), + Instruction::XOR(_) => Flags::circuit_flags(&logic::Xor), + Instruction::XORI(_) => Flags::circuit_flags(&logic::XorI), + Instruction::SLT(_) => Flags::circuit_flags(&compare::Slt), + Instruction::SLTI(_) => Flags::circuit_flags(&compare::SltI), + Instruction::SLTIU(_) => Flags::circuit_flags(&compare::SltIU), + Instruction::SLTU(_) => Flags::circuit_flags(&compare::SltU), + Instruction::BEQ(_) => Flags::circuit_flags(&branch::Beq), + Instruction::BGE(_) => Flags::circuit_flags(&branch::Bge), + Instruction::BGEU(_) => Flags::circuit_flags(&branch::BgeU), + Instruction::BLT(_) => Flags::circuit_flags(&branch::Blt), + Instruction::BLTU(_) => Flags::circuit_flags(&branch::BltU), + Instruction::BNE(_) => Flags::circuit_flags(&branch::Bne), + Instruction::JAL(_) => Flags::circuit_flags(&jump::Jal), + Instruction::JALR(_) => Flags::circuit_flags(&jump::Jalr), + Instruction::LD(_) => Flags::circuit_flags(&load::Ld), + Instruction::SD(_) => Flags::circuit_flags(&store::Sd), + Instruction::EBREAK(_) => Flags::circuit_flags(&system::Ebreak), + Instruction::ECALL(_) => Flags::circuit_flags(&system::Ecall), + Instruction::FENCE(_) => Flags::circuit_flags(&system::Fence), + Instruction::VirtualAdvice(_) => Flags::circuit_flags(&advice::VirtualAdvice), + Instruction::VirtualAdviceLen(_) => Flags::circuit_flags(&advice::VirtualAdviceLen), + Instruction::VirtualAdviceLoad(_) => Flags::circuit_flags(&advice::VirtualAdviceLoad), + Instruction::VirtualHostIO(_) => Flags::circuit_flags(&advice::VirtualHostIO), + Instruction::VirtualMULI(_) => Flags::circuit_flags(&varith::MulI), 
+ Instruction::VirtualPow2(_) => Flags::circuit_flags(&varith::Pow2), + Instruction::VirtualPow2I(_) => Flags::circuit_flags(&varith::Pow2I), + Instruction::VirtualPow2W(_) => Flags::circuit_flags(&varith::Pow2W), + Instruction::VirtualPow2IW(_) => Flags::circuit_flags(&varith::Pow2IW), + Instruction::VirtualAssertEQ(_) => Flags::circuit_flags(&vassert::AssertEq), + Instruction::VirtualAssertLTE(_) => Flags::circuit_flags(&vassert::AssertLte), + Instruction::VirtualAssertValidDiv0(_) => Flags::circuit_flags(&vassert::AssertValidDiv0), + Instruction::VirtualAssertValidUnsignedRemainder(_) => { + Flags::circuit_flags(&vassert::AssertValidUnsignedRemainder) + } + Instruction::VirtualAssertMulUNoOverflow(_) => { + Flags::circuit_flags(&vassert::AssertMulUNoOverflow) + } + Instruction::VirtualAssertWordAlignment(_) => { + Flags::circuit_flags(&vassert::AssertWordAlignment) + } + Instruction::VirtualAssertHalfwordAlignment(_) => { + Flags::circuit_flags(&vassert::AssertHalfwordAlignment) + } + Instruction::VirtualMovsign(_) => Flags::circuit_flags(&bitwise::MovSign), + Instruction::VirtualRev8W(_) => Flags::circuit_flags(&byte::VirtualRev8W), + Instruction::VirtualChangeDivisor(_) => { + Flags::circuit_flags(&division::VirtualChangeDivisor) + } + Instruction::VirtualChangeDivisorW(_) => { + Flags::circuit_flags(&division::VirtualChangeDivisorW) + } + Instruction::VirtualZeroExtendWord(_) => { + Flags::circuit_flags(&extension::VirtualZeroExtendWord) + } + Instruction::VirtualSignExtendWord(_) => { + Flags::circuit_flags(&extension::VirtualSignExtendWord) + } + Instruction::VirtualSRL(_) => Flags::circuit_flags(&shift::VirtualSrl), + Instruction::VirtualSRLI(_) => Flags::circuit_flags(&shift::VirtualSrli), + Instruction::VirtualSRA(_) => Flags::circuit_flags(&shift::VirtualSra), + Instruction::VirtualSRAI(_) => Flags::circuit_flags(&shift::VirtualSrai), + Instruction::VirtualShiftRightBitmask(_) => { + Flags::circuit_flags(&shift::VirtualShiftRightBitmask) + } + 
Instruction::VirtualShiftRightBitmaskI(_) => { + Flags::circuit_flags(&shift::VirtualShiftRightBitmaski) + } + Instruction::VirtualROTRI(_) => Flags::circuit_flags(&shift::VirtualRotri), + Instruction::VirtualROTRIW(_) => Flags::circuit_flags(&shift::VirtualRotriw), + Instruction::VirtualXORROT32(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRot32), + Instruction::VirtualXORROT24(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRot24), + Instruction::VirtualXORROT16(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRot16), + Instruction::VirtualXORROT63(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRot63), + Instruction::VirtualXORROTW16(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRotW16), + Instruction::VirtualXORROTW12(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRotW12), + Instruction::VirtualXORROTW8(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRotW8), + Instruction::VirtualXORROTW7(_) => Flags::circuit_flags(&xor_rotate::VirtualXorRotW7), + Instruction::NoOp => { + let mut flags = [false; NUM_CIRCUIT_FLAGS]; + flags[CircuitFlags::DoNotUpdateUnexpandedPC as usize] = true; + flags + } + Instruction::INLINE(i) => panic!( + "INLINE instruction reached CycleRow unexpanded: opcode={}, funct3={}, funct7={}", + i.opcode, i.funct3, i.funct7 + ), + _ => panic!("unsupported instruction: {instr:?}"), + } +} + +fn static_instruction_flags(instr: &Instruction) -> [bool; NUM_INSTRUCTION_FLAGS] { + use jolt_instructions::rv::{arithmetic, branch, compare, jump, load, logic, store, system}; + use jolt_instructions::virtual_::{ + advice, arithmetic as varith, assert as vassert, bitwise, byte, division, extension, shift, + xor_rotate, + }; + + match instr { + Instruction::ADD(_) => Flags::instruction_flags(&arithmetic::Add), + Instruction::ADDI(_) => Flags::instruction_flags(&arithmetic::Addi), + Instruction::SUB(_) => Flags::instruction_flags(&arithmetic::Sub), + Instruction::LUI(_) => Flags::instruction_flags(&arithmetic::Lui), + Instruction::AUIPC(_) 
=> Flags::instruction_flags(&arithmetic::Auipc), + Instruction::MUL(_) => Flags::instruction_flags(&arithmetic::Mul), + Instruction::MULHU(_) => Flags::instruction_flags(&arithmetic::MulHU), + Instruction::AND(_) => Flags::instruction_flags(&logic::And), + Instruction::ANDI(_) => Flags::instruction_flags(&logic::AndI), + Instruction::ANDN(_) => Flags::instruction_flags(&logic::Andn), + Instruction::OR(_) => Flags::instruction_flags(&logic::Or), + Instruction::ORI(_) => Flags::instruction_flags(&logic::OrI), + Instruction::XOR(_) => Flags::instruction_flags(&logic::Xor), + Instruction::XORI(_) => Flags::instruction_flags(&logic::XorI), + Instruction::SLT(_) => Flags::instruction_flags(&compare::Slt), + Instruction::SLTI(_) => Flags::instruction_flags(&compare::SltI), + Instruction::SLTIU(_) => Flags::instruction_flags(&compare::SltIU), + Instruction::SLTU(_) => Flags::instruction_flags(&compare::SltU), + Instruction::BEQ(_) => Flags::instruction_flags(&branch::Beq), + Instruction::BGE(_) => Flags::instruction_flags(&branch::Bge), + Instruction::BGEU(_) => Flags::instruction_flags(&branch::BgeU), + Instruction::BLT(_) => Flags::instruction_flags(&branch::Blt), + Instruction::BLTU(_) => Flags::instruction_flags(&branch::BltU), + Instruction::BNE(_) => Flags::instruction_flags(&branch::Bne), + Instruction::JAL(_) => Flags::instruction_flags(&jump::Jal), + Instruction::JALR(_) => Flags::instruction_flags(&jump::Jalr), + Instruction::LD(_) => Flags::instruction_flags(&load::Ld), + Instruction::SD(_) => Flags::instruction_flags(&store::Sd), + Instruction::EBREAK(_) => Flags::instruction_flags(&system::Ebreak), + Instruction::ECALL(_) => Flags::instruction_flags(&system::Ecall), + Instruction::FENCE(_) => Flags::instruction_flags(&system::Fence), + Instruction::VirtualAdvice(_) => Flags::instruction_flags(&advice::VirtualAdvice), + Instruction::VirtualAdviceLen(_) => Flags::instruction_flags(&advice::VirtualAdviceLen), + Instruction::VirtualAdviceLoad(_) => 
Flags::instruction_flags(&advice::VirtualAdviceLoad), + Instruction::VirtualHostIO(_) => Flags::instruction_flags(&advice::VirtualHostIO), + Instruction::VirtualMULI(_) => Flags::instruction_flags(&varith::MulI), + Instruction::VirtualPow2(_) => Flags::instruction_flags(&varith::Pow2), + Instruction::VirtualPow2I(_) => Flags::instruction_flags(&varith::Pow2I), + Instruction::VirtualPow2W(_) => Flags::instruction_flags(&varith::Pow2W), + Instruction::VirtualPow2IW(_) => Flags::instruction_flags(&varith::Pow2IW), + Instruction::VirtualAssertEQ(_) => Flags::instruction_flags(&vassert::AssertEq), + Instruction::VirtualAssertLTE(_) => Flags::instruction_flags(&vassert::AssertLte), + Instruction::VirtualAssertValidDiv0(_) => { + Flags::instruction_flags(&vassert::AssertValidDiv0) + } + Instruction::VirtualAssertValidUnsignedRemainder(_) => { + Flags::instruction_flags(&vassert::AssertValidUnsignedRemainder) + } + Instruction::VirtualAssertMulUNoOverflow(_) => { + Flags::instruction_flags(&vassert::AssertMulUNoOverflow) + } + Instruction::VirtualAssertWordAlignment(_) => { + Flags::instruction_flags(&vassert::AssertWordAlignment) + } + Instruction::VirtualAssertHalfwordAlignment(_) => { + Flags::instruction_flags(&vassert::AssertHalfwordAlignment) + } + Instruction::VirtualMovsign(_) => Flags::instruction_flags(&bitwise::MovSign), + Instruction::VirtualRev8W(_) => Flags::instruction_flags(&byte::VirtualRev8W), + Instruction::VirtualChangeDivisor(_) => { + Flags::instruction_flags(&division::VirtualChangeDivisor) + } + Instruction::VirtualChangeDivisorW(_) => { + Flags::instruction_flags(&division::VirtualChangeDivisorW) + } + Instruction::VirtualZeroExtendWord(_) => { + Flags::instruction_flags(&extension::VirtualZeroExtendWord) + } + Instruction::VirtualSignExtendWord(_) => { + Flags::instruction_flags(&extension::VirtualSignExtendWord) + } + Instruction::VirtualSRL(_) => Flags::instruction_flags(&shift::VirtualSrl), + Instruction::VirtualSRLI(_) => 
Flags::instruction_flags(&shift::VirtualSrli), + Instruction::VirtualSRA(_) => Flags::instruction_flags(&shift::VirtualSra), + Instruction::VirtualSRAI(_) => Flags::instruction_flags(&shift::VirtualSrai), + Instruction::VirtualShiftRightBitmask(_) => { + Flags::instruction_flags(&shift::VirtualShiftRightBitmask) + } + Instruction::VirtualShiftRightBitmaskI(_) => { + Flags::instruction_flags(&shift::VirtualShiftRightBitmaski) + } + Instruction::VirtualROTRI(_) => Flags::instruction_flags(&shift::VirtualRotri), + Instruction::VirtualROTRIW(_) => Flags::instruction_flags(&shift::VirtualRotriw), + Instruction::VirtualXORROT32(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRot32), + Instruction::VirtualXORROT24(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRot24), + Instruction::VirtualXORROT16(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRot16), + Instruction::VirtualXORROT63(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRot63), + Instruction::VirtualXORROTW16(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRotW16), + Instruction::VirtualXORROTW12(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRotW12), + Instruction::VirtualXORROTW8(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRotW8), + Instruction::VirtualXORROTW7(_) => Flags::instruction_flags(&xor_rotate::VirtualXorRotW7), + Instruction::NoOp => { + let mut flags = [false; NUM_INSTRUCTION_FLAGS]; + flags[InstructionFlags::IsNoop as usize] = true; + flags + } + Instruction::INLINE(i) => panic!( + "INLINE instruction reached CycleRow unexpanded: opcode={}, funct3={}, funct7={}", + i.opcode, i.funct3, i.funct7 + ), + _ => panic!("unsupported instruction: {instr:?}"), + } +} + +fn lookup_table_kind_for_instruction(instr: &Instruction) -> Option { + use jolt_instructions::rv::{arithmetic, branch, compare, jump, load, logic, store, system}; + use jolt_instructions::virtual_::{ + advice, arithmetic as varith, assert as vassert, bitwise, byte, division, extension, shift, 
+ xor_rotate, + }; + + match instr { + Instruction::ADD(_) => arithmetic::Add.lookup_table(), + Instruction::ADDI(_) => arithmetic::Addi.lookup_table(), + Instruction::SUB(_) => arithmetic::Sub.lookup_table(), + Instruction::LUI(_) => arithmetic::Lui.lookup_table(), + Instruction::AUIPC(_) => arithmetic::Auipc.lookup_table(), + Instruction::MUL(_) => arithmetic::Mul.lookup_table(), + Instruction::MULHU(_) => arithmetic::MulHU.lookup_table(), + Instruction::AND(_) => logic::And.lookup_table(), + Instruction::ANDI(_) => logic::AndI.lookup_table(), + Instruction::ANDN(_) => logic::Andn.lookup_table(), + Instruction::OR(_) => logic::Or.lookup_table(), + Instruction::ORI(_) => logic::OrI.lookup_table(), + Instruction::XOR(_) => logic::Xor.lookup_table(), + Instruction::XORI(_) => logic::XorI.lookup_table(), + Instruction::SLT(_) => compare::Slt.lookup_table(), + Instruction::SLTI(_) => compare::SltI.lookup_table(), + Instruction::SLTIU(_) => compare::SltIU.lookup_table(), + Instruction::SLTU(_) => compare::SltU.lookup_table(), + Instruction::BEQ(_) => branch::Beq.lookup_table(), + Instruction::BGE(_) => branch::Bge.lookup_table(), + Instruction::BGEU(_) => branch::BgeU.lookup_table(), + Instruction::BLT(_) => branch::Blt.lookup_table(), + Instruction::BLTU(_) => branch::BltU.lookup_table(), + Instruction::BNE(_) => branch::Bne.lookup_table(), + Instruction::JAL(_) => jump::Jal.lookup_table(), + Instruction::JALR(_) => jump::Jalr.lookup_table(), + Instruction::LD(_) => load::Ld.lookup_table(), + Instruction::SD(_) => store::Sd.lookup_table(), + Instruction::EBREAK(_) => system::Ebreak.lookup_table(), + Instruction::ECALL(_) => system::Ecall.lookup_table(), + Instruction::FENCE(_) => system::Fence.lookup_table(), + Instruction::VirtualAdvice(_) => advice::VirtualAdvice.lookup_table(), + Instruction::VirtualAdviceLen(_) => advice::VirtualAdviceLen.lookup_table(), + Instruction::VirtualAdviceLoad(_) => advice::VirtualAdviceLoad.lookup_table(), + 
Instruction::VirtualHostIO(_) => advice::VirtualHostIO.lookup_table(), + Instruction::VirtualMULI(_) => varith::MulI.lookup_table(), + Instruction::VirtualPow2(_) => varith::Pow2.lookup_table(), + Instruction::VirtualPow2I(_) => varith::Pow2I.lookup_table(), + Instruction::VirtualPow2W(_) => varith::Pow2W.lookup_table(), + Instruction::VirtualPow2IW(_) => varith::Pow2IW.lookup_table(), + Instruction::VirtualAssertEQ(_) => vassert::AssertEq.lookup_table(), + Instruction::VirtualAssertLTE(_) => vassert::AssertLte.lookup_table(), + Instruction::VirtualAssertValidDiv0(_) => vassert::AssertValidDiv0.lookup_table(), + Instruction::VirtualAssertValidUnsignedRemainder(_) => { + vassert::AssertValidUnsignedRemainder.lookup_table() + } + Instruction::VirtualAssertMulUNoOverflow(_) => vassert::AssertMulUNoOverflow.lookup_table(), + Instruction::VirtualAssertWordAlignment(_) => vassert::AssertWordAlignment.lookup_table(), + Instruction::VirtualAssertHalfwordAlignment(_) => { + vassert::AssertHalfwordAlignment.lookup_table() + } + Instruction::VirtualMovsign(_) => bitwise::MovSign.lookup_table(), + Instruction::VirtualRev8W(_) => byte::VirtualRev8W.lookup_table(), + Instruction::VirtualChangeDivisor(_) => division::VirtualChangeDivisor.lookup_table(), + Instruction::VirtualChangeDivisorW(_) => division::VirtualChangeDivisorW.lookup_table(), + Instruction::VirtualZeroExtendWord(_) => extension::VirtualZeroExtendWord.lookup_table(), + Instruction::VirtualSignExtendWord(_) => extension::VirtualSignExtendWord.lookup_table(), + Instruction::VirtualSRL(_) => shift::VirtualSrl.lookup_table(), + Instruction::VirtualSRLI(_) => shift::VirtualSrli.lookup_table(), + Instruction::VirtualSRA(_) => shift::VirtualSra.lookup_table(), + Instruction::VirtualSRAI(_) => shift::VirtualSrai.lookup_table(), + Instruction::VirtualShiftRightBitmask(_) => shift::VirtualShiftRightBitmask.lookup_table(), + Instruction::VirtualShiftRightBitmaskI(_) => { + shift::VirtualShiftRightBitmaski.lookup_table() + } 
+ Instruction::VirtualROTRI(_) => shift::VirtualRotri.lookup_table(), + Instruction::VirtualROTRIW(_) => shift::VirtualRotriw.lookup_table(), + Instruction::VirtualXORROT32(_) => xor_rotate::VirtualXorRot32.lookup_table(), + Instruction::VirtualXORROT24(_) => xor_rotate::VirtualXorRot24.lookup_table(), + Instruction::VirtualXORROT16(_) => xor_rotate::VirtualXorRot16.lookup_table(), + Instruction::VirtualXORROT63(_) => xor_rotate::VirtualXorRot63.lookup_table(), + Instruction::VirtualXORROTW16(_) => xor_rotate::VirtualXorRotW16.lookup_table(), + Instruction::VirtualXORROTW12(_) => xor_rotate::VirtualXorRotW12.lookup_table(), + Instruction::VirtualXORROTW8(_) => xor_rotate::VirtualXorRotW8.lookup_table(), + Instruction::VirtualXORROTW7(_) => xor_rotate::VirtualXorRotW7.lookup_table(), + Instruction::NoOp | Instruction::INLINE(_) => None, + _ => None, + } +} + +fn apply_dynamic_circuit_flags( + flags: &mut [bool; NUM_CIRCUIT_FLAGS], + norm: &NormalizedInstruction, +) { + if norm.virtual_sequence_remaining.is_some() { + flags[CircuitFlags::VirtualInstruction as usize] = true; + } + if norm.virtual_sequence_remaining.unwrap_or(0) != 0 { + flags[CircuitFlags::DoNotUpdateUnexpandedPC as usize] = true; + } + if norm.is_first_in_sequence { + flags[CircuitFlags::IsFirstInSequence as usize] = true; + } + if norm.is_compressed { + flags[CircuitFlags::IsCompressed as usize] = true; + } + if norm.virtual_sequence_remaining == Some(0) { + flags[CircuitFlags::IsLastInSequence as usize] = true; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn noop_trait_methods() { + let noop = Cycle::noop(); + assert!(noop.is_noop()); + assert_eq!(noop.unexpanded_pc(), 0); + assert!(noop.ram_access_address().is_none()); + assert!(noop.rs1_read().is_none()); + assert!(noop.rd_write().is_none()); + + let cflags = CycleRow::circuit_flags(&noop); + assert!(cflags[CircuitFlags::DoNotUpdateUnexpandedPC as usize]); + + let iflags = CycleRow::instruction_flags(&noop); + 
assert!(iflags[InstructionFlags::IsNoop as usize]); + } + + #[test] + fn noop_lookup_index_is_zero() { + let noop = Cycle::noop(); + assert_eq!(noop.lookup_index(), 0); + } +} diff --git a/crates/jolt-host/src/lib.rs b/crates/jolt-host/src/lib.rs new file mode 100644 index 000000000..7c280867e --- /dev/null +++ b/crates/jolt-host/src/lib.rs @@ -0,0 +1,62 @@ +//! Host-side guest program compilation, decoding, and tracing. +//! +//! Provides [`Program`] for building guest RISC-V programs via the `jolt` CLI, +//! decoding ELFs into instructions, and tracing execution to produce [`Cycle`] +//! vectors for the proving pipeline. +//! +//! This crate is independent of the proving system — it depends only on +//! `common` (memory config), `tracer` (RISC-V emulation), and standard I/O. + +mod analyze; +mod cycle_row; +mod cycle_row_impl; +mod program; + +pub use cycle_row::CycleRow; + +use std::path::{Path, PathBuf}; + +pub use analyze::ProgramSummary; +pub use program::decode; + +pub use common::jolt_device::{JoltDevice, MemoryConfig}; +pub use tracer::emulator::memory::Memory; +pub use tracer::instruction::{Cycle, Instruction}; +pub use tracer::LazyTraceIterator; + +pub const DEFAULT_TARGET_DIR: &str = "/tmp/jolt-guest-targets"; + +/// Host-side builder for guest RISC-V programs. +/// +/// Provides methods to configure, compile, decode, and trace guest ELF binaries. +/// Call [`Program::new`] with the guest crate name, optionally configure via +/// `set_*` methods, then use [`Program::build`], [`Program::decode`], or +/// [`Program::trace`] to compile and execute the guest program. 
+#[derive(Clone, Debug)]
+pub struct Program {
+    guest: String,
+    func: Option<String>,
+    profile: Option<String>,
+    heap_size: u64,
+    stack_size: u64,
+    max_input_size: u64,
+    max_untrusted_advice_size: u64,
+    max_trusted_advice_size: u64,
+    max_output_size: u64,
+    std: bool,
+    backtrace: Option<String>,
+    elf: Option<PathBuf>,
+    elf_compute_advice: Option<PathBuf>,
+}
+
+impl Program {
+    /// Returns the path to the built guest ELF, if available.
+    pub fn elf_path(&self) -> Option<&Path> {
+        self.elf.as_deref()
+    }
+
+    /// Returns the path to the built compute-advice ELF, if available.
+    pub fn elf_compute_advice_path(&self) -> Option<&Path> {
+        self.elf_compute_advice.as_deref()
+    }
+}
diff --git a/crates/jolt-host/src/program.rs b/crates/jolt-host/src/program.rs
new file mode 100644
index 000000000..bb1712444
--- /dev/null
+++ b/crates/jolt-host/src/program.rs
@@ -0,0 +1,693 @@
+//! Guest program building, decoding, and tracing.
+
+use std::io;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use tracing::info;
+
+use common::constants::{
+    DEFAULT_HEAP_SIZE, DEFAULT_MAX_INPUT_SIZE, DEFAULT_MAX_OUTPUT_SIZE,
+    DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, DEFAULT_STACK_SIZE,
+    RAM_START_ADDRESS,
+};
+use common::jolt_device::{JoltDevice, MemoryConfig};
+use tracer::emulator::memory::Memory;
+use tracer::instruction::{Cycle, Instruction};
+use tracer::utils::virtual_registers::VirtualRegisterAllocator;
+use tracer::LazyTraceIterator;
+
+use crate::analyze::ProgramSummary;
+use crate::{Program, DEFAULT_TARGET_DIR};
+
+impl Program {
+    /// Create a new `Program` targeting the given guest crate name.
+    ///
+    /// All memory sizes are initialized to the defaults from `common::constants`.
+    /// The guest is compiled with `--release` unless overridden via [`Self::set_profile`].
+ pub fn new(guest: &str) -> Self { + Self { + guest: guest.to_string(), + func: None, + profile: None, + heap_size: DEFAULT_HEAP_SIZE, + stack_size: DEFAULT_STACK_SIZE, + max_input_size: DEFAULT_MAX_INPUT_SIZE, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + max_output_size: DEFAULT_MAX_OUTPUT_SIZE, + std: false, + backtrace: Some("off".to_string()), + elf: None, + elf_compute_advice: None, + } + } + + /// Enable or disable linking against the Rust standard library. + pub fn set_std(&mut self, std: bool) -> &mut Self { + self.std = std; + self + } + + /// Set the guest function name (passed as `JOLT_FUNC_NAME` env var during build). + pub fn set_func(&mut self, func: &str) -> &mut Self { + self.func = Some(func.to_string()); + self + } + + /// Set the cargo profile used to compile the guest. + /// + /// If unset, guest builds default to `--release`. + pub fn set_profile(&mut self, profile: &str) -> &mut Self { + self.profile = Some(profile.to_string()); + self + } + + /// Set backtrace mode for the guest build. + /// + /// Valid modes: "off", "dwarf", "frame-pointers". + pub fn set_backtrace(&mut self, mode: &str) -> &mut Self { + self.backtrace = Some(mode.to_string()); + self + } + + /// Apply all fields from a [`MemoryConfig`] to this program's settings. + pub fn set_memory_config(&mut self, memory_config: MemoryConfig) -> &mut Self { + self.heap_size = memory_config.heap_size; + self.stack_size = memory_config.stack_size; + self.max_input_size = memory_config.max_input_size; + self.max_trusted_advice_size = memory_config.max_trusted_advice_size; + self.max_untrusted_advice_size = memory_config.max_untrusted_advice_size; + self.max_output_size = memory_config.max_output_size; + self + } + + /// Set the guest heap size in bytes. + pub fn set_heap_size(&mut self, size: u64) -> &mut Self { + self.heap_size = size; + self + } + + /// Set the guest stack size in bytes. 
+ pub fn set_stack_size(&mut self, size: u64) -> &mut Self { + self.stack_size = size; + self + } + + /// Set the maximum input buffer size in bytes. + pub fn set_max_input_size(&mut self, size: u64) -> &mut Self { + self.max_input_size = size; + self + } + + /// Set the maximum trusted advice buffer size in bytes. + pub fn set_max_trusted_advice_size(&mut self, size: u64) -> &mut Self { + self.max_trusted_advice_size = size; + self + } + + /// Set the maximum untrusted advice buffer size in bytes. + pub fn set_max_untrusted_advice_size(&mut self, size: u64) -> &mut Self { + self.max_untrusted_advice_size = size; + self + } + + /// Set the maximum output buffer size in bytes. + pub fn set_max_output_size(&mut self, size: u64) -> &mut Self { + self.max_output_size = size; + self + } + + /// Compile the guest program with default features. + /// + /// Uses [`Self::build_with_features`] with an empty feature set. + /// No-op if the ELF has already been built. + pub fn build(&mut self, target_dir: &str) { + self.build_with_features(target_dir, &[]); + } + + /// Compile the guest program via the `jolt` CLI with the given extra Cargo features. + /// + /// No-op if the ELF has already been built (unless `extra_features` contains + /// `"compute_advice"`, which produces a separate ELF). 
+    #[tracing::instrument(skip_all, name = "Program::build_with_features")]
+    pub fn build_with_features(&mut self, target_dir: &str, extra_features: &[&str]) {
+        if self.elf.is_some() {
+            return;
+        }
+
+        let jolt_cmd = std::env::var("JOLT_PATH").unwrap_or_else(|_| "jolt".to_string());
+        let is_compute_advice = extra_features.contains(&"compute_advice");
+        let guest_target_dir = self.guest_target_dir(target_dir, is_compute_advice);
+        let args = self.build_args(extra_features, &guest_target_dir);
+
+        let cmd_line = compose_command_line(
+            &jolt_cmd,
+            &[],
+            &args.iter().map(|s| s.as_str()).collect::<Vec<&str>>(),
+        );
+        info!("\n{cmd_line}");
+
+        let mut cmd = Command::new(&jolt_cmd);
+        let _ = cmd.args(&args);
+
+        if let Some(func) = &self.func {
+            let _ = cmd.env("JOLT_FUNC_NAME", func);
+        }
+
+        let output = cmd
+            .output()
+            .expect("failed to run jolt - make sure it's installed (cargo install --path .)");
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            if stderr.contains("does not contain this feature: compute_advice") {
+                info!("guest does not support compute_advice feature");
+                return;
+            }
+            io::stderr().write_all(&output.stderr).unwrap();
+            let output_msg = format!("::build command: \n{cmd_line}\n");
+            io::stderr().write_all(output_msg.as_bytes()).unwrap();
+            panic!("failed to compile guest with jolt");
+        }
+
+        let elf_path = self.resolve_elf_path(&guest_target_dir);
+        assert!(
+            elf_path.exists(),
+            "Built ELF not found at expected location: {}",
+            elf_path.display()
+        );
+
+        if is_compute_advice {
+            info!("Built compute_advice guest binary: {}", elf_path.display());
+            self.elf_compute_advice = Some(elf_path);
+        } else {
+            info!("Built guest binary with jolt: {}", elf_path.display());
+            self.elf = Some(elf_path);
+        }
+    }
+
+    /// Returns the contents of the built guest ELF, or `None` if not yet built.
+    pub fn get_elf_contents(&self) -> Option<Vec<u8>> {
+        self.elf.as_ref().map(|path| read_elf_at(path))
+    }
+
+    /// Returns the contents of the built compute-advice ELF, or `None` if not yet built.
+    pub fn get_elf_compute_advice_contents(&self) -> Option<Vec<u8>> {
+        self.elf_compute_advice
+            .as_ref()
+            .map(|path| read_elf_at(path))
+    }
+
+    /// Compile (if needed) and decode the guest ELF into instructions and memory init data.
+    ///
+    /// Returns `(instructions, memory_init_bytes, program_size)`.
+    pub fn decode(&mut self) -> (Vec<Instruction>, Vec<(u64, u8)>, u64) {
+        self.build(DEFAULT_TARGET_DIR);
+        decode(&self.read_elf())
+    }
+
+    /// Compile (if needed) and trace the guest program with the given I/O buffers.
+    ///
+    /// Returns the lazy trace iterator, materialized trace, final memory state, and I/O device.
+    #[tracing::instrument(skip_all, name = "Program::trace")]
+    pub fn trace(
+        &mut self,
+        inputs: &[u8],
+        untrusted_advice: &[u8],
+        trusted_advice: &[u8],
+    ) -> (LazyTraceIterator, Vec<Cycle>, Memory, JoltDevice) {
+        self.build(DEFAULT_TARGET_DIR);
+        let elf_contents = self.read_elf();
+        let program_size = compute_program_size(&elf_contents);
+        let memory_config = self.memory_config(program_size);
+
+        let (lazy_trace, trace_vec, memory, jolt_device, _advice_tape) = tracer::trace(
+            &elf_contents,
+            self.elf.as_ref().map(|p| p as &PathBuf),
+            inputs,
+            untrusted_advice,
+            trusted_advice,
+            &memory_config,
+            None,
+        );
+        (lazy_trace, trace_vec, memory, jolt_device)
+    }
+
+    /// Compile (if needed) and trace the guest program, writing the trace to a file.
+    ///
+    /// Returns the final memory state and I/O device (the trace itself is written to `trace_file`).
+    #[tracing::instrument(skip_all, name = "Program::trace_to_file")]
+    pub fn trace_to_file(
+        &mut self,
+        inputs: &[u8],
+        untrusted_advice: &[u8],
+        trusted_advice: &[u8],
+        trace_file: &Path,
+    ) -> (Memory, JoltDevice) {
+        self.build(DEFAULT_TARGET_DIR);
+        let elf_contents = self.read_elf();
+        let program_size = compute_program_size(&elf_contents);
+        let memory_config = self.memory_config(program_size);
+
+        let trace_pathbuf = trace_file.to_path_buf();
+        tracer::trace_to_file(
+            &elf_contents,
+            self.elf.as_ref().map(|p| p as &PathBuf),
+            inputs,
+            untrusted_advice,
+            trusted_advice,
+            &memory_config,
+            &trace_pathbuf,
+        )
+    }
+
+    /// Compile, decode, and trace the guest, returning a [`ProgramSummary`] for analysis.
+    pub fn trace_analyze(
+        mut self,
+        inputs: &[u8],
+        untrusted_advice: &[u8],
+        trusted_advice: &[u8],
+    ) -> ProgramSummary {
+        let (bytecode, init_memory_state, _) = self.decode();
+        let (_, trace_vec, _, io_device) = self.trace(inputs, untrusted_advice, trusted_advice);
+
+        ProgramSummary {
+            trace: trace_vec,
+            bytecode,
+            memory_init: init_memory_state,
+            io_device,
+        }
+    }
+
+    fn read_elf(&self) -> Vec<u8> {
+        self.get_elf_contents()
+            .expect("ELF not built yet — call build() first")
+    }
+
+    fn memory_config(&self, program_size: u64) -> MemoryConfig {
+        MemoryConfig {
+            heap_size: self.heap_size,
+            stack_size: self.stack_size,
+            max_input_size: self.max_input_size,
+            max_untrusted_advice_size: self.max_untrusted_advice_size,
+            max_trusted_advice_size: self.max_trusted_advice_size,
+            max_output_size: self.max_output_size,
+            program_size: Some(program_size),
+        }
+    }
+
+    fn guest_target_dir(&self, target_dir: &str, is_compute_advice: bool) -> String {
+        let func_suffix = self.func.as_deref().unwrap_or("");
+        if is_compute_advice {
+            format!("{target_dir}/{}-{func_suffix}-compute-advice", self.guest)
+        } else {
+            format!("{target_dir}/{}-{func_suffix}", self.guest)
+        }
+    }
+
+    fn build_args(&self, extra_features: &[&str], guest_target_dir: &str) ->
Vec<String> {
+        let mut args = Vec::with_capacity(16);
+        args.push("build".to_string());
+
+        args.push("-p".to_string());
+        args.push(self.guest.clone());
+
+        if self.std {
+            args.push("--mode".to_string());
+            args.push("std".to_string());
+        }
+
+        if let Some(mode) = &self.backtrace {
+            args.push("--backtrace".to_string());
+            args.push(mode.clone());
+        }
+
+        args.push("--stack-size".to_string());
+        args.push(self.stack_size.to_string());
+        args.push("--heap-size".to_string());
+        args.push(self.heap_size.to_string());
+
+        args.push("--".to_string());
+
+        if let Some(profile) = &self.profile {
+            args.push("--profile".to_string());
+            args.push(profile.clone());
+        } else {
+            args.push("--release".to_string());
+        }
+
+        args.push("--target-dir".to_string());
+        args.push(guest_target_dir.to_string());
+
+        args.push("--features".to_string());
+        let mut features = vec!["guest".to_string()];
+        features.extend(extra_features.iter().map(|&s| s.to_string()));
+        args.push(features.join(","));
+
+        args
+    }
+
+    fn resolve_elf_path(&self, guest_target_dir: &str) -> PathBuf {
+        let target_triple = if self.std {
+            "riscv64imac-zero-linux-musl"
+        } else {
+            "riscv64imac-unknown-none-elf"
+        };
+        let out_profile = self.profile.as_deref().unwrap_or("release");
+
+        PathBuf::from(guest_target_dir)
+            .join(target_triple)
+            .join(out_profile)
+            .join(&self.guest)
+    }
+}
+
+fn read_elf_at(path: &Path) -> Vec<u8> {
+    std::fs::read(path).unwrap_or_else(|_| panic!("could not read elf file: {}", path.display()))
+}
+
+fn compute_program_size(elf_contents: &[u8]) -> u64 {
+    let (_, _, program_end, _, _) = tracer::decode(elf_contents);
+    program_end - RAM_START_ADDRESS
+}
+
+/// Decode an ELF into instructions and memory initialization data.
+///
+/// Expands virtual instruction sequences (inline sequences) as part of decoding.
+///
+/// Returns `(instructions, memory_init_bytes, program_size)`.
+pub fn decode(elf: &[u8]) -> (Vec<Instruction>, Vec<(u64, u8)>, u64) {
+    let (mut instructions, raw_bytes, program_end, _, xlen) = tracer::decode(elf);
+    let program_size = program_end - RAM_START_ADDRESS;
+    let allocator = VirtualRegisterAllocator::default();
+
+    instructions = instructions
+        .into_iter()
+        .flat_map(|instr: Instruction| instr.inline_sequence(&allocator, xlen))
+        .collect();
+
+    (instructions, raw_bytes, program_size)
+}
+
+fn compose_command_line(program: &str, envs: &[(&str, String)], args: &[&str]) -> String {
+    fn has_ctrl(s: &str) -> bool {
+        s.chars()
+            .any(|c| c.is_control() && !matches!(c, '\t' | '\n' | '\r'))
+    }
+
+    fn quote_ansi_c(s: &str) -> String {
+        use std::fmt::Write as _;
+        let mut out = String::with_capacity(s.len() + 3);
+        out.push_str("$'");
+        for c in s.chars() {
+            match c {
+                '\n' => out.push_str("\\n"),
+                '\r' => out.push_str("\\r"),
+                '\t' => out.push_str("\\t"),
+                '\\' => out.push_str("\\\\"),
+                '\'' => out.push_str("\\'"),
+                c if c.is_control() => {
+                    let _ = write!(out, "\\x{:02x}", c as u32);
+                }
+                _ => out.push(c),
+            }
+        }
+        out.push('\'');
+        out
+    }
+
+    fn sh_quote(s: &str) -> String {
+        const SAFE: &str =
+            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_@%+=:,./-";
+        if !s.is_empty() && s.chars().all(|c| SAFE.contains(c)) {
+            s.to_string()
+        } else {
+            let mut out = String::with_capacity(s.len() + 2);
+            out.push('\'');
+            for ch in s.chars() {
+                if ch == '\'' {
+                    out.push_str("'\\''");
+                } else {
+                    out.push(ch);
+                }
+            }
+            out.push('\'');
+            out
+        }
+    }
+
+    let mut parts = Vec::new();
+
+    if !envs.is_empty() {
+        parts.push("env".to_string());
+        for &(k, ref v) in envs {
+            let v = v.as_str();
+            let q = if has_ctrl(v) {
+                quote_ansi_c(v)
+            } else {
+                sh_quote(v)
+            };
+            parts.push(format!("{k}={q}"));
+        }
+    }
+
+    parts.push(sh_quote(program));
+    parts.extend(args.iter().map(|&a| {
+        if has_ctrl(a) {
+            quote_ansi_c(a)
+        } else {
+            sh_quote(a)
+        }
+    }));
+
+    parts.join(" ")
+}
+
+#[cfg(test)]
+mod tests {
+    use
super::*; + use crate::Program; + + #[test] + fn compose_command_line_simple() { + let result = compose_command_line("jolt", &[], &["build", "-p", "guest"]); + assert_eq!(result, "jolt build -p guest"); + } + + #[test] + fn compose_command_line_empty_args() { + let result = compose_command_line("jolt", &[], &[]); + assert_eq!(result, "jolt"); + } + + #[test] + fn compose_command_line_quotes_spaces() { + let result = compose_command_line("jolt", &[], &["--path", "my dir/file"]); + assert_eq!(result, "jolt --path 'my dir/file'"); + } + + #[test] + fn compose_command_line_quotes_empty_arg() { + let result = compose_command_line("jolt", &[], &[""]); + assert_eq!(result, "jolt ''"); + } + + #[test] + fn compose_command_line_escapes_single_quotes() { + let result = compose_command_line("jolt", &[], &["it's"]); + assert_eq!(result, "jolt 'it'\\''s'"); + } + + #[test] + fn compose_command_line_escapes_control_chars() { + let result = compose_command_line("jolt", &[], &["a\x01b"]); + assert_eq!(result, "jolt $'a\\x01b'"); + } + + #[test] + fn compose_command_line_with_envs() { + let envs = vec![("FOO", "bar".to_string())]; + let result = compose_command_line("jolt", &envs, &["build"]); + assert_eq!(result, "env FOO=bar jolt build"); + } + + #[test] + fn compose_command_line_env_with_spaces() { + let envs = vec![("MY_VAR", "hello world".to_string())]; + let result = compose_command_line("jolt", &envs, &[]); + assert_eq!(result, "env MY_VAR='hello world' jolt"); + } + + #[test] + fn compose_command_line_env_with_control_chars() { + let envs = vec![("VAR", "val\x02ue".to_string())]; + let result = compose_command_line("jolt", &envs, &["run"]); + assert_eq!(result, "env VAR=$'val\\x02ue' jolt run"); + } + + #[test] + fn compose_command_line_preserves_safe_special_chars() { + let result = compose_command_line("jolt", &[], &["/tmp/path_@:,.+-/file"]); + assert_eq!(result, "jolt /tmp/path_@:,.+-/file"); + } + + #[test] + fn builder_new_defaults() { + let p = 
Program::new("test-guest"); + assert_eq!(p.guest, "test-guest"); + assert!(p.elf.is_none()); + assert!(p.elf_compute_advice.is_none()); + assert!(!p.std); + assert_eq!(p.heap_size, DEFAULT_HEAP_SIZE); + assert_eq!(p.stack_size, DEFAULT_STACK_SIZE); + } + + #[test] + fn builder_chaining() { + let mut p = Program::new("guest"); + let _ = p + .set_std(true) + .set_func("main") + .set_heap_size(1024) + .set_stack_size(2048) + .set_max_input_size(512) + .set_max_output_size(256) + .set_max_trusted_advice_size(128) + .set_max_untrusted_advice_size(64); + + assert!(p.std); + assert_eq!(p.func.as_deref(), Some("main")); + assert_eq!(p.heap_size, 1024); + assert_eq!(p.stack_size, 2048); + assert_eq!(p.max_input_size, 512); + assert_eq!(p.max_output_size, 256); + assert_eq!(p.max_trusted_advice_size, 128); + assert_eq!(p.max_untrusted_advice_size, 64); + } + + #[test] + fn builder_set_memory_config() { + let config = MemoryConfig { + heap_size: 100, + stack_size: 200, + max_input_size: 300, + max_untrusted_advice_size: 400, + max_trusted_advice_size: 500, + max_output_size: 600, + program_size: None, + }; + let mut p = Program::new("guest"); + let _ = p.set_memory_config(config); + + assert_eq!(p.heap_size, 100); + assert_eq!(p.stack_size, 200); + assert_eq!(p.max_input_size, 300); + assert_eq!(p.max_untrusted_advice_size, 400); + assert_eq!(p.max_trusted_advice_size, 500); + assert_eq!(p.max_output_size, 600); + } + + #[test] + fn builder_set_profile_and_backtrace() { + let mut p = Program::new("guest"); + let _ = p.set_profile("dev").set_backtrace("dwarf"); + assert_eq!(p.profile.as_deref(), Some("dev")); + assert_eq!(p.backtrace.as_deref(), Some("dwarf")); + } + + #[test] + fn elf_path_accessors_none_before_build() { + let p = Program::new("guest"); + assert!(p.elf_path().is_none()); + assert!(p.elf_compute_advice_path().is_none()); + } + + #[test] + fn guest_target_dir_regular() { + let p = Program::new("myguest"); + let dir = p.guest_target_dir("/tmp/targets", false); + 
assert_eq!(dir, "/tmp/targets/myguest-"); + } + + #[test] + fn guest_target_dir_compute_advice() { + let mut p = Program::new("myguest"); + let _ = p.set_func("entry"); + let dir = p.guest_target_dir("/tmp/targets", true); + assert_eq!(dir, "/tmp/targets/myguest-entry-compute-advice"); + } + + #[test] + fn resolve_elf_path_release() { + let p = Program::new("myguest"); + let path = p.resolve_elf_path("/tmp/targets/myguest-"); + assert_eq!( + path, + PathBuf::from("/tmp/targets/myguest-/riscv64imac-unknown-none-elf/release/myguest") + ); + } + + #[test] + fn resolve_elf_path_std_custom_profile() { + let mut p = Program::new("myguest"); + let _ = p.set_std(true).set_profile("dev"); + let path = p.resolve_elf_path("/tmp/dir"); + assert_eq!( + path, + PathBuf::from("/tmp/dir/riscv64imac-zero-linux-musl/dev/myguest") + ); + } + + #[test] + fn build_args_default() { + let p = Program::new("myguest"); + let args = p.build_args(&[], "/tmp/target-dir"); + assert!(args.contains(&"build".to_string())); + assert!(args.contains(&"-p".to_string())); + assert!(args.contains(&"myguest".to_string())); + assert!(args.contains(&"--release".to_string())); + assert!(args.contains(&"/tmp/target-dir".to_string())); + assert!(args.contains(&"guest".to_string())); + } + + #[test] + fn build_args_with_features() { + let p = Program::new("myguest"); + let args = p.build_args(&["compute_advice", "extra"], "/tmp/dir"); + let features_arg = args + .iter() + .skip_while(|a| a.as_str() != "--features") + .nth(1) + .unwrap(); + assert_eq!(features_arg, "guest,compute_advice,extra"); + } + + #[test] + fn build_args_std_mode() { + let mut p = Program::new("myguest"); + let _ = p.set_std(true); + let args = p.build_args(&[], "/tmp/dir"); + assert!(args.contains(&"--mode".to_string())); + assert!(args.contains(&"std".to_string())); + } + + #[test] + fn build_args_custom_profile() { + let mut p = Program::new("myguest"); + let _ = p.set_profile("dev"); + let args = p.build_args(&[], "/tmp/dir"); + 
assert!(args.contains(&"--profile".to_string())); + assert!(args.contains(&"dev".to_string())); + assert!(!args.contains(&"--release".to_string())); + } + + #[test] + fn program_debug_impl() { + let p = Program::new("test"); + let debug_str = format!("{p:?}"); + assert!(debug_str.contains("test")); + assert!(debug_str.contains("Program")); + } +}