diff --git a/Cargo.lock b/Cargo.lock
index fb2cca639..cc22d786a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2790,6 +2790,21 @@ dependencies = [
  "rand 0.8.5",
 ]
 
+[[package]]
+name = "jolt-profiling"
+version = "0.1.0"
+dependencies = [
+ "allocative",
+ "inferno 0.12.6",
+ "memory-stats",
+ "pprof",
+ "prost 0.14.3",
+ "sysinfo",
+ "tracing",
+ "tracing-chrome",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "jolt-sdk"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 935b71d83..6a4ca7078 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ keywords = ["SNARK", "cryptography", "proofs"]
 
 [workspace]
 members = [
+  "crates/jolt-profiling",
   "crates/jolt-field",
     "jolt-core",
     "tracer",
@@ -235,6 +236,7 @@ sha3 = "0.10.8"
 blake2 = "0.10"
 blake3 = { version = "1.5.0" }
 light-poseidon = "0.4"
+digest = "0.10"
 jolt-optimizations = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout" }
 dory = { package = "dory-pcs", version = "0.3.0", features = ["backends", "cache", "disk-persistence"] }
 
@@ -289,6 +291,8 @@ tracing-chrome = "0.7.1"
 tracing-subscriber = { version = "0.3.20", features = ["fmt", "env-filter"] }
 inferno = { version = "0.12.3" }
 allocative = { git = "https://github.com/facebookexperimental/allocative", rev = "85b773d85d526d068ce94724ff7a7b81203fc95e" }
+pprof = { version = "0.15", features = ["prost-codec", "flamegraph", "frame-pointer"] }
+prost = "0.14"
 
 # Parsing
 syn = { version = "2", features = ["full"] }
diff --git a/crates/jolt-profiling/Cargo.toml b/crates/jolt-profiling/Cargo.toml
new file mode 100644
index 000000000..d33348fab
--- /dev/null
+++ b/crates/jolt-profiling/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "jolt-profiling"
+version = "0.1.0"
+authors = ["Jolt Contributors"]
+edition = "2021"
+description = "Profiling and tracing infrastructure for the Jolt proving system"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/a16z/jolt"
+keywords = ["profiling", "tracing", "performance"]
+categories = ["development-tools::profiling"]
+publish = false
+
+[lints]
+workspace = true
+
+[features]
+default = []
+monitor = ["dep:sysinfo"]
+pprof = ["dep:pprof", "dep:prost"]
+allocative = ["dep:inferno", "dep:allocative"]
+
+[dependencies]
+tracing.workspace = true
+tracing-chrome.workspace = true
+tracing-subscriber.workspace = true
+inferno = { workspace = true, optional = true }
+allocative = { workspace = true, optional = true }
+
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
+memory-stats.workspace = true
+sysinfo = { workspace = true, optional = true }
+pprof = { workspace = true, optional = true }
+prost = { workspace = true, optional = true }
+
+[package.metadata.cargo-machete]
+# prost is required by pprof's prost-codec feature but not directly imported
+ignored = ["prost"]
diff --git a/crates/jolt-profiling/src/flamegraph.rs b/crates/jolt-profiling/src/flamegraph.rs
new file mode 100644
index 000000000..1a38fa3c0
--- /dev/null
+++ b/crates/jolt-profiling/src/flamegraph.rs
@@ -0,0 +1,55 @@
+//! Heap flamegraph generation from `allocative`-instrumented data structures.
+
+use std::{fs::File, io::Cursor, path::Path};
+
+use allocative::{Allocative, FlameGraphBuilder};
+use inferno::flamegraph::Options;
+
+use crate::units::{format_memory_size, BYTES_PER_GIB, BYTES_PER_MIB};
+
+/// Emits a DEBUG event reporting how much unique heap data `data` owns.
+///
+/// The size walk is skipped entirely unless DEBUG-level tracing is enabled.
+pub fn print_data_structure_heap_usage<T: Allocative>(label: &str, data: &T) {
+    if !tracing::enabled!(tracing::Level::DEBUG) {
+        return;
+    }
+    let heap_bytes = allocative::size_of_unique_allocated_data(data);
+    let usage = format_memory_size(heap_bytes as f64 / BYTES_PER_GIB);
+    tracing::debug!(label = label, usage = %usage, "heap allocation size");
+}
+
+/// Renders a [`FlameGraphBuilder`] to an SVG flamegraph file.
+///
+/// Uses `inferno` with MiB units and flame-chart mode, writing through a
+/// buffered writer that is flushed explicitly so late write errors are seen.
+/// Logs a warning and returns on I/O failure instead of panicking.
+pub fn write_flamegraph_svg<P: AsRef<Path>>(flamegraph: FlameGraphBuilder, path: P) {
+    let path = path.as_ref();
+    let mut opts = Options::default();
+    opts.color_diffusion = true;
+    opts.count_name = String::from("MiB");
+    opts.factor = 1.0 / BYTES_PER_MIB;
+    opts.flame_chart = true;
+
+    let input = Cursor::new(flamegraph.finish_and_write_flame_graph());
+    let mut output = match File::create(path) {
+        Ok(file) => std::io::BufWriter::new(file),
+        Err(e) => {
+            tracing::warn!(
+                path = %path.display(),
+                error = %e,
+                "failed to create flamegraph SVG file"
+            );
+            return;
+        }
+    };
+
+    if let Err(e) = inferno::flamegraph::from_reader(&mut opts, input, &mut output) {
+        tracing::warn!(path = %path.display(), error = %e, "failed to render flamegraph SVG");
+        return;
+    }
+    if let Err(e) = std::io::Write::flush(&mut output) {
+        tracing::warn!(path = %path.display(), error = %e, "failed to flush flamegraph SVG");
+    }
+}
diff --git a/crates/jolt-profiling/src/lib.rs b/crates/jolt-profiling/src/lib.rs
new file mode 100644
index 000000000..f13a74e7e
--- /dev/null
+++ b/crates/jolt-profiling/src/lib.rs
@@ -0,0 +1,97 @@
+//! Profiling and tracing infrastructure for the Jolt proving system.
+//!
+//! Provides a unified interface for performance analysis across all Jolt crates:
+//!
+//! - **Tracing subscriber setup** — configures `tracing-chrome` (Perfetto/Chrome JSON)
+//!   and `tracing-subscriber` (console output) for the host binary.
+//! - **Memory profiling** — tracks memory deltas across proving stages via `memory-stats`.
+//! - **System metrics monitoring** (`monitor` feature) — background thread sampling
+//!   CPU usage, memory, active cores, and thread count. Outputs structured counter events
+//!   compatible with the Perfetto postprocessing script.
+//! - **CPU profiling** (`pprof` feature) — scoped `pprof` guards that write
+//!   protobuf-encoded `.pb` profiles on drop.
+//! - **Heap flamegraphs** (`allocative` feature) — generates SVG flamegraphs from
+//!   `allocative`-instrumented data structures.
+//!
+//! # Usage
+//!
+//! Individual crates add `tracing` as a dependency and instrument their functions with
+//! `#[tracing::instrument]`. The host binary (e.g. `jolt-zkvm` CLI) depends on
+//! `jolt-profiling` to configure the subscriber that captures those spans.
+//!
+//! ```no_run
+//! use jolt_profiling::{setup_tracing, TracingFormat};
+//!
+//! let _guards = setup_tracing(
+//!     &[TracingFormat::Chrome],
+//!     "my_benchmark_20260306",
+//! );
+//! // All tracing spans from any Jolt crate now flow to Perfetto JSON output.
+//! ```
+//!
+//! # Feature Flags
+//!
+//! | Flag | Description |
+//! |------|-------------|
+//! | `monitor` | Background system metrics sampling (CPU, memory, cores) |
+//! | `pprof` | Scoped CPU profiling via `pprof` with `.pb` output |
+//! | `allocative` | Heap flamegraph generation from `allocative`-instrumented types |
+//!
+//! # Dependency Position
+//!
+//! This is a leaf crate — imported by host binaries and benchmarks.
+//! Library crates depend only on `tracing` for instrumentation.
+
+pub mod setup;
+
+#[cfg(not(target_arch = "wasm32"))]
+pub mod memory;
+
+#[cfg(all(not(target_arch = "wasm32"), feature = "monitor"))]
+pub mod monitor;
+
+mod pprof_guard;
+
+#[cfg(feature = "allocative")]
+pub mod flamegraph;
+#[cfg(feature = "allocative")]
+pub use flamegraph::{print_data_structure_heap_usage, write_flamegraph_svg};
+
+mod units;
+
+pub use setup::{setup_tracing, TracingFormat, TracingGuards};
+pub use units::{format_memory_size, BYTES_PER_GIB, BYTES_PER_MIB};
+
+#[cfg(not(target_arch = "wasm32"))]
+pub use memory::{
+    end_memory_tracing_span, print_current_memory_usage, report_memory_usage,
+    start_memory_tracing_span,
+};
+// wasm32 fallbacks: the `memory` module is compiled out there, so these are no-ops.
+#[cfg(target_arch = "wasm32")]
+pub fn start_memory_tracing_span(_label: &'static str) {}
+
+#[cfg(target_arch = "wasm32")]
+pub fn end_memory_tracing_span(_label: &'static str) {}
+
+#[cfg(target_arch = "wasm32")]
+pub fn report_memory_usage() {}
+
+#[cfg(target_arch = "wasm32")]
+pub fn print_current_memory_usage(_label: &str) {}
+
+#[cfg(all(not(target_arch = "wasm32"), feature = "monitor"))]
+pub use monitor::MetricsMonitor;
+// wasm32 stand-in for the monitor: `start` ignores its argument and spawns nothing.
+#[cfg(all(target_arch = "wasm32", feature = "monitor"))]
+#[must_use = "monitor stops when dropped"]
+pub struct MetricsMonitor;
+
+#[cfg(all(target_arch = "wasm32", feature = "monitor"))]
+impl MetricsMonitor {
+    pub fn start(_interval_secs: f64) -> Self {
+        Self
+    }
+}
+
+pub use pprof_guard::PprofGuard;
diff --git a/crates/jolt-profiling/src/memory.rs b/crates/jolt-profiling/src/memory.rs
new file mode 100644
index 000000000..4970eab02
--- /dev/null
+++ b/crates/jolt-profiling/src/memory.rs
@@ -0,0 +1,122 @@
+//! Memory profiling utilities.
+//!
+//! Tracks physical memory deltas across labeled spans. Call
+//! [`start_memory_tracing_span`] before the section and
+//! [`end_memory_tracing_span`] after, then [`report_memory_usage`] to
+//! log all collected deltas.
+
+use memory_stats::memory_stats;
+use std::{
+    collections::BTreeMap,
+    sync::{LazyLock, Mutex},
+};
+
+use crate::units::{format_memory_size, BYTES_PER_GIB};
+// Open spans: label -> physical memory (GiB) at start. Closed spans: label -> delta (GiB).
+static MEMORY_USAGE_MAP: LazyLock<Mutex<BTreeMap<&'static str, f64>>> =
+    LazyLock::new(|| Mutex::new(BTreeMap::new()));
+static MEMORY_DELTA_MAP: LazyLock<Mutex<BTreeMap<&'static str, f64>>> =
+    LazyLock::new(|| Mutex::new(BTreeMap::new()));
+
+/// Records the current physical memory usage at the start of a labeled span.
+///
+/// Logs a warning and skips recording if memory stats are unavailable. If the
+/// label is already open, its start value is overwritten and a warning logged.
+pub fn start_memory_tracing_span(label: &'static str) {
+    let Some(stats) = memory_stats() else {
+        tracing::warn!(
+            span = label,
+            "memory stats unavailable, skipping span start"
+        );
+        return;
+    };
+    let memory_gib = stats.physical_mem as f64 / BYTES_PER_GIB;
+    let mut map = MEMORY_USAGE_MAP.lock().unwrap_or_else(|e| e.into_inner());
+    if map.insert(label, memory_gib).is_some() {
+        tracing::warn!(span = label, "duplicate memory span label, overwriting");
+    }
+}
+
+/// Ends the span opened under `label` and stores its memory delta in GiB.
+///
+/// If memory stats cannot be read, or `label` has no open span, a warning is
+/// logged and nothing is stored.
+pub fn end_memory_tracing_span(label: &'static str) {
+    let end_gib = match memory_stats() {
+        Some(stats) => stats.physical_mem as f64 / BYTES_PER_GIB,
+        None => {
+            tracing::warn!(span = label, "memory stats unavailable, skipping span end");
+            return;
+        }
+    };
+
+    let opened = MEMORY_USAGE_MAP
+        .lock()
+        .unwrap_or_else(|e| e.into_inner())
+        .remove(label);
+    let Some(start_gib) = opened else {
+        tracing::warn!(span = label, "no open memory span, skipping span end");
+        return;
+    };
+
+    let mut deltas = MEMORY_DELTA_MAP.lock().unwrap_or_else(|e| e.into_inner());
+    let _ = deltas.insert(label, end_gib - start_gib);
+}
+
+/// Warns about still-open spans, then logs each completed span's memory delta.
+pub fn report_memory_usage() {
+    let open_spans = MEMORY_USAGE_MAP.lock().unwrap_or_else(|e| e.into_inner());
+    for open_label in open_spans.keys() {
+        tracing::warn!(span = open_label, "unclosed memory tracing span");
+    }
+
+    let deltas = MEMORY_DELTA_MAP.lock().unwrap_or_else(|e| e.into_inner());
+    for (label, delta_gib) in &*deltas {
+        tracing::info!(
+            span = label,
+            delta = %format_memory_size(*delta_gib),
+            "memory delta"
+        );
+    }
+}
+
+/// Emits a DEBUG event with the process's physical memory usage right now.
+///
+/// Does nothing unless DEBUG-level tracing is enabled.
+pub fn print_current_memory_usage(label: &str) {
+    if !tracing::enabled!(tracing::Level::DEBUG) {
+        return;
+    }
+
+    let Some(stats) = memory_stats() else {
+        tracing::debug!(label = label, "memory stats unavailable");
+        return;
+    };
+    let usage = format_memory_size(stats.physical_mem as f64 / BYTES_PER_GIB);
+    tracing::debug!(label = label, usage = %usage, "current memory usage");
+}
+
+#[cfg(test)]
+#[expect(clippy::unwrap_used)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn memory_span_start_end_records_delta() {
+        start_memory_tracing_span("test_span_lifecycle");
+        end_memory_tracing_span("test_span_lifecycle");
+        let map = MEMORY_DELTA_MAP.lock().unwrap();
+        assert!(map.contains_key("test_span_lifecycle"));
+    }
+    // Opening the same label twice must only warn (the second start overwrites the first).
+    #[test]
+    fn duplicate_span_warns_without_panic() {
+        start_memory_tracing_span("test_span_dup");
+        start_memory_tracing_span("test_span_dup");
+    }
+    // Ending a label that was never started must only warn.
+    #[test]
+    fn end_without_start_warns_without_panic() {
+        end_memory_tracing_span("test_span_nonexistent");
+    }
+}
diff --git a/crates/jolt-profiling/src/monitor.rs b/crates/jolt-profiling/src/monitor.rs
new file mode 100644
index 000000000..f021dee60
--- /dev/null
+++ b/crates/jolt-profiling/src/monitor.rs
@@ -0,0 +1,103 @@
+//! Background system metrics monitor.
+//!
+//! Spawns a thread that periodically samples CPU usage, memory, active cores,
+//! and thread count. Metrics are emitted as `tracing::debug!` events with
+//! structured `counters.*` fields, compatible with the Perfetto postprocessing
+//! script (`scripts/postprocess_trace.py`).
+
+use memory_stats::memory_stats;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::thread::{self, JoinHandle};
+use std::time::Duration;
+use sysinfo::System;
+
+use crate::units::BYTES_PER_GIB;
+
+/// Background monitor that samples system metrics at a fixed interval.
+///
+/// Drop the monitor to terminate the background thread. The destructor
+/// signals the thread, wakes it if it is waiting, and joins it.
+#[must_use = "monitor stops when dropped"]
+pub struct MetricsMonitor {
+    handle: Option<JoinHandle<()>>,
+    stop_flag: Arc<AtomicBool>,
+}
+
+impl MetricsMonitor {
+    /// Starts the monitor with the given sampling interval (in seconds).
+    ///
+    /// Spawns a background thread named `"metrics-monitor"` that logs:
+    /// - `counters.memory_gib` — physical memory usage
+    /// - `counters.cpu_percent` — global CPU utilization
+    /// - `counters.cores_active_avg` — average active cores
+    /// - `counters.cores_active` — cores with >0.1% usage
+    /// - `counters.thread_count` — active thread count (Linux only, 0 elsewhere)
+    pub fn start(interval_secs: f64) -> Self {
+        let stop_flag = Arc::new(AtomicBool::new(false));
+        let stop = stop_flag.clone();
+
+        let spawn_result = thread::Builder::new()
+            .name("metrics-monitor".to_string())
+            .spawn(move || {
+                let interval = Duration::from_millis(((interval_secs * 1000.0) as u64).max(50));
+                let mut system = System::new();
+
+                // sysinfo derives CPU usage from the previous refresh, so take a
+                // baseline now; otherwise the first logged sample is unreliable.
+                system.refresh_cpu_all();
+                thread::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL);
+
+                while !stop.load(Ordering::Acquire) {
+                    system.refresh_cpu_all();
+                    let memory_gib = memory_stats()
+                        .map(|s| s.physical_mem as f64 / BYTES_PER_GIB)
+                        .unwrap_or(0.0);
+                    let cpu_percent = system.global_cpu_usage();
+                    let cores_active_avg = cpu_percent / 100.0 * (system.cpus().len() as f32);
+                    let active_cores = system
+                        .cpus()
+                        .iter()
+                        .filter(|cpu| cpu.cpu_usage() > 0.1)
+                        .count();
+
+                    #[cfg(target_os = "linux")]
+                    let active_threads = std::fs::read_dir("/proc/self/task")
+                        .map(|entries| entries.count())
+                        .unwrap_or(0);
+                    #[cfg(not(target_os = "linux"))]
+                    let active_threads = 0_usize;
+
+                    tracing::debug!(
+                        counters.memory_gib = memory_gib,
+                        counters.cpu_percent = cpu_percent,
+                        counters.cores_active_avg = cores_active_avg,
+                        counters.cores_active = active_cores,
+                        counters.thread_count = active_threads,
+                    );
+                    // Parked rather than sleeping so `Drop` can cut the wait short.
+                    thread::park_timeout(interval);
+                }
+                tracing::info!("MetricsMonitor stopping");
+            });
+
+        let handle = match spawn_result {
+            Ok(h) => Some(h),
+            Err(e) => {
+                tracing::warn!(error = %e, "failed to spawn metrics monitor thread");
+                None
+            }
+        };
+        MetricsMonitor { handle, stop_flag }
+    }
+}
+
+impl Drop for MetricsMonitor {
+    fn drop(&mut self) {
+        self.stop_flag.store(true, Ordering::Release);
+        if let Some(handle) = self.handle.take() {
+            // Wake a parked sampler so the join does not wait out the interval.
+            handle.thread().unpark();
+            let _ = handle.join();
+        }
+    }
+}
diff --git a/crates/jolt-profiling/src/pprof_guard.rs b/crates/jolt-profiling/src/pprof_guard.rs
new file mode 100644
index 000000000..376c2d417
--- /dev/null
+++ b/crates/jolt-profiling/src/pprof_guard.rs
@@ -0,0 +1,147 @@
+//! Scoped CPU profiler guard for `pprof` integration.
+//!
+//! Use the [`pprof_scope!`] macro to create a guard that starts a CPU profiler
+//! on creation and writes a protobuf-encoded `.pb` profile on drop.
+//!
+//! Requires the `pprof` feature. Without it, the macro evaluates to `None`.
+//!
+//! ```no_run
+//! use jolt_profiling::pprof_scope;
+//!
+//! let _guard = pprof_scope!("my_function");
+//! // ... profiled code ...
+//! // guard drops here, writing benchmark-runs/pprof/my_function.pb
+//! ```
+//!
+//! View with: `go tool pprof -http=:8080 benchmark-runs/pprof/my_function.pb`
+
+/// Owns a running `pprof` profiler; the collected profile is written on drop.
+#[cfg(feature = "pprof")]
+pub struct PprofGuard {
+    guard: pprof::ProfilerGuard<'static>,
+    label: &'static str,
+}
+
+#[cfg(feature = "pprof")]
+impl PprofGuard {
+    /// Starts a profiler sampling at `frequency` and tags it with `label`.
+    ///
+    /// The output file is named `{PPROF_PREFIX}{label}.pb`. Returns `None`,
+    /// after logging a warning, when the profiler cannot be initialized.
+    /// Usually invoked through the [`pprof_scope!`] macro.
+    pub fn new(label: &'static str, frequency: i32) -> Option<Self> {
+        let built = pprof::ProfilerGuardBuilder::default()
+            .frequency(frequency)
+            .blocklist(&["libc", "libgcc", "pthread", "vdso"])
+            .build();
+        let guard = built
+            .inspect_err(|e| {
+                tracing::warn!(label = label, error = %e, "failed to initialize profiler");
+            })
+            .ok()?;
+        Some(Self { guard, label })
+    }
+}
+
+/// Stub type when `pprof` feature is not enabled.
+#[cfg(not(feature = "pprof"))]
+pub struct PprofGuard;
+
+#[cfg(not(feature = "pprof"))]
+impl PprofGuard {
+    /// Always returns `None`: no profiler exists without the `pprof` feature.
+    pub fn new(_label: &'static str, _frequency: i32) -> Option<Self> {
+        None
+    }
+}
+
+#[cfg(feature = "pprof")]
+impl Drop for PprofGuard {
+    fn drop(&mut self) {
+        use pprof::protos::Message;
+        use std::io::Write;
+        let Ok(report) = self.guard.report().build() else {
+            tracing::warn!(label = self.label, "failed to build pprof report");
+            return;
+        };
+        let prefix = crate::setup::PPROF_PREFIX
+            .get()
+            .map(String::as_str)
+            .unwrap_or("benchmark-runs/pprof/");
+        let filename = format!("{prefix}{}.pb", self.label);
+        // Encode first so a failed conversion never leaves an empty file behind.
+        let mut buf = Vec::new();
+        if !report.pprof().is_ok_and(|p| p.encode(&mut buf).is_ok()) {
+            tracing::warn!(path = %filename, "failed to encode pprof profile");
+            return;
+        }
+        if let Some(dir) = std::path::Path::new(&filename).parent() {
+            let _ = std::fs::create_dir_all(dir);
+        }
+        let Ok(mut f) = std::fs::File::create(&filename) else {
+            tracing::warn!(path = %filename, "failed to create pprof output file");
+            return;
+        };
+        if f.write_all(&buf).is_ok() {
+            tracing::info!(path = %filename, "wrote pprof profile");
+        } else {
+            tracing::warn!(path = %filename, "failed to write pprof data");
+        }
+    }
+}
+
+/// Creates a scoped CPU profiler guard.
+///
+/// Evaluates to `Some(PprofGuard)`, which writes a `.pb` file on drop, when this
+/// crate's `pprof` feature is on, and to `None` otherwise. [`PprofGuard::new`]
+/// decides: a `cfg` in the expansion would test the *calling* crate's features.
+///
+/// When called without arguments, uses `"default"` as the label.
+///
+/// Configure via environment variables:
+/// - `PPROF_PREFIX` — output directory prefix (default: `"benchmark-runs/pprof/"`)
+/// - `PPROF_FREQ` — sampling frequency in Hz (default: 100)
+#[macro_export]
+macro_rules! pprof_scope {
+    ($label:expr) => {
+        $crate::PprofGuard::new(
+            $label,
+            ::std::env::var("PPROF_FREQ")
+                .ok()
+                .and_then(|freq| freq.parse::<i32>().ok())
+                .unwrap_or(100),
+        )
+    };
+    () => {
+        $crate::pprof_scope!("default")
+    };
+}
+
+#[cfg(test)]
+mod tests {
+    // With `pprof` on, only dropping the guard is exercised: presumably pprof allows one
+    // running profiler per process, so asserting `Some` would race across test threads.
+    #[test]
+    fn pprof_scope_without_feature_returns_none() {
+        let guard = pprof_scope!("test_label");
+        #[cfg(not(feature = "pprof"))]
+        assert!(guard.is_none());
+        #[cfg(feature = "pprof")]
+        drop(guard);
+    }
+
+    #[test]
+    fn pprof_scope_no_arg_variant() {
+        let guard = pprof_scope!();
+        #[cfg(not(feature = "pprof"))]
+        assert!(guard.is_none());
+        #[cfg(feature = "pprof")]
+        drop(guard);
+    }
+
+    #[test]
+    #[cfg(not(feature = "pprof"))]
+    fn pprof_guard_stub_exists() {
+        let _guard = super::PprofGuard;
+    }
+}
diff --git a/crates/jolt-profiling/src/setup.rs b/crates/jolt-profiling/src/setup.rs
new file mode 100644
index 000000000..8e7bacfc0
--- /dev/null
+++ b/crates/jolt-profiling/src/setup.rs
@@ -0,0 +1,139 @@
+//! Tracing subscriber configuration for Perfetto and console output.
+//!
+//! Call [`setup_tracing`] once at binary startup. The returned [`TracingGuards`]
+//! must be held alive for the duration of the program — dropping them flushes
+//! and closes trace files.
+
+use std::any::Any;
+use std::sync::OnceLock;
+
+use tracing_chrome::ChromeLayerBuilder;
+use tracing_subscriber::{fmt::format::FmtSpan, prelude::*, EnvFilter};
+
+/// Thread-safe storage for the pprof output prefix.
+///
+/// Initialized once during [`setup_tracing`] and read by [`PprofGuard`](crate::PprofGuard)
+/// on drop. Avoids `std::env::set_var` which is unsound in multi-threaded contexts.
+pub(crate) static PPROF_PREFIX: OnceLock<String> = OnceLock::new();
+
+/// Output format for tracing subscribers.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TracingFormat {
+    /// Extra console layer that reports span close events, compact formatting.
+    Default,
+    /// Chrome/Perfetto JSON trace file. View at <https://ui.perfetto.dev/>.
+    Chrome,
+}
+
+/// Opaque container for tracing flush guards.
+///
+/// Holds the Chrome-trace flush guard and, with the `monitor` feature, the
+/// metrics monitor. Dropping it flushes trace data and stops the monitor.
+#[must_use = "guards must be held alive for the duration of profiling"]
+pub struct TracingGuards(#[expect(dead_code)] Vec<Box<dyn Any>>);
+
+/// Initializes the global tracing subscriber with the requested output formats.
+///
+/// Always installs a minimal log layer that respects `RUST_LOG`. Additional
+/// layers are added based on the `formats` slice.
+///
+/// Returns a [`TracingGuards`] value that **must be kept alive** until the
+/// program exits. Dropping the guards flushes pending trace data.
+///
+/// # Chrome format
+///
+/// Writes to `benchmark-runs/perfetto_traces/{trace_name}.json`.
+/// Open in [Perfetto UI](https://ui.perfetto.dev/) for timeline visualization.
+///
+/// # Panics
+///
+/// Panics if called more than once (the global subscriber can only be set once).
+pub fn setup_tracing(formats: &[TracingFormat], trace_name: &str) -> TracingGuards {
+    let _ = PPROF_PREFIX.get_or_init(|| {
+        std::env::var("PPROF_PREFIX")
+            .unwrap_or_else(|_| format!("benchmark-runs/pprof/{trace_name}_"))
+    });
+
+    let mut layers = Vec::new();
+    let mut guards: Vec<Box<dyn Any>> = vec![];
+    let log_layer = tracing_subscriber::fmt::layer()
+        .compact()
+        .with_target(false)
+        .with_file(false)
+        .with_line_number(false)
+        .with_thread_ids(false)
+        .with_thread_names(false)
+        .with_filter(EnvFilter::from_default_env())
+        .boxed();
+    layers.push(log_layer);
+
+    if formats.contains(&TracingFormat::Default) {
+        let collector_layer = tracing_subscriber::fmt::layer()
+            .with_span_events(FmtSpan::CLOSE)
+            .compact()
+            .with_target(false)
+            .with_file(false)
+            .with_line_number(false)
+            .with_thread_ids(false)
+            .with_thread_names(false)
+            .boxed();
+        layers.push(collector_layer);
+    }
+    let mut chrome_trace_file = None;
+    if formats.contains(&TracingFormat::Chrome) {
+        let trace_file = format!("benchmark-runs/perfetto_traces/{trace_name}.json");
+        let _ = std::fs::create_dir_all("benchmark-runs/perfetto_traces");
+        let (chrome_layer, guard) = ChromeLayerBuilder::new()
+            .include_args(true)
+            .file(&trace_file)
+            .build();
+        layers.push(chrome_layer.boxed());
+        guards.push(Box::new(guard));
+        chrome_trace_file = Some(trace_file);
+    }
+
+    tracing_subscriber::registry().with(layers).init();
+    // Announced only after `init()`: before it no subscriber exists to receive the event.
+    if let Some(trace_file) = chrome_trace_file {
+        tracing::info!("Chrome tracing enabled. Output: {trace_file}");
+    }
+
+    #[cfg(all(not(target_arch = "wasm32"), feature = "monitor"))]
+    guards.push(Box::new({
+        tracing::info!(
+            "Starting MetricsMonitor — run python3 scripts/postprocess_trace.py on the output"
+        );
+        crate::monitor::MetricsMonitor::start(
+            std::env::var("MONITOR_INTERVAL")
+                .unwrap_or_else(|_| "0.1".to_string())
+                .parse::<f64>()
+                .unwrap_or(0.1),
+        )
+    }));
+    TracingGuards(guards)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn tracing_format_is_copy() {
+        let original = TracingFormat::Chrome;
+        let duplicate = original;
+        // Reading `original` after the assignment compiles only for `Copy` types.
+        assert_eq!(original, duplicate);
+    }
+
+    #[test]
+    fn tracing_format_debug() {
+        let rendered = format!("{:?}", TracingFormat::Default);
+        assert_eq!(rendered, "Default");
+    }
+
+    #[test]
+    fn tracing_format_eq() {
+        assert_eq!(TracingFormat::Chrome, TracingFormat::Chrome);
+        assert_ne!(TracingFormat::Chrome, TracingFormat::Default);
+    }
+}
diff --git a/crates/jolt-profiling/src/units.rs b/crates/jolt-profiling/src/units.rs
new file mode 100644
index 000000000..b8cf4288d
--- /dev/null
+++ b/crates/jolt-profiling/src/units.rs
@@ -0,0 +1,55 @@
+//! Memory size unit constants and formatting helpers.
+
+/// Bytes per gibibyte (GiB, binary, 2^30).
+pub const BYTES_PER_GIB: f64 = 1_073_741_824.0;
+
+/// Bytes per mebibyte (MiB, binary, 2^20).
+pub const BYTES_PER_MIB: f64 = 1_048_576.0;
+
+/// Renders a size expressed in GiB as a human-readable string.
+///
+/// Magnitudes of at least 1 GiB print as GiB; smaller ones are converted to MiB.
+pub fn format_memory_size(gib: f64) -> String {
+    if gib.abs() >= 1.0 {
+        return format!("{gib:.2} GiB");
+    }
+    let mib = gib * (BYTES_PER_GIB / BYTES_PER_MIB);
+    format!("{mib:.2} MiB")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn format_large_value_uses_gib() {
+        assert_eq!(format_memory_size(2.5), "2.50 GiB");
+    }
+
+    #[test]
+    fn format_exactly_one_gib() {
+        assert_eq!(format_memory_size(1.0), "1.00 GiB");
+    }
+
+    #[test]
+    fn format_small_value_uses_mib() {
+        assert_eq!(format_memory_size(0.5), "512.00 MiB");
+    }
+
+    #[test]
+    fn format_zero() {
+        assert_eq!(format_memory_size(0.0), "0.00 MiB");
+    }
+
+    #[test]
+    fn format_tiny_value() {
+        // 0.001 GiB is 1.024 MiB, which rounds to two decimals as 1.02.
+        assert_eq!(format_memory_size(0.001), "1.02 MiB");
+    }
+
+    #[test]
+    fn constants_are_correct() {
+        assert_eq!(BYTES_PER_GIB, (1u64 << 30) as f64);
+        assert_eq!(BYTES_PER_MIB, (1u64 << 20) as f64);
+    }
+}