diff --git a/.github/workflows/startup-bench.yml b/.github/workflows/startup-bench.yml index 2b8a5b20..d39926bb 100644 --- a/.github/workflows/startup-bench.yml +++ b/.github/workflows/startup-bench.yml @@ -1,13 +1,19 @@ name: Startup Bench -# Two layers, both run in this workflow: +# Three layers, all run in this workflow: # -# 1. **Divan micro-bench** — `cargo bench --bench startup`. Pure-compute -# hot paths (Message::serialize/deserialize, kernel_cmdline, -# getrandom). No KVM, no nested virt, no L2 boot — same wall-clock -# cost on every Linux runner. Cheap regression gate. +# 1. **Divan micro-bench (startup)** — `cargo bench --bench startup`. +# Pure-compute hot paths (Message::serialize/deserialize, +# kernel_cmdline, getrandom). No KVM, no nested virt, no L2 boot — +# same wall-clock cost on every Linux runner. Cheap regression gate. # -# 2. **Wall-clock harness** — `voidbox-startup-bench --iters 20 +# 2. **Divan micro-bench (network)** — `cargo bench --bench network`. +# SLIRP hot paths (process_syn, poll_idle, process_arp_request, +# poll_with_n_flows, dns_cache_hit, dns_cache_miss). Also pure +# compute, no nested virt — stable regression gate for the network +# stack without requiring KVM or a real VM boot. +# +# 3. **Wall-clock harness** — `voidbox-startup-bench --iters 20 # --breakdown`. Boots a real KVM VM through the slim kernel + test # initramfs and measures cold-boot + warm-restore p50/p95/p99 end # to end. Informational only on this runner: the GitHub-hosted @@ -161,14 +167,37 @@ jobs: echo '```' } >> "$GITHUB_STEP_SUMMARY" - - name: Run wall-clock harness (informational) - # No threshold gate — Azure nested-virt is slower than the - # bare-metal targets the verify-skill thresholds were tuned for. - # `continue-on-error` keeps the workflow green even if the - # harness fails outright (e.g. missing /dev/vhost-vsock on a - # future runner image change). The artifact preserves the log - # either way. 
- continue-on-error: true + - name: Run network divan micro-bench (regression gate) + # Same regression-detection role as the startup divan step, but + # for SLIRP hot paths: process_syn, poll_idle, process_arp_request, + # poll_with_n_flows, dns_cache_hit, dns_cache_miss. Pure compute, + # no nested virt — stable across CI hosts. Output captured for + # artifact + step summary. + run: | + cargo bench --bench network 2>&1 | tee target/tmp/divan-network.log + + { + echo + echo "## Divan network micro-bench (cargo bench --bench network)" + echo + echo '```' + grep -E 'fastest|median|slowest|^[a-z_]+\.' target/tmp/divan-network.log \ + || tail -40 target/tmp/divan-network.log + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Run wall-clock harness (strict) + # NO `continue-on-error` — was previously silently masking the + # vhost/userspace vsock backend mismatch on warm restore (root + # cause: `capture_snapshot` was building a Sandbox without + # `.enable_snapshots(true)` so vhost-vsock was selected, but + # `from_snapshot` always restores into userspace vsock; vring + # state lives in the kernel's vhost-vsock module and isn't part + # of our snapshot, so the restored userspace device couldn't + # accept connections and every host connect timed out). + # Threshold gate stays informal — Azure nested-virt is slower + # than the bare-metal Fedora 43 / KVM targets the verify-skill + # thresholds were tuned for, but the harness MUST exit 0. env: ITERS: ${{ inputs.iters || '20' }} VOID_BOX_KERNEL: ${{ github.workspace }}/target/vmlinux-slim-x86_64 @@ -194,10 +223,51 @@ jobs: echo '```' } >> "$GITHUB_STEP_SUMMARY" + - name: Build voidbox-network-bench (release) + # Network wall-clock harness: boots one VM with `network(true)`, + # measures TCP throughput, RR/CRR latency, UDP DNS qps, and ICMP + # RR latency. Mirror the startup harness build step. 
+ run: cargo build --release --bin voidbox-network-bench + + - name: Run voidbox-network-bench (network wall-clock harness) + # NO `continue-on-error` here — unlike the startup-bench warm + # phase, this harness has well-defined failure modes that we + # want to surface in CI. A regression like the setuid-busybox + # bug fixed at 77dfc67 (Phase 1.6 → ECONNRESET on every + # connect for `network(true)` VMs) would otherwise hide behind + # `continue-on-error`. If this step is genuinely flaky on the + # runner image, fix the runner image — don't mask the signal. + env: + VOID_BOX_KERNEL: ${{ github.workspace }}/target/vmlinux-slim-x86_64 + VOID_BOX_INITRAMFS: /tmp/void-box-test-rootfs.cpio.gz + run: | + if [ ! -e /dev/vhost-vsock ]; then + echo "::warning::/dev/vhost-vsock not available; skipping voidbox-network-bench" + exit 0 + fi + ls -la "$VOID_BOX_KERNEL" "$VOID_BOX_INITRAMFS" + ./target/release/voidbox-network-bench --iterations 3 \ + --output target/tmp/network-bench.json 2>&1 \ + | tee target/tmp/network-bench.log + + { + echo + echo "## Network wall-clock harness (voidbox-network-bench --iterations 3)" + echo + echo "Metric names mirror passt's published table (passt.top/passt) so a" + echo "future side-by-side comparison run on the same host is plug-compatible." 
+ echo + echo '```json' + cat target/tmp/network-bench.json + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + - name: Upload bench logs if: always() uses: actions/upload-artifact@v4 with: name: startup-bench-${{ github.run_id }} - path: target/tmp/*.log + path: | + target/tmp/*.log + target/tmp/*.json retention-days: 30 diff --git a/Cargo.toml b/Cargo.toml index f204f9a8..9443b736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,6 +120,9 @@ divan = "0.1" default = [] # Enable full OpenTelemetry integration (OTLP export, trace context propagation) opentelemetry = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:opentelemetry-otlp"] +# Expose internal SlirpBackend helpers (insert_synthetic_synsent_entry, etc.) +# for use in benches/. Never enable in production builds. +bench-helpers = [] [[bin]] name = "voidbox" @@ -170,11 +173,20 @@ path = "tests/oci_integration.rs" name = "observe_codex" path = "tests/observe_codex.rs" +[[test]] +name = "network_baseline" +path = "tests/network_baseline.rs" + [[bench]] name = "startup" path = "benches/startup.rs" harness = false +[[bench]] +name = "network" +path = "benches/network.rs" +harness = false + [[bin]] name = "voidbox-startup-bench" path = "src/bin/voidbox-startup-bench/main.rs" @@ -183,6 +195,10 @@ path = "src/bin/voidbox-startup-bench/main.rs" name = "voidbox-rpc-bench" path = "src/bin/voidbox-rpc-bench/main.rs" +[[bin]] +name = "voidbox-network-bench" +path = "src/bin/voidbox-network-bench/main.rs" + [workspace] members = ["guest-agent", "void-box-protocol", "claudio", "voidbox-oci", "void-message", "void-mcp"] diff --git a/benches/network.rs b/benches/network.rs new file mode 100644 index 00000000..6fd8720a --- /dev/null +++ b/benches/network.rs @@ -0,0 +1,1035 @@ +//! Divan micro-benchmarks for SLIRP hot paths. +//! +//! Mirrors `benches/startup.rs` in shape. Job: regression detection +//! for the per-packet hot path on the vCPU and net-poll threads. +//! +//! 
Run with: `cargo bench --bench network` + +#[cfg(target_os = "linux")] +use divan::{counter::BytesCount, Bencher}; +#[cfg(target_os = "linux")] +use smoltcp::wire::{ + ArpOperation, ArpPacket, ArpRepr, EthernetAddress, EthernetFrame, EthernetProtocol, + EthernetRepr, Icmpv4Packet, Icmpv4Repr, IpAddress, IpProtocol, Ipv4Packet, Ipv4Repr, + TcpControl, TcpPacket, TcpRepr, UdpPacket, UdpRepr, +}; +#[cfg(target_os = "linux")] +use void_box::network::slirp::{ + SlirpBackend, GATEWAY_MAC, GUEST_MAC, SLIRP_DNS_IP, SLIRP_GATEWAY_IP, SLIRP_GUEST_IP, +}; + +fn main() { + // SLIRP-using benches are Linux-only (smoltcp dep is `cfg(target_os = + // "linux")` in Cargo.toml). On other platforms, `divan::main()` runs + // with zero registered benches and exits 0 — that's the right shape + // for cross-platform CI which runs `cargo bench --no-run` to compile- + // check the bench binary. + #[cfg(target_os = "linux")] + divan::main(); + #[cfg(not(target_os = "linux"))] + eprintln!("benches/network.rs: SLIRP benches are Linux-only; nothing to run here"); +} + +// All bench functions and helpers below are Linux-only (depend on smoltcp +// + the SLIRP backend, which are themselves `cfg(target_os = "linux")` +// in the workspace Cargo.toml). Wrapping in a module keeps the cfg gating +// in one place; on macOS the module compiles to nothing and `main()` above +// short-circuits before any of these are referenced. 
+#[cfg(target_os = "linux")] +mod linux_benches { + use super::*; + use std::net::TcpListener; + use std::thread; + use std::time::Duration; + + fn build_syn(src_port: u16, dst_port: u16) -> Vec { + let tcp = TcpRepr { + src_port, + dst_port, + control: TcpControl::Syn, + seq_number: smoltcp::wire::TcpSeqNumber(1000), + ack_number: None, + window_len: 65535, + window_scale: None, + max_seg_size: None, + sack_permitted: false, + sack_ranges: [None, None, None], + payload: &[], + }; + let ip = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_GATEWAY_IP, + next_header: IpProtocol::Tcp, + payload_len: tcp.buffer_len(), + hop_limit: 64, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip.buffer_len() + tcp.buffer_len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let mut ipp = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip.emit(&mut ipp, &Default::default()); + let mut tcpp = TcpPacket::new_unchecked(&mut buf[14 + ip.buffer_len()..]); + tcp.emit( + &mut tcpp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(SLIRP_GATEWAY_IP), + &Default::default(), + ); + buf + } + + #[divan::bench] + fn process_syn(bencher: Bencher) { + let frame = build_syn(49152, 1); + bencher.bench_local(|| { + let mut stack = SlirpBackend::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&frame)); + }); + } + + /// Time `SlirpBackend::process_guest_frame` for a single UDP datagram. + /// + /// Mirrors `process_syn` shape: build the frame once outside the timed + /// loop, fresh stack per iteration. Establishes UDP per-frame cost + /// for cross-phase regression detection. 
+ #[divan::bench] + fn process_udp_frame(bencher: Bencher) { + let frame = build_udp_frame_for_bench(49152, 8080, b"x"); + bencher.bench_local(|| { + let mut stack = SlirpBackend::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&frame)); + }); + } + + /// Time `SlirpBackend::process_guest_frame` for a single ICMP echo + /// request. Note: a fresh stack means the unprivileged ICMP socket is + /// opened on every iteration, so this measures the full + /// `open_icmp_socket + insert + send_to` path. If the host's + /// `net.ipv4.ping_group_range` excludes the calling GID, the underlying + /// `socket()` call returns EACCES and `process_guest_frame` returns Ok + /// without touching `flow_table` — divan's measurement still completes + /// but `flow_table` stays empty. That's fine for regression detection. + #[divan::bench] + fn process_icmp_echo_request(bencher: Bencher) { + let frame = build_icmp_echo_for_bench(0xbeef, 1); + bencher.bench_local(|| { + let mut stack = SlirpBackend::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&frame)); + }); + } + + #[divan::bench] + fn poll_idle(bencher: Bencher) { + let mut stack = SlirpBackend::new().unwrap(); + let mut out: Vec> = Vec::with_capacity(8); + bencher.bench_local(|| { + out.clear(); + divan::black_box(&mut stack).drain_to_guest(&mut out); + }); + } + + #[divan::bench] + fn process_arp_request(bencher: Bencher) { + let arp_repr = ArpRepr::EthernetIpv4 { + operation: ArpOperation::Request, + source_hardware_addr: EthernetAddress(GUEST_MAC), + source_protocol_addr: SLIRP_GUEST_IP, + target_hardware_addr: EthernetAddress([0; 6]), + target_protocol_addr: SLIRP_GATEWAY_IP, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress([0xff; 6]), + ethertype: EthernetProtocol::Arp, + }; + let total = 14 + arp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let 
mut a = ArpPacket::new_unchecked(&mut buf[14..]); + arp_repr.emit(&mut a); + + bencher.bench_local(|| { + let mut stack = SlirpBackend::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&buf)); + }); + } + + /// Open `n` distinct guest→gateway flows, then time `poll()`. + /// + /// Each iteration builds `n` SYN frames with unique source ports and feeds + /// them into a single [`SlirpBackend`], producing up to `n` NAT table entries. + /// `process_guest_frame` errors are ignored — the goal is "many NAT entries", + /// not "all connections succeed" (the default rate-limit may drop some). + /// + /// The timed section is a single `poll()` call on the pre-populated stack, + /// so the measurement reflects the NAT-walk cost at that table size. + /// Today the walk is `O(n)`; the unified flow table keeps the same + /// asymptotic complexity but with smaller per-entry constants. + #[divan::bench(args = [1, 100, 1000])] + fn poll_with_n_flows(bencher: Bencher, n: usize) { + let mut stack = SlirpBackend::new().unwrap(); + for i in 0..n { + let frame = build_syn(49152u16.wrapping_add(i as u16), 1); + let _ = stack.process_guest_frame(&frame); + } + let mut out: Vec> = Vec::with_capacity(8); + bencher.bench_local(|| { + out.clear(); + divan::black_box(&mut stack).drain_to_guest(&mut out); + }); + } + + /// Builds a minimal DNS A-query Ethernet frame from the guest to [`SLIRP_DNS_IP`]. + /// + /// `xid` is placed in the DNS transaction-ID field. The question section + /// queries `example.com` for an A record. The frame is a complete Ethernet → + /// IPv4 → UDP → DNS wire encoding suitable for passing to + /// [`SlirpBackend::process_guest_frame`]. 
+ fn build_dns_query_for_bench(xid: u16) -> Vec { + let mut payload = Vec::new(); + payload.extend_from_slice(&xid.to_be_bytes()); + // flags: RD=1; QDCOUNT=1; ANCOUNT/NSCOUNT/ARCOUNT = 0 + payload.extend_from_slice(&[0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + // QNAME: \x07example\x03com\x00 + payload.extend_from_slice(b"\x07example\x03com\x00"); + // QTYPE=A (1), QCLASS=IN (1) + payload.extend_from_slice(&[0x00, 0x01, 0x00, 0x01]); + + let udp_repr = UdpRepr { + src_port: 49152, + dst_port: 53, + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_DNS_IP, + next_header: IpProtocol::Udp, + payload_len: 8 + payload.len(), + hop_limit: 64, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + 8 + payload.len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut udp = UdpPacket::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + udp_repr.emit( + &mut udp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(SLIRP_DNS_IP), + payload.len(), + |b| b.copy_from_slice(&payload), + &Default::default(), + ); + buf + } + + /// Times the stack's DNS processing path when the cache has no entry for the + /// queried name. + /// + /// Each iteration creates a fresh [`SlirpBackend`] (so the DNS cache is empty) + /// and processes one DNS query frame. The measurement captures stack + /// initialisation plus first-query cache-miss handling, giving a baseline for + /// the cold-cache cost. 
+ #[divan::bench] + fn dns_cache_miss(bencher: Bencher) { + let frame = build_dns_query_for_bench(1); + bencher.bench_local(|| { + let mut stack = SlirpBackend::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&frame)); + }); + } + + /// Times the stack's DNS processing path when a cache entry already exists for + /// the queried name. + /// + /// Before the timed section, one query is injected and the stack is polled + /// for up to one second to allow the upstream DNS response to populate the + /// cache. The timed section then processes a second query (different XID, + /// same name) on the warm stack, isolating the cache-hit fast path. + #[divan::bench] + fn dns_cache_hit(bencher: Bencher) { + let mut stack = SlirpBackend::new().unwrap(); + let warm = build_dns_query_for_bench(1); + let _ = stack.process_guest_frame(&warm); + let mut out: Vec> = Vec::new(); + for _ in 0..20 { + out.clear(); + stack.drain_to_guest(&mut out); + std::thread::sleep(std::time::Duration::from_millis(50)); + } + let hit = build_dns_query_for_bench(2); + bencher.bench_local(|| { + let _ = divan::black_box(&mut stack).process_guest_frame(divan::black_box(&hit)); + }); + } + + /// Pure-compute bench for `nat::translate_outbound`. Baseline for future + /// hasher / data-structure changes (e.g. moving deny_cidrs from + /// `Vec` to a longest-prefix trie). Tens of nanoseconds + /// expected; microseconds would indicate an allocation in the hot path. 
+ #[divan::bench] + fn nat_translate_outbound_hot_path(bencher: Bencher) { + use void_box::network::nat::{translate_outbound, Rules}; + + let rules = Rules { + gateway_loopback: true, + deny_cidrs: vec!["169.254.0.0/16".parse().unwrap()], + port_forwards: vec![], + }; + let dst = SLIRP_GATEWAY_IP; + let gateway = SLIRP_GATEWAY_IP; + + bencher.bench_local(|| { + divan::black_box(translate_outbound( + divan::black_box(&rules), + divan::black_box(dst), + divan::black_box(80), + divan::black_box(gateway), + )); + }); + } + + /// Measures TCP bulk throughput through the SLIRP relay under backpressure. + /// + /// Pushes 1 MiB through the relay in 1 KiB chunks with a constrained host + /// receiver (`SO_RCVBUF=4096`) so the backpressure path is exercised every + /// iteration. Divan reports throughput in MB/s alongside per-iteration + /// latency, giving a numerical regression signal for the passt-style + /// sequence-mirroring + don't-ACK-on-EAGAIN backpressure path. + /// + /// The 95% delivery threshold mirrors `tcp_writes_more_than_256kb_succeed` + /// — the binary contract test for TCP backpressure correctness. 
+ #[divan::bench(sample_count = 10)] + fn tcp_bulk_throughput_1mb(bencher: Bencher) { + use smoltcp::wire::TcpControl; + use std::io::Read; + use std::os::unix::io::AsRawFd; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + const TOTAL_BYTES: usize = 1024 * 1024; + const CHUNK_BYTES: usize = 1024; + const WINDOW_MAX: u32 = 256 * 1024; + const DEADLINE_SECS: u64 = 5; + const GUEST_SRC_PORT: u16 = 49200; + const INITIAL_GUEST_SEQ: u32 = 1000; + + bencher + .counter(BytesCount::new(TOTAL_BYTES as u64)) + .bench_local(|| { + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + unsafe { + let rcvbuf: libc::c_int = 4096; + libc::setsockopt( + listener.as_raw_fd(), + libc::SOL_SOCKET, + libc::SO_RCVBUF, + &rcvbuf as *const libc::c_int as *const libc::c_void, + std::mem::size_of::() as libc::socklen_t, + ); + } + + let bytes_received = Arc::new(AtomicUsize::new(0)); + let bytes_received_thr = Arc::clone(&bytes_received); + let server = std::thread::spawn(move || { + let (mut sock, _) = listener.accept().unwrap(); + let mut buf = [0u8; 4096]; + loop { + match sock.read(&mut buf) { + Ok(0) => break, + Ok(bytes_read) => { + bytes_received_thr.fetch_add(bytes_read, Ordering::Relaxed); + } + Err(_) => break, + } + } + }); + + let mut stack = SlirpBackend::new().unwrap(); + + let syn = build_tcp_data_frame( + SLIRP_GATEWAY_IP, + GUEST_SRC_PORT, + host_port, + INITIAL_GUEST_SEQ, + 0, + TcpControl::Syn, + &[], + ); + stack.process_guest_frame(&syn).unwrap(); + + let synack_frames: Vec> = { + let mut frames = Vec::new(); + for _ in 0..4 { + stack.drain_to_guest(&mut frames); + } + frames + }; + let (gateway_seq, _, _, _) = synack_frames + .iter() + .find_map(|frame| parse_tcp_to_guest_frame(frame)) + .expect("synack"); + + let ack_frame = build_tcp_data_frame( + SLIRP_GATEWAY_IP, + GUEST_SRC_PORT, + host_port, + INITIAL_GUEST_SEQ + 1, + gateway_seq + 1, + TcpControl::None, + &[], + 
); + stack.process_guest_frame(&ack_frame).unwrap(); + + let chunk = vec![b'x'; CHUNK_BYTES]; + let mut guest_seq = INITIAL_GUEST_SEQ + 1; + let mut acked_seq = INITIAL_GUEST_SEQ + 1; + let deadline = + std::time::Instant::now() + std::time::Duration::from_secs(DEADLINE_SECS); + + while bytes_received.load(Ordering::Relaxed) < TOTAL_BYTES * 95 / 100 + && std::time::Instant::now() < deadline + { + let data_frame = build_tcp_data_frame( + SLIRP_GATEWAY_IP, + GUEST_SRC_PORT, + host_port, + guest_seq, + gateway_seq + 1, + TcpControl::Psh, + &chunk, + ); + let _ = stack.process_guest_frame(&data_frame); + guest_seq = guest_seq.wrapping_add(CHUNK_BYTES as u32); + + let mut frames = Vec::new(); + for _ in 0..4 { + stack.drain_to_guest(&mut frames); + } + for frame in frames { + if let Some((_, ack, _, _)) = parse_tcp_to_guest_frame(&frame) { + if ack > acked_seq { + acked_seq = ack; + } + } + } + + if guest_seq.wrapping_sub(acked_seq) > WINDOW_MAX { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + } + + let fin_frame = build_tcp_data_frame( + SLIRP_GATEWAY_IP, + GUEST_SRC_PORT, + host_port, + guest_seq, + gateway_seq + 1, + TcpControl::Fin, + &[], + ); + let _ = stack.process_guest_frame(&fin_frame); + let mut fin_drain: Vec> = Vec::new(); + for _ in 0..40 { + fin_drain.clear(); + stack.drain_to_guest(&mut fin_drain); + if server.is_finished() { + break; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + let _ = server.join(); + + divan::black_box(bytes_received.load(Ordering::Relaxed)); + }); + } + + /// Builds a minimal IPv4-over-Ethernet TCP segment from guest to gateway. + /// + /// Returns the full Ethernet frame bytes. Mirrors the `build_tcp_frame` + /// helper from `tests/network_baseline.rs` inline so the bench compiles + /// as a standalone binary without a shared helper crate. 
+ fn build_tcp_data_frame( + dst_ip: smoltcp::wire::Ipv4Address, + src_port: u16, + dst_port: u16, + seq: u32, + ack: u32, + control: TcpControl, + payload: &[u8], + ) -> Vec { + use smoltcp::wire::{IpAddress, TcpSeqNumber}; + + let tcp_repr = TcpRepr { + src_port, + dst_port, + control, + seq_number: TcpSeqNumber(seq as i32), + ack_number: if ack == 0 { + None + } else { + Some(TcpSeqNumber(ack as i32)) + }, + window_len: 65535, + window_scale: None, + max_seg_size: None, + sack_permitted: false, + sack_ranges: [None, None, None], + payload, + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: dst_ip, + next_header: IpProtocol::Tcp, + payload_len: tcp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let eth_hdr_len = 14usize; + let total = eth_hdr_len + ip_repr.buffer_len() + tcp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[eth_hdr_len..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut tcp = TcpPacket::new_unchecked(&mut buf[eth_hdr_len + ip_repr.buffer_len()..]); + tcp_repr.emit( + &mut tcp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(dst_ip), + &Default::default(), + ); + buf + } + + /// Parses one frame emitted by the stack as a TCP segment directed to the guest. + /// + /// Returns `(seq, ack, control, payload_len)` on success, `None` otherwise. 
+ fn parse_tcp_to_guest_frame(frame: &[u8]) -> Option<(u32, u32, TcpControl, usize)> { + let eth = EthernetFrame::new_checked(frame).ok()?; + if eth.ethertype() != EthernetProtocol::Ipv4 { + return None; + } + let ip = Ipv4Packet::new_checked(eth.payload()).ok()?; + if ip.next_header() != IpProtocol::Tcp || ip.dst_addr() != SLIRP_GUEST_IP { + return None; + } + let tcp = TcpPacket::new_checked(ip.payload()).ok()?; + let control = match (tcp.syn(), tcp.fin(), tcp.rst(), tcp.psh()) { + (false, false, false, false) => TcpControl::None, + (false, false, false, true) => TcpControl::Psh, + (true, false, false, _) => TcpControl::Syn, + (false, true, false, _) => TcpControl::Fin, + (false, false, true, _) => TcpControl::Rst, + _ => return None, + }; + Some(( + tcp.seq_number().0 as u32, + tcp.ack_number().0 as u32, + control, + tcp.payload().len(), + )) + } + fn build_udp_frame_for_bench(src_port: u16, dst_port: u16, payload: &[u8]) -> Vec { + let udp_repr = UdpRepr { src_port, dst_port }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_GATEWAY_IP, + next_header: IpProtocol::Udp, + payload_len: 8 + payload.len(), + hop_limit: 64, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + 8 + payload.len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut udp = UdpPacket::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + udp_repr.emit( + &mut udp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(SLIRP_GATEWAY_IP), + payload.len(), + |b| b.copy_from_slice(payload), + &Default::default(), + ); + buf + } + + fn build_icmp_echo_for_bench(ident: u16, seq_no: u16) -> Vec { + let icmp_repr = Icmpv4Repr::EchoRequest { + ident, + 
seq_no, + data: b"bench", + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: smoltcp::wire::Ipv4Address::new(8, 8, 8, 8), + next_header: IpProtocol::Icmp, + payload_len: icmp_repr.buffer_len(), + hop_limit: 64, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + icmp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut icmp = Icmpv4Packet::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + icmp_repr.emit(&mut icmp, &Default::default()); + buf + } + + /// Open `n/3` TCP + `n/3` UDP + `n/3` ICMP-echo flows, then time `poll()`. + /// + /// Mirrors `poll_with_n_flows` (TCP-only) but exercises the unified + /// `flow_table` with all three protocols populated. Catches enum-dispatch + /// and filter regressions at scale: each `relay_*_data` loop filters + /// by `FlowKey` variant over the unified table, so per-protocol scan cost + /// is `O(total_flows)` not `O(this_protocol's_flows)`. This bench is the + /// regression gate for that property. + #[divan::bench(args = [3, 99, 999])] + fn poll_with_n_mixed_flows(bencher: Bencher, n: usize) { + let mut stack = SlirpBackend::new().unwrap(); + let third = n / 3; + + // n/3 TCP SYNs. + for i in 0..third { + let frame = build_syn(49152u16.wrapping_add(i as u16), 1); + let _ = stack.process_guest_frame(&frame); + } + // n/3 UDP datagrams (any non-DNS port; one byte payload). + for i in 0..third { + let frame = build_udp_frame_for_bench(50152u16.wrapping_add(i as u16), 8080, b"x"); + let _ = stack.process_guest_frame(&frame); + } + // n/3 ICMP echoes (unique guest_id per flow). 
+ for i in 0..third { + let frame = build_icmp_echo_for_bench(0x1000 + i as u16, 1); + let _ = stack.process_guest_frame(&frame); + } + + let mut out: Vec> = Vec::with_capacity(8); + bencher.bench_local(|| { + out.clear(); + divan::black_box(&mut stack).drain_to_guest(&mut out); + }); + } + + /// Insert + remove `n` flow-table entries using synthetic data. + /// + /// Pure-compute baseline for the unified `HashMap`. + /// Reference number for hasher experiments (foldhash, ahash, SipHash) + /// or container-shape changes (e.g. hashbrown raw API). Uses synthetic + /// `u32` values instead of real + /// `TcpNatEntry` (which requires TcpStream) to isolate HashMap + /// mechanics from socket cloning overhead — the real cost is + /// HashMap insert/remove, not socket ops. + /// + /// Pre-builds N unique keys with different `guest_src_port` values + /// (maintaining the same semantic as real flows), then times one + /// iteration of insert all + remove all. + #[divan::bench(args = [10, 100, 1000])] + fn flow_table_insert_remove(bencher: Bencher, n: usize) { + use std::collections::HashMap; + + // Build keys outside the timed loop. + // Each key has a unique guest_src_port to simulate distinct flows. + let keys: Vec<_> = (0..n) + .map(|i| { + smoltcp::wire::IpAddress::Ipv4(smoltcp::wire::Ipv4Address::new( + 10, + 0, + 2, + 2 + (i % 254) as u8, + )) + }) + .collect(); + + bencher.bench_local(|| { + let mut table: HashMap = HashMap::with_capacity(n); + // Insert phase + for (i, _key) in keys.iter().enumerate() { + table.insert(i, i as u32); + } + // Remove phase + for i in 0..n { + divan::black_box(table.remove(&i)); + } + }); + } + /// Build a SYN-ACK Ethernet frame from the guest toward the gateway. + /// + /// src = GUEST_IP:guest_port, dst = GATEWAY_IP:high_port + /// control = Syn, ack_number = Some(our_seq + 1) → produces SYN+ACK on wire. 
+ #[cfg(feature = "bench-helpers")] + fn build_inbound_syn_ack_frame( + guest_port: u16, + high_port: u16, + our_seq: u32, + guest_seq: u32, + ) -> Vec { + use smoltcp::wire::TcpSeqNumber; + + let tcp_repr = TcpRepr { + src_port: guest_port, + dst_port: high_port, + control: TcpControl::Syn, + seq_number: TcpSeqNumber(guest_seq as i32), + ack_number: Some(TcpSeqNumber(our_seq.wrapping_add(1) as i32)), + window_len: 65535, + window_scale: None, + max_seg_size: None, + sack_permitted: false, + sack_ranges: [None, None, None], + payload: &[], + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_GATEWAY_IP, + next_header: IpProtocol::Tcp, + payload_len: tcp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + tcp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut tcp = TcpPacket::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + tcp_repr.emit( + &mut tcp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(SLIRP_GATEWAY_IP), + &Default::default(), + ); + buf + } + + /// Seed a `SynSent` entry into `stack`'s flow table. + /// + /// Replicates `SlirpBackend::insert_synthetic_synsent_entry` inline. + /// Requires the `bench-helpers` feature (compile with + /// `cargo bench --features bench-helpers`). 
+ #[cfg(feature = "bench-helpers")] + fn seed_synsent_entry(stack: &mut SlirpBackend, guest_port: u16, high_port: u16, our_seq: u32) { + use std::net::{TcpListener, TcpStream}; + let listener = TcpListener::bind("127.0.0.1:0").expect("bind loopback"); + let host_stream = + TcpStream::connect(listener.local_addr().unwrap()).expect("connect loopback"); + host_stream.set_nonblocking(true).ok(); + stack.insert_synthetic_synsent_entry(guest_port, high_port, our_seq, host_stream); + } + + /// Microbench for the inbound SYN-ACK state-machine transition added in + /// 5.5b.1 (`TcpNatState::SynSent` → `Established`). Each iteration + /// (re)builds a `SlirpBackend`, seeds one `SynSent` entry, feeds a + /// synthetic guest SYN-ACK frame to `process_guest_frame`, and lets + /// the bench timer capture the `process_guest_frame` cost. + /// + /// Expected magnitude: tens of µs (same order as `process_syn`, which + /// also rebuilds a fresh stack per iteration). + #[cfg(feature = "bench-helpers")] + #[divan::bench] + fn tcp_inbound_syn_ack_transition(bencher: Bencher) { + const GUEST_PORT: u16 = 8080; + const HIGH_PORT: u16 = 49152; + const OUR_SEQ: u32 = 1000; + const GUEST_SEQ: u32 = 42; + + let frame = build_inbound_syn_ack_frame(GUEST_PORT, HIGH_PORT, OUR_SEQ, GUEST_SEQ); + + bencher.bench_local(|| { + let mut stack = SlirpBackend::new().unwrap(); + seed_synsent_entry(&mut stack, GUEST_PORT, HIGH_PORT, OUR_SEQ); + let _ = divan::black_box(&mut stack).process_guest_frame(divan::black_box(&frame)); + }); + } + + /// Pure-compute cost of synthesizing an inbound SYN frame for + /// port-forwarding. No stack allocation or guest frame processing — + /// just the `build_tcp_packet_static` wire encoding. + /// + /// Expected magnitude: sub-microsecond (pure packet construction). + /// + /// Requires the `bench-helpers` feature (compile with + /// `cargo bench --features bench-helpers`). 
+ #[cfg(feature = "bench-helpers")]
+ #[divan::bench]
+ fn synthesize_inbound_syn(bencher: Bencher) {
+ const HIGH_PORT: u16 = 49152;
+ const GUEST_PORT: u16 = 8080;
+ const OUR_SEQ: u32 = 1000;
+
+ bencher.bench_local(|| {
+ divan::black_box(void_box::network::slirp::synthesize_inbound_syn(
+ divan::black_box(HIGH_PORT),
+ divan::black_box(GUEST_PORT),
+ divan::black_box(OUR_SEQ),
+ ));
+ });
+ }
+
+ /// Returns `true` if `frame` is an Ethernet/IPv4/TCP packet with the SYN
+ /// flag set, addressed to `dst_port`.
+ ///
+ /// The synthesized inbound SYN produced by `synthesize_inbound_syn` uses
+ /// `TcpControl::Syn` but smoltcp sets the ACK bit whenever `ack_number`
+ /// is `Some(...)`, even when the value is zero. Checking only `tcp.syn()`
+ /// + `dst_port` is therefore correct here.
+ fn is_tcp_syn_to_port(frame: &[u8], dst_port: u16) -> bool {
+ // Minimum: 14 (Eth) + 20 (IPv4) + 20 (TCP) = 54 bytes.
+ if frame.len() < 54 {
+ return false;
+ }
+ let eth = EthernetFrame::new_unchecked(frame);
+ if eth.ethertype() != EthernetProtocol::Ipv4 {
+ return false;
+ }
+ let ip = Ipv4Packet::new_unchecked(eth.payload());
+ if ip.next_header() != IpProtocol::Tcp {
+ return false;
+ }
+ let ip_header_len = ip.header_len() as usize;
+ let tcp = TcpPacket::new_unchecked(&eth.payload()[ip_header_len..]);
+ tcp.syn() && tcp.dst_port() == dst_port
+ }
+
+ /// Wall-clock latency of the full inbound port-forward path: host
+ /// `TcpStream::connect` → epoll readiness event → `process_listener_readiness`
+ /// accept → mpsc channel push → `process_pending_inbound_accepts` →
+ /// `synthesize_inbound_syn` → first SYN frame visible in `drain_to_guest`
+ /// output.
+ ///
+ /// The listener FD is registered with `EpollDispatch`; accept latency is
+ /// bounded by the epoll_wait cadence (≤ 5 ms active), not a fixed poll
+ /// interval. Sub-millisecond medians are expected. Regressions in the
+ /// inbound state machine will surface numerically against this measurement.
+ #[divan::bench(sample_count = 20, sample_size = 1)]
+ fn port_forward_accept_latency(bencher: Bencher) {
+ const GUEST_PORT: u16 = 8080;
+ const CONNECT_TIMEOUT: Duration = Duration::from_secs(2);
+ const DRAIN_POLL: Duration = Duration::from_micros(100);
+
+ // Probe-bind to grab an ephemeral host port, then release the listener
+ // so SlirpBackend can bind it. There is an inherent TOCTOU race
+ // between the drop and the SlirpBackend bind — acceptable for benches
+ // running on a loopback interface under controlled conditions.
+ let probe = TcpListener::bind("127.0.0.1:0").expect("probe bind for host port");
+ let host_port = probe.local_addr().expect("probe local_addr").port();
+ drop(probe);
+
+ let mut stack = SlirpBackend::with_security(
+ 64,
+ 50,
+ &["169.254.0.0/16".to_string()],
+ &[(host_port, GUEST_PORT)],
+ )
+ .expect("SlirpBackend::with_security");
+
+ let mut out: Vec<Vec<u8>> = Vec::new();
+
+ bencher.bench_local(|| {
+ // Spawn a worker thread that connects to the host listener port.
+ // EpollDispatch fires readiness; process_listener_readiness accepts
+ // and pushes the stream onto the mpsc channel.
+ let connect_addr = format!("127.0.0.1:{host_port}");
+ let worker = thread::spawn(move || {
+ let addr: std::net::SocketAddr = connect_addr.parse().expect("parse connect addr");
+ std::net::TcpStream::connect_timeout(&addr, CONNECT_TIMEOUT)
+ .expect("connect to listener");
+ });
+
+ // Poll drain_to_guest until a SYN frame appears in the output.
+ loop {
+ out.clear();
+ stack.drain_to_guest(&mut out);
+ if out
+ .iter()
+ .any(|frame| is_tcp_syn_to_port(frame, GUEST_PORT))
+ {
+ break;
+ }
+ thread::sleep(DRAIN_POLL);
+ }
+
+ worker.join().expect("worker thread panicked");
+ });
+ }
+
+ /// Cost of one `drain_to_guest` call when one TCP flow is `Established`
+ /// and the host kernel has data ready to relay.
+ /// + /// Captures the per-packet SLIRP dispatch overhead via epoll: epoll_wait + /// (non-blocking, zero-timeout), readiness scan, peek, and Ethernet frame + /// construction. Only the flows with data ready are dispatched — flows + /// with nothing to relay are skipped. + /// + /// This bench cannot exercise the `net_poll_thread` 50 ms epoll cycle + /// (that thread does not run inside divan). The wall-clock latency floor + /// is captured separately by `voidbox-network-bench`'s `tcp_rx_latency_us_p50` + /// field; see that binary's `Report` struct for the measurement shape. + /// + /// Requires the `bench-helpers` feature (compile with + /// `cargo bench --features bench-helpers`). + #[cfg(feature = "bench-helpers")] + #[divan::bench(sample_count = 50, sample_size = 10)] + fn tcp_rx_latency_one_packet(bencher: Bencher) { + use smoltcp::wire::TcpControl; + use std::io::Write; + use std::net::TcpListener; + + const GUEST_SRC_PORT: u16 = 49155; + const INITIAL_GUEST_SEQ: u32 = 5000; + const PAYLOAD: &[u8] = &[0xAB; 64]; + + // Build a fresh stack with one Established TCP flow. Setup happens + // outside the timed loop so divan only measures the relay dispatch. + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + let server_thread = thread::spawn(move || listener.accept().unwrap()); + + let mut stack = SlirpBackend::new().unwrap(); + + // 3-way handshake: guest sends SYN → stack produces SYN-ACK → guest + // sends ACK. This mirrors `tcp_bulk_throughput_1mb` setup. + let syn = build_tcp_syn_for_latency_bench(GUEST_SRC_PORT, host_port, INITIAL_GUEST_SEQ); + stack.process_guest_frame(&syn).unwrap(); + + // Drain for up to 200 ms to collect the SYN-ACK. 
+ let mut drain_frames: Vec<Vec<u8>> = Vec::new();
+ let gateway_seq = {
+ let deadline = std::time::Instant::now() + Duration::from_millis(200);
+ loop {
+ drain_frames.clear();
+ stack.drain_to_guest(&mut drain_frames);
+ if let Some((seq, _, _, _)) = drain_frames
+ .iter()
+ .find_map(|f| parse_tcp_to_guest_frame(f))
+ {
+ break seq;
+ }
+ if std::time::Instant::now() > deadline {
+ panic!("no SYN-ACK within deadline");
+ }
+ thread::sleep(Duration::from_millis(5));
+ }
+ };
+
+ // Complete the handshake: guest sends ACK.
+ let ack = build_tcp_data_frame(
+ SLIRP_GATEWAY_IP,
+ GUEST_SRC_PORT,
+ host_port,
+ INITIAL_GUEST_SEQ + 1,
+ gateway_seq + 1,
+ TcpControl::None,
+ &[],
+ );
+ stack.process_guest_frame(&ack).unwrap();
+
+ // The server thread accepted the connection; grab the socket.
+ let (mut server_sock, _) = server_thread.join().unwrap();
+ server_sock
+ .set_nonblocking(true)
+ .expect("server non-blocking");
+
+ // Set up state for the timed loop.
+ let mut out: Vec<Vec<u8>> = Vec::with_capacity(8);
+ let guest_seq = INITIAL_GUEST_SEQ + 1;
+
+ // Prime: put one payload in the kernel buffer before the first
+ // iteration begins so the first measured call sees a ready event.
+ let _ = server_sock.write(PAYLOAD);
+
+ bencher.bench_local(|| {
+ out.clear();
+ // Refill the kernel buffer from the previous iteration's drain.
+ // write() may return EAGAIN if the buffer is full; that is fine —
+ // the previous iteration's peek left data in place.
+ let _ = server_sock.write(divan::black_box(PAYLOAD));
+
+ // The cost we are measuring: one non-blocking epoll_wait + relay.
+ divan::black_box(&mut stack).drain_to_guest(&mut out);
+
+ // Consume the relay output so inject_to_guest doesn't grow
+ // unboundedly across iterations.
+ divan::black_box(&out);
+
+ // Keep the TCP stream happy: send an ACK for any data the relay
+ // fed into inject_to_guest (frame content doesn't matter for the
+ // bench; we just need the host stream not to stall).
+ for frame in &out {
+ if let Some((data_seq, _, _, plen)) = parse_tcp_to_guest_frame(frame) {
+ if plen > 0 {
+ let ack_back = build_tcp_data_frame(
+ SLIRP_GATEWAY_IP,
+ GUEST_SRC_PORT,
+ host_port,
+ guest_seq,
+ data_seq.wrapping_add(plen as u32),
+ TcpControl::None,
+ &[],
+ );
+ let _ = stack.process_guest_frame(&ack_back);
+ }
+ }
+ }
+ });
+ }
+
+ /// Build a SYN frame from the guest toward the host for the latency bench.
+ ///
+ /// Identical to `build_tcp_data_frame` with `TcpControl::Syn` and zero
+ /// `ack`. Kept as a separate function to document intent: this is the
+ /// opening segment of the 3-way handshake used by
+ /// `tcp_rx_latency_one_packet`.
+ #[cfg(feature = "bench-helpers")]
+ fn build_tcp_syn_for_latency_bench(src_port: u16, dst_port: u16, seq: u32) -> Vec<u8> {
+ build_tcp_data_frame(
+ SLIRP_GATEWAY_IP,
+ src_port,
+ dst_port,
+ seq,
+ 0,
+ smoltcp::wire::TcpControl::Syn,
+ &[],
+ )
+ }
+} // mod linux_benches
diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase0.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase0.md
new file mode 100644
index 00000000..a9106870
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase0.md
@@ -0,0 +1,2027 @@
+# Phase 0 Implementation Plan: Baseline + Trait Extraction
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+>
+> **Mandatory skills for every Rust-touching task** (from the spec):
+> `rust-style`, `rustdoc`, `rust-analyzer-ssr`,
+> `superpowers:test-driven-development`,
+> `superpowers:verification-before-completion`. Do not skip them.
+> Use LSP (`goToDefinition`, `findReferences`, `documentSymbol`,
+> `workspaceSymbol`) for Rust navigation; never grep/glob Rust source
+> when LSP can answer.
+ +**Spec:** [`2026-04-27-smoltcp-passt-port.md`](2026-04-27-smoltcp-passt-port.md) + +**Goal:** Land the test/bench baseline, the `NetworkBackend` trait +abstraction, and the `SlirpStack → SlirpBackend` rename, with zero +user-visible behavior change. + +**Naming rationale:** The new name is role-based, not +implementation-based. "Slirp" denotes the user-mode-NAT networking +role (same role libslirp / passt / pasta fill); "smoltcp" is just the +library we use to build it. Future siblings — `TapBackend`, +`VhostNetBackend` — follow the same role-based convention. Renaming +to `SmoltcpBackend` would leak the implementation library into the +public type name and lose this symmetry. + +**Architecture:** Three additive workstreams (correctness pins, divan +microbenches, wall-clock e2e harness) followed by a mechanical +trait-extraction refactor. Three "broken on purpose" assertions are +introduced in 0A and stay green — they flip in Phases 1, 2, 3 +respectively. + +**Tech Stack:** Rust 1.88, `smoltcp` 0.11 (wire types only), `divan` +0.1, `tokio` (existing), `std::net::TcpListener` for the e2e harness +host endpoint, `iperf3`/`netperf` invoked from inside the VM for +throughput numbers. + +--- + +## Task structure + +The phase has five workstreams (A–E) totaling **25 tasks**. A, B, C are +**independent and can be executed in parallel**. D depends on A +(baseline tests must exist before refactor). E is the final gate. + +``` +0A correctness baseline ──┐ +0B divan microbenches ────┼──→ 0D trait extraction ──→ 0E validation + PR +0C wall-clock harness ────┘ +``` + +--- + +## Workstream 0A — Correctness baseline (`tests/network_baseline.rs`) + +All Layer-1 unit-level pins. Linux-only because `SlirpStack` is +`#[cfg(target_os = "linux")]`. 
+ +### Task 0A.1: Test file scaffolding + frame builder helpers + +**Files:** +- Create: `tests/network_baseline.rs` +- Modify: `Cargo.toml` (register `[[test]] name = "network_baseline"`) + +- [ ] **Step 1: Create the test file with helpers.** + +```rust +//! Layer-1 correctness pins for the smoltcp-based SLIRP stack. +//! +//! These tests drive `SlirpStack` directly with synthetic Ethernet +//! frames — no VM, no kernel, no host sockets to outside hosts. The +//! goal is to lock observable behavior (including deliberately broken +//! behavior) so the passt-pattern refactor's diff is legible to +//! reviewers. +//! +//! Three tests assert *broken* behavior on purpose. Each is marked +//! `BROKEN_ON_PURPOSE` and flips in the phase that fixes it: +//! +//! - `tcp_to_host_buffer_drops_at_256kb` — flips in Phase 3 +//! - `udp_non_dns_silently_dropped` — flips in Phase 2 +//! - `icmp_echo_silently_dropped` — flips in Phase 1 +//! +//! Run with: `cargo test --test network_baseline` + +#![cfg(target_os = "linux")] + +use smoltcp::wire::{ + ArpOperation, ArpPacket, ArpRepr, EthernetAddress, EthernetFrame, EthernetProtocol, + EthernetRepr, IpProtocol, Ipv4Address, Ipv4Packet, Ipv4Repr, TcpControl, TcpPacket, TcpRepr, + UdpPacket, UdpRepr, +}; +use std::net::{TcpListener, UdpSocket}; +use void_box::network::slirp::{ + SlirpStack, GATEWAY_MAC, GUEST_MAC, SLIRP_GATEWAY_IP, SLIRP_GUEST_IP, +}; + +const GUEST_EPHEMERAL_PORT: u16 = 49152; +const ETH_HDR_LEN: usize = 14; +const IPV4_MIN_HDR_LEN: usize = 20; +const TCP_MIN_HDR_LEN: usize = 20; +const UDP_HDR_LEN: usize = 8; + +/// Build a minimal IPv4-over-Ethernet TCP segment from guest to a +/// pretend external IP. Returns the full Ethernet frame bytes. 
+fn build_tcp_frame(
+ dst_ip: Ipv4Address,
+ src_port: u16,
+ dst_port: u16,
+ seq: u32,
+ ack: u32,
+ control: TcpControl,
+ payload: &[u8],
+) -> Vec<u8> {
+ let tcp_repr = TcpRepr {
+ src_port,
+ dst_port,
+ control,
+ seq_number: smoltcp::wire::TcpSeqNumber(seq as i32),
+ ack_number: if ack == 0 {
+ None
+ } else {
+ Some(smoltcp::wire::TcpSeqNumber(ack as i32))
+ },
+ window_len: 65535,
+ window_scale: None,
+ max_seg_size: None,
+ sack_permitted: false,
+ sack_ranges: [None, None, None],
+ timestamp: None,
+ payload,
+ };
+ let ip_repr = Ipv4Repr {
+ src_addr: SLIRP_GUEST_IP,
+ dst_addr: dst_ip,
+ next_header: IpProtocol::Tcp,
+ payload_len: tcp_repr.buffer_len(),
+ hop_limit: 64,
+ };
+ let eth_repr = EthernetRepr {
+ src_addr: EthernetAddress(GUEST_MAC),
+ dst_addr: EthernetAddress(GATEWAY_MAC),
+ ethertype: EthernetProtocol::Ipv4,
+ };
+ let total = ETH_HDR_LEN + ip_repr.buffer_len() + tcp_repr.buffer_len();
+ let mut buf = vec![0u8; total];
+ let mut eth = EthernetFrame::new_unchecked(&mut buf[..]);
+ eth_repr.emit(&mut eth);
+ let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]);
+ ip_repr.emit(&mut ip, &Default::default());
+ let mut tcp = TcpPacket::new_unchecked(
+ &mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..],
+ );
+ tcp_repr.emit(
+ &mut tcp,
+ &smoltcp::wire::IpAddress::Ipv4(SLIRP_GUEST_IP),
+ &smoltcp::wire::IpAddress::Ipv4(dst_ip),
+ &Default::default(),
+ );
+ buf
+}
+
+/// Build a UDP-over-Ethernet datagram from guest.
+fn build_udp_frame(dst_ip: Ipv4Address, src_port: u16, dst_port: u16, payload: &[u8]) -> Vec<u8> {
+ let udp_repr = UdpRepr { src_port, dst_port };
+ let ip_repr = Ipv4Repr {
+ src_addr: SLIRP_GUEST_IP,
+ dst_addr: dst_ip,
+ next_header: IpProtocol::Udp,
+ payload_len: UDP_HDR_LEN + payload.len(),
+ hop_limit: 64,
+ };
+ let eth_repr = EthernetRepr {
+ src_addr: EthernetAddress(GUEST_MAC),
+ dst_addr: EthernetAddress(GATEWAY_MAC),
+ ethertype: EthernetProtocol::Ipv4,
+ };
+ let total = ETH_HDR_LEN + ip_repr.buffer_len() + UDP_HDR_LEN + payload.len();
+ let mut buf = vec![0u8; total];
+ let mut eth = EthernetFrame::new_unchecked(&mut buf[..]);
+ eth_repr.emit(&mut eth);
+ let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]);
+ ip_repr.emit(&mut ip, &Default::default());
+ let mut udp = UdpPacket::new_unchecked(
+ &mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..],
+ );
+ udp_repr.emit(
+ &mut udp,
+ &smoltcp::wire::IpAddress::Ipv4(SLIRP_GUEST_IP),
+ &smoltcp::wire::IpAddress::Ipv4(dst_ip),
+ UDP_HDR_LEN + payload.len(),
+ |b| b.copy_from_slice(payload),
+ &Default::default(),
+ );
+ buf
+}
+
+/// Parse one emitted frame as a TCP segment if it matches; return
+/// `(seq, ack, control, payload_len)` for the matching direction.
+fn parse_tcp_to_guest(frame: &[u8]) -> Option<(u32, u32, TcpControl, usize)> {
+ let eth = EthernetFrame::new_checked(frame).ok()?;
+ if eth.ethertype() != EthernetProtocol::Ipv4 {
+ return None;
+ }
+ let ip = Ipv4Packet::new_checked(eth.payload()).ok()?;
+ if ip.next_header() != IpProtocol::Tcp || ip.dst_addr() != SLIRP_GUEST_IP {
+ return None;
+ }
+ let tcp = TcpPacket::new_checked(ip.payload()).ok()?;
+ Some((
+ tcp.seq_number().0 as u32,
+ tcp.ack_number().0 as u32,
+ tcp.control(),
+ tcp.payload().len(),
+ ))
+}
+
+/// Drain frames the stack wants to send to the guest, calling `poll`
+/// up to `n` times.
+fn drain_n(stack: &mut SlirpStack, n: usize) -> Vec<Vec<u8>> {
+ let mut out = Vec::new();
+ for _ in 0..n {
+ out.extend(stack.poll());
+ }
+ out
+}
+```
+
+- [ ] **Step 2: Register the test in `Cargo.toml`.**
+
+```toml
+[[test]]
+name = "network_baseline"
+path = "tests/network_baseline.rs"
+```
+
+- [ ] **Step 3: Verify it compiles with no tests yet.**
+
+```bash
+cargo test --test network_baseline --no-run
+```
+
+Expected: builds clean, "0 tests" reported.
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add tests/network_baseline.rs Cargo.toml
+git commit -m "test(network): scaffold network_baseline pins with frame helpers"
+```
+
+---
+
+### Task 0A.2: Pin TCP handshake (SYN → SYN-ACK)
+
+**Files:**
+- Modify: `tests/network_baseline.rs`
+
+- [ ] **Step 1: Write the test using a host listener.**
+
+Append to `tests/network_baseline.rs`:
+
+```rust
+#[test]
+fn tcp_handshake_emits_synack() {
+ // Bind a host listener on 127.0.0.1 so the stack's connect()
+ // succeeds. SLIRP rewrites 10.0.2.2 → 127.0.0.1.
+ let listener = TcpListener::bind("127.0.0.1:0").unwrap();
+ let host_port = listener.local_addr().unwrap().port();
+
+ let mut stack = SlirpStack::new().expect("stack");
+
+ // Guest sends SYN to gateway IP at the listener's port.
+ let syn = build_tcp_frame(
+ SLIRP_GATEWAY_IP,
+ GUEST_EPHEMERAL_PORT,
+ host_port,
+ 1000,
+ 0,
+ TcpControl::Syn,
+ &[],
+ );
+ stack.process_guest_frame(&syn).expect("process syn");
+
+ // Drain — SYN-ACK should be queued.
+ let frames = drain_n(&mut stack, 4);
+ let synack = frames
+ .iter()
+ .find_map(|f| parse_tcp_to_guest(f))
+ .expect("synack emitted");
+
+ let (_seq, ack, ctrl, _len) = synack;
+ assert_eq!(ctrl, TcpControl::Syn, "control flags include SYN+ACK");
+ assert_eq!(ack, 1001, "ack = guest_seq + 1");
+}
+```
+
+- [ ] **Step 2: Run.**
+
+```bash
+cargo test --test network_baseline tcp_handshake_emits_synack
+```
+
+Expected: PASS.
(Note: `TcpControl::Syn` in smoltcp's repr also covers +SYN+ACK when ack number is set; assertion above is loose by +construction — sharpen if smoltcp distinguishes.) + +- [ ] **Step 3: If the assertion is wrong** (e.g. smoltcp reports + `TcpControl::None` with the ACK flag in a separate field), open + `src/network/slirp.rs` `build_tcp_packet_static` (around line 1102) + via LSP `goToDefinition` and read what it actually emits. Update the + assertion to match observed behavior. **Do not modify production + code** — this test pins what we have today. + +- [ ] **Step 4: Commit once green.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): pin TCP handshake SYN-ACK emission" +``` + +--- + +### Task 0A.3: Pin TCP data echo (guest send → host receive → host send → guest receive) + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Write the round-trip test.** + +```rust +#[test] +fn tcp_data_round_trip() { + use std::io::{Read, Write}; + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + // Spawn a thread that accepts and echoes one chunk. + let server = std::thread::spawn(move || { + let (mut sock, _) = listener.accept().unwrap(); + let mut buf = [0u8; 16]; + let n = sock.read(&mut buf).unwrap(); + sock.write_all(&buf[..n]).unwrap(); + }); + + let mut stack = SlirpStack::new().expect("stack"); + + // SYN + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + + // Drain SYN-ACK; capture our_seq. + let synack_frames = drain_n(&mut stack, 4); + let (our_seq, _ack, _ctrl, _len) = synack_frames + .iter() + .find_map(|f| parse_tcp_to_guest(f)) + .expect("synack"); + + // ACK the SYN-ACK (completes handshake). 
+ stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1001, + our_seq + 1, + TcpControl::None, + &[], + )) + .unwrap(); + + // Send 5 bytes of data. + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1001, + our_seq + 1, + TcpControl::Psh, + b"hello", + )) + .unwrap(); + + // Wait for server to echo and stack to relay back. + server.join().unwrap(); + let mut total_payload = 0; + for _ in 0..40 { + let frames = drain_n(&mut stack, 1); + for f in frames.iter() { + if let Some((_, _, _, len)) = parse_tcp_to_guest(f) { + total_payload += len; + } + } + if total_payload >= 5 { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + assert!( + total_payload >= 5, + "expected at least 5 bytes echoed back to guest, got {total_payload}" + ); +} +``` + +- [ ] **Step 2: Run.** `cargo test --test network_baseline tcp_data_round_trip` + +- [ ] **Step 3: Commit.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): pin TCP guest↔host data round-trip" +``` + +--- + +### Task 0A.4: Pin "broken on purpose" — TCP `to_host` 256 KB cliff + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Write the test that demonstrates the cliff.** + +```rust +/// BROKEN_ON_PURPOSE — flips in Phase 3. +/// +/// Today: when guest writes >256 KB to host before host reads, +/// `to_host` buffer overflows and the connection is closed +/// (`slirp.rs:903–910`). +/// +/// After Phase 3 (MSG_PEEK + sequence mirroring): the host kernel's +/// socket buffer absorbs the write; no userspace cap, no drop. +#[test] +fn tcp_to_host_buffer_drops_at_256kb() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + // Server that accepts but never reads — forces guest writes to + // accumulate in our `to_host` buffer. 
+ let _server = std::thread::spawn(move || { + let (sock, _) = listener.accept().unwrap(); + std::thread::sleep(std::time::Duration::from_secs(2)); + drop(sock); + }); + + let mut stack = SlirpStack::new().expect("stack"); + + // Handshake. + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + let synack = drain_n(&mut stack, 4) + .into_iter() + .find_map(|f| parse_tcp_to_guest(&f)) + .expect("synack"); + let (our_seq, _, _, _) = synack; + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1001, + our_seq + 1, + TcpControl::None, + &[], + )) + .unwrap(); + + // Push ~300 KB in 1 KB segments. Today, somewhere past 256 KB the + // stack closes the connection (RST or FIN to guest). + let mut seq = 1001u32; + let chunk = vec![b'x'; 1024]; + let mut saw_close = false; + for _ in 0..300 { + let _ = stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + seq, + our_seq + 1, + TcpControl::Psh, + &chunk, + )); + seq = seq.wrapping_add(1024); + for f in drain_n(&mut stack, 1) { + if let Some((_, _, ctrl, _)) = parse_tcp_to_guest(&f) { + if matches!(ctrl, TcpControl::Rst | TcpControl::Fin) { + saw_close = true; + } + } + } + if saw_close { + break; + } + } + assert!( + saw_close, + "BROKEN_ON_PURPOSE: today the 256 KB to_host cliff closes the \ + connection. If this assertion fails, Phase 3 may have already \ + landed — flip the assertion to `assert!(!saw_close)`." + ); +} +``` + +- [ ] **Step 2: Run.** `cargo test --test network_baseline tcp_to_host_buffer_drops_at_256kb` + +- [ ] **Step 3: If it doesn't capture the cliff** (e.g. test passes + 300 chunks without close), instrument with `tracing` at `WARN`, + re-run, and adjust chunk size / count. The cliff is real — the test + must capture it. 
+ +- [ ] **Step 4: Commit.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): BROKEN_ON_PURPOSE pin — 256 KB to_host cliff" +``` + +--- + +### Task 0A.5: Pin TCP rate limit, max concurrent, deny list + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Write three clustered tests.** + +```rust +#[test] +fn tcp_rate_limit_emits_rst() { + // 5 conn/s allowance; 10 attempts. + let mut stack = SlirpStack::with_security(64, 5, vec![]).unwrap(); + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + let mut rsts = 0; + for i in 0..10 { + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT + i as u16, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + for f in drain_n(&mut stack, 2) { + if let Some((_, _, ctrl, _)) = parse_tcp_to_guest(&f) { + if ctrl == TcpControl::Rst { + rsts += 1; + } + } + } + } + assert!( + rsts >= 4, + "expected ≥4 RSTs from rate limit, saw {rsts}" + ); + drop(listener); +} + +#[test] +fn tcp_max_concurrent_emits_rst() { + let mut stack = SlirpStack::with_security(2, 1000, vec![]).unwrap(); + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + // Open 4 distinct connections; cap is 2. 
+ let mut rsts = 0;
+ for i in 0..4 {
+ stack
+ .process_guest_frame(&build_tcp_frame(
+ SLIRP_GATEWAY_IP,
+ GUEST_EPHEMERAL_PORT + i,
+ host_port,
+ 1000,
+ 0,
+ TcpControl::Syn,
+ &[],
+ ))
+ .unwrap();
+ for f in drain_n(&mut stack, 2) {
+ if let Some((_, _, ctrl, _)) = parse_tcp_to_guest(&f) {
+ if ctrl == TcpControl::Rst {
+ rsts += 1;
+ }
+ }
+ }
+ }
+ assert!(rsts >= 1, "expected RST after concurrent limit, saw {rsts}");
+ drop(listener);
+}
+
+#[test]
+fn tcp_deny_list_emits_rst() {
+ use ipnet::Ipv4Net;
+ let deny: Vec<Ipv4Net> = vec!["169.254.169.254/32".parse().unwrap()];
+ let mut stack = SlirpStack::with_security(64, 1000, deny).unwrap();
+
+ stack
+ .process_guest_frame(&build_tcp_frame(
+ Ipv4Address::new(169, 254, 169, 254),
+ GUEST_EPHEMERAL_PORT,
+ 80,
+ 1000,
+ 0,
+ TcpControl::Syn,
+ &[],
+ ))
+ .unwrap();
+ let rst = drain_n(&mut stack, 2)
+ .into_iter()
+ .find_map(|f| parse_tcp_to_guest(&f))
+ .map(|(_, _, ctrl, _)| ctrl == TcpControl::Rst);
+ assert_eq!(rst, Some(true), "deny-list IP must get RST");
+}
+```
+
+- [ ] **Step 2: Run all three.**
+
+```bash
+cargo test --test network_baseline tcp_rate_limit_emits_rst tcp_max_concurrent_emits_rst tcp_deny_list_emits_rst
+```
+
+- [ ] **Step 3: Commit.**
+
+```bash
+git add tests/network_baseline.rs
+git commit -m "test(network): pin TCP rate limit, concurrent cap, deny list"
+```
+
+---
+
+### Task 0A.6: Pin ARP behavior
+
+**Files:**
+- Modify: `tests/network_baseline.rs`
+
+- [ ] **Step 1: Add ARP frame builder and three tests.**
+
+```rust
+fn build_arp_request(target_ip: Ipv4Address) -> Vec<u8> {
+ let arp_repr = ArpRepr::EthernetIpv4 {
+ operation: ArpOperation::Request,
+ source_hardware_addr: EthernetAddress(GUEST_MAC),
+ source_protocol_addr: SLIRP_GUEST_IP,
+ target_hardware_addr: EthernetAddress([0; 6]),
+ target_protocol_addr: target_ip,
+ };
+ let eth_repr = EthernetRepr {
+ src_addr: EthernetAddress(GUEST_MAC),
+ dst_addr: EthernetAddress([0xff; 6]),
+ ethertype: EthernetProtocol::Arp,
+ };
+ let 
total = ETH_HDR_LEN + arp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut arp = ArpPacket::new_unchecked(&mut buf[ETH_HDR_LEN..]); + arp_repr.emit(&mut arp); + buf +} + +fn parse_arp_reply(frame: &[u8]) -> Option<(EthernetAddress, Ipv4Address)> { + let eth = EthernetFrame::new_checked(frame).ok()?; + if eth.ethertype() != EthernetProtocol::Arp { + return None; + } + let arp = ArpPacket::new_checked(eth.payload()).ok()?; + let repr = ArpRepr::parse(&arp).ok()?; + if let ArpRepr::EthernetIpv4 { + operation: ArpOperation::Reply, + source_hardware_addr, + source_protocol_addr, + .. + } = repr + { + Some((source_hardware_addr, source_protocol_addr)) + } else { + None + } +} + +#[test] +fn arp_replies_for_gateway() { + let mut stack = SlirpStack::new().unwrap(); + stack + .process_guest_frame(&build_arp_request(SLIRP_GATEWAY_IP)) + .unwrap(); + let reply = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_arp_reply(&f)) + .expect("arp reply for gateway"); + assert_eq!(reply.1, SLIRP_GATEWAY_IP); + assert_eq!(reply.0, EthernetAddress(GATEWAY_MAC)); +} + +#[test] +fn arp_replies_for_random_subnet_ip() { + let mut stack = SlirpStack::new().unwrap(); + stack + .process_guest_frame(&build_arp_request(Ipv4Address::new(10, 0, 2, 99))) + .unwrap(); + let reply = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_arp_reply(&f)) + .expect("arp reply for in-subnet IP"); + assert_eq!(reply.0, EthernetAddress(GATEWAY_MAC)); +} + +#[test] +fn arp_does_not_reply_for_guest_ip() { + let mut stack = SlirpStack::new().unwrap(); + stack + .process_guest_frame(&build_arp_request(SLIRP_GUEST_IP)) + .unwrap(); + let reply = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_arp_reply(&f)); + assert!(reply.is_none(), "stack must not claim guest's own IP"); +} +``` + +- [ ] **Step 2: Run.** `cargo test --test network_baseline arp_` + +- [ ] **Step 3: Commit.** + 
+```bash
+git add tests/network_baseline.rs
+git commit -m "test(network): pin ARP reply behavior for gateway and subnet"
+```
+
+---
+
+### Task 0A.7: Pin DNS cache and forwarding
+
+**Files:**
+- Modify: `tests/network_baseline.rs`
+
+- [ ] **Step 1: Add four DNS tests.** A real recursive resolver is
+  required; tests skip cleanly if no nameserver is reachable.
+
+```rust
+fn build_dns_query(xid: u16, qname: &[u8]) -> Vec<u8> {
+ use void_box::network::slirp::SLIRP_DNS_IP;
+ // Minimal DNS query: header + QNAME + QTYPE=A + QCLASS=IN
+ let mut payload = Vec::new();
+ payload.extend_from_slice(&xid.to_be_bytes()); // ID
+ payload.extend_from_slice(&[0x01, 0x00]); // standard query, RD=1
+ payload.extend_from_slice(&[0x00, 0x01]); // QDCOUNT=1
+ payload.extend_from_slice(&[0x00, 0x00]); // ANCOUNT
+ payload.extend_from_slice(&[0x00, 0x00]); // NSCOUNT
+ payload.extend_from_slice(&[0x00, 0x00]); // ARCOUNT
+ payload.extend_from_slice(qname);
+ payload.extend_from_slice(&[0x00, 0x01]); // QTYPE=A
+ payload.extend_from_slice(&[0x00, 0x01]); // QCLASS=IN
+ build_udp_frame(SLIRP_DNS_IP, GUEST_EPHEMERAL_PORT, 53, &payload)
+}
+
+fn parse_dns_reply_xid(frame: &[u8]) -> Option<u16> {
+ let eth = EthernetFrame::new_checked(frame).ok()?;
+ if eth.ethertype() != EthernetProtocol::Ipv4 {
+ return None;
+ }
+ let ip = Ipv4Packet::new_checked(eth.payload()).ok()?;
+ if ip.next_header() != IpProtocol::Udp {
+ return None;
+ }
+ let udp = UdpPacket::new_checked(ip.payload()).ok()?;
+ if udp.src_port() != 53 {
+ return None;
+ }
+ let p = udp.payload();
+ if p.len() < 2 {
+ return None;
+ }
+ Some(u16::from_be_bytes([p[0], p[1]]))
+}
+
+// `\x07example\x03com\x00`
+const QNAME_EXAMPLE_COM: &[u8] = b"\x07example\x03com\x00";
+
+#[test]
+fn dns_query_resolves() {
+ let mut stack = match SlirpStack::new() {
+ Ok(s) => s,
+ Err(_) => return, // no /etc/resolv.conf; skip
+ };
+ stack
+ .process_guest_frame(&build_dns_query(0x1234, QNAME_EXAMPLE_COM))
+ .unwrap();
+ // Resolution is async on 
net-poll thread. Drain up to 20× 100ms. + let mut got = None; + for _ in 0..20 { + for f in drain_n(&mut stack, 1) { + if let Some(xid) = parse_dns_reply_xid(&f) { + got = Some(xid); + } + } + if got.is_some() { + break; + } + std::thread::sleep(std::time::Duration::from_millis(100)); + } + if got.is_none() { + eprintln!("skip: no upstream DNS reachable"); + return; + } + assert_eq!(got, Some(0x1234)); +} + +#[test] +fn dns_cache_keys_by_question_not_xid() { + let mut stack = match SlirpStack::new() { + Ok(s) => s, + Err(_) => return, + }; + // Warm cache with xid=1. + stack + .process_guest_frame(&build_dns_query(0x0001, QNAME_EXAMPLE_COM)) + .unwrap(); + for _ in 0..20 { + let _ = drain_n(&mut stack, 1); + std::thread::sleep(std::time::Duration::from_millis(50)); + } + // Query with xid=2 — should hit cache and reply with xid=2. + stack + .process_guest_frame(&build_dns_query(0x0002, QNAME_EXAMPLE_COM)) + .unwrap(); + let frames = drain_n(&mut stack, 4); + let xid = frames.iter().find_map(|f| parse_dns_reply_xid(f)); + if xid.is_none() { + eprintln!("skip: cache warmup did not complete"); + return; + } + assert_eq!(xid, Some(0x0002), "cache must rewrite xid on hit"); +} +``` + +- [ ] **Step 2: Run.** + +```bash +cargo test --test network_baseline dns_ +``` + +- [ ] **Step 3: Commit.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): pin DNS resolution and cache xid-rewrite" +``` + +--- + +### Task 0A.8: Pin "broken on purpose" — UDP non-DNS dropped + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Write the dropped-on-purpose test.** + +```rust +/// BROKEN_ON_PURPOSE — flips in Phase 2. +/// +/// Today: UDP datagrams to any port other than 53 are silently +/// dropped (`slirp.rs:637` "drop silently"). A bound host UDP socket +/// receives nothing. +#[test] +fn udp_non_dns_silently_dropped() { + // Bind a host UDP socket; we'll prove nothing arrives. 
+ let host_sock = UdpSocket::bind("127.0.0.1:0").unwrap(); + let host_port = host_sock.local_addr().unwrap().port(); + host_sock + .set_read_timeout(Some(std::time::Duration::from_millis(200))) + .unwrap(); + + let mut stack = SlirpStack::new().unwrap(); + stack + .process_guest_frame(&build_udp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + b"hello", + )) + .unwrap(); + let _ = drain_n(&mut stack, 4); + + let mut buf = [0u8; 32]; + let received = host_sock.recv(&mut buf).is_ok(); + assert!( + !received, + "BROKEN_ON_PURPOSE: today UDP-to-non-53 is dropped. \ + If this fires, Phase 2 likely landed — flip to assert!(received)." + ); +} +``` + +- [ ] **Step 2: Run.** `cargo test --test network_baseline udp_non_dns_silently_dropped` + +- [ ] **Step 3: Commit.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): BROKEN_ON_PURPOSE pin — UDP non-DNS dropped" +``` + +--- + +### Task 0A.9: Pin "broken on purpose" — ICMP echo dropped + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Write the dropped-on-purpose test.** + +```rust +/// BROKEN_ON_PURPOSE — flips in Phase 1. +/// +/// Today: ICMP echo requests are silently dropped at +/// `slirp.rs:637`. Phase 1 adds `IPPROTO_ICMP SOCK_DGRAM` echo +/// translation. +#[test] +fn icmp_echo_silently_dropped() { + // Build a minimal ICMP echo request as an IPv4 packet inside an + // Ethernet frame. We don't have an `IcmpRepr` builder set up; do + // it by hand against smoltcp wire types. 
+ use smoltcp::wire::{Icmpv4Packet, Icmpv4Repr}; + + let icmp_repr = Icmpv4Repr::EchoRequest { + ident: 0xbeef, + seq_no: 1, + data: b"ping", + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: Ipv4Address::new(8, 8, 8, 8), + next_header: IpProtocol::Icmp, + payload_len: icmp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = ETH_HDR_LEN + ip_repr.buffer_len() + icmp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut icmp = Icmpv4Packet::new_unchecked( + &mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..], + ); + icmp_repr.emit(&mut icmp, &Default::default()); + + let mut stack = SlirpStack::new().unwrap(); + stack.process_guest_frame(&buf).unwrap(); + let frames = drain_n(&mut stack, 4); + + let saw_icmp_reply = frames.iter().any(|f| { + EthernetFrame::new_checked(f.as_slice()) + .ok() + .and_then(|e| { + if e.ethertype() != EthernetProtocol::Ipv4 { + return None; + } + Ipv4Packet::new_checked(e.payload()).ok().map(|ip| { + ip.next_header() == IpProtocol::Icmp + && ip.dst_addr() == SLIRP_GUEST_IP + }) + }) + .unwrap_or(false) + }); + assert!( + !saw_icmp_reply, + "BROKEN_ON_PURPOSE: today ICMP echo is dropped. \ + Phase 1 should flip this to assert!(saw_icmp_reply)." 
+ ); +} +``` + +- [ ] **Step 2: Run.** `cargo test --test network_baseline icmp_echo_silently_dropped` + +- [ ] **Step 3: Commit.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): BROKEN_ON_PURPOSE pin — ICMP echo dropped" +``` + +--- + +## Workstream 0B — divan microbenches (`benches/network.rs`) + +### Task 0B.1: Bench file scaffolding + first three benches + +**Files:** +- Create: `benches/network.rs` +- Modify: `Cargo.toml` (register `[[bench]] name = "network"`) + +- [ ] **Step 1: Create the bench file.** + +```rust +//! Divan micro-benchmarks for SLIRP hot paths. +//! +//! Mirrors `benches/startup.rs` in shape. Job: regression detection +//! for the per-packet hot path on the vCPU and net-poll threads. +//! +//! Run with: `cargo bench --bench network` + +#![cfg(target_os = "linux")] + +use divan::Bencher; +use smoltcp::wire::{ + EthernetAddress, EthernetFrame, EthernetProtocol, EthernetRepr, IpProtocol, Ipv4Address, + Ipv4Packet, Ipv4Repr, TcpControl, TcpPacket, TcpRepr, +}; +use void_box::network::slirp::{ + SlirpStack, GATEWAY_MAC, GUEST_MAC, SLIRP_GATEWAY_IP, SLIRP_GUEST_IP, +}; + +fn main() { + divan::main(); +} + +fn build_syn(src_port: u16, dst_port: u16) -> Vec { + let tcp = TcpRepr { + src_port, + dst_port, + control: TcpControl::Syn, + seq_number: smoltcp::wire::TcpSeqNumber(1000), + ack_number: None, + window_len: 65535, + window_scale: None, + max_seg_size: None, + sack_permitted: false, + sack_ranges: [None, None, None], + timestamp: None, + payload: &[], + }; + let ip = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_GATEWAY_IP, + next_header: IpProtocol::Tcp, + payload_len: tcp.buffer_len(), + hop_limit: 64, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip.buffer_len() + tcp.buffer_len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + 
eth.emit(&mut e); + let mut ipp = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip.emit(&mut ipp, &Default::default()); + let mut tcpp = TcpPacket::new_unchecked(&mut buf[14 + ip.buffer_len()..]); + tcp.emit( + &mut tcpp, + &smoltcp::wire::IpAddress::Ipv4(SLIRP_GUEST_IP), + &smoltcp::wire::IpAddress::Ipv4(SLIRP_GATEWAY_IP), + &Default::default(), + ); + buf +} + +#[divan::bench] +fn process_syn(bencher: Bencher) { + let frame = build_syn(49152, 1); + bencher.bench_local(|| { + let mut stack = SlirpStack::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&frame)); + }); +} + +#[divan::bench] +fn poll_idle(bencher: Bencher) { + let mut stack = SlirpStack::new().unwrap(); + bencher.bench_local(|| { + let _ = divan::black_box(&mut stack).poll(); + }); +} + +#[divan::bench] +fn process_arp_request(bencher: Bencher) { + use smoltcp::wire::{ArpOperation, ArpPacket, ArpRepr}; + let arp_repr = ArpRepr::EthernetIpv4 { + operation: ArpOperation::Request, + source_hardware_addr: EthernetAddress(GUEST_MAC), + source_protocol_addr: SLIRP_GUEST_IP, + target_hardware_addr: EthernetAddress([0; 6]), + target_protocol_addr: SLIRP_GATEWAY_IP, + }; + let eth = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress([0xff; 6]), + ethertype: EthernetProtocol::Arp, + }; + let total = 14 + arp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let mut a = ArpPacket::new_unchecked(&mut buf[14..]); + arp_repr.emit(&mut a); + + bencher.bench_local(|| { + let mut stack = SlirpStack::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&buf)); + }); +} +``` + +- [ ] **Step 2: Register in `Cargo.toml`.** + +```toml +[[bench]] +name = "network" +path = "benches/network.rs" +harness = false +``` + +- [ ] **Step 3: Build and run.** + +```bash +cargo bench --bench network --no-run +cargo bench --bench network process_syn +``` + +Expected: divan prints timing, 
e.g. `process_syn fastest=…us`. + +- [ ] **Step 4: Commit.** + +```bash +git add benches/network.rs Cargo.toml +git commit -m "bench(network): divan microbenches for SLIRP hot paths" +``` + +--- + +### Task 0B.2: Parametric NAT-walk scaling bench + +**Files:** +- Modify: `benches/network.rs` + +- [ ] **Step 1: Add the parametric bench.** Append: + +```rust +/// Open `n` distinct guest→gateway flows, then time `poll()`. +/// This walks the NAT table — `O(n)` today; the unified flow table +/// in Phase 4 should keep it `O(n)` but with smaller constants. +#[divan::bench(args = [1, 100, 1000])] +fn poll_with_n_flows(bencher: Bencher, n: usize) { + let mut stack = SlirpStack::new().unwrap(); + for i in 0..n { + let frame = build_syn(49152u16.wrapping_add(i as u16), 1); + let _ = stack.process_guest_frame(&frame); + } + bencher.bench_local(|| { + let _ = divan::black_box(&mut stack).poll(); + }); +} +``` + +- [ ] **Step 2: Run.** + +```bash +cargo bench --bench network poll_with_n_flows +``` + +- [ ] **Step 3: Commit.** + +```bash +git add benches/network.rs +git commit -m "bench(network): parametric NAT-walk scaling at 1/100/1000 flows" +``` + +--- + +### Task 0B.3: DNS cache hit/miss benches + +**Files:** +- Modify: `benches/network.rs` + +- [ ] **Step 1: Append DNS benches.** + +```rust +fn build_dns_query_for_bench(xid: u16) -> Vec { + use smoltcp::wire::{UdpPacket, UdpRepr}; + use void_box::network::slirp::SLIRP_DNS_IP; + let mut payload = Vec::new(); + payload.extend_from_slice(&xid.to_be_bytes()); + payload.extend_from_slice(&[0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + payload.extend_from_slice(b"\x07example\x03com\x00"); + payload.extend_from_slice(&[0x00, 0x01, 0x00, 0x01]); + + let udp_repr = UdpRepr { + src_port: 49152, + dst_port: 53, + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_DNS_IP, + next_header: IpProtocol::Udp, + payload_len: 8 + payload.len(), + hop_limit: 64, + }; + let eth = EthernetRepr { + 
src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + 8 + payload.len(); + let mut buf = vec![0u8; total]; + let mut e = EthernetFrame::new_unchecked(&mut buf[..]); + eth.emit(&mut e); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut udp = UdpPacket::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + udp_repr.emit( + &mut udp, + &smoltcp::wire::IpAddress::Ipv4(SLIRP_GUEST_IP), + &smoltcp::wire::IpAddress::Ipv4(SLIRP_DNS_IP), + 8 + payload.len(), + |b| b.copy_from_slice(&payload), + &Default::default(), + ); + buf +} + +#[divan::bench] +fn dns_cache_miss(bencher: Bencher) { + let frame = build_dns_query_for_bench(1); + bencher.bench_local(|| { + let mut stack = SlirpStack::new().unwrap(); + let _ = stack.process_guest_frame(divan::black_box(&frame)); + }); +} + +#[divan::bench] +fn dns_cache_hit(bencher: Bencher) { + // Warm cache by injecting one query and polling resolution. + let mut stack = SlirpStack::new().unwrap(); + let warm = build_dns_query_for_bench(1); + let _ = stack.process_guest_frame(&warm); + for _ in 0..20 { + let _ = stack.poll(); + std::thread::sleep(std::time::Duration::from_millis(50)); + } + let hit = build_dns_query_for_bench(2); + bencher.bench_local(|| { + let _ = divan::black_box(&mut stack).process_guest_frame(divan::black_box(&hit)); + }); +} +``` + +- [ ] **Step 2: Run.** `cargo bench --bench network dns_` + +- [ ] **Step 3: Commit.** + +```bash +git add benches/network.rs +git commit -m "bench(network): DNS cache hit and miss paths" +``` + +--- + +### Task 0B.4: Wire CI extension + +**Files:** +- Modify: `.github/workflows/startup-bench.yml` (add a `network` step) + +- [ ] **Step 1: Read the existing workflow** to learn the regression + threshold mechanism. 
+
+```bash
+cat .github/workflows/startup-bench.yml
+```
+
+- [ ] **Step 2: Add a parallel job/step** that runs
+  `cargo bench --bench network` and compares against `main` baseline
+  using the same mechanism the startup bench uses. Concrete diff
+  depends on what's already there — match the pattern; do not
+  duplicate infrastructure.
+
+- [ ] **Step 3: Push to a feature branch and verify the workflow
+  runs.** If the divan output format the existing workflow expects
+  doesn't match, adjust the workflow rather than divan output (divan
+  has a single canonical JSON format; rely on it).
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add .github/workflows/startup-bench.yml
+git commit -m "ci(bench): include network microbenches in regression gate"
+```
+
+---
+
+## Workstream 0C — Wall-clock e2e harness (`voidbox-network-bench`)
+
+### Task 0C.1: Binary scaffold
+
+**Files:**
+- Create: `src/bin/voidbox-network-bench/main.rs`
+- Modify: `Cargo.toml` (register `[[bin]] name = "voidbox-network-bench"`)
+
+- [ ] **Step 1: Create the binary scaffold.**
+
+```rust
+//! Wall-clock end-to-end network benchmark harness.
+//!
+//! Boots a real VM and measures TCP throughput, RR/CRR latency, and
+//! UDP DNS qps inside the guest. Output is JSON for diffing against
+//! a baseline.
+//!
+//! Mirrors `voidbox-startup-bench` in CLI shape and lifecycle.
+//!
+//! Linux-only because the smoltcp-based SLIRP stack is Linux-only.
+
+#![cfg(target_os = "linux")]
+
+use clap::Parser;
+use serde::Serialize;
+use std::path::PathBuf;
+use std::time::Duration;
+
+#[derive(Parser, Debug)]
+#[command(version, about = "VoidBox network benchmark harness")]
+struct Cli {
+    /// Number of iterations per metric.
+    #[arg(long, default_value_t = 5)]
+    iterations: u32,
+
+    /// Output JSON file. If omitted, prints to stdout.
+    #[arg(long)]
+    output: Option<PathBuf>,
+
+    /// Skip throughput measurements (useful for fast smoke runs).
+    #[arg(long, default_value_t = false)]
+    no_throughput: bool,
+}
+
+#[derive(Serialize, Debug, Default)]
+struct Report {
+    tcp_throughput_g2h_mbps: Option<f64>,
+    tcp_throughput_h2g_mbps: Option<f64>,
+    tcp_rr_latency_us_p50: Option<f64>,
+    tcp_rr_latency_us_p99: Option<f64>,
+    tcp_crr_latency_us_p50: Option<f64>,
+    udp_dns_qps: Option<f64>,
+    icmp_rr_latency_us_p50: Option<f64>, // None today; populated post-Phase-1
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let cli = Cli::parse();
+    let mut report = Report::default();
+
+    eprintln!("voidbox-network-bench: scaffold (no measurements yet)");
+    let _ = (cli.iterations, &cli.output, cli.no_throughput, &mut report);
+
+    let json = serde_json::to_string_pretty(&report)?;
+    match cli.output {
+        Some(path) => std::fs::write(path, json)?,
+        None => println!("{json}"),
+    }
+    Ok(())
+}
+
+#[allow(dead_code)]
+fn percentile(samples: &mut [Duration], p: f64) -> Duration {
+    samples.sort();
+    let idx = ((samples.len() as f64) * p).clamp(0.0, samples.len() as f64 - 1.0) as usize;
+    samples[idx]
+}
+```
+
+- [ ] **Step 2: Register in `Cargo.toml`.**
+
+```toml
+[[bin]]
+name = "voidbox-network-bench"
+path = "src/bin/voidbox-network-bench/main.rs"
+```
+
+- [ ] **Step 3: Build.**
+
+```bash
+cargo build --bin voidbox-network-bench
+```
+
+- [ ] **Step 4: Smoke run.**
+
+```bash
+cargo run --bin voidbox-network-bench
+```
+
+Expected: prints JSON with all `null` fields.
+
+- [ ] **Step 5: Commit.**
+
+```bash
+git add src/bin/voidbox-network-bench Cargo.toml
+git commit -m "bench(network): voidbox-network-bench binary scaffold"
+```
+
+---
+
+### Task 0C.2: TCP throughput measurement
+
+**Files:**
+- Modify: `src/bin/voidbox-network-bench/main.rs`
+
+- [ ] **Step 1: Read the existing startup-bench harness** to learn
+  the VM lifecycle pattern.
+
+```bash
+# Use LSP `documentSymbol` on src/bin/voidbox-startup-bench/main.rs
+# to map its functions, then read the run loop.
+```
+
+- [ ] **Step 2: Implement `measure_tcp_throughput`** that:
+  1. Starts a host-side iperf3 server (or a Rust echo loop on a
+     TCP socket).
+  2. Boots a VM whose initramfs includes `iperf3`.
+  3. Execs `iperf3 -c 10.0.2.2 -t 5 -p <port> --json` inside the
+     guest via the existing `ControlChannel::exec`.
+  4. Parses the JSON, extracts bits-per-second, returns Mbps.
+  5. Stops the VM.
+- [ ] **Step 3:** Wire the function into `main` for both directions
+  (g2h, h2g) and populate `report.tcp_throughput_*`.
+- [ ] **Step 4: Smoke run.**
+
+```bash
+cargo run --bin voidbox-network-bench -- --iterations 1
+```
+
+- [ ] **Step 5: Commit.**
+
+```bash
+git add src/bin/voidbox-network-bench/main.rs
+git commit -m "bench(network): TCP throughput via iperf3 inside VM"
+```
+
+> **Note for the implementer:** the test image
+> (`/tmp/void-box-test-rootfs.cpio.gz`) does not include `iperf3` by
+> default. Either extend `scripts/build_test_image.sh` to include it,
+> or write a hand-rolled echo loop in Rust that ships with the
+> harness. The latter is simpler and recommended — see passt's
+> `test/perf/` for the methodology to copy.
+
+---
+
+### Task 0C.3: RR / CRR latency
+
+**Files:**
+- Modify: `src/bin/voidbox-network-bench/main.rs`
+
+- [ ] **Step 1: Implement `measure_rr_latency`** — open a TCP echo
+  socket on the host, run a guest-side loop that does
+  `connect+send+recv+close` (CRR) or `send+recv` on a kept-open
+  connection (RR), record `iterations` samples, return p50/p99 in µs.
+- [ ] **Step 2:** Wire into `main`. Populate
+  `report.tcp_rr_latency_us_*` and `report.tcp_crr_latency_us_p50`.
+- [ ] **Step 3: Run.** + +```bash +cargo run --bin voidbox-network-bench -- --iterations 100 --no-throughput +``` + +- [ ] **Step 4: Commit.** + +```bash +git add src/bin/voidbox-network-bench/main.rs +git commit -m "bench(network): TCP RR/CRR latency p50/p99" +``` + +--- + +### Task 0C.4: UDP DNS qps + JSON baseline + +**Files:** +- Modify: `src/bin/voidbox-network-bench/main.rs` + +- [ ] **Step 1: Implement `measure_dns_qps`** — guest-side loop + resolving `example.com` against the SLIRP DNS at 10.0.2.3, count + successful replies in a fixed window, divide. +- [ ] **Step 2:** Wire into `main`, populate `report.udp_dns_qps`. +- [ ] **Step 3: Run** with `--output baseline.json` and inspect: + +```bash +cargo run --bin voidbox-network-bench -- --output baseline.json +cat baseline.json +``` + +- [ ] **Step 4: Commit and stash a `baseline.json`** as a build + artifact (do **not** commit it — it's machine-specific). Document + in the binary's `--help` output how to use it for diffing. + +```bash +git add src/bin/voidbox-network-bench/main.rs +git commit -m "bench(network): UDP DNS qps and JSON report output" +``` + +--- + +## Workstream 0D — Trait extraction + rename + +### Task 0D.1: Define `NetworkBackend` trait + +**Files:** +- Modify: `src/network/mod.rs` + +- [ ] **Step 1: Use LSP `documentSymbol`** on `src/network/mod.rs` to + confirm where to insert the trait (after `NetworkConfig`, before + `TapDevice`). +- [ ] **Step 2: Add the trait.** + +```rust +use std::io; + +/// A network backend processes raw Ethernet frames between guest and +/// host. +/// +/// Implementations must be `Send` so they can be held behind +/// `Arc>` and accessed from both the vCPU thread (TX path) +/// and the net-poll thread (RX path). +pub trait NetworkBackend: Send { + /// Process a raw Ethernet frame sent by the guest. + /// + /// Called from the vCPU thread on MMIO write to the TX virtqueue. + /// Implementations must not block. 
+    fn process_guest_frame(&mut self, frame: &[u8]) -> io::Result<()>;
+
+    /// Drain Ethernet frames destined for the guest into `out`.
+    ///
+    /// Called every ~5ms from the net-poll thread. Frames are
+    /// complete Ethernet payloads — no virtio-net header (the caller
+    /// prepends that). The buffer is reused across calls to avoid
+    /// per-poll allocation.
+    fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>);
+
+    /// Backend health.
+    ///
+    /// `false` means the backend has entered an unrecoverable state
+    /// and should be reconstructed by the caller. The default
+    /// implementation always returns `true`.
+    fn is_healthy(&self) -> bool {
+        true
+    }
+}
+```
+
+> **Apply `rustdoc` skill:** confirm the doc comment style — summary
+> sentence first, no leading "This trait …", `# Errors` /
+> `# Panics` if applicable. The above complies.
+
+- [ ] **Step 3: Build.** `cargo check --target-dir target/check`
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/mod.rs
+git commit -m "feat(network): introduce NetworkBackend trait"
+```
+
+---
+
+### Task 0D.2: Tighten `SlirpStack::poll` to `drain_to_guest` signature
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Use LSP `findReferences`** on `SlirpStack::poll` to
+  list every call site — these all need to switch to
+  `drain_to_guest(&mut out)`.
+
+```bash
+# Inside the IDE / via LSP:
+# goToDefinition on `poll` → 392
+# findReferences on `poll` → list all callers
+```
+
+- [ ] **Step 2: Add the new method on `SlirpStack`** (do not yet
+  remove `poll` — keep both during the rename to keep the build
+  green).
+
+```rust
+/// Drain frames destined to the guest into `out`. Reuses the buffer
+/// across calls. See `NetworkBackend::drain_to_guest`.
+pub fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>) {
+    out.append(&mut self.poll());
+}
+```
+
+This is a thin wrapper for now — the real allocation drop happens in
+**Task 0D.3** when the `poll` body moves into `drain_to_guest`.
+
+- [ ] **Step 3: Build.** `cargo check`
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): add drain_to_guest wrapper for trait fit"
+```
+
+---
+
+### Task 0D.3: Move `poll` body into `drain_to_guest`, drop the per-call alloc
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Use LSP `goToDefinition`** on
+  `SlirpStack::poll` (around line 392) to land on its body.
+- [ ] **Step 2: Refactor.** Move the body of `poll` into
+  `drain_to_guest`, replacing every `self.inject_to_guest.drain(..)`
+  / `Vec::new()` allocation with appends to `out`.
+
+Before:
+
+```rust
+pub fn poll(&mut self) -> Vec<Vec<u8>> {
+    // ... existing body that builds and returns Vec<Vec<u8>>
+}
+
+pub fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>) {
+    out.append(&mut self.poll());
+}
+```
+
+After:
+
+```rust
+pub fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>) {
+    // ... body that pushes into `out` directly
+}
+
+#[deprecated(note = "use drain_to_guest")]
+pub fn poll(&mut self) -> Vec<Vec<u8>> {
+    let mut out = Vec::new();
+    self.drain_to_guest(&mut out);
+    out
+}
+```
+
+The deprecated `poll` keeps the existing tests/benches working while
+0D.4 migrates callers.
+
+- [ ] **Step 3: Build and run baseline tests.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+```
+
+Expected: all baseline pins still green. The deprecation warning
+fires from the test file — that's intended; tests migrate in 0D.6.
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): move poll body into drain_to_guest, drop alloc"
+```
+
+---
+
+### Task 0D.4: `impl NetworkBackend for SlirpStack`
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add the impl.** Use the existing methods (return type
+  for `process_guest_frame` is `Result` — the trait wants
+  `io::Result<()>`; bridge in the impl).
+
+```rust
+use crate::network::NetworkBackend;
+use std::io;
+
+impl NetworkBackend for SlirpStack {
+    fn process_guest_frame(&mut self, frame: &[u8]) -> io::Result<()> {
+        SlirpStack::process_guest_frame(self, frame)
+            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))
+    }
+
+    fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>) {
+        SlirpStack::drain_to_guest(self, out)
+    }
+}
+```
+
+> **Apply `rust-style` skill:** the closure can be a function-pointer
+> reference if `e.to_string()` works without arguments — but
+> `Error::to_string` takes `&self`, so the closure form is correct.
+> The trait method names shadow the inherent names; explicit
+> `SlirpStack::method(self, …)` disambiguates per project convention.
+
+- [ ] **Step 2: Build.** `cargo check`
+- [ ] **Step 3: Sanity test.**
+
+```rust
+// In tests/network_baseline.rs, behind the existing module, append:
+#[test]
+fn smoltcp_backend_implements_network_backend() {
+    fn assert_send<T: Send>() {}
+    fn assert_backend<T: NetworkBackend>() {}
+    assert_send::<SlirpStack>();
+    assert_backend::<SlirpStack>();
+}
+```
+
+```bash
+cargo test --test network_baseline smoltcp_backend_implements_network_backend
+```
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs tests/network_baseline.rs
+git commit -m "feat(slirp): impl NetworkBackend for SlirpStack"
+```
+
+---
+
+### Task 0D.5: Switch `VirtioNetDevice` to hold `Arc<Mutex<dyn NetworkBackend>>`
+
+**Files:**
+- Modify: `src/devices/virtio_net.rs`
+
+- [ ] **Step 1: Use LSP `documentSymbol`** on
+  `src/devices/virtio_net.rs` to map its struct + methods.
+- [ ] **Step 2: Use LSP `findReferences`** on the field that today
+  holds `Arc<Mutex<SlirpStack>>` to know all the access sites.
+- [ ] **Step 3: Apply `rust-analyzer-ssr`** to change
+  `Arc<Mutex<SlirpStack>>` → `Arc<Mutex<dyn NetworkBackend>>`
+  workspace-wide. SSR pattern (run from project root):
+
+```bash
+# From the LSP shell or via the `rust-analyzer-ssr` skill:
+# pattern: Arc<Mutex<SlirpStack>>
+# replace: Arc<Mutex<dyn NetworkBackend>>
+```
+
+- [ ] **Step 4: Update method bodies that called `poll()`** to call
+  `drain_to_guest(&mut buf)` against a reused buffer field.
+
+Before:
+
+```rust
+let frames = self.slirp.lock().unwrap().poll();
+for frame in frames { /* ... */ }
+```
+
+After:
+
+```rust
+self.rx_scratch.clear();
+self.slirp.lock().unwrap().drain_to_guest(&mut self.rx_scratch);
+for frame in self.rx_scratch.drain(..) { /* ... */ }
+```
+
+Add `rx_scratch: Vec<Vec<u8>>` to the struct, default-initialized.
+
+- [ ] **Step 5: Build + tests.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+```
+
+- [ ] **Step 6: Commit.**
+
+```bash
+git add src/devices/virtio_net.rs
+git commit -m "refactor(virtio_net): hold dyn NetworkBackend, reuse rx buffer"
+```
+
+---
+
+### Task 0D.6: Update VMM construction sites (cold-boot + snapshot-restore)
+
+**Files:**
+- Modify: `src/vmm/mod.rs`
+
+- [ ] **Step 1: Use LSP `findReferences`** on `SlirpStack::new` and
+  `SlirpStack::with_security` to find every construction site.
+  Expect two: cold boot (around `Vm::new`) and snapshot restore
+  (around `restore`). Confirm via the file's `documentSymbol`.
+
+- [ ] **Step 2: Wrap each construction in `Arc<Mutex<…>>`** and bind
+  the variable type as `Arc<Mutex<dyn NetworkBackend>>`:
+
+```rust
+let backend: Arc<Mutex<dyn NetworkBackend>> = Arc::new(Mutex::new(
+    SlirpStack::with_security(max_conn, max_rate, deny.clone())?,
+));
+```
+
+- [ ] **Step 3: Build + tests.**
+
+```bash
+cargo check
+cargo test --workspace --all-features
+```
+
+- [ ] **Step 4: Run the LSP `workspaceSymbol`** lookup for any
+  remaining `SlirpStack` references that should now be hidden behind
+  the trait. Anything outside `src/network/` and the construction
+  sites is suspect.
+ +- [ ] **Step 5: Commit.** + +```bash +git add src/vmm/mod.rs +git commit -m "refactor(vmm): construct network backend behind dyn trait" +``` + +--- + +### Task 0D.7: Rename `SlirpStack → SlirpBackend` + +**Files:** +- Modify: `src/network/slirp.rs`, `tests/network_baseline.rs`, + `benches/network.rs`, `src/devices/virtio_net.rs`, + `src/vmm/mod.rs`, any other references LSP turns up. + +The module file `src/network/slirp.rs` keeps its name — only the +type is renamed. (The current filename already aligns with the new +type name, and matches the convention used elsewhere in the repo: +`src/devices/virtio_net.rs` holds `VirtioNetDevice`, not a +`virtio_net_device.rs` file.) + +- [ ] **Step 1: Use LSP rename** (`rust-analyzer` rename refactor) on + `SlirpStack` → `SlirpBackend`. **Do not text-substitute** — the + rename also touches `tests/network_baseline.rs` imports, the + `benches/network.rs` imports, and any `pub use` re-exports. + +- [ ] **Step 2: Build + run all tests.** + +```bash +cargo check +cargo test --workspace --all-features +cargo test --test network_baseline +``` + +- [ ] **Step 3: Final build.** `cargo check` + +- [ ] **Step 4: Commit.** + +```bash +git add -A +git commit -m "refactor(network): rename SlirpStack to SlirpBackend" +``` + +--- + +## Workstream 0E — Validation + ship + +### Task 0E.1: Full validation gate + +**Files:** none + +- [ ] **Step 1: Format + clippy.** + +```bash +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +``` + +- [ ] **Step 2: Workspace tests.** + +```bash +cargo test --workspace --all-features +cargo test --doc --workspace --all-features +``` + +- [ ] **Step 3: Network baseline.** + +```bash +cargo test --test network_baseline +``` + +Expected: all tests pass, including the three `BROKEN_ON_PURPOSE` +pins (they assert *broken* behavior — green is correct). 
+ +- [ ] **Step 4: Microbenches no-regression.** + +```bash +cargo bench --bench network +``` + +Compare against `main` baseline (CI does this automatically; do it +locally first). + +- [ ] **Step 5: VM suites that touch networking.** + +```bash +export VOID_BOX_KERNEL=/boot/vmlinuz-$(uname -r) +scripts/build_test_image.sh +export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz +cargo test --test conformance -- --ignored --test-threads=1 +cargo test --test snapshot_integration -- --ignored --test-threads=1 +cargo test --test e2e_skill_pipeline -- --ignored --test-threads=1 +cargo test --test e2e_mount -- --ignored --test-threads=1 +``` + +- [ ] **Step 6: Repo `verify` skill.** Run the project's quality + gate (`/verify`) — format, clippy, tests, security audit, startup + bench regression, real-workload smoke. + +- [ ] **Step 7: aarch64 cross-check** (per `AGENTS.md`). + +```bash +CFLAGS_aarch64_unknown_linux_gnu="--sysroot=/usr/aarch64-redhat-linux/sys-root/fc43" \ + RUSTFLAGS="-D warnings" \ + cargo check --target aarch64-unknown-linux-gnu -p void-box --lib --tests +``` + +- [ ] **Step 8: macOS build smoke** (if a macOS box is available, or + via CI). The trait extraction must not break the macOS build — + `NetworkBackend` lives in `src/network/mod.rs` (cross-platform); + the `SmoltcpBackend` impl is gated `#[cfg(target_os = "linux")]`. + +- [ ] **Step 9:** If any gate fails, fix in place and re-run from + Step 1. Do not proceed to PR until all gates green. + +--- + +### Task 0E.2: Open the PR + +**Files:** none + +- [ ] **Step 1: Push the branch.** + +```bash +git push -u origin smoltcp-passt-port-phase0 +``` + +- [ ] **Step 2: Open the PR** with body: + +```markdown +## Phase 0: baseline + NetworkBackend trait + +Implements Phase 0 of `docs/superpowers/plans/2026-04-27-smoltcp-passt-port.md`. 
+ +**Zero user-visible behavior change.** This PR lands: + +- `tests/network_baseline.rs` — 13 unit-level pins for the smoltcp-based + SLIRP stack, including three deliberately-broken assertions that + flip in Phases 1, 2, 3. +- `benches/network.rs` — divan microbenches for SLIRP hot paths + (process_syn, poll_idle, NAT-walk scaling, DNS cache hit/miss). +- `voidbox-network-bench` — wall-clock e2e harness with metric names + matching passt's published table. +- `NetworkBackend` trait in `src/network/mod.rs`. +- `SlirpStack` renamed to `SlirpBackend` (role-based name, + symmetric with future `TapBackend`/`VhostNetBackend`); `poll` + replaced by `drain_to_guest(&mut Vec>)` to drop the + per-poll allocation. + +## Test plan + +- [x] cargo fmt / clippy clean +- [x] cargo test --workspace --all-features +- [x] cargo test --test network_baseline +- [x] cargo bench --bench network — no regression +- [x] conformance, snapshot_integration, e2e_skill_pipeline, + e2e_mount green +- [x] aarch64 cross-check green +- [x] macOS build smoke green +- [x] /verify clean + +## Broken on purpose + +These three baseline pins assert today's broken behavior. They flip +in subsequent phases — do not "fix" them in this PR: + +- `tcp_to_host_buffer_drops_at_256kb` (flips in Phase 3) +- `udp_non_dns_silently_dropped` (flips in Phase 2) +- `icmp_echo_silently_dropped` (flips in Phase 1) +``` + +- [ ] **Step 3: Tag for review.** Phase 0 is mechanical; the trait + shape is the only design decision worth a second pair of eyes. + +--- + +## Self-review checklist (run before handing off) + +- [ ] Every task has explicit file paths, exact commands, expected + output. +- [ ] No `TBD`, no "implement appropriately", no "similar to Task N" + without repeating the code. +- [ ] Three `BROKEN_ON_PURPOSE` pins are present (Tasks 0A.4, 0A.8, + 0A.9) and each names the phase that flips it. +- [ ] Trait surface in 0D.1 matches the spec doc exactly + (`drain_to_guest` out-param, `is_healthy` default-true). 
+- [ ] Rename in 0D.7 uses LSP rename (rust-analyzer-ssr), not text + substitution. Type renames to `SlirpBackend` (role-based, not + `SmoltcpBackend`). +- [ ] Validation gate in 0E.1 covers fmt, clippy, workspace tests, + baseline tests, microbenches, VM suites, aarch64 cross-check, + macOS smoke. +- [ ] All Rust-touching tasks reference `rust-style` / `rustdoc` / + `rust-analyzer-ssr` where they apply. diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase1.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase1.md new file mode 100644 index 00000000..668d06eb --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase1.md @@ -0,0 +1,663 @@ +# Phase 1 Implementation Plan: ICMP Echo via Unprivileged SOCK_DGRAM IPPROTO_ICMP + +> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development. +> Steps use checkbox (`- [ ]`) syntax for tracking. +> +> **Mandatory skills for every Rust-touching task:** +> `rust-style`, `rustdoc`, `rust-analyzer-ssr`, +> `superpowers:test-driven-development`, +> `superpowers:verification-before-completion`. Use LSP for navigation. + +**Spec:** [`2026-04-27-smoltcp-passt-port.md`](2026-04-27-smoltcp-passt-port.md) +**Continues from Phase 0:** [`2026-04-27-smoltcp-passt-port-phase0.md`](2026-04-27-smoltcp-passt-port-phase0.md) + +**Goal:** Make `ping` work inside guest VMs by relaying ICMP echo +through an unprivileged host kernel socket (`SOCK_DGRAM IPPROTO_ICMP`), +in the style of passt's `icmp.c`. Flip the `icmp_echo_silently_dropped` +BROKEN_ON_PURPOSE pin to assert the new behavior. + +**Architecture:** New `IcmpEchoEntry` per `(guest_id, dst_ip)` flow. +Each entry owns one `IPPROTO_ICMP` `SOCK_DGRAM` socket. `handle_icmp_frame` +sends echo requests through the socket; `relay_icmp_echo` polls socket +replies and emits ICMP echo reply frames to the guest. 
The host kernel +rewrites the ICMP id between guest_id and a kernel-assigned id; we +track the mapping per-flow and translate on the way back. + +**Tech Stack:** Rust 1.88, `libc` (existing dep) for `socket(2)` with +`IPPROTO_ICMP`, `smoltcp` 0.11 for `Icmpv4Packet`/`Icmpv4Repr` wire +types (already in use), `std::os::fd::FromRawFd` for the wrap. + +**Branch:** `smoltcp-passt-port-phase0` (same branch as Phase 0 — user +explicitly continues here, do not branch). + +--- + +## Cross-platform precondition + +Linux requires `net.ipv4.ping_group_range` to permit the calling GID +for unprivileged `IPPROTO_ICMP` sockets. The default on Fedora/Ubuntu +since ~2014 is `0 2147483647` (all gids), but it can be tightened by +admins. Approach: + +1. Try to open the socket once at `SlirpBackend::new` (or lazily on + first ICMP frame). If `socket()` returns `EACCES` or `EPERM`, log a + one-shot warning and **drop** ICMP frames as before. +2. macOS allows the same syscall unconditionally; no sysctl gate. + +This is the *exact* compatibility shape passt uses — see `icmp.c` +in `/home/diego/github/passt`. + +--- + +## Task structure + +7 tasks across two workstreams. 
+
+| ID | Workstream | Scope |
+|---|---|---|
+| 1.1 | impl | Add `IcmpEchoEntry` + per-flow socket helper |
+| 1.2 | impl | Wire `handle_icmp_frame` for guest→host echo path |
+| 1.3 | impl | Wire `relay_icmp_echo` for host→guest reply path |
+| 1.4 | impl | Sysctl-fallback to drop on `EACCES` / `EPERM` |
+| 1.5 | test | Flip `icmp_echo_silently_dropped` to assert reply |
+| 1.6 | bench | Populate `icmp_rr_latency_us_p50` in `voidbox-network-bench` |
+| 1.7 | gate | Validation + commit summary |
+
+---
+
+## Workstream 1A — Implementation (`src/network/slirp.rs`)
+
+### Task 1.1: `IcmpEchoEntry` + per-flow socket helper
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Define a NatKey-style key for ICMP echo.**
+
+```rust
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+struct IcmpEchoKey {
+    guest_id: u16,
+    dst_ip: Ipv4Address,
+}
+```
+
+- [ ] **Step 2: Define `IcmpEchoEntry`.**
+
+```rust
+struct IcmpEchoEntry {
+    /// Host-side socket, `socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)`.
+    /// Set non-blocking; the kernel handles the ICMP framing.
+    sock: std::net::UdpSocket,
+    /// The guest's original ICMP id from the echo request. The kernel
+    /// assigns its own id when we send via the SOCK_DGRAM ICMP socket;
+    /// on reply we translate the kernel id back to `guest_id`.
+    guest_id: u16,
+    last_activity: std::time::Instant,
+}
+```
+
+`std::net::UdpSocket` is the wrapper we use — see Step 3 for why.
+
+- [ ] **Step 3: Add a helper `open_icmp_socket() -> io::Result<std::net::UdpSocket>`** at module scope:
+
+```rust
+fn open_icmp_socket() -> io::Result<std::net::UdpSocket> {
+    use std::os::fd::FromRawFd;
+
+    // SAFETY: socket(2) returns -1 on error; we check before wrapping.
+    // IPPROTO_ICMP + SOCK_DGRAM is the unprivileged ICMP path: kernel
+    // handles ICMP framing, no CAP_NET_RAW required.
+    let raw = unsafe {
+        libc::socket(
+            libc::AF_INET,
+            libc::SOCK_DGRAM | libc::SOCK_NONBLOCK | libc::SOCK_CLOEXEC,
+            libc::IPPROTO_ICMP,
+        )
+    };
+    if raw < 0 {
+        return Err(io::Error::last_os_error());
+    }
+    // SAFETY: `raw` is a valid fd from socket(2); UdpSocket adopts
+    // ownership and closes on drop.
+    Ok(unsafe { std::net::UdpSocket::from_raw_fd(raw) })
+}
+```
+
+Rationale: `std::net::UdpSocket` uses the SOCK_DGRAM I/O surface
+(`recv_from`, `send_to`); it doesn't care that the underlying protocol
+is ICMP rather than UDP. This is the same pattern passt uses (just
+with raw fds).
+
+- [ ] **Step 4: Add `icmp_echo: HashMap<IcmpEchoKey, IcmpEchoEntry>` field to `SlirpBackend`.**
+
+Initialize in `SlirpBackend::with_security(...)` and `SlirpBackend::new()`.
+
+- [ ] **Step 5: `cargo check`** — should compile clean. No behavior wired yet.
+
+- [ ] **Step 6: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): add IcmpEchoEntry + IPPROTO_ICMP socket helper"
+```
+
+---
+
+### Task 1.2: `handle_icmp_frame` (guest → host)
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Update `handle_ipv4_frame` to dispatch ICMP.** Around
+  line 654 (the "drop silently" branch), insert before it:
+
+```rust
+if protocol == IpProtocol::Icmp {
+    return self.handle_icmp_frame(&ipv4);
+}
+```
+
+- [ ] **Step 2: Add `handle_icmp_frame`** as a sibling of
+  `handle_dns_frame`.
Body: + +```rust +fn handle_icmp_frame(&mut self, ipv4: &Ipv4Packet<&[u8]>) -> Result<()> { + let icmp = match smoltcp::wire::Icmpv4Packet::new_checked(ipv4.payload()) { + Ok(p) => p, + Err(_) => return Ok(()), + }; + let repr = match smoltcp::wire::Icmpv4Repr::parse(&icmp, &Default::default()) { + Ok(r) => r, + Err(_) => return Ok(()), + }; + let (ident, seq_no, data) = match repr { + smoltcp::wire::Icmpv4Repr::EchoRequest { ident, seq_no, data } => { + (ident, seq_no, data) + } + _ => return Ok(()), // only echo request handled today + }; + + let key = IcmpEchoKey { guest_id: ident, dst_ip: ipv4.dst_addr() }; + let entry = match self.icmp_echo.entry(key) { + std::collections::hash_map::Entry::Occupied(o) => o.into_mut(), + std::collections::hash_map::Entry::Vacant(v) => { + let sock = match open_icmp_socket() { + Ok(s) => s, + Err(e) => { + // Sysctl-driven fallback handled in Task 1.4. + trace!("SLIRP ICMP: open socket failed: {e}"); + return Ok(()); + } + }; + v.insert(IcmpEchoEntry { + sock, + guest_id: ident, + last_activity: Instant::now(), + }) + } + }; + entry.last_activity = Instant::now(); + + // Build a wire ICMP echo packet with seq + data; the kernel will + // rewrite the ident on send_to. + let req = smoltcp::wire::Icmpv4Repr::EchoRequest { + ident: 0, // kernel rewrites + seq_no, + data, + }; + let mut buf = vec![0u8; req.buffer_len()]; + let mut pkt = smoltcp::wire::Icmpv4Packet::new_unchecked(&mut buf); + req.emit(&mut pkt, &Default::default()); + + let dst = std::net::SocketAddr::from(( + std::net::Ipv4Addr::from(ipv4.dst_addr().0), + 0u16, // port ignored for ICMP + )); + if let Err(e) = entry.sock.send_to(&buf, dst) { + trace!("SLIRP ICMP: send_to failed: {e}"); + } + Ok(()) +} +``` + +- [ ] **Step 3: cargo check + cargo test --test network_baseline.** The + ICMP test still passes today (assertion is `assert!(!saw_icmp_reply)` — + no reply yet because reply path is in Task 1.3). 
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): forward guest ICMP echo via SOCK_DGRAM IPPROTO_ICMP"
+```
+
+---
+
+### Task 1.3: `relay_icmp_echo` (host → guest reply path)
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add a `relay_icmp_echo` method** alongside
+  `relay_tcp_nat_data`. Body:
+
+```rust
+fn relay_icmp_echo(&mut self) {
+    // Drain replies from each active ICMP socket and emit echo-reply
+    // frames to the guest.
+    let now = Instant::now();
+    const ICMP_IDLE_TIMEOUT: Duration = Duration::from_secs(60);
+
+    let keys: Vec<IcmpEchoKey> = self.icmp_echo.keys().copied().collect();
+    for key in keys {
+        let frame = {
+            let Some(entry) = self.icmp_echo.get_mut(&key) else { continue; };
+            if now.duration_since(entry.last_activity) > ICMP_IDLE_TIMEOUT {
+                None // mark for removal below
+            } else {
+                let mut buf = [0u8; 1500];
+                match entry.sock.recv_from(&mut buf) {
+                    Ok((n, _addr)) => {
+                        entry.last_activity = now;
+                        Some(Self::build_icmp_echo_reply_to_guest(
+                            key.dst_ip,
+                            entry.guest_id,
+                            &buf[..n],
+                        ))
+                    }
+                    Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
+                    Err(_) => continue,
+                }
+            }
+        };
+        match frame {
+            None => {
+                self.icmp_echo.remove(&key);
+            }
+            Some(Some(f)) => self.inject_to_guest.push(f),
+            Some(None) => {} // build failed; drop silently
+        }
+    }
+}
+
+fn build_icmp_echo_reply_to_guest(
+    src_ip: Ipv4Address,
+    guest_id: u16,
+    raw_icmp: &[u8],
+) -> Option<Vec<u8>> {
+    use smoltcp::wire::*;
+    let icmp = Icmpv4Packet::new_checked(raw_icmp).ok()?;
+    let parsed = Icmpv4Repr::parse(&icmp, &Default::default()).ok()?;
+    let (seq_no, data) = match parsed {
+        Icmpv4Repr::EchoReply { seq_no, data, ..
} => (seq_no, data), + _ => return None, + }; + let reply = Icmpv4Repr::EchoReply { + ident: guest_id, + seq_no, + data, + }; + let ip_repr = Ipv4Repr { + src_addr: src_ip, + dst_addr: SLIRP_GUEST_IP, + next_header: IpProtocol::Icmp, + payload_len: reply.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GATEWAY_MAC), + dst_addr: EthernetAddress(GUEST_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + reply.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut icmp_out = Icmpv4Packet::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + reply.emit(&mut icmp_out, &Default::default()); + Some(buf) +} +``` + +- [ ] **Step 2: Wire `relay_icmp_echo` into `drain_to_guest`.** Around + the existing `self.relay_tcp_nat_data();` call (find via LSP), add + `self.relay_icmp_echo();` immediately after. + +- [ ] **Step 3: cargo check + cargo test --test network_baseline.** All + 13 tests still pass; the broken-on-purpose assertion remains green + because Task 1.5 hasn't flipped it yet (Task 1.5 will demonstrate the + reply path actually works). + +- [ ] **Step 4: Commit.** + +```bash +git add src/network/slirp.rs +git commit -m "feat(slirp): relay ICMP echo replies back to guest" +``` + +--- + +### Task 1.4: Sysctl fallback (graceful degrade) + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Add a once-cell `static`** at module scope to track + whether ICMP support is available: + +```rust +use std::sync::atomic::{AtomicU8, Ordering}; + +/// Tristate: 0 = unknown, 1 = available, 2 = unavailable. 
+static ICMP_PROBE: AtomicU8 = AtomicU8::new(0);
+```
+
+- [ ] **Step 2: Probe in `open_icmp_socket`** — on the first call, try
+  the syscall; if it fails with `EACCES`/`EPERM`, set `ICMP_PROBE = 2`,
+  log a one-shot warning, and return `Err`. Subsequent calls short-circuit
+  on `2`.
+
+```rust
+fn open_icmp_socket() -> io::Result<std::net::UdpSocket> {
+    if ICMP_PROBE.load(Ordering::Relaxed) == 2 {
+        return Err(io::Error::new(
+            io::ErrorKind::PermissionDenied,
+            "ICMP unprivileged probe previously failed",
+        ));
+    }
+    use std::os::fd::FromRawFd;
+    let raw = unsafe {
+        libc::socket(
+            libc::AF_INET,
+            libc::SOCK_DGRAM | libc::SOCK_NONBLOCK | libc::SOCK_CLOEXEC,
+            libc::IPPROTO_ICMP,
+        )
+    };
+    if raw < 0 {
+        let err = io::Error::last_os_error();
+        if matches!(err.raw_os_error(), Some(libc::EACCES) | Some(libc::EPERM)) {
+            if ICMP_PROBE.swap(2, Ordering::Relaxed) != 2 {
+                tracing::warn!(
+                    "SLIRP: unprivileged ICMP unavailable on this host \
+                     (sysctl net.ipv4.ping_group_range likely restricts \
+                     it); ICMP echo from guests will be dropped."
+                );
+            }
+        }
+        return Err(err);
+    }
+    ICMP_PROBE.store(1, Ordering::Relaxed);
+    Ok(unsafe { std::net::UdpSocket::from_raw_fd(raw) })
+}
+```
+
+- [ ] **Step 3: cargo check + tests.** Behavior on Linux/macOS where
+  the syscall is permitted is unchanged. On a host with restrictive
+  sysctl, the warning fires once and ICMP frames are silently dropped
+  (the same behavior as before Phase 1 — the BROKEN_ON_PURPOSE pin
+  becomes the steady state for that environment).
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): warn-once + fallback when unprivileged ICMP forbidden"
+```
+
+---
+
+## Workstream 1B — Test + bench
+
+### Task 1.5: Flip `icmp_echo_silently_dropped` BROKEN_ON_PURPOSE pin
+
+**Files:**
+- Modify: `tests/network_baseline.rs`
+
+- [ ] **Step 1: Find the test** (introduced in Phase 0 task 0A.9).
+ Rename it to `icmp_echo_returns_reply` and rewrite the body to + assert a reply IS observed: + +```rust +/// Phase 1 flipped the BROKEN_ON_PURPOSE assertion: the guest now +/// receives an ICMP echo reply via the host's unprivileged +/// `IPPROTO_ICMP SOCK_DGRAM` socket. +#[test] +fn icmp_echo_returns_reply() { + use smoltcp::wire::{Icmpv4Packet, Icmpv4Repr}; + + let icmp_repr = Icmpv4Repr::EchoRequest { + ident: 0xbeef, + seq_no: 1, + data: b"ping", + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + // 127.0.0.1 — guaranteed to respond on most hosts via the host + // kernel's loopback; macOS and Linux both reply to ICMP echo. + dst_addr: Ipv4Address::new(127, 0, 0, 1), + next_header: IpProtocol::Icmp, + payload_len: icmp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = ETH_HDR_LEN + ip_repr.buffer_len() + icmp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut icmp = Icmpv4Packet::new_unchecked( + &mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..], + ); + icmp_repr.emit(&mut icmp, &Default::default()); + + let mut stack = match SlirpBackend::new() { + Ok(s) => s, + Err(_) => { + eprintln!("skip: SlirpBackend::new failed"); + return; + } + }; + if stack.process_guest_frame(&buf).is_err() { + eprintln!("skip: process_guest_frame failed (likely no ICMP support)"); + return; + } + + // Poll up to 20 × 50ms for the reply. 
+    let mut saw_reply = false;
+    for _ in 0..20 {
+        for f in drain_n(&mut stack, 1) {
+            let Some(eth) = EthernetFrame::new_checked(f.as_slice()).ok() else { continue; };
+            if eth.ethertype() != EthernetProtocol::Ipv4 { continue; }
+            let Some(ip) = Ipv4Packet::new_checked(eth.payload()).ok() else { continue; };
+            if ip.next_header() == IpProtocol::Icmp && ip.dst_addr() == SLIRP_GUEST_IP {
+                saw_reply = true;
+                break;
+            }
+        }
+        if saw_reply { break; }
+        std::thread::sleep(std::time::Duration::from_millis(50));
+    }
+
+    if !saw_reply {
+        // Sysctl may forbid unprivileged ICMP on some hosts. Skip
+        // rather than fail — the warn-once log explains why.
+        eprintln!(
+            "skip: no ICMP reply received within 1s; \
+             sysctl net.ipv4.ping_group_range may forbid unprivileged ICMP"
+        );
+    }
+}
+```
+
+- [ ] **Step 2: Run.**
+
+```bash
+cargo test --test network_baseline icmp_echo_returns_reply
+```
+
+Expected: PASS (or SKIP with the sysctl message on a restrictive host).
+
+- [ ] **Step 3: Run the full suite** to confirm no regression:
+
+```bash
+cargo test --test network_baseline
+```
+
+Expected: 14 tests pass (the renamed test is one of them).
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add tests/network_baseline.rs
+git commit -m "test(network): flip ICMP pin — assert echo reply (was BROKEN_ON_PURPOSE)"
+```
+
+---
+
+### Task 1.6: Populate `icmp_rr_latency_us_p50` in `voidbox-network-bench`
+
+**Files:**
+- Modify: `src/bin/voidbox-network-bench/main.rs`
+
+- [ ] **Step 1: Add `measure_icmp_rr_latency`** alongside the existing
+  measurement functions. Use busybox `ping` (which is in the test
+  initramfs) inside the guest:
+
+```bash
+ping -c <count> -W 1 -i 0.05 8.8.8.8 \
+  | awk '/time=/ { sub(/^.*time=/, ""); sub(/ ms.*/, ""); print }'
+```
+
+Each line of output is one RTT in milliseconds; multiply by 1000 for
+microseconds, collect, percentile.
+
+The guest exec returns the joined output via the existing
+`ControlChannel::exec` API.
Parse the lines, build a `Vec<f64>`,
+call `percentile(&mut samples, 0.5)`.
+
+If the guest's ICMP echo fails (sysctl, host kernel, etc.), `ping`
+returns a non-zero exit. Treat that as "leave the metric `None`" with
+a `WARN` log, same fallback shape as the other measurements.
+
+- [ ] **Step 2: Wire into `main`** — call after the existing TCP/UDP
+  measurements; populate `report.icmp_rr_latency_us_p50`.
+
+- [ ] **Step 3: Smoke run.**
+
+```bash
+VOID_BOX_KERNEL=$PWD/target/vmlinux-slim-x86_64 \
+VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz \
+  cargo run --release --bin voidbox-network-bench -- --iterations 1 \
+  | python3 -m json.tool
+```
+
+`icmp_rr_latency_us_p50` should be a non-null number now.
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/bin/voidbox-network-bench/main.rs
+git commit -m "bench(network): populate ICMP RR latency p50"
+```
+
+---
+
+## Workstream 1C — Validation
+
+### Task 1.7: Validation gate + summary commit
+
+**Files:** none (gate only)
+
+- [ ] **Step 1: Format + clippy.**
+
+```bash
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+```
+
+- [ ] **Step 2: Workspace tests.**
+
+```bash
+cargo test --workspace --all-features
+cargo test --doc --workspace --all-features
+```
+
+- [ ] **Step 3: Network baseline.**
+
+```bash
+cargo test --test network_baseline
+```
+
+Expected: 14 tests pass (previously-broken `icmp_echo_silently_dropped`
+is now `icmp_echo_returns_reply` and asserts a reply).
+
+- [ ] **Step 4: Microbenches no-regression.**
+
+```bash
+cargo bench --bench network
+```
+
+Compared to the Phase 0 baseline.
+ +- [ ] **Step 5: VM suites that touch networking** (Linux/KVM): + +```bash +export VOID_BOX_KERNEL=$PWD/target/vmlinux-slim-x86_64 +export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz +cargo test --test conformance -- --ignored --test-threads=1 +cargo test --test snapshot_integration -- --ignored --test-threads=1 +cargo test --test e2e_skill_pipeline -- --ignored --test-threads=1 +cargo test --test e2e_mount -- --ignored --test-threads=1 +``` + +- [ ] **Step 6: New ICMP RR metric** captured: + +```bash +cargo run --release --bin voidbox-network-bench -- --iterations 3 \ + --output /tmp/baseline-network-phase1.json +cat /tmp/baseline-network-phase1.json +``` + +`icmp_rr_latency_us_p50` should be a non-null number; the other +metrics should be statistically equivalent to Phase 0's baseline. + +- [ ] **Step 7: aarch64 cross-check** if available. + +- [ ] **Step 8:** No commit needed for validation alone. PR opens + later when the user is ready (across multiple phases on the same + branch). + +--- + +## Risks + +- **Sysctl-restricted hosts.** If `net.ipv4.ping_group_range` is `1 0` + (default on some hardened environments), `socket()` returns `EACCES` + and we silently degrade. The warn-once log + the test's skip path + handle this. Document in the PR description. +- **macOS portability.** macOS's `IPPROTO_ICMP SOCK_DGRAM` works + unconditionally, but the rest of `slirp.rs` is already + `#[cfg(target_os = "linux")]`-gated, so this isn't a practical + concern in Phase 1 — macOS uses VZ NAT, not SLIRP. +- **ICMP id collision.** Two guest processes pinging different hosts + with the same id won't collide because the key is + `(guest_id, dst_ip)`. Two guest processes pinging the *same* host + with the same id will share an entry — which is correct: replies + belong to whichever guest sent the matching seq. 
+ +## File impact + +| File | Change | Approximate LOC | +|---|---|---| +| `src/network/slirp.rs` | `IcmpEchoEntry`, `handle_icmp_frame`, `relay_icmp_echo`, sysctl fallback | +180 | +| `tests/network_baseline.rs` | flip `icmp_echo_silently_dropped` → `icmp_echo_returns_reply` | ~+15/-15 | +| `src/bin/voidbox-network-bench/main.rs` | `measure_icmp_rr_latency` | +50 | +| **Total** | | **~+230** (within the spec's ~150-LOC estimate plus test/bench wiring) | diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase2.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase2.md new file mode 100644 index 00000000..bb0512a3 --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase2.md @@ -0,0 +1,495 @@ +# Phase 2 Implementation Plan: Generalize UDP (per-flow connected sockets) + +> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development. +> Steps use checkbox (`- [ ]`) syntax for tracking. +> +> **Mandatory skills for every Rust-touching task:** +> `rust-style`, `rustdoc`, `rust-analyzer-ssr`, +> `superpowers:test-driven-development`, +> `superpowers:verification-before-completion`. Use LSP for navigation. + +**Spec:** [`2026-04-27-smoltcp-passt-port.md`](2026-04-27-smoltcp-passt-port.md) +**Continues from Phase 1:** [`2026-04-27-smoltcp-passt-port-phase1.md`](2026-04-27-smoltcp-passt-port-phase1.md) + +**Goal:** Replace the port-53-only `handle_dns_frame` fast-path with a +general per-flow UDP NAT, mirroring passt's `udp.c::udp_flow_from_tap` +design. Keep the existing DNS cache as a fast-path within the +generalized handler (the cache is actually better than what passt has, +per the spec). Flip the `udp_non_dns_silently_dropped` BROKEN_ON_PURPOSE +pin to verify arbitrary UDP works. + +**Architecture:** New `UdpFlowEntry` per `(guest_src_port, dst_ip, dst_port)`. +Each entry owns one connected `UdpSocket`. 
`handle_udp_frame` routes: +DNS (`SLIRP_DNS_IP:53`) keeps the existing cached/forward path; +everything else creates/reuses a flow and `send_to`s. `relay_udp_flows` +polls each socket for replies and emits UDP frames back to the guest. +Idle timeout reaps inactive flows. + +**Tech Stack:** Rust 1.88, `std::net::UdpSocket` (already used for DNS), +`smoltcp::wire::UdpRepr`/`UdpPacket` (already imported), no new deps. + +**Branch:** `smoltcp-passt-port-phase0` (continuing on the same branch +through Phase 0 + 1 + 2 — user instruction). + +--- + +## Task structure + +7 tasks across two workstreams. + +| ID | Workstream | Scope | +|---|---|---| +| 2.1 | impl | Add `UdpFlowEntry` + key + `icmp_echo`-style HashMap field | +| 2.2 | impl | Generalize dispatch: route non-53 UDP to `handle_udp_frame` | +| 2.3 | impl | Implement `relay_udp_flows` host→guest reply path | +| 2.4 | impl | Idle timeout + flow reaping (60s) | +| 2.5 | test | Flip `udp_non_dns_silently_dropped` BROKEN_ON_PURPOSE pin | +| 2.6 | bench | Replace `measure_dns_qps`'s `nc -w1`-bottlenecked impl with a real UDP socket | +| 2.7 | gate | Phase 2 validation gate | + +--- + +## Workstream 2A — Implementation (`src/network/slirp.rs`) + +### Task 2.1: `UdpFlowEntry` + per-flow socket helper + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Define key + entry types** (mirror `IcmpEchoKey`/`IcmpEchoEntry` from Phase 1): + +```rust +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct UdpFlowKey { + guest_src_port: u16, + dst_ip: Ipv4Address, + dst_port: u16, +} + +struct UdpFlowEntry { + /// Connected `UdpSocket`. The host kernel handles source-port + /// preservation and reply demux; we just `send_to` and + /// `recv_from`. Set non-blocking. 
+    sock: std::net::UdpSocket,
+    last_activity: Instant,
+}
+```
+
+- [ ] **Step 2: Add helper `open_udp_flow_socket(dst: SocketAddr) -> io::Result<std::net::UdpSocket>`**
+
+```rust
+fn open_udp_flow_socket(dst: std::net::SocketAddr) -> io::Result<std::net::UdpSocket> {
+    let sock = std::net::UdpSocket::bind("0.0.0.0:0")?;
+    sock.set_nonblocking(true)?;
+    sock.connect(dst)?;
+    Ok(sock)
+}
+```
+
+`connect()` on a `UdpSocket` doesn't open a TCP-style connection — it
+sets the default destination and filters incoming datagrams to that
+peer only. This is what passt's per-flow design relies on.
+
+- [ ] **Step 3: Add `udp_flows: HashMap<UdpFlowKey, UdpFlowEntry>` field on `SlirpBackend`.**
+
+Initialize in `with_security` (the canonical constructor) — `new()` and `Default::default()` delegate to it.
+
+- [ ] **Step 4: cargo check** — should compile clean. No behavior wired yet.
+
+- [ ] **Step 5: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): add UdpFlowEntry + per-flow connected socket helper"
+```
+
+---
+
+### Task 2.2: Dispatch non-DNS UDP to `handle_udp_frame`
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Update `handle_ipv4_frame` to route UDP.** Currently
+  (around line 642):
+
+```rust
+if dst_ip == SLIRP_DNS_IP && protocol == IpProtocol::Udp {
+    return self.handle_dns_frame(&ipv4);
+}
+```
+
+Change to:
+
+```rust
+if protocol == IpProtocol::Udp {
+    if dst_ip == SLIRP_DNS_IP {
+        return self.handle_dns_frame(&ipv4);
+    }
+    return self.handle_udp_frame(&ipv4);
+}
+```
+
+DNS keeps its dedicated handler (cache + upstream forward). Everything else flows through the new path.
+ +- [ ] **Step 2: Add `handle_udp_frame`** as a sibling of `handle_dns_frame`: + +```rust +fn handle_udp_frame(&mut self, ipv4: &Ipv4Packet<&[u8]>) -> Result<()> { + let udp = match UdpPacket::new_checked(ipv4.payload()) { + Ok(u) => u, + Err(_) => return Ok(()), + }; + let payload = udp.payload().to_vec(); // own; mutable borrow of self below + let key = UdpFlowKey { + guest_src_port: udp.src_port(), + dst_ip: ipv4.dst_addr(), + dst_port: udp.dst_port(), + }; + + // SLIRP gateway translation: 10.0.2.2 → 127.0.0.1 (same trick as TCP). + let dst_ip_for_socket = if key.dst_ip == SLIRP_GATEWAY_IP { + std::net::Ipv4Addr::LOCALHOST + } else { + std::net::Ipv4Addr::from(key.dst_ip.0) + }; + let dst = std::net::SocketAddr::from((dst_ip_for_socket, key.dst_port)); + + let entry = match self.udp_flows.entry(key) { + std::collections::hash_map::Entry::Occupied(o) => o.into_mut(), + std::collections::hash_map::Entry::Vacant(v) => { + let sock = match open_udp_flow_socket(dst) { + Ok(s) => s, + Err(e) => { + trace!("SLIRP UDP: open flow socket failed: {e}"); + return Ok(()); + } + }; + v.insert(UdpFlowEntry { sock, last_activity: Instant::now() }) + } + }; + entry.last_activity = Instant::now(); + + if let Err(e) = entry.sock.send(&payload) { + trace!("SLIRP UDP: send failed: {e}"); + } + Ok(()) +} +``` + +- [ ] **Step 3: cargo check + tests.** All 14 baseline tests still pass. + `udp_non_dns_silently_dropped` continues to pass (no reply path yet). 
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): forward non-DNS UDP via per-flow connected sockets"
+```
+
+---
+
+### Task 2.3: `relay_udp_flows` host→guest reply path
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add `relay_udp_flows`** alongside `relay_icmp_echo`:
+
+```rust
+fn relay_udp_flows(&mut self) {
+    let now = Instant::now();
+    let keys: Vec<UdpFlowKey> = self.udp_flows.keys().copied().collect();
+    for key in keys {
+        let frame = {
+            let Some(entry) = self.udp_flows.get_mut(&key) else { continue; };
+            let mut buf = [0u8; 1500];
+            match entry.sock.recv(&mut buf) {
+                Ok(n) => {
+                    entry.last_activity = now;
+                    Self::build_udp_reply_to_guest(
+                        key.dst_ip, key.dst_port, key.guest_src_port, &buf[..n],
+                    )
+                }
+                Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
+                Err(_) => continue,
+            }
+        };
+        if let Some(f) = frame {
+            self.inject_to_guest.push(f);
+        }
+    }
+}
+
+fn build_udp_reply_to_guest(
+    src_ip: Ipv4Address,
+    src_port: u16,
+    dst_port: u16,
+    payload: &[u8],
+) -> Option<Vec<u8>> {
+    let udp_repr = UdpRepr { src_port, dst_port };
+    let ip_repr = Ipv4Repr {
+        src_addr: src_ip,
+        dst_addr: SLIRP_GUEST_IP,
+        next_header: IpProtocol::Udp,
+        payload_len: 8 + payload.len(),
+        hop_limit: 64,
+    };
+    let eth_repr = EthernetRepr {
+        src_addr: EthernetAddress(GATEWAY_MAC),
+        dst_addr: EthernetAddress(GUEST_MAC),
+        ethertype: EthernetProtocol::Ipv4,
+    };
+    let total = 14 + ip_repr.buffer_len() + 8 + payload.len();
+    let mut buf = vec![0u8; total];
+    let mut eth = EthernetFrame::new_unchecked(&mut buf[..]);
+    eth_repr.emit(&mut eth);
+    let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]);
+    ip_repr.emit(&mut ip, &Default::default());
+    let mut udp = UdpPacket::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]);
+    udp_repr.emit(
+        &mut udp,
+        &IpAddress::Ipv4(src_ip),
+        &IpAddress::Ipv4(SLIRP_GUEST_IP),
+        payload.len(),
+        |b| b.copy_from_slice(payload),
+        &Default::default(),
+    );
+    Some(buf)
+}
+```
+
+Note `payload.len()` (NOT `8 + payload.len()`) for `udp_repr.emit`'s
+4th arg — matches the bug we fixed in 0A.7.
+
+- [ ] **Step 2: Wire into `drain_to_guest`.** Find the existing chain:
+  `self.relay_tcp_nat_data();` → `self.relay_icmp_echo();` and append
+  `self.relay_udp_flows();` after the ICMP relay.
+
+- [ ] **Step 3: cargo check + tests.** Note: `udp_non_dns_silently_dropped`
+  is now expected to FAIL — UDP replies actually flow. Don't flip the
+  test in this task (Task 2.5 owns that). Run with `--no-fail-fast` to
+  confirm only that one test fails.
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): relay UDP flow replies back to guest"
+```
+
+---
+
+### Task 2.4: UDP idle timeout + flow reaping
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add idle reap to `relay_udp_flows`.** At the start (or
+  end) of the function, walk entries and remove those past
+  `UDP_IDLE_TIMEOUT`:
+
+```rust
+const UDP_IDLE_TIMEOUT: Duration = Duration::from_secs(60);
+
+// At top of relay_udp_flows:
+let stale: Vec<UdpFlowKey> = self
+    .udp_flows
+    .iter()
+    .filter(|(_, e)| now.duration_since(e.last_activity) > UDP_IDLE_TIMEOUT)
+    .map(|(k, _)| *k)
+    .collect();
+for k in stale {
+    self.udp_flows.remove(&k);
+}
+```
+
+passt uses `/proc/sys/net/netfilter/nf_conntrack_udp_timeout` for this; we hardcode 60s (the kernel default). Don't read from /proc.
+
+- [ ] **Step 2: cargo check + tests.** No new test for the timeout
+  (the test would need to wait 60s; integration cost not worth it).
+
+- [ ] **Step 3: Commit.**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): UDP flow idle reap (60s)"
+```
+
+---
+
+## Workstream 2B — Test + bench
+
+### Task 2.5: Flip `udp_non_dns_silently_dropped` BROKEN_ON_PURPOSE pin
+
+**Files:**
+- Modify: `tests/network_baseline.rs`
+
+- [ ] **Step 1: Find the test** (introduced in 0A.8).
Rename to + `udp_non_dns_round_trips` and rewrite to assert the host receives + the datagram, then sends a reply that the guest receives. + +```rust +/// Phase 2 flipped the BROKEN_ON_PURPOSE assertion: arbitrary UDP +/// (any destination port, not just 53) now round-trips through the +/// per-flow connected-socket NAT. +#[test] +fn udp_non_dns_round_trips() { + let host_sock = UdpSocket::bind("127.0.0.1:0").unwrap(); + let host_port = host_sock.local_addr().unwrap().port(); + host_sock + .set_read_timeout(Some(std::time::Duration::from_millis(500))) + .unwrap(); + + let mut stack = SlirpBackend::new().unwrap(); + + // Guest sends "hello" to gateway:host_port (which SLIRP rewrites + // to 127.0.0.1:host_port). + stack + .process_guest_frame(&build_udp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + b"hello", + )) + .unwrap(); + let _ = drain_n(&mut stack, 4); + + // Host receives the datagram. + let mut buf = [0u8; 32]; + let (n, peer) = host_sock.recv_from(&mut buf).expect("host receives guest UDP"); + assert_eq!(&buf[..n], b"hello"); + + // Host echoes back. + host_sock.send_to(&buf[..n], peer).unwrap(); + + // Drain — guest should see the reply on its source port. 
+ let mut saw_reply = false; + for _ in 0..20 { + for f in drain_n(&mut stack, 1) { + let Some(eth) = EthernetFrame::new_checked(f.as_slice()).ok() else { continue; }; + if eth.ethertype() != EthernetProtocol::Ipv4 { continue; } + let Some(ip) = Ipv4Packet::new_checked(eth.payload()).ok() else { continue; }; + if ip.next_header() != IpProtocol::Udp { continue; } + let Some(udp_pkt) = UdpPacket::new_checked(ip.payload()).ok() else { continue; }; + if udp_pkt.dst_port() == GUEST_EPHEMERAL_PORT && udp_pkt.payload() == b"hello" { + saw_reply = true; + break; + } + } + if saw_reply { break; } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + assert!(saw_reply, "guest must receive UDP reply via per-flow NAT"); +} +``` + +- [ ] **Step 2: Run.** + +```bash +cargo test --test network_baseline udp_ +cargo test --test network_baseline # confirm 14 pass total +``` + +- [ ] **Step 3: Commit.** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): flip UDP pin — assert non-DNS round-trips (was BROKEN_ON_PURPOSE)" +``` + +--- + +### Task 2.6: Replace `measure_dns_qps` busybox-`nc`-bottlenecked impl + +**Files:** +- Modify: `src/bin/voidbox-network-bench/main.rs` + +- [ ] **Step 1: Read the current `measure_dns_qps`** to understand the + existing flow. It currently runs busybox `nc -u -w1` per query in the + guest, which caps qps at ~1/s (0.5 qps observed) regardless of SLIRP + speed. With Phase 2's general UDP, we can do something faster. + +- [ ] **Step 2: Replace the inner shell loop with a tighter pattern** + using busybox `dd`-style raw UDP via `/dev/udp/`. busybox `nc` opens + one connection per invocation and sleeps for the timeout. 
A loop in
+shell using `date +%s` to bound iterations:
+
+```sh
+end=$(($(date +%s) + 5))
+count=0
+while [ "$(date +%s)" -lt "$end" ]; do
+  printf '\x12\x34\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07example\x03com\x00\x00\x01\x00\x01' \
+    | nc -u -w0 -q0 10.0.2.3 53 >/dev/null 2>&1 && count=$((count + 1))
+done
+echo "qps=$((count / 5))"
+```
+
+`-w0` (no idle wait) and `-q0` (close immediately on EOF) prevent the
+1s-per-query stall. busybox `nc` may not honor both; if so, accept
+that DNS qps stays approximate and remove `measure_dns_qps` entirely
+(replacing it with a host-driven measurement that sends UDP through
+SLIRP from outside the guest — a smaller, cleaner change).
+
+If neither works reliably: leave the metric `null` with a `WARN`.
+The Phase 2 win is correctness (DNS isn't blocked anymore), not
+this specific number.
+
+- [ ] **Step 3: Smoke run** with `--iterations 1` and confirm the qps
+  metric is non-null and >> 0.5.
+
+- [ ] **Step 4: Commit.**
+
+```bash
+git add src/bin/voidbox-network-bench/main.rs
+git commit -m "bench(network): use tighter busybox-nc loop for DNS qps"
+```
+
+If Step 2 doesn't yield a reliable improvement, commit a smaller
+change documenting the limit and move on.
+
+---
+
+## Workstream 2C — Validation
+
+### Task 2.7: Validation gate
+
+**Files:** none (gate only)
+
+- [ ] fmt + clippy clean
+- [ ] `cargo test --workspace` clean (modulo the pre-existing
+  guest-agent flake we tracked earlier)
+- [ ] `cargo test --test network_baseline` 14 pass (the renamed test
+  is one of them)
+- [ ] `cargo bench --bench network` no regression
+- [ ] `cargo test --test snapshot_integration -- --ignored` 8/8 pass
+- [ ] Wall-clock smoke run produces non-null `udp_dns_qps` >= Phase 0
+  baseline (or stays `null` with documented WARN if Step 2.6 didn't
+  improve it)
+
+No PR opened — paused per user instruction. Branch will keep
+accumulating phases.
+ +--- + +## File impact + +| File | Approximate LOC | +|---|---| +| `src/network/slirp.rs` | +200 | +| `tests/network_baseline.rs` | +30 / -25 (renamed test) | +| `src/bin/voidbox-network-bench/main.rs` | +30 / -10 | +| **Total** | **~+225** | + +## Risks + +- **Per-flow socket creation can leak fds** if the idle timeout is + too long under burst traffic. 60s is generous; consider tightening + to 30s if memory pressure becomes an issue. Out of scope for this + phase; default 60s matches kernel conntrack. +- **No port-forwarding configurability yet.** Phase 2 only handles + outbound UDP from guest. Inbound UDP forwarding (host → guest port + X) is part of Phase 5 (stateless NAT translation refactor). +- **DNS cache stays.** Some users may expect Phase 2 to invalidate + it; we don't. Cache only fires on `dst == 10.0.2.3:53`; everything + else takes the per-flow path. diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase3.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase3.md new file mode 100644 index 00000000..04c6a62e --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase3.md @@ -0,0 +1,544 @@ +# Phase 3 Implementation Plan: TCP Relay Rewrite (MSG_PEEK + sequence mirroring) + +> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development. +> Steps use checkbox (`- [ ]`) syntax for tracking. +> +> **Mandatory skills for every Rust-touching task:** +> `rust-style`, `rustdoc`, `rust-analyzer-ssr`, +> `superpowers:test-driven-development`, +> `superpowers:verification-before-completion`. Use LSP for navigation. +> +> **THIS IS THE HIGH-RISK PHASE.** The TCP relay (~625 LOC at +> `src/network/slirp.rs:82–1048`) is the most fragile path in the +> project. The `tcp_to_host_buffer_drops_at_256kb` test pin is the +> headline assertion to flip. `snapshot_integration` and the +> conformance suite are the safety net — every task ends with both +> green or it doesn't land. 
+
+**Spec:** [`2026-04-27-smoltcp-passt-port.md`](2026-04-27-smoltcp-passt-port.md)
+**Continues from Phase 2:** [`2026-04-27-smoltcp-passt-port-phase2.md`](2026-04-27-smoltcp-passt-port-phase2.md)
+
+**Goal:** Replace the hand-rolled TCP relay's `to_guest: Vec<u8>` and
+`to_host: Vec<u8>` user-space buffers with passt-style sequence
+mirroring (host kernel's TCP socket buffer IS the buffer). Eliminate
+the 256 KB `to_host` cliff and drop 100s of LOC of fragile state.
+
+**Architecture:** For each direction:
+
+- **host → guest** (host writes, we relay to guest): instead of
+  `read()` into `to_guest: Vec<u8>` then drain, use
+  `recv(MSG_PEEK)` to inspect what's in the kernel socket without
+  consuming it. Send the un-acknowledged portion as TCP segments to
+  the guest. Track `bytes_in_flight = our_seq - last_acked_seq`.
+  When the guest ACKs, `recv()` (no MSG_PEEK) the ACK'd bytes to
+  advance the kernel's read pointer. The kernel's socket buffer
+  absorbs backpressure naturally.
+
+- **guest → host** (guest writes, we relay to host): on guest
+  segment, attempt non-blocking `send()` on the host socket. If it
+  succeeds: ACK the guest. If `WouldBlock` (kernel send buffer full):
+  **don't** ACK; let the guest retransmit (TCP's natural backpressure).
+  Drop the 256 KB `to_host: Vec<u8>` user-space buffer entirely.
+
+**Tech Stack:** Rust 1.88, `std::net::TcpStream` (already in use).
+`libc::recv` with `MSG_PEEK` flag for the host→guest direction
+(std doesn't expose MSG_PEEK on `TcpStream`).
+
+**Branch:** `smoltcp-passt-port-phase0` (continuing on the same branch
+through all phases — user instruction).
+
+## Non-negotiable invariants
+
+These are MUSTs across every task in this phase. A task that violates
+any of them is rejected at code review, regardless of test status.
+
+1. **Full observability is preserved.** The whole reason we lift
+   passt's *patterns* instead of running passt as a process is to
+   keep our debugging surface.
Every task MUST: + - Keep all existing `tracing::trace!`/`debug!`/`warn!`/`error!` + calls in the TCP relay path. If a removed code path's trace + lines no longer fire because the path is gone, that's fine. + But a NEW path missing equivalent tracing is a bug. + - Add new `tracing` events for the new state — at minimum: + - `trace!` on each peek that yields N bytes, + - `trace!` on each ACK-driven consume, + - `debug!` on connection close with `bytes_in_flight` snapshot + (helps post-mortem the unusual-close case), + - `warn!` on unexpected protocol errors (RST during ESTABLISHED, + seq number going backwards, etc.). + - Stay all-Rust, no FFI boundary, no opaque process. `libc::recv` + for MSG_PEEK is fine — that's a syscall, not an opaque process; + it doesn't cross a debugger boundary. +2. **`cargo test`-driveable.** Every behavior change is exercised by + a test in `tests/network_baseline.rs` that drives `SlirpBackend` + directly (no VM). The pin tests are the contract. +3. **`tracing-subscriber` pipeline integrity.** Don't introduce + anything that bypasses the existing `tracing` filter chain + (`VOIDBOX_LOG_LEVEL` / `RUST_LOG` env vars, `LogConfig` + structured logger). If a new diagnostic needs a backchannel, + route it through `tracing` events with structured fields. +4. **Profiler keeps working.** No syscalls in tight loops without an + observable wrapper (e.g. don't call `libc::recv` from a hot path + without a `tracing::trace!` annotation that flame-graph-able + tools can attribute the time to). + +--- + +## Task structure + +8 tasks across three workstreams. 
+ +| ID | Workstream | Scope | +|---|---|---| +| 3.1 | impl | Add sequence-mirroring fields to `TcpNatEntry`; default-init alongside existing buffers | +| 3.2 | impl | Add `recv_peek` helper using `libc::recv(MSG_PEEK)` | +| 3.3 | impl | Replace host→guest path: drain via peek, send `bytes_available - bytes_in_flight` | +| 3.4 | impl | Replace guest-ACK handling: consume ACK'd bytes from kernel, send next chunk | +| 3.5 | impl | Drop guest→host `to_host` buffer; rely on kernel send buffer + don't-ACK-on-EAGAIN backpressure | +| 3.6 | impl | Drop `to_guest`, `MAX_TO_HOST_BUFFER`, dead helpers; cleanup | +| 3.7 | test | Flip `tcp_to_host_buffer_drops_at_256kb` BROKEN_ON_PURPOSE pin | +| 3.8 | gate | Phase 3 validation gate (full conformance + snapshot suites + bench) | + +--- + +## Workstream 3A — Add scaffolding (no behavior change) + +### Task 3.1: Sequence-mirroring fields on `TcpNatEntry` + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Add fields** to `TcpNatEntry` (around line 107 — LSP `documentSymbol` will surface). Add at the end of the struct: + +```rust +/// passt-style sequence mirroring: bytes the kernel has buffered +/// past our last consumed point but not yet sent to guest. With +/// MSG_PEEK, we can inspect the kernel's recv queue without +/// consuming, then `recv` (no peek) the ACK'd portion later. +/// +/// `bytes_in_flight = our_seq - last_acked_seq` — bytes sent to +/// guest but not yet ACK'd. +#[allow(dead_code)] // consumed in 3.3 +bytes_in_flight: u32, +``` + +`our_seq` and `guest_ack` already exist on the struct. Reuse them; don't introduce new aliases. + +- [ ] **Step 2: Initialize** in every construction site of `TcpNatEntry` (LSP `findReferences` on the struct will list them — likely 1–2 sites in `handle_tcp_frame`'s SYN branch). Add `bytes_in_flight: 0,` to each. 
+
+- [ ] **Step 3: Verify.**
+
+```bash
+cargo check
+cargo test --test network_baseline # 14 tests still pass
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): add bytes_in_flight to TcpNatEntry (no behavior change)"
+```
+
+---
+
+### Task 3.2: `recv_peek` helper
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add a module-scope helper.**
+
+```rust
+/// Non-blocking `recv(MSG_PEEK)` on a `TcpStream`, returning bytes
+/// read without consuming them from the kernel socket buffer.
+///
+/// `std::net::TcpStream` does not expose `MSG_PEEK`; we go through
+/// `libc::recv` directly.
+fn recv_peek(stream: &TcpStream, buf: &mut [u8]) -> io::Result<usize> {
+    use std::os::fd::AsRawFd;
+    // SAFETY: `stream` outlives the syscall; `buf` is uniquely
+    // borrowed and `len` matches.
+    let n = unsafe {
+        libc::recv(
+            stream.as_raw_fd(),
+            buf.as_mut_ptr() as *mut libc::c_void,
+            buf.len(),
+            libc::MSG_PEEK | libc::MSG_DONTWAIT,
+        )
+    };
+    if n < 0 {
+        return Err(io::Error::last_os_error());
+    }
+    Ok(n as usize)
+}
+```
+
+`std::os::fd::AsRawFd` is already in the module-scope use block (added in Phase 1.1). `MSG_DONTWAIT` ensures non-blocking even if the stream's `set_nonblocking` flag is dropped somehow.
+
+- [ ] **Step 2: Verify** the helper compiles. No callers yet:
+
+```bash
+cargo check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): add recv_peek helper using libc::recv MSG_PEEK"
+```
+
+---
+
+## Workstream 3B — The actual relay rewrite
+
+### Task 3.3: Replace host→guest path with peek-based send
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Locate** the host→guest section in `relay_tcp_nat_data`
+  via LSP `documentSymbol`.
It's the `read` block around lines + 991–1025: read up to 16 KB into `entry.to_guest`, drain `to_guest` + in MTU-sized chunks, build TCP packets, increment `our_seq`. + +- [ ] **Step 2: Replace** that block with a peek-based version. The + new logic: + +```rust +// Host → guest, peek-based sequence-mirroring. +// We don't `read()` into a userspace buffer — the kernel's socket +// buffer holds outstanding data until the guest ACKs, at which point +// Task 3.4 consumes the ACK'd portion via plain `recv()`. +let mut peek_buf = [0u8; 65536]; +match recv_peek(&entry.host_stream, &mut peek_buf) { + Ok(0) => { + // EOF from host. Send FIN to guest if we haven't already. + // (FIN handling continues to use the existing block below.) + entry.state = TcpNatState::Closed; + } + Ok(n) => { + // Send only the un-ACK'd portion: skip what's already in flight. + let bytes_in_flight = entry.bytes_in_flight as usize; + if n > bytes_in_flight { + let new_payload = &peek_buf[bytes_in_flight..n]; + for chunk in new_payload.chunks(MTU - 54) { + let frame = build_tcp_packet_static( + /* ... existing args, payload=chunk, seq=entry.our_seq ... */ + ); + self.inject_to_guest.push(frame); + entry.our_seq = entry.our_seq.wrapping_add(chunk.len() as u32); + entry.bytes_in_flight = + entry.bytes_in_flight.wrapping_add(chunk.len() as u32); + } + } + // else: everything in the kernel buffer is already in flight; + // wait for guest to ACK before sending more. + } + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + // Nothing in the kernel buffer yet; nothing to do. + } + Err(_) => { + entry.state = TcpNatState::Closed; + } +} +``` + +The exact builder call must match the existing `build_tcp_packet_static` signature — read the current call site and copy verbatim. 
+ +- [ ] **Step 3: Run.** + +```bash +cargo check +cargo test --test network_baseline # tcp_data_round_trip MUST pass; the 256KB cliff test still passes (cliff still in place via to_host path which 3.5 will remove) +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +``` + +The `tcp_to_host_buffer_drops_at_256kb` BROKEN_ON_PURPOSE pin tests the **guest→host** direction — it should still pass after this task because we haven't touched that path yet (3.5 owns it). + +- [ ] **Step 4: Commit.** + +```bash +git add src/network/slirp.rs +git commit -m "refactor(slirp): peek-based host→guest TCP relay (drops to_guest buffer dependency)" +``` + +> Note: the `to_guest: Vec` field is now unused but still on the +> struct. Task 3.6 removes it; until then it stays so the diff per +> task is reviewable. + +--- + +### Task 3.4: ACK handling — consume ACK'd bytes from kernel + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Locate** guest-ACK handling. In `handle_tcp_frame`, + the ACK branch (around line 855–870) currently advances + `entry.guest_ack` and may transition state. With peek-based send, + on each ACK we must also `recv()` (no peek) the ACK'd bytes from + the kernel socket so the kernel can free them. + +- [ ] **Step 2: Compute ACK'd bytes** from the incoming TCP segment's + ACK number minus the entry's last-known `guest_ack`. Use wrapping + arithmetic — TCP sequence numbers wrap at 2³². + +```rust +let segment_ack = /* ... extract from TcpRepr ... */; +let acked_bytes = segment_ack.wrapping_sub(entry.guest_ack); +// Advance the recorded ack point. 
+if acked_bytes > 0 && acked_bytes <= entry.bytes_in_flight { + let mut sink = [0u8; 65536]; + let mut remaining = acked_bytes as usize; + while remaining > 0 { + let want = remaining.min(sink.len()); + match entry.host_stream.read(&mut sink[..want]) { + Ok(0) | Err(_) => break, // EOF or error; let next iteration handle it + Ok(n) => remaining -= n, + } + } + entry.bytes_in_flight = + entry.bytes_in_flight.wrapping_sub(acked_bytes - remaining as u32); + entry.guest_ack = segment_ack; +} +``` + +The `read()` call (not `recv` directly) consumes from the kernel buffer — equivalent on a non-blocking `TcpStream`. The `entry.host_stream` is already non-blocking, so this won't stall. + +- [ ] **Step 3: Test the round trip.** `tcp_data_round_trip` should + still pass — guest sends 5 bytes, host echoes, guest receives. The + echo path now uses peek + ACK-driven consume. + +```bash +cargo test --test network_baseline tcp_data_round_trip +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add src/network/slirp.rs +git commit -m "refactor(slirp): consume ACK'd bytes from kernel on guest ACK" +``` + +--- + +### Task 3.5: Drop guest→host `to_host` buffer (kill the 256 KB cliff) + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Locate** the `to_host` write path. In `handle_tcp_frame` + (around lines 867–911) and `relay_tcp_nat_data` (around lines + 960–989), the current code: + - Writes guest payload to `entry.host_stream` directly when + `to_host` is empty. + - Buffers in `entry.to_host` on `WouldBlock`. + - Drops the connection when `to_host` exceeds `MAX_TO_HOST_BUFFER` + (256 KB). + - Sends ACK on successful write OR sets `to_host_pending_ack` when + the write was buffered. + +- [ ] **Step 2: Replace** with a strict don't-ACK-on-EAGAIN approach: + - Attempt non-blocking `write` on the host socket. + - On full success: ACK the guest immediately. 
+  - On partial success (some bytes written): ACK only those bytes;
+    let the guest retransmit the rest.
+  - On `WouldBlock` with zero bytes written: **don't ACK**; let the
+    guest retransmit per TCP's natural backpressure. The kernel's
+    send buffer fills up; when it drains, the next guest retransmit
+    succeeds.
+
+```rust
+// In handle_tcp_frame's data branch:
+let payload = /* ... existing extract ... */;
+let n_written = match entry.host_stream.write(payload) {
+    Ok(n) => n,
+    Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => 0,
+    Err(_) => {
+        entry.state = TcpNatState::Closed;
+        return Ok(());
+    }
+};
+if n_written > 0 {
+    let ack_seq = segment_seq.wrapping_add(n_written as u32);
+    self.send_ack(entry, ack_seq);
+    entry.guest_seq = ack_seq;
+}
+// else: silently drop the segment; guest retransmits.
+```
+
+- [ ] **Step 3: Remove the `MAX_TO_HOST_BUFFER` constant** and the
+  256 KB-cliff branch. The cliff is gone — TCP backpressure handles
+  it naturally.
+
+- [ ] **Step 4: Verify.**
+
+```bash
+cargo check
+cargo test --test network_baseline # tcp_data_round_trip still passes
+# tcp_to_host_buffer_drops_at_256kb is EXPECTED TO FAIL now —
+# Task 3.7 will flip it. For this task, run with --no-fail-fast and
+# confirm only that test fails.
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): drop to_host buffer + 256KB cliff, use TCP backpressure"
+```
+
+---
+
+### Task 3.6: Cleanup — drop unused fields + dead helpers
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Remove unused fields** from `TcpNatEntry`:
+  - `to_guest: Vec<u8>` — replaced by peek-based send.
+  - `to_host: Vec<u8>` — replaced by kernel send buffer + retransmit.
+  - `to_host_pending_ack: Option<u32>` — replaced by direct ACK on
+    successful write.
+
+- [ ] **Step 2: Remove dead helpers** that referenced them.
Use LSP + `findReferences` on each removed field to find call sites; remove + the helpers if they're now orphaned. + +- [ ] **Step 3: Update doc comments** — the file-level doc and the + `TcpNatEntry` doc should reflect the new design. + +- [ ] **Step 4: Verify.** + +```bash +cargo check +cargo test --test network_baseline +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add src/network/slirp.rs +git commit -m "refactor(slirp): drop to_guest/to_host/pending_ack fields and dead helpers" +``` + +--- + +## Workstream 3C — Test + validation + +### Task 3.7: Flip `tcp_to_host_buffer_drops_at_256kb` BROKEN_ON_PURPOSE pin + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Locate** the test. It currently asserts that pushing + ~300 KB closes the connection. + +- [ ] **Step 2: Rewrite** to assert the OPPOSITE — pushing >256 KB + succeeds with no connection close. Rename to + `tcp_writes_more_than_256kb_succeed`. The test: + - Bind a host TCP server that accepts and reads ~1 MB. + - Drive the handshake. + - Push 1 MB in chunks. + - Assert no `Rst` / `Fin` arrives at the guest mid-stream. + - Assert the host server receives all 1 MB. 
+ +- [ ] **Step 3: Run.** + +```bash +cargo test --test network_baseline tcp_writes_more_than_256kb_succeed +cargo test --test network_baseline # 14 tests pass +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add tests/network_baseline.rs +git commit -m "test(network): flip 256KB cliff pin — assert >1MB succeeds" +``` + +--- + +### Task 3.8: Phase 3 validation gate + +**Files:** none (gate only) + +- [ ] **Static checks** + +```bash +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +``` + +- [ ] **Unit + baseline tests** + +```bash +cargo test --workspace --all-features +cargo test --test network_baseline +``` + +- [ ] **Conformance + snapshot integration suites — the safety net** + +```bash +export VOID_BOX_KERNEL=$PWD/target/vmlinux-slim-x86_64 +export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz +cargo test --test conformance -- --ignored --test-threads=1 +cargo test --test snapshot_integration -- --ignored --test-threads=1 +cargo test --test e2e_skill_pipeline -- --ignored --test-threads=1 +cargo test --test e2e_mount -- --ignored --test-threads=1 +``` + +These exercise real TCP traffic through the SLIRP path. **Any +regression here is a Phase 3 blocker.** + +- [ ] **Microbench regression check** + +```bash +cargo bench --bench network +``` + +Compare `process_syn`, `poll_idle`, `poll_with_n_flows` against the +Phase 2 baseline. No regression > 10%. + +- [ ] **Wall-clock harness** + +```bash +./target/release/voidbox-network-bench --iterations 3 \ + --output /tmp/baseline-network-phase3.json +cat /tmp/baseline-network-phase3.json +``` + +Expected: +- `tcp_throughput_g2h_mbps`: comparable to Phase 2 (~1900 Mbps). +- `tcp_rr_latency_us_p50`: comparable (~2 µs). +- `tcp_crr_latency_us_p50`: **expected to drop** — the new TCP relay + has fewer per-segment ACK round-trips. From Phase 2's ~10,160 µs + toward something closer to passt's 135 µs. 
Anywhere meaningfully + below 5,000 µs is a clear win. + +- [ ] **Startup bench warm-restore** (the bench fixed in 0d0ab20) + must continue to pass: + +```bash +./target/release/voidbox-startup-bench --iters 3 --breakdown +# warm phase exits 0 +``` + +No PR opened — paused per user instruction. + +--- + +## Risks + +- **Highest-risk phase by far.** The TCP relay rewrite is ~400 LOC + replaced. Any subtle bug in the sequence math (off-by-one, + unsigned wrap, ACK-vs-segment-seq confusion) silently breaks + long-running connections. The conformance + snapshot suites are + the safety net. +- **Sequence wrap arithmetic.** TCP seq numbers are 32-bit and wrap + at 2³². Use `wrapping_add` / `wrapping_sub` everywhere. A naive + comparison at boundaries is silently wrong. +- **MSG_PEEK + non-blocking + multi-thread.** `recv_peek` is called + from the net-poll thread. The host socket is non-blocking. Confirm + no other code path closes the socket concurrently. +- **Window-scaling not implemented.** Today's `TCP_WINDOW = 65535` + hardcoded. We don't claim window scaling in SYN-ACK options. + Acceptable for Phase 3 — passt-grade window negotiation is deferred. +- **TCP_INFO not used.** passt queries `TCP_INFO` on the host socket + to mirror RTT/window. We don't. Connections work without it; window + semantics are slightly different. Out of scope here. + +## File impact + +| File | Approximate LOC | +|---|---| +| `src/network/slirp.rs` | **~+250 / −350** (net reduction) | +| `tests/network_baseline.rs` | ~+50 / −60 (rewrite the cliff test) | +| **Total** | **~+300 / −410** | + +Net reduction in `slirp.rs` is the headline win. Less code, fewer +fragile invariants, kernel does the buffering. 
diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase4.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase4.md
new file mode 100644
index 00000000..fa3b29db
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase4.md
@@ -0,0 +1,431 @@
+# Phase 4 Implementation Plan: Unified Flow Table
+
+> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development.
+> Steps use checkbox (`- [ ]`) syntax for tracking.
+>
+> **Mandatory skills for every Rust-touching task:**
+> `rust-style`, `rustdoc`, `rust-analyzer-ssr`,
+> `superpowers:test-driven-development`,
+> `superpowers:verification-before-completion`. Use LSP for navigation.
+>
+> **Phase 4 is a NO-BEHAVIOR-CHANGE refactor.** Every task ends with
+> all 14 baseline pins, all VM suites, and `voidbox-startup-bench`
+> warm phase still green. The point is structural cleanup, not new
+> capability — temptation to bolt on "while I'm here" features
+> should be redirected to Phase 5.
+
+**Spec:** [`2026-04-27-smoltcp-passt-port.md`](2026-04-27-smoltcp-passt-port.md)
+**Continues from Phase 3:** [`2026-04-27-smoltcp-passt-port-phase3.md`](2026-04-27-smoltcp-passt-port-phase3.md)
+
+**Goal:** Replace the three per-protocol HashMaps on `SlirpBackend`
+(`tcp_nat`, `udp_flows`, `icmp_echo`) with a single `flow_table`
+keyed by a `FlowKey` enum, with values held in a `FlowEntry` enum.
+Sets up Phase 5 (stateless NAT + port-forwarding) where shared
+flow-table operations matter more.
+
+**Architecture:**
+
+```rust
+// New types (unified):
+enum FlowKey {
+    Tcp(TcpNatKey),
+    Udp(UdpFlowKey),
+    IcmpEcho(IcmpEchoKey),
+}
+
+enum FlowEntry {
+    Tcp(TcpNatEntry),
+    Udp(UdpFlowEntry),
+    IcmpEcho(IcmpEchoEntry),
+}
+
+// On SlirpBackend:
+flow_table: HashMap<FlowKey, FlowEntry>,
+```
+
+The per-protocol code paths still match on the variant — this is
+"three HashMaps in one wrapper" structurally, not a deep redesign.
+The user-visible benefits land later: Phase 5 will reuse +`flow_table` for stateless NAT translation + port-forwarding without +caring which protocol owns each entry. + +**Tech Stack:** Rust 1.88, `std::collections::HashMap` (already in +use). No new deps. + +**Branch:** `smoltcp-passt-port-phase0` (continuing on the same +branch — user instruction). + +## Non-negotiable invariants (carried from Phase 3) + +1. **All-Rust** — no opaque process boundary. +2. **Full observability via `tracing`** — every relay continues + to emit `trace!`/`debug!`/`warn!` at the same observable points. + The unification must NOT silently drop log lines. +3. **`cargo test`-driveable** — all 14 baseline pins, plus + `tcp_writes_more_than_256kb_succeed`, must continue passing. +4. **Standard Rust tooling** — LSP, clippy, profiler keep working. + +## What this phase explicitly does NOT do + +- **No SipHash hasher.** The default `RandomState` already + randomizes per-process, which is sufficient DoS protection given + guests can't observe other VMs' hash seeds. SipHash is a Phase 5+ + consideration if and only if profiling shows hash contention, + which it currently doesn't. +- **No side-indexed entries.** passt's flow table tracks INISIDE + vs TGTSIDE for each entry; SLIRP is asymmetric (guest is always + the initiator) so this distinction is moot in our model. +- **No new behavior.** Same RFC compliance, same idle timeouts, + same packet handling. The pin tests are the contract. + +## Task structure + +10 tasks across three workstreams. The bench tasks (4.6a–4.6c) land +**after** the migration so they exercise the unified `flow_table`, +not the old per-protocol maps. The validation gate (4.7) compares +the new bench numbers against Phase 3 numbers to verify no +regression from enum dispatch. 
+ +| ID | Workstream | Scope | +|---|---|---| +| 4.1 | impl | Define `FlowKey` + `FlowEntry` enums; no callers yet | +| 4.2 | impl | Add `flow_table` field to `SlirpBackend`; populate in parallel with existing maps (no migration yet) | +| 4.3 | impl | Migrate ICMP path to `flow_table`; drop `icmp_echo` HashMap | +| 4.4 | impl | Migrate UDP path to `flow_table`; drop `udp_flows` HashMap | +| 4.5 | impl | Migrate TCP path to `flow_table`; drop `tcp_nat` HashMap | +| 4.6 | impl | Cleanup: remove dead helpers, update doc comments | +| **4.6a** | **bench** | **`poll_with_n_mixed_flows` — n/3 TCP + n/3 UDP + n/3 ICMP entries, time `poll()`. Catches enum-dispatch regression at scale.** | +| **4.6b** | **bench** | **`process_udp_frame` + `process_icmp_echo_request` — per-protocol hot-path parity vs the existing `process_syn`.** | +| **4.6c** | **bench** | **`flow_table_insert_remove` — pure-compute HashMap op throughput on the unified table; Phase 4 reference for future Phase 5+ work.** | +| 4.7 | gate | Phase 4 validation gate (incl. new benches no-regression) | + +--- + +## Task 4.1: Define `FlowKey` + `FlowEntry` enums + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Add the two enums** near the existing `NatKey`, + `TcpNatEntry`, `UdpFlowKey`, `UdpFlowEntry`, `IcmpEchoKey`, + `IcmpEchoEntry` definitions (LSP `documentSymbol` to confirm + placement): + +```rust +/// Unified flow-table key. Each variant wraps the protocol-specific +/// key already defined elsewhere in this module — no field changes, +/// just a single type that the unified `flow_table` HashMap can +/// store. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[allow(dead_code)] // consumed in 4.2 +enum FlowKey { + Tcp(NatKey), + Udp(UdpFlowKey), + IcmpEcho(IcmpEchoKey), +} + +/// Unified flow-table value. Each variant wraps the protocol's +/// existing entry struct. 
+#[allow(dead_code)] // consumed in 4.2
+enum FlowEntry {
+    Tcp(TcpNatEntry),
+    Udp(UdpFlowEntry),
+    IcmpEcho(IcmpEchoEntry),
+}
+```
+
+`NatKey` already derives `Hash`+`Eq`+`Clone` (the existing TCP key). `UdpFlowKey` and `IcmpEchoKey` already derive the needed traits. The `Copy` constraint is enforced by the variant types — verify they're all `Copy` (they should be — all primitive fields).
+
+- [ ] **Step 2: Verify.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): define FlowKey + FlowEntry enums (no callers yet)"
+```
+
+---
+
+## Task 4.2: Add `flow_table` field
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add the field on `SlirpBackend`.** Place it
+  alongside (not replacing) the existing per-protocol HashMaps:
+
+```rust
+/// Unified flow table. During Phase 4, populated in parallel with
+/// the per-protocol maps (`tcp_nat`, `udp_flows`, `icmp_echo`).
+/// Phase 4.3–4.5 migrate each protocol; Phase 4.6 deletes the
+/// per-protocol maps.
+#[allow(dead_code)] // consumed in 4.3+
+flow_table: HashMap<FlowKey, FlowEntry>,
+```
+
+Initialize `flow_table: HashMap::new()` in every `SlirpBackend`
+construction site (canonical: `with_security`, which `new()` and
+`Default::default()` delegate to).
+
+- [ ] **Step 2: Verify.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): add flow_table field on SlirpBackend (parallel to existing maps)"
+```
+
+---
+
+## Task 4.3: Migrate ICMP path to `flow_table`
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+ICMP first because it's the smallest path (added in Phase 1, ~150
+LOC) and the migration pattern is cleanest there. Once it's right,
+4.4 and 4.5 follow the same shape.
+
+- [ ] **Step 1: Replace `self.icmp_echo` accesses with
+  `self.flow_table` accesses where the value is `FlowEntry::IcmpEcho`.**
+
+Two access sites:
+- `handle_icmp_frame` (insert/lookup by `IcmpEchoKey`)
+- `relay_icmp_echo` (iterate entries, drain socket, build reply)
+
+Pattern for insert:
+
+```rust
+// OLD:
+match self.icmp_echo.entry(key) {
+    std::collections::hash_map::Entry::Occupied(o) => o.into_mut(),
+    std::collections::hash_map::Entry::Vacant(v) => v.insert(IcmpEchoEntry { ... }),
+}
+
+// NEW:
+let flow_key = FlowKey::IcmpEcho(key);
+match self.flow_table.entry(flow_key) {
+    std::collections::hash_map::Entry::Occupied(o) => match o.into_mut() {
+        FlowEntry::IcmpEcho(entry) => entry,
+        _ => unreachable!("FlowKey::IcmpEcho must map to FlowEntry::IcmpEcho"),
+    },
+    std::collections::hash_map::Entry::Vacant(v) => match v.insert(FlowEntry::IcmpEcho(IcmpEchoEntry { ... })) {
+        FlowEntry::IcmpEcho(entry) => entry,
+        _ => unreachable!(),
+    },
+}
+```
+
+Pattern for iterate:
+
+```rust
+// OLD:
+let keys: Vec<IcmpEchoKey> = self.icmp_echo.keys().copied().collect();
+for key in keys {
+    let entry = self.icmp_echo.get_mut(&key).unwrap();
+    ...
+}
+
+// NEW:
+let flow_keys: Vec<FlowKey> = self
+    .flow_table
+    .keys()
+    .copied()
+    .filter(|k| matches!(k, FlowKey::IcmpEcho(_)))
+    .collect();
+for flow_key in flow_keys {
+    let FlowKey::IcmpEcho(key) = flow_key else { continue; };
+    let Some(FlowEntry::IcmpEcho(entry)) = self.flow_table.get_mut(&flow_key) else { continue; };
+    ...
+}
+```
+
+- [ ] **Step 2: Remove the `icmp_echo` field** from `SlirpBackend`
+  and its initializer.
+
+- [ ] **Step 3: Verify.** All 14 baseline tests pass, including
+  `icmp_echo_returns_reply`.
+
+```bash
+cargo check
+cargo test --test network_baseline
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): migrate ICMP to flow_table"
+```
+
+---
+
+## Task 4.4: Migrate UDP path to `flow_table`
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+Same shape as 4.3. Access sites:
+- `handle_udp_frame` (insert/lookup)
+- `relay_udp_flows` (iterate + reap stale)
+
+The reap iteration (`stale: Vec<FlowKey>`) needs the same
+`filter(|k| matches!(k, FlowKey::Udp(_)))` pattern as 4.3 used for
+ICMP iteration.
+
+- [ ] **Step 1: Migrate accesses to `FlowKey::Udp(...)` /
+  `FlowEntry::Udp(...)`.**
+- [ ] **Step 2: Remove the `udp_flows` field.**
+- [ ] **Step 3: Verify** — `udp_non_dns_round_trips` passes, all
+  14 tests green.
+
+```bash
+cargo check && cargo test --test network_baseline
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): migrate UDP to flow_table"
+```
+
+---
+
+## Task 4.5: Migrate TCP path to `flow_table` (the big one)
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+TCP is the largest path — `tcp_nat` is touched by `handle_tcp_frame`
+(SYN/data/ACK/FIN/RST branches), `relay_tcp_nat_data` (peek + ACK
+consume + idle reap + FIN-on-EOF), and a few helpers.
+
+- [ ] **Step 1: Catalog every `self.tcp_nat` access** via LSP
+  `findReferences`. Likely 8–12 sites.
+- [ ] **Step 2: Migrate each site** to the
+  `FlowKey::Tcp(...)` / `FlowEntry::Tcp(...)` pattern from 4.3. The
+  ACK-consume and peek-send blocks have nested borrows; the
+  `let Some(FlowEntry::Tcp(entry)) = self.flow_table.get_mut(&fk) else { continue; };`
+  pattern handles them cleanly.
+- [ ] **Step 3: Remove the `tcp_nat` field.**
+- [ ] **Step 4: Verify — full baseline + the headline pin
+  `tcp_writes_more_than_256kb_succeed`.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+cargo bench --bench network tcp_bulk_throughput_1mb
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): migrate TCP to flow_table"
+```
+
+---
+
+## Task 4.6: Cleanup — drop `#[allow(dead_code)]`, update docs
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Remove all `#[allow(dead_code)]`** added in 4.1
+  and 4.2 — the items are now consumed.
+- [ ] **Step 2: Update file-level doc** at the top of `slirp.rs`
+  to reflect the unified flow table:
+
+```
+//! Architecture:
+//! - ARP: custom handler for 10.0.2.x
+//! - All TCP/UDP/ICMP flows live in a unified flow_table:
+//!   HashMap<FlowKey, FlowEntry>. Per-protocol relay logic dispatches
+//!   on the FlowEntry variant.
+//! - DNS to 10.0.2.3:53 takes a cached fast-path
+//! - Other: silently dropped
+```
+
+- [ ] **Step 3: Verify.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/slirp.rs
+git commit -m "refactor(slirp): drop allow(dead_code) + update Phase 4 docs"
+```
+
+---
+
+## Task 4.7: Phase 4 validation gate
+
+**Files:** none.
+
+- [ ] **Static checks**
+
+```bash
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+```
+
+- [ ] **Unit + baseline + bench**
+
+```bash
+cargo test --workspace --all-features
+cargo test --test network_baseline # 14/14
+cargo bench --bench network # no regression
+```
+
+- [ ] **VM suites — the safety net**
+
+```bash
+export VOID_BOX_KERNEL=$PWD/target/vmlinux-slim-x86_64
+export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz
+cargo test --test snapshot_integration -- --ignored --test-threads=1
+cargo test --test e2e_skill_pipeline -- --ignored --test-threads=1
+cargo test --test e2e_mount -- --ignored --test-threads=1
+cargo test --test conformance -- --ignored --test-threads=1
+# (3 pre-existing conformance test failures; same as before — verify the same set fails)
+```
+
+- [ ] **Wall-clock — no regression**
+
+```bash
+./target/release/voidbox-network-bench --iterations 3 --bulk-mb 10
+./target/release/voidbox-startup-bench --iters 3 --breakdown # warm phase exits 0
+```
+
+Numbers should be statistically equivalent to Phase 3:
+- `tcp_throughput_g2h_mbps` ≈ 1885 Mbps
+- `tcp_bulk_throughput_g2h_mbps` ≈ 1565 Mbps
+- `tcp_rr_latency_us_p50` = 2 µs
+- `tcp_crr_latency_us_p50` ≈ 10 ms
+
+Any movement >10% on these is a regression.
+
+## Risks
+
+- **Borrow checker friction.** Nested `match` on enum variants
+  with `&mut self` borrows can be awkward — the `let Some(...) else
+  { continue; }` pattern keeps each access scoped. If you hit a
+  multi-variant borrow conflict, revisit by keeping the lookup and
+  the mutation in separate scopes (one to find the variant, one to
+  mutate).
+- **Hashing.** `FlowKey` derives `Hash` from variant + inner key.
+  Collision probability is fine; the default `RandomState` is
+  per-process, so guests can't observe seeds.
+- **No behavior change is the contract.** If any task changes a
+  `tracing` event's level or a field's shape, that violates the
+  observability invariant.
Preserve message text and structured + fields. + +## File impact + +| File | Approximate LOC | +|---|---| +| `src/network/slirp.rs` | **~+50 / −30** (net positive — enum dispatch adds boilerplate) | +| **Total** | **~+20** | + +Net LOC goes UP slightly. The win is that Phase 5 can reuse +`flow_table` instead of cloning each per-protocol map's +boilerplate. diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase5.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase5.md new file mode 100644 index 00000000..a70eb780 --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port-phase5.md @@ -0,0 +1,493 @@ +# Phase 5 Implementation Plan: Stateless NAT + Port Forwarding + +> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development. +> Steps use checkbox (`- [ ]`) syntax for tracking. +> +> **Mandatory skills for every Rust-touching task:** +> `rust-style`, `rustdoc`, `rust-analyzer-ssr`, +> `superpowers:test-driven-development`, +> `superpowers:verification-before-completion`. Use LSP for navigation. + +**Spec:** [`2026-04-27-smoltcp-passt-port.md`](2026-04-27-smoltcp-passt-port.md) +**Continues from Phase 4:** [`2026-04-27-smoltcp-passt-port-phase4.md`](2026-04-27-smoltcp-passt-port-phase4.md) + +**Goal:** Two related changes: + +1. **Refactor address translation** into a pure + `nat::translate_inbound(addr) -> SocketAddr` function. + Today the `SLIRP_GATEWAY_IP (10.0.2.2)` → `127.0.0.1` rewrite + is inlined in `handle_tcp_frame` and `handle_udp_frame`. Pulling + it out of the relay code makes the translation logic reviewable + on its own, sets the shape for IPv6 dual-stack later, and + prepares the hook point for #2. + +2. **Port forwarding** — first user-visible feature in this refactor + chain. Today the only translation is `10.0.2.2 → loopback`. After + Phase 5, an operator can say `host:8080 → guest:80` and a TCP/UDP + connection from a host process to `127.0.0.1:8080` reaches the + guest's port 80. 
Config flows: spec → `NetworkConfig::port_forwards`
+→ `nat::Rules` → consulted by `translate_inbound`.
+
+**Architecture:**
+
+```rust
+// src/network/nat.rs (new file)
+pub struct Rules {
+    /// Outbound: when guest connects to gateway, where on the host
+    /// kernel does that map to? (`SLIRP_GATEWAY_IP → 127.0.0.1`).
+    pub gateway_loopback: bool,
+    /// Outbound: drop / redirect rules that the deny-list /
+    /// metadata-IP filter currently inlines.
+    pub deny_cidrs: Vec<Ipv4Net>,
+    /// Inbound: host-port → guest-port forwarding (the new feature).
+    pub port_forwards: Vec<PortForward>,
+}
+
+pub struct PortForward {
+    pub proto: ForwardProto, // Tcp | Udp
+    pub host_port: u16,
+    pub guest_port: u16,
+}
+
+/// Stateless: pure function of (incoming dst address, rules) → host
+/// SocketAddr to connect/bind to.
+pub fn translate_outbound(rules: &Rules, dst: Ipv4Address, dst_port: u16)
+    -> Option<SocketAddr> { ... }
+```
+
+`SlirpBackend` holds `nat: Rules` instead of inlining the gateway
+rewrite. The relay code calls `translate_outbound` per packet
+(it's pure, fast, no state).
+
+**Tech Stack:** Rust 1.88, `ipnet::Ipv4Net` (already in use). No new
+deps.
+
+**Branch:** `smoltcp-passt-port-phase0` (continuing on the same
+branch — user instruction).
+
+## Non-negotiable invariants (carried from prior phases)
+
+1. **All-Rust** — no opaque process boundary.
+2. **Full observability via `tracing`** — every translation decision
+   that diverts a connection (loopback rewrite, deny, port-forward)
+   emits a `trace!` event with the (rule, src, dst) context.
+3. **`cargo test`-driveable** — every behavior change exercised by
+   `tests/network_baseline.rs` (no VM needed).
+4. **No regression** — all 14 baseline pins, snapshot suite, e2e
+   suites, microbenches, wall-clock baselines stay within 5% of the
+   Phase 4 numbers.
+
+## Task structure
+
+8 tasks across three workstreams.
+ +| ID | Workstream | Scope | +|---|---|---| +| 5.1 | impl | New module `src/network/nat.rs` with `Rules`, `PortForward`, `ForwardProto`, `translate_outbound` (no callers yet) | +| 5.2 | impl | `SlirpBackend` holds `nat: Rules`; existing `SLIRP_GATEWAY_IP → 127.0.0.1` rewrite + `deny_list` move into `Rules` | +| 5.3 | impl | TCP path consumes `nat::translate_outbound` (replaces the inline rewrite in `handle_tcp_frame`) | +| 5.4 | impl | UDP path consumes `nat::translate_outbound` | +| 5.5 | impl | Wire `port_forwards` from `NetworkConfig` → `Rules`. Inbound forwarding requires a host listener + per-rule accept loop spawned by `SlirpBackend::new` | +| 5.6 | test | New baseline pins: `nat_translate_outbound_loopback_rewrite`, `nat_translate_outbound_deny_list`, `nat_translate_outbound_unmodified`, `tcp_port_forward_inbound` | +| 5.7 | bench | New divan bench `nat_translate_outbound_hot_path` (pure-compute, ns-scale) | +| 5.8 | gate | Phase 5 validation gate | + +--- + +## Workstream 5A — Stateless translation module + +### Task 5.1: New `src/network/nat.rs` module + +**Files:** +- Create: `src/network/nat.rs` +- Modify: `src/network/mod.rs` (`pub mod nat;`) + +- [ ] **Step 1: Create `src/network/nat.rs`** + +```rust +//! Stateless address translation for SLIRP. +//! +//! Pure functions that map (guest-visible address, rules) → +//! (host-side SocketAddr to connect/bind to). No per-flow state +//! lives here — the flow table in `slirp.rs` owns that. Translation +//! itself is a function call. + +use std::net::{Ipv4Addr, SocketAddr}; + +use ipnet::Ipv4Net; +use smoltcp::wire::Ipv4Address; + +/// Inbound port-forwarding rule — host listener → guest port. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ForwardProto { + Tcp, + Udp, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct PortForward { + pub proto: ForwardProto, + pub host_port: u16, + pub guest_port: u16, +} + +/// Outbound translation rules, derived once at SlirpBackend construction. 
+#[derive(Clone, Debug, Default)]
+pub struct Rules {
+    /// If `true`, guest connections to the SLIRP gateway IP map to
+    /// `127.0.0.1` on the host. Today this is always `true`; left
+    /// configurable so a future TAP backend can flip it off.
+    pub gateway_loopback: bool,
+    /// CIDRs the guest is not allowed to connect to. Outbound packets
+    /// targeting these get `None` from `translate_outbound`.
+    pub deny_cidrs: Vec<Ipv4Net>,
+    /// Inbound port forwards. Consulted by `SlirpBackend::new` to spawn
+    /// listeners; not used by `translate_outbound`.
+    pub port_forwards: Vec<PortForward>,
+}
+
+/// Translate an outbound packet's destination address.
+///
+/// Returns `Some(host_addr)` if the packet should be forwarded —
+/// loopback for the gateway IP, otherwise the original IP.
+/// Returns `None` if the destination is in the deny list.
+pub fn translate_outbound(
+    rules: &Rules,
+    dst: Ipv4Address,
+    dst_port: u16,
+    gateway_ip: Ipv4Address,
+) -> Option<SocketAddr> {
+    let dst_ipv4 = Ipv4Addr::from(dst.0);
+
+    // Deny-list check first — explicit block beats any other rule.
+    for cidr in &rules.deny_cidrs {
+        if cidr.contains(&dst_ipv4) {
+            return None;
+        }
+    }
+
+    let host_ip = if rules.gateway_loopback && dst == gateway_ip {
+        Ipv4Addr::LOCALHOST
+    } else {
+        dst_ipv4
+    };
+
+    Some(SocketAddr::from((host_ip, dst_port)))
+}
+```
+
+- [ ] **Step 2: Register the module** in `src/network/mod.rs`:
+
+```rust
+pub mod nat;
+```
+
+- [ ] **Step 3: Verify.**
+
+```bash
+cargo check
+cargo test --test network_baseline
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings
+git add src/network/nat.rs src/network/mod.rs
+git commit -m "feat(network): add nat.rs with stateless translate_outbound (no callers yet)"
+```
+
+---
+
+### Task 5.2: `SlirpBackend` holds `nat: Rules`
+
+**Files:**
+- Modify: `src/network/slirp.rs`
+
+- [ ] **Step 1: Add field** on `SlirpBackend`:
+
+```rust
+nat: nat::Rules,
+```
+
+- [ ] **Step 2: Build it in `with_security`** from the existing
+  `deny_list` parameter. Today the deny list lives in two places
+  (a `Vec<Ipv4Net>` field on `SlirpBackend` and a CLI arg). The
+  refactor: `Rules.deny_cidrs` is the new home. The existing
+  `deny_list` field becomes redundant once 5.3 + 5.4 land — remove
+  it then.
+
+```rust
+let nat = nat::Rules {
+    gateway_loopback: true,
+    deny_cidrs: deny_list.clone(),
+    port_forwards: Vec::new(), // wired in 5.5
+};
+```
+
+- [ ] **Step 3: Don't migrate any call sites yet.** The existing
+  inline rewrites in `handle_tcp_frame` / `handle_udp_frame` keep
+  working. 5.3 + 5.4 own the cutover.
+- [ ] **Step 4: Verify** — all 14 baseline tests still pass.
+- [ ] **Step 5: Commit.** + +```bash +git add src/network/slirp.rs +git commit -m "refactor(slirp): add nat::Rules field on SlirpBackend (parallel to existing deny_list)" +``` + +--- + +### Task 5.3: TCP path consumes `translate_outbound` + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Find the existing translation in `handle_tcp_frame`** + (LSP `documentSymbol` — the SYN branch around the `TcpStream::connect` + call). It currently does: + +```rust +// Inline today: +let dst_ip_for_socket = if key.dst_ip == SLIRP_GATEWAY_IP { + Ipv4Addr::LOCALHOST +} else { + Ipv4Addr::from(key.dst_ip.0) +}; +let dst_addr = SocketAddr::from((dst_ip_for_socket, key.dst_port)); + +// Plus a separate deny-list check: +for cidr in &self.deny_list { + if cidr.contains(&dst_ip_for_socket) { + // send RST, return + } +} +``` + +- [ ] **Step 2: Replace with a single `translate_outbound` call:** + +```rust +let dst_addr = match nat::translate_outbound( + &self.nat, + key.dst_ip, + key.dst_port, + SLIRP_GATEWAY_IP, +) { + Some(addr) => addr, + None => { + // Denied. Send RST and return. + trace!( + "SLIRP TCP: deny-list reject dst={}:{} from guest_port={}", + key.dst_ip, key.dst_port, key.guest_src_port + ); + let rst = build_tcp_rst_to_guest(/* existing args */); + self.inject_to_guest.push(rst); + return Ok(()); + } +}; +let host_stream = match TcpStream::connect_timeout(&dst_addr, Duration::from_secs(3)) { + /* existing match */ +}; +``` + +- [ ] **Step 3: Preserve every existing tracing event.** +- [ ] **Step 4: Verify** — `tcp_data_round_trip`, + `tcp_writes_more_than_256kb_succeed`, `tcp_deny_list_emits_rst`, + `tcp_handshake_emits_synack` all pass. 
+- [ ] **Step 5: Commit.** + +```bash +git add src/network/slirp.rs +git commit -m "refactor(slirp): TCP path uses nat::translate_outbound" +``` + +--- + +### Task 5.4: UDP path consumes `translate_outbound` + +**Files:** +- Modify: `src/network/slirp.rs` + +- [ ] **Step 1: Find** the inline UDP translation in `handle_udp_frame` + (Phase 2's `dst_ip_for_socket = if key.dst_ip == SLIRP_GATEWAY_IP { LOCALHOST } else { ... };`). +- [ ] **Step 2: Replace** with `nat::translate_outbound(&self.nat, key.dst_ip, key.dst_port, SLIRP_GATEWAY_IP)`. + On `None` (deny), drop the datagram silently with a `trace!`. +- [ ] **Step 3: Drop the now-unused `deny_list` field** on `SlirpBackend` — both TCP and UDP go through `Rules.deny_cidrs` now. LSP `findReferences` to confirm zero callers. +- [ ] **Step 4: Verify.** + +```bash +cargo check +cargo test --test network_baseline udp_non_dns_round_trips +cargo test --test network_baseline # 14/14 +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add src/network/slirp.rs +git commit -m "refactor(slirp): UDP path uses nat::translate_outbound, drop deny_list field" +``` + +--- + +## Workstream 5B — Port forwarding (the user-visible feature) + +### Task 5.5: Wire `port_forwards` from spec → host listeners + +**Files:** +- Modify: `src/network/mod.rs` (`NetworkConfig::port_forwards: Vec<(u16, u16)>` is already there from earlier work — confirm via LSP and use as the source) +- Modify: `src/network/slirp.rs` (`SlirpBackend::with_security` accepts `port_forwards`, populates `nat.port_forwards`, spawns listeners) + +This is the only task that ADDS user-visible behavior. The translation +refactor in 5.1–5.4 was no-behavior-change. 
+
+- [ ] **Step 1: Define the listener thread shape.** For each
+  `PortForward { proto, host_port, guest_port }`:
+  - **TCP:** `TcpListener::bind(("127.0.0.1", host_port))` →
+    accept thread → on each accept, **inject a synthetic SYN frame**
+    into the guest from `SLIRP_GATEWAY_IP:host_port` → `SLIRP_GUEST_IP:guest_port`,
+    then proxy bytes between the host TcpStream and the guest's
+    response stream (mirrors the existing outbound path but reversed).
+  - **UDP:** `UdpSocket::bind(("127.0.0.1", host_port))` →
+    similar pattern with synthetic UDP datagrams.
+
+  This is more involved than the outbound path because we have to
+  *initiate* a connection from the host side to the guest. The
+  guest's listener at `guest_port` must already be accepting; if
+  it's not, the host TCP connect will look like ECONNREFUSED to the
+  caller.
+
+- [ ] **Step 2: Smallest viable first commit — just plumb the config**:
+  - Pass `port_forwards: Vec<PortForward>` through `with_security`.
+  - Populate `nat.port_forwards`.
+  - Don't actually spawn listeners yet — just store the rules. A
+    next commit can add the listener implementation.
+
+- [ ] **Step 3: Smallest viable second commit — TCP forwarding only**:
+  - For each TCP `PortForward`, spawn a thread that binds the host
+    listener and on each accept, drives the synthetic SYN injection.
+  - Keep UDP forwarding as a TODO comment for a follow-up; the TCP
+    path is the high-value case.
+
+- [ ] **Step 4: Verify** — test plan in 5.6 covers this.
+
+This task is the single most user-visible piece of the entire SLIRP
+refactor chain. Worth landing carefully; consider splitting into
+sub-PRs if the diff balloons.
+ +--- + +## Workstream 5C — Test + bench + +### Task 5.6: Baseline pins for translation + port-forward + +**Files:** +- Modify: `tests/network_baseline.rs` + +- [ ] **Step 1: Pure-translation pins** — exercise `nat::translate_outbound` + directly without driving `SlirpBackend`: + +```rust +#[test] +fn nat_translate_outbound_loopback_rewrite() { /* ... */ } + +#[test] +fn nat_translate_outbound_deny_list() { /* ... */ } + +#[test] +fn nat_translate_outbound_unmodified_external_ip() { /* ... */ } +``` + +- [ ] **Step 2: Port-forward end-to-end pin**: + +```rust +#[test] +fn tcp_port_forward_inbound() { + // Bind a guest-side server (synthesized — drives SlirpBackend + // directly with a SYN/SYN-ACK/FIN sequence to simulate a guest + // accepting on guest_port). + // Build SlirpBackend with port_forwards = [{Tcp, host_port, guest_port}]. + // Connect from host to 127.0.0.1:host_port. + // Assert the connection succeeds and bytes flow through. +} +``` + +- [ ] **Step 3: Run.** + +```bash +cargo test --test network_baseline nat_ tcp_port_forward +cargo test --test network_baseline # full suite +git add tests/network_baseline.rs +git commit -m "test(network): pin nat::translate_outbound + tcp_port_forward_inbound" +``` + +--- + +### Task 5.7: divan bench for `translate_outbound` + +**Files:** +- Modify: `benches/network.rs` + +- [ ] **Step 1: Add** a pure-compute bench inside `linux_benches`: + +```rust +#[divan::bench] +fn nat_translate_outbound_hot_path(bencher: Bencher) { + use void_box::network::nat::{self, Rules}; + let rules = Rules { + gateway_loopback: true, + deny_cidrs: vec!["169.254.0.0/16".parse().unwrap()], + port_forwards: Vec::new(), + }; + let dst = SLIRP_GATEWAY_IP; + bencher.bench_local(|| { + divan::black_box(nat::translate_outbound(&rules, dst, 80, SLIRP_GATEWAY_IP)); + }); +} +``` + +Expected order of magnitude: tens of nanoseconds per call. If it's +microseconds, something's wrong (allocation in the hot path, etc.) — +investigate. 
+ +- [ ] **Step 2: Commit.** + +```bash +cargo bench --bench network nat_translate_outbound_hot_path +git add benches/network.rs +git commit -m "bench(network): nat_translate_outbound_hot_path — Phase 5 baseline" +``` + +--- + +### Task 5.8: Phase 5 validation gate + +**Files:** none. + +- [ ] fmt + clippy clean. +- [ ] `cargo test --test network_baseline` — all baseline pins pass + (count grew by 4 in 5.6). +- [ ] `cargo bench --bench network` — no regression on existing benches; + new `nat_translate_outbound_hot_path` reports tens of ns. +- [ ] `cargo test --test snapshot_integration -- --ignored` — 8/8. +- [ ] `cargo test --test e2e_mount -- --ignored` — 11/11. +- [ ] `voidbox-network-bench --iterations 3 --bulk-mb 10` — within 5% of Phase 4 numbers. +- [ ] `voidbox-startup-bench --iters 3 --breakdown` — warm phase exits 0; numbers within noise of Phase 4. + +## Risks + +- **Port-forwarding is new behavior, not refactor.** 5.5 is the most + failure-prone task because it injects synthetic frames into the + flow_table from a different code path than the existing relay. If + the synthetic SYN doesn't match the existing TCP state-machine's + expectations, connections break in subtle ways. Strong test + coverage in 5.6 mitigates. +- **Visibility of `nat` types.** Test files and benches need access + to `Rules`, `PortForward`, `translate_outbound`. The plan above + uses `pub` everywhere in `nat.rs` — that's the right surface for + Phase 6+ users (port-forwarding via spec/CLI). Don't `pub(crate)` + it. 
+ +## File impact + +| File | Approximate LOC | +|---|---| +| `src/network/nat.rs` | **+90** (new) | +| `src/network/mod.rs` | +1 (`pub mod nat;`) | +| `src/network/slirp.rs` | **−40 / +25** (deny-list field gone, inline rewrites replaced with `translate_outbound` calls; the +25 is for the port-forwarding spawn) | +| `tests/network_baseline.rs` | +120 (4 new tests) | +| `benches/network.rs` | +20 (one bench) | +| **Total** | **~+220** | diff --git a/docs/superpowers/plans/2026-04-27-smoltcp-passt-port.md b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port.md new file mode 100644 index 00000000..a12a10d7 --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-smoltcp-passt-port.md @@ -0,0 +1,430 @@ +# SLIRP Refactor: Lift passt Patterns Into Our Stack + +**Status:** Spec +**Date:** 2026-04-27 +**Supersedes:** [`2026-04-12-network-backend-abstraction.md`](2026-04-12-network-backend-abstraction.md) (design changes — see "Relationship to prior plan" below) + +## Required skills during execution + +> **Mandatory for every task in every phase.** Each phase plan and +> every individual task assumes the implementer has these loaded. +> Failures here are blocking review comments. + +| Skill | When it fires | Why mandatory here | +|---|---|---| +| **`rust-style`** | Any task that writes or modifies Rust code | Project-wide style: for-loops over iterators, `let-else` for early returns, variable shadowing, newtypes, explicit matching, minimal comments. The refactor is high-volume Rust; without this, style drift accumulates. | +| **`rustdoc`** | Any task that adds or changes doc comments on public items (`NetworkBackend` trait, new public methods, new public types) | Public surface gets documented per RFC 1574 — summary sentence, sections, type references. The trait is a long-lived public API; bad rustdoc ages badly. | +| **`rust-analyzer-ssr`** | Any task that does a structural rename or signature change across the workspace (e.g. 
`SlirpStack → SlirpBackend`, `poll → drain_to_guest`, swapping concrete types for trait objects) | LSP-aware rename understands type resolution and path equivalence. Grep-based renames break on shadowed paths and miss trait-method call sites. The plan's renames span `src/network/`, `src/devices/virtio_net.rs`, `src/vmm/mod.rs`, snapshot code, and tests — too wide for safe text-substitution. |
+| **`superpowers:test-driven-development`** | Every test/bench task in Phase 0 and every behavior change in Phases 1–5 | The "broken on purpose" pins are TDD by construction: assertion locks current behavior, refactor flips assertion. Skipping the failing-test step destroys that property. |
+| **`superpowers:verification-before-completion`** | Before claiming any task complete | The validation gate (`cargo fmt`, `cargo clippy -D warnings`, `cargo test`, `cargo bench`, VM suites where applicable) must produce real green output, not narration. |
+| **`verify`** *(repo skill)* | At the end of every phase, before opening the PR | Runs the full project quality gate: format, clippy, tests, security audit, startup bench regression, real-workload smoke. Catches cross-cutting regressions that the network-only gate misses. |
+| **`profile`** *(repo skill)* | When a divan or wall-clock bench regresses by >5% | Don't guess at perf regressions — capture eBPF profiles and read them. |
+
+In addition, the project-wide rules from `CLAUDE.md` and `AGENTS.md`
+remain in force:
+
+- **Prefer LSP operations** (`goToDefinition`, `findReferences`,
+  `hover`, `documentSymbol`, `workspaceSymbol`) over Grep/Glob for
+  Rust code navigation. Grep/Glob only for comments, config files,
+  non-Rust files.
+- **Platform parity:** every change validated on Linux (KVM) and, where
+  applicable, macOS (VZ). Phase 0's wall-clock harness is Linux-only
+  by design (smoltcp is `cfg(target_os = "linux")`); Phases 1–5
+  surface-level changes must not break the macOS build.
+- **Imports and constants at module scope.** Never inline `use` / + `const` inside function bodies. + +## Summary + +Refactor `src/network/slirp.rs` to fix correctness and coverage gaps (no +ICMP, UDP-only-on-port-53, fragile hand-rolled TCP relay) by lifting +proven design patterns from [passt](https://passt.top/passt) into our +own all-Rust smoltcp-based stack — instead of adopting passt as an +external backend. + +The work is gated behind a benchmark and correctness baseline: every +phase ships with assertions that pin existing behavior (including the +"broken on purpose" parts) so regressions and improvements are both +visible in the diff. + +## Motivation + +The prior plan (2026-04-12) proposed adding `passt` as an opt-in +Linux-only backend behind a new `NetworkBackend` trait. After deeper +analysis of both codebases, that approach has worse cost/benefit than +keeping the work in-tree: + +**Why not passt as a backend:** + +- **Observability regression.** passt is an opaque C process behind a + 4-byte-prefixed unix socket. Every bug becomes "did passt do the + right thing?" instead of "what did our stack do?" with full + structured logs, tracing spans, and a debugger that works. +- **Cross-platform divergence.** passt is Linux-only. Adding it makes + guest behavior diverge across host platforms (`ping` works on Linux, + fails silently on macOS). +- **Operational friction.** passt is not installed by default on + Fedora, Ubuntu, Arch, or Alpine. Every user wanting the upgrade + needs a separate install step. +- **Process-lifecycle complexity.** Crash policy, stderr routing, + `PR_SET_PDEATHSIG`, and snapshot/restore semantics all become real + problems we don't have today. +- **New attack surface in the data path.** C code in our sandbox + boundary, even battle-tested C code, is qualitatively new exposure. + +**Why lift the design patterns instead:** + +- The capability gaps (ICMP, full UDP, IPv6) are tractable in + Rust+smoltcp. 
ICMP via `SOCK_DGRAM IPPROTO_ICMP` is ~150 LOC. + Generalizing UDP off the port-53 fast-path is ~200 LOC. +- The fragile parts of our TCP relay (256 KB `to_host` buffer cliff, + hand-rolled FIN state machine, `EAGAIN` deferral) can be **deleted**, + not patched, by adopting passt's "no per-connection packet buffer, + mirror sequence numbers via `MSG_PEEK`" pattern. +- The all-Rust path keeps structured tracing, sanitizers, and + profiler-readable call stacks intact. +- The `NetworkBackend` trait abstraction still earns its keep: it + decouples virtio-net from the stack so a future TAP/vhost-net + backend (the path that actually moves throughput numbers, per the + prior plan's appendix) can land cleanly. + +## Hard invariant — observability + +**Full observability is a non-negotiable differentiator** of this +codebase vs. running passt as a process. Every phase MUST preserve: + +- All-Rust, no opaque process boundary in the data path. Syscalls + via `libc` are fine; spawning passt is not. +- The existing `tracing` integration end-to-end — every state + transition (connection accept/establish/RST/FIN, peek, ACK-driven + consume) emits a structured event. The `tracing-subscriber` + pipeline at `src/observe/logs.rs` continues to receive everything. +- `cargo test`-driveable behavior — every change exercised by tests + that drive `SlirpBackend` directly without a VM + (`tests/network_baseline.rs`). +- Standard Rust tooling — LSP, `cargo clippy`, sanitizers, profiler. + +Per-phase plans MUST encode this as task-level acceptance criteria +(see Phase 3's "Non-negotiable invariants" section for the +canonical wording). A task that lifts a passt pattern but +silently bypasses our observability stack — even one that "works" +end-to-end — is rejected. + +## Non-goals + +- **Adopting passt as a binary backend.** Explicitly rejected per the + motivation above. 
+- **Throughput improvements.** Per the 2026-04-12 plan's appendix, the
+  bottleneck is the MMIO exit path, not the network stack. This work
+  improves correctness and coverage; throughput wins require
+  ioeventfd/irqfd or vhost-net (separately scoped, separately reviewed).
+- **IPv6 in the initial phases.** Real lift (~800–1000 LOC). Deferred
+  to a later phase with its own plan.
+- **macOS feature parity in Phase 0.** The wall-clock e2e harness will
+  initially be Linux-only since `smoltcp` is already Linux-gated in
+  `Cargo.toml`. macOS (VZ NAT) continues unchanged.
+
+## Relationship to prior plan
+
+The 2026-04-12 plan proposed:
+
+1. Extract `NetworkBackend` trait. **Kept.**
+2. Add `PasstBackend` (Linux-only, opt-in). **Replaced** with in-tree
+   improvements to the smoltcp-based backend.
+3. Cleanup rename `SlirpStack → SlirpBackend`. **Kept**, moved into
+   Phase 0 alongside the trait extraction. Role-based name (matches
+   future `TapBackend`/`VhostNetBackend`); does not leak the smoltcp
+   library dependency.
+
+The trait surface from the prior plan is tightened (`poll` becomes an
+out-param to drop the per-call `Vec<Vec<u8>>` allocation; explicit
+error type; health/dead signal).
+
+## Design
+
+### Core insight
+
+passt's superpower is a single architectural decision: **don't buffer
+per connection — mirror sequence numbers**.
+
+Our current TCP relay (`src/network/slirp.rs:82–1048`, ~625 LOC) does
+the opposite: `read()`s from the host socket into a `to_guest: Vec<u8>`,
+drains on the next poll, and **closes the connection if `to_host`
+exceeds 256 KB** (`slirp.rs:903–910`). passt never has that problem
+because it never copies — it `recv(MSG_PEEK)`s, and the host kernel's
+socket buffer *is* the buffer. Sequence math
+(`seq_to_tap = seq_ack_from_tap + bytes_peeked`) reproduces what we
+hand-roll.
+
+That single trick eliminates roughly half of the fragility in our
+current code: no `EAGAIN` buffer-overflow path, no manual
+`to_host_pending_ack` deferral, no 256 KB cliff.
+
+### Five patterns ported, ranked by ROI
+
+| # | Pattern | passt source | Our target | Approx. LoC | Phase |
+|---|---|---|---|---|---|
+| 1 | `MSG_PEEK` + sequence mirroring (TCP) | `tcp.c` `tcp_data_from_sock`, `tcp_data_from_tap` | `slirp.rs::relay_tcp_nat_data`, `handle_tcp_frame` | ~400 replaced | 3 |
+| 2 | Per-flow connected UDP socket | `udp.c` `udp_flow_from_tap`, `udp_listen_sock_handler` | `slirp.rs::handle_dns_frame` (generalize) | ~200 new | 2 |
+| 3 | Unprivileged ICMP echo via `SOCK_DGRAM IPPROTO_ICMP` | `icmp.c` `icmp_ping_handler`, `icmp_sock_handler` | new `slirp.rs::handle_icmp_frame` | ~150 new | 1 |
+| 4 | Unified flow table with side indexing | `flow.c`, `flow.h` `union flow` + SipHash table | new `slirp.rs::FlowTable` | ~200 refactor | 4 |
+| 5 | Stateless address translation | `fwd.c::nat_inbound` | refactor existing 10.0.2.2→127.0.0.1 rewrite | ~150 refactor | 5 |
+
+### What we keep as-is
+
+- **DNS caching with question-section keying** (`slirp.rs:433–456`) is
+  better than passt — passt has no DNS cache. Keep it.
+- **Net-poll thread on a 5ms timer** (`vmm/mod.rs:1594–1630`) is
+  simpler than passt's epoll/timerfd dance and fits our virtio-mmio
+  model. The 5ms floor matters less once we stop dropping connections
+  at 256 KB.
+- **smoltcp for wire types + ARP via `Interface`** is the right
+  division of labor. passt has to hand-roll its packet abstraction
+  (`packet.h`); we get checksum and parsing for free.
+- **Threading model** (`process_guest_frame` on vCPU, `poll` on
+  net-poll, `Arc<Mutex<SlirpBackend>>`) is sound. Don't touch it.
+
+### What we throw away from passt
+
+| passt feature | Why skip |
+|---|---|
+| `TCP_REPAIR` migration | Out of scope; VM snapshots already break TCP |
+| `splice()` / vhost-user / pasta zero-copy | Throughput-focused, gated by MMIO exit cost |
+| Full IPv6 (DHCPv6, NDP, RA) | Deferred to a later phase |
+| AVX2 checksum | smoltcp's checksum is fine; premature optimization |
+| Daemon harness, conf parsing, qrap | We're an embedded library, not a daemon |
+| C weak-symbol dispatch | Use Rust enum dispatch / trait objects |
+
+### `NetworkBackend` trait
+
+```rust
+// src/network/mod.rs
+
+use std::io;
+
+/// A network backend processes raw Ethernet frames between guest and host.
+///
+/// Implementations must be `Send` so they can be held behind
+/// `Arc<Mutex<dyn NetworkBackend>>` and accessed from both the vCPU
+/// thread (TX path) and the net-poll thread (RX path).
+pub trait NetworkBackend: Send {
+    /// Process a raw Ethernet frame sent by the guest (TX path).
+    ///
+    /// Called from the vCPU thread on MMIO write to the TX virtqueue.
+    /// Implementations should not block.
+    fn process_guest_frame(&mut self, frame: &[u8]) -> io::Result<()>;
+
+    /// Drain Ethernet frames destined for the guest into `out` (RX path).
+    ///
+    /// Called every ~5ms from the net-poll thread. Frames are
+    /// complete Ethernet payloads — no virtio-net header (the caller
+    /// prepends that). The buffer is reused across calls to avoid
+    /// per-poll allocation.
+    fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>);
+
+    /// Backend health. `false` means the backend has entered an
+    /// unrecoverable state and should be reconstructed.
+    fn is_healthy(&self) -> bool {
+        true
+    }
+}
+```
+
+Differences from the prior plan:
+
+- `poll() -> Vec<Vec<u8>>` → `drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>)`.
+  Drops the per-poll allocation that would otherwise fire every 5ms.
+- Explicit `io::Result<()>` instead of project-wide `Result`.
+- `is_healthy()` default-true hook for future backends that have a + process or socket lifecycle (TAP, vhost-net). Unused by + `SmoltcpBackend`. + +## Phase breakdown + +Each phase is **independent** and **landable on its own**. Each phase +will get its own bite-sized plan document under `docs/superpowers/plans/` +when execution starts. Phases 1–5 plan documents are deliberately not +written yet — what we learn from earlier phases will sharpen the +detailed task lists for later ones. + +| Phase | Scope | Risk | Plan doc | +|---|---|---|---| +| **0** | Baseline tests + benches + `NetworkBackend` trait extraction + `SlirpStack → SlirpBackend` rename. **Zero user-visible behavior change.** | Low | [`2026-04-27-smoltcp-passt-port-phase0.md`](2026-04-27-smoltcp-passt-port-phase0.md) | +| **1** | ICMP echo via unprivileged `SOCK_DGRAM IPPROTO_ICMP`, with sysctl-fallback to drop. | Low | [`2026-04-27-smoltcp-passt-port-phase1.md`](2026-04-27-smoltcp-passt-port-phase1.md) | +| **2** | Generalize UDP: per-flow connected sockets, drop port-53 limit, keep DNS fast-path/cache. | Low–medium | [`2026-04-27-smoltcp-passt-port-phase2.md`](2026-04-27-smoltcp-passt-port-phase2.md) | +| **3** | TCP relay rewrite using `MSG_PEEK` + sequence mirroring. Drop `to_guest: Vec` and 256 KB cap. | **High** — gnarliest of the lot. Snapshot integration tests are the gate. | [`2026-04-27-smoltcp-passt-port-phase3.md`](2026-04-27-smoltcp-passt-port-phase3.md) | +| **4** | Unified flow table refactor (no behavior change). Single `flow_table: HashMap` replacing the three per-protocol maps. | Medium | [`2026-04-27-smoltcp-passt-port-phase4.md`](2026-04-27-smoltcp-passt-port-phase4.md) | +| **5** | Stateless NAT translation refactor + port-forwarding configurability. | Low | [`2026-04-27-smoltcp-passt-port-phase5.md`](2026-04-27-smoltcp-passt-port-phase5.md) | +| **6** *(optional)* | IPv6 dual-stack (DHCPv6, NDP, RA, NAT). 
| High | TBD; may be split further | + +## Baseline strategy + +Every phase ships with assertions that pin observable behavior. Three +of these assertions deliberately encode **broken** behavior — they are +green lights that flip when the corresponding phase lands. + +### Two test layers + +**Layer 1 — unit-level (fast, deterministic, no VM):** drive +`SmoltcpBackend` directly. Feed synthetic Ethernet frames via +`process_guest_frame`, drive `drain_to_guest`, inspect emissions. +Sub-millisecond per test, runs on every `cargo test`. Lives in +`tests/network_baseline.rs`. + +**Layer 2 — wall-clock e2e (slow, real numbers, comparable to passt):** +boot a VM, run iperf3/netperf-style measurements inside, output JSON. +Mirrors the existing `voidbox-startup-bench` pattern. New binary +`voidbox-network-bench`. Linux-only initially. + +### Two benchmark layers + +**Layer 1 — divan microbenches:** `benches/network.rs` mirrors +`benches/startup.rs`. `divan::main()`, `#[divan::bench]`, parametric +`args` for NAT-walk scaling. Run with `cargo bench --bench network`. + +**Layer 2 — wall-clock harness above** outputs metrics named to match +passt's published table (`tcp_throughput_*`, `tcp_rr_latency`, +`tcp_crr_latency`, `udp_throughput_*`). + +### "Broken on purpose" pins + +These three tests assert broken behavior today. They are intended to +flip when the corresponding phase lands: + +| Test | Today's assertion | Flips in phase | +|---|---|---| +| `tcp_to_host_buffer_drops_at_256kb` | Connection closes when guest writes >256 KB before host reads | 3 | +| `udp_non_dns_silently_dropped` | UDP datagram to port 80 produces no host-side connection | 2 | +| `icmp_echo_silently_dropped` | ICMP echo request produces no echo reply | 1 | + +The PR that fixes each behavior is the PR that flips the assertion, +which makes the diff legible to reviewers. 
+ +### passt head-to-head methodology + +Direct numerical comparison is structurally limited (passt runs in +qemu with its socket back-end; we run our own VMM with virtio-mmio). +The honest plan: + +1. **Same hardware, same workload, same metric names.** Run our + `voidbox-network-bench` and a passt+qemu reference on the same + host. Two columns in the report. +2. **Track the gap, don't claim parity.** Throughput will lag because + of MMIO exit overhead; that's known and out-of-scope. +3. **Connect rate (CRR latency) is the most apples-to-apples + metric** — dominated by NAT-table operations, not MMIO. If passt + does CRR in 135 µs and we do 600 µs, that's a meaningful "we have + 4× more overhead per connect" signal that this refactor should + narrow. + +Report shape (illustrative, real numbers come from the harness): + +``` + before after-phase-3 passt +tcp throughput g2h 1500B 4.1 G 5.2 G 5.2 G +tcp RR latency 72 µs 58 µs 58 µs +tcp CRR latency 640 µs 180 µs 135 µs +udp DNS qps 12k 12k n/a +icmp echo dropped ~110 µs ~50 µs +allocations per packet 3 0 0 +``` + +## File impact + +### Phase 0 (baseline + trait + rename) + +| File | Change | +|---|---| +| `src/network/mod.rs` | Add `NetworkBackend` trait | +| `src/network/slirp.rs` | `impl NetworkBackend for SlirpStack`, rename type to `SlirpBackend`, tighten `poll` to `drain_to_guest` | +| `src/devices/virtio_net.rs` | Hold `Arc>` instead of concrete `SlirpStack` | +| `src/vmm/mod.rs` | Update construction at cold-boot + snapshot-restore sites | +| `tests/network_baseline.rs` | **New file**: ~14 unit-level pins | +| `benches/network.rs` | **New file**: divan microbenches | +| `src/bin/voidbox-network-bench/main.rs` | **New file**: wall-clock harness | +| `Cargo.toml` | Register new bench, new binary, new test | +| `.github/workflows/startup-bench.yml` | Add `network` bench step (or add a new workflow file) | + +### Phases 1–5 + +Documented in their own plan files when scoped. 
+ +## Risks + +- **TCP rewrite is the high-risk part.** Phase 3 replaces the most + battle-tested path in our networking code. The snapshot integration + suite is the safety gate; if any of `snapshot_integration`, + `e2e_telemetry`, `e2e_skill_pipeline`, `e2e_mount`, or `e2e_sidecar` + regress, Phase 3 stays in draft. +- **passt protocol/idiom drift.** We're lifting design patterns, not + code. The risk is that we hit edge cases passt has already solved + that we'll re-discover as bugs (e.g. PAWS, fast retransmit + thresholds). Mitigation: explicit test-case lift from passt's test + suite (`/home/diego/github/passt/test/`) where applicable. +- **Cross-platform parity for ICMP.** Linux requires the + `net.ipv4.ping_group_range` sysctl to permit the calling GID. + macOS allows unprivileged `SOCK_DGRAM IPPROTO_ICMP` unconditionally. + When sysctl forbids it on Linux, fall back to current behavior + (drop), with a warn-once log. +- **Engineering time vs. throughput wins.** This work does not move + throughput numbers. The ioeventfd/vhost-net path that *does* will + reuse the trait abstraction we land in Phase 0, but won't reuse the + TCP relay rewrite from Phase 3. If priorities shift toward + throughput, Phases 0, 1, and 2 still pay off; Phase 3 may be + deferred. 
+ +## Validation gate (per phase) + +Every phase ends with: + +```bash +# Static +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings + +# Tests +cargo test --workspace --all-features +cargo test --doc --workspace --all-features + +# Network-specific +cargo test --test network_baseline +cargo bench --bench network # no >5% regression vs main + +# VM suites that exercise networking (Linux/KVM) +export VOID_BOX_KERNEL=/boot/vmlinuz-$(uname -r) +export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz +cargo test --test conformance -- --ignored --test-threads=1 +cargo test --test snapshot_integration -- --ignored --test-threads=1 +cargo test --test e2e_skill_pipeline -- --ignored --test-threads=1 +``` + +A phase is not "done" until all gates pass and the wall-clock +`voidbox-network-bench` shows no regression on previously-working +metrics. New metrics (ICMP latency, non-DNS UDP throughput) are +expected to flip from "n/a / dropped" to a number when their +corresponding phase lands. 
+ +## References + +- **Prior plan** (this supersedes the design, keeps the trait): + `docs/superpowers/plans/2026-04-12-network-backend-abstraction.md` +- **passt source** (cloned locally): + `/home/diego/github/passt` + - `tcp.c` — TCP translation, sequence mirroring (Phase 3 reference) + - `udp.c` — per-flow UDP NAT (Phase 2 reference) + - `icmp.c` — `IPPROTO_ICMP SOCK_DGRAM` echo (Phase 1 reference) + - `flow.c` — unified flow table (Phase 4 reference) + - `fwd.c::nat_inbound` — stateless address translation (Phase 5 ref) +- **Our networking code:** + - `src/network/slirp.rs` (1275 LOC) — the file most of this work + lands in + - `src/network/mod.rs` (202 LOC) — where `NetworkBackend` trait goes + - `src/devices/virtio_net.rs` (831 LOC) — virtio-net wiring + - `src/vmm/mod.rs:1594–1630` — net-poll thread +- **Existing bench/test infrastructure to mirror:** + - `benches/startup.rs` — divan pattern + - `src/bin/voidbox-startup-bench/main.rs` — wall-clock harness + pattern + - `.github/workflows/startup-bench.yml` — CI regression gate +- **passt project page:** https://passt.top/passt — performance + table format, metric names diff --git a/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.4.md b/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.4.md new file mode 100644 index 00000000..64050246 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.4.md @@ -0,0 +1,1427 @@ +# Phase 6.4: Event-Driven RX Polling Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the 5 ms timer-driven `net_poll_thread` with `epoll_wait`-driven readiness dispatch, so host→guest RX latency is bounded by the actual data-arrival delay (sub-millisecond) rather than the 5 ms polling cycle. 
+ +**Architecture:** A new `mod epoll_dispatch` inside `src/network/` owns a single `epoll_fd` plus a self-pipe. `SlirpBackend` registers/unregisters socket FDs on flow-table mutations. The `net_poll_thread` calls `epoll_wait` (50 ms timeout for housekeeping) and routes each ready FD to the correct relay handler via `epoll_data` carrying a `FlowKey`. The self-pipe lets the vCPU-thread side wake the poll thread when it adds a new flow without polling-cycle delay. + +**Tech stack:** smoltcp 0.11 wire types (unchanged), `libc::epoll_*` syscalls, `pipe2(O_NONBLOCK | O_CLOEXEC)`, no new crates. + +**Hard performance gate (the "more performant than master" requirement):** + +``` +scripts/bench-compare.sh --baseline origin/main --skip-vm +``` + +…must show, for every comparable bench, **HEAD ≤ baseline + 5 %** *and* at least the following must improve by ≥ 30 %: + +- `port_forward_accept_latency` (currently bounded by 50 ms listener poll; epoll should drop median by an order of magnitude once the listener also moves onto epoll — *or* document why it stays). +- a new `tcp_rx_latency_us_p50` wall-clock metric in `voidbox-network-bench` (Phase 6.4 must be sub-5 ms; pre-6.4 was bounded below by the 5 ms net-poll cycle). + +Phase 6.4 is **not allowed to merge** until both gates above pass. + +--- + +## Background + +Reviewer finding **A4** (Medium-Low) on PR #68: + +- `src/vmm/mod.rs:1599-1610`: `net_poll_thread` wakes every 5 ms (`std::thread::sleep(Duration::from_millis(5))`). +- `src/network/slirp.rs:1549`: `relay_tcp_nat_data` re-peeks 64 KiB on **every** connected TCP socket every tick, regardless of readiness. +- Listener threads spawned by `spawn_port_forward_listeners` (`src/network/slirp.rs:2097`) sleep 50 ms between accept attempts — this is the cap on `port_forward_accept_latency` (~50 ms median observed in `benches/network.rs::port_forward_accept_latency`). + +passt's reference: epoll-driven readiness ([passt/tcp.c:463](https://passt.top/passt/tree/tcp.c#n463)). 
Phase 6.4 ports the *idea* (event-driven), not the literal `SO_PEEK_OFF` mechanism (which is Linux-specific and would not survive a future cross-platform backend split — though SLIRP itself is already `cfg(target_os = "linux")`). + +## Invariants (carried from Phase 6 overview — non-negotiable) + +1. **Full observability via `tracing`.** Every epoll event emits a `trace!` line with the `FlowKey` and event type. No silent dispatch. +2. **All-Rust path.** `libc::epoll_*` is the syscall surface; no new crates. +3. **Cross-platform discipline.** Phase 6.4 stays inside the existing `#[cfg(target_os = "linux")]` gate. macOS VZ is unaffected. +4. **No regression in Phase 0–5 baselines.** `bench-compare.sh --baseline origin/main` enforced — see "Hard performance gate" above. +5. **Snapshot/restore correctness.** `snapshot_integration` continues to pass. The `epoll_fd` does not survive snapshot; restore rebuilds the epoll set from `flow_table` contents. Snapshot does not serialize the epoll FD itself. + +## File structure + +| Path | Responsibility | Action | +|---|---|---| +| `src/network/epoll_dispatch.rs` | Owns `epoll_fd`, self-pipe, register/unregister, `wait()` returning `Vec`. Linux-only. | **Create** | +| `src/network/mod.rs` | Add `pub(crate) mod epoll_dispatch;` | Modify | +| `src/network/slirp.rs` | Hold `epoll: EpollDispatch` field on `SlirpBackend`; register on every flow_table insert; unregister on remove; rewrite `relay_tcp_nat_data`/`relay_udp_flows`/`relay_icmp_echo` to dispatch only on ready flows. | Modify | +| `src/vmm/mod.rs` | `net_poll_thread` rewrite: `epoll_wait(timeout=50ms)` instead of `sleep(5ms)`. | Modify | +| `tests/network_baseline.rs` | New pin `tcp_rx_latency_sub_5ms`; fix-up `tcp_writes_more_than_256kb_succeed`'s comment-vs-code mismatch; rename/migrate `drain_n` from `.poll()` to `drain_to_guest`. | Modify | +| `benches/network.rs` | Add divan bench `tcp_rx_latency_one_packet`. 
| Modify | +| `src/bin/voidbox-network-bench/main.rs` | Add `tcp_rx_latency_us_p50` measurement (host writes to a flow, time until guest sees the bytes via the relay). | Modify | +| `docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.4.md` | This file. | Already created | + +`drain_n` migration in `tests/network_baseline.rs` is a quiet cleanup that lands in Task 1 — every test in the file uses it, so dropping `.poll()` here also drops the last in-tree `.poll()` caller and lets us delete the deprecated method entirely later. + +## Architecture notes + +### Why one `epoll_fd` (not one per protocol)? + +- Single point of dispatch — the poll thread does *one* `epoll_wait` syscall regardless of how many flows are open. +- `epoll_data.u64` is 8 bytes — we encode `FlowKey` as a 64-bit token there. UDP and ICMP keys are smaller; TCP keys (`(guest_port, dst_ip, dst_port)`) fit in 64 bits with a tag byte for the protocol discriminator. +- Self-pipe is registered alongside socket FDs; reading it drains a queue of "I just added flow X" wake events posted by `process_guest_frame` running on the vCPU thread. + +### Why a self-pipe? + +`process_guest_frame` runs on the **vCPU thread** under the device lock. When it inserts a new flow into `flow_table`, the new socket FD is registered with epoll on that thread (cheap — just `epoll_ctl(EPOLL_CTL_ADD, ...)`). But the **poll thread** is asleep inside `epoll_wait(timeout=50ms)`. Without a wakeup, the new flow has up to 50 ms of latency before the first poll cycle picks it up. + +The self-pipe (`pipe2(O_NONBLOCK | O_CLOEXEC)` registered with `EPOLLIN`) lets `process_guest_frame` write a single byte after `epoll_ctl`. The poll thread's `epoll_wait` returns immediately, drains the pipe (a no-op handler), and starts dispatching — including the new flow. + +### Snapshot interaction + +`epoll_fd` is a kernel handle on real FDs — not serializable. Snapshot path: + +- `snapshot_internal`: tear down epoll. Drop `EpollDispatch`. 
Serialize `flow_table` as today. +- `from_snapshot`: deserialize `flow_table` → for every entry, recreate the host socket (already happening today via `host_stream` round-trip) → register the new FD with a fresh `EpollDispatch`. + +No serde changes to `flow_table` itself. + +### Why 50 ms `epoll_wait` timeout? + +Housekeeping the poll thread does *outside* the dispatch loop: + +- Reap stale UDP flows (`UDP_IDLE_TIMEOUT = 60 s`) — coarse, 50 ms is fine. +- Reap stale ICMP flows (similar). +- Phase 6.1 will add `LAST_ACK_TIMEOUT` reaping here. + +If we set the timeout shorter we re-introduce the "wake every X ms regardless" cost we're trying to remove. If we set it longer, housekeeping latency grows. 50 ms balances both at a 10 % wakeup duty cycle versus the previous 100 % (one wakeup every 5 ms). + +--- + +## Tasks + +### Task 1: Pre-baseline + retransmit-test fix-up + +**Files:** +- Modify: `tests/network_baseline.rs:170-179` (the `drain_n` helper) +- Modify: `tests/network_baseline.rs:374-422` (retransmit comment-vs-code in `tcp_writes_more_than_256kb_succeed`) + +- [ ] **Step 1: Capture baseline numbers from `origin/main`** + +```bash +# from a clean repo checkout +scripts/bench-compare.sh --baseline origin/main --skip-vm > /tmp/baseline-vs-main.md +cat /tmp/baseline-vs-main.md +``` + +Expected: every comparable bench has a real number in both columns. Save `/tmp/baseline-vs-main.md` as the pre-Phase-6.4 reference. + +- [ ] **Step 2: Migrate `drain_n` from `.poll()` to `drain_to_guest`** + +Replace `tests/network_baseline.rs:170-179`: + +```rust +/// Drains frames the stack wants to send to the guest, calling +/// `drain_to_guest` up to `n` times. Returns all frames produced +/// across the calls (caller may not care about per-call boundaries). 
+fn drain_n(stack: &mut SlirpBackend, n: usize) -> Vec<Vec<u8>> {
+    let mut out: Vec<Vec<u8>> = Vec::new();
+    for _ in 0..n {
+        stack.drain_to_guest(&mut out);
+    }
+    out
+}
+```
+
+- [ ] **Step 3: Run the existing pins to confirm `drain_n` migration is non-breaking**
+
+```bash
+cargo test --test network_baseline
+```
+
+Expected: PASS for every existing pin (no semantic change —
+`drain_to_guest` appends to the buffer, same as `.poll()` extension).
+
+- [ ] **Step 4: Fix the retransmit comment-vs-code mismatch in `tcp_writes_more_than_256kb_succeed`**
+
+The Copilot review's C1.1 finding is correct: the loop unconditionally
+advances `seq` after every send, never retransmits unACK'd chunks. The
+95 % threshold tolerates the resulting loss but the test's intent ("we
+re-send those") doesn't match its implementation.
+
+Two valid fixes — pick the simpler one. Replace the loop body in
+`tests/network_baseline.rs:387-422`:
+
+```rust
+while bytes_received.load(Ordering::Relaxed) < TOTAL && std::time::Instant::now() < deadline {
+    // Retransmit semantics: only advance the send cursor once the
+    // previous chunk has been ACK'd. If the stack stops ACKing
+    // (Phase 3 backpressure), we re-send the same seq/payload until
+    // it's acknowledged. This matches the comment above and the
+    // production guest-TCP behavior we're emulating.
+    let _ = stack.process_guest_frame(&build_tcp_frame(
+        SLIRP_GATEWAY_IP,
+        GUEST_EPHEMERAL_PORT,
+        host_port,
+        seq,
+        our_seq + 1,
+        TcpControl::Psh,
+        &chunk,
+    ));
+
+    // Drain frames; track the highest ACK we've seen and watch
+    // for RST/FIN that would indicate a Phase-2 era close.
+    for f in drain_n(&mut stack, 4) {
+        if let Some((_, ack, ctrl, _)) = parse_tcp_to_guest(&f) {
+            if matches!(ctrl, TcpControl::Rst | TcpControl::Fin) {
+                saw_close = true;
+            }
+            if ack > acked_seq {
+                acked_seq = ack;
+            }
+        }
+    }
+
+    if saw_close {
+        break;
+    }
+
+    // Advance our send cursor only past ACK'd data.
If the stack + // didn't ACK this chunk, the next loop iteration re-sends the + // same seq/payload (true TCP retransmit semantics). + if acked_seq >= seq.wrapping_add(CHUNK as u32) { + seq = seq.wrapping_add(CHUNK as u32); + } else if seq.wrapping_sub(acked_seq) > 256 * 1024 { + // Out-paced kernel recv buffer; sleep briefly so the host + // server thread can drain. + std::thread::sleep(std::time::Duration::from_millis(10)); + } +} +``` + +The single substantive change: move `seq = seq.wrapping_add(...)` from line 398 (unconditional, immediately after send) to after the drain loop, gated on `acked_seq >= seq + CHUNK`. If the stack ACK'd, advance; otherwise the next iteration re-sends the same chunk. + +- [ ] **Step 5: Run the fixed test to confirm it still passes (now with real retransmit)** + +```bash +cargo test --test network_baseline tcp_writes_more_than_256kb_succeed +``` + +Expected: PASS. The 95 % threshold will likely be 100 % now since real retransmits don't drop bytes. + +- [ ] **Step 6: Commit** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): drain_n via drain_to_guest + real retransmit in 256kb test + +Two test-harness improvements landing together since both block the +Phase 6.4 RX-latency work: + +- drain_n migrated from deprecated SlirpBackend::poll() to + drain_to_guest. This was the last in-tree poll() caller. +- tcp_writes_more_than_256kb_succeed now matches its 'we re-send + those' comment: seq only advances when acked_seq catches up, + giving real TCP-retransmit semantics in the synthetic guest + rather than the previous 'lossy with 95% tolerance' shape. + Phase 6.4 must not regress this contract; making the test + faithful first means epoll regressions surface as failures + instead of borderline 95% misses." +``` + +--- + +### Task 2: ~~Failing pin — `tcp_rx_latency_sub_5ms`~~ **DROPPED** + +**Status:** Dropped during execution. 
Original intent was a unit-level BROKEN_ON_PURPOSE pin asserting host→guest delivery in < 5 ms. **The 5 ms floor lives in `net_poll_thread` (`src/vmm/mod.rs:1609`), not in `SlirpBackend::drain_to_guest`** — the relay is synchronous when called from a test harness, so a unit-level latency assertion can't measure what we actually care about. + +**Where the contract moved:** Task 13's wall-clock `tcp_rx_latency_us_p50` metric in `voidbox-network-bench`. That harness boots a real VM, drives the actual `net_poll_thread`, and observes the latency floor end-to-end. The hard-perf-gate requirement at the top of this plan (`tcp_rx_latency_us_p50 < 5 ms`) is the BROKEN_ON_PURPOSE replacement. + +**No code lands for Task 2.** Skip directly to Task 3. + +
+Original Task 2 body (kept for context) + +The original plan attempted a unit-level pin that called `drain_to_guest` synchronously and timed the host-write → guest-receive interval. Implementation revealed: + +- `drain_to_guest` is synchronous; the 5 ms `sleep` in `net_poll_thread` is what bounds VMM-level RX latency, not anything inside `SlirpBackend`. +- The test would have measured "spawn-thread + accept + write" minus "drain-loop find time", which underflowed in debug mode and was meaningless in release mode. + +The contract — Phase 6.4 must deliver host→guest data in < 5 ms when data is available — is preserved as a VM-level requirement in Task 13. + +
+
+- [ ] **Step 1: ~~Write the failing test~~ Skipped — see "DROPPED" note above. Original body kept below for context only.**
+
+```rust
+/// Phase 6.4 pin: host→guest RX latency must be sub-5 ms when data
+/// is available. Pre-Phase-6.4 the floor was 5 ms (the
+/// `net_poll_thread` `sleep(5ms)` cycle); post-Phase-6.4 the
+/// epoll dispatch should deliver in < 1 ms on a quiet system.
+///
+/// Test harness: open a TCP flow guest→host, wait for ESTABLISHED,
+/// have the host write 64 bytes, measure the time from `write()`
+/// returning to the guest seeing the bytes in `drain_to_guest`'s
+/// output. Pre-Phase-6.4 this measures ≈ 5 ms ± jitter; post-
+/// Phase-6.4 it should be sub-millisecond on the same host.
+#[test]
+fn tcp_rx_latency_sub_5ms() {
+    use std::io::Write;
+    use std::net::{TcpListener, TcpStream};
+    use std::time::Instant;
+
+    // Bind a host listener; the SLIRP rewrite of 10.0.2.2 → 127.0.0.1
+    // routes our SYN to it.
+    let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
+    let host_port = listener.local_addr().unwrap().port();
+    let server = std::thread::spawn(move || -> Option<std::time::Duration> {
+        let (mut sock, _) = listener.accept().ok()?;
+        // Wait for the guest to send something so we know the relay
+        // is established and bidirectional.
+        let mut probe = [0u8; 1];
+        let _ = std::io::Read::read(&mut sock, &mut probe);
+
+        // Stamp T0 just before write returns.
+        let t0 = Instant::now();
+        sock.write_all(&[0x42; 64]).ok()?;
+        Some(t0.elapsed())
+    });
+
+    let mut stack = SlirpBackend::new().unwrap();
+
+    // Drive the 3-way handshake.
+ let our_seq = 1000u32; + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, our_seq, 0, + TcpControl::Syn, &[], + )).unwrap(); + + let mut gateway_seq = 0u32; + for f in drain_n(&mut stack, 4) { + if let Some((s, _ack, ctrl, _)) = parse_tcp_to_guest(&f) { + if matches!(ctrl, TcpControl::Syn) { + gateway_seq = s; + break; + } + } + } + + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, our_seq + 1, gateway_seq + 1, + TcpControl::None, &[], + )).unwrap(); + + // Send a probe byte so the host server thread proceeds to write. + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, our_seq + 1, gateway_seq + 1, + TcpControl::Psh, &[0xAA], + )).unwrap(); + + // Now the host writes and stamps T0. We measure from "host write + // completes" to "guest sees data in drain output." + let host_t0 = server.join().expect("server").expect("write succeeded"); + let drain_start = Instant::now(); + let mut saw_payload = false; + while drain_start.elapsed() < std::time::Duration::from_secs(1) { + let frames: Vec> = drain_n(&mut stack, 1); + for f in &frames { + if let Some((_, _, _, payload_len)) = parse_tcp_to_guest(f) { + if payload_len >= 64 { + saw_payload = true; + break; + } + } + } + if saw_payload { break; } + std::thread::sleep(std::time::Duration::from_micros(50)); + } + let host_to_guest_us = drain_start.elapsed().as_micros() as u64 + - host_t0.as_micros() as u64; + + assert!(saw_payload, "host payload never reached the guest"); + + // The contract: epoll dispatch delivers in < 5 ms. 
+ assert!( + host_to_guest_us < 5_000, + "Phase 6.4 contract: host→guest RX latency must be sub-5 ms \ + (was bounded below by 5 ms net_poll_thread cycle); got {host_to_guest_us} µs" + ); +} +``` + +- [ ] **Step 2: Run the test, expect it to fail** + +```bash +cargo test --test network_baseline tcp_rx_latency_sub_5ms +``` + +Expected: **FAIL** with `host→guest RX latency must be sub-5 ms; got <5000-9999> µs` — the current `net_poll_thread` is ineligible to deliver in <5 ms because of its `sleep(5ms)`. + +This is the Phase 6.4 BROKEN_ON_PURPOSE pin. It will flip in Task 11. + +- [ ] **Step 3: Commit the failing pin** + +```bash +git add tests/network_baseline.rs +git commit -m "test(network): pin tcp_rx_latency_sub_5ms (BROKEN_ON_PURPOSE) + +Phase 6.4 contract: host→guest RX latency must be sub-5 ms when +data is available. Pre-6.4 the floor is the 5 ms net_poll_thread +sleep cycle; this assertion fails on master and on the current +PR #68 tip. Phase 6.4's epoll dispatch will flip it to passing. + +Mark with #[ignore] is deliberately NOT used: this is a positive +contract and CI must surface the failure on master so the gate +is unmissable." +``` + +--- + +### Task 3: `EpollDispatch` skeleton + unit test + +**Files:** +- Create: `src/network/epoll_dispatch.rs` +- Modify: `src/network/mod.rs` — add `pub(crate) mod epoll_dispatch;` + +- [ ] **Step 1: Write the failing test (in the new module)** + +In `src/network/epoll_dispatch.rs`: + +```rust +//! Linux epoll-driven readiness dispatch for SLIRP host sockets. +//! +//! Owns one `epoll_fd` plus a self-pipe. Callers register socket FDs +//! with a `FlowToken` (a 64-bit identifier the dispatcher returns on +//! readiness). The poll thread calls `wait_with_timeout` to block +//! until any registered FD is ready or the timeout fires, then drains +//! the events into a caller-owned buffer. +//! +//! Why no crate? The standard `mio`/`tokio` story would pull in a +//! reactor + a runtime — Phase 6.4 needs neither. 
`libc::epoll_*`
+//! is two syscalls, fully observable, and the surface fits in ~150
+//! lines. See plan 2026-04-30-smoltcp-passt-port-phase6.4.md
+//! "Architecture notes" for the rationale.
+
+use std::io;
+use std::os::fd::{AsRawFd, OwnedFd, RawFd};
+use std::time::Duration;
+
+/// Opaque per-FD identifier the caller uses to look up which flow a
+/// readiness event belongs to. Encoded into `epoll_data.u64`.
+pub type FlowToken = u64;
+
+/// One readiness event, mapped from `libc::epoll_event`.
+#[derive(Debug, Clone, Copy)]
+pub struct EpollEvent {
+    pub token: FlowToken,
+    pub readable: bool,
+    pub writable: bool,
+}
+
+#[derive(Debug)]
+pub struct EpollDispatch {
+    // implementation in next step
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::os::fd::AsRawFd;
+
+    #[test]
+    fn dispatch_new_creates_epoll_fd() {
+        let dispatch = EpollDispatch::new().expect("EpollDispatch::new");
+        assert!(dispatch.epoll_fd_for_test() >= 0);
+    }
+}
+```
+
+- [ ] **Step 2: Run, expect compile error**
+
+```bash
+cargo test --lib network::epoll_dispatch
+```
+
+Expected: COMPILE FAIL — `new` and `epoll_fd_for_test` not defined.
+
+- [ ] **Step 3: Implement minimal `EpollDispatch`**
+
+Replace the empty struct in `src/network/epoll_dispatch.rs`:
+
+```rust
+#[derive(Debug)]
+pub struct EpollDispatch {
+    epoll_fd: OwnedFd,
+}
+
+impl EpollDispatch {
+    /// Create a new epoll instance with `EPOLL_CLOEXEC`.
+    pub fn new() -> io::Result<Self> {
+        // SAFETY: `epoll_create1` returns -1 on error and a valid fd
+        // otherwise. We wrap into OwnedFd so Drop closes it.
+ let raw = unsafe { libc::epoll_create1(libc::EPOLL_CLOEXEC) }; + if raw < 0 { + return Err(io::Error::last_os_error()); + } + let epoll_fd = unsafe { OwnedFd::from_raw_fd(raw) }; + Ok(Self { epoll_fd }) + } + + #[cfg(test)] + fn epoll_fd_for_test(&self) -> RawFd { + self.epoll_fd.as_raw_fd() + } +} +``` + +Add the missing `use std::os::fd::FromRawFd;` to the file's existing `use` block (module-scope per project convention). + +- [ ] **Step 4: Run, expect pass** + +```bash +cargo test --lib network::epoll_dispatch::tests::dispatch_new_creates_epoll_fd +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/network/epoll_dispatch.rs src/network/mod.rs +git commit -m "feat(network): EpollDispatch skeleton with epoll_create1 + +Phase 6.4 foundation. One epoll_fd owned via OwnedFd + EPOLL_CLOEXEC. +No registration logic yet — Task 4 will add register/unregister and +Task 6 will add the self-pipe + wait loop." +``` + +--- + +### Task 4: `register` / `unregister` + tests + +**Files:** +- Modify: `src/network/epoll_dispatch.rs` + +- [ ] **Step 1: Write the failing tests** + +In the `mod tests` block: + +```rust +#[test] +fn register_then_unregister_round_trip() { + use std::net::TcpListener; + let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); + let mut dispatch = EpollDispatch::new().expect("EpollDispatch::new"); + let token: FlowToken = 0xDEAD_BEEF; + dispatch + .register(listener.as_raw_fd(), token, true, false) + .expect("register"); + dispatch.unregister(listener.as_raw_fd()).expect("unregister"); +} + +#[test] +fn register_invalid_fd_returns_error() { + let mut dispatch = EpollDispatch::new().expect("EpollDispatch::new"); + let result = dispatch.register(-1, 0, true, false); + assert!(result.is_err()); +} +``` + +- [ ] **Step 2: Run, expect compile fail** + +```bash +cargo test --lib network::epoll_dispatch +``` + +Expected: COMPILE FAIL — `register`/`unregister` not defined. 
+ +- [ ] **Step 3: Implement** + +Add to `EpollDispatch`: + +```rust +impl EpollDispatch { + /// Register `fd` with the dispatcher. `readable`/`writable` + /// select EPOLLIN / EPOLLOUT. `token` is opaque to the + /// dispatcher — returned verbatim on readiness events. + pub fn register( + &mut self, + fd: RawFd, + token: FlowToken, + readable: bool, + writable: bool, + ) -> io::Result<()> { + let mut events: u32 = 0; + if readable { + events |= libc::EPOLLIN as u32; + } + if writable { + events |= libc::EPOLLOUT as u32; + } + let mut ev = libc::epoll_event { + events, + u64: token, + }; + // SAFETY: epoll_ctl reads `ev` for ADD; we own `fd` for the + // lifetime of the registration (caller's contract). + let rc = unsafe { + libc::epoll_ctl( + self.epoll_fd.as_raw_fd(), + libc::EPOLL_CTL_ADD, + fd, + &mut ev as *mut _, + ) + }; + if rc < 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) + } + + pub fn unregister(&mut self, fd: RawFd) -> io::Result<()> { + // SAFETY: epoll_ctl ignores the event pointer for DEL but + // still requires it to be non-null on older kernels. + let mut ev = libc::epoll_event { events: 0, u64: 0 }; + let rc = unsafe { + libc::epoll_ctl( + self.epoll_fd.as_raw_fd(), + libc::EPOLL_CTL_DEL, + fd, + &mut ev as *mut _, + ) + }; + if rc < 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) + } +} +``` + +- [ ] **Step 4: Run, expect pass** + +```bash +cargo test --lib network::epoll_dispatch +``` + +Expected: PASS for both new tests. 
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/network/epoll_dispatch.rs
+git commit -m "feat(network): EpollDispatch register/unregister"
+```
+
+---
+
+### Task 5: `wait_with_timeout` + integration test
+
+**Files:**
+- Modify: `src/network/epoll_dispatch.rs`
+
+- [ ] **Step 1: Write the failing test**
+
+```rust
+#[test]
+fn wait_returns_event_when_socket_becomes_readable() {
+    use std::io::Write;
+    use std::net::{TcpListener, TcpStream};
+    let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
+    let addr = listener.local_addr().unwrap();
+    let server = std::thread::spawn(move || {
+        let (mut sock, _) = listener.accept().unwrap();
+        sock.write_all(b"hi").unwrap();
+    });
+    let stream = TcpStream::connect(addr).expect("connect");
+    server.join().unwrap();
+
+    let mut dispatch = EpollDispatch::new().expect("new");
+    dispatch
+        .register(stream.as_raw_fd(), 0xCAFE, true, false)
+        .expect("register");
+
+    let mut events: Vec<EpollEvent> = Vec::new();
+    let n = dispatch
+        .wait_with_timeout(&mut events, Duration::from_secs(1))
+        .expect("wait");
+    assert_eq!(n, 1);
+    assert_eq!(events[0].token, 0xCAFE);
+    assert!(events[0].readable);
+}
+```
+
+- [ ] **Step 2: Run, expect compile fail**
+
+Expected: `wait_with_timeout` not found.
+
+- [ ] **Step 3: Implement**
+
+```rust
+impl EpollDispatch {
+    /// Block up to `timeout` for any registered FD to become ready.
+    /// Drains ready events into `out` (cleared first). Returns the
+    /// number of events drained.
+    ///
+    /// `timeout = Duration::ZERO` is non-blocking poll;
+    /// `timeout = Duration::from_secs(...)` waits up to that long.
+    pub fn wait_with_timeout(
+        &self,
+        out: &mut Vec<EpollEvent>,
+        timeout: Duration,
+    ) -> io::Result<usize> {
+        out.clear();
+
+        // Pre-allocate a fixed-size event buffer. 64 ready FDs per
+        // wait is more than enough for our flow counts; events not
+        // returned this round will surface on the next wait. 
+        let mut raw_events: [libc::epoll_event; 64] =
+            [libc::epoll_event { events: 0, u64: 0 }; 64];
+
+        let timeout_ms: i32 = timeout
+            .as_millis()
+            .min(i32::MAX as u128) as i32;
+
+        // SAFETY: epoll_wait writes up to raw_events.len() entries;
+        // returns -1 on error, 0 on timeout, n>0 on events.
+        let n = unsafe {
+            libc::epoll_wait(
+                self.epoll_fd.as_raw_fd(),
+                raw_events.as_mut_ptr(),
+                raw_events.len() as i32,
+                timeout_ms,
+            )
+        };
+        if n < 0 {
+            // EINTR is non-fatal — caller can retry on next tick.
+            let err = io::Error::last_os_error();
+            if err.raw_os_error() == Some(libc::EINTR) {
+                return Ok(0);
+            }
+            return Err(err);
+        }
+        for raw in &raw_events[..n as usize] {
+            out.push(EpollEvent {
+                token: raw.u64,
+                readable: (raw.events & libc::EPOLLIN as u32) != 0,
+                writable: (raw.events & libc::EPOLLOUT as u32) != 0,
+            });
+        }
+        Ok(n as usize)
+    }
+}
+```
+
+- [ ] **Step 4: Run, expect pass**
+
+```bash
+cargo test --lib network::epoll_dispatch
+```
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/network/epoll_dispatch.rs
+git commit -m "feat(network): EpollDispatch::wait_with_timeout"
+```
+
+---
+
+### Task 6: Self-pipe + wakeup test
+
+**Files:**
+- Modify: `src/network/epoll_dispatch.rs`
+
+- [ ] **Step 1: Write the failing test**
+
+```rust
+#[test]
+fn wakeup_unblocks_wait_immediately() {
+    use std::time::Instant;
+    let mut dispatch = EpollDispatch::new().expect("new");
+    let waker = dispatch.waker();
+
+    // Start the wait in another thread with a long timeout.
+    let wait_thread = std::thread::spawn(move || -> std::time::Duration {
+        let mut events: Vec<EpollEvent> = Vec::new();
+        let start = Instant::now();
+        let _ = dispatch.wait_with_timeout(&mut events, Duration::from_secs(5));
+        start.elapsed()
+    });
+
+    // Wake immediately.
+    std::thread::sleep(Duration::from_millis(10));
+    waker.wake();
+
+    let elapsed = wait_thread.join().expect("wait thread");
+    // Wait thread should return well under the 5 s timeout. 
+    assert!(
+        elapsed < Duration::from_secs(1),
+        "wait did not return on wakeup: {elapsed:?}"
+    );
+}
+```
+
+- [ ] **Step 2: Run, expect compile fail**
+
+Expected: `waker()` and `Waker` not defined.
+
+- [ ] **Step 3: Implement**
+
+Add to `epoll_dispatch.rs`:
+
+```rust
+/// Cloneable wakeup handle for `EpollDispatch`. Writing one byte to
+/// the underlying pipe wakes a thread blocked in `wait_with_timeout`.
+#[derive(Debug, Clone)]
+pub struct Waker {
+    write_end: std::sync::Arc<OwnedFd>,
+}
+
+impl Waker {
+    pub fn wake(&self) {
+        let buf = [0u8; 1];
+        // SAFETY: write to a non-blocking pipe never blocks. We
+        // ignore EAGAIN — the pipe already has bytes pending, which
+        // means a wakeup is already queued.
+        let _ = unsafe {
+            libc::write(self.write_end.as_raw_fd(), buf.as_ptr() as *const _, 1)
+        };
+    }
+}
+
+const SELF_PIPE_TOKEN: FlowToken = u64::MAX;
+
+impl EpollDispatch {
+    /// Returns a `Waker` that, when called, unblocks any thread
+    /// currently inside `wait_with_timeout`.
+    pub fn waker(&mut self) -> Waker {
+        if self.waker_handle.is_none() {
+            let (read_fd, write_fd) = create_pipe2_nonblock_cloexec();
+            self.register(read_fd.as_raw_fd(), SELF_PIPE_TOKEN, true, false)
+                .expect("register self-pipe");
+            self.read_end = Some(read_fd);
+            self.waker_handle = Some(std::sync::Arc::new(write_fd));
+        }
+        Waker {
+            write_end: self.waker_handle.as_ref().unwrap().clone(),
+        }
+    }
+}
+
+fn create_pipe2_nonblock_cloexec() -> (OwnedFd, OwnedFd) {
+    let mut fds = [0 as RawFd; 2];
+    // SAFETY: pipe2 with O_NONBLOCK | O_CLOEXEC writes two fds into fds. 
+    let rc = unsafe {
+        libc::pipe2(fds.as_mut_ptr(), libc::O_NONBLOCK | libc::O_CLOEXEC)
+    };
+    assert!(rc == 0, "pipe2 failed: {}", io::Error::last_os_error());
+    let read_end = unsafe { OwnedFd::from_raw_fd(fds[0]) };
+    let write_end = unsafe { OwnedFd::from_raw_fd(fds[1]) };
+    (read_end, write_end)
+}
+```
+
+Add fields to `EpollDispatch`:
+
+```rust
+#[derive(Debug)]
+pub struct EpollDispatch {
+    epoll_fd: OwnedFd,
+    read_end: Option<OwnedFd>,
+    waker_handle: Option<std::sync::Arc<OwnedFd>>,
+}
+```
+
+…and update `EpollDispatch::new` to initialize the new fields to `None`.
+
+In `wait_with_timeout`, after collecting events, drop the self-pipe wake-token from the returned set (the caller doesn't care about it) and drain any pending bytes from the read end:
+
+```rust
+// Drain self-pipe events from the returned set + the pipe itself.
+let mut filtered: Vec<EpollEvent> = Vec::with_capacity(out.len());
+for ev in out.drain(..) {
+    if ev.token == SELF_PIPE_TOKEN {
+        if let Some(read_end) = &self.read_end {
+            let mut scratch = [0u8; 64];
+            // SAFETY: non-blocking read; ignored result.
+            unsafe {
+                libc::read(
+                    read_end.as_raw_fd(),
+                    scratch.as_mut_ptr() as *mut _,
+                    scratch.len(),
+                );
+            }
+        }
+        continue;
+    }
+    filtered.push(ev);
+}
+*out = filtered;
+let observable_n = out.len();
+Ok(observable_n)
+```
+
+- [ ] **Step 4: Run all dispatch tests**
+
+```bash
+cargo test --lib network::epoll_dispatch
+```
+
+Expected: PASS for all four tests.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/network/epoll_dispatch.rs
+git commit -m "feat(network): EpollDispatch self-pipe wakeup
+
+Cloneable Waker writes one byte to a non-blocking pipe registered
+with EPOLLIN. wait_with_timeout filters self-pipe events out of
+the returned set and drains the pipe so subsequent waits don't
+spurious-wake."
+```
+
+---
+
+### Task 7: Wire `EpollDispatch` into `SlirpBackend`
+
+**Files:**
+- Modify: `src/network/slirp.rs` — `SlirpBackend` struct + `new` + `with_security`. 
+ +- [ ] **Step 1: Add the field** + +In the `SlirpBackend` struct definition (~line 450): + +```rust +pub struct SlirpBackend { + // ... existing fields ... + epoll: crate::network::epoll_dispatch::EpollDispatch, + epoll_waker: crate::network::epoll_dispatch::Waker, +} +``` + +In `SlirpBackend::with_security` (~line 503), after `flow_table` is initialized but before any flow is inserted: + +```rust +let mut epoll = crate::network::epoll_dispatch::EpollDispatch::new() + .map_err(|e| anyhow::anyhow!("EpollDispatch::new: {e}"))?; +let epoll_waker = epoll.waker(); +``` + +…then include `epoll`, `epoll_waker` in the struct literal. + +- [ ] **Step 2: Run unit tests; expect them to still pass (no behavior change yet)** + +```bash +cargo test --lib network::slirp +cargo test --test network_baseline +``` + +Expected: ALL PASS — `SlirpBackend` now owns an unused epoll_fd. + +- [ ] **Step 3: Commit** + +```bash +git add src/network/slirp.rs +git commit -m "refactor(slirp): SlirpBackend holds EpollDispatch + Waker + +Plumbed but not yet consumed. Subsequent tasks wire flow_table +mutations into epoll register/unregister and rewrite the relay +loops to dispatch on readiness." +``` + +--- + +### Task 8: TCP register/unregister on flow_table mutation + smoke test + +**Files:** +- Modify: `src/network/slirp.rs` — `handle_tcp_frame` (after `flow_table.insert`) and `relay_tcp_nat_data` (where `to_remove` entries are reaped). + +- [ ] **Step 1: Add a `flow_token_for_tcp` helper at module scope** + +Encoding: 8 bits of protocol tag (0x01 = TCP), 8 bits unused (zero), 16 bits guest_port, 32 bits packed (dst_port << 16) | (truncated dst_ip). For 100 % uniqueness across tag/port collisions, see follow-up — for now this 64-bit token is unique within the flow table because `NatKey` itself is unique. 
+ +```rust +const PROTO_TAG_TCP: u64 = 0x0100_0000_0000_0000; +const PROTO_TAG_UDP: u64 = 0x0200_0000_0000_0000; +const PROTO_TAG_ICMP: u64 = 0x0300_0000_0000_0000; + +fn flow_token_for_tcp(key: &NatKey) -> u64 { + let dst_ip_bytes = key.dst_ip.0; + let dst_ip_low: u64 = u64::from(u32::from_be_bytes(dst_ip_bytes)) & 0xFFFF_FFFF; + PROTO_TAG_TCP + | (u64::from(key.guest_src_port) << 32) + | (u64::from(key.dst_port) << 16) + | (dst_ip_low & 0xFFFF) +} +``` + +Symmetric helpers for UDP / ICMP land in Tasks 9 / 10. + +- [ ] **Step 2: After every `flow_table.insert(FlowKey::Tcp(...), FlowEntry::Tcp(entry))`, register the host_stream FD** + +For example in `handle_tcp_frame` (~line 1290 after insert): + +```rust +let token = flow_token_for_tcp(&key); +self.epoll + .register(entry.host_stream.as_raw_fd(), token, true, false) + .ok(); +self.epoll_waker.wake(); +``` + +…and in `process_pending_inbound_accepts` (line 648 area): + +```rust +self.flow_table.insert(FlowKey::Tcp(key), FlowEntry::Tcp(entry)); +let host_fd = match self.flow_table.get(&FlowKey::Tcp(key)) { + Some(FlowEntry::Tcp(e)) => e.host_stream.as_raw_fd(), + _ => unreachable!(), +}; +self.epoll.register(host_fd, flow_token_for_tcp(&key), true, false).ok(); +self.epoll_waker.wake(); +``` + +…and on every `flow_table.remove(&FlowKey::Tcp(...))` site, unregister first: + +```rust +if let Some(FlowEntry::Tcp(e)) = self.flow_table.get(&flow_key) { + self.epoll.unregister(e.host_stream.as_raw_fd()).ok(); +} +self.flow_table.remove(&flow_key); +``` + +(grep for every `flow_table.remove` and `flow_table.insert` site touching TCP — there are ~6.) + +- [ ] **Step 3: Run all baseline pins** + +```bash +cargo test --test network_baseline +``` + +Expected: PASS — no behavioral change yet (relay still re-peeks every flow on every tick). 
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): register TCP flows with EpollDispatch
+
+flow_table mutations now keep the epoll set in sync. No relay-loop
+change yet — Task 11 will switch the loop to dispatch by readiness
+instead of iterating the full table."
+```
+
+---
+
+### Task 9: UDP register/unregister + ICMP register/unregister
+
+Mirror Task 8 for `FlowKey::Udp` and `FlowKey::IcmpEcho` flow_table sites. Same shape: register on insert, unregister on remove. Use `PROTO_TAG_UDP` / `PROTO_TAG_ICMP` in the helpers.
+
+- [ ] **Step 1: Implement helpers and call sites**
+- [ ] **Step 2: Run baseline pins (PASS)**
+- [ ] **Step 3: Commit** with message `feat(slirp): register UDP + ICMP flows with EpollDispatch`
+
+---
+
+### Task 10: Flip `relay_tcp_nat_data` to event-driven
+
+**Files:**
+- Modify: `src/network/slirp.rs` — `relay_tcp_nat_data` body (~line 1512+).
+
+The current loop iterates *every* TCP entry in `flow_table` every tick. New shape: take the readiness set from a caller-passed `&[EpollEvent]`, look up the flow by `FlowKey`, only peek-relay readable flows.
+
+- [ ] **Step 1: Change signature**
+
+```rust
+fn relay_tcp_nat_data(&mut self, ready: &[EpollEvent]) {
+    let mut to_remove: Vec<FlowKey> = Vec::new();
+    let mut frames_to_inject: Vec<Vec<u8>> = Vec::new();
+
+    for event in ready {
+        if event.token & PROTO_TAG_MASK != PROTO_TAG_TCP {
+            continue;
+        }
+        // Decode token back to NatKey by linear scan — flow_table is
+        // small and the token-to-key direction is rare (only on
+        // readiness). Future optimization: keep a side index. 
+        let flow_key = match self.flow_table.iter().find_map(|(k, _)| {
+            if let FlowKey::Tcp(nat_key) = k {
+                if flow_token_for_tcp(nat_key) == event.token {
+                    return Some(*k);
+                }
+            }
+            None
+        }) {
+            Some(k) => k,
+            None => continue,
+        };
+
+        let Some(FlowEntry::Tcp(entry)) = self.flow_table.get_mut(&flow_key) else {
+            continue;
+        };
+        if entry.state != TcpNatState::Established {
+            continue;
+        }
+
+        // ... existing peek/relay body, unchanged from line 1549+ ...
+    }
+
+    self.inject_to_guest.append(&mut frames_to_inject);
+    for flow_key in to_remove {
+        if let Some(FlowEntry::Tcp(e)) = self.flow_table.get(&flow_key) {
+            self.epoll.unregister(e.host_stream.as_raw_fd()).ok();
+        }
+        self.flow_table.remove(&flow_key);
+    }
+}
+```
+
+Define `PROTO_TAG_MASK` next to the other tag constants:
+
+```rust
+const PROTO_TAG_MASK: u64 = 0xFF00_0000_0000_0000;
+```
+
+…and check `event.token & PROTO_TAG_MASK == PROTO_TAG_TCP`.
+
+- [ ] **Step 2: Update the caller in `drain_to_guest`**
+
+```rust
+pub fn drain_to_guest(&mut self, out: &mut Vec<Vec<u8>>) {
+    self.process_pending_inbound_accepts();
+    // ... ARP handling ...
+
+    // Phase 6.4: gather readiness events once per tick. The poll
+    // thread will already have driven a recent epoll_wait; here we do
+    // a non-blocking poll to pick up anything that arrived between
+    // the last wait and now.
+    let mut ready: Vec<EpollEvent> = Vec::new();
+    let _ = self.epoll.wait_with_timeout(&mut ready, Duration::ZERO);
+
+    self.resolve_pending_dns();
+    self.relay_tcp_nat_data(&ready);
+    self.relay_icmp_echo(&ready);
+    self.relay_udp_flows(&ready);
+
+    // ... unchanged collection of frames ...
+}
+```
+
+- [ ] **Step 3: Update `relay_icmp_echo` and `relay_udp_flows` signatures to `(&mut self, ready: &[EpollEvent])`** with parallel filtering by `PROTO_TAG_ICMP` / `PROTO_TAG_UDP`. 
+
+- [ ] **Step 4: Run baseline pins**
+
+```bash
+cargo test --test network_baseline
+```
+
+Expected: PASS — the `wait_with_timeout(Duration::ZERO)` non-blocking poll captures any ready FD between vCPU calls; the relay still works.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/network/slirp.rs
+git commit -m "feat(slirp): relay loops dispatch by epoll readiness
+
+drain_to_guest non-blocking-polls the epoll set once per tick and
+passes the ready event list to relay_tcp_nat_data /
+relay_udp_flows / relay_icmp_echo, which now skip non-ready flows
+instead of re-peeking the whole table. Behavior unchanged on
+hot-path; per-tick CPU should drop on idle systems with many
+flows."
+```
+
+---
+
+### Task 11: Rewrite `net_poll_thread` to use `epoll_wait`
+
+**Files:**
+- Modify: `src/vmm/mod.rs:1599-1640`.
+
+- [ ] **Step 1: Replace the `sleep(5ms)` loop**
+
+The current loop:
+
+```rust
+while running.load(Ordering::Relaxed) {
+    std::thread::sleep(std::time::Duration::from_millis(5));
+    // ... try_inject_rx + irq ...
+}
+```
+
+Becomes (pseudocode — exact integration with the device-lock pattern needs care):
+
+```rust
+while running.load(Ordering::Relaxed) {
+    // Acquire the SlirpBackend's waker once at startup; use it as
+    // the shutdown signaling channel too.
+    let mut events: Vec<EpollEvent> = Vec::new();
+    {
+        let guard = match net_dev.lock() {
+            Ok(g) => g,
+            Err(_) => continue,
+        };
+        // Borrow epoll for the wait; see Step 2 for the API on
+        // VirtioNetDevice that exposes it without holding the
+        // device lock during epoll_wait.
+        let _ = guard.poll_epoll(&mut events, Duration::from_millis(50));
+    }
+    // ... try_inject_rx + irq, unchanged ...
+}
+```
+
+The challenge: `epoll_wait` blocks for up to 50 ms; we cannot hold the device lock that whole time (vCPU would stall on next TX). Solution: `VirtioNetDevice::poll_epoll` clones the `epoll` into an `Arc<Mutex<EpollDispatch>>` (or similar) and the wait happens *outside* the device lock. 
+
+- [ ] **Step 2: Refactor the lock granularity**
+
+In `src/network/slirp.rs`, change:
+
+```rust
+epoll: EpollDispatch,
+```
+
+to:
+
+```rust
+epoll: std::sync::Arc<std::sync::Mutex<EpollDispatch>>,
+```
+
+…and update all `self.epoll.register(...)` to `self.epoll.lock().unwrap().register(...)`. Provide a clone-of-Arc accessor:
+
+```rust
+pub fn epoll_arc(&self) -> std::sync::Arc<std::sync::Mutex<EpollDispatch>> {
+    Arc::clone(&self.epoll)
+}
+```
+
+The poll thread holds an `Arc<Mutex<EpollDispatch>>`, calls `wait_with_timeout` while holding that lock, and *not* the device lock.
+
+- [ ] **Step 3: Run baseline + integration tests**
+
+```bash
+cargo test --workspace --all-features
+cargo test --test network_baseline
+```
+
+Expected: all PASS.
+
+- [ ] **Step 4: Run the BROKEN_ON_PURPOSE pin from Task 2 — it should now flip to PASS**
+
+```bash
+cargo test --test network_baseline tcp_rx_latency_sub_5ms
+```
+
+Expected: **PASS** with measured latency < 5 ms (likely sub-millisecond).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/network/slirp.rs src/vmm/mod.rs
+git commit -m "feat(vmm): net_poll_thread driven by epoll_wait
+
+Replaces the 5 ms sleep cycle with epoll_wait(timeout=50ms). When
+host data arrives, the poll thread wakes within microseconds and
+drives drain_to_guest immediately. When idle, the thread wakes
+once every 50 ms for housekeeping (UDP/ICMP idle reaping) — a
+10x reduction in wakeup duty cycle vs the previous 5 ms timer.
+
+Phase 6.4 BROKEN_ON_PURPOSE pin tcp_rx_latency_sub_5ms flips to
+passing here."
+```
+
+---
+
+### Task 12: Snapshot rebuild test + implementation
+
+**Files:**
+- Modify: `src/vmm/mod.rs` (snapshot/restore paths) and `src/network/slirp.rs` (`from_snapshot`-shaped constructor).
+
+- [ ] **Step 1: Run the existing snapshot integration suite to confirm baseline**
+
+```bash
+export VOID_BOX_KERNEL=/boot/vmlinuz-$(uname -r)
+export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz
+cargo test --test snapshot_integration -- --ignored --test-threads=1
+```
+
+Expected: PASS (Phase 0–5 baseline). 
If it doesn't pass on this branch's tip pre-6.4, fix before continuing — this gate is non-negotiable. + +- [ ] **Step 2: Write the new test pin** + +In `tests/network_baseline.rs`: + +```rust +/// Phase 6.4 contract: snapshot/restore must rebuild the epoll +/// dispatch from flow_table contents. After a round-trip, the +/// backend has zero registered flows in epoll if flow_table was +/// non-empty pre-snapshot — that's the bug we want to catch. +#[test] +fn epoll_set_rebuilt_on_restore_smoke() { + // Construct backend, open one TCP flow (handshake), serialize + // the flow_table, drop the backend, build a fresh backend and + // inject the serialized flow_table. Verify the new backend's + // epoll set has the flow's host_fd registered. + // ... (full test code) ... +} +``` + +The detailed body is omitted here — write it referencing the snapshot helpers in `src/vmm/snapshot.rs` and the existing `from_snapshot` shape. Verify by checking the count of registered FDs (add a `#[cfg(test)] pub fn registered_fd_count(&self) -> usize` to `EpollDispatch`). + +- [ ] **Step 3: Run, expect FAIL** + +The current snapshot path has no rebuild step; the count is 0. + +- [ ] **Step 4: Implement rebuild in the snapshot deserialization path** + +Wherever `from_snapshot` reconstructs the `SlirpBackend` (likely in `src/vmm/mod.rs` around line 690 area where snapshots are restored), after the flow_table is rebuilt from the snapshot bytes, iterate it and call `epoll.register` for each entry's host FD. + +- [ ] **Step 5: Run new test + integration suite** + +```bash +cargo test --test network_baseline epoll_set_rebuilt +cargo test --test snapshot_integration -- --ignored --test-threads=1 +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add tests/network_baseline.rs src/network/slirp.rs src/vmm/mod.rs +git commit -m "feat(slirp): rebuild epoll set on snapshot restore + +epoll_fd is a kernel handle and cannot serialize. 
After +flow_table is reconstructed from snapshot bytes, register every +host FD with a fresh EpollDispatch." +``` + +--- + +### Task 13: Bench the win + perf gate + +**Files:** +- Modify: `benches/network.rs` — add `tcp_rx_latency_one_packet`. +- Modify: `src/bin/voidbox-network-bench/main.rs` — add `tcp_rx_latency_us_p50` measurement. + +- [ ] **Step 1: Add divan microbench** + +In `benches/network.rs`, add: + +```rust +/// Phase 6.4 baseline: time from "host write returns" to "guest +/// sees data in drain_to_guest output". Pre-6.4 this was bounded +/// below by the 5 ms net_poll_thread cycle; post-6.4 epoll +/// dispatch should deliver in microseconds. +#[divan::bench] +fn tcp_rx_latency_one_packet(bencher: Bencher) { + // ... handshake setup outside the timed loop ... + bencher.bench_local(|| { + // Host writes; measure how fast the bytes appear in the + // SlirpBackend's drain output. + }); +} +``` + +Full implementation: harness similar to `tcp_inbound_syn_ack_transition` shape — use `bench-helpers` feature for synthetic flow seeding, drive the data path inside the timed closure. + +- [ ] **Step 2: Add wall-clock measurement to `voidbox-network-bench`** + +In `src/bin/voidbox-network-bench/main.rs`, add a `tcp_rx_latency_us_p50` field to `Report` and a `measure_rx_latency` function that boots a VM, opens a guest→host flow, has the host write small packets, and measures host-T0-to-guest-arrival via the SLIRP relay. + +- [ ] **Step 3: Run the perf gate against `origin/main`** + +```bash +scripts/bench-compare.sh --baseline origin/main --skip-vm > /tmp/phase6.4-vs-main.md +cat /tmp/phase6.4-vs-main.md +``` + +Validate per the hard performance gate at the top of this plan: + +- Every comparable bench: HEAD ≤ baseline + 5 %. +- `tcp_rx_latency_one_packet` (HEAD-only) shows a sub-millisecond median. 
+- `port_forward_accept_latency` improves by ≥ 30 %, *or* document why it stays (likely the listener accept thread is still on the 50 ms cycle — fixing it is a small follow-up step in Phase 6.4 itself or its own task; decide before committing). + +- [ ] **Step 4: If `port_forward_accept_latency` doesn't improve, add a fix-up sub-task** to also move the listener accept onto epoll. The plan permits this — see Architecture notes. + +- [ ] **Step 5: Commit benches + the perf-gate output** + +```bash +git add benches/network.rs src/bin/voidbox-network-bench/main.rs +git commit -m "bench(network): tcp_rx_latency_one_packet + voidbox-network-bench p50 + +Captures the Phase 6.4 win numerically. Pre-6.4 RX latency was +bounded below by the 5 ms net_poll_thread cycle; post-6.4 epoll +dispatch lands in microseconds. + +scripts/bench-compare.sh --baseline origin/main --skip-vm output +attached as /tmp/phase6.4-vs-main.md (not committed; consult the +PR description for the table)." +``` + +--- + +### Task 14: Phase 6.4 validation gate + +- [ ] **Step 1: Standard validation contract** (per `AGENTS.md`) + +```bash +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +cargo test --workspace --all-features +cargo test --doc --workspace --all-features +``` + +All must pass. 
+ +- [ ] **Step 2: VM suites** + +```bash +export VOID_BOX_KERNEL=/boot/vmlinuz-$(uname -r) +export VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz +cargo test --test conformance -- --ignored --test-threads=1 +cargo test --test oci_integration -- --ignored --test-threads=1 +cargo test --test snapshot_integration -- --ignored --nocapture --test-threads=1 +cargo test --test e2e_telemetry -- --ignored --test-threads=1 +cargo test --test e2e_skill_pipeline -- --ignored --test-threads=1 +cargo test --test e2e_mount -- --ignored --test-threads=1 +cargo test --test e2e_service_mode -- --ignored --test-threads=1 +cargo test --test e2e_sidecar -- --ignored --test-threads=1 +``` + +All must pass. + +- [ ] **Step 3: aarch64 cross-check** + +```bash +CFLAGS_aarch64_unknown_linux_gnu="--sysroot=/usr/aarch64-redhat-linux/sys-root/fc43" \ + RUSTFLAGS="-D warnings" \ + cargo check --target aarch64-unknown-linux-gnu -p void-box --lib --tests +``` + +- [ ] **Step 4: Hard perf gate** + +```bash +scripts/bench-compare.sh --baseline origin/main --skip-vm +``` + +Validate against the contract at the top of this plan. **The PR is not allowed to merge** until this passes. + +- [ ] **Step 5: Commit gate evidence in the PR description (no commit needed)** + +Capture the bench-compare output in the PR body. Phase 6.4 PR is then ready for review. + +--- + +## Rollback plan + +Each task lands as one commit. If Task N introduces a regression caught at Task M (where M > N), `git revert` Task N's commit and redispatch its implementer with the failure context. No task irreversibly changes wire format or snapshot layout — every change is additive (new fields, new module) or behavior-preserving refactor. + +The only exception is the snapshot rebuild path (Task 12). If that's wrong on disk, restored backends will have a fresh-but-empty epoll set and connections will appear hung. Test the snapshot path *before* claiming Task 12 done. 
+ +## Out of scope (deferred to Phase 6.1 / 6.2 / 6.3) + +- TCP half-close — Phase 6.1. +- Async outbound `connect` — Phase 6.2 (will *consume* the epoll dispatch primitive added here for `EPOLLOUT` writability detection). +- Window management — Phase 6.3. + +## Reviewer pointers + +- **Lock granularity:** verify `epoll_wait` does not happen under the device lock (Task 11 Step 2). +- **FD lifecycle:** every `flow_table.insert` has a matching `epoll.register`; every `flow_table.remove` has a matching `epoll.unregister`. grep for both pairs and pair-count. +- **Self-pipe correctness:** `Waker::wake` is no-block, no-allocate, signal-safe-adjacent. +- **Snapshot rebuild:** Task 12's test is the contract; verify the count helper is `#[cfg(test)]` only. +- **Token uniqueness:** `flow_token_for_tcp` is unique within the flow table because `NatKey` is unique. The 16-bit dst_ip truncation is intentional for v4-only addresses on a /16 SLIRP subnet — collisions with foreign IPs are not possible because all flows route through the gateway. + +## Document history + +- 2026-04-30: initial plan written, hard performance gate locked. diff --git a/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.md b/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.md new file mode 100644 index 00000000..913e1e96 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.md @@ -0,0 +1,286 @@ +# Phase 6: TCP Lifecycle + Async Connect + Window Mgmt + Event-Driven Polling + +> **Status:** Overview (scope + design). Per-subsystem TDD task lists are deferred to dedicated plans (`-phase6.1.md`, `-phase6.2.md`, `-phase6.3.md`, `-phase6.4.md`) written before each is implemented. This document scopes the work, locks invariants, and lists validation gates so each sub-plan can be reviewed against a stable target. + +> **For agentic workers:** This is an **overview**, not an executable plan. Do not run subagent-driven-development against this file. 
When picking up a sub-area, write its own plan first. + +**Goal:** Close the four architectural gaps surfaced in the `smoltcp-passt-port-phase0` PR review without regressing any Phase 0–5 baseline. + +**Architecture:** Each sub-area imports a specific passt design pattern adapted to our `cfg(target_os = "linux")` SLIRP backend; none requires a backend split. The relay loop in `SlirpBackend::drain_to_guest` stays the single net-poll dispatch point; the changes layer onto its existing flow_table / inject_to_guest pipeline. + +**Tech stack:** smoltcp 0.11 wire types, `std::net::TcpStream` (non-blocking), Linux `epoll` (Phase 6.4), no new crates. + +--- + +## Background + +Reviewer findings on the smoltcp-passt-port PR (April 2026) — three "Medium" or higher and one "Medium-Low" architectural gap. All four were verified VALID against current code. Quick-fix correctness items (Copilot review) are addressed on the same PR; this Phase 6 plan covers the architecture-shaped follow-ups. + +Reference: `docs/superpowers/plans/2026-04-27-smoltcp-passt-port.md` (top-level spec, observability invariant), Phase 0–5 plans (architectural decisions established by prior phases). + +## Invariants (carried from earlier phases — non-negotiable) + +These are locked from the top-level spec. Phase 6 changes must preserve all of them. + +1. **Full observability.** Every TCP/UDP/ICMP frame and every state transition remains traceable through tracing logs. No opaque C-process or kernel-side magic. If a new subsystem hides state inside the kernel (e.g. epoll), tracing must still expose what the host saw and when. +2. **All-Rust path.** No new C dependencies, no FFI beyond what `libc` already provides. `epoll`-via-`libc` is acceptable; a new crate that opaques it is not, unless the crate is already in the workspace. +3. **Cross-platform discipline.** SLIRP itself is Linux-only (`#[cfg(target_os = "linux")]` in `Cargo.toml`). Phase 6 stays inside that gate. 
macOS uses VZ's built-in NAT; Phase 6 does not affect it.
+4. **No regression in Phase 0–5 baselines.** `bench-compare.sh --baseline <ref>` must show every existing bench at ±5% or better. New benches added in Phase 6 may legitimately move the baseline, but the existing comparable set holds.
+5. **Snapshot/restore correctness.** `snapshot_integration` must continue to pass. Any new state (e.g. half-close timers, async connect futures) added to `TcpNatEntry` must round-trip through serde or be rebuilt from `TcpStream` state on restore — not silently dropped.
+6. **No bench-mode-only fixes.** Behavior changes go in production code paths, not behind `#[cfg(test)]` or feature flags. Tests/benches consume the same paths the guest does.
+
+## Sub-areas
+
+Four independent sub-areas, four sub-plans. Order is by reviewer-assigned severity, not by required ordering — they can land in any sequence as long as their individual validation gates hold.
+
+---
+
+### 6.1 — TCP half-close (A1, High)
+
+**Severity:** High (correctness gap, not just performance).
+
+**Current state:**
+
+- `TcpNatState` at `src/network/slirp.rs:131-144` declares `FinWait1`, `FinWait2`, `CloseWait`, `LastAck` variants but they are unused. The enum carries `#[allow(dead_code)]` on line 130 to mute the resulting warnings.
+- Guest FIN handler at `src/network/slirp.rs:1483-1500`: on receiving guest FIN, the stack immediately sends a FIN+ACK back to the guest and marks the entry `Closed` in the same call. There is no transition through `FinWait*` or `CloseWait`. The host-side `TcpStream` is dropped at the next `relay_tcp_nat_data` sweep when the entry is reaped.
+
+**The bug this enables:**
+
+When the guest's application closes the write side of a socket but expects to keep reading the host's response (the half-close pattern used by HTTP request bodies, SMTP DATA, anything with `shutdown(SHUT_WR)`), VoidBox slams the connection shut both directions. 
The host side never gets to flush its remaining response; the guest's read returns EOF prematurely. This is silent data loss for any protocol that uses orderly half-close. + +**Reference:** passt's `tcp.c` ([passt/tcp.c:238](https://passt.top/passt/tree/tcp.c#n238), [tcp.c:401](https://passt.top/passt/tree/tcp.c#n401)) tracks the four half-close states explicitly with timer-bounded transitions. + +**Target state:** + +- Guest FIN sets `state = FinWait1` (we still owe the host a half-close), shuts down the host socket's write side via `TcpStream::shutdown(Shutdown::Write)`, and ACKs the guest's FIN — but **does not** send our own FIN yet. +- When the host returns EOF (zero-byte read on the established connection) and the relay queue is drained, send our FIN to the guest, transition to `LastAck`. +- On guest's final ACK, transition to `Closed` and reap. +- The mirror pattern handles the host-initiated close: host EOF first → state goes to `CloseWait` (we owe the guest a FIN), continue forwarding any guest writes to the host, eventually send FIN to guest → `LastAck` → reap on ACK. +- Add a `LAST_ACK_TIMEOUT` (suggest 60 s, mirroring TCP MSL × 2) so a missing final ACK doesn't leak entries. + +**Test requirements:** + +- New `tests/network_baseline.rs` pin `tcp_half_close_guest_writes_first`: guest sends data, FIN; host reads data, replies with more data, then FIN. Assert: guest sees the host's post-FIN data **and** its FIN, in that order. Pre-Phase-6.1 this would fail (host data dropped). +- New pin `tcp_half_close_host_writes_first`: symmetric — host sends data, FIN; guest replies, FIN. Assert ordering. +- New pin `tcp_last_ack_timeout_reaps_stale_entry`: synthesize a `LastAck` entry with `last_activity` deep in the past; one `drain_to_guest` cycle later assert the entry is gone. +- `snapshot_integration`: round-trip a connection in `CloseWait` state. 
Assert post-restore the state is preserved (or, if we choose not to serde the half-close states, that the connection cleanly closes within `LAST_ACK_TIMEOUT`). + +**Validation gates (in addition to the global ones below):** + +- `cargo test --test network_baseline tcp_half_close_*` +- `cargo test --test snapshot_integration -- --ignored --test-threads=1` + +**File impact:** + +- `src/network/slirp.rs` — `handle_tcp_frame` FIN/RST arms (~lines 1483–1506), `relay_tcp_nat_data` (~line 1512+), `TcpNatEntry` (add half-close timer field if needed). +- `tests/network_baseline.rs` — three new pins. +- No changes to public API. + +--- + +### 6.2 — Async outbound connect (A2, Medium-High) + +**Severity:** Medium-High (correctness + UX gap). + +**Current state:** + +- `src/network/slirp.rs:1271`: on guest SYN, `handle_tcp_frame` calls `TcpStream::connect_timeout(&dst_addr, Duration::from_secs(3))` **synchronously**. +- `handle_tcp_frame` is called from `process_guest_frame` (~line 664), which is called from the virtio-net TX path (`src/devices/virtio_net.rs:~656`). +- The TX path runs on the vCPU thread under the device lock. A 3 s blocking connect to an unreachable destination stalls **all** guest networking — including unrelated connections — for the duration of the timeout. + +**The bug this enables:** + +A guest that opens connections to multiple destinations, one of which is slow or unreachable, sees the entire host networking pipeline freeze for 3 s every time it tries that destination. Long-running guests with sporadic dead destinations (DNS misconfigurations, transient NAT failures) suffer noticeable hitches. + +**Reference:** passt is fully event-driven — connect dispatches to a worker, completion arrives via epoll on the connecting socket's writability ([passt/tcp.c:2785](https://passt.top/passt/tree/tcp.c#n2785)). 
+ +**Target state:** + +- On guest SYN: create a non-blocking socket (`TcpStream::connect` with `O_NONBLOCK`, or `socket2::Socket::new` + `connect_with_timeout` driven by us), insert a new state `Connecting` into `TcpNatState`, queue an entry in `flow_table` with the connecting socket. Return immediately to the vCPU thread. +- The net-poll thread polls the connecting socket on each tick (writability-check via `poll`/`select`/`epoll` — coordinate with 6.4). On readiness: + - Check `getsockopt(SOL_SOCKET, SO_ERROR)` — zero means connected, non-zero means failed. + - On success: transition `Connecting → SynReceived`, send SYN-ACK to the guest. + - On failure: send RST to the guest, reap the entry. + - On still-pending after `CONNECT_TIMEOUT` (3 s, matching today's behavior): treat as failure. +- vCPU thread is now never blocked on `connect`. + +**Test requirements:** + +- New pin `tcp_connect_to_unreachable_does_not_block_other_flows`: open one flow to a known-good destination, one to a deliberately-unreachable destination, both in quick succession. Measure time from guest SYN to host accepting the good-destination flow. Pre-6.2 this would be ~3 s (waiting for the bad one); post-6.2 it should be sub-millisecond. +- New pin `tcp_connect_async_eventual_rst_on_failure`: synthesize a connect to an unreachable address; drive `drain_to_guest` for >3 s; assert the guest receives RST. +- Bench: `bench/network.rs` add `process_syn_during_pending_connects` parametric on N pending connecting flows. Validates O(1) cost on guest TX path regardless of pending-connect backlog. + +**Validation gates:** + +- `cargo test --test network_baseline tcp_connect_*` +- `cargo bench --bench network process_syn_during_pending_connects` + +**File impact:** + +- `src/network/slirp.rs` — `TcpNatState` (add `Connecting`), `handle_tcp_frame` SYN arm (lines ~1267–1290), new `relay_pending_connects` method called from `drain_to_guest` (parallel to `relay_tcp_nat_data`). 
+- `tests/network_baseline.rs` — two new pins. +- `benches/network.rs` — one new bench. +- Snapshot interaction: `Connecting` state must serde correctly; restore should drop `Connecting` flows (reconnect from scratch is acceptable, deferred to Phase 6.1's MSL-bounded timer). + +--- + +### 6.3 — TCP window management (A3, Medium) + +**Severity:** Medium (perf gap, throughput left on the table). + +**Current state:** + +- `src/network/slirp.rs:1927`: `build_tcp_packet_static` always emits `window_len: TCP_WINDOW (65535)`, `window_scale: None`. +- No code reads `tcp.window_len()` from incoming guest frames. The guest's advertised window is ignored entirely. + +**Why this matters:** + +- The guest's TCP stack negotiates a window with us. We send "always 65535" regardless of what the guest can actually buffer. This is wrong both directions: + - Inbound (host→guest): we relay host data into our `inject_to_guest` queue without ever asking whether the guest still has receive buffer. If the guest is slow, our queue grows unbounded — Phase 3 partially mitigated this with peek-based reads, but window-aware backpressure would be cleaner. + - Outbound (guest→host): the guest sends respecting our advertised window (always 65535). On modern guests with `tcp_window_scaling=1` (the default), this caps effective throughput at 64 KB / RTT regardless of available bandwidth. +- The `window_scale: None` means we never negotiate scaling on SYN. Even if we tracked windows, we'd be capped at 64 KB. + +**Reference:** passt's `tcp_conn` ([passt/tcp_conn.h:21](https://passt.top/passt/tree/tcp_conn.h#n21)) tracks `wnd_from_tap`, `wnd_to_tap`, scale factors, and updates ACK/window per [tcp.c:1021](https://passt.top/passt/tree/tcp.c#n1021), [tcp.c:1426](https://passt.top/passt/tree/tcp.c#n1426). + +**Target state:** + +- On SYN/SYN-ACK exchange, negotiate `window_scale: Some(7)` (128× scale factor — passt's default). `TcpNatEntry` records the negotiated scale. 
+- On every guest packet, read `tcp.window_len()` and update `entry.guest_window` (after applying scale). Use this to bound the host→guest send rate: never push more bytes through `inject_to_guest` than the guest's effective receive window allows. +- On every host-side relay, set our outgoing `window_len` based on host kernel state — `getsockopt(TCP_INFO).tcpi_rcv_space` gives kernel-side receive buffer headroom; advertise that, scaled. +- Drop the hardcoded `TCP_WINDOW = 65535` constant. + +**Test requirements:** + +- New pin `tcp_advertised_window_tracks_guest_buffer`: synthesize a guest with a small advertised window (say 4096); push 64 KB of data from host; assert that `inject_to_guest` never holds more than ~`window` unacknowledged bytes. +- New pin `tcp_window_scale_negotiated_in_syn`: parse the SYN-ACK we send to the guest; assert it includes `window_scale: Some(7)`. +- Bench: extend `tcp_bulk_throughput_1mb` to also run with a constrained-window receiver (`SO_RCVBUF=16384`); pre-6.3 throughput will be 64 KB / RTT bound; post-6.3 should be substantially higher because we'll let the guest send larger bursts when host kernel space allows. + +**Validation gates:** + +- `cargo test --test network_baseline tcp_advertised_window_*` +- `cargo bench --bench network tcp_bulk_throughput_*` — assert no regression, and ideally improvement at small `SO_RCVBUF`. + +**File impact:** + +- `src/network/slirp.rs` — `TcpNatEntry` (add `guest_window`, `guest_window_scale`), `build_tcp_packet_static` signature (take advertised window from caller), `handle_tcp_frame` (read incoming window), `relay_tcp_nat_data` (gate sends on guest window). +- `tests/network_baseline.rs` — two new pins. +- `benches/network.rs` — one new bench arm. + +--- + +### 6.4 — Event-driven RX polling (A4, Medium-Low) + +**Severity:** Medium-Low (efficiency, not correctness). 
+ +**Current state:** + +- `src/vmm/mod.rs:1599` — `net_poll_thread` wakes every 5 ms (`std::thread::sleep(Duration::from_millis(5))` at line 1609). +- `src/network/slirp.rs:1549` — `relay_tcp_nat_data` re-peeks a 64 KiB buffer on every connected TCP socket every tick, regardless of whether new data has arrived. + +**Why this matters:** + +- 200 polls/second on every connected flow, even when idle. With many flows this is wasted CPU. +- 5 ms granularity means tail latency for any RX event is bounded below by ~5 ms even if data arrived microseconds after the last poll. For latency-sensitive workloads this is the floor. + +**Reference:** passt uses epoll-driven socket readiness ([passt/tcp.c:463](https://passt.top/passt/tree/tcp.c#n463)) with optional `SO_PEEK_OFF` — the syscall returns the readable list, no polling needed. + +**Target state:** + +- Replace the 5 ms timer with `epoll_wait` on a Linux `epoll_fd` that owns all of: + - the connected `TcpStream`s in `flow_table` (registered with `EPOLLIN`) + - the connecting sockets from Phase 6.2 (registered with `EPOLLOUT`) + - the UDP flow sockets (Phase 2) + - the ICMP echo socket (Phase 1) + - a `pipe(2)` self-pipe for inter-thread wakeup (so `process_guest_frame` can request an out-of-band poll cycle when it adds a new flow). +- `epoll_wait` timeout: short (say 50 ms) just as a safety net for periodic housekeeping (LAST_ACK_TIMEOUT sweeps, idle UDP flow reaping). The hot path is event-driven. +- Each socket's `epoll_data` carries its `FlowKey` so the readiness handler can dispatch directly without iterating the full table. + +**Caveats:** + +- This sub-area is **Linux-specific** (`epoll`). The SLIRP backend itself is already Linux-only, so this fits, but the implementation should isolate epoll inside a `mod epoll_dispatch` so a future portable backend (e.g. BSD `kqueue`) can plug in a different reactor. +- Snapshot/restore: an `epoll_fd` does not survive snapshot (it's a kernel-side handle on real fds). 
Restore must rebuild the epoll set from scratch from `flow_table` contents — no serde required for the `epoll_fd` itself. + +**Test requirements:** + +- New pin `tcp_rx_latency_sub_5ms_when_data_available`: send data from host to a connected guest flow; measure host→guest delivery latency. Pre-6.4 this is bounded below by 5 ms (the timer cycle); post-6.4 it should be sub-millisecond on a quiet system. +- Bench: existing `port_forward_accept_latency` should *improve* — it's currently bounded by a 50 ms listener-poll cycle, but if 6.4 also moves the listener accept onto epoll, the median should drop substantially. +- `snapshot_integration`: verify rebuild-on-restore works (no FD leak, all flows still relay). + +**Validation gates:** + +- `cargo test --test network_baseline tcp_rx_latency_*` +- `cargo bench --bench network port_forward_accept_latency` — should regress *favorably* (faster). +- `cargo test --test snapshot_integration -- --ignored` + +**File impact:** + +- `src/vmm/mod.rs` — `net_poll_thread` rewrite to use `epoll_wait` (~lines 1599–1640). +- `src/network/slirp.rs` — new `mod epoll_dispatch`, `SlirpBackend` holds the `epoll_fd`, `flow_table` insertions/removals add/remove from epoll. +- New constants for the epoll wakeup pipe. + +--- + +## Cross-cutting concerns + +### Bench discipline + +Every sub-area must add at least one bench (microbench in `benches/network.rs` and/or wall-clock metric in `voidbox-network-bench`) that captures the win or proves no regression. `bench-compare.sh --baseline <ref>` must run cleanly before each sub-area's PR is merged. Shared protocol: each sub-area's PR description includes the bench-compare table. + +### Observability + +Every state transition added (Connecting, FinWait*, CloseWait, LastAck, window updates, epoll readiness) emits a `tracing::trace!` or `tracing::debug!` line keyed on the relevant `FlowKey`. No silent state changes. This matches the observability invariant.
 + +### Test image + +No new test-image requirements expected. All new e2e pins should be expressible against the existing initramfs (BusyBox + claudio). + +### Phase ordering + +Logically sensible order is **6.4 → 6.2 → 6.1 → 6.3** (epoll first to give 6.2 its readiness primitive, async connect next to remove vCPU stalls, half-close once we have proper per-flow event handling, window mgmt last as the polish layer). However, the validation gates per sub-area are independent; any order that passes all gates is acceptable. + +## Validation gates (global, every sub-area) + +The standard validation contract from `AGENTS.md` applies. In addition: + +``` +# 1. Phase 0–5 baselines hold. +scripts/bench-compare.sh --baseline <ref> --skip-vm + +# 2. All Phase 6.X test pins pass. +cargo test --test network_baseline -- --ignored --test-threads=1 + +# 3. Snapshot integration intact. +cargo test --test snapshot_integration -- --ignored --test-threads=1 + +# 4. Cross-platform compile. +cargo check --workspace --exclude guest-agent --all-targets --all-features # macOS shape + +# 5. aarch64 cross-check (per AGENTS.md "aarch64 cross-check" section). +``` + +## Out of scope + +- IPv6 (deferred from earlier phases; would be its own Phase 7). +- TCP options beyond MSS and window-scale (SACK, timestamps, ECN). Possible future work but not Phase 6. +- vsock-over-SLIRP (orthogonal subsystem). +- A passt head-to-head benchmark suite (deferred separate task — needs passt+qemu reference env). + +## Reviewer pointers + +When a sub-area's plan and PR land, the review focus per area: + +- **6.1**: half-close transitions and `LAST_ACK_TIMEOUT` reaping. Verify no FD leaks under repeated open-close-open patterns. Verify snapshot interaction. +- **6.2**: vCPU thread is never blocked on connect under any input. Verify timing of the "unreachable destination doesn't stall good destination" pin. +- **6.3**: window scale negotiation in SYN/SYN-ACK frames. 
Verify advertised window tracks guest buffer state on tracing logs. +- **6.4**: epoll FD lifecycle (register/unregister on flow_table mutation), wakeup-pipe correctness, snapshot rebuild path. + +## Open questions + +- **6.3:** what window-scale factor to advertise? passt uses 7 (128×). We could be more conservative (say 5 = 32×) initially. Decide in 6.3's plan. +- **6.4:** should the epoll wakeup pipe also carry the new-flow `FlowKey` so the poll thread can `epoll_ctl(EPOLL_CTL_ADD, ...)` itself, vs. doing it under the SlirpBackend lock from the vCPU thread? Tradeoff is lock granularity vs. message-passing complexity. Decide in 6.4's plan. + +--- + +## Document history + +- 2026-04-30: initial overview written, scope locked from PR review on `smoltcp-passt-port-phase0` branch. diff --git a/guest-agent/src/main.rs b/guest-agent/src/main.rs index b42bd092..8fc36c59 100644 --- a/guest-agent/src/main.rs +++ b/guest-agent/src/main.rs @@ -411,6 +411,11 @@ fn main() { if std::process::id() == 1 { if network_enabled_from_cmdline() { setup_network(); + // Allow unprivileged ICMP sockets for all GIDs so non-root + // processes (uid=1000 sandbox user) can call ping without + // CAP_NET_RAW. Mirrors the default on most desktop Linux + // distributions (ping_group_range = 0 2147483647). + let _ = std::fs::write("/proc/sys/net/ipv4/ping_group_range", "0\t2147483647\n"); // Install the host-provided network deny list *once* at boot, // before any guest command can run. This closes the window // between network bring-up and the first exec call, and avoids diff --git a/scripts/bench-compare.sh b/scripts/bench-compare.sh new file mode 100755 index 00000000..217480a0 --- /dev/null +++ b/scripts/bench-compare.sh @@ -0,0 +1,469 @@ +#!/usr/bin/env bash +# bench-compare.sh — compare HEAD bench results against an arbitrary baseline ref. +# +# Harnesses: +# 1. divan microbenches: cargo bench --bench network --features bench-helpers +# 2. 
VM wall-clock harness: cargo run --release --bin voidbox-network-bench + +# Output: markdown report to stdout (or --output FILE). +# See AGENTS.md for harness descriptions and JSON field definitions. + +set -euo pipefail + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +info() { printf '%s\n' "$*" >&2; } + +usage() { + cat >&2 <<'EOF' +Usage: scripts/bench-compare.sh [OPTIONS] + +Compare HEAD bench results against an arbitrary baseline git ref. + +Options: + --baseline <ref> Git ref (commit SHA, branch, tag) to compare against. + Default: merge-base with origin/main. + --output <FILE> Write markdown report to FILE instead of stdout. + --skip-vm Skip the voidbox-network-bench VM harness. + --skip-divan Skip the cargo bench --bench network divan harness. + -h, --help Show this help and exit. +EOF +} + +die() { info "ERROR: $*"; exit 1; } + +# --------------------------------------------------------------------------- +# Argument parsing +# --------------------------------------------------------------------------- + +BASELINE_REF="" +OUTPUT_FILE="" +SKIP_VM=0 +SKIP_DIVAN=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --baseline) + [[ $# -ge 2 ]] || die "--baseline requires an argument" + BASELINE_REF="$2"; shift 2 ;; + --output) + [[ $# -ge 2 ]] || die "--output requires an argument" + OUTPUT_FILE="$2"; shift 2 ;; + --skip-vm) + SKIP_VM=1; shift ;; + --skip-divan) + SKIP_DIVAN=1; shift ;; + -h|--help) + usage; exit 0 ;; + *) + die "Unknown option: $1 (run with --help for usage)" ;; + esac +done + +# --------------------------------------------------------------------------- +# Resolve paths +# --------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +# --------------------------------------------------------------------------- +# Resolve SHAs +# --------------------------------------------------------------------------- + +HEAD_SHA="$(git -C "$REPO_ROOT" rev-parse HEAD)" +HEAD_SHORT="${HEAD_SHA:0:9}" +HEAD_BRANCH="$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "detached")" + +if [[ -z "$BASELINE_REF" ]]; then + info "No --baseline given; resolving merge-base with origin/main ..." + # Fetch is not done automatically — the caller must ensure origin/main is current. + BASELINE_REF="$(git -C "$REPO_ROOT" merge-base HEAD origin/main)" \ + || die "Could not resolve merge-base with origin/main. Pass --baseline explicitly." +fi + +BASELINE_SHA="$(git -C "$REPO_ROOT" rev-parse "${BASELINE_REF}^{commit}")" \ + || die "Cannot resolve baseline ref '${BASELINE_REF}' to a commit SHA" +BASELINE_SHORT="${BASELINE_SHA:0:9}" + +info "HEAD: ${HEAD_SHORT} (${HEAD_BRANCH})" +info "Baseline: ${BASELINE_SHORT} (${BASELINE_REF})" + +# --------------------------------------------------------------------------- +# Worktree setup +# --------------------------------------------------------------------------- + +WORKTREE_DIR="$(mktemp -d)" +cleanup() { + git -C "$REPO_ROOT" worktree remove --force "$WORKTREE_DIR" 2>/dev/null || true + rm -rf "$WORKTREE_DIR" +} +trap cleanup EXIT + +info "Setting up worktree at ${WORKTREE_DIR} for ${BASELINE_SHORT} ..." 
+git -C "$REPO_ROOT" worktree add --detach "$WORKTREE_DIR" "$BASELINE_SHA" \ + || die "Failed to create git worktree at ${WORKTREE_DIR}" + +# --------------------------------------------------------------------------- +# Output buffer (built up as a string, flushed at the end) +# --------------------------------------------------------------------------- + +REPORT="" + +append() { REPORT="${REPORT}${*}"$'\n'; } + +append "# Bench comparison" +append "" +append "- HEAD: \`${HEAD_SHORT}\` (\`${HEAD_BRANCH}\`)" +append "- Baseline: \`${BASELINE_SHORT}\` (\`${BASELINE_REF}\`)" +append "" + +# --------------------------------------------------------------------------- +# Parse divan output into TSV: namemedian_ns +# +# divan table layout (columns separated by the │ U+2502 box-drawing char): +# top-level leaf: field1=" ", field2=slowest, +# field3=median, field4=mean, ... +# parametric parent: field1="", all other fields empty +# parametric child: field1="", field2=" ", +# field3=slowest, field4=median, ... +# MB/s secondary: field1="", field2=MB/s-fastest, ... (no name — skip) +# +# Strategy: split on │. The first non-empty field contains the name prefix +# plus the fastest time. The median is two fields after that. +# --------------------------------------------------------------------------- + +parse_divan() { + local file="$1" + LC_ALL=en_US.UTF-8 awk -F'│' ' + function unit_ns(val, unit) { + if (unit == "ns") return val + 0 + if (unit == "µs") return val * 1000 + if (unit == "us") return val * 1000 + if (unit == "ms") return val * 1000000 + if (unit == "s") return val * 1000000000 + # Unrecognised unit — treat as µs (safe fallback for future divan changes) + return val * 1000 + } + + function strip(s, r) { + r = s + gsub(/^[[:space:]╰─├│ ]+/, "", r) + gsub(/[[:space:]]+$/, "", r) + return r + } + + # Extract and from a string like "330.2 ns" or "50.12 ms". + # Sets out_val and out_unit. Returns 1 on success, 0 if no match. 
+ function extract_time(s, out_val, out_unit, t, n) { + t = s + gsub(/^[[:space:]]+/, "", t) + # Check for a number followed by a unit + if (t !~ /^[0-9]/) return 0 + n = split(t, parts, /[[:space:]]+/) + if (n < 2) return 0 + out_val[1] = parts[1] + 0 + out_unit[1] = parts[2] + return 1 + } + + BEGIN { parent = "" } + + # Skip the header line and empty lines + /^network/ || /^$/ || /^Timer precision/ { next } + + # Skip the MB/s secondary throughput line (no bench name in field 1). + # Detect: field 1 is empty AND any field contains "MB/s". + /MB\/s/ && $1 !~ /[[:alpha:]]/ { next } + + { + # Find the first non-empty field (contains name + fastest time). + name_field_idx = 0 + name_raw = "" + for (i = 1; i <= NF; i++) { + f = $i + gsub(/^[[:space:]╰─├│ ]+/, "", f) + gsub(/[[:space:]]+$/, "", f) + if (f != "") { + name_field_idx = i + name_raw = f + break + } + } + if (name_field_idx == 0) next # completely empty line + + # The median column is two fields after the name+fastest field. + median_raw = "" + if (name_field_idx + 2 <= NF) { + median_raw = $(name_field_idx + 2) + gsub(/^[[:space:]│]+/, "", median_raw) + gsub(/[[:space:]]+$/, "", median_raw) + } + + # Extract the bench name from the name_raw field. + # name_raw looks like "dns_cache_hit 220.2 ns" (name + fastest time). + # Strip the trailing fastest-time portion: everything from the last + # contiguous digit sequence followed by a unit. + bench_label = name_raw + sub(/[[:space:]]+[0-9]+(\.[0-9]+)?[[:space:]]*(ns|us|ms|s|µs)[[:space:]]*$/, "", bench_label) + # Also strip any residual trailing box-drawing or tree chars + gsub(/[[:space:]]+$/, "", bench_label) + + # Check whether this row has a median measurement. + val_arr[1] = ""; unit_arr[1] = "" + has_median = extract_time(median_raw, val_arr, unit_arr) + + if (!has_median) { + # This is a parametric parent header row — record as parent. + parent = bench_label + next + } + + # This is a leaf measurement row. 
+ if (parent != "" && name_field_idx > 1) { + # Child row: qualify with parent name. + full_name = parent "/" bench_label + } else { + full_name = bench_label + # Top-level leaf — clear parent so the next top-level bench starts fresh. + parent = "" + } + + median_ns = unit_ns(val_arr[1], unit_arr[1]) + print full_name "\t" median_ns + } + ' "$file" +} + +# --------------------------------------------------------------------------- +# Divan harness +# --------------------------------------------------------------------------- + +if [[ "$SKIP_DIVAN" -eq 0 ]]; then + info "--- divan harness ---" + + # Run divan bench in $1 (cwd), writing TSV-parseable stdout to $2. + # $3 is a human-readable label used in log lines. + # Tries --features bench-helpers first; falls back to no features if the + # feature isn't recognized at that ref. + run_divan_at() { + local cwd="$1" + local out="$2" + local label="$3" + local err + err="$(mktemp)" + if (cd "$cwd" && cargo bench --bench network --features bench-helpers >"$out" 2>"$err"); then + rm -f "$err" + return 0 + fi + if grep -qiE 'does not have feature|does not contain this feature|unknown feature' "$err"; then + info " ${label} lacks bench-helpers feature, retrying without" + rm -f "$err" + if (cd "$cwd" && cargo bench --bench network >"$out" 2>/dev/null); then + return 0 + fi + fi + rm -f "$err" + return 1 + } + + DIVAN_TMP_BASELINE="$(mktemp)" + DIVAN_TMP_HEAD="$(mktemp)" + + info "Running divan benches on baseline (${BASELINE_SHORT}) ..." + # cargo's build progress goes to stderr; bench table goes to stdout. + run_divan_at "$WORKTREE_DIR" "$DIVAN_TMP_BASELINE" "baseline" \ + || info "WARN: divan baseline bench failed; divan section will be incomplete" + + info "Running divan benches on HEAD (${HEAD_SHORT}) ..." 
+ run_divan_at "$REPO_ROOT" "$DIVAN_TMP_HEAD" "HEAD" \ + || info "WARN: divan HEAD bench failed; divan section will be incomplete" + + DIVAN_BASELINE_TSV="$(parse_divan "$DIVAN_TMP_BASELINE")" + DIVAN_HEAD_TSV="$(parse_divan "$DIVAN_TMP_HEAD")" + rm -f "$DIVAN_TMP_BASELINE" "$DIVAN_TMP_HEAD" + + # Build the markdown table via awk: join on bench name, emit rows. + DIVAN_TABLE="$( + awk -F'\t' ' + # Load baseline + NR == FNR { + if ($1 != "") { + baseline_ns[$1] = $2 + if (!seen[$1]++) order[++n] = $1 + } + next + } + # Load head + { + if ($1 != "") { + head_ns[$1] = $2 + if (!seen[$1]++) order[++n] = $1 + } + } + END { + for (i = 1; i <= n; i++) { + name = order[i] + b = baseline_ns[name] + h = head_ns[name] + + # Format a nanosecond value into a human-readable string + # using the shortest unit whose display value is >= 1. + if (b == "") { + b_str = "—" + } else { + bv = b + 0 + if (bv >= 1000000000) { b_str = sprintf("%.3g s", bv/1000000000) } + else if (bv >= 1000000) { b_str = sprintf("%.3g ms", bv/1000000) } + else if (bv >= 1000) { b_str = sprintf("%.3g µs", bv/1000) } + else { b_str = sprintf("%.3g ns", bv) } + } + + if (h == "") { + h_str = "—" + } else { + hv = h + 0 + if (hv >= 1000000000) { h_str = sprintf("%.3g s", hv/1000000000) } + else if (hv >= 1000000) { h_str = sprintf("%.3g ms", hv/1000000) } + else if (hv >= 1000) { h_str = sprintf("%.3g µs", hv/1000) } + else { h_str = sprintf("%.3g ns", hv) } + } + + # Delta + if (b == "" || h == "") { + delta_str = "—" + pct_str = "—" + } else { + bv = b + 0; hv = h + 0 + diff = hv - bv + abs_diff = (diff < 0) ? -diff : diff + if (abs_diff >= 1000000000) { unit = "s"; factor = 1000000000 } + else if (abs_diff >= 1000000) { unit = "ms"; factor = 1000000 } + else if (abs_diff >= 1000) { unit = "µs"; factor = 1000 } + else { unit = "ns"; factor = 1 } + sign = (diff >= 0) ? "+" : "" + delta_str = sprintf("%s%.3g %s", sign, diff/factor, unit) + + if (bv != 0) { + pct = (hv - bv) / bv * 100 + psign = (pct >= 0) ? 
"+" : "" + pct_str = sprintf("%s%.1f%%", psign, pct) + } else { + pct_str = "—" + } + } + + print name "\t" b_str "\t" h_str "\t" delta_str "\t" pct_str + } + } + ' \ + <(printf '%s\n' "$DIVAN_BASELINE_TSV") \ + <(printf '%s\n' "$DIVAN_HEAD_TSV") + )" + + append "## divan microbenches (\`cargo bench --bench network\`)" + append "" + append "| Bench | Baseline | HEAD | Δ | Δ% |" + append "|-------|---------:|-----:|--:|---:|" + + if [[ -n "$DIVAN_TABLE" ]]; then + while IFS=$'\t' read -r name b_str h_str delta_str pct_str; do + append "| ${name} | ${b_str} | ${h_str} | ${delta_str} | ${pct_str} |" + done <<< "$DIVAN_TABLE" + else + append "| *(no data)* | | | | |" + fi + append "" +else + info "Skipping divan harness (--skip-divan)." +fi + +# --------------------------------------------------------------------------- +# VM harness +# --------------------------------------------------------------------------- + +if [[ "$SKIP_VM" -eq 1 ]]; then + info "Skipping VM harness (--skip-vm)." +elif [[ -z "${VOID_BOX_KERNEL:-}" ]]; then + info "Skipping VM harness because VOID_BOX_KERNEL is not set." +elif [[ -z "${VOID_BOX_INITRAMFS:-}" ]]; then + info "Skipping VM harness because VOID_BOX_INITRAMFS is not set." +else + info "--- VM harness ---" + + VM_TMP_BASELINE="$(mktemp --suffix=.json)" + VM_TMP_HEAD="$(mktemp --suffix=.json)" + + info "Running voidbox-network-bench on baseline (${BASELINE_SHORT}) ..." + (cd "$WORKTREE_DIR" && \ + cargo run --release --bin voidbox-network-bench -- --output "$VM_TMP_BASELINE") \ + || info "WARN: VM baseline bench failed; VM section will be incomplete" + + info "Running voidbox-network-bench on HEAD (${HEAD_SHORT}) ..." + (cd "$REPO_ROOT" && \ + cargo run --release --bin voidbox-network-bench -- --output "$VM_TMP_HEAD") \ + || info "WARN: VM HEAD bench failed; VM section will be incomplete" + + # JSON field names in display order. + # These match the Report struct fields in src/bin/voidbox-network-bench/main.rs. 
+ VM_FIELDS=( + tcp_bulk_throughput_g2h_mbps + tcp_throughput_g2h_mbps + tcp_throughput_h2g_mbps + tcp_rr_latency_us_p50 + tcp_rr_latency_us_p99 + tcp_crr_latency_us_p50 + udp_dns_qps + icmp_rr_latency_us_p50 + ) + + append "## VM harness (\`voidbox-network-bench\`)" + append "" + append "| Metric | Baseline | HEAD | Δ | Δ% |" + append "|--------|---------:|-----:|--:|---:|" + + for field in "${VM_FIELDS[@]}"; do + b_val="$(jq -r --arg f "$field" 'if has($f) then .[$f] else null end | if . == null then "null" else tostring end' \ + "$VM_TMP_BASELINE" 2>/dev/null || echo "null")" + h_val="$(jq -r --arg f "$field" 'if has($f) then .[$f] else null end | if . == null then "null" else tostring end' \ + "$VM_TMP_HEAD" 2>/dev/null || echo "null")" + + if [[ "$b_val" == "null" ]]; then b_str="n/a"; else b_str="$b_val"; fi + if [[ "$h_val" == "null" ]]; then h_str="n/a"; else h_str="$h_val"; fi + + if [[ "$b_val" == "null" || "$h_val" == "null" ]]; then + delta_str="—" + pct_str="—" + else + delta_str="$(awk -v b="$b_val" -v h="$h_val" 'BEGIN { + diff = h - b + sign = (diff >= 0) ? "+" : "" + printf "%s%.4g\n", sign, diff + }')" + pct_str="$(awk -v b="$b_val" -v h="$h_val" 'BEGIN { + if (b == 0) { print "—"; exit } + pct = (h - b) / b * 100 + psign = (pct >= 0) ? 
"+" : "" + printf "%s%.1f%%\n", psign, pct + }')" + fi + + append "| ${field} | ${b_str} | ${h_str} | ${delta_str} | ${pct_str} |" + done + append "" + + rm -f "$VM_TMP_BASELINE" "$VM_TMP_HEAD" +fi + +# --------------------------------------------------------------------------- +# Emit report +# --------------------------------------------------------------------------- + +if [[ -n "$OUTPUT_FILE" ]]; then + printf '%s\n' "$REPORT" > "$OUTPUT_FILE" + info "Report written to ${OUTPUT_FILE}" +else + printf '%s\n' "$REPORT" +fi diff --git a/scripts/lib/guest_common.sh b/scripts/lib/guest_common.sh index 9e60d025..29d652d2 100755 --- a/scripts/lib/guest_common.sh +++ b/scripts/lib/guest_common.sh @@ -124,6 +124,21 @@ install_busybox() { readlink realpath sleep; do ln -sf busybox "$OUT_DIR/bin/$cmd" 2>/dev/null || true done + # NOTE: do NOT `chmod u+s busybox`. The cpio is packed as the build user + # (uid 1000), so a setuid bit makes the kernel drop euid to 1000 on + # every execve from PID 1 (uid=0) → setup_network()'s `ip link up`, + # `ip addr replace`, and `udhcpc` all silently fail with EPERM + # (no CAP_NET_ADMIN), the static-fallback loop wastes 10s of boot + # time, and the host's 30s control-channel handshake deadline + # expires before the vsock listener is bound. Symptom: ECONNRESET + # on every connect in `voidbox-network-bench` and any test that + # uses `network(true)`. See guest-agent::setup_network and + # control_channel::connect_with_handshake_sync. + # + # `ping` is intentionally omitted from the symlink list above — busybox + # `ping` uses SOCK_RAW which needs root, and busybox-static on Fedora + # is not built with CONFIG_FEATURE_PING_TYPE_DGRAM. Tools that want + # ICMP-from-guest should drive it through SLIRP from the host instead. else echo "[void-box] No BUSYBOX set; guest will have no /bin/sh (set BUSYBOX=/path/to/busybox for full shell support)." 
fi diff --git a/src/bin/voidbox-network-bench/main.rs b/src/bin/voidbox-network-bench/main.rs new file mode 100644 index 00000000..a18ac09e --- /dev/null +++ b/src/bin/voidbox-network-bench/main.rs @@ -0,0 +1,799 @@ +//! Wall-clock end-to-end network benchmark harness. +//! +//! Boots a real VM and measures TCP throughput, RR/CRR latency, and +//! UDP DNS qps inside the guest. Output is JSON for diffing against +//! a baseline. +//! +//! Mirrors `voidbox-startup-bench` in CLI shape and lifecycle. +//! +//! Linux-only because the smoltcp-based SLIRP stack is Linux-only. On +//! other platforms `main()` prints a skip notice and exits 0 so +//! cross-platform CI (`cargo build`, `cargo check`) compiles cleanly. + +#[cfg(not(target_os = "linux"))] +fn main() { + eprintln!( + "voidbox-network-bench: SLIRP-backed wall-clock harness is Linux-only \ + (smoltcp dep is `cfg(target_os = \"linux\")` in Cargo.toml). \ + Nothing to run on this platform." + ); +} + +#[cfg(target_os = "linux")] +use std::io::{Read, Write}; +#[cfg(target_os = "linux")] +use std::net::{TcpListener, TcpStream}; +#[cfg(target_os = "linux")] +use std::os::fd::AsRawFd; +#[cfg(target_os = "linux")] +use std::path::PathBuf; +#[cfg(target_os = "linux")] +use std::sync::mpsc; +#[cfg(target_os = "linux")] +use std::time::{Duration, Instant}; + +#[cfg(target_os = "linux")] +use clap::Parser; +#[cfg(target_os = "linux")] +use serde::Serialize; +#[cfg(target_os = "linux")] +use void_box::sandbox::Sandbox; + +// Linux-only block. Wrapped in a `mod linux_main` so cross-platform +// CI (macOS, etc.) compiles `voidbox-network-bench` cleanly — only +// `main()` (above, the non-Linux stub) is needed there. +#[cfg(target_os = "linux")] +mod linux_main { + use super::*; + + /// Transfer size per measurement run: 50 MiB. + const TRANSFER_MB: u32 = 50; + + /// Bytes per megabit. + const BYTES_PER_MEGABIT: f64 = 1_000_000.0 / 8.0; + + /// VM memory for the benchmark sandbox (MiB). 
+ const BENCH_MEMORY_MB: usize = 1024; + + /// SLIRP host-gateway address reachable from inside the guest. + const SLIRP_HOST_ADDR: &str = "10.0.2.2"; + + /// Number of RR samples collected per iteration. + const RR_SAMPLES_PER_ITER: u32 = 100; + + /// Number of CRR samples collected per iteration. + const CRR_SAMPLES_PER_ITER: u32 = 30; + + /// Timeout for the host-side channel receive on RR/CRR measurements. + const LATENCY_RECV_TIMEOUT: Duration = Duration::from_secs(120); + + /// Accept-side deadline for spawned echo/drain threads. Set slightly longer + /// than `LATENCY_RECV_TIMEOUT` (the channel-side wait) so the channel times + /// out first when the iteration is genuinely stuck — the accept thread then + /// exits on its own deadline shortly after, releasing the listener FD before + /// the next iteration. + const ACCEPT_DEADLINE_SLACK: Duration = Duration::from_secs(5); + + #[derive(Parser, Debug)] + #[command( + version, + about = "VoidBox network benchmark harness", + long_about = "VoidBox network benchmark harness\n\ +\n\ +Boots one VM, exercises TCP throughput, TCP RR/CRR latency, and UDP DNS qps,\n\ +then emits a JSON report suitable for automated diffing.\n\ +\n\ +REQUIRED ENVIRONMENT VARIABLES\n\ + VOID_BOX_KERNEL Path to the guest kernel image (vmlinuz / vmlinux).\n\ + VOID_BOX_INITRAMFS Path to the guest initramfs (cpio.gz).\n\ +\n\ +RECOMMENDED WORKFLOW — CAPTURING AND DIFFING A BASELINE\n\ + # 1. Before a refactor or networking-stack change, capture a baseline:\n\ + cargo run --bin voidbox-network-bench -- --output baseline.json\n\ +\n\ + # 2. Make your change, then capture a post-change report:\n\ + cargo run --bin voidbox-network-bench -- --output after.json\n\ +\n\ + # 3. 
Compare with diff or a JSON-diff tool:\n\ + diff baseline.json after.json\n\ + # Or with jq for a side-by-side view of individual metrics:\n\ + jq -s '.[0] as $b | .[1] as $a | {metric: keys} | .metric[] |\n\ + {metric: ., before: $b[.], after: $a[.]}' baseline.json after.json\n\ +\n\ +METRIC NAMES\n\ + tcp_throughput_g2h_mbps Guest→host TCP throughput (Mbps)\n\ + tcp_rr_latency_us_p50 Persistent-connection round-trip latency p50 (µs)\n\ + tcp_rr_latency_us_p99 Persistent-connection round-trip latency p99 (µs)\n\ + tcp_crr_latency_us_p50 Connect-request-response latency p50 (µs)\n\ + udp_dns_qps UDP DNS queries per second against SLIRP resolver\n\ +\n\ +The metric names mirror the columns in passt's published performance table so\n\ +results can be compared directly.\n\ +\n\ +FAST SMOKE RUN\n\ + cargo run --bin voidbox-network-bench -- --iterations 1 --no-throughput" + )] + struct Cli { + /// Number of iterations per metric. + #[arg(long, default_value_t = 5)] + iterations: u32, + + /// Output JSON file. If omitted, prints to stdout. + #[arg(long)] + output: Option, + + /// Skip throughput measurements (useful for fast smoke runs). + #[arg(long, default_value_t = false)] + no_throughput: bool, + + /// Push N MB through the SLIRP relay against a slow-receiving host + /// (`SO_RCVBUF = 4096`). Forces the backpressure path to actually + /// engage — the small-payload throughput numbers don't exercise it + /// because the host drains too fast. + /// + /// 0 (default) skips the measurement. 10 MiB is a reasonable smoke + /// value; larger N produces more stable numbers but takes longer. + #[arg(long, default_value_t = 0)] + bulk_mb: u32, + } + + #[derive(Serialize, Debug, Default)] + struct Report { + /// Sustained guest→host throughput against a slow-receiving host + /// (`SO_RCVBUF = 4096`). 
Probes the TCP backpressure path — rather + /// than hitting a fixed userspace cliff and resetting the connection, + /// throughput is bounded by the kernel recv buffer's drain rate. + /// Populated only when `--bulk-mb > 0`. + tcp_bulk_throughput_g2h_mbps: Option, + tcp_throughput_g2h_mbps: Option, + // TODO(h2g): host→guest requires either a guest-side `nc -l` listener + // or an inverse data-push loop. The current harness only supports + // guest-initiated connections (the guest calls `nc HOST PORT`). A + // host-push direction would need the guest to accept connections, which + // means either (a) a guest-side daemon started before exec returns, or + // (b) an additional RPC for "open a listening socket and tell us the + // guest port" — out of scope for the minimal harness. + tcp_throughput_h2g_mbps: Option, + tcp_rr_latency_us_p50: Option, + tcp_rr_latency_us_p99: Option, + tcp_crr_latency_us_p50: Option, + udp_dns_qps: Option, + icmp_rr_latency_us_p50: Option, + /// p50 host→guest RX latency: "host write completes" → "SLIRP relay + /// delivers frame to drain_to_guest output". Measured at the VMM + /// layer against a live guest TCP flow via `nc -l`. + /// + /// Not yet populated: wiring a guest-side listener and synchronizing + /// on first-byte arrival requires either a guest daemon or an additional + /// RPC. The divan microbench `tcp_rx_latency_one_packet` captures the + /// SLIRP-layer dispatch cost directly (epoll_wait + peek + frame build); + /// this wall-clock field will complement it once the guest-listener + /// infrastructure is in place. 
+ tcp_rx_latency_us_p50: Option, + } + + #[tokio::main(flavor = "multi_thread")] + pub(super) async fn main_impl() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn")), + ) + .with_writer(std::io::stderr) + .init(); + + let cli = Cli::parse(); + let mut report = Report::default(); + + // Boot one shared VM for all measurements that require a live guest. + // Throughput and latency measurements reuse this single sandbox to avoid + // paying the boot cost multiple times. + let sandbox = Sandbox::local() + .from_env()? + .memory_mb(BENCH_MEMORY_MB) + .network(true) + .build()?; + + // Prime the VM (triggers boot + vsock handshake) before any timed work. + let probe = sandbox.exec("sh", &["-c", ":"]).await?; + if !probe.success() { + return Err(format!( + "VM probe exec failed: exit={:?} stderr={}", + probe.exit_code, + probe.stderr_str() + ) + .into()); + } + + if !cli.no_throughput { + report.tcp_throughput_g2h_mbps = + measure_tcp_throughput_g2h(&sandbox, cli.iterations).await?; + } + + if cli.bulk_mb > 0 { + report.tcp_bulk_throughput_g2h_mbps = + measure_bulk_throughput_g2h(&sandbox, cli.iterations, cli.bulk_mb).await?; + } + + // Latency measurements always run (--no-throughput only skips throughput). + let (rr_p50, rr_p99) = measure_rr_latency(&sandbox, cli.iterations).await?; + report.tcp_rr_latency_us_p50 = rr_p50; + report.tcp_rr_latency_us_p99 = rr_p99; + report.tcp_crr_latency_us_p50 = measure_crr_latency(&sandbox, cli.iterations).await?; + report.udp_dns_qps = measure_dns_qps(&sandbox).await?; + report.icmp_rr_latency_us_p50 = measure_icmp_rr_latency(&sandbox, cli.iterations).await?; + + sandbox.stop().await?; + + let json = serde_json::to_string_pretty(&report)?; + match cli.output { + Some(path) => std::fs::write(path, json)?, + None => println!("{json}"), + } + Ok(()) + } + + /// Measure guest-to-host TCP throughput. 
+ /// + /// Binds a host-side TCP listener on `127.0.0.1:0` and execs a BusyBox shell + /// snippet inside `sandbox` that pipes `dd` output to `nc`. The host drain + /// thread records bytes received and wall-clock elapsed time; Mbps is computed + /// from those two numbers. Runs `iterations` times and returns the mean. + /// + /// Returns `None` if every iteration fails to parse or times out. + async fn measure_tcp_throughput_g2h( + sandbox: &Sandbox, + iterations: u32, + ) -> Result, Box> { + let mut mbps_samples: Vec = Vec::new(); + + for iteration_index in 0..iterations { + let listener = TcpListener::bind("127.0.0.1:0")?; + let host_port = listener.local_addr()?.port(); + + let (drain_tx, drain_rx) = mpsc::channel::<(u64, Duration)>(); + + let drain_deadline = Instant::now() + LATENCY_RECV_TIMEOUT + ACCEPT_DEADLINE_SLACK; + std::thread::spawn(move || { + let drain_result = drain_one_connection(&listener, drain_deadline); + let _ = drain_tx.send(drain_result); + }); + + let guest_cmd = format!( + "dd if=/dev/zero bs=1M count={TRANSFER_MB} 2>/dev/null | nc {SLIRP_HOST_ADDR} {host_port}", + ); + + let exec_result = sandbox.exec("sh", &["-c", &guest_cmd]).await; + + match exec_result { + Err(exec_err) => { + tracing::warn!( + iteration = iteration_index, + error = %exec_err, + "g2h iteration exec error; skipping" + ); + continue; + } + Ok(output) => { + if !output.success() { + tracing::warn!( + iteration = iteration_index, + exit_code = ?output.exit_code, + stderr = output.stderr_str(), + "g2h iteration non-zero exit; skipping" + ); + continue; + } + } + } + + match drain_rx.recv_timeout(Duration::from_secs(120)) { + Err(recv_err) => { + tracing::warn!( + iteration = iteration_index, + error = %recv_err, + "g2h drain channel receive error; skipping" + ); + } + Ok((bytes_received, elapsed)) => { + let elapsed_secs = elapsed.as_secs_f64(); + if elapsed_secs < 0.01 { + tracing::warn!( + iteration = iteration_index, + elapsed_secs, + "g2h elapsed too small to measure 
reliably; skipping" + ); + continue; + } + let mbps = (bytes_received as f64 * 8.0) / elapsed_secs / BYTES_PER_MEGABIT; + tracing::info!( + iteration = iteration_index, + bytes_received, + elapsed_secs, + mbps, + "g2h iteration complete" + ); + eprintln!( + "g2h[{iteration_index:>2}]: {bytes_received} B in {elapsed_secs:.3}s = {mbps:.1} Mbps" + ); + mbps_samples.push(mbps); + } + } + } + + if mbps_samples.is_empty() { + return Ok(None); + } + + let mut total_mbps = 0.0_f64; + for sample in &mbps_samples { + total_mbps += sample; + } + let mean_mbps = total_mbps / mbps_samples.len() as f64; + Ok(Some(mean_mbps)) + } + + /// Sustained guest→host throughput against a constrained receiver. + /// + /// Same shape as [`measure_tcp_throughput_g2h`] but with `SO_RCVBUF = 4096` + /// pinned on the listener socket. The small recv buffer forces TCP-level + /// backpressure: the kernel send buffer fills, our `host_stream.write` + /// returns `WouldBlock`, the SLIRP relay declines to ACK the guest's + /// segment, and the guest retransmits. The relay holds the line and the + /// bytes go through rather than resetting the connection at a fixed + /// userspace buffer limit. + /// + /// Returned value is the mean Mbps across `iterations` iterations of pushing + /// `bulk_mb` MiB. Effective throughput is much lower than + /// [`measure_tcp_throughput_g2h`]'s number because the constrained receiver + /// is the bottleneck — that's the point. + async fn measure_bulk_throughput_g2h( + sandbox: &Sandbox, + iterations: u32, + bulk_mb: u32, + ) -> Result, Box> { + let mut mbps_samples: Vec = Vec::new(); + + for iteration_index in 0..iterations { + let listener = TcpListener::bind("127.0.0.1:0")?; + // Constrain the receiver: 4 KiB request, kernel rounds up to the + // configured minimum (~8 KiB on Linux) — still small enough that + // the SLIRP send buffer fills quickly and backpressure engages. 
+ let val: libc::c_int = 4096; + // SAFETY: listener.as_raw_fd() outlives the syscall; the int is + // stack-local and pointer-sized. + let rc = unsafe { + libc::setsockopt( + listener.as_raw_fd(), + libc::SOL_SOCKET, + libc::SO_RCVBUF, + &val as *const libc::c_int as *const libc::c_void, + std::mem::size_of::() as libc::socklen_t, + ) + }; + if rc != 0 { + tracing::warn!( + iteration = iteration_index, + "bulk-g2h: SO_RCVBUF setsockopt failed; skipping" + ); + continue; + } + let host_port = listener.local_addr()?.port(); + + let (drain_tx, drain_rx) = mpsc::channel::<(u64, Duration)>(); + let drain_deadline = Instant::now() + Duration::from_secs(300) + ACCEPT_DEADLINE_SLACK; + std::thread::spawn(move || { + let drain_result = drain_one_connection(&listener, drain_deadline); + let _ = drain_tx.send(drain_result); + }); + + let guest_cmd = format!( + "dd if=/dev/zero bs=1M count={bulk_mb} 2>/dev/null | nc {SLIRP_HOST_ADDR} {host_port}", + ); + let exec_result = sandbox.exec("sh", &["-c", &guest_cmd]).await; + match exec_result { + Err(exec_err) => { + tracing::warn!( + iteration = iteration_index, + error = %exec_err, + "bulk-g2h iteration exec error; skipping" + ); + continue; + } + Ok(output) => { + if !output.success() { + tracing::warn!( + iteration = iteration_index, + exit_code = ?output.exit_code, + stderr = output.stderr_str(), + "bulk-g2h iteration non-zero exit; the connection may have \ + been reset (backpressure cliff regression?). 
skipping" + ); + continue; + } + } + } + + match drain_rx.recv_timeout(Duration::from_secs(300)) { + Err(recv_err) => { + tracing::warn!( + iteration = iteration_index, + error = %recv_err, + "bulk-g2h drain channel receive error; skipping" + ); + } + Ok((bytes_received, elapsed)) => { + let elapsed_secs = elapsed.as_secs_f64(); + if elapsed_secs < 0.01 { + tracing::warn!( + iteration = iteration_index, + elapsed_secs, + "bulk-g2h elapsed too small to measure reliably; skipping" + ); + continue; + } + let mbps = (bytes_received as f64 * 8.0) / elapsed_secs / BYTES_PER_MEGABIT; + tracing::info!( + iteration = iteration_index, + bytes_received, + elapsed_secs, + mbps, + "bulk-g2h iteration complete" + ); + eprintln!( + "bulk-g2h[{iteration_index:>2}]: {bytes_received} B in {elapsed_secs:.3}s = {mbps:.1} Mbps (constrained receiver)" + ); + mbps_samples.push(mbps); + } + } + } + + if mbps_samples.is_empty() { + return Ok(None); + } + let mean_mbps: f64 = mbps_samples.iter().sum::() / mbps_samples.len() as f64; + Ok(Some(mean_mbps)) + } + + /// Accept one connection on `listener` with a deadline. Returns `None` if the + /// deadline lapses before any connection arrives (the spawning iteration has + /// likely failed and the thread should exit cleanly so the listener FD is + /// released for the next iteration). + fn accept_with_deadline( + listener: &TcpListener, + deadline: Instant, + ) -> Option<(TcpStream, std::net::SocketAddr)> { + listener.set_nonblocking(true).ok()?; + loop { + match listener.accept() { + Ok(pair) => { + let _ = pair.0.set_nonblocking(false); + return Some(pair); + } + Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => { + if Instant::now() >= deadline { + return None; + } + std::thread::sleep(Duration::from_millis(10)); + } + Err(_) => return None, + } + } + } + + /// Accept exactly one TCP connection on `listener`, drain it to EOF, and + /// return `(bytes_received, elapsed)`. Intended to run in a background thread. 
+ /// + /// Returns `(0, Duration::ZERO)` if no connection arrives before `deadline`. + fn drain_one_connection(listener: &TcpListener, deadline: Instant) -> (u64, Duration) { + let Some((mut stream, _peer_addr)) = accept_with_deadline(listener, deadline) else { + return (0, Duration::ZERO); + }; + + let start = Instant::now(); + let bytes_received = drain_stream(&mut stream); + let elapsed = start.elapsed(); + (bytes_received, elapsed) + } + + /// Read `stream` to EOF and return the total byte count. + fn drain_stream(stream: &mut TcpStream) -> u64 { + let mut buf = vec![0u8; 64 * 1024]; + let mut total_bytes: u64 = 0; + loop { + match stream.read(&mut buf) { + Ok(0) => break, + Ok(bytes_read) => total_bytes += bytes_read as u64, + Err(_) => break, + } + } + total_bytes + } + + fn percentile(samples: &mut [Duration], p: f64) -> Duration { + samples.sort(); + let idx = ((samples.len() as f64) * p).clamp(0.0, samples.len() as f64 - 1.0) as usize; + samples[idx] + } + + /// Measure TCP RR (Request-Response) latency on a kept-open connection. + /// + /// The guest pipes `RR_SAMPLES_PER_ITER` null bytes over a single `nc` + /// connection (`dd if=/dev/zero bs=1 count=N | nc host port`). The host + /// accepts one connection and services each byte as an independent echo + /// round-trip, timing each host-side `read + write` pair. + /// + /// Using dd+nc avoids BusyBox shell limitations around interactive TCP + /// sockets while still measuring per-message in-flight latency on a + /// persistent connection. The first sample from each iteration is discarded + /// because the first byte arrival absorbs TCP connect and Nagle jitter from + /// the guest side. Remaining samples are accumulated across all iterations; + /// p50 and p99 are computed over the union. + /// + /// Returns `(p50_us, p99_us)`, both `None` if no samples were collected. 
+ async fn measure_rr_latency( + sandbox: &Sandbox, + iterations: u32, + ) -> Result<(Option, Option), Box> { + let mut all_samples: Vec = Vec::new(); + + for iteration_index in 0..iterations { + let listener = TcpListener::bind("127.0.0.1:0")?; + let host_port = listener.local_addr()?.port(); + + let (echo_tx, echo_rx) = mpsc::channel::>(); + + let echo_deadline = Instant::now() + LATENCY_RECV_TIMEOUT + ACCEPT_DEADLINE_SLACK; + std::thread::spawn(move || { + let samples = rr_echo_server(&listener, RR_SAMPLES_PER_ITER, echo_deadline); + let _ = echo_tx.send(samples); + }); + + // Guest: pipe RR_SAMPLES_PER_ITER zero bytes over one nc connection. + // dd generates the bytes; nc forwards them to the host echo server. + // The guest does not need to read the echoed bytes — the host drives + // the timing loop and closes when done. BusyBox dd + nc suffice. + let guest_cmd = format!( + "dd if=/dev/zero bs=1 count={n} 2>/dev/null | nc {host} {port}", + n = RR_SAMPLES_PER_ITER, + host = SLIRP_HOST_ADDR, + port = host_port, + ); + + let exec_result = sandbox.exec("sh", &["-c", &guest_cmd]).await; + if let Err(exec_err) = exec_result { + tracing::warn!( + iteration = iteration_index, + error = %exec_err, + "rr iteration exec error; skipping" + ); + } + + match echo_rx.recv_timeout(LATENCY_RECV_TIMEOUT) { + Err(recv_err) => { + tracing::warn!( + iteration = iteration_index, + error = %recv_err, + "rr echo channel receive error; skipping" + ); + } + Ok(mut samples) => { + // Discard first sample (absorbs TCP connect jitter). 
+ if samples.len() > 1 { + samples.remove(0); + } + let count = samples.len(); + let p50_us = if count > 0 { + percentile(&mut samples.clone(), 0.50).as_micros() + } else { + 0 + }; + eprintln!("rr[{iteration_index:>2}]: {count} samples, p50={p50_us} µs"); + all_samples.extend(samples); + } + } + } + + if all_samples.is_empty() { + return Ok((None, None)); + } + + let p50 = percentile(&mut all_samples, 0.50).as_micros() as f64; + let p99 = percentile(&mut all_samples, 0.99).as_micros() as f64; + Ok((Some(p50), Some(p99))) + } + + /// Host-side echo server for RR latency. + /// + /// Accepts one connection, then for each of the `count` iterations: reads + /// one byte, times that read, writes the byte back, and records the elapsed + /// duration. Returns the list of per-round-trip host-side durations. + /// + /// The timer starts just before the blocking `read` call and stops after the + /// `write` returns. This measures the host-observed round-trip time: the + /// interval from "host waiting for a byte" to "host has written the echo", + /// which is approximately the guest-side send→receive latency plus the + /// network stack overhead on both sides. + fn rr_echo_server(listener: &TcpListener, count: u32, deadline: Instant) -> Vec { + let Some((mut stream, _)) = accept_with_deadline(listener, deadline) else { + return Vec::new(); + }; + + let mut samples = Vec::with_capacity(count as usize); + let mut buf = [0u8; 1]; + + for _ in 0..count { + let start = Instant::now(); + match stream.read_exact(&mut buf) { + Ok(()) => {} + Err(_) => break, + } + match stream.write_all(&buf) { + Ok(()) => {} + Err(_) => break, + } + samples.push(start.elapsed()); + } + + samples + } + + /// Measure TCP CRR (Connect-Request-Response) latency. + /// + /// Each sample is one full `accept + read + write + close` cycle on the host, + /// timed from `accept` returning to the connection dropping. 
The guest runs + /// a shell loop that performs `CRR_SAMPLES_PER_ITER` independent `nc` invocations + /// per iteration (each is a full connect → send → recv → close). + /// + /// Host-side timing is the ground truth: the host observes when the + /// connection arrives and when it closes, so each sample faithfully captures + /// the TCP setup + data round-trip + teardown cost end-to-end. + /// + /// Returns `p50_us` across all collected samples, or `None` if none arrived. + async fn measure_crr_latency( + sandbox: &Sandbox, + iterations: u32, + ) -> Result, Box> { + let mut all_samples: Vec = Vec::new(); + + for iteration_index in 0..iterations { + let listener = TcpListener::bind("127.0.0.1:0")?; + let host_port = listener.local_addr()?.port(); + + // The host accepts CRR_SAMPLES_PER_ITER connections, times each cycle, + // and sends results back over a channel. + let (crr_tx, crr_rx) = mpsc::channel::>(); + let sample_count = CRR_SAMPLES_PER_ITER; + + let crr_deadline = Instant::now() + LATENCY_RECV_TIMEOUT + ACCEPT_DEADLINE_SLACK; + std::thread::spawn(move || { + let samples = crr_echo_server(&listener, sample_count, crr_deadline); + let _ = crr_tx.send(samples); + }); + + // Guest: loop CRR_SAMPLES_PER_ITER times; each iteration is a full + // nc invocation (connect → send one byte → read echo → disconnect). 
+ let n = CRR_SAMPLES_PER_ITER; + let guest_cmd = format!( + "i=0; while [ $i -lt {n} ]; do printf 'A' | nc {host} {port}; i=$((i+1)); done", + host = SLIRP_HOST_ADDR, + port = host_port, + n = n, + ); + + let exec_result = sandbox.exec("sh", &["-c", &guest_cmd]).await; + if let Err(exec_err) = exec_result { + tracing::warn!( + iteration = iteration_index, + error = %exec_err, + "crr iteration exec error; skipping" + ); + } + + match crr_rx.recv_timeout(LATENCY_RECV_TIMEOUT) { + Err(recv_err) => { + tracing::warn!( + iteration = iteration_index, + error = %recv_err, + "crr echo channel receive error; skipping" + ); + } + Ok(samples) => { + let count = samples.len(); + let p50_us = if count > 0 { + percentile(&mut samples.clone(), 0.50).as_micros() + } else { + 0 + }; + eprintln!("crr[{iteration_index:>2}]: {count} samples, p50={p50_us} µs"); + all_samples.extend(samples); + } + } + } + + if all_samples.is_empty() { + return Ok(None); + } + + let p50 = percentile(&mut all_samples, 0.50).as_micros() as f64; + Ok(Some(p50)) + } + + /// Measure UDP DNS query throughput against the SLIRP resolver. + /// + /// Returns `None` — the busybox-`nc` tool available in the minimal test + /// initramfs cannot produce a meaningful number here. Each `nc -u -w1` + /// invocation blocks for the full 1-second `-w1` timeout after stdin EOF + /// even when the cached SLIRP reply arrives in microseconds, capping + /// throughput at roughly 1 qps regardless of stack latency. Tighter + /// alternatives tried: + /// + /// - `-q0`: nc exits before the UDP reply arrives, yielding 0 successes. + /// - `/dev/udp/HOST/PORT`: bash-specific; busybox ash does not support it. + /// - `timeout 0.1 nc ...`: `timeout` is not present in the test initramfs. + /// + /// A meaningful qps measurement requires a host-side UDP socket that sends + /// queries through SLIRP directly, bypassing the per-query nc process + /// spawn. 
Until that is implemented, `udp_dns_qps` is reported as `null` + /// in the JSON output. + async fn measure_dns_qps( + _sandbox: &Sandbox, + ) -> Result, Box> { + tracing::warn!( + "dns_qps: busybox-nc bottleneck (~1 qps due to -w1 per-query); \ + reporting null — replace with host-side UDP socket for real numbers" + ); + Ok(None) + } + + /// Measure ICMP echo round-trip latency. + /// + /// Currently a stub that returns `None`: the guest images intentionally + /// omit `/bin/ping` (busybox-static on Fedora lacks + /// `CONFIG_FEATURE_PING_TYPE_DGRAM`, and SOCK_RAW would require root in + /// the guest). A proper measurement path needs either a guest-agent RPC + /// or a custom static ICMP binary in the test image — tracked as a + /// follow-up. + async fn measure_icmp_rr_latency( + _sandbox: &Sandbox, + _iterations: u32, + ) -> Result, Box> { + tracing::warn!( + "icmp_rr_latency: guest-side ping unavailable (no /bin/ping symlink, \ + busybox-static lacks CONFIG_FEATURE_PING_TYPE_DGRAM); reporting null. \ + A host-driven ICMP measurement path is tracked as a follow-up." + ); + Ok(None) + } + + /// Host-side echo server for CRR latency. + /// + /// Accepts `count` independent connections in sequence. For each: starts the + /// timer on `accept`, reads one byte, writes it back, closes the connection, + /// and stops the timer. Returns all per-connection durations. + fn crr_echo_server(listener: &TcpListener, count: u32, deadline: Instant) -> Vec { + let mut samples = Vec::with_capacity(count as usize); + let mut buf = [0u8; 1]; + + for _ in 0..count { + let start = Instant::now(); + let Some((mut stream, _)) = accept_with_deadline(listener, deadline) else { + break; + }; + // Read the request byte and echo it back. + if stream.read_exact(&mut buf).is_ok() { + let _ = stream.write_all(&buf); + } + // Explicit drop closes the connection. 
+ drop(stream); + samples.push(start.elapsed()); + } + + samples + } +} // mod linux_main + +#[cfg(target_os = "linux")] +fn main() -> Result<(), Box> { + linux_main::main_impl() +} diff --git a/src/bin/voidbox-startup-bench/main.rs b/src/bin/voidbox-startup-bench/main.rs index 72cd02e6..4380bf10 100644 --- a/src/bin/voidbox-startup-bench/main.rs +++ b/src/bin/voidbox-startup-bench/main.rs @@ -83,7 +83,7 @@ async fn main() -> Result<(), Box> { ); if !warm_only { - eprintln!("\n-- Phase 1: cold boot --"); + eprintln!("\n-- cold boot --"); let mut cold: Vec = Vec::with_capacity(iters); for i in 0..iters { // Route console to a file only on the very first iteration so we @@ -109,7 +109,7 @@ async fn main() -> Result<(), Box> { } if !cold_only { - eprintln!("\n-- Phase 2: warm (snapshot-restore) --"); + eprintln!("\n-- warm (snapshot-restore) --"); let tmp = tempfile::tempdir()?; let snap_path = capture_snapshot(memory_mb, tmp.path()).await?; eprintln!("captured snapshot at: {}", snap_path.display()); @@ -138,10 +138,19 @@ async fn capture_snapshot( memory_mb: usize, dir: &std::path::Path, ) -> Result> { + // `enable_snapshots(true)` flips the backend selector at + // `backend/kvm.rs:212` to `VsockBackendType::Userspace`. Without + // this, the cold boot uses vhost-vsock and the snapshot file + // captures vhost-shaped state — but `from_snapshot` always + // restores into the userspace backend, producing a mismatch that + // surfaces as `control_channel: deadline reached` on the warm + // phase (vhost's vring state lives in the host kernel's + // vhost-vsock module and isn't part of our snapshot at all). let sandbox = Sandbox::local() .from_env()? .memory_mb(memory_mb) .network(false) + .enable_snapshots(true) .build()?; // Trigger cold boot. 
let _ = sandbox.exec("sh", &["-c", ":"]).await?; diff --git a/src/daemon.rs b/src/daemon.rs index ffa42d5d..20f7a2be 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -1373,7 +1373,7 @@ async fn spawn_service_run( let mut published = false; let mut terminalized = false; - // Phase 1: Wait for output publication OR exit OR watchdog. + // Wait for output publication OR exit OR watchdog. tokio::select! { output_result = &mut output_rx => { if let Ok(publication) = output_result { diff --git a/src/devices/virtio_net.rs b/src/devices/virtio_net.rs index 8cd48d0b..71214d47 100644 --- a/src/devices/virtio_net.rs +++ b/src/devices/virtio_net.rs @@ -13,7 +13,8 @@ use std::sync::{Arc, Mutex}; use tracing::{debug, trace, warn}; use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; -use crate::network::slirp::{SlirpStack, GUEST_MAC}; +use crate::network::slirp::GUEST_MAC; +use crate::network::NetworkBackend; use crate::Result; /// Virtio descriptor flags @@ -142,8 +143,8 @@ struct QueueState { /// Virtio-net device state pub struct VirtioNetDevice { - /// SLIRP stack for networking - slirp: Arc>, + /// Network backend (SLIRP or any [`NetworkBackend`] impl) + slirp: Arc>, /// Guest MAC address mac: [u8; 6], /// Device features @@ -166,6 +167,8 @@ pub struct VirtioNetDevice { tx_queue: QueueState, /// Packets waiting to be received by guest rx_buffer: Vec>, + /// Scratch buffer reused across `drain_to_guest` calls to avoid per-poll allocation + rx_scratch: Vec>, /// MMIO base address mmio_base: u64, /// MMIO size @@ -181,8 +184,8 @@ pub struct VirtioNetDevice { } impl VirtioNetDevice { - /// Create a new virtio-net device with SLIRP backend - pub fn new(slirp: Arc>) -> Result { + /// Create a new virtio-net device with the given network backend + pub fn new(slirp: Arc>) -> Result { debug!("Creating virtio-net device with SLIRP backend"); let device_features = features::VIRTIO_NET_F_MAC @@ -208,6 +211,7 @@ impl VirtioNetDevice { ..Default::default() }, rx_buffer: 
Vec::new(), + rx_scratch: Vec::new(), mmio_base: 0, mmio_size: 0x200, tx_avail_idx: 0, @@ -656,11 +660,13 @@ impl VirtioNetDevice { /// Get frames waiting to be received by guest (RX path) pub fn get_rx_frames(&mut self) -> Vec> { - // Poll SLIRP for new packets - let frames = { - let mut slirp = self.slirp.lock().unwrap(); - slirp.poll() - }; + // Drain backend frames into the reused scratch buffer. + self.rx_scratch.clear(); + { + let mut backend = self.slirp.lock().unwrap(); + backend.drain_to_guest(&mut self.rx_scratch); + } + let frames = std::mem::take(&mut self.rx_scratch); // Prepend virtio-net header to each frame let mut result = Vec::new(); @@ -779,11 +785,35 @@ impl VirtioNetDevice { pub fn mac(&self) -> &[u8; 6] { &self.mac } + + /// Return the epoll dispatch instance from the underlying network backend, + /// if the backend is a `SlirpBackend` (Linux only). + /// + /// `net_poll_thread` uses this to block on `epoll_wait` instead of + /// sleeping, waking immediately when host sockets become readable. + #[cfg(target_os = "linux")] + pub fn epoll_arc( + &self, + ) -> Option> { + let backend = self.slirp.lock().unwrap(); + backend.epoll_arc() + } + + /// Forward ready epoll events into the network backend's per-tick queue. + /// + /// Called by net_poll_thread after each epoll_wait returns so that + /// drain_to_guest can process events without re-locking EpollDispatch. 
+ #[cfg(target_os = "linux")] + pub fn push_events_to_backend(&self, events: &[crate::network::epoll_dispatch::EpollEvent]) { + let backend = self.slirp.lock().unwrap(); + backend.push_ready_events(events); + } } #[cfg(test)] mod tests { use super::*; + use crate::network::slirp::SlirpBackend; #[test] fn test_virtio_net_header() { @@ -798,7 +828,8 @@ mod tests { #[test] fn test_mmio_magic() { - let slirp = Arc::new(Mutex::new(SlirpStack::new().unwrap())); + let slirp: Arc> = + Arc::new(Mutex::new(SlirpBackend::new().unwrap())); let device = VirtioNetDevice::new(slirp).unwrap(); let mut data = [0u8; 4]; @@ -809,7 +840,8 @@ mod tests { #[test] fn test_mmio_version() { - let slirp = Arc::new(Mutex::new(SlirpStack::new().unwrap())); + let slirp: Arc> = + Arc::new(Mutex::new(SlirpBackend::new().unwrap())); let device = VirtioNetDevice::new(slirp).unwrap(); let mut data = [0u8; 4]; @@ -820,7 +852,8 @@ mod tests { #[test] fn test_device_type() { - let slirp = Arc::new(Mutex::new(SlirpStack::new().unwrap())); + let slirp: Arc> = + Arc::new(Mutex::new(SlirpBackend::new().unwrap())); let device = VirtioNetDevice::new(slirp).unwrap(); let mut data = [0u8; 4]; diff --git a/src/network/epoll_dispatch.rs b/src/network/epoll_dispatch.rs new file mode 100644 index 00000000..046f9510 --- /dev/null +++ b/src/network/epoll_dispatch.rs @@ -0,0 +1,387 @@ +//! Linux epoll-driven readiness dispatch for SLIRP host sockets. +//! +//! Owns one `epoll_fd` plus an eagerly-initialized self-pipe. Callers +//! register socket FDs with a `FlowToken` (a 64-bit identifier the +//! dispatcher returns on readiness). The poll thread calls +//! `wait_with_timeout` to block until any registered FD is ready or the +//! timeout fires, then drains the events into a caller-owned buffer. +//! +//! `EpollDispatch` is `Sync`: the Linux kernel serializes concurrent +//! `epoll_ctl` and `epoll_wait` calls on the same epoll fd internally. +//! Callers can therefore share one `Arc` across threads +//! 
and call `register`/`unregister` without an outer `Mutex`, eliminating +//! the lock-contention between `wait_with_timeout` (net-poll thread) and +//! `register` (vCPU thread handling new TCP SYNs). +//! +//! Why no crate? The standard `mio`/`tokio` story would pull in a +//! reactor + a runtime that the SLIRP poll loop does not need. +//! `libc::epoll_*` is two syscalls, fully observable, and the surface +//! fits in ~200 lines. + +use std::io; +use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +/// Opaque per-FD identifier the caller uses to look up which flow a +/// readiness event belongs to. Encoded into `epoll_data.u64`. +pub type FlowToken = u64; + +/// One readiness event, mapped from `libc::epoll_event`. +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +pub struct EpollEvent { + pub token: FlowToken, + pub readable: bool, + pub writable: bool, +} + +/// Direction of interest for an `EpollDispatch::register` call. +/// +/// Closed enum lets the type system reject impossible combinations (e.g. +/// "neither read nor write") at compile time and gives a clear name to +/// each mode rather than two opaque booleans. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RegisterMode { + /// Wake on EPOLLIN only. + Read, + /// Wake on EPOLLOUT only. + Write, + /// Wake on either EPOLLIN or EPOLLOUT. + ReadWrite, +} + +/// Sentinel token reserved for the self-pipe wakeup mechanism. +/// Never returned to callers. +const SELF_PIPE_TOKEN: FlowToken = u64::MAX; + +/// `EpollDispatch` is `Sync`: concurrent `epoll_ctl` and `epoll_wait` +/// on the same epoll fd are kernel-serialized and safe from multiple +/// threads. The only shared state beyond the fd is `registered_count` +/// (an `AtomicUsize`) and the self-pipe (immutable after construction). 
+pub struct EpollDispatch { + epoll_fd: OwnedFd, + /// Read end of the self-pipe; registered with EPOLLIN at construction. + read_end: OwnedFd, + /// Cloneable waker backed by the write end of the self-pipe. + waker_handle: Arc, + /// Number of user-registered FDs (excludes the self-pipe). + registered_count: AtomicUsize, +} + +// SAFETY: All mutable state is either atomic or only accessed from one +// thread at a time (epoll_ctl/epoll_wait are kernel-serialized on the fd). +unsafe impl Sync for EpollDispatch {} + +impl EpollDispatch { + /// Create a new epoll instance with `EPOLL_CLOEXEC` and eagerly + /// initialize the self-pipe so `waker()` is lock-free. + pub fn new() -> io::Result { + // SAFETY: `epoll_create1` returns -1 on error and a valid fd + // otherwise. We wrap into OwnedFd so Drop closes it. + let raw = unsafe { libc::epoll_create1(libc::EPOLL_CLOEXEC) }; + if raw < 0 { + return Err(io::Error::last_os_error()); + } + let epoll_fd = unsafe { OwnedFd::from_raw_fd(raw) }; + + // Eagerly create the self-pipe and register its read end. + // This avoids the lazy-init branch in the hot path and lets + // `waker()` take `&self` instead of `&mut self`. + let (read_fd, write_fd) = create_pipe2_nonblock_cloexec(); + let mut ev = libc::epoll_event { + events: libc::EPOLLIN as u32, + u64: SELF_PIPE_TOKEN, + }; + // SAFETY: epoll_ctl ADD with a valid fd and event struct. + let epoll_ctl_result = unsafe { + libc::epoll_ctl( + epoll_fd.as_raw_fd(), + libc::EPOLL_CTL_ADD, + read_fd.as_raw_fd(), + &mut ev as *mut _, + ) + }; + if epoll_ctl_result < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(Self { + epoll_fd, + read_end: read_fd, + waker_handle: Arc::new(write_fd), + registered_count: AtomicUsize::new(0), + }) + } + + /// Register `fd` with the dispatcher under `token` for the requested + /// readiness `mode`. `token` is opaque to the dispatcher — returned + /// verbatim on readiness events. 
+ /// + /// Thread-safe: concurrent calls with `unregister` and + /// `wait_with_timeout` are serialized by the kernel's per-epoll-fd lock. + pub fn register(&self, fd: RawFd, token: FlowToken, mode: RegisterMode) -> io::Result<()> { + let events: u32 = match mode { + RegisterMode::Read => libc::EPOLLIN as u32, + RegisterMode::Write => libc::EPOLLOUT as u32, + RegisterMode::ReadWrite => (libc::EPOLLIN | libc::EPOLLOUT) as u32, + }; + let mut ev = libc::epoll_event { events, u64: token }; + // SAFETY: epoll_ctl reads `ev` for ADD; we own `fd` for the + // lifetime of the registration (caller's contract). + let epoll_ctl_result = unsafe { + libc::epoll_ctl( + self.epoll_fd.as_raw_fd(), + libc::EPOLL_CTL_ADD, + fd, + &mut ev as *mut _, + ) + }; + if epoll_ctl_result < 0 { + return Err(io::Error::last_os_error()); + } + if token != SELF_PIPE_TOKEN { + self.registered_count.fetch_add(1, Ordering::Relaxed); + } + Ok(()) + } + + /// Thread-safe: concurrent calls with `register` and `wait_with_timeout` + /// are serialized by the kernel's per-epoll-fd lock. + pub fn unregister(&self, fd: RawFd) -> io::Result<()> { + // SAFETY: epoll_ctl ignores the event pointer for DEL but + // still requires it to be non-null on older kernels. + let mut ev = libc::epoll_event { events: 0, u64: 0 }; + let epoll_ctl_result = unsafe { + libc::epoll_ctl( + self.epoll_fd.as_raw_fd(), + libc::EPOLL_CTL_DEL, + fd, + &mut ev as *mut _, + ) + }; + if epoll_ctl_result < 0 { + return Err(io::Error::last_os_error()); + } + self.registered_count.fetch_sub(1, Ordering::Relaxed); + Ok(()) + } + + /// Returns the number of user-registered FDs (excludes the self-pipe). + #[cfg(any(test, feature = "bench-helpers"))] + pub(crate) fn registered_fd_count(&self) -> usize { + self.registered_count.load(Ordering::Relaxed) + } + + /// Block up to `timeout` for any registered FD to become ready. + /// Drains ready events into `out` (cleared first). 
Returns the + /// number of raw kernel events (including self-pipe wakes) so callers + /// can use it for adaptive-timeout decisions. + /// + /// `timeout = Duration::ZERO` is a non-blocking poll. + /// + /// Self-pipe events are drained to EAGAIN in-place: no extra allocation. + pub fn wait_with_timeout( + &self, + out: &mut Vec, + timeout: Duration, + ) -> io::Result { + out.clear(); + + // Pre-allocate a fixed-size event buffer. 64 ready FDs per + // wait is more than enough for our flow counts; events not + // returned this round will surface on the next wait. + let mut raw_events: [libc::epoll_event; 64] = [libc::epoll_event { events: 0, u64: 0 }; 64]; + + let timeout_ms: i32 = timeout.as_millis().min(i32::MAX as u128) as i32; + + // SAFETY: epoll_wait writes up to raw_events.len() entries; + // returns -1 on error, 0 on timeout, n>0 on events. + let n = unsafe { + libc::epoll_wait( + self.epoll_fd.as_raw_fd(), + raw_events.as_mut_ptr(), + raw_events.len() as i32, + timeout_ms, + ) + }; + if n < 0 { + // EINTR is non-fatal — caller can retry on next tick. + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EINTR) { + return Ok(0); + } + return Err(err); + } + + let raw_count = n as usize; + let mut drained_pipe = false; + + // Single pass: filter self-pipe events (draining the pipe to EAGAIN + // on first occurrence), push real events into `out`. + // No extra allocation: `out` was cleared at the top of this function. + for &raw in &raw_events[..raw_count] { + if raw.u64 == SELF_PIPE_TOKEN { + if !drained_pipe { + // Drain the self-pipe to EAGAIN so EPOLLIN is not + // re-asserted on the next wait. A single read is + // insufficient when wakes arrive faster than we drain + // (burst connection setup), so loop until read returns + // ≤ 0 or a partial fill (pipe empty). + let mut scratch = [0u8; 64]; + loop { + // SAFETY: read from O_NONBLOCK pipe; + // EAGAIN / EOF terminates the loop. 
+ let r = unsafe { + libc::read( + self.read_end.as_raw_fd(), + scratch.as_mut_ptr() as *mut _, + scratch.len(), + ) + }; + if r <= 0 || (r as usize) < scratch.len() { + break; + } + } + drained_pipe = true; + } + continue; + } + out.push(EpollEvent { + token: raw.u64, + readable: (raw.events & libc::EPOLLIN as u32) != 0, + writable: (raw.events & libc::EPOLLOUT as u32) != 0, + }); + } + + Ok(raw_count) + } + + /// Returns a `Waker` that, when called, unblocks any thread + /// currently inside `wait_with_timeout`. The waker is cheap to + /// clone and may be stored across threads. + pub fn waker(&self) -> Waker { + Waker { + write_end: self.waker_handle.clone(), + } + } + + #[cfg(test)] + fn epoll_fd_for_test(&self) -> RawFd { + self.epoll_fd.as_raw_fd() + } +} + +/// Cloneable wakeup handle for `EpollDispatch`. Writing one byte to +/// the underlying pipe wakes a thread blocked in `wait_with_timeout`. +#[derive(Debug, Clone)] +pub struct Waker { + write_end: Arc, +} + +impl Waker { + pub fn wake(&self) { + let buf = [0u8; 1]; + // SAFETY: write to a non-blocking pipe never blocks. We + // ignore EAGAIN — the pipe already has bytes pending, which + // means a wakeup is already queued. + let _ = unsafe { libc::write(self.write_end.as_raw_fd(), buf.as_ptr() as *const _, 1) }; + } +} + +fn create_pipe2_nonblock_cloexec() -> (OwnedFd, OwnedFd) { + let mut fds = [0 as RawFd; 2]; + // SAFETY: pipe2 with O_NONBLOCK | O_CLOEXEC writes two fds into fds. 
+    let rc = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_NONBLOCK | libc::O_CLOEXEC) };
+    assert!(rc == 0, "pipe2 failed: {}", io::Error::last_os_error());
+    let read_end = unsafe { OwnedFd::from_raw_fd(fds[0]) };
+    let write_end = unsafe { OwnedFd::from_raw_fd(fds[1]) };
+    (read_end, write_end)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::os::fd::AsRawFd;
+
+    #[test]
+    fn dispatch_new_creates_epoll_fd() {
+        let dispatch = EpollDispatch::new().expect("EpollDispatch::new");
+        assert!(dispatch.epoll_fd_for_test() >= 0);
+    }
+
+    #[test]
+    fn register_then_unregister_round_trip() {
+        use std::net::TcpListener;
+        let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
+        let dispatch = EpollDispatch::new().expect("EpollDispatch::new");
+        let token: FlowToken = 0xDEAD_BEEF;
+        dispatch
+            .register(listener.as_raw_fd(), token, RegisterMode::Read)
+            .expect("register");
+        dispatch
+            .unregister(listener.as_raw_fd())
+            .expect("unregister");
+    }
+
+    #[test]
+    fn register_invalid_fd_returns_error() {
+        let dispatch = EpollDispatch::new().expect("EpollDispatch::new");
+        let result = dispatch.register(-1, 0, RegisterMode::Read);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn wait_returns_event_when_socket_becomes_readable() {
+        use std::io::Write;
+        use std::net::{TcpListener, TcpStream};
+        let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
+        let addr = listener.local_addr().unwrap();
+        let server = std::thread::spawn(move || {
+            let (mut sock, _) = listener.accept().unwrap();
+            sock.write_all(b"hi").unwrap();
+        });
+        let stream = TcpStream::connect(addr).expect("connect");
+        server.join().unwrap();
+
+        let dispatch = EpollDispatch::new().expect("new");
+        dispatch
+            .register(stream.as_raw_fd(), 0xCAFE, RegisterMode::Read)
+            .expect("register");
+
+        let mut events: Vec<EpollEvent> = Vec::new();
+        let n = dispatch
+            .wait_with_timeout(&mut events, Duration::from_secs(1))
+            .expect("wait");
+        assert_eq!(n, 1);
+        assert_eq!(events[0].token, 0xCAFE);
assert!(events[0].readable); + } + + #[test] + fn wakeup_unblocks_wait_immediately() { + use std::time::Instant; + let dispatch = EpollDispatch::new().expect("new"); + let waker = dispatch.waker(); + + // Start the wait in another thread with a long timeout. + let wait_thread = std::thread::spawn(move || -> std::time::Duration { + let mut events: Vec = Vec::new(); + let start = Instant::now(); + let _ = dispatch.wait_with_timeout(&mut events, Duration::from_secs(5)); + start.elapsed() + }); + + // Wake immediately. + std::thread::sleep(Duration::from_millis(10)); + waker.wake(); + + let elapsed = wait_thread.join().expect("wait thread"); + // Wait thread should return well under the 5 s timeout. + assert!( + elapsed < Duration::from_secs(1), + "wait did not return on wakeup: {elapsed:?}" + ); + } +} diff --git a/src/network/mod.rs b/src/network/mod.rs index d884ec6b..fa498280 100644 --- a/src/network/mod.rs +++ b/src/network/mod.rs @@ -6,9 +6,12 @@ //! - virtio-net configuration //! - Network isolation and NAT +pub(crate) mod epoll_dispatch; +pub mod nat; pub mod slirp; use std::ffi::CString; +use std::io; use crate::{Error, Result}; @@ -63,6 +66,55 @@ impl NetworkConfig { } } +/// A network backend processes raw Ethernet frames between guest and host. +/// +/// Implementations must be `Send` so they can be held behind +/// `Arc>` and accessed from both the vCPU thread (TX path) and +/// the net-poll thread (RX path). +pub trait NetworkBackend: Send { + /// Process a raw Ethernet frame sent by the guest. + /// + /// Called from the vCPU thread on MMIO write to the TX virtqueue. + /// Implementations must not block. + fn process_guest_frame(&mut self, frame: &[u8]) -> io::Result<()>; + + /// Drain Ethernet frames destined for the guest into `out`. + /// + /// Called every ~5ms from the net-poll thread. Frames are + /// complete Ethernet payloads — no virtio-net header (the caller + /// prepends that). 
The buffer is reused across calls to avoid + /// per-poll allocation. + fn drain_to_guest(&mut self, out: &mut Vec>); + + /// Return the backend health status. + /// + /// `false` means the backend has entered an unrecoverable state + /// and should be reconstructed by the caller. The default + /// implementation always returns `true`. + fn is_healthy(&self) -> bool { + true + } + + /// Return the epoll dispatch instance shared by this backend, if any. + /// + /// Only `SlirpBackend` returns `Some`; other backends (mock, future + /// alternatives) return `None`. `net_poll_thread` uses this to block on + /// `epoll_wait` instead of sleeping, reducing host CPU burn between + /// network events. + #[cfg(target_os = "linux")] + fn epoll_arc(&self) -> Option> { + None + } + + /// Push ready epoll events into the backend's per-tick queue. + /// + /// Called by net_poll_thread after each epoll_wait returns, so + /// drain_to_guest can consume them without re-locking EpollDispatch. + /// The default is a no-op; `SlirpBackend` overrides this. + #[cfg(target_os = "linux")] + fn push_ready_events(&self, _events: &[epoll_dispatch::EpollEvent]) {} +} + /// TAP device handle pub struct TapDevice { name: String, diff --git a/src/network/nat.rs b/src/network/nat.rs new file mode 100644 index 00000000..23932d10 --- /dev/null +++ b/src/network/nat.rs @@ -0,0 +1,176 @@ +//! Stateless address translation for SLIRP. +//! +//! Pure functions that map (guest-visible address, rules) → (host-side +//! `SocketAddr` to connect/bind to). No per-flow state lives here — +//! the flow table in `slirp.rs` owns that. Translation itself is a +//! function call. +//! +//! Mirrors passt's `fwd.c::nat_inbound` design: address rewrites are +//! pure functions of (address, rules), not per-flow state. The same +//! pure-function shape extends cleanly to IPv6 dual-stack and +//! port-forwarding without introducing per-flow mutable state. 
+
+use std::net::{Ipv4Addr, SocketAddr};
+
+use ipnet::Ipv4Net;
+use smoltcp::wire::Ipv4Address;
+
+/// Transport protocol discriminant for a port-forwarding rule.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ForwardProto {
+    /// Transmission Control Protocol.
+    Tcp,
+    /// User Datagram Protocol.
+    Udp,
+}
+
+/// One inbound port-forwarding entry.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct PortForward {
+    /// Transport protocol; TCP or UDP.
+    pub proto: ForwardProto,
+    /// Host port to bind. Connections to `127.0.0.1:host_port` are
+    /// proxied into the guest at `guest_port`.
+    pub host_port: u16,
+    /// Guest port the forwarded connection terminates at.
+    pub guest_port: u16,
+}
+
+/// Outbound translation rules, derived once at `SlirpBackend`
+/// construction.
+#[derive(Clone, Debug, Default)]
+pub struct Rules {
+    /// If `true`, guest connections to the SLIRP gateway IP map to
+    /// `127.0.0.1` on the host. Today this is always `true`; left
+    /// configurable so a future TAP backend can flip it off.
+    pub gateway_loopback: bool,
+    /// CIDRs the guest is not allowed to connect to. Outbound packets
+    /// targeting these get `None` from [`translate_outbound`].
+    pub deny_cidrs: Vec<Ipv4Net>,
+    /// Inbound port forwards. Consulted by `SlirpBackend::new` to
+    /// spawn host listeners; not used by [`translate_outbound`].
+    pub port_forwards: Vec<PortForward>,
+}
+
+/// Translate an outbound packet's destination address.
+///
+/// Returns `Some(host_addr)` if the packet should be forwarded —
+/// loopback for the gateway IP, otherwise the original IP. Returns
+/// `None` if the destination is in the deny list.
+///
+/// # Examples
+///
+/// ```
+/// use ipnet::Ipv4Net;
+/// use smoltcp::wire::Ipv4Address;
+/// use void_box::network::nat::{Rules, translate_outbound};
+///
+/// let rules = Rules {
+///     gateway_loopback: true,
+///     deny_cidrs: vec!["169.254.0.0/16".parse().unwrap()],
+///     ..Default::default()
+/// };
+/// let gateway = Ipv4Address::new(10, 0, 2, 2);
+///
+/// // Gateway IP is rewritten to loopback.
+/// let addr = translate_outbound(&rules, gateway, 80, gateway).unwrap();
+/// assert_eq!(addr.ip().to_string(), "127.0.0.1");
+///
+/// // External IPs pass through unchanged.
+/// let ext = Ipv4Address::new(8, 8, 8, 8);
+/// let addr = translate_outbound(&rules, ext, 53, gateway).unwrap();
+/// assert_eq!(addr.ip().to_string(), "8.8.8.8");
+///
+/// // Deny-listed IPs return None.
+/// let metadata = Ipv4Address::new(169, 254, 169, 254);
+/// assert!(translate_outbound(&rules, metadata, 80, gateway).is_none());
+/// ```
+pub fn translate_outbound(
+    rules: &Rules,
+    dst: Ipv4Address,
+    dst_port: u16,
+    gateway_ip: Ipv4Address,
+) -> Option<SocketAddr> {
+    let dst_ipv4 = Ipv4Addr::from(dst.0);
+
+    // Deny-list check first — explicit block beats any other rule.
+ for cidr in &rules.deny_cidrs { + if cidr.contains(&dst_ipv4) { + return None; + } + } + + let host_ip = if rules.gateway_loopback && dst == gateway_ip { + Ipv4Addr::LOCALHOST + } else { + dst_ipv4 + }; + + Some(SocketAddr::from((host_ip, dst_port))) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn gateway() -> Ipv4Address { + Ipv4Address::new(10, 0, 2, 2) + } + + fn rules_basic() -> Rules { + Rules { + gateway_loopback: true, + deny_cidrs: vec!["169.254.0.0/16".parse().unwrap()], + ..Default::default() + } + } + + #[test] + fn gateway_ip_maps_to_loopback() { + let gw = gateway(); + let addr = translate_outbound(&rules_basic(), gw, 80, gw).unwrap(); + assert_eq!(addr.ip().to_string(), "127.0.0.1"); + assert_eq!(addr.port(), 80); + } + + #[test] + fn external_ip_passes_through_unchanged() { + let gw = gateway(); + let ext = Ipv4Address::new(8, 8, 8, 8); + let addr = translate_outbound(&rules_basic(), ext, 53, gw).unwrap(); + assert_eq!(addr.ip().to_string(), "8.8.8.8"); + assert_eq!(addr.port(), 53); + } + + #[test] + fn deny_listed_ip_returns_none() { + let gw = gateway(); + let metadata = Ipv4Address::new(169, 254, 169, 254); + assert!(translate_outbound(&rules_basic(), metadata, 80, gw).is_none()); + } + + #[test] + fn gateway_loopback_false_passes_gateway_through() { + let gw = gateway(); + let rules = Rules { + gateway_loopback: false, + ..Default::default() + }; + let addr = translate_outbound(&rules, gw, 443, gw).unwrap(); + assert_eq!(addr.ip().to_string(), "10.0.2.2"); + assert_eq!(addr.port(), 443); + } + + #[test] + fn empty_deny_list_allows_all() { + let gw = gateway(); + let rules = Rules { + gateway_loopback: false, + deny_cidrs: vec![], + ..Default::default() + }; + let private = Ipv4Address::new(192, 168, 1, 1); + let addr = translate_outbound(&rules, private, 22, gw).unwrap(); + assert_eq!(addr.ip().to_string(), "192.168.1.1"); + } +} diff --git a/src/network/slirp.rs b/src/network/slirp.rs index c81974e2..4b0134e6 100644 --- 
a/src/network/slirp.rs +++ b/src/network/slirp.rs @@ -9,9 +9,18 @@ //! - DNS: 10.0.2.3 //! //! Architecture: +//! - Unified flow table: All TCP/UDP/ICMP echo flows live in a single +//! `flow_table: HashMap`. Per-protocol relay logic +//! dispatches on the FlowEntry variant. //! - ARP: custom handler responds as gateway for all 10.0.2.x IPs -//! - TCP: NAT proxy (raw packet parsing + host TCP sockets) -//! - UDP port 53 (DNS): forwarded to host resolver +//! - TCP: passt-style sequence-mirroring NAT (host→guest via +//! `recv(MSG_PEEK)` + ACK-driven consume; guest→host via direct +//! write + don't-ACK-on-WouldBlock TCP backpressure). No userspace +//! per-connection buffers — the host kernel's socket buffer holds +//! outstanding data. +//! - ICMP echo: relayed via unprivileged `SOCK_DGRAM IPPROTO_ICMP` +//! - UDP: per-flow connected sockets; DNS to 10.0.2.3:53 takes a +//! cached fast-path //! - Other: silently dropped //! //! The smoltcp library is used for its Ethernet/IPv4/TCP/UDP wire types @@ -19,11 +28,16 @@ use std::collections::HashMap; use std::collections::VecDeque; -use std::io::{Read, Write}; -use std::net::{SocketAddr, TcpStream, UdpSocket}; -use std::sync::{Arc, Mutex}; +use std::io::{self, Read, Write}; +use std::net::{Ipv4Addr, SocketAddr, TcpListener, TcpStream, UdpSocket}; +use std::os::fd::{AsRawFd, FromRawFd}; +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicU8, Ordering}; +use std::sync::{mpsc, Arc, Mutex}; use std::time::{Duration, Instant}; +use crate::network::epoll_dispatch::{EpollDispatch, EpollEvent, RegisterMode, Waker}; +use crate::network::{nat, NetworkBackend}; + /// Cached DNS response with expiry. 
struct DnsCacheEntry { response: Vec, @@ -47,9 +61,9 @@ use smoltcp::iface::{Config, Interface, SocketSet}; use smoltcp::phy::{ChecksumCapabilities, Device, DeviceCapabilities, Medium, RxToken, TxToken}; use smoltcp::time::Instant as SmolInstant; use smoltcp::wire::{ - EthernetAddress, EthernetFrame, EthernetProtocol, EthernetRepr, HardwareAddress, IpAddress, - IpCidr, IpProtocol, Ipv4Address, Ipv4Packet, Ipv4Repr, TcpControl, TcpPacket, TcpRepr, - TcpSeqNumber, UdpPacket, + EthernetAddress, EthernetFrame, EthernetProtocol, EthernetRepr, HardwareAddress, Icmpv4Packet, + Icmpv4Repr, IpAddress, IpCidr, IpProtocol, Ipv4Address, Ipv4Packet, Ipv4Repr, TcpControl, + TcpPacket, TcpRepr, TcpSeqNumber, UdpPacket, UdpRepr, }; use tracing::{debug, trace, warn}; @@ -75,7 +89,73 @@ pub const GATEWAY_MAC: [u8; 6] = [0x52, 0x54, 0x00, 0x12, 0x34, 0x01]; const MTU: usize = 1500; const MAX_QUEUE_SIZE: usize = 64; const TCP_WINDOW: u16 = 65535; -const MAX_TO_HOST_BUFFER: usize = 256 * 1024; +const UDP_IDLE_TIMEOUT: Duration = Duration::from_secs(60); + +/// ICMP unprivileged probe state. +/// +/// `0` = unknown (not yet probed), `1` = available, `2` = unavailable +/// (kernel returned `EACCES` or `EPERM` — typically `net.ipv4.ping_group_range` +/// excludes the calling GID). Once set to `2`, `open_icmp_socket` short-circuits. +static ICMP_PROBE: AtomicU8 = AtomicU8::new(0); + +// ────────────────────────────────────────────────────────────────────── +// EpollDispatch flow tokens +// ────────────────────────────────────────────────────────────────────── + +/// High-byte protocol tag embedded in the upper 8 bits of a `FlowToken`. +/// The lower 56 bits are a monotonic per-flow counter (see `FLOW_TOKEN_COUNTER`). +/// The tag lets the relay loop distinguish protocol families with a bitmask +/// instead of a separate lookup; the counter guarantees global uniqueness +/// even when two flows share the same port tuple. 
+const PROTO_TAG_MASK: u64 = 0xFF00_0000_0000_0000; +const PROTO_TAG_TCP: u64 = 0x0100_0000_0000_0000; +const PROTO_TAG_UDP: u64 = 0x0200_0000_0000_0000; +const PROTO_TAG_ICMP: u64 = 0x0300_0000_0000_0000; +const PROTO_TAG_LISTEN: u64 = 0x0400_0000_0000_0000; + +/// Monotonic counter for flow token allocation. The lower 56 bits of each +/// `FlowToken` are drawn from here; the upper 8 bits carry `PROTO_TAG_*`. +/// 2^56 unique tokens are available before wrap — effectively infinite for +/// any realistic process lifetime. +static FLOW_TOKEN_COUNTER: AtomicU64 = AtomicU64::new(0); + +/// Allocate a fresh, globally unique `FlowToken` tagged for the given protocol. +/// +/// The lower 56 bits are drawn from a relaxed monotonic counter shared across +/// all `SlirpBackend` instances. The upper 8 bits carry `proto_tag` so relay +/// loops can demux by protocol without an additional map lookup. +fn next_flow_token(proto_tag: u64) -> u64 { + let counter = FLOW_TOKEN_COUNTER.fetch_add(1, Ordering::Relaxed) & 0x00FF_FFFF_FFFF_FFFF; + proto_tag | counter +} + +/// Build an epoll token for a port-forward listener FD. +/// +/// The high byte carries `PROTO_TAG_LISTEN`; the low 16 bits encode the +/// host port. Each port-forward rule has a distinct host port, so tokens +/// are unique across all registered listeners. +fn flow_token_for_listener(host_port: u16) -> u64 { + PROTO_TAG_LISTEN | u64::from(host_port) +} + +// ────────────────────────────────────────────────────────────────────── +// Inbound port-forward accept channel +// ────────────────────────────────────────────────────────────────────── + +/// One accepted host-side TCP connection waiting to be forwarded into the guest. +/// +/// Produced by [`SlirpBackend::process_listener_readiness`] (epoll-driven +/// accept) and consumed by [`SlirpBackend::process_pending_inbound_accepts`] +/// on the net-poll thread. +pub(crate) struct InboundAccept { + /// The accepted host-side TCP stream (non-blocking after accept). 
+ host_stream: TcpStream, + /// Ephemeral port used as the synthesized SYN source port on the gateway side. + /// Derived from the peer's remote port so it is unique per connection. + high_port: u16, + /// Guest-side destination port (the service the guest is listening on). + guest_port: u16, +} // ────────────────────────────────────────────────────────────────────── // TCP NAT connection tracking @@ -83,8 +163,13 @@ const MAX_TO_HOST_BUFFER: usize = 256 * 1024; #[derive(Debug, Clone, Copy, PartialEq)] #[allow(dead_code)] -enum TcpNatState { +pub(crate) enum TcpNatState { + /// Guest sent SYN; we responded with SYN-ACK; waiting for guest's + /// final ACK to complete the outbound 3-way handshake. SynReceived, + /// We synthesized a SYN to the guest (port-forwarding); waiting + /// for the guest's SYN-ACK to advance to Established. + SynSent, Established, FinWait1, FinWait2, @@ -94,7 +179,7 @@ enum TcpNatState { } /// Key for NAT table: (guest_src_port, dst_ip, dst_port) -#[derive(Debug, Clone, Hash, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] struct NatKey { guest_src_port: u16, dst_ip: Ipv4Address, @@ -108,13 +193,186 @@ struct TcpNatEntry { our_seq: u32, /// Last acknowledged guest sequence number guest_ack: u32, - /// Data received from host, pending delivery to guest - to_guest: Vec, - /// Data received from guest, pending write to host (buffered on EAGAIN) - to_host: Vec, - /// Guest sequence number to ACK once `to_host` is flushed - to_host_pending_ack: Option, last_activity: Instant, + /// Bytes sent to the guest but not yet ACK'd by the guest. + /// Equivalent to `our_seq - last_acked_seq`, stored explicitly so + /// the relay can decide how much new payload to peek+send each poll. + /// The ACK-driven consume path decrements this as the guest ACKs data. + bytes_in_flight: u32, + /// Globally unique epoll token for this flow. 
Allocated once on insert + /// via `next_flow_token(PROTO_TAG_TCP)` and stored here so unregister + /// sites never need to recompute it. + flow_token: u64, +} + +/// Key for the ICMP echo NAT table: (guest ICMP id, destination IP). +/// +/// The host kernel rewrites the ICMP id when sending through a +/// `SOCK_DGRAM IPPROTO_ICMP` socket; we keep the guest's original id here so +/// the reply frame can be translated back before injection. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct IcmpEchoKey { + guest_id: u16, + dst_ip: Ipv4Address, +} + +/// State for one in-flight ICMP echo request from the guest. +struct IcmpEchoEntry { + /// Host-side socket: `socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)`. + /// Set non-blocking; the kernel handles ICMP framing — no + /// `CAP_NET_RAW` needed. + sock: std::net::UdpSocket, + /// The guest's original ICMP id from the echo request. The host kernel + /// rewrites the id to a kernel-assigned value when the `SOCK_DGRAM` + /// ICMP socket sends; we translate back to `guest_id` when emitting the + /// reply frame. + // Read in `relay_icmp_echo` when translating the reply frame. + guest_id: u16, + last_activity: Instant, + /// Globally unique epoll token for this flow. Allocated once on insert + /// via `next_flow_token(PROTO_TAG_ICMP)` and stored here so unregister + /// sites never need to recompute it. + flow_token: u64, +} + +/// Key for the UDP flow NAT table: (guest source port, destination IP, destination port). +/// +/// Each unique 3-tuple maps to its own connected `UdpSocket` on the host, +/// mirroring passt's `udp_flow_from_tap` per-flow design. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct UdpFlowKey { + guest_src_port: u16, + dst_ip: Ipv4Address, + dst_port: u16, +} + +/// State for one active UDP flow from the guest. +struct UdpFlowEntry { + /// Connected `UdpSocket`. The host kernel handles source-port + /// preservation and reply demux; we just `send` and `recv`. + /// Set non-blocking. 
+ sock: std::net::UdpSocket, + /// Last frame timestamp; read by Task 2.4 idle-timeout reaper. + last_activity: Instant, + /// Globally unique epoll token for this flow. Allocated once on insert + /// via `next_flow_token(PROTO_TAG_UDP)` and stored here so unregister + /// sites never need to recompute it. + flow_token: u64, +} + +/// Unified flow-table key. Each variant wraps the protocol-specific +/// key already defined elsewhere in this module — no field changes, +/// just one type the unified `flow_table` `HashMap` (added in Task 4.2) +/// can store. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum FlowKey { + Tcp(NatKey), + Udp(UdpFlowKey), + IcmpEcho(IcmpEchoKey), +} + +/// Unified flow-table value. Each variant wraps the protocol's existing +/// entry struct. +enum FlowEntry { + Tcp(TcpNatEntry), + Udp(UdpFlowEntry), + IcmpEcho(IcmpEchoEntry), +} + +/// Open an unprivileged ICMP socket (`SOCK_DGRAM IPPROTO_ICMP`). +/// +/// The kernel handles ICMP framing; `CAP_NET_RAW` is **not** required. +/// The socket is set `SOCK_NONBLOCK | SOCK_CLOEXEC` at creation time. +/// +/// Returns `Err` if the kernel rejects the call (e.g. the +/// `net.ipv4.ping_group_range` sysctl excludes the current GID). +/// After the first rejection, subsequent calls short-circuit and return +/// `PermissionDenied` without retrying the syscall. +fn open_icmp_socket() -> io::Result { + if ICMP_PROBE.load(Ordering::Relaxed) == 2 { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "ICMP unprivileged probe previously failed", + )); + } + // SAFETY: socket(2) returns -1 on error; we check before wrapping. + // IPPROTO_ICMP + SOCK_DGRAM is the unprivileged ICMP path: the kernel + // handles ICMP framing, no CAP_NET_RAW required. 
+ let raw = unsafe { + libc::socket( + libc::AF_INET, + libc::SOCK_DGRAM | libc::SOCK_NONBLOCK | libc::SOCK_CLOEXEC, + libc::IPPROTO_ICMP, + ) + }; + if raw < 0 { + let err = io::Error::last_os_error(); + let errno = err.raw_os_error(); + let unprivileged_icmp_forbidden = errno == Some(libc::EACCES) || errno == Some(libc::EPERM); + if unprivileged_icmp_forbidden { + // First failure transitions 0 → 2 and emits the warn-once log. + // swap returns the previous value; only log if we were the first + // to set it. + if ICMP_PROBE.swap(2, Ordering::Relaxed) != 2 { + warn!( + "SLIRP: unprivileged ICMP unavailable on this host \ + (sysctl net.ipv4.ping_group_range likely restricts \ + it); ICMP echo from guests will be dropped." + ); + } + } + return Err(err); + } + ICMP_PROBE.store(1, Ordering::Relaxed); + // SAFETY: `raw` is a valid fd from socket(2); UdpSocket adopts + // ownership and closes on drop. + Ok(unsafe { std::net::UdpSocket::from_raw_fd(raw) }) +} + +/// Open a connected UDP socket for one guest→host flow. +/// +/// Binds to an ephemeral port on `0.0.0.0`, sets non-blocking mode, +/// then calls `connect(dst)` so that: +/// - `send` delivers datagrams to `dst` without specifying the address each time. +/// - Incoming datagrams are filtered to replies from `dst` only, enabling +/// per-flow demux without an additional dispatch table. +/// +/// No `CAP_NET_RAW` required — `SOCK_DGRAM` UDP is fully unprivileged. +fn open_udp_flow_socket(dst: std::net::SocketAddr) -> io::Result { + let sock = std::net::UdpSocket::bind("0.0.0.0:0")?; + sock.set_nonblocking(true)?; + sock.connect(dst)?; + Ok(sock) +} + +/// Non-blocking `recv(MSG_PEEK)` on a `TcpStream`, returning the +/// number of bytes available without consuming them from the +/// kernel's recv queue. +/// +/// `std::net::TcpStream` does not expose `MSG_PEEK`; we go through +/// `libc::recv` directly. 
`MSG_DONTWAIT` keeps the call non-blocking +/// even if the underlying stream's `set_nonblocking` flag was +/// dropped at some intermediate point. +/// +/// Used by the passt-style host→guest TCP relay (Task 3.3): peek +/// what's in the kernel buffer, send the un-ACK'd portion to the +/// guest. Bytes stay in the kernel until the guest ACKs and Task +/// 3.4's ACK-driven `read()` consumes them. +fn recv_peek(stream: &TcpStream, buf: &mut [u8]) -> io::Result { + // SAFETY: `stream` outlives the syscall; `buf` is uniquely + // borrowed and `len` matches the slice length. + let n = unsafe { + libc::recv( + stream.as_raw_fd(), + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + libc::MSG_PEEK | libc::MSG_DONTWAIT, + ) + }; + if n < 0 { + return Err(io::Error::last_os_error()); + } + Ok(n as usize) } // ────────────────────────────────────────────────────────────────────── @@ -237,13 +495,11 @@ fn parse_resolv_conf() -> Vec { // SLIRP Stack // ────────────────────────────────────────────────────────────────────── -pub struct SlirpStack { +pub struct SlirpBackend { queue: Arc>, iface: Interface, sockets: SocketSet<'static>, _device: VirtualDevice, - /// TCP NAT table - tcp_nat: HashMap, /// Frames to inject into guest (built by our NAT, not by smoltcp) inject_to_guest: Vec>, /// Maximum concurrent TCP connections allowed @@ -252,26 +508,83 @@ pub struct SlirpStack { max_connections_per_second: u32, /// Sliding window of recent connection timestamps for rate limiting connection_timestamps: VecDeque, - /// Network deny list (CIDR ranges that the guest cannot reach) - deny_list: Vec, + /// Stateless outbound translation rules (deny-list, gateway loopback, port forwards). + nat: nat::Rules, /// Host DNS servers (parsed from /etc/resolv.conf, fallback to public) dns_servers: Vec, /// DNS response cache keyed by the raw query bytes (question section) dns_cache: HashMap, DnsCacheEntry>, /// DNS queries waiting to be resolved on the net-poll thread. 
pending_dns: Vec, + /// Unified flow table keyed by protocol + port tuple. + /// + /// All three protocols (TCP, UDP, ICMP echo) share this table so a single + /// dispatch loop handles all active flows. + flow_table: HashMap, + /// Reverse map from `FlowToken` → `FlowKey` for O(1) readiness-event + /// dispatch. Maintained in sync with `flow_table`: every insert adds an + /// entry; every remove clears it. + token_to_key: HashMap, + /// Live `TcpListener`s for each TCP port-forward rule, keyed by host port. + /// The tuple value is `(listener, guest_port)`. Each listener's FD is + /// registered with `EpollDispatch` under `PROTO_TAG_LISTEN`; readiness + /// events drive the accept loop on the net-poll thread. No dedicated + /// polling thread per rule. + port_forward_listeners: HashMap, + /// Receiver end of the accept channel fed by + /// [`bind_port_forward_listeners`] via [`SlirpBackend::process_listener_readiness`]. + /// Processed on the net-poll thread in + /// [`SlirpBackend::process_pending_inbound_accepts`]. + pending_inbound_accepts: mpsc::Receiver, + /// Sender end of `pending_inbound_accepts`. Kept alive so the channel + /// stays open when no listener threads are running (e.g. in tests) and + /// so test helpers can inject [`InboundAccept`] values directly. + #[allow(dead_code)] + accept_sender: mpsc::Sender, + /// Epoll dispatcher for host socket readiness. `EpollDispatch` is + /// `Sync`: `register`/`unregister` and `wait_with_timeout` are + /// kernel-serialized on the same epoll fd, so no `Mutex` wrapper is + /// needed. The `Arc` lets the net-poll thread share the dispatcher + /// without holding the device lock. + epoll: Arc, + /// Cloneable waker that interrupts `EpollDispatch::wait_with_timeout`. + /// Used after flow-table mutations to unblock the poll thread immediately. + epoll_waker: Waker, + /// Ready events fed by the net-poll thread after each blocking + /// epoll_wait. 
drain_to_guest drains this on every call without + /// any EpollDispatch lock contention. + pending_events: Mutex>, + /// Flow keys queued for removal because their state advanced to + /// Closed in a non-relay code path (e.g. guest FIN/RST in + /// handle_tcp_frame). Drained at the bottom of relay_tcp_nat_data + /// without scanning the full flow_table. + pending_close: Vec, + /// Set to `true` the first time `push_ready_events` is called — + /// signals "an external poller (net_poll_thread) is feeding us + /// readiness events." When true, `drain_to_guest` skips its + /// non-blocking-poll fallback (one mutex op + one epoll_wait + /// syscall per call, ~310 ns overhead) and only consumes + /// `pending_events`. Tests/benches without a net_poll_thread + /// keep the fallback so synthetic harnesses still observe + /// readiness. + has_external_poller: AtomicBool, } -impl SlirpStack { +impl SlirpBackend { pub fn new() -> Result { - Self::with_security(64, 50, &["169.254.0.0/16".to_string()]) + Self::with_security(64, 50, &["169.254.0.0/16".to_string()], &[]) } /// Create a SLIRP stack with security parameters. + /// + /// `port_forwards` maps host ports to guest ports as `(host_port, guest_port)` pairs. + /// Each entry is stored in [`nat::Rules`] as a TCP forward rule; host listeners are + /// spawned in sub-task B (5.5b) and not yet active. 
pub fn with_security( max_concurrent_connections: usize, max_connections_per_second: u32, deny_list_cidrs: &[String], + port_forwards: &[(u16, u16)], ) -> Result { debug!("Creating SLIRP stack"); let queue = Arc::new(Mutex::new(PacketQueue::new())); @@ -296,8 +609,7 @@ impl SlirpStack { let sockets = SocketSet::new(vec![]); - // Parse deny list CIDRs - let deny_list: Vec = deny_list_cidrs + let deny_cidrs: Vec = deny_list_cidrs .iter() .filter_map(|cidr| { cidr.parse::() @@ -309,35 +621,63 @@ impl SlirpStack { }) .collect(); + let nat_port_forwards: Vec = port_forwards + .iter() + .map(|&(host_port, guest_port)| nat::PortForward { + proto: nat::ForwardProto::Tcp, + host_port, + guest_port, + }) + .collect(); + + let nat = nat::Rules { + gateway_loopback: true, + deny_cidrs, + port_forwards: nat_port_forwards, + }; + let dns_servers = parse_resolv_conf(); debug!( - "SLIRP stack created - Gateway: {}, DNS: {}, max_conn: {}, rate: {}/s, deny_list: {} CIDRs, dns_servers: {:?}", - SLIRP_GATEWAY_IP, SLIRP_DNS_IP, max_concurrent_connections, max_connections_per_second, deny_list.len(), dns_servers + "SLIRP stack created - Gateway: {}, DNS: {}, max_conn: {}, rate: {}/s, deny_list: {} CIDRs, port_forwards: {}, dns_servers: {:?}", + SLIRP_GATEWAY_IP, SLIRP_DNS_IP, max_concurrent_connections, max_connections_per_second, + nat.deny_cidrs.len(), nat.port_forwards.len(), dns_servers ); + let (accept_tx, accept_rx) = mpsc::channel::(); + + let epoll_inner = EpollDispatch::new()?; + let epoll_waker = epoll_inner.waker(); + let epoll = Arc::new(epoll_inner); + + // Bind listeners for port-forwards and register their FDs with epoll. 
+ let port_forward_listeners = bind_port_forward_listeners(&nat, &epoll); + Ok(Self { queue, iface, sockets, _device: device, - tcp_nat: HashMap::new(), inject_to_guest: Vec::new(), max_concurrent_connections, max_connections_per_second, connection_timestamps: VecDeque::new(), - deny_list, + nat, dns_servers, dns_cache: HashMap::new(), pending_dns: Vec::new(), + flow_table: HashMap::new(), + token_to_key: HashMap::new(), + port_forward_listeners, + pending_inbound_accepts: accept_rx, + accept_sender: accept_tx, + epoll, + epoll_waker, + pending_events: Mutex::new(Vec::new()), + pending_close: Vec::new(), + has_external_poller: AtomicBool::new(false), }) } - /// Check if a destination IP is blocked by the deny list. - fn is_denied(&self, ip: &Ipv4Address) -> bool { - let addr = std::net::Ipv4Addr::new(ip.0[0], ip.0[1], ip.0[2], ip.0[3]); - self.deny_list.iter().any(|net| net.contains(&addr)) - } - /// Check if a new connection is allowed by the rate limiter. /// Returns true if the connection is allowed. fn check_rate_limit(&mut self) -> bool { @@ -361,6 +701,133 @@ impl SlirpStack { true } + /// Drain the inbound-accept channel and seed a `SynSent` flow-table entry + /// plus a synthesized SYN frame for each accepted connection. + /// + /// Accept connections from any port-forward listeners whose FDs are ready + /// in `ready` and push them onto the inbound-accept channel for + /// [`process_pending_inbound_accepts`] to consume. + /// + /// Drains until `WouldBlock` so that a burst of connections arriving + /// between two epoll wakeups is not spread across multiple ticks. + fn process_listener_readiness(&mut self, ready: &[EpollEvent]) { + // Accepted connections are collected here first so that the borrow on + // `port_forward_listeners` ends before we call `accept_sender.send`. 
+ let mut accepted_batch: Vec = Vec::new(); + let mut sender_failed = false; + + for event in ready { + if !event.readable || event.token & PROTO_TAG_MASK != PROTO_TAG_LISTEN { + continue; + } + let host_port = (event.token & 0xFFFF) as u16; + let Some((listener, guest_port)) = self.port_forward_listeners.get(&host_port) else { + continue; + }; + let guest_port = *guest_port; + // Drain the listener — multiple connections may have arrived in one + // EPOLLIN edge. + loop { + match listener.accept() { + Ok((stream, peer_addr)) => { + let high_port = peer_addr.port(); + let _ = stream.set_nonblocking(true); + trace!( + host_port, + guest_port, + high_port, + peer = %peer_addr, + "SLIRP port-forward: accepted connection" + ); + accepted_batch.push(InboundAccept { + host_stream: stream, + high_port, + guest_port, + }); + } + Err(ref would_block) if would_block.kind() == io::ErrorKind::WouldBlock => { + break; + } + Err(accept_error) => { + warn!( + host_port, + error = %accept_error, + "SLIRP port-forward: accept error" + ); + break; + } + } + } + } + + // Borrow of `port_forward_listeners` has ended; send the batch. + for accepted in accepted_batch { + if self.accept_sender.send(accepted).is_err() { + sender_failed = true; + break; + } + } + let _ = sender_failed; // receiver drop handled gracefully on next tick + } + + /// Called at the top of [`drain_to_guest`] so all `SlirpBackend` mutation + /// stays on the net-poll thread — same single-writer lock model as the rest + /// of the relay pipeline. `process_listener_readiness` enqueues accepted + /// connections via the mpsc channel; this method drains that channel and + /// seeds the flow table. 
+ fn process_pending_inbound_accepts(&mut self) { + loop { + let accepted = match self.pending_inbound_accepts.try_recv() { + Ok(accepted) => accepted, + Err(mpsc::TryRecvError::Empty) => break, + Err(mpsc::TryRecvError::Disconnected) => break, + }; + let InboundAccept { + host_stream, + high_port, + guest_port, + } = accepted; + let our_isn = rand_seq(); + let key = NatKey { + guest_src_port: guest_port, + dst_ip: SLIRP_GATEWAY_IP, + dst_port: high_port, + }; + let token = next_flow_token(PROTO_TAG_TCP); + let entry = TcpNatEntry { + host_stream, + state: TcpNatState::SynSent, + our_seq: our_isn, + guest_ack: 0, + last_activity: Instant::now(), + bytes_in_flight: 0, + flow_token: token, + }; + let host_fd = entry.host_stream.as_raw_fd(); + let flow_key = FlowKey::Tcp(key); + self.flow_table.insert(flow_key, FlowEntry::Tcp(entry)); + self.token_to_key.insert(token, flow_key); + if let Err(e) = self.epoll.register(host_fd, token, RegisterMode::Read) { + warn!( + host_port = high_port, + guest_port, + fd = host_fd, + error = %e, + "SLIRP port-forward: epoll register failed; flow present but readiness-driven relay disabled" + ); + } + self.epoll_waker.wake(); + let syn_frame = synthesize_inbound_syn(high_port, guest_port, our_isn); + self.inject_to_guest.push(syn_frame); + trace!( + host_port = high_port, + guest_port, + our_isn, + "SLIRP port-forward: seeded SynSent entry" + ); + } + } + // ── Public API ────────────────────────────────────────────────── /// Process an ethernet frame from the guest @@ -374,6 +841,15 @@ impl SlirpStack { Err(_) => return Ok(()), }; + // Track inject_to_guest growth so we can wake the net-poll + // thread if this call queued any frames. The poll thread blocks + // in epoll_wait waiting on FD readiness; an ACK queued during + // guest TX has no FD-side signal (the guest is the writer, not + // the reader on the SLIRP-side socket). 
Without an explicit + // wake the ACK sits up to epoll_wait's timeout before being + // flushed — TCP send window stalls, throughput drops 10×. + let inject_len_before = self.inject_to_guest.len(); + match eth.ethertype() { EthernetProtocol::Arp => { self.handle_arp_frame(frame)?; @@ -385,30 +861,84 @@ impl SlirpStack { trace!("SLIRP: ignoring ethertype {:?}", eth.ethertype()); } } + + if self.inject_to_guest.len() > inject_len_before { + self.epoll_waker.wake(); + } Ok(()) } - /// Poll the stack. Returns ethernet frames to send to the guest. - pub fn poll(&mut self) -> Vec> { - // Check rx_queue size before polling + /// Drain frames destined to the guest into `out`, reusing the caller's + /// buffer across calls and avoiding a fresh allocation on every tick. + /// + /// See [`crate::network::NetworkBackend::drain_to_guest`]. + pub fn drain_to_guest(&mut self, out: &mut Vec>) { + // Check rx_queue size before polling. let rx_count = { let q = self.queue.lock().unwrap(); q.rx_queue.len() }; - // 1. Let smoltcp handle ARP + // 1. Let smoltcp handle ARP. let ts = smol_instant_now(); let mut dev = VirtualDevice::new(self.queue.clone()); let changed = self.iface.poll(ts, &mut dev, &mut self.sockets); - // 2. Resolve pending DNS queries (off vCPU thread) + // 2. Resolve pending DNS queries (off vCPU thread). self.resolve_pending_dns(); - // 3. Process TCP NAT data relay - self.relay_tcp_nat_data(); + // 3. Collect ready events. + // + // Always drain `pending_events` first — that's the queue + // `net_poll_thread` fills via `push_ready_events` after every + // successful `epoll_wait`. If we skipped this and only polled + // epoll directly, we would lose every event the net-poll thread + // already drained: level-triggered EPOLLIN doesn't re-fire for + // data the kernel already reported, so the next non-blocking + // poll returns 0 events even when there's work to do. 
CRR + // connections then wait one full 50 ms epoll cycle for the NEXT + // data event before their first data is relayed. + // + // Then, only if no net-poll thread has populated the queue + // (unit tests / benches), fall back to a non-blocking poll on + // the epoll FD ourselves. `try_lock` keeps that fallback safe + // under contention. + let ready: Vec = { + let mut events: Vec = { + let mut queue = self.pending_events.lock().unwrap(); + std::mem::take(&mut *queue) + }; + // Fallback non-blocking poll only when no external poller + // (net_poll_thread) is feeding us events — otherwise we'd + // pay one mutex op + one epoll_wait syscall per call + // (~310 ns) for nothing. The flag is one-way: set by the + // first push_ready_events and stays set for the backend's + // lifetime. + if events.is_empty() && !self.has_external_poller.load(Ordering::Relaxed) { + let _ = self + .epoll + .wait_with_timeout(&mut events, std::time::Duration::ZERO); + } + events + }; + + // 0a. Accept any newly-ready listener connections (may push into + // accept_sender for the next step). + self.process_listener_readiness(&ready); + + // 0b. Drain the accept channel (epoll-driven listeners + test helpers). + self.process_pending_inbound_accepts(); + + // 4. Process TCP NAT data relay. + self.relay_tcp_nat_data(&ready); + + // 5. Relay ICMP echo replies from host sockets back to the guest. + self.relay_icmp_echo(&ready); - // 4. Collect frames: smoltcp ARP responses + our NAT-built frames - let mut frames = Vec::new(); + // 6. Relay UDP flow replies from host sockets back to the guest. + self.relay_udp_flows(&ready); + + // 7. Collect frames: smoltcp ARP responses + our NAT-built frames. 
{ let mut q = self.queue.lock().unwrap(); if !q.tx_queue.is_empty() || rx_count > 0 { @@ -420,11 +950,24 @@ impl SlirpStack { self.inject_to_guest.len() ); } - frames.append(&mut q.tx_queue); + out.append(&mut q.tx_queue); } - frames.append(&mut self.inject_to_guest); + out.append(&mut self.inject_to_guest); + } - frames + /// Poll the stack and return ethernet frames to send to the guest. + /// + /// # Deprecated + /// + /// Allocates a fresh [`Vec`] on every call. Prefer [`drain_to_guest`], + /// which writes into a caller-supplied buffer and avoids the allocation. + /// + /// [`drain_to_guest`]: SlirpBackend::drain_to_guest + #[deprecated(note = "use drain_to_guest")] + pub fn poll(&mut self) -> Vec> { + let mut out = Vec::new(); + self.drain_to_guest(&mut out); + out } /// Extract the DNS question section (bytes after the 12-byte header up to @@ -621,9 +1164,13 @@ impl SlirpStack { let dst_ip = ipv4.dst_addr(); let protocol = ipv4.next_header(); - // DNS (UDP to 10.0.2.3:53) – handle specially - if dst_ip == SLIRP_DNS_IP && protocol == IpProtocol::Udp { - return self.handle_dns_frame(&ipv4); + // UDP — DNS keeps its dedicated cache+forward handler; everything + // else goes through the per-flow connected-socket NAT. + if protocol == IpProtocol::Udp { + if dst_ip == SLIRP_DNS_IP { + return self.handle_dns_frame(&ipv4); + } + return self.handle_udp_frame(&ipv4); } // TCP to any external IP (not gateway) – NAT proxy @@ -634,7 +1181,12 @@ impl SlirpStack { } } - // Everything else (ICMP, etc.) 
– drop silently + // ICMP echo requests — forward via unprivileged SOCK_DGRAM IPPROTO_ICMP socket + if protocol == IpProtocol::Icmp { + return self.handle_icmp_frame(&ipv4); + } + + // Everything else – drop silently trace!("SLIRP: dropping {:?} packet to {}", protocol, dst_ip); Ok(()) } @@ -684,6 +1236,200 @@ impl SlirpStack { Ok(()) } + // ── Non-DNS UDP forwarding ──────────────────────────────────────── + + /// Forward a non-DNS guest UDP datagram to the host via a per-flow connected socket. + /// + /// Each unique (guest source port, destination IP, destination port) 3-tuple maps to + /// one connected `UdpSocket`. On the first frame for a flow the socket is created via + /// [`open_udp_flow_socket`] and stored in `flow_table` under `FlowKey::Udp`. Subsequent + /// frames reuse the existing socket, updating `last_activity` for idle-timeout reaping (Task 2.4). + /// + /// The SLIRP gateway address (`10.0.2.2`) is translated to `127.0.0.1` before + /// connecting, mirroring the same translation used on the TCP NAT path. + /// + /// Reply delivery back to the guest is handled by Task 2.3 (`relay_udp_flows`). + fn handle_udp_frame(&mut self, ipv4: &Ipv4Packet<&[u8]>) -> Result<()> { + let udp = match UdpPacket::new_checked(ipv4.payload()) { + Ok(u) => u, + Err(_) => return Ok(()), + }; + let payload = udp.payload().to_vec(); + let key = UdpFlowKey { + guest_src_port: udp.src_port(), + dst_ip: ipv4.dst_addr(), + dst_port: udp.dst_port(), + }; + + let dst = + match nat::translate_outbound(&self.nat, key.dst_ip, key.dst_port, SLIRP_GATEWAY_IP) { + Some(addr) => addr, + None => { + trace!( + "SLIRP UDP: deny-list reject dst={}:{} from guest_port={}", + key.dst_ip, + key.dst_port, + key.guest_src_port + ); + return Ok(()); + } + }; + + let flow_key = FlowKey::Udp(key); + // Track whether this is a new entry so we can register it with epoll. 
+ let mut new_host_fd: Option = None; + let mut new_token: u64 = 0; + let entry: &mut UdpFlowEntry = match self.flow_table.entry(flow_key) { + std::collections::hash_map::Entry::Occupied(o) => match o.into_mut() { + FlowEntry::Udp(e) => e, + _ => unreachable!("FlowKey::Udp must map to FlowEntry::Udp"), + }, + std::collections::hash_map::Entry::Vacant(v) => { + let sock = match open_udp_flow_socket(dst) { + Ok(s) => s, + Err(e) => { + trace!("SLIRP UDP: open flow socket failed: {e}"); + return Ok(()); + } + }; + let token = next_flow_token(PROTO_TAG_UDP); + new_host_fd = Some(sock.as_raw_fd()); + new_token = token; + match v.insert(FlowEntry::Udp(UdpFlowEntry { + sock, + last_activity: Instant::now(), + flow_token: token, + })) { + FlowEntry::Udp(e) => e, + _ => unreachable!(), + } + } + }; + entry.last_activity = Instant::now(); + + if let Some(host_fd) = new_host_fd { + self.token_to_key.insert(new_token, flow_key); + if let Err(e) = self.epoll.register(host_fd, new_token, RegisterMode::Read) { + warn!( + guest_src_port = key.guest_src_port, + dst_ip = %key.dst_ip, + dst_port = key.dst_port, + fd = host_fd, + error = %e, + "SLIRP UDP: epoll register failed; flow present but readiness-driven relay disabled" + ); + } + self.epoll_waker.wake(); + } + + if let Err(e) = entry.sock.send(&payload) { + trace!("SLIRP UDP: send failed: {e}"); + } + Ok(()) + } + + // ── ICMP echo forwarding ───────────────────────────────────────── + + /// Forward a guest ICMP echo request to the host kernel via an unprivileged + /// `SOCK_DGRAM IPPROTO_ICMP` socket. + /// + /// The kernel rewrites the ICMP identifier on `send_to`; the entry stores + /// the guest's original `ident` so the reply path (Task 1.3) can translate + /// it back before injecting the frame into the guest. 
+ fn handle_icmp_frame(&mut self, ipv4: &Ipv4Packet<&[u8]>) -> Result<()> { + let icmp = match Icmpv4Packet::new_checked(ipv4.payload()) { + Ok(p) => p, + Err(_) => return Ok(()), + }; + let repr = match Icmpv4Repr::parse(&icmp, &Default::default()) { + Ok(r) => r, + Err(_) => return Ok(()), + }; + let (ident, seq_no, data) = match repr { + Icmpv4Repr::EchoRequest { + ident, + seq_no, + data, + } => (ident, seq_no, data), + _ => return Ok(()), // only echo request handled today + }; + + // Copy data before the mutable borrow of self.flow_table below. + let data_owned: Vec = data.to_vec(); + + let key = IcmpEchoKey { + guest_id: ident, + dst_ip: ipv4.dst_addr(), + }; + let flow_key = FlowKey::IcmpEcho(key); + // Track whether this is a new entry so we can register it with epoll. + let mut new_icmp_fd: Option = None; + let mut new_token: u64 = 0; + let entry: &mut IcmpEchoEntry = match self.flow_table.entry(flow_key) { + std::collections::hash_map::Entry::Occupied(occupied) => match occupied.into_mut() { + FlowEntry::IcmpEcho(e) => e, + _ => unreachable!("FlowKey::IcmpEcho must map to FlowEntry::IcmpEcho"), + }, + std::collections::hash_map::Entry::Vacant(vacant) => { + let sock = match open_icmp_socket() { + Ok(s) => s, + Err(e) => { + // Sysctl-driven fallback handled in Task 1.4. 
+ trace!("SLIRP ICMP: open socket failed: {e}"); + return Ok(()); + } + }; + let token = next_flow_token(PROTO_TAG_ICMP); + new_icmp_fd = Some(sock.as_raw_fd()); + new_token = token; + match vacant.insert(FlowEntry::IcmpEcho(IcmpEchoEntry { + sock, + guest_id: ident, + last_activity: Instant::now(), + flow_token: token, + })) { + FlowEntry::IcmpEcho(e) => e, + _ => unreachable!(), + } + } + }; + entry.last_activity = Instant::now(); + + if let Some(host_fd) = new_icmp_fd { + self.token_to_key.insert(new_token, flow_key); + if let Err(e) = self.epoll.register(host_fd, new_token, RegisterMode::Read) { + warn!( + guest_id = key.guest_id, + dst_ip = %key.dst_ip, + fd = host_fd, + error = %e, + "SLIRP ICMP: epoll register failed; flow present but readiness-driven relay disabled" + ); + } + self.epoll_waker.wake(); + } + + // Build a wire ICMP echo packet with seq + data; the kernel will + // rewrite the ident on send_to. + let req = Icmpv4Repr::EchoRequest { + ident: 0, // kernel rewrites + seq_no, + data: &data_owned, + }; + let mut buf = vec![0u8; req.buffer_len()]; + let mut pkt = Icmpv4Packet::new_unchecked(&mut buf); + req.emit(&mut pkt, &Default::default()); + + let dst = SocketAddr::from(( + Ipv4Addr::from(ipv4.dst_addr().0), + 0u16, // port ignored for ICMP + )); + if let Err(e) = entry.sock.send_to(&buf, dst) { + trace!("SLIRP ICMP: send_to failed: {e}"); + } + Ok(()) + } + // ── TCP NAT ───────────────────────────────────────────────────── fn handle_tcp_frame(&mut self, ipv4: &Ipv4Packet<&[u8]>) -> Result<()> { @@ -711,28 +1457,40 @@ impl SlirpStack { src_ip, src_port, dst_ip, dst_port ); - // Check deny list before connecting - if self.is_denied(&dst_ip) { - warn!( - "SLIRP TCP: connection to {}:{} denied by network deny list", - dst_ip, dst_port - ); - let rst = build_tcp_packet_static( - dst_ip, - SLIRP_GUEST_IP, - dst_port, - src_port, - 0, - seq + 1, - TcpControl::Rst, - &[], - ); - self.inject_to_guest.push(rst); - return Ok(()); - } + // Unified 
outbound translation: combines the gateway-loopback + // rewrite + deny-list check in one pure-function call. Returns None if + // the dst is denied; on Some, the SocketAddr already has the right + // host IP (loopback for the gateway, original for everything else). + let dst_addr = + match nat::translate_outbound(&self.nat, dst_ip, dst_port, SLIRP_GATEWAY_IP) { + Some(addr) => addr, + None => { + warn!( + "SLIRP TCP: connection to {}:{} denied by network deny list", + dst_ip, dst_port + ); + let rst = build_tcp_packet_static( + dst_ip, + SLIRP_GUEST_IP, + dst_port, + src_port, + 0, + seq + 1, + TcpControl::Rst, + &[], + ); + self.inject_to_guest.push(rst); + return Ok(()); + } + }; - // Check max concurrent connections - if self.tcp_nat.len() >= self.max_concurrent_connections { + let mut tcp_flow_count = 0; + for flow_key in self.flow_table.keys() { + if let FlowKey::Tcp(_) = flow_key { + tcp_flow_count += 1; + } + } + if tcp_flow_count >= self.max_concurrent_connections { warn!( "SLIRP TCP: max concurrent connections ({}) reached, rejecting SYN to {}:{}", self.max_concurrent_connections, dst_ip, dst_port @@ -771,34 +1529,44 @@ impl SlirpStack { return Ok(()); } - // Remove any stale entry with the same key - self.tcp_nat.remove(&key); - - // Create host TCP connection. - // Map the SLIRP gateway IP (10.0.2.2) to localhost so the guest - // can reach host services (e.g. Ollama at localhost:11434). - let host_ip = if dst_ip == SLIRP_GATEWAY_IP { - std::net::Ipv4Addr::new(127, 0, 0, 1) - } else { - std::net::Ipv4Addr::new(dst_ip.0[0], dst_ip.0[1], dst_ip.0[2], dst_ip.0[3]) - }; - let addr = SocketAddr::new(std::net::IpAddr::V4(host_ip), dst_port); + // Remove any stale entry with the same key, unregistering its FD + // from the epoll set to avoid a dangling registration. 
+ if let Some(FlowEntry::Tcp(stale)) = self.flow_table.get(&FlowKey::Tcp(key)) { + self.token_to_key.remove(&stale.flow_token); + self.epoll.unregister(stale.host_stream.as_raw_fd()).ok(); + } + self.flow_table.remove(&FlowKey::Tcp(key)); - match TcpStream::connect_timeout(&addr, Duration::from_secs(3)) { + // Connect to the host address resolved by translate_outbound above. + match TcpStream::connect_timeout(&dst_addr, Duration::from_secs(3)) { Ok(stream) => { stream.set_nonblocking(true).ok(); + let host_fd = stream.as_raw_fd(); let our_seq: u32 = rand_seq(); + let token = next_flow_token(PROTO_TAG_TCP); + let flow_key = FlowKey::Tcp(key); let entry = TcpNatEntry { host_stream: stream, state: TcpNatState::SynReceived, our_seq, guest_ack: seq + 1, - to_guest: Vec::new(), - to_host: Vec::new(), - to_host_pending_ack: None, last_activity: Instant::now(), + bytes_in_flight: 0, + flow_token: token, }; - self.tcp_nat.insert(key.clone(), entry); + self.flow_table.insert(flow_key, FlowEntry::Tcp(entry)); + self.token_to_key.insert(token, flow_key); + if let Err(e) = self.epoll.register(host_fd, token, RegisterMode::Read) { + warn!( + guest_src_port = key.guest_src_port, + dst_ip = %key.dst_ip, + dst_port = key.dst_port, + fd = host_fd, + error = %e, + "SLIRP TCP: epoll register failed; flow present but readiness-driven relay disabled" + ); + } + self.epoll_waker.wake(); // Send SYN-ACK back to guest let syn_ack = build_tcp_packet_static( @@ -837,22 +1605,59 @@ impl SlirpStack { } // Look up existing connection - let entry = match self.tcp_nat.get_mut(&key) { - Some(e) => e, - None => { - trace!( - "SLIRP TCP: no NAT entry for {}:{} -> {}:{}", - src_ip, - src_port, - dst_ip, - dst_port - ); - return Ok(()); - } + let flow_key = FlowKey::Tcp(key); + let Some(FlowEntry::Tcp(entry)) = self.flow_table.get_mut(&flow_key) else { + trace!( + "SLIRP TCP: no NAT entry for {}:{} -> {}:{}", + src_ip, + src_port, + dst_ip, + dst_port + ); + return Ok(()); }; + // Track whether this 
processing path sets state=Closed so we can + // enqueue the key in pending_close once the entry borrow ends. + // FIN/RST paths push to pending_close and return early; mid-function + // error paths (ACK-driven read failure, write failure) set this flag. + let mut closed_by_error = false; + entry.last_activity = Instant::now(); + // Inbound port-forward: guest's SYN-ACK completing the host-initiated + // 3-way handshake. We synthesized a SYN to the guest (5.5b.2/5.5b.3); + // the guest's kernel accepted it and replied with SYN+ACK. Send an ACK + // back so the guest's TCP stack transitions to Established on its side, + // then record our state as Established too. + // + // NatKey for the inbound flow: guest_src_port = guest service port, + // dst_ip = SLIRP_GATEWAY_IP, dst_port = the ephemeral high port we + // used as the SYN's source port. The ACK frame therefore flows + // src=SLIRP_GATEWAY_IP:dst_port → dst=SLIRP_GUEST_IP:guest_src_port. + if entry.state == TcpNatState::SynSent && tcp.syn() && tcp.ack() { + let ack_frame = build_tcp_packet_static( + SLIRP_GATEWAY_IP, // src_ip — the "host" side of the forward + SLIRP_GUEST_IP, // dst_ip — the guest + key.dst_port, // src_port — high ephemeral port we sent the SYN from + key.guest_src_port, // dst_port — the guest's service port + entry.our_seq.wrapping_add(1), // seq — our ISN + 1 (SYN consumed one) + tcp.seq_number().0.wrapping_add(1) as u32, // ack — guest ISN + 1 + TcpControl::None, + &[], + ); + self.inject_to_guest.push(ack_frame); + entry.our_seq = entry.our_seq.wrapping_add(1); + entry.guest_ack = tcp.seq_number().0.wrapping_add(1) as u32; + entry.state = TcpNatState::Established; + trace!( + "SLIRP TCP: inbound 3WH complete for guest_port={} high_port={}, → Established", + key.guest_src_port, + key.dst_port + ); + return Ok(()); + } + // ACK (completing handshake or acknowledging data) if tcp.ack() && entry.state == TcpNatState::SynReceived { entry.state = TcpNatState::Established; @@ -864,50 +1669,109 
@@ impl SlirpStack { ); } + // ACK-driven consume: when the guest acknowledges data we sent via + // peek-based relay (Task 3.3), read those bytes from the kernel recv + // buffer to advance the kernel's read pointer. Without this step the + // kernel buffer fills up and recv_peek keeps returning the same bytes. + // + // Only runs in Established state — the SynReceived ACK above does not + // carry data acknowledgements from us yet (bytes_in_flight == 0 then). + if tcp.ack() && entry.state == TcpNatState::Established && entry.bytes_in_flight > 0 { + // segment_ack: what the guest is now confirming it has received + // from us (our send-side sequence space). + let segment_ack: u32 = tcp.ack_number().0 as u32; + + // last_sent_acked: the highest our-seq the guest had already + // confirmed before this segment. `our_seq` is the *next* byte we + // would send, so subtracting bytes_in_flight gives the start of the + // in-flight window. + // All arithmetic is wrapping — TCP sequence numbers wrap at 2^32. + let last_sent_acked: u32 = entry.our_seq.wrapping_sub(entry.bytes_in_flight); + + // acked_bytes: how many new bytes the guest acknowledged in this + // segment. 
Guards: + // > 0 — ACK actually advances (not a duplicate or stale ACK) + // <= bytes_in_flight — guest cannot ack more than we've sent + // (defends against malformed / spoofed ACKs from a guest) + let acked_bytes: u32 = segment_ack.wrapping_sub(last_sent_acked); + + if acked_bytes > 0 && acked_bytes <= entry.bytes_in_flight { + let mut sink = [0u8; 65536]; + let mut to_drain = acked_bytes as usize; + let mut drained: u32 = 0; + while to_drain > 0 { + let want = to_drain.min(sink.len()); + match entry.host_stream.read(&mut sink[..want]) { + Ok(0) => break, // EOF — nothing more to drain + Ok(n) => { + to_drain -= n; + drained = drained.wrapping_add(n as u32); + } + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => break, + Err(e) => { + warn!( + "SLIRP TCP: ACK-driven read failed on flow guest_port={}, marking Closed: {}", + key.guest_src_port, e + ); + entry.state = TcpNatState::Closed; + closed_by_error = true; + break; + } + } + } + entry.bytes_in_flight = entry.bytes_in_flight.wrapping_sub(drained); + trace!( + "SLIRP TCP: ACK consumed {} bytes from kernel (in_flight now={}, segment_ack={})", + drained, entry.bytes_in_flight, segment_ack + ); + } + } + let payload = tcp.payload(); if !payload.is_empty() && entry.state == TcpNatState::Established { - let new_ack = seq.wrapping_add(payload.len() as u32); - - if entry.to_host.is_empty() { - match entry.host_stream.write(payload) { - Ok(n) if n == payload.len() => { - entry.guest_ack = new_ack; - let ack_frame = build_tcp_packet_static( - dst_ip, - SLIRP_GUEST_IP, - dst_port, - src_port, - entry.our_seq, - entry.guest_ack, - TcpControl::None, - &[], - ); - self.inject_to_guest.push(ack_frame); - } - Ok(n) => { - entry.to_host.extend_from_slice(&payload[n..]); - entry.to_host_pending_ack = Some(new_ack); - entry.guest_ack = seq.wrapping_add(n as u32); - entry.last_activity = Instant::now(); - } - Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { - entry.to_host.extend_from_slice(payload); - 
entry.to_host_pending_ack = Some(new_ack); - entry.last_activity = Instant::now(); - } - Err(e) => { - warn!("SLIRP TCP: write to host failed: {}", e); - entry.state = TcpNatState::Closed; - } + // Guest→host backpressure: rely on the kernel's send buffer + TCP + // retransmit. ACK only the bytes the kernel accepted right now; + // on WouldBlock, don't ACK at all and let the guest retransmit. + // No userspace buffering, no fixed byte-cap on in-flight data. + let payload_seq = seq; + let n_written = match entry.host_stream.write(payload) { + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => 0, + Err(e) => { + warn!( + "SLIRP TCP: write to host failed on flow guest_port={}, marking Closed: {}", + key.guest_src_port, e + ); + entry.state = TcpNatState::Closed; + // entry last used above; borrow ends here before pending_close push. + self.pending_close.push(flow_key); + return Ok(()); } - } else if entry.to_host.len() + payload.len() <= MAX_TO_HOST_BUFFER { - entry.to_host.extend_from_slice(payload); - entry.to_host_pending_ack = Some(new_ack); - entry.last_activity = Instant::now(); - } else { - warn!("SLIRP TCP: to_host buffer full, dropping connection"); - entry.state = TcpNatState::Closed; + }; + + if n_written > 0 { + let ack_seq = payload_seq.wrapping_add(n_written as u32); + entry.guest_ack = ack_seq; + let ack_frame = build_tcp_packet_static( + dst_ip, + SLIRP_GUEST_IP, + dst_port, + src_port, + entry.our_seq, + entry.guest_ack, + TcpControl::None, + &[], + ); + self.inject_to_guest.push(ack_frame); + trace!( + "SLIRP TCP guest→host: wrote {}/{} bytes, ACK={}", + n_written, + payload.len(), + ack_seq + ); } + // else: kernel send buffer full (WouldBlock) — don't ACK. + // Guest TCP will retransmit; kernel buffer drains over time. 
} // FIN from guest @@ -927,45 +1791,115 @@ impl SlirpStack { self.inject_to_guest.push(fin_ack_frame); entry.our_seq = entry.our_seq.wrapping_add(1); entry.state = TcpNatState::Closed; + // entry last used above; borrow ends before pending_close push. + self.pending_close.push(flow_key); + return Ok(()); } // RST from guest if tcp.rst() { debug!("SLIRP TCP: RST from guest for {}:{}", dst_ip, dst_port); entry.state = TcpNatState::Closed; + // entry last used above; borrow ends before pending_close push. + self.pending_close.push(flow_key); + return Ok(()); + } + + // ACK-driven read failure marked the entry Closed but execution + // continues here (no early return). Push to pending_close so + // relay_tcp_nat_data removes the flow without an O(n) sweep. + if closed_by_error { + self.pending_close.push(flow_key); } Ok(()) } - /// Relay data from host TCP connections to guest - fn relay_tcp_nat_data(&mut self) { - let mut to_remove = Vec::new(); + /// Relay data from host TCP connections to guest, driven by epoll readiness. + /// + /// Closed flows enqueued by handle_tcp_frame (FIN/RST) are drained from + /// `pending_close` and removed promptly. Idle-timeout detection iterates + /// only the flow table entries directly, avoiding a separate Vec allocation. + /// Data relay is restricted to flows with an EPOLLIN event in `ready`. + fn relay_tcp_nat_data(&mut self, ready: &[EpollEvent]) { // Collect frames to inject (built separately to avoid borrow issues) let mut frames_to_inject: Vec> = Vec::new(); - for (key, entry) in self.tcp_nat.iter_mut() { - if entry.state == TcpNatState::Closed { - to_remove.push(key.clone()); - continue; + // Seed removal set from flows already marked Closed by handle_tcp_frame + // (FIN/RST path) via the pending_close queue. HashSet gives O(1) + // membership checks in the idle-timeout sweep and readiness filter below, + // avoiding the O(n*k) cost of Vec::contains under connection churn. 
+ let mut to_remove_set: std::collections::HashSet = + std::mem::take(&mut self.pending_close) + .into_iter() + .collect(); + + // Idle-timeout sweep: scan flow_table once without collecting a + // separate key Vec. 300-second inactivity applies regardless of epoll + // readiness; this is O(n) in the number of TCP flows. + const TCP_IDLE_TIMEOUT: Duration = Duration::from_secs(300); + for (flow_key, entry) in &self.flow_table { + if let FlowEntry::Tcp(tcp_entry) = entry { + if tcp_entry.last_activity.elapsed() > TCP_IDLE_TIMEOUT { + to_remove_set.insert(*flow_key); + } } - if entry.last_activity.elapsed() > Duration::from_secs(300) { - to_remove.push(key.clone()); + } + + let mut tcp_flow_keys: Vec = Vec::new(); + for event in ready { + if !event.readable || event.token & PROTO_TAG_MASK != PROTO_TAG_TCP { continue; } - if entry.state != TcpNatState::Established { + let Some(flow_key) = self.token_to_key.get(&event.token).copied() else { + continue; + }; + if to_remove_set.contains(&flow_key) { continue; } + tcp_flow_keys.push(flow_key); + } - if !entry.to_host.is_empty() { - match entry.host_stream.write(&entry.to_host) { - Ok(n) => { - entry.to_host.drain(..n); - entry.last_activity = Instant::now(); - if entry.to_host.is_empty() { - if let Some(ack) = entry.to_host_pending_ack.take() { - entry.guest_ack = ack; - let ack_frame = build_tcp_packet_static( + for flow_key in tcp_flow_keys { + let FlowKey::Tcp(key) = flow_key else { + continue; + }; + + let mut became_closed = false; + let mut fin_frame: Option> = None; + + { + let Some(FlowEntry::Tcp(entry)) = self.flow_table.get_mut(&flow_key) else { + continue; + }; + + if entry.state != TcpNatState::Established { + continue; + } + + // Host→guest path: peek what's in the kernel recv buffer + // without consuming. Send only the un-ACK'd portion (bytes past + // what we've already sent). The kernel's socket buffer holds the + // outstanding data; ACK-driven `read()` consumes it once the + // guest ACKs. 
+ let mut peek_buf = [0u8; 65536]; + match recv_peek(&entry.host_stream, &mut peek_buf) { + Ok(0) => { + // Host closed the connection. Send FIN to guest below. + debug!( + "SLIRP TCP: host EOF on flow guest_port={}, marking Closed", + key.guest_src_port + ); + entry.state = TcpNatState::Closed; + became_closed = true; + } + Ok(peek_n) => { + let in_flight = entry.bytes_in_flight as usize; + if peek_n > in_flight { + let new_bytes = &peek_buf[in_flight..peek_n]; + let mut sent_total: usize = 0; + for chunk in new_bytes.chunks(MTU - 54) { + let frame = build_tcp_packet_static( key.dst_ip, SLIRP_GUEST_IP, key.dst_port, @@ -973,78 +1907,318 @@ impl SlirpStack { entry.our_seq, entry.guest_ack, TcpControl::None, - &[], + chunk, ); - frames_to_inject.push(ack_frame); + frames_to_inject.push(frame); + entry.our_seq = entry.our_seq.wrapping_add(chunk.len() as u32); + entry.bytes_in_flight = + entry.bytes_in_flight.wrapping_add(chunk.len() as u32); + sent_total += chunk.len(); } + entry.last_activity = Instant::now(); + trace!( + "SLIRP TCP relay: peeked {} bytes (in_flight before={}, sent now={})", + peek_n, + in_flight, + sent_total + ); } + // else: kernel buffer holds only already-in-flight bytes. + // Wait for guest ACK before sending more (Task 3.4). + } + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + // Kernel recv buffer empty; nothing to do this poll. 
} - Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {} Err(e) => { - warn!("SLIRP TCP: buffered write to host failed: {}", e); + warn!( + "SLIRP TCP: recv_peek failed on flow guest_port={}, marking Closed: {}", + key.guest_src_port, e + ); entry.state = TcpNatState::Closed; - continue; + became_closed = true; } } - } - // Read from host - let mut buf = [0u8; 16384]; - match entry.host_stream.read(&mut buf) { - Ok(0) => { - debug!("SLIRP TCP: host closed for {}:{}", key.dst_ip, key.dst_port); - entry.state = TcpNatState::Closed; - } - Ok(n) => { - entry.to_guest.extend_from_slice(&buf[..n]); - entry.last_activity = Instant::now(); - } - Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {} - Err(e) => { - trace!("SLIRP TCP: host read error: {}", e); - entry.state = TcpNatState::Closed; + // FIN if host closed + if entry.state == TcpNatState::Closed { + fin_frame = Some(build_tcp_packet_static( + key.dst_ip, + SLIRP_GUEST_IP, + key.dst_port, + key.guest_src_port, + entry.our_seq, + entry.guest_ack, + TcpControl::Fin, + &[], + )); } + } // entry borrow ends here + + if let Some(fin) = fin_frame { + frames_to_inject.push(fin); + } + // Queue for removal so the cleanup loop below can unregister + drop. 
+ if became_closed { + to_remove_set.insert(flow_key); } + } - // Build data frames for guest - while !entry.to_guest.is_empty() && entry.state == TcpNatState::Established { - let chunk_size = entry.to_guest.len().min(MTU - 54); - let chunk: Vec = entry.to_guest.drain(..chunk_size).collect(); - let frame = build_tcp_packet_static( - key.dst_ip, - SLIRP_GUEST_IP, - key.dst_port, - key.guest_src_port, - entry.our_seq, - entry.guest_ack, - TcpControl::None, - &chunk, - ); - entry.our_seq = entry.our_seq.wrapping_add(chunk.len() as u32); - frames_to_inject.push(frame); + self.inject_to_guest.append(&mut frames_to_inject); + + for flow_key in to_remove_set { + if let Some(FlowEntry::Tcp(entry)) = self.flow_table.get(&flow_key) { + self.token_to_key.remove(&entry.flow_token); + self.epoll.unregister(entry.host_stream.as_raw_fd()).ok(); } + self.flow_table.remove(&flow_key); + } + } - // FIN if host closed - if entry.state == TcpNatState::Closed { - let fin = build_tcp_packet_static( - key.dst_ip, - SLIRP_GUEST_IP, - key.dst_port, - key.guest_src_port, - entry.our_seq, - entry.guest_ack, - TcpControl::Fin, - &[], - ); - frames_to_inject.push(fin); + /// Drain replies from each active ICMP echo socket and emit echo-reply + /// frames to the guest, driven by epoll readiness. + /// + /// Only flows whose token appears in `ready` with EPOLLIN set are visited. + /// Entries idle longer than `ICMP_IDLE_TIMEOUT` are still evicted on any + /// readiness event for that flow. 
+ fn relay_icmp_echo(&mut self, ready: &[EpollEvent]) { + const ICMP_IDLE_TIMEOUT: Duration = Duration::from_secs(60); + let now = Instant::now(); + + let mut ready_flow_keys: Vec = Vec::new(); + for event in ready { + if !event.readable || event.token & PROTO_TAG_MASK != PROTO_TAG_ICMP { + continue; } + let Some(flow_key) = self.token_to_key.get(&event.token).copied() else { + continue; + }; + ready_flow_keys.push(flow_key); } - self.inject_to_guest.append(&mut frames_to_inject); + // Mirrors the TCP idle-timeout sweep so ICMP sockets do not accumulate + // indefinitely when the ping target goes silent. + let mut icmp_to_remove: std::collections::HashSet = + std::collections::HashSet::new(); + for (flow_key, entry) in &self.flow_table { + let FlowKey::IcmpEcho(_) = flow_key else { + continue; + }; + let FlowEntry::IcmpEcho(icmp_entry) = entry else { + continue; + }; + if now.duration_since(icmp_entry.last_activity) > ICMP_IDLE_TIMEOUT { + icmp_to_remove.insert(*flow_key); + } + } + + for flow_key in &ready_flow_keys { + // Skip if already in remove set (idle-timeout caught it first). + // O(1) via HashSet, not O(k) Vec::contains. 
+ if icmp_to_remove.contains(flow_key) { + continue; + } + let FlowKey::IcmpEcho(key) = *flow_key else { + continue; + }; + let frame = { + let Some(FlowEntry::IcmpEcho(entry)) = self.flow_table.get_mut(flow_key) else { + continue; + }; + let mut buf = [0u8; 1500]; + match entry.sock.recv_from(&mut buf) { + Ok((n, _addr)) => { + entry.last_activity = now; + Self::build_icmp_echo_reply_to_guest(key.dst_ip, entry.guest_id, &buf[..n]) + } + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue, + Err(_) => continue, + } + }; + if let Some(frame_bytes) = frame { + self.inject_to_guest.push(frame_bytes); + } + } + + for flow_key in icmp_to_remove { + if let Some(FlowEntry::IcmpEcho(e)) = self.flow_table.get(&flow_key) { + self.token_to_key.remove(&e.flow_token); + self.epoll.unregister(e.sock.as_raw_fd()).ok(); + } + self.flow_table.remove(&flow_key); + } + } + + /// Build an Ethernet/IPv4/ICMP echo-reply frame addressed to the guest. + /// + /// `src_ip` is the original ping destination (becomes the reply source). + /// `guest_id` is the ICMP identifier to write into the reply so the guest + /// can match it against its outstanding echo request. + /// `raw_icmp` is the raw ICMP packet received from the host kernel via + /// the `SOCK_DGRAM IPPROTO_ICMP` socket (no IP header; ICMP type + code + + /// checksum + payload). + /// + /// Returns `Some(frame)` on success, `None` if the packet cannot be parsed + /// or is not an `EchoReply`. + fn build_icmp_echo_reply_to_guest( + src_ip: Ipv4Address, + guest_id: u16, + raw_icmp: &[u8], + ) -> Option> { + let icmp = Icmpv4Packet::new_checked(raw_icmp).ok()?; + let parsed = Icmpv4Repr::parse(&icmp, &Default::default()).ok()?; + // Copy the payload before `icmp` / `parsed` go out of scope so we can + // build the outgoing `EchoReply` with a fresh borrow. Mirrors the + // same pattern used in `handle_icmp_frame` (Task 1.2). + let (seq_no, data_owned) = match parsed { + Icmpv4Repr::EchoReply { seq_no, data, .. 
} => (seq_no, data.to_vec()), + _ => return None, + }; + let reply = Icmpv4Repr::EchoReply { + ident: guest_id, + seq_no, + data: &data_owned, + }; + let ip_repr = Ipv4Repr { + src_addr: src_ip, + dst_addr: SLIRP_GUEST_IP, + next_header: IpProtocol::Icmp, + payload_len: reply.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GATEWAY_MAC), + dst_addr: EthernetAddress(GUEST_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + reply.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut icmp_out = Icmpv4Packet::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + reply.emit(&mut icmp_out, &Default::default()); + Some(buf) + } + + /// Drain replies from each active UDP flow socket and emit UDP frames to + /// the guest, driven by epoll readiness. + /// + /// Only flows whose token appears in `ready` with EPOLLIN set are visited. + /// Idle-timeout reaping still runs every call: the reap scan is cheap + /// (skips flows not in `ready`) and ensures stale entries are eventually + /// evicted even when no new data arrives. + /// + /// Reply addressing mirrors the original guest datagram in reverse: the + /// frame's IP source is the original destination (`key.dst_ip`) and UDP + /// source port is `key.dst_port`; the destination is the guest IP and + /// `key.guest_src_port`. + fn relay_udp_flows(&mut self, ready: &[EpollEvent]) { + let now = Instant::now(); + // Per-flow connected sockets are closed by Drop when the entry leaves + // flow_table. 
+ let mut stale: Vec = Vec::new(); + for (flow_key, entry) in &self.flow_table { + let FlowKey::Udp(_) = flow_key else { continue }; + let FlowEntry::Udp(udp_entry) = entry else { + continue; + }; + if now.duration_since(udp_entry.last_activity) > UDP_IDLE_TIMEOUT { + stale.push(*flow_key); + } + } + for flow_key in stale { + if let Some(FlowEntry::Udp(entry)) = self.flow_table.get(&flow_key) { + self.token_to_key.remove(&entry.flow_token); + self.epoll.unregister(entry.sock.as_raw_fd()).ok(); + } + self.flow_table.remove(&flow_key); + } - for key in to_remove { - self.tcp_nat.remove(&key); + let mut flow_keys: Vec = Vec::new(); + for event in ready { + if !event.readable || event.token & PROTO_TAG_MASK != PROTO_TAG_UDP { + continue; + } + let Some(flow_key) = self.token_to_key.get(&event.token).copied() else { + continue; + }; + flow_keys.push(flow_key); } + for flow_key in flow_keys { + let FlowKey::Udp(key) = flow_key else { + continue; + }; + let frame = { + let Some(FlowEntry::Udp(entry)) = self.flow_table.get_mut(&flow_key) else { + continue; + }; + let mut buf = [0u8; 1500]; + match entry.sock.recv(&mut buf) { + Ok(n) => { + entry.last_activity = now; + Self::build_udp_reply_to_guest( + key.dst_ip, + key.dst_port, + key.guest_src_port, + &buf[..n], + ) + } + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue, + Err(_) => continue, + } + }; + if let Some(frame_bytes) = frame { + self.inject_to_guest.push(frame_bytes); + } + } + } + + /// Build an Ethernet/IPv4/UDP frame addressed to the guest, carrying a + /// reply from a host-side UDP flow socket. + /// + /// - `src_ip` — original destination IP (becomes the reply source address). + /// - `src_port` — original destination port (becomes the reply source port). + /// - `dst_port` — guest's ephemeral source port (becomes the reply destination). + /// - `payload` — raw UDP payload received from the host socket. + /// + /// Returns `Some(frame)` on success. 
Currently infallible, but wrapped in + /// `Option` for symmetry with [`build_icmp_echo_reply_to_guest`]. + fn build_udp_reply_to_guest( + src_ip: Ipv4Address, + src_port: u16, + dst_port: u16, + payload: &[u8], + ) -> Option> { + let udp_repr = UdpRepr { src_port, dst_port }; + let ip_repr = Ipv4Repr { + src_addr: src_ip, + dst_addr: SLIRP_GUEST_IP, + next_header: IpProtocol::Udp, + payload_len: 8 + payload.len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GATEWAY_MAC), + dst_addr: EthernetAddress(GUEST_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = 14 + ip_repr.buffer_len() + 8 + payload.len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[14..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut udp = UdpPacket::new_unchecked(&mut buf[14 + ip_repr.buffer_len()..]); + udp_repr.emit( + &mut udp, + &IpAddress::Ipv4(src_ip), + &IpAddress::Ipv4(SLIRP_GUEST_IP), + payload.len(), + |b| b.copy_from_slice(payload), + &Default::default(), + ); + Some(buf) } // ── Packet building helpers ────────────────────────────────────── @@ -1097,6 +2271,45 @@ impl SlirpStack { buf } + + /// Push events from the net-poll thread into this backend's per-tick + /// event queue. Called from net_poll_thread after each successful + /// epoll_wait, while holding no other lock. + /// + /// drain_to_guest drains this queue with a brief uncontended lock + /// instead of re-entering EpollDispatch (which the net-poll thread + /// holds for the full 50 ms of the blocking wait). + pub fn push_ready_events(&self, events: &[EpollEvent]) { + // First push from net_poll_thread flips the flag so drain_to_guest + // skips its non-blocking-poll fallback. Stays set for the + // backend's lifetime — net_poll_thread doesn't disappear mid-run. 
+ self.has_external_poller.store(true, Ordering::Relaxed); + if events.is_empty() { + return; + } + let mut queue = self.pending_events.lock().unwrap(); + queue.extend_from_slice(events); + } +} + +impl NetworkBackend for SlirpBackend { + fn process_guest_frame(&mut self, frame: &[u8]) -> io::Result<()> { + SlirpBackend::process_guest_frame(self, frame).map_err(|e| io::Error::other(e.to_string())) + } + + fn drain_to_guest(&mut self, out: &mut Vec>) { + SlirpBackend::drain_to_guest(self, out) + } + + #[cfg(target_os = "linux")] + fn epoll_arc(&self) -> Option> { + Some(std::sync::Arc::clone(&self.epoll)) + } + + #[cfg(target_os = "linux")] + fn push_ready_events(&self, events: &[crate::network::epoll_dispatch::EpollEvent]) { + SlirpBackend::push_ready_events(self, events) + } } /// Build a TCP packet (free function to avoid borrow issues with &self methods) @@ -1163,6 +2376,49 @@ fn build_tcp_packet_static( buf } +/// Build a synthetic TCP SYN frame from the SLIRP gateway to the guest, +/// used for inbound port-forwarding. +/// +/// The frame mirrors what the guest would see from a real TCP client: +/// - src: `SLIRP_GATEWAY_IP:high_port` +/// - dst: `SLIRP_GUEST_IP:guest_port` +/// - control: `TcpControl::Syn` +/// - seq: caller-supplied `our_seq` (the host's chosen ISN for this flow) +/// - ack: 0 (no piggybacked ACK on the initial SYN) +/// +/// Caller pushes the returned bytes into `inject_to_guest`. The guest's +/// kernel sees an inbound TCP SYN, routes it to whatever's bound at +/// `guest_port`, and emits a SYN-ACK that `handle_tcp_frame` matches +/// to the seeded `SynSent` flow_table entry (5.5b.1). 
+#[cfg(any(test, feature = "bench-helpers"))] +pub fn synthesize_inbound_syn(high_port: u16, guest_port: u16, our_seq: u32) -> Vec { + build_tcp_packet_static( + SLIRP_GATEWAY_IP, + SLIRP_GUEST_IP, + high_port, + guest_port, + our_seq, + 0, + TcpControl::Syn, + &[], + ) +} + +#[cfg(not(any(test, feature = "bench-helpers")))] +#[allow(dead_code)] // consumed in 5.5b.3 +fn synthesize_inbound_syn(high_port: u16, guest_port: u16, our_seq: u32) -> Vec { + build_tcp_packet_static( + SLIRP_GATEWAY_IP, + SLIRP_GUEST_IP, + high_port, + guest_port, + our_seq, + 0, + TcpControl::Syn, + &[], + ) +} + // ── Utility functions ──────────────────────────────────────────────── fn rand_seq() -> u32 { @@ -1195,9 +2451,247 @@ fn ipv4_checksum(header: &[u8]) -> u16 { !sum as u16 } -impl Default for SlirpStack { +/// Bind one `TcpListener` per TCP port-forward rule, register each with +/// `epoll`, and return a map from host port to `(listener, guest_port)`. +/// +/// Rules whose bind or `set_nonblocking` calls fail are skipped with a +/// `WARN` log; the returned map contains only the rules that succeeded. +/// When `nat.port_forwards` contains no TCP rules the map is empty. 
+pub(crate) fn bind_port_forward_listeners( + nat: &nat::Rules, + epoll: &Arc, +) -> HashMap { + let mut listeners = HashMap::new(); + for port_forward in &nat.port_forwards { + if port_forward.proto != nat::ForwardProto::Tcp { + continue; + } + let host_port = port_forward.host_port; + let guest_port = port_forward.guest_port; + let listener = match TcpListener::bind(("127.0.0.1", host_port)) { + Ok(l) => l, + Err(bind_error) => { + warn!( + host_port, + error = %bind_error, + "SLIRP port-forward: bind failed, rule disabled" + ); + continue; + } + }; + if let Err(nb_error) = listener.set_nonblocking(true) { + warn!( + host_port, + error = %nb_error, + "SLIRP port-forward: set_nonblocking failed, rule disabled" + ); + continue; + } + let token = flow_token_for_listener(host_port); + if let Err(reg_error) = epoll.register(listener.as_raw_fd(), token, RegisterMode::Read) { + warn!( + host_port, + error = %reg_error, + "SLIRP port-forward: epoll register failed, rule disabled" + ); + continue; + } + debug!( + host_port, + guest_port, "SLIRP port-forward: listening on 127.0.0.1 (epoll-driven)" + ); + listeners.insert(host_port, (listener, guest_port)); + } + listeners +} + +impl Default for SlirpBackend { fn default() -> Self { - Self::new().expect("Failed to create default SlirpStack") + Self::new().expect("Failed to create default SlirpBackend") + } +} + +impl SlirpBackend { + /// Re-register every live host FD in `flow_table` with the current epoll + /// dispatcher. Called from snapshot restore: `epoll_fd` is a kernel + /// handle that does not survive snapshot, so a fresh dispatcher starts + /// empty even though `flow_table` deserialized correctly with new FDs. + /// + /// The current snapshot path does not reconstruct `flow_table` — the + /// backend always starts empty after restore and new flows form naturally. 
+ /// This method is therefore a no-op today but is wired in advance so + /// future work that persists restored flows across snapshot/restore has a + /// ready call site. + /// Re-register every live host FD in `flow_table` with the current epoll + /// dispatcher and rebuild `token_to_key`. Called from snapshot restore: + /// the `epoll_fd` is a kernel handle that does not survive snapshot, so a + /// fresh dispatcher starts empty even though `flow_table` deserialized + /// correctly with new FDs. + /// + /// Each existing flow keeps its stored `flow_token` so that any + /// already-queued readiness events (unlikely post-restore, but safe) still + /// resolve correctly. The `token_to_key` map is rebuilt from scratch + /// because it is in-memory-only state; it does not need to be persisted. + pub fn rebuild_epoll_from_flow_table(&mut self) { + use std::os::fd::AsRawFd; + self.token_to_key.clear(); + for (flow_key, entry) in &self.flow_table { + match (flow_key, entry) { + (FlowKey::Tcp(_), FlowEntry::Tcp(e)) => { + self.token_to_key.insert(e.flow_token, *flow_key); + let _ = self.epoll.register( + e.host_stream.as_raw_fd(), + e.flow_token, + RegisterMode::Read, + ); + } + (FlowKey::Udp(_), FlowEntry::Udp(e)) => { + self.token_to_key.insert(e.flow_token, *flow_key); + let _ = + self.epoll + .register(e.sock.as_raw_fd(), e.flow_token, RegisterMode::Read); + } + (FlowKey::IcmpEcho(_), FlowEntry::IcmpEcho(e)) => { + self.token_to_key.insert(e.flow_token, *flow_key); + let _ = + self.epoll + .register(e.sock.as_raw_fd(), e.flow_token, RegisterMode::Read); + } + _ => {} + } + } + } +} + +/// Test-only helpers — not compiled into production builds. +/// +/// These are `#[cfg(test)]`/`#[cfg(feature = "bench-helpers")]` methods on +/// `SlirpBackend` that allow unit tests and divan benches to insert synthetic +/// flow entries without widening the visibility of private types. 
+/// The full behavioral contract for the SynSent → Established transition is +/// pinned in the E2E test `tcp_inbound_syn_ack_completes_handshake` below and +/// will be further exercised end-to-end in task 5.5b.5 +/// (`tcp_port_forward_inbound` in `tests/network_baseline.rs`). +#[cfg(any(test, feature = "bench-helpers"))] +impl SlirpBackend { + /// Insert a synthetic `SynSent` entry into the flow table. + /// + /// Used by `tcp_inbound_syn_ack_completes_handshake` to pre-seed the state + /// that would normally be created by `synthesize_inbound_syn` (5.5b.2). + /// + /// `guest_port`: the guest's listening service port (e.g. 8080). + /// `high_port`: the ephemeral source port we used for the synthesized SYN. + /// `our_isn`: the ISN we put in the synthesized SYN. + /// `host_stream`: a `TcpStream` representing the accepted host-side connection. + pub fn insert_synthetic_synsent_entry( + &mut self, + guest_port: u16, + high_port: u16, + our_isn: u32, + host_stream: TcpStream, + ) { + let key = NatKey { + guest_src_port: guest_port, + dst_ip: SLIRP_GATEWAY_IP, + dst_port: high_port, + }; + let host_fd = host_stream.as_raw_fd(); + let token = next_flow_token(PROTO_TAG_TCP); + let flow_key = FlowKey::Tcp(key); + let entry = TcpNatEntry { + host_stream, + state: TcpNatState::SynSent, + our_seq: our_isn, + guest_ack: 0, + last_activity: Instant::now(), + bytes_in_flight: 0, + flow_token: token, + }; + self.flow_table.insert(flow_key, FlowEntry::Tcp(entry)); + self.token_to_key.insert(token, flow_key); + // Skip epoll registration in test/bench contexts: the synthetic + // stream is already non-blocking but test harnesses check specific + // state transitions, not readiness events. 
+ #[cfg(not(any(test, feature = "bench-helpers")))] + { + if let Err(e) = self.epoll.register(host_fd, token, RegisterMode::Read) { + warn!( + guest_port, + high_port, + fd = host_fd, + error = %e, + "SLIRP: epoll register for synthetic SynSent failed" + ); + } + self.epoll_waker.wake(); + } + #[cfg(any(test, feature = "bench-helpers"))] + let _ = host_fd; + } + + /// Return the `TcpNatState` for the flow identified by `(guest_port, GATEWAY_IP, high_port)`, + /// or `None` if no such entry exists in the flow table. + #[allow(dead_code)] + pub(crate) fn tcp_flow_state(&self, guest_port: u16, high_port: u16) -> Option { + let key = NatKey { + guest_src_port: guest_port, + dst_ip: SLIRP_GATEWAY_IP, + dst_port: high_port, + }; + match self.flow_table.get(&FlowKey::Tcp(key))? { + FlowEntry::Tcp(entry) => Some(entry.state), + _ => None, + } + } + + /// Count how many frames queued for injection carry the given TCP flags. + /// + /// Checks `inject_to_guest` for Ethernet/IPv4/TCP frames where the TCP + /// `ack` flag is set and the `syn` flag is clear (i.e. a plain ACK). + #[allow(dead_code)] + pub(crate) fn injected_plain_ack_count(&self) -> usize { + let mut count = 0; + for frame in &self.inject_to_guest { + if frame.len() < 54 { + continue; + } + let tcp_offset = 14 + 20; + let flags_byte = frame[tcp_offset + 13]; + let ack = flags_byte & 0x10 != 0; + let syn = flags_byte & 0x02 != 0; + if ack && !syn { + count += 1; + } + } + count + } + + /// Inject an [`InboundAccept`] directly into the accept channel, bypassing + /// the listener thread. Used by unit tests to drive + /// `process_pending_inbound_accepts` without a real listener. + #[allow(dead_code)] + pub(crate) fn push_inbound_accept(&self, accepted: InboundAccept) { + self.accept_sender + .send(accepted) + .expect("accept channel must be open"); + } + + /// Returns the number of user-registered FDs in the epoll set + /// (excludes the self-pipe). 
+ pub fn registered_fd_count(&self) -> usize { + self.epoll.registered_fd_count() + } + + /// Replace the epoll dispatcher with a fresh empty one, discarding all + /// existing registrations. Simulates the post-snapshot state where the + /// kernel-side `epoll_fd` handle does not survive and a new one is + /// created. Used by `epoll_set_rebuilt_from_flow_table_smoke` to set up + /// the precondition that `rebuild_epoll_from_flow_table` must fix. + pub fn reset_epoll_for_snapshot_test(&mut self) { + let new_epoll_inner = EpollDispatch::new().expect("EpollDispatch::new"); + let new_waker = new_epoll_inner.waker(); + self.epoll = Arc::new(new_epoll_inner); + self.epoll_waker = new_waker; } } @@ -1220,7 +2714,7 @@ mod tests { #[test] fn test_slirp_stack_creation() { - let stack = SlirpStack::new(); + let stack = SlirpBackend::new(); assert!(stack.is_ok()); } @@ -1232,44 +2726,217 @@ mod tests { assert_ne!(cksum, 0); } - #[test] - fn test_to_host_buffer_limit() { - assert_eq!(MAX_TO_HOST_BUFFER, 256 * 1024); + /// Build a TCP frame from the guest (SLIRP_GUEST_IP) to a given destination. + /// + /// Used by `tcp_inbound_syn_ack_completes_handshake` to synthesize the + /// guest's SYN-ACK reply to our port-forward SYN. 
+ fn build_guest_tcp_frame( + dst_ip: Ipv4Address, + src_port: u16, + dst_port: u16, + seq: u32, + ack_number: u32, + control: TcpControl, + set_ack_flag: bool, + ) -> Vec { + use smoltcp::wire::{ + EthernetAddress, EthernetFrame, EthernetRepr, IpAddress, Ipv4Packet, Ipv4Repr, + TcpPacket, TcpRepr, TcpSeqNumber, + }; + let tcp_repr = TcpRepr { + src_port, + dst_port, + control, + seq_number: TcpSeqNumber(seq as i32), + ack_number: if set_ack_flag { + Some(TcpSeqNumber(ack_number as i32)) + } else { + None + }, + window_len: 65535, + window_scale: None, + max_seg_size: None, + sack_permitted: false, + sack_ranges: [None; 3], + payload: &[], + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: dst_ip, + next_header: smoltcp::wire::IpProtocol::Tcp, + payload_len: tcp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: smoltcp::wire::EthernetProtocol::Ipv4, + }; + let checksums = smoltcp::phy::ChecksumCapabilities::default(); + let total = eth_repr.buffer_len() + ip_repr.buffer_len() + tcp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(eth.payload_mut()); + ip_repr.emit(&mut ip, &checksums); + let mut tcp = TcpPacket::new_unchecked(ip.payload_mut()); + tcp_repr.emit( + &mut tcp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(dst_ip), + &checksums, + ); + buf } + /// Verify that a guest SYN-ACK frame on a SynSent entry: + /// (a) transitions the flow state to Established, and + /// (b) queues exactly one plain ACK frame towards the guest. + /// + /// The full E2E behavioral contract (including host-listener wiring) will be + /// pinned in `tests/network_baseline.rs::tcp_port_forward_inbound` (task 5.5b.5). 
#[test] - fn test_tcp_nat_entry_has_write_buffer() { - let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); - let addr = listener.local_addr().unwrap(); - let stream = TcpStream::connect_timeout(&addr, Duration::from_secs(1)).unwrap(); - stream.set_nonblocking(true).ok(); + fn tcp_inbound_syn_ack_completes_handshake() { + use std::net::TcpListener; + + let guest_port: u16 = 8080; + let high_port: u16 = 44000; + let our_isn: u32 = 0x0000_1000; + let guest_isn: u32 = 0xDEAD_BEEF; + + // Create a loopback TcpStream pair for the host_stream field. + // The stream is never read/written in this unit test — we only + // exercise the TCP state machine. + let listener = TcpListener::bind("127.0.0.1:0").expect("bind loopback"); + let host_stream = + TcpStream::connect(listener.local_addr().unwrap()).expect("connect loopback"); + host_stream.set_nonblocking(true).ok(); + + let mut backend = SlirpBackend::new().expect("SlirpBackend::new"); + backend.insert_synthetic_synsent_entry(guest_port, high_port, our_isn, host_stream); + + // Confirm state is SynSent before feeding the SYN-ACK. + assert_eq!( + backend.tcp_flow_state(guest_port, high_port), + Some(TcpNatState::SynSent), + "entry must start as SynSent" + ); - let entry = TcpNatEntry { - host_stream: stream, - state: TcpNatState::Established, - our_seq: 1000, - guest_ack: 2000, - to_guest: Vec::new(), - to_host: Vec::new(), - to_host_pending_ack: None, - last_activity: Instant::now(), - }; + // Build the guest's SYN-ACK: src=GUEST:guest_port, dst=GATEWAY:high_port, + // SYN+ACK, seq=guest_isn, ack=our_isn+1. + let syn_ack = build_guest_tcp_frame( + SLIRP_GATEWAY_IP, + guest_port, + high_port, + guest_isn, + our_isn.wrapping_add(1), + TcpControl::Syn, // SYN flag — combined with ACK flag via ack_number=Some(...) + true, // set ACK flag + ); + + backend + .process_guest_frame(&syn_ack) + .expect("process SYN-ACK"); + + // (a) state must be Established now. 
+ assert_eq!( + backend.tcp_flow_state(guest_port, high_port), + Some(TcpNatState::Established), + "state must be Established after SYN-ACK" + ); - assert!(entry.to_host.is_empty()); - assert!(entry.to_host_pending_ack.is_none()); + // (b) exactly one plain ACK must have been queued for injection to the guest. + assert_eq!( + backend.injected_plain_ack_count(), + 1, + "exactly one plain ACK must be queued for the guest" + ); } + /// Verify that `process_pending_inbound_accepts` drains one `InboundAccept` + /// from the channel, inserts a `SynSent` flow-table entry, and queues a + /// synthesized SYN frame for injection to the guest. + /// + /// This pins the contract for task 5.5b.3. The test is white-box: it uses + /// `push_inbound_accept` (a `#[cfg(test)]` helper that injects into the + /// internal channel) so we don't need a real listener thread. #[test] - fn test_to_host_buffer_rejects_over_limit() { - let existing = vec![0u8; MAX_TO_HOST_BUFFER]; - let new_payload = [0u8; 1]; - assert!(existing.len() + new_payload.len() > MAX_TO_HOST_BUFFER); + fn process_pending_inbound_accepts_seeds_synsent_and_queues_syn() { + use std::net::TcpListener; + + let guest_port: u16 = 9000; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind loopback"); + let local_addr = listener.local_addr().unwrap(); + let host_stream = TcpStream::connect(local_addr).expect("connect loopback"); + let high_port = host_stream.local_addr().unwrap().port(); + host_stream.set_nonblocking(true).ok(); - let small_existing = vec![0u8; MAX_TO_HOST_BUFFER - 10]; - let fits = [0u8; 10]; - assert!(small_existing.len() + fits.len() <= MAX_TO_HOST_BUFFER); + let mut backend = SlirpBackend::new().expect("SlirpBackend::new"); - let overflows = [0u8; 11]; - assert!(small_existing.len() + overflows.len() > MAX_TO_HOST_BUFFER); + // Inject an InboundAccept without a real listener thread. 
+ backend.push_inbound_accept(InboundAccept { + host_stream, + high_port, + guest_port, + }); + + // Before processing, no flow entry should exist. + assert_eq!( + backend.tcp_flow_state(guest_port, high_port), + None, + "no flow entry before processing" + ); + + // Drive process_pending_inbound_accepts. + backend.process_pending_inbound_accepts(); + + // After processing, a SynSent entry must exist. + assert_eq!( + backend.tcp_flow_state(guest_port, high_port), + Some(TcpNatState::SynSent), + "SynSent entry must be present after processing" + ); + + // Exactly one SYN frame must have been queued for injection. + // Note: build_tcp_packet_static sets ack_number=Some(0) which also + // sets the ACK flag bit; we detect the SYN by checking just the SYN bit. + let syn_count = backend + .inject_to_guest + .iter() + .filter(|frame| { + if frame.len() < 54 { + return false; + } + let tcp_offset = 14 + 20; + let flags_byte = frame[tcp_offset + 13]; + flags_byte & 0x02 != 0 + }) + .count(); + assert_eq!(syn_count, 1, "exactly one SYN must be queued for the guest"); + } + + /// Verify that `with_security` binds exactly one epoll-driven listener when + /// given one TCP port-forward rule, and zero listeners when given none. + #[test] + fn with_security_binds_listener_per_tcp_port_forward() { + // Empty port-forwards: no listeners. + let empty = SlirpBackend::with_security(64, 50, &["169.254.0.0/16".to_string()], &[]) + .expect("SlirpBackend::with_security (empty)"); + assert_eq!( + empty.port_forward_listeners.len(), + 0, + "zero listeners for empty port_forwards" + ); + + // One TCP port-forward: exactly one listener. 
+ let one = + SlirpBackend::with_security(64, 50, &["169.254.0.0/16".to_string()], &[(18080, 80)]) + .expect("SlirpBackend::with_security (one forward)"); + assert_eq!( + one.port_forward_listeners.len(), + 1, + "one listener for one TCP port-forward rule" + ); } } diff --git a/src/vmm/mod.rs b/src/vmm/mod.rs index 354ea5ef..97fe2d0f 100644 --- a/src/vmm/mod.rs +++ b/src/vmm/mod.rs @@ -36,7 +36,7 @@ use crate::guest::protocol::{ ExecOutputChunk, ExecRequest, ExecResponse, MkdirPRequest, MkdirPResponse, TelemetrySubscribeRequest, WriteFileRequest, WriteFileResponse, }; -use crate::network::slirp::SlirpStack; +use crate::network::slirp::SlirpBackend; use crate::observe::telemetry::TelemetryAggregator; use crate::observe::Observer; use crate::vmm::cpu::MmioDevices; @@ -315,11 +315,15 @@ impl MicroVm { // Virtio-net with SLIRP backend if networking is enabled let virtio_net = if config.network { debug!("Setting up SLIRP networking"); - let slirp = Arc::new(Mutex::new(SlirpStack::with_security( - config.security.max_concurrent_connections, - config.security.max_connections_per_second, - &config.security.network_deny_list, - )?)); + let slirp: Arc> = + Arc::new(Mutex::new(SlirpBackend::with_security( + config.security.max_concurrent_connections, + config.security.max_connections_per_second, + &config.security.network_deny_list, + // TODO(5.5b): wire port_forwards from NetworkConfig once VoidBoxConfig + // carries the field; for now no host listeners are spawned. + &[], + )?)); let mut net_device = VirtioNetDevice::new(slirp)?; net_device.set_mmio_base(0xd000_0000); debug!( @@ -685,7 +689,8 @@ impl MicroVm { // 7b. 
Restore virtio-net if snapshot had networking enabled let virtio_net: Option>> = if snap.config.network { if let Some(ref net_state) = snap.net_state { - let slirp = Arc::new(Mutex::new(SlirpStack::new()?)); + let slirp: Arc> = + Arc::new(Mutex::new(SlirpBackend::new()?)); let mut net_dev = VirtioNetDevice::new(slirp)?; net_dev.restore_state(net_state); net_dev.set_mmio_base(0xd000_0000); @@ -1589,8 +1594,19 @@ fn vsock_irq_thread( /// from host TCP sockets accumulates unread, causing TLS handshakes and /// API calls to time out. /// -/// This thread wakes every 5 ms, reads any pending host data via -/// `try_inject_rx`, and fires IRQ 10 to notify the guest. +/// This thread uses an adaptive `EpollDispatch::wait_with_timeout`: +/// - **Active** (5 ms): any kernel readiness event in the last cycle keeps +/// the thread in the 5 ms cadence so the guest's TCP delayed-ACK timer +/// fires on schedule. Both real socket readiness events and self-pipe +/// wakes (from `epoll_waker.wake()` after a new SYN or injected ACK) +/// count as activity. +/// - **Idle** (50 ms): a cycle with no kernel events backs off to 50 ms. +/// New flows or incoming data wake the wait immediately via the epoll set +/// or the waker, so the 50 ms cap only fires when the network is truly +/// quiet. +/// +/// When the network backend does not provide an epoll instance +/// (non-SlirpBackend), the thread falls back to a fixed 5 ms sleep. fn net_poll_thread(net_dev: Arc>, vm: Arc, running: Arc) { #[repr(C)] struct KvmIrqLevel { @@ -1598,10 +1614,83 @@ fn net_poll_thread(net_dev: Arc>, vm: Arc, running: A level: u32, } const KVM_IRQ_LINE: libc::c_ulong = 0x4008_AE61; + // Adaptive epoll_wait timeout. Active periods need a 5 ms cadence so + // the guest's TCP delayed-ACK timer fires on schedule (the guest spends + // most idle time in HLT and relies on our IRQ pulses to advance vCPU + // schedule slots; a 50 ms gap causes +40 ms CRR latency, exactly + // Linux's delayed-ACK period). 
Idle periods can use the long timeout + // safely: any new flow's SYN goes through process_guest_frame which + // calls epoll_waker.wake(), and host data arrival fires EPOLLIN — both + // wake the wait immediately, so the 50 ms ceiling never bites a real + // packet. We pick the next timeout based on whether the last wait + // returned events: had-events ⇒ stay in the active 5 ms cadence, + // timed-out ⇒ back off to 50 ms. Maintains correctness; recovers the + // 10x idle wakeup reduction that motivated Phase 6.4 in the first + // place. + const ACTIVE_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(5); + const IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(50); + const FALLBACK_SLEEP: std::time::Duration = std::time::Duration::from_millis(5); + + // Start in the idle regime — first SYN flips us into active. + let mut epoll_wait_timeout: std::time::Duration = IDLE_TIMEOUT; + let vm_fd = vm.vm_fd().as_raw_fd(); let guest_memory = vm.guest_memory(); + + // Obtain the epoll Arc from the backend without holding the device lock + // across the blocking wait. Falls back to None if the backend is not + // a SlirpBackend (e.g. in unit tests or future alternative backends). + let epoll_arc = { + match net_dev.lock() { + Ok(guard) => guard.epoll_arc(), + Err(_) => None, + } + }; + + let mut epoll_events: Vec = Vec::new(); + while running.load(Ordering::Relaxed) { - std::thread::sleep(std::time::Duration::from_millis(5)); + // Block outside the device lock: either on epoll readiness or a short + // sleep. This lets the vCPU thread acquire the device lock without + // contention during the wait phase. + epoll_events.clear(); + // Raw kernel count from epoll_wait, including self-pipe wakes + // that the filter strips from `epoll_events`. A self-pipe wake + // is the signal that handle_tcp_frame queued a frame and called + // epoll_waker.wake() — i.e. 
real activity that should keep the + // adaptive timeout in the active 5 ms cadence even though + // `epoll_events.is_empty()`. + let mut raw_kernel_events: usize = 0; + if let Some(ref ep_arc) = epoll_arc { + raw_kernel_events = ep_arc + .wait_with_timeout(&mut epoll_events, epoll_wait_timeout) + .unwrap_or(0); + } else { + std::thread::sleep(FALLBACK_SLEEP); + } + + // Adapt the next-cycle timeout based on this cycle's outcome. + // Any kernel event (real readiness OR self-pipe wake from the + // vCPU thread) signals activity and keeps us in the 5 ms + // cadence so the guest's TCP delayed-ACK timer fires on time. + // A pure timeout drops us to the 50 ms idle cadence. One quiet + // cycle to switch to idle, one event to switch back to active. + epoll_wait_timeout = if raw_kernel_events > 0 { + ACTIVE_TIMEOUT + } else { + IDLE_TIMEOUT + }; + + // Push ready events into the backend's queue before acquiring the + // device lock for inject/IRQ work. drain_to_guest will consume them + // without re-locking EpollDispatch, eliminating mutex contention + // between the net-poll thread's 50 ms blocking wait and the vCPU + // thread's process_guest_frame → drain_to_guest path. + if !epoll_events.is_empty() { + if let Ok(guard) = net_dev.lock() { + guard.push_events_to_backend(&epoll_events); + } + } let has_interrupt = { let mut guard = match net_dev.lock() { @@ -1616,6 +1705,9 @@ fn net_poll_thread(net_dev: Arc>, vm: Arc, running: A // an earlier edge was missed by the guest. if has_interrupt { let assert_irq = KvmIrqLevel { irq: 10, level: 1 }; + // SAFETY: KVM_IRQ_LINE ioctl writes the KvmIrqLevel struct into + // the in-kernel APIC; the struct is #[repr(C)] and the fd is valid + // for the lifetime of `vm`. unsafe { libc::ioctl(vm_fd, KVM_IRQ_LINE as _, &assert_irq); } diff --git a/tests/network_baseline.rs b/tests/network_baseline.rs new file mode 100644 index 00000000..d5115426 --- /dev/null +++ b/tests/network_baseline.rs @@ -0,0 +1,1293 @@ +//! 
Layer-1 correctness pins for the smoltcp-based SLIRP stack. +//! +//! These tests drive `SlirpBackend` directly with synthetic Ethernet +//! frames — no VM, no kernel, no host sockets to outside hosts. The +//! goal is to lock observable behavior (including deliberately broken +//! behavior) so the passt-pattern refactor's diff is legible to +//! reviewers. +//! +//! TODO(0D.4): migrate poll() → drain_to_guest() and remove #[allow(deprecated)]. +#![allow(deprecated)] +//! +//! Three tests assert *broken* behavior on purpose. Each is marked +//! `BROKEN_ON_PURPOSE` and flips when the corresponding fix lands: +//! +//! - `tcp_writes_more_than_256kb_succeed` (was `tcp_to_host_buffer_drops_at_256kb`) +//! - `udp_non_dns_round_trips` (was `udp_non_dns_silently_dropped`) +//! - `icmp_echo_returns_reply` (was `icmp_echo_silently_dropped`) +//! +//! Run with: `cargo test --test network_baseline` + +#![cfg(target_os = "linux")] +// Imports and helpers used by test cases added in tasks 0A.2–0A.9. +#![allow(unused_imports, dead_code)] + +use smoltcp::wire::{ + ArpOperation, ArpPacket, ArpRepr, EthernetAddress, EthernetFrame, EthernetProtocol, + EthernetRepr, Icmpv4Packet, Icmpv4Repr, IpAddress, IpProtocol, Ipv4Address, Ipv4Packet, + Ipv4Repr, TcpControl, TcpPacket, TcpRepr, UdpPacket, UdpRepr, +}; +use std::io::{Read, Write}; +use std::net::{Ipv4Addr, SocketAddr, TcpListener, UdpSocket}; +use std::os::unix::io::AsRawFd; +use void_box::network::nat::{translate_outbound, Rules}; +use void_box::network::slirp::{ + SlirpBackend, GATEWAY_MAC, GUEST_MAC, SLIRP_DNS_IP, SLIRP_GATEWAY_IP, SLIRP_GUEST_IP, +}; +use void_box::network::NetworkBackend; +// Used by tcp_deny_list_emits_rst to express the deny CIDR as a typed network. +// `with_security` takes `&[String]`, so we convert via `.to_string()` at the +// call site; this import is kept here (module scope) per project convention. 
+use ipnet::Ipv4Net; + +const GUEST_EPHEMERAL_PORT: u16 = 49152; +const ETH_HDR_LEN: usize = 14; +const IPV4_MIN_HDR_LEN: usize = 20; +const TCP_MIN_HDR_LEN: usize = 20; +const UDP_HDR_LEN: usize = 8; + +/// Builds a minimal IPv4-over-Ethernet TCP segment from guest to a +/// pretend external IP. Returns the full Ethernet frame bytes. +fn build_tcp_frame( + dst_ip: Ipv4Address, + src_port: u16, + dst_port: u16, + seq: u32, + ack: u32, + control: TcpControl, + payload: &[u8], +) -> Vec { + let tcp_repr = TcpRepr { + src_port, + dst_port, + control, + seq_number: smoltcp::wire::TcpSeqNumber(seq as i32), + ack_number: if ack == 0 { + None + } else { + Some(smoltcp::wire::TcpSeqNumber(ack as i32)) + }, + window_len: 65535, + window_scale: None, + max_seg_size: None, + sack_permitted: false, + sack_ranges: [None, None, None], + payload, + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: dst_ip, + next_header: IpProtocol::Tcp, + payload_len: tcp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = ETH_HDR_LEN + ip_repr.buffer_len() + tcp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut tcp = TcpPacket::new_unchecked(&mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..]); + tcp_repr.emit( + &mut tcp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(dst_ip), + &Default::default(), + ); + buf +} + +/// Builds a UDP-over-Ethernet datagram from guest. 
+fn build_udp_frame(dst_ip: Ipv4Address, src_port: u16, dst_port: u16, payload: &[u8]) -> Vec { + let udp_repr = UdpRepr { src_port, dst_port }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: dst_ip, + next_header: IpProtocol::Udp, + payload_len: UDP_HDR_LEN + payload.len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = ETH_HDR_LEN + ip_repr.buffer_len() + UDP_HDR_LEN + payload.len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut udp = UdpPacket::new_unchecked(&mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..]); + udp_repr.emit( + &mut udp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(dst_ip), + payload.len(), + |b| b.copy_from_slice(payload), + &Default::default(), + ); + buf +} + +/// Parses one emitted frame as a TCP segment directed to the guest. +/// +/// Returns `(seq, ack, control, payload_len)` on success, or `None` +/// if the frame is not IPv4-TCP destined for the guest or has an +/// unrecognized flag combination. +fn parse_tcp_to_guest(frame: &[u8]) -> Option<(u32, u32, TcpControl, usize)> { + let eth = EthernetFrame::new_checked(frame).ok()?; + if eth.ethertype() != EthernetProtocol::Ipv4 { + return None; + } + let ip = Ipv4Packet::new_checked(eth.payload()).ok()?; + if ip.next_header() != IpProtocol::Tcp || ip.dst_addr() != SLIRP_GUEST_IP { + return None; + } + let tcp = TcpPacket::new_checked(ip.payload()).ok()?; + // Reconstruct TcpControl from individual flag accessors (smoltcp 0.11 + // exposes no combined .control() method on TcpPacket). 
+ let control = match (tcp.syn(), tcp.fin(), tcp.rst(), tcp.psh()) { + (false, false, false, false) => TcpControl::None, + (false, false, false, true) => TcpControl::Psh, + (true, false, false, _) => TcpControl::Syn, + (false, true, false, _) => TcpControl::Fin, + (false, false, true, _) => TcpControl::Rst, + _ => return None, + }; + Some(( + tcp.seq_number().0 as u32, + tcp.ack_number().0 as u32, + control, + tcp.payload().len(), + )) +} + +/// Drains frames the stack wants to send to the guest, calling +/// `drain_to_guest` up to `n` times. Returns all frames produced +/// across the calls (caller may not care about per-call boundaries). +fn drain_n(stack: &mut SlirpBackend, n: usize) -> Vec> { + let mut out: Vec> = Vec::new(); + for _ in 0..n { + stack.drain_to_guest(&mut out); + } + out +} + +#[test] +fn tcp_handshake_emits_synack() { + // Bind a host listener on 127.0.0.1 so the stack's connect() + // succeeds. SLIRP rewrites 10.0.2.2 → 127.0.0.1. + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + let mut stack = SlirpBackend::new().expect("stack"); + + // Guest sends SYN to gateway IP at the listener's port. + let syn = build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + ); + stack.process_guest_frame(&syn).expect("process syn"); + + // Drain — SYN-ACK should be queued. + let frames = drain_n(&mut stack, 4); + let synack = frames + .iter() + .find_map(|f| parse_tcp_to_guest(f)) + .expect("synack emitted"); + + let (_seq, ack, ctrl, _len) = synack; + assert_eq!(ctrl, TcpControl::Syn, "control flags include SYN+ACK"); + assert_eq!(ack, 1001, "ack = guest_seq + 1"); +} + +#[test] +fn tcp_data_round_trip() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + // Spawn a thread that accepts and echoes one chunk. 
+ let server = std::thread::spawn(move || { + let (mut sock, _) = listener.accept().unwrap(); + let mut buf = [0u8; 16]; + let n = sock.read(&mut buf).unwrap(); + sock.write_all(&buf[..n]).unwrap(); + }); + + let mut stack = SlirpBackend::new().expect("stack"); + + // SYN + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + + // Drain SYN-ACK; capture our_seq. + let synack_frames = drain_n(&mut stack, 4); + let (our_seq, _ack, _ctrl, _len) = synack_frames + .iter() + .find_map(|f| parse_tcp_to_guest(f)) + .expect("synack"); + + // ACK the SYN-ACK (completes handshake). + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1001, + our_seq + 1, + TcpControl::None, + &[], + )) + .unwrap(); + + // Send 5 bytes of data. + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + 1001, + our_seq + 1, + TcpControl::Psh, + b"hello", + )) + .unwrap(); + + // Wait for server to echo and stack to relay back. + server.join().unwrap(); + let mut total_payload = 0; + for _ in 0..40 { + let frames = drain_n(&mut stack, 1); + for f in frames.iter() { + if let Some((_, _, _, len)) = parse_tcp_to_guest(f) { + total_payload += len; + } + } + if total_payload >= 5 { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + assert!( + total_payload >= 5, + "expected at least 5 bytes echoed back to guest, got {total_payload}" + ); +} + +/// BROKEN_ON_PURPOSE pin (now passing): passt-style sequence mirroring and +/// don't-ACK-on-WouldBlock backpressure replace the 256 KB userspace cliff. +/// Pushing >1 MB through the relay succeeds — the kernel's socket buffer +/// holds outstanding bytes, the guest retransmits unacked segments, and the +/// connection stays alive instead of being reset. 
+#[test]
+fn tcp_writes_more_than_256kb_succeed() {
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;
+
+    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
+    let host_port = listener.local_addr().unwrap().port();
+
+    // Constrain the listener's recv buffer (small but reasonable —
+    // ensures TCP backpressure kicks in at a point we can observe
+    // without a multi-megabyte memory footprint).
+    {
+        let val: libc::c_int = 4096;
+        unsafe {
+            libc::setsockopt(
+                listener.as_raw_fd(),
+                libc::SOL_SOCKET,
+                libc::SO_RCVBUF,
+                &val as *const libc::c_int as *const libc::c_void,
+                std::mem::size_of::<libc::c_int>() as libc::socklen_t,
+            );
+        }
+    }
+
+    // Server: accept and drain everything we get.
+    let bytes_received = Arc::new(AtomicUsize::new(0));
+    let bytes_received_thr = Arc::clone(&bytes_received);
+    let server = std::thread::spawn(move || {
+        let (mut sock, _) = listener.accept().unwrap();
+        let mut buf = [0u8; 4096];
+        loop {
+            match sock.read(&mut buf) {
+                Ok(0) => break, // EOF from guest side
+                Ok(n) => {
+                    bytes_received_thr.fetch_add(n, Ordering::Relaxed);
+                }
+                Err(_) => break,
+            }
+        }
+    });
+
+    let mut stack = SlirpBackend::new().expect("stack");
+
+    // Handshake.
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            1000,
+            0,
+            TcpControl::Syn,
+            &[],
+        ))
+        .unwrap();
+    let synack = drain_n(&mut stack, 4)
+        .into_iter()
+        .find_map(|f| parse_tcp_to_guest(&f))
+        .expect("synack");
+    let (our_seq, _, _, _) = synack;
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            1001,
+            our_seq + 1,
+            TcpControl::None,
+            &[],
+        ))
+        .unwrap();
+
+    // Push 1 MB in 1 KB chunks. Drain after every batch so the
+    // host's read thread can drain the kernel buffer and ACKs flow
+    // back to the guest. The new TCP-backpressure path means some
+    // chunks won't be ACK'd immediately; we re-send those (TCP-style
+    // retransmit) until they go through.
+ const TOTAL: usize = 1024 * 1024; + const CHUNK: usize = 1024; + let chunk = vec![b'x'; CHUNK]; + let mut seq = 1001u32; + let mut acked_seq = 1001u32; + let mut saw_close = false; + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); + + while bytes_received.load(Ordering::Relaxed) < TOTAL && std::time::Instant::now() < deadline { + // Retransmit semantics: only advance the send cursor once the + // previous chunk has been ACK'd. If the stack stops ACKing + // (backpressure engaged), we re-send the same seq/payload until + // it's acknowledged. This matches production guest-TCP retransmit + // behavior. + let _ = stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + seq, + our_seq + 1, + TcpControl::Psh, + &chunk, + )); + + // Drain frames; track the highest ACK we've seen and watch + // for RST/FIN that would indicate a premature close. + for f in drain_n(&mut stack, 4) { + if let Some((_, ack, ctrl, _)) = parse_tcp_to_guest(&f) { + if matches!(ctrl, TcpControl::Rst | TcpControl::Fin) { + saw_close = true; + } + if ack > acked_seq { + acked_seq = ack; + } + } + } + + if saw_close { + break; + } + + // Advance our send cursor only past ACK'd data. If the stack + // didn't ACK this chunk, the next loop iteration re-sends the + // same seq/payload (true TCP retransmit semantics). + if acked_seq >= seq.wrapping_add(CHUNK as u32) { + seq = seq.wrapping_add(CHUNK as u32); + } else if seq.wrapping_sub(acked_seq) > 256 * 1024 { + // Out-paced kernel recv buffer; sleep briefly so the host + // server thread can drain. + std::thread::sleep(std::time::Duration::from_millis(10)); + } + } + + // Close the connection cleanly so the server's read loop exits. 
+ let _ = stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + seq, + our_seq + 1, + TcpControl::Fin, + &[], + )); + for _ in 0..40 { + let _ = drain_n(&mut stack, 1); + if server.is_finished() { + break; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + let _ = server.join(); + + let received = bytes_received.load(Ordering::Relaxed); + assert!( + !saw_close, + "TCP backpressure must not RST/FIN mid-stream — the relay must hold \ + the line while the kernel drains. Saw RST or FIN." + ); + assert!( + received >= TOTAL * 95 / 100, + "server must receive ~all bytes pushed (got {received}/{TOTAL}); \ + backpressure must retransmit until success, not silently drop." + ); +} + +#[test] +fn tcp_rate_limit_emits_rst() { + // 5 conn/s allowance; 10 attempts. + let mut stack = SlirpBackend::with_security(64, 5, &[], &[]).unwrap(); + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + let mut rsts = 0; + for i in 0..10 { + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT + i as u16, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + for f in drain_n(&mut stack, 2) { + if let Some((_, _, ctrl, _)) = parse_tcp_to_guest(&f) { + if ctrl == TcpControl::Rst { + rsts += 1; + } + } + } + } + assert!(rsts >= 4, "expected ≥4 RSTs from rate limit, saw {rsts}"); + drop(listener); +} + +#[test] +fn tcp_max_concurrent_emits_rst() { + let mut stack = SlirpBackend::with_security(2, 1000, &[], &[]).unwrap(); + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + // Open 4 distinct connections; cap is 2. 
+ let mut rsts = 0; + for i in 0..4 { + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT + i, + host_port, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + for f in drain_n(&mut stack, 2) { + if let Some((_, _, ctrl, _)) = parse_tcp_to_guest(&f) { + if ctrl == TcpControl::Rst { + rsts += 1; + } + } + } + } + assert!(rsts >= 1, "expected RST after concurrent limit, saw {rsts}"); + drop(listener); +} + +#[test] +fn tcp_deny_list_emits_rst() { + // `with_security` takes `&[String]`; parse via `Ipv4Net` to validate the + // CIDR at compile-check time, then convert to the expected string form. + let deny_cidr: Ipv4Net = "169.254.169.254/32".parse().unwrap(); + let deny_strings = [deny_cidr.to_string()]; + let mut stack = SlirpBackend::with_security(64, 1000, &deny_strings, &[]).unwrap(); + + stack + .process_guest_frame(&build_tcp_frame( + Ipv4Address::new(169, 254, 169, 254), + GUEST_EPHEMERAL_PORT, + 80, + 1000, + 0, + TcpControl::Syn, + &[], + )) + .unwrap(); + let rst = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_tcp_to_guest(&f)) + .map(|(_, _, ctrl, _)| ctrl == TcpControl::Rst); + assert_eq!(rst, Some(true), "deny-list IP must get RST"); +} + +/// Builds an ARP request Ethernet frame from the guest asking "who has +/// `target_ip`?". The sender is the guest MAC/IP; target hardware address +/// is zeroed as per ARP request convention. 
+fn build_arp_request(target_ip: Ipv4Address) -> Vec<u8> {
+    let arp_repr = ArpRepr::EthernetIpv4 {
+        operation: ArpOperation::Request,
+        source_hardware_addr: EthernetAddress(GUEST_MAC),
+        source_protocol_addr: SLIRP_GUEST_IP,
+        target_hardware_addr: EthernetAddress([0; 6]),
+        target_protocol_addr: target_ip,
+    };
+    let eth_repr = EthernetRepr {
+        src_addr: EthernetAddress(GUEST_MAC),
+        dst_addr: EthernetAddress([0xff; 6]),
+        ethertype: EthernetProtocol::Arp,
+    };
+    let total = ETH_HDR_LEN + arp_repr.buffer_len();
+    let mut buf = vec![0u8; total];
+    let mut eth = EthernetFrame::new_unchecked(&mut buf[..]);
+    eth_repr.emit(&mut eth);
+    let mut arp = ArpPacket::new_unchecked(&mut buf[ETH_HDR_LEN..]);
+    arp_repr.emit(&mut arp);
+    buf
+}
+
+/// Parses an Ethernet frame as an ARP reply.
+///
+/// Returns `Some((source_hardware_addr, source_protocol_addr))` when the
+/// frame carries an ARP reply opcode, `None` otherwise.
+fn parse_arp_reply(frame: &[u8]) -> Option<(EthernetAddress, Ipv4Address)> {
+    let eth = EthernetFrame::new_checked(frame).ok()?;
+    if eth.ethertype() != EthernetProtocol::Arp {
+        return None;
+    }
+    let arp = ArpPacket::new_checked(eth.payload()).ok()?;
+    let repr = ArpRepr::parse(&arp).ok()?;
+    if let ArpRepr::EthernetIpv4 {
+        operation: ArpOperation::Reply,
+        source_hardware_addr,
+        source_protocol_addr,
+        ..
+ } = repr + { + Some((source_hardware_addr, source_protocol_addr)) + } else { + None + } +} + +#[test] +fn arp_replies_for_gateway() { + let mut stack = SlirpBackend::new().unwrap(); + stack + .process_guest_frame(&build_arp_request(SLIRP_GATEWAY_IP)) + .unwrap(); + let reply = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_arp_reply(&f)) + .expect("arp reply for gateway"); + assert_eq!(reply.1, SLIRP_GATEWAY_IP); + assert_eq!(reply.0, EthernetAddress(GATEWAY_MAC)); +} + +#[test] +fn arp_replies_for_random_subnet_ip() { + let mut stack = SlirpBackend::new().unwrap(); + stack + .process_guest_frame(&build_arp_request(Ipv4Address::new(10, 0, 2, 99))) + .unwrap(); + let reply = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_arp_reply(&f)) + .expect("arp reply for in-subnet IP"); + assert_eq!(reply.0, EthernetAddress(GATEWAY_MAC)); +} + +#[test] +fn arp_does_not_reply_for_guest_ip() { + let mut stack = SlirpBackend::new().unwrap(); + stack + .process_guest_frame(&build_arp_request(SLIRP_GUEST_IP)) + .unwrap(); + let reply = drain_n(&mut stack, 2) + .into_iter() + .find_map(|f| parse_arp_reply(&f)); + assert!(reply.is_none(), "stack must not claim guest's own IP"); +} + +/// Wire-format label for `example.com`, used in DNS query frames. +/// +/// Encoded as a DNS QNAME: each label is prefixed by its byte length, +/// terminated by a zero-length label. This is the representation that +/// goes directly into the DNS question section. +const QNAME_EXAMPLE_COM: &[u8] = b"\x07example\x03com\x00"; + +/// Builds a minimal DNS query UDP Ethernet frame from the guest to `SLIRP_DNS_IP`. +/// +/// `xid` is placed in the transaction-ID field. `qname` must be a +/// fully-encoded DNS name (length-prefixed labels, zero terminator). +/// The question section requests an A record (`QTYPE=1`, `QCLASS=1`). 
+///
+/// Unlike `build_udp_frame` (which carries a pre-existing off-by-one in
+/// the `payload_len` argument passed to `udp_repr.emit`), this helper
+/// passes only the DNS payload length so the UDP `len` field is correct
+/// and the stack's smoltcp parser accepts the frame.
+fn build_dns_query(xid: u16, qname: &[u8]) -> Vec<u8> {
+    // DNS message layout:
+    //   2B transaction ID
+    //   2B flags (standard query, RD=1)
+    //   2B QDCOUNT = 1
+    //   2B ANCOUNT = 0
+    //   2B NSCOUNT = 0
+    //   2B ARCOUNT = 0
+    //   ..B QNAME (length-label encoded, zero terminated)
+    //   2B QTYPE = 1 (A)
+    //   2B QCLASS = 1 (IN)
+    let mut dns_payload = Vec::new();
+    dns_payload.extend_from_slice(&xid.to_be_bytes());
+    dns_payload.extend_from_slice(&0x0100u16.to_be_bytes()); // flags: RD=1
+    dns_payload.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
+    dns_payload.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
+    dns_payload.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
+    dns_payload.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
+    dns_payload.extend_from_slice(qname);
+    dns_payload.extend_from_slice(&1u16.to_be_bytes()); // QTYPE A
+    dns_payload.extend_from_slice(&1u16.to_be_bytes()); // QCLASS IN
+
+    // Build the Ethernet frame manually so we can pass the correct
+    // `payload_len` (DNS payload only) to `udp_repr.emit`.
+ let udp_repr = UdpRepr { + src_port: GUEST_EPHEMERAL_PORT, + dst_port: 53, + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + dst_addr: SLIRP_DNS_IP, + next_header: IpProtocol::Udp, + payload_len: UDP_HDR_LEN + dns_payload.len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = ETH_HDR_LEN + ip_repr.buffer_len() + UDP_HDR_LEN + dns_payload.len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut udp = UdpPacket::new_unchecked(&mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..]); + udp_repr.emit( + &mut udp, + &IpAddress::Ipv4(SLIRP_GUEST_IP), + &IpAddress::Ipv4(SLIRP_DNS_IP), + dns_payload.len(), // payload length only, not header+payload + |b| b.copy_from_slice(&dns_payload), + &Default::default(), + ); + buf +} + +/// Parses an Ethernet frame emitted by the stack and returns the DNS +/// transaction ID (XID) if the frame is a UDP datagram addressed to +/// the guest on port `GUEST_EPHEMERAL_PORT` with a plausible DNS +/// header (≥ 12 bytes of DNS payload). +/// +/// Returns `None` for any frame that does not match those criteria. 
+fn parse_dns_reply_xid(frame: &[u8]) -> Option<u16> {
+    let eth = EthernetFrame::new_checked(frame).ok()?;
+    if eth.ethertype() != EthernetProtocol::Ipv4 {
+        return None;
+    }
+    let ip = Ipv4Packet::new_checked(eth.payload()).ok()?;
+    if ip.next_header() != IpProtocol::Udp || ip.dst_addr() != SLIRP_GUEST_IP {
+        return None;
+    }
+    let udp = UdpPacket::new_checked(ip.payload()).ok()?;
+    if udp.dst_port() != GUEST_EPHEMERAL_PORT {
+        return None;
+    }
+    let dns_payload = udp.payload();
+    if dns_payload.len() < 12 {
+        return None;
+    }
+    Some(u16::from_be_bytes([dns_payload[0], dns_payload[1]]))
+}
+
+#[test]
+fn dns_query_resolves() {
+    let mut stack = match SlirpBackend::new() {
+        Ok(s) => s,
+        Err(e) => {
+            eprintln!("skip: SlirpBackend::new() failed ({e}), no DNS available");
+            return;
+        }
+    };
+
+    let query = build_dns_query(0x1234, QNAME_EXAMPLE_COM);
+    if let Err(e) = stack.process_guest_frame(&query) {
+        eprintln!("skip: process_guest_frame failed ({e})");
+        return;
+    }
+
+    let mut reply_xid: Option<u16> = None;
+    for _ in 0..20 {
+        for frame in stack.poll() {
+            if let Some(xid) = parse_dns_reply_xid(&frame) {
+                reply_xid = Some(xid);
+            }
+        }
+        if reply_xid.is_some() {
+            break;
+        }
+        std::thread::sleep(std::time::Duration::from_millis(100));
+    }
+
+    match reply_xid {
+        Some(xid) => assert_eq!(xid, 0x1234, "reply XID must match query XID"),
+        None => {
+            eprintln!("skip: no DNS reply in 20×100 ms, upstream resolver unreachable");
+        }
+    }
+}
+
+#[test]
+fn dns_cache_keys_by_question_not_xid() {
+    let mut stack = match SlirpBackend::new() {
+        Ok(s) => s,
+        Err(e) => {
+            eprintln!("skip: SlirpBackend::new() failed ({e}), no DNS available");
+            return;
+        }
+    };
+
+    // Warm the cache with xid=1.
+    let warm_query = build_dns_query(0x0001, QNAME_EXAMPLE_COM);
+    if let Err(e) = stack.process_guest_frame(&warm_query) {
+        eprintln!("skip: warm query process_guest_frame failed ({e})");
+        return;
+    }
+    let mut warmed = false;
+    for _ in 0..20 {
+        for frame in stack.poll() {
+            if let Some(xid) = parse_dns_reply_xid(&frame) {
+                if xid == 0x0001 {
+                    warmed = true;
+                }
+            }
+        }
+        if warmed {
+            break;
+        }
+        std::thread::sleep(std::time::Duration::from_millis(100));
+    }
+    if !warmed {
+        eprintln!("skip: cache warm-up timed out, upstream resolver unreachable");
+        return;
+    }
+
+    // Now query with xid=2; the cache must rewrite the reply XID to 2.
+    let second_query = build_dns_query(0x0002, QNAME_EXAMPLE_COM);
+    if let Err(e) = stack.process_guest_frame(&second_query) {
+        eprintln!("skip: second query process_guest_frame failed ({e})");
+        return;
+    }
+    let mut reply_xid: Option<u16> = None;
+    for _ in 0..20 {
+        for frame in stack.poll() {
+            if let Some(xid) = parse_dns_reply_xid(&frame) {
+                reply_xid = Some(xid);
+            }
+        }
+        if reply_xid.is_some() {
+            break;
+        }
+        std::thread::sleep(std::time::Duration::from_millis(100));
+    }
+
+    match reply_xid {
+        Some(xid) => assert_eq!(xid, 0x0002, "cache must rewrite XID to match the new query"),
+        None => {
+            eprintln!("skip: no reply for second query in 20×100 ms");
+        }
+    }
+}
+
+/// BROKEN_ON_PURPOSE pin (now passing): arbitrary UDP (any destination
+/// port, not just 53) round-trips through the per-flow connected-socket
+/// NAT.
+#[test]
+fn udp_non_dns_round_trips() {
+    let host_sock = UdpSocket::bind("127.0.0.1:0").unwrap();
+    let host_port = host_sock.local_addr().unwrap().port();
+    host_sock
+        .set_read_timeout(Some(std::time::Duration::from_millis(500)))
+        .unwrap();
+
+    let mut stack = SlirpBackend::new().unwrap();
+
+    // Guest → gateway:host_port (translated to 127.0.0.1:host_port).
+ stack + .process_guest_frame(&build_udp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + b"hello", + )) + .unwrap(); + let _ = drain_n(&mut stack, 4); + + // Host receives the datagram. + let mut buf = [0u8; 32]; + let (n, peer) = host_sock + .recv_from(&mut buf) + .expect("host receives guest UDP"); + assert_eq!(&buf[..n], b"hello"); + + // Host echoes back. + host_sock.send_to(&buf[..n], peer).unwrap(); + + // Drain — guest should see the reply on its source port. + let mut saw_reply = false; + for _ in 0..20 { + for f in drain_n(&mut stack, 1) { + let Some(eth) = EthernetFrame::new_checked(f.as_slice()).ok() else { + continue; + }; + if eth.ethertype() != EthernetProtocol::Ipv4 { + continue; + } + let Some(ip) = Ipv4Packet::new_checked(eth.payload()).ok() else { + continue; + }; + if ip.next_header() != IpProtocol::Udp { + continue; + } + let Some(udp_pkt) = UdpPacket::new_checked(ip.payload()).ok() else { + continue; + }; + if udp_pkt.dst_port() == GUEST_EPHEMERAL_PORT && udp_pkt.payload() == b"hello" { + saw_reply = true; + break; + } + } + if saw_reply { + break; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + assert!(saw_reply, "guest must receive UDP reply via per-flow NAT"); +} + +/// BROKEN_ON_PURPOSE pin (now passing): the guest receives an ICMP echo +/// reply via the host's unprivileged `IPPROTO_ICMP SOCK_DGRAM` socket. +/// +/// Skips gracefully if `net.ipv4.ping_group_range` forbids unprivileged +/// ICMP for the calling GID — in that environment the warn-once log +/// fires and the SLIRP stack drops ICMP, which is the documented +/// fallback (see `slirp.rs::ICMP_PROBE`). +#[test] +fn icmp_echo_returns_reply() { + use smoltcp::wire::{Icmpv4Packet, Icmpv4Repr}; + + // Probe whether unprivileged ICMP is permitted on this host. If not, + // skip gracefully — the SLIRP stack falls back to silently dropping + // ICMP in that environment (see slirp.rs::ICMP_PROBE). 
+ let probe_fd = unsafe { libc::socket(libc::AF_INET, libc::SOCK_DGRAM, libc::IPPROTO_ICMP) }; + if probe_fd < 0 { + let err = std::io::Error::last_os_error(); + let raw = err.raw_os_error().unwrap_or(0); + if raw == libc::EPERM || raw == libc::EACCES { + eprintln!("skip: unprivileged ICMP forbidden ({err}); see net.ipv4.ping_group_range"); + return; + } + panic!("unexpected ICMP probe error: {err}"); + } + unsafe { libc::close(probe_fd) }; + + let icmp_repr = Icmpv4Repr::EchoRequest { + ident: 0xbeef, + seq_no: 1, + data: b"ping", + }; + let ip_repr = Ipv4Repr { + src_addr: SLIRP_GUEST_IP, + // 127.0.0.1 — the host kernel always replies on loopback. + dst_addr: Ipv4Address::new(127, 0, 0, 1), + next_header: IpProtocol::Icmp, + payload_len: icmp_repr.buffer_len(), + hop_limit: 64, + }; + let eth_repr = EthernetRepr { + src_addr: EthernetAddress(GUEST_MAC), + dst_addr: EthernetAddress(GATEWAY_MAC), + ethertype: EthernetProtocol::Ipv4, + }; + let total = ETH_HDR_LEN + ip_repr.buffer_len() + icmp_repr.buffer_len(); + let mut buf = vec![0u8; total]; + let mut eth = EthernetFrame::new_unchecked(&mut buf[..]); + eth_repr.emit(&mut eth); + let mut ip = Ipv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN..]); + ip_repr.emit(&mut ip, &Default::default()); + let mut icmp = Icmpv4Packet::new_unchecked(&mut buf[ETH_HDR_LEN + ip_repr.buffer_len()..]); + icmp_repr.emit(&mut icmp, &Default::default()); + + let mut stack = match SlirpBackend::new() { + Ok(s) => s, + Err(_) => { + eprintln!("skip: SlirpBackend::new failed"); + return; + } + }; + if stack.process_guest_frame(&buf).is_err() { + eprintln!("skip: process_guest_frame failed (likely no ICMP support)"); + return; + } + + // Poll up to 20 × 50ms for the reply. 
+    let mut saw_reply = false;
+    for _ in 0..20 {
+        for f in drain_n(&mut stack, 1) {
+            let Some(eth) = EthernetFrame::new_checked(f.as_slice()).ok() else {
+                continue;
+            };
+            if eth.ethertype() != EthernetProtocol::Ipv4 {
+                continue;
+            }
+            let Some(ip) = Ipv4Packet::new_checked(eth.payload()).ok() else {
+                continue;
+            };
+            if ip.next_header() == IpProtocol::Icmp && ip.dst_addr() == SLIRP_GUEST_IP {
+                saw_reply = true;
+                break;
+            }
+        }
+        if saw_reply {
+            break;
+        }
+        std::thread::sleep(std::time::Duration::from_millis(50));
+    }
+
+    assert!(
+        saw_reply,
+        "guest must receive ICMP echo reply via host IPPROTO_ICMP socket"
+    );
+}
+
+#[test]
+fn slirp_backend_implements_network_backend() {
+    fn assert_send<T: Send>() {}
+    fn assert_backend<T: NetworkBackend>() {}
+    assert_send::<SlirpBackend>();
+    assert_backend::<SlirpBackend>();
+}
+
+#[test]
+fn nat_translate_outbound_loopback_rewrite() {
+    let rules = Rules {
+        gateway_loopback: true,
+        deny_cidrs: vec![],
+        port_forwards: vec![],
+    };
+    let result = translate_outbound(&rules, SLIRP_GATEWAY_IP, 80, SLIRP_GATEWAY_IP).unwrap();
+    assert_eq!(
+        result,
+        SocketAddr::from((Ipv4Addr::LOCALHOST, 80)),
+        "gateway IP must be rewritten to 127.0.0.1 when gateway_loopback=true"
+    );
+}
+
+#[test]
+fn nat_translate_outbound_unmodified_external_ip() {
+    let rules = Rules {
+        gateway_loopback: true,
+        deny_cidrs: vec![],
+        port_forwards: vec![],
+    };
+    let external = Ipv4Address::new(8, 8, 8, 8);
+    let result = translate_outbound(&rules, external, 53, SLIRP_GATEWAY_IP).unwrap();
+    assert_eq!(
+        result,
+        SocketAddr::from((Ipv4Addr::new(8, 8, 8, 8), 53)),
+        "non-gateway IPs must pass through unchanged"
+    );
+}
+
+/// E2E contract for inbound port-forwarding.
+///
+/// Builds a `SlirpBackend` with one TCP port-forward rule
+/// (`HOST_PORT` → `GUEST_PORT`), has a host thread connect to
+/// `127.0.0.1:HOST_PORT`, then drives `drain_to_guest` and
+/// synthesizes a guest TCP listener by responding with SYN-ACK to
+/// the synthesized SYN the stack emits.
+///
+/// The test asserts **three** contract points, each covering a distinct
+/// 5.5b sub-task:
+///
+/// 1. `host TcpStream::connect` **succeeds** — the listener thread
+///    (5.5b.3) is bound and accepts incoming connections.
+/// 2. `drain_to_guest` **emits a synthesized SYN** to `GUEST_PORT` —
+///    `process_pending_inbound_accepts` (5.5b.3) dequeues the
+///    `InboundAccept` and `synthesize_inbound_syn` (5.5b.2) emits the
+///    SYN frame; `with_security` (5.5b.4) wired the channel.
+/// 3. After the synthetic guest replies with SYN-ACK, `drain_to_guest`
+///    **emits an ACK frame** — the `SynSent → Established` arm (5.5b.1)
+///    fired and the handshake completed end-to-end.
+///
+/// Byte-level round-trip is deferred — connect + full 3WH completion
+/// is the minimum contract for the listener implementation.
+#[test]
+fn tcp_port_forward_inbound_connect_succeeds() {
+    use std::sync::mpsc;
+    use std::time::{Duration, Instant};
+
+    const HOST_PORT: u16 = 18080;
+    const GUEST_PORT: u16 = 8080;
+    const GUEST_ISN: u32 = 5000;
+
+    let mut stack = SlirpBackend::with_security(64, 1000, &[], &[(HOST_PORT, GUEST_PORT)])
+        .expect("build stack with port-forward rule");
+
+    // ── Contract 1: listener thread is bound and accepts connections ─────
+    // Spawn the host connector in a background thread so it doesn't block
+    // the test thread. The OS-level SYN/SYN-ACK/ACK between host connector
+    // and the listener socket is handled by the kernel; the SLIRP stack
+    // is not involved in that handshake.
+    let (tx, rx) = mpsc::channel::<std::io::Result<std::net::TcpStream>>();
+    std::thread::spawn(move || {
+        let result = std::net::TcpStream::connect_timeout(
+            &format!("127.0.0.1:{HOST_PORT}").parse().unwrap(),
+            Duration::from_secs(5),
+        );
+        let _ = tx.send(result);
+    });
+
+    // ── Contract 2 + 3: drain until we see the synthesized SYN (2) and ──
+    // then the ACK that completes the inbound 3WH (3).
+    let deadline = Instant::now() + Duration::from_secs(5);
+    let mut saw_synthesized_syn = false;
+    let mut saw_ack_after_synack = false;
+    let mut connect_result: Option<std::io::Result<std::net::TcpStream>> = None;
+
+    while Instant::now() < deadline
+        && (!saw_synthesized_syn || !saw_ack_after_synack || connect_result.is_none())
+    {
+        let mut out = Vec::new();
+        stack.drain_to_guest(&mut out);
+
+        let mut high_port_for_ack: Option<u16> = None;
+
+        for frame in &out {
+            let Some((syn_seq, _ack, src_port, dst_port, ctrl)) = parse_tcp_to_guest_full(frame)
+            else {
+                continue;
+            };
+
+            // Contract 2: synthesized SYN arriving at the guest.
+            if ctrl == TcpControl::Syn && dst_port == GUEST_PORT && !saw_synthesized_syn {
+                saw_synthesized_syn = true;
+                high_port_for_ack = Some(src_port);
+
+                // Synthetic guest listener replies with SYN-ACK.
+                // build_tcp_frame: src=SLIRP_GUEST_IP, dst=SLIRP_GATEWAY_IP
+                let syn_ack = build_tcp_frame(
+                    SLIRP_GATEWAY_IP, // dst from guest's perspective
+                    GUEST_PORT,       // guest service port (src_port in frame)
+                    src_port,         // high_port (dst_port in frame)
+                    GUEST_ISN,        // guest's own ISN
+                    syn_seq + 1,      // ack = their SYN seq + 1
+                    TcpControl::Syn,  // SYN+ACK: ack_number is non-zero
+                    &[],
+                );
+                stack
+                    .process_guest_frame(&syn_ack)
+                    .expect("process synthetic SYN-ACK");
+            }
+
+            // Contract 3: ACK back to the guest completing the inbound 3WH.
+            // After processing our SYN-ACK, the stack emits a plain ACK
+            // (ctrl=None, ack set) directed at GUEST_PORT.
+            if ctrl == TcpControl::None
+                && dst_port == GUEST_PORT
+                && high_port_for_ack == Some(src_port)
+            {
+                saw_ack_after_synack = true;
+            }
+        }
+
+        // A second drain pass so the stack processes the SYN-ACK we just
+        // injected and emits its ACK in the same iteration.
+ let mut ack_out = Vec::new(); + stack.drain_to_guest(&mut ack_out); + for frame in &ack_out { + let Some((_seq, _ack, src_port, dst_port, ctrl)) = parse_tcp_to_guest_full(frame) + else { + continue; + }; + if ctrl == TcpControl::None + && dst_port == GUEST_PORT + && high_port_for_ack == Some(src_port) + { + saw_ack_after_synack = true; + } + } + + if let Ok(r) = rx.try_recv() { + connect_result = Some(r); + } + + std::thread::sleep(Duration::from_millis(10)); + } + + // Contract 1. + let connect_result = + connect_result.expect("host TcpStream::connect did not complete within 5 s"); + let _stream = connect_result.expect("host TcpStream::connect failed"); + + // Contract 2. + assert!( + saw_synthesized_syn, + "drain_to_guest must emit a synthesized SYN to GUEST_PORT \ + after drain_to_guest processes the InboundAccept (5.5b.2/5.5b.3)" + ); + + // Contract 3. + assert!( + saw_ack_after_synack, + "drain_to_guest must emit an ACK completing the inbound 3-way handshake \ + after the synthetic guest SYN-ACK is processed (5.5b.1)" + ); +} + +/// Richer TCP-to-guest frame parser that also returns src/dst ports. +/// +/// Returns `(seq, ack, src_port, dst_port, control)` for any IPv4/TCP +/// frame whose destination is `SLIRP_GUEST_IP`, or `None` for anything +/// else. Used by `tcp_port_forward_inbound_connect_succeeds` to identify +/// the synthesized SYN and extract the ephemeral `high_port`. 
+fn parse_tcp_to_guest_full(frame: &[u8]) -> Option<(u32, u32, u16, u16, TcpControl)> {
+    let eth = EthernetFrame::new_checked(frame).ok()?;
+    if eth.ethertype() != EthernetProtocol::Ipv4 {
+        return None;
+    }
+    let ip = Ipv4Packet::new_checked(eth.payload()).ok()?;
+    if ip.next_header() != IpProtocol::Tcp || ip.dst_addr() != SLIRP_GUEST_IP {
+        return None;
+    }
+    let tcp = TcpPacket::new_checked(ip.payload()).ok()?;
+    let control = match (tcp.syn(), tcp.fin(), tcp.rst(), tcp.psh()) {
+        (false, false, false, false) => TcpControl::None,
+        (false, false, false, true) => TcpControl::Psh,
+        (true, false, false, _) => TcpControl::Syn,
+        (false, true, false, _) => TcpControl::Fin,
+        (false, false, true, _) => TcpControl::Rst,
+        _ => return None,
+    };
+    Some((
+        tcp.seq_number().0 as u32,
+        tcp.ack_number().0 as u32,
+        tcp.src_port(),
+        tcp.dst_port(),
+        control,
+    ))
+}
+
+#[test]
+fn nat_translate_outbound_deny_list() {
+    let rules = Rules {
+        gateway_loopback: true,
+        deny_cidrs: vec!["169.254.0.0/16".parse::<Ipv4Net>().unwrap()],
+        port_forwards: vec![],
+    };
+    let metadata = Ipv4Address::new(169, 254, 169, 254);
+    assert!(
+        translate_outbound(&rules, metadata, 80, SLIRP_GATEWAY_IP).is_none(),
+        "deny-listed IP must return None"
+    );
+
+    // Adjacent (non-denied) IP still passes.
+    let public = Ipv4Address::new(169, 253, 0, 1);
+    assert!(
+        translate_outbound(&rules, public, 80, SLIRP_GATEWAY_IP).is_some(),
+        "IPs outside deny CIDR must pass"
+    );
+}
+
+/// Snapshot/restore must rebuild the epoll dispatch from `flow_table`
+/// contents. The `epoll_fd` is a kernel handle that does not survive
+/// snapshot; a fresh dispatcher starts with zero registered FDs even
+/// though `flow_table` may contain entries with live host sockets.
+///
+/// This smoke test verifies the rebuild path end-to-end:
+/// 1. 
Reset the epoll dispatcher to a fresh empty one (simulating what +/// snapshot restore does: the kernel handle is gone, a new one is created). +/// 3. Confirm the pre-rebuild count is zero. +/// 4. Call `rebuild_epoll_from_flow_table`. +/// 5. Confirm the post-rebuild count is one. +/// +/// Gated on `bench-helpers` because it consumes synthetic-injection helpers +/// (`insert_synthetic_synsent_entry`, `reset_epoll_for_snapshot_test`, +/// `registered_fd_count`) that are only visible to external test/bench +/// consumers when that feature is enabled. Default `cargo test` skips this +/// pin; CI runs it via `cargo test --features bench-helpers`. +#[cfg(feature = "bench-helpers")] +#[test] +fn epoll_set_rebuilt_from_flow_table_smoke() { + use std::net::TcpListener; + + let mut backend = SlirpBackend::new().expect("backend"); + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); + let host_stream = + std::net::TcpStream::connect(listener.local_addr().unwrap()).expect("connect"); + host_stream.set_nonblocking(true).ok(); + + // Insert a synthetic flow (may or may not register with epoll depending on + // cfg context). Then reset the epoll dispatcher to a fresh empty one — + // this is the key step that simulates what happens after snapshot restore: + // the kernel-side `epoll_fd` does not survive, so a new one is created + // with zero registrations even though `flow_table` has live entries. + backend.insert_synthetic_synsent_entry(8080, 49152, 1000, host_stream); + backend.reset_epoll_for_snapshot_test(); + + let before = backend.registered_fd_count(); + assert_eq!( + before, 0, + "after reset, epoll must have zero registered FDs (simulates post-snapshot state)" + ); + + backend.rebuild_epoll_from_flow_table(); + + let after = backend.registered_fd_count(); + assert_eq!( + after, 1, + "rebuild_epoll_from_flow_table must register all live flow FDs" + ); +}