diff --git a/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.1.md b/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.1.md new file mode 100644 index 00000000..ea74d7fc --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-smoltcp-passt-port-phase6.1.md @@ -0,0 +1,525 @@ +# Phase 6.1: TCP Half-Close Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Wire `TcpNatState`'s currently-unused `FinWait1`, `CloseWait`, `LastAck` variants into the SLIRP relay so guest `shutdown(SHUT_WR)` mid-write doesn't lose tail data. + +**Severity:** High — silent data loss on every protocol that uses orderly half-close (HTTP request bodies, SMTP DATA, anything with `shutdown(SHUT_WR)`). + +**Architecture:** Replace the current "guest FIN → immediate close" short-circuit with a five-state machine driven by guest FIN events and host-side EOF on `host_stream`. Add a `LAST_ACK_TIMEOUT` of 60 s (TCP MSL×2) so missing final ACKs don't leak entries. + +**Tech stack:** Existing `TcpNatEntry` + `TcpNatState` types in `src/network/slirp.rs`. No new dependencies. + +--- + +## Background + +`TcpNatState` (`src/network/slirp.rs:131-144`) declares `FinWait1`, `FinWait2`, `CloseWait`, `LastAck` but the implementation only ever uses `Syn*`, `Established`, `Closed`. The enum carries `#[allow(dead_code)]` (line 130) to mute warnings. 
+ +Guest FIN handler (`src/network/slirp.rs:1657-1676`): + +```rust +if tcp.fin() { + entry.guest_ack = seq.wrapping_add(1); + let fin_ack_frame = build_tcp_packet_static(..., TcpControl::Fin, &[]); + self.inject_to_guest.push(fin_ack_frame); + entry.our_seq = entry.our_seq.wrapping_add(1); + entry.state = TcpNatState::Closed; // ← LOSES IN-FLIGHT HOST DATA + self.pending_close.push(flow_key); // ← FORCES IMMEDIATE REAP + return Ok(()); +} +``` + +This pushes the host-side `TcpStream` into `pending_close`, which `relay_tcp_nat_data` reaps the same tick. The host's pending write data never makes it back to the guest. + +## Target state machine + +``` + guest FIN (we ACK + shutdown(Write)) + Established ──────────────────────────────────────────► FinWait1 + │ │ + │ host EOF (we send FIN) │ host EOF (we send FIN) + ▼ ▼ + CloseWait LastAck + │ guest FIN (we ACK) │ guest's final ACK + ▼ │ — or — + LastAck ◄──────────────────────────────────────────────────── │ LAST_ACK_TIMEOUT (60 s) + │ │ + │ guest's final ACK / LAST_ACK_TIMEOUT ▼ + └─────────────────────────────────► Closed ◄──────────────┘ + │ + ▼ unregister from epoll + flow_table + .remove() +``` + +We collapse `FinWait2` (in real TCP, "we sent FIN, peer ACKed our FIN, peer hasn't sent its own FIN yet"). Since we don't observe per-segment ACKs from the kernel — only data + EOF — we treat `FinWait1` as continuing until host EOF arrives, then jump straight to `LastAck`. + +## Invariants (carried) + +1. All-Rust path. No new crate deps. +2. Full observability — every state transition logs at `trace!` or `debug!`. +3. Cross-platform discipline (Linux-only SLIRP unchanged). +4. No regression in Phase 0–5 + 5.5b + 6.4 baselines. `bench-compare.sh --baseline 47868f0 --skip-vm` enforced. +5. Snapshot/restore correctness — `snapshot_integration` continues to pass. New states must round-trip OR cleanly degrade to `Closed` within `LAST_ACK_TIMEOUT`. 
+ +--- + +## File impact + +| File | Action | +|---|---| +| `src/network/slirp.rs` | Modify `TcpNatEntry` (add `last_state_change: Instant`), rewrite FIN handler, add CloseWait/LastAck transitions in `relay_tcp_nat_data`, add `LAST_ACK_TIMEOUT` reaping. Drop `#[allow(dead_code)]` on `TcpNatState`. | +| `tests/network_baseline.rs` | Three new pins. | +| Snapshot serde | Verify the new state transitions round-trip; if not, document the cleanup-on-restore behavior. | + +No public API change. + +--- + +## Tasks + +### Task 1: Failing pin — `tcp_half_close_guest_writes_first` + +Pure synthetic harness. Drives `SlirpBackend` directly without a VM. + +```rust +#[test] +fn tcp_half_close_guest_writes_first() { + use std::io::{Read, Write}; + use std::net::TcpListener; + + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + let server = std::thread::spawn(move || -> Vec { + let (mut sock, _) = listener.accept().unwrap(); + // Read until EOF (guest will send data + FIN). + let mut request = Vec::new(); + let _ = sock.read_to_end(&mut request); + // After guest's shutdown(WRITE), we still want to send response. + sock.write_all(b"HTTP/1.1 200 OK\r\n\r\nBODY").unwrap(); + // Then close. + drop(sock); + request + }); + + let mut stack = SlirpBackend::new().unwrap(); + + // Guest 3-way handshake. + let our_seq = 1000u32; + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, our_seq, 0, + TcpControl::Syn, &[], + )).unwrap(); + let mut gateway_seq = 0u32; + for f in drain_n(&mut stack, 4) { + if let Some((s, _, ctrl, _)) = parse_tcp_to_guest(&f) { + if matches!(ctrl, TcpControl::Syn) { gateway_seq = s; break; } + } + } + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, + our_seq + 1, gateway_seq + 1, TcpControl::None, &[], + )).unwrap(); + + // Guest sends "HELLO" data + FIN. 
+ let request = b"HELLO"; + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, + our_seq + 1, gateway_seq + 1, TcpControl::Psh, request, + )).unwrap(); + stack.process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, GUEST_EPHEMERAL_PORT, host_port, + our_seq + 1 + request.len() as u32, gateway_seq + 1, + TcpControl::Fin, &[], + )).unwrap(); + + // Drive drain_to_guest until we see host's response data AND its FIN. + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); + let mut response_bytes: Vec = Vec::new(); + let mut saw_host_fin = false; + while std::time::Instant::now() < deadline { + for f in drain_n(&mut stack, 1) { + if let Some((_, _, ctrl, payload_len)) = parse_tcp_to_guest(&f) { + // Extract payload bytes. + let eth = EthernetFrame::new_unchecked(f.as_slice()); + let ip = Ipv4Packet::new_unchecked(eth.payload()); + let tcp = TcpPacket::new_unchecked(ip.payload()); + if payload_len > 0 { + response_bytes.extend_from_slice(tcp.payload()); + } + if matches!(ctrl, TcpControl::Fin) { + saw_host_fin = true; + } + } + } + if !response_bytes.is_empty() && saw_host_fin { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + + let _ = server.join(); + + assert_eq!(&response_bytes[..], b"HTTP/1.1 200 OK\r\n\r\nBODY", + "guest must receive ALL host response data after sending FIN"); + assert!(saw_host_fin, "guest must receive host's FIN"); +} +``` + +Run: `cargo test --test network_baseline tcp_half_close_guest_writes_first`. Expected: **FAIL** — current code marks state=Closed on guest FIN; host's response data is dropped. 
+ +**Commit:** `test(network): pin tcp_half_close_guest_writes_first (BROKEN_ON_PURPOSE)` + +--- + +### Task 2: Wire `FinWait1` transition + `shutdown(Write)` on guest FIN + +In `src/network/slirp.rs::handle_tcp_frame`, replace the FIN handler: + +```rust +// FIN from guest +if tcp.fin() { + debug!("SLIRP TCP: FIN from guest for {}:{}", dst_ip, dst_port); + entry.guest_ack = seq.wrapping_add(1); + + // ACK the guest's FIN — but don't send our own FIN yet. Host + // application may have data still to send. We transition to + // FinWait1 and shut down the host socket's write side so the + // host knows no more data is coming from the guest. + let ack_frame = build_tcp_packet_static( + dst_ip, SLIRP_GUEST_IP, dst_port, src_port, + entry.our_seq, entry.guest_ack, + TcpControl::None, &[], + ); + self.inject_to_guest.push(ack_frame); + + if let Err(e) = entry.host_stream.shutdown(std::net::Shutdown::Write) { + warn!("SLIRP TCP: shutdown(Write) failed on guest FIN, falling back \ + to immediate close: {}", e); + entry.state = TcpNatState::Closed; + self.pending_close.push(flow_key); + return Ok(()); + } + + entry.state = TcpNatState::FinWait1; + entry.last_state_change = Instant::now(); + trace!("SLIRP TCP: state Established → FinWait1 for {}:{}", dst_ip, dst_port); + return Ok(()); +} +``` + +Add `last_state_change: Instant` field to `TcpNatEntry` (used for `LAST_ACK_TIMEOUT` reaping): + +```rust +struct TcpNatEntry { + // ... existing fields ... + /// Wall clock when the entry's state last changed. Used by + /// LAST_ACK_TIMEOUT reaping in relay_tcp_nat_data so a missing + /// final ACK doesn't leak the entry forever. + last_state_change: Instant, +} +``` + +Initialize at every existing `TcpNatEntry { ... }` literal site (search for `TcpNatEntry {` in slirp.rs). Set to `Instant::now()` everywhere. + +Run: `cargo test --test network_baseline tcp_half_close_guest_writes_first`. 
Expected: **still FAIL** — host's FIN doesn't reach the guest yet (relay loop doesn't know about FinWait1). + +**Commit:** `feat(slirp): FinWait1 transition on guest FIN, host write-side shutdown` + +--- + +### Task 3: Wire `FinWait1 → LastAck` on host EOF + send our FIN to guest + +In `src/network/slirp.rs::relay_tcp_nat_data`, find the recv_peek `Ok(0)` arm (host EOF, ~line 1773 area): + +```rust +Ok(0) => { + // Host closed the connection. + debug!("SLIRP TCP: host EOF on flow guest_port={}", key.guest_src_port); + match entry.state { + TcpNatState::Established => { + // Host closed first → CloseWait. We send FIN to guest; + // guest may still send data which we'll forward to host + // (but host's write side may be closed — that's a guest + // write failure, not our concern). + entry.state = TcpNatState::CloseWait; + entry.last_state_change = Instant::now(); + became_closed = false; // no longer immediately reap + // Build FIN to guest below via the existing fin_frame branch. + } + TcpNatState::FinWait1 => { + // Guest closed first; now host has finished writing. + // Send FIN to guest, transition to LastAck. + entry.state = TcpNatState::LastAck; + entry.last_state_change = Instant::now(); + became_closed = false; + } + _ => { + // Already in a closing state or Closed. + became_closed = false; + } + } +} +``` + +Then update the FIN-emit logic just below (~line 1824): + +```rust +// Send FIN if we just transitioned to a state that demands one. 
+let needs_fin = matches!( + entry.state, + TcpNatState::CloseWait | TcpNatState::LastAck +); +if needs_fin && !entry.our_fin_sent { + fin_frame = Some(build_tcp_packet_static( + key.dst_ip, SLIRP_GUEST_IP, key.dst_port, key.guest_src_port, + entry.our_seq, entry.guest_ack, + TcpControl::Fin, &[], + )); + entry.our_seq = entry.our_seq.wrapping_add(1); + entry.our_fin_sent = true; + trace!("SLIRP TCP: sent FIN to guest, state={:?}", entry.state); +} +``` + +Add `our_fin_sent: bool` field to `TcpNatEntry` so we don't re-send FIN on repeat polls (every Established-side recv_peek would otherwise queue another FIN). + +Run: `cargo test --test network_baseline tcp_half_close_guest_writes_first`. Expected: **PASS** — host's response data + FIN now flow back through. + +**Commit:** `feat(slirp): FinWait1 → LastAck and Established → CloseWait on host EOF` + +--- + +### Task 4: Wire `CloseWait → LastAck` on guest FIN; `LastAck → Closed` on guest's final ACK + +Update `handle_tcp_frame` to handle FIN from a CloseWait entry: + +```rust +if tcp.fin() { + match entry.state { + TcpNatState::Established => { + // ... Task 2 path: → FinWait1 ... + } + TcpNatState::CloseWait => { + // Host already closed; guest just closed too. ACK the + // guest's FIN and transition to LastAck. + entry.guest_ack = seq.wrapping_add(1); + let ack_frame = build_tcp_packet_static( + dst_ip, SLIRP_GUEST_IP, dst_port, src_port, + entry.our_seq, entry.guest_ack, + TcpControl::None, &[], + ); + self.inject_to_guest.push(ack_frame); + entry.state = TcpNatState::LastAck; + entry.last_state_change = Instant::now(); + return Ok(()); + } + _ => { + // Repeat FIN or unexpected — ACK and stay where we are. + } + } +} +``` + +Update the ACK handler to handle the guest's final ACK in LastAck: + +```rust +// ACK while in LastAck — guest acknowledged our FIN. Reap. 
+if tcp.ack() && entry.state == TcpNatState::LastAck { + debug!("SLIRP TCP: LastAck → Closed for {}:{}", dst_ip, dst_port); + entry.state = TcpNatState::Closed; + self.pending_close.push(flow_key); + return Ok(()); +} +``` + +Place this branch BEFORE the existing "ACK transitions SynReceived → Established" block (otherwise the SynReceived branch would catch ACKs in LastAck — they're mutually exclusive but explicit ordering is clearer). + +Run all baseline pins: `cargo test --test network_baseline`. Expected: 18/18 pass. + +**Commit:** `feat(slirp): CloseWait → LastAck on guest FIN; LastAck → Closed on final ACK` + +--- + +### Task 5: Failing pin — `tcp_half_close_host_writes_first` + +Symmetric mirror of Task 1's pin. Host writes first, then closes; guest replies with data + FIN. + +```rust +#[test] +fn tcp_half_close_host_writes_first() { + use std::io::{Read, Write}; + use std::net::TcpListener; + + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let host_port = listener.local_addr().unwrap().port(); + + let server = std::thread::spawn(move || { + let (mut sock, _) = listener.accept().unwrap(); + sock.write_all(b"GREETING").unwrap(); + sock.shutdown(std::net::Shutdown::Write).unwrap(); + let mut buf = Vec::new(); + let _ = sock.read_to_end(&mut buf); + // Mirror back what the guest sent. + buf + }); + + let mut stack = SlirpBackend::new().unwrap(); + // ... handshake (same shape as Task 1) ... + + // Drive drain_to_guest until we see GREETING + host's FIN. + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); + let mut received: Vec = Vec::new(); + let mut saw_host_fin = false; + while std::time::Instant::now() < deadline { + for f in drain_n(&mut stack, 1) { + // Same parsing as Task 1. + } + if &received[..] == b"GREETING" && saw_host_fin { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + + assert_eq!(&received[..], b"GREETING"); + assert!(saw_host_fin); + + // Guest sends reply data + FIN. 
Host must receive both. + let reply = b"REPLY"; + // ACK host's FIN first to advance gateway_seq, then send reply data + // ... (synthesized guest frames) ... + + let host_received = server.join().unwrap(); + assert_eq!(&host_received[..], b"REPLY", "host must receive guest's reply post-FIN"); +} +``` + +The full test body should mirror Task 1's structure. Run: should PASS post-Task-3 (host EOF triggers CloseWait + FIN to guest correctly). + +**Commit:** `test(network): pin tcp_half_close_host_writes_first` + +--- + +### Task 6: `LAST_ACK_TIMEOUT` reaping + +In `src/network/slirp.rs`, near the existing `TCP_IDLE_TIMEOUT`: + +```rust +const LAST_ACK_TIMEOUT: Duration = Duration::from_secs(60); // TCP MSL × 2 +``` + +In `relay_tcp_nat_data`'s idle-timeout sweep (after the existing `TCP_IDLE_TIMEOUT` check): + +```rust +for (flow_key, entry) in &self.flow_table { + if let FlowEntry::Tcp(tcp_entry) = entry { + if tcp_entry.state == TcpNatState::LastAck + && tcp_entry.last_state_change.elapsed() > LAST_ACK_TIMEOUT + && !to_remove.contains(flow_key) + { + warn!("SLIRP TCP: LastAck timeout for guest_port={}, reaping", + // ... key.guest_src_port from match ...); + to_remove.push(*flow_key); + } + } +} +``` + +(merge with the existing `TCP_IDLE_TIMEOUT` sweep into one pass for cache-friendliness.) + +Run baseline pins: 18/18 + 2 new = 20 pass. + +**Commit:** `feat(slirp): LAST_ACK_TIMEOUT reaping prevents LastAck entry leak` + +--- + +### Task 7: Failing pin — `tcp_last_ack_timeout_reaps_stale_entry` + +Uses the existing `#[cfg(any(test, feature = "bench-helpers"))]` synthetic-injection pattern: + +```rust +#[cfg(feature = "bench-helpers")] +#[test] +fn tcp_last_ack_timeout_reaps_stale_entry() { + let mut stack = SlirpBackend::new().unwrap(); + // ... open a flow, drive it into LastAck via a helper ... + // Synthesize last_state_change = Instant::now() - 70 seconds (>LAST_ACK_TIMEOUT). 
+ stack.set_synthetic_last_state_change(guest_port, high_port, /* 70s ago */); + // One drain_to_guest cycle. + let mut out = Vec::new(); + stack.drain_to_guest(&mut out); + // Entry should be gone. + assert!(stack.tcp_flow_state(guest_port, high_port).is_none(), + "LastAck entry past LAST_ACK_TIMEOUT must be reaped"); +} +``` + +May need a new bench-helpers helper `set_synthetic_last_state_change` on `SlirpBackend`. + +Gate the test with `#[cfg(feature = "bench-helpers")]` per the existing snapshot-rebuild smoke pin pattern (`tests/network_baseline.rs::epoll_set_rebuilt_from_flow_table_smoke`). Default `cargo test` skips it; `cargo test --features bench-helpers -- --test-threads=1` runs it. + +**Commit:** `test(network): pin tcp_last_ack_timeout_reaps_stale_entry (bench-helpers)` + +--- + +### Task 8: Drop `#[allow(dead_code)]` on `TcpNatState` + +Now that all variants are wired. Verify clippy doesn't complain about any remaining unused variant. + +Verify `FinWait2` is actually unused — it should be safe to remove the variant entirely (or document why we keep it for future symmetry). Suggest: remove `FinWait2` variant; if anyone needs the distinction later they can re-add it. + +Update the doc comment on `TcpNatState` to reflect the new state machine. + +**Commit:** `refactor(slirp): drop allow(dead_code) on TcpNatState; remove unused FinWait2` + +--- + +### Task 9: Phase 6.1 validation gate + +Standard contract per `AGENTS.md` plus the new pins. 
+ +```bash +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +cargo test --workspace --all-features -- --test-threads=1 +cargo test --test network_baseline # 20 pass +cargo test --test network_baseline --features bench-helpers -- --test-threads=1 # 21 pass +cargo build --release +scripts/bench-compare.sh --baseline 47868f0 --skip-vm # no regressions +``` + +If `VOID_BOX_KERNEL` and `VOID_BOX_INITRAMFS` are set: + +```bash +cargo test --test snapshot_integration -- --ignored --test-threads=1 # passes +cargo test --test conformance -- --ignored --test-threads=1 +``` + +Wall-clock sanity: + +```bash +voidbox-network-bench --iterations 3 --bulk-mb 10 # g2h ≥ 6 Gbps, CRR ~10 ms +``` + +--- + +## Out of scope + +- TCP window management (Phase 6.3). +- Async outbound connect (Phase 6.2). +- IPv6 (Phase 7). +- Real `FinWait2` distinction — would require kernel-side ACK observation we don't currently have. + +## Reviewer pointers + +- Verify FIN is sent EXACTLY ONCE per state transition (search for `our_fin_sent` checks). +- Verify the ACK path correctly transitions LastAck → Closed without consuming a SYN-state ACK by mistake. +- Verify `last_state_change` is updated on every state transition, not just initial. +- Verify guest writes during CloseWait are still relayed to host (host has full read side; only its write side is closed, which it learns from our shutdown — but writes from us to it are valid). +- Snapshot interaction: `TcpNatState` serde already exists; new fields (`last_state_change`, `our_fin_sent`) need `#[serde(default)]` for backward compatibility with pre-6.1 snapshots. + +## Document history + +- 2026-05-04: initial plan written. 
diff --git a/src/network/slirp.rs b/src/network/slirp.rs index 827134ae..93c62a4d 100644 --- a/src/network/slirp.rs +++ b/src/network/slirp.rs @@ -91,6 +91,11 @@ const MTU: usize = 1500; const MAX_QUEUE_SIZE: usize = 64; const TCP_WINDOW: u16 = 65535; const UDP_IDLE_TIMEOUT: Duration = Duration::from_secs(60); +/// Timeout for TCP entries stuck in the LastAck state (i.e. we sent a FIN +/// but the guest's final ACK never arrived). Two TCP MSLs (2 × 30 s = 60 s) +/// matches the POSIX TIME_WAIT recommendation and prevents LastAck entries +/// from leaking indefinitely when a guest drops the final ACK. +const LAST_ACK_TIMEOUT: Duration = Duration::from_secs(60); /// Sleep interval for the port-forward listener thread between non-blocking /// accept polls. Short enough to keep accept latency low; long enough to @@ -157,20 +162,48 @@ pub(crate) struct InboundAccept { // TCP NAT connection tracking // ────────────────────────────────────────────────────────────────────── +/// TCP connection state for the SLIRP NAT relay. +/// +/// The state machine models both guest-initiated and host-initiated half-close +/// sequences. `FinWait2` is omitted: distinguishing it from `FinWait1` requires +/// observing per-segment ACKs from the kernel, which the relay does not track. +/// Instead, the relay stays in `FinWait1` until host EOF arrives, then jumps +/// directly to `LastAck`. +/// +/// State transitions: +/// +/// ```text +/// SynReceived ──ACK──► Established ──guest FIN──► FinWait1 +/// SynSent ──SYN+ACK──► Established │ │ +/// │ │ └─ host EOF ──► LastAck +/// │ host EOF │ │ +/// ▼ │ guest ACK ──┘ +/// CloseWait ◄──────────────┘ └──► Closed +/// │ guest FIN +/// ▼ +/// LastAck ──── LAST_ACK_TIMEOUT ────► Closed +/// ``` #[derive(Debug, Clone, Copy, PartialEq)] -#[allow(dead_code)] -pub(crate) enum TcpNatState { +pub enum TcpNatState { /// Guest sent SYN; we responded with SYN-ACK; waiting for guest's /// final ACK to complete the outbound 3-way handshake. 
SynReceived, /// We synthesized a SYN to the guest (port-forwarding); waiting /// for the guest's SYN-ACK to advance to Established. SynSent, + /// Both sides exchanged handshake; data flows in both directions. Established, + /// Guest closed its write side (sent FIN); we ACKed and called + /// shutdown(Write) on the host socket. Host response data may still + /// be in-flight — relay continues until host EOF. FinWait1, - FinWait2, + /// Host closed its write side first (we saw EOF); we sent a FIN to + /// the guest. Guest may still send data, which the relay forwards. CloseWait, + /// We have sent our FIN to the guest; waiting for the guest's final + /// ACK. Reaped on ACK or after LAST_ACK_TIMEOUT. LastAck, + /// Connection fully closed; entry pending removal from the flow table. Closed, } @@ -199,6 +232,14 @@ struct TcpNatEntry { /// via `next_flow_token(PROTO_TAG_TCP)` and stored here so unregister /// sites never need to recompute it. flow_token: u64, + /// Wall clock when the entry's state last changed. Used by + /// LAST_ACK_TIMEOUT reaping in relay_tcp_nat_data so a missing + /// final ACK doesn't leak the entry forever. + last_state_change: Instant, + /// True once we have sent our FIN to the guest. Prevents re-sending + /// FIN on repeated epoll readiness events for the same transition. + /// Read in relay_tcp_nat_data's FIN-emit logic (Task 3). + our_fin_sent: bool, } /// Key for the ICMP echo NAT table: (guest ICMP id, destination IP). 
@@ -732,6 +773,8 @@ impl SlirpBackend { last_activity: Instant::now(), bytes_in_flight: 0, flow_token: token, + last_state_change: Instant::now(), + our_fin_sent: false, }; let host_fd = entry.host_stream.as_raw_fd(); let flow_key = FlowKey::Tcp(key); @@ -1479,6 +1522,8 @@ impl SlirpBackend { last_activity: Instant::now(), bytes_in_flight: 0, flow_token: token, + last_state_change: Instant::now(), + our_fin_sent: false, }; self.flow_table.insert(flow_key, FlowEntry::Tcp(entry)); self.token_to_key.insert(token, flow_key); @@ -1584,6 +1629,16 @@ impl SlirpBackend { return Ok(()); } + // ACK while in LastAck — guest acknowledged our FIN. Reap. + // Placed before the SynReceived ACK branch to be explicit (the + // states are mutually exclusive, but explicit ordering is clearer). + if tcp.ack() && entry.state == TcpNatState::LastAck { + debug!("SLIRP TCP: LastAck → Closed for {}:{}", dst_ip, dst_port); + entry.state = TcpNatState::Closed; + self.pending_close.push(flow_key); + return Ok(()); + } + // ACK (completing handshake or acknowledging data) if tcp.ack() && entry.state == TcpNatState::SynReceived { entry.state = TcpNatState::Established; @@ -1600,9 +1655,18 @@ impl SlirpBackend { // buffer to advance the kernel's read pointer. Without this step the // kernel buffer fills up and recv_peek keeps returning the same bytes. // - // Only runs in Established state — the SynReceived ACK above does not - // carry data acknowledgements from us yet (bytes_in_flight == 0 then). - if tcp.ack() && entry.state == TcpNatState::Established && entry.bytes_in_flight > 0 { + // Runs in Established and FinWait1: even after the guest half-closes + // (FinWait1), the relay may have already sent host response data that + // the guest must ACK. Draining the kernel buffer on ACK lets + // recv_peek see Ok(0) (EOF) once all data is consumed, which then + // triggers the FinWait1 → LastAck transition and the final FIN to guest. 
+ // + // Does not run in SynReceived — that ACK doesn't carry data acks yet. + let ack_consume_state = matches!( + entry.state, + TcpNatState::Established | TcpNatState::FinWait1 + ); + if tcp.ack() && ack_consume_state && entry.bytes_in_flight > 0 { // segment_ack: what the guest is now confirming it has received // from us (our send-side sequence space). let segment_ack: u32 = tcp.ack_number().0 as u32; @@ -1654,7 +1718,15 @@ impl SlirpBackend { } let payload = tcp.payload(); - if !payload.is_empty() && entry.state == TcpNatState::Established { + // Forward guest data in Established and CloseWait: in CloseWait the + // host closed its write side but can still read data the guest sends. + // FinWait1 is not included — in that state the guest already closed + // its write side (we called shutdown(Write) on the host socket). + let forward_data = matches!( + entry.state, + TcpNatState::Established | TcpNatState::CloseWait + ); + if !payload.is_empty() && forward_data { // Guest→host backpressure: rely on the kernel's send buffer + TCP // retransmit. ACK only the bytes the kernel accepted right now; // on WouldBlock, don't ACK at all and let the guest retransmit. @@ -1703,23 +1775,83 @@ impl SlirpBackend { // FIN from guest if tcp.fin() { debug!("SLIRP TCP: FIN from guest for {}:{}", dst_ip, dst_port); - entry.guest_ack = seq.wrapping_add(1); - let fin_ack_frame = build_tcp_packet_static( - dst_ip, - SLIRP_GUEST_IP, - dst_port, - src_port, - entry.our_seq, - entry.guest_ack, - TcpControl::Fin, - &[], - ); - self.inject_to_guest.push(fin_ack_frame); - entry.our_seq = entry.our_seq.wrapping_add(1); - entry.state = TcpNatState::Closed; - // entry last used above; borrow ends before pending_close push. - self.pending_close.push(flow_key); - return Ok(()); + match entry.state { + TcpNatState::Established => { + entry.guest_ack = seq.wrapping_add(1); + + // ACK the guest's FIN — but don't send our own FIN yet. Host + // application may have data still to send. 
We transition to + // FinWait1 and shut down the host socket's write side so the + // host knows no more data is coming from the guest. + let ack_frame = build_tcp_packet_static( + dst_ip, + SLIRP_GUEST_IP, + dst_port, + src_port, + entry.our_seq, + entry.guest_ack, + TcpControl::None, + &[], + ); + self.inject_to_guest.push(ack_frame); + + if let Err(e) = entry.host_stream.shutdown(std::net::Shutdown::Write) { + warn!( + "SLIRP TCP: shutdown(Write) failed on guest FIN, falling back \ + to immediate close: {}", + e + ); + entry.state = TcpNatState::Closed; + self.pending_close.push(flow_key); + return Ok(()); + } + + entry.state = TcpNatState::FinWait1; + entry.last_state_change = Instant::now(); + trace!( + "SLIRP TCP: state Established → FinWait1 for {}:{}", + dst_ip, + dst_port + ); + return Ok(()); + } + TcpNatState::CloseWait => { + // Host already closed its write side; guest just closed + // too. Shut down our write side of host_stream so the + // host application's read sees EOF, ACK the guest's FIN, + // and transition to LastAck waiting for guest's final ACK + // of our FIN (which was already sent when we entered + // CloseWait). + entry.guest_ack = seq.wrapping_add(1); + let ack_frame = build_tcp_packet_static( + dst_ip, + SLIRP_GUEST_IP, + dst_port, + src_port, + entry.our_seq, + entry.guest_ack, + TcpControl::None, + &[], + ); + self.inject_to_guest.push(ack_frame); + if let Err(e) = entry.host_stream.shutdown(std::net::Shutdown::Write) { + // Non-fatal: host already closed; ENOTCONN or similar + // is expected in some cases. + trace!("SLIRP TCP: shutdown(Write) in CloseWait (non-fatal): {}", e); + } + entry.state = TcpNatState::LastAck; + entry.last_state_change = Instant::now(); + trace!( + "SLIRP TCP: state CloseWait → LastAck for {}:{}", + dst_ip, + dst_port + ); + return Ok(()); + } + _ => { + // Repeat FIN or unexpected — ACK and stay where we are. 
+ } + } } // RST from guest @@ -1760,14 +1892,34 @@ impl SlirpBackend { .into_iter() .collect(); - // Idle-timeout sweep: scan flow_table once without collecting a - // separate key Vec. 300-second inactivity applies regardless of epoll - // readiness; this is O(n) in the number of TCP flows. + // Timeout sweep: one pass over the flow table handles two independent + // timeout conditions without a separate Vec or extra allocation. + // Uses to_remove_set (HashSet) for O(1) membership checks. const TCP_IDLE_TIMEOUT: Duration = Duration::from_secs(300); for (flow_key, entry) in &self.flow_table { if let FlowEntry::Tcp(tcp_entry) = entry { + if to_remove_set.contains(flow_key) { + continue; + } + // Standard idle-timeout: 300 s of inactivity in any state. if tcp_entry.last_activity.elapsed() > TCP_IDLE_TIMEOUT { to_remove_set.insert(*flow_key); + continue; + } + // LastAck-timeout: final guest ACK never arrived. Reap so a + // misbehaving or crashed guest doesn't leak entries forever. + if tcp_entry.state == TcpNatState::LastAck + && tcp_entry.last_state_change.elapsed() > LAST_ACK_TIMEOUT + { + warn!( + "SLIRP TCP: LastAck timeout for guest_port={}, reaping", + if let FlowKey::Tcp(k) = flow_key { + k.guest_src_port + } else { + 0 + } + ); + to_remove_set.insert(*flow_key); } } } @@ -1799,7 +1951,13 @@ impl SlirpBackend { continue; }; - if entry.state != TcpNatState::Established { + // Relay data for Established and FinWait1 (guest half-closed; + // host may still have data to send). Skip all other states. + let relay_data = matches!( + entry.state, + TcpNatState::Established | TcpNatState::FinWait1 + ); + if !relay_data { continue; } @@ -1811,13 +1969,42 @@ impl SlirpBackend { let mut peek_buf = [0u8; 65536]; match recv_peek(&entry.host_stream, &mut peek_buf) { Ok(0) => { - // Host closed the connection. Send FIN to guest below. + // Host closed the connection. 
debug!( - "SLIRP TCP: host EOF on flow guest_port={}, marking Closed", + "SLIRP TCP: host EOF on flow guest_port={}", key.guest_src_port ); - entry.state = TcpNatState::Closed; - became_closed = true; + match entry.state { + TcpNatState::Established => { + // Host closed first → CloseWait. We send FIN to + // guest; guest may still have data to send which + // we'll forward (host's write side may be closed + // already — that's a guest write failure, not our + // concern). + entry.state = TcpNatState::CloseWait; + entry.last_state_change = Instant::now(); + trace!( + "SLIRP TCP: Established → CloseWait for guest_port={}", + key.guest_src_port + ); + became_closed = false; + } + TcpNatState::FinWait1 => { + // Guest closed first; now host has finished writing. + // Send FIN to guest, transition to LastAck. + entry.state = TcpNatState::LastAck; + entry.last_state_change = Instant::now(); + trace!( + "SLIRP TCP: FinWait1 → LastAck for guest_port={}", + key.guest_src_port + ); + became_closed = false; + } + _ => { + // Already in a closing state or Closed — no action. + became_closed = false; + } + } } Ok(peek_n) => { let in_flight = entry.bytes_in_flight as usize; @@ -1850,7 +2037,7 @@ impl SlirpBackend { ); } // else: kernel buffer holds only already-in-flight bytes. - // Wait for guest ACK before sending more (Task 3.4). + // Wait for guest ACK before sending more. } Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { // Kernel recv buffer empty; nothing to do this poll. @@ -1865,8 +2052,27 @@ impl SlirpBackend { } } - // FIN if host closed - if entry.state == TcpNatState::Closed { + // Send FIN if we just transitioned to a state that demands one. 
+            let needs_fin =
+                matches!(entry.state, TcpNatState::CloseWait | TcpNatState::LastAck);
+            if needs_fin && !entry.our_fin_sent {
+                fin_frame = Some(build_tcp_packet_static(
+                    key.dst_ip,
+                    SLIRP_GUEST_IP,
+                    key.dst_port,
+                    key.guest_src_port,
+                    entry.our_seq,
+                    entry.guest_ack,
+                    TcpControl::Fin,
+                    &[],
+                ));
+                entry.our_seq = entry.our_seq.wrapping_add(1);
+                entry.our_fin_sent = true;
+                trace!("SLIRP TCP: sent FIN to guest, state={:?}", entry.state);
+            }
+
+            // Legacy: FIN for the immediate Closed path (error or RST).
+            if entry.state == TcpNatState::Closed && became_closed && fin_frame.is_none() {
                 fin_frame = Some(build_tcp_packet_static(
                     key.dst_ip,
                     SLIRP_GUEST_IP,
@@ -2605,6 +2811,8 @@ impl SlirpBackend {
             last_activity: Instant::now(),
             bytes_in_flight: 0,
             flow_token: token,
+            last_state_change: Instant::now(),
+            our_fin_sent: false,
         };
         self.flow_table.insert(flow_key, FlowEntry::Tcp(entry));
         self.token_to_key.insert(token, flow_key);
@@ -2631,7 +2839,7 @@ impl SlirpBackend {
     /// Return the `TcpNatState` for the flow identified by `(guest_port, GATEWAY_IP, high_port)`,
     /// or `None` if no such entry exists in the flow table.
     #[allow(dead_code)]
-    pub(crate) fn tcp_flow_state(&self, guest_port: u16, high_port: u16) -> Option<TcpNatState> {
+    pub fn tcp_flow_state(&self, guest_port: u16, high_port: u16) -> Option<TcpNatState> {
         let key = NatKey {
             guest_src_port: guest_port,
             dst_ip: SLIRP_GATEWAY_IP,
             dst_port: high_port,
@@ -2692,6 +2900,64 @@ impl SlirpBackend {
         self.epoll = Arc::new(new_epoll_inner);
         self.epoll_waker = new_waker;
     }
+
+    /// Insert a synthetic `LastAck` entry into the flow table.
+    ///
+    /// Used by `tcp_last_ack_timeout_reaps_stale_entry` to pre-seed a flow
+    /// in the LastAck state without going through a full half-close exchange.
+    ///
+    /// The entry's `last_state_change` is set to `Instant::now()` and can be
+    /// back-dated with [`Self::set_synthetic_last_state_change`] to simulate
+    /// an expired timeout.
+ pub fn insert_synthetic_lastack_entry( + &mut self, + guest_port: u16, + high_port: u16, + host_stream: TcpStream, + ) { + let key = NatKey { + guest_src_port: guest_port, + dst_ip: SLIRP_GATEWAY_IP, + dst_port: high_port, + }; + let token = next_flow_token(PROTO_TAG_TCP); + let entry = TcpNatEntry { + host_stream, + state: TcpNatState::LastAck, + our_seq: 1, + guest_ack: 1, + last_activity: Instant::now(), + bytes_in_flight: 0, + flow_token: token, + last_state_change: Instant::now(), + our_fin_sent: true, + }; + self.flow_table + .insert(FlowKey::Tcp(key), FlowEntry::Tcp(entry)); + } + + /// Back-date the `last_state_change` of the flow identified by + /// `(guest_port, GATEWAY_IP, high_port)` by `age`. Used by + /// `tcp_last_ack_timeout_reaps_stale_entry` to simulate a LastAck + /// entry that has been sitting past `LAST_ACK_TIMEOUT` without + /// receiving the guest's final ACK. + pub fn set_synthetic_last_state_change( + &mut self, + guest_port: u16, + high_port: u16, + age: Duration, + ) { + let key = FlowKey::Tcp(NatKey { + guest_src_port: guest_port, + dst_ip: SLIRP_GATEWAY_IP, + dst_port: high_port, + }); + if let Some(FlowEntry::Tcp(entry)) = self.flow_table.get_mut(&key) { + // Instant::now() - age: subtract by creating an instant that + // appears to have occurred `age` ago. + entry.last_state_change = Instant::now().checked_sub(age).unwrap_or_else(Instant::now); + } + } } #[cfg(test)] diff --git a/tests/network_baseline.rs b/tests/network_baseline.rs index d5115426..f7c33393 100644 --- a/tests/network_baseline.rs +++ b/tests/network_baseline.rs @@ -1291,3 +1291,353 @@ fn epoll_set_rebuilt_from_flow_table_smoke() { "rebuild_epoll_from_flow_table must register all live flow FDs" ); } + +// ── Phase 6.1: TCP half-close pins ────────────────────────────────────── + +/// BROKEN_ON_PURPOSE: guest sends FIN after data; current code marks state=Closed +/// immediately on guest FIN, so host's response data is dropped. 
+/// Flips to PASS when Task 2–3 implementation lands.
+#[test]
+fn tcp_half_close_guest_writes_first() {
+    use std::io::{Read, Write};
+    use std::net::TcpListener;
+
+    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
+    let host_port = listener.local_addr().unwrap().port();
+
+    let server = std::thread::spawn(move || -> Vec<u8> {
+        let (mut sock, _) = listener.accept().unwrap();
+        // Read until EOF (guest will send data + FIN).
+        let mut request = Vec::new();
+        let _ = sock.read_to_end(&mut request);
+        // After guest's shutdown(WRITE), we still want to send response.
+        sock.write_all(b"HTTP/1.1 200 OK\r\n\r\nBODY").unwrap();
+        // Then close.
+        drop(sock);
+        request
+    });
+
+    let mut stack = SlirpBackend::new().unwrap();
+
+    // Guest 3-way handshake.
+    let our_seq = 1000u32;
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            our_seq,
+            0,
+            TcpControl::Syn,
+            &[],
+        ))
+        .unwrap();
+    let mut gateway_seq = 0u32;
+    for f in drain_n(&mut stack, 4) {
+        if let Some((s, _, ctrl, _)) = parse_tcp_to_guest(&f) {
+            if matches!(ctrl, TcpControl::Syn) {
+                gateway_seq = s;
+                break;
+            }
+        }
+    }
+    // Complete 3-way handshake: ACK the SYN-ACK.
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            our_seq + 1,
+            gateway_seq + 1,
+            TcpControl::None,
+            &[],
+        ))
+        .unwrap();
+
+    // Guest sends "HELLO" data + FIN.
+    let request = b"HELLO";
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            our_seq + 1,
+            gateway_seq + 1,
+            TcpControl::Psh,
+            request,
+        ))
+        .unwrap();
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            our_seq + 1 + request.len() as u32,
+            gateway_seq + 1,
+            TcpControl::Fin,
+            &[],
+        ))
+        .unwrap();
+
+    // Drive drain_to_guest until we see host's response data AND its FIN.
+    // We must ACK each data segment as it arrives so the kernel recv buffer
+    // drains — recv_peek returns Ok(0) (EOF) only once all buffered bytes
+    // are consumed via the ACK-driven read path.
+    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
+    let mut response_bytes: Vec<u8> = Vec::new();
+    let mut saw_host_fin = false;
+    while std::time::Instant::now() < deadline {
+        let frames = drain_n(&mut stack, 1);
+        for f in &frames {
+            if let Some((seq, _ack, ctrl, payload_len)) = parse_tcp_to_guest(f) {
+                if payload_len > 0 {
+                    let eth = EthernetFrame::new_unchecked(f.as_slice());
+                    let ip = Ipv4Packet::new_unchecked(eth.payload());
+                    let tcp = TcpPacket::new_unchecked(ip.payload());
+                    response_bytes.extend_from_slice(tcp.payload());
+                    // ACK the data so the relay's ACK-driven consume path can
+                    // drain the kernel recv buffer and eventually see EOF.
+                    let ack_num = seq.wrapping_add(payload_len as u32);
+                    stack
+                        .process_guest_frame(&build_tcp_frame(
+                            SLIRP_GATEWAY_IP,
+                            GUEST_EPHEMERAL_PORT,
+                            host_port,
+                            our_seq + 1 + request.len() as u32 + 1, // seq after FIN
+                            ack_num,
+                            TcpControl::None,
+                            &[],
+                        ))
+                        .unwrap();
+                }
+                if matches!(ctrl, TcpControl::Fin) {
+                    saw_host_fin = true;
+                }
+            }
+        }
+        if !response_bytes.is_empty() && saw_host_fin {
+            break;
+        }
+        std::thread::sleep(std::time::Duration::from_millis(10));
+    }
+
+    let _ = server.join();
+
+    assert_eq!(
+        &response_bytes[..],
+        b"HTTP/1.1 200 OK\r\n\r\nBODY",
+        "guest must receive ALL host response data after sending FIN"
+    );
+    assert!(saw_host_fin, "guest must receive host's FIN");
+}
+
+/// Pin for the symmetric half-close path: host writes first, then closes its
+/// write side (Established → CloseWait); guest replies with data + FIN.
+/// The host must receive the guest's reply data even after its own shutdown.
+#[test]
+fn tcp_half_close_host_writes_first() {
+    use std::io::{Read, Write};
+    use std::net::TcpListener;
+
+    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
+    let host_port = listener.local_addr().unwrap().port();
+
+    // Server: write greeting, shutdown write side, then read what guest sends back.
+    let server = std::thread::spawn(move || -> Vec<u8> {
+        let (mut sock, _) = listener.accept().unwrap();
+        sock.write_all(b"GREETING").unwrap();
+        sock.shutdown(std::net::Shutdown::Write).unwrap();
+        let mut buf = Vec::new();
+        let _ = sock.read_to_end(&mut buf);
+        buf
+    });
+
+    let mut stack = SlirpBackend::new().unwrap();
+
+    // Guest 3-way handshake.
+    let our_seq = 2000u32;
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            our_seq,
+            0,
+            TcpControl::Syn,
+            &[],
+        ))
+        .unwrap();
+    let mut gateway_seq = 0u32;
+    for f in drain_n(&mut stack, 4) {
+        if let Some((s, _, ctrl, _)) = parse_tcp_to_guest(&f) {
+            if matches!(ctrl, TcpControl::Syn) {
+                gateway_seq = s;
+                break;
+            }
+        }
+    }
+    // Complete handshake: ACK the SYN-ACK.
+    stack
+        .process_guest_frame(&build_tcp_frame(
+            SLIRP_GATEWAY_IP,
+            GUEST_EPHEMERAL_PORT,
+            host_port,
+            our_seq + 1,
+            gateway_seq + 1,
+            TcpControl::None,
+            &[],
+        ))
+        .unwrap();
+
+    // Drive drain_to_guest until we see "GREETING" data AND host's FIN.
+    // ACK each data segment so the relay's ACK-driven consume path works.
+    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
+    let mut received: Vec<u8> = Vec::new();
+    let mut saw_host_fin = false;
+    // gateway_seq_next: the relay's current sequence number (advances with data + FIN).
+ let mut gateway_seq_next = gateway_seq + 1; // after SYN-ACK + while std::time::Instant::now() < deadline { + let frames = drain_n(&mut stack, 1); + for f in &frames { + if let Some((seq, _ack, ctrl, payload_len)) = parse_tcp_to_guest(f) { + if payload_len > 0 { + let eth = EthernetFrame::new_unchecked(f.as_slice()); + let ip = Ipv4Packet::new_unchecked(eth.payload()); + let tcp = TcpPacket::new_unchecked(ip.payload()); + received.extend_from_slice(tcp.payload()); + gateway_seq_next = seq.wrapping_add(payload_len as u32); + // ACK the data. + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + our_seq + 1, + gateway_seq_next, + TcpControl::None, + &[], + )) + .unwrap(); + } + if matches!(ctrl, TcpControl::Fin) { + saw_host_fin = true; + // ACK the host FIN so the CloseWait entry receives it. + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + our_seq + 1, + gateway_seq_next.wrapping_add(1), + TcpControl::None, + &[], + )) + .unwrap(); + } + } + } + if received == b"GREETING" && saw_host_fin { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + + assert_eq!( + &received[..], + b"GREETING", + "guest must receive host's greeting" + ); + assert!( + saw_host_fin, + "guest must receive host's FIN (CloseWait transition)" + ); + + // Guest sends reply data + FIN after receiving host's greeting and FIN. + // The relay is now in CloseWait; guest data should still be forwarded to host. 
+ let reply = b"REPLY"; + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + our_seq + 1, + gateway_seq_next.wrapping_add(1), + TcpControl::Psh, + reply, + )) + .unwrap(); + stack + .process_guest_frame(&build_tcp_frame( + SLIRP_GATEWAY_IP, + GUEST_EPHEMERAL_PORT, + host_port, + our_seq + 1 + reply.len() as u32, + gateway_seq_next.wrapping_add(1), + TcpControl::Fin, + &[], + )) + .unwrap(); + // Drain to process the guest FIN (CloseWait → LastAck). + drain_n(&mut stack, 4); + + let host_received = server.join().unwrap(); + assert_eq!( + &host_received[..], + b"REPLY", + "host must receive guest's reply data after CloseWait" + ); +} + +/// Verify that a LastAck entry whose `last_state_change` is older than +/// `LAST_ACK_TIMEOUT` (60 s) is reaped by the next `drain_to_guest` sweep, +/// preventing a leaked flow table entry when the guest drops the final ACK. +/// +/// Gated on `bench-helpers` because it uses synthetic-injection helpers +/// (`insert_synthetic_lastack_entry`, `set_synthetic_last_state_change`, +/// `tcp_flow_state`) that widen internal visibility for external test/bench +/// consumers. Default `cargo test` skips this pin; CI runs it via +/// `cargo test --features bench-helpers -- --test-threads=1`. +#[cfg(feature = "bench-helpers")] +#[test] +fn tcp_last_ack_timeout_reaps_stale_entry() { + use std::net::TcpListener; + + const GUEST_PORT: u16 = 8080; + const HIGH_PORT: u16 = 60000; + + let mut backend = SlirpBackend::new().expect("backend"); + + // Open a real TCP connection so host_stream is a valid socket. + let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); + let host_stream = + std::net::TcpStream::connect(listener.local_addr().unwrap()).expect("connect"); + host_stream.set_nonblocking(true).ok(); + + // Insert a synthetic LastAck entry (already past our FIN, waiting for + // guest's final ACK which will never arrive). 
+ backend.insert_synthetic_lastack_entry(GUEST_PORT, HIGH_PORT, host_stream); + + // Verify the entry is present. + assert_eq!( + backend.tcp_flow_state(GUEST_PORT, HIGH_PORT), + Some(void_box::network::slirp::TcpNatState::LastAck), + "entry must start in LastAck" + ); + + // Back-date last_state_change by 70 s (> LAST_ACK_TIMEOUT = 60 s). + backend.set_synthetic_last_state_change( + GUEST_PORT, + HIGH_PORT, + std::time::Duration::from_secs(70), + ); + + // One drain_to_guest cycle triggers the timeout sweep. + let mut out = Vec::new(); + backend.drain_to_guest(&mut out); + + // The entry should now be gone. + assert!( + backend.tcp_flow_state(GUEST_PORT, HIGH_PORT).is_none(), + "LastAck entry past LAST_ACK_TIMEOUT must be reaped by drain_to_guest" + ); +}