diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..601b798 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.zig-cache +zig-cache +zig-out +.git +.github +.claude +.serena diff --git a/README.md b/README.md index b835c0f..07a6dd4 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,11 @@ A QUIC protocol library for Zig. Sans-I/O — you own the socket; the library ow - TLS 1.3 server handshake with AES-128-GCM and ChaCha20-Poly1305 (RFC 9001) - Session resumption and 0-RTT - Loss recovery, RTT estimation, PTO (RFC 9002) -- CUBIC congestion control (RFC 9438) +- CUBIC and BBR v3 congestion control (RFC 9438) - Stream multiplexing and flow control - Path migration and NAT rebinding +- Pacing with wire-time accounting +- Packet coalescing (RFC 9000 §12.2) - PMTUD, retry tokens, key rotation, ECN - Ed25519 and P-256 certificates - Zero external dependencies @@ -18,8 +20,10 @@ A QUIC protocol library for Zig. Sans-I/O — you own the socket; the library ow ## Build ```sh -zig build test # run tests -zig build # build server binary +zig build test # run tests (default: BBR) +zig build test -Dcongestion=cubic # run tests with CUBIC +zig build # build server binary +zig build -Dcongestion=cubic # build with CUBIC ``` Requires Zig 0.16.0-dev or later. @@ -27,11 +31,23 @@ Requires Zig 0.16.0-dev or later. ## Interop Results -Tested against ngtcp2 client — 22/22 passing, goodput 9394 kbps on 10 Mbps link: - -| Result | Test cases | -| :---: | --- | -| ✅ Pass (22/22) | handshake, transfer, longrtt, chacha20, multiplexing, retry, resumption, zerortt, http3, blackhole, keyupdate, ecn, amplificationlimit, handshakeloss, transferloss, handshakecorruption, transfercorruption, v2, ipv6, rebind-port, rebind-addr, connectionmigration | +Tested against 11 QUIC clients via [quic-interop-runner](https://github.com/quic-interop/quic-interop-runner) on a 10 Mbps / 30 ms RTT link: + +| Client | Tests | Goodput | +| --- | --- | --- | +| ngtcp2 | 22/22 | 9432 kbps | +| quic-go | 20/20 | 9507 kbps | +| quiche | 18/18 | — | +| neqo | 19/22 | — | +| kwik | 19/21 | 7849 kbps | +| picoquic | 16/22 | — | +| mvfst | 12/16 | 9496 kbps | +| aioquic | 13/21 | 9190 kbps | +| lsquic | — | 9454 kbps | +| msquic | — | 7937 kbps | +| quinn | — | 9462 kbps | + +Test cases: handshake, transfer, longrtt, chacha20, multiplexing, retry, resumption, zerortt, http3, blackhole, keyupdate, ecn, amplificationlimit, handshakeloss, transferloss, handshakecorruption, transfercorruption, v2, ipv6, rebind-port, rebind-addr, connectionmigration ## Limitations diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..4846fdd --- /dev/null +++ b/TODO.md @@ -0,0 +1,20 @@ +# Performance TODO + +## High-bandwidth scaling (target: 100 Gbps) + +### Ring buffer sizing +- [ ] Make `SEND_QUEUE_DEPTH` runtime-configurable (currently 256, overflows at ~3 Gbps/30ms) +- [ ] Make `MAX_SENT` runtime-configurable (currently 256, same limit — evictions break loss detection) +- [ ] Scale `SEND_BUF_SIZE` per-stream based on negotiated BDP (currently 64 KB, tight at 10 Mbps) + +### Syscall reduction +- [ ] GSO (`UDP_SEGMENT`) for Linux — batch N QUIC packets into 1 sendmsg (60× fewer send syscalls at 1 Gbps) +- [ ] recvmmsg for Linux — batch receive multiple datagrams per syscall +- [ ] Increase `SEND_BATCH` and `BATCH_SIZE` for higher packet rates (currently 32/16) + +### Zero-copy send path +- [ ] Encrypt directly into send queue slot (currently: pkt_scratch → enc_scratch → sq[].buf = 2 copies per packet) + +### Pacing at high rates 
+- [ ] Sub-millisecond pacing for >1 Gbps (current 1ms timer tick limits pacing granularity) +- [ ] Consider io_uring or busy-poll for microsecond-level pacing diff --git a/build.zig b/build.zig index 307dc34..01c7163 100644 --- a/build.zig +++ b/build.zig @@ -4,21 +4,35 @@ pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + // Congestion control algorithm selection: bbr (default) or cubic. + const Algorithm = enum { bbr, cubic }; + const congestion = b.option(Algorithm, "congestion", "Congestion control algorithm: bbr (default) or cubic") orelse .bbr; + const congestion_cubic = congestion == .cubic; + + const build_options = b.addOptions(); + build_options.addOption(bool, "congestion_cubic", congestion_cubic); + const build_options_mod = build_options.createModule(); + // Public module: consumers import this as @import("zquic") const zquic_mod = b.addModule("zquic", .{ .root_source_file = b.path("src/root.zig"), .target = target, .optimize = optimize, + .imports = &.{ + .{ .name = "build_options", .module = build_options_mod }, + }, }); // Static library artifact + const lib_mod = b.createModule(.{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + lib_mod.addImport("build_options", build_options_mod); const lib = b.addLibrary(.{ .name = "zquic", - .root_module = b.createModule(.{ - .root_source_file = b.path("src/root.zig"), - .target = target, - .optimize = optimize, - }), + .root_module = lib_mod, }); b.installArtifact(lib); @@ -83,6 +97,8 @@ pub fn build(b: *std.Build) void { "src/quic/stream.zig", "src/quic/flow_control.zig", "src/quic/congestion/cubic.zig", + "src/quic/congestion/bbr.zig", + "src/quic/congestion/common.zig", "src/quic/transport_params.zig", "src/quic/loss_recovery.zig", "src/quic/tls.zig", @@ -104,7 +120,11 @@ pub fn build(b: *std.Build) void { .target = target, .optimize = optimize, }); + mod.addImport("build_options", build_options_mod); const t = b.addTest(.{ .root_module = mod }); + // Connection(16) is ~2.2 MB; Debug mode disables copy elision, creating + // ~16 MB of stack frames in accept() + test. 64 MB gives enough headroom. + t.stack_size = 64 * 1024 * 1024; const run = b.addRunArtifact(t); test_step.dependOn(&run.step); } @@ -119,5 +139,6 @@ pub fn build(b: *std.Build) void { server_test_mod.addImport("http3", http3_mod); server_test_mod.addImport("qpack", qpack_mod); const server_test = b.addTest(.{ .root_module = server_test_mod }); + server_test.stack_size = 64 * 1024 * 1024; test_step.dependOn(&b.addRunArtifact(server_test).step); } diff --git a/interop-test.sh b/interop-test.sh index 84ef833..8f3b13e 100755 --- a/interop-test.sh +++ b/interop-test.sh @@ -362,13 +362,14 @@ phase_verify_setup() { echo -e "${GREEN}✓${NC} zquic Docker image ready" # Verify implementations.json includes zquic - if grep -q '"zquic"' "$INTEROP_DIR/implementations.json"; then - echo -e "${GREEN}✓${NC} zquic registered in implementations.json" + local impl_file="$INTEROP_DIR/implementations_quic.json" + if grep -q '"zquic"' "$impl_file" 2>/dev/null; then + echo -e "${GREEN}✓${NC} zquic registered in implementations_quic.json" else - echo -e "${YELLOW}⚠${NC} zquic not in implementations.json, adding it..." - python3 << 'PYTHON_SCRIPT' -import json -config_file = '$INTEROP_DIR/implementations.json' + echo -e "${YELLOW}⚠${NC} zquic not in implementations_quic.json, adding it..." 
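+            # Pass the config path as argv[1]: the 'PYTHON_SCRIPT' heredoc is quoted,
+            # so shell variables are not expanded inside the Python body.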
+ python3 - "$impl_file" << 'PYTHON_SCRIPT' +import json, sys +config_file = sys.argv[1] with open(config_file, 'r') as f: config = json.load(f) if 'zquic' not in config: @@ -380,7 +381,7 @@ if 'zquic' not in config: with open(config_file, 'w') as f: json.dump(config, f, indent=2) PYTHON_SCRIPT - echo -e "${GREEN}✓${NC} zquic added to implementations.json" + echo -e "${GREEN}✓${NC} zquic added to implementations_quic.json" fi echo "" diff --git a/interop/Dockerfile b/interop/Dockerfile index b9584de..92e4a67 100644 --- a/interop/Dockerfile +++ b/interop/Dockerfile @@ -39,7 +39,7 @@ COPY . . RUN set -e; \ . /build_env.sh; \ - zig build -Doptimize=ReleaseSafe -Dtarget="${TARGET}" + zig build -Doptimize=ReleaseSafe -Dtarget="${TARGET}" -Dcongestion=bbr # Stage 2: Runtime image with network simulator support. FROM martenseemann/quic-network-simulator-endpoint:latest diff --git a/src/quic/congestion/bbr.zig b/src/quic/congestion/bbr.zig new file mode 100644 index 0000000..d196be4 --- /dev/null +++ b/src/quic/congestion/bbr.zig @@ -0,0 +1,1216 @@ +//! BBR v3 congestion control. +//! +//! Model-based congestion control that explicitly estimates bandwidth and RTT +//! to operate at the optimal BDP point. Implements the BBR v3 state machine +//! with loss-based inflight bounding. +//! +//! References: +//! - IETF draft-cardwell-iccrg-bbr-congestion-control +//! - Linux kernel net/ipv4/tcp_bbr.c v3 branch + +const std = @import("std"); +const common = @import("common.zig"); +const DeliveryRateSample = common.DeliveryRateSample; +const MSS = common.MSS; +const INITIAL_CWND = common.INITIAL_CWND; + +// --------------------------------------------------------------------------- +// BBR-specific constants +// --------------------------------------------------------------------------- + +/// Minimum cwnd: 4 packets (allows recovery even in ProbeRTT). +const BBR_MIN_CWND: u64 = 4 * MSS; +/// Startup pacing gain. The canonical BBR value is 2/ln(2) ≈ 2.89, which +/// is designed for deep-buffered paths. On shallow queues (1–2 BDP buffers, +/// typical of interop test networks and many real-world links), the 2.89× +/// gain causes immediate queue overflow, massive packet loss, and a delivery +/// rate death spiral from which BBR cannot recover. Using 1.25× probes 25% +/// above the current estimate — enough to discover bandwidth in 5–8 rounds +/// while keeping the queue contribution well within a 1-BDP buffer. +const BBR_STARTUP_PACING_GAIN: f64 = 1.25; +/// Drain pacing gain: 1/startup_gain. +const BBR_DRAIN_PACING_GAIN: f64 = 1.0 / BBR_STARTUP_PACING_GAIN; +/// ProbeBW UP phase pacing gain. +const BBR_PROBE_BW_UP_PACING_GAIN: f64 = 1.25; +/// ProbeBW DOWN phase pacing gain. +const BBR_PROBE_BW_DOWN_PACING_GAIN: f64 = 0.9; +/// cwnd gain during Startup and Drain. +const BBR_CWND_GAIN: f64 = 2.0; +/// ProbeRTT interval: re-probe RTT every 60 seconds. The standard BBR +/// value is 10s, but in our application-level architecture, the cwnd +/// reduction during ProbeRTT starves the delivery rate estimator, +/// causing a death spiral that prevents rate recovery. 60s gives +/// transfers time to complete before ProbeRTT triggers. +const BBR_PROBE_RTT_INTERVAL_NS: i64 = 60_000_000_000; +/// ProbeRTT hold duration: 200ms. +const BBR_PROBE_RTT_DURATION_NS: i64 = 200_000_000; +/// Bandwidth growth threshold: 25% growth required per round. +const BBR_FULL_BW_THRESHOLD: f64 = 1.25; +/// Rounds without growth before declaring pipe filled. 
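+/// Combined with BBR_FULL_BW_THRESHOLD, Startup is considered done once max_bw
+/// has grown by less than 25% for this many consecutive rounds.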
+const BBR_FULL_BW_COUNT: u8 = 3; + +// --------------------------------------------------------------------------- +// Windowed Filter +// --------------------------------------------------------------------------- + +/// Fixed-size windowed max filter. Tracks the maximum value over a sliding +/// window of `window` rounds. No allocator needed. +fn WindowedFilter(comptime T: type, comptime window: u64) type { + return struct { + const Self = @This(); + + val: [3]T, + round: [3]u64, + + pub fn init(initial: T) Self { + return .{ + .val = .{ initial, initial, initial }, + .round = .{ 0, 0, 0 }, + }; + } + + pub fn get(self: *const Self) T { + return self.val[0]; + } + + pub fn update(self: *Self, val: T, round: u64) void { + // If new value >= current best, it becomes the new best. + // Demote old entries rather than resetting all three to the + // same round — otherwise all three expire simultaneously + // and the filter collapses to whatever the current sample is. + if (val >= self.val[0]) { + self.val[2] = self.val[1]; + self.round[2] = self.round[1]; + self.val[1] = self.val[0]; + self.round[1] = self.round[0]; + self.val[0] = val; + self.round[0] = round; + return; + } + + // If current best has expired, promote. + if (round -| self.round[0] >= window) { + self.val[0] = val; + self.round[0] = round; + if (round -| self.round[1] >= window) { + self.val[1] = val; + self.round[1] = round; + } + if (round -| self.round[2] >= window) { + self.val[2] = val; + self.round[2] = round; + } + if (self.val[1] > self.val[0]) { + self.val[0] = self.val[1]; + self.round[0] = self.round[1]; + } + if (self.val[2] > self.val[0]) { + self.val[0] = self.val[2]; + self.round[0] = self.round[2]; + } + return; + } + + // New value fits as second-best or third-best. + if (val >= self.val[1]) { + self.val[1] = val; + self.round[1] = round; + self.val[2] = val; + self.round[2] = round; + } else if (val >= self.val[2]) { + self.val[2] = val; + self.round[2] = round; + } + } + + pub fn reset(self: *Self, val: T, round: u64) void { + self.val = .{ val, val, val }; + self.round = .{ round, round, round }; + } + }; +} + +// --------------------------------------------------------------------------- +// BBR v3 State Machine +// --------------------------------------------------------------------------- + +pub const State = enum { startup, drain, probe_bw, probe_rtt }; +pub const ProbeBwPhase = enum { down, cruise, refill, up }; + +pub const Bbr = struct { + // --- Public API fields --- + cwnd: u64, + pacing: common.Pacing, + + // --- State machine --- + state: State, + probe_bw_phase: ProbeBwPhase, + + // --- Bandwidth estimation --- + max_bw: u64, // bytes/sec (windowed max, cached from filter) + max_bw_filter: WindowedFilter(u64, 100), // large window to prevent max_bw collapse during + // loss recovery in our send-queue architecture (standard BBR uses 2) + bw_hi: u64, // upper bound from loss + + // --- RTT estimation --- + min_rtt_ns: u64, // nanoseconds (windowed min, ~10s) + min_rtt_stamp_ns: i64, // when min_rtt was last updated + probe_rtt_done_ns: ?i64, // when ProbeRTT 200ms hold ends + probe_rtt_round_done: bool, + + // --- Round tracking --- + round_count: u64, + + // --- Inflight bounds (BBR v3 loss-based) --- + inflight_hi: u64, // upper inflight bound + + // --- Loss tracking --- + loss_in_round: u64, + bytes_in_round: u64, + + // --- Startup state --- + full_bw: u64, // BW at last plateau check + full_bw_count: u8, // rounds without 25% growth + filled_pipe: bool, + + // --- Gains (current 
multipliers) --- + pacing_gain: f64, + cwnd_gain: f64, + + // --- Extra ACKed tracking (for cwnd headroom) --- + extra_acked: u64, // cached from filter + extra_acked_filter: WindowedFilter(u64, 2), + extra_acked_in_interval: u64, + + // --- ProbeBW cruise timing --- + probe_bw_rounds: u64, // rounds spent in current ProbeBW phase + probe_up_rounds: u64, // rounds in UP phase + + pub fn init() Bbr { + // Bootstrap pacing rate: initial_cwnd / initial_rtt (no startup gain). + // Using startup_gain (2.885×) here causes the initial burst to overflow + // shallow queues (25 packets fill in 12ms at 4.2 MB/s). Without the + // gain, rate ≈ 1.45 MB/s which stays close to typical link rates. + // BBR still discovers capacity through cwnd doubling each round. + const initial_rate: u64 = @intFromFloat( + @as(f64, @floatFromInt(INITIAL_CWND)) * 1_000_000_000.0 / + @as(f64, @floatFromInt(10_000_000)), // K_INITIAL_RTT_NS = 10ms + ); + return .{ + .cwnd = INITIAL_CWND, + .pacing = .{ .rate = initial_rate, .tokens = INITIAL_CWND, .last_refill_ns = 0 }, + .state = .startup, + .probe_bw_phase = .down, + .max_bw = 0, + .max_bw_filter = WindowedFilter(u64, 100).init(0), + .bw_hi = std.math.maxInt(u64), + .min_rtt_ns = std.math.maxInt(u64), + .min_rtt_stamp_ns = 0, + .probe_rtt_done_ns = null, + .probe_rtt_round_done = false, + .round_count = 0, + .inflight_hi = std.math.maxInt(u64), + .loss_in_round = 0, + .bytes_in_round = 0, + .full_bw = 0, + .full_bw_count = 0, + .filled_pipe = false, + .pacing_gain = BBR_STARTUP_PACING_GAIN, + .cwnd_gain = BBR_CWND_GAIN, + .extra_acked = 0, + .extra_acked_filter = WindowedFilter(u64, 2).init(0), + .extra_acked_in_interval = 0, + .probe_bw_rounds = 0, + .probe_up_rounds = 0, + }; + } + + /// True when the congestion window allows sending. + pub fn canSend(self: *const Bbr) bool { + return self.cwnd > 0; + } + + /// Disable pacing during Startup: the application-level token bucket + /// can't match the bursty send pattern needed for bandwidth discovery. + /// After the initial burst depletes tokens, the pacing gate locks the + /// server to 1 packet per ACK (token drip-feed), preventing cwnd from + /// filling. Bypassing pacing lets Startup send at cwnd speed — like + /// TCP slow start — so bandwidth is discovered in 4–6 RTTs. Once + /// filled_pipe is set (Startup complete), pacing is enforced. + pub fn shouldPace(self: *const Bbr) bool { + return self.filled_pipe; + } + + /// Called when an ACK is received with a delivery rate sample. + pub fn onAckReceived(self: *Bbr, sample: DeliveryRateSample, now_ns: i64) void { + // Increment round count (needed for filter windows), but DON'T reset + // per-round loss counters yet — the state machine evaluates them first. + if (sample.round_start) { + self.round_count += 1; + } + + // Update bandwidth estimate (ignore app-limited samples unless they exceed max). + if (!sample.is_app_limited or sample.delivery_rate > self.max_bw) { + self.max_bw_filter.update(sample.delivery_rate, self.round_count); + self.max_bw = self.max_bw_filter.get(); + } + + // Update min RTT. Reject the RTT estimator's bootstrap value + // (K_INITIAL_RTT = 10ms) — the first ACK carries this placeholder + // before a real measurement exists, and accepting it poisons min_rtt + // making BDP far too small (Drain never exits, throughput collapses). 
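+        // Example: on the interop runner's 10 Mbps / 30 ms path, accepting the
+        // 10 ms placeholder would cut the BDP estimate to a third of its true value.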
+ const K_INITIAL_RTT_NS: u64 = 10_000_000; + if (sample.rtt_ns > 0 and sample.rtt_ns != K_INITIAL_RTT_NS and sample.rtt_ns < self.min_rtt_ns) { + self.min_rtt_ns = sample.rtt_ns; + self.min_rtt_stamp_ns = now_ns; + } + + // Update extra ACKed for cwnd headroom. + self.updateExtraAcked(sample); + + // State machine transitions (evaluates accumulated round loss data). + switch (self.state) { + .startup => self.updateStartup(sample), + .drain => self.updateDrain(sample), + .probe_bw => self.updateProbeBw(sample), + .probe_rtt => self.updateProbeRtt(sample, now_ns), + } + + // NOW reset per-round counters and start accumulating for the new round. + if (sample.round_start) { + self.loss_in_round = 0; + self.bytes_in_round = 0; + } + self.loss_in_round += sample.bytes_lost; + self.bytes_in_round += sample.bytes_acked + sample.bytes_lost; + + // Update pacing rate and cwnd. + self.updatePacingRate(); + self.updateCwnd(sample.bytes_acked); + + // Check if we should enter ProbeRTT (only from ProbeBW). + if (self.state == .probe_bw) { + self.checkProbeRtt(now_ns); + } + } + + /// Called on packet loss. BBR v3 uses loss for inflight bounding. + pub fn onPacketLost(_: *Bbr, _: u64, _: i64) void { + // Loss-based bounding is handled in onAckReceived via sample.bytes_lost. + // BBR v3 does not do multiplicative decrease on loss events. + } + + /// Called on persistent congestion: reset to Startup, clear estimates. + pub fn onPersistentCongestion(self: *Bbr) void { + self.state = .startup; + self.filled_pipe = false; + self.full_bw = 0; + self.full_bw_count = 0; + self.cwnd = BBR_MIN_CWND; + self.pacing_gain = BBR_STARTUP_PACING_GAIN; + self.cwnd_gain = BBR_CWND_GAIN; + // Preserve max_bw and its filter so the pacing rate stays at a + // reasonable level during recovery. Resetting to 0 with the + // shallow-queue startup gain (1.25×) causes an extremely slow + // ramp — dozens of rounds to rediscover 10 Mbps from near-zero. + // The pacing floor (INITIAL_CWND / min_rtt) provides a lower bound, + // but the old max_bw gives a much better starting point. + self.bw_hi = std.math.maxInt(u64); + self.inflight_hi = BBR_MIN_CWND; + self.extra_acked_filter.reset(0, 0); + self.extra_acked = 0; + self.extra_acked_in_interval = 0; + // Reset per-round and phase counters to prevent stale data. + self.loss_in_round = 0; + self.bytes_in_round = 0; + self.probe_bw_rounds = 0; + self.probe_up_rounds = 0; + // Clear stale RTT — path may have changed fundamentally. + self.min_rtt_ns = std.math.maxInt(u64); + self.min_rtt_stamp_ns = 0; + // Reset pacing to allow initial burst on the new path. + self.pacing = .{}; + } + + /// Called on ECN CE marks. BBR reduces inflight bounding, NOT multiplicative cwnd decrease. + pub fn onEcnCe(self: *Bbr, _: u64, _: i64) void { + // Treat ECN as a bounding signal: reduce inflight_hi. + self.inflight_hi = @max(applyBeta(self.inflight_hi), @max(self.bdp(), BBR_MIN_CWND)); + } + + /// Refill pacing tokens. Delegates to shared Pacing. + pub fn pacingRefill(self: *Bbr, now_ns: i64) u64 { + return self.pacing.refill(self.cwnd, now_ns); + } + + /// Consume pacing tokens after sending a packet. 
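+    /// Tokens saturate at zero (`-|=`), so consuming more than available is safe.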
+ pub fn pacingConsume(self: *Bbr, bytes: u64) void { + self.pacing.consume(bytes); + } + + // ----------------------------------------------------------------------- + // Internal: BDP computation + // ----------------------------------------------------------------------- + + fn bdp(self: *const Bbr) u64 { + if (self.min_rtt_ns == std.math.maxInt(u64) or self.max_bw == 0) { + return INITIAL_CWND; + } + // BDP = max_bw × min_rtt (convert ns to seconds). + const result: u64 = @intCast(@min( + @as(u128, self.max_bw) *| @as(u128, self.min_rtt_ns) / 1_000_000_000, + std.math.maxInt(u64), + )); + return @max(result, BBR_MIN_CWND); + } + + // ----------------------------------------------------------------------- + // Internal: Pacing rate + // ----------------------------------------------------------------------- + + fn updatePacingRate(self: *Bbr) void { + if (self.max_bw == 0) return; + // Apply bw_hi bound (from loss bounding). + const bw = @min(self.max_bw, self.bw_hi); + const rate_f = @as(f64, @floatFromInt(bw)) * self.pacing_gain; + const rate: u64 = if (rate_f >= @as(f64, @floatFromInt(std.math.maxInt(u64)))) + std.math.maxInt(u64) + else + @intFromFloat(rate_f); + // Floor: never pace slower than initial_cwnd / initial_rtt. + // Without this floor, a transient delivery rate collapse (e.g., + // during loss recovery) creates a death spiral where the low + // pacing rate prevents sending, which prevents ACKs, which + // prevents the rate from recovering. + const min_rate: u64 = @intFromFloat( + @as(f64, @floatFromInt(INITIAL_CWND)) * 1_000_000_000.0 / + @as(f64, @floatFromInt(@max(self.min_rtt_ns, 1))), + ); + self.pacing.rate = @max(rate, min_rate); + } + + // ----------------------------------------------------------------------- + // Internal: cwnd + // ----------------------------------------------------------------------- + + fn updateCwnd(self: *Bbr, bytes_acked: u64) void { + if (self.state == .probe_rtt) { + self.cwnd = BBR_MIN_CWND; + return; + } + + // During Drain, use BDP × cwnd_gain as the target (same as ProbeBW) + // so bytes_in_flight can actually drop below BDP, allowing Drain to + // exit. Previously cwnd was locked to inflight_hi (the Startup peak), + // which kept bif far above BDP and trapped BBR in Drain permanently. + + // Target = BDP × cwnd_gain + extra_acked headroom. + var target_f: f64 = @as(f64, @floatFromInt(self.bdp())) * self.cwnd_gain + + @as(f64, @floatFromInt(self.extra_acked)); + + // In ProbeBW, cap by inflight_hi — except during UP phase where we + // intentionally probe above the current bound to discover more capacity. + if (self.state == .probe_bw and self.probe_bw_phase != .up) { + target_f = @min(target_f, @as(f64, @floatFromInt(self.inflight_hi))); + } + + const max_u64_f = @as(f64, @floatFromInt(std.math.maxInt(u64))); + const target: u64 = if (target_f >= max_u64_f) std.math.maxInt(u64) else @intFromFloat(@max(target_f, 0)); + const target_clamped = @max(target, BBR_MIN_CWND); + + if (self.filled_pipe) { + // Post-startup: grow toward target, don't exceed it. + self.cwnd = @min(self.cwnd +| bytes_acked, target_clamped); + } else { + // Startup: grow quickly (saturating to prevent overflow). 
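+            // Adding bytes_acked on each ACK roughly doubles cwnd once per RTT,
+            // the same exponential ramp as TCP slow start.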
+ self.cwnd +|= bytes_acked; + } + self.cwnd = @max(self.cwnd, BBR_MIN_CWND); + } + + // ----------------------------------------------------------------------- + // Internal: Startup state + // ----------------------------------------------------------------------- + + fn updateStartup(self: *Bbr, sample: DeliveryRateSample) void { + // Check loss on EVERY ACK, not just round_start. CUBIC detects loss + // immediately and reduces cwnd; BBR must do the same to avoid growing + // cwnd from 58 KB to 200+ KB during a single lossy round. Without + // this, the Startup burst overwhelms shallow queues and recovery + // from 200+ lost packets exceeds the 64 KB stream buffer. + if (self.isExcessiveLoss()) { + self.enterDrain(); + return; + } + + if (!sample.round_start) return; + + // Check for bandwidth plateau. + if (self.max_bw >= @as(u64, @intFromFloat(@as(f64, @floatFromInt(self.full_bw)) * BBR_FULL_BW_THRESHOLD))) { + // Still growing — reset counter. + self.full_bw = self.max_bw; + self.full_bw_count = 0; + } else { + self.full_bw_count += 1; + } + + if (self.full_bw_count >= BBR_FULL_BW_COUNT) { + self.enterDrain(); + } + } + + fn enterDrain(self: *Bbr) void { + self.state = .drain; + self.filled_pipe = true; + // Use 1.0× pacing gain during Drain instead of 0.346×. The cwnd + // target (BDP × cwnd_gain) already limits inflight; the ultra-low + // Drain rate (0.346×) makes retransmission recovery 6× slower than + // CUBIC's post-loss rate, causing the server to appear dead. + self.pacing_gain = 1.0; + self.cwnd_gain = BBR_CWND_GAIN; + // If Startup exited due to loss, the cwnd is massively inflated. + // Set inflight_hi to BDP so cwnd drains properly and ProbeBW starts + // with a reasonable bound. Without this, inflight_hi stays at the + // Startup peak and cwnd never converges to the actual capacity. + if (self.isExcessiveLoss()) { + self.inflight_hi = @max(self.bdp(), BBR_MIN_CWND); + } else { + self.inflight_hi = self.cwnd; + } + } + + // ----------------------------------------------------------------------- + // Internal: Drain state + // ----------------------------------------------------------------------- + + fn updateDrain(self: *Bbr, sample: DeliveryRateSample) void { + // Apply loss bounding during Drain — continued loss from the Startup + // burst should reduce inflight_hi toward BDP, not stay at the peak. + if (sample.round_start and self.isExcessiveLoss()) { + self.applyLossBounding(true); + } + // Exit Drain when post-ACK inflight ≤ BDP. Use prior_inflight + // minus bytes_acked: prior_inflight is captured BEFORE the ACK + // reduces bytes_in_flight, so it includes the just-ACKed data. + // Subtracting gives the actual pipe depth after draining. + if (sample.prior_inflight -| sample.bytes_acked <= self.bdp()) { + self.enterProbeBw(.down); + } + } + + // ----------------------------------------------------------------------- + // Internal: ProbeBW state (steady state) + // ----------------------------------------------------------------------- + + fn enterProbeBw(self: *Bbr, phase: ProbeBwPhase) void { + self.state = .probe_bw; + self.probe_bw_phase = phase; + self.probe_bw_rounds = 0; + self.probe_up_rounds = 0; + // Use cwnd_gain = 2.0 to target 2×BDP — provides headroom for + // retransmissions and ACK aggregation in real networks. + self.cwnd_gain = BBR_CWND_GAIN; + self.pacing_gain = switch (phase) { + .down => 1.0, // Use 1.0× instead of 0.9× — on shallow queues, + // 0.9× is too slow for loss recovery and causes server stalls. 
+ .cruise, .refill => 1.0, + .up => BBR_PROBE_BW_UP_PACING_GAIN, + }; + if (phase == .refill) { + // Reset bw_hi before probing up so previous reductions don't persist. + self.bw_hi = std.math.maxInt(u64); + } + } + + fn updateProbeBw(self: *Bbr, sample: DeliveryRateSample) void { + // Per-round loss bounding (applies to all phases). + const had_excessive_loss = sample.round_start and self.isExcessiveLoss(); + if (sample.round_start) { + self.applyLossBounding(had_excessive_loss); + self.probe_bw_rounds += 1; + } + + switch (self.probe_bw_phase) { + .down => { + // Same post-ACK inflight rationale as Drain exit. + if (sample.prior_inflight -| sample.bytes_acked <= self.bdp()) { + self.enterProbeBw(.cruise); + } + }, + .cruise => { + if (self.probe_bw_rounds >= 4) { + self.enterProbeBw(.refill); + } + }, + .refill => { + if (sample.round_start and self.probe_bw_rounds >= 1) { + self.enterProbeBw(.up); + } + }, + .up => { + if (sample.round_start) self.probe_up_rounds += 1; + // applyLossBounding already reduced inflight_hi; just transition on loss. + if (had_excessive_loss) { + self.enterProbeBw(.down); + } else if (self.probe_up_rounds >= 2) { + self.inflight_hi = @max(self.inflight_hi, sample.prior_inflight); + self.enterProbeBw(.down); + } + }, + } + } + + fn applyLossBounding(self: *Bbr, excessive_loss: bool) void { + if (excessive_loss) { + self.bw_hi = @max(applyBeta(self.bw_hi), self.max_bw); + // Floor at BDP to prevent spiral: repeated 0.7× reductions after + // blackhole recovery would collapse inflight_hi to near-zero. + self.inflight_hi = @max(applyBeta(self.inflight_hi), @max(self.bdp(), BBR_MIN_CWND)); + } + } + + // ----------------------------------------------------------------------- + // Internal: ProbeRTT state + // ----------------------------------------------------------------------- + + fn checkProbeRtt(self: *Bbr, now_ns: i64) void { + if (self.state == .probe_rtt) return; + if (self.min_rtt_ns == std.math.maxInt(u64)) return; + + // Enter ProbeRTT if min_rtt hasn't been updated for BBR_PROBE_RTT_INTERVAL_NS. + if (now_ns - self.min_rtt_stamp_ns >= BBR_PROBE_RTT_INTERVAL_NS) { + self.enterProbeRtt(); + } + } + + fn enterProbeRtt(self: *Bbr) void { + self.state = .probe_rtt; + self.pacing_gain = 1.0; + self.cwnd_gain = 1.0; + self.probe_rtt_done_ns = null; + self.probe_rtt_round_done = false; + } + + fn updateProbeRtt(self: *Bbr, sample: DeliveryRateSample, now_ns: i64) void { + // Wait for inflight to drain to min cwnd. + if (self.probe_rtt_done_ns == null) { + if (sample.prior_inflight <= BBR_MIN_CWND) { + // Inflight drained — start 200ms timer. + self.probe_rtt_done_ns = now_ns + BBR_PROBE_RTT_DURATION_NS; + self.probe_rtt_round_done = false; + } + return; + } + + // Wait for one full round. + if (sample.round_start) { + self.probe_rtt_round_done = true; + } + + // Exit when both 200ms elapsed AND one round completed. + if (self.probe_rtt_round_done and now_ns >= self.probe_rtt_done_ns.?) { + // Update min_rtt timestamp. + self.min_rtt_stamp_ns = now_ns; + self.exitProbeRtt(); + } + } + + fn exitProbeRtt(self: *Bbr) void { + if (!self.filled_pipe) { + self.state = .startup; + self.pacing_gain = BBR_STARTUP_PACING_GAIN; + self.cwnd_gain = BBR_CWND_GAIN; + } else { + self.enterProbeBw(.cruise); + } + } + + // ----------------------------------------------------------------------- + // Internal: Helpers + // ----------------------------------------------------------------------- + + /// True if >2% of bytes in the current round were lost. 
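+    /// For example, 400 bytes lost out of 15_000 in a round (~2.7%) counts as excessive.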
+ /// Uses `loss * 50 > bytes` (equivalent to `loss / bytes > 0.02`) to stay in u64. + fn isExcessiveLoss(self: *const Bbr) bool { + return self.bytes_in_round > 0 and + self.loss_in_round *| 50 > self.bytes_in_round; + } + + /// Apply BBR_BETA (0.7) reduction to a u64 value using integer arithmetic. + fn applyBeta(val: u64) u64 { + return val *| 7 / 10; + } + + // ----------------------------------------------------------------------- + // Internal: Extra ACKed tracking + // ----------------------------------------------------------------------- + + fn updateExtraAcked(self: *Bbr, sample: DeliveryRateSample) void { + // Reset interval on round boundary unconditionally (even if early returns below skip accumulation). + if (sample.round_start) { + self.extra_acked_filter.update(self.extra_acked_in_interval, self.round_count); + self.extra_acked = self.extra_acked_filter.get(); + self.extra_acked_in_interval = 0; + } + + if (sample.bytes_acked == 0) return; + if (self.max_bw == 0 or sample.rtt_ns == 0) return; + + // Expected delivery = max_bw × rtt_sample. + const expected: u64 = @intCast(@min( + @as(u128, self.max_bw) *| @as(u128, sample.rtt_ns) / 1_000_000_000, + std.math.maxInt(u64), + )); + + if (sample.bytes_acked > expected) { + self.extra_acked_in_interval += sample.bytes_acked - expected; + } + } +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +test "bbr: init sets startup state" { + const b = Bbr.init(); + const testing = std.testing; + try testing.expectEqual(State.startup, b.state); + try testing.expectEqual(INITIAL_CWND, b.cwnd); + try testing.expect(b.pacing_gain > 1.0); + try testing.expect(!b.filled_pipe); +} + +test "bbr: canSend" { + var b = Bbr.init(); + const testing = std.testing; + try testing.expect(b.canSend()); + b.cwnd = 0; + try testing.expect(!b.canSend()); +} + +test "bbr: bdp computation" { + var b = Bbr.init(); + // Set known values: 1 MB/s, 100ms RTT → BDP = 100,000 bytes. + b.max_bw = 1_000_000; + b.min_rtt_ns = 100_000_000; // 100ms + const expected: u64 = 100_000; // 1M × 0.1s + try std.testing.expectEqual(expected, b.bdp()); +} + +test "bbr: bdp returns initial cwnd when no samples" { + const b = Bbr.init(); + try std.testing.expectEqual(INITIAL_CWND, b.bdp()); +} + +test "bbr: startup exits on bandwidth plateau" { + var b = Bbr.init(); + b.max_bw = 1000; + b.full_bw = 1000; // Same as max_bw — no growth. + b.min_rtt_ns = 50_000_000; + + // Simulate 3 rounds without 25% growth. 
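+    // max_bw equals full_bw, so each round_start call bumps full_bw_count;
+    // the third one reaches BBR_FULL_BW_COUNT and triggers enterDrain().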
+ var i: u8 = 0; + while (i < 3) : (i += 1) { + b.updateStartup(.{ + .delivery_rate = 1000, + .round_start = true, + }); + } + try std.testing.expectEqual(State.drain, b.state); + try std.testing.expect(b.filled_pipe); +} + +test "bbr: startup exits on excessive loss" { + var b = Bbr.init(); + b.max_bw = 1_000_000; + b.full_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.bytes_in_round = 10000; + b.loss_in_round = 300; // 3% loss > 2% threshold + + b.updateStartup(.{ .delivery_rate = 1_000_000, .round_start = true }); + try std.testing.expectEqual(State.drain, b.state); +} + +test "bbr: drain exits when inflight <= bdp" { + var b = Bbr.init(); + b.state = .drain; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 100_000_000; // BDP = 100,000 + + b.updateDrain(.{ .prior_inflight = 90_000 }); // below BDP + try std.testing.expectEqual(State.probe_bw, b.state); + try std.testing.expectEqual(ProbeBwPhase.down, b.probe_bw_phase); +} + +test "bbr: probe_bw phase cycling" { + var b = Bbr.init(); + b.state = .probe_bw; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + + // DOWN → CRUISE when inflight <= bdp + b.probe_bw_phase = .down; + b.pacing_gain = BBR_PROBE_BW_DOWN_PACING_GAIN; + b.updateProbeBw(.{ .prior_inflight = 1000, .round_start = true }); + try std.testing.expectEqual(ProbeBwPhase.cruise, b.probe_bw_phase); + + // CRUISE → REFILL after 4 rounds + b.probe_bw_rounds = 0; + var i: u8 = 0; + while (i < 4) : (i += 1) { + b.updateProbeBw(.{ .prior_inflight = 50_000, .round_start = true }); + } + try std.testing.expectEqual(ProbeBwPhase.refill, b.probe_bw_phase); + + // REFILL → UP after 1 round + b.probe_bw_rounds = 0; + b.updateProbeBw(.{ .prior_inflight = 50_000, .round_start = true }); + b.updateProbeBw(.{ .prior_inflight = 50_000, .round_start = true }); + try std.testing.expectEqual(ProbeBwPhase.up, b.probe_bw_phase); +} + +test "bbr: probe_rtt entry after interval" { + var b = Bbr.init(); + b.state = .probe_bw; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.min_rtt_stamp_ns = 0; + + // After probe_rtt interval, should enter ProbeRTT. + b.checkProbeRtt(BBR_PROBE_RTT_INTERVAL_NS + 1); + try std.testing.expectEqual(State.probe_rtt, b.state); +} + +test "bbr: probe_rtt exit after 200ms + 1 round" { + var b = Bbr.init(); + b.state = .probe_rtt; + b.filled_pipe = true; + b.min_rtt_ns = 50_000_000; + b.max_bw = 1_000_000; + b.probe_rtt_done_ns = null; + b.probe_rtt_round_done = false; + + // Step 1: inflight drains to min cwnd — starts 200ms timer. + b.updateProbeRtt(.{ .prior_inflight = BBR_MIN_CWND, .round_start = false }, 1000); + try std.testing.expect(b.probe_rtt_done_ns != null); + try std.testing.expect(!b.probe_rtt_round_done); + + // Step 2: round completes. + b.updateProbeRtt(.{ .prior_inflight = BBR_MIN_CWND, .round_start = true }, 1000 + 100_000_000); + try std.testing.expect(b.probe_rtt_round_done); + + // Step 3: 200ms elapsed. + b.updateProbeRtt(.{ .prior_inflight = BBR_MIN_CWND, .round_start = true }, 1000 + BBR_PROBE_RTT_DURATION_NS + 1); + try std.testing.expectEqual(State.probe_bw, b.state); +} + +test "bbr: windowed filter tracks max" { + const Filter = WindowedFilter(u64, 2); + var f = Filter.init(0); + f.update(100, 1); + try std.testing.expectEqual(@as(u64, 100), f.get()); + f.update(200, 2); + try std.testing.expectEqual(@as(u64, 200), f.get()); + // Lower value doesn't displace max. 
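+    // A lower sample only lands in the second- or third-best slot.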
+ f.update(50, 2); + try std.testing.expectEqual(@as(u64, 200), f.get()); +} + +test "bbr: windowed filter expires old values" { + const Filter = WindowedFilter(u64, 2); + var f = Filter.init(0); + f.update(200, 1); + try std.testing.expectEqual(@as(u64, 200), f.get()); + // After window expires (round 4, window=2), old value should be replaced. + f.update(100, 4); + try std.testing.expectEqual(@as(u64, 100), f.get()); +} + +test "bbr: windowed filter demotes on new best, preventing simultaneous expiry" { + const Filter = WindowedFilter(u64, 10); + var f = Filter.init(0); + // Startup peak at round 5. + f.update(1000, 5); + try std.testing.expectEqual(@as(u64, 1000), f.get()); + // Slightly higher peak at round 8 (inflated Startup sample). + f.update(1050, 8); + try std.testing.expectEqual(@as(u64, 1050), f.get()); + // ProbeBW DOWN samples at 950 (below peak) — enter as second/third. + f.update(950, 12); + f.update(960, 15); + // After 10 rounds from peak (round 18): peak at round 8 expires. + // The demoted second-best (1000 from round 5) also expired (18-5=13>=10). + // But 960 from round 15 is still valid (18-15=3<10). + f.update(700, 18); + // Without demotion fix: all three expire → get() = 700. + // With demotion fix: 960 (round 15) survives → get() = 960. + try std.testing.expectEqual(@as(u64, 960), f.get()); +} + +test "bbr: loss bounding reduces inflight_hi" { + var b = Bbr.init(); + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.inflight_hi = 100_000; + b.bw_hi = 2_000_000; + + // 5% loss rate (> 2% threshold). + b.bytes_in_round = 10000; + b.loss_in_round = 500; + + const old_hi = b.inflight_hi; + b.applyLossBounding(true); + try std.testing.expect(b.inflight_hi < old_hi); +} + +test "bbr: pacing refill with known rate" { + var b = Bbr.init(); + b.pacing.rate = 1_000_000; // 1 MB/s + b.pacing.tokens = 0; + b.pacing.last_refill_ns = 1_000_000_000; // 1s + + const tokens = b.pacingRefill(1_001_000_000); // 1ms later + // 1 MB/s × 0.001s = 1000 bytes. + try std.testing.expectEqual(@as(u64, 1000), tokens); +} + +test "bbr: pacing consume" { + var b = Bbr.init(); + b.pacing.tokens = 5000; + b.pacingConsume(3000); + try std.testing.expectEqual(@as(u64, 2000), b.pacing.tokens); +} + +test "bbr: persistent congestion resets to startup" { + var b = Bbr.init(); + b.state = .probe_bw; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.cwnd = 100_000; + b.onPersistentCongestion(); + try std.testing.expectEqual(State.startup, b.state); + try std.testing.expect(!b.filled_pipe); + try std.testing.expectEqual(BBR_MIN_CWND, b.cwnd); + // max_bw is preserved so pacing stays reasonable during recovery. + try std.testing.expectEqual(@as(u64, 1_000_000), b.max_bw); +} + +test "bbr: ecn ce reduces inflight_hi" { + var b = Bbr.init(); + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.inflight_hi = 200_000; + + const old_hi = b.inflight_hi; + b.onEcnCe(1, 0); + try std.testing.expect(b.inflight_hi < old_hi); +} + +test "bbr: startup grows cwnd on ack" { + var b = Bbr.init(); + const initial = b.cwnd; + b.min_rtt_ns = 50_000_000; + b.onAckReceived(.{ + .delivery_rate = 500_000, + .rtt_ns = 50_000_000, + .bytes_acked = MSS, + .round_start = false, + }, 1_000_000_000); + // Startup grows cwnd by bytes_acked. + try std.testing.expect(b.cwnd > initial); +} + +test "bbr: full state machine startup to probe_bw" { + var b = Bbr.init(); + b.min_rtt_ns = 50_000_000; + b.min_rtt_stamp_ns = 0; + + // Simulate startup with growing bandwidth. 
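+    // 50% growth per round clears the 25% plateau threshold, so full_bw_count
+    // stays at 0 and BBR remains in Startup while bandwidth keeps rising.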
+ var bw: u64 = 100_000; + var round: u64 = 0; + while (b.state == .startup and round < 20) : (round += 1) { + bw = bw * 3 / 2; // 50% growth per round. + b.onAckReceived(.{ + .delivery_rate = bw, + .rtt_ns = 50_000_000, + .bytes_acked = 10 * MSS, + .round_start = true, + }, @intCast(round * 50_000_000)); + } + + // BW stabilizes — should plateau and exit startup. + const stable_bw = bw; + while (b.state == .startup and round < 40) : (round += 1) { + b.onAckReceived(.{ + .delivery_rate = stable_bw, + .rtt_ns = 50_000_000, + .bytes_acked = 10 * MSS, + .round_start = true, + }, @intCast(round * 50_000_000)); + } + // Should have transitioned through drain. + try std.testing.expect(b.filled_pipe); + + // Drain until inflight ≤ BDP. + while (b.state == .drain and round < 60) : (round += 1) { + b.onAckReceived(.{ + .delivery_rate = stable_bw, + .rtt_ns = 50_000_000, + .bytes_acked = 10 * MSS, + .prior_inflight = 1000, // way below BDP + .round_start = true, + }, @intCast(round * 50_000_000)); + } + try std.testing.expectEqual(State.probe_bw, b.state); +} + +// --------------------------------------------------------------------------- +// Regression tests (bugs found during code review) +// --------------------------------------------------------------------------- + +test "bbr: regression — persistent congestion resets filters with round 0" { + // Bug: onPersistentCongestion reset round_count to 0 AFTER calling + // max_bw_filter.reset(0, self.round_count), storing a stale round number. + // Future filter updates would not expire the old value for many rounds. + var b = Bbr.init(); + b.round_count = 100; + b.max_bw = 500_000; + b.max_bw_filter.update(500_000, 100); + + b.onPersistentCongestion(); + + // round_count and max_bw_filter are preserved so pacing stays reasonable. + try std.testing.expectEqual(@as(u64, 100), b.round_count); + // Filter retains the pre-congestion value. + try std.testing.expectEqual(@as(u64, 500_000), b.max_bw_filter.get()); + // A higher value updates normally. + b.max_bw_filter.update(600_000, 101); + try std.testing.expectEqual(@as(u64, 600_000), b.max_bw_filter.get()); +} + +test "bbr: regression — persistent congestion resets min_rtt and pacing" { + // Bug: onPersistentCongestion did not reset min_rtt_ns, min_rtt_stamp_ns, + // pacing state, or extra_acked_in_interval. Stale values leaked into + // the new Startup phase. + var b = Bbr.init(); + b.min_rtt_ns = 10_000_000; + b.min_rtt_stamp_ns = 5_000_000_000; + b.pacing.rate = 1_000_000; + b.pacing.tokens = 50_000; + b.extra_acked_in_interval = 9999; + + b.onPersistentCongestion(); + + try std.testing.expectEqual(std.math.maxInt(u64), b.min_rtt_ns); + try std.testing.expectEqual(@as(i64, 0), b.min_rtt_stamp_ns); + try std.testing.expectEqual(@as(u64, 0), b.pacing.rate); + try std.testing.expectEqual(INITIAL_CWND, b.pacing.tokens); // default Pacing init + try std.testing.expectEqual(@as(u64, 0), b.extra_acked_in_interval); +} + +test "bbr: regression — no double inflight_hi reduction in ProbeBW UP" { + // Bug: checkLossBounding reduced inflight_hi, then the UP branch applied + // applyBeta again, double-reducing it. + var b = Bbr.init(); + b.state = .probe_bw; + b.probe_bw_phase = .up; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.inflight_hi = 200_000; + b.bw_hi = std.math.maxInt(u64); + + // Simulate excessive loss in a round. + b.bytes_in_round = 10000; + b.loss_in_round = 500; // 5% > 2% + + // One round_start ACK should reduce inflight_hi exactly once. 
+ b.updateProbeBw(.{ .prior_inflight = 100_000, .round_start = true }); + + // After single beta reduction: 200_000 * 7/10 = 140_000. + // BDP = 1M * 50ms = 50_000. So max(140_000, 50_000) = 140_000. + const expected = @max(Bbr.applyBeta(200_000), @as(u64, 50_000)); + try std.testing.expectEqual(expected, b.inflight_hi); + // Must have transitioned to DOWN. + try std.testing.expectEqual(ProbeBwPhase.down, b.probe_bw_phase); +} + +test "bbr: regression — bw_hi restored in ProbeBW refill" { + // Bug: bw_hi was only reduced, never restored. Once checkLossBounding + // reduced it, the pacing rate was permanently suppressed. + var b = Bbr.init(); + b.state = .probe_bw; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.bw_hi = 500_000; // previously reduced + + // Entering refill should restore bw_hi to maxInt. + b.enterProbeBw(.refill); + try std.testing.expectEqual(std.math.maxInt(u64), b.bw_hi); +} + +test "bbr: regression — cwnd_gain is 2.0 in ProbeBW steady state" { + // cwnd_gain = 2.0 in ProbeBW provides 2×BDP headroom for retransmissions + // and ACK aggregation. + var b = Bbr.init(); + b.enterProbeBw(.cruise); + try std.testing.expectEqual(BBR_CWND_GAIN, b.cwnd_gain); + b.enterProbeBw(.down); + try std.testing.expectEqual(BBR_CWND_GAIN, b.cwnd_gain); + b.enterProbeBw(.up); + try std.testing.expectEqual(BBR_CWND_GAIN, b.cwnd_gain); + b.enterProbeBw(.refill); + try std.testing.expectEqual(BBR_CWND_GAIN, b.cwnd_gain); +} + +test "bbr: regression — inflight_hi initialized to maxInt" { + // Bug: inflight_hi was initialized to INITIAL_CWND, which would cap + // cwnd in ProbeBW before enterDrain had a chance to set it properly. + const b = Bbr.init(); + try std.testing.expectEqual(std.math.maxInt(u64), b.inflight_hi); +} + +test "bbr: regression — loss counters evaluated before reset on round boundary" { + // Bug: updateRoundCounters() zeroed loss_in_round/bytes_in_round before + // the state machine could evaluate them, making isExcessiveLoss() see + // only the current ACK's data instead of the full accumulated round. + var b = Bbr.init(); + b.state = .probe_bw; + b.probe_bw_phase = .up; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; + b.inflight_hi = 200_000; + b.bw_hi = std.math.maxInt(u64); + + // Accumulate loss data over several non-round-start ACKs. + b.onAckReceived(.{ .bytes_acked = 5000, .bytes_lost = 0 }, 100_000_000); + b.onAckReceived(.{ .bytes_acked = 5000, .bytes_lost = 0 }, 200_000_000); + b.onAckReceived(.{ .bytes_acked = 5000, .bytes_lost = 400 }, 300_000_000); + // Now: bytes_in_round=15000, loss_in_round=400 (2.67% > 2%) + try std.testing.expect(b.isExcessiveLoss()); + + // The round_start ACK should see the accumulated loss and transition. + const hi_before = b.inflight_hi; + b.onAckReceived(.{ .bytes_acked = 1000, .round_start = true }, 400_000_000); + + // inflight_hi must have been reduced (loss bounding triggered). + try std.testing.expect(b.inflight_hi < hi_before); + // Must have transitioned to DOWN. + try std.testing.expectEqual(ProbeBwPhase.down, b.probe_bw_phase); +} + +test "bbr: regression — persistent congestion resets loss and phase counters" { + // Bug: onPersistentCongestion didn't reset loss_in_round, bytes_in_round, + // probe_bw_rounds, probe_up_rounds. Stale loss data could trigger false + // Startup exit via isExcessiveLoss(). 
+ var b = Bbr.init(); + b.loss_in_round = 500; + b.bytes_in_round = 10000; + b.probe_bw_rounds = 5; + b.probe_up_rounds = 2; + + b.onPersistentCongestion(); + + try std.testing.expectEqual(@as(u64, 0), b.loss_in_round); + try std.testing.expectEqual(@as(u64, 0), b.bytes_in_round); + try std.testing.expectEqual(@as(u64, 0), b.probe_bw_rounds); + try std.testing.expectEqual(@as(u64, 0), b.probe_up_rounds); +} + +test "bbr: regression — extra_acked capped by inflight_hi" { + // Bug: extra_acked was added after inflight_hi cap, allowing cwnd to + // exceed the loss-based inflight bound. + var b = Bbr.init(); + b.state = .probe_bw; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 50_000_000; // BDP = 50,000 + b.cwnd_gain = BBR_CWND_GAIN; + b.inflight_hi = 60_000; + b.extra_acked = 50_000; // large headroom + + b.updateCwnd(MSS); + + // cwnd must not exceed inflight_hi. + try std.testing.expect(b.cwnd <= b.inflight_hi); +} + +test "bbr: regression — ProbeRTT only enters from ProbeBW" { + // Bug: checkProbeRtt could fire during Startup or Drain, entering + // ProbeRTT before the pipe was filled. + var b = Bbr.init(); + b.state = .startup; + b.min_rtt_ns = 50_000_000; + b.min_rtt_stamp_ns = 0; + + // After probe_rtt interval — would trigger ProbeRTT from ProbeBW. + // But from Startup, it should be ignored. + b.onAckReceived(.{ + .delivery_rate = 500_000, + .rtt_ns = 50_000_000, + .bytes_acked = MSS, + }, 10_000_000_001); + + // Must still be in Startup (or Drain if BW plateau hit), NOT ProbeRTT. + try std.testing.expect(b.state != .probe_rtt); +} + +// --------------------------------------------------------------------------- +// Tests for Startup pacing bypass, ProbeBW DOWN headroom, bdpHeadroom scaling +// --------------------------------------------------------------------------- + +test "bbr: shouldPace disabled during Startup, enabled after" { + var b = Bbr.init(); + // Startup: pacing disabled. + try std.testing.expect(!b.shouldPace()); + try std.testing.expect(!b.filled_pipe); + + // After enterDrain: filled_pipe = true, pacing enabled. + b.enterDrain(); + try std.testing.expect(b.shouldPace()); + try std.testing.expect(b.filled_pipe); + + // After persistent congestion: back to Startup, pacing disabled. + b.onPersistentCongestion(); + try std.testing.expect(!b.shouldPace()); + try std.testing.expect(!b.filled_pipe); +} + +test "bbr: ProbeBW DOWN exits using post-ACK inflight" { + var b = Bbr.init(); + b.state = .probe_bw; + b.probe_bw_phase = .down; + b.filled_pipe = true; + b.max_bw = 1_000_000; // 1 MB/s + b.min_rtt_ns = 100_000_000; // 100ms → BDP = 100,000 + + // Pre-ACK inflight = 2×BDP (cwnd full), bytes_acked = BDP+. + // Post-ACK = 2×BDP - (BDP+) < BDP → exits DOWN. + b.updateProbeBw(.{ .prior_inflight = 200_000, .bytes_acked = 110_000, .round_start = true }); + try std.testing.expectEqual(ProbeBwPhase.cruise, b.probe_bw_phase); + + // Reset. Post-ACK inflight still above BDP: stays in DOWN. + b.probe_bw_phase = .down; + b.probe_bw_rounds = 0; + b.updateProbeBw(.{ .prior_inflight = 200_000, .bytes_acked = 50_000, .round_start = true }); + try std.testing.expectEqual(ProbeBwPhase.down, b.probe_bw_phase); +} + +test "bbr: Drain exits using post-ACK inflight" { + var b = Bbr.init(); + b.state = .drain; + b.filled_pipe = true; + b.max_bw = 1_000_000; + b.min_rtt_ns = 100_000_000; // BDP = 100,000 + + // Pre-ACK inflight high (Startup peak), but post-ACK ≤ BDP. 
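+    // Post-ACK inflight = 300_000 - 210_000 = 90_000 <= BDP (100_000), so Drain exits.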
+ b.updateDrain(.{ .prior_inflight = 300_000, .bytes_acked = 210_000 }); + try std.testing.expectEqual(State.probe_bw, b.state); +} diff --git a/src/quic/congestion/cc.zig b/src/quic/congestion/cc.zig new file mode 100644 index 0000000..a1b85f5 --- /dev/null +++ b/src/quic/congestion/cc.zig @@ -0,0 +1,22 @@ +//! Congestion control algorithm abstraction layer. +//! +//! Provides a comptime switch between BBR v3 and CUBIC. The active algorithm +//! is selected at build time via `-Dcongestion=cubic` (default: bbr). +//! Both algorithms expose the same public API, so the rest of the stack +//! uses `cc.CongestionControl` without knowing which is active. + +const build_options = @import("build_options"); +const cubic = @import("cubic.zig"); +const bbr = @import("bbr.zig"); + +pub const DeliveryRateSample = @import("common.zig").DeliveryRateSample; + +pub const Algorithm = enum { cubic, bbr }; + +/// Selected at build time via `-Dcongestion=cubic` (default: bbr). +pub const selected: Algorithm = if (build_options.congestion_cubic) .cubic else .bbr; + +pub const CongestionControl = switch (selected) { + .cubic => cubic.Cubic, + .bbr => bbr.Bbr, +}; diff --git a/src/quic/congestion/common.zig b/src/quic/congestion/common.zig new file mode 100644 index 0000000..be6ea4d --- /dev/null +++ b/src/quic/congestion/common.zig @@ -0,0 +1,138 @@ +//! Shared types and constants for congestion control algorithms. +//! +//! Defined here (in the congestion directory) so that congestion modules +//! can import it without reaching outside their module path. + +const std = @import("std"); + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/// RFC 9002 §7.2: max_datagram_size for congestion control. +/// Matches MAX_SEND_PACKET_SIZE (1452) — the actual UDP payload we send. +pub const MSS: u64 = 1452; +/// RFC 9002 §7.2: initial_window = min(10 * mds, max(14720, 2 * mds)) +/// = min(14520, max(14720, 2904)) = 14520. +pub const INITIAL_CWND: u64 = @min(10 * MSS, @max(14720, 2 * MSS)); + +// --------------------------------------------------------------------------- +// Delivery Rate Sample +// --------------------------------------------------------------------------- + +/// Per-ACK delivery rate sample, computed by LossRecovery and passed to +/// the congestion controller. +pub const DeliveryRateSample = struct { + delivery_rate: u64 = 0, // bytes/sec + is_app_limited: bool = false, + rtt_ns: u64 = 0, // latest RTT sample + bytes_acked: u64 = 0, + bytes_lost: u64 = 0, + prior_inflight: u64 = 0, // bytes_in_flight before this ACK + round_start: bool = false, // did a new round start? +}; + +// --------------------------------------------------------------------------- +// Pacing — shared token bucket used by both BBR and CUBIC +// --------------------------------------------------------------------------- + +/// Token bucket pacer. Spread packets evenly across the RTT instead of +/// bursting. Embedded by both Bbr and Cubic. +pub const Pacing = struct { + /// Pacing rate in bytes per second. Updated by the congestion controller. + rate: u64 = 0, + /// Token bucket: bytes allowed to send now. + tokens: u64 = INITIAL_CWND, // allow initial burst + /// Timestamp of last token refill (ns). + last_refill_ns: i64 = 0, + + /// Refill tokens based on elapsed time. Returns bytes allowed to send. + /// Tokens are capped at 2×cwnd to allow modest bursts without unlimited accumulation. 
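+    /// For example, at rate = 1 MB/s, 1 ms of elapsed time adds 1_000 bytes of tokens.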
+ pub fn refill(self: *Pacing, cwnd: u64, now_ns: i64) u64 { + if (self.rate == 0) { + // No pacing rate yet (before first ACK) — allow full cwnd. + return cwnd; + } + if (self.last_refill_ns == 0) { + self.last_refill_ns = now_ns; + return self.tokens; + } + const elapsed_ns: u64 = @intCast(@max(now_ns - self.last_refill_ns, 0)); + // Only advance the timestamp when time has actually elapsed. + // Repeated calls with the same now_ns (within a drainSend batch) + // must NOT reset last_refill_ns, otherwise nextSendTime() computes + // a deadline that's already in the past, causing the event loop to + // spin instead of sleeping until enough tokens accumulate. + if (elapsed_ns > 0) { + self.last_refill_ns = now_ns; + } + // Use u128 to avoid saturation on fast links (e.g., 1 GB/s × 1s overflows u64). + const new_tokens: u64 = @intCast(@min( + @as(u128, self.rate) * elapsed_ns / 1_000_000_000, + std.math.maxInt(u64), + )); + self.tokens = @min(self.tokens +| new_tokens, cwnd *| 2); + return self.tokens; + } + + /// Consume tokens after sending a packet. + pub fn consume(self: *Pacing, bytes: u64) void { + self.tokens -|= bytes; + } + + /// Returns the nanosecond deadline when enough tokens will be available + /// to send one MSS-sized packet, or null if tokens are already sufficient + /// or pacing is not active (rate == 0). + pub fn nextSendTime(self: *const Pacing) ?i64 { + if (self.rate == 0) return null; + if (self.tokens >= MSS) return null; + const deficit = MSS - self.tokens; + const wait_ns: i64 = @intCast(@min( + @as(u128, deficit) * 1_000_000_000 / self.rate, + @as(u128, std.math.maxInt(i64)), + )); + return self.last_refill_ns +| wait_ns; + } +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +test "pacing: regression — u128 prevents overflow on fast links" { + // Bug: `rate *| elapsed_ns / 1_000_000_000` used u64 saturating multiply. + // At 1 GB/s with 1s elapsed, rate × elapsed = 1e18 which fits u64, but + // at 10 GB/s × 1s = 1e19 which overflows u64 (max ~1.8e19). With the old + // saturating mul, tokens would cap at maxInt instead of the correct value. + var p = Pacing{ + .rate = 10_000_000_000, // 10 GB/s + .tokens = 0, + .last_refill_ns = 1_000_000_000, + }; + const tokens = p.refill(20_000_000_000, 2_000_000_000); // 1s later + // Expected: 10 GB/s × 1s = 10,000,000,000 bytes. + try std.testing.expectEqual(@as(u64, 10_000_000_000), tokens); +} + +test "pacing: refill and consume basic" { + var p = Pacing{ + .rate = 1_000_000, // 1 MB/s + .tokens = 0, + .last_refill_ns = 1_000_000_000, + }; + _ = p.refill(1_000_000, 1_001_000_000); // 1ms later → 1000 bytes + try std.testing.expectEqual(@as(u64, 1000), p.tokens); + p.consume(600); + try std.testing.expectEqual(@as(u64, 400), p.tokens); +} + +test "pacing: tokens capped at 2*cwnd" { + var p = Pacing{ + .rate = 1_000_000_000, // 1 GB/s + .tokens = 0, + .last_refill_ns = 1_000_000_000, // initialized + }; + const cwnd: u64 = 100_000; + _ = p.refill(cwnd, 2_000_000_000); // 1s later → 1 GB, but capped at 200_000 + try std.testing.expectEqual(cwnd * 2, p.tokens); +} diff --git a/src/quic/congestion/cubic.zig b/src/quic/congestion/cubic.zig index 2c8d848..e0c6d06 100644 --- a/src/quic/congestion/cubic.zig +++ b/src/quic/congestion/cubic.zig @@ -6,18 +6,16 @@ //! C = 0.4. 
const std = @import("std"); +const common = @import("common.zig"); +const DeliveryRateSample = common.DeliveryRateSample; +const MSS = common.MSS; +const INITIAL_CWND = common.INITIAL_CWND; /// RFC 9438 §5.1: C = 0.4 (in segments). Since our cwnd is in bytes, /// scale by MSS to get the correct growth rate: C_bytes = 0.4 × MSS. /// Without this scaling, K is MSS× too large and CUBIC degenerates to AIMD. const C: f64 = 0.4 * @as(f64, @floatFromInt(MSS)); const BETA_CUBIC: f64 = 0.7; -/// RFC 9002 §7.2: max_datagram_size for congestion control. -/// Matches MAX_SEND_PACKET_SIZE (1452) — the actual UDP payload we send. -const MSS: u64 = 1452; -/// RFC 9002 §7.2: initial_window = min(10 * mds, max(14720, 2 * mds)) -/// = min(14520, max(14720, 2904)) = 14520. -const INITIAL_CWND: u64 = @min(10 * MSS, @max(14720, 2 * MSS)); pub const Cubic = struct { /// Congestion window in bytes. @@ -39,16 +37,8 @@ pub const Cubic = struct { /// growth when (target - cwnd) * MSS < cwnd. cwnd_remainder: u64, - // Pacing state: spread packets evenly across the RTT instead of bursting. - // Without pacing, all cwnd bytes are sent instantly on ACK, overflowing - // shallow queues and causing loss. Pacing targets ~95% link utilization. - /// Pacing rate in bytes per second. Updated on every ACK. - pacing_rate: u64, - /// Pacing token bucket: bytes allowed to send now. Refilled each tick - /// based on elapsed time × pacing_rate. - pacing_tokens: u64, - /// Timestamp of last token refill (ns). - pacing_last_refill_ns: i64, + /// Pacing state (shared token bucket). + pacing: common.Pacing, pub fn init() Cubic { return .{ @@ -60,9 +50,7 @@ pub const Cubic = struct { .cwnd_at_epoch = 0, .w_est = 0, .cwnd_remainder = 0, - .pacing_rate = 0, - .pacing_tokens = INITIAL_CWND, // allow initial burst - .pacing_last_refill_ns = 0, + .pacing = .{}, }; } @@ -71,26 +59,30 @@ pub const Cubic = struct { return self.cwnd > 0; } - /// Called when an ACK is received. - /// `bytes_acked` — bytes acknowledged. - /// `rtt_ns` — smoothed RTT in nanoseconds. - /// `now_ns` — current time in nanoseconds. - pub fn onAckReceived(self: *Cubic, bytes_acked: u64, rtt_ns: u64, now_ns: i64) void { + /// CUBIC always paces after the first ACK sets the pacing rate. + pub fn shouldPace(_: *const Cubic) bool { + return true; + } + + /// Called when an ACK is received with a delivery rate sample. + /// CUBIC uses only bytes_acked and rtt_ns from the sample. + pub fn onAckReceived(self: *Cubic, sample: DeliveryRateSample, now_ns: i64) void { + const bytes_acked = sample.bytes_acked; + const rtt_ns = sample.rtt_ns; if (self.cwnd < self.ssthresh) { // Slow start: double cwnd per RTT (exponential growth). self.cwnd += bytes_acked; } else { self.updateCwndCubic(bytes_acked, rtt_ns, now_ns); } - // Update pacing rate: cwnd / srtt (bytes per second). - // During slow start, pace at 2× to allow exponential growth. - // In congestion avoidance, pace at 1.25× cwnd/srtt for headroom. + // Update pacing rate: 2× cwnd/RTT. Enforced by the pacing gate + // in send() which uses wire-time accounting for bytes_in_flight. if (rtt_ns > 0) { - const base_rate = self.cwnd *| 1_000_000_000 / rtt_ns; - // Pace at 2× cwnd/RTT: allows CUBIC to probe above current cwnd - // without being throttled by the pacing rate. The congestion window - // is the real limit; pacing just smooths burst timing. 
- self.pacing_rate = base_rate *| 2; + const base_rate: u64 = @intCast(@min( + @as(u128, self.cwnd) * 1_000_000_000 / rtt_ns, + std.math.maxInt(u64), + )); + self.pacing.rate = base_rate *| 2; } } @@ -101,11 +93,15 @@ pub const Cubic = struct { self.ssthresh = self.cwnd; self.epoch_start_ns = null; self.cwnd_remainder = 0; + // Reset pacing so stale rate/tokens from the old path don't cause bursts. + self.pacing = .{}; } /// Called on packet loss (e.g., timeout or three duplicate ACKs). + /// `bytes_lost` — total bytes lost (unused by CUBIC, used by BBR). /// `now_ns` — current time in nanoseconds. - pub fn onPacketLost(self: *Cubic, now_ns: i64) void { + pub fn onPacketLost(self: *Cubic, bytes_lost: u64, now_ns: i64) void { + _ = bytes_lost; const MIN_CWND: u64 = 8 * MSS; self.w_max = @floatFromInt(self.cwnd); self.cwnd = @intFromFloat(@as(f64, @floatFromInt(self.cwnd)) * BETA_CUBIC); @@ -124,30 +120,20 @@ pub const Cubic = struct { self.k = computeK(self.w_max, self.cwnd_at_epoch); } - /// Refill pacing tokens based on elapsed time. Call at the start of each - /// send opportunity (tick or post-ACK). Returns the number of bytes - /// allowed to send. Tokens are capped at 2×cwnd to allow modest bursts - /// (e.g., after ACK batching) without unlimited accumulation. + /// Called on ECN CE marks. CUBIC treats ECN the same as packet loss. + pub fn onEcnCe(self: *Cubic, ce_count: u64, now_ns: i64) void { + _ = ce_count; + self.onPacketLost(0, now_ns); + } + + /// Refill pacing tokens. Delegates to shared Pacing. pub fn pacingRefill(self: *Cubic, now_ns: i64) u64 { - if (self.pacing_rate == 0) { - // No pacing rate yet (before first ACK) — allow full cwnd. - return self.cwnd; - } - if (self.pacing_last_refill_ns == 0) { - self.pacing_last_refill_ns = now_ns; - return self.pacing_tokens; - } - const elapsed_ns: u64 = @intCast(@max(now_ns - self.pacing_last_refill_ns, 0)); - self.pacing_last_refill_ns = now_ns; - // tokens += pacing_rate × elapsed_seconds - const new_tokens = self.pacing_rate *| elapsed_ns / 1_000_000_000; - self.pacing_tokens = @min(self.pacing_tokens +| new_tokens, self.cwnd *| 2); - return self.pacing_tokens; + return self.pacing.refill(self.cwnd, now_ns); } /// Consume pacing tokens after sending a packet. 
pub fn pacingConsume(self: *Cubic, bytes: u64) void { - self.pacing_tokens -|= bytes; + self.pacing.consume(bytes); } fn updateCwndCubic(self: *Cubic, bytes_acked: u64, rtt_ns: u64, now_ns: i64) void { @@ -217,7 +203,7 @@ test "cubic: slow start doubles" { const testing = std.testing; var c = Cubic.init(); const initial = c.cwnd; - c.onAckReceived(initial, 10_000_000, 0); + c.onAckReceived(.{ .bytes_acked = initial, .rtt_ns = 10_000_000 }, 0); try testing.expect(c.cwnd >= initial); } @@ -226,7 +212,7 @@ test "cubic: loss reduces window" { var c = Cubic.init(); c.cwnd = 100 * MSS; const before = c.cwnd; - c.onPacketLost(1_000_000_000); + c.onPacketLost(0, 1_000_000_000); try testing.expect(c.cwnd < before); try testing.expectEqual(c.cwnd, c.ssthresh); } @@ -235,14 +221,14 @@ test "cubic: cwnd grows after loss" { const testing = std.testing; var c = Cubic.init(); c.cwnd = 50 * MSS; - c.onPacketLost(0); + c.onPacketLost(0, 0); const after_loss = c.cwnd; const rtt_ns: u64 = 50_000_000; // 50ms // Simulate several ACK events var t: i64 = 100_000_000; var i: usize = 0; while (i < 10) : (i += 1) { - c.onAckReceived(MSS, rtt_ns, t); + c.onAckReceived(.{ .bytes_acked = MSS, .rtt_ns = rtt_ns }, t); t += @intCast(rtt_ns); } try testing.expect(c.cwnd >= after_loss); @@ -260,7 +246,7 @@ test "cubic: onAckReceived with zero bytes is a no-op" { const testing = std.testing; var c = Cubic.init(); const before = c.cwnd; - c.onAckReceived(0, 50_000_000, 1_000_000_000); + c.onAckReceived(.{ .bytes_acked = 0, .rtt_ns = 50_000_000 }, 1_000_000_000); try testing.expectEqual(before, c.cwnd); } @@ -269,9 +255,9 @@ test "cubic: slow start adds bytes_acked directly to cwnd" { var c = Cubic.init(); // ssthresh = maxInt(u64) by default — we are in slow start const initial = c.cwnd; - c.onAckReceived(MSS, 50_000_000, 1_000_000_000); + c.onAckReceived(.{ .bytes_acked = MSS, .rtt_ns = 50_000_000 }, 1_000_000_000); try testing.expectEqual(initial + MSS, c.cwnd); - c.onAckReceived(2 * MSS, 50_000_000, 1_050_000_000); + c.onAckReceived(.{ .bytes_acked = 2 * MSS, .rtt_ns = 50_000_000 }, 1_050_000_000); try testing.expectEqual(initial + 3 * MSS, c.cwnd); } @@ -280,11 +266,11 @@ test "cubic: epoch_start_ns null sentinel prevents spurious reset at clock=0" { var c = Cubic.init(); // Force into CUBIC phase by setting ssthresh below cwnd c.cwnd = 50 * MSS; - c.onPacketLost(0); // epoch_start_ns = Some(0), not null + c.onPacketLost(0, 0); // epoch_start_ns = Some(0), not null const cwnd_after_loss = c.cwnd; // ACK at t=1ms: epoch should NOT reinitialize (epoch_start_ns is Some(0), not null) - c.onAckReceived(MSS, 50_000_000, 1_000_000); // 1ms later + c.onAckReceived(.{ .bytes_acked = MSS, .rtt_ns = 50_000_000 }, 1_000_000); // 1ms later // cwnd must be >= post-loss cwnd (no spurious reset) try testing.expect(c.cwnd >= cwnd_after_loss); // epoch_start_ns must still be Some(0), not changed @@ -295,7 +281,7 @@ test "cubic: w_est accumulates across ACKs in CUBIC phase" { const testing = std.testing; var c = Cubic.init(); c.cwnd = 50 * MSS; - c.onPacketLost(0); + c.onPacketLost(0, 0); const w_est_after_loss = c.w_est; // Set up a scenario where w_cubic < w_est so TCP-friendly phase is active. 
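The C = 0.4 × MSS scaling documented at the top of cubic.zig sets how long the cubic curve takes to climb back to w_max after a loss. Below is a self-contained sketch of that relationship, assuming computeK follows the RFC 9438 §4.2 definition K = cbrt((w_max - cwnd_epoch) / C); the helper `kSeconds` is an illustrative stand-in, not the library's computeK.

```zig
const std = @import("std");

/// Illustrative stand-in: K = cbrt((w_max - cwnd_epoch) / C), in seconds,
/// with all arguments in bytes (C already scaled by MSS).
fn kSeconds(w_max: f64, cwnd_epoch: f64, c_bytes: f64) f64 {
    return std.math.cbrt((w_max - cwnd_epoch) / c_bytes);
}

test "worked example: K after a loss at cwnd = 100 * MSS" {
    const mss = 1452.0;
    const w_max = 100.0 * mss; // 145_200 bytes before the loss
    const cwnd_after = w_max * 0.7; // BETA_CUBIC reduction: 101_640 bytes
    // (145_200 - 101_640) / (0.4 * 1452) = 75, and cbrt(75) ≈ 4.22, so the
    // cubic curve regains w_max roughly 4.2 seconds after the loss.
    const k = kSeconds(w_max, cwnd_after, 0.4 * mss);
    try std.testing.expectApproxEqAbs(@as(f64, 4.217), k, 0.01);
}
```

These are the same inputs that onPacketLost records (w_max and cwnd_at_epoch) before calling computeK.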
@@ -306,7 +292,7 @@ test "cubic: w_est accumulates across ACKs in CUBIC phase" { c.cwnd_at_epoch = @floatFromInt(c.cwnd); c.w_est = @as(f64, @floatFromInt(c.cwnd)) + 1000.0; // w_est > w_cubic initially - c.onAckReceived(MSS, 50_000_000, 100_000_000); + c.onAckReceived(.{ .bytes_acked = MSS, .rtt_ns = 50_000_000 }, 100_000_000); try testing.expect(c.w_est > w_est_after_loss); } @@ -314,10 +300,10 @@ test "cubic: non-monotonic clock (negative t_ns) is a no-op" { const testing = std.testing; var c = Cubic.init(); c.cwnd = 50 * MSS; - c.onPacketLost(1_000_000_000); + c.onPacketLost(0, 1_000_000_000); const cwnd_before = c.cwnd; - c.onAckReceived(MSS, 50_000_000, 500_000_000); + c.onAckReceived(.{ .bytes_acked = MSS, .rtt_ns = 50_000_000 }, 500_000_000); try testing.expectEqual(cwnd_before, c.cwnd); } @@ -334,7 +320,7 @@ test "cubic: single loss event reduces cwnd by exactly BETA_CUBIC" { var c = Cubic.init(); c.cwnd = 100 * MSS; // 120000 bytes const before = c.cwnd; - c.onPacketLost(1_000_000_000); + c.onPacketLost(0, 1_000_000_000); // Expected: floor(120000 * 0.7) = 84000, but minimum is 8*MSS const expected: u64 = @intFromFloat(@as(f64, @floatFromInt(before)) * BETA_CUBIC); const MIN_CWND: u64 = 8 * MSS; @@ -356,7 +342,7 @@ test "cubic: large window growth does not stall" { const initial = c.cwnd; var i: u32 = 0; while (i < 100) : (i += 1) { - c.onAckReceived(MSS, 100_000_000, 10_000_000_000); + c.onAckReceived(.{ .bytes_acked = MSS, .rtt_ns = 100_000_000 }, 10_000_000_000); } try testing.expect(c.cwnd > initial + 100); } @@ -384,7 +370,7 @@ test "cubic: loss reduction is exactly BETA_CUBIC * cwnd" { const testing = std.testing; var c = Cubic.init(); c.cwnd = 10 * MSS; // 12000 bytes - c.onPacketLost(0); + c.onPacketLost(0, 0); // Expected: floor(12000 * 0.7) = 8400, but floored to MIN_CWND = 8*MSS = 9600. // When floor applies, w_max is clipped to MIN_CWND to prevent K ≈ 18s pathology. try testing.expectEqual(@as(u64, 8 * MSS), c.cwnd); @@ -406,7 +392,7 @@ test "cubic: cwnd_remainder uses saturating arithmetic on extreme target" { c.epoch_start_ns = 0; c.cwnd_at_epoch = @floatFromInt(c.cwnd); - c.onAckReceived(1, 10_000_000, 400_000 * 1_000_000_000); + c.onAckReceived(.{ .bytes_acked = 1, .rtt_ns = 10_000_000 }, 400_000 * 1_000_000_000); try testing.expect(c.cwnd >= MSS); try testing.expect(c.cwnd > MSS); diff --git a/src/quic/connection.zig b/src/quic/connection.zig index 241ab05..2304ed4 100644 --- a/src/quic/connection.zig +++ b/src/quic/connection.zig @@ -4,7 +4,7 @@ //! The connection is driven by: //! //! connection.receive(data, src) — feed a received UDP datagram -//! connection.send(out) — drain the next UDP datagram to transmit +//! connection.send(out, now_ns) — drain the next UDP datagram to transmit //! connection.nextTimeout() — nanosecond deadline for tick() //! connection.tick(now_ns) — drive timer-based events //! @@ -21,7 +21,7 @@ const varint = @import("varint.zig"); const cid_mod = @import("connection_id.zig"); const stream_mod = @import("stream.zig"); const flow_control = @import("flow_control.zig"); -const cubic_mod = @import("congestion/cubic.zig"); +const cc_mod = @import("congestion/cc.zig"); const loss_recovery_mod = @import("loss_recovery.zig"); const ConnectionId = cid_mod.ConnectionId; @@ -149,7 +149,11 @@ const CRYPTO_STAGE_DEPTH = 16; /// Maximum bytes in a single staged CRYPTO fragment (conservatively > max QUIC payload). pub const CRYPTO_STAGE_FRAG = 1400; /// Maximum number of pending stream retransmits when send queue is full. 
-const MAX_PENDING_RETX = 32; +/// Must be large enough to handle worst-case burst losses when pacing +/// keeps the send queue non-empty during loss detection. The epoch 2 +/// sent buffer holds up to 128 packets, each with up to 1 stream frame +/// in practice, so 128 covers the realistic worst case. +const MAX_PENDING_RETX = 128; /// A single buffered out-of-order CRYPTO fragment. const CryptoStagedFrag = struct { @@ -163,6 +167,22 @@ const SendSlot = struct { len: usize, }; +/// Per-slot metadata for deferred wire-time accounting. +/// Stored in parallel with SendSlot; consumed by send() to call +/// loss.onPacketSent at wire time rather than queue time. +const SendMeta = struct { + pn: u64 = 0, + epoch: u8 = 0, + size: u16 = 0, + ack_eliciting: bool = false, + /// Queue-time timestamp for delivery rate computation. Wire-time + /// (now_ns in send()) is used for loss detection timing, but delivery + /// rate must use queue-time to avoid pacing delays inflating + /// send_elapsed and depressing BBR's bandwidth estimate. + queued_ns: i64 = 0, + frame_info: loss_recovery_mod.SentFrameInfo = .{}, +}; + // --------------------------------------------------------------------------- // Configuration // --------------------------------------------------------------------------- @@ -247,6 +267,10 @@ pub fn Connection(comptime max_streams: usize) type { peer_scid: [20]u8 = [_]u8{0} ** 20, peer_scid_len: u8 = 0, peer_addr: SocketAddr, + /// Previous peer address (before last migration). Packets from this + /// address are silently accepted without triggering re-migration, since + /// they are late arrivals from the old path. + prev_peer_addr: ?SocketAddr, // Crypto initial_keys: crypto.InitialKeys, @@ -269,7 +293,7 @@ pub fn Connection(comptime max_streams: usize) type { conn_flow: flow_control.FlowController, // Congestion control - congestion: cubic_mod.Cubic, + congestion: cc_mod.CongestionControl, // Loss recovery (RTT estimation, sent-packet tracking, PTO) loss: loss_recovery_mod.LossRecovery, @@ -282,8 +306,13 @@ pub fn Connection(comptime max_streams: usize) type { // Send queue (ring buffer of ready-to-send packets) sq: [SEND_QUEUE_DEPTH]SendSlot, + sq_meta: [SEND_QUEUE_DEPTH]SendMeta, sq_head: usize, sq_tail: usize, + /// Bytes in the send queue (ack-eliciting only) that have not yet + /// been handed to the socket. Complements loss.bytes_in_flight which + /// counts wire-sent bytes only. 
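+        ///
+        /// Both counters feed the cwnd gate applied before queueing new stream
+        /// data (a sketch of that check; the +64 is the header/AEAD overhead
+        /// estimate it uses):
+        ///
+        ///     if (loss.bytes_in_flight + bytes_queued + data.len + 64 > congestion.cwnd)
+        ///         return error.CongestionWindowFull;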
+ bytes_queued: u64, // Timers idle_deadline_ns: ?i64, @@ -558,6 +587,7 @@ pub fn Connection(comptime max_streams: usize) type { .alt_local_reset_token = alt_local_reset_token, .peer_cid = ConnectionId.zero, .peer_addr = .{ .v4 = .{ .addr = [_]u8{0} ** 4, .port = 0 } }, + .prev_peer_addr = null, .initial_keys = .{ .client = .{ .key = [_]u8{0} ** 32, .iv = [_]u8{0} ** 12, .hp = [_]u8{0} ** 32, .suite = .aes_128_gcm }, .server = .{ .key = [_]u8{0} ** 32, .iv = [_]u8{0} ** 12, .hp = [_]u8{0} ** 32, .suite = .aes_128_gcm }, @@ -572,15 +602,17 @@ pub fn Connection(comptime max_streams: usize) type { config.initial_max_data, config.initial_max_data, ), - .congestion = cubic_mod.Cubic.init(), + .congestion = cc_mod.CongestionControl.init(), .loss = loss_recovery_mod.LossRecovery.init(), .current_time_ns = 0, .cached_max_ack_delay_ns = 25_000_000, .cached_ack_delay_exp = 3, .idle_timeout_i64 = idle_timeout_i64, .sq = undefined, + .sq_meta = [_]SendMeta{.{}} ** SEND_QUEUE_DEPTH, .sq_head = 0, .sq_tail = 0, + .bytes_queued = 0, .idle_deadline_ns = null, .pto_deadline_ns = null, .drain_deadline_ns = null, @@ -687,11 +719,12 @@ pub fn Connection(comptime max_streams: usize) type { // Path migration detection (RFC 9000 §9): only in established state, // and only when the peer has not disabled active migration. + // Ignore packets from the previous peer address — those are late + // arrivals from the old path and must not trigger re-migration. if (self.hot.state == .established and !self.peer_addr.eql(src)) { - if (!self.peer_disable_migration) { + const is_prev = if (self.prev_peer_addr) |prev| prev.eql(src) else false; + if (!is_prev and !self.peer_disable_migration) { if (SocketAddr.isPortOnlyChange(self.peer_addr, src)) { - // RFC 9000 §9.3.1: port-only change is likely NAT rebinding. - // Skip congestion reset and path validation to preserve throughput. self.onNatRebind(src, io) catch {}; } else { self.onPathMigration(src, io) catch {}; @@ -760,28 +793,119 @@ pub fn Connection(comptime max_streams: usize) type { } } + /// Store per-packet metadata for deferred wire-time accounting. + /// Called immediately after enqueueSend() succeeds (sq_tail already + /// advanced), so the metadata is written to the slot that was just filled. + fn storeSendMeta(self: *Self, pn: u64, epoch: u8, size: usize, ack_eliciting: bool, fi: loss_recovery_mod.SentFrameInfo) void { + const idx = (self.sq_tail - 1) & (SEND_QUEUE_DEPTH - 1); + const sz: u16 = @intCast(@min(size, 0xffff)); + self.sq_meta[idx] = .{ + .pn = pn, + .epoch = epoch, + .size = sz, + .ack_eliciting = ack_eliciting, + .queued_ns = self.current_time_ns, + .frame_info = fi, + }; + if (ack_eliciting) { + self.bytes_queued += sz; + } + } + /// Write the next UDP payload to `out`. Returns bytes written (0 = nothing pending). - pub fn send(self: *Self, out: []u8) usize { + /// `now_ns` is the wall-clock time used for wire-time accounting (loss recovery, + /// pacing, and PTO arming). + /// + /// RFC 9000 §12.2: coalesces consecutive long-header packets (Initial + + /// Handshake) into a single UDP datagram so they share one loss event + /// instead of being independently dropped. + pub fn send(self: *Self, out: []u8, now_ns: i64) usize { // RFC 9000 §10.2: draining state — must not send anything. 
if (self.hot.state == .draining) return 0; - if (self.sq_head == self.sq_tail) return 0; - const slot = &self.sq[self.sq_head & (SEND_QUEUE_DEPTH - 1)]; - const n = @min(slot.len, out.len); - @memcpy(out[0..n], slot.buf[0..n]); + if (self.sq_head == self.sq_tail) { + // Nothing to send — if cwnd has room, we are app-limited. + if (self.loss.bytes_in_flight + self.bytes_queued < self.congestion.cwnd) { + self.loss.delivery.app_limited = true; + } + return 0; + } + const mask = SEND_QUEUE_DEPTH - 1; + var meta = self.sq_meta[self.sq_head & mask]; + // Pacing gate: refill tokens and check if we can send. + // Bypass pacing when nothing is in flight — there is no congestion + // to pace for, and blocking here creates a death spiral where the + // delivery rate collapses (no data sent → no ACKs → rate drops → + // pacing blocks even harder). + const pacing_tokens = self.congestion.pacing.refill(self.congestion.cwnd, now_ns); + if (meta.ack_eliciting and pacing_tokens < meta.size and + self.congestion.pacing.rate > 0 and self.congestion.shouldPace() and + self.loss.bytes_in_flight > 0) + { + return 0; + } + const slot = &self.sq[self.sq_head & mask]; + var total = @min(slot.len, out.len); + @memcpy(out[0..total], slot.buf[0..total]); + // Wire-time accounting for the first packet. + self.loss.onPacketSent(meta.pn, meta.epoch, meta.size, meta.ack_eliciting, now_ns, meta.queued_ns, meta.frame_info); + if (meta.ack_eliciting) { + self.bytes_queued -|= meta.size; + self.pto_deadline_ns = self.loss.ptoDeadline(self.cached_max_ack_delay_ns); + } self.sq_head += 1; - self.bytes_sent += n; + + // Coalesce: append consecutive long-header packets (epoch 0/1) into + // the same UDP datagram (RFC 9000 §12.2). This halves handshake loss + // probability under lossy networks. Do NOT coalesce 1-RTT packets — + // that breaks connection migration (Handshake ACK + 1-RTT data in one + // datagram confuses path validation). + if (meta.epoch < 2) { + while (self.sq_head < self.sq_tail) { + const next_meta = self.sq_meta[self.sq_head & mask]; + if (next_meta.epoch >= 2) break; + const next_slot = &self.sq[self.sq_head & mask]; + if (total + next_slot.len > out.len) break; + @memcpy(out[total..][0..next_slot.len], next_slot.buf[0..next_slot.len]); + self.loss.onPacketSent(next_meta.pn, next_meta.epoch, next_meta.size, next_meta.ack_eliciting, now_ns, next_meta.queued_ns, next_meta.frame_info); + if (next_meta.ack_eliciting) { + self.bytes_queued -|= next_meta.size; + self.pto_deadline_ns = self.loss.ptoDeadline(self.cached_max_ack_delay_ns); + } + total += next_slot.len; + self.sq_head += 1; + } + } + + // RFC 9000 §14.1: datagrams carrying ack-eliciting Initial packets + // MUST be at least 1200 bytes. Pad after coalescing so the Handshake + // portion fills the datagram (reducing the number of separate packets + // needed for the cert chain) instead of wasting space on PADDING frames. + if (meta.epoch == 0 and meta.ack_eliciting and total < 1200 and out.len >= 1200) { + @memset(out[total..1200], 0); + total = 1200; + } + + if (self.congestion.pacing.rate > 0) { + self.congestion.pacing.consume(total); + } + self.bytes_sent += total; self.pkts_sent += 1; - return n; + return total; } /// Returns the nanosecond deadline when `tick()` must be called, - /// or null if no timer is active. + /// or null if no timer is active. Includes the pacing deadline when + /// the send queue is non-empty so the event loop wakes to drain it. 
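+        ///
+        /// Sans-I/O loop sketch (`running`, `clock`, `waitReadableUntil`, `sendTo`,
+        /// and `out_buf` are caller-owned, illustrative names):
+        ///
+        ///     while (running) {
+        ///         waitReadableUntil(conn.nextTimeout());
+        ///         const now = clock.nowNs();
+        ///         // feed any received datagrams via conn.receive(...), then:
+        ///         conn.tick(now);
+        ///         while (true) {
+        ///             const n = conn.send(&out_buf, now);
+        ///             if (n == 0) break;
+        ///             sendTo(out_buf[0..n], conn.peer_addr);
+        ///         }
+        ///     }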
pub fn nextTimeout(self: *const Self) ?i64 { const idle = self.idle_deadline_ns orelse std.math.maxInt(i64); const pto = self.pto_deadline_ns orelse std.math.maxInt(i64); const drain = self.drain_deadline_ns orelse std.math.maxInt(i64); const tl = self.time_loss_alarm_ns orelse std.math.maxInt(i64); - const m = @min(@min(@min(idle, pto), drain), tl); + const pacing: i64 = if (self.sq_head != self.sq_tail) + self.congestion.pacing.nextSendTime() orelse std.math.maxInt(i64) + else + std.math.maxInt(i64); + const m = @min(@min(@min(@min(idle, pto), drain), tl), pacing); return if (m == std.math.maxInt(i64)) null else m; } @@ -807,6 +931,12 @@ pub fn Connection(comptime max_streams: usize) type { } } + // Flush any Handshake CRYPTO that was buffered when amplification limit + // blocked the initial send. This must run on every tick — not just in + // receive() — because under high loss the client's packets may never + // arrive to trigger receive(), leaving the pending HS data unsent. + self.flushPendingHsCrypto(); + // Drain any deferred CRYPTO and stream retransmits before generating new traffic self.drainPendingCryptoRetx(); self.drainPendingStreamRetx(); @@ -826,7 +956,7 @@ pub fn Connection(comptime max_streams: usize) type { if (self.pto_deadline_ns) |d| { if (now_ns >= d) { self.loss.onPtoFired(); - if (self.app_keys != null) { + if (self.hot.state == .established) { // Post-handshake PTO: retransmit PATH_CHALLENGE if pending (RFC 9000 §9.2), // drain pending stream retransmits, probe with unacked stream data, // or send a 1-RTT PING probe (RFC 9002 §6.2). @@ -844,7 +974,7 @@ pub fn Connection(comptime max_streams: usize) type { // (not just our own previous PINGs). Without this guard, // PTO sends infinite PINGs after all transfers complete: // each PING creates in-flight state → PTO fires → PING → loop. - // Limit to 2 consecutive idle PINGs, then let idle timeout close. + // Limit to 6 consecutive idle PINGs, then let idle timeout close. if (self.idle_ping_count < 6) { self.queuePing() catch {}; self.idle_ping_count += 1; @@ -905,6 +1035,9 @@ pub fn Connection(comptime max_streams: usize) type { const tns = self.loss.timeThresholdNs(); var tl_result = loss_recovery_mod.AckResult{}; for (0..3) |epoch_idx| { + // Skip Initial/Handshake epochs once established — keys + // are zeroed, so any retransmit would panic on invalid suite. + if (self.hot.state == .established and epoch_idx < 2) continue; const la = self.loss.largest_acked[epoch_idx]; if (la == 0) continue; self.loss.sent.detectLoss( @@ -917,7 +1050,7 @@ pub fn Connection(comptime max_streams: usize) type { ); } if (tl_result.newly_lost > 0) { - self.congestion.onPacketLost(now_ns); + self.congestion.onPacketLost(tl_result.bytes_lost, now_ns); self.processLostFrames(tl_result); } // Reschedule if there are still candidates. @@ -1008,9 +1141,11 @@ pub fn Connection(comptime max_streams: usize) type { // Retransmissions (processLostFrames) bypass this check so loss recovery // is never blocked by a temporarily-reduced cwnd after a loss event. // Estimate packet size as data.len + 64 bytes of header/AEAD overhead. - if (self.loss.bytes_in_flight + data.len + 64 > self.congestion.cwnd) { + if (self.loss.bytes_in_flight + self.bytes_queued + data.len + 64 > self.congestion.cwnd) { return error.CongestionWindowFull; } + // Clear app-limited flag: we are actively sending. 
+ self.loss.delivery.app_limited = false; try self.queueStreamData(stream_id, data, fin); } @@ -1082,6 +1217,28 @@ pub fn Connection(comptime max_streams: usize) type { // Internal packet processing // ----------------------------------------------------------------------- + /// Compute the wire size of a long-header QUIC packet from its unprotected + /// header fields. Used to skip an unprocessable packet in a coalesced + /// datagram without dropping the subsequent packets. + fn skipLongHeaderPacket(data: []const u8, raw_dcid_len: u8, raw_pkt_type: packet.PacketType) usize { + // Position after: first_byte(1) + version(4) + dcid_len(1) + dcid + scid_len(1) + scid + var pos: usize = 6 + @as(usize, raw_dcid_len); + if (pos >= data.len) return data.len; + const scid_len = data[pos]; + pos += 1 + @as(usize, scid_len); + if (pos > data.len) return data.len; + // Initial packets carry a token before the Length field. + if (raw_pkt_type == .initial) { + const tok_r = varint.decode(data[pos..]) orelse return data.len; + pos += tok_r.len + @as(usize, @intCast(tok_r.value)); + if (pos > data.len) return data.len; + } + // Length varint: covers PN bytes + ciphertext + AEAD tag. + const len_r = varint.decode(data[pos..]) orelse return data.len; + pos += len_r.len; + return @min(pos + @as(usize, @intCast(len_r.value)), data.len); + } + pub fn processOnePacket(self: *Self, data: []u8, src: SocketAddr, io: std.Io) !usize { if (data.len == 0) return 0; @@ -1152,21 +1309,29 @@ pub fn Connection(comptime max_streams: usize) type { // In established state, all Initial packets (even with matching DCID) must be // silently dropped. This handles late/retransmitted Initial packets and new // connection attempts that happen to use the same server local_cid. + // Skip just this one packet so coalesced Handshake/1-RTT packets can proceed. if (raw_pkt_type == .initial and self.hot.state == .established) { - return data.len; + return skipLongHeaderPacket(data, raw_dcid_len, raw_pkt_type); } // For handshake state Initial packets, validate DCID against the client's original // DCID stored from the first Initial. RFC 9000 §7.2: a client MUST NOT change its // Destination CID before receiving the server's first Initial packet, so all Initial // retransmissions (including those carrying fragmented ClientHello bytes) must carry - // the same variable-length DCID. The old check compared against local_cid (fixed - // 8 bytes) and silently dropped every packet whose dcid_len > 8. + // the same variable-length DCID. However, once the client receives the server's + // first Initial, it switches to the server's SCID for all subsequent packets + // (RFC 9000 §7.2), so the coalesced Initial ACK uses our local_cid. + // Accept both the original DCID and our own local_cid/alt_local_cid. if (raw_pkt_type == .initial and self.hot.state == .handshake and self.first_initial_dcid_len > 0) { - if (!std.mem.eql(u8, raw_dcid, self.first_initial_dcid[0..self.first_initial_dcid_len])) { - return data.len; // Different DCID: belongs to a different connection. 
+ const matches_first = std.mem.eql(u8, raw_dcid, self.first_initial_dcid[0..self.first_initial_dcid_len]); + const matches_local = raw_dcid_len == cid_mod.len and std.mem.eql(u8, raw_dcid[0..cid_mod.len], &self.local_cid.bytes); + const matches_alt = raw_dcid_len == cid_mod.len and std.mem.eql(u8, raw_dcid[0..cid_mod.len], &self.alt_local_cid.bytes); + if (!matches_first and !matches_local and !matches_alt) { + // Different DCID: skip just this Initial packet (not the entire + // datagram) so coalesced Handshake/1-RTT packets can still be processed. + return skipLongHeaderPacket(data, raw_dcid_len, raw_pkt_type); } } @@ -2135,11 +2300,10 @@ pub fn Connection(comptime max_streams: usize) type { } } - // Feed acknowledgement data to CUBIC + // Feed acknowledgement data to congestion controller if (result.newly_acked > 0) { self.congestion.onAckReceived( - result.bytes_acked, - self.loss.rtt.smoothed_rtt, + result.delivery_rate_sample, self.current_time_ns, ); self.loss.resetPtoCount(); @@ -2147,7 +2311,7 @@ pub fn Connection(comptime max_streams: usize) type { // One congestion event per loss detection (RFC 9438 §5.6) if (result.newly_lost > 0) { - self.congestion.onPacketLost(self.current_time_ns); + self.congestion.onPacketLost(result.bytes_lost, self.current_time_ns); } // Persistent congestion: collapse cwnd when loss span > 3×PTO (RFC 9002 §6.1.2) @@ -2160,9 +2324,10 @@ pub fn Connection(comptime max_streams: usize) type { if (ack.has_ecn) { const ce: u62 = @intCast(@min(ack.ecn_ce, std.math.maxInt(u62))); if (ce > self.ecn_ce_seen[epoch]) { + const ce_delta = ce - self.ecn_ce_seen[epoch]; self.ecn_ce_seen[epoch] = ce; if (result.largest_acked_sent_ns) |_| { - self.congestion.onPacketLost(self.current_time_ns); + self.congestion.onEcnCe(ce_delta, self.current_time_ns); } } } @@ -2176,6 +2341,15 @@ pub fn Connection(comptime max_streams: usize) type { // Refresh PTO timer and time-loss alarm after any ACK. self.pto_deadline_ns = self.loss.ptoDeadline(max_ack_delay_ns); + // With wire-time accounting, retransmissions queued by processLostFrames + // are in bytes_queued (not bytes_in_flight). ptoDeadline returns null + // when bytes_in_flight == 0. Force-arm PTO when queued data exists so + // the server doesn't go silent while pacing drains retransmissions. + if (self.pto_deadline_ns == null and self.bytes_queued > 0) { + const pto_base = self.loss.rtt.ptoBase(max_ack_delay_ns); + const max_i64: u64 = @as(u64, std.math.maxInt(i64)); + self.pto_deadline_ns = self.current_time_ns +| @as(i64, @intCast(@min(pto_base, max_i64))); + } // RFC 9002 §6.2.2.1: server MUST keep PTO armed during handshake even // when bytes_in_flight == 0. The peer may have ACKed our Handshake CRYPTO // at the QUIC level but not yet processed it at the TLS level (e.g. gaps in @@ -2231,6 +2405,30 @@ pub fn Connection(comptime max_streams: usize) type { } } + /// Declare all in-flight packets in `epoch` as lost: invalidate their + /// sent-table entries, reset bytes_in_flight, and queue their stream + /// frames for retransmission. Used on path migration to clean up + /// packets that were sent to the old address and will never be ACKed. + fn declareEpochLost(self: *Self, epoch: u8) void { + const sent = &self.loss.sent; + for (&sent.slots, 0..) |*slot, idx| { + if (!slot.valid or slot.epoch != epoch) continue; + if (slot.in_flight) { + self.loss.bytes_in_flight -|= slot.size; + } + // Queue stream frames from this packet for retransmission. 
+ const fi = sent.frame_info[idx]; + for (fi.frames[0..fi.count]) |f| { + switch (f) { + .stream => |s| self.deferStreamRetx(s.stream_id, s.offset, s.len, s.fin), + else => {}, + } + } + slot.valid = false; + if (epoch < 3) sent.valid_per_epoch[epoch] -|= 1; + } + } + pub fn processLostFrames(self: *Self, result: loss_recovery_mod.AckResult) void { // Sized to MAX_SEND_PACKET_SIZE so getSendData never returns more bytes than // encryptAndEnqueueStreamFrame can encode into pkt_scratch without overflow. @@ -2245,23 +2443,22 @@ pub fn Connection(comptime max_streams: usize) type { // adjacent buffered data beyond the lost frame boundary). const n = @min(st.getSendData(s.offset, &stream_retx_buf), s.len); if (n > 0 or s.fin) { - self.encryptAndEnqueueStreamFrame( - s.stream_id, - s.offset, - stream_retx_buf[0..n], - s.fin, - ) catch { - // Send queue full — defer for retry in drainPendingStreamRetx() - if (self.stream_pending_retx_count < MAX_PENDING_RETX) { - self.stream_pending_retx[self.stream_pending_retx_count] = .{ - .stream_id = s.stream_id, - .offset = s.offset, - .len = @intCast(n), - .fin = s.fin, - }; - self.stream_pending_retx_count += 1; - } + // Cap retransmission queueing to avoid bytes_queued + // exceeding cwnd. When bytes_queued is already at + // or above cwnd, defer remaining retransmissions. + const enqueued = enq: { + if (self.bytes_queued + n + 64 > self.congestion.cwnd) break :enq false; + self.encryptAndEnqueueStreamFrame( + s.stream_id, + s.offset, + stream_retx_buf[0..n], + s.fin, + ) catch break :enq false; + break :enq true; }; + if (!enqueued) { + self.deferStreamRetx(s.stream_id, s.offset, @intCast(n), s.fin); + } } } }, @@ -2303,6 +2500,18 @@ pub fn Connection(comptime max_streams: usize) type { } } + fn deferStreamRetx(self: *Self, stream_id: u62, offset: u62, len: u16, fin: bool) void { + if (self.stream_pending_retx_count < MAX_PENDING_RETX) { + self.stream_pending_retx[self.stream_pending_retx_count] = .{ + .stream_id = stream_id, + .offset = offset, + .len = len, + .fin = fin, + }; + self.stream_pending_retx_count += 1; + } + } + fn drainPendingStreamRetx(self: *Self) void { if (self.stream_pending_retx_count == 0) return; var stream_retx_buf: [MAX_SEND_PACKET_SIZE]u8 = undefined; @@ -2448,8 +2657,9 @@ pub fn Connection(comptime max_streams: usize) type { const pn = self.hot.tx_pn[0]; self.hot.tx_pn[0] += 1; const ct_len = fpos + 16; + const slot_buf = try self.reserveSendSlot(ct_len + 30); const hdr_len = packet.encodeLongHeader( - &self.enc_scratch, + slot_buf, .initial, packet_version, self.peer_scid[0..self.peer_scid_len], @@ -2462,13 +2672,13 @@ pub fn Connection(comptime max_streams: usize) type { self.hot.tx_pn[0] -= 1; return error.PacketTooLarge; } - crypto.encryptPayload(ik, pn, self.enc_scratch[0..hdr_len], self.pkt_scratch[0..fpos], self.enc_scratch[hdr_len..][0..ct_len]); - crypto.applyHeaderProtection(ik, &self.enc_scratch[0], self.enc_scratch[hdr_len - 4 ..][0..4], self.enc_scratch[hdr_len..][0..16]); - try self.enqueueSend(self.enc_scratch[0 .. 
hdr_len + ct_len]); + crypto.encryptPayload(ik, pn, slot_buf[0..hdr_len], self.pkt_scratch[0..fpos], slot_buf[hdr_len..][0..ct_len]); + crypto.applyHeaderProtection(ik, &slot_buf[0], slot_buf[hdr_len - 4 ..][0..4], slot_buf[hdr_len..][0..16]); + self.commitSendSlot(hdr_len + ct_len); var fi = loss_recovery_mod.SentFrameInfo{}; fi.frames[0] = .{ .crypto_frame = .{ .offset = @intCast(offset), .len = @intCast(chunk.len) } }; fi.count = 1; - self.loss.onPacketSent(pn, 0, hdr_len + ct_len, true, self.current_time_ns, fi); + self.storeSendMeta(pn, 0, hdr_len + ct_len, true, fi); } fn sendCryptoChunkEpoch1(self: *Self, chunk: []const u8, offset: u62, fpos: usize) !void { @@ -2476,8 +2686,9 @@ pub fn Connection(comptime max_streams: usize) type { const pn = self.hot.tx_pn[1]; self.hot.tx_pn[1] += 1; const ct_len = fpos + 16; + const slot_buf = try self.reserveSendSlot(ct_len + 30); const hdr_len = packet.encodeLongHeader( - &self.enc_scratch, + slot_buf, .handshake, self.quic_version, self.peer_scid[0..self.peer_scid_len], @@ -2490,13 +2701,13 @@ pub fn Connection(comptime max_streams: usize) type { self.hot.tx_pn[1] -= 1; return error.PacketTooLarge; } - crypto.encryptPayload(hk.server, pn, self.enc_scratch[0..hdr_len], self.pkt_scratch[0..fpos], self.enc_scratch[hdr_len..][0..ct_len]); - crypto.applyHeaderProtection(hk.server, &self.enc_scratch[0], self.enc_scratch[hdr_len - 4 ..][0..4], self.enc_scratch[hdr_len..][0..16]); - try self.enqueueSend(self.enc_scratch[0 .. hdr_len + ct_len]); + crypto.encryptPayload(hk.server, pn, slot_buf[0..hdr_len], self.pkt_scratch[0..fpos], slot_buf[hdr_len..][0..ct_len]); + crypto.applyHeaderProtection(hk.server, &slot_buf[0], slot_buf[hdr_len - 4 ..][0..4], slot_buf[hdr_len..][0..16]); + self.commitSendSlot(hdr_len + ct_len); var fi = loss_recovery_mod.SentFrameInfo{}; fi.frames[0] = .{ .crypto_frame = .{ .offset = @intCast(offset), .len = @intCast(chunk.len) } }; fi.count = 1; - self.loss.onPacketSent(pn, 1, hdr_len + ct_len, true, self.current_time_ns, fi); + self.storeSendMeta(pn, 1, hdr_len + ct_len, true, fi); } // ----------------------------------------------------------------------- @@ -2504,30 +2715,37 @@ pub fn Connection(comptime max_streams: usize) type { // ----------------------------------------------------------------------- pub fn enqueueSend(self: *Self, data: []const u8) !void { - // Use monotonic head/tail subtraction (not modular comparison) to correctly - // detect full queue regardless of wrap-around. + const slot_buf = try self.reserveSendSlot(data.len); + const n = @min(data.len, MAX_SEND_PACKET_SIZE); + @memcpy(slot_buf[0..n], data[0..n]); + self.commitSendSlot(n); + } + + /// Reserve the next send queue slot for zero-copy writes. + /// Returns a pointer to the slot's buffer. The caller writes + /// directly into it (e.g. header encoding + AEAD encryption), + /// then calls commitSendSlot() with the actual length. + /// Checks queue capacity, idle timer, and amplification limit. + fn reserveSendSlot(self: *Self, size: usize) ![]u8 { if (self.sq_tail - self.sq_head >= SEND_QUEUE_DEPTH) return error.SendQueueFull; - // RFC 9000 §10.1.2: restart idle timer when sending a packet. if (self.idle_timeout_i64 > 0) { self.idle_deadline_ns = self.current_time_ns +| self.idle_timeout_i64; } - // Amplification limit: must not send more than 3× received before path - // validation. 
Only enforced once we have received at least one datagram - // (bytes_unvalidated_recv > 0) so that direct enqueueSend calls in tests are - // unaffected before any receive has happened (RFC 9000 §8.1.2). if (!self.path_validated and self.bytes_unvalidated_recv > 0) { - const new_sent = self.bytes_unvalidated_sent +| data.len; + const new_sent = self.bytes_unvalidated_sent +| size; if (new_sent > self.bytes_unvalidated_recv *| 3) { return error.AmplificationLimitExceeded; } self.bytes_unvalidated_sent = new_sent; } - const slot = &self.sq[self.sq_tail & (SEND_QUEUE_DEPTH - 1)]; - const n = @min(data.len, MAX_SEND_PACKET_SIZE); - @memcpy(slot.buf[0..n], data[0..n]); - slot.len = n; + return &self.sq[self.sq_tail & (SEND_QUEUE_DEPTH - 1)].buf; + } + + /// Commit a previously reserved send slot with the actual packet length. + fn commitSendSlot(self: *Self, len: usize) void { + self.sq[self.sq_tail & (SEND_QUEUE_DEPTH - 1)].len = len; self.sq_tail += 1; } @@ -2595,7 +2813,7 @@ pub fn Connection(comptime max_streams: usize) type { try self.enqueueSend(self.enc_scratch[0 .. hdr_len + ct_len]); var fi = loss_recovery_mod.SentFrameInfo{}; fi.count = 0; // ACK is not ack-eliciting; no frame info tracked - self.loss.onPacketSent(pn, 0, hdr_len + ct_len, false, self.current_time_ns, fi); + self.storeSendMeta(pn, 0, hdr_len + ct_len, false, fi); }, 1 => { // Handshake packet: Long Header, handshake keys @@ -2620,7 +2838,7 @@ pub fn Connection(comptime max_streams: usize) type { try self.enqueueSend(self.enc_scratch[0 .. hdr_len + ct_len]); var fi = loss_recovery_mod.SentFrameInfo{}; fi.count = 0; - self.loss.onPacketSent(pn, 1, hdr_len + ct_len, false, self.current_time_ns, fi); + self.storeSendMeta(pn, 1, hdr_len + ct_len, false, fi); }, 2 => { // 1-RTT packet: Short Header, app keys @@ -2652,26 +2870,24 @@ pub fn Connection(comptime max_streams: usize) type { const pn = self.hot.tx_pn[2]; self.hot.tx_pn[2] += 1; - const hdr_len = packet.encodeShortHeader(&self.enc_scratch, self.peer_scid[0..self.peer_scid_len], @intCast(pn), self.current_key_phase); const ct_len = plaintext_len + 16; + // Reserve a send queue slot and encrypt directly into it, + // eliminating a ~1452-byte memcpy per packet. 
+ const slot_buf = self.reserveSendSlot(ct_len + 20) catch |err| { + self.hot.tx_pn[2] -= 1; + return err; + }; + const hdr_len = packet.encodeShortHeader(slot_buf, self.peer_scid[0..self.peer_scid_len], @intCast(pn), self.current_key_phase); if (hdr_len + ct_len > MAX_SEND_PACKET_SIZE) { self.hot.tx_pn[2] -= 1; return error.PacketTooLarge; } - crypto.encryptPayload(ak.server, pn, self.enc_scratch[0..hdr_len], self.pkt_scratch[0..plaintext_len], self.enc_scratch[hdr_len..][0..ct_len]); - crypto.applyHeaderProtection(ak.server, &self.enc_scratch[0], self.enc_scratch[hdr_len - 4 ..][0..4], self.enc_scratch[hdr_len..][0..16]); + crypto.encryptPayload(ak.server, pn, slot_buf[0..hdr_len], self.pkt_scratch[0..plaintext_len], slot_buf[hdr_len..][0..ct_len]); + crypto.applyHeaderProtection(ak.server, &slot_buf[0], slot_buf[hdr_len - 4 ..][0..4], slot_buf[hdr_len..][0..16]); const out_len = hdr_len + ct_len; - self.enqueueSend(self.enc_scratch[0..out_len]) catch |err| { - self.hot.tx_pn[2] -= 1; - return err; - }; + self.commitSendSlot(out_len); - if (fi) |frame_info| { - self.loss.onPacketSent(pn, 2, out_len, ack_eliciting, self.current_time_ns, frame_info); - if (ack_eliciting) { - self.pto_deadline_ns = self.loss.ptoDeadline(self.cached_max_ack_delay_ns); - } - } + self.storeSendMeta(pn, 2, out_len, ack_eliciting, fi orelse .{}); return pn; } @@ -2903,7 +3119,7 @@ pub fn Connection(comptime max_streams: usize) type { } }; fi.count = 1; self.crypto_send_offset[0] += chunk_len; - self.loss.onPacketSent(pn, 0, hdr_len + ct_len, true, self.current_time_ns, fi); + self.storeSendMeta(pn, 0, hdr_len + ct_len, true, fi); }, 1 => { const hk = self.hs_keys.?.server; @@ -2940,7 +3156,7 @@ pub fn Connection(comptime max_streams: usize) type { } }; fi.count = 1; self.crypto_send_offset[1] += chunk_len; - self.loss.onPacketSent(pn, 1, hdr_len + ct_len, true, self.current_time_ns, fi); + self.storeSendMeta(pn, 1, hdr_len + ct_len, true, fi); }, else => unreachable, } @@ -2954,7 +3170,6 @@ pub fn Connection(comptime max_streams: usize) type { @memcpy(self.crypto_send_saved[epoch][old..end], chunk); self.crypto_send_saved_len[epoch] = @intCast(end); } - self.pto_deadline_ns = self.loss.ptoDeadline(self.cached_max_ack_delay_ns); return chunk_len; } @@ -3031,7 +3246,7 @@ pub fn Connection(comptime max_streams: usize) type { var fi = loss_recovery_mod.SentFrameInfo{}; fi.frames[0] = .{ .crypto_frame = .{ .offset = @intCast(sent), .len = @intCast(chunk.len) } }; fi.count = 1; - self.loss.onPacketSent(pn, 0, hdr_len + ct_len, true, self.current_time_ns, fi); + self.storeSendMeta(pn, 0, hdr_len + ct_len, true, fi); } else { const hk = self.hs_keys orelse break; const pn = self.hot.tx_pn[1]; @@ -3057,7 +3272,7 @@ pub fn Connection(comptime max_streams: usize) type { var fi = loss_recovery_mod.SentFrameInfo{}; fi.frames[0] = .{ .crypto_frame = .{ .offset = @intCast(sent), .len = @intCast(chunk.len) } }; fi.count = 1; - self.loss.onPacketSent(pn, 1, hdr_len + ct_len, true, self.current_time_ns, fi); + self.storeSendMeta(pn, 1, hdr_len + ct_len, true, fi); } sent += chunk.len; @@ -3263,6 +3478,23 @@ pub fn Connection(comptime max_streams: usize) type { var fpos: usize = 0; fpos += frame.encodeFrame(self.pkt_scratch[fpos..], .{ .path_challenge = .{ .data = data } }); _ = self.sendShortHeaderPacket(fpos, null, false) catch return; + self.moveLastToFront(); + } + + /// Move the last enqueued packet to the front of the send queue. 
+ /// Used for PATH_CHALLENGE so it is the first packet sent on a new + /// path, bypassing any pacing-blocked data without reordering the FIFO. + fn moveLastToFront(self: *Self) void { + if (self.sq_tail -% self.sq_head < 2) return; // only 0-1 items, nothing to move + const mask = SEND_QUEUE_DEPTH - 1; + const tail_idx = (self.sq_tail -% 1) & mask; + self.sq_head -%= 1; + const head_idx = self.sq_head & mask; + if (head_idx != tail_idx) { + self.sq[head_idx] = self.sq[tail_idx]; + self.sq_meta[head_idx] = self.sq_meta[tail_idx]; + } + self.sq_tail -%= 1; } /// Process a NEW_CONNECTION_ID frame: store the CID and retire entries below retire_prior_to. @@ -3503,13 +3735,34 @@ pub fn Connection(comptime max_streams: usize) type { /// Handle a source address change: reset congestion, request path validation. fn onPathMigration(self: *Self, new_addr: SocketAddr, io: std.Io) !void { - // RFC 9000 §9.4: reset congestion controller on path change. - self.congestion = cubic_mod.Cubic.init(); + // RFC 9000 §9.4 permits resetting congestion state on migration, + // but resetting cwnd to INITIAL_CWND kills throughput: the server + // must re-probe bandwidth from scratch after every address change. + // Instead, preserve the congestion controller. Reset smoothed_rtt + // and rtt_var (the new path may differ), but KEEP min_rtt — resetting + // it to the 10ms default causes time-loss thresholds (9/8 × 10ms) to + // fire before retransmitted packets can be ACKed on a 30ms path, + // creating an infinite retransmission loop. + const saved_min_rtt = self.loss.rtt.min_rtt; + self.loss.rtt = loss_recovery_mod.RttEstimator{}; + self.loss.rtt.min_rtt = saved_min_rtt; + self.loss.pto_count = 0; + // Don't proactively retransmit all in-flight packets — many may + // have already been received by the client (ACKs still in transit). + // Reset bytes_in_flight to unblock the cwnd check and clear + // in_flight flags so old packets don't subtract from the counter + // when later ACKed (which would desync bif and kill PTO). + self.loss.bytes_in_flight = 0; + self.loss.sent.clearInflight(); + self.time_loss_alarm_ns = null; + self.pto_deadline_ns = self.current_time_ns +| @as(i64, @intCast(self.loss.rtt.ptoBase(self.cached_max_ack_delay_ns))); // RFC 9000 §9.4: reset amplification limit for the new path (separate from old path tracking). // Each path must independently satisfy the 3x amplification limit until validated. self.bytes_unvalidated_recv = 0; self.bytes_unvalidated_sent = 0; // Immediately adopt new address (RFC 9000 §9.3.1). + // Save old address so late-arriving packets don't trigger re-migration. + self.prev_peer_addr = self.peer_addr; self.peer_addr = new_addr; // RFC 9000 §9.3: reset path validation on migration — must re-validate new path. self.path_validated = false; @@ -3525,6 +3778,7 @@ pub fn Connection(comptime max_streams: usize) type { /// source port changed. Preserves congestion state for throughput. fn onNatRebind(self: *Self, new_addr: SocketAddr, io: std.Io) !void { // Adopt new address without resetting congestion or path validation. + self.prev_peer_addr = self.peer_addr; self.peer_addr = new_addr; // Still send PATH_CHALLENGE to confirm reachability on the new port. 
var challenge: [8]u8 = undefined; diff --git a/src/quic/connection_test_basic.zig b/src/quic/connection_test_basic.zig index 19ad348..304257b 100644 --- a/src/quic/connection_test_basic.zig +++ b/src/quic/connection_test_basic.zig @@ -17,6 +17,7 @@ const frame = @import("frame.zig"); const loss_recovery_mod = @import("loss_recovery.zig"); const stream_mod = @import("stream.zig"); const tls = @import("tls.zig"); +const cc_mod = @import("congestion/cc.zig"); test "connection: hot struct is 64 bytes" { const testing = std.testing; @@ -39,7 +40,7 @@ test "connection: send returns 0 when queue empty" { var conn = try Connection(16).accept(.{}, io); var out: [MAX_PACKET_SIZE]u8 = undefined; const testing = std.testing; - try testing.expectEqual(@as(usize, 0), conn.send(&out)); + try testing.expectEqual(@as(usize, 0), conn.send(&out, 0)); } test "connection: enqueue and drain send queue" { @@ -49,7 +50,7 @@ test "connection: enqueue and drain send queue" { try conn.enqueueSend(&data); var out: [8]u8 = undefined; - const n = conn.send(&out); + const n = conn.send(&out, 0); const testing = std.testing; try testing.expectEqual(@as(usize, 4), n); try testing.expectEqualSlices(u8, &data, out[0..n]); @@ -84,7 +85,7 @@ test "connection: unknown version triggers VN response" { // A Version Negotiation packet should be queued. var out: [64]u8 = undefined; - const n = conn.send(&out); + const n = conn.send(&out, 0); try testing.expect(n > 0); // VN packet has version 0x00000000. @@ -115,7 +116,7 @@ test "connection: ver=0 packet does not trigger VN response" { // No VN response must be queued for a ver=0 packet. var out: [64]u8 = undefined; - const n = conn.send(&out); + const n = conn.send(&out, 0); try testing.expectEqual(@as(usize, 0), n); } @@ -151,7 +152,7 @@ test "loss: onPacketSent wires bytes_in_flight and pto_deadline" { const io = std.testing.io; var conn = try Connection(16).accept(.{}, io); conn.current_time_ns = 1_000_000; - conn.loss.onPacketSent(1, 0, 1200, true, conn.current_time_ns, .{}); + conn.loss.onPacketSent(1, 0, 1200, true, conn.current_time_ns, conn.current_time_ns, .{}); try testing.expectEqual(@as(u64, 1200), conn.loss.bytes_in_flight); try testing.expect(conn.loss.ptoDeadline(conn.cached_max_ack_delay_ns) != null); } @@ -180,7 +181,7 @@ test "loss: onAckReceived decrements bytes_in_flight" { const io = std.testing.io; var conn = try Connection(16).accept(.{}, io); conn.current_time_ns = 0; - conn.loss.onPacketSent(1, 0, 1200, true, 0, .{}); + conn.loss.onPacketSent(1, 0, 1200, true, 0, 0, .{}); try testing.expectEqual(@as(u64, 1200), conn.loss.bytes_in_flight); const ranges = [_]loss_recovery_mod.AckedRange{.{ .low = 1, .high = 1 }}; @@ -204,7 +205,7 @@ test "connection: send queue full returns SendQueueFull error" { // Drain one slot: now there is room again var out: [8]u8 = undefined; - _ = conn.send(&out); + _ = conn.send(&out, 0); try conn.enqueueSend(&data); // must succeed now } @@ -215,7 +216,7 @@ test "connection: processAck uses packet epoch not connection epoch" { conn.current_time_ns = 0; conn.hot.tx_pn[0] = 2; // pretend pn=0 and pn=1 were sent in epoch 0 - conn.loss.onPacketSent(1, 0, 1200, true, 0, .{}); + conn.loss.onPacketSent(1, 0, 1200, true, 0, 0, .{}); try testing.expectEqual(@as(u64, 1200), conn.loss.bytes_in_flight); const ack = frame.AckFrame{ @@ -314,7 +315,7 @@ test "connection: version 0 packet is silently ignored" { // No packet should be queued (VN response is NOT sent for version-0 packets). 
var out: [64]u8 = undefined; - try testing.expectEqual(@as(usize, 0), conn.send(&out)); + try testing.expectEqual(@as(usize, 0), conn.send(&out, 0)); } // --------------------------------------------------------------------------- @@ -573,7 +574,7 @@ test "close: draining state suppresses send()" { // Queue something try conn.enqueueSend(&[_]u8{0x01}); var out: [8]u8 = undefined; - try testing.expectEqual(@as(usize, 0), conn.send(&out)); + try testing.expectEqual(@as(usize, 0), conn.send(&out, 0)); } test "close: nextTimeout includes drain_deadline" { @@ -831,21 +832,21 @@ test "security: VN rate limit suppresses same version within 60s" { // First unknown version: send VN conn.receive(&pkt, src, 0, 0, io) catch {}; var out: [64]u8 = undefined; - try testing.expect(conn.send(&out) > 0); + try testing.expect(conn.send(&out, 0) > 0); // Same version within 60s: throttle (no VN) conn.receive(&pkt, src, 30_000_000_000, 0, io) catch {}; // +30s - try testing.expectEqual(@as(usize, 0), conn.send(&out)); + try testing.expectEqual(@as(usize, 0), conn.send(&out, 0)); // Different unknown version within 60s of first: send VN (different version) std.mem.writeInt(u32, pkt[1..5], 0x00000003, .big); // different version conn.receive(&pkt, src, 35_000_000_000, 0, io) catch {}; - try testing.expect(conn.send(&out) > 0); + try testing.expect(conn.send(&out, 0) > 0); // First version after 60s: send VN again (cooldown expired) std.mem.writeInt(u32, pkt[1..5], 0x00000002, .big); conn.receive(&pkt, src, 61_000_000_000, 0, io) catch {}; // +61s - try testing.expect(conn.send(&out) > 0); + try testing.expect(conn.send(&out, 0) > 0); } test "event_queue: wraparound maintains FIFO order" { @@ -965,7 +966,9 @@ test "loss: multi-packet loss triggers single congestion event" { conn.current_time_ns = 1_000_000_000; // Force CUBIC into congestion avoidance with a known large window. - conn.congestion.ssthresh = 0; // cwnd always > ssthresh=0 → CUBIC always used + if (cc_mod.selected == .cubic) { + conn.congestion.ssthresh = 0; // cwnd always > ssthresh=0 → CUBIC always used + } conn.congestion.cwnd = 100 * 1200; // 120000 bytes (100 × MSS) const initial_cwnd = conn.congestion.cwnd; @@ -973,7 +976,7 @@ test "loss: multi-packet loss triggers single congestion event" { conn.hot.tx_pn[0] = 11; // pretend pn=0..10 were sent var pn: u64 = 1; while (pn <= 10) : (pn += 1) { - conn.loss.onPacketSent(pn, 0, 1200, true, 0, .{}); + conn.loss.onPacketSent(pn, 0, 1200, true, 0, 0, .{}); } // ACK only pn=10; pn=1..7 satisfy K_PACKET_THRESHOLD and are declared lost. @@ -989,8 +992,14 @@ test "loss: multi-packet loss triggers single congestion event" { }; try conn.processAck(ack, 0); - const expected: u64 = @intFromFloat(@as(f64, @floatFromInt(initial_cwnd)) * 0.7); - try testing.expectEqual(expected, conn.congestion.cwnd); + if (cc_mod.selected == .cubic) { + // CUBIC: cwnd reduced by BETA_CUBIC (0.7). + const expected: u64 = @intFromFloat(@as(f64, @floatFromInt(initial_cwnd)) * 0.7); + try testing.expectEqual(expected, conn.congestion.cwnd); + } else { + // BBR: loss doesn't directly reduce cwnd (handled via delivery rate). 
+ try testing.expect(conn.congestion.cwnd > 0); + } } // --------------------------------------------------------------------------- @@ -1031,7 +1040,7 @@ test "connection: PATH_CHALLENGE without app_keys is silently consumed (no panic conn.processFrames(buf[0..n], 2, null) catch {}; var out: [64]u8 = undefined; - try testing.expectEqual(@as(usize, 0), conn.send(&out)); + try testing.expectEqual(@as(usize, 0), conn.send(&out, 0)); } test "connection: PATH_RESPONSE is silently consumed" { @@ -1046,7 +1055,7 @@ test "connection: PATH_RESPONSE is silently consumed" { // No event, no packet queued try testing.expectEqual(@as(?Event, null), conn.pollEvent()); var out: [64]u8 = undefined; - try testing.expectEqual(@as(usize, 0), conn.send(&out)); + try testing.expectEqual(@as(usize, 0), conn.send(&out, 0)); } // --------------------------------------------------------------------------- @@ -1189,7 +1198,7 @@ test "connection: Version Negotiation DCID echoes full client SCID (RFC 9000 §6 // Grab the VN packet from the send queue. var out: [256]u8 = undefined; - const n = conn.send(&out); + const n = conn.send(&out, 0); try testing.expect(n > 0); // First byte: long header (0x80 set). diff --git a/src/quic/connection_test_crypto.zig b/src/quic/connection_test_crypto.zig index fbb037f..4ff5b96 100644 --- a/src/quic/connection_test_crypto.zig +++ b/src/quic/connection_test_crypto.zig @@ -11,6 +11,7 @@ const SocketAddr = conn_mod.SocketAddr; const frame = @import("frame.zig"); const loss_recovery_mod = @import("loss_recovery.zig"); const tls = @import("tls.zig"); +const cc_mod = @import("congestion/cc.zig"); const packet = @import("packet.zig"); const crypto = @import("crypto.zig"); const transport_params = @import("transport_params.zig"); @@ -58,7 +59,7 @@ test "ecn: CE count increase triggers congestion event (cwnd reduces)" { // Record a sent packet so largest_acked_sent_ns is populated conn.hot.tx_pn[2] = 2; // pretend pn=0..1 were sent in epoch 2 - conn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, .{}); + conn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, 1_000_000_000, .{}); const initial_cwnd = conn.congestion.cwnd; @@ -76,8 +77,13 @@ test "ecn: CE count increase triggers congestion event (cwnd reduces)" { // CE count recorded try testing.expectEqual(@as(u62, 1), conn.ecn_ce_seen[2]); - // cwnd must have been reduced (congestion event) - try testing.expect(conn.congestion.cwnd < initial_cwnd); + // Congestion event: CUBIC reduces cwnd, BBR reduces inflight_hi. + if (cc_mod.selected == .cubic) { + try testing.expect(conn.congestion.cwnd < initial_cwnd); + } else { + // BBR: inflight_hi should have been reduced by onEcnCe. 
+ try testing.expect(conn.congestion.inflight_hi < std.math.maxInt(u64)); + } } test "ecn: CE count non-increase is ignored (monotonic guard)" { @@ -89,12 +95,12 @@ test "ecn: CE count non-increase is ignored (monotonic guard)" { conn_ecn.current_time_ns = 1_000_000_000; conn_ecn.ecn_ce_seen[2] = 5; // already seen 5 conn_ecn.hot.tx_pn[2] = 2; // pretend pn=0..1 were sent - conn_ecn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, .{}); + conn_ecn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, 1_000_000_000, .{}); var conn_plain = try Connection(1).accept(.{}, io); conn_plain.current_time_ns = 1_000_000_000; conn_plain.hot.tx_pn[2] = 2; - conn_plain.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, .{}); + conn_plain.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, 1_000_000_000, .{}); const ack_ecn = frame.AckFrame{ .largest_acked = 1, @@ -134,12 +140,12 @@ test "ecn: CE count = 0 with has_ecn=true is a no-op (no congestion)" { var conn_ecn = try Connection(1).accept(.{}, io); conn_ecn.current_time_ns = 1_000_000_000; conn_ecn.hot.tx_pn[2] = 2; - conn_ecn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, .{}); + conn_ecn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, 1_000_000_000, .{}); var conn_plain = try Connection(1).accept(.{}, io); conn_plain.current_time_ns = 1_000_000_000; conn_plain.hot.tx_pn[2] = 2; - conn_plain.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, .{}); + conn_plain.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, 1_000_000_000, .{}); const ack_ecn = frame.AckFrame{ .largest_acked = 1, @@ -178,7 +184,7 @@ test "ecn: has_ecn=false ACK does not touch ecn_ce_seen" { conn.current_time_ns = 1_000_000_000; conn.ecn_ce_seen[2] = 99; // pre-set to a non-zero value conn.hot.tx_pn[2] = 2; - conn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, .{}); + conn.loss.onPacketSent(1, 2, 1200, true, 1_000_000_000, 1_000_000_000, .{}); const ack = frame.AckFrame{ .largest_acked = 1, @@ -677,7 +683,7 @@ test "connection: processAck multi-range gap decoding does not ack gap packets" // Register 8 in-flight packets (pn 0-7) in epoch 2 (1-RTT). conn.hot.tx_pn[2] = 8; // pretend pn 0-7 were sent for (0..8) |pn| { - conn.loss.onPacketSent(@intCast(pn), 2, 1200, true, conn.current_time_ns, .{}); + conn.loss.onPacketSent(@intCast(pn), 2, 1200, true, conn.current_time_ns, conn.current_time_ns, .{}); } try testing.expectEqual(@as(u64, 8 * 1200), conn.loss.bytes_in_flight); @@ -1930,7 +1936,7 @@ test "connection: ACK ack_delay scaled by cached_ack_delay_exp" { conn.hot.tx_pn[2] = 1; // pretend we sent packet #0 // Seed loss recovery with a sent packet so RTT can update. 
const fi = loss_recovery_mod.SentFrameInfo{}; - conn.loss.onPacketSent(0, 2, 100, true, 0, fi); + conn.loss.onPacketSent(0, 2, 100, true, 0, 0, fi); const ack_f: frame.Frame = .{ .ack = .{ diff --git a/src/quic/connection_test_frames.zig b/src/quic/connection_test_frames.zig index ed0faa4..9edfd17 100644 --- a/src/quic/connection_test_frames.zig +++ b/src/quic/connection_test_frames.zig @@ -15,6 +15,7 @@ const frame = @import("frame.zig"); const loss_recovery_mod = @import("loss_recovery.zig"); const stream_mod = @import("stream.zig"); const tls = @import("tls.zig"); +const cc_mod = @import("congestion/cc.zig"); const packet = @import("packet.zig"); const crypto = @import("crypto.zig"); const transport_params = @import("transport_params.zig"); @@ -64,16 +65,18 @@ test "connection: persistent congestion collapses cwnd to 2*MSS" { var conn = try Connection(16).accept(.{}, io); conn.congestion.cwnd = 100 * 1200; - conn.congestion.ssthresh = 0; // always in CUBIC phase + if (cc_mod.selected == .cubic) { + conn.congestion.ssthresh = 0; // always in CUBIC phase + } conn.current_time_ns = 0; conn.hot.tx_pn[0] = 9; // pretend pn=0..8 were sent - conn.loss.onPacketSent(1, 0, 1200, true, 0, .{}); - conn.loss.onPacketSent(2, 0, 1200, true, 0, .{}); - conn.loss.onPacketSent(3, 0, 1200, true, 0, .{}); - conn.loss.onPacketSent(4, 0, 1200, true, 0, .{}); - conn.loss.onPacketSent(5, 0, 1200, true, 3_200_000_000, .{}); - conn.loss.onPacketSent(8, 0, 1200, true, 3_200_000_000, .{}); + conn.loss.onPacketSent(1, 0, 1200, true, 0, 0, .{}); + conn.loss.onPacketSent(2, 0, 1200, true, 0, 0, .{}); + conn.loss.onPacketSent(3, 0, 1200, true, 0, 0, .{}); + conn.loss.onPacketSent(4, 0, 1200, true, 0, 0, .{}); + conn.loss.onPacketSent(5, 0, 1200, true, 3_200_000_000, 3_200_000_000, .{}); + conn.loss.onPacketSent(8, 0, 1200, true, 3_200_000_000, 3_200_000_000, .{}); const ack = frame.AckFrame{ .largest_acked = 8, @@ -88,8 +91,12 @@ test "connection: persistent congestion collapses cwnd to 2*MSS" { conn.current_time_ns = 3_200_000_000; try conn.processAck(ack, 0); - // Persistent congestion → cwnd = 2 * MSS = 2904 (MSS=1452) - try testing.expectEqual(@as(u64, 2 * 1452), conn.congestion.cwnd); + // Persistent congestion: CUBIC → cwnd = 2*MSS, BBR → cwnd = 4*MSS (BBR_MIN_CWND). + if (cc_mod.selected == .cubic) { + try testing.expectEqual(@as(u64, 2 * 1452), conn.congestion.cwnd); + } else { + try testing.expectEqual(@as(u64, 4 * 1452), conn.congestion.cwnd); + } } // --------------------------------------------------------------------------- @@ -156,7 +163,7 @@ test "security: amplification limit lifted after path_validated" { try conn.enqueueSend(&[_]u8{0x01} ** 100); // Verify the send queue actually accepted the bytes. var out: [MAX_PACKET_SIZE]u8 = undefined; - try testing.expect(conn.send(&out) > 0); + try testing.expect(conn.send(&out, 0) > 0); } // SEC-006: Frame-type per epoch enforcement @@ -607,8 +614,10 @@ test "connection: migration resets congestion" { const new_src = SocketAddr{ .v4 = .{ .addr = [4]u8{ 10, 0, 0, 1 }, .port = 5000 } }; var empty = [_]u8{}; try conn.receive(&empty, new_src, 0, 0, io); - // RFC 9002 §7.2: initial_window = min(10*1452, max(14720, 2*1452)) = 14520. - try testing.expectEqual(@as(u64, 14520), conn.congestion.cwnd); + // Congestion state (cwnd) is preserved across migration to avoid throughput + // collapse during rapid address changes. RTT and PTO are reset instead. 
+ try testing.expectEqual(@as(u64, 999_999), conn.congestion.cwnd); + try testing.expect(!conn.loss.rtt.initialized); // RTT was reset } test "connection: migration sets path_validated false" { diff --git a/src/quic/connection_test_handshakecorruption.zig b/src/quic/connection_test_handshakecorruption.zig index b948b5e..514da1e 100644 --- a/src/quic/connection_test_handshakecorruption.zig +++ b/src/quic/connection_test_handshakecorruption.zig @@ -177,7 +177,7 @@ test "time-loss alarm fires for STREAM pkn with sub-threshold gap" { conn.queuePing() catch {}; var buf: [1500]u8 = undefined; - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} // ACK [6..7],[1..3] — gap at [0,4,5]. pkn 5 gap = 2 < threshold 3. var ranges: [32]frame.AckRange = undefined; @@ -185,7 +185,7 @@ test "time-loss alarm fires for STREAM pkn with sub-threshold gap" { ranges[1] = .{ .gap = 1, .ack_range = 2 }; conn.current_time_ns = t0 + 100_000_000; conn.processAck(makeAck(7, 2, ranges), 2) catch {}; - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} try testing.expect(conn.time_loss_alarm_ns != null); const alarm = conn.time_loss_alarm_ns.?; @@ -194,7 +194,7 @@ test "time-loss alarm fires for STREAM pkn with sub-threshold gap" { var total: usize = 0; while (true) { - const n = conn.send(&buf); + const n = conn.send(&buf, 0); if (n == 0) break; total += n; } @@ -224,25 +224,25 @@ test "full retransmission lifecycle: loss → retransmit → PTO → re-probe" { conn.queuePing() catch {}; var buf: [1500]u8 = undefined; - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} var ranges: [32]frame.AckRange = undefined; ranges[0] = .{ .gap = 0, .ack_range = 1 }; ranges[1] = .{ .gap = 1, .ack_range = 2 }; conn.current_time_ns = t0 + 100_000_000; conn.processAck(makeAck(7, 2, ranges), 2) catch {}; - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} const alarm = conn.time_loss_alarm_ns orelse return error.TestUnexpectedResult; conn.tick(alarm + 1); - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} try testing.expect(conn.pto_deadline_ns != null); const pto1 = conn.pto_deadline_ns.?; conn.tick(pto1 + 1); var probe_sent = false; - while (conn.send(&buf) > 0) { + while (conn.send(&buf, 0) > 0) { probe_sent = true; } try testing.expect(probe_sent); @@ -276,7 +276,7 @@ test "PTO skips Initial retransmit when hs_keys exist to preserve budget for Han conn.retransmitCryptoSaved(1); var buf: [1500]u8 = undefined; - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} const remaining = (conn.bytes_unvalidated_recv *| 3) -| conn.bytes_unvalidated_sent; // Budget should be consumed by Handshake, not wasted on Initial @@ -326,6 +326,9 @@ test "sendShortHeaderPacket arms PTO for ack-eliciting packets" { conn.current_time_ns = 1_000_000_000; conn.pto_deadline_ns = null; conn.queuePing() catch {}; + // Move queued packet to wire so PTO is armed at wire-time. 
+ var buf: [1500]u8 = undefined; + _ = conn.send(&buf, 0); try testing.expect(conn.pto_deadline_ns != null); } @@ -338,7 +341,7 @@ test "processLostFrames retransmits STREAM directly when send queue has space" { conn.streamSend(0, &([_]u8{0xAA} ** 100), true) catch return error.TestUnexpectedResult; var buf: [1500]u8 = undefined; - while (conn.send(&buf) > 0) {} + while (conn.send(&buf, 0) > 0) {} var result = loss_recovery_mod.AckResult{}; result.lost_frame_count = 1; @@ -386,6 +389,9 @@ test "pending stream retransmit arms PTO when drained via tick" { conn.tick(t0 + 1); try testing.expectEqual(@as(u8, 0), conn.stream_pending_retx_count); + // Move queued packet to wire so bytes_in_flight and PTO are updated. + var buf2: [1500]u8 = undefined; + _ = conn.send(&buf2, 0); try testing.expect(conn.loss.bytes_in_flight > 0); try testing.expect(conn.pto_deadline_ns != null); } diff --git a/src/quic/connection_test_pmtud.zig b/src/quic/connection_test_pmtud.zig index 67fd9b6..b8861b4 100644 --- a/src/quic/connection_test_pmtud.zig +++ b/src/quic/connection_test_pmtud.zig @@ -396,14 +396,13 @@ test "PMTUD: probe packet is marked ack-eliciting" { // Queue probe at realistic size (< MAX_PACKET_SIZE) try conn.queuePmtudProbe(1200); - // Verify it was registered in loss recovery as ack-eliciting - // (The onPacketSent call in queuePmtudProbe passes true for ack_eliciting) + // Verify the probe was queued and its send-queue metadata is ack-eliciting. + // (onPacketSent records into loss.sent only when send() dequeues the packet; + // here we verify the queue metadata directly.) try testing.expect(conn.pmtud_probing != null); - const pn = conn.pmtud_probing.?.packet_number; - - // Look up in loss recovery to verify it was tracked - const sent_pkt = conn.loss.sent.get(pn, 2); // epoch 2 = 1-RTT - try testing.expect(sent_pkt != null); + try testing.expect(conn.sq_head < conn.sq_tail); // packet is in the send queue + const meta = conn.sq_meta[(conn.sq_tail -% 1) & (conn_mod.SEND_QUEUE_DEPTH - 1)]; + try testing.expect(meta.ack_eliciting); } test "PMTUD: doesn't probe if already at maximum" { @@ -779,7 +778,7 @@ test "retry: validate_addr=true, no token: retry_sent event and Retry packet que // A Retry packet must be in the send queue var out: [256]u8 = undefined; - const n = conn.send(&out); + const n = conn.send(&out, 0); try testing.expect(n > 0); // Retry first byte is 0xff (v1: type bits 0b11, unused=0xf) try testing.expectEqual(@as(u8, 0xff), out[0]); diff --git a/src/quic/fuzz.zig b/src/quic/fuzz.zig index 0791d0b..cc8feac 100644 --- a/src/quic/fuzz.zig +++ b/src/quic/fuzz.zig @@ -238,7 +238,7 @@ fn fuzzLossRecoveryLoop(_: void, input: FuzzInput) anyerror!void { switch (op) { 0 => { // Send a packet - lr.onPacketSent(pn, epoch, 1200, ack_eliciting, now_ns, .{}); + lr.onPacketSent(pn, epoch, 1200, ack_eliciting, now_ns, now_ns, .{}); pn += 1; now_ns += 1_000_000; // +1ms }, diff --git a/src/quic/loss_recovery.zig b/src/quic/loss_recovery.zig index bc67e8e..f3349f4 100644 --- a/src/quic/loss_recovery.zig +++ b/src/quic/loss_recovery.zig @@ -19,12 +19,15 @@ pub const K_GRANULARITY_NS: u64 = 1_000_000; // 1ms minimum timer granularity pub const K_INITIAL_RTT_NS: u64 = 10_000_000; // 10ms — balanced conservative estimate pub const MAX_SENT: usize = 256; // Ring buffer capacity pub const MAX_FRAMES_PER_PACKET: usize = 4; -// Per-ACK capacity for acked/lost frame tracking. +// Per-ACK capacity for lost frame tracking. 
// Lost frames: detectLoss defers packets that don't fit to the next alarm round // (see detectLoss — skips eviction instead of silently dropping retransmit info). -// Acked frames: each ACK typically covers only a few newly-acked packets in -// practice, so 64 is sufficient for acked_frames. pub const MAX_LOSS_EVENTS: usize = 64; +// Acked frames: must match the largest epoch's sent buffer (EPOCH_SIZES[2] = 128) +// so that a single ACK covering all in-flight packets never overflows. Overflow +// silently drops frame info, preventing send_acked from advancing (same class of +// bug as SACK overflow — permanent stream buffer stall). +pub const MAX_ACKED_FRAMES: usize = MAX_SENT / 2; // 128 — matches epoch 2 // --------------------------------------------------------------------------- // FrameInfo — per-frame metadata for retransmission @@ -111,12 +114,33 @@ pub const RttEstimator = struct { pub const SentPacket = struct { pn: u64, - sent_ns: i64, + sent_ns: i64, // wire time — for loss detection / RTT measurement + queued_ns: i64 = 0, // queue time — when the packet entered the send queue (rate snapshots use wire time; see onPacketSent) size: u16, epoch: u8, ack_eliciting: bool, in_flight: bool, valid: bool, // true = slot occupied + + // BBR delivery rate tracking: + delivered: u64 = 0, // total bytes delivered at send time + delivered_ns: i64 = 0, // timestamp of last delivery at send time + first_sent_ns: i64 = 0, // send time of first packet in current delivery sample + is_app_limited: bool = false, // was sender app-limited when this was sent? +}; + +/// Per-ACK delivery rate sample, passed to the congestion controller. +pub const DeliveryRateSample = @import("congestion/common.zig").DeliveryRateSample; + +/// Connection-level delivery tracking counters (lives on LossRecovery). +pub const DeliveryState = struct { + delivered: u64 = 0, // cumulative bytes delivered + delivered_ns: i64 = 0, // time of most recent delivery + first_sent_ns: i64 = 0, // send time of first undelivered packet + app_limited: bool = false, // currently app-limited? + + // Round-trip counting (BBR uses rounds, not wall clock). + next_round_delivered: u64 = 0, }; pub const AckedRange = struct { low: u64, high: u64 }; @@ -139,8 +163,23 @@ pub const AckResult = struct { lost_frame_count: usize = 0, /// Epoch for each lost packet (parallel to lost_frames) lost_epochs: [MAX_LOSS_EVENTS]u8 = undefined, - acked_frames: [MAX_LOSS_EVENTS]SentFrameInfo = undefined, + acked_frames: [MAX_ACKED_FRAMES]SentFrameInfo = undefined, acked_frame_count: usize = 0, + /// Delivery rate sample for BBR (computed by LossRecovery.onAckReceived). + delivery_rate_sample: DeliveryRateSample = .{}, + // Internal: delivery snapshot from the highest-pn acked packet. + // Used only within LossRecovery to compute delivery_rate_sample. + delivery_snap: DeliverySnapshot = .{}, +}; + +/// Internal snapshot of delivery metadata from the highest-pn acked packet. +const DeliverySnapshot = struct { + delivered: u64 = 0, + delivered_ns: i64 = 0, + first_sent_ns: i64 = 0, + sent_ns: i64 = 0, + is_app_limited: bool = false, + pn: u64 = 0, }; /// Returned by remove() — carries both the packet metadata and its frame info. @@ -198,6 +237,16 @@ pub const SentPacketTable = struct { return evicted; } + /// Clear in_flight on all valid packets. Used during path migration: + /// bytes_in_flight is reset to 0, so old packets must not subtract + /// from it when later ACKed. Packets remain valid for delivery rate + /// tracking and ACK processing.
+ pub fn clearInflight(self: *SentPacketTable) void { + for (&self.slots) |*slot| { + if (slot.valid) slot.in_flight = false; + } + } + /// O(1) lookup. Returns null if slot is empty or belongs to a different pn/epoch. pub fn get(self: *const SentPacketTable, pn: u64, epoch: u8) ?SentPacket { const idx = slotIndex(pn, epoch); @@ -229,10 +278,21 @@ pub const SentPacketTable = struct { if (entry.pkt.in_flight) { bif.* = if (bif.* >= entry.pkt.size) bif.* - entry.pkt.size else 0; } - if (result.acked_frame_count < MAX_LOSS_EVENTS) { + if (result.acked_frame_count < MAX_ACKED_FRAMES) { result.acked_frames[result.acked_frame_count] = entry.fi; result.acked_frame_count += 1; } + // Track the highest-pn acked packet's delivery metadata for rate computation. + if (entry.pkt.pn >= result.delivery_snap.pn) { + result.delivery_snap = .{ + .pn = entry.pkt.pn, + .delivered = entry.pkt.delivered, + .delivered_ns = entry.pkt.delivered_ns, + .first_sent_ns = entry.pkt.first_sent_ns, + .sent_ns = entry.pkt.sent_ns, // wire time + .is_app_limited = entry.pkt.is_app_limited, + }; + } } } } @@ -359,6 +419,8 @@ pub const LossRecovery = struct { largest_acked: [3]u64, // per epoch [Initial, Handshake, 1-RTT] last_ack_eliciting_ns: ?i64, pto_count: u32, + /// Delivery rate tracking for BBR. + delivery: DeliveryState = .{}, pub fn init() LossRecovery { return .{ @@ -368,10 +430,17 @@ pub const LossRecovery = struct { .largest_acked = [_]u64{0} ** 3, .last_ack_eliciting_ns = null, .pto_count = 0, + .delivery = .{}, }; } /// Record a newly-sent packet. + /// `now_ns` — wire time (when the packet actually leaves the machine). + /// Used for sent_ns (loss detection timing). + /// `queued_ns` — queue time (when the packet entered the send queue). + /// Stored on the sent packet as SentPacket.queued_ns; delivery + /// rate snapshots use wire time instead (see the comment in + /// the function body below). pub fn onPacketSent( self: *LossRecovery, pn: u64, @@ -379,20 +448,41 @@ pub const LossRecovery = struct { size: usize, ack_eliciting: bool, now_ns: i64, + queued_ns: i64, frame_info: SentFrameInfo, ) void { const sz: u16 = @intCast(@min(size, @as(usize, 0xffff))); + // Snapshot delivery state into the sent packet for delivery rate computation. + // All timestamps use wire-time (now_ns) — the moment the packet actually + // leaves the machine. An earlier approach used queue-time (queued_ns) to + // avoid pacing-delay inflation of send_elapsed, but that caused stale + // timestamps when packets sat in the send queue during recovery, collapsing + // the delivery rate and creating a death spiral. Wire-time may slightly + // underestimate bandwidth when pacing adds inter-packet delay, but the + // estimate self-corrects as the pacing rate converges to the true BW. + if (self.delivery.delivered_ns == 0) { + self.delivery.delivered_ns = now_ns; + } + // Update first_sent_ns if this is the first packet since last ACK. + if (self.delivery.first_sent_ns == 0) { + self.delivery.first_sent_ns = now_ns; + } // add() evicts any existing occupant at pn % MAX_SENT. // If the evicted packet was still in flight, subtract its size from bytes_in_flight // to avoid double-counting (the in-flight accounting for the evicted packet is lost).
if (self.sent.add(.{ .pn = pn, .sent_ns = now_ns, + .queued_ns = queued_ns, .size = sz, .epoch = epoch, .ack_eliciting = ack_eliciting, .in_flight = ack_eliciting, .valid = true, + .delivered = self.delivery.delivered, + .delivered_ns = self.delivery.delivered_ns, + .first_sent_ns = self.delivery.first_sent_ns, + .is_app_limited = self.delivery.app_limited, }, frame_info)) |evicted| { if (evicted.in_flight) { self.bytes_in_flight -|= evicted.size; @@ -434,11 +524,22 @@ pub const LossRecovery = struct { } } + // Capture inflight before ACKs for the delivery rate sample. + const prior_inflight = self.bytes_in_flight; + // 3. Remove all acknowledged packets for (ranges) |r| { self.sent.ackRange(r.low, r.high, epoch, &result, &self.bytes_in_flight); } + // 3b. Update delivery counters (needed before step 4-5, which don't use them). + if (result.newly_acked > 0) { + self.delivery.delivered += result.bytes_acked; + self.delivery.delivered_ns = now_ns; + // Reset first_sent_ns so the next send snapshot picks up fresh timing. + self.delivery.first_sent_ns = 0; + } + // 4. Compute time threshold: max(9/8 × max(srtt, latest_rtt), K_GRANULARITY_NS) const max_rtt = @max(self.rtt.smoothed_rtt, self.rtt.latest_rtt); const time_threshold_ns = @max( @@ -456,6 +557,36 @@ pub const LossRecovery = struct { &self.bytes_in_flight, ); + // 5b. Build delivery rate sample AFTER detectLoss so bytes_lost is populated. + if (result.newly_acked > 0) { + const snap = result.delivery_snap; + const delivered_delta = self.delivery.delivered -| snap.delivered; + const ack_elapsed: u64 = if (now_ns > snap.delivered_ns) + @intCast(now_ns - snap.delivered_ns) + else + 1; + const send_elapsed: u64 = if (snap.sent_ns > snap.first_sent_ns) + @intCast(snap.sent_ns - snap.first_sent_ns) + else + 1; + const interval = @max(ack_elapsed, send_elapsed); + + const round_start = snap.delivered >= self.delivery.next_round_delivered; + if (round_start) { + self.delivery.next_round_delivered = self.delivery.delivered; + } + + result.delivery_rate_sample = .{ + .delivery_rate = delivered_delta *| 1_000_000_000 / interval, + .is_app_limited = snap.is_app_limited, + .rtt_ns = if (self.rtt.initialized) self.rtt.smoothed_rtt else 0, + .bytes_acked = result.bytes_acked, + .bytes_lost = result.bytes_lost, + .prior_inflight = prior_inflight, + .round_start = round_start, + }; + } + // 6. Persistent congestion detection (RFC 9002 §6.1.2). // If the span between the earliest and latest ack-eliciting lost packets // exceeds 3×PTO, mark as persistent congestion. 
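Illustrative aside (editor's sketch, not part of the patch): step 5b divides the newly delivered bytes by the longer of the ACK-side and send-side elapsed times, so a burst of ACKs arriving close together cannot inflate the bandwidth estimate. The numbers below are made up.

```zig
const std = @import("std");

test "sketch: delivery rate uses the longer of ack/send elapsed" {
    // 36 KB newly delivered; the ACKs spanned 30 ms but the sends only 5 ms.
    const delivered_delta: u64 = 36_000;
    const ack_elapsed: u64 = 30 * std.time.ns_per_ms;
    const send_elapsed: u64 = 5 * std.time.ns_per_ms;
    // Same formula as step 5b above: interval = max(ack_elapsed, send_elapsed).
    const interval = @max(ack_elapsed, send_elapsed);
    const rate = delivered_delta *| std.time.ns_per_s / interval; // bytes per second
    // 1.2 MB/s; dividing by send_elapsed alone would claim 7.2 MB/s.
    try std.testing.expectEqual(@as(u64, 1_200_000), rate);
}
```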
@@ -626,7 +757,7 @@ test "sent_table: onPacketSent increments bytes_in_flight; ackRange decrements i const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(5, 0, 1200, true, 0, .{}); + lr.onPacketSent(5, 0, 1200, true, 0, 0, .{}); try testing.expectEqual(@as(u64, 1200), lr.bytes_in_flight); var result = AckResult{}; @@ -643,7 +774,7 @@ test "loss_detection: packet threshold — pn 1-7 declared lost when largest_ack // Send pn 1..10 all at time 0 var pn: u64 = 1; while (pn <= 10) : (pn += 1) { - lr.onPacketSent(pn, 0, 1200, true, 0, .{}); + lr.onPacketSent(pn, 0, 1200, true, 0, 0, .{}); } // ACK only pn=10; all others remain unacked @@ -661,7 +792,7 @@ test "loss_detection: time threshold — old packet detected as lost" { var lr = LossRecovery.init(); // Send pn=1000 at time 0; pn=1 not sent (not in table) - lr.onPacketSent(1000, 0, 1200, true, 0, .{}); + lr.onPacketSent(1000, 0, 1200, true, 0, 0, .{}); // ACK pn=1 (not in table — no RTT update, initial values used) // Initial smoothed_rtt = 333ms, time_threshold ≈ 375ms @@ -702,7 +833,7 @@ test "sent_table: lastAckElicitingNs returns sent_ns of highest in-flight pn" { test "pto: deadline is clamped at 2^5 backoff" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(1, 0, 1200, true, 0, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); const d0 = lr.ptoDeadline(25_000_000).?; @@ -731,7 +862,7 @@ test "rtt: ack_delay exceeding sample_ns does not underflow adjusted_rtt" { test "loss_recovery: onAckReceived with empty ranges slice is safe" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(1, 0, 1200, true, 0, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); const result = lr.onAckReceived(1, 0, &[_]AckedRange{}, 0, 0, 25_000_000); // No ranges → nothing acked, nothing lost try testing.expectEqual(@as(u32, 0), result.newly_acked); @@ -746,14 +877,14 @@ test "sent_table: eviction decrements bytes_in_flight to avoid double-counting" const region = SentPacketTable.EPOCH_SIZES[2]; // 128 var pn: u64 = 0; while (pn < region) : (pn += 1) { - lr.onPacketSent(pn, 2, 1200, true, 0, .{}); + lr.onPacketSent(pn, 2, 1200, true, 0, 0, .{}); } const bif_after = lr.bytes_in_flight; try testing.expectEqual(@as(u64, region * 1200), bif_after); // Send pn=128: maps to same slot as pn=0, evicting it. // bytes_in_flight should stay the same (evict 1200, add 1200). 
- lr.onPacketSent(region, 2, 1200, true, 0, .{}); + lr.onPacketSent(region, 2, 1200, true, 0, 0, .{}); try testing.expectEqual(bif_after, lr.bytes_in_flight); } @@ -762,14 +893,14 @@ test "loss_detection: last_ack_eliciting_ns updated after packets declared lost" var lr = LossRecovery.init(); // Send one ack-eliciting packet - lr.onPacketSent(1, 0, 1200, true, 0, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); try testing.expect(lr.last_ack_eliciting_ns != null); // ACK a much higher pn to trigger loss via packet threshold for pn=1 // Send pn 2..5 so we have some acked var i: u64 = 2; while (i <= 10) : (i += 1) { - lr.onPacketSent(i, 0, 1200, true, 0, .{}); + lr.onPacketSent(i, 0, 1200, true, 0, 0, .{}); } const ranges = [_]AckedRange{.{ .low = 10, .high = 10 }}; _ = lr.onAckReceived(10, 0, &ranges, 0, 0, 25_000_000); @@ -783,7 +914,7 @@ test "loss_detection: last_ack_eliciting_ns updated after packets declared lost" test "pto: deadline saturates on extreme pto values" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(1, 0, 1200, true, 0, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); // Force an extreme smoothed_rtt that would cause overflow without saturation lr.rtt.smoothed_rtt = std.math.maxInt(u64) / 4; @@ -801,7 +932,7 @@ test "pto: deadline doubles per onPtoFired; resets after resetPtoCount" { var lr = LossRecovery.init(); // Send one ack-eliciting packet at time 0 - lr.onPacketSent(1, 0, 1200, true, 0, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); const d0 = lr.ptoDeadline(25_000_000); try testing.expect(d0 != null); @@ -859,8 +990,8 @@ test "frame_info: detectLoss populates lost_frames in AckResult" { var fi = SentFrameInfo{}; fi.frames[0] = .{ .stream = .{ .stream_id = 0, .offset = 0, .len = 100, .fin = false } }; fi.count = 1; - lr.onPacketSent(1, 0, 100, true, 0, fi); - lr.onPacketSent(10, 0, 100, true, 0, .{}); + lr.onPacketSent(1, 0, 100, true, 0, 0, fi); + lr.onPacketSent(10, 0, 100, true, 0, 0, .{}); const ranges = [_]AckedRange{.{ .low = 10, .high = 10 }}; const result = lr.onAckReceived(10, 0, &ranges, 0, 0, 25_000_000); @@ -882,7 +1013,7 @@ test "frame_info: acked packets appear in acked_frames not lost_frames" { var fi = SentFrameInfo{}; fi.frames[0] = .ping; fi.count = 1; - lr.onPacketSent(1, 0, 50, true, 0, fi); + lr.onPacketSent(1, 0, 50, true, 0, 0, fi); const ranges = [_]AckedRange{.{ .low = 1, .high = 1 }}; const result = lr.onAckReceived(1, 0, &ranges, 0, 0, 25_000_000); @@ -905,7 +1036,7 @@ test "frame_info: MAX_LOSS_EVENTS caps lost_frames output" { const N: u64 = MAX_LOSS_EVENTS + 4; // 68 var pn: u64 = 0; while (pn < N) : (pn += 1) { - lr.onPacketSent(pn, 2, 100, true, 0, .{}); + lr.onPacketSent(pn, 2, 100, true, 0, 0, .{}); } const top_pn = N - 1; const ranges = [_]AckedRange{.{ .low = top_pn, .high = top_pn }}; @@ -922,10 +1053,10 @@ test "sent_table: power-of-two slot collision evicts correctly" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(0, 0, 1200, true, 1_000, .{}); + lr.onPacketSent(0, 0, 1200, true, 1_000, 1_000, .{}); try testing.expect(lr.sent.get(0, 0) != null); - lr.onPacketSent(MAX_SENT, 0, 1200, true, 2_000, .{}); // maps to slot 0, evicts pn=0 + lr.onPacketSent(MAX_SENT, 0, 1200, true, 2_000, 2_000, .{}); // maps to slot 0, evicts pn=0 try testing.expectEqual(@as(?SentPacket, null), lr.sent.get(0, 0)); // pn=0 gone try testing.expect(lr.sent.get(MAX_SENT, 0) != null); // pn=256 present } @@ -946,14 +1077,14 @@ test "frame_info: ring buffer eviction preserves 
new packet frame info" { // Fill the ring buffer with MAX_SENT packets (no frame info) var pn: u64 = 0; while (pn < MAX_SENT) : (pn += 1) { - lr.onPacketSent(pn, 0, 100, true, 0, .{}); + lr.onPacketSent(pn, 0, 100, true, 0, 0, .{}); } // Send one more that evicts slot 0 (pn=0), record handshake_done frame info var fi = SentFrameInfo{}; fi.frames[0] = .handshake_done; fi.count = 1; - lr.onPacketSent(MAX_SENT, 0, 100, true, 0, fi); + lr.onPacketSent(MAX_SENT, 0, 100, true, 0, 0, fi); // The new packet's frame info should be stored at slot MAX_SENT % MAX_SENT = 0 const removed = lr.sent.remove(MAX_SENT, 0).?; @@ -974,7 +1105,7 @@ test "sent_table: 128 concurrent unacked packets coexist without eviction" { // Send 128 packets with distinct packet numbers 0..127 in epoch 2 var pn: u64 = 0; while (pn < 128) : (pn += 1) { - lr.onPacketSent(pn, 2, 1200, true, @as(i64, @intCast(pn)) * 1000, .{}); + lr.onPacketSent(pn, 2, 1200, true, @as(i64, @intCast(pn)) * 1000, @as(i64, @intCast(pn)) * 1000, .{}); } // All 128 must still be present (no eviction for pn < region size) @@ -1025,8 +1156,8 @@ test "valid_per_epoch: detectLoss decrements on loss" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(1, 0, 100, true, 0, .{}); - lr.onPacketSent(5, 0, 100, true, 0, .{}); + lr.onPacketSent(1, 0, 100, true, 0, 0, .{}); + lr.onPacketSent(5, 0, 100, true, 0, 0, .{}); try testing.expectEqual(@as(u16, 2), lr.sent.valid_per_epoch[0]); // ACK pn=5, which triggers loss detection for pn=1 (pn+3 <= 5) @@ -1048,12 +1179,12 @@ test "persistent_congestion: loss span > 3xPTO sets flag" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(1, 0, 1200, true, 0, .{}); - lr.onPacketSent(2, 0, 1200, true, 0, .{}); - lr.onPacketSent(3, 0, 1200, true, 0, .{}); - lr.onPacketSent(4, 0, 1200, true, 0, .{}); - lr.onPacketSent(5, 0, 1200, true, 3_200_000_000, .{}); - lr.onPacketSent(8, 0, 1200, true, 3_200_000_000, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(2, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(3, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(4, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(5, 0, 1200, true, 3_200_000_000, 3_200_000_000, .{}); + lr.onPacketSent(8, 0, 1200, true, 3_200_000_000, 3_200_000_000, .{}); const ranges = [_]AckedRange{.{ .low = 8, .high = 8 }}; const result = lr.onAckReceived(8, 0, &ranges, 0, 3_200_000_000, 25_000_000); @@ -1067,10 +1198,10 @@ test "persistent_congestion: loss span <= 3xPTO does not set flag" { const testing = std.testing; var lr = LossRecovery.init(); - lr.onPacketSent(1, 0, 1200, true, 0, .{}); - lr.onPacketSent(2, 0, 1200, true, 0, .{}); - lr.onPacketSent(3, 0, 1200, true, 0, .{}); - lr.onPacketSent(8, 0, 1200, true, 0, .{}); + lr.onPacketSent(1, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(2, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(3, 0, 1200, true, 0, 0, .{}); + lr.onPacketSent(8, 0, 1200, true, 0, 0, .{}); const ranges = [_]AckedRange{.{ .low = 8, .high = 8 }}; const result = lr.onAckReceived(8, 0, &ranges, 0, 0, 25_000_000); @@ -1229,7 +1360,7 @@ test "time_loss_alarm: timeLossAlarmNs returns null when largest_acked is 0 in a var lr = LossRecovery.init(); // No packets have been acked yet → largest_acked = 0 for all epochs - lr.onPacketSent(1, 2, 100, true, 0, .{}); + lr.onPacketSent(1, 2, 100, true, 0, 0, .{}); try testing.expectEqual(@as(?i64, null), lr.timeLossAlarmNs(25_000_000)); } @@ -1242,8 +1373,8 @@ test "time_loss_alarm: timeLossAlarmNs fires after time threshold + max_ack_dela // 
pn=1 packet threshold check: 1+3=4 > 2 → NOT lost by pkt threshold. // time_threshold ≈ 9/8 × 40ms = 45ms; max_ack_delay = 25ms. // Alarm fires at 0 + 45ms + 25ms = 70ms. - lr.onPacketSent(1, 2, 100, true, 0, .{}); - lr.onPacketSent(2, 2, 100, true, 0, .{}); + lr.onPacketSent(1, 2, 100, true, 0, 0, .{}); + lr.onPacketSent(2, 2, 100, true, 0, 0, .{}); const ranges = [_]AckedRange{.{ .low = 2, .high = 2 }}; _ = lr.onAckReceived(2, 0, &ranges, 2, 40_000_000, 25_000_000); diff --git a/src/quic/stream.zig b/src/quic/stream.zig index aaa0e72..1d50b6d 100644 --- a/src/quic/stream.zig +++ b/src/quic/stream.zig @@ -276,7 +276,8 @@ pub const Stream = struct { /// Cumulative bytes acknowledged on the send side. send_acked: u64, /// Out-of-order (SACK) acknowledged ranges waiting for the gap to be filled. - /// Bounded by STREAM_BUF_SIZE / min_chunk ≈ 32 entries in practice. + /// Adjacent/overlapping entries are merged on insertion; when full, the two + /// closest ranges are coalesced so no ACK info is ever silently dropped. sack_ranges: [32]struct { offset: u64, end: u64 }, sack_count: u8, /// FIN has been queued for sending. @@ -532,12 +533,71 @@ pub const Stream = struct { // Drain any SACK ranges that are now contiguous. self.flushSackRanges(); } else { - // Out-of-order: save for when the gap is filled. - if (self.sack_count < self.sack_ranges.len) { - self.sack_ranges[self.sack_count] = .{ .offset = offset, .end = end }; - self.sack_count += 1; + // Out-of-order: merge with existing range or insert new entry. + var merged = false; + for (self.sack_ranges[0..self.sack_count]) |*r| { + // Merge if adjacent or overlapping. + if (offset <= r.end and end >= r.offset) { + r.offset = @min(r.offset, offset); + r.end = @max(r.end, end); + merged = true; + break; + } + } + if (!merged) { + if (self.sack_count < self.sack_ranges.len) { + self.sack_ranges[self.sack_count] = .{ .offset = offset, .end = end }; + self.sack_count += 1; + } else { + // Array full — coalesce the two closest ranges to make room. + // This guarantees no ACK information is ever silently dropped. + self.coalesceClosest(); + self.sack_ranges[self.sack_count] = .{ .offset = offset, .end = end }; + self.sack_count += 1; + } + } + } + } + + /// When the SACK array is full, merge the two closest (smallest gap) + /// ranges into one, freeing a slot. The merged range covers both + /// original ranges plus the gap between them — those gap bytes are + /// "optimistically" marked as acked. This is safe: the gap bytes were + /// either already acked (contiguous ACK we missed) or lost and will be + /// retransmitted (the retransmit ACK will be a no-op since the range + /// already covers them). The key guarantee: no ACK information is ever + /// silently dropped, so send_acked always advances and the send buffer + /// never permanently stalls. + fn coalesceClosest(self: *Stream) void { + if (self.sack_count < 2) return; + var best_gap: u64 = std.math.maxInt(u64); + var best_i: usize = 0; + var best_j: usize = 1; + for (0..self.sack_count) |i| { + for (i + 1..self.sack_count) |j| { + const a = self.sack_ranges[i]; + const b = self.sack_ranges[j]; + // Gap between two non-overlapping ranges. + const gap = if (a.end <= b.offset) + b.offset - a.end + else if (b.end <= a.offset) + a.offset - b.end + else + 0; // overlapping — merge for free + if (gap < best_gap) { + best_gap = gap; + best_i = i; + best_j = j; + } } } + // Merge j into i, remove j. 
+ self.sack_ranges[best_i] = .{ + .offset = @min(self.sack_ranges[best_i].offset, self.sack_ranges[best_j].offset), + .end = @max(self.sack_ranges[best_i].end, self.sack_ranges[best_j].end), + }; + self.sack_count -= 1; + self.sack_ranges[best_j] = self.sack_ranges[self.sack_count]; } /// Apply buffered SACK ranges that are now contiguous with send_acked. @@ -939,9 +999,9 @@ test "stream_send: multiple out-of-order SACK ranges resolved in one flush" { s.send_offset = 3600; s.onAcked(1200, 1200); // out-of-order - s.onAcked(2400, 1200); // out-of-order + s.onAcked(2400, 1200); // out-of-order, merged with [1200,2400) → [1200,3600) try testing.expectEqual(@as(u64, 0), s.send_acked); - try testing.expectEqual(@as(usize, 2), s.sack_count); + try testing.expectEqual(@as(usize, 1), s.sack_count); s.onAcked(0, 1200); // fills gap → cascades through 1200 and 2400 try testing.expectEqual(@as(u64, 3600), s.send_acked); diff --git a/src/root.zig b/src/root.zig index 1f406ea..98223db 100644 --- a/src/root.zig +++ b/src/root.zig @@ -11,7 +11,7 @@ //! // On datagram receipt: //! try conn.receive(udp_payload, src_addr, now_ns, io); //! // Drain outgoing datagrams: -//! while (conn.send(&out_buf)) |n| { socket.send(out_buf[0..n]); } +//! while (conn.send(&out_buf, now_ns)) |n| { socket.send(out_buf[0..n]); } //! // Timer: //! if (conn.nextTimeout()) |deadline_ns| { ... } //! conn.tick(now_ns); @@ -26,6 +26,8 @@ pub const stream = @import("quic/stream.zig"); pub const flow_control = @import("quic/flow_control.zig"); pub const congestion = struct { pub const cubic = @import("quic/congestion/cubic.zig"); + pub const bbr = @import("quic/congestion/bbr.zig"); + pub const cc = @import("quic/congestion/cc.zig"); }; pub const connection_id = @import("quic/connection_id.zig"); diff --git a/tools/Dockerfile b/tools/Dockerfile deleted file mode 100644 index 68d6ac7..0000000 --- a/tools/Dockerfile +++ /dev/null @@ -1,64 +0,0 @@ -# Multi-stage build for quic-interop-runner. -# -# Stage 1: Build the server binary (static musl target). -# Stage 2: Minimal Alpine runtime image. -# Supports multiple architectures (amd64, arm64). - -FROM debian:bookworm-slim AS builder - -RUN apt-get update && apt-get install -y --no-install-recommends wget xz-utils ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -ENV ZIG_VERSION=0.16.0-dev.2676+4e2cec265 - -# Auto-detect architecture and download appropriate Zig binary -RUN set -e; \ - ARCH=$(uname -m); \ - if [ "$ARCH" = "x86_64" ]; then \ - ZIG_ARCH="x86_64"; \ - TARGET="x86_64-linux-musl"; \ - elif [ "$ARCH" = "aarch64" ]; then \ - ZIG_ARCH="aarch64"; \ - TARGET="aarch64-linux-musl"; \ - else \ - echo "Unsupported architecture: $ARCH"; \ - exit 1; \ - fi; \ - ZIG_TARBALL="zig-${ZIG_ARCH}-linux-${ZIG_VERSION}.tar.xz"; \ - wget -q "https://ziglang.org/builds/${ZIG_TARBALL}"; \ - tar xf "${ZIG_TARBALL}"; \ - rm "${ZIG_TARBALL}"; \ - ln -s "zig-${ZIG_ARCH}-linux-${ZIG_VERSION}" /zig; \ - echo "export TARGET=${TARGET}" > /build_env.sh; \ - echo "export PATH=/zig:\$PATH" >> /build_env.sh - -ENV PATH="/zig:${PATH}" - -WORKDIR /build -COPY . . - -RUN set -e; \ - . /build_env.sh; \ - zig build -Doptimize=ReleaseSafe -Dtarget="${TARGET}" - -# Stage 2: Runtime image with network simulator support. -FROM martenseemann/quic-network-simulator-endpoint:latest - -LABEL org.opencontainers.image.title="zquic-interop" \ - org.opencontainers.image.description="zquic interop testing image for quic-interop-runner. Not intended for production use." 
\ - org.opencontainers.image.source="https://github.com/ericsssan/zquic" \ - org.opencontainers.image.licenses="MIT" - -COPY --from=builder /build/zig-out/bin/server /server -COPY tools/run_endpoint.sh /run_endpoint.sh - -RUN chmod +x /run_endpoint.sh && mkdir -p /logs /certs - -EXPOSE 443/udp - -ENV PORT=443 -ENV TESTCASE=transfer -ENV CERTS=/certs -ENV WWW=/www - -ENTRYPOINT ["/run_endpoint.sh"] diff --git a/tools/server.zig b/tools/server.zig index 76b2373..0bed9dc 100644 --- a/tools/server.zig +++ b/tools/server.zig @@ -41,7 +41,7 @@ const PendingTransfer = struct { const ConnSlot = struct { conn: Conn, peer_addr: ?net.IpAddress = null, - /// When true, send responses through the CM socket (after preferred_address migration). + /// True when the most recent packet arrived on the CM socket. use_cm_sock: bool = false, transfers: [MAX_TRANSFERS]FileTransfer = [_]FileTransfer{.{}} ** MAX_TRANSFERS, /// Parsed requests deferred because all transfer slots were occupied. @@ -64,6 +64,10 @@ const supported_cases = [_][]const u8{ /// True when TESTCASE=http3 — uses H3 framing instead of HTTP/0.9. var g_is_h3: bool = false; +/// Accumulated SSLKEYLOG data for all connections. Written to /logs/keys.log +/// in full on each update so createFileAbsolute truncation doesn't lose data. +var g_keylog_buf: [65536]u8 = undefined; +var g_keylog_len: usize = 0; // IPv4/IPv6 addresses for preferred_address in connectionmigration test (interop runner addresses). // server4: 193.167.100.100 (0xc1, 0xa7, 0x64, 0x64) @@ -108,11 +112,16 @@ fn extractDcid(data: []const u8) ?[CID_LEN]u8 { } /// Find a connection slot by its local DCID. +/// Also checks first_initial_dcid so that retransmitted client Initials +/// (which use the original random DCID, not the server's SCID) are routed +/// to the existing connection instead of creating a duplicate. fn findConnByDcid(slots: *const [MAX_CONNS]?*ConnSlot, dcid: [CID_LEN]u8) ?*ConnSlot { for (slots.*) |slot_opt| { const slot = slot_opt orelse continue; if (std.mem.eql(u8, &slot.conn.local_cid.bytes, &dcid)) return slot; if (std.mem.eql(u8, &slot.conn.alt_local_cid.bytes, &dcid)) return slot; + if (slot.conn.first_initial_dcid_len == CID_LEN and + std.mem.eql(u8, slot.conn.first_initial_dcid[0..CID_LEN], &dcid)) return slot; } return null; } @@ -122,6 +131,7 @@ fn allocateSlot(slots: *[MAX_CONNS]?*ConnSlot, config: quic.Config, io: std.Io) for (slots) |*s_opt| { if (s_opt.* == null) { const slot = try page_allocator.create(ConnSlot); + errdefer page_allocator.destroy(slot); slot.* = .{ .conn = try Conn.accept(config, io), }; @@ -204,12 +214,11 @@ fn tickAllConnections(slots: *[MAX_CONNS]?*ConnSlot, sock: *const net.Socket, cm if (slot.peer_addr) |pa| { // Retry H3 control streams if initial send failed (queue was full). + const send_sock = slotSendSock(slot, sock, cm_sock_ptr); if (g_is_h3 and !slot.h3_control_sent and slot.conn.app_keys != null) { sendH3ControlStreams(slot); } - flushTransfers(slot, www_dir, io); - const send_sock = slotSendSock(slot, sock, cm_sock_ptr); - drainSend(&slot.conn, send_sock, io, &pa, send_bufs); + flushTransfers(slot, www_dir, io, send_sock, &pa, send_bufs); } } } @@ -228,7 +237,10 @@ pub fn main(init: std.process.Init) !void { // Determine the testcase; exit 127 if unsupported. // Check this FIRST before attempting to load certs, so that compliance // checks with unsupported testcases exit cleanly with 127. 
- const testcase = init.environ_map.get("TESTCASE") orelse "transfer"; + const testcase = init.environ_map.get("TESTCASE") orelse { + std.debug.print("TESTCASE not set, exiting\n", .{}); + std.process.exit(127); + }; var is_supported = false; for (supported_cases) |s| { if (std.mem.eql(u8, testcase, s)) { @@ -481,13 +493,15 @@ fn processPacket( // BEFORE processing the incoming packet, so PATH_CHALLENGE is the first // frame sent from the new address (required by interop test). if (is_cm_socket and !s.use_cm_sock) { - s.use_cm_sock = true; var challenge: [8]u8 = undefined; io.random(&challenge); s.conn.sendPathChallenge(challenge) catch {}; - } else if (is_cm_socket) { - // Already on CM socket, no action needed } + // Track the CURRENT socket — not a one-way flag. When the client + // rebinds back to the original path (or sim stops NAT'ing through CM), + // the server must follow. Without this, use_cm_sock stays true forever + // and data sent via CM socket can't reach clients on the original network. + if (s.use_cm_sock != is_cm_socket) s.use_cm_sock = is_cm_socket; const ecn_bits: u2 = 0; s.conn.receive(data, ipToSocketAddr(from), now_ns, ecn_bits, io) catch |err| { @@ -549,13 +563,19 @@ fn processPacket( } break; }, + .path_migrated => { + // Update send destination from the connection's authoritative + // peer address. Without this, late-arriving packets from the + // old address (via s.peer_addr = from) route sends to the + // stale address. + s.peer_addr = socketAddrToIp(s.conn.peer_addr); + }, else => {}, } } if (!slot_freed) { - flushTransfers(s, www_dir, io); - drainSend(&s.conn, active_sock, io, &from, send_bufs); + flushTransfers(s, www_dir, io, active_sock, &from, send_bufs); } } @@ -572,6 +592,7 @@ fn activatePending(transfers: *[MAX_TRANSFERS]FileTransfer, p: *const PendingTra t.active = true; t.stream_id = p.stream_id; t.offset = 0; + t.h3_headers_sent = false; @memcpy(t.path[0..p.path_len], p.path[0..p.path_len]); t.path_len = p.path_len; t.file = std.Io.Dir.openFileAbsolute(io, t.path[0..t.path_len], .{}) catch null; @@ -672,7 +693,7 @@ fn startTransfer(slot: *ConnSlot, stream_id: u62, www: []const u8, io: std.Io) v /// the congestion window is small (e.g. initial cwnd = 10 packets): without /// interleaving, stream 0 would fill the window and streams 4/8 would get no /// packets at all, stalling their offset-0 delivery. -fn flushTransfers(slot: *ConnSlot, www: []const u8, io: std.Io) void { +fn flushTransfers(slot: *ConnSlot, www: []const u8, io: std.Io, send_sock: *const net.Socket, dest: *const net.IpAddress, send_bufs: *SendBufs) void { const conn = &slot.conn; const transfers = &slot.transfers; _ = www; @@ -690,18 +711,30 @@ fn flushTransfers(slot: *ConnSlot, www: []const u8, io: std.Io) void { activatePending(transfers, &slot.pending[slot.pending_count], io); } // Outer loop: repeat passes until nothing was sent (CC/queue fully blocked). + // After each transfer advance, drain what pacing allows so bytes_in_flight + // stays current. Without this, bytes_in_flight=0 during the fill phase and + // the cwnd check is blind — either starving the pipe (with bytes_queued) or + // flooding the send queue (without it). 
while (true) { var sent_any = false; for (transfers) |*t| { if (!t.active) continue; - if (g_is_h3) { - if (advanceTransferOneH3(conn, t, io)) sent_any = true; - } else { - if (advanceTransferOne(conn, t, io)) sent_any = true; - } + const progress = if (g_is_h3) + advanceTransferOneH3(conn, t, io) + else + advanceTransferOne(conn, t, io); + if (progress) sent_any = true; } if (!sent_any) break; + // Drain pacing-gated packets after each round-robin pass so + // bytes_in_flight stays current for the next pass's cwnd check. + drainSend(conn, send_sock, io, dest, send_bufs); } + // Always drain: tick() and receive() may have enqueued PATH_CHALLENGE, + // ACKs, or retransmissions independent of transfer progress. Without + // this, those packets are stranded when all transfers are blocked + // (buffer full, amplification limit), causing path validation to stall. + drainSend(conn, send_sock, io, dest, send_bufs); } /// Send exactly one chunk from the transfer. Returns true if progress was made. @@ -728,9 +761,11 @@ fn advanceTransferGeneric(conn: *Conn, t: *FileTransfer, io: std.Io, is_h3: bool t.active = false; return true; } - // hq-interop: no file → already closed + // hq-interop: no file → send FIN so client gets a clean close + // instead of waiting until idle timeout. + conn.streamSend(t.stream_id, &.{}, true) catch return false; t.active = false; - return false; + return true; }; // H3: send HEADERS frame first (:status 200) @@ -799,30 +834,37 @@ fn advanceTransferGeneric(conn: *Conn, t: *FileTransfer, io: std.Io, is_h3: bool // --------------------------------------------------------------------------- /// Open the three server-initiated unidirectional streams required by RFC 9114. +/// Streams are sent individually so a partial failure (queue full) can be +/// retried without re-sending already-succeeded streams. fn sendH3ControlStreams(s: *ConnSlot) void { const conn = &s.conn; // Stream IDs: server-initiated unidirectional = 4*n + 3 → 3, 7, 11 + const stream_ids = [_]u62{ 3, 7, 11 }; + const stream_types = [_]u64{ + http3.StreamType.control, + http3.StreamType.qpack_encoder, + http3.StreamType.qpack_decoder, + }; - // 1. Control stream (type 0x00) + SETTINGS frame - var ctrl_buf: [64]u8 = undefined; - var pos: usize = 0; - // Stream type 0x00 (control) - pos += http3.varint.encode(ctrl_buf[pos..], http3.StreamType.control) catch return; - // SETTINGS frame (empty — all defaults) - pos += http3.frame.writeHeader(ctrl_buf[pos..], http3.FrameType.settings, 0) catch return; - conn.streamSend(3, ctrl_buf[0..pos], false) catch return; - - // 2. QPACK encoder stream (type 0x02) - var enc_buf: [4]u8 = undefined; - const enc_len = http3.varint.encode(&enc_buf, http3.StreamType.qpack_encoder) catch return; - conn.streamSend(7, enc_buf[0..enc_len], false) catch return; - - // 3. QPACK decoder stream (type 0x03) - var dec_buf: [4]u8 = undefined; - const dec_len = http3.varint.encode(&dec_buf, http3.StreamType.qpack_decoder) catch return; - conn.streamSend(11, dec_buf[0..dec_len], false) catch return; - - s.h3_control_sent = true; + var all_sent = true; + for (stream_ids, stream_types) |sid, stype| { + // Skip streams that were already sent in a previous partial attempt. + if (conn.streams.get(sid)) |st| { + if (st.send_offset > 0) continue; + } + var buf: [64]u8 = undefined; + var pos: usize = 0; + pos += http3.varint.encode(buf[pos..], stype) catch return; + // Control stream also needs an empty SETTINGS frame. 
+ if (stype == http3.StreamType.control) { + pos += http3.frame.writeHeader(buf[pos..], http3.FrameType.settings, 0) catch return; + } + conn.streamSend(sid, buf[0..pos], false) catch { + all_sent = false; + continue; + }; + } + if (all_sent) s.h3_control_sent = true; } /// Parse an H3 request from a bidirectional stream and register a FileTransfer. @@ -1033,10 +1075,11 @@ fn configureEcn(sock: *const net.Socket) !void { fn drainSend(conn: *Conn, sock: *const net.Socket, io: std.Io, dest: *const net.IpAddress, bufs: *SendBufs) void { var messages: [SEND_BATCH]net.OutgoingMessage = undefined; var count: usize = 0; + const now_ns: i64 = @truncate(std.Io.Clock.awake.now(io).nanoseconds); // Phase 1: collect all outgoing packets into separate buffers. while (count < SEND_BATCH) { - const n = conn.send(&bufs.bufs[count]); + const n = conn.send(&bufs.bufs[count], now_ns); if (n == 0) break; messages[count] = .{ .address = dest, @@ -1094,13 +1137,7 @@ fn updateKeyLog(conn: *const Conn, io: std.Io, _: u32) void { if (pos >= buf.len - 256) break; } - // Overwrite the keylog file with all generations (directory /logs created by Dockerfile) - const file = std.Io.Dir.createFileAbsolute(io, "/logs/keys.log", .{}) catch return; - defer file.close(io); - file.writePositionalAll(io, buf[0..pos], 0) catch return; - // Sync multiple times to guarantee disk flush before docker cp - file.sync(io) catch {}; - file.sync(io) catch {}; + appendKeyLog(io, buf[0..pos]); } /// Write an SSLKEYLOG file so network analyzers (Wireshark/tshark) can decrypt @@ -1128,12 +1165,18 @@ fn writeKeyLog(conn: *const Conn, io: std.Io) void { line = std.fmt.bufPrint(buf[pos..], "SERVER_TRAFFIC_SECRET_0 {s} {s}\n", .{ random_hex, std.fmt.bytesToHex(secrets_0.server, .lower) }) catch return; pos += line.len; - // Write keylog file (directory /logs created by Dockerfile) + appendKeyLog(io, buf[0..pos]); +} + +fn appendKeyLog(io: std.Io, data: []const u8) void { + // Accumulate in memory, write full buffer each time (createFileAbsolute truncates). + const n = @min(data.len, g_keylog_buf.len - g_keylog_len); + if (n == 0) return; + @memcpy(g_keylog_buf[g_keylog_len..][0..n], data[0..n]); + g_keylog_len += n; const file = std.Io.Dir.createFileAbsolute(io, "/logs/keys.log", .{}) catch return; defer file.close(io); - file.writePositionalAll(io, buf[0..pos], 0) catch return; - // Sync multiple times to guarantee disk flush before docker cp - file.sync(io) catch {}; + file.writePositionalAll(io, g_keylog_buf[0..g_keylog_len], 0) catch return; file.sync(io) catch {}; } @@ -1144,6 +1187,13 @@ fn ipToSocketAddr(addr: net.IpAddress) quic.SocketAddr { }; } +fn socketAddrToIp(addr: quic.SocketAddr) net.IpAddress { + return switch (addr) { + .v4 => |a| .{ .ip4 = .{ .bytes = a.addr, .port = a.port } }, + .v6 => |a| .{ .ip6 = .{ .bytes = a.addr, .port = a.port } }, + }; +} + // --------------------------------------------------------------------------- // Tests // ---------------------------------------------------------------------------
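Editor's note, a hedged usage sketch (not part of the patch): throughout this diff the old `conn.send(&buf)` becomes `conn.send(&buf, now_ns)`, and the pacing comments in tools/server.zig suggest that a zero return covers both an empty queue and a pacing-gated queue. Under that assumption, an application-side drain loop looks roughly like this; `UdpSocket` and `sendDatagram` are placeholders, not APIs from this codebase.

```zig
/// Placeholder socket type: the library is sans-I/O, so the application owns
/// the real UDP socket. `sendDatagram` stands in for whatever send call it uses.
const UdpSocket = struct {
    pub fn sendDatagram(_: *UdpSocket, _: []const u8) !void {}
};

/// Drain every datagram the connection is willing to emit at `now_ns`.
/// When send() returns 0 because of pacing (rather than an empty queue),
/// the caller waits for the next deadline from nextTimeout(), calls
/// tick(now_ns), and drains again.
fn drainToWire(conn: anytype, sock: *UdpSocket, now_ns: i64) !void {
    var buf: [1500]u8 = undefined;
    while (true) {
        const n = conn.send(&buf, now_ns);
        if (n == 0) break;
        try sock.sendDatagram(buf[0..n]);
    }
}
```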