From 385bfd4e1c8afc6907c2d45dec97bb5001b57135 Mon Sep 17 00:00:00 2001 From: Alex Godoroja Date: Tue, 5 May 2026 23:54:46 -0700 Subject: [PATCH] fix: update python and js pilot clients to 1.9-* --- CHANGELOG.md | 26 +- cmd/pilotctl/main.go | 12 +- cmd/pilotctl/redact_test.go | 18 +- cmd/pilotctl/updates.go | 4 +- pkg/daemon/accept_queue_bug_test.go | 10 +- pkg/daemon/beacon_discovery_test.go | 2 +- pkg/daemon/beacon_select_test.go | 16 +- pkg/daemon/ca_growth_abc_cap_bug_test.go | 4 +- pkg/daemon/daemon.go | 12 +- pkg/daemon/daemon_ipc_test.go | 2 +- pkg/daemon/daemon_networkipc_test.go | 2 +- pkg/daemon/dial_precancelled_ctx_bug_test.go | 10 +- ...esh_recovery_per_ack_inflation_bug_test.go | 12 +- ...k_in_recovery_ssthresh_halving_bug_test.go | 8 +- ...ut_recovery_additional_inflate_bug_test.go | 4 +- ...eout_recovery_cwnd_reinflation_bug_test.go | 8 +- ...w_episode_in_recovery_ssthresh_bug_test.go | 10 +- ...ut_recovery_fast_recovery_flag_bug_test.go | 20 +- ...covery_cwnd_inflation_windowch_bug_test.go | 19 +- .../fast_recovery_exit_cwnd_bug_test.go | 2 +- ...t_recovery_exit_deflation_noop_bug_test.go | 2 +- ...ery_partial_ack_aimd_inflation_bug_test.go | 4 +- .../fast_recovery_partial_ack_bug_test.go | 14 +- ...post_partial_ack_dup_inflation_bug_test.go | 8 +- ...dup_ack_same_episode_inflation_bug_test.go | 6 +- ...fast_retransmit_entry_windowch_bug_test.go | 15 +- .../fast_retransmit_max_attempts_bug_test.go | 2 +- ...transmit_noop_congestion_state_bug_test.go | 4 +- pkg/daemon/ipc_async_write_test.go | 2 +- pkg/daemon/ipc_dialcancel_leak_bug_test.go | 4 +- .../keepalive_zero_window_probe_bug_test.go | 4 +- .../listener_closed_channel_bug_test.go | 2 +- .../nagle_all_sacked_hasunacked_bug_test.go | 2 +- .../peer_recv_win_growth_windowch_bug_test.go | 4 +- pkg/daemon/policy_runner.go | 10 +- pkg/daemon/ports.go | 13 +- pkg/daemon/ports_logic_test.go | 2 +- .../process_ack_resets_sack_state_bug_test.go | 8 +- .../process_sack_wraparound_bug_test.go | 12 +- 
pkg/daemon/remove_peer_leak_bug_test.go | 18 +- ...it_timeout_ssthresh_flightsize_bug_test.go | 6 +- .../retx_sacked_ordering_break_bug_test.go | 38 +- ..._second_partial_ack_retransmit_bug_test.go | 16 +- .../rto_backoff_in_recovery_bug_test.go | 8 +- .../rtt_multiple_samples_per_ack_bug_test.go | 6 +- .../rtt_sacked_segment_skipped_bug_test.go | 20 +- pkg/daemon/sack_blocks_wraparound_bug_test.go | 6 +- ...mulative_ack_aimd_overcounting_bug_test.go | 4 +- pkg/daemon/sendbuf_caller_bug_test.go | 1 + pkg/daemon/services.go | 6 +- pkg/daemon/ss_growth_abc_cap_bug_test.go | 6 +- ...ssthresh_congwin_vs_flightsize_bug_test.go | 6 +- pkg/daemon/ssthresh_floor_two_mss_bug_test.go | 6 +- pkg/daemon/throughput_bench_test.go | 12 +- pkg/daemon/timeout_cwnd_reset_bug_test.go | 6 +- .../timeout_ssthresh_in_recovery_bug_test.go | 6 +- pkg/daemon/tunnel.go | 6 +- pkg/daemon/tunnel_blackhole_bug_test.go | 14 +- pkg/daemon/tunnel_desync_salvage_test.go | 6 +- pkg/daemon/tunnel_handle_test.go | 1 - .../window_update_dup_ack_count_bug_test.go | 2 +- pkg/daemon/window_update_wakeup_bug_test.go | 20 +- .../zero_window_peerrecvwin_bug_test.go | 14 +- pkg/registry/binary_client_test.go | 28 +- pkg/registry/panic_recovery.go | 12 +- pkg/registry/panic_recovery_test.go | 4 +- pkg/registry/replication.go | 4 +- pkg/registry/server.go | 135 ++--- pkg/registry/server_perf_test.go | 1 - pkg/registry/wal_replay.go | 16 +- pkg/registry/wal_replay_test.go | 8 +- pkg/skillinject/manifest.go | 2 +- pkg/skillinject/skillinject.go | 4 +- pkg/tasksubmit/tasksubmit.go | 10 +- sdk/cgo/bindings.go | 307 +++++++++++ sdk/node/package-lock.json | 23 +- sdk/node/package.json | 2 +- sdk/node/scripts/build-binaries.sh | 24 + sdk/node/src/cli.ts | 101 +--- sdk/node/src/client.ts | 150 +++++- sdk/node/src/ffi.ts | 93 +++- sdk/node/src/runtime.ts | 485 ++++++++++++++++++ sdk/node/tests/client.test.ts | 485 ++++++++++++++++++ sdk/node/tests/runtime.test.ts | 298 +++++++++++ sdk/node/tests/smoke_list_agents.mjs | 
136 +++++ sdk/python/MANIFEST.in | 5 +- sdk/python/README.md | 2 +- sdk/python/pilotprotocol/_runtime.py | 382 ++++++++++++++ sdk/python/pilotprotocol/cli.py | 194 ++----- sdk/python/pilotprotocol/client.py | 273 +++++++++- sdk/python/pyproject.toml | 4 +- sdk/python/scripts/build-binaries.sh | 24 + sdk/python/tests/smoke_list_agents.py | 133 +++++ sdk/python/tests/test_client.py | 401 ++++++++++++++- sdk/python/tests/test_runtime.py | 403 +++++++++++++++ tests/bench_concurrent_test.go | 7 +- tests/bench_recovery_test.go | 8 +- 97 files changed, 4072 insertions(+), 645 deletions(-) create mode 100644 sdk/node/src/runtime.ts create mode 100644 sdk/node/tests/runtime.test.ts create mode 100644 sdk/node/tests/smoke_list_agents.mjs create mode 100644 sdk/python/pilotprotocol/_runtime.py create mode 100644 sdk/python/tests/smoke_list_agents.py create mode 100644 sdk/python/tests/test_runtime.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fe9c96b3..0a824f40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,31 @@ project uses [Semantic Versioning](https://semver.org/). Detailed per-release notes for tagged versions are published on the [GitHub Releases page](https://github.com/TeoSlayer/pilotprotocol/releases). -## [Unreleased] +## [1.9.2] - 2026-05-05 + +### Changed +- **SDK: updated the SDK clients and CGO bindings to the latest version.** + +- **SDK: removed polo-score surface from Node and Python bindings.** + Dropped `PilotMyPolo` from the CGO export layer (`sdk/cgo/bindings.go`) + and the corresponding `Driver.myPoloScore()` / `Driver.my_polo_score()` + wrappers, fake-lib hooks, and tests in both SDKs. The driver-level + `MyPoloScore()` and the underlying registry/daemon machinery are + untouched — this only narrows what the language SDKs expose. Removed + the now-stale `polo.pilotprotocol.network` "Live Dashboard" project + URL from `sdk/python/pyproject.toml` and a stray polo mention from + the Python SDK README.
+ +### Fixed + +- **SDK: macOS binaries shipped via npm/pip codesign parity.** + `sdk/{node,python}/scripts/build-binaries.sh` now mirror the main + release workflow — after building `pilot-daemon`, `pilotctl`, + `pilot-gateway`, `pilot-updater`, and `libpilot.dylib` on darwin, + each artifact is codesigned (`codesign --force --deep --sign -`) + and stripped of the quarantine xattr. Without this, npm/pip-installed + binaries triggered Gatekeeper "killed: 9" / "cannot be opened + because Apple cannot check it for malicious software" on first run. ## [1.9.1] - 2026-05-05 diff --git a/cmd/pilotctl/main.go b/cmd/pilotctl/main.go index a510e011..04c8eba5 100644 --- a/cmd/pilotctl/main.go +++ b/cmd/pilotctl/main.go @@ -1281,12 +1281,12 @@ func cmdContext() { "enable-tasks": map[string]interface{}{"args": []string{}, "description": "Advertise task-execution capability on port 1003"}, "disable-tasks": map[string]interface{}{"args": []string{}, "description": "Stop advertising task-execution capability"}, // Low-level / plumbing - "connect": map[string]interface{}{"args": []string{"", "[port]", "[--message ]"}, "description": "Open a raw stream connection"}, - "send": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a single raw message to a port"}, - "recv": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Accept and print incoming stream messages"}, - "dgram": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a UDP-style datagram"}, - "listen": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Listen for incoming datagrams"}, - "broadcast": map[string]interface{}{"args": []string{"", ""}, "description": "Broadcast a datagram to all network members"}, + "connect": map[string]interface{}{"args": []string{"", "[port]", "[--message ]"}, "description": "Open a raw stream connection"}, + "send": map[string]interface{}{"args": []string{"", "", "--data "}, 
"description": "Send a single raw message to a port"}, + "recv": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Accept and print incoming stream messages"}, + "dgram": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a UDP-style datagram"}, + "listen": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Listen for incoming datagrams"}, + "broadcast": map[string]interface{}{"args": []string{"", ""}, "description": "Broadcast a datagram to all network members"}, // Connection management "connections": map[string]interface{}{"args": []string{}, "description": "List active daemon connections"}, "disconnect": map[string]interface{}{"args": []string{""}, "description": "Close a connection by ID"}, diff --git a/cmd/pilotctl/redact_test.go b/cmd/pilotctl/redact_test.go index 6d553f5e..d7c529ce 100644 --- a/cmd/pilotctl/redact_test.go +++ b/cmd/pilotctl/redact_test.go @@ -17,24 +17,24 @@ func TestRedactPeerEndpointsRemovesIPFields(t *testing.T) { "node_id": 42, "address": "0:0000.0000.002A", "hostname": "agent-test", - "endpoint": "203.0.113.5:4000", // must go - "real_addr": "203.0.113.5:4000", // must go - "public_addr": "203.0.113.5:4000", // must go + "endpoint": "203.0.113.5:4000", // must go + "real_addr": "203.0.113.5:4000", // must go + "public_addr": "203.0.113.5:4000", // must go "lan_addrs": []interface{}{"10.0.0.5:4000"}, // must go - "observed_addr": "203.0.113.5:4000", // must go - "stun_addr": "203.0.113.5:4000", // must go + "observed_addr": "203.0.113.5:4000", // must go + "stun_addr": "203.0.113.5:4000", // must go "peers": 7, "encrypted_peers": 7, "peer_list": []interface{}{ map[string]interface{}{ - "node_id": 10, - "endpoint": "198.51.100.10:4000", + "node_id": 10, + "endpoint": "198.51.100.10:4000", "real_addr": "198.51.100.10:4000", "encrypted": true, }, map[string]interface{}{ - "node_id": 11, - "endpoint": "198.51.100.11:4000", + "node_id": 11, + "endpoint": 
"198.51.100.11:4000", "encrypted": false, }, }, diff --git a/cmd/pilotctl/updates.go b/cmd/pilotctl/updates.go index 58aa49f2..0b5426f2 100644 --- a/cmd/pilotctl/updates.go +++ b/cmd/pilotctl/updates.go @@ -25,8 +25,8 @@ var changelogFeedURL = "https://teoslayer.github.io/pilot-changelog/feed.xml" // for the human-readable + JSON output are decoded; unknown elements are // ignored by encoding/xml. type rssDoc struct { - XMLName xml.Name `xml:"rss"` - Channel rssChan `xml:"channel"` + XMLName xml.Name `xml:"rss"` + Channel rssChan `xml:"channel"` } type rssChan struct { diff --git a/pkg/daemon/accept_queue_bug_test.go b/pkg/daemon/accept_queue_bug_test.go index 79ede76e..578d748b 100644 --- a/pkg/daemon/accept_queue_bug_test.go +++ b/pkg/daemon/accept_queue_bug_test.go @@ -17,11 +17,11 @@ import ( // to call Accept and the queue has filled to AcceptQueueLen=64), the // SYN handler at pkg/daemon/daemon.go:1841 currently: // -// 1. Sends the SYN-ACK back to the dialer -// 2. Marks the connection StateEstablished -// 3. Tries to push to AcceptCh -// 4. On full: hits the `default` branch, sends a RST, removes the -// Connection, logs WARN +// 1. Sends the SYN-ACK back to the dialer +// 2. Marks the connection StateEstablished +// 3. Tries to push to AcceptCh +// 4. On full: hits the `default` branch, sends a RST, removes the +// Connection, logs WARN // // The RST is good — peer learns immediately. 
But: // - No Daemon-level counter is incremented (no AcceptQueueDrops) diff --git a/pkg/daemon/beacon_discovery_test.go b/pkg/daemon/beacon_discovery_test.go index d3b8f01d..f1e2d911 100644 --- a/pkg/daemon/beacon_discovery_test.go +++ b/pkg/daemon/beacon_discovery_test.go @@ -18,7 +18,7 @@ import ( type fakeRegistry struct { mu sync.Mutex beacons []string - failNext int // if >0, the next N Send() calls error + failNext int // if >0, the next N Send() calls error calls atomic.Int64 lastError error } diff --git a/pkg/daemon/beacon_select_test.go b/pkg/daemon/beacon_select_test.go index 7fe86fab..14893cfa 100644 --- a/pkg/daemon/beacon_select_test.go +++ b/pkg/daemon/beacon_select_test.go @@ -156,15 +156,15 @@ func TestPickBeaconStableAcrossSeparateListInstances(t *testing.T) { // are kept because they may resolve to public IPs. func TestFilterUnreachableDropsPrivateAndLoopback(t *testing.T) { in := []string{ - "34.71.57.205:9001", // public — kept - "10.128.0.78:9001", // private RFC1918 — dropped - "192.168.1.5:9001", // private RFC1918 — dropped - "172.16.0.5:9001", // private RFC1918 — dropped - "127.0.0.1:9001", // loopback — dropped - "169.254.1.1:9001", // link-local — dropped - "0.0.0.0:9001", // unspecified — dropped + "34.71.57.205:9001", // public — kept + "10.128.0.78:9001", // private RFC1918 — dropped + "192.168.1.5:9001", // private RFC1918 — dropped + "172.16.0.5:9001", // private RFC1918 — dropped + "127.0.0.1:9001", // loopback — dropped + "169.254.1.1:9001", // link-local — dropped + "0.0.0.0:9001", // unspecified — dropped "beacon.example.com:9001", // DNS hostname — kept - "8.8.8.8:9001", // public — kept + "8.8.8.8:9001", // public — kept } got := filterUnreachable(in) want := []string{ diff --git a/pkg/daemon/ca_growth_abc_cap_bug_test.go b/pkg/daemon/ca_growth_abc_cap_bug_test.go index 0d11aad1..5ad83ea9 100644 --- a/pkg/daemon/ca_growth_abc_cap_bug_test.go +++ b/pkg/daemon/ca_growth_abc_cap_bug_test.go @@ -70,9 +70,9 @@ func 
TestCAGrowthCapsIncrementAtSMSS(t *testing.T) { // SMSS*SMSS/cwnd = 4096*4096/40960 = 409. // Bug: SMSS*bytes_acked/cwnd = 4096*8192/40960 = 819. const ( - wantIncrement = MaxSegmentSize * MaxSegmentSize / initialCongWin // 409 + wantIncrement = MaxSegmentSize * MaxSegmentSize / initialCongWin // 409 bugIncrement = MaxSegmentSize * (2 * MaxSegmentSize) / initialCongWin // 819 - wantCongWin = initialCongWin + wantIncrement // 41369 + wantCongWin = initialCongWin + wantIncrement // 41369 ) if c.CongWin != wantCongWin { t.Errorf("CA growth with bytes_acked=2*SMSS: CongWin=%d, want %d "+ diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 578abb97..03da8850 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -140,10 +140,10 @@ const ( // Dial and retransmission constants. const ( - DialDirectRetries = 3 // direct connection attempts before relay - DialMaxRetries = 7 // total attempts (direct + relay). 3 direct + 4 relay. With DialInitialRTO=250ms exponential-backoff capped at DialMaxRTO=8s, the relay phase is ~7.75s — covers cold-start handshake (key_exchange + flushPending + SYN/SYN-ACK round trip) for typical peers while keeping bad dials from blocking longer than the user's --timeout. The probe-and-adapt machinery (see srttHistory below) will let us shorten this for peers we've successfully dialed before. - DialInitialRTO = 250 * time.Millisecond // initial SYN retransmission timeout. Lowered from 1s — modern relay RTT is <200ms; waiting a full second before assuming loss makes cold dials feel like a stall. Three direct retries with exponential backoff (250→500→1000) still cover up to 1.75s of jitter before flipping to relay; that's plenty for an unhealthy direct path while letting the common case (peer is reachable, single retry needed) feel snappy. - DialMaxRTO = 8 * time.Second // max backoff for SYN retransmission + DialDirectRetries = 3 // direct connection attempts before relay + DialMaxRetries = 7 // total attempts (direct + relay). 
3 direct + 4 relay. With DialInitialRTO=250ms exponential-backoff capped at DialMaxRTO=8s, the relay phase is ~7.75s — covers cold-start handshake (key_exchange + flushPending + SYN/SYN-ACK round trip) for typical peers while keeping bad dials from blocking longer than the user's --timeout. The probe-and-adapt machinery (see srttHistory below) will let us shorten this for peers we've successfully dialed before. + DialInitialRTO = 250 * time.Millisecond // initial SYN retransmission timeout. Lowered from 1s — modern relay RTT is <200ms; waiting a full second before assuming loss makes cold dials feel like a stall. Three direct retries with exponential backoff (250→500→1000) still cover up to 1.75s of jitter before flipping to relay; that's plenty for an unhealthy direct path while letting the common case (peer is reachable, single retry needed) feel snappy. + DialMaxRTO = 8 * time.Second // max backoff for SYN retransmission DialCheckInterval = 10 * time.Millisecond // poll interval for state changes during dial RetxCheckInterval = 100 * time.Millisecond // retransmission check ticker MaxRetxAttempts = 8 // abandon connection after this many retransmissions @@ -3368,8 +3368,8 @@ func (d *Daemon) hostnameCachePath() string { // hostnameCacheDisk is the on-disk format for the hostname cache. 
type hostnameCacheDisk struct { - SavedAt time.Time `json:"saved_at"` - Hostnames map[string]hostnameCacheDiskEntry `json:"hostnames"` + SavedAt time.Time `json:"saved_at"` + Hostnames map[string]hostnameCacheDiskEntry `json:"hostnames"` } type hostnameCacheDiskEntry struct { diff --git a/pkg/daemon/daemon_ipc_test.go b/pkg/daemon/daemon_ipc_test.go index f8db19cc..4c454ea7 100644 --- a/pkg/daemon/daemon_ipc_test.go +++ b/pkg/daemon/daemon_ipc_test.go @@ -122,7 +122,7 @@ func TestHandleBindDoubleBindSendsError(t *testing.T) { } ic, client := newIPCTestConn(t) reply := runHandler(t, client, func() { s.handleBind(ic, []byte{0x23, 0x28}) }) // port 9000 - assertErrorReply(t, reply, "port") // "already bound" or similar + assertErrorReply(t, reply, "port") // "already bound" or similar } // --- handleDial --- diff --git a/pkg/daemon/daemon_networkipc_test.go b/pkg/daemon/daemon_networkipc_test.go index 3df6227a..fabcaa1b 100644 --- a/pkg/daemon/daemon_networkipc_test.go +++ b/pkg/daemon/daemon_networkipc_test.go @@ -282,7 +282,7 @@ func TestHandleNetworkRespondInviteNoInviteSendsError(t *testing.T) { payload := make([]byte, 4) payload[0] = SubNetworkRespondInvite binary.BigEndian.PutUint16(payload[1:3], 0xBEEF) // non-existent network - payload[3] = 1 // accept=true + payload[3] = 1 // accept=true reply := runHandler(t, client, func() { s.handleNetwork(ic, payload) }) // Either registry rejects or reply is OK — the code path exercises the diff --git a/pkg/daemon/dial_precancelled_ctx_bug_test.go b/pkg/daemon/dial_precancelled_ctx_bug_test.go index af32969f..cd0be38b 100644 --- a/pkg/daemon/dial_precancelled_ctx_bug_test.go +++ b/pkg/daemon/dial_precancelled_ctx_bug_test.go @@ -24,11 +24,11 @@ import ( // request, or an upstream timeout fired during request queueing), // the daemon still: // -// 1. Calls ensureTunnel (potentially up to 30 s blocked on a -// slow registry — see iter 13 audit notes) -// 2. Allocates an ephemeral port -// 3. 
Creates a Connection in StateSynSent -// 4. Sends a SYN over the tunnel to the peer +// 1. Calls ensureTunnel (potentially up to 30 s blocked on a +// slow registry — see iter 13 audit notes) +// 2. Allocates an ephemeral port +// 3. Creates a Connection in StateSynSent +// 4. Sends a SYN over the tunnel to the peer // // Only AFTER all of that does the for-loop's ctx.Done case fire. // The peer received a phantom SYN they'll respond to (SYN-ACK) diff --git a/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go b/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go index c36d0a33..925efb2c 100644 --- a/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go +++ b/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go @@ -71,12 +71,12 @@ func TestFreshFastRecoveryPerDupAckInflation(t *testing.T) { now := time.Now() c.RetxMu.Lock() c.LastAck = seqA - c.DupAckCount = 3 // third dup ACK just fired - c.InRecovery = true // entered by DupAckCount==3 path - c.FastRecovery = true // fast retransmit entered this episode - c.RecoveryPoint = sendSeq // set to sendSeq by DupAckCount==3 path - c.SSThresh = 2 * MaxSegmentSize // halved: max(3*MSS/2, 2*MSS) = 2*MSS - c.CongWin = c.SSThresh + 3*MaxSegmentSize // = 5*MSS = 20480 + c.DupAckCount = 3 // third dup ACK just fired + c.InRecovery = true // entered by DupAckCount==3 path + c.FastRecovery = true // fast retransmit entered this episode + c.RecoveryPoint = sendSeq // set to sendSeq by DupAckCount==3 path + c.SSThresh = 2 * MaxSegmentSize // halved: max(3*MSS/2, 2*MSS) = 2*MSS + c.CongWin = c.SSThresh + 3*MaxSegmentSize // = 5*MSS = 20480 c.Unacked = []*retxEntry{ {seq: seqB, data: make([]byte, MaxSegmentSize), attempts: 2, sentAt: now}, // fast-retransmitted {seq: seqC, data: make([]byte, MaxSegmentSize), attempts: 1, sentAt: now}, diff --git a/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go b/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go index 
852b3631..c9b8c534 100644 --- a/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go +++ b/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go @@ -57,10 +57,10 @@ func TestDupAckFastRetransmitInRecoveryDoesNotRehalveSSThresh(t *testing.T) { // SSThresh halved once, CongWin = InitialCongWin, InRecovery = true. conn.RetxMu.Lock() conn.LastAck = seqA - conn.CongWin = InitialCongWin // timeout set this (10*MSS) - conn.SSThresh = 10 * MaxSegmentSize // timeout halved from 20*MSS → 10*MSS - conn.DupAckCount = 0 // timeout reset this (iter-51) - conn.InRecovery = true // timeout set this + conn.CongWin = InitialCongWin // timeout set this (10*MSS) + conn.SSThresh = 10 * MaxSegmentSize // timeout halved from 20*MSS → 10*MSS + conn.DupAckCount = 0 // timeout reset this (iter-51) + conn.InRecovery = true // timeout set this conn.RecoveryPoint = seqA + MaxSegmentSize conn.RTO = InitialRTO conn.Unacked = []*retxEntry{{ diff --git a/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go b/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go index 55a2f3ba..14e263ce 100644 --- a/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go +++ b/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go @@ -58,7 +58,7 @@ func TestAdditionalDupAckInTimeoutRecoveryDoesNotInflateConn(t *testing.T) { conn.RetxStop = make(chan struct{}) const ( - seqA = uint32(1000) + seqA = uint32(1000) ssthreshAfterTimeout = 5 * MaxSegmentSize ) @@ -70,7 +70,7 @@ func TestAdditionalDupAckInTimeoutRecoveryDoesNotInflateConn(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = seqA - conn.CongWin = MaxSegmentSize // post-timeout: 1 SMSS + conn.CongWin = MaxSegmentSize // post-timeout: 1 SMSS conn.SSThresh = ssthreshAfterTimeout conn.DupAckCount = 0 conn.InRecovery = true diff --git a/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go b/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go index 
8f74100a..461b8576 100644 --- a/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go +++ b/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go @@ -69,7 +69,7 @@ func TestDupAckInTimeoutRecoveryDoesNotReinflateConn(t *testing.T) { conn.RetxStop = make(chan struct{}) const ( - seqA = uint32(1000) + seqA = uint32(1000) ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480 — timeout halved from 10*MSS ) @@ -82,11 +82,11 @@ func TestDupAckInTimeoutRecoveryDoesNotReinflateConn(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = seqA - conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS + conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS conn.SSThresh = ssthreshAfterTimeout conn.DupAckCount = 0 - conn.InRecovery = true // set by the retransmission timeout - conn.FastRecovery = false // cleared by retransmitUnacked + conn.InRecovery = true // set by the retransmission timeout + conn.FastRecovery = false // cleared by retransmitUnacked conn.RecoveryPoint = seqA + MaxSegmentSize // = SendSeq (no new data) conn.RTO = InitialRTO conn.Unacked = []*retxEntry{{ diff --git a/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go b/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go index a471c81f..7826696a 100644 --- a/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go +++ b/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go @@ -76,7 +76,7 @@ func TestNewEpisodeDupAcksInRecoveryHalveSSThresh(t *testing.T) { const ( seqA = uint32(1000) - seqB = seqA + MaxSegmentSize // 5096 — timeout-retransmitted, in Unacked + seqB = seqA + MaxSegmentSize // 5096 — timeout-retransmitted, in Unacked seqD = seqA + 2*MaxSegmentSize // 9192 — RecoveryPoint from timeout seqE = seqA + 3*MaxSegmentSize // 13288 — new data, in Unacked ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480 — halved from 10*MSS by timeout @@ -92,12 +92,12 @@ func TestNewEpisodeDupAcksInRecoveryHalveSSThresh(t 
*testing.T) { conn.RetxMu.Lock() conn.LastAck = seqA - conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS + conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS conn.SSThresh = ssthreshAfterTimeout // 5*MSS, set by the retransmission timeout conn.DupAckCount = 0 - conn.InRecovery = true // timeout set InRecovery=true - conn.FastRecovery = false // cleared by timeout - conn.RecoveryPoint = seqD // timeout's recovery window ends at seqD + conn.InRecovery = true // timeout set InRecovery=true + conn.FastRecovery = false // cleared by timeout + conn.RecoveryPoint = seqD // timeout's recovery window ends at seqD conn.RTO = InitialRTO now := time.Now() conn.Unacked = []*retxEntry{ diff --git a/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go b/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go index 7e7418d9..1d6151f4 100644 --- a/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go +++ b/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go @@ -49,9 +49,9 @@ import ( // (same-episode dup ACKs must not set FastRecovery). // // 2. Narrow the step-6 guard from -// (oldDupAckCount >= 3 || wasFastRecovery) && wasInRecovery +// (oldDupAckCount >= 3 || wasFastRecovery) && wasInRecovery // to -// wasFastRecovery && wasInRecovery +// wasFastRecovery && wasInRecovery // so that step 6 only fires when FastRecovery was explicitly set // (i.e. recovery was entered via fast retransmit for a new episode). 
// This prevents leftover DupAckCount >= 3 from triggering spurious @@ -70,10 +70,10 @@ func TestSameEpisodeDupAcksDoNotSetFastRecoveryFlag(t *testing.T) { const ( seqA = uint32(1000) - seqB = seqA + MaxSegmentSize // 5096 — SACKED by receiver - seqC = seqB + MaxSegmentSize // 9192 — still outstanding - seqD = seqC + MaxSegmentSize // 13288 — RecoveryPoint - ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480 + seqB = seqA + MaxSegmentSize // 5096 — SACKED by receiver + seqC = seqB + MaxSegmentSize // 9192 — still outstanding + seqD = seqC + MaxSegmentSize // 13288 — RecoveryPoint + ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480 ) conn.Mu.Lock() @@ -85,12 +85,12 @@ func TestSameEpisodeDupAcksDoNotSetFastRecoveryFlag(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = seqA - conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS + conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS conn.SSThresh = ssthreshAfterTimeout conn.DupAckCount = 0 - conn.InRecovery = true // set by the retransmission timeout - conn.FastRecovery = false // cleared by retransmitUnacked - conn.RecoveryPoint = seqD // = SendSeq (no new data) + conn.InRecovery = true // set by the retransmission timeout + conn.FastRecovery = false // cleared by retransmitUnacked + conn.RecoveryPoint = seqD // = SendSeq (no new data) conn.RTO = InitialRTO now := time.Now() conn.Unacked = []*retxEntry{ diff --git a/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go b/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go index d1752124..d4274331 100644 --- a/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go +++ b/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go @@ -38,13 +38,14 @@ import ( // the congestion-window-full state. 
// // Concrete example (8 segments in flight, MaxSegmentSize = 4096 bytes): -// initial: CongWin = 8*MSS = 32768, BytesInFlight = 32768 -// 3rd dup-ACK (fast retransmit): -// SSThresh = 16384, CongWin = 16384+3*4096 = 28672 -// WindowAvailable = (32768 < 28672) = false -// 4th dup-ACK: CongWin = 32768. WindowAvailable = (32768 < 32768) = false -// 5th dup-ACK: CongWin = 36864. WindowAvailable = (32768 < 36864) = true -// → window opened but WindowCh NOT signaled (bug) +// +// initial: CongWin = 8*MSS = 32768, BytesInFlight = 32768 +// 3rd dup-ACK (fast retransmit): +// SSThresh = 16384, CongWin = 16384+3*4096 = 28672 +// WindowAvailable = (32768 < 28672) = false +// 4th dup-ACK: CongWin = 32768. WindowAvailable = (32768 < 32768) = false +// 5th dup-ACK: CongWin = 36864. WindowAvailable = (32768 < 36864) = true +// → window opened but WindowCh NOT signaled (bug) // // GREEN assertion: after the 5th dup-ACK, WindowCh has a token because the // window became available. Against unpatched code WindowCh is empty and a @@ -59,8 +60,8 @@ func TestFastRecoveryExtraACKInflationSignalsWindowCh(t *testing.T) { // 8 in-flight segments, window exactly full. 
conn.RetxMu.Lock() conn.LastAck = 1000 - conn.CongWin = numSegs * MaxSegmentSize // 32768 - conn.SSThresh = conn.CongWin // high enough to be in AIMD territory + conn.CongWin = numSegs * MaxSegmentSize // 32768 + conn.SSThresh = conn.CongWin // high enough to be in AIMD territory for i := 0; i < numSegs; i++ { conn.Unacked = append(conn.Unacked, &retxEntry{ seq: uint32(1000 + i*MaxSegmentSize), diff --git a/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go b/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go index 5ce56748..a82d9cb5 100644 --- a/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go +++ b/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go @@ -62,7 +62,7 @@ func TestFastRecoveryExitDeflatesCongWin(t *testing.T) { // iter-59 gates the deflation on wasInRecovery; without this field the // test setup misrepresents the state and the deflation would not fire. conn.InRecovery = true - conn.FastRecovery = true // fast retransmit entered recovery (new episode, not timeout) + conn.FastRecovery = true // fast retransmit entered recovery (new episode, not timeout) conn.RecoveryPoint = 1000 + MaxSegmentSize // new ACK will reach this and clear it // Put one unacked entry so ProcessAck has something to remove when ack > LastAck conn.Unacked = []*retxEntry{ diff --git a/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go b/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go index 030405eb..cd736568 100644 --- a/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go +++ b/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go @@ -64,7 +64,7 @@ func TestFastRecoveryExitDeflationRequiresActualRecovery(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = seqA - conn.CongWin = InitialCongWin // 10*MSS + conn.CongWin = InitialCongWin // 10*MSS conn.SSThresh = initialSSThresh // 20*MSS (intentionally > InitialCongWin) conn.InRecovery = false conn.DupAckCount = 0 diff --git a/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go 
b/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go index dffd7651..5649226d 100644 --- a/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go +++ b/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go @@ -71,8 +71,8 @@ func TestFastRecoveryPartialAckNoAIMDInflation(t *testing.T) { c.InRecovery = true c.FastRecovery = true c.RecoveryPoint = recoveryPoint - c.SSThresh = 2 * MaxSegmentSize // 8192 - c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480 + c.SSThresh = 2 * MaxSegmentSize // 8192 + c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480 now := time.Now() c.Unacked = []*retxEntry{ diff --git a/pkg/daemon/fast_recovery_partial_ack_bug_test.go b/pkg/daemon/fast_recovery_partial_ack_bug_test.go index aba5456f..c09cc685 100644 --- a/pkg/daemon/fast_recovery_partial_ack_bug_test.go +++ b/pkg/daemon/fast_recovery_partial_ack_bug_test.go @@ -77,18 +77,18 @@ func TestPartialAckInFastRecoveryDoesNotDeflateToSSThresh(t *testing.T) { c := newAckTestConn(t) const ( - ssthresh = 5 * MaxSegmentSize // 20480 - fastRecoveryCongWin = ssthresh + 3*MaxSegmentSize // 32768 — entered fast recovery - seqA = uint32(1000) - seqB = seqA + MaxSegmentSize // 5096 - seqC = seqB + MaxSegmentSize // 9192 — RecoveryPoint (beyond partial ACK) + ssthresh = 5 * MaxSegmentSize // 20480 + fastRecoveryCongWin = ssthresh + 3*MaxSegmentSize // 32768 — entered fast recovery + seqA = uint32(1000) + seqB = seqA + MaxSegmentSize // 5096 + seqC = seqB + MaxSegmentSize // 9192 — RecoveryPoint (beyond partial ACK) ) c.LastAck = seqA c.SSThresh = ssthresh c.CongWin = fastRecoveryCongWin - c.DupAckCount = 3 // just triggered fast retransmit - c.InRecovery = true // in fast recovery + c.DupAckCount = 3 // just triggered fast retransmit + c.InRecovery = true // in fast recovery c.RecoveryPoint = seqC c.Unacked = []*retxEntry{ // seqA: already retransmitted (attempts=2), will be acked by partial ACK diff --git 
a/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go b/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go index f03339a0..6a6a2d8f 100644 --- a/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go +++ b/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go @@ -71,13 +71,13 @@ func TestFastRecoveryPostPartialAckDupAckInflation(t *testing.T) { // seqA was cumulatively acked and removed from Unacked. // DupAckCount reset to 0 by the new-ACK path in ProcessAck. // CongWin: deflated by MSS, add-back MSS (bytesAcked==SMSS, neutral) → 5*MSS. - c.LastAck = seqB // partial ACK advanced LastAck to seqB - c.DupAckCount = 0 // reset by the new-ACK path + c.LastAck = seqB // partial ACK advanced LastAck to seqB + c.DupAckCount = 0 // reset by the new-ACK path c.InRecovery = true c.FastRecovery = true c.RecoveryPoint = recoveryPoint - c.SSThresh = 2 * MaxSegmentSize // 8192 - c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480 + c.SSThresh = 2 * MaxSegmentSize // 8192 + c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480 now := time.Now() c.Unacked = []*retxEntry{ diff --git a/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go b/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go index 3550bde6..9ea0766a 100644 --- a/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go +++ b/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go @@ -79,12 +79,12 @@ func TestFastRecoveryThirdDupAckSameEpisodeInflation(t *testing.T) { // State after a partial ACK reset DupAckCount to 0 and 2 subsequent dup // ACKs have already inflated CongWin by 2*MSS (iter-82 fix): // DupAckCount=2, CongWin = SSThresh + 3*MSS + 2*MSS = 7*MSS = 28672 - c.LastAck = seqB // partial ACK set LastAck to seqB; 3rd dup ACK repeats it - c.DupAckCount = 2 // two dup ACKs have already fired since the reset + c.LastAck = seqB // partial ACK set LastAck to seqB; 3rd 
dup ACK repeats it + c.DupAckCount = 2 // two dup ACKs have already fired since the reset c.InRecovery = true c.FastRecovery = true c.RecoveryPoint = recoveryPoint - c.SSThresh = 2 * MaxSegmentSize // 8192 + c.SSThresh = 2 * MaxSegmentSize // 8192 c.CongWin = c.SSThresh + 3*MaxSegmentSize + 2*MaxSegmentSize // 7*MSS = 28672 now := time.Now() diff --git a/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go b/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go index 56081cca..e7b7599b 100644 --- a/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go +++ b/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go @@ -24,12 +24,13 @@ import ( // that was blocked by a full window may now have room — but is not told. // // Concrete example (window = 2 segments): -// initial: CongWin = 2*MSS = 8192, BytesInFlight = 8192 (window full) -// 3rd dup-ACK (fast retransmit): -// SSThresh = max(8192/2, MSS) = 4096 -// CongWin = 4096 + 3*4096 = 16384 -// WindowAvailable = (8192 < 16384) = true -// → window opened but WindowCh NOT signaled (bug) +// +// initial: CongWin = 2*MSS = 8192, BytesInFlight = 8192 (window full) +// 3rd dup-ACK (fast retransmit): +// SSThresh = max(8192/2, MSS) = 4096 +// CongWin = 4096 + 3*4096 = 16384 +// WindowAvailable = (8192 < 16384) = true +// → window opened but WindowCh NOT signaled (bug) // // This differs from the iter-39 bug (which fixed the DupAckCount>3 path): // the DupAckCount==3 path on entry to fast recovery also inflates CongWin for @@ -48,7 +49,7 @@ func TestFastRetransmitEntryInflatesWindowAndSignalsWindowCh(t *testing.T) { // 2 in-flight segments filling a 2-segment congestion window. 
conn.RetxMu.Lock() conn.LastAck = 1000 - conn.CongWin = numSegs * MaxSegmentSize // 8192 bytes + conn.CongWin = numSegs * MaxSegmentSize // 8192 bytes conn.SSThresh = 4 * conn.CongWin // high: won't constrain for i := 0; i < numSegs; i++ { conn.Unacked = append(conn.Unacked, &retxEntry{ diff --git a/pkg/daemon/fast_retransmit_max_attempts_bug_test.go b/pkg/daemon/fast_retransmit_max_attempts_bug_test.go index f5ef084d..8bb12f26 100644 --- a/pkg/daemon/fast_retransmit_max_attempts_bug_test.go +++ b/pkg/daemon/fast_retransmit_max_attempts_bug_test.go @@ -67,7 +67,7 @@ func TestFastRetransmitStopsAtMaxAttempts(t *testing.T) { conn.Unacked = []*retxEntry{{ seq: seqA, data: make([]byte, MaxSegmentSize), - attempts: MaxRetxAttempts, // already at the limit + attempts: MaxRetxAttempts, // already at the limit sentAt: time.Now().Add(-100 * time.Millisecond), // not past RTO sacked: false, }} diff --git a/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go b/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go index cbf3c337..abef6ef3 100644 --- a/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go +++ b/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go @@ -135,8 +135,8 @@ func TestFastRetransmitNoopDoesNotAdjustCongestionState(t *testing.T) { // InRecovery must remain false: no phantom recovery entry. // Bug: 'c.InRecovery = true' fires unconditionally when !c.InRecovery. 
if inRecovery { - t.Errorf("InRecovery=true after no-op fastRetransmit, want false; "+ - "phantom InRecovery was set even though no packet was sent; "+ + t.Errorf("InRecovery=true after no-op fastRetransmit, want false; " + + "phantom InRecovery was set even though no packet was sent; " + "fix: gate 'c.InRecovery = true' on fastRetransmit returning true", ) } diff --git a/pkg/daemon/ipc_async_write_test.go b/pkg/daemon/ipc_async_write_test.go index 7318c3f4..ff4604e2 100644 --- a/pkg/daemon/ipc_async_write_test.go +++ b/pkg/daemon/ipc_async_write_test.go @@ -129,7 +129,7 @@ func TestIPCConnAsyncWriteBackpressure(t *testing.T) { // Write since nothing reads. So we expect roughly buffer+1 to succeed // before backpressure. var ( - successes atomic.Int64 + successes atomic.Int64 gotPressure atomic.Bool ) deadline := time.After(3 * time.Second) diff --git a/pkg/daemon/ipc_dialcancel_leak_bug_test.go b/pkg/daemon/ipc_dialcancel_leak_bug_test.go index b68cd354..25eb7eba 100644 --- a/pkg/daemon/ipc_dialcancel_leak_bug_test.go +++ b/pkg/daemon/ipc_dialcancel_leak_bug_test.go @@ -40,8 +40,8 @@ func TestIPCDialCancelsLeakOnCompletedDials(t *testing.T) { for i := 0; i < N; i++ { _, cancel := context.WithCancel(context.Background()) id := ic.addDialCancel(cancel) - cancel() // simulate defer dialCancel() - ic.removeDialCancel(id) // v1.9.1 fix: remove after dial completes + cancel() // simulate defer dialCancel() + ic.removeDialCancel(id) // v1.9.1 fix: remove after dial completes } got := ic.dialCancelCount() diff --git a/pkg/daemon/keepalive_zero_window_probe_bug_test.go b/pkg/daemon/keepalive_zero_window_probe_bug_test.go index 63f227ef..3fb23ae0 100644 --- a/pkg/daemon/keepalive_zero_window_probe_bug_test.go +++ b/pkg/daemon/keepalive_zero_window_probe_bug_test.go @@ -73,7 +73,7 @@ func TestKeepaliveProbeWithWindowDoesNotStallPeer(t *testing.T) { SrcPort: remotePort, DstPort: localPort, Seq: 200, - Ack: 100, // == conn.LastAck (dup-ACK path, keepalive-like) + Ack: 100, // 
== conn.LastAck (dup-ACK path, keepalive-like) Window: senderRecvWin, // fixed: sender includes its recv window } @@ -146,7 +146,7 @@ func TestKeepaliveZeroWindowProbeStallsMechanism(t *testing.T) { "(unexpected; receiver should always update PeerRecvWin from pkt.Window)", peerRecvWin, avail) } else { - t.Logf("confirmed: zero-window probe sets PeerRecvWin=0, avail=false — "+ + t.Logf("confirmed: zero-window probe sets PeerRecvWin=0, avail=false — " + "500ms stall mechanism documented; fix: idleSweepLoop must include Window: conn.RecvWindow()") } } diff --git a/pkg/daemon/listener_closed_channel_bug_test.go b/pkg/daemon/listener_closed_channel_bug_test.go index f2d01874..804aff9e 100644 --- a/pkg/daemon/listener_closed_channel_bug_test.go +++ b/pkg/daemon/listener_closed_channel_bug_test.go @@ -51,7 +51,7 @@ func TestListenerSendAfterUnbindSafe(t *testing.T) { go func() { defer func() { if r := recover(); r != nil { - panicked <- true // panic → bug present + panicked <- true // panic → bug present } else { panicked <- false // clean → fix applied } diff --git a/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go b/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go index e087523b..09704f45 100644 --- a/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go +++ b/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go @@ -76,7 +76,7 @@ func TestNagleFlushAllSackedSendsImmediately(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = 3000 - conn.CongWin = 16 * MaxSegmentSize // large — window is open + conn.CongWin = 16 * MaxSegmentSize // large — window is open conn.SSThresh = conn.CongWin conn.PeerRecvWin = 16 * MaxSegmentSize // 3 entries, all sacked — peer has every byte. 
diff --git a/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go b/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go index c86f372b..9e96480d 100644 --- a/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go +++ b/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go @@ -93,8 +93,8 @@ func TestPeerRecvWinGrowthSignalsWindowCh(t *testing.T) { SrcPort: remotePort, DstPort: localPort, Seq: 200, - Ack: 500, // == conn.LastAck → dup-ACK (no new data acked) - Window: 2, // 2 segments → PeerRecvWin = 2*MSS + Ack: 500, // == conn.LastAck → dup-ACK (no new data acked) + Window: 2, // 2 segments → PeerRecvWin = 2*MSS } d.handleStreamPacket(windowUpdate) diff --git a/pkg/daemon/policy_runner.go b/pkg/daemon/policy_runner.go index fe5f269d..284c616f 100644 --- a/pkg/daemon/policy_runner.go +++ b/pkg/daemon/policy_runner.go @@ -25,8 +25,8 @@ type PolicyRunner struct { compiled *policy.CompiledPolicy daemon *Daemon - mu sync.RWMutex - peers map[uint32]*managedPeer // reuse managedPeer from managed.go + mu sync.RWMutex + peers map[uint32]*managedPeer // reuse managedPeer from managed.go // Peers that local evict / deny decisions removed from pr.peers. // Reconciler's applyMembershipDiff refuses to re-add entries during // the cooldown window — otherwise the next reconcile tick (5s) @@ -54,9 +54,9 @@ type PolicyRunner struct { // of latency to ANY other call (resolve_hostname, lookup, etc) that // shares regConn. Track consecutive failures and skip ticks until // the next backoff deadline. - fetchFailMu sync.Mutex - fetchFailures int // consecutive failure count - fetchSkipUntil time.Time // skip ticks before this time + fetchFailMu sync.Mutex + fetchFailures int // consecutive failure count + fetchSkipUntil time.Time // skip ticks before this time } // policySnapshot is the JSON format persisted to disk. 
diff --git a/pkg/daemon/ports.go b/pkg/daemon/ports.go index a2d9b3f1..1cb189fe 100644 --- a/pkg/daemon/ports.go +++ b/pkg/daemon/ports.go @@ -191,8 +191,8 @@ type Connection struct { // stale `SendSeq-1` that has drifted forward once data flowed. SynAckSeq uint32 SynAckSeqSet bool - SendBuf chan []byte - RecvBuf chan []byte + SendBuf chan []byte + RecvBuf chan []byte // Sliding window + retransmission (send side) RetxMu sync.Mutex Unacked []*retxEntry // ordered by seq @@ -1111,10 +1111,11 @@ func seqAfterOrEqual(a, b uint32) bool { // ACK number (next expected seq). // // Three-phase design to avoid both deadlock and sequence leaks: -// Phase 1: Collect segments to deliver under RecvMu (don't advance ExpectedSeq). -// Phase 2: Deliver outside lock (prevents routeLoop deadlock, C1 fix). -// Phase 3: Re-acquire lock, advance ExpectedSeq only for delivered segments, -// re-buffer undelivered OOO segments. +// +// Phase 1: Collect segments to deliver under RecvMu (don't advance ExpectedSeq). +// Phase 2: Deliver outside lock (prevents routeLoop deadlock, C1 fix). +// Phase 3: Re-acquire lock, advance ExpectedSeq only for delivered segments, +// re-buffer undelivered OOO segments. // // Safe because routeLoop is single-goroutine — no concurrent DeliverInOrder // calls for the same connection between Phase 2 and Phase 3. 
diff --git a/pkg/daemon/ports_logic_test.go b/pkg/daemon/ports_logic_test.go index 1896b46d..8952d9fe 100644 --- a/pkg/daemon/ports_logic_test.go +++ b/pkg/daemon/ports_logic_test.go @@ -464,7 +464,7 @@ func TestProcessAckThirdDupACKTriggersFastRetransmit(t *testing.T) { func TestProcessAckGrowsCongWinInSlowStart(t *testing.T) { c := newAckTestConn(t) c.LastAck = 1000 - c.CongWin = 4000 // < SSThresh → slow start + c.CongWin = 4000 // < SSThresh → slow start c.SSThresh = 50000 c.Unacked = []*retxEntry{{seq: 1000, data: make([]byte, 1000), attempts: 1, sentAt: time.Now()}} diff --git a/pkg/daemon/process_ack_resets_sack_state_bug_test.go b/pkg/daemon/process_ack_resets_sack_state_bug_test.go index 0ede5906..8577638f 100644 --- a/pkg/daemon/process_ack_resets_sack_state_bug_test.go +++ b/pkg/daemon/process_ack_resets_sack_state_bug_test.go @@ -52,7 +52,7 @@ func TestProcessAckPartialDoesNotResetSACKedState(t *testing.T) { // Three entries: A is in-flight (unsacked), B and C are at the peer (sacked). const ( seqA = uint32(1000) - seqB = uint32(1000 + MaxSegmentSize) // = 5096 + seqB = uint32(1000 + MaxSegmentSize) // = 5096 seqC = uint32(1000 + 2*MaxSegmentSize) // = 9192 ) @@ -104,15 +104,15 @@ func TestProcessAckPartialDoesNotResetSACKedState(t *testing.T) { // B must retain sacked=true — the peer confirmed receiving it via SACK; // a partial ACK that doesn't cover B must not discard that information. if !bSacked { - t.Errorf("ProcessAck partial ACK for A: B.sacked=false, want true; "+ - "RFC 2018 §5 requires retaining SACK state above cumulative ACK; "+ + t.Errorf("ProcessAck partial ACK for A: B.sacked=false, want true; " + + "RFC 2018 §5 requires retaining SACK state above cumulative ACK; " + "fix: remove 'e.sacked = false' from the remaining-entries loop in ProcessAck", ) } // C must retain sacked=true for the same reason. 
if !cSacked { - t.Errorf("ProcessAck partial ACK for A: C.sacked=false, want true; "+ + t.Errorf("ProcessAck partial ACK for A: C.sacked=false, want true; " + "RFC 2018 §5 requires retaining SACK state above cumulative ACK", ) } diff --git a/pkg/daemon/process_sack_wraparound_bug_test.go b/pkg/daemon/process_sack_wraparound_bug_test.go index 51c54a0b..bb6fb526 100644 --- a/pkg/daemon/process_sack_wraparound_bug_test.go +++ b/pkg/daemon/process_sack_wraparound_bug_test.go @@ -77,15 +77,15 @@ func TestProcessSACKWraparound(t *testing.T) { // FIXED: use seqAfterOrEqual for both sides of the containment check. if !sackedA { - t.Errorf("segment A (seq=0xFFFFF000, segEnd=0xFFFFF010) not sacked "+ - "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'segEnd <= Right' "+ - "comparison fails at uint32 wraparound (0xFFFFF010 <= 0x00001000 = false); "+ + t.Errorf("segment A (seq=0xFFFFF000, segEnd=0xFFFFF010) not sacked " + + "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'segEnd <= Right' " + + "comparison fails at uint32 wraparound (0xFFFFF010 <= 0x00001000 = false); " + "fix: use seqAfterOrEqual(b.Right, segEnd)") } if !sackedB { - t.Errorf("segment B (seq=0x00000010, segEnd=0x00000020) not sacked "+ - "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'e.seq >= Left' "+ - "comparison fails at uint32 wraparound (0x00000010 >= 0xFFFFF000 = false); "+ + t.Errorf("segment B (seq=0x00000010, segEnd=0x00000020) not sacked " + + "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'e.seq >= Left' " + + "comparison fails at uint32 wraparound (0x00000010 >= 0xFFFFF000 = false); " + "fix: use seqAfterOrEqual(e.seq, b.Left)") } } diff --git a/pkg/daemon/remove_peer_leak_bug_test.go b/pkg/daemon/remove_peer_leak_bug_test.go index d74f1f22..c7c946c9 100644 --- a/pkg/daemon/remove_peer_leak_bug_test.go +++ b/pkg/daemon/remove_peer_leak_bug_test.go @@ -13,15 +13,15 @@ import ( // // Symptom: TunnelManager has eight per-peer maps that get populated // during the lifetime of a 
peer relationship: -// 1. peers — populated on AddPeer/handleEncrypted/key-exchange -// 2. crypto — populated on key-exchange paths -// 3. lastOutboundSend — populated on every writeFrame success (iter 7) -// 4. sendErrCount — populated on ICMP-unreachable errors (iter 8) -// 5. lastDirectRecv — populated on every authenticated decrypt (iter 5/3) -// 6. blackholeMissCount — populated by writeFrame's hysteresis (iter 3) -// 7. directClearCount — populated by clearRelayOnDirectLocked (iter 3) -// 8. relayPeers — populated by relay flip / SetRelayPeer / iter 8 -// 9. peerPubKeys — populated on auth key-exchange +// 1. peers — populated on AddPeer/handleEncrypted/key-exchange +// 2. crypto — populated on key-exchange paths +// 3. lastOutboundSend — populated on every writeFrame success (iter 7) +// 4. sendErrCount — populated on ICMP-unreachable errors (iter 8) +// 5. lastDirectRecv — populated on every authenticated decrypt (iter 5/3) +// 6. blackholeMissCount — populated by writeFrame's hysteresis (iter 3) +// 7. directClearCount — populated by clearRelayOnDirectLocked (iter 3) +// 8. relayPeers — populated by relay flip / SetRelayPeer / iter 8 +// 9. peerPubKeys — populated on auth key-exchange // 10. pendingRekey — populated by markPendingRekey (rkPendingMu) // 11. lastInboundDecrypt — populated by recordInboundDecrypt (rkPendingMu) // diff --git a/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go b/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go index 65a86076..34040238 100644 --- a/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go +++ b/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go @@ -120,9 +120,9 @@ func TestTimeoutSSThreshUsesFlightSizeNotCongWin(t *testing.T) { // Bug: ssthresh = CongWin/2 = 40960/2 = 20480 — uses window capacity, not // actual bytes outstanding. 
const ( - flightSizeBytes = MaxSegmentSize // 4096 — only entry in Unacked - wantSSThresh = 2 * MaxSegmentSize // max(4096/2=2048, 2*MSS=8192) - badSSThresh = initialCongWin / 2 // 20480 — CongWin/2 (wrong) + flightSizeBytes = MaxSegmentSize // 4096 — only entry in Unacked + wantSSThresh = 2 * MaxSegmentSize // max(4096/2=2048, 2*MSS=8192) + badSSThresh = initialCongWin / 2 // 20480 — CongWin/2 (wrong) ) if ssthresh != wantSSThresh { t.Errorf("RTO retransmit with CongWin=%d, FlightSize=%d: SSThresh=%d, want %d "+ diff --git a/pkg/daemon/retx_sacked_ordering_break_bug_test.go b/pkg/daemon/retx_sacked_ordering_break_bug_test.go index d47b2bd3..1195a3b3 100644 --- a/pkg/daemon/retx_sacked_ordering_break_bug_test.go +++ b/pkg/daemon/retx_sacked_ordering_break_bug_test.go @@ -27,21 +27,23 @@ import ( // The comment "segments are ordered by time; if first hasn't timed out, none // have" is correct only when Unacked entries are in strict sentAt order. // That ordering invariant breaks when: -// 1. A leading segment is SACKed (skipped by continue). -// 2. The first non-sacked entry is recent (sentAt updated by a prior -// retransmit of that exact segment). -// 3. A later non-sacked entry was originally sent much earlier (sentAt -// predates the retransmit) and is therefore timed out. +// 1. A leading segment is SACKed (skipped by continue). +// 2. The first non-sacked entry is recent (sentAt updated by a prior +// retransmit of that exact segment). +// 3. A later non-sacked entry was originally sent much earlier (sentAt +// predates the retransmit) and is therefore timed out. 
// // Concretely: -// Unacked[0]: sacked=true, sentAt=T-2s (very old, SACK-skipped) -// Unacked[1]: sacked=false, sentAt=T-50ms (recently retransmitted; B) -// Unacked[2]: sacked=false, sentAt=T-2s (original send; C — timed out) +// +// Unacked[0]: sacked=true, sentAt=T-2s (very old, SACK-skipped) +// Unacked[1]: sacked=false, sentAt=T-50ms (recently retransmitted; B) +// Unacked[2]: sacked=false, sentAt=T-2s (original send; C — timed out) // // With RTO=200ms: -// Entry[0] sacked → continue -// Entry[1] B: now-50ms = 50ms NOT > 200ms → break (before C is checked!) -// Entry[2] C: timed out but NEVER REACHED +// +// Entry[0] sacked → continue +// Entry[1] B: now-50ms = 50ms NOT > 200ms → break (before C is checked!) +// Entry[2] C: timed out but NEVER REACHED // // Consequence: C sits in Unacked indefinitely, never retransmitted until B // is finally ACKed (removing it from Unacked) and C becomes the first entry. @@ -88,13 +90,13 @@ func TestRetransmitUnackedBreakSkipsTimedOutEntryAfterRecentNonSacked(t *testing // FAILS against unpatched code: no packet is sent because B (not timed out) // fires the break before C is ever checked. 
if len(pkts) == 0 { - t.Errorf("retransmitUnacked with [A(sacked), B(recent,not-timeout), C(timed-out)]: "+ - "expected 1 retransmit (C), got 0; "+ - "'break' after B fires before C is checked — sacked entry A causes "+ - "B to be the first non-sacked entry examined; B's sentAt is recent "+ - "(retransmit updated it) so it is not timed out and the break fires; "+ - "C's older sentAt makes it timed out but it is never reached; "+ - "fix: replace 'break' with 'continue' so all non-sacked entries "+ + t.Errorf("retransmitUnacked with [A(sacked), B(recent,not-timeout), C(timed-out)]: " + + "expected 1 retransmit (C), got 0; " + + "'break' after B fires before C is checked — sacked entry A causes " + + "B to be the first non-sacked entry examined; B's sentAt is recent " + + "(retransmit updated it) so it is not timed out and the break fires; " + + "C's older sentAt makes it timed out but it is never reached; " + + "fix: replace 'break' with 'continue' so all non-sacked entries " + "are checked for timeout regardless of ordering") } if len(pkts) == 1 && string(pkts[0].Payload) != "C" { diff --git a/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go b/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go index 342b45da..06da87ea 100644 --- a/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go +++ b/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go @@ -31,8 +31,8 @@ import ( // the second partial ACK, oldDupAckCount < 3 and the condition is false — // step 6 never runs for the second partial ACK: // -// - fastRetransmit not called → first still-unacked segment not retransmitted -// - cwnd not deflated → AIMD growth fires instead, inflating cwnd +// - fastRetransmit not called → first still-unacked segment not retransmitted +// - cwnd not deflated → AIMD growth fires instead, inflating cwnd // // Concrete scenario (after entering fast recovery): // @@ -82,7 +82,7 @@ func TestSecondPartialAckInFastRecoveryRetransmitsFirstUnacked(t 
*testing.T) { c.InRecovery = true c.FastRecovery = true // set by fast retransmit entry (ProcessAck DupAckCount==3 path) c.RecoveryPoint = seqD - c.DupAckCount = 1 // only 1 dup ACK between partial ACKs (< 3 threshold) + c.DupAckCount = 1 // only 1 dup ACK between partial ACKs (< 3 threshold) c.SSThresh = 5 * MaxSegmentSize c.CongWin = c.SSThresh + 3*MaxSegmentSize // 32768 — fast recovery inflated @@ -107,11 +107,11 @@ func TestSecondPartialAckInFastRecoveryRetransmitsFirstUnacked(t *testing.T) { pkts := cs.all() if len(pkts) == 0 { - t.Errorf("RFC 6582 §3 step 6a: second partial ACK in fast recovery did not "+ - "call fastRetransmit; bug: step 6 is gated on oldDupAckCount >= 3, but "+ - "DupAckCount was reset to 0 by the first partial ACK and only reached 1 "+ - "before the second partial ACK; fix: track fast-recovery entry with "+ - "FastRecovery bool field, use '(oldDupAckCount >= 3 || wasFastRecovery) && "+ + t.Errorf("RFC 6582 §3 step 6a: second partial ACK in fast recovery did not " + + "call fastRetransmit; bug: step 6 is gated on oldDupAckCount >= 3, but " + + "DupAckCount was reset to 0 by the first partial ACK and only reached 1 " + + "before the second partial ACK; fix: track fast-recovery entry with " + + "FastRecovery bool field, use '(oldDupAckCount >= 3 || wasFastRecovery) && " + "wasInRecovery' so that every partial ACK during fast recovery fires step 6") } } diff --git a/pkg/daemon/rto_backoff_in_recovery_bug_test.go b/pkg/daemon/rto_backoff_in_recovery_bug_test.go index a82b86da..82851c81 100644 --- a/pkg/daemon/rto_backoff_in_recovery_bug_test.go +++ b/pkg/daemon/rto_backoff_in_recovery_bug_test.go @@ -65,10 +65,10 @@ func TestRetransmitUnackedDoublesRTOOnEachTimeout(t *testing.T) { conn.RetxMu.Lock() conn.RTO = startRTO - conn.InRecovery = true // simulate: already in recovery from first timeout - conn.RecoveryPoint = 9999 // high — won't exit recovery during this test - conn.CongWin = InitialCongWin // already reduced by first timeout - 
conn.SSThresh = MaxSegmentSize // already reduced + conn.InRecovery = true // simulate: already in recovery from first timeout + conn.RecoveryPoint = 9999 // high — won't exit recovery during this test + conn.CongWin = InitialCongWin // already reduced by first timeout + conn.SSThresh = MaxSegmentSize // already reduced conn.LastAck = 1000 conn.Unacked = []*retxEntry{ // attempts=2: retransmitted once already — Karn's algorithm applies, diff --git a/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go b/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go index 7daf4233..f8b17dd6 100644 --- a/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go +++ b/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go @@ -42,9 +42,9 @@ import ( // With the bug (two updateRTT calls starting from SRTT=0): // - Call 1: SRTT = 1000 ms, RTTVAR = 500 ms // - Call 2: diff = |1000-10| = 990 ms -// RTTVAR = 500*3/4 + 990/4 = 375+247.5 = 622.5 ms -// SRTT = 1000*7/8 + 10/8 = 875+1.25 = 876.25 ms -// → SRTT ≈ 876 ms, pulled down by the short-RTT segment +// RTTVAR = 500*3/4 + 990/4 = 375+247.5 = 622.5 ms +// SRTT = 1000*7/8 + 10/8 = 875+1.25 = 876.25 ms +// → SRTT ≈ 876 ms, pulled down by the short-RTT segment // // With the fix (one updateRTT call using the first/oldest acked segment): // - SRTT = 1000 ms (first measurement, set directly per RFC 6298 §2.2) diff --git a/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go b/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go index 2c261838..5f95222c 100644 --- a/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go +++ b/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go @@ -60,7 +60,7 @@ func TestRTTUpdateSkippedForSackedSegments(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = 1000 - conn.SRTT = 0 // initial state — no RTT measurements yet + conn.SRTT = 0 // initial state — no RTT measurements yet conn.RTTVAR = 0 conn.RTO = InitialRTO conn.Unacked = []*retxEntry{ @@ -68,7 +68,7 @@ func TestRTTUpdateSkippedForSackedSegments(t *testing.T) { seq: 1000, 
data: make([]byte, segLen), attempts: 1, - sacked: true, // peer reported this segment via SACK already + sacked: true, // peer reported this segment via SACK already sentAt: time.Now().Add(-5 * time.Millisecond), // sent 5ms ago }, } @@ -87,14 +87,14 @@ func TestRTTUpdateSkippedForSackedSegments(t *testing.T) { // the cumulative ACK stay at SRTT=0/RTO=InitialRTO forever, using a // 1s retransmit timeout on connections that may have 1ms RTT. if srtt == 0 { - t.Errorf("ProcessAck with sacked segment (attempts=1, sacked=true): SRTT=0 "+ - "after cumulative ACK covers the segment; "+ - "'!e.sacked' guard prevents updateRTT for segments the peer already "+ - "confirmed via SACK, so when all in-flight data is SACKed before the "+ - "cumulative ACK arrives (common on SACK-heavy connections), the sender "+ - "never refines SRTT from InitialRTO=1s, causing spurious retransmission "+ - "and slow loss recovery; fix: remove the !e.sacked condition — "+ - "once-sent segments (attempts==1) always yield valid RTT samples per "+ + t.Errorf("ProcessAck with sacked segment (attempts=1, sacked=true): SRTT=0 " + + "after cumulative ACK covers the segment; " + + "'!e.sacked' guard prevents updateRTT for segments the peer already " + + "confirmed via SACK, so when all in-flight data is SACKed before the " + + "cumulative ACK arrives (common on SACK-heavy connections), the sender " + + "never refines SRTT from InitialRTO=1s, causing spurious retransmission " + + "and slow loss recovery; fix: remove the !e.sacked condition — " + + "once-sent segments (attempts==1) always yield valid RTT samples per " + "RFC 6298 regardless of SACK state") } } diff --git a/pkg/daemon/sack_blocks_wraparound_bug_test.go b/pkg/daemon/sack_blocks_wraparound_bug_test.go index 1bfaff4c..513b736e 100644 --- a/pkg/daemon/sack_blocks_wraparound_bug_test.go +++ b/pkg/daemon/sack_blocks_wraparound_bug_test.go @@ -101,8 +101,10 @@ func TestSACKBlocksWraparound(t *testing.T) { // // seg1: [0xFFFFF000, 0x00000000) 
(wraps to 0; segEnd = 0x00000000 after add) // Wait — use seg1 that ends exactly where seg2 begins: -// seg1: [0xFFFFFF00, 0xFFFFFFFF+1) = [0xFFFFFF00, 0x00000000) -// seg2: [0x00000000, 0x00000010) +// +// seg1: [0xFFFFFF00, 0xFFFFFFFF+1) = [0xFFFFFF00, 0x00000000) +// seg2: [0x00000000, 0x00000010) +// // They are contiguous: seg1's right edge == seg2's left edge. func TestSACKBlocksWraparoundContiguous(t *testing.T) { pm := NewPortManager() diff --git a/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go b/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go index bf4ae180..68b38eca 100644 --- a/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go +++ b/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go @@ -70,8 +70,8 @@ func TestSACKCumulativeAckDoesNotInflateBytesAcked(t *testing.T) { conn.RetxMu.Lock() conn.LastAck = seqA // In slow start: CongWin < SSThresh so every new ACK grows CongWin by bytesAcked. - conn.CongWin = InitialCongWin // 10*MSS = 40960 - conn.SSThresh = 40 * MaxSegmentSize // 163840 — well above CongWin + conn.CongWin = InitialCongWin // 10*MSS = 40960 + conn.SSThresh = 40 * MaxSegmentSize // 163840 — well above CongWin conn.InRecovery = false conn.DupAckCount = 0 conn.Unacked = []*retxEntry{ diff --git a/pkg/daemon/sendbuf_caller_bug_test.go b/pkg/daemon/sendbuf_caller_bug_test.go index a478c67c..58586a27 100644 --- a/pkg/daemon/sendbuf_caller_bug_test.go +++ b/pkg/daemon/sendbuf_caller_bug_test.go @@ -48,6 +48,7 @@ import ( // - Surface a non-transient error only if the connection is // actually broken (not Established, peer closed, etc.) OR if a // deadline elapses +// // This keeps net.Conn semantics intact: callers see Write block // briefly under back-pressure (just like a real TCP socket whose // kernel send buffer is full), then succeed. 
diff --git a/pkg/daemon/services.go b/pkg/daemon/services.go index 4f436a13..394913ec 100644 --- a/pkg/daemon/services.go +++ b/pkg/daemon/services.go @@ -1409,9 +1409,9 @@ func (d *Daemon) handleTaskResults(adapter *connAdapter, conn *Connection, frame slog.Info("tasksubmit: polo scores updated", "task_id", msg.TaskID, "receiver_reward", reward) d.webhook.Emit("polo.updated", map[string]interface{}{ - "task_id": msg.TaskID, - "submitter_delta": -1, - "receiver_reward": reward, + "task_id": msg.TaskID, + "submitter_delta": -1, + "receiver_reward": reward, }) } } diff --git a/pkg/daemon/ss_growth_abc_cap_bug_test.go b/pkg/daemon/ss_growth_abc_cap_bug_test.go index 8046c19f..285d73d5 100644 --- a/pkg/daemon/ss_growth_abc_cap_bug_test.go +++ b/pkg/daemon/ss_growth_abc_cap_bug_test.go @@ -75,9 +75,9 @@ func TestSSGrowthCapsIncrementAt2SMSS(t *testing.T) { // 2*SMSS = 8192. // Bug: cwnd += 12288 (3*MSS) uses raw bytes_acked exceeding RFC limit. const ( - wantIncrement = 2 * MaxSegmentSize // 8192 - bugIncrement = 3 * MaxSegmentSize // 12288 - wantCongWin = initialCongWin + wantIncrement // 20480 + wantIncrement = 2 * MaxSegmentSize // 8192 + bugIncrement = 3 * MaxSegmentSize // 12288 + wantCongWin = initialCongWin + wantIncrement // 20480 ) if c.CongWin != wantCongWin { t.Errorf("SS growth with bytes_acked=3*SMSS: CongWin=%d, want %d "+ diff --git a/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go b/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go index 09ef7867..63cdb59a 100644 --- a/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go +++ b/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go @@ -111,9 +111,9 @@ func TestFastRetransmitSSThreshUsesFlightSizeNotCongWin(t *testing.T) { // window capacity, not the measured flight size; using it overestimates SSThresh // and causes the connection to resume from a rate higher than what caused the loss. 
const ( - flightSize = 3 * MaxSegmentSize // 12288 — sum of all Unacked - wantSSThresh = 2 * MaxSegmentSize // max(flightSize/2=6144, 2*MSS=8192) - badSSThresh = initialCongWin / 2 // 20480 — what CongWin/2 produces + flightSize = 3 * MaxSegmentSize // 12288 — sum of all Unacked + wantSSThresh = 2 * MaxSegmentSize // max(flightSize/2=6144, 2*MSS=8192) + badSSThresh = initialCongWin / 2 // 20480 — what CongWin/2 produces ) if ssthresh != wantSSThresh { t.Errorf("fast retransmit with CongWin=%d, FlightSize=%d: SSThresh=%d, want %d "+ diff --git a/pkg/daemon/ssthresh_floor_two_mss_bug_test.go b/pkg/daemon/ssthresh_floor_two_mss_bug_test.go index 9b459649..5e788ecc 100644 --- a/pkg/daemon/ssthresh_floor_two_mss_bug_test.go +++ b/pkg/daemon/ssthresh_floor_two_mss_bug_test.go @@ -69,9 +69,9 @@ func TestFastRetransmitSSThreshFloorTwoSMSS(t *testing.T) { conn.RetxStop = make(chan struct{}) const ( - lastAck = uint32(1000) - initialCongWin = 2 * MaxSegmentSize // 8192 — floor scenario - highSSThresh = 40 * MaxSegmentSize // >> CongWin, not the binding constraint + lastAck = uint32(1000) + initialCongWin = 2 * MaxSegmentSize // 8192 — floor scenario + highSSThresh = 40 * MaxSegmentSize // >> CongWin, not the binding constraint ) conn.Mu.Lock() diff --git a/pkg/daemon/throughput_bench_test.go b/pkg/daemon/throughput_bench_test.go index 7502151a..ee40f15b 100644 --- a/pkg/daemon/throughput_bench_test.go +++ b/pkg/daemon/throughput_bench_test.go @@ -32,7 +32,7 @@ import ( const benchTransferBytes = 4 * 1024 * 1024 // 4 MB -func BenchmarkThroughputNoLoss(b *testing.B) { runThroughputBench(b, 0.000) } +func BenchmarkThroughputNoLoss(b *testing.B) { runThroughputBench(b, 0.000) } func BenchmarkThroughput01PctLoss(b *testing.B) { runThroughputBench(b, 0.001) } func BenchmarkThroughput1PctLoss(b *testing.B) { runThroughputBench(b, 0.010) } func BenchmarkThroughput5PctLoss(b *testing.B) { runThroughputBench(b, 0.050) } @@ -68,10 +68,10 @@ func TestThroughputReport(t *testing.T) { } 
type result struct { - name string - rate float64 - elapsed time.Duration - mbps float64 + name string + rate float64 + elapsed time.Duration + mbps float64 retransmits int } @@ -240,7 +240,7 @@ func simulateTransferFull(lossRate float64, totalBytes int, seed int64) int { // Re-initialize Unacked so TrackSend uses our sim time epoch. // TrackSend sets sentAt=time.Now() internally; we overwrite after each call. - sendIdx := 0 // next segment index to send + sendIdx := 0 // next segment index to send noProgressRounds := 0 maxIterations := totalSegs * 200 // safety bound; sim converges fast diff --git a/pkg/daemon/timeout_cwnd_reset_bug_test.go b/pkg/daemon/timeout_cwnd_reset_bug_test.go index 5ce01563..853d9662 100644 --- a/pkg/daemon/timeout_cwnd_reset_bug_test.go +++ b/pkg/daemon/timeout_cwnd_reset_bug_test.go @@ -75,9 +75,9 @@ func TestTimeoutResetsCongWinTo1SMSS(t *testing.T) { conn.RetxStop = make(chan struct{}) const ( - seqA = uint32(1000) - initialCongWin = 20 * MaxSegmentSize // 81920 — large established window - initialSSThresh = 10 * MaxSegmentSize // 40960 — initial ssthresh + seqA = uint32(1000) + initialCongWin = 20 * MaxSegmentSize // 81920 — large established window + initialSSThresh = 10 * MaxSegmentSize // 40960 — initial ssthresh ) conn.Mu.Lock() diff --git a/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go b/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go index a3ba82fe..e467d3ac 100644 --- a/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go +++ b/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go @@ -76,9 +76,9 @@ func TestTimeoutDuringFastRecoveryRecomputesSSThresh(t *testing.T) { conn.RetxStop = make(chan struct{}) const ( - seqA = uint32(1000) - fastRecoverySSThresh = 5 * MaxSegmentSize // 20480 — from a larger flightSize - fastRecoveryCongWin = fastRecoverySSThresh + 3*MaxSegmentSize + seqA = uint32(1000) + fastRecoverySSThresh = 5 * MaxSegmentSize // 20480 — from a larger flightSize + fastRecoveryCongWin = fastRecoverySSThresh + 
3*MaxSegmentSize ) conn.Mu.Lock() diff --git a/pkg/daemon/tunnel.go b/pkg/daemon/tunnel.go index d1a993f6..c454514b 100644 --- a/pkg/daemon/tunnel.go +++ b/pkg/daemon/tunnel.go @@ -104,7 +104,6 @@ const salvageMaxEntries = 4 // margin for slow handshakes under loss. const salvageMaxAge = 5 * time.Second - // decryptFailDropThreshold is how many consecutive AEAD-authentication // failures from a single peer trigger a full peerCrypto drop + // re-handshake. Sized to swallow a small burst of legitimate packet @@ -204,8 +203,8 @@ type TunnelManager struct { // Rate-limit rekey-request responses triggered by "encrypted packet but no // key" events. Prevents amplification if a peer floods us with gibberish. - rekeyMu sync.Mutex - lastRekeyReq map[uint32]time.Time + rekeyMu sync.Mutex + lastRekeyReq map[uint32]time.Time // P1-010 tunnel-state half: track in-flight key exchanges so a single // dropped reply under packet loss doesn't leave the tunnel wedged for @@ -221,7 +220,6 @@ type TunnelManager struct { beaconAddr *net.UDPAddr // beacon address for punch/relay relayPeers map[uint32]bool // peers that need relay (symmetric NAT) - // relayPinned marks peers whose relay flag was set by an authoritative // signal (registry's relay_only=true on the resolve response, OR an // operator forcing relay via SetRelayPeer with pin=true). For pinned diff --git a/pkg/daemon/tunnel_blackhole_bug_test.go b/pkg/daemon/tunnel_blackhole_bug_test.go index c9f6d285..6170106b 100644 --- a/pkg/daemon/tunnel_blackhole_bug_test.go +++ b/pkg/daemon/tunnel_blackhole_bug_test.go @@ -40,13 +40,13 @@ import ( // - 50 MB direct transfer iter 3: 4.27 MB/s // // What v1.9.x's tunnel-stability fix should change. Any of: -// 1. Skip the flip if conn-level retransmit budget hasn't been -// exhausted (direct path is actively being used; absence of recv -// ACK means the peer is slow, not unreachable). -// 2. 
Require N consecutive 8 s gaps WITH active sends in between -// (transient pause shouldn't latch the relay flag). -// 3. Raise the threshold (30 s+) under normal load and only drop -// it when the application explicitly requests fast-failover. +// 1. Skip the flip if conn-level retransmit budget hasn't been +// exhausted (direct path is actively being used; absence of recv +// ACK means the peer is slow, not unreachable). +// 2. Require N consecutive 8 s gaps WITH active sends in between +// (transient pause shouldn't latch the relay flag). +// 3. Raise the threshold (30 s+) under normal load and only drop +// it when the application explicitly requests fast-failover. // // This test pins the CURRENT behavior so the fix has a concrete // regression target. After the fix, the assertion below flips: diff --git a/pkg/daemon/tunnel_desync_salvage_test.go b/pkg/daemon/tunnel_desync_salvage_test.go index 158f475b..406b0de1 100644 --- a/pkg/daemon/tunnel_desync_salvage_test.go +++ b/pkg/daemon/tunnel_desync_salvage_test.go @@ -103,9 +103,9 @@ func TestRecordSalvageNilPCIsNoop(t *testing.T) { func TestReplaySalvageNilArgsIsNoop(t *testing.T) { tm := NewTunnelManager() pc := fakePC(t) - tm.replaySalvage(nil, pc, 1, nil) // oldPC nil - tm.replaySalvage(pc, nil, 1, nil) // newPC nil - tm.replaySalvage(pc, pc, 1, nil) // addr nil + tm.replaySalvage(nil, pc, 1, nil) // oldPC nil + tm.replaySalvage(pc, nil, 1, nil) // newPC nil + tm.replaySalvage(pc, pc, 1, nil) // addr nil // no panic = pass } diff --git a/pkg/daemon/tunnel_handle_test.go b/pkg/daemon/tunnel_handle_test.go index e8352c58..55461dcf 100644 --- a/pkg/daemon/tunnel_handle_test.go +++ b/pkg/daemon/tunnel_handle_test.go @@ -711,7 +711,6 @@ func TestHandlersAreRobustAgainstEmptyData(_ *testing.T) { // Ensure compile-time coverage of stdlib imports unused in some builds var _ = fmt.Errorf - // setupAuthKeyExchangeTest builds the common scaffolding: a tunnel // manager with encryption enabled, a peer Ed25519 identity 
registered, // and a valid signed auth key_exchange frame ready to feed into diff --git a/pkg/daemon/window_update_dup_ack_count_bug_test.go b/pkg/daemon/window_update_dup_ack_count_bug_test.go index 2dacf598..b420c84b 100644 --- a/pkg/daemon/window_update_dup_ack_count_bug_test.go +++ b/pkg/daemon/window_update_dup_ack_count_bug_test.go @@ -118,7 +118,7 @@ func TestWindowUpdateDoesNotIncrementDupAckCount(t *testing.T) { SrcPort: remotePort, DstPort: localPort, Seq: uint32(100 + i), - Ack: 500, // == conn.LastAck → same cumulative ACK + Ack: 500, // == conn.LastAck → same cumulative ACK Window: winSegs, // grows: 2, 3, 4 segments } d.handleStreamPacket(pkt) diff --git a/pkg/daemon/window_update_wakeup_bug_test.go b/pkg/daemon/window_update_wakeup_bug_test.go index 5e9a5d94..c404a130 100644 --- a/pkg/daemon/window_update_wakeup_bug_test.go +++ b/pkg/daemon/window_update_wakeup_bug_test.go @@ -115,8 +115,8 @@ func TestWindowUpdateDoesNotWakeSender(t *testing.T) { SrcPort: remotePort, DstPort: localPort, Seq: 500, - Ack: 1000, // == LastAck — dup-ACK path in ProcessAck - Window: 1, // non-zero: peer's window just opened + Ack: 1000, // == LastAck — dup-ACK path in ProcessAck + Window: 1, // non-zero: peer's window just opened } d.handleStreamPacket(windowUpdatePkt) @@ -126,14 +126,14 @@ func TestWindowUpdateDoesNotWakeSender(t *testing.T) { case <-conn.WindowCh: // good — sender wakes up promptly case <-time.After(100 * time.Millisecond): - t.Errorf("window-update ACK (Ack=LastAck, Window=1 with PeerRecvWin=0) did not "+ - "signal conn.WindowCh within 100ms; "+ - "handleStreamPacket updates PeerRecvWin but never signals WindowCh; "+ - "ProcessAck is called with ack=LastAck (dup-ACK path) which returns "+ - "before the WindowCh signal at the bottom of the new-ACK path; "+ - "sendSegment blocked on WindowCh will not wake until the next "+ - "zero-window probe timer fires (up to 30s with exponential backoff); "+ - "fix: in handleStreamPacket, signal conn.WindowCh after 
setting "+ + t.Errorf("window-update ACK (Ack=LastAck, Window=1 with PeerRecvWin=0) did not " + + "signal conn.WindowCh within 100ms; " + + "handleStreamPacket updates PeerRecvWin but never signals WindowCh; " + + "ProcessAck is called with ack=LastAck (dup-ACK path) which returns " + + "before the WindowCh signal at the bottom of the new-ACK path; " + + "sendSegment blocked on WindowCh will not wake until the next " + + "zero-window probe timer fires (up to 30s with exponential backoff); " + + "fix: in handleStreamPacket, signal conn.WindowCh after setting " + "PeerRecvWin when transitioning from 0 to > 0") } } diff --git a/pkg/daemon/zero_window_peerrecvwin_bug_test.go b/pkg/daemon/zero_window_peerrecvwin_bug_test.go index 18fb674a..b6cd8112 100644 --- a/pkg/daemon/zero_window_peerrecvwin_bug_test.go +++ b/pkg/daemon/zero_window_peerrecvwin_bug_test.go @@ -61,13 +61,13 @@ func TestZeroWindowAdvertisementNotHonored(t *testing.T) { // FAILS against unpatched code: PeerRecvWin>0 guard treats 0 as "unknown", // EffectiveWindow returns CongWin=InitialCongWin, WindowAvailable=true. 
if available { - t.Errorf("zero-window advertisement not honored: WindowAvailable()=true "+ - "when PeerRecvWin=0 (peer sent Window=0); "+ - "EffectiveWindow() guard 'c.PeerRecvWin > 0' treats 0 as the "+ - "uninitialized/unknown sentinel, so an explicit zero-window "+ - "advertisement is silently ignored and the sender is allowed to "+ - "transmit data that the peer has no buffer space to accept; "+ - "fix: initialize PeerRecvWin to -1 in NewConnection and change "+ + t.Errorf("zero-window advertisement not honored: WindowAvailable()=true " + + "when PeerRecvWin=0 (peer sent Window=0); " + + "EffectiveWindow() guard 'c.PeerRecvWin > 0' treats 0 as the " + + "uninitialized/unknown sentinel, so an explicit zero-window " + + "advertisement is silently ignored and the sender is allowed to " + + "transmit data that the peer has no buffer space to accept; " + + "fix: initialize PeerRecvWin to -1 in NewConnection and change " + "the guard to c.PeerRecvWin >= 0") } } diff --git a/pkg/registry/binary_client_test.go b/pkg/registry/binary_client_test.go index cdbb1ed4..2ff37ed1 100644 --- a/pkg/registry/binary_client_test.go +++ b/pkg/registry/binary_client_test.go @@ -22,12 +22,12 @@ import ( // --- fakeBinaryServer: minimal TCP server speaking the binary wire protocol --- type fakeBinaryServer struct { - ln net.Listener - handler func(msgType byte, payload []byte) (respType byte, respPayload []byte) - mu sync.Mutex + ln net.Listener + handler func(msgType byte, payload []byte) (respType byte, respPayload []byte) + mu sync.Mutex handshakes atomic.Uint32 - frames atomic.Uint32 - done chan struct{} + frames atomic.Uint32 + done chan struct{} } func newFakeBinaryServer(t *testing.T, handler func(msgType byte, payload []byte) (byte, []byte)) *fakeBinaryServer { @@ -246,15 +246,15 @@ func TestLookupHappyPathDecodesResult(t *testing.T) { return wireMsgError, encodeWireError("bad type") } return wireMsgLookupOK, encodeLookupResp( - 42, // nodeID - true, false, // public, taskExec 
- 7, // polo - []uint16{1, 2}, // networks - []byte{0xAB}, // pubkey - "host.example", // hostname - []string{"t1"}, // tags - "1.2.3.4:444", // realAddr - "ext-123", // externalID + 42, // nodeID + true, false, // public, taskExec + 7, // polo + []uint16{1, 2}, // networks + []byte{0xAB}, // pubkey + "host.example", // hostname + []string{"t1"}, // tags + "1.2.3.4:444", // realAddr + "ext-123", // externalID ) }) diff --git a/pkg/registry/panic_recovery.go b/pkg/registry/panic_recovery.go index 0fcf1bfa..f710a130 100644 --- a/pkg/registry/panic_recovery.go +++ b/pkg/registry/panic_recovery.go @@ -23,14 +23,14 @@ func RecoveredPanicCount() uint64 { // recoverHandler is the standard panic-recovery shim used at the top of // every connection-handling goroutine and every background loop. Usage: // -// defer recoverHandler("handleConn", nil) +// defer recoverHandler("handleConn", nil) // // On panic it: -// 1. Recovers (process keeps running) -// 2. Logs at ERROR with the panic value + full goroutine stack trace -// 3. Increments the global recoveredPanicCount metric -// 4. Calls onPanic(count) if non-nil — callers can use this to drop -// a connection / restart a loop / etc. +// 1. Recovers (process keeps running) +// 2. Logs at ERROR with the panic value + full goroutine stack trace +// 3. Increments the global recoveredPanicCount metric +// 4. Calls onPanic(count) if non-nil — callers can use this to drop +// a connection / restart a loop / etc. 
// // recoverHandler must be the OUTERMOST defer in the goroutine: defers // run LIFO, so other defers (conn.Close, mu.Unlock) run first; we want diff --git a/pkg/registry/panic_recovery_test.go b/pkg/registry/panic_recovery_test.go index 6aa3ffe4..61a530f2 100644 --- a/pkg/registry/panic_recovery_test.go +++ b/pkg/registry/panic_recovery_test.go @@ -55,8 +55,8 @@ func TestRecoverHandlerConcurrent(t *testing.T) { t.Parallel() var ( - mu sync.Mutex - seenSet = map[uint64]struct{}{} + mu sync.Mutex + seenSet = map[uint64]struct{}{} ) const N = 32 diff --git a/pkg/registry/replication.go b/pkg/registry/replication.go index 4b5cd5af..54ac7984 100644 --- a/pkg/registry/replication.go +++ b/pkg/registry/replication.go @@ -702,8 +702,8 @@ func (s *Server) applySnapshot(data []byte) error { // I/O-free string parsing but is still extra work we don't need to keep // inside the swap critical section. var ( - acceptIDPConfig bool - acceptAuditExport bool + acceptIDPConfig bool + acceptAuditExport bool ) if snap.IDPConfig != nil { if err := urlvalidate.Validate(snap.IDPConfig.URL); err != nil { diff --git a/pkg/registry/server.go b/pkg/registry/server.go index 95d007ba..fb4bf9d2 100644 --- a/pkg/registry/server.go +++ b/pkg/registry/server.go @@ -286,10 +286,10 @@ func (s *Server) appendAudit(action string, netID uint16, nodeID uint32, attrs . 
const numNodeShards = 256 type Server struct { - mu sync.RWMutex - nodeShards [numNodeShards]sync.RWMutex // per-node field locks (nodeID % N) - nodes map[uint32]*NodeInfo - maxNodes int // max registered nodes (0 = unlimited); prevents memory exhaustion + mu sync.RWMutex + nodeShards [numNodeShards]sync.RWMutex // per-node field locks (nodeID % N) + nodes map[uint32]*NodeInfo + maxNodes int // max registered nodes (0 = unlimited); prevents memory exhaustion startTime time.Time restartEvents []int64 // unix-millis of each process start after the first downtimeIntervals [][2]int64 // [start,end] unix-millis pairs, pruned to last 30d @@ -309,14 +309,14 @@ type Server struct { pulseIdx int pulseFilled bool - networks map[uint16]*NetworkInfo - pubKeyIdx map[string]uint32 // base64(pubkey) -> nodeID for re-registration - ownerIdx map[string]uint32 // owner -> nodeID for key rotation - hostnameIdx map[string]uint32 // hostname -> nodeID (unique index) - nextNode uint32 - nextNet uint16 - listener net.Listener - readyCh chan struct{} + networks map[uint16]*NetworkInfo + pubKeyIdx map[string]uint32 // base64(pubkey) -> nodeID for re-registration + ownerIdx map[string]uint32 // owner -> nodeID for key rotation + hostnameIdx map[string]uint32 // hostname -> nodeID (unique index) + nextNode uint32 + nextNet uint16 + listener net.Listener + readyCh chan struct{} // Beacon coordination beaconAddr string @@ -356,15 +356,14 @@ type Server struct { // Network invite inbox: target nodeID -> pending invites inviteInbox map[uint32][]*NetworkInvite - // Connection tracking connCount atomic.Int64 maxConnections int64 // Replication - replMgr *replicationManager - replToken string // H4 fix: required for subscribe_replication; empty = replication disabled - standby bool // if true, reject writes and receive snapshots from primary + replMgr *replicationManager + replToken string // H4 fix: required for subscribe_replication; empty = replication disabled + standby bool // if true, reject 
writes and receive snapshots from primary adminToken string // required for create_network; empty = creation disabled dashboardToken string // token for per-network stats on dashboard; empty = public-only maintenanceBanner string // optional notice rendered on the dashboard alongside release banner @@ -447,8 +446,8 @@ type Server struct { // listings ("data-exchange" 45k members, "high-trust-society" 28k) all // route through the same singleflight + 1s-TTL cache. Each network // (and the admin path, key=0) has its own state inside listNodesPerNet. - listNodesCache listNodesCacheState // legacy backbone admin cache - listNodesPerNetMu sync.Mutex // guards the map itself + listNodesCache listNodesCacheState // legacy backbone admin cache + listNodesPerNetMu sync.Mutex // guards the map itself listNodesPerNet map[uint16]*listNodesCacheState } @@ -1752,8 +1751,6 @@ func (s *Server) handleBinaryLookup(conn net.Conn, payload []byte, host string) s.metrics.requestDuration.WithLabel("lookup").Observe(time.Since(start).Seconds()) }() - - // Brief global lock for map lookup s.mu.RLock() node, ok := s.nodes[nodeID] @@ -1802,8 +1799,6 @@ func (s *Server) handleBinaryResolve(conn net.Conn, payload []byte, host string) s.metrics.requestDuration.WithLabel("resolve").Observe(time.Since(start).Seconds()) }() - - // Phase 1: copy pubkey for verification s.mu.RLock() requester, ok := s.nodes[requesterID] @@ -2279,9 +2274,11 @@ func (s *Server) handleRegister(msg map[string]interface{}, remoteAddr string) ( // // 3-PHASE LOCK PATTERN — see [[X-Tasks/backlog/30-mutex-risk-map]] § fix #5 // and the lock-ordering invariants doc at the top of this file. -// Phase 1 (RLock): snapshot the current pubkey for verification. -// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock. -// Phase 3 (Lock): re-check the pubkey is unchanged; commit the swap. +// +// Phase 1 (RLock): snapshot the current pubkey for verification. 
+// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock. +// Phase 3 (Lock): re-check the pubkey is unchanged; commit the swap. +// // If a concurrent rotation lands between Phase 1 and Phase 3 the verify is // stale; we reject this caller and let it retry. Rotate is rare, so the // retry surface is acceptable. @@ -2376,10 +2373,11 @@ func (s *Server) handleRotateKey(msg map[string]interface{}) (map[string]interfa // Only the node itself can set its own key expiry (signature-verified). // // 3-PHASE LOCK PATTERN — see [[X-Tasks/backlog/30-mutex-risk-map]] § fix #6. -// Phase 1 (RLock): snapshot pubkey + adminToken for verification. -// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock. -// Phase 3 (Lock): re-check node + pubkey unchanged + enterprise gate; -// commit the new expiry. +// +// Phase 1 (RLock): snapshot pubkey + adminToken for verification. +// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock. +// Phase 3 (Lock): re-check node + pubkey unchanged + enterprise gate; +// commit the new expiry. func (s *Server) handleSetKeyExpiry(msg map[string]interface{}) (map[string]interface{}, error) { nodeID := jsonUint32(msg, "node_id") @@ -6107,12 +6105,13 @@ const adminListNodesTTL = 1 * time.Second // json.Marshal entirely on cache hits. // // Why pre-build the wrapper: -// The previous implementation returned just the inner nodes array as -// json.RawMessage and let json.Marshal wrap it. Even though the bytes -// were already valid JSON, the encoder called appendCompact() on every -// call to re-validate them byte-by-byte — burning ~65% of total CPU at -// ~320 calls/sec on a 16 MB payload (measured 2026-04-29 profile). -// Pre-wrapping eliminates the encoder pass entirely. +// +// The previous implementation returned just the inner nodes array as +// json.RawMessage and let json.Marshal wrap it. 
Even though the bytes +// were already valid JSON, the encoder called appendCompact() on every +// call to re-validate them byte-by-byte — burning ~65% of total CPU at +// ~320 calls/sec on a 16 MB payload (measured 2026-04-29 profile). +// Pre-wrapping eliminates the encoder pass entirely. // // Race-clean: cache rebuild runs without any registry lock held. The // inner build acquires s.mu.RLock briefly and (via the iteration) per-node @@ -6290,8 +6289,10 @@ func (s *Server) handleDeregister(msg map[string]interface{}) (map[string]interf // Pre-built fragments for the heartbeat-ok response. Go's json.Marshal sorts // map keys alphabetically, so the wire shape is: -// without warning: {"time":,"type":"heartbeat_ok"} -// with warning: {"key_expiry_warning":true,"time":,"type":"heartbeat_ok"} +// +// without warning: {"time":,"type":"heartbeat_ok"} +// with warning: {"key_expiry_warning":true,"time":,"type":"heartbeat_ok"} +// // Pre-building the static prefix/suffix and only sprintf'ing the timestamp // saves the ~8% of remaining CPU spent in json.Marshal on the heartbeat // response — this is the single most-frequent message in the system. 
@@ -6468,8 +6469,8 @@ type snapshot struct { LastHeartbeat int64 `json:"last_heartbeat,omitempty"` ProbeStates map[string]*ProbeState `json:"probe_states,omitempty"` // Time-series history for dashboard charts - HourlyHistory []StatsSample `json:"hourly_history,omitempty"` - DailyHistory []StatsSample `json:"daily_history,omitempty"` + HourlyHistory []StatsSample `json:"hourly_history,omitempty"` + DailyHistory []StatsSample `json:"daily_history,omitempty"` NetHourlyHistory map[string][]NetworkSampleEntry `json:"net_hourly_history,omitempty"` NetDailyHistory map[string][]NetworkSampleEntry `json:"net_daily_history,omitempty"` // Audit log persistence (most recent entries, capped at maxAuditEntries) @@ -7773,19 +7774,19 @@ func (s *Server) SetBeaconStats(b BeaconStatsProvider) { } type DashboardStats struct { - TotalNodes int `json:"total_nodes"` - ActiveNodes int `json:"active_nodes"` - TotalTrustLinks int `json:"-"` - TotalRequests int64 `json:"total_requests"` - RelayForwarded uint64 `json:"relay_forwarded,omitempty"` - RelayDropped uint64 `json:"relay_dropped,omitempty"` - RelayNotFound uint64 `json:"relay_not_found,omitempty"` - ReqPerDay int64 `json:"req_per_day"` - UptimeSecs int64 `json:"uptime_secs"` - Versions map[string]int `json:"versions,omitempty"` - Networks []NetworkStats `json:"networks,omitempty"` // only populated with dashboard token - Hourly []StatsSample `json:"hourly,omitempty"` - Daily []StatsSample `json:"daily,omitempty"` + TotalNodes int `json:"total_nodes"` + ActiveNodes int `json:"active_nodes"` + TotalTrustLinks int `json:"-"` + TotalRequests int64 `json:"total_requests"` + RelayForwarded uint64 `json:"relay_forwarded,omitempty"` + RelayDropped uint64 `json:"relay_dropped,omitempty"` + RelayNotFound uint64 `json:"relay_not_found,omitempty"` + ReqPerDay int64 `json:"req_per_day"` + UptimeSecs int64 `json:"uptime_secs"` + Versions map[string]int `json:"versions,omitempty"` + Networks []NetworkStats `json:"networks,omitempty"` // only 
populated with dashboard token + Hourly []StatsSample `json:"hourly,omitempty"` + Daily []StatsSample `json:"daily,omitempty"` RestartEvents []int64 `json:"restart_events,omitempty"` DowntimeIntervals [][2]int64 `json:"downtime_intervals,omitempty"` Probes map[string]*ProbeState `json:"probes,omitempty"` @@ -7794,12 +7795,12 @@ type DashboardStats struct { // NetworkStats holds per-network statistics for the authenticated dashboard view. type NetworkStats struct { - ID uint16 `json:"id"` - Name string `json:"name"` - Members int `json:"members"` - Online int `json:"online"` - Requests int64 `json:"requests"` - TrustLinks int `json:"-"` + ID uint16 `json:"id"` + Name string `json:"name"` + Members int `json:"members"` + Online int `json:"online"` + Requests int64 `json:"requests"` + TrustLinks int `json:"-"` Hourly []NetworkSampleEntry `json:"hourly,omitempty"` Daily []NetworkSampleEntry `json:"daily,omitempty"` } @@ -8130,15 +8131,15 @@ func (s *Server) GetDashboardStatsExtended() DashboardStats { s.probeMu.Unlock() return DashboardStats{ - TotalNodes: int(s.nextNode - 1), - ActiveNodes: activeCount, - TotalTrustLinks: len(s.trustPairs), - TotalRequests: s.requestCount.Load(), - ReqPerDay: reqPerDay, - UptimeSecs: int64(now.Sub(s.startTime).Seconds()), - Versions: versions, - Networks: networks, - Hourly: hourly, + TotalNodes: int(s.nextNode - 1), + ActiveNodes: activeCount, + TotalTrustLinks: len(s.trustPairs), + TotalRequests: s.requestCount.Load(), + ReqPerDay: reqPerDay, + UptimeSecs: int64(now.Sub(s.startTime).Seconds()), + Versions: versions, + Networks: networks, + Hourly: hourly, Daily: daily, RestartEvents: restartEvents, DowntimeIntervals: downtimeIntervals, diff --git a/pkg/registry/server_perf_test.go b/pkg/registry/server_perf_test.go index b3ef5da8..4b01947f 100644 --- a/pkg/registry/server_perf_test.go +++ b/pkg/registry/server_perf_test.go @@ -176,4 +176,3 @@ func TestNodeInfo_AtomicLastSeen_Concurrent(t *testing.T) { t.Errorf("final value %v is 
before base %v", got, base) } } - diff --git a/pkg/registry/wal_replay.go b/pkg/registry/wal_replay.go index bc59a02f..4e4fa4b4 100644 --- a/pkg/registry/wal_replay.go +++ b/pkg/registry/wal_replay.go @@ -55,14 +55,14 @@ type deregisterDelta struct { // Member set is empty at creation; the creator is added by a separate // join delta if applicable. type networkCreateDelta struct { - NetworkID uint16 `json:"network_id"` - Name string `json:"name"` - JoinRule string `json:"join_rule"` - Token string `json:"token,omitempty"` - AdminToken string `json:"admin_token,omitempty"` - Enterprise bool `json:"enterprise,omitempty"` - CreatorNodeID uint32 `json:"creator_node_id,omitempty"` - CreatedAt string `json:"created_at"` // RFC3339 + NetworkID uint16 `json:"network_id"` + Name string `json:"name"` + JoinRule string `json:"join_rule"` + Token string `json:"token,omitempty"` + AdminToken string `json:"admin_token,omitempty"` + Enterprise bool `json:"enterprise,omitempty"` + CreatorNodeID uint32 `json:"creator_node_id,omitempty"` + CreatedAt string `json:"created_at"` // RFC3339 } // networkDeleteDelta marks a network as removed. 
diff --git a/pkg/registry/wal_replay_test.go b/pkg/registry/wal_replay_test.go index e9987324..012d9c57 100644 --- a/pkg/registry/wal_replay_test.go +++ b/pkg/registry/wal_replay_test.go @@ -260,10 +260,10 @@ func TestWALReplayRestoresNetworkCreate(t *testing.T) { created := time.Now().UTC().Format(time.RFC3339) appendWALDelta(t, storePath, DeltaNetworkCreate, 0, networkCreateDelta{ - NetworkID: 17, - Name: "fresh-net", - JoinRule: "open", - CreatedAt: created, + NetworkID: 17, + Name: "fresh-net", + JoinRule: "open", + CreatedAt: created, }) s := NewWithStore("", storePath) diff --git a/pkg/skillinject/manifest.go b/pkg/skillinject/manifest.go index 3cb4f89b..fc2eae51 100644 --- a/pkg/skillinject/manifest.go +++ b/pkg/skillinject/manifest.go @@ -67,7 +67,7 @@ type ManifestTool struct { // fetcher is a small wrapper around http.Client that returns response // bodies. Pulled out so tests can inject a fake. type fetcher struct { - httpClient *http.Client + httpClient *http.Client manifestURL string repoBase string } diff --git a/pkg/skillinject/skillinject.go b/pkg/skillinject/skillinject.go index c7cc7747..298e4fee 100644 --- a/pkg/skillinject/skillinject.go +++ b/pkg/skillinject/skillinject.go @@ -169,7 +169,7 @@ func Tick(ctx context.Context, cfg Config) (*Report, error) { if err != nil { report.Outcomes = append(report.Outcomes, Outcome{ Tool: mt.Name, Kind: KindMarker, - Path: expandHome(mt.HeartbeatPath, home), + Path: expandHome(mt.HeartbeatPath, home), Action: ActionError, Err: fmt.Sprintf("fetch %s: %v", mt.HeartbeatTemplate, err), }) @@ -181,7 +181,7 @@ func Tick(ctx context.Context, cfg Config) (*Report, error) { if err != nil { report.Outcomes = append(report.Outcomes, Outcome{ Tool: mt.Name, Kind: KindMarker, - Path: expandHome(mt.HeartbeatPath, home), + Path: expandHome(mt.HeartbeatPath, home), Action: ActionError, Err: err.Error(), }) continue diff --git a/pkg/tasksubmit/tasksubmit.go b/pkg/tasksubmit/tasksubmit.go index 7c8b63f8..7ee92a9c 100644 --- 
a/pkg/tasksubmit/tasksubmit.go +++ b/pkg/tasksubmit/tasksubmit.go @@ -57,11 +57,11 @@ const ( // The frame reader already caps the whole frame at 16 MiB; these are // tighter semantic bounds checked before the content is persisted. const ( - MaxTaskDescription = 16 * 1024 // 16 KiB — any reasonable prompt/description - MaxTaskResultText = 1 * 1024 * 1024 // 1 MiB — inline text results - MaxTaskResultFilename = 256 // filesystem-safe length cap - MaxTaskResultFileBytes = 15 * 1024 * 1024 // ~15 MiB; frame cap is 16 MiB - MaxTaskJustification = 4 * 1024 // status/decline reasons + MaxTaskDescription = 16 * 1024 // 16 KiB — any reasonable prompt/description + MaxTaskResultText = 1 * 1024 * 1024 // 1 MiB — inline text results + MaxTaskResultFilename = 256 // filesystem-safe length cap + MaxTaskResultFileBytes = 15 * 1024 * 1024 // ~15 MiB; frame cap is 16 MiB + MaxTaskJustification = 4 * 1024 // status/decline reasons ) // ValidateSubmitRequest rejects submissions whose description exceeds the diff --git a/sdk/cgo/bindings.go b/sdk/cgo/bindings.go index 2e99d146..3981db02 100644 --- a/sdk/cgo/bindings.go +++ b/sdk/cgo/bindings.go @@ -12,6 +12,7 @@ import ( "encoding/json" "fmt" "sync" + "time" "unsafe" "github.com/TeoSlayer/pilotprotocol/pkg/driver" @@ -477,5 +478,311 @@ func PilotRecvFrom(h C.uint64_t) *C.char { }) } +// ---------- Health / rotate-key ---------- + +//export PilotHealth +func PilotHealth(h C.uint64_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.Health() + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotRotateKey +func PilotRotateKey(h C.uint64_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.RotateKey() + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +// ---------- Broadcast (admin-token gated) ---------- + +//export PilotBroadcast +func PilotBroadcast(h C.uint64_t, netID C.uint16_t, port 
C.uint16_t, data unsafe.Pointer, dataLen C.int, adminToken *C.char) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + if err := d.Broadcast(uint16(netID), uint16(port), C.GoBytes(data, dataLen), C.GoString(adminToken)); err != nil { + return errJSON(err) + } + return okJSON(map[string]interface{}{"ok": true}) +} + +// ---------- Dial with timeout ---------- + +//export PilotDialTimeout +func PilotDialTimeout(h C.uint64_t, addr *C.char, timeoutMs C.uint64_t) (C.uint64_t, *C.char) { + d, err := driverFromHandle(h) + if err != nil { + return 0, errJSON(err) + } + sa, err := protocol.ParseSocketAddr(C.GoString(addr)) + if err != nil { + return 0, errJSON(err) + } + conn, err := d.DialAddrTimeout(sa.Addr, sa.Port, time.Duration(timeoutMs)*time.Millisecond) + if err != nil { + return 0, errJSON(err) + } + return C.uint64_t(storeHandle(conn)), nil +} + +// ---------- Conn read deadline ---------- + +// PilotConnSetReadDeadline sets the read deadline as Unix nanoseconds. +// Pass 0 to clear the deadline. 
+// +//export PilotConnSetReadDeadline +func PilotConnSetReadDeadline(ch C.uint64_t, deadlineUnixNanos C.int64_t) *C.char { + v, ok := loadHandle(uint64(ch)) + if !ok { + return errJSON(fmt.Errorf("invalid conn handle")) + } + c, ok := v.(*driver.Conn) + if !ok { + return errJSON(fmt.Errorf("handle is not a Conn")) + } + var t time.Time + if int64(deadlineUnixNanos) != 0 { + t = time.Unix(0, int64(deadlineUnixNanos)) + } + if err := c.SetReadDeadline(t); err != nil { + return errJSON(err) + } + return nil +} + +// ---------- Networks ---------- + +//export PilotNetworkList +func PilotNetworkList(h C.uint64_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkList() + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotNetworkJoin +func PilotNetworkJoin(h C.uint64_t, networkID C.uint16_t, token *C.char) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkJoin(uint16(networkID), C.GoString(token)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotNetworkLeave +func PilotNetworkLeave(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkLeave(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotNetworkMembers +func PilotNetworkMembers(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkMembers(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotNetworkInvite +func PilotNetworkInvite(h C.uint64_t, networkID C.uint16_t, targetNodeID C.uint32_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkInvite(uint16(networkID), uint32(targetNodeID)) + if err != nil { + return 
errJSON(err) + } + return okJSON(r) +} + +//export PilotNetworkPollInvites +func PilotNetworkPollInvites(h C.uint64_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkPollInvites() + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotNetworkRespondInvite +func PilotNetworkRespondInvite(h C.uint64_t, networkID C.uint16_t, accept C.int) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.NetworkRespondInvite(uint16(networkID), accept != 0) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +// ---------- Managed networks ---------- + +//export PilotManagedScore +func PilotManagedScore(h C.uint64_t, networkID C.uint16_t, nodeID C.uint32_t, delta C.int32_t, topic *C.char) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.ManagedScore(uint16(networkID), uint32(nodeID), int(int32(delta)), C.GoString(topic)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotManagedStatus +func PilotManagedStatus(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.ManagedStatus(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotManagedRankings +func PilotManagedRankings(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.ManagedRankings(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotManagedForceCycle +func PilotManagedForceCycle(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.ManagedForceCycle(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export 
PilotManagedReconcile +func PilotManagedReconcile(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.ManagedReconcile(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +// ---------- Policy ---------- + +//export PilotPolicyGet +func PilotPolicyGet(h C.uint64_t, networkID C.uint16_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.PolicyGet(uint16(networkID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotPolicySet +func PilotPolicySet(h C.uint64_t, networkID C.uint16_t, policyJSON *C.char) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.PolicySet(uint16(networkID), []byte(C.GoString(policyJSON))) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +// ---------- Member tags ---------- + +//export PilotMemberTagsGet +func PilotMemberTagsGet(h C.uint64_t, networkID C.uint16_t, nodeID C.uint32_t) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + r, err := d.MemberTagsGet(uint16(networkID), uint32(nodeID)) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + +//export PilotMemberTagsSet +func PilotMemberTagsSet(h C.uint64_t, networkID C.uint16_t, nodeID C.uint32_t, tagsJSON *C.char) *C.char { + d, err := driverFromHandle(h) + if err != nil { + return errJSON(err) + } + var tags []string + if err := json.Unmarshal([]byte(C.GoString(tagsJSON)), &tags); err != nil { + return errJSON(fmt.Errorf("invalid tags JSON: %w", err)) + } + r, err := d.MemberTagsSet(uint16(networkID), uint32(nodeID), tags) + if err != nil { + return errJSON(err) + } + return okJSON(r) +} + // main is required for c-shared build mode. 
func main() {} diff --git a/sdk/node/package-lock.json b/sdk/node/package-lock.json index 847b00e4..90c6bb3c 100644 --- a/sdk/node/package-lock.json +++ b/sdk/node/package-lock.json @@ -1,16 +1,30 @@ { "name": "pilotprotocol", - "version": "0.1.0", + "version": "1.9.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "pilotprotocol", - "version": "0.1.0", - "license": "MIT", + "version": "1.9.1", + "cpu": [ + "x64", + "arm64" + ], + "license": "AGPL-3.0-or-later", + "os": [ + "darwin", + "linux" + ], "dependencies": { "koffi": "^2.9.0" }, + "bin": { + "pilot-daemon": "bin-stubs/pilot-daemon.js", + "pilot-gateway": "bin-stubs/pilot-gateway.js", + "pilot-updater": "bin-stubs/pilot-updater.js", + "pilotctl": "bin-stubs/pilotctl.js" + }, "devDependencies": { "@types/node": "^25.5.0", "typescript": "^5.7.0", @@ -850,7 +864,6 @@ "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.18.0" } @@ -1237,7 +1250,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -1451,7 +1463,6 @@ "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", diff --git a/sdk/node/package.json b/sdk/node/package.json index e01a30e9..7ef05be6 100644 --- a/sdk/node/package.json +++ b/sdk/node/package.json @@ -1,6 +1,6 @@ { "name": "pilotprotocol", - "version": "0.1.1", + "version": "1.9.1", "description": "Node.js SDK for Pilot Protocol — the network stack for AI agents", "type": "module", "main": "dist/index.js", diff --git a/sdk/node/scripts/build-binaries.sh b/sdk/node/scripts/build-binaries.sh index 8a8210fa..46eef5ea 100755 --- 
a/sdk/node/scripts/build-binaries.sh +++ b/sdk/node/scripts/build-binaries.sh @@ -6,6 +6,9 @@ set -euo pipefail cd "$(dirname "$0")/../../.." # Go to repo root +# Read SDK version (from package.json) so the seeder marker matches it. +SDK_VERSION=$(node -e "console.log(JSON.parse(require('fs').readFileSync('sdk/node/package.json','utf8')).version)") + # Detect platform OS=$(uname -s | tr '[:upper:]' '[:lower:]') ARCH=$(uname -m) @@ -63,6 +66,27 @@ cd ../.. echo " ✓ Built: $OUTPUT_DIR/libpilot.$EXT" echo "" +# 6. Write .pilot-version marker so the runtime seeder can compare against +# whatever's already installed at ~/.pilot/bin/. +echo "$SDK_VERSION" > "$OUTPUT_DIR/.pilot-version" +echo "6. Wrote $OUTPUT_DIR/.pilot-version → $SDK_VERSION" +echo "" + +# 7. macOS ad-hoc codesign + strip quarantine. Mirrors the main release +# workflow so SDK-shipped binaries don't trigger Gatekeeper "killed: 9" +# or "cannot be opened because Apple cannot check it for malicious +# software" when downloaded via npm. +if [ "$OS" = "darwin" ]; then + echo "7. macOS ad-hoc codesign + strip quarantine..." + for bin in "$OUTPUT_DIR/pilot-daemon" "$OUTPUT_DIR/pilotctl" "$OUTPUT_DIR/pilot-gateway" "$OUTPUT_DIR/pilot-updater" "$OUTPUT_DIR/libpilot.$EXT"; do + codesign --force --deep --sign - "$bin" + xattr -cr "$bin" || true + codesign -dv "$bin" 2>&1 | grep -E "Signature|Authority|TeamIdentifier" | head -1 || true + done + echo " ✓ codesigned ${OS} binaries" + echo "" +fi + # Show sizes echo "================================================================" echo "Build Summary:" diff --git a/sdk/node/src/cli.ts b/sdk/node/src/cli.ts index 40cb94af..77606077 100644 --- a/sdk/node/src/cli.ts +++ b/sdk/node/src/cli.ts @@ -1,97 +1,30 @@ /** - * CLI wrappers for bundled Pilot Protocol binaries. + * CLI entry points for the Pilot Protocol Node SDK. * - * These functions are used as npm "bin" entry points. Each wrapper: - * 1. Ensures ~/.pilot/ directory and default config.json exist - * 2. 
Locates the bundled Go binary - * 3. Executes it with all CLI arguments passed through + * Each wrapper: + * 1. Seeds `~/.pilot/bin/` from the package's bundled binaries (the + * `runtime` module is idempotent and concurrency-safe). + * 2. Execs the seeded binary with all CLI arguments passed through. * - * This mirrors the Python SDK's cli.py approach. + * This keeps a single canonical runtime location at `~/.pilot/bin/`, + * shared with `install.sh` and any other Pilot SDK install on the host. */ -import { execFileSync } from 'node:child_process'; -import { existsSync, mkdirSync, writeFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { join, resolve } from 'node:path'; -import { fileURLToPath } from 'node:url'; +import { spawnSync } from 'node:child_process'; +import { ensureRuntimeSeeded, runtimeBinaryPath } from './runtime.js'; -/** - * Ensure ~/.pilot/ directory and config.json exist. - * Called before every binary execution to initialize the runtime environment. - */ -function ensurePilotEnv(): void { - const home = homedir(); - const pilotDir = join(home, '.pilot'); - const configFile = join(pilotDir, 'config.json'); - - // Create ~/.pilot/ if it doesn't exist - if (!existsSync(pilotDir)) { - mkdirSync(pilotDir, { recursive: true }); - } - - // Create default config.json if it doesn't exist - if (!existsSync(configFile)) { - const defaultConfig = { - registry: '34.71.57.205:9000', - beacon: '34.71.57.205:9001', - socket: '/tmp/pilot.sock', - encrypt: true, - identity: join(pilotDir, 'identity.json'), - }; - writeFileSync(configFile, JSON.stringify(defaultConfig, null, 2)); - } -} - -/** - * Get absolute path to a bundled binary. - * Searches in the package's bin/ directory (relative to this file's location). 
- */ -function getBinaryPath(binaryName: string): string { - const thisDir = resolve(fileURLToPath(import.meta.url), '..'); - - // When compiled: dist/cli.js → look for ../bin/ - const pkgBin = resolve(thisDir, '..', 'bin', binaryName); - if (existsSync(pkgBin)) return pkgBin; - - // Development: src/cli.ts → look for ../../bin/ (through sdk/node/) - const devBin = resolve(thisDir, '..', '..', 'bin', binaryName); - if (existsSync(devBin)) return devBin; - - throw new Error( - `Binary '${binaryName}' not found.\n` + - '\n' + - 'Expected locations:\n' + - ` - ${pkgBin} (npm package)\n` + - ` - ${devBin} (development)\n` + - '\n' + - 'Build binaries with:\n' + - ' cd sdk/node && ./scripts/build-binaries.sh\n', - ); -} - -/** - * Execute a bundled binary with all CLI arguments passed through. - * Exits with the same code as the binary. - */ -function runBinary(binaryName: string): void { - ensurePilotEnv(); - const binaryPath = getBinaryPath(binaryName); +function runBinary(name: string): void { + ensureRuntimeSeeded(); + const binary = runtimeBinaryPath(name); const args = process.argv.slice(2); - - try { - execFileSync(binaryPath, args, { - stdio: 'inherit', - env: process.env, - }); - } catch (err: unknown) { - // execFileSync throws on non-zero exit codes - const exitCode = (err as { status?: number }).status ?? 1; - process.exit(exitCode); + const r = spawnSync(binary, args, { stdio: 'inherit', env: process.env }); + if (r.error) { + process.stderr.write(`pilot: failed to launch ${name}: ${String(r.error)}\n`); + process.exit(1); } + process.exit(r.status ?? 1); } -// --- Entry points (one per binary) --- - export function runPilotctl(): void { runBinary('pilotctl'); } diff --git a/sdk/node/src/client.ts b/sdk/node/src/client.ts index 9898879e..b4dc420e 100644 --- a/sdk/node/src/client.ts +++ b/sdk/node/src/client.ts @@ -109,6 +109,28 @@ export class Conn { checkErr(ptr); } + /** + * Set the read deadline. 
Pass a Date (absolute time), a number of + * milliseconds from now, or null to clear. + * + * After the deadline passes, in-flight and subsequent `read()` calls + * fail with a "deadline exceeded" PilotError. + */ + setReadDeadline(deadline: Date | number | null): void { + if (this._closed) throw new PilotError('connection closed'); + const lib = getLib(); + let nanos: bigint; + if (deadline === null) { + nanos = 0n; + } else if (deadline instanceof Date) { + nanos = BigInt(deadline.getTime()) * 1_000_000n; + } else { + nanos = BigInt(Date.now() + deadline) * 1_000_000n; + } + const ptr = lib.PilotConnSetReadDeadline(this._h, nanos); + checkErr(ptr); + } + /** Support TC39 explicit resource management. */ [Symbol.dispose](): void { this.close(); @@ -195,6 +217,16 @@ export class Driver { return this._callJSON('PilotInfo'); } + /** Lightweight health check from the daemon. */ + health(): Record { + return this._callJSON('PilotHealth'); + } + + /** Rotate the daemon's Ed25519 identity at the registry. */ + rotateKey(): Record { + return this._callJSON('PilotRotateKey'); + } + // -- Handshake / Trust -- /** Send a trust handshake request to a remote node. */ @@ -277,10 +309,17 @@ export class Driver { // -- Streams -- - /** Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT"). */ - dial(addr: string): Conn { + /** + * Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT"). + * If `timeoutMs` is provided, the dial is cancelled if the daemon does + * not respond within that many milliseconds. + */ + dial(addr: string, timeoutMs?: number): Conn { const lib = getLib(); - const res = lib.PilotDial(this._h, addr); + const res = + typeof timeoutMs === 'number' + ? 
lib.PilotDialTimeout(this._h, addr, BigInt(Math.max(0, Math.floor(timeoutMs)))) + : lib.PilotDial(this._h, addr); const handle = unwrapHandleErr(res); return new Conn(handle); } @@ -305,11 +344,116 @@ export class Driver { checkErr(ptr); } + /** + * Broadcast an unreliable datagram to every member of a network. + * Requires the daemon's admin token; see Driver.Broadcast in pkg/driver. + */ + broadcast(networkId: number, port: number, data: Buffer | Uint8Array | string, adminToken: string): void { + const lib = getLib(); + const src = typeof data === 'string' ? Buffer.from(data) : data; + const buf = Buffer.allocUnsafe(src.length); + Buffer.from(src).copy(buf); + const ptr = lib.PilotBroadcast(this._h, networkId, port, buf, buf.length, adminToken); + checkErr(ptr); + } + /** Receive the next incoming datagram (blocks). */ recvFrom(): Record { return this._callJSON('PilotRecvFrom'); } + // -- Networks -- + + /** List all networks known to the registry. */ + networkList(): Record { + return this._callJSON('PilotNetworkList'); + } + + /** Join a network by ID, optionally with a token for token-gated networks. */ + networkJoin(networkId: number, token = ''): Record { + return this._callJSON('PilotNetworkJoin', networkId, token); + } + + /** Leave a network by ID. */ + networkLeave(networkId: number): Record { + return this._callJSON('PilotNetworkLeave', networkId); + } + + /** List all members of a network. */ + networkMembers(networkId: number): Record { + return this._callJSON('PilotNetworkMembers', networkId); + } + + /** Invite a target node to a network (requires admin token on daemon). */ + networkInvite(networkId: number, targetNodeId: number): Record { + return this._callJSON('PilotNetworkInvite', networkId, targetNodeId); + } + + /** Return pending network invites for this node. */ + networkPollInvites(): Record { + return this._callJSON('PilotNetworkPollInvites'); + } + + /** Accept or reject a pending network invite. 
*/ + networkRespondInvite(networkId: number, accept: boolean): Record { + return this._callJSON('PilotNetworkRespondInvite', networkId, accept ? 1 : 0); + } + + // -- Managed networks -- + + /** Adjust a peer's score in a managed network. */ + managedScore(networkId: number, nodeId: number, delta: number, topic = ''): Record { + return this._callJSON('PilotManagedScore', networkId, nodeId, delta, topic); + } + + /** Return the status of a managed network engine. */ + managedStatus(networkId: number): Record { + return this._callJSON('PilotManagedStatus', networkId); + } + + /** Return ranked peers in a managed network. */ + managedRankings(networkId: number): Record { + return this._callJSON('PilotManagedRankings', networkId); + } + + /** Force a prune/fill cycle in a managed network. */ + managedForceCycle(networkId: number): Record { + return this._callJSON('PilotManagedForceCycle', networkId); + } + + /** Refresh the managed network's peer set from the registry without a policy cycle. */ + managedReconcile(networkId: number): Record { + return this._callJSON('PilotManagedReconcile', networkId); + } + + // -- Policy -- + + /** Retrieve the active policy for a network. */ + policyGet(networkId: number): Record { + return this._callJSON('PilotPolicyGet', networkId); + } + + /** Apply a policy document to a network. */ + policySet(networkId: number, policy: Record | string | Buffer): Record { + let json: string; + if (typeof policy === 'string') json = policy; + else if (Buffer.isBuffer(policy)) json = policy.toString('utf-8'); + else json = JSON.stringify(policy); + return this._callJSON('PilotPolicySet', networkId, json); + } + + // -- Member tags -- + + /** Retrieve admin-assigned member tags for a node in a network. */ + memberTagsGet(networkId: number, nodeId: number): Record { + return this._callJSON('PilotMemberTagsGet', networkId, nodeId); + } + + /** Set admin-assigned member tags for a node in a network. 
*/ + memberTagsSet(networkId: number, nodeId: number, tags: string[]): Record { + return this._callJSON('PilotMemberTagsSet', networkId, nodeId, JSON.stringify(tags)); + } + // -- High-level service methods -- /** Resolve a target to a protocol address. Passes through if already an address. */ diff --git a/sdk/node/src/ffi.ts b/sdk/node/src/ffi.ts index a278b40e..f0afccdc 100644 --- a/sdk/node/src/ffi.ts +++ b/sdk/node/src/ffi.ts @@ -21,6 +21,7 @@ import { existsSync } from 'node:fs'; import { homedir, platform } from 'node:os'; import { join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; +import { runtimeLibraryPath } from './runtime.js'; // --------------------------------------------------------------------------- // Error class (defined here to avoid circular deps with client.ts) @@ -49,27 +50,35 @@ export function findLibrary(): string { throw new Error(`unsupported platform: ${platform()}`); } - // 1. PILOT_LIB_PATH env var + // 1. PILOT_LIB_PATH env var (explicit override — bypasses the seeder). const envPath = process.env['PILOT_LIB_PATH']; if (envPath) { if (existsSync(envPath)) return envPath; throw new Error(`PILOT_LIB_PATH=${envPath} does not exist`); } - // 2. ~/.pilot/bin/ + // 2. The seeded library at ~/.pilot/bin/ (canonical runtime). + try { + return runtimeLibraryPath(); + } catch { + // Seeder failed (read-only home, missing wheel binary) — fall through + // to the legacy locations so the SDK still loads in dev / weird envs. + } + + // 3. ~/.pilot/bin/ (already-installed copy, no seeding). const pilotBin = join(homedir(), '.pilot', 'bin', libName); if (existsSync(pilotBin)) return pilotBin; - // 3. /bin/ (npm package layout: dist/ffi.js → ../bin/) + // 4. /bin/ (npm package layout: dist/ffi.js → ../bin/). const thisDir = resolve(fileURLToPath(import.meta.url), '..'); const pkgBin = resolve(thisDir, '..', 'bin', libName); if (existsSync(pkgBin)) return pkgBin; - // 4. Same directory as this file + // 5. 
Same directory as this file. const colocated = join(thisDir, libName); if (existsSync(colocated)) return colocated; - // 5. /bin/ (development layout — 3 levels up from dist/) + // 6. /bin/ (development layout — 3 levels up from dist/). const repoBin = resolve(thisDir, '..', '..', '..', 'bin', libName); if (existsSync(repoBin)) return repoBin; @@ -101,6 +110,8 @@ export interface PilotLib { // JSON-RPC (return JSON string or null) PilotInfo(h: bigint): string | null; + PilotHealth(h: bigint): string | null; + PilotRotateKey(h: bigint): string | null; PilotHandshake(h: bigint, nodeId: number, justification: string): string | null; PilotApproveHandshake(h: bigint, nodeId: number): string | null; PilotRejectHandshake(h: bigint, nodeId: number, reason: string): string | null; @@ -117,8 +128,33 @@ export interface PilotLib { PilotDisconnect(h: bigint, connId: number): string | null; PilotRecvFrom(h: bigint): string | null; + // Networks + PilotNetworkList(h: bigint): string | null; + PilotNetworkJoin(h: bigint, networkId: number, token: string): string | null; + PilotNetworkLeave(h: bigint, networkId: number): string | null; + PilotNetworkMembers(h: bigint, networkId: number): string | null; + PilotNetworkInvite(h: bigint, networkId: number, targetNodeId: number): string | null; + PilotNetworkPollInvites(h: bigint): string | null; + PilotNetworkRespondInvite(h: bigint, networkId: number, accept: number): string | null; + + // Managed networks + PilotManagedScore(h: bigint, networkId: number, nodeId: number, delta: number, topic: string): string | null; + PilotManagedStatus(h: bigint, networkId: number): string | null; + PilotManagedRankings(h: bigint, networkId: number): string | null; + PilotManagedForceCycle(h: bigint, networkId: number): string | null; + PilotManagedReconcile(h: bigint, networkId: number): string | null; + + // Policy + PilotPolicyGet(h: bigint, networkId: number): string | null; + PilotPolicySet(h: bigint, networkId: number, policyJson: string): 
string | null; + + // Member tags + PilotMemberTagsGet(h: bigint, networkId: number, nodeId: number): string | null; + PilotMemberTagsSet(h: bigint, networkId: number, nodeId: number, tagsJson: string): string | null; + // Stream connections PilotDial(h: bigint, addr: string): { handle: bigint; err: string | null }; + PilotDialTimeout(h: bigint, addr: string, timeoutMs: bigint): { handle: bigint; err: string | null }; PilotListen(h: bigint, port: number): { handle: bigint; err: string | null }; PilotListenerAccept(h: bigint): { handle: bigint; err: string | null }; PilotListenerClose(h: bigint): string | null; @@ -127,9 +163,11 @@ export interface PilotLib { PilotConnRead(h: bigint, bufSize: number): { n: number; data: Buffer | null; err: string | null }; PilotConnWrite(h: bigint, data: Buffer, dataLen: number): { n: number; err: string | null }; PilotConnClose(h: bigint): string | null; + PilotConnSetReadDeadline(h: bigint, deadlineUnixNanos: bigint): string | null; // Datagrams PilotSendTo(h: bigint, addr: string, data: Buffer, dataLen: number): string | null; + PilotBroadcast(h: bigint, networkId: number, port: number, data: Buffer, dataLen: number, adminToken: string): string | null; } // --------------------------------------------------------------------------- @@ -167,6 +205,8 @@ export function loadLibrary(path?: string): PilotLib { const rawConnect = lib.func('PilotConnect', HandleErrStruct, ['str']); const rawClose = lib.func('PilotClose', 'void *', ['uint64']); const rawInfo = lib.func('PilotInfo', 'void *', ['uint64']); + const rawHealth = lib.func('PilotHealth', 'void *', ['uint64']); + const rawRotateKey = lib.func('PilotRotateKey', 'void *', ['uint64']); const rawHandshake = lib.func('PilotHandshake', 'void *', ['uint64', 'uint32', 'str']); const rawApproveHandshake = lib.func('PilotApproveHandshake', 'void *', ['uint64', 'uint32']); const rawRejectHandshake = lib.func('PilotRejectHandshake', 'void *', ['uint64', 'uint32', 'str']); @@ -182,14 +222,33 
@@ export function loadLibrary(path?: string): PilotLib { const rawSetWebhook = lib.func('PilotSetWebhook', 'void *', ['uint64', 'str']); const rawDisconnect = lib.func('PilotDisconnect', 'void *', ['uint64', 'uint32']); const rawRecvFrom = lib.func('PilotRecvFrom', 'void *', ['uint64']); + const rawNetworkList = lib.func('PilotNetworkList', 'void *', ['uint64']); + const rawNetworkJoin = lib.func('PilotNetworkJoin', 'void *', ['uint64', 'uint16', 'str']); + const rawNetworkLeave = lib.func('PilotNetworkLeave', 'void *', ['uint64', 'uint16']); + const rawNetworkMembers = lib.func('PilotNetworkMembers', 'void *', ['uint64', 'uint16']); + const rawNetworkInvite = lib.func('PilotNetworkInvite', 'void *', ['uint64', 'uint16', 'uint32']); + const rawNetworkPollInvites = lib.func('PilotNetworkPollInvites', 'void *', ['uint64']); + const rawNetworkRespondInvite = lib.func('PilotNetworkRespondInvite', 'void *', ['uint64', 'uint16', 'int']); + const rawManagedScore = lib.func('PilotManagedScore', 'void *', ['uint64', 'uint16', 'uint32', 'int32', 'str']); + const rawManagedStatus = lib.func('PilotManagedStatus', 'void *', ['uint64', 'uint16']); + const rawManagedRankings = lib.func('PilotManagedRankings', 'void *', ['uint64', 'uint16']); + const rawManagedForceCycle = lib.func('PilotManagedForceCycle', 'void *', ['uint64', 'uint16']); + const rawManagedReconcile = lib.func('PilotManagedReconcile', 'void *', ['uint64', 'uint16']); + const rawPolicyGet = lib.func('PilotPolicyGet', 'void *', ['uint64', 'uint16']); + const rawPolicySet = lib.func('PilotPolicySet', 'void *', ['uint64', 'uint16', 'str']); + const rawMemberTagsGet = lib.func('PilotMemberTagsGet', 'void *', ['uint64', 'uint16', 'uint32']); + const rawMemberTagsSet = lib.func('PilotMemberTagsSet', 'void *', ['uint64', 'uint16', 'uint32', 'str']); const rawDial = lib.func('PilotDial', HandleErrStruct, ['uint64', 'str']); + const rawDialTimeout = lib.func('PilotDialTimeout', HandleErrStruct, ['uint64', 'str', 
'uint64']); const rawListen = lib.func('PilotListen', HandleErrStruct, ['uint64', 'uint16']); const rawListenerAccept = lib.func('PilotListenerAccept', HandleErrStruct, ['uint64']); const rawListenerClose = lib.func('PilotListenerClose', 'void *', ['uint64']); const rawConnRead = lib.func('PilotConnRead', ReadResultStruct, ['uint64', 'int']); const rawConnWrite = lib.func('PilotConnWrite', WriteResultStruct, ['uint64', 'void *', 'int']); const rawConnClose = lib.func('PilotConnClose', 'void *', ['uint64']); + const rawConnSetReadDeadline = lib.func('PilotConnSetReadDeadline', 'void *', ['uint64', 'int64']); const rawSendTo = lib.func('PilotSendTo', 'void *', ['uint64', 'str', 'void *', 'int']); + const rawBroadcast = lib.func('PilotBroadcast', 'void *', ['uint64', 'uint16', 'uint16', 'void *', 'int', 'str']); /** Decode a void* C string, free the pointer, return JS string. */ function decodeAndFree(ptr: unknown): string | null { @@ -213,6 +272,8 @@ export function loadLibrary(path?: string): PilotLib { PilotConnect: (socketPath) => unwrapHandle(rawConnect(socketPath)), PilotClose: (h) => decodeAndFree(rawClose(h)), PilotInfo: wrapJSON(rawInfo), + PilotHealth: wrapJSON(rawHealth), + PilotRotateKey: wrapJSON(rawRotateKey), PilotHandshake: wrapJSON(rawHandshake), PilotApproveHandshake: wrapJSON(rawApproveHandshake), PilotRejectHandshake: wrapJSON(rawRejectHandshake), @@ -228,7 +289,24 @@ export function loadLibrary(path?: string): PilotLib { PilotSetWebhook: wrapJSON(rawSetWebhook), PilotDisconnect: wrapJSON(rawDisconnect), PilotRecvFrom: wrapJSON(rawRecvFrom), + PilotNetworkList: wrapJSON(rawNetworkList), + PilotNetworkJoin: wrapJSON(rawNetworkJoin), + PilotNetworkLeave: wrapJSON(rawNetworkLeave), + PilotNetworkMembers: wrapJSON(rawNetworkMembers), + PilotNetworkInvite: wrapJSON(rawNetworkInvite), + PilotNetworkPollInvites: wrapJSON(rawNetworkPollInvites), + PilotNetworkRespondInvite: wrapJSON(rawNetworkRespondInvite), + PilotManagedScore: wrapJSON(rawManagedScore), 
+ PilotManagedStatus: wrapJSON(rawManagedStatus), + PilotManagedRankings: wrapJSON(rawManagedRankings), + PilotManagedForceCycle: wrapJSON(rawManagedForceCycle), + PilotManagedReconcile: wrapJSON(rawManagedReconcile), + PilotPolicyGet: wrapJSON(rawPolicyGet), + PilotPolicySet: wrapJSON(rawPolicySet), + PilotMemberTagsGet: wrapJSON(rawMemberTagsGet), + PilotMemberTagsSet: wrapJSON(rawMemberTagsSet), PilotDial: (h, addr) => unwrapHandle(rawDial(h, addr)), + PilotDialTimeout: (h, addr, timeoutMs) => unwrapHandle(rawDialTimeout(h, addr, timeoutMs)), PilotListen: (h, port) => unwrapHandle(rawListen(h, port)), PilotListenerAccept: (h) => unwrapHandle(rawListenerAccept(h)), PilotListenerClose: (h) => decodeAndFree(rawListenerClose(h)), @@ -250,10 +328,15 @@ export function loadLibrary(path?: string): PilotLib { return { n: res.n as number, err: decodeAndFree(res.err) }; }, PilotConnClose: (h) => decodeAndFree(rawConnClose(h)), + PilotConnSetReadDeadline: (h, deadlineUnixNanos) => + decodeAndFree(rawConnSetReadDeadline(h, deadlineUnixNanos)), PilotSendTo(h, addr, buf, dataLen) { // Pass Buffer directly — koffi handles byteOffset correctly for void* return decodeAndFree(rawSendTo(h, addr, buf, dataLen)); }, + PilotBroadcast(h, networkId, port, buf, dataLen, adminToken) { + return decodeAndFree(rawBroadcast(h, networkId, port, buf, dataLen, adminToken)); + }, }; } diff --git a/sdk/node/src/runtime.ts b/sdk/node/src/runtime.ts new file mode 100644 index 00000000..aeacf67a --- /dev/null +++ b/sdk/node/src/runtime.ts @@ -0,0 +1,485 @@ +/** + * Runtime environment seeder for the Pilot Protocol Node SDK. + * + * Both the CLI shims (`cli.ts`) and the FFI loader (`ffi.ts:findLibrary`) + * funnel through `ensureRuntimeSeeded`, which idempotently mirrors the + * binaries shipped inside the npm package into `~/.pilot/bin/` (the + * canonical runtime directory shared with `install.sh`). + * + * Goals: + * - The package is the seed cache; `~/.pilot/bin/` is the runtime. 
+ * - No install-time code runs; seeding happens lazily on first SDK use.
+ * - Concurrency-safe via O_EXCL lock + retry; crash-safe via atomic rename.
+ * - Never downgrades; never replaces a running daemon binary.
+ * - Coexists with `install.sh` — same layout, same `.pilot-version`.
+ */
+
+import { spawnSync } from 'node:child_process';
+import { Socket } from 'node:net';
+import {
+  closeSync,
+  copyFileSync,
+  existsSync,
+  mkdirSync,
+  openSync,
+  readFileSync,
+  renameSync,
+  statSync,
+  unlinkSync,
+  writeFileSync,
+  chmodSync,
+  accessSync,
+  constants as fsConstants,
+} from 'node:fs';
+import { homedir, platform as osPlatform } from 'node:os';
+import { dirname, join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const BIN_NAMES = ['pilotctl', 'pilot-daemon', 'pilot-gateway', 'pilot-updater'] as const;
+
+const LIB_NAMES: Record<string, string> = {
+  darwin: 'libpilot.dylib',
+  linux: 'libpilot.so',
+  win32: 'libpilot.dll',
+};
+
+export const DEFAULT_REGISTRY = '34.71.57.205:9000';
+export const DEFAULT_BEACON = '34.71.57.205:9001';
+export const DEFAULT_SOCKET = '/tmp/pilot.sock';
+
+// ---------------------------------------------------------------------------
+// Path helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Where the npm package ships its bundled binaries (the seed cache).
+ *
+ * dist/runtime.js → ../bin/ (npm package layout)
+ * src/runtime.ts → ../../bin/ (development layout, run via tsx)
+ */
+function pkgBinDir(): string {
+  // Test override: a one-shot way to point at a fake bundled bin/ without
+  // resorting to vi.spyOn on a live binding. Honored only when set.
+  const override = process.env['PILOT_PKG_BIN_DIR'];
+  if (override) return override;
+
+  // Directory holding this module: dist/ when compiled, src/ when run via tsx.
+  const thisDir = dirname(fileURLToPath(import.meta.url));
+
+  // Compiled (dist/runtime.js)
+  const compiledBin = resolve(thisDir, '..', 'bin');
+  if (existsSync(compiledBin)) return compiledBin;
+
+  // Source (src/runtime.ts) — sdk/node/bin
+  // Returned without an existence check; callers probe the files they need.
+  return resolve(thisDir, '..', '..', 'bin');
+}
+
+/** Root of the runtime tree: `$PILOT_HOME` when set, otherwise `~/.pilot`. */
+function runtimeRoot(): string {
+  const override = process.env['PILOT_HOME'];
+  if (override) return override;
+  return join(homedir(), '.pilot');
+}
+
+/** Directory the binaries are seeded into (`<runtimeRoot>/bin`). */
+function runtimeBin(): string {
+  return join(runtimeRoot(), 'bin');
+}
+
+/**
+ * File name of the shared library for the current platform.
+ *
+ * @throws Error when the platform has no entry in `LIB_NAMES`.
+ */
+function platformLibName(): string {
+  const name = LIB_NAMES[osPlatform()];
+  if (!name) throw new Error(`unsupported platform: ${osPlatform()}`);
+  return name;
+}
+
+// ---------------------------------------------------------------------------
+// Version helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Parse a version string into its numeric components.
+ *
+ * A leading `v` and any `-prerelease` / `+build` suffix are dropped, so
+ * `v1.9.0-rc1` and `1.9.0` parse to the same tuple.
+ *
+ * @returns the numeric components, or `null` when the input is empty or any
+ *          dot-separated segment is not a plain run of decimal digits.
+ */
+function semverTuple(v: string | undefined | null): number[] | null {
+  if (!v) return null;
+  const core = v.trim().replace(/^v/, '').split('-')[0]?.split('+')[0];
+  if (!core) return null;
+  const segments = core.split('.');
+  // Validate the text rather than the converted number: Number('') is 0 and
+  // Number('0x10') is 16, so "1..2" or "0x10.1" would otherwise be accepted.
+  if (!segments.every((s) => /^\d+$/.test(s))) return null;
+  return segments.map(Number);
+}
+
+/**
+ * Three-way comparison of two parsed versions.
+ *
+ * Missing trailing components count as 0 (`[1, 9]` equals `[1, 9, 0]`), and
+ * `null` sorts below every real version.
+ *
+ * @returns 1 when `a` is newer, -1 when `b` is newer, 0 when equal.
+ */
+function compareSemver(a: number[] | null, b: number[] | null): number {
+  if (!a && !b) return 0;
+  if (!a) return -1;
+  if (!b) return 1;
+  const len = Math.max(a.length, b.length);
+  for (let i = 0; i < len; i++) {
+    const ai = a[i] ?? 0;
+    const bi = b[i] ?? 0;
+    if (ai > bi) return 1;
+    if (ai < bi) return -1;
+  }
+  return 0;
+}
+
+/**
+ * Version of the binaries bundled with the package.
+ *
+ * Reads `<pkgBinDir>/.pilot-version` first and falls back to the `version`
+ * field of the nearest package.json. Returns '' when neither yields a value.
+ */
+function bundledVersion(): string {
+  const marker = join(pkgBinDir(), '.pilot-version');
+  if (existsSync(marker)) {
+    try {
+      const fromMarker = readFileSync(marker, 'utf8').trim();
+      // An empty marker carries no information; try package.json instead.
+      if (fromMarker) return fromMarker;
+    } catch {
+      // fall through
+    }
+  }
+  // Fallback: read package.json beside dist/
+  const thisDir = dirname(fileURLToPath(import.meta.url));
+  const candidates = [
+    resolve(thisDir, '..', 'package.json'),
+    resolve(thisDir, '..', '..', 'package.json'),
+  ];
+  for (const c of candidates) {
+    if (!existsSync(c)) continue;
+    try {
+      const ver: unknown = JSON.parse(readFileSync(c, 'utf8')).version;
+      // JSON.parse is untyped; never hand a non-string back to callers.
+      return typeof ver === 'string' ? ver : '';
+    } catch {
+      // ignore
+    }
+  }
+  return '';
+}
+
+/** Version recorded in `<rt>/.pilot-version`, or '' when absent or unreadable. */
+function runtimeVersion(rt: string): string {
+  const f = join(rt, '.pilot-version');
+  if (!existsSync(f)) return '';
+  try {
+    return readFileSync(f, 'utf8').trim();
+  } catch {
+    return '';
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Daemon liveness
+// ---------------------------------------------------------------------------
+
+/**
+ * Asynchronously check whether something accepts connections on the daemon
+ * socket.
+ *
+ * @param timeoutMs how long to wait for the connection before giving up.
+ * @returns true when the connection succeeds; false when the socket file is
+ *          missing, the connection errors, or the timeout elapses.
+ */
+async function probeDaemonLive(timeoutMs = 200): Promise<boolean> {
+  // Same config lookup as the sync probe, so both agree on the socket path.
+  const sockPath = readSocketPath();
+  if (!existsSync(sockPath)) return false;
+
+  return new Promise<boolean>((resolveProbe) => {
+    const s = new Socket();
+    const finish = (ok: boolean) => {
+      try {
+        s.destroy();
+      } catch {
+        // ignore
+      }
+      resolveProbe(ok);
+    };
+    s.setTimeout(timeoutMs);
+    s.once('connect', () => finish(true));
+    s.once('timeout', () => finish(false));
+    s.once('error', () => finish(false));
+    try {
+      s.connect(sockPath);
+    } catch {
+      finish(false);
+    }
+  });
+}
+
+/** Synchronous, best-effort daemon liveness probe used by the seeder.
*/
+function probeDaemonLiveSync(): boolean {
+  const sockPath = readSocketPath();
+  // No socket file means nothing can be listening on it.
+  if (!existsSync(sockPath)) return false;
+  // Best-effort sync: try connecting via a child process. Falls back to
+  // "assume running" if we can't decide quickly.
+  try {
+    // `spawnSync` is a static import: this module is ESM (it relies on
+    // `import.meta.url`), where a bare `require` is undefined and would send
+    // every call straight to the catch block without ever running nc.
+    // `nc -z -U <socket>` is the cleanest sync probe.
+    const r = spawnSync('nc', ['-z', '-U', sockPath], { timeout: 250 });
+    if (r.error) return existsSync(sockPath); // nc missing or timed out — be conservative
+    return r.status === 0;
+  } catch {
+    // Conservative: if a socket file is present, assume the daemon is up.
+    return existsSync(sockPath);
+  }
+}
+
+/**
+ * Daemon socket path: the non-empty string `socket` field of
+ * `<runtimeRoot>/config.json` when present and parseable, else DEFAULT_SOCKET.
+ */
+function readSocketPath(): string {
+  const cfgPath = join(runtimeRoot(), 'config.json');
+  if (existsSync(cfgPath)) {
+    try {
+      const cfg = JSON.parse(readFileSync(cfgPath, 'utf8'));
+      if (typeof cfg.socket === 'string' && cfg.socket) return cfg.socket;
+    } catch {
+      // ignore
+    }
+  }
+  return DEFAULT_SOCKET;
+}
+
+// ---------------------------------------------------------------------------
+// File ops
+// ---------------------------------------------------------------------------
+
+/**
+ * Create `p` (recursively) when missing and verify the process can write to it.
+ *
+ * @throws Error with a repair hint when the directory is not writable.
+ */
+function ensureDirWritable(p: string): void {
+  if (!existsSync(p)) mkdirSync(p, { recursive: true });
+  try {
+    accessSync(p, fsConstants.W_OK);
+  } catch {
+    throw new Error(
+      `${p} is not writable. Repair with: chown -R $USER ${p}`,
+    );
+  }
+}
+
+/**
+ * Copy `src` to `dst` through a per-process temp file and a rename, so readers
+ * of `dst` never observe a partially written file. The installed file is made
+ * executable (0o755).
+ *
+ * @throws the underlying fs error; the temp file is removed before rethrowing.
+ */
+function atomicInstall(src: string, dst: string): void {
+  const tmp = `${dst}.tmp.${process.pid}`;
+  if (existsSync(tmp)) unlinkSync(tmp);
+  try {
+    // The copy sits inside the try so a half-written temp file is cleaned up
+    // when the copy itself fails (e.g. disk full).
+    copyFileSync(src, tmp);
+    chmodSync(tmp, 0o755);
+    renameSync(tmp, dst);
+  } catch (err) {
+    try {
+      unlinkSync(tmp);
+    } catch {
+      // ignore
+    }
+    throw err;
+  }
+}
+
+/**
+ * Write a default `config.json` into the runtime root unless one already
+ * exists.
+ *
+ * @returns the path of the config file.
+ */
+function ensureDefaultConfig(): string {
+  const root = runtimeRoot();
+  ensureDirWritable(root);
+  const cfgPath = join(root, 'config.json');
+  if (existsSync(cfgPath)) return cfgPath;
+  const cfg = {
+    registry: DEFAULT_REGISTRY,
+    beacon: DEFAULT_BEACON,
+    socket: DEFAULT_SOCKET,
+    encrypt: true,
+    identity: join(root, 'identity.json'),
+  };
+  // Temp file + rename so a concurrent reader never sees a truncated config.
+  const tmp = `${cfgPath}.tmp.${process.pid}`;
+  writeFileSync(tmp, JSON.stringify(cfg, null, 2) + '\n');
+  renameSync(tmp, cfgPath);
+  return cfgPath;
+}
+
+// ---------------------------------------------------------------------------
+// Lock
+// ---------------------------------------------------------------------------
+
+/**
+ * O_EXCL-based lockfile with bounded retry.
+ *
+ * @param rt directory in which `.seed.lock` is created.
+ * @param timeoutMs how long to keep retrying before proceeding unlocked.
+ * @returns the fd to hand to `releaseLock`, or -1 when the lock could not be
+ *          taken within `timeoutMs`.
+ * @throws any `openSync` error other than EEXIST.
+ */
+function acquireLock(rt: string, timeoutMs = 5000): number {
+  const lockPath = join(rt, '.seed.lock');
+  const start = Date.now();
+  // Scratch cell for Atomics.wait, which blocks this thread without spinning.
+  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
+  while (true) {
+    try {
+      return openSync(lockPath, fsConstants.O_RDWR | fsConstants.O_CREAT | fsConstants.O_EXCL, 0o644);
+    } catch (err: unknown) {
+      const e = err as NodeJS.ErrnoException;
+      if (e.code !== 'EEXIST') throw err;
+    }
+    // Stale lock detection: > 30s old → reclaim.
+    try {
+      const age = Date.now() - statSync(lockPath).mtimeMs;
+      if (age > 30_000) {
+        unlinkSync(lockPath);
+        // Retry immediately only when the unlink really succeeded. A failed
+        // unlink falls through to the timeout check below, so a stale lock
+        // that cannot be removed can no longer spin this loop forever.
+        continue;
+      }
+    } catch {
+      // ignore; the holder released the lock or another seeder reclaimed it
+    }
+    if (Date.now() - start > timeoutMs) {
+      // Last resort: proceed without exclusive lock. Steady state seeders
+      // will be no-ops anyway, so worst case is two redundant copies.
+      return -1;
+    }
+    // Sleep ~50ms between attempts. Nothing ever notifies the cell, so the
+    // wait always runs to its timeout.
+    Atomics.wait(sleepCell, 0, 0, 50);
+  }
+}
+
+/** Close and remove the lock taken by `acquireLock`; a no-op for fd -1. */
+function releaseLock(rt: string, fd: number): void {
+  if (fd < 0) return;
+  try {
+    closeSync(fd);
+  } catch {
+    // ignore
+  }
+  try {
+    unlinkSync(join(rt, '.seed.lock'));
+  } catch {
+    // ignore
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/** Outcome of one `runSeeder` pass. */
+export interface SeedReport {
+  action: 'noop' | 'seed' | 'upgrade' | 'daemon-skip';
+  copied: string[];
+  skipped: string[];
+  bundledVersion: string;
+  installedVersion: string;
+  runtimeDir: string;
+}
+
+// Per-process memo so the seeder runs at most once unless forced.
+let _seededOnce = false;
+
+/**
+ * Seed the runtime directory once per process.
+ *
+ * @param force run the seeder again even if it already ran in this process.
+ * @returns the runtime bin directory.
+ */
+export function ensureRuntimeSeeded(force = false): string {
+  if (_seededOnce && !force) return runtimeBin();
+  const report = runSeeder();
+  _seededOnce = true;
+  return report.runtimeDir;
+}
+
+/**
+ * Mirror the bundled binaries into the runtime bin directory.
+ *
+ * Does nothing when the installed version is the same or newer and every
+ * required file is present, unless `PILOT_FORCE_SEED=1`. An existing
+ * `pilot-daemon` is left alone while the daemon looks alive.
+ *
+ * @returns a report of what was copied and what was skipped.
+ * @throws when a runtime directory is not writable, the platform is
+ *         unsupported, or a copy fails with anything but ETXTBSY/EBUSY.
+ */
+export function runSeeder(): SeedReport {
+  const rtRoot = runtimeRoot();
+  const rt = runtimeBin();
+  const pkg = pkgBinDir();
+
+  ensureDirWritable(rtRoot);
+  ensureDirWritable(rt);
+  ensureDefaultConfig();
+
+  const lockFd = acquireLock(rt);
+  try {
+    const bundledStr = bundledVersion();
+    const installedStr = runtimeVersion(rt);
+    const report: SeedReport = {
+      action: 'noop',
+      copied: [],
+      skipped: [],
+      bundledVersion: bundledStr,
+      installedVersion: installedStr,
+      runtimeDir: rt,
+    };
+
+    const bundled = semverTuple(bundledStr);
+    const installed = semverTuple(installedStr);
+    const required = [...BIN_NAMES, platformLibName()];
+
+    const force = process.env['PILOT_FORCE_SEED'] === '1';
+    // Same-or-newer already installed → just verify completeness.
+    if (!force && installed && bundled && compareSemver(bundled, installed) <= 0) {
+      const complete = required.every((name) => existsSync(join(rt, name)));
+      if (complete) {
+        report.action = 'noop';
+        return report;
+      }
+      // NOTE(review): when the install is newer but incomplete, the loop below
+      // overwrites every file with the older bundled copy — confirm that this
+      // is intended given the "never downgrades" goal.
+    }
+
+    report.action = installedStr ? 'upgrade' : 'seed';
+    const daemonBusy = probeDaemonLiveSync();
+
+    for (const name of required) {
+      const src = join(pkg, name);
+      if (!existsSync(src)) {
+        // Wrong-platform package or partial bundle.
+        continue;
+      }
+      const dst = join(rt, name);
+      if (name === 'pilot-daemon' && daemonBusy && existsSync(dst)) {
+        report.skipped.push(name);
+        report.action = 'daemon-skip';
+        continue;
+      }
+      try {
+        atomicInstall(src, dst);
+        report.copied.push(name);
+      } catch (err: unknown) {
+        const e = err as NodeJS.ErrnoException;
+        if (e.code === 'ETXTBSY' || e.code === 'EBUSY') {
+          report.skipped.push(name);
+          continue;
+        }
+        throw err;
+      }
+    }
+
+    // NOTE(review): the marker is written even when files were skipped, so a
+    // skipped pilot-daemon is not retried on later runs — confirm intended.
+    if (bundledStr) {
+      const verPath = join(rt, '.pilot-version');
+      const tmp = `${verPath}.tmp.${process.pid}`;
+      writeFileSync(tmp, bundledStr + '\n');
+      renameSync(tmp, verPath);
+    }
+
+    return report;
+  } finally {
+    releaseLock(rt, lockFd);
+  }
+}
+
+/**
+ * Absolute path of a seeded binary, falling back to the bundled copy.
+ *
+ * @throws Error when the binary exists in neither location.
+ */
+export function runtimeBinaryPath(name: string): string {
+  const rt = ensureRuntimeSeeded();
+  const p = join(rt, name);
+  if (existsSync(p)) return p;
+  // Last-ditch: run from the package.
+  const fallback = join(pkgBinDir(), name);
+  if (existsSync(fallback)) return fallback;
+  throw new Error(
+    `Binary '${name}' not found in ${rt} or ${pkgBinDir()}. ` +
+      `This package may be for a different platform.`,
+  );
+}
+
+/**
+ * Absolute path of the platform's libpilot, falling back to the bundled copy.
+ *
+ * @throws Error when the library exists in neither location.
+ */
+export function runtimeLibraryPath(): string {
+  const rt = ensureRuntimeSeeded();
+  const name = platformLibName();
+  const p = join(rt, name);
+  if (existsSync(p)) return p;
+  const fallback = join(pkgBinDir(), name);
+  if (existsSync(fallback)) return fallback;
+  throw new Error(`libpilot (${name}) not found in ${rt} or ${pkgBinDir()}.`);
+}
+
+/** Test helper. */
+export function _resetSeededMarker(): void {
+  _seededOnce = false;
+}
+
+/** Async daemon probe — exposed for callers that don't want the sync nc spawn.
*/ +export async function isDaemonLive(): Promise { + return probeDaemonLive(); +} + +/** For tests: expose the raw paths. */ +export const _internals = { + pkgBinDir, + runtimeRoot, + runtimeBin, + platformLibName, + bundledVersion, + runtimeVersion, + semverTuple, + compareSemver, + atomicInstall, +}; + +// Avoid unused-import warnings when this file is type-only consumed. +void dirname; diff --git a/sdk/node/tests/client.test.ts b/sdk/node/tests/client.test.ts index a41cf364..b743426c 100644 --- a/sdk/node/tests/client.test.ts +++ b/sdk/node/tests/client.test.ts @@ -86,6 +86,132 @@ function createFakeLib(): PilotLib & { }, PilotConnClose(_h: bigint) { return null as string | null; }, PilotSendTo(_h: bigint, _addr: string, _data: Buffer, _len: number) { return null as string | null; }, + + // ---- 1.9.1 additions ---- + + // Captured-arg fields for assertions (typed loosely on purpose) + _lastDialTimeout: null as null | { addr: string; ms: bigint }, + _lastSetReadDeadline: null as bigint | null, + _lastBroadcast: null as null | { + networkId: number; + port: number; + dataLen: number; + adminToken: string; + payload: Buffer; + }, + _lastNetworkJoin: null as null | { networkId: number; token: string }, + _lastNetworkInvite: null as null | { networkId: number; targetNodeId: number }, + _lastNetworkRespond: null as null | { networkId: number; accept: number }, + _lastManagedScore: null as null | { + networkId: number; + nodeId: number; + delta: number; + topic: string; + }, + _lastPolicySet: null as null | { networkId: number; policyJson: string }, + _lastMemberTagsSet: null as null | { + networkId: number; + nodeId: number; + tagsJson: string; + }, + + PilotHealth(_h: bigint) { + return fake._jsonReturns['PilotHealth'] ?? jsonOk({ ok: true, uptime_s: 42 }); + }, + PilotRotateKey(_h: bigint) { + return fake._jsonReturns['PilotRotateKey'] ?? 
jsonOk({ new_pubkey: 'abc' }); + }, + PilotDialTimeout(_h: bigint, addr: string, timeoutMs: bigint) { + fake._lastDialTimeout = { addr, ms: timeoutMs }; + return { handle: 11n, err: null as string | null }; + }, + PilotConnSetReadDeadline(_h: bigint, deadlineUnixNanos: bigint) { + fake._lastSetReadDeadline = deadlineUnixNanos; + return null as string | null; + }, + PilotBroadcast( + _h: bigint, + networkId: number, + port: number, + data: Buffer, + dataLen: number, + adminToken: string, + ) { + fake._lastBroadcast = { + networkId, + port, + dataLen, + adminToken, + payload: Buffer.from(data.subarray(0, dataLen)), + }; + return fake._jsonReturns['PilotBroadcast'] ?? null; + }, + PilotNetworkList(_h: bigint) { + return fake._jsonReturns['PilotNetworkList'] ?? jsonOk({ networks: [{ id: 0 }] }); + }, + PilotNetworkJoin(_h: bigint, networkId: number, token: string) { + fake._lastNetworkJoin = { networkId, token }; + return fake._jsonReturns['PilotNetworkJoin'] ?? jsonOk({ status: 'joined' }); + }, + PilotNetworkLeave(_h: bigint, _networkId: number) { + return fake._jsonReturns['PilotNetworkLeave'] ?? jsonOk({ status: 'left' }); + }, + PilotNetworkMembers(_h: bigint, _networkId: number) { + return fake._jsonReturns['PilotNetworkMembers'] ?? jsonOk({ members: [] }); + }, + PilotNetworkInvite(_h: bigint, networkId: number, targetNodeId: number) { + fake._lastNetworkInvite = { networkId, targetNodeId }; + return fake._jsonReturns['PilotNetworkInvite'] ?? jsonOk({ status: 'invited' }); + }, + PilotNetworkPollInvites(_h: bigint) { + return fake._jsonReturns['PilotNetworkPollInvites'] ?? jsonOk({ invites: [] }); + }, + PilotNetworkRespondInvite(_h: bigint, networkId: number, accept: number) { + fake._lastNetworkRespond = { networkId, accept }; + return fake._jsonReturns['PilotNetworkRespondInvite'] ?? 
jsonOk({ status: 'responded' }); + }, + PilotManagedScore( + _h: bigint, + networkId: number, + nodeId: number, + delta: number, + topic: string, + ) { + fake._lastManagedScore = { networkId, nodeId, delta, topic }; + return fake._jsonReturns['PilotManagedScore'] ?? jsonOk({ status: 'ok' }); + }, + PilotManagedStatus(_h: bigint, networkId: number) { + return fake._jsonReturns['PilotManagedStatus'] ?? jsonOk({ network_id: networkId }); + }, + PilotManagedRankings(_h: bigint, _networkId: number) { + return fake._jsonReturns['PilotManagedRankings'] ?? jsonOk({ rankings: [] }); + }, + PilotManagedForceCycle(_h: bigint, _networkId: number) { + return fake._jsonReturns['PilotManagedForceCycle'] ?? jsonOk({ status: 'cycled' }); + }, + PilotManagedReconcile(_h: bigint, networkId: number) { + return ( + fake._jsonReturns['PilotManagedReconcile'] ?? + jsonOk({ network_id: networkId, peers: [] }) + ); + }, + PilotPolicyGet(_h: bigint, networkId: number) { + return ( + fake._jsonReturns['PilotPolicyGet'] ?? + jsonOk({ network_id: networkId, policy: {} }) + ); + }, + PilotPolicySet(_h: bigint, networkId: number, policyJson: string) { + fake._lastPolicySet = { networkId, policyJson }; + return fake._jsonReturns['PilotPolicySet'] ?? jsonOk({ status: 'applied' }); + }, + PilotMemberTagsGet(_h: bigint, _networkId: number, _nodeId: number) { + return fake._jsonReturns['PilotMemberTagsGet'] ?? jsonOk({ tags: [] }); + }, + PilotMemberTagsSet(_h: bigint, networkId: number, nodeId: number, tagsJson: string) { + fake._lastMemberTagsSet = { networkId, nodeId, tagsJson }; + return fake._jsonReturns['PilotMemberTagsSet'] ?? 
jsonOk({ status: 'ok' }); + }, }; return fake; @@ -598,3 +724,362 @@ describe('Driver sendFile', () => { d.close(); }); }); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: health / rotate-key +// --------------------------------------------------------------------------- + +describe('Driver health', () => { + it('returns the daemon health blob', () => { + const d = new Driver(); + const r = d.health(); + expect(r['ok']).toBe(true); + expect(r['uptime_s']).toBe(42); + d.close(); + }); + + it('throws on health error', () => { + fakeLib._jsonReturns['PilotHealth'] = jsonErr('daemon down'); + const d = new Driver(); + expect(() => d.health()).toThrow('daemon down'); + d.close(); + }); +}); + +describe('Driver rotateKey', () => { + it('returns new key info', () => { + const d = new Driver(); + expect(d.rotateKey()).toEqual({ new_pubkey: 'abc' }); + d.close(); + }); + + it('throws on error', () => { + fakeLib._jsonReturns['PilotRotateKey'] = jsonErr('registry rejected'); + const d = new Driver(); + expect(() => d.rotateKey()).toThrow('registry rejected'); + d.close(); + }); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: dial timeout +// --------------------------------------------------------------------------- + +describe('Driver dial timeout', () => { + it('uses PilotDial when no timeout', () => { + const d = new Driver(); + const conn = d.dial('0:0001.0000.0002:8080'); + // Default PilotDial returns handle 10 + expect(conn).toBeInstanceOf(Conn); + expect(fakeLib._lastDialTimeout).toBeNull(); + conn.close(); + d.close(); + }); + + it('uses PilotDialTimeout when timeoutMs is given', () => { + const d = new Driver(); + const conn = d.dial('0:0001.0000.0002:8080', 2500); + expect(conn).toBeInstanceOf(Conn); + expect(fakeLib._lastDialTimeout).not.toBeNull(); + expect(fakeLib._lastDialTimeout?.addr).toBe('0:0001.0000.0002:8080'); + 
expect(fakeLib._lastDialTimeout?.ms).toBe(2500n); + conn.close(); + d.close(); + }); + + it('clamps negative timeoutMs to 0', () => { + const d = new Driver(); + d.dial('0:0001.0000.0002:8080', -10); + expect(fakeLib._lastDialTimeout?.ms).toBe(0n); + d.close(); + }); + + it('throws on dial-timeout error', () => { + fakeLib.PilotDialTimeout = (_h: bigint, _addr: string, _ms: bigint) => ({ + handle: 0n, + err: jsonErr('dial timeout'), + }); + const d = new Driver(); + expect(() => d.dial('bad:addr', 1000)).toThrow('dial timeout'); + d.close(); + }); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: Conn.setReadDeadline +// --------------------------------------------------------------------------- + +describe('Conn setReadDeadline', () => { + it('clears the deadline with null', () => { + const conn = new Conn(10n); + conn.setReadDeadline(null); + expect(fakeLib._lastSetReadDeadline).toBe(0n); + }); + + it('converts a Date to nanoseconds', () => { + const conn = new Conn(10n); + const d = new Date(1700000000500); // epoch ms; 1.7e12 ms * 1e6 ns/ms = 1.7e18 ns + conn.setReadDeadline(d); + expect(fakeLib._lastSetReadDeadline).toBe(BigInt(1700000000500) * 1_000_000n); + }); + + it('treats a number as ms-from-now', () => { + const before = Date.now(); + const conn = new Conn(10n); + conn.setReadDeadline(5000); + const after = Date.now(); + const got = fakeLib._lastSetReadDeadline ?? 
0n; + // Expected nanos must be in [before+5000, after+5000] ms range + const lo = BigInt(before + 5000) * 1_000_000n; + const hi = BigInt(after + 5000) * 1_000_000n; + expect(got >= lo).toBe(true); + expect(got <= hi).toBe(true); + }); + + it('throws if the connection is closed', () => { + const conn = new Conn(10n); + conn.close(); + expect(() => conn.setReadDeadline(null)).toThrow('connection closed'); + }); + + it('propagates errors from Go', () => { + fakeLib.PilotConnSetReadDeadline = (_h: bigint, _d: bigint) => jsonErr('bad handle'); + const conn = new Conn(10n); + expect(() => conn.setReadDeadline(null)).toThrow('bad handle'); + }); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: broadcast +// --------------------------------------------------------------------------- + +describe('Driver broadcast', () => { + it('passes networkId, port, payload, and admin token', () => { + const d = new Driver(); + d.broadcast(7, 1234, Buffer.from('hello'), 'secret'); + expect(fakeLib._lastBroadcast).not.toBeNull(); + expect(fakeLib._lastBroadcast?.networkId).toBe(7); + expect(fakeLib._lastBroadcast?.port).toBe(1234); + expect(fakeLib._lastBroadcast?.dataLen).toBe(5); + expect(fakeLib._lastBroadcast?.adminToken).toBe('secret'); + expect(fakeLib._lastBroadcast?.payload.toString()).toBe('hello'); + d.close(); + }); + + it('accepts a string payload', () => { + const d = new Driver(); + d.broadcast(0, 9999, 'ping', 'tok'); + expect(fakeLib._lastBroadcast?.payload.toString()).toBe('ping'); + d.close(); + }); + + it('throws when daemon rejects the broadcast', () => { + fakeLib._jsonReturns['PilotBroadcast'] = jsonErr('admin token required'); + const d = new Driver(); + expect(() => d.broadcast(0, 9000, Buffer.from('x'), '')).toThrow('admin token required'); + d.close(); + }); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: networks +// 
--------------------------------------------------------------------------- + +describe('Driver networks', () => { + it('networkList', () => { + const d = new Driver(); + const r = d.networkList(); + expect(r).toHaveProperty('networks'); + d.close(); + }); + + it('networkJoin passes networkId and token', () => { + const d = new Driver(); + expect(d.networkJoin(7, 'joinme')).toEqual({ status: 'joined' }); + expect(fakeLib._lastNetworkJoin).toEqual({ networkId: 7, token: 'joinme' }); + d.close(); + }); + + it('networkJoin defaults token to empty string', () => { + const d = new Driver(); + d.networkJoin(2); + expect(fakeLib._lastNetworkJoin?.token).toBe(''); + d.close(); + }); + + it('networkLeave', () => { + const d = new Driver(); + expect(d.networkLeave(7)).toEqual({ status: 'left' }); + d.close(); + }); + + it('networkMembers', () => { + const d = new Driver(); + expect(d.networkMembers(7)).toHaveProperty('members'); + d.close(); + }); + + it('networkInvite captures both ids', () => { + const d = new Driver(); + expect(d.networkInvite(7, 4242)).toEqual({ status: 'invited' }); + expect(fakeLib._lastNetworkInvite).toEqual({ networkId: 7, targetNodeId: 4242 }); + d.close(); + }); + + it('networkPollInvites', () => { + const d = new Driver(); + expect(d.networkPollInvites()).toHaveProperty('invites'); + d.close(); + }); + + it('networkRespondInvite accept=true → 1', () => { + const d = new Driver(); + d.networkRespondInvite(7, true); + expect(fakeLib._lastNetworkRespond).toEqual({ networkId: 7, accept: 1 }); + d.close(); + }); + + it('networkRespondInvite accept=false → 0', () => { + const d = new Driver(); + d.networkRespondInvite(7, false); + expect(fakeLib._lastNetworkRespond).toEqual({ networkId: 7, accept: 0 }); + d.close(); + }); + + it('networkJoin propagates daemon error', () => { + fakeLib._jsonReturns['PilotNetworkJoin'] = jsonErr('token rejected'); + const d = new Driver(); + expect(() => d.networkJoin(7, 'wrong')).toThrow('token rejected'); + d.close(); + 
}); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: managed +// --------------------------------------------------------------------------- + +describe('Driver managed', () => { + it('managedScore captures all args', () => { + const d = new Driver(); + d.managedScore(7, 4242, -3, 'spam'); + expect(fakeLib._lastManagedScore).toEqual({ + networkId: 7, + nodeId: 4242, + delta: -3, + topic: 'spam', + }); + d.close(); + }); + + it('managedScore default topic is empty', () => { + const d = new Driver(); + d.managedScore(0, 1, 5); + expect(fakeLib._lastManagedScore?.topic).toBe(''); + d.close(); + }); + + it('managedStatus echoes networkId', () => { + const d = new Driver(); + expect(d.managedStatus(42)).toEqual({ network_id: 42 }); + d.close(); + }); + + it('managedRankings', () => { + const d = new Driver(); + expect(d.managedRankings(42)).toHaveProperty('rankings'); + d.close(); + }); + + it('managedForceCycle', () => { + const d = new Driver(); + expect(d.managedForceCycle(42)).toEqual({ status: 'cycled' }); + d.close(); + }); + + it('managedReconcile', () => { + const d = new Driver(); + const r = d.managedReconcile(42); + expect(r['network_id']).toBe(42); + expect(r['peers']).toEqual([]); + d.close(); + }); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: policy +// --------------------------------------------------------------------------- + +describe('Driver policy', () => { + it('policyGet', () => { + const d = new Driver(); + expect(d.policyGet(7)).toEqual({ network_id: 7, policy: {} }); + d.close(); + }); + + it('policySet serializes a dict to JSON', () => { + const d = new Driver(); + d.policySet(7, { min_score: 3, tags: ['good'] }); + expect(fakeLib._lastPolicySet?.networkId).toBe(7); + expect(JSON.parse(fakeLib._lastPolicySet?.policyJson ?? 
'')).toEqual({ + min_score: 3, + tags: ['good'], + }); + d.close(); + }); + + it('policySet passes a string through unchanged', () => { + const d = new Driver(); + d.policySet(0, '{"raw":true}'); + expect(fakeLib._lastPolicySet?.policyJson).toBe('{"raw":true}'); + d.close(); + }); + + it('policySet decodes a Buffer to UTF-8', () => { + const d = new Driver(); + d.policySet(0, Buffer.from('{"raw":1}')); + expect(fakeLib._lastPolicySet?.policyJson).toBe('{"raw":1}'); + d.close(); + }); + + it('policySet propagates daemon error', () => { + fakeLib._jsonReturns['PilotPolicySet'] = jsonErr('invalid policy'); + const d = new Driver(); + expect(() => d.policySet(0, {})).toThrow('invalid policy'); + d.close(); + }); +}); + +// --------------------------------------------------------------------------- +// 1.9.1 additions: member tags +// --------------------------------------------------------------------------- + +describe('Driver memberTags', () => { + it('memberTagsGet', () => { + const d = new Driver(); + expect(d.memberTagsGet(7, 4242)).toHaveProperty('tags'); + d.close(); + }); + + it('memberTagsSet serializes the list', () => { + const d = new Driver(); + d.memberTagsSet(7, 4242, ['gpu', 'fast']); + expect(fakeLib._lastMemberTagsSet?.networkId).toBe(7); + expect(fakeLib._lastMemberTagsSet?.nodeId).toBe(4242); + expect(JSON.parse(fakeLib._lastMemberTagsSet?.tagsJson ?? '')).toEqual(['gpu', 'fast']); + d.close(); + }); + + it('memberTagsSet handles empty list', () => { + const d = new Driver(); + d.memberTagsSet(7, 4242, []); + expect(JSON.parse(fakeLib._lastMemberTagsSet?.tagsJson ?? 
'')).toEqual([]); + d.close(); + }); + + it('memberTagsSet propagates daemon error', () => { + fakeLib._jsonReturns['PilotMemberTagsSet'] = jsonErr('not admin'); + const d = new Driver(); + expect(() => d.memberTagsSet(7, 1, ['x'])).toThrow('not admin'); + d.close(); + }); +}); diff --git a/sdk/node/tests/runtime.test.ts b/sdk/node/tests/runtime.test.ts new file mode 100644 index 00000000..fe2a4459 --- /dev/null +++ b/sdk/node/tests/runtime.test.ts @@ -0,0 +1,298 @@ +/** + * Unit tests for the Node SDK runtime seeder (src/runtime.ts). + * + * Mirrors the Python seeder tests: covers the 5 state-machine states, + * atomic-rename behavior, config bootstrap, entry points, version compare. + * + * The tests redirect ~/.pilot/ to a tmp dir via the PILOT_HOME env var + * and point the package bin dir at a tmp dir via PILOT_PKG_BIN_DIR. + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { + chmodSync, + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + rmSync, + statSync, + writeFileSync, +} from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +// Import under test.
+import * as runtime from '../src/runtime.js'; + +const BIN_NAMES = ['pilotctl', 'pilot-daemon', 'pilot-gateway', 'pilot-updater'] as const; + +function platformLib(): string { + return runtime._internals.platformLibName(); +} + +function makeFakePkgBin(parentTmp: string, version: string): string { + const pkg = join(parentTmp, 'pkg-bin'); + mkdirSync(pkg, { recursive: true }); + for (const n of BIN_NAMES) { + const p = join(pkg, n); + writeFileSync(p, `#!/bin/sh\necho ${n} ${version}\n`); + chmodSync(p, 0o755); + } + const lib = join(pkg, platformLib()); + writeFileSync(lib, `LIB ${version}\n`); + chmodSync(lib, 0o755); + writeFileSync(join(pkg, '.pilot-version'), version + '\n'); + return pkg; +} + +let tmpRoot: string; +let fakeHome: string; +let pkgBin: string; +let restoreEnv: { home: string | undefined; pkg: string | undefined }; + +beforeEach(() => { + // Use a *short* tmp root so AF_UNIX paths fit in 104 chars on macOS. + tmpRoot = mkdtempSync(join('/tmp', 'pilot-rt-')); + fakeHome = join(tmpRoot, 'home', '.pilot'); + mkdirSync(fakeHome, { recursive: true }); + pkgBin = makeFakePkgBin(tmpRoot, '1.9.1'); + + restoreEnv = { + home: process.env['PILOT_HOME'], + pkg: process.env['PILOT_PKG_BIN_DIR'], + }; + process.env['PILOT_HOME'] = fakeHome; + process.env['PILOT_PKG_BIN_DIR'] = pkgBin; + + runtime._resetSeededMarker(); +}); + +afterEach(() => { + vi.restoreAllMocks(); + if (restoreEnv.home === undefined) delete process.env['PILOT_HOME']; + else process.env['PILOT_HOME'] = restoreEnv.home; + if (restoreEnv.pkg === undefined) delete process.env['PILOT_PKG_BIN_DIR']; + else process.env['PILOT_PKG_BIN_DIR'] = restoreEnv.pkg; + rmSync(tmpRoot, { recursive: true, force: true }); + runtime._resetSeededMarker(); +}); + +function setPkgBin(p: string): void { + process.env['PILOT_PKG_BIN_DIR'] = p; +} + +// --------------------------------------------------------------------------- +// State machine +// 
--------------------------------------------------------------------------- + +describe('seeder state machine', () => { + it('seeds everything when the runtime dir is empty', () => { + const report = runtime.runSeeder(); + expect(report.action).toBe('seed'); + const expected = new Set([...BIN_NAMES, platformLib()]); + for (const f of report.copied) expected.delete(f); + expect(expected.size).toBe(0); + + for (const n of [...BIN_NAMES, platformLib()]) { + expect(existsSync(join(fakeHome, 'bin', n))).toBe(true); + } + const v = readFileSync(join(fakeHome, 'bin', '.pilot-version'), 'utf8').trim(); + expect(v).toBe('1.9.1'); + }); + + it('is a noop when versions match', () => { + runtime.runSeeder(); + runtime._resetSeededMarker(); + const r2 = runtime.runSeeder(); + expect(r2.action).toBe('noop'); + expect(r2.copied).toEqual([]); + }); + + it('does not downgrade when bundled version is older', () => { + runtime.runSeeder(); + runtime._resetSeededMarker(); + + // Replace the package with an older version. 
+ const olderPkg = makeFakePkgBin(join(tmpRoot, 'older'), '1.8.0'); + setPkgBin(olderPkg); + + const r = runtime.runSeeder(); + expect(r.action).toBe('noop'); + expect(r.copied).toEqual([]); + const v = readFileSync(join(fakeHome, 'bin', '.pilot-version'), 'utf8').trim(); + expect(v).toBe('1.9.1'); + }); + + it('upgrades to a newer bundled version', () => { + runtime.runSeeder(); + runtime._resetSeededMarker(); + + const newerPkg = makeFakePkgBin(join(tmpRoot, 'newer'), '2.0.0'); + setPkgBin(newerPkg); + + const r = runtime.runSeeder(); + expect(r.action).toBe('upgrade'); + expect(r.copied.length).toBeGreaterThan(0); + const v = readFileSync(join(fakeHome, 'bin', '.pilot-version'), 'utf8').trim(); + expect(v).toBe('2.0.0'); + const ctlContents = readFileSync(join(fakeHome, 'bin', 'pilotctl'), 'utf8'); + expect(ctlContents).toContain('2.0.0'); + }); + + it('re-seeds files that disappeared from a same-version runtime', () => { + runtime.runSeeder(); + rmSync(join(fakeHome, 'bin', 'pilotctl')); + runtime._resetSeededMarker(); + const r = runtime.runSeeder(); + expect(r.copied).toContain('pilotctl'); + expect(existsSync(join(fakeHome, 'bin', 'pilotctl'))).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// Atomic install +// --------------------------------------------------------------------------- + +describe('atomic install', () => { + it('survives an in-flight reader of the target file', () => { + runtime.runSeeder(); + const target = join(fakeHome, 'bin', 'pilotctl'); + const before = readFileSync(target, 'utf8'); + + // Atomic-replace with new content. 
+ const newSrc = join(tmpRoot, 'newctl'); + writeFileSync(newSrc, 'DIFFERENT\n'); + runtime._internals.atomicInstall(newSrc, target); + + const after = readFileSync(target, 'utf8'); + expect(after).toBe('DIFFERENT\n'); + expect(after).not.toBe(before); + }); + + it('leaves no .tmp.* files behind', () => { + runtime.runSeeder(); + const dir = join(fakeHome, 'bin'); + const stat = statSync(dir); + expect(stat.isDirectory()).toBe(true); + // No leftover tmp files. + const fs = require('node:fs'); + const entries: string[] = fs.readdirSync(dir); + const leftovers = entries.filter((e: string) => e.includes('.tmp.')); + expect(leftovers).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// Config bootstrap +// --------------------------------------------------------------------------- + +describe('config bootstrap', () => { + it('writes a default config.json when missing', () => { + runtime.runSeeder(); + const cfgPath = join(fakeHome, 'config.json'); + const cfg = JSON.parse(readFileSync(cfgPath, 'utf8')); + expect(cfg.registry).toBe(runtime.DEFAULT_REGISTRY); + expect(cfg.beacon).toBe(runtime.DEFAULT_BEACON); + expect(cfg.socket).toBe(runtime.DEFAULT_SOCKET); + expect(cfg.encrypt).toBe(true); + // We never auto-set an email. 
+ expect('email' in cfg).toBe(false); + }); + + it('preserves an existing config.json', () => { + const cfgPath = join(fakeHome, 'config.json'); + writeFileSync(cfgPath, JSON.stringify({ email: 'foo@bar.com', preserved: true })); + runtime.runSeeder(); + const cfg = JSON.parse(readFileSync(cfgPath, 'utf8')); + expect(cfg.preserved).toBe(true); + expect(cfg.email).toBe('foo@bar.com'); + }); +}); + +// --------------------------------------------------------------------------- +// Public entry points +// --------------------------------------------------------------------------- + +describe('public entry points', () => { + it('runtimeBinaryPath seeds and returns the path', () => { + const p = runtime.runtimeBinaryPath('pilotctl'); + expect(p).toBe(join(fakeHome, 'bin', 'pilotctl')); + expect(existsSync(p)).toBe(true); + }); + + it('runtimeLibraryPath seeds and returns the path', () => { + const p = runtime.runtimeLibraryPath(); + expect(p).toBe(join(fakeHome, 'bin', platformLib())); + expect(existsSync(p)).toBe(true); + }); + + it('runtimeBinaryPath throws for unknown name', () => { + expect(() => runtime.runtimeBinaryPath('bogus')).toThrow(/bogus/); + }); + + it('ensureRuntimeSeeded short-circuits subsequent calls', () => { + runtime.ensureRuntimeSeeded(); + const before = statSync(join(fakeHome, 'bin', '.pilot-version')).mtimeMs; + // Sleep briefly to ensure mtime would change if it ran again. 
+ const t = Date.now() + 30; + while (Date.now() < t) { + // tight wait + } + runtime.ensureRuntimeSeeded(); + const after = statSync(join(fakeHome, 'bin', '.pilot-version')).mtimeMs; + expect(after).toBe(before); + }); +}); + +// --------------------------------------------------------------------------- +// SemVer compare +// --------------------------------------------------------------------------- + +describe('semver compare', () => { + const t = runtime._internals.semverTuple; + const cmp = runtime._internals.compareSemver; + + it('parses common forms', () => { + expect(t('1.9.1')).toEqual([1, 9, 1]); + expect(t('v1.9.1')).toEqual([1, 9, 1]); + expect(t('1.9.1-rc4')).toEqual([1, 9, 1]); + expect(t('1.9.1+meta')).toEqual([1, 9, 1]); + expect(t('')).toBeNull(); + expect(t('garbage')).toBeNull(); + }); + + it('orders correctly', () => { + expect(cmp(t('2.0.0'), t('1.9.99'))).toBe(1); + expect(cmp(t('1.9.0'), t('1.9.1'))).toBe(-1); + expect(cmp(t('1.9.1'), t('1.9.1'))).toBe(0); + // null < anything + expect(cmp(null, t('0.0.0'))).toBe(-1); + }); +}); + +// --------------------------------------------------------------------------- +// Wrong-platform handling +// --------------------------------------------------------------------------- + +describe('wrong-platform package', () => { + it('seeder skips missing files cleanly', () => { + // Build a pkg without the platform lib. + const incomplete = join(tmpRoot, 'incomplete'); + mkdirSync(incomplete, { recursive: true }); + for (const n of BIN_NAMES) { + const p = join(incomplete, n); + writeFileSync(p, '#!/bin/sh\n'); + chmodSync(p, 0o755); + } + writeFileSync(join(incomplete, '.pilot-version'), '1.9.1\n'); + setPkgBin(incomplete); + + const r = runtime.runSeeder(); + expect(r.copied).not.toContain(platformLib()); + + // runtimeLibraryPath should raise a clear error since lib is absent + // from both runtime dir and package. 
+ expect(() => runtime.runtimeLibraryPath()).toThrow(/libpilot/); + }); +}); diff --git a/sdk/node/tests/smoke_list_agents.mjs b/sdk/node/tests/smoke_list_agents.mjs new file mode 100644 index 00000000..2361df8b --- /dev/null +++ b/sdk/node/tests/smoke_list_agents.mjs @@ -0,0 +1,136 @@ +#!/usr/bin/env node +/** + * End-to-end smoke test for the Node SDK against a real daemon. + * + * Identical contract to the Python smoke script (sdk/python/scripts/ + * smoke_list_agents.py): construct Driver → info → handshake list-agents + * → send_message('/data {...}') → poll ~/.pilot/inbox/ for the reply. + * + * Run with the just-built SDK: + * cd sdk/node && npx tsc && node tests/smoke_list_agents.mjs + */ + +import { homedir } from 'node:os'; +import { join } from 'node:path'; +import { readdirSync, readFileSync, statSync } from 'node:fs'; + +import { Driver, PilotError } from '../dist/client.js'; + +const LIST_AGENTS_HOST = 'list-agents'; +const LIST_AGENTS_NODE_ID = 16398; +const INBOX_DIR = join(homedir(), '.pilot', 'inbox'); +const WAIT_MS = 8_000; + +function newestInboxFileSince(afterMtime) { + let best = null; + let bestMtime = 0; + for (const name of readdirSync(INBOX_DIR)) { + if (!name.endsWith('.json')) continue; + const p = join(INBOX_DIR, name); + const st = statSync(p); + if (st.mtimeMs > afterMtime && st.mtimeMs > bestMtime) { + best = p; + bestMtime = st.mtimeMs; + } + } + return best; +} + +async function sleep(ms) { + return new Promise((r) => setTimeout(r, ms)); +} + +async function main() { + console.log('[1/5] Constructing Driver…'); + let d; + try { + d = new Driver(); + } catch (e) { + if (e instanceof PilotError) { + console.log(` FAIL: cannot reach daemon: ${e.message}`); + process.exit(2); + } + throw e; + } + console.log(' OK'); + + console.log('[2/5] Calling info()…'); + const info = d.info(); + console.log( + ` node_id=${info.node_id} addr=${info.address} peers=${info.peers}`, + ); + + console.log(`[3/5] Handshake list-agents (node
${LIST_AGENTS_NODE_ID})…`); + try { + const h = d.handshake(LIST_AGENTS_NODE_ID, 'node sdk smoke test'); + console.log(` OK: ${JSON.stringify(h)}`); + } catch (e) { + const msg = String(e?.message ?? e).toLowerCase(); + if (msg.includes('already') || msg.includes('trust')) { + console.log(` OK (already trusted): ${e}`); + } else { + console.log(` FAIL: ${e}`); + process.exit(3); + } + } + + console.log('[4/5] sendMessage → list-agents …'); + const tStart = Date.now() / 1000 - 1; + let result; + try { + result = d.sendMessage( + LIST_AGENTS_HOST, + '/data {"search":"","limit":1}', + 'text', + ); + } catch (e) { + console.log(` FAIL: sendMessage: ${e}`); + process.exit(4); + } + console.log(` sent: ${JSON.stringify(result)}`); + + console.log(`[5/5] Waiting up to ${WAIT_MS / 1000}s for inbox reply…`); + const deadline = Date.now() + WAIT_MS; + let replyFile = null; + while (Date.now() < deadline) { + replyFile = newestInboxFileSince(tStart * 1000); + if (replyFile) break; + await sleep(500); + } + if (!replyFile) { + console.log(' FAIL: no inbox reply within window'); + process.exit(5); + } + console.log(` reply file: ${replyFile}`); + + let envelope; + try { + envelope = JSON.parse(readFileSync(replyFile, 'utf8')); + } catch (e) { + console.log(` FAIL: cannot parse reply: ${e}`); + process.exit(6); + } + console.log( + ` agent=${envelope.agent} command=${envelope.command} ok=${envelope.ok}`, + ); + + if (typeof envelope.data === 'string') { + try { + const payload = JSON.parse(envelope.data); + const total = + payload.total ?? payload.count ?? (payload.tiers?.free?.items?.length ?? 
null); + if (total !== null) console.log(` list-agents total: ${total}`); + } catch { + console.log(' (data not JSON; envelope OK)'); + } + } + + d.close(); + console.log('\nSMOKE TEST PASSED (node)'); + process.exit(0); +} + +main().catch((e) => { + console.error(`unhandled: ${e}`); + process.exit(99); +}); diff --git a/sdk/python/MANIFEST.in b/sdk/python/MANIFEST.in index 84f4e5c6..fe134aee 100644 --- a/sdk/python/MANIFEST.in +++ b/sdk/python/MANIFEST.in @@ -3,8 +3,11 @@ include README.md include LICENSE include CHANGELOG.md -# Include all binaries in bin/ directory +# Include all binaries in bin/ directory (the seed cache). +# Dotfiles like .pilot-version need an explicit pattern because some +# setuptools versions skip them under recursive-include. recursive-include pilotprotocol/bin * +include pilotprotocol/bin/.pilot-version # Include type stubs if any recursive-include pilotprotocol *.pyi diff --git a/sdk/python/README.md b/sdk/python/README.md index 9ac610f1..988793f2 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -273,7 +273,7 @@ See `examples/python_sdk/` for comprehensive examples: - **`basic_usage.py`** — Connection, identity, trust management - **`data_exchange_demo.py`** — Send messages, files, JSON - **`event_stream_demo.py`** — Pub/sub patterns -- **`task_submit_demo.py`** — Task delegation and polo score +- **`task_submit_demo.py`** — Task delegation - **`pydantic_ai_agent.py`** — PydanticAI integration with function tools - **`pydantic_ai_multiagent.py`** — Multi-agent collaboration system diff --git a/sdk/python/pilotprotocol/_runtime.py b/sdk/python/pilotprotocol/_runtime.py new file mode 100644 index 00000000..73be4c2a --- /dev/null +++ b/sdk/python/pilotprotocol/_runtime.py @@ -0,0 +1,382 @@ +"""Runtime environment seeder for the Pilot Protocol Python SDK. 
+ +Both the CLI shims (``cli.py``) and the FFI loader (``client._load_lib``) +funnel through :func:`ensure_runtime_seeded`, which idempotently mirrors +the binaries shipped inside the wheel into ``~/.pilot/bin/``. + +Design goals: +- The wheel is the *seed cache*; ``~/.pilot/bin/`` is the canonical runtime. +- No install-time code runs; seeding happens lazily on first SDK use. +- Concurrency-safe (flock) and crash-safe (atomic rename). +- Never downgrades; never replaces a running daemon binary. +- Coexists with ``install.sh`` (same layout, same ``.pilot-version`` marker). +""" + +from __future__ import annotations + +import errno +import json +import os +import platform +import shutil +import socket +import sys +import tempfile +import threading +from pathlib import Path +from typing import Optional + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_BIN_NAMES = ("pilotctl", "pilot-daemon", "pilot-gateway", "pilot-updater") +_LIB_NAMES = { + "Darwin": "libpilot.dylib", + "Linux": "libpilot.so", + "Windows": "libpilot.dll", +} + +DEFAULT_REGISTRY = "34.71.57.205:9000" +DEFAULT_BEACON = "34.71.57.205:9001" +DEFAULT_SOCKET = "/tmp/pilot.sock" + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +def _pkg_bin_dir() -> Path: + """Where the wheel ships its bundled binaries (the seed cache).""" + return Path(__file__).resolve().parent / "bin" + + +def _runtime_root() -> Path: + """Canonical runtime dir. 
Honours ``PILOT_HOME`` for CI / multi-tenant.""" + override = os.environ.get("PILOT_HOME") + if override: + return Path(override).expanduser() + return Path.home() / ".pilot" + + +def _runtime_bin() -> Path: + return _runtime_root() / "bin" + + +def _platform_lib_name() -> str: + name = _LIB_NAMES.get(platform.system()) + if name is None: + raise OSError(f"unsupported platform: {platform.system()}") + return name + + +# --------------------------------------------------------------------------- +# Version helpers +# --------------------------------------------------------------------------- + +def _semver_tuple(v: str) -> tuple[int, ...]: + """Parse a SemVer-ish string into a comparable tuple. Unparseable → ().""" + s = (v or "").strip().lstrip("v").split("-", 1)[0].split("+", 1)[0] + if not s: + return () + parts = [] + for p in s.split("."): + try: + parts.append(int(p)) + except ValueError: + return () + return tuple(parts) + + +def _bundled_version() -> str: + """Version of the binaries bundled in this wheel.""" + f = _pkg_bin_dir() / ".pilot-version" + if f.is_file(): + try: + return f.read_text().strip() + except OSError: + pass + # Fall back to the package metadata if the marker file is missing. 
+ try: + from importlib.metadata import version as _pkg_version + return _pkg_version("pilotprotocol") + except Exception: + return "" + + +def _runtime_version(rt: Path) -> str: + f = rt / ".pilot-version" + if f.is_file(): + try: + return f.read_text().strip() + except OSError: + return "" + return "" + + +# --------------------------------------------------------------------------- +# Daemon liveness probe +# --------------------------------------------------------------------------- + +def _daemon_running() -> bool: + """True if a pilot daemon is reachable on its IPC socket.""" + sock_path = DEFAULT_SOCKET + try: + with open(_runtime_root() / "config.json") as f: + cfg = json.load(f) + sock_path = cfg.get("socket", sock_path) or sock_path + except (OSError, ValueError): + pass + + if not Path(sock_path).exists(): + return False + s = socket.socket(socket.AF_UNIX) + s.settimeout(0.2) + try: + s.connect(sock_path) + return True + except OSError: + return False + finally: + try: + s.close() + except OSError: + pass + + +# --------------------------------------------------------------------------- +# Atomic file ops +# --------------------------------------------------------------------------- + +def _atomic_install(src: Path, dst: Path) -> None: + """Copy *src* → *dst* atomically, surviving in-flight execs. + + Writes to ``{dst.name}.tmp.{pid}.{thread_id}`` then ``os.replace()`` over the target. + On POSIX this unlinks the old inode while leaving any running process + that mapped it untouched. + """ + tmp = dst.with_name(f"{dst.name}.tmp.{os.getpid()}.{threading.get_ident()}") + if tmp.exists(): + tmp.unlink() + shutil.copy2(src, tmp) + try: + tmp.chmod(0o755) + os.replace(tmp, dst) + except OSError: + try: + tmp.unlink() + except OSError: + pass + raise + + +def _ensure_dir_writable(p: Path) -> None: + """Create *p* if it does not exist; raise a clear error if we cannot + write to it (e.g.
owned by root after a botched install).""" + p.mkdir(parents=True, exist_ok=True) + if not os.access(p, os.W_OK): + raise PermissionError( + f"{p} is not writable by user {os.getuid()}. " + f"Repair with: chown -R $USER {p}" + ) + + +# --------------------------------------------------------------------------- +# Config seeding +# --------------------------------------------------------------------------- + +def _ensure_default_config() -> Path: + """Make sure ``~/.pilot/config.json`` exists. Never overwrites an + existing one — install.sh or the user may have set an email. + """ + root = _runtime_root() + _ensure_dir_writable(root) + cfg_path = root / "config.json" + if cfg_path.is_file(): + return cfg_path + cfg = { + "registry": DEFAULT_REGISTRY, + "beacon": DEFAULT_BEACON, + "socket": DEFAULT_SOCKET, + "encrypt": True, + "identity": str(root / "identity.json"), + } + tmp = cfg_path.with_name( + f"config.json.tmp.{os.getpid()}.{threading.get_ident()}" + ) + tmp.write_text(json.dumps(cfg, indent=2) + "\n") + try: + os.replace(tmp, cfg_path) + except FileNotFoundError: + # Another thread won the race; that's fine. + if tmp.exists(): + try: + tmp.unlink() + except OSError: + pass + return cfg_path + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +class SeedReport: + """Summary of what a seeder pass did. Useful for tests + diagnostics.""" + + def __init__(self) -> None: + self.copied: list[str] = [] + self.skipped: list[str] = [] + self.action: str = "noop" # one of: noop, seed, upgrade, daemon-skip + self.bundled_version: str = "" + self.installed_version: str = "" + self.runtime_dir: Path = _runtime_bin() + + +_SEEDED_ONCE = False + + +def ensure_runtime_seeded(force: bool = False) -> Path: + """Idempotently mirror bundled binaries into ``~/.pilot/bin/``. + + Returns the runtime bin dir. 
Safe to call on every CLI invocation and + every Driver() construction; the steady state is a single stat() + + string compare. + + Set ``force=True`` to re-run even if this process has already seeded. + """ + global _SEEDED_ONCE + if _SEEDED_ONCE and not force: + return _runtime_bin() + + report = run_seeder() + _SEEDED_ONCE = True + return report.runtime_dir + + +def run_seeder() -> SeedReport: + """Run one seeder pass and return a structured report.""" + report = SeedReport() + rt_root = _runtime_root() + rt = _runtime_bin() + pkg = _pkg_bin_dir() + + # Make sure ~/.pilot/ exists and is writable. + _ensure_dir_writable(rt_root) + _ensure_dir_writable(rt) + _ensure_default_config() + + # Cross-platform fcntl shim. flock is POSIX-only; on Windows we use + # msvcrt.locking. Tests run on POSIX so the Windows path is best-effort. + lock_path = rt / ".seed.lock" + lock_path.touch(exist_ok=True) + lock_fd = os.open(lock_path, os.O_RDWR) + try: + if os.name == "posix": + import fcntl + fcntl.flock(lock_fd, fcntl.LOCK_EX) + else: # pragma: no cover - Windows + import msvcrt + msvcrt.locking(lock_fd, msvcrt.LK_LOCK, 1) + + bundled_str = _bundled_version() + installed_str = _runtime_version(rt) + report.bundled_version = bundled_str + report.installed_version = installed_str + + bundled = _semver_tuple(bundled_str) + installed = _semver_tuple(installed_str) + + # Decide overall action. + force = os.environ.get("PILOT_FORCE_SEED") == "1" + if not force and installed and bundled and bundled <= installed: + # Same or newer already installed. Still verify each file exists. 
+ need_seed = False + for name in _BIN_NAMES + (_platform_lib_name(),): + if not (rt / name).is_file(): + need_seed = True + break + if not need_seed: + report.action = "noop" + return report + + report.action = "upgrade" if installed else "seed" + daemon_busy = _daemon_running() + + for name in _BIN_NAMES + (_platform_lib_name(),): + src = pkg / name + if not src.is_file(): + # Wrong-platform wheel or partial bundle. Skip — caller will + # surface a clear error when the missing binary is needed. + continue + dst = rt / name + if name == "pilot-daemon" and daemon_busy and dst.is_file(): + report.skipped.append(name) + report.action = "daemon-skip" + continue + try: + _atomic_install(src, dst) + report.copied.append(name) + except OSError as e: + # ETXTBSY can hit Linux despite atomic rename if a tool has + # the file mmap'd. Skip with a notice; caller can retry. + if e.errno in (errno.ETXTBSY, errno.EBUSY): + report.skipped.append(name) + continue + raise + + # Update the marker last; a partial seed leaves the old marker. + if bundled_str: + ver_path = rt / ".pilot-version" + tmp = ver_path.with_name(f".pilot-version.tmp.{os.getpid()}") + tmp.write_text(bundled_str + "\n") + os.replace(tmp, ver_path) + + return report + finally: + try: + if os.name == "posix": + import fcntl + fcntl.flock(lock_fd, fcntl.LOCK_UN) + finally: + os.close(lock_fd) + + +def runtime_binary(name: str) -> Path: + """Resolve a binary by name, seeding if needed. + + Use this from CLI shims; it returns the path to exec. + """ + rt = ensure_runtime_seeded() + p = rt / name + if not p.is_file(): + # Last-ditch fallback: run from the wheel itself. + fallback = _pkg_bin_dir() / name + if fallback.is_file(): + return fallback + raise FileNotFoundError( + f"Binary {name!r} not found in {rt} or {_pkg_bin_dir()}. " + f"This wheel may be for a different platform." 
+ ) + return p + + +def runtime_library() -> Path: + """Resolve libpilot.{so,dylib,dll}, seeding if needed.""" + rt = ensure_runtime_seeded() + name = _platform_lib_name() + p = rt / name + if p.is_file(): + return p + fallback = _pkg_bin_dir() / name + if fallback.is_file(): + return fallback + raise FileNotFoundError( + f"libpilot ({name}) not found in {rt} or {_pkg_bin_dir()}." + ) + + +def reset_seeded_marker() -> None: + """Test helper: forget that this process has already seeded.""" + global _SEEDED_ONCE + _SEEDED_ONCE = False diff --git a/sdk/python/pilotprotocol/cli.py b/sdk/python/pilotprotocol/cli.py index 85452f91..8a5eb658 100644 --- a/sdk/python/pilotprotocol/cli.py +++ b/sdk/python/pilotprotocol/cli.py @@ -1,169 +1,55 @@ -"""Command-line interface wrappers for Pilot Protocol binaries. - -This module provides entry points for the bundled Go binaries: -- pilotctl: CLI tool for managing the daemon -- pilot-daemon: Background service -- pilot-gateway: IP traffic bridge - -Each wrapper: -1. Ensures ~/.pilot/ directory exists -2. Creates default config.json if missing -3. Executes the bundled binary with all arguments passed through +"""Command-line entry points for the Pilot Protocol CLI binaries. + +The wheel ships pre-built Go binaries inside ``pilotprotocol/bin/``. On +first call, :mod:`pilotprotocol._runtime` mirrors those into +``~/.pilot/bin/`` (the canonical runtime directory shared with +``install.sh``) and these wrappers exec the seeded copy. + +This means: +- pip-installed and curl-installed users converge on the same daemon. +- Multiple venvs, multiple SDK versions: highest version wins, no + parallel binary trees. +- Uninstalling the wheel never deletes ``~/.pilot/`` (identity, config, + daemon state are preserved). """ -import json -import os import subprocess import sys -from pathlib import Path - - -def _ensure_pilot_env(): - """Ensure ~/.pilot/ directory and config.json exist. 
- - Creates: - - ~/.pilot/ directory - - ~/.pilot/config.json with default settings (if not present) - - This function is called before every binary execution to ensure - the runtime environment is properly initialized. - """ - # Get user's home directory - home = Path.home() - pilot_dir = home / ".pilot" - config_file = pilot_dir / "config.json" - - # Create ~/.pilot/ if it doesn't exist - pilot_dir.mkdir(parents=True, exist_ok=True) - - # Create default config.json if it doesn't exist - if not config_file.exists(): - default_config = { - "registry": "34.71.57.205:9000", - "beacon": "34.71.57.205:9001", - "socket": "/tmp/pilot.sock", - "encrypt": True, - "identity": str(pilot_dir / "identity.json") - } - - with open(config_file, 'w') as f: - json.dump(default_config, f, indent=2) - - -def _get_binary_path(binary_name: str) -> Path: - """Get absolute path to a bundled binary. - - Args: - binary_name: Name of the binary (e.g., 'pilotctl', 'pilot-daemon') - - Returns: - Absolute path to the binary - - Raises: - FileNotFoundError: If binary not found in package - """ - # Find the bin/ directory relative to this file - package_dir = Path(__file__).resolve().parent - bin_dir = package_dir / "bin" - binary_path = bin_dir / binary_name - - if not binary_path.exists(): - raise FileNotFoundError( - f"Binary '{binary_name}' not found at {binary_path}\n" - f"Expected location: {bin_dir}\n" - "The wheel may not have been built correctly." - ) - - return binary_path - - -def run_pilotctl(): - """Entry point for pilotctl CLI tool. - - This is called when the user runs 'pilotctl' from the command line. - All arguments are passed through to the Go binary. 
- - Example: - $ pilotctl daemon start --hostname my-agent - $ pilotctl info - $ pilotctl ping other-agent - """ - # Ensure environment is set up - _ensure_pilot_env() - - # Get path to bundled binary - binary = _get_binary_path("pilotctl") - - # Execute the binary with all arguments - # subprocess.call() returns the exit code directly - exit_code = subprocess.call([str(binary)] + sys.argv[1:]) - - # Exit with the same code as the binary - sys.exit(exit_code) - - -def run_daemon(): - """Entry point for pilot-daemon background service. - - This is called when the user runs 'pilot-daemon' from the command line. - All arguments are passed through to the Go binary. - - Example: - $ pilot-daemon -registry 34.71.57.205:9000 -beacon 34.71.57.205:9001 - $ pilot-daemon -hostname my-agent -public - """ - # Ensure environment is set up - _ensure_pilot_env() - - # Get path to bundled binary - binary = _get_binary_path("pilot-daemon") - - # Execute the binary with all arguments - exit_code = subprocess.call([str(binary)] + sys.argv[1:]) - - # Exit with the same code as the binary - sys.exit(exit_code) - - -def run_gateway(): - """Entry point for pilot-gateway IP traffic bridge. - - This is called when the user runs 'pilot-gateway' from the command line. - All arguments are passed through to the Go binary. 
- - Example: - $ pilot-gateway --ports 80,3000 - """ - # Ensure environment is set up - _ensure_pilot_env() - # Get path to bundled binary - binary = _get_binary_path("pilot-gateway") +from ._runtime import ensure_runtime_seeded, runtime_binary + + +def _exec_runtime_binary(name: str) -> None: + """Seed ``~/.pilot/bin/`` if needed, then exec the named binary.""" + ensure_runtime_seeded() + binary = runtime_binary(name) + sys.exit(subprocess.call([str(binary)] + sys.argv[1:])) - # Execute the binary with all arguments - exit_code = subprocess.call([str(binary)] + sys.argv[1:]) - # Exit with the same code as the binary - sys.exit(exit_code) +def run_pilotctl() -> None: + """Entry point for the ``pilotctl`` console script.""" + _exec_runtime_binary("pilotctl") -def run_updater(): - """Entry point for pilot-updater auto-update sidecar. +def run_daemon() -> None: + """Entry point for the ``pilot-daemon`` console script. - This is called when the user runs 'pilot-updater' from the command line. - All arguments are passed through to the Go binary. + Note: the daemon needs an email address (passed via ``--email`` or + set in ``~/.pilot/config.json``) to register at the registry. The + SDK does not auto-prompt for one — call:: - Example: - $ pilot-updater -install-dir ~/.pilot/bin + pilotctl daemon start --email you@example.com + + on first launch, after which the email is cached in ``config.json``. 
""" - # Ensure environment is set up - _ensure_pilot_env() + _exec_runtime_binary("pilot-daemon") + - # Get path to bundled binary - binary = _get_binary_path("pilot-updater") +def run_gateway() -> None: + """Entry point for the ``pilot-gateway`` console script.""" + _exec_runtime_binary("pilot-gateway") - # Execute the binary with all arguments - exit_code = subprocess.call([str(binary)] + sys.argv[1:]) - # Exit with the same code as the binary - sys.exit(exit_code) +def run_updater() -> None: + """Entry point for the ``pilot-updater`` console script.""" + _exec_runtime_binary("pilot-updater") diff --git a/sdk/python/pilotprotocol/client.py b/sdk/python/pilotprotocol/client.py index 556d032c..a256bf19 100644 --- a/sdk/python/pilotprotocol/client.py +++ b/sdk/python/pilotprotocol/client.py @@ -101,8 +101,24 @@ def _find_library() -> str: def _load_lib() -> ctypes.CDLL: # pragma: no cover - path = _find_library() - return ctypes.CDLL(path) + """Load libpilot. + + Order: + 1. ``PILOT_LIB_PATH`` (explicit override) — bypasses the seeder. + 2. The seeded library at ``~/.pilot/bin/`` (canonical runtime). + 3. Legacy fallback via :func:`_find_library` (system search etc.). + """ + env = os.environ.get("PILOT_LIB_PATH") + if env: + return ctypes.CDLL(_find_library()) + + try: + from ._runtime import runtime_library + return ctypes.CDLL(str(runtime_library())) + except Exception: + # Seeder failed (read-only home, etc.) — fall back to legacy lookup + # so the SDK still loads from the wheel-bundled location. 
+ return ctypes.CDLL(_find_library()) _lib: Optional[ctypes.CDLL] = None @@ -168,8 +184,10 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover # JSON-RPC (single *C.char return → c_void_p) for name in ( - "PilotInfo", "PilotPendingHandshakes", "PilotTrustedPeers", + "PilotInfo", "PilotHealth", "PilotRotateKey", + "PilotPendingHandshakes", "PilotTrustedPeers", "PilotDeregister", "PilotRecvFrom", + "PilotNetworkList", "PilotNetworkPollInvites", ): fn = getattr(lib, name) fn.argtypes = [ctypes.c_uint64] @@ -209,6 +227,9 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover lib.PilotDial.argtypes = [ctypes.c_uint64, ctypes.c_char_p] lib.PilotDial.restype = _HandleErr + lib.PilotDialTimeout.argtypes = [ctypes.c_uint64, ctypes.c_char_p, ctypes.c_uint64] + lib.PilotDialTimeout.restype = _HandleErr + # Listen: (handle, uint16) -> struct{handle, err} lib.PilotListen.argtypes = [ctypes.c_uint64, ctypes.c_uint16] lib.PilotListen.restype = _HandleErr @@ -220,7 +241,7 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover lib.PilotListenerClose.argtypes = [ctypes.c_uint64] lib.PilotListenerClose.restype = ctypes.c_void_p - # Conn Read / Write / Close + # Conn Read / Write / Close / SetReadDeadline lib.PilotConnRead.argtypes = [ctypes.c_uint64, ctypes.c_int] lib.PilotConnRead.restype = _ReadResult @@ -230,10 +251,69 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover lib.PilotConnClose.argtypes = [ctypes.c_uint64] lib.PilotConnClose.restype = ctypes.c_void_p + lib.PilotConnSetReadDeadline.argtypes = [ctypes.c_uint64, ctypes.c_int64] + lib.PilotConnSetReadDeadline.restype = ctypes.c_void_p + # SendTo: (handle, string, void*, int) -> *char lib.PilotSendTo.argtypes = [ctypes.c_uint64, ctypes.c_char_p, ctypes.c_void_p, ctypes.c_int] lib.PilotSendTo.restype = ctypes.c_void_p + # Broadcast: (handle, uint16 net, uint16 port, void* data, int len, *char token) -> *char + lib.PilotBroadcast.argtypes = [ + 
ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint16, + ctypes.c_void_p, ctypes.c_int, ctypes.c_char_p, + ] + lib.PilotBroadcast.restype = ctypes.c_void_p + + # Networks (handle, uint16) -> *char + for name in ("PilotNetworkLeave", "PilotNetworkMembers"): + fn = getattr(lib, name) + fn.argtypes = [ctypes.c_uint64, ctypes.c_uint16] + fn.restype = ctypes.c_void_p + + # PilotNetworkJoin: (handle, uint16, *char token) -> *char + lib.PilotNetworkJoin.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_char_p] + lib.PilotNetworkJoin.restype = ctypes.c_void_p + + # PilotNetworkInvite: (handle, uint16, uint32) -> *char + lib.PilotNetworkInvite.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32] + lib.PilotNetworkInvite.restype = ctypes.c_void_p + + # PilotNetworkRespondInvite: (handle, uint16, int) -> *char + lib.PilotNetworkRespondInvite.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_int] + lib.PilotNetworkRespondInvite.restype = ctypes.c_void_p + + # Managed (handle, uint16) -> *char + for name in ( + "PilotManagedStatus", "PilotManagedRankings", + "PilotManagedForceCycle", "PilotManagedReconcile", + "PilotPolicyGet", + ): + fn = getattr(lib, name) + fn.argtypes = [ctypes.c_uint64, ctypes.c_uint16] + fn.restype = ctypes.c_void_p + + # PilotManagedScore: (handle, uint16 net, uint32 node, int32 delta, *char topic) + lib.PilotManagedScore.argtypes = [ + ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32, + ctypes.c_int32, ctypes.c_char_p, + ] + lib.PilotManagedScore.restype = ctypes.c_void_p + + # PilotPolicySet: (handle, uint16, *char json) + lib.PilotPolicySet.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_char_p] + lib.PilotPolicySet.restype = ctypes.c_void_p + + # PilotMemberTagsGet: (handle, uint16 net, uint32 node) -> *char + lib.PilotMemberTagsGet.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32] + lib.PilotMemberTagsGet.restype = ctypes.c_void_p + + # PilotMemberTagsSet: (handle, uint16 net, uint32 node, *char tagsJson) -> 
*char + lib.PilotMemberTagsSet.argtypes = [ + ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32, ctypes.c_char_p, + ] + lib.PilotMemberTagsSet.restype = ctypes.c_void_p + # --------------------------------------------------------------------------- # Error helpers @@ -351,6 +431,23 @@ def close(self) -> None: if "error" in obj: raise PilotError(obj["error"]) + def set_read_deadline(self, deadline: Optional[float]) -> None: + """Set the read deadline. + + ``deadline`` is a Unix timestamp in seconds (e.g. ``time.time() + 5``) + or ``None`` to clear. After the deadline passes, ``read()`` returns + a ``PilotError`` with a "deadline exceeded" message. + """ + if self._closed: + raise PilotError("connection closed") + if deadline is None: + nanos = 0 + else: + nanos = int(deadline * 1_000_000_000) + lib = _get_lib() + ptr = lib.PilotConnSetReadDeadline(self._h, ctypes.c_int64(nanos)) + _check_err(ptr) + def __enter__(self) -> "Conn": return self @@ -472,6 +569,14 @@ def info(self) -> dict[str, Any]: """Return the daemon's status information.""" return self._call_json("PilotInfo") + def health(self) -> dict[str, Any]: + """Lightweight health check from the daemon.""" + return self._call_json("PilotHealth") + + def rotate_key(self) -> dict[str, Any]: + """Rotate the daemon's Ed25519 identity at the registry.""" + return self._call_json("PilotRotateKey") + # -- Handshake / Trust -- def handshake(self, node_id: int, justification: str = "") -> dict[str, Any]: @@ -540,10 +645,18 @@ def disconnect(self, conn_id: int) -> None: # -- Streams -- - def dial(self, addr: str) -> Conn: - """Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT").""" + def dial(self, addr: str, timeout: Optional[float] = None) -> Conn: + """Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT"). + + If ``timeout`` is given (seconds), the dial is cancelled if the daemon + does not respond within that window. 
+ """ lib = _get_lib() - res = lib.PilotDial(self._h, addr.encode()) + if timeout is None: + res = lib.PilotDial(self._h, addr.encode()) + else: + ms = max(0, int(timeout * 1000)) + res = lib.PilotDialTimeout(self._h, addr.encode(), ctypes.c_uint64(ms)) if res.err: raw = ctypes.string_at(res.err) lib.FreeString(res.err) @@ -576,6 +689,152 @@ def recv_from(self) -> dict[str, Any]: """ return self._call_json("PilotRecvFrom") + def broadcast( + self, + network_id: int, + port: int, + data: bytes, + admin_token: str, + ) -> None: + """Broadcast an unreliable datagram to every member of a network. + + Requires the daemon's admin token; an empty or mismatched token is + rejected. Permitted on every network including network 0 (backbone). + """ + lib = _get_lib() + buf = ctypes.create_string_buffer(data) + ptr = lib.PilotBroadcast( + self._h, + ctypes.c_uint16(network_id), + ctypes.c_uint16(port), + buf, + ctypes.c_int(len(data)), + admin_token.encode(), + ) + _check_err(ptr) + + # -- Networks -- + + def network_list(self) -> dict[str, Any]: + """List all networks known to the registry.""" + return self._call_json("PilotNetworkList") + + def network_join(self, network_id: int, token: str = "") -> dict[str, Any]: + """Join a network by ID, optionally with a token for token-gated networks.""" + return self._call_json( + "PilotNetworkJoin", ctypes.c_uint16(network_id), token.encode() + ) + + def network_leave(self, network_id: int) -> dict[str, Any]: + """Leave a network by ID.""" + return self._call_json("PilotNetworkLeave", ctypes.c_uint16(network_id)) + + def network_members(self, network_id: int) -> dict[str, Any]: + """List all members of a network.""" + return self._call_json("PilotNetworkMembers", ctypes.c_uint16(network_id)) + + def network_invite(self, network_id: int, target_node_id: int) -> dict[str, Any]: + """Invite a target node to a network (requires admin token on daemon).""" + return self._call_json( + "PilotNetworkInvite", + ctypes.c_uint16(network_id), + 
ctypes.c_uint32(target_node_id), + ) + + def network_poll_invites(self) -> dict[str, Any]: + """Return pending network invites for this node.""" + return self._call_json("PilotNetworkPollInvites") + + def network_respond_invite(self, network_id: int, accept: bool) -> dict[str, Any]: + """Accept or reject a pending network invite.""" + return self._call_json( + "PilotNetworkRespondInvite", + ctypes.c_uint16(network_id), + ctypes.c_int(1 if accept else 0), + ) + + # -- Managed networks -- + + def managed_score( + self, + network_id: int, + node_id: int, + delta: int, + topic: str = "", + ) -> dict[str, Any]: + """Adjust a peer's score in a managed network.""" + return self._call_json( + "PilotManagedScore", + ctypes.c_uint16(network_id), + ctypes.c_uint32(node_id), + ctypes.c_int32(delta), + topic.encode(), + ) + + def managed_status(self, network_id: int) -> dict[str, Any]: + """Return the status of a managed network engine.""" + return self._call_json("PilotManagedStatus", ctypes.c_uint16(network_id)) + + def managed_rankings(self, network_id: int) -> dict[str, Any]: + """Return ranked peers in a managed network.""" + return self._call_json("PilotManagedRankings", ctypes.c_uint16(network_id)) + + def managed_force_cycle(self, network_id: int) -> dict[str, Any]: + """Force a prune/fill cycle in a managed network.""" + return self._call_json("PilotManagedForceCycle", ctypes.c_uint16(network_id)) + + def managed_reconcile(self, network_id: int) -> dict[str, Any]: + """Refresh the managed network's peer set without running a policy cycle.""" + return self._call_json("PilotManagedReconcile", ctypes.c_uint16(network_id)) + + # -- Policy -- + + def policy_get(self, network_id: int) -> dict[str, Any]: + """Retrieve the active policy for a network.""" + return self._call_json("PilotPolicyGet", ctypes.c_uint16(network_id)) + + def policy_set(self, network_id: int, policy: Any) -> dict[str, Any]: + """Apply a policy document to a network. 
+ + ``policy`` may be a dict, a JSON string, or pre-encoded bytes. + """ + if isinstance(policy, (bytes, bytearray)): + payload = bytes(policy) + elif isinstance(policy, str): + payload = policy.encode() + else: + payload = json.dumps(policy).encode() + return self._call_json( + "PilotPolicySet", ctypes.c_uint16(network_id), payload + ) + + # -- Member tags -- + + def member_tags_get(self, network_id: int, node_id: int) -> dict[str, Any]: + """Retrieve admin-assigned member tags for a node in a network.""" + return self._call_json( + "PilotMemberTagsGet", + ctypes.c_uint16(network_id), + ctypes.c_uint32(node_id), + ) + + def member_tags_set( + self, network_id: int, node_id: int, tags: list[str] + ) -> dict[str, Any]: + """Set admin-assigned member tags for a node in a network.""" + return self._call_json( + "PilotMemberTagsSet", + ctypes.c_uint16(network_id), + ctypes.c_uint32(node_id), + json.dumps(tags).encode(), + ) + + # -- Identity -- + + def rotate_identity(self) -> dict[str, Any]: + """Alias for :meth:`rotate_key`.""" + return self.rotate_key() + # -- High-level service methods -- def send_message(self, target: str, data: bytes, msg_type: str = "text") -> dict[str, Any]: diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml index 52eb9a03..bf89dcca 100644 --- a/sdk/python/pyproject.toml +++ b/sdk/python/pyproject.toml @@ -9,12 +9,13 @@ include-package-data = true [tool.setuptools.package-data] pilotprotocol = [ "bin/*", + "bin/.pilot-version", "py.typed" ] [project] name = "pilotprotocol" -version = "0.1.1" # Auto-updated by CI workflow +version = "1.9.1" # Auto-updated by CI workflow description = "Python SDK for Pilot Protocol - the network stack for AI agents" readme = "README.md" requires-python = ">=3.10" @@ -68,7 +69,6 @@ Documentation = "https://pilotprotocol.network/docs/" Repository = "https://github.com/TeoSlayer/pilotprotocol" "Bug Tracker" = "https://github.com/TeoSlayer/pilotprotocol/issues" Changelog = 
"https://github.com/TeoSlayer/pilotprotocol/blob/main/sdk/python/CHANGELOG.md" -"Live Dashboard" = "https://polo.pilotprotocol.network" [project.optional-dependencies] dev = [ diff --git a/sdk/python/scripts/build-binaries.sh b/sdk/python/scripts/build-binaries.sh index 2665cbcf..137c7ae1 100755 --- a/sdk/python/scripts/build-binaries.sh +++ b/sdk/python/scripts/build-binaries.sh @@ -6,6 +6,9 @@ set -euo pipefail cd "$(dirname "$0")/../../.." # Go to repo root +# Read SDK version from pyproject.toml so the seeder marker matches it. +SDK_VERSION=$(awk -F\" '/^version = /{print $2; exit}' sdk/python/pyproject.toml) + # Detect platform OS=$(uname -s | tr '[:upper:]' '[:lower:]') ARCH=$(uname -m) @@ -63,6 +66,27 @@ cd ../.. echo " ✓ Built: $OUTPUT_DIR/libpilot.$EXT" echo "" +# 6. Write .pilot-version marker so the runtime seeder can compare against +# whatever's already installed at ~/.pilot/bin/. +echo "$SDK_VERSION" > "$OUTPUT_DIR/.pilot-version" +echo "6. Wrote $OUTPUT_DIR/.pilot-version → $SDK_VERSION" +echo "" + +# 7. macOS ad-hoc codesign + strip quarantine. Mirrors the main release +# workflow so SDK-shipped binaries don't trigger Gatekeeper "killed: 9" +# or "cannot be opened because Apple cannot check it for malicious +# software" when downloaded via pip. +if [ "$OS" = "darwin" ]; then + echo "7. macOS ad-hoc codesign + strip quarantine..." 
+ for bin in "$OUTPUT_DIR/pilot-daemon" "$OUTPUT_DIR/pilotctl" "$OUTPUT_DIR/pilot-gateway" "$OUTPUT_DIR/pilot-updater" "$OUTPUT_DIR/libpilot.$EXT"; do + codesign --force --deep --sign - "$bin" + xattr -cr "$bin" || true + codesign -dv "$bin" 2>&1 | grep -E "Signature|Authority|TeamIdentifier" | head -1 || true + done + echo " ✓ codesigned ${OS} binaries" + echo "" +fi + # Show sizes echo "================================================================" echo "Build Summary:" diff --git a/sdk/python/tests/smoke_list_agents.py b/sdk/python/tests/smoke_list_agents.py new file mode 100644 index 00000000..60bbbbeb --- /dev/null +++ b/sdk/python/tests/smoke_list_agents.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +"""End-to-end smoke test for the Python SDK against a real daemon. + +Test plan (run against the locally running pilot daemon): +1. Construct ``Driver`` — proves the seeder wired ``libpilot.dylib`` correctly. +2. Call ``info()`` — confirms the JSON-RPC path works. +3. Idempotently handshake the list-agents host (already trusted is OK). +4. ``send_message(target='list-agents', data='/data {...}', msg_type='text')`` + — exercises hostname resolve + dial + frame protocol. +5. Wait for the asynchronous reply to land in ``~/.pilot/inbox/`` and print + a digest of the highest-tier specialist count. + +The script exits 0 on success, non-zero on any failure. It writes the +reply file path to stdout so a caller can grep for it. +""" + +from __future__ import annotations + +import json +import os +import sys +import time +from pathlib import Path + +# Allow running straight from a source checkout. 
+HERE = Path(__file__).resolve().parent +sys.path.insert(0, str(HERE.parent)) + +from pilotprotocol import Driver, PilotError # noqa: E402 + +LIST_AGENTS_HOST = "list-agents" +LIST_AGENTS_NODE_ID = 16398 +INBOX_DIR = Path.home() / ".pilot" / "inbox" +WAIT_SECONDS = 8 + + +def _newest_inbox_file(after_mtime: float) -> Path | None: + if not INBOX_DIR.is_dir(): + return None + candidates = [] + for f in INBOX_DIR.glob("*.json"): + try: + st = f.stat() + except OSError: + continue + if st.st_mtime > after_mtime: + candidates.append((st.st_mtime, f)) + if not candidates: + return None + candidates.sort(reverse=True) + return candidates[0][1] + + +def main() -> int: + print("[1/5] Constructing Driver…") + try: + d = Driver() + except PilotError as e: + print(f" FAIL: cannot reach daemon: {e}") + return 2 + print(" OK") + + print("[2/5] Calling info()…") + info = d.info() + print(f" node_id={info.get('node_id')} addr={info.get('address')} peers={info.get('peers')}") + + print(f"[3/5] Handshake list-agents (node {LIST_AGENTS_NODE_ID})…") + try: + h = d.handshake(LIST_AGENTS_NODE_ID, "python sdk smoke test") + print(f" OK: {h}") + except PilotError as e: + # Already trusted is acceptable. 
+ msg = str(e).lower() + if "already" in msg or "trust" in msg: + print(f" OK (already trusted): {e}") + else: + print(f" FAIL: {e}") + return 3 + + print("[4/5] send_message → list-agents …") + record_mtime = time.time() - 1 + try: + result = d.send_message( + LIST_AGENTS_HOST, + b'/data {"search":"","limit":1}', + msg_type="text", + ) + except PilotError as e: + print(f" FAIL: send_message: {e}") + return 4 + print(f" sent: {result}") + + print(f"[5/5] Waiting up to {WAIT_SECONDS}s for inbox reply…") + deadline = time.time() + WAIT_SECONDS + reply_file: Path | None = None + while time.time() < deadline: + reply_file = _newest_inbox_file(record_mtime) + if reply_file is not None: + break + time.sleep(0.5) + if reply_file is None: + print(" FAIL: no inbox reply within window") + return 5 + + print(f" reply file: {reply_file}") + try: + envelope = json.loads(reply_file.read_text()) + except (OSError, ValueError) as e: + print(f" FAIL: cannot parse reply: {e}") + return 6 + + print(f" agent={envelope.get('agent')} command={envelope.get('command')} ok={envelope.get('ok')}") + + # Try to extract the total count if the payload is a list-agents response. 
+ raw = envelope.get("data") + if isinstance(raw, str): + try: + payload = json.loads(raw) + total = payload.get("total") or payload.get("count") + if total is None: + items = payload.get("tiers", {}).get("free", {}).get("items", []) + total = len(items) + print(f" list-agents total: {total}") + except (ValueError, AttributeError): + print(" (data not JSON; envelope OK)") + + d.close() + print("\nSMOKE TEST PASSED (python)") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index e8def76d..ad4be836 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -66,6 +66,17 @@ def _mock_write_result(n: int = 0, err: bytes | None = None): return types.SimpleNamespace(n=n, err=err) +def _unwrap(x): + """Coerce a ctypes-wrapped scalar into its plain Python value. + + The Driver wraps ints in ctypes types (c_uint16, c_int32, etc.) before + calling into the C library. Real ctypes converts those to plain ints at + the FFI boundary, but our FakeLib receives them as objects, so we strip + the wrapper here for clean assertions. 
+ """ + return x.value if hasattr(x, "value") else x + + class FakeLib: """Mimics the ctypes.CDLL object with controllable return values.""" @@ -161,6 +172,101 @@ def PilotConnClose(self, ch): def PilotSendTo(self, h, addr, data, data_len): return None + # --- 1.9.1 additions --- + + def PilotHealth(self, h): + return self._json_returns.get("PilotHealth", _json_ok({"ok": True, "uptime_s": 42})) + + def PilotRotateKey(self, h): + return self._json_returns.get("PilotRotateKey", _json_ok({"new_pubkey": "abc"})) + + def PilotDialTimeout(self, h, addr, timeout_ms): + # capture for assertions + self._last_dial_timeout = (addr, _unwrap(timeout_ms)) + return _HandleErr(handle=11, err=None) + + def PilotConnSetReadDeadline(self, h, deadline_unix_nanos): + # capture deadline for assertions + self._last_set_read_deadline = _unwrap(deadline_unix_nanos) + return None + + def PilotBroadcast(self, h, network_id, port, data, data_len, admin_token): + self._last_broadcast = { + "network_id": _unwrap(network_id), + "port": _unwrap(port), + "data_len": _unwrap(data_len), + "admin_token": admin_token, + } + return self._json_returns.get("PilotBroadcast", None) + + def PilotNetworkList(self, h): + return self._json_returns.get("PilotNetworkList", _json_ok({"networks": [{"id": 0}]})) + + def PilotNetworkJoin(self, h, network_id, token): + self._last_network_join = (_unwrap(network_id), token) + return self._json_returns.get("PilotNetworkJoin", _json_ok({"status": "joined"})) + + def PilotNetworkLeave(self, h, network_id): + return self._json_returns.get("PilotNetworkLeave", _json_ok({"status": "left"})) + + def PilotNetworkMembers(self, h, network_id): + return self._json_returns.get("PilotNetworkMembers", _json_ok({"members": []})) + + def PilotNetworkInvite(self, h, network_id, target_node_id): + self._last_network_invite = (_unwrap(network_id), _unwrap(target_node_id)) + return self._json_returns.get("PilotNetworkInvite", _json_ok({"status": "invited"})) + + def 
PilotNetworkPollInvites(self, h): + return self._json_returns.get("PilotNetworkPollInvites", _json_ok({"invites": []})) + + def PilotNetworkRespondInvite(self, h, network_id, accept): + self._last_network_respond = (_unwrap(network_id), _unwrap(accept)) + return self._json_returns.get( + "PilotNetworkRespondInvite", _json_ok({"status": "responded"}) + ) + + def PilotManagedScore(self, h, network_id, node_id, delta, topic): + self._last_managed_score = ( + _unwrap(network_id), _unwrap(node_id), _unwrap(delta), topic, + ) + return self._json_returns.get("PilotManagedScore", _json_ok({"status": "ok"})) + + def PilotManagedStatus(self, h, network_id): + return self._json_returns.get( + "PilotManagedStatus", _json_ok({"network_id": _unwrap(network_id)}) + ) + + def PilotManagedRankings(self, h, network_id): + return self._json_returns.get("PilotManagedRankings", _json_ok({"rankings": []})) + + def PilotManagedForceCycle(self, h, network_id): + return self._json_returns.get("PilotManagedForceCycle", _json_ok({"status": "cycled"})) + + def PilotManagedReconcile(self, h, network_id): + return self._json_returns.get( + "PilotManagedReconcile", + _json_ok({"network_id": _unwrap(network_id), "peers": []}), + ) + + def PilotPolicyGet(self, h, network_id): + return self._json_returns.get( + "PilotPolicyGet", + _json_ok({"network_id": _unwrap(network_id), "policy": {}}), + ) + + def PilotPolicySet(self, h, network_id, policy_json): + self._last_policy_set = (_unwrap(network_id), policy_json) + return self._json_returns.get("PilotPolicySet", _json_ok({"status": "applied"})) + + def PilotMemberTagsGet(self, h, network_id, node_id): + return self._json_returns.get("PilotMemberTagsGet", _json_ok({"tags": []})) + + def PilotMemberTagsSet(self, h, network_id, node_id, tags_json): + self._last_member_tags_set = ( + _unwrap(network_id), _unwrap(node_id), tags_json, + ) + return self._json_returns.get("PilotMemberTagsSet", _json_ok({"status": "ok"})) + @pytest.fixture(autouse=True) def 
_mock_lib(monkeypatch): @@ -624,8 +730,301 @@ def test_del_calls_close(self, fake_lib): def test_del_catches_exceptions(self, fake_lib): """Test Listener.__del__ catches close() exceptions.""" fake_lib.PilotListenerClose = lambda h: _json_err("error") - + ln = client_mod.Listener(20) # Should not raise even though close() would raise ln.__del__() assert ln._closed + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: health / rotate-key +# --------------------------------------------------------------------------- + +class TestDriverHealth: + def test_health_success(self, fake_lib): + d = client_mod.Driver() + r = d.health() + assert r["ok"] is True + assert r["uptime_s"] == 42 + + def test_health_error(self, fake_lib): + fake_lib._json_returns["PilotHealth"] = _json_err("daemon down") + d = client_mod.Driver() + with pytest.raises(PilotError, match="daemon down"): + d.health() + + +class TestDriverRotateKey: + def test_rotate_key(self, fake_lib): + d = client_mod.Driver() + r = d.rotate_key() + assert r["new_pubkey"] == "abc" + + def test_rotate_identity_alias(self, fake_lib): + d = client_mod.Driver() + # rotate_identity should delegate to rotate_key + r = d.rotate_identity() + assert r["new_pubkey"] == "abc" + + def test_rotate_key_error(self, fake_lib): + fake_lib._json_returns["PilotRotateKey"] = _json_err("registry rejected") + d = client_mod.Driver() + with pytest.raises(PilotError, match="registry rejected"): + d.rotate_key() + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: dial timeout +# --------------------------------------------------------------------------- + +class TestDriverDialTimeout: + def test_dial_without_timeout_uses_pilot_dial(self, fake_lib): + # No timeout → original PilotDial path (handle=10) + d = client_mod.Driver() + conn = d.dial("0:0001.0000.0002:8080") + assert conn._h == 10 + + def 
test_dial_with_timeout_uses_pilot_dial_timeout(self, fake_lib): + d = client_mod.Driver() + conn = d.dial("0:0001.0000.0002:8080", timeout=2.5) + # Timeout path returns handle=11 + assert conn._h == 11 + # 2.5 s = 2500 ms + assert fake_lib._last_dial_timeout == (b"0:0001.0000.0002:8080", 2500) + + def test_dial_timeout_zero_floor(self, fake_lib): + d = client_mod.Driver() + d.dial("0:0001.0000.0002:8080", timeout=-1.0) + # Negative → clamped to 0 ms + _, ms = fake_lib._last_dial_timeout + assert ms == 0 + + def test_dial_timeout_error(self, fake_lib): + fake_lib.PilotDialTimeout = lambda h, addr, ms: _mock_handle_err( + handle=0, err=_json_err("dial timeout") + ) + d = client_mod.Driver() + with pytest.raises(PilotError, match="dial timeout"): + d.dial("bad:addr", timeout=1.0) + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: Conn.set_read_deadline +# --------------------------------------------------------------------------- + +class TestConnReadDeadline: + def test_clear_deadline_with_none(self, fake_lib): + conn = client_mod.Conn(10) + conn.set_read_deadline(None) + assert fake_lib._last_set_read_deadline == 0 + + def test_set_deadline_seconds_to_nanos(self, fake_lib): + conn = client_mod.Conn(10) + # 1700000000.5 s → 1_700_000_000_500_000_000 ns + conn.set_read_deadline(1_700_000_000.5) + assert fake_lib._last_set_read_deadline == 1_700_000_000_500_000_000 + + def test_set_deadline_on_closed_conn_raises(self, fake_lib): + conn = client_mod.Conn(10) + conn.close() + with pytest.raises(PilotError, match="closed"): + conn.set_read_deadline(0.0) + + def test_set_deadline_propagates_error(self, fake_lib): + fake_lib.PilotConnSetReadDeadline = lambda h, d: _json_err("bad handle") + conn = client_mod.Conn(10) + with pytest.raises(PilotError, match="bad handle"): + conn.set_read_deadline(None) + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: broadcast +# 
--------------------------------------------------------------------------- + +class TestDriverBroadcast: + def test_broadcast_passes_args(self, fake_lib): + d = client_mod.Driver() + d.broadcast(7, 1234, b"hello", "secret") + captured = fake_lib._last_broadcast + assert captured["network_id"] == 7 + assert captured["port"] == 1234 + assert captured["data_len"] == 5 + assert captured["admin_token"] == b"secret" + + def test_broadcast_propagates_error(self, fake_lib): + fake_lib._json_returns["PilotBroadcast"] = _json_err("admin token required") + d = client_mod.Driver() + with pytest.raises(PilotError, match="admin token required"): + d.broadcast(0, 9000, b"x", "") + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: networks +# --------------------------------------------------------------------------- + +class TestDriverNetworks: + def test_network_list(self, fake_lib): + d = client_mod.Driver() + r = d.network_list() + assert "networks" in r + + def test_network_join_passes_args(self, fake_lib): + d = client_mod.Driver() + r = d.network_join(7, "joinme") + assert r["status"] == "joined" + assert fake_lib._last_network_join == (7, b"joinme") + + def test_network_join_default_empty_token(self, fake_lib): + d = client_mod.Driver() + d.network_join(2) + assert fake_lib._last_network_join == (2, b"") + + def test_network_leave(self, fake_lib): + d = client_mod.Driver() + r = d.network_leave(7) + assert r["status"] == "left" + + def test_network_members(self, fake_lib): + d = client_mod.Driver() + r = d.network_members(7) + assert "members" in r + + def test_network_invite(self, fake_lib): + d = client_mod.Driver() + r = d.network_invite(7, 4242) + assert r["status"] == "invited" + assert fake_lib._last_network_invite == (7, 4242) + + def test_network_poll_invites(self, fake_lib): + d = client_mod.Driver() + r = d.network_poll_invites() + assert "invites" in r + + def test_network_respond_invite_accept(self, fake_lib): 
+ d = client_mod.Driver() + d.network_respond_invite(7, True) + assert fake_lib._last_network_respond == (7, 1) + + def test_network_respond_invite_reject(self, fake_lib): + d = client_mod.Driver() + d.network_respond_invite(7, False) + assert fake_lib._last_network_respond == (7, 0) + + def test_network_join_error(self, fake_lib): + fake_lib._json_returns["PilotNetworkJoin"] = _json_err("token rejected") + d = client_mod.Driver() + with pytest.raises(PilotError, match="token rejected"): + d.network_join(7, "wrong") + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: managed networks +# --------------------------------------------------------------------------- + +class TestDriverManaged: + def test_managed_score_passes_args(self, fake_lib): + d = client_mod.Driver() + r = d.managed_score(7, 4242, -3, "spam") + assert r["status"] == "ok" + assert fake_lib._last_managed_score == (7, 4242, -3, b"spam") + + def test_managed_score_default_topic(self, fake_lib): + d = client_mod.Driver() + d.managed_score(0, 1, 5) + assert fake_lib._last_managed_score == (0, 1, 5, b"") + + def test_managed_score_negative_delta_preserved(self, fake_lib): + # int32 delta — make sure negative numbers survive + d = client_mod.Driver() + d.managed_score(0, 1, -100000, "x") + assert fake_lib._last_managed_score[2] == -100000 + + def test_managed_status(self, fake_lib): + d = client_mod.Driver() + r = d.managed_status(42) + assert r["network_id"] == 42 + + def test_managed_rankings(self, fake_lib): + d = client_mod.Driver() + r = d.managed_rankings(42) + assert "rankings" in r + + def test_managed_force_cycle(self, fake_lib): + d = client_mod.Driver() + r = d.managed_force_cycle(42) + assert r["status"] == "cycled" + + def test_managed_reconcile(self, fake_lib): + d = client_mod.Driver() + r = d.managed_reconcile(42) + assert r["network_id"] == 42 + assert r["peers"] == [] + + +# 
--------------------------------------------------------------------------- +# 1.9.1 additions: policy +# --------------------------------------------------------------------------- + +class TestDriverPolicy: + def test_policy_get(self, fake_lib): + d = client_mod.Driver() + r = d.policy_get(7) + assert r["network_id"] == 7 + + def test_policy_set_dict_serializes_to_json(self, fake_lib): + d = client_mod.Driver() + d.policy_set(7, {"min_score": 3, "tags": ["good"]}) + net_id, payload = fake_lib._last_policy_set + assert net_id == 7 + # The payload was JSON-serialized + assert json.loads(payload) == {"min_score": 3, "tags": ["good"]} + + def test_policy_set_string_passthrough(self, fake_lib): + d = client_mod.Driver() + d.policy_set(0, '{"raw":true}') + _, payload = fake_lib._last_policy_set + assert payload == b'{"raw":true}' + + def test_policy_set_bytes_passthrough(self, fake_lib): + d = client_mod.Driver() + d.policy_set(0, b'{"raw":1}') + _, payload = fake_lib._last_policy_set + assert payload == b'{"raw":1}' + + def test_policy_set_error(self, fake_lib): + fake_lib._json_returns["PilotPolicySet"] = _json_err("invalid policy") + d = client_mod.Driver() + with pytest.raises(PilotError, match="invalid policy"): + d.policy_set(0, {}) + + +# --------------------------------------------------------------------------- +# 1.9.1 additions: member tags +# --------------------------------------------------------------------------- + +class TestDriverMemberTags: + def test_member_tags_get(self, fake_lib): + d = client_mod.Driver() + r = d.member_tags_get(7, 4242) + assert "tags" in r + + def test_member_tags_set_serializes_list(self, fake_lib): + d = client_mod.Driver() + d.member_tags_set(7, 4242, ["gpu", "fast"]) + net_id, node_id, tags_json = fake_lib._last_member_tags_set + assert net_id == 7 + assert node_id == 4242 + assert json.loads(tags_json) == ["gpu", "fast"] + + def test_member_tags_set_empty_list(self, fake_lib): + d = client_mod.Driver() + 
d.member_tags_set(7, 4242, []) + _, _, tags_json = fake_lib._last_member_tags_set + assert json.loads(tags_json) == [] + + def test_member_tags_set_error(self, fake_lib): + fake_lib._json_returns["PilotMemberTagsSet"] = _json_err("not admin") + d = client_mod.Driver() + with pytest.raises(PilotError, match="not admin"): + d.member_tags_set(7, 1, ["x"]) diff --git a/sdk/python/tests/test_runtime.py b/sdk/python/tests/test_runtime.py new file mode 100644 index 00000000..7ba31b2f --- /dev/null +++ b/sdk/python/tests/test_runtime.py @@ -0,0 +1,403 @@ +"""Unit tests for the runtime seeder (pilotprotocol/_runtime.py). + +These tests exercise the 5 seeder states (missing, older, equal, newer, +corrupt), the daemon-running guard, the lock contention path, and the +atomic-rename behavior. They do NOT require a real daemon or libpilot.so; +the bundled "binaries" are stub files written into a tmpdir. +""" + +from __future__ import annotations + +import json +import os +import platform as platform_mod +import socket +import sys +import tempfile +import threading +import time +from pathlib import Path + +import pytest + +import pilotprotocol._runtime as rt + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_fake_pkg_bin(tmp: Path, version: str, names: list[str]) -> Path: + """Build a fake bundled bin/ directory with stub executables and marker.""" + pkg = tmp / "pkg-bin" + pkg.mkdir(parents=True, exist_ok=True) + for n in names: + (pkg / n).write_text(f"#!/bin/sh\necho {n} {version}\n") + (pkg / n).chmod(0o755) + (pkg / ".pilot-version").write_text(version + "\n") + return pkg + + +def _platform_lib() -> str: + return rt._LIB_NAMES[platform_mod.system()] + + +@pytest.fixture(autouse=True) +def _isolate(tmp_path, monkeypatch): + """Redirect ~/.pilot/ to a tmpdir and the package bin/ to another. 
+ + Also stubs the daemon-liveness probe to "not running" so tests do not + pick up the real pilot daemon that may be running on the developer + machine. Tests that need the probe enabled re-monkeypatch ``_daemon_running``. + """ + fake_home = tmp_path / "home" + fake_home.mkdir() + monkeypatch.setenv("PILOT_HOME", str(fake_home / ".pilot")) + + pkg = _make_fake_pkg_bin( + tmp_path, + "1.9.1", + list(rt._BIN_NAMES) + [_platform_lib()], + ) + monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg) + monkeypatch.setattr(rt, "_daemon_running", lambda: False) + rt.reset_seeded_marker() + yield {"home": fake_home, "pkg": pkg, "tmp": tmp_path, "monkeypatch": monkeypatch} + rt.reset_seeded_marker() + + +# --------------------------------------------------------------------------- +# State machine +# --------------------------------------------------------------------------- + +class TestSeederStates: + def test_missing_seeds_everything(self, _isolate): + report = rt.run_seeder() + assert report.action == "seed" + # All four executables + libpilot should be copied + assert set(report.copied) == set(rt._BIN_NAMES) | {_platform_lib()} + assert report.skipped == [] + + rtbin = _isolate["home"] / ".pilot" / "bin" + for name in report.copied: + assert (rtbin / name).is_file(), f"{name} not seeded" + assert (rtbin / ".pilot-version").read_text().strip() == "1.9.1" + + def test_equal_version_is_noop(self, _isolate): + # First pass seeds. + rt.run_seeder() + rt.reset_seeded_marker() + + # Second pass with identical bundled version → noop. + report = rt.run_seeder() + assert report.action == "noop" + assert report.copied == [] + + def test_older_bundle_does_not_downgrade(self, _isolate, tmp_path, monkeypatch): + # Seed at 1.9.1 + rt.run_seeder() + rt.reset_seeded_marker() + + # Replace the package bin/ with a 1.8.0 build. 
+ pkg = _make_fake_pkg_bin( + tmp_path / "older", + "1.8.0", + list(rt._BIN_NAMES) + [_platform_lib()], + ) + monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg) + + report = rt.run_seeder() + assert report.action == "noop" + assert report.copied == [] + rtbin = _isolate["home"] / ".pilot" / "bin" + assert (rtbin / ".pilot-version").read_text().strip() == "1.9.1" + + def test_newer_bundle_upgrades(self, _isolate, tmp_path, monkeypatch): + rt.run_seeder() + rt.reset_seeded_marker() + + pkg = _make_fake_pkg_bin( + tmp_path / "newer", + "2.0.0", + list(rt._BIN_NAMES) + [_platform_lib()], + ) + monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg) + + report = rt.run_seeder() + assert report.action == "upgrade" + assert set(report.copied) == set(rt._BIN_NAMES) | {_platform_lib()} + rtbin = _isolate["home"] / ".pilot" / "bin" + assert (rtbin / ".pilot-version").read_text().strip() == "2.0.0" + # Content actually replaced + assert "2.0.0" in (rtbin / "pilotctl").read_text() + + def test_corrupt_runtime_re_seeds_missing_files(self, _isolate): + rt.run_seeder() + rtbin = _isolate["home"] / ".pilot" / "bin" + # Simulate corruption: delete pilotctl but leave the marker. + (rtbin / "pilotctl").unlink() + rt.reset_seeded_marker() + + report = rt.run_seeder() + # Same version, but a file was missing → seeder noticed and re-seeded. + assert "pilotctl" in report.copied + assert (rtbin / "pilotctl").is_file() + + +# --------------------------------------------------------------------------- +# Daemon-running guard +# --------------------------------------------------------------------------- + +class TestDaemonGuard: + def test_skips_pilot_daemon_when_socket_live(self, _isolate, monkeypatch, tmp_path): + # First seed normally so pilot-daemon exists. + rt.run_seeder() + rt.reset_seeded_marker() + + # Replace package with a newer version. 
+ pkg = _make_fake_pkg_bin( + tmp_path / "newer", + "2.0.0", + list(rt._BIN_NAMES) + [_platform_lib()], + ) + monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg) + + # Stub _daemon_running → True. + monkeypatch.setattr(rt, "_daemon_running", lambda: True) + + report = rt.run_seeder() + assert "pilot-daemon" in report.skipped + assert "pilot-daemon" not in report.copied + # Other binaries still upgrade. + assert "pilotctl" in report.copied + assert report.action == "daemon-skip" + + def test_first_install_seeds_daemon_even_if_socket_present( + self, _isolate, monkeypatch + ): + # No prior install. Even with daemon "running" (somehow), there's + # no existing pilot-daemon to preserve, so we seed fresh. + monkeypatch.setattr(rt, "_daemon_running", lambda: True) + report = rt.run_seeder() + assert "pilot-daemon" in report.copied + + +class TestDaemonProbe: + """Direct tests of _daemon_running. The fixture stubs it to False, so + these tests re-install the original saved in _orig_daemon_running.""" + + def _real_daemon_running(self, _isolate): + # Only computes the config path; each test writes its own socket entry. + cfg_path = _isolate["home"] / ".pilot" / "config.json" + return cfg_path + + def test_no_socket_means_not_running(self, _isolate): + cfg = self._real_daemon_running(_isolate) + cfg.parent.mkdir(parents=True, exist_ok=True) + cfg.write_text(json.dumps({"socket": str(_isolate["tmp"] / "no.sock")})) + # The autouse _isolate fixture replaced rt._daemon_running with a + # lambda that always returns False, so the real probe has to be put + # back before it can be exercised. rt_mod is the same module object + # as the file-level rt, just imported under a second name. + import pilotprotocol._runtime as rt_mod + # stub is the fixture's lambda; orig only records its type name. + stub = rt_mod._daemon_running + orig = type(stub).__name__ # noqa: F841 — debug breadcrumb + # Neither local is used below; they are debugging leftovers. 
+ # The real function is _orig_daemon_running, captured at module scope + # in this file. Re-installing it through the fixture's monkeypatch + # means the change is undone automatically at teardown. + _isolate["monkeypatch"].setattr(rt_mod, "_daemon_running", _orig_daemon_running) + assert rt_mod._daemon_running() is False + + def test_unconnectable_socket_means_not_running(self, _isolate, tmp_path): + cfg = self._real_daemon_running(_isolate) + cfg.parent.mkdir(parents=True, exist_ok=True) + sock_path = tmp_path / "fake.sock" + sock_path.touch() + cfg.write_text(json.dumps({"socket": str(sock_path)})) + _isolate["monkeypatch"].setattr(rt, "_daemon_running", _orig_daemon_running) + assert rt._daemon_running() is False + + def test_listening_socket_means_running(self, _isolate): + cfg = self._real_daemon_running(_isolate) + cfg.parent.mkdir(parents=True, exist_ok=True) + # AF_UNIX has a ~104 char path limit on macOS, so use a short + # tmpdir under /tmp rather than the very long pytest tmp_path. + short = Path(tempfile.mkdtemp(prefix="psk", dir="/tmp")) + sock_path = short / "live.sock" + cfg.write_text(json.dumps({"socket": str(sock_path)})) + + srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + srv.bind(str(sock_path)) + srv.listen(1) + try: + _isolate["monkeypatch"].setattr(rt, "_daemon_running", _orig_daemon_running) + assert rt._daemon_running() is True + finally: + srv.close() + sock_path.unlink(missing_ok=True) + short.rmdir() + + +# Capture the original _daemon_running once, before any fixture monkeypatches it. 
+_orig_daemon_running = rt._daemon_running + + +# --------------------------------------------------------------------------- +# Atomic install + concurrent seeders +# --------------------------------------------------------------------------- + +class TestAtomicInstall: + def test_atomic_replace_survives_existing_target(self, _isolate, tmp_path): + rt.run_seeder() + rtbin = _isolate["home"] / ".pilot" / "bin" + # Pretend pilotctl is "running": grab a file handle and overwrite. + target = rtbin / "pilotctl" + with open(target, "rb") as f: + initial = f.read() + # Now atomic-install something different. + src = tmp_path / "newctl" + src.write_text("DIFFERENT\n") + rt._atomic_install(src, target) + # The held handle still sees the old content (Unix semantics). + f.seek(0) + assert f.read() == initial + # And the on-disk file is the new one. + assert target.read_text() == "DIFFERENT\n" + + def test_no_tmp_files_left_behind(self, _isolate): + rt.run_seeder() + rtbin = _isolate["home"] / ".pilot" / "bin" + leftovers = list(rtbin.glob("*.tmp.*")) + assert leftovers == [] + + +class TestConcurrentSeeders: + def test_two_threads_only_one_writes(self, _isolate): + # Both threads see "missing" state and both attempt to seed. If the + # seeder's lock serializes them the second reports a noop; the assert + # below also tolerates seed+seed. The final state is consistent. + results: list[rt.SeedReport] = [] + barrier = threading.Barrier(2) + + def worker(): + barrier.wait() + rt.reset_seeded_marker() + results.append(rt.run_seeder()) + + threads = [threading.Thread(target=worker) for _ in range(2)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + # At least one thread seeded; the other either seeded too or was a noop. + actions = sorted(r.action for r in results) + assert actions in (["noop", "seed"], ["seed", "seed"]) + # Either way, the runtime is intact. 
+ rtbin = _isolate["home"] / ".pilot" / "bin" + for name in rt._BIN_NAMES: + assert (rtbin / name).is_file() + + +# --------------------------------------------------------------------------- +# Config + directory bootstrap +# --------------------------------------------------------------------------- + +class TestConfigBootstrap: + def test_creates_default_config_when_missing(self, _isolate): + rt.run_seeder() + cfg_path = _isolate["home"] / ".pilot" / "config.json" + assert cfg_path.is_file() + cfg = json.loads(cfg_path.read_text()) + assert cfg["registry"] == rt.DEFAULT_REGISTRY + assert cfg["beacon"] == rt.DEFAULT_BEACON + assert cfg["socket"] == rt.DEFAULT_SOCKET + assert cfg["encrypt"] is True + # No email — we never auto-set one; user supplies via daemon start. + assert "email" not in cfg + + def test_preserves_existing_config(self, _isolate): + cfg_path = _isolate["home"] / ".pilot" / "config.json" + cfg_path.parent.mkdir(parents=True, exist_ok=True) + cfg_path.write_text(json.dumps({"email": "foo@bar.com", "preserved": True})) + rt.run_seeder() + cfg = json.loads(cfg_path.read_text()) + assert cfg.get("preserved") is True + assert cfg.get("email") == "foo@bar.com" + + +# --------------------------------------------------------------------------- +# Wrong-platform package +# --------------------------------------------------------------------------- + +class TestWrongPlatform: + def test_missing_lib_does_not_crash_seeder(self, _isolate, tmp_path, monkeypatch): + # Build a pkg with executables but no platform lib. + pkg = tmp_path / "no-lib" + pkg.mkdir() + for n in rt._BIN_NAMES: + (pkg / n).write_text("stub") + (pkg / n).chmod(0o755) + (pkg / ".pilot-version").write_text("1.9.1\n") + monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg) + + # Seeder runs without exception; the lib name is just absent from copied. 
+ report = rt.run_seeder() + assert _platform_lib() not in report.copied + + # runtime_library() raises a clear error, since the lib isn't anywhere. + with pytest.raises(FileNotFoundError, match="libpilot"): + rt.runtime_library() + + +# --------------------------------------------------------------------------- +# Public entry points +# --------------------------------------------------------------------------- + +class TestPublicEntryPoints: + def test_runtime_binary_returns_seeded_path(self, _isolate): + p = rt.runtime_binary("pilotctl") + assert p == _isolate["home"] / ".pilot" / "bin" / "pilotctl" + assert p.is_file() + + def test_runtime_binary_unknown_name_raises(self, _isolate): + with pytest.raises(FileNotFoundError, match="bogus"): + rt.runtime_binary("bogus") + + def test_runtime_library_seeds_and_returns_path(self, _isolate): + p = rt.runtime_library() + assert p == _isolate["home"] / ".pilot" / "bin" / _platform_lib() + assert p.is_file() + + def test_ensure_runtime_seeded_idempotent_in_process(self, _isolate): + rt.ensure_runtime_seeded() + # Subsequent calls are short-circuited by the in-process flag. 
+ rtbin_marker = _isolate["home"] / ".pilot" / "bin" / ".pilot-version" + first_mtime = rtbin_marker.stat().st_mtime + time.sleep(0.01) + rt.ensure_runtime_seeded() + assert rtbin_marker.stat().st_mtime == first_mtime + + +# --------------------------------------------------------------------------- +# SemVer comparison +# --------------------------------------------------------------------------- + +class TestSemverTuple: + def test_basic_parsing(self): + assert rt._semver_tuple("1.9.1") == (1, 9, 1) + assert rt._semver_tuple("v1.9.1") == (1, 9, 1) + assert rt._semver_tuple("1.9.1-rc4") == (1, 9, 1) + assert rt._semver_tuple("1.9.1+meta") == (1, 9, 1) + + def test_unparseable_returns_empty_tuple(self): + assert rt._semver_tuple("") == () + assert rt._semver_tuple("garbage") == () + assert rt._semver_tuple("1.x.0") == () + + def test_ordering(self): + assert rt._semver_tuple("1.9.1") > rt._semver_tuple("1.9.0") + assert rt._semver_tuple("2.0.0") > rt._semver_tuple("1.9.99") + assert rt._semver_tuple("1.9.1") == rt._semver_tuple("1.9.1") diff --git a/tests/bench_concurrent_test.go b/tests/bench_concurrent_test.go index 4aa17dd5..4a5cca73 100644 --- a/tests/bench_concurrent_test.go +++ b/tests/bench_concurrent_test.go @@ -9,7 +9,7 @@ import ( ) // BenchmarkConcurrentStreams5 measures aggregate throughput across 5 simultaneous streams. -func BenchmarkConcurrentStreams5(b *testing.B) { runConcurrentBench(b, 5) } +func BenchmarkConcurrentStreams5(b *testing.B) { runConcurrentBench(b, 5) } // BenchmarkConcurrentStreams10 measures aggregate throughput across 10 simultaneous streams. func BenchmarkConcurrentStreams10(b *testing.B) { runConcurrentBench(b, 10) } @@ -35,7 +35,10 @@ func runConcurrentBench(b *testing.B, n int) { // Pre-create all n listeners on B, one per stream port. // Ports benchPort … benchPort+n-1 (benchPort=9201, max n=25 → 9225). 
type listenerState struct { - ln interface{ Accept() (net.Conn, error); Close() error } + ln interface { + Accept() (net.Conn, error) + Close() error + } } listeners := make([]*listenerState, n) diff --git a/tests/bench_recovery_test.go b/tests/bench_recovery_test.go index ca15342b..5a65a4e5 100644 --- a/tests/bench_recovery_test.go +++ b/tests/bench_recovery_test.go @@ -15,10 +15,10 @@ import ( // packets in the A→B direction on demand, then resume forwarding normally. // B→A traffic is always forwarded without drops. type burstProxy struct { - toB *net.UDPConn - toA *net.UDPConn - realA *net.UDPAddr - realB *net.UDPAddr + toB *net.UDPConn + toA *net.UDPConn + realA *net.UDPAddr + realB *net.UDPAddr dropN atomic.Int64 stopped atomic.Bool }