diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe9c96b3..0a824f40 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,31 @@ project uses [Semantic Versioning](https://semver.org/).
Detailed per-release notes for tagged versions are published on the
[GitHub Releases page](https://github.com/TeoSlayer/pilotprotocol/releases).
-## [Unreleased]
+## [1.9.2] - 2026-05-05
+
+### Changed
+- **SDK: updated sdk clients and cgo bindings to the latest version**
+
+- **SDK: removed polo-score surface from Node and Python bindings.**
+ Dropped `PilotMyPolo` from the CGO export layer (`sdk/cgo/bindings.go`)
+ and the corresponding `Driver.myPoloScore()` / `Driver.my_polo_score()`
+ wrappers, fake-lib hooks, and tests in both SDKs. The driver-level
+ `MyPoloScore()` and the underlying registry/daemon machinery are
+ untouched — this only narrows what the language SDKs expose. Removed
+ the now-stale `polo.pilotprotocol.network` "Live Dashboard" project
+ URL from `sdk/python/pyproject.toml` and a stray polo mention from
+ the Python SDK README.
+
+### Fixed
+
+- **SDK: macOS binaries shipped via npm/pip codesign parity.**
+ `sdk/{node,python}/scripts/build-binaries.sh` now mirror the main
+ release workflow — after building `pilot-daemon`, `pilotctl`,
+ `pilot-gateway`, `pilot-updater`, and `libpilot.dylib` on darwin,
+ each artifact is codesigned (`codesign --force --deep --sign -`)
+ and stripped of the quarantine xattr. Without this, npm/pip-installed
+ binaries triggered Gatekeeper "killed: 9" / "cannot be opened
+ because Apple cannot check it for malicious software" on first run.
## [1.9.1] - 2026-05-05
diff --git a/cmd/pilotctl/main.go b/cmd/pilotctl/main.go
index a510e011..04c8eba5 100644
--- a/cmd/pilotctl/main.go
+++ b/cmd/pilotctl/main.go
@@ -1281,12 +1281,12 @@ func cmdContext() {
"enable-tasks": map[string]interface{}{"args": []string{}, "description": "Advertise task-execution capability on port 1003"},
"disable-tasks": map[string]interface{}{"args": []string{}, "description": "Stop advertising task-execution capability"},
// Low-level / plumbing
- "connect": map[string]interface{}{"args": []string{"
", "[port]", "[--message ]"}, "description": "Open a raw stream connection"},
- "send": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a single raw message to a port"},
- "recv": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Accept and print incoming stream messages"},
- "dgram": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a UDP-style datagram"},
- "listen": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Listen for incoming datagrams"},
- "broadcast": map[string]interface{}{"args": []string{"", ""}, "description": "Broadcast a datagram to all network members"},
+ "connect": map[string]interface{}{"args": []string{"", "[port]", "[--message ]"}, "description": "Open a raw stream connection"},
+ "send": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a single raw message to a port"},
+ "recv": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Accept and print incoming stream messages"},
+ "dgram": map[string]interface{}{"args": []string{"", "", "--data "}, "description": "Send a UDP-style datagram"},
+ "listen": map[string]interface{}{"args": []string{"", "[--count ]"}, "description": "Listen for incoming datagrams"},
+ "broadcast": map[string]interface{}{"args": []string{"", ""}, "description": "Broadcast a datagram to all network members"},
// Connection management
"connections": map[string]interface{}{"args": []string{}, "description": "List active daemon connections"},
"disconnect": map[string]interface{}{"args": []string{""}, "description": "Close a connection by ID"},
diff --git a/cmd/pilotctl/redact_test.go b/cmd/pilotctl/redact_test.go
index 6d553f5e..d7c529ce 100644
--- a/cmd/pilotctl/redact_test.go
+++ b/cmd/pilotctl/redact_test.go
@@ -17,24 +17,24 @@ func TestRedactPeerEndpointsRemovesIPFields(t *testing.T) {
"node_id": 42,
"address": "0:0000.0000.002A",
"hostname": "agent-test",
- "endpoint": "203.0.113.5:4000", // must go
- "real_addr": "203.0.113.5:4000", // must go
- "public_addr": "203.0.113.5:4000", // must go
+ "endpoint": "203.0.113.5:4000", // must go
+ "real_addr": "203.0.113.5:4000", // must go
+ "public_addr": "203.0.113.5:4000", // must go
"lan_addrs": []interface{}{"10.0.0.5:4000"}, // must go
- "observed_addr": "203.0.113.5:4000", // must go
- "stun_addr": "203.0.113.5:4000", // must go
+ "observed_addr": "203.0.113.5:4000", // must go
+ "stun_addr": "203.0.113.5:4000", // must go
"peers": 7,
"encrypted_peers": 7,
"peer_list": []interface{}{
map[string]interface{}{
- "node_id": 10,
- "endpoint": "198.51.100.10:4000",
+ "node_id": 10,
+ "endpoint": "198.51.100.10:4000",
"real_addr": "198.51.100.10:4000",
"encrypted": true,
},
map[string]interface{}{
- "node_id": 11,
- "endpoint": "198.51.100.11:4000",
+ "node_id": 11,
+ "endpoint": "198.51.100.11:4000",
"encrypted": false,
},
},
diff --git a/cmd/pilotctl/updates.go b/cmd/pilotctl/updates.go
index 58aa49f2..0b5426f2 100644
--- a/cmd/pilotctl/updates.go
+++ b/cmd/pilotctl/updates.go
@@ -25,8 +25,8 @@ var changelogFeedURL = "https://teoslayer.github.io/pilot-changelog/feed.xml"
// for the human-readable + JSON output are decoded; unknown elements are
// ignored by encoding/xml.
type rssDoc struct {
- XMLName xml.Name `xml:"rss"`
- Channel rssChan `xml:"channel"`
+ XMLName xml.Name `xml:"rss"`
+ Channel rssChan `xml:"channel"`
}
type rssChan struct {
diff --git a/pkg/daemon/accept_queue_bug_test.go b/pkg/daemon/accept_queue_bug_test.go
index 79ede76e..578d748b 100644
--- a/pkg/daemon/accept_queue_bug_test.go
+++ b/pkg/daemon/accept_queue_bug_test.go
@@ -17,11 +17,11 @@ import (
// to call Accept and the queue has filled to AcceptQueueLen=64), the
// SYN handler at pkg/daemon/daemon.go:1841 currently:
//
-// 1. Sends the SYN-ACK back to the dialer
-// 2. Marks the connection StateEstablished
-// 3. Tries to push to AcceptCh
-// 4. On full: hits the `default` branch, sends a RST, removes the
-// Connection, logs WARN
+// 1. Sends the SYN-ACK back to the dialer
+// 2. Marks the connection StateEstablished
+// 3. Tries to push to AcceptCh
+// 4. On full: hits the `default` branch, sends a RST, removes the
+// Connection, logs WARN
//
// The RST is good — peer learns immediately. But:
// - No Daemon-level counter is incremented (no AcceptQueueDrops)
diff --git a/pkg/daemon/beacon_discovery_test.go b/pkg/daemon/beacon_discovery_test.go
index d3b8f01d..f1e2d911 100644
--- a/pkg/daemon/beacon_discovery_test.go
+++ b/pkg/daemon/beacon_discovery_test.go
@@ -18,7 +18,7 @@ import (
type fakeRegistry struct {
mu sync.Mutex
beacons []string
- failNext int // if >0, the next N Send() calls error
+ failNext int // if >0, the next N Send() calls error
calls atomic.Int64
lastError error
}
diff --git a/pkg/daemon/beacon_select_test.go b/pkg/daemon/beacon_select_test.go
index 7fe86fab..14893cfa 100644
--- a/pkg/daemon/beacon_select_test.go
+++ b/pkg/daemon/beacon_select_test.go
@@ -156,15 +156,15 @@ func TestPickBeaconStableAcrossSeparateListInstances(t *testing.T) {
// are kept because they may resolve to public IPs.
func TestFilterUnreachableDropsPrivateAndLoopback(t *testing.T) {
in := []string{
- "34.71.57.205:9001", // public — kept
- "10.128.0.78:9001", // private RFC1918 — dropped
- "192.168.1.5:9001", // private RFC1918 — dropped
- "172.16.0.5:9001", // private RFC1918 — dropped
- "127.0.0.1:9001", // loopback — dropped
- "169.254.1.1:9001", // link-local — dropped
- "0.0.0.0:9001", // unspecified — dropped
+ "34.71.57.205:9001", // public — kept
+ "10.128.0.78:9001", // private RFC1918 — dropped
+ "192.168.1.5:9001", // private RFC1918 — dropped
+ "172.16.0.5:9001", // private RFC1918 — dropped
+ "127.0.0.1:9001", // loopback — dropped
+ "169.254.1.1:9001", // link-local — dropped
+ "0.0.0.0:9001", // unspecified — dropped
"beacon.example.com:9001", // DNS hostname — kept
- "8.8.8.8:9001", // public — kept
+ "8.8.8.8:9001", // public — kept
}
got := filterUnreachable(in)
want := []string{
diff --git a/pkg/daemon/ca_growth_abc_cap_bug_test.go b/pkg/daemon/ca_growth_abc_cap_bug_test.go
index 0d11aad1..5ad83ea9 100644
--- a/pkg/daemon/ca_growth_abc_cap_bug_test.go
+++ b/pkg/daemon/ca_growth_abc_cap_bug_test.go
@@ -70,9 +70,9 @@ func TestCAGrowthCapsIncrementAtSMSS(t *testing.T) {
// SMSS*SMSS/cwnd = 4096*4096/40960 = 409.
// Bug: SMSS*bytes_acked/cwnd = 4096*8192/40960 = 819.
const (
- wantIncrement = MaxSegmentSize * MaxSegmentSize / initialCongWin // 409
+ wantIncrement = MaxSegmentSize * MaxSegmentSize / initialCongWin // 409
bugIncrement = MaxSegmentSize * (2 * MaxSegmentSize) / initialCongWin // 819
- wantCongWin = initialCongWin + wantIncrement // 41369
+ wantCongWin = initialCongWin + wantIncrement // 41369
)
if c.CongWin != wantCongWin {
t.Errorf("CA growth with bytes_acked=2*SMSS: CongWin=%d, want %d "+
diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go
index 578abb97..03da8850 100644
--- a/pkg/daemon/daemon.go
+++ b/pkg/daemon/daemon.go
@@ -140,10 +140,10 @@ const (
// Dial and retransmission constants.
const (
- DialDirectRetries = 3 // direct connection attempts before relay
- DialMaxRetries = 7 // total attempts (direct + relay). 3 direct + 4 relay. With DialInitialRTO=250ms exponential-backoff capped at DialMaxRTO=8s, the relay phase is ~7.75s — covers cold-start handshake (key_exchange + flushPending + SYN/SYN-ACK round trip) for typical peers while keeping bad dials from blocking longer than the user's --timeout. The probe-and-adapt machinery (see srttHistory below) will let us shorten this for peers we've successfully dialed before.
- DialInitialRTO = 250 * time.Millisecond // initial SYN retransmission timeout. Lowered from 1s — modern relay RTT is <200ms; waiting a full second before assuming loss makes cold dials feel like a stall. Three direct retries with exponential backoff (250→500→1000) still cover up to 1.75s of jitter before flipping to relay; that's plenty for an unhealthy direct path while letting the common case (peer is reachable, single retry needed) feel snappy.
- DialMaxRTO = 8 * time.Second // max backoff for SYN retransmission
+ DialDirectRetries = 3 // direct connection attempts before relay
+ DialMaxRetries = 7 // total attempts (direct + relay). 3 direct + 4 relay. With DialInitialRTO=250ms exponential-backoff capped at DialMaxRTO=8s, the relay phase is ~7.75s — covers cold-start handshake (key_exchange + flushPending + SYN/SYN-ACK round trip) for typical peers while keeping bad dials from blocking longer than the user's --timeout. The probe-and-adapt machinery (see srttHistory below) will let us shorten this for peers we've successfully dialed before.
+ DialInitialRTO = 250 * time.Millisecond // initial SYN retransmission timeout. Lowered from 1s — modern relay RTT is <200ms; waiting a full second before assuming loss makes cold dials feel like a stall. Three direct retries with exponential backoff (250→500→1000) still cover up to 1.75s of jitter before flipping to relay; that's plenty for an unhealthy direct path while letting the common case (peer is reachable, single retry needed) feel snappy.
+ DialMaxRTO = 8 * time.Second // max backoff for SYN retransmission
DialCheckInterval = 10 * time.Millisecond // poll interval for state changes during dial
RetxCheckInterval = 100 * time.Millisecond // retransmission check ticker
MaxRetxAttempts = 8 // abandon connection after this many retransmissions
@@ -3368,8 +3368,8 @@ func (d *Daemon) hostnameCachePath() string {
// hostnameCacheDisk is the on-disk format for the hostname cache.
type hostnameCacheDisk struct {
- SavedAt time.Time `json:"saved_at"`
- Hostnames map[string]hostnameCacheDiskEntry `json:"hostnames"`
+ SavedAt time.Time `json:"saved_at"`
+ Hostnames map[string]hostnameCacheDiskEntry `json:"hostnames"`
}
type hostnameCacheDiskEntry struct {
diff --git a/pkg/daemon/daemon_ipc_test.go b/pkg/daemon/daemon_ipc_test.go
index f8db19cc..4c454ea7 100644
--- a/pkg/daemon/daemon_ipc_test.go
+++ b/pkg/daemon/daemon_ipc_test.go
@@ -122,7 +122,7 @@ func TestHandleBindDoubleBindSendsError(t *testing.T) {
}
ic, client := newIPCTestConn(t)
reply := runHandler(t, client, func() { s.handleBind(ic, []byte{0x23, 0x28}) }) // port 9000
- assertErrorReply(t, reply, "port") // "already bound" or similar
+ assertErrorReply(t, reply, "port") // "already bound" or similar
}
// --- handleDial ---
diff --git a/pkg/daemon/daemon_networkipc_test.go b/pkg/daemon/daemon_networkipc_test.go
index 3df6227a..fabcaa1b 100644
--- a/pkg/daemon/daemon_networkipc_test.go
+++ b/pkg/daemon/daemon_networkipc_test.go
@@ -282,7 +282,7 @@ func TestHandleNetworkRespondInviteNoInviteSendsError(t *testing.T) {
payload := make([]byte, 4)
payload[0] = SubNetworkRespondInvite
binary.BigEndian.PutUint16(payload[1:3], 0xBEEF) // non-existent network
- payload[3] = 1 // accept=true
+ payload[3] = 1 // accept=true
reply := runHandler(t, client, func() { s.handleNetwork(ic, payload) })
// Either registry rejects or reply is OK — the code path exercises the
diff --git a/pkg/daemon/dial_precancelled_ctx_bug_test.go b/pkg/daemon/dial_precancelled_ctx_bug_test.go
index af32969f..cd0be38b 100644
--- a/pkg/daemon/dial_precancelled_ctx_bug_test.go
+++ b/pkg/daemon/dial_precancelled_ctx_bug_test.go
@@ -24,11 +24,11 @@ import (
// request, or an upstream timeout fired during request queueing),
// the daemon still:
//
-// 1. Calls ensureTunnel (potentially up to 30 s blocked on a
-// slow registry — see iter 13 audit notes)
-// 2. Allocates an ephemeral port
-// 3. Creates a Connection in StateSynSent
-// 4. Sends a SYN over the tunnel to the peer
+// 1. Calls ensureTunnel (potentially up to 30 s blocked on a
+// slow registry — see iter 13 audit notes)
+// 2. Allocates an ephemeral port
+// 3. Creates a Connection in StateSynSent
+// 4. Sends a SYN over the tunnel to the peer
//
// Only AFTER all of that does the for-loop's ctx.Done case fire.
// The peer received a phantom SYN they'll respond to (SYN-ACK)
diff --git a/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go b/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go
index c36d0a33..925efb2c 100644
--- a/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go
+++ b/pkg/daemon/dup_ack_fresh_recovery_per_ack_inflation_bug_test.go
@@ -71,12 +71,12 @@ func TestFreshFastRecoveryPerDupAckInflation(t *testing.T) {
now := time.Now()
c.RetxMu.Lock()
c.LastAck = seqA
- c.DupAckCount = 3 // third dup ACK just fired
- c.InRecovery = true // entered by DupAckCount==3 path
- c.FastRecovery = true // fast retransmit entered this episode
- c.RecoveryPoint = sendSeq // set to sendSeq by DupAckCount==3 path
- c.SSThresh = 2 * MaxSegmentSize // halved: max(3*MSS/2, 2*MSS) = 2*MSS
- c.CongWin = c.SSThresh + 3*MaxSegmentSize // = 5*MSS = 20480
+ c.DupAckCount = 3 // third dup ACK just fired
+ c.InRecovery = true // entered by DupAckCount==3 path
+ c.FastRecovery = true // fast retransmit entered this episode
+ c.RecoveryPoint = sendSeq // set to sendSeq by DupAckCount==3 path
+ c.SSThresh = 2 * MaxSegmentSize // halved: max(3*MSS/2, 2*MSS) = 2*MSS
+ c.CongWin = c.SSThresh + 3*MaxSegmentSize // = 5*MSS = 20480
c.Unacked = []*retxEntry{
{seq: seqB, data: make([]byte, MaxSegmentSize), attempts: 2, sentAt: now}, // fast-retransmitted
{seq: seqC, data: make([]byte, MaxSegmentSize), attempts: 1, sentAt: now},
diff --git a/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go b/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go
index 852b3631..c9b8c534 100644
--- a/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go
+++ b/pkg/daemon/dup_ack_in_recovery_ssthresh_halving_bug_test.go
@@ -57,10 +57,10 @@ func TestDupAckFastRetransmitInRecoveryDoesNotRehalveSSThresh(t *testing.T) {
// SSThresh halved once, CongWin = InitialCongWin, InRecovery = true.
conn.RetxMu.Lock()
conn.LastAck = seqA
- conn.CongWin = InitialCongWin // timeout set this (10*MSS)
- conn.SSThresh = 10 * MaxSegmentSize // timeout halved from 20*MSS → 10*MSS
- conn.DupAckCount = 0 // timeout reset this (iter-51)
- conn.InRecovery = true // timeout set this
+ conn.CongWin = InitialCongWin // timeout set this (10*MSS)
+ conn.SSThresh = 10 * MaxSegmentSize // timeout halved from 20*MSS → 10*MSS
+ conn.DupAckCount = 0 // timeout reset this (iter-51)
+ conn.InRecovery = true // timeout set this
conn.RecoveryPoint = seqA + MaxSegmentSize
conn.RTO = InitialRTO
conn.Unacked = []*retxEntry{{
diff --git a/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go b/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go
index 55a2f3ba..14e263ce 100644
--- a/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go
+++ b/pkg/daemon/dup_ack_in_timeout_recovery_additional_inflate_bug_test.go
@@ -58,7 +58,7 @@ func TestAdditionalDupAckInTimeoutRecoveryDoesNotInflateConn(t *testing.T) {
conn.RetxStop = make(chan struct{})
const (
- seqA = uint32(1000)
+ seqA = uint32(1000)
ssthreshAfterTimeout = 5 * MaxSegmentSize
)
@@ -70,7 +70,7 @@ func TestAdditionalDupAckInTimeoutRecoveryDoesNotInflateConn(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = seqA
- conn.CongWin = MaxSegmentSize // post-timeout: 1 SMSS
+ conn.CongWin = MaxSegmentSize // post-timeout: 1 SMSS
conn.SSThresh = ssthreshAfterTimeout
conn.DupAckCount = 0
conn.InRecovery = true
diff --git a/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go b/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go
index 8f74100a..461b8576 100644
--- a/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go
+++ b/pkg/daemon/dup_ack_in_timeout_recovery_cwnd_reinflation_bug_test.go
@@ -69,7 +69,7 @@ func TestDupAckInTimeoutRecoveryDoesNotReinflateConn(t *testing.T) {
conn.RetxStop = make(chan struct{})
const (
- seqA = uint32(1000)
+ seqA = uint32(1000)
ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480 — timeout halved from 10*MSS
)
@@ -82,11 +82,11 @@ func TestDupAckInTimeoutRecoveryDoesNotReinflateConn(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = seqA
- conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS
+ conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS
conn.SSThresh = ssthreshAfterTimeout
conn.DupAckCount = 0
- conn.InRecovery = true // set by the retransmission timeout
- conn.FastRecovery = false // cleared by retransmitUnacked
+ conn.InRecovery = true // set by the retransmission timeout
+ conn.FastRecovery = false // cleared by retransmitUnacked
conn.RecoveryPoint = seqA + MaxSegmentSize // = SendSeq (no new data)
conn.RTO = InitialRTO
conn.Unacked = []*retxEntry{{
diff --git a/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go b/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go
index a471c81f..7826696a 100644
--- a/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go
+++ b/pkg/daemon/dup_ack_new_episode_in_recovery_ssthresh_bug_test.go
@@ -76,7 +76,7 @@ func TestNewEpisodeDupAcksInRecoveryHalveSSThresh(t *testing.T) {
const (
seqA = uint32(1000)
- seqB = seqA + MaxSegmentSize // 5096 — timeout-retransmitted, in Unacked
+ seqB = seqA + MaxSegmentSize // 5096 — timeout-retransmitted, in Unacked
seqD = seqA + 2*MaxSegmentSize // 9192 — RecoveryPoint from timeout
seqE = seqA + 3*MaxSegmentSize // 13288 — new data, in Unacked
ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480 — halved from 10*MSS by timeout
@@ -92,12 +92,12 @@ func TestNewEpisodeDupAcksInRecoveryHalveSSThresh(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = seqA
- conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS
+ conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS
conn.SSThresh = ssthreshAfterTimeout // 5*MSS, set by the retransmission timeout
conn.DupAckCount = 0
- conn.InRecovery = true // timeout set InRecovery=true
- conn.FastRecovery = false // cleared by timeout
- conn.RecoveryPoint = seqD // timeout's recovery window ends at seqD
+ conn.InRecovery = true // timeout set InRecovery=true
+ conn.FastRecovery = false // cleared by timeout
+ conn.RecoveryPoint = seqD // timeout's recovery window ends at seqD
conn.RTO = InitialRTO
now := time.Now()
conn.Unacked = []*retxEntry{
diff --git a/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go b/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go
index 7e7418d9..1d6151f4 100644
--- a/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go
+++ b/pkg/daemon/dup_ack_timeout_recovery_fast_recovery_flag_bug_test.go
@@ -49,9 +49,9 @@ import (
// (same-episode dup ACKs must not set FastRecovery).
//
// 2. Narrow the step-6 guard from
-// (oldDupAckCount >= 3 || wasFastRecovery) && wasInRecovery
+// (oldDupAckCount >= 3 || wasFastRecovery) && wasInRecovery
// to
-// wasFastRecovery && wasInRecovery
+// wasFastRecovery && wasInRecovery
// so that step 6 only fires when FastRecovery was explicitly set
// (i.e. recovery was entered via fast retransmit for a new episode).
// This prevents leftover DupAckCount >= 3 from triggering spurious
@@ -70,10 +70,10 @@ func TestSameEpisodeDupAcksDoNotSetFastRecoveryFlag(t *testing.T) {
const (
seqA = uint32(1000)
- seqB = seqA + MaxSegmentSize // 5096 — SACKED by receiver
- seqC = seqB + MaxSegmentSize // 9192 — still outstanding
- seqD = seqC + MaxSegmentSize // 13288 — RecoveryPoint
- ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480
+ seqB = seqA + MaxSegmentSize // 5096 — SACKED by receiver
+ seqC = seqB + MaxSegmentSize // 9192 — still outstanding
+ seqD = seqC + MaxSegmentSize // 13288 — RecoveryPoint
+ ssthreshAfterTimeout = 5 * MaxSegmentSize // 20480
)
conn.Mu.Lock()
@@ -85,12 +85,12 @@ func TestSameEpisodeDupAcksDoNotSetFastRecoveryFlag(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = seqA
- conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS
+ conn.CongWin = MaxSegmentSize // RFC 5681 §3.1: post-timeout cwnd = 1 SMSS
conn.SSThresh = ssthreshAfterTimeout
conn.DupAckCount = 0
- conn.InRecovery = true // set by the retransmission timeout
- conn.FastRecovery = false // cleared by retransmitUnacked
- conn.RecoveryPoint = seqD // = SendSeq (no new data)
+ conn.InRecovery = true // set by the retransmission timeout
+ conn.FastRecovery = false // cleared by retransmitUnacked
+ conn.RecoveryPoint = seqD // = SendSeq (no new data)
conn.RTO = InitialRTO
now := time.Now()
conn.Unacked = []*retxEntry{
diff --git a/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go b/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go
index d1752124..d4274331 100644
--- a/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go
+++ b/pkg/daemon/fast_recovery_cwnd_inflation_windowch_bug_test.go
@@ -38,13 +38,14 @@ import (
// the congestion-window-full state.
//
// Concrete example (8 segments in flight, MaxSegmentSize = 4096 bytes):
-// initial: CongWin = 8*MSS = 32768, BytesInFlight = 32768
-// 3rd dup-ACK (fast retransmit):
-// SSThresh = 16384, CongWin = 16384+3*4096 = 28672
-// WindowAvailable = (32768 < 28672) = false
-// 4th dup-ACK: CongWin = 32768. WindowAvailable = (32768 < 32768) = false
-// 5th dup-ACK: CongWin = 36864. WindowAvailable = (32768 < 36864) = true
-// → window opened but WindowCh NOT signaled (bug)
+//
+// initial: CongWin = 8*MSS = 32768, BytesInFlight = 32768
+// 3rd dup-ACK (fast retransmit):
+// SSThresh = 16384, CongWin = 16384+3*4096 = 28672
+// WindowAvailable = (32768 < 28672) = false
+// 4th dup-ACK: CongWin = 32768. WindowAvailable = (32768 < 32768) = false
+// 5th dup-ACK: CongWin = 36864. WindowAvailable = (32768 < 36864) = true
+// → window opened but WindowCh NOT signaled (bug)
//
// GREEN assertion: after the 5th dup-ACK, WindowCh has a token because the
// window became available. Against unpatched code WindowCh is empty and a
@@ -59,8 +60,8 @@ func TestFastRecoveryExtraACKInflationSignalsWindowCh(t *testing.T) {
// 8 in-flight segments, window exactly full.
conn.RetxMu.Lock()
conn.LastAck = 1000
- conn.CongWin = numSegs * MaxSegmentSize // 32768
- conn.SSThresh = conn.CongWin // high enough to be in AIMD territory
+ conn.CongWin = numSegs * MaxSegmentSize // 32768
+ conn.SSThresh = conn.CongWin // high enough to be in AIMD territory
for i := 0; i < numSegs; i++ {
conn.Unacked = append(conn.Unacked, &retxEntry{
seq: uint32(1000 + i*MaxSegmentSize),
diff --git a/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go b/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go
index 5ce56748..a82d9cb5 100644
--- a/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go
+++ b/pkg/daemon/fast_recovery_exit_cwnd_bug_test.go
@@ -62,7 +62,7 @@ func TestFastRecoveryExitDeflatesCongWin(t *testing.T) {
// iter-59 gates the deflation on wasInRecovery; without this field the
// test setup misrepresents the state and the deflation would not fire.
conn.InRecovery = true
- conn.FastRecovery = true // fast retransmit entered recovery (new episode, not timeout)
+ conn.FastRecovery = true // fast retransmit entered recovery (new episode, not timeout)
conn.RecoveryPoint = 1000 + MaxSegmentSize // new ACK will reach this and clear it
// Put one unacked entry so ProcessAck has something to remove when ack > LastAck
conn.Unacked = []*retxEntry{
diff --git a/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go b/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go
index 030405eb..cd736568 100644
--- a/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go
+++ b/pkg/daemon/fast_recovery_exit_deflation_noop_bug_test.go
@@ -64,7 +64,7 @@ func TestFastRecoveryExitDeflationRequiresActualRecovery(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = seqA
- conn.CongWin = InitialCongWin // 10*MSS
+ conn.CongWin = InitialCongWin // 10*MSS
conn.SSThresh = initialSSThresh // 20*MSS (intentionally > InitialCongWin)
conn.InRecovery = false
conn.DupAckCount = 0
diff --git a/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go b/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go
index dffd7651..5649226d 100644
--- a/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go
+++ b/pkg/daemon/fast_recovery_partial_ack_aimd_inflation_bug_test.go
@@ -71,8 +71,8 @@ func TestFastRecoveryPartialAckNoAIMDInflation(t *testing.T) {
c.InRecovery = true
c.FastRecovery = true
c.RecoveryPoint = recoveryPoint
- c.SSThresh = 2 * MaxSegmentSize // 8192
- c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480
+ c.SSThresh = 2 * MaxSegmentSize // 8192
+ c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480
now := time.Now()
c.Unacked = []*retxEntry{
diff --git a/pkg/daemon/fast_recovery_partial_ack_bug_test.go b/pkg/daemon/fast_recovery_partial_ack_bug_test.go
index aba5456f..c09cc685 100644
--- a/pkg/daemon/fast_recovery_partial_ack_bug_test.go
+++ b/pkg/daemon/fast_recovery_partial_ack_bug_test.go
@@ -77,18 +77,18 @@ func TestPartialAckInFastRecoveryDoesNotDeflateToSSThresh(t *testing.T) {
c := newAckTestConn(t)
const (
- ssthresh = 5 * MaxSegmentSize // 20480
- fastRecoveryCongWin = ssthresh + 3*MaxSegmentSize // 32768 — entered fast recovery
- seqA = uint32(1000)
- seqB = seqA + MaxSegmentSize // 5096
- seqC = seqB + MaxSegmentSize // 9192 — RecoveryPoint (beyond partial ACK)
+ ssthresh = 5 * MaxSegmentSize // 20480
+ fastRecoveryCongWin = ssthresh + 3*MaxSegmentSize // 32768 — entered fast recovery
+ seqA = uint32(1000)
+ seqB = seqA + MaxSegmentSize // 5096
+ seqC = seqB + MaxSegmentSize // 9192 — RecoveryPoint (beyond partial ACK)
)
c.LastAck = seqA
c.SSThresh = ssthresh
c.CongWin = fastRecoveryCongWin
- c.DupAckCount = 3 // just triggered fast retransmit
- c.InRecovery = true // in fast recovery
+ c.DupAckCount = 3 // just triggered fast retransmit
+ c.InRecovery = true // in fast recovery
c.RecoveryPoint = seqC
c.Unacked = []*retxEntry{
// seqA: already retransmitted (attempts=2), will be acked by partial ACK
diff --git a/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go b/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go
index f03339a0..6a6a2d8f 100644
--- a/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go
+++ b/pkg/daemon/fast_recovery_post_partial_ack_dup_inflation_bug_test.go
@@ -71,13 +71,13 @@ func TestFastRecoveryPostPartialAckDupAckInflation(t *testing.T) {
// seqA was cumulatively acked and removed from Unacked.
// DupAckCount reset to 0 by the new-ACK path in ProcessAck.
// CongWin: deflated by MSS, add-back MSS (bytesAcked==SMSS, neutral) → 5*MSS.
- c.LastAck = seqB // partial ACK advanced LastAck to seqB
- c.DupAckCount = 0 // reset by the new-ACK path
+ c.LastAck = seqB // partial ACK advanced LastAck to seqB
+ c.DupAckCount = 0 // reset by the new-ACK path
c.InRecovery = true
c.FastRecovery = true
c.RecoveryPoint = recoveryPoint
- c.SSThresh = 2 * MaxSegmentSize // 8192
- c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480
+ c.SSThresh = 2 * MaxSegmentSize // 8192
+ c.CongWin = c.SSThresh + 3*MaxSegmentSize // 5*MSS = 20480
now := time.Now()
c.Unacked = []*retxEntry{
diff --git a/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go b/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go
index 3550bde6..9ea0766a 100644
--- a/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go
+++ b/pkg/daemon/fast_recovery_third_dup_ack_same_episode_inflation_bug_test.go
@@ -79,12 +79,12 @@ func TestFastRecoveryThirdDupAckSameEpisodeInflation(t *testing.T) {
// State after a partial ACK reset DupAckCount to 0 and 2 subsequent dup
// ACKs have already inflated CongWin by 2*MSS (iter-82 fix):
// DupAckCount=2, CongWin = SSThresh + 3*MSS + 2*MSS = 7*MSS = 28672
- c.LastAck = seqB // partial ACK set LastAck to seqB; 3rd dup ACK repeats it
- c.DupAckCount = 2 // two dup ACKs have already fired since the reset
+ c.LastAck = seqB // partial ACK set LastAck to seqB; 3rd dup ACK repeats it
+ c.DupAckCount = 2 // two dup ACKs have already fired since the reset
c.InRecovery = true
c.FastRecovery = true
c.RecoveryPoint = recoveryPoint
- c.SSThresh = 2 * MaxSegmentSize // 8192
+ c.SSThresh = 2 * MaxSegmentSize // 8192
c.CongWin = c.SSThresh + 3*MaxSegmentSize + 2*MaxSegmentSize // 7*MSS = 28672
now := time.Now()
diff --git a/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go b/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go
index 56081cca..e7b7599b 100644
--- a/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go
+++ b/pkg/daemon/fast_retransmit_entry_windowch_bug_test.go
@@ -24,12 +24,13 @@ import (
// that was blocked by a full window may now have room — but is not told.
//
// Concrete example (window = 2 segments):
-// initial: CongWin = 2*MSS = 8192, BytesInFlight = 8192 (window full)
-// 3rd dup-ACK (fast retransmit):
-// SSThresh = max(8192/2, MSS) = 4096
-// CongWin = 4096 + 3*4096 = 16384
-// WindowAvailable = (8192 < 16384) = true
-// → window opened but WindowCh NOT signaled (bug)
+//
+// initial: CongWin = 2*MSS = 8192, BytesInFlight = 8192 (window full)
+// 3rd dup-ACK (fast retransmit):
+// SSThresh = max(8192/2, MSS) = 4096
+// CongWin = 4096 + 3*4096 = 16384
+// WindowAvailable = (8192 < 16384) = true
+// → window opened but WindowCh NOT signaled (bug)
//
// This differs from the iter-39 bug (which fixed the DupAckCount>3 path):
// the DupAckCount==3 path on entry to fast recovery also inflates CongWin for
@@ -48,7 +49,7 @@ func TestFastRetransmitEntryInflatesWindowAndSignalsWindowCh(t *testing.T) {
// 2 in-flight segments filling a 2-segment congestion window.
conn.RetxMu.Lock()
conn.LastAck = 1000
- conn.CongWin = numSegs * MaxSegmentSize // 8192 bytes
+ conn.CongWin = numSegs * MaxSegmentSize // 8192 bytes
conn.SSThresh = 4 * conn.CongWin // high: won't constrain
for i := 0; i < numSegs; i++ {
conn.Unacked = append(conn.Unacked, &retxEntry{
diff --git a/pkg/daemon/fast_retransmit_max_attempts_bug_test.go b/pkg/daemon/fast_retransmit_max_attempts_bug_test.go
index f5ef084d..8bb12f26 100644
--- a/pkg/daemon/fast_retransmit_max_attempts_bug_test.go
+++ b/pkg/daemon/fast_retransmit_max_attempts_bug_test.go
@@ -67,7 +67,7 @@ func TestFastRetransmitStopsAtMaxAttempts(t *testing.T) {
conn.Unacked = []*retxEntry{{
seq: seqA,
data: make([]byte, MaxSegmentSize),
- attempts: MaxRetxAttempts, // already at the limit
+ attempts: MaxRetxAttempts, // already at the limit
sentAt: time.Now().Add(-100 * time.Millisecond), // not past RTO
sacked: false,
}}
diff --git a/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go b/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go
index cbf3c337..abef6ef3 100644
--- a/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go
+++ b/pkg/daemon/fast_retransmit_noop_congestion_state_bug_test.go
@@ -135,8 +135,8 @@ func TestFastRetransmitNoopDoesNotAdjustCongestionState(t *testing.T) {
// InRecovery must remain false: no phantom recovery entry.
// Bug: 'c.InRecovery = true' fires unconditionally when !c.InRecovery.
if inRecovery {
- t.Errorf("InRecovery=true after no-op fastRetransmit, want false; "+
- "phantom InRecovery was set even though no packet was sent; "+
+ t.Errorf("InRecovery=true after no-op fastRetransmit, want false; " +
+ "phantom InRecovery was set even though no packet was sent; " +
"fix: gate 'c.InRecovery = true' on fastRetransmit returning true",
)
}
diff --git a/pkg/daemon/ipc_async_write_test.go b/pkg/daemon/ipc_async_write_test.go
index 7318c3f4..ff4604e2 100644
--- a/pkg/daemon/ipc_async_write_test.go
+++ b/pkg/daemon/ipc_async_write_test.go
@@ -129,7 +129,7 @@ func TestIPCConnAsyncWriteBackpressure(t *testing.T) {
// Write since nothing reads. So we expect roughly buffer+1 to succeed
// before backpressure.
var (
- successes atomic.Int64
+ successes atomic.Int64
gotPressure atomic.Bool
)
deadline := time.After(3 * time.Second)
diff --git a/pkg/daemon/ipc_dialcancel_leak_bug_test.go b/pkg/daemon/ipc_dialcancel_leak_bug_test.go
index b68cd354..25eb7eba 100644
--- a/pkg/daemon/ipc_dialcancel_leak_bug_test.go
+++ b/pkg/daemon/ipc_dialcancel_leak_bug_test.go
@@ -40,8 +40,8 @@ func TestIPCDialCancelsLeakOnCompletedDials(t *testing.T) {
for i := 0; i < N; i++ {
_, cancel := context.WithCancel(context.Background())
id := ic.addDialCancel(cancel)
- cancel() // simulate defer dialCancel()
- ic.removeDialCancel(id) // v1.9.1 fix: remove after dial completes
+ cancel() // simulate defer dialCancel()
+ ic.removeDialCancel(id) // v1.9.1 fix: remove after dial completes
}
got := ic.dialCancelCount()
diff --git a/pkg/daemon/keepalive_zero_window_probe_bug_test.go b/pkg/daemon/keepalive_zero_window_probe_bug_test.go
index 63f227ef..3fb23ae0 100644
--- a/pkg/daemon/keepalive_zero_window_probe_bug_test.go
+++ b/pkg/daemon/keepalive_zero_window_probe_bug_test.go
@@ -73,7 +73,7 @@ func TestKeepaliveProbeWithWindowDoesNotStallPeer(t *testing.T) {
SrcPort: remotePort,
DstPort: localPort,
Seq: 200,
- Ack: 100, // == conn.LastAck (dup-ACK path, keepalive-like)
+ Ack: 100, // == conn.LastAck (dup-ACK path, keepalive-like)
Window: senderRecvWin, // fixed: sender includes its recv window
}
@@ -146,7 +146,7 @@ func TestKeepaliveZeroWindowProbeStallsMechanism(t *testing.T) {
"(unexpected; receiver should always update PeerRecvWin from pkt.Window)",
peerRecvWin, avail)
} else {
- t.Logf("confirmed: zero-window probe sets PeerRecvWin=0, avail=false — "+
+ t.Logf("confirmed: zero-window probe sets PeerRecvWin=0, avail=false — " +
"500ms stall mechanism documented; fix: idleSweepLoop must include Window: conn.RecvWindow()")
}
}
diff --git a/pkg/daemon/listener_closed_channel_bug_test.go b/pkg/daemon/listener_closed_channel_bug_test.go
index f2d01874..804aff9e 100644
--- a/pkg/daemon/listener_closed_channel_bug_test.go
+++ b/pkg/daemon/listener_closed_channel_bug_test.go
@@ -51,7 +51,7 @@ func TestListenerSendAfterUnbindSafe(t *testing.T) {
go func() {
defer func() {
if r := recover(); r != nil {
- panicked <- true // panic → bug present
+ panicked <- true // panic → bug present
} else {
panicked <- false // clean → fix applied
}
diff --git a/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go b/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go
index e087523b..09704f45 100644
--- a/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go
+++ b/pkg/daemon/nagle_all_sacked_hasunacked_bug_test.go
@@ -76,7 +76,7 @@ func TestNagleFlushAllSackedSendsImmediately(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = 3000
- conn.CongWin = 16 * MaxSegmentSize // large — window is open
+ conn.CongWin = 16 * MaxSegmentSize // large — window is open
conn.SSThresh = conn.CongWin
conn.PeerRecvWin = 16 * MaxSegmentSize
// 3 entries, all sacked — peer has every byte.
diff --git a/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go b/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go
index c86f372b..9e96480d 100644
--- a/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go
+++ b/pkg/daemon/peer_recv_win_growth_windowch_bug_test.go
@@ -93,8 +93,8 @@ func TestPeerRecvWinGrowthSignalsWindowCh(t *testing.T) {
SrcPort: remotePort,
DstPort: localPort,
Seq: 200,
- Ack: 500, // == conn.LastAck → dup-ACK (no new data acked)
- Window: 2, // 2 segments → PeerRecvWin = 2*MSS
+ Ack: 500, // == conn.LastAck → dup-ACK (no new data acked)
+ Window: 2, // 2 segments → PeerRecvWin = 2*MSS
}
d.handleStreamPacket(windowUpdate)
diff --git a/pkg/daemon/policy_runner.go b/pkg/daemon/policy_runner.go
index fe5f269d..284c616f 100644
--- a/pkg/daemon/policy_runner.go
+++ b/pkg/daemon/policy_runner.go
@@ -25,8 +25,8 @@ type PolicyRunner struct {
compiled *policy.CompiledPolicy
daemon *Daemon
- mu sync.RWMutex
- peers map[uint32]*managedPeer // reuse managedPeer from managed.go
+ mu sync.RWMutex
+ peers map[uint32]*managedPeer // reuse managedPeer from managed.go
// Peers that local evict / deny decisions removed from pr.peers.
// Reconciler's applyMembershipDiff refuses to re-add entries during
// the cooldown window — otherwise the next reconcile tick (5s)
@@ -54,9 +54,9 @@ type PolicyRunner struct {
// of latency to ANY other call (resolve_hostname, lookup, etc) that
// shares regConn. Track consecutive failures and skip ticks until
// the next backoff deadline.
- fetchFailMu sync.Mutex
- fetchFailures int // consecutive failure count
- fetchSkipUntil time.Time // skip ticks before this time
+ fetchFailMu sync.Mutex
+ fetchFailures int // consecutive failure count
+ fetchSkipUntil time.Time // skip ticks before this time
}
// policySnapshot is the JSON format persisted to disk.
diff --git a/pkg/daemon/ports.go b/pkg/daemon/ports.go
index a2d9b3f1..1cb189fe 100644
--- a/pkg/daemon/ports.go
+++ b/pkg/daemon/ports.go
@@ -191,8 +191,8 @@ type Connection struct {
// stale `SendSeq-1` that has drifted forward once data flowed.
SynAckSeq uint32
SynAckSeqSet bool
- SendBuf chan []byte
- RecvBuf chan []byte
+ SendBuf chan []byte
+ RecvBuf chan []byte
// Sliding window + retransmission (send side)
RetxMu sync.Mutex
Unacked []*retxEntry // ordered by seq
@@ -1111,10 +1111,11 @@ func seqAfterOrEqual(a, b uint32) bool {
// ACK number (next expected seq).
//
// Three-phase design to avoid both deadlock and sequence leaks:
-// Phase 1: Collect segments to deliver under RecvMu (don't advance ExpectedSeq).
-// Phase 2: Deliver outside lock (prevents routeLoop deadlock, C1 fix).
-// Phase 3: Re-acquire lock, advance ExpectedSeq only for delivered segments,
-// re-buffer undelivered OOO segments.
+//
+// Phase 1: Collect segments to deliver under RecvMu (don't advance ExpectedSeq).
+// Phase 2: Deliver outside lock (prevents routeLoop deadlock, C1 fix).
+// Phase 3: Re-acquire lock, advance ExpectedSeq only for delivered segments,
+// re-buffer undelivered OOO segments.
//
// Safe because routeLoop is single-goroutine — no concurrent DeliverInOrder
// calls for the same connection between Phase 2 and Phase 3.
diff --git a/pkg/daemon/ports_logic_test.go b/pkg/daemon/ports_logic_test.go
index 1896b46d..8952d9fe 100644
--- a/pkg/daemon/ports_logic_test.go
+++ b/pkg/daemon/ports_logic_test.go
@@ -464,7 +464,7 @@ func TestProcessAckThirdDupACKTriggersFastRetransmit(t *testing.T) {
func TestProcessAckGrowsCongWinInSlowStart(t *testing.T) {
c := newAckTestConn(t)
c.LastAck = 1000
- c.CongWin = 4000 // < SSThresh → slow start
+ c.CongWin = 4000 // < SSThresh → slow start
c.SSThresh = 50000
c.Unacked = []*retxEntry{{seq: 1000, data: make([]byte, 1000), attempts: 1, sentAt: time.Now()}}
diff --git a/pkg/daemon/process_ack_resets_sack_state_bug_test.go b/pkg/daemon/process_ack_resets_sack_state_bug_test.go
index 0ede5906..8577638f 100644
--- a/pkg/daemon/process_ack_resets_sack_state_bug_test.go
+++ b/pkg/daemon/process_ack_resets_sack_state_bug_test.go
@@ -52,7 +52,7 @@ func TestProcessAckPartialDoesNotResetSACKedState(t *testing.T) {
// Three entries: A is in-flight (unsacked), B and C are at the peer (sacked).
const (
seqA = uint32(1000)
- seqB = uint32(1000 + MaxSegmentSize) // = 5096
+ seqB = uint32(1000 + MaxSegmentSize) // = 5096
seqC = uint32(1000 + 2*MaxSegmentSize) // = 9192
)
@@ -104,15 +104,15 @@ func TestProcessAckPartialDoesNotResetSACKedState(t *testing.T) {
// B must retain sacked=true — the peer confirmed receiving it via SACK;
// a partial ACK that doesn't cover B must not discard that information.
if !bSacked {
- t.Errorf("ProcessAck partial ACK for A: B.sacked=false, want true; "+
- "RFC 2018 §5 requires retaining SACK state above cumulative ACK; "+
+ t.Errorf("ProcessAck partial ACK for A: B.sacked=false, want true; " +
+ "RFC 2018 §5 requires retaining SACK state above cumulative ACK; " +
"fix: remove 'e.sacked = false' from the remaining-entries loop in ProcessAck",
)
}
// C must retain sacked=true for the same reason.
if !cSacked {
- t.Errorf("ProcessAck partial ACK for A: C.sacked=false, want true; "+
+ t.Errorf("ProcessAck partial ACK for A: C.sacked=false, want true; " +
"RFC 2018 §5 requires retaining SACK state above cumulative ACK",
)
}
diff --git a/pkg/daemon/process_sack_wraparound_bug_test.go b/pkg/daemon/process_sack_wraparound_bug_test.go
index 51c54a0b..bb6fb526 100644
--- a/pkg/daemon/process_sack_wraparound_bug_test.go
+++ b/pkg/daemon/process_sack_wraparound_bug_test.go
@@ -77,15 +77,15 @@ func TestProcessSACKWraparound(t *testing.T) {
// FIXED: use seqAfterOrEqual for both sides of the containment check.
if !sackedA {
- t.Errorf("segment A (seq=0xFFFFF000, segEnd=0xFFFFF010) not sacked "+
- "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'segEnd <= Right' "+
- "comparison fails at uint32 wraparound (0xFFFFF010 <= 0x00001000 = false); "+
+ t.Errorf("segment A (seq=0xFFFFF000, segEnd=0xFFFFF010) not sacked " +
+ "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'segEnd <= Right' " +
+ "comparison fails at uint32 wraparound (0xFFFFF010 <= 0x00001000 = false); " +
"fix: use seqAfterOrEqual(b.Right, segEnd)")
}
if !sackedB {
- t.Errorf("segment B (seq=0x00000010, segEnd=0x00000020) not sacked "+
- "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'e.seq >= Left' "+
- "comparison fails at uint32 wraparound (0x00000010 >= 0xFFFFF000 = false); "+
+ t.Errorf("segment B (seq=0x00000010, segEnd=0x00000020) not sacked " +
+ "by block [0xFFFFF000, 0x00001000] — ProcessSACK raw 'e.seq >= Left' " +
+ "comparison fails at uint32 wraparound (0x00000010 >= 0xFFFFF000 = false); " +
"fix: use seqAfterOrEqual(e.seq, b.Left)")
}
}
diff --git a/pkg/daemon/remove_peer_leak_bug_test.go b/pkg/daemon/remove_peer_leak_bug_test.go
index d74f1f22..c7c946c9 100644
--- a/pkg/daemon/remove_peer_leak_bug_test.go
+++ b/pkg/daemon/remove_peer_leak_bug_test.go
@@ -13,15 +13,15 @@ import (
//
// Symptom: TunnelManager has eight per-peer maps that get populated
// during the lifetime of a peer relationship:
-// 1. peers — populated on AddPeer/handleEncrypted/key-exchange
-// 2. crypto — populated on key-exchange paths
-// 3. lastOutboundSend — populated on every writeFrame success (iter 7)
-// 4. sendErrCount — populated on ICMP-unreachable errors (iter 8)
-// 5. lastDirectRecv — populated on every authenticated decrypt (iter 5/3)
-// 6. blackholeMissCount — populated by writeFrame's hysteresis (iter 3)
-// 7. directClearCount — populated by clearRelayOnDirectLocked (iter 3)
-// 8. relayPeers — populated by relay flip / SetRelayPeer / iter 8
-// 9. peerPubKeys — populated on auth key-exchange
+// 1. peers — populated on AddPeer/handleEncrypted/key-exchange
+// 2. crypto — populated on key-exchange paths
+// 3. lastOutboundSend — populated on every writeFrame success (iter 7)
+// 4. sendErrCount — populated on ICMP-unreachable errors (iter 8)
+// 5. lastDirectRecv — populated on every authenticated decrypt (iter 5/3)
+// 6. blackholeMissCount — populated by writeFrame's hysteresis (iter 3)
+// 7. directClearCount — populated by clearRelayOnDirectLocked (iter 3)
+// 8. relayPeers — populated by relay flip / SetRelayPeer / iter 8
+// 9. peerPubKeys — populated on auth key-exchange
// 10. pendingRekey — populated by markPendingRekey (rkPendingMu)
// 11. lastInboundDecrypt — populated by recordInboundDecrypt (rkPendingMu)
//
diff --git a/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go b/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go
index 65a86076..34040238 100644
--- a/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go
+++ b/pkg/daemon/retransmit_timeout_ssthresh_flightsize_bug_test.go
@@ -120,9 +120,9 @@ func TestTimeoutSSThreshUsesFlightSizeNotCongWin(t *testing.T) {
// Bug: ssthresh = CongWin/2 = 40960/2 = 20480 — uses window capacity, not
// actual bytes outstanding.
const (
- flightSizeBytes = MaxSegmentSize // 4096 — only entry in Unacked
- wantSSThresh = 2 * MaxSegmentSize // max(4096/2=2048, 2*MSS=8192)
- badSSThresh = initialCongWin / 2 // 20480 — CongWin/2 (wrong)
+ flightSizeBytes = MaxSegmentSize // 4096 — only entry in Unacked
+ wantSSThresh = 2 * MaxSegmentSize // max(4096/2=2048, 2*MSS=8192)
+ badSSThresh = initialCongWin / 2 // 20480 — CongWin/2 (wrong)
)
if ssthresh != wantSSThresh {
t.Errorf("RTO retransmit with CongWin=%d, FlightSize=%d: SSThresh=%d, want %d "+
diff --git a/pkg/daemon/retx_sacked_ordering_break_bug_test.go b/pkg/daemon/retx_sacked_ordering_break_bug_test.go
index d47b2bd3..1195a3b3 100644
--- a/pkg/daemon/retx_sacked_ordering_break_bug_test.go
+++ b/pkg/daemon/retx_sacked_ordering_break_bug_test.go
@@ -27,21 +27,23 @@ import (
// The comment "segments are ordered by time; if first hasn't timed out, none
// have" is correct only when Unacked entries are in strict sentAt order.
// That ordering invariant breaks when:
-// 1. A leading segment is SACKed (skipped by continue).
-// 2. The first non-sacked entry is recent (sentAt updated by a prior
-// retransmit of that exact segment).
-// 3. A later non-sacked entry was originally sent much earlier (sentAt
-// predates the retransmit) and is therefore timed out.
+// 1. A leading segment is SACKed (skipped by continue).
+// 2. The first non-sacked entry is recent (sentAt updated by a prior
+// retransmit of that exact segment).
+// 3. A later non-sacked entry was originally sent much earlier (sentAt
+// predates the retransmit) and is therefore timed out.
//
// Concretely:
-// Unacked[0]: sacked=true, sentAt=T-2s (very old, SACK-skipped)
-// Unacked[1]: sacked=false, sentAt=T-50ms (recently retransmitted; B)
-// Unacked[2]: sacked=false, sentAt=T-2s (original send; C — timed out)
+//
+// Unacked[0]: sacked=true, sentAt=T-2s (very old, SACK-skipped)
+// Unacked[1]: sacked=false, sentAt=T-50ms (recently retransmitted; B)
+// Unacked[2]: sacked=false, sentAt=T-2s (original send; C — timed out)
//
// With RTO=200ms:
-// Entry[0] sacked → continue
-// Entry[1] B: now-50ms = 50ms NOT > 200ms → break (before C is checked!)
-// Entry[2] C: timed out but NEVER REACHED
+//
+// Entry[0] sacked → continue
+// Entry[1] B: now-50ms = 50ms NOT > 200ms → break (before C is checked!)
+// Entry[2] C: timed out but NEVER REACHED
//
// Consequence: C sits in Unacked indefinitely, never retransmitted until B
// is finally ACKed (removing it from Unacked) and C becomes the first entry.
@@ -88,13 +90,13 @@ func TestRetransmitUnackedBreakSkipsTimedOutEntryAfterRecentNonSacked(t *testing
// FAILS against unpatched code: no packet is sent because B (not timed out)
// fires the break before C is ever checked.
if len(pkts) == 0 {
- t.Errorf("retransmitUnacked with [A(sacked), B(recent,not-timeout), C(timed-out)]: "+
- "expected 1 retransmit (C), got 0; "+
- "'break' after B fires before C is checked — sacked entry A causes "+
- "B to be the first non-sacked entry examined; B's sentAt is recent "+
- "(retransmit updated it) so it is not timed out and the break fires; "+
- "C's older sentAt makes it timed out but it is never reached; "+
- "fix: replace 'break' with 'continue' so all non-sacked entries "+
+ t.Errorf("retransmitUnacked with [A(sacked), B(recent,not-timeout), C(timed-out)]: " +
+ "expected 1 retransmit (C), got 0; " +
+ "'break' after B fires before C is checked — sacked entry A causes " +
+ "B to be the first non-sacked entry examined; B's sentAt is recent " +
+ "(retransmit updated it) so it is not timed out and the break fires; " +
+ "C's older sentAt makes it timed out but it is never reached; " +
+ "fix: replace 'break' with 'continue' so all non-sacked entries " +
"are checked for timeout regardless of ordering")
}
if len(pkts) == 1 && string(pkts[0].Payload) != "C" {
diff --git a/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go b/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go
index 342b45da..06da87ea 100644
--- a/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go
+++ b/pkg/daemon/rfc6582_second_partial_ack_retransmit_bug_test.go
@@ -31,8 +31,8 @@ import (
// the second partial ACK, oldDupAckCount < 3 and the condition is false —
// step 6 never runs for the second partial ACK:
//
-// - fastRetransmit not called → first still-unacked segment not retransmitted
-// - cwnd not deflated → AIMD growth fires instead, inflating cwnd
+// - fastRetransmit not called → first still-unacked segment not retransmitted
+// - cwnd not deflated → AIMD growth fires instead, inflating cwnd
//
// Concrete scenario (after entering fast recovery):
//
@@ -82,7 +82,7 @@ func TestSecondPartialAckInFastRecoveryRetransmitsFirstUnacked(t *testing.T) {
c.InRecovery = true
c.FastRecovery = true // set by fast retransmit entry (ProcessAck DupAckCount==3 path)
c.RecoveryPoint = seqD
- c.DupAckCount = 1 // only 1 dup ACK between partial ACKs (< 3 threshold)
+ c.DupAckCount = 1 // only 1 dup ACK between partial ACKs (< 3 threshold)
c.SSThresh = 5 * MaxSegmentSize
c.CongWin = c.SSThresh + 3*MaxSegmentSize // 32768 — fast recovery inflated
@@ -107,11 +107,11 @@ func TestSecondPartialAckInFastRecoveryRetransmitsFirstUnacked(t *testing.T) {
pkts := cs.all()
if len(pkts) == 0 {
- t.Errorf("RFC 6582 §3 step 6a: second partial ACK in fast recovery did not "+
- "call fastRetransmit; bug: step 6 is gated on oldDupAckCount >= 3, but "+
- "DupAckCount was reset to 0 by the first partial ACK and only reached 1 "+
- "before the second partial ACK; fix: track fast-recovery entry with "+
- "FastRecovery bool field, use '(oldDupAckCount >= 3 || wasFastRecovery) && "+
+ t.Errorf("RFC 6582 §3 step 6a: second partial ACK in fast recovery did not " +
+ "call fastRetransmit; bug: step 6 is gated on oldDupAckCount >= 3, but " +
+ "DupAckCount was reset to 0 by the first partial ACK and only reached 1 " +
+ "before the second partial ACK; fix: track fast-recovery entry with " +
+ "FastRecovery bool field, use '(oldDupAckCount >= 3 || wasFastRecovery) && " +
"wasInRecovery' so that every partial ACK during fast recovery fires step 6")
}
}
diff --git a/pkg/daemon/rto_backoff_in_recovery_bug_test.go b/pkg/daemon/rto_backoff_in_recovery_bug_test.go
index a82b86da..82851c81 100644
--- a/pkg/daemon/rto_backoff_in_recovery_bug_test.go
+++ b/pkg/daemon/rto_backoff_in_recovery_bug_test.go
@@ -65,10 +65,10 @@ func TestRetransmitUnackedDoublesRTOOnEachTimeout(t *testing.T) {
conn.RetxMu.Lock()
conn.RTO = startRTO
- conn.InRecovery = true // simulate: already in recovery from first timeout
- conn.RecoveryPoint = 9999 // high — won't exit recovery during this test
- conn.CongWin = InitialCongWin // already reduced by first timeout
- conn.SSThresh = MaxSegmentSize // already reduced
+ conn.InRecovery = true // simulate: already in recovery from first timeout
+ conn.RecoveryPoint = 9999 // high — won't exit recovery during this test
+ conn.CongWin = InitialCongWin // already reduced by first timeout
+ conn.SSThresh = MaxSegmentSize // already reduced
conn.LastAck = 1000
conn.Unacked = []*retxEntry{
// attempts=2: retransmitted once already — Karn's algorithm applies,
diff --git a/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go b/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go
index 7daf4233..f8b17dd6 100644
--- a/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go
+++ b/pkg/daemon/rtt_multiple_samples_per_ack_bug_test.go
@@ -42,9 +42,9 @@ import (
// With the bug (two updateRTT calls starting from SRTT=0):
// - Call 1: SRTT = 1000 ms, RTTVAR = 500 ms
// - Call 2: diff = |1000-10| = 990 ms
-// RTTVAR = 500*3/4 + 990/4 = 375+247.5 = 622.5 ms
-// SRTT = 1000*7/8 + 10/8 = 875+1.25 = 876.25 ms
-// → SRTT ≈ 876 ms, pulled down by the short-RTT segment
+// RTTVAR = 500*3/4 + 990/4 = 375+247.5 = 622.5 ms
+// SRTT = 1000*7/8 + 10/8 = 875+1.25 = 876.25 ms
+// → SRTT ≈ 876 ms, pulled down by the short-RTT segment
//
// With the fix (one updateRTT call using the first/oldest acked segment):
// - SRTT = 1000 ms (first measurement, set directly per RFC 6298 §2.2)
diff --git a/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go b/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go
index 2c261838..5f95222c 100644
--- a/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go
+++ b/pkg/daemon/rtt_sacked_segment_skipped_bug_test.go
@@ -60,7 +60,7 @@ func TestRTTUpdateSkippedForSackedSegments(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = 1000
- conn.SRTT = 0 // initial state — no RTT measurements yet
+ conn.SRTT = 0 // initial state — no RTT measurements yet
conn.RTTVAR = 0
conn.RTO = InitialRTO
conn.Unacked = []*retxEntry{
@@ -68,7 +68,7 @@ func TestRTTUpdateSkippedForSackedSegments(t *testing.T) {
seq: 1000,
data: make([]byte, segLen),
attempts: 1,
- sacked: true, // peer reported this segment via SACK already
+ sacked: true, // peer reported this segment via SACK already
sentAt: time.Now().Add(-5 * time.Millisecond), // sent 5ms ago
},
}
@@ -87,14 +87,14 @@ func TestRTTUpdateSkippedForSackedSegments(t *testing.T) {
// the cumulative ACK stay at SRTT=0/RTO=InitialRTO forever, using a
// 1s retransmit timeout on connections that may have 1ms RTT.
if srtt == 0 {
- t.Errorf("ProcessAck with sacked segment (attempts=1, sacked=true): SRTT=0 "+
- "after cumulative ACK covers the segment; "+
- "'!e.sacked' guard prevents updateRTT for segments the peer already "+
- "confirmed via SACK, so when all in-flight data is SACKed before the "+
- "cumulative ACK arrives (common on SACK-heavy connections), the sender "+
- "never refines SRTT from InitialRTO=1s, causing spurious retransmission "+
- "and slow loss recovery; fix: remove the !e.sacked condition — "+
- "once-sent segments (attempts==1) always yield valid RTT samples per "+
+ t.Errorf("ProcessAck with sacked segment (attempts=1, sacked=true): SRTT=0 " +
+ "after cumulative ACK covers the segment; " +
+ "'!e.sacked' guard prevents updateRTT for segments the peer already " +
+ "confirmed via SACK, so when all in-flight data is SACKed before the " +
+ "cumulative ACK arrives (common on SACK-heavy connections), the sender " +
+ "never refines SRTT from InitialRTO=1s, causing spurious retransmission " +
+ "and slow loss recovery; fix: remove the !e.sacked condition — " +
+ "once-sent segments (attempts==1) always yield valid RTT samples per " +
"RFC 6298 regardless of SACK state")
}
}
diff --git a/pkg/daemon/sack_blocks_wraparound_bug_test.go b/pkg/daemon/sack_blocks_wraparound_bug_test.go
index 1bfaff4c..513b736e 100644
--- a/pkg/daemon/sack_blocks_wraparound_bug_test.go
+++ b/pkg/daemon/sack_blocks_wraparound_bug_test.go
@@ -101,8 +101,10 @@ func TestSACKBlocksWraparound(t *testing.T) {
//
// seg1: [0xFFFFF000, 0x00000000) (wraps to 0; segEnd = 0x00000000 after add)
// Wait — use seg1 that ends exactly where seg2 begins:
-// seg1: [0xFFFFFF00, 0xFFFFFFFF+1) = [0xFFFFFF00, 0x00000000)
-// seg2: [0x00000000, 0x00000010)
+//
+// seg1: [0xFFFFFF00, 0xFFFFFFFF+1) = [0xFFFFFF00, 0x00000000)
+// seg2: [0x00000000, 0x00000010)
+//
// They are contiguous: seg1's right edge == seg2's left edge.
func TestSACKBlocksWraparoundContiguous(t *testing.T) {
pm := NewPortManager()
diff --git a/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go b/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go
index bf4ae180..68b38eca 100644
--- a/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go
+++ b/pkg/daemon/sack_cumulative_ack_aimd_overcounting_bug_test.go
@@ -70,8 +70,8 @@ func TestSACKCumulativeAckDoesNotInflateBytesAcked(t *testing.T) {
conn.RetxMu.Lock()
conn.LastAck = seqA
// In slow start: CongWin < SSThresh so every new ACK grows CongWin by bytesAcked.
- conn.CongWin = InitialCongWin // 10*MSS = 40960
- conn.SSThresh = 40 * MaxSegmentSize // 163840 — well above CongWin
+ conn.CongWin = InitialCongWin // 10*MSS = 40960
+ conn.SSThresh = 40 * MaxSegmentSize // 163840 — well above CongWin
conn.InRecovery = false
conn.DupAckCount = 0
conn.Unacked = []*retxEntry{
diff --git a/pkg/daemon/sendbuf_caller_bug_test.go b/pkg/daemon/sendbuf_caller_bug_test.go
index a478c67c..58586a27 100644
--- a/pkg/daemon/sendbuf_caller_bug_test.go
+++ b/pkg/daemon/sendbuf_caller_bug_test.go
@@ -48,6 +48,7 @@ import (
// - Surface a non-transient error only if the connection is
// actually broken (not Established, peer closed, etc.) OR if a
// deadline elapses
+//
// This keeps net.Conn semantics intact: callers see Write block
// briefly under back-pressure (just like a real TCP socket whose
// kernel send buffer is full), then succeed.
diff --git a/pkg/daemon/services.go b/pkg/daemon/services.go
index 4f436a13..394913ec 100644
--- a/pkg/daemon/services.go
+++ b/pkg/daemon/services.go
@@ -1409,9 +1409,9 @@ func (d *Daemon) handleTaskResults(adapter *connAdapter, conn *Connection, frame
slog.Info("tasksubmit: polo scores updated", "task_id", msg.TaskID, "receiver_reward", reward)
d.webhook.Emit("polo.updated", map[string]interface{}{
- "task_id": msg.TaskID,
- "submitter_delta": -1,
- "receiver_reward": reward,
+ "task_id": msg.TaskID,
+ "submitter_delta": -1,
+ "receiver_reward": reward,
})
}
}
diff --git a/pkg/daemon/ss_growth_abc_cap_bug_test.go b/pkg/daemon/ss_growth_abc_cap_bug_test.go
index 8046c19f..285d73d5 100644
--- a/pkg/daemon/ss_growth_abc_cap_bug_test.go
+++ b/pkg/daemon/ss_growth_abc_cap_bug_test.go
@@ -75,9 +75,9 @@ func TestSSGrowthCapsIncrementAt2SMSS(t *testing.T) {
// 2*SMSS = 8192.
// Bug: cwnd += 12288 (3*MSS) uses raw bytes_acked exceeding RFC limit.
const (
- wantIncrement = 2 * MaxSegmentSize // 8192
- bugIncrement = 3 * MaxSegmentSize // 12288
- wantCongWin = initialCongWin + wantIncrement // 20480
+ wantIncrement = 2 * MaxSegmentSize // 8192
+ bugIncrement = 3 * MaxSegmentSize // 12288
+ wantCongWin = initialCongWin + wantIncrement // 20480
)
if c.CongWin != wantCongWin {
t.Errorf("SS growth with bytes_acked=3*SMSS: CongWin=%d, want %d "+
diff --git a/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go b/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go
index 09ef7867..63cdb59a 100644
--- a/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go
+++ b/pkg/daemon/ssthresh_congwin_vs_flightsize_bug_test.go
@@ -111,9 +111,9 @@ func TestFastRetransmitSSThreshUsesFlightSizeNotCongWin(t *testing.T) {
// window capacity, not the measured flight size; using it overestimates SSThresh
// and causes the connection to resume from a rate higher than what caused the loss.
const (
- flightSize = 3 * MaxSegmentSize // 12288 — sum of all Unacked
- wantSSThresh = 2 * MaxSegmentSize // max(flightSize/2=6144, 2*MSS=8192)
- badSSThresh = initialCongWin / 2 // 20480 — what CongWin/2 produces
+ flightSize = 3 * MaxSegmentSize // 12288 — sum of all Unacked
+ wantSSThresh = 2 * MaxSegmentSize // max(flightSize/2=6144, 2*MSS=8192)
+ badSSThresh = initialCongWin / 2 // 20480 — what CongWin/2 produces
)
if ssthresh != wantSSThresh {
t.Errorf("fast retransmit with CongWin=%d, FlightSize=%d: SSThresh=%d, want %d "+
diff --git a/pkg/daemon/ssthresh_floor_two_mss_bug_test.go b/pkg/daemon/ssthresh_floor_two_mss_bug_test.go
index 9b459649..5e788ecc 100644
--- a/pkg/daemon/ssthresh_floor_two_mss_bug_test.go
+++ b/pkg/daemon/ssthresh_floor_two_mss_bug_test.go
@@ -69,9 +69,9 @@ func TestFastRetransmitSSThreshFloorTwoSMSS(t *testing.T) {
conn.RetxStop = make(chan struct{})
const (
- lastAck = uint32(1000)
- initialCongWin = 2 * MaxSegmentSize // 8192 — floor scenario
- highSSThresh = 40 * MaxSegmentSize // >> CongWin, not the binding constraint
+ lastAck = uint32(1000)
+ initialCongWin = 2 * MaxSegmentSize // 8192 — floor scenario
+ highSSThresh = 40 * MaxSegmentSize // >> CongWin, not the binding constraint
)
conn.Mu.Lock()
diff --git a/pkg/daemon/throughput_bench_test.go b/pkg/daemon/throughput_bench_test.go
index 7502151a..ee40f15b 100644
--- a/pkg/daemon/throughput_bench_test.go
+++ b/pkg/daemon/throughput_bench_test.go
@@ -32,7 +32,7 @@ import (
const benchTransferBytes = 4 * 1024 * 1024 // 4 MB
-func BenchmarkThroughputNoLoss(b *testing.B) { runThroughputBench(b, 0.000) }
+func BenchmarkThroughputNoLoss(b *testing.B) { runThroughputBench(b, 0.000) }
func BenchmarkThroughput01PctLoss(b *testing.B) { runThroughputBench(b, 0.001) }
func BenchmarkThroughput1PctLoss(b *testing.B) { runThroughputBench(b, 0.010) }
func BenchmarkThroughput5PctLoss(b *testing.B) { runThroughputBench(b, 0.050) }
@@ -68,10 +68,10 @@ func TestThroughputReport(t *testing.T) {
}
type result struct {
- name string
- rate float64
- elapsed time.Duration
- mbps float64
+ name string
+ rate float64
+ elapsed time.Duration
+ mbps float64
retransmits int
}
@@ -240,7 +240,7 @@ func simulateTransferFull(lossRate float64, totalBytes int, seed int64) int {
// Re-initialize Unacked so TrackSend uses our sim time epoch.
// TrackSend sets sentAt=time.Now() internally; we overwrite after each call.
- sendIdx := 0 // next segment index to send
+ sendIdx := 0 // next segment index to send
noProgressRounds := 0
maxIterations := totalSegs * 200 // safety bound; sim converges fast
diff --git a/pkg/daemon/timeout_cwnd_reset_bug_test.go b/pkg/daemon/timeout_cwnd_reset_bug_test.go
index 5ce01563..853d9662 100644
--- a/pkg/daemon/timeout_cwnd_reset_bug_test.go
+++ b/pkg/daemon/timeout_cwnd_reset_bug_test.go
@@ -75,9 +75,9 @@ func TestTimeoutResetsCongWinTo1SMSS(t *testing.T) {
conn.RetxStop = make(chan struct{})
const (
- seqA = uint32(1000)
- initialCongWin = 20 * MaxSegmentSize // 81920 — large established window
- initialSSThresh = 10 * MaxSegmentSize // 40960 — initial ssthresh
+ seqA = uint32(1000)
+ initialCongWin = 20 * MaxSegmentSize // 81920 — large established window
+ initialSSThresh = 10 * MaxSegmentSize // 40960 — initial ssthresh
)
conn.Mu.Lock()
diff --git a/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go b/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go
index a3ba82fe..e467d3ac 100644
--- a/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go
+++ b/pkg/daemon/timeout_ssthresh_in_recovery_bug_test.go
@@ -76,9 +76,9 @@ func TestTimeoutDuringFastRecoveryRecomputesSSThresh(t *testing.T) {
conn.RetxStop = make(chan struct{})
const (
- seqA = uint32(1000)
- fastRecoverySSThresh = 5 * MaxSegmentSize // 20480 — from a larger flightSize
- fastRecoveryCongWin = fastRecoverySSThresh + 3*MaxSegmentSize
+ seqA = uint32(1000)
+ fastRecoverySSThresh = 5 * MaxSegmentSize // 20480 — from a larger flightSize
+ fastRecoveryCongWin = fastRecoverySSThresh + 3*MaxSegmentSize
)
conn.Mu.Lock()
diff --git a/pkg/daemon/tunnel.go b/pkg/daemon/tunnel.go
index d1a993f6..c454514b 100644
--- a/pkg/daemon/tunnel.go
+++ b/pkg/daemon/tunnel.go
@@ -104,7 +104,6 @@ const salvageMaxEntries = 4
// margin for slow handshakes under loss.
const salvageMaxAge = 5 * time.Second
-
// decryptFailDropThreshold is how many consecutive AEAD-authentication
// failures from a single peer trigger a full peerCrypto drop +
// re-handshake. Sized to swallow a small burst of legitimate packet
@@ -204,8 +203,8 @@ type TunnelManager struct {
// Rate-limit rekey-request responses triggered by "encrypted packet but no
// key" events. Prevents amplification if a peer floods us with gibberish.
- rekeyMu sync.Mutex
- lastRekeyReq map[uint32]time.Time
+ rekeyMu sync.Mutex
+ lastRekeyReq map[uint32]time.Time
// P1-010 tunnel-state half: track in-flight key exchanges so a single
// dropped reply under packet loss doesn't leave the tunnel wedged for
@@ -221,7 +220,6 @@ type TunnelManager struct {
beaconAddr *net.UDPAddr // beacon address for punch/relay
relayPeers map[uint32]bool // peers that need relay (symmetric NAT)
-
// relayPinned marks peers whose relay flag was set by an authoritative
// signal (registry's relay_only=true on the resolve response, OR an
// operator forcing relay via SetRelayPeer with pin=true). For pinned
diff --git a/pkg/daemon/tunnel_blackhole_bug_test.go b/pkg/daemon/tunnel_blackhole_bug_test.go
index c9f6d285..6170106b 100644
--- a/pkg/daemon/tunnel_blackhole_bug_test.go
+++ b/pkg/daemon/tunnel_blackhole_bug_test.go
@@ -40,13 +40,13 @@ import (
// - 50 MB direct transfer iter 3: 4.27 MB/s
//
// What v1.9.x's tunnel-stability fix should change. Any of:
-// 1. Skip the flip if conn-level retransmit budget hasn't been
-// exhausted (direct path is actively being used; absence of recv
-// ACK means the peer is slow, not unreachable).
-// 2. Require N consecutive 8 s gaps WITH active sends in between
-// (transient pause shouldn't latch the relay flag).
-// 3. Raise the threshold (30 s+) under normal load and only drop
-// it when the application explicitly requests fast-failover.
+// 1. Skip the flip if conn-level retransmit budget hasn't been
+// exhausted (direct path is actively being used; absence of recv
+// ACK means the peer is slow, not unreachable).
+// 2. Require N consecutive 8 s gaps WITH active sends in between
+// (transient pause shouldn't latch the relay flag).
+// 3. Raise the threshold (30 s+) under normal load and only drop
+// it when the application explicitly requests fast-failover.
//
// This test pins the CURRENT behavior so the fix has a concrete
// regression target. After the fix, the assertion below flips:
diff --git a/pkg/daemon/tunnel_desync_salvage_test.go b/pkg/daemon/tunnel_desync_salvage_test.go
index 158f475b..406b0de1 100644
--- a/pkg/daemon/tunnel_desync_salvage_test.go
+++ b/pkg/daemon/tunnel_desync_salvage_test.go
@@ -103,9 +103,9 @@ func TestRecordSalvageNilPCIsNoop(t *testing.T) {
func TestReplaySalvageNilArgsIsNoop(t *testing.T) {
tm := NewTunnelManager()
pc := fakePC(t)
- tm.replaySalvage(nil, pc, 1, nil) // oldPC nil
- tm.replaySalvage(pc, nil, 1, nil) // newPC nil
- tm.replaySalvage(pc, pc, 1, nil) // addr nil
+ tm.replaySalvage(nil, pc, 1, nil) // oldPC nil
+ tm.replaySalvage(pc, nil, 1, nil) // newPC nil
+ tm.replaySalvage(pc, pc, 1, nil) // addr nil
// no panic = pass
}
diff --git a/pkg/daemon/tunnel_handle_test.go b/pkg/daemon/tunnel_handle_test.go
index e8352c58..55461dcf 100644
--- a/pkg/daemon/tunnel_handle_test.go
+++ b/pkg/daemon/tunnel_handle_test.go
@@ -711,7 +711,6 @@ func TestHandlersAreRobustAgainstEmptyData(_ *testing.T) {
// Ensure compile-time coverage of stdlib imports unused in some builds
var _ = fmt.Errorf
-
// setupAuthKeyExchangeTest builds the common scaffolding: a tunnel
// manager with encryption enabled, a peer Ed25519 identity registered,
// and a valid signed auth key_exchange frame ready to feed into
diff --git a/pkg/daemon/window_update_dup_ack_count_bug_test.go b/pkg/daemon/window_update_dup_ack_count_bug_test.go
index 2dacf598..b420c84b 100644
--- a/pkg/daemon/window_update_dup_ack_count_bug_test.go
+++ b/pkg/daemon/window_update_dup_ack_count_bug_test.go
@@ -118,7 +118,7 @@ func TestWindowUpdateDoesNotIncrementDupAckCount(t *testing.T) {
SrcPort: remotePort,
DstPort: localPort,
Seq: uint32(100 + i),
- Ack: 500, // == conn.LastAck → same cumulative ACK
+ Ack: 500, // == conn.LastAck → same cumulative ACK
Window: winSegs, // grows: 2, 3, 4 segments
}
d.handleStreamPacket(pkt)
diff --git a/pkg/daemon/window_update_wakeup_bug_test.go b/pkg/daemon/window_update_wakeup_bug_test.go
index 5e9a5d94..c404a130 100644
--- a/pkg/daemon/window_update_wakeup_bug_test.go
+++ b/pkg/daemon/window_update_wakeup_bug_test.go
@@ -115,8 +115,8 @@ func TestWindowUpdateDoesNotWakeSender(t *testing.T) {
SrcPort: remotePort,
DstPort: localPort,
Seq: 500,
- Ack: 1000, // == LastAck — dup-ACK path in ProcessAck
- Window: 1, // non-zero: peer's window just opened
+ Ack: 1000, // == LastAck — dup-ACK path in ProcessAck
+ Window: 1, // non-zero: peer's window just opened
}
d.handleStreamPacket(windowUpdatePkt)
@@ -126,14 +126,14 @@ func TestWindowUpdateDoesNotWakeSender(t *testing.T) {
case <-conn.WindowCh:
// good — sender wakes up promptly
case <-time.After(100 * time.Millisecond):
- t.Errorf("window-update ACK (Ack=LastAck, Window=1 with PeerRecvWin=0) did not "+
- "signal conn.WindowCh within 100ms; "+
- "handleStreamPacket updates PeerRecvWin but never signals WindowCh; "+
- "ProcessAck is called with ack=LastAck (dup-ACK path) which returns "+
- "before the WindowCh signal at the bottom of the new-ACK path; "+
- "sendSegment blocked on WindowCh will not wake until the next "+
- "zero-window probe timer fires (up to 30s with exponential backoff); "+
- "fix: in handleStreamPacket, signal conn.WindowCh after setting "+
+ t.Errorf("window-update ACK (Ack=LastAck, Window=1 with PeerRecvWin=0) did not " +
+ "signal conn.WindowCh within 100ms; " +
+ "handleStreamPacket updates PeerRecvWin but never signals WindowCh; " +
+ "ProcessAck is called with ack=LastAck (dup-ACK path) which returns " +
+ "before the WindowCh signal at the bottom of the new-ACK path; " +
+ "sendSegment blocked on WindowCh will not wake until the next " +
+ "zero-window probe timer fires (up to 30s with exponential backoff); " +
+ "fix: in handleStreamPacket, signal conn.WindowCh after setting " +
"PeerRecvWin when transitioning from 0 to > 0")
}
}
diff --git a/pkg/daemon/zero_window_peerrecvwin_bug_test.go b/pkg/daemon/zero_window_peerrecvwin_bug_test.go
index 18fb674a..b6cd8112 100644
--- a/pkg/daemon/zero_window_peerrecvwin_bug_test.go
+++ b/pkg/daemon/zero_window_peerrecvwin_bug_test.go
@@ -61,13 +61,13 @@ func TestZeroWindowAdvertisementNotHonored(t *testing.T) {
// FAILS against unpatched code: PeerRecvWin>0 guard treats 0 as "unknown",
// EffectiveWindow returns CongWin=InitialCongWin, WindowAvailable=true.
if available {
- t.Errorf("zero-window advertisement not honored: WindowAvailable()=true "+
- "when PeerRecvWin=0 (peer sent Window=0); "+
- "EffectiveWindow() guard 'c.PeerRecvWin > 0' treats 0 as the "+
- "uninitialized/unknown sentinel, so an explicit zero-window "+
- "advertisement is silently ignored and the sender is allowed to "+
- "transmit data that the peer has no buffer space to accept; "+
- "fix: initialize PeerRecvWin to -1 in NewConnection and change "+
+ t.Errorf("zero-window advertisement not honored: WindowAvailable()=true " +
+ "when PeerRecvWin=0 (peer sent Window=0); " +
+ "EffectiveWindow() guard 'c.PeerRecvWin > 0' treats 0 as the " +
+ "uninitialized/unknown sentinel, so an explicit zero-window " +
+ "advertisement is silently ignored and the sender is allowed to " +
+ "transmit data that the peer has no buffer space to accept; " +
+ "fix: initialize PeerRecvWin to -1 in NewConnection and change " +
"the guard to c.PeerRecvWin >= 0")
}
}
diff --git a/pkg/registry/binary_client_test.go b/pkg/registry/binary_client_test.go
index cdbb1ed4..2ff37ed1 100644
--- a/pkg/registry/binary_client_test.go
+++ b/pkg/registry/binary_client_test.go
@@ -22,12 +22,12 @@ import (
// --- fakeBinaryServer: minimal TCP server speaking the binary wire protocol ---
type fakeBinaryServer struct {
- ln net.Listener
- handler func(msgType byte, payload []byte) (respType byte, respPayload []byte)
- mu sync.Mutex
+ ln net.Listener
+ handler func(msgType byte, payload []byte) (respType byte, respPayload []byte)
+ mu sync.Mutex
handshakes atomic.Uint32
- frames atomic.Uint32
- done chan struct{}
+ frames atomic.Uint32
+ done chan struct{}
}
func newFakeBinaryServer(t *testing.T, handler func(msgType byte, payload []byte) (byte, []byte)) *fakeBinaryServer {
@@ -246,15 +246,15 @@ func TestLookupHappyPathDecodesResult(t *testing.T) {
return wireMsgError, encodeWireError("bad type")
}
return wireMsgLookupOK, encodeLookupResp(
- 42, // nodeID
- true, false, // public, taskExec
- 7, // polo
- []uint16{1, 2}, // networks
- []byte{0xAB}, // pubkey
- "host.example", // hostname
- []string{"t1"}, // tags
- "1.2.3.4:444", // realAddr
- "ext-123", // externalID
+ 42, // nodeID
+ true, false, // public, taskExec
+ 7, // polo
+ []uint16{1, 2}, // networks
+ []byte{0xAB}, // pubkey
+ "host.example", // hostname
+ []string{"t1"}, // tags
+ "1.2.3.4:444", // realAddr
+ "ext-123", // externalID
)
})
diff --git a/pkg/registry/panic_recovery.go b/pkg/registry/panic_recovery.go
index 0fcf1bfa..f710a130 100644
--- a/pkg/registry/panic_recovery.go
+++ b/pkg/registry/panic_recovery.go
@@ -23,14 +23,14 @@ func RecoveredPanicCount() uint64 {
// recoverHandler is the standard panic-recovery shim used at the top of
// every connection-handling goroutine and every background loop. Usage:
//
-// defer recoverHandler("handleConn", nil)
+// defer recoverHandler("handleConn", nil)
//
// On panic it:
-// 1. Recovers (process keeps running)
-// 2. Logs at ERROR with the panic value + full goroutine stack trace
-// 3. Increments the global recoveredPanicCount metric
-// 4. Calls onPanic(count) if non-nil — callers can use this to drop
-// a connection / restart a loop / etc.
+// 1. Recovers (process keeps running)
+// 2. Logs at ERROR with the panic value + full goroutine stack trace
+// 3. Increments the global recoveredPanicCount metric
+// 4. Calls onPanic(count) if non-nil — callers can use this to drop
+// a connection / restart a loop / etc.
//
// recoverHandler must be the OUTERMOST defer in the goroutine: defers
// run LIFO, so other defers (conn.Close, mu.Unlock) run first; we want
diff --git a/pkg/registry/panic_recovery_test.go b/pkg/registry/panic_recovery_test.go
index 6aa3ffe4..61a530f2 100644
--- a/pkg/registry/panic_recovery_test.go
+++ b/pkg/registry/panic_recovery_test.go
@@ -55,8 +55,8 @@ func TestRecoverHandlerConcurrent(t *testing.T) {
t.Parallel()
var (
- mu sync.Mutex
- seenSet = map[uint64]struct{}{}
+ mu sync.Mutex
+ seenSet = map[uint64]struct{}{}
)
const N = 32
diff --git a/pkg/registry/replication.go b/pkg/registry/replication.go
index 4b5cd5af..54ac7984 100644
--- a/pkg/registry/replication.go
+++ b/pkg/registry/replication.go
@@ -702,8 +702,8 @@ func (s *Server) applySnapshot(data []byte) error {
// I/O-free string parsing but is still extra work we don't need to keep
// inside the swap critical section.
var (
- acceptIDPConfig bool
- acceptAuditExport bool
+ acceptIDPConfig bool
+ acceptAuditExport bool
)
if snap.IDPConfig != nil {
if err := urlvalidate.Validate(snap.IDPConfig.URL); err != nil {
diff --git a/pkg/registry/server.go b/pkg/registry/server.go
index 95d007ba..fb4bf9d2 100644
--- a/pkg/registry/server.go
+++ b/pkg/registry/server.go
@@ -286,10 +286,10 @@ func (s *Server) appendAudit(action string, netID uint16, nodeID uint32, attrs .
const numNodeShards = 256
type Server struct {
- mu sync.RWMutex
- nodeShards [numNodeShards]sync.RWMutex // per-node field locks (nodeID % N)
- nodes map[uint32]*NodeInfo
- maxNodes int // max registered nodes (0 = unlimited); prevents memory exhaustion
+ mu sync.RWMutex
+ nodeShards [numNodeShards]sync.RWMutex // per-node field locks (nodeID % N)
+ nodes map[uint32]*NodeInfo
+ maxNodes int // max registered nodes (0 = unlimited); prevents memory exhaustion
startTime time.Time
restartEvents []int64 // unix-millis of each process start after the first
downtimeIntervals [][2]int64 // [start,end] unix-millis pairs, pruned to last 30d
@@ -309,14 +309,14 @@ type Server struct {
pulseIdx int
pulseFilled bool
- networks map[uint16]*NetworkInfo
- pubKeyIdx map[string]uint32 // base64(pubkey) -> nodeID for re-registration
- ownerIdx map[string]uint32 // owner -> nodeID for key rotation
- hostnameIdx map[string]uint32 // hostname -> nodeID (unique index)
- nextNode uint32
- nextNet uint16
- listener net.Listener
- readyCh chan struct{}
+ networks map[uint16]*NetworkInfo
+ pubKeyIdx map[string]uint32 // base64(pubkey) -> nodeID for re-registration
+ ownerIdx map[string]uint32 // owner -> nodeID for key rotation
+ hostnameIdx map[string]uint32 // hostname -> nodeID (unique index)
+ nextNode uint32
+ nextNet uint16
+ listener net.Listener
+ readyCh chan struct{}
// Beacon coordination
beaconAddr string
@@ -356,15 +356,14 @@ type Server struct {
// Network invite inbox: target nodeID -> pending invites
inviteInbox map[uint32][]*NetworkInvite
-
// Connection tracking
connCount atomic.Int64
maxConnections int64
// Replication
- replMgr *replicationManager
- replToken string // H4 fix: required for subscribe_replication; empty = replication disabled
- standby bool // if true, reject writes and receive snapshots from primary
+ replMgr *replicationManager
+ replToken string // H4 fix: required for subscribe_replication; empty = replication disabled
+ standby bool // if true, reject writes and receive snapshots from primary
adminToken string // required for create_network; empty = creation disabled
dashboardToken string // token for per-network stats on dashboard; empty = public-only
maintenanceBanner string // optional notice rendered on the dashboard alongside release banner
@@ -447,8 +446,8 @@ type Server struct {
// listings ("data-exchange" 45k members, "high-trust-society" 28k) all
// route through the same singleflight + 1s-TTL cache. Each network
// (and the admin path, key=0) has its own state inside listNodesPerNet.
- listNodesCache listNodesCacheState // legacy backbone admin cache
- listNodesPerNetMu sync.Mutex // guards the map itself
+ listNodesCache listNodesCacheState // legacy backbone admin cache
+ listNodesPerNetMu sync.Mutex // guards the map itself
listNodesPerNet map[uint16]*listNodesCacheState
}
@@ -1752,8 +1751,6 @@ func (s *Server) handleBinaryLookup(conn net.Conn, payload []byte, host string)
s.metrics.requestDuration.WithLabel("lookup").Observe(time.Since(start).Seconds())
}()
-
-
// Brief global lock for map lookup
s.mu.RLock()
node, ok := s.nodes[nodeID]
@@ -1802,8 +1799,6 @@ func (s *Server) handleBinaryResolve(conn net.Conn, payload []byte, host string)
s.metrics.requestDuration.WithLabel("resolve").Observe(time.Since(start).Seconds())
}()
-
-
// Phase 1: copy pubkey for verification
s.mu.RLock()
requester, ok := s.nodes[requesterID]
@@ -2279,9 +2274,11 @@ func (s *Server) handleRegister(msg map[string]interface{}, remoteAddr string) (
//
// 3-PHASE LOCK PATTERN — see [[X-Tasks/backlog/30-mutex-risk-map]] § fix #5
// and the lock-ordering invariants doc at the top of this file.
-// Phase 1 (RLock): snapshot the current pubkey for verification.
-// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock.
-// Phase 3 (Lock): re-check the pubkey is unchanged; commit the swap.
+//
+// Phase 1 (RLock): snapshot the current pubkey for verification.
+// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock.
+// Phase 3 (Lock): re-check the pubkey is unchanged; commit the swap.
+//
// If a concurrent rotation lands between Phase 1 and Phase 3 the verify is
// stale; we reject this caller and let it retry. Rotate is rare, so the
// retry surface is acceptable.
@@ -2376,10 +2373,11 @@ func (s *Server) handleRotateKey(msg map[string]interface{}) (map[string]interfa
// Only the node itself can set its own key expiry (signature-verified).
//
// 3-PHASE LOCK PATTERN — see [[X-Tasks/backlog/30-mutex-risk-map]] § fix #6.
-// Phase 1 (RLock): snapshot pubkey + adminToken for verification.
-// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock.
-// Phase 3 (Lock): re-check node + pubkey unchanged + enterprise gate;
-// commit the new expiry.
+//
+// Phase 1 (RLock): snapshot pubkey + adminToken for verification.
+// Phase 2 (no lock): ~28µs Ed25519 verify runs OUTSIDE the lock.
+// Phase 3 (Lock): re-check node + pubkey unchanged + enterprise gate;
+// commit the new expiry.
func (s *Server) handleSetKeyExpiry(msg map[string]interface{}) (map[string]interface{}, error) {
nodeID := jsonUint32(msg, "node_id")
@@ -6107,12 +6105,13 @@ const adminListNodesTTL = 1 * time.Second
// json.Marshal entirely on cache hits.
//
// Why pre-build the wrapper:
-// The previous implementation returned just the inner nodes array as
-// json.RawMessage and let json.Marshal wrap it. Even though the bytes
-// were already valid JSON, the encoder called appendCompact() on every
-// call to re-validate them byte-by-byte — burning ~65% of total CPU at
-// ~320 calls/sec on a 16 MB payload (measured 2026-04-29 profile).
-// Pre-wrapping eliminates the encoder pass entirely.
+//
+// The previous implementation returned just the inner nodes array as
+// json.RawMessage and let json.Marshal wrap it. Even though the bytes
+// were already valid JSON, the encoder called appendCompact() on every
+// call to re-validate them byte-by-byte — burning ~65% of total CPU at
+// ~320 calls/sec on a 16 MB payload (measured 2026-04-29 profile).
+// Pre-wrapping eliminates the encoder pass entirely.
//
// Race-clean: cache rebuild runs without any registry lock held. The
// inner build acquires s.mu.RLock briefly and (via the iteration) per-node
@@ -6290,8 +6289,10 @@ func (s *Server) handleDeregister(msg map[string]interface{}) (map[string]interf
// Pre-built fragments for the heartbeat-ok response. Go's json.Marshal sorts
// map keys alphabetically, so the wire shape is:
-// without warning: {"time":,"type":"heartbeat_ok"}
-// with warning: {"key_expiry_warning":true,"time":,"type":"heartbeat_ok"}
+//
+// without warning: {"time":,"type":"heartbeat_ok"}
+// with warning: {"key_expiry_warning":true,"time":,"type":"heartbeat_ok"}
+//
// Pre-building the static prefix/suffix and only sprintf'ing the timestamp
// saves the ~8% of remaining CPU spent in json.Marshal on the heartbeat
// response — this is the single most-frequent message in the system.
@@ -6468,8 +6469,8 @@ type snapshot struct {
LastHeartbeat int64 `json:"last_heartbeat,omitempty"`
ProbeStates map[string]*ProbeState `json:"probe_states,omitempty"`
// Time-series history for dashboard charts
- HourlyHistory []StatsSample `json:"hourly_history,omitempty"`
- DailyHistory []StatsSample `json:"daily_history,omitempty"`
+ HourlyHistory []StatsSample `json:"hourly_history,omitempty"`
+ DailyHistory []StatsSample `json:"daily_history,omitempty"`
NetHourlyHistory map[string][]NetworkSampleEntry `json:"net_hourly_history,omitempty"`
NetDailyHistory map[string][]NetworkSampleEntry `json:"net_daily_history,omitempty"`
// Audit log persistence (most recent entries, capped at maxAuditEntries)
@@ -7773,19 +7774,19 @@ func (s *Server) SetBeaconStats(b BeaconStatsProvider) {
}
type DashboardStats struct {
- TotalNodes int `json:"total_nodes"`
- ActiveNodes int `json:"active_nodes"`
- TotalTrustLinks int `json:"-"`
- TotalRequests int64 `json:"total_requests"`
- RelayForwarded uint64 `json:"relay_forwarded,omitempty"`
- RelayDropped uint64 `json:"relay_dropped,omitempty"`
- RelayNotFound uint64 `json:"relay_not_found,omitempty"`
- ReqPerDay int64 `json:"req_per_day"`
- UptimeSecs int64 `json:"uptime_secs"`
- Versions map[string]int `json:"versions,omitempty"`
- Networks []NetworkStats `json:"networks,omitempty"` // only populated with dashboard token
- Hourly []StatsSample `json:"hourly,omitempty"`
- Daily []StatsSample `json:"daily,omitempty"`
+ TotalNodes int `json:"total_nodes"`
+ ActiveNodes int `json:"active_nodes"`
+ TotalTrustLinks int `json:"-"`
+ TotalRequests int64 `json:"total_requests"`
+ RelayForwarded uint64 `json:"relay_forwarded,omitempty"`
+ RelayDropped uint64 `json:"relay_dropped,omitempty"`
+ RelayNotFound uint64 `json:"relay_not_found,omitempty"`
+ ReqPerDay int64 `json:"req_per_day"`
+ UptimeSecs int64 `json:"uptime_secs"`
+ Versions map[string]int `json:"versions,omitempty"`
+ Networks []NetworkStats `json:"networks,omitempty"` // only populated with dashboard token
+ Hourly []StatsSample `json:"hourly,omitempty"`
+ Daily []StatsSample `json:"daily,omitempty"`
RestartEvents []int64 `json:"restart_events,omitempty"`
DowntimeIntervals [][2]int64 `json:"downtime_intervals,omitempty"`
Probes map[string]*ProbeState `json:"probes,omitempty"`
@@ -7794,12 +7795,12 @@ type DashboardStats struct {
// NetworkStats holds per-network statistics for the authenticated dashboard view.
type NetworkStats struct {
- ID uint16 `json:"id"`
- Name string `json:"name"`
- Members int `json:"members"`
- Online int `json:"online"`
- Requests int64 `json:"requests"`
- TrustLinks int `json:"-"`
+ ID uint16 `json:"id"`
+ Name string `json:"name"`
+ Members int `json:"members"`
+ Online int `json:"online"`
+ Requests int64 `json:"requests"`
+ TrustLinks int `json:"-"`
Hourly []NetworkSampleEntry `json:"hourly,omitempty"`
Daily []NetworkSampleEntry `json:"daily,omitempty"`
}
@@ -8130,15 +8131,15 @@ func (s *Server) GetDashboardStatsExtended() DashboardStats {
s.probeMu.Unlock()
return DashboardStats{
- TotalNodes: int(s.nextNode - 1),
- ActiveNodes: activeCount,
- TotalTrustLinks: len(s.trustPairs),
- TotalRequests: s.requestCount.Load(),
- ReqPerDay: reqPerDay,
- UptimeSecs: int64(now.Sub(s.startTime).Seconds()),
- Versions: versions,
- Networks: networks,
- Hourly: hourly,
+ TotalNodes: int(s.nextNode - 1),
+ ActiveNodes: activeCount,
+ TotalTrustLinks: len(s.trustPairs),
+ TotalRequests: s.requestCount.Load(),
+ ReqPerDay: reqPerDay,
+ UptimeSecs: int64(now.Sub(s.startTime).Seconds()),
+ Versions: versions,
+ Networks: networks,
+ Hourly: hourly,
Daily: daily,
RestartEvents: restartEvents,
DowntimeIntervals: downtimeIntervals,
diff --git a/pkg/registry/server_perf_test.go b/pkg/registry/server_perf_test.go
index b3ef5da8..4b01947f 100644
--- a/pkg/registry/server_perf_test.go
+++ b/pkg/registry/server_perf_test.go
@@ -176,4 +176,3 @@ func TestNodeInfo_AtomicLastSeen_Concurrent(t *testing.T) {
t.Errorf("final value %v is before base %v", got, base)
}
}
-
diff --git a/pkg/registry/wal_replay.go b/pkg/registry/wal_replay.go
index bc59a02f..4e4fa4b4 100644
--- a/pkg/registry/wal_replay.go
+++ b/pkg/registry/wal_replay.go
@@ -55,14 +55,14 @@ type deregisterDelta struct {
// Member set is empty at creation; the creator is added by a separate
// join delta if applicable.
type networkCreateDelta struct {
- NetworkID uint16 `json:"network_id"`
- Name string `json:"name"`
- JoinRule string `json:"join_rule"`
- Token string `json:"token,omitempty"`
- AdminToken string `json:"admin_token,omitempty"`
- Enterprise bool `json:"enterprise,omitempty"`
- CreatorNodeID uint32 `json:"creator_node_id,omitempty"`
- CreatedAt string `json:"created_at"` // RFC3339
+ NetworkID uint16 `json:"network_id"`
+ Name string `json:"name"`
+ JoinRule string `json:"join_rule"`
+ Token string `json:"token,omitempty"`
+ AdminToken string `json:"admin_token,omitempty"`
+ Enterprise bool `json:"enterprise,omitempty"`
+ CreatorNodeID uint32 `json:"creator_node_id,omitempty"`
+ CreatedAt string `json:"created_at"` // RFC3339
}
// networkDeleteDelta marks a network as removed.
diff --git a/pkg/registry/wal_replay_test.go b/pkg/registry/wal_replay_test.go
index e9987324..012d9c57 100644
--- a/pkg/registry/wal_replay_test.go
+++ b/pkg/registry/wal_replay_test.go
@@ -260,10 +260,10 @@ func TestWALReplayRestoresNetworkCreate(t *testing.T) {
created := time.Now().UTC().Format(time.RFC3339)
appendWALDelta(t, storePath, DeltaNetworkCreate, 0, networkCreateDelta{
- NetworkID: 17,
- Name: "fresh-net",
- JoinRule: "open",
- CreatedAt: created,
+ NetworkID: 17,
+ Name: "fresh-net",
+ JoinRule: "open",
+ CreatedAt: created,
})
s := NewWithStore("", storePath)
diff --git a/pkg/skillinject/manifest.go b/pkg/skillinject/manifest.go
index 3cb4f89b..fc2eae51 100644
--- a/pkg/skillinject/manifest.go
+++ b/pkg/skillinject/manifest.go
@@ -67,7 +67,7 @@ type ManifestTool struct {
// fetcher is a small wrapper around http.Client that returns response
// bodies. Pulled out so tests can inject a fake.
type fetcher struct {
- httpClient *http.Client
+ httpClient *http.Client
manifestURL string
repoBase string
}
diff --git a/pkg/skillinject/skillinject.go b/pkg/skillinject/skillinject.go
index c7cc7747..298e4fee 100644
--- a/pkg/skillinject/skillinject.go
+++ b/pkg/skillinject/skillinject.go
@@ -169,7 +169,7 @@ func Tick(ctx context.Context, cfg Config) (*Report, error) {
if err != nil {
report.Outcomes = append(report.Outcomes, Outcome{
Tool: mt.Name, Kind: KindMarker,
- Path: expandHome(mt.HeartbeatPath, home),
+ Path: expandHome(mt.HeartbeatPath, home),
Action: ActionError,
Err: fmt.Sprintf("fetch %s: %v", mt.HeartbeatTemplate, err),
})
@@ -181,7 +181,7 @@ func Tick(ctx context.Context, cfg Config) (*Report, error) {
if err != nil {
report.Outcomes = append(report.Outcomes, Outcome{
Tool: mt.Name, Kind: KindMarker,
- Path: expandHome(mt.HeartbeatPath, home),
+ Path: expandHome(mt.HeartbeatPath, home),
Action: ActionError, Err: err.Error(),
})
continue
diff --git a/pkg/tasksubmit/tasksubmit.go b/pkg/tasksubmit/tasksubmit.go
index 7c8b63f8..7ee92a9c 100644
--- a/pkg/tasksubmit/tasksubmit.go
+++ b/pkg/tasksubmit/tasksubmit.go
@@ -57,11 +57,11 @@ const (
// The frame reader already caps the whole frame at 16 MiB; these are
// tighter semantic bounds checked before the content is persisted.
const (
- MaxTaskDescription = 16 * 1024 // 16 KiB — any reasonable prompt/description
- MaxTaskResultText = 1 * 1024 * 1024 // 1 MiB — inline text results
- MaxTaskResultFilename = 256 // filesystem-safe length cap
- MaxTaskResultFileBytes = 15 * 1024 * 1024 // ~15 MiB; frame cap is 16 MiB
- MaxTaskJustification = 4 * 1024 // status/decline reasons
+ MaxTaskDescription = 16 * 1024 // 16 KiB — any reasonable prompt/description
+ MaxTaskResultText = 1 * 1024 * 1024 // 1 MiB — inline text results
+ MaxTaskResultFilename = 256 // filesystem-safe length cap
+ MaxTaskResultFileBytes = 15 * 1024 * 1024 // ~15 MiB; frame cap is 16 MiB
+ MaxTaskJustification = 4 * 1024 // status/decline reasons
)
// ValidateSubmitRequest rejects submissions whose description exceeds the
diff --git a/sdk/cgo/bindings.go b/sdk/cgo/bindings.go
index 2e99d146..3981db02 100644
--- a/sdk/cgo/bindings.go
+++ b/sdk/cgo/bindings.go
@@ -12,6 +12,7 @@ import (
"encoding/json"
"fmt"
"sync"
+ "time"
"unsafe"
"github.com/TeoSlayer/pilotprotocol/pkg/driver"
@@ -477,5 +478,311 @@ func PilotRecvFrom(h C.uint64_t) *C.char {
})
}
+// ---------- Health / rotate-key ----------
+
+//export PilotHealth
+func PilotHealth(h C.uint64_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.Health()
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotRotateKey
+func PilotRotateKey(h C.uint64_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.RotateKey()
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+// ---------- Broadcast (admin-token gated) ----------
+
+//export PilotBroadcast
+func PilotBroadcast(h C.uint64_t, netID C.uint16_t, port C.uint16_t, data unsafe.Pointer, dataLen C.int, adminToken *C.char) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ if err := d.Broadcast(uint16(netID), uint16(port), C.GoBytes(data, dataLen), C.GoString(adminToken)); err != nil {
+ return errJSON(err)
+ }
+ return okJSON(map[string]interface{}{"ok": true})
+}
+
+// ---------- Dial with timeout ----------
+
+//export PilotDialTimeout
+func PilotDialTimeout(h C.uint64_t, addr *C.char, timeoutMs C.uint64_t) (C.uint64_t, *C.char) {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return 0, errJSON(err)
+ }
+ sa, err := protocol.ParseSocketAddr(C.GoString(addr))
+ if err != nil {
+ return 0, errJSON(err)
+ }
+ conn, err := d.DialAddrTimeout(sa.Addr, sa.Port, time.Duration(timeoutMs)*time.Millisecond)
+ if err != nil {
+ return 0, errJSON(err)
+ }
+ return C.uint64_t(storeHandle(conn)), nil
+}
+
+// ---------- Conn read deadline ----------
+
+// PilotConnSetReadDeadline sets the read deadline as Unix nanoseconds.
+// Pass 0 to clear the deadline.
+//
+//export PilotConnSetReadDeadline
+func PilotConnSetReadDeadline(ch C.uint64_t, deadlineUnixNanos C.int64_t) *C.char {
+ v, ok := loadHandle(uint64(ch))
+ if !ok {
+ return errJSON(fmt.Errorf("invalid conn handle"))
+ }
+ c, ok := v.(*driver.Conn)
+ if !ok {
+ return errJSON(fmt.Errorf("handle is not a Conn"))
+ }
+ var t time.Time
+ if int64(deadlineUnixNanos) != 0 {
+ t = time.Unix(0, int64(deadlineUnixNanos))
+ }
+ if err := c.SetReadDeadline(t); err != nil {
+ return errJSON(err)
+ }
+ return nil
+}
+
+// ---------- Networks ----------
+
+//export PilotNetworkList
+func PilotNetworkList(h C.uint64_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkList()
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotNetworkJoin
+func PilotNetworkJoin(h C.uint64_t, networkID C.uint16_t, token *C.char) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkJoin(uint16(networkID), C.GoString(token))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotNetworkLeave
+func PilotNetworkLeave(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkLeave(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotNetworkMembers
+func PilotNetworkMembers(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkMembers(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotNetworkInvite
+func PilotNetworkInvite(h C.uint64_t, networkID C.uint16_t, targetNodeID C.uint32_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkInvite(uint16(networkID), uint32(targetNodeID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotNetworkPollInvites
+func PilotNetworkPollInvites(h C.uint64_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkPollInvites()
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotNetworkRespondInvite
+func PilotNetworkRespondInvite(h C.uint64_t, networkID C.uint16_t, accept C.int) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.NetworkRespondInvite(uint16(networkID), accept != 0)
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+// ---------- Managed networks ----------
+
+//export PilotManagedScore
+func PilotManagedScore(h C.uint64_t, networkID C.uint16_t, nodeID C.uint32_t, delta C.int32_t, topic *C.char) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.ManagedScore(uint16(networkID), uint32(nodeID), int(int32(delta)), C.GoString(topic))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotManagedStatus
+func PilotManagedStatus(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.ManagedStatus(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotManagedRankings
+func PilotManagedRankings(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.ManagedRankings(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotManagedForceCycle
+func PilotManagedForceCycle(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.ManagedForceCycle(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotManagedReconcile
+func PilotManagedReconcile(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.ManagedReconcile(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+// ---------- Policy ----------
+
+//export PilotPolicyGet
+func PilotPolicyGet(h C.uint64_t, networkID C.uint16_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.PolicyGet(uint16(networkID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotPolicySet
+func PilotPolicySet(h C.uint64_t, networkID C.uint16_t, policyJSON *C.char) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.PolicySet(uint16(networkID), []byte(C.GoString(policyJSON)))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+// ---------- Member tags ----------
+
+//export PilotMemberTagsGet
+func PilotMemberTagsGet(h C.uint64_t, networkID C.uint16_t, nodeID C.uint32_t) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ r, err := d.MemberTagsGet(uint16(networkID), uint32(nodeID))
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
+//export PilotMemberTagsSet
+func PilotMemberTagsSet(h C.uint64_t, networkID C.uint16_t, nodeID C.uint32_t, tagsJSON *C.char) *C.char {
+ d, err := driverFromHandle(h)
+ if err != nil {
+ return errJSON(err)
+ }
+ var tags []string
+ if err := json.Unmarshal([]byte(C.GoString(tagsJSON)), &tags); err != nil {
+ return errJSON(fmt.Errorf("invalid tags JSON: %w", err))
+ }
+ r, err := d.MemberTagsSet(uint16(networkID), uint32(nodeID), tags)
+ if err != nil {
+ return errJSON(err)
+ }
+ return okJSON(r)
+}
+
// main is required for c-shared build mode.
func main() {}
diff --git a/sdk/node/package-lock.json b/sdk/node/package-lock.json
index 847b00e4..90c6bb3c 100644
--- a/sdk/node/package-lock.json
+++ b/sdk/node/package-lock.json
@@ -1,16 +1,30 @@
{
"name": "pilotprotocol",
- "version": "0.1.0",
+ "version": "1.9.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "pilotprotocol",
- "version": "0.1.0",
- "license": "MIT",
+ "version": "1.9.1",
+ "cpu": [
+ "x64",
+ "arm64"
+ ],
+ "license": "AGPL-3.0-or-later",
+ "os": [
+ "darwin",
+ "linux"
+ ],
"dependencies": {
"koffi": "^2.9.0"
},
+ "bin": {
+ "pilot-daemon": "bin-stubs/pilot-daemon.js",
+ "pilot-gateway": "bin-stubs/pilot-gateway.js",
+ "pilot-updater": "bin-stubs/pilot-updater.js",
+ "pilotctl": "bin-stubs/pilotctl.js"
+ },
"devDependencies": {
"@types/node": "^25.5.0",
"typescript": "^5.7.0",
@@ -850,7 +864,6 @@
"integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"undici-types": "~7.18.0"
}
@@ -1237,7 +1250,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true,
"license": "MIT",
- "peer": true,
"engines": {
"node": ">=12"
},
@@ -1451,7 +1463,6 @@
"integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"esbuild": "^0.27.0",
"fdir": "^6.5.0",
diff --git a/sdk/node/package.json b/sdk/node/package.json
index e01a30e9..7ef05be6 100644
--- a/sdk/node/package.json
+++ b/sdk/node/package.json
@@ -1,6 +1,6 @@
{
"name": "pilotprotocol",
- "version": "0.1.1",
+ "version": "1.9.1",
"description": "Node.js SDK for Pilot Protocol — the network stack for AI agents",
"type": "module",
"main": "dist/index.js",
diff --git a/sdk/node/scripts/build-binaries.sh b/sdk/node/scripts/build-binaries.sh
index 8a8210fa..46eef5ea 100755
--- a/sdk/node/scripts/build-binaries.sh
+++ b/sdk/node/scripts/build-binaries.sh
@@ -6,6 +6,9 @@ set -euo pipefail
cd "$(dirname "$0")/../../.." # Go to repo root
+# Read SDK version (from package.json) so the seeder marker matches it.
+SDK_VERSION=$(node -e "console.log(JSON.parse(require('fs').readFileSync('sdk/node/package.json','utf8')).version)")
+
# Detect platform
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)
@@ -63,6 +66,27 @@ cd ../..
echo " ✓ Built: $OUTPUT_DIR/libpilot.$EXT"
echo ""
+# 6. Write .pilot-version marker so the runtime seeder can compare against
+# whatever's already installed at ~/.pilot/bin/.
+echo "$SDK_VERSION" > "$OUTPUT_DIR/.pilot-version"
+echo "6. Wrote $OUTPUT_DIR/.pilot-version → $SDK_VERSION"
+echo ""
+
+# 7. macOS ad-hoc codesign + strip quarantine. Mirrors the main release
+# workflow so SDK-shipped binaries don't trigger Gatekeeper "killed: 9"
+# or "cannot be opened because Apple cannot check it for malicious
+# software" when downloaded via npm.
+if [ "$OS" = "darwin" ]; then
+ echo "7. macOS ad-hoc codesign + strip quarantine..."
+ for bin in "$OUTPUT_DIR/pilot-daemon" "$OUTPUT_DIR/pilotctl" "$OUTPUT_DIR/pilot-gateway" "$OUTPUT_DIR/pilot-updater" "$OUTPUT_DIR/libpilot.$EXT"; do
+ codesign --force --deep --sign - "$bin"
+ xattr -cr "$bin" || true
+ codesign -dv "$bin" 2>&1 | grep -E "Signature|Authority|TeamIdentifier" | head -1 || true
+ done
+ echo " ✓ codesigned ${OS} binaries"
+ echo ""
+fi
+
# Show sizes
echo "================================================================"
echo "Build Summary:"
diff --git a/sdk/node/src/cli.ts b/sdk/node/src/cli.ts
index 40cb94af..77606077 100644
--- a/sdk/node/src/cli.ts
+++ b/sdk/node/src/cli.ts
@@ -1,97 +1,30 @@
/**
- * CLI wrappers for bundled Pilot Protocol binaries.
+ * CLI entry points for the Pilot Protocol Node SDK.
*
- * These functions are used as npm "bin" entry points. Each wrapper:
- * 1. Ensures ~/.pilot/ directory and default config.json exist
- * 2. Locates the bundled Go binary
- * 3. Executes it with all CLI arguments passed through
+ * Each wrapper:
+ * 1. Seeds `~/.pilot/bin/` from the package's bundled binaries (the
+ * `runtime` module is idempotent and concurrency-safe).
+ * 2. Execs the seeded binary with all CLI arguments passed through.
*
- * This mirrors the Python SDK's cli.py approach.
+ * This keeps a single canonical runtime location at `~/.pilot/bin/`,
+ * shared with `install.sh` and any other Pilot SDK install on the host.
*/
-import { execFileSync } from 'node:child_process';
-import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
-import { homedir } from 'node:os';
-import { join, resolve } from 'node:path';
-import { fileURLToPath } from 'node:url';
+import { spawnSync } from 'node:child_process';
+import { ensureRuntimeSeeded, runtimeBinaryPath } from './runtime.js';
-/**
- * Ensure ~/.pilot/ directory and config.json exist.
- * Called before every binary execution to initialize the runtime environment.
- */
-function ensurePilotEnv(): void {
- const home = homedir();
- const pilotDir = join(home, '.pilot');
- const configFile = join(pilotDir, 'config.json');
-
- // Create ~/.pilot/ if it doesn't exist
- if (!existsSync(pilotDir)) {
- mkdirSync(pilotDir, { recursive: true });
- }
-
- // Create default config.json if it doesn't exist
- if (!existsSync(configFile)) {
- const defaultConfig = {
- registry: '34.71.57.205:9000',
- beacon: '34.71.57.205:9001',
- socket: '/tmp/pilot.sock',
- encrypt: true,
- identity: join(pilotDir, 'identity.json'),
- };
- writeFileSync(configFile, JSON.stringify(defaultConfig, null, 2));
- }
-}
-
-/**
- * Get absolute path to a bundled binary.
- * Searches in the package's bin/ directory (relative to this file's location).
- */
-function getBinaryPath(binaryName: string): string {
- const thisDir = resolve(fileURLToPath(import.meta.url), '..');
-
- // When compiled: dist/cli.js → look for ../bin/
- const pkgBin = resolve(thisDir, '..', 'bin', binaryName);
- if (existsSync(pkgBin)) return pkgBin;
-
- // Development: src/cli.ts → look for ../../bin/ (through sdk/node/)
- const devBin = resolve(thisDir, '..', '..', 'bin', binaryName);
- if (existsSync(devBin)) return devBin;
-
- throw new Error(
- `Binary '${binaryName}' not found.\n` +
- '\n' +
- 'Expected locations:\n' +
- ` - ${pkgBin} (npm package)\n` +
- ` - ${devBin} (development)\n` +
- '\n' +
- 'Build binaries with:\n' +
- ' cd sdk/node && ./scripts/build-binaries.sh\n',
- );
-}
-
-/**
- * Execute a bundled binary with all CLI arguments passed through.
- * Exits with the same code as the binary.
- */
-function runBinary(binaryName: string): void {
- ensurePilotEnv();
- const binaryPath = getBinaryPath(binaryName);
+function runBinary(name: string): void {
+ ensureRuntimeSeeded();
+ const binary = runtimeBinaryPath(name);
const args = process.argv.slice(2);
-
- try {
- execFileSync(binaryPath, args, {
- stdio: 'inherit',
- env: process.env,
- });
- } catch (err: unknown) {
- // execFileSync throws on non-zero exit codes
- const exitCode = (err as { status?: number }).status ?? 1;
- process.exit(exitCode);
+ const r = spawnSync(binary, args, { stdio: 'inherit', env: process.env });
+ if (r.error) {
+ process.stderr.write(`pilot: failed to launch ${name}: ${String(r.error)}\n`);
+ process.exit(1);
}
+ process.exit(r.status ?? 1);
}
-// --- Entry points (one per binary) ---
-
export function runPilotctl(): void {
runBinary('pilotctl');
}
diff --git a/sdk/node/src/client.ts b/sdk/node/src/client.ts
index 9898879e..b4dc420e 100644
--- a/sdk/node/src/client.ts
+++ b/sdk/node/src/client.ts
@@ -109,6 +109,28 @@ export class Conn {
checkErr(ptr);
}
+ /**
+ * Set the read deadline. Pass a Date (absolute time), a number of
+ * milliseconds from now, or null to clear.
+ *
+ * After the deadline passes, in-flight and subsequent `read()` calls
+ * fail with a "deadline exceeded" PilotError.
+ */
+ setReadDeadline(deadline: Date | number | null): void {
+ if (this._closed) throw new PilotError('connection closed');
+ const lib = getLib();
+ let nanos: bigint;
+ if (deadline === null) {
+ nanos = 0n;
+ } else if (deadline instanceof Date) {
+ nanos = BigInt(deadline.getTime()) * 1_000_000n;
+ } else {
+ nanos = BigInt(Date.now() + deadline) * 1_000_000n;
+ }
+ const ptr = lib.PilotConnSetReadDeadline(this._h, nanos);
+ checkErr(ptr);
+ }
+
/** Support TC39 explicit resource management. */
[Symbol.dispose](): void {
this.close();
@@ -195,6 +217,16 @@ export class Driver {
return this._callJSON('PilotInfo');
}
+ /** Lightweight health check from the daemon. */
+ health(): Record {
+ return this._callJSON('PilotHealth');
+ }
+
+ /** Rotate the daemon's Ed25519 identity at the registry. */
+ rotateKey(): Record {
+ return this._callJSON('PilotRotateKey');
+ }
+
// -- Handshake / Trust --
/** Send a trust handshake request to a remote node. */
@@ -277,10 +309,17 @@ export class Driver {
// -- Streams --
- /** Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT"). */
- dial(addr: string): Conn {
+ /**
+ * Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT").
+ * If `timeoutMs` is provided, the dial is cancelled if the daemon does
+ * not respond within that many milliseconds.
+ */
+ dial(addr: string, timeoutMs?: number): Conn {
const lib = getLib();
- const res = lib.PilotDial(this._h, addr);
+ const res =
+ typeof timeoutMs === 'number'
+ ? lib.PilotDialTimeout(this._h, addr, BigInt(Math.max(0, Math.floor(timeoutMs))))
+ : lib.PilotDial(this._h, addr);
const handle = unwrapHandleErr(res);
return new Conn(handle);
}
@@ -305,11 +344,116 @@ export class Driver {
checkErr(ptr);
}
+ /**
+ * Broadcast an unreliable datagram to every member of a network.
+ * Requires the daemon's admin token; see Driver.Broadcast in pkg/driver.
+ */
+ broadcast(networkId: number, port: number, data: Buffer | Uint8Array | string, adminToken: string): void {
+ const lib = getLib();
+ const src = typeof data === 'string' ? Buffer.from(data) : data;
+ const buf = Buffer.allocUnsafe(src.length);
+ Buffer.from(src).copy(buf);
+ const ptr = lib.PilotBroadcast(this._h, networkId, port, buf, buf.length, adminToken);
+ checkErr(ptr);
+ }
+
/** Receive the next incoming datagram (blocks). */
recvFrom(): Record {
return this._callJSON('PilotRecvFrom');
}
+ // -- Networks --
+
+ /** List all networks known to the registry. */
+ networkList(): Record {
+ return this._callJSON('PilotNetworkList');
+ }
+
+ /** Join a network by ID, optionally with a token for token-gated networks. */
+ networkJoin(networkId: number, token = ''): Record {
+ return this._callJSON('PilotNetworkJoin', networkId, token);
+ }
+
+ /** Leave a network by ID. */
+ networkLeave(networkId: number): Record {
+ return this._callJSON('PilotNetworkLeave', networkId);
+ }
+
+ /** List all members of a network. */
+ networkMembers(networkId: number): Record {
+ return this._callJSON('PilotNetworkMembers', networkId);
+ }
+
+ /** Invite a target node to a network (requires admin token on daemon). */
+ networkInvite(networkId: number, targetNodeId: number): Record {
+ return this._callJSON('PilotNetworkInvite', networkId, targetNodeId);
+ }
+
+ /** Return pending network invites for this node. */
+ networkPollInvites(): Record {
+ return this._callJSON('PilotNetworkPollInvites');
+ }
+
+ /** Accept or reject a pending network invite. */
+ networkRespondInvite(networkId: number, accept: boolean): Record {
+ return this._callJSON('PilotNetworkRespondInvite', networkId, accept ? 1 : 0);
+ }
+
+ // -- Managed networks --
+
+ /** Adjust a peer's score in a managed network. */
+ managedScore(networkId: number, nodeId: number, delta: number, topic = ''): Record {
+ return this._callJSON('PilotManagedScore', networkId, nodeId, delta, topic);
+ }
+
+ /** Return the status of a managed network engine. */
+ managedStatus(networkId: number): Record {
+ return this._callJSON('PilotManagedStatus', networkId);
+ }
+
+ /** Return ranked peers in a managed network. */
+ managedRankings(networkId: number): Record {
+ return this._callJSON('PilotManagedRankings', networkId);
+ }
+
+ /** Force a prune/fill cycle in a managed network. */
+ managedForceCycle(networkId: number): Record {
+ return this._callJSON('PilotManagedForceCycle', networkId);
+ }
+
+ /** Refresh the managed network's peer set from the registry without a policy cycle. */
+ managedReconcile(networkId: number): Record {
+ return this._callJSON('PilotManagedReconcile', networkId);
+ }
+
+ // -- Policy --
+
+ /** Retrieve the active policy for a network. */
+ policyGet(networkId: number): Record {
+ return this._callJSON('PilotPolicyGet', networkId);
+ }
+
+ /** Apply a policy document to a network. */
+ policySet(networkId: number, policy: Record | string | Buffer): Record {
+ let json: string;
+ if (typeof policy === 'string') json = policy;
+ else if (Buffer.isBuffer(policy)) json = policy.toString('utf-8');
+ else json = JSON.stringify(policy);
+ return this._callJSON('PilotPolicySet', networkId, json);
+ }
+
+ // -- Member tags --
+
+ /** Retrieve admin-assigned member tags for a node in a network. */
+ memberTagsGet(networkId: number, nodeId: number): Record {
+ return this._callJSON('PilotMemberTagsGet', networkId, nodeId);
+ }
+
+ /** Set admin-assigned member tags for a node in a network. */
+ memberTagsSet(networkId: number, nodeId: number, tags: string[]): Record {
+ return this._callJSON('PilotMemberTagsSet', networkId, nodeId, JSON.stringify(tags));
+ }
+
// -- High-level service methods --
/** Resolve a target to a protocol address. Passes through if already an address. */
diff --git a/sdk/node/src/ffi.ts b/sdk/node/src/ffi.ts
index a278b40e..f0afccdc 100644
--- a/sdk/node/src/ffi.ts
+++ b/sdk/node/src/ffi.ts
@@ -21,6 +21,7 @@ import { existsSync } from 'node:fs';
import { homedir, platform } from 'node:os';
import { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
+import { runtimeLibraryPath } from './runtime.js';
// ---------------------------------------------------------------------------
// Error class (defined here to avoid circular deps with client.ts)
@@ -49,27 +50,35 @@ export function findLibrary(): string {
throw new Error(`unsupported platform: ${platform()}`);
}
- // 1. PILOT_LIB_PATH env var
+ // 1. PILOT_LIB_PATH env var (explicit override — bypasses the seeder).
const envPath = process.env['PILOT_LIB_PATH'];
if (envPath) {
if (existsSync(envPath)) return envPath;
throw new Error(`PILOT_LIB_PATH=${envPath} does not exist`);
}
- // 2. ~/.pilot/bin/
+ // 2. The seeded library at ~/.pilot/bin/ (canonical runtime).
+ try {
+ return runtimeLibraryPath();
+ } catch {
+ // Seeder failed (read-only home, missing wheel binary) — fall through
+ // to the legacy locations so the SDK still loads in dev / weird envs.
+ }
+
+ // 3. ~/.pilot/bin/ (already-installed copy, no seeding).
const pilotBin = join(homedir(), '.pilot', 'bin', libName);
if (existsSync(pilotBin)) return pilotBin;
- // 3. /bin/ (npm package layout: dist/ffi.js → ../bin/)
+ // 4. /bin/ (npm package layout: dist/ffi.js → ../bin/).
const thisDir = resolve(fileURLToPath(import.meta.url), '..');
const pkgBin = resolve(thisDir, '..', 'bin', libName);
if (existsSync(pkgBin)) return pkgBin;
- // 4. Same directory as this file
+ // 5. Same directory as this file.
const colocated = join(thisDir, libName);
if (existsSync(colocated)) return colocated;
- // 5. /bin/ (development layout — 3 levels up from dist/)
+ // 6. /bin/ (development layout — 3 levels up from dist/).
const repoBin = resolve(thisDir, '..', '..', '..', 'bin', libName);
if (existsSync(repoBin)) return repoBin;
@@ -101,6 +110,8 @@ export interface PilotLib {
// JSON-RPC (return JSON string or null)
PilotInfo(h: bigint): string | null;
+ PilotHealth(h: bigint): string | null;
+ PilotRotateKey(h: bigint): string | null;
PilotHandshake(h: bigint, nodeId: number, justification: string): string | null;
PilotApproveHandshake(h: bigint, nodeId: number): string | null;
PilotRejectHandshake(h: bigint, nodeId: number, reason: string): string | null;
@@ -117,8 +128,33 @@ export interface PilotLib {
PilotDisconnect(h: bigint, connId: number): string | null;
PilotRecvFrom(h: bigint): string | null;
+ // Networks
+ PilotNetworkList(h: bigint): string | null;
+ PilotNetworkJoin(h: bigint, networkId: number, token: string): string | null;
+ PilotNetworkLeave(h: bigint, networkId: number): string | null;
+ PilotNetworkMembers(h: bigint, networkId: number): string | null;
+ PilotNetworkInvite(h: bigint, networkId: number, targetNodeId: number): string | null;
+ PilotNetworkPollInvites(h: bigint): string | null;
+ PilotNetworkRespondInvite(h: bigint, networkId: number, accept: number): string | null;
+
+ // Managed networks
+ PilotManagedScore(h: bigint, networkId: number, nodeId: number, delta: number, topic: string): string | null;
+ PilotManagedStatus(h: bigint, networkId: number): string | null;
+ PilotManagedRankings(h: bigint, networkId: number): string | null;
+ PilotManagedForceCycle(h: bigint, networkId: number): string | null;
+ PilotManagedReconcile(h: bigint, networkId: number): string | null;
+
+ // Policy
+ PilotPolicyGet(h: bigint, networkId: number): string | null;
+ PilotPolicySet(h: bigint, networkId: number, policyJson: string): string | null;
+
+ // Member tags
+ PilotMemberTagsGet(h: bigint, networkId: number, nodeId: number): string | null;
+ PilotMemberTagsSet(h: bigint, networkId: number, nodeId: number, tagsJson: string): string | null;
+
// Stream connections
PilotDial(h: bigint, addr: string): { handle: bigint; err: string | null };
+ PilotDialTimeout(h: bigint, addr: string, timeoutMs: bigint): { handle: bigint; err: string | null };
PilotListen(h: bigint, port: number): { handle: bigint; err: string | null };
PilotListenerAccept(h: bigint): { handle: bigint; err: string | null };
PilotListenerClose(h: bigint): string | null;
@@ -127,9 +163,11 @@ export interface PilotLib {
PilotConnRead(h: bigint, bufSize: number): { n: number; data: Buffer | null; err: string | null };
PilotConnWrite(h: bigint, data: Buffer, dataLen: number): { n: number; err: string | null };
PilotConnClose(h: bigint): string | null;
+ PilotConnSetReadDeadline(h: bigint, deadlineUnixNanos: bigint): string | null;
// Datagrams
PilotSendTo(h: bigint, addr: string, data: Buffer, dataLen: number): string | null;
+ PilotBroadcast(h: bigint, networkId: number, port: number, data: Buffer, dataLen: number, adminToken: string): string | null;
}
// ---------------------------------------------------------------------------
@@ -167,6 +205,8 @@ export function loadLibrary(path?: string): PilotLib {
const rawConnect = lib.func('PilotConnect', HandleErrStruct, ['str']);
const rawClose = lib.func('PilotClose', 'void *', ['uint64']);
const rawInfo = lib.func('PilotInfo', 'void *', ['uint64']);
+ const rawHealth = lib.func('PilotHealth', 'void *', ['uint64']);
+ const rawRotateKey = lib.func('PilotRotateKey', 'void *', ['uint64']);
const rawHandshake = lib.func('PilotHandshake', 'void *', ['uint64', 'uint32', 'str']);
const rawApproveHandshake = lib.func('PilotApproveHandshake', 'void *', ['uint64', 'uint32']);
const rawRejectHandshake = lib.func('PilotRejectHandshake', 'void *', ['uint64', 'uint32', 'str']);
@@ -182,14 +222,33 @@ export function loadLibrary(path?: string): PilotLib {
const rawSetWebhook = lib.func('PilotSetWebhook', 'void *', ['uint64', 'str']);
const rawDisconnect = lib.func('PilotDisconnect', 'void *', ['uint64', 'uint32']);
const rawRecvFrom = lib.func('PilotRecvFrom', 'void *', ['uint64']);
+ const rawNetworkList = lib.func('PilotNetworkList', 'void *', ['uint64']);
+ const rawNetworkJoin = lib.func('PilotNetworkJoin', 'void *', ['uint64', 'uint16', 'str']);
+ const rawNetworkLeave = lib.func('PilotNetworkLeave', 'void *', ['uint64', 'uint16']);
+ const rawNetworkMembers = lib.func('PilotNetworkMembers', 'void *', ['uint64', 'uint16']);
+ const rawNetworkInvite = lib.func('PilotNetworkInvite', 'void *', ['uint64', 'uint16', 'uint32']);
+ const rawNetworkPollInvites = lib.func('PilotNetworkPollInvites', 'void *', ['uint64']);
+ const rawNetworkRespondInvite = lib.func('PilotNetworkRespondInvite', 'void *', ['uint64', 'uint16', 'int']);
+ const rawManagedScore = lib.func('PilotManagedScore', 'void *', ['uint64', 'uint16', 'uint32', 'int32', 'str']);
+ const rawManagedStatus = lib.func('PilotManagedStatus', 'void *', ['uint64', 'uint16']);
+ const rawManagedRankings = lib.func('PilotManagedRankings', 'void *', ['uint64', 'uint16']);
+ const rawManagedForceCycle = lib.func('PilotManagedForceCycle', 'void *', ['uint64', 'uint16']);
+ const rawManagedReconcile = lib.func('PilotManagedReconcile', 'void *', ['uint64', 'uint16']);
+ const rawPolicyGet = lib.func('PilotPolicyGet', 'void *', ['uint64', 'uint16']);
+ const rawPolicySet = lib.func('PilotPolicySet', 'void *', ['uint64', 'uint16', 'str']);
+ const rawMemberTagsGet = lib.func('PilotMemberTagsGet', 'void *', ['uint64', 'uint16', 'uint32']);
+ const rawMemberTagsSet = lib.func('PilotMemberTagsSet', 'void *', ['uint64', 'uint16', 'uint32', 'str']);
const rawDial = lib.func('PilotDial', HandleErrStruct, ['uint64', 'str']);
+ const rawDialTimeout = lib.func('PilotDialTimeout', HandleErrStruct, ['uint64', 'str', 'uint64']);
const rawListen = lib.func('PilotListen', HandleErrStruct, ['uint64', 'uint16']);
const rawListenerAccept = lib.func('PilotListenerAccept', HandleErrStruct, ['uint64']);
const rawListenerClose = lib.func('PilotListenerClose', 'void *', ['uint64']);
const rawConnRead = lib.func('PilotConnRead', ReadResultStruct, ['uint64', 'int']);
const rawConnWrite = lib.func('PilotConnWrite', WriteResultStruct, ['uint64', 'void *', 'int']);
const rawConnClose = lib.func('PilotConnClose', 'void *', ['uint64']);
+ const rawConnSetReadDeadline = lib.func('PilotConnSetReadDeadline', 'void *', ['uint64', 'int64']);
const rawSendTo = lib.func('PilotSendTo', 'void *', ['uint64', 'str', 'void *', 'int']);
+ const rawBroadcast = lib.func('PilotBroadcast', 'void *', ['uint64', 'uint16', 'uint16', 'void *', 'int', 'str']);
/** Decode a void* C string, free the pointer, return JS string. */
function decodeAndFree(ptr: unknown): string | null {
@@ -213,6 +272,8 @@ export function loadLibrary(path?: string): PilotLib {
PilotConnect: (socketPath) => unwrapHandle(rawConnect(socketPath)),
PilotClose: (h) => decodeAndFree(rawClose(h)),
PilotInfo: wrapJSON(rawInfo),
+ PilotHealth: wrapJSON(rawHealth),
+ PilotRotateKey: wrapJSON(rawRotateKey),
PilotHandshake: wrapJSON(rawHandshake),
PilotApproveHandshake: wrapJSON(rawApproveHandshake),
PilotRejectHandshake: wrapJSON(rawRejectHandshake),
@@ -228,7 +289,24 @@ export function loadLibrary(path?: string): PilotLib {
PilotSetWebhook: wrapJSON(rawSetWebhook),
PilotDisconnect: wrapJSON(rawDisconnect),
PilotRecvFrom: wrapJSON(rawRecvFrom),
+ PilotNetworkList: wrapJSON(rawNetworkList),
+ PilotNetworkJoin: wrapJSON(rawNetworkJoin),
+ PilotNetworkLeave: wrapJSON(rawNetworkLeave),
+ PilotNetworkMembers: wrapJSON(rawNetworkMembers),
+ PilotNetworkInvite: wrapJSON(rawNetworkInvite),
+ PilotNetworkPollInvites: wrapJSON(rawNetworkPollInvites),
+ PilotNetworkRespondInvite: wrapJSON(rawNetworkRespondInvite),
+ PilotManagedScore: wrapJSON(rawManagedScore),
+ PilotManagedStatus: wrapJSON(rawManagedStatus),
+ PilotManagedRankings: wrapJSON(rawManagedRankings),
+ PilotManagedForceCycle: wrapJSON(rawManagedForceCycle),
+ PilotManagedReconcile: wrapJSON(rawManagedReconcile),
+ PilotPolicyGet: wrapJSON(rawPolicyGet),
+ PilotPolicySet: wrapJSON(rawPolicySet),
+ PilotMemberTagsGet: wrapJSON(rawMemberTagsGet),
+ PilotMemberTagsSet: wrapJSON(rawMemberTagsSet),
PilotDial: (h, addr) => unwrapHandle(rawDial(h, addr)),
+ PilotDialTimeout: (h, addr, timeoutMs) => unwrapHandle(rawDialTimeout(h, addr, timeoutMs)),
PilotListen: (h, port) => unwrapHandle(rawListen(h, port)),
PilotListenerAccept: (h) => unwrapHandle(rawListenerAccept(h)),
PilotListenerClose: (h) => decodeAndFree(rawListenerClose(h)),
@@ -250,10 +328,15 @@ export function loadLibrary(path?: string): PilotLib {
return { n: res.n as number, err: decodeAndFree(res.err) };
},
PilotConnClose: (h) => decodeAndFree(rawConnClose(h)),
+ PilotConnSetReadDeadline: (h, deadlineUnixNanos) =>
+ decodeAndFree(rawConnSetReadDeadline(h, deadlineUnixNanos)),
PilotSendTo(h, addr, buf, dataLen) {
// Pass Buffer directly — koffi handles byteOffset correctly for void*
return decodeAndFree(rawSendTo(h, addr, buf, dataLen));
},
+ PilotBroadcast(h, networkId, port, buf, dataLen, adminToken) {
+ return decodeAndFree(rawBroadcast(h, networkId, port, buf, dataLen, adminToken));
+ },
};
}
diff --git a/sdk/node/src/runtime.ts b/sdk/node/src/runtime.ts
new file mode 100644
index 00000000..aeacf67a
--- /dev/null
+++ b/sdk/node/src/runtime.ts
@@ -0,0 +1,485 @@
+/**
+ * Runtime environment seeder for the Pilot Protocol Node SDK.
+ *
+ * Both the CLI shims (`cli.ts`) and the FFI loader (`ffi.ts:findLibrary`)
+ * funnel through `ensureRuntimeSeeded`, which idempotently mirrors the
+ * binaries shipped inside the npm package into `~/.pilot/bin/` (the
+ * canonical runtime directory shared with `install.sh`).
+ *
+ * Goals:
+ * - The package is the seed cache; `~/.pilot/bin/` is the runtime.
+ * - No install-time code runs; seeding happens lazily on first SDK use.
+ * - Concurrency-safe via O_EXCL lock + retry; crash-safe via atomic rename.
+ * - Never downgrades; never replaces a running daemon binary.
+ * - Coexists with `install.sh` — same layout, same `.pilot-version`.
+ */
+
+import { Socket } from 'node:net';
+import {
+ closeSync,
+ copyFileSync,
+ existsSync,
+ mkdirSync,
+ openSync,
+ readFileSync,
+ renameSync,
+ statSync,
+ unlinkSync,
+ writeFileSync,
+ chmodSync,
+ accessSync,
+ constants as fsConstants,
+} from 'node:fs';
+import { homedir, platform as osPlatform } from 'node:os';
+import { dirname, join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const BIN_NAMES = ['pilotctl', 'pilot-daemon', 'pilot-gateway', 'pilot-updater'] as const;
+
+const LIB_NAMES: Record = {
+ darwin: 'libpilot.dylib',
+ linux: 'libpilot.so',
+ win32: 'libpilot.dll',
+};
+
+export const DEFAULT_REGISTRY = '34.71.57.205:9000';
+export const DEFAULT_BEACON = '34.71.57.205:9001';
+export const DEFAULT_SOCKET = '/tmp/pilot.sock';
+
+// ---------------------------------------------------------------------------
+// Path helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Where the npm package ships its bundled binaries (the seed cache).
+ *
+ * dist/runtime.js → ../bin/ (npm package layout)
+ * src/runtime.ts → ../../bin/ (development layout, run via tsx)
+ */
+function pkgBinDir(): string {
+ // Test override: a one-shot way to point at a fake bundled bin/ without
+ // resorting to vi.spyOn on a live binding. Honored only when set.
+ const override = process.env['PILOT_PKG_BIN_DIR'];
+ if (override) return override;
+
+ const thisDir = resolve(fileURLToPath(import.meta.url), '..');
+
+ // Compiled (dist/runtime.js)
+ const compiledBin = resolve(thisDir, '..', 'bin');
+ if (existsSync(compiledBin)) return compiledBin;
+
+ // Source (src/runtime.ts) — sdk/node/bin
+ const sourceBin = resolve(thisDir, '..', '..', 'bin');
+ return sourceBin;
+}
+
+function runtimeRoot(): string {
+ const override = process.env['PILOT_HOME'];
+ if (override) return override;
+ return join(homedir(), '.pilot');
+}
+
+function runtimeBin(): string {
+ return join(runtimeRoot(), 'bin');
+}
+
+function platformLibName(): string {
+ const name = LIB_NAMES[osPlatform()];
+ if (!name) throw new Error(`unsupported platform: ${osPlatform()}`);
+ return name;
+}
+
+// ---------------------------------------------------------------------------
+// Version helpers
+// ---------------------------------------------------------------------------
+
+function semverTuple(v: string | undefined | null): number[] | null {
+ if (!v) return null;
+ const cleaned = v.trim().replace(/^v/, '').split('-')[0]?.split('+')[0];
+ if (!cleaned) return null;
+ const parts = cleaned.split('.').map((p) => Number(p));
+ if (parts.some((n) => !Number.isFinite(n))) return null;
+ return parts;
+}
+
+function compareSemver(a: number[] | null, b: number[] | null): number {
+ if (!a && !b) return 0;
+ if (!a) return -1;
+ if (!b) return 1;
+ const len = Math.max(a.length, b.length);
+ for (let i = 0; i < len; i++) {
+ const ai = a[i] ?? 0;
+ const bi = b[i] ?? 0;
+ if (ai > bi) return 1;
+ if (ai < bi) return -1;
+ }
+ return 0;
+}
+
+function bundledVersion(): string {
+ const f = join(pkgBinDir(), '.pilot-version');
+ if (existsSync(f)) {
+ try {
+ return readFileSync(f, 'utf8').trim();
+ } catch {
+ // fall through
+ }
+ }
+ // Fallback: read package.json beside dist/
+ const thisDir = resolve(fileURLToPath(import.meta.url), '..');
+ const candidates = [
+ resolve(thisDir, '..', 'package.json'),
+ resolve(thisDir, '..', '..', 'package.json'),
+ ];
+ for (const c of candidates) {
+ if (existsSync(c)) {
+ try {
+ return JSON.parse(readFileSync(c, 'utf8')).version ?? '';
+ } catch {
+ // ignore
+ }
+ }
+ }
+ return '';
+}
+
+function runtimeVersion(rt: string): string {
+ const f = join(rt, '.pilot-version');
+ if (!existsSync(f)) return '';
+ try {
+ return readFileSync(f, 'utf8').trim();
+ } catch {
+ return '';
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Daemon liveness
+// ---------------------------------------------------------------------------
+
+async function probeDaemonLive(timeoutMs = 200): Promise {
+ let sockPath = DEFAULT_SOCKET;
+ const cfgPath = join(runtimeRoot(), 'config.json');
+ if (existsSync(cfgPath)) {
+ try {
+ const cfg = JSON.parse(readFileSync(cfgPath, 'utf8'));
+ if (typeof cfg.socket === 'string' && cfg.socket) sockPath = cfg.socket;
+ } catch {
+ // ignore
+ }
+ }
+ if (!existsSync(sockPath)) return false;
+
+ return new Promise((resolveProbe) => {
+ const s = new Socket();
+ const finish = (ok: boolean) => {
+ try {
+ s.destroy();
+ } catch {
+ // ignore
+ }
+ resolveProbe(ok);
+ };
+ s.setTimeout(timeoutMs);
+ s.once('connect', () => finish(true));
+ s.once('timeout', () => finish(false));
+ s.once('error', () => finish(false));
+ try {
+ s.connect(sockPath);
+ } catch {
+ finish(false);
+ }
+ });
+}
+
+/** Synchronous probe used by the seeder. Loops on a short setImmediate. */
+function probeDaemonLiveSync(): boolean {
+ const sockPath = readSocketPath();
+ if (!existsSync(sockPath)) return false;
+ // Best-effort sync: try connecting via a child process. Falls back to
+ // "assume not running" if we can't decide quickly.
+ try {
+ const { spawnSync } = require('node:child_process') as typeof import('node:child_process');
+ // `nc -z -U ` is the cleanest sync probe; fall back to true if nc is missing.
+ const r = spawnSync('nc', ['-z', '-U', sockPath], { timeout: 250 });
+ if (r.error) return existsSync(sockPath); // nc missing — be conservative
+ return r.status === 0;
+ } catch {
+ // Conservative: if a socket file is present, assume the daemon is up.
+ return existsSync(sockPath);
+ }
+}
+
+function readSocketPath(): string {
+ const cfgPath = join(runtimeRoot(), 'config.json');
+ if (existsSync(cfgPath)) {
+ try {
+ const cfg = JSON.parse(readFileSync(cfgPath, 'utf8'));
+ if (typeof cfg.socket === 'string' && cfg.socket) return cfg.socket;
+ } catch {
+ // ignore
+ }
+ }
+ return DEFAULT_SOCKET;
+}
+
+// ---------------------------------------------------------------------------
+// File ops
+// ---------------------------------------------------------------------------
+
+function ensureDirWritable(p: string): void {
+ if (!existsSync(p)) mkdirSync(p, { recursive: true });
+ try {
+ accessSync(p, fsConstants.W_OK);
+ } catch {
+ throw new Error(
+ `${p} is not writable. Repair with: chown -R $USER ${p}`,
+ );
+ }
+}
+
+function atomicInstall(src: string, dst: string): void {
+ const tmp = `${dst}.tmp.${process.pid}`;
+ if (existsSync(tmp)) unlinkSync(tmp);
+ copyFileSync(src, tmp);
+ try {
+ chmodSync(tmp, 0o755);
+ renameSync(tmp, dst);
+ } catch (err) {
+ try {
+ unlinkSync(tmp);
+ } catch {
+ // ignore
+ }
+ throw err;
+ }
+}
+
+function ensureDefaultConfig(): string {
+ const root = runtimeRoot();
+ ensureDirWritable(root);
+ const cfgPath = join(root, 'config.json');
+ if (existsSync(cfgPath)) return cfgPath;
+ const cfg = {
+ registry: DEFAULT_REGISTRY,
+ beacon: DEFAULT_BEACON,
+ socket: DEFAULT_SOCKET,
+ encrypt: true,
+ identity: join(root, 'identity.json'),
+ };
+ const tmp = `${cfgPath}.tmp.${process.pid}`;
+ writeFileSync(tmp, JSON.stringify(cfg, null, 2) + '\n');
+ renameSync(tmp, cfgPath);
+ return cfgPath;
+}
+
+// ---------------------------------------------------------------------------
+// Lock
+// ---------------------------------------------------------------------------
+
+/** O_EXCL-based lockfile with bounded retry. Returns the fd to close. */
+function acquireLock(rt: string, timeoutMs = 5000): number {
+ const lockPath = join(rt, '.seed.lock');
+ const start = Date.now();
+ while (true) {
+ try {
+ return openSync(lockPath, fsConstants.O_RDWR | fsConstants.O_CREAT | fsConstants.O_EXCL, 0o644);
+ } catch (err: unknown) {
+ const e = err as NodeJS.ErrnoException;
+ if (e.code !== 'EEXIST') throw err;
+ // Stale lock detection: > 30s old → reclaim.
+ try {
+ const age = Date.now() - statSync(lockPath).mtimeMs;
+ if (age > 30_000) {
+ try {
+ unlinkSync(lockPath);
+ } catch {
+ // ignore; race
+ }
+ continue;
+ }
+ } catch {
+ // ignore
+ }
+ if (Date.now() - start > timeoutMs) {
+ // Last resort: proceed without exclusive lock. Steady state seeders
+ // will be no-ops anyway, so worst case is two redundant copies.
+ return -1;
+ }
+ // Busy-wait briefly; this is a *cold* path (first run only).
+ const until = Date.now() + 50;
+ while (Date.now() < until) {
+ // spin
+ }
+ }
+ }
+}
+
+function releaseLock(rt: string, fd: number): void {
+ if (fd < 0) return;
+ try {
+ closeSync(fd);
+ } catch {
+ // ignore
+ }
+ try {
+ unlinkSync(join(rt, '.seed.lock'));
+ } catch {
+ // ignore
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+export interface SeedReport {
+ action: 'noop' | 'seed' | 'upgrade' | 'daemon-skip';
+ copied: string[];
+ skipped: string[];
+ bundledVersion: string;
+ installedVersion: string;
+ runtimeDir: string;
+}
+
+let _seededOnce = false;
+
+export function ensureRuntimeSeeded(force = false): string {
+ if (_seededOnce && !force) return runtimeBin();
+ const report = runSeeder();
+ _seededOnce = true;
+ return report.runtimeDir;
+}
+
+export function runSeeder(): SeedReport {
+ const rtRoot = runtimeRoot();
+ const rt = runtimeBin();
+ const pkg = pkgBinDir();
+
+ ensureDirWritable(rtRoot);
+ ensureDirWritable(rt);
+ ensureDefaultConfig();
+
+ const lockFd = acquireLock(rt);
+ try {
+ const bundledStr = bundledVersion();
+ const installedStr = runtimeVersion(rt);
+ const report: SeedReport = {
+ action: 'noop',
+ copied: [],
+ skipped: [],
+ bundledVersion: bundledStr,
+ installedVersion: installedStr,
+ runtimeDir: rt,
+ };
+
+ const bundled = semverTuple(bundledStr);
+ const installed = semverTuple(installedStr);
+
+ const force = process.env['PILOT_FORCE_SEED'] === '1';
+ // Same-or-newer already installed → just verify completeness.
+ if (!force && installed && bundled && compareSemver(bundled, installed) <= 0) {
+ let needSeed = false;
+ const required = [...BIN_NAMES, platformLibName()];
+ for (const name of required) {
+ if (!existsSync(join(rt, name))) {
+ needSeed = true;
+ break;
+ }
+ }
+ if (!needSeed) {
+ report.action = 'noop';
+ return report;
+ }
+ }
+
+ report.action = installedStr ? 'upgrade' : 'seed';
+ const daemonBusy = probeDaemonLiveSync();
+
+ const required = [...BIN_NAMES, platformLibName()];
+ for (const name of required) {
+ const src = join(pkg, name);
+ if (!existsSync(src)) {
+ // Wrong-platform package or partial bundle.
+ continue;
+ }
+ const dst = join(rt, name);
+ if (name === 'pilot-daemon' && daemonBusy && existsSync(dst)) {
+ report.skipped.push(name);
+ report.action = 'daemon-skip';
+ continue;
+ }
+ try {
+ atomicInstall(src, dst);
+ report.copied.push(name);
+ } catch (err: unknown) {
+ const e = err as NodeJS.ErrnoException;
+ if (e.code === 'ETXTBSY' || e.code === 'EBUSY') {
+ report.skipped.push(name);
+ continue;
+ }
+ throw err;
+ }
+ }
+
+ if (bundledStr) {
+ const verPath = join(rt, '.pilot-version');
+ const tmp = `${verPath}.tmp.${process.pid}`;
+ writeFileSync(tmp, bundledStr + '\n');
+ renameSync(tmp, verPath);
+ }
+
+ return report;
+ } finally {
+ releaseLock(rt, lockFd);
+ }
+}
+
+export function runtimeBinaryPath(name: string): string {
+ const rt = ensureRuntimeSeeded();
+ const p = join(rt, name);
+ if (existsSync(p)) return p;
+ // Last-ditch: run from the package.
+ const fallback = join(pkgBinDir(), name);
+ if (existsSync(fallback)) return fallback;
+ throw new Error(
+ `Binary '${name}' not found in ${rt} or ${pkgBinDir()}. ` +
+ `This package may be for a different platform.`,
+ );
+}
+
+export function runtimeLibraryPath(): string {
+ const rt = ensureRuntimeSeeded();
+ const name = platformLibName();
+ const p = join(rt, name);
+ if (existsSync(p)) return p;
+ const fallback = join(pkgBinDir(), name);
+ if (existsSync(fallback)) return fallback;
+ throw new Error(`libpilot (${name}) not found in ${rt} or ${pkgBinDir()}.`);
+}
+
+/** Test helper. */
+export function _resetSeededMarker(): void {
+ _seededOnce = false;
+}
+
+/** Async daemon probe — exposed for callers that don't want the sync nc spawn. */
+export async function isDaemonLive(): Promise {
+ return probeDaemonLive();
+}
+
+/** For tests: expose the raw paths. */
+export const _internals = {
+ pkgBinDir,
+ runtimeRoot,
+ runtimeBin,
+ platformLibName,
+ bundledVersion,
+ runtimeVersion,
+ semverTuple,
+ compareSemver,
+ atomicInstall,
+};
+
+// Avoid unused-import warnings when this file is type-only consumed.
+void dirname;
diff --git a/sdk/node/tests/client.test.ts b/sdk/node/tests/client.test.ts
index a41cf364..b743426c 100644
--- a/sdk/node/tests/client.test.ts
+++ b/sdk/node/tests/client.test.ts
@@ -86,6 +86,132 @@ function createFakeLib(): PilotLib & {
},
PilotConnClose(_h: bigint) { return null as string | null; },
PilotSendTo(_h: bigint, _addr: string, _data: Buffer, _len: number) { return null as string | null; },
+
+ // ---- 1.9.1 additions ----
+
+ // Captured-arg fields for assertions (typed loosely on purpose)
+ _lastDialTimeout: null as null | { addr: string; ms: bigint },
+ _lastSetReadDeadline: null as bigint | null,
+ _lastBroadcast: null as null | {
+ networkId: number;
+ port: number;
+ dataLen: number;
+ adminToken: string;
+ payload: Buffer;
+ },
+ _lastNetworkJoin: null as null | { networkId: number; token: string },
+ _lastNetworkInvite: null as null | { networkId: number; targetNodeId: number },
+ _lastNetworkRespond: null as null | { networkId: number; accept: number },
+ _lastManagedScore: null as null | {
+ networkId: number;
+ nodeId: number;
+ delta: number;
+ topic: string;
+ },
+ _lastPolicySet: null as null | { networkId: number; policyJson: string },
+ _lastMemberTagsSet: null as null | {
+ networkId: number;
+ nodeId: number;
+ tagsJson: string;
+ },
+
+ PilotHealth(_h: bigint) {
+ return fake._jsonReturns['PilotHealth'] ?? jsonOk({ ok: true, uptime_s: 42 });
+ },
+ PilotRotateKey(_h: bigint) {
+ return fake._jsonReturns['PilotRotateKey'] ?? jsonOk({ new_pubkey: 'abc' });
+ },
+ PilotDialTimeout(_h: bigint, addr: string, timeoutMs: bigint) {
+ fake._lastDialTimeout = { addr, ms: timeoutMs };
+ return { handle: 11n, err: null as string | null };
+ },
+ PilotConnSetReadDeadline(_h: bigint, deadlineUnixNanos: bigint) {
+ fake._lastSetReadDeadline = deadlineUnixNanos;
+ return null as string | null;
+ },
+ PilotBroadcast(
+ _h: bigint,
+ networkId: number,
+ port: number,
+ data: Buffer,
+ dataLen: number,
+ adminToken: string,
+ ) {
+ fake._lastBroadcast = {
+ networkId,
+ port,
+ dataLen,
+ adminToken,
+ payload: Buffer.from(data.subarray(0, dataLen)),
+ };
+ return fake._jsonReturns['PilotBroadcast'] ?? null;
+ },
+ PilotNetworkList(_h: bigint) {
+ return fake._jsonReturns['PilotNetworkList'] ?? jsonOk({ networks: [{ id: 0 }] });
+ },
+ PilotNetworkJoin(_h: bigint, networkId: number, token: string) {
+ fake._lastNetworkJoin = { networkId, token };
+ return fake._jsonReturns['PilotNetworkJoin'] ?? jsonOk({ status: 'joined' });
+ },
+ PilotNetworkLeave(_h: bigint, _networkId: number) {
+ return fake._jsonReturns['PilotNetworkLeave'] ?? jsonOk({ status: 'left' });
+ },
+ PilotNetworkMembers(_h: bigint, _networkId: number) {
+ return fake._jsonReturns['PilotNetworkMembers'] ?? jsonOk({ members: [] });
+ },
+ PilotNetworkInvite(_h: bigint, networkId: number, targetNodeId: number) {
+ fake._lastNetworkInvite = { networkId, targetNodeId };
+ return fake._jsonReturns['PilotNetworkInvite'] ?? jsonOk({ status: 'invited' });
+ },
+ PilotNetworkPollInvites(_h: bigint) {
+ return fake._jsonReturns['PilotNetworkPollInvites'] ?? jsonOk({ invites: [] });
+ },
+ PilotNetworkRespondInvite(_h: bigint, networkId: number, accept: number) {
+ fake._lastNetworkRespond = { networkId, accept };
+ return fake._jsonReturns['PilotNetworkRespondInvite'] ?? jsonOk({ status: 'responded' });
+ },
+ PilotManagedScore(
+ _h: bigint,
+ networkId: number,
+ nodeId: number,
+ delta: number,
+ topic: string,
+ ) {
+ fake._lastManagedScore = { networkId, nodeId, delta, topic };
+ return fake._jsonReturns['PilotManagedScore'] ?? jsonOk({ status: 'ok' });
+ },
+ PilotManagedStatus(_h: bigint, networkId: number) {
+ return fake._jsonReturns['PilotManagedStatus'] ?? jsonOk({ network_id: networkId });
+ },
+ PilotManagedRankings(_h: bigint, _networkId: number) {
+ return fake._jsonReturns['PilotManagedRankings'] ?? jsonOk({ rankings: [] });
+ },
+ PilotManagedForceCycle(_h: bigint, _networkId: number) {
+ return fake._jsonReturns['PilotManagedForceCycle'] ?? jsonOk({ status: 'cycled' });
+ },
+ PilotManagedReconcile(_h: bigint, networkId: number) {
+ return (
+ fake._jsonReturns['PilotManagedReconcile'] ??
+ jsonOk({ network_id: networkId, peers: [] })
+ );
+ },
+ PilotPolicyGet(_h: bigint, networkId: number) {
+ return (
+ fake._jsonReturns['PilotPolicyGet'] ??
+ jsonOk({ network_id: networkId, policy: {} })
+ );
+ },
+ PilotPolicySet(_h: bigint, networkId: number, policyJson: string) {
+ fake._lastPolicySet = { networkId, policyJson };
+ return fake._jsonReturns['PilotPolicySet'] ?? jsonOk({ status: 'applied' });
+ },
+ PilotMemberTagsGet(_h: bigint, _networkId: number, _nodeId: number) {
+ return fake._jsonReturns['PilotMemberTagsGet'] ?? jsonOk({ tags: [] });
+ },
+ PilotMemberTagsSet(_h: bigint, networkId: number, nodeId: number, tagsJson: string) {
+ fake._lastMemberTagsSet = { networkId, nodeId, tagsJson };
+ return fake._jsonReturns['PilotMemberTagsSet'] ?? jsonOk({ status: 'ok' });
+ },
};
return fake;
@@ -598,3 +724,362 @@ describe('Driver sendFile', () => {
d.close();
});
});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: health / rotate-key
+// ---------------------------------------------------------------------------
+
+describe('Driver health', () => {
+ it('returns the daemon health blob', () => {
+ const d = new Driver();
+ const r = d.health();
+ expect(r['ok']).toBe(true);
+ expect(r['uptime_s']).toBe(42);
+ d.close();
+ });
+
+ it('throws on health error', () => {
+ fakeLib._jsonReturns['PilotHealth'] = jsonErr('daemon down');
+ const d = new Driver();
+ expect(() => d.health()).toThrow('daemon down');
+ d.close();
+ });
+});
+
+describe('Driver rotateKey', () => {
+ it('returns new key info', () => {
+ const d = new Driver();
+ expect(d.rotateKey()).toEqual({ new_pubkey: 'abc' });
+ d.close();
+ });
+
+ it('throws on error', () => {
+ fakeLib._jsonReturns['PilotRotateKey'] = jsonErr('registry rejected');
+ const d = new Driver();
+ expect(() => d.rotateKey()).toThrow('registry rejected');
+ d.close();
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: dial timeout
+// ---------------------------------------------------------------------------
+
+describe('Driver dial timeout', () => {
+ it('uses PilotDial when no timeout', () => {
+ const d = new Driver();
+ const conn = d.dial('0:0001.0000.0002:8080');
+ // Default PilotDial returns handle 10
+ expect(conn).toBeInstanceOf(Conn);
+ expect(fakeLib._lastDialTimeout).toBeNull();
+ conn.close();
+ d.close();
+ });
+
+ it('uses PilotDialTimeout when timeoutMs is given', () => {
+ const d = new Driver();
+ const conn = d.dial('0:0001.0000.0002:8080', 2500);
+ expect(conn).toBeInstanceOf(Conn);
+ expect(fakeLib._lastDialTimeout).not.toBeNull();
+ expect(fakeLib._lastDialTimeout?.addr).toBe('0:0001.0000.0002:8080');
+ expect(fakeLib._lastDialTimeout?.ms).toBe(2500n);
+ conn.close();
+ d.close();
+ });
+
+ it('clamps negative timeoutMs to 0', () => {
+ const d = new Driver();
+ d.dial('0:0001.0000.0002:8080', -10);
+ expect(fakeLib._lastDialTimeout?.ms).toBe(0n);
+ d.close();
+ });
+
+ it('throws on dial-timeout error', () => {
+ fakeLib.PilotDialTimeout = (_h: bigint, _addr: string, _ms: bigint) => ({
+ handle: 0n,
+ err: jsonErr('dial timeout'),
+ });
+ const d = new Driver();
+ expect(() => d.dial('bad:addr', 1000)).toThrow('dial timeout');
+ d.close();
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: Conn.setReadDeadline
+// ---------------------------------------------------------------------------
+
+describe('Conn setReadDeadline', () => {
+ it('clears the deadline with null', () => {
+ const conn = new Conn(10n);
+ conn.setReadDeadline(null);
+ expect(fakeLib._lastSetReadDeadline).toBe(0n);
+ });
+
+ it('converts a Date to nanoseconds', () => {
+ const conn = new Conn(10n);
+ const d = new Date(1700000000500); // 1.7e12 ms = 1.7e21 ns? No: 1.7e12 ms * 1e6 = 1.7e18 ns
+ conn.setReadDeadline(d);
+ expect(fakeLib._lastSetReadDeadline).toBe(BigInt(1700000000500) * 1_000_000n);
+ });
+
+ it('treats a number as ms-from-now', () => {
+ const before = Date.now();
+ const conn = new Conn(10n);
+ conn.setReadDeadline(5000);
+ const after = Date.now();
+ const got = fakeLib._lastSetReadDeadline ?? 0n;
+ // Expected nanos must be in [before+5000, after+5000] ms range
+ const lo = BigInt(before + 5000) * 1_000_000n;
+ const hi = BigInt(after + 5000) * 1_000_000n;
+ expect(got >= lo).toBe(true);
+ expect(got <= hi).toBe(true);
+ });
+
+ it('throws if the connection is closed', () => {
+ const conn = new Conn(10n);
+ conn.close();
+ expect(() => conn.setReadDeadline(null)).toThrow('connection closed');
+ });
+
+ it('propagates errors from Go', () => {
+ fakeLib.PilotConnSetReadDeadline = (_h: bigint, _d: bigint) => jsonErr('bad handle');
+ const conn = new Conn(10n);
+ expect(() => conn.setReadDeadline(null)).toThrow('bad handle');
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: broadcast
+// ---------------------------------------------------------------------------
+
+describe('Driver broadcast', () => {
+ it('passes networkId, port, payload, and admin token', () => {
+ const d = new Driver();
+ d.broadcast(7, 1234, Buffer.from('hello'), 'secret');
+ expect(fakeLib._lastBroadcast).not.toBeNull();
+ expect(fakeLib._lastBroadcast?.networkId).toBe(7);
+ expect(fakeLib._lastBroadcast?.port).toBe(1234);
+ expect(fakeLib._lastBroadcast?.dataLen).toBe(5);
+ expect(fakeLib._lastBroadcast?.adminToken).toBe('secret');
+ expect(fakeLib._lastBroadcast?.payload.toString()).toBe('hello');
+ d.close();
+ });
+
+ it('accepts a string payload', () => {
+ const d = new Driver();
+ d.broadcast(0, 9999, 'ping', 'tok');
+ expect(fakeLib._lastBroadcast?.payload.toString()).toBe('ping');
+ d.close();
+ });
+
+ it('throws when daemon rejects the broadcast', () => {
+ fakeLib._jsonReturns['PilotBroadcast'] = jsonErr('admin token required');
+ const d = new Driver();
+ expect(() => d.broadcast(0, 9000, Buffer.from('x'), '')).toThrow('admin token required');
+ d.close();
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: networks
+// ---------------------------------------------------------------------------
+
+describe('Driver networks', () => {
+ it('networkList', () => {
+ const d = new Driver();
+ const r = d.networkList();
+ expect(r).toHaveProperty('networks');
+ d.close();
+ });
+
+ it('networkJoin passes networkId and token', () => {
+ const d = new Driver();
+ expect(d.networkJoin(7, 'joinme')).toEqual({ status: 'joined' });
+ expect(fakeLib._lastNetworkJoin).toEqual({ networkId: 7, token: 'joinme' });
+ d.close();
+ });
+
+ it('networkJoin defaults token to empty string', () => {
+ const d = new Driver();
+ d.networkJoin(2);
+ expect(fakeLib._lastNetworkJoin?.token).toBe('');
+ d.close();
+ });
+
+ it('networkLeave', () => {
+ const d = new Driver();
+ expect(d.networkLeave(7)).toEqual({ status: 'left' });
+ d.close();
+ });
+
+ it('networkMembers', () => {
+ const d = new Driver();
+ expect(d.networkMembers(7)).toHaveProperty('members');
+ d.close();
+ });
+
+ it('networkInvite captures both ids', () => {
+ const d = new Driver();
+ expect(d.networkInvite(7, 4242)).toEqual({ status: 'invited' });
+ expect(fakeLib._lastNetworkInvite).toEqual({ networkId: 7, targetNodeId: 4242 });
+ d.close();
+ });
+
+ it('networkPollInvites', () => {
+ const d = new Driver();
+ expect(d.networkPollInvites()).toHaveProperty('invites');
+ d.close();
+ });
+
+ it('networkRespondInvite accept=true → 1', () => {
+ const d = new Driver();
+ d.networkRespondInvite(7, true);
+ expect(fakeLib._lastNetworkRespond).toEqual({ networkId: 7, accept: 1 });
+ d.close();
+ });
+
+ it('networkRespondInvite accept=false → 0', () => {
+ const d = new Driver();
+ d.networkRespondInvite(7, false);
+ expect(fakeLib._lastNetworkRespond).toEqual({ networkId: 7, accept: 0 });
+ d.close();
+ });
+
+ it('networkJoin propagates daemon error', () => {
+ fakeLib._jsonReturns['PilotNetworkJoin'] = jsonErr('token rejected');
+ const d = new Driver();
+ expect(() => d.networkJoin(7, 'wrong')).toThrow('token rejected');
+ d.close();
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: managed
+// ---------------------------------------------------------------------------
+
+describe('Driver managed', () => {
+ it('managedScore captures all args', () => {
+ const d = new Driver();
+ d.managedScore(7, 4242, -3, 'spam');
+ expect(fakeLib._lastManagedScore).toEqual({
+ networkId: 7,
+ nodeId: 4242,
+ delta: -3,
+ topic: 'spam',
+ });
+ d.close();
+ });
+
+ it('managedScore default topic is empty', () => {
+ const d = new Driver();
+ d.managedScore(0, 1, 5);
+ expect(fakeLib._lastManagedScore?.topic).toBe('');
+ d.close();
+ });
+
+ it('managedStatus echoes networkId', () => {
+ const d = new Driver();
+ expect(d.managedStatus(42)).toEqual({ network_id: 42 });
+ d.close();
+ });
+
+ it('managedRankings', () => {
+ const d = new Driver();
+ expect(d.managedRankings(42)).toHaveProperty('rankings');
+ d.close();
+ });
+
+ it('managedForceCycle', () => {
+ const d = new Driver();
+ expect(d.managedForceCycle(42)).toEqual({ status: 'cycled' });
+ d.close();
+ });
+
+ it('managedReconcile', () => {
+ const d = new Driver();
+ const r = d.managedReconcile(42);
+ expect(r['network_id']).toBe(42);
+ expect(r['peers']).toEqual([]);
+ d.close();
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: policy
+// ---------------------------------------------------------------------------
+
+describe('Driver policy', () => {
+ it('policyGet', () => {
+ const d = new Driver();
+ expect(d.policyGet(7)).toEqual({ network_id: 7, policy: {} });
+ d.close();
+ });
+
+ it('policySet serializes a dict to JSON', () => {
+ const d = new Driver();
+ d.policySet(7, { min_score: 3, tags: ['good'] });
+ expect(fakeLib._lastPolicySet?.networkId).toBe(7);
+ expect(JSON.parse(fakeLib._lastPolicySet?.policyJson ?? '')).toEqual({
+ min_score: 3,
+ tags: ['good'],
+ });
+ d.close();
+ });
+
+ it('policySet passes a string through unchanged', () => {
+ const d = new Driver();
+ d.policySet(0, '{"raw":true}');
+ expect(fakeLib._lastPolicySet?.policyJson).toBe('{"raw":true}');
+ d.close();
+ });
+
+ it('policySet decodes a Buffer to UTF-8', () => {
+ const d = new Driver();
+ d.policySet(0, Buffer.from('{"raw":1}'));
+ expect(fakeLib._lastPolicySet?.policyJson).toBe('{"raw":1}');
+ d.close();
+ });
+
+ it('policySet propagates daemon error', () => {
+ fakeLib._jsonReturns['PilotPolicySet'] = jsonErr('invalid policy');
+ const d = new Driver();
+ expect(() => d.policySet(0, {})).toThrow('invalid policy');
+ d.close();
+ });
+});
+
+// ---------------------------------------------------------------------------
+// 1.9.1 additions: member tags
+// ---------------------------------------------------------------------------
+
+describe('Driver memberTags', () => {
+ it('memberTagsGet', () => {
+ const d = new Driver();
+ expect(d.memberTagsGet(7, 4242)).toHaveProperty('tags');
+ d.close();
+ });
+
+ it('memberTagsSet serializes the list', () => {
+ const d = new Driver();
+ d.memberTagsSet(7, 4242, ['gpu', 'fast']);
+ expect(fakeLib._lastMemberTagsSet?.networkId).toBe(7);
+ expect(fakeLib._lastMemberTagsSet?.nodeId).toBe(4242);
+ expect(JSON.parse(fakeLib._lastMemberTagsSet?.tagsJson ?? '')).toEqual(['gpu', 'fast']);
+ d.close();
+ });
+
+ it('memberTagsSet handles empty list', () => {
+ const d = new Driver();
+ d.memberTagsSet(7, 4242, []);
+ expect(JSON.parse(fakeLib._lastMemberTagsSet?.tagsJson ?? '')).toEqual([]);
+ d.close();
+ });
+
+ it('memberTagsSet propagates daemon error', () => {
+ fakeLib._jsonReturns['PilotMemberTagsSet'] = jsonErr('not admin');
+ const d = new Driver();
+ expect(() => d.memberTagsSet(7, 1, ['x'])).toThrow('not admin');
+ d.close();
+ });
+});
diff --git a/sdk/node/tests/runtime.test.ts b/sdk/node/tests/runtime.test.ts
new file mode 100644
index 00000000..fe2a4459
--- /dev/null
+++ b/sdk/node/tests/runtime.test.ts
@@ -0,0 +1,298 @@
+/**
+ * Unit tests for the Node SDK runtime seeder (src/runtime.ts).
+ *
+ * Mirrors the Python seeder tests: covers the 5 state-machine states,
+ * the daemon-running guard, atomic-rename behavior, version compare.
+ *
+ * The tests redirect ~/.pilot/ to a tmp dir via the PILOT_HOME env var
+ * and stub the package-bin-dir helper to a controllable location.
+ */
+
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+ chmodSync,
+ existsSync,
+ mkdirSync,
+ mkdtempSync,
+ readFileSync,
+ rmSync,
+ statSync,
+ writeFileSync,
+} from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+
+// Import under test.
+import * as runtime from '../src/runtime.js';
+
+const BIN_NAMES = ['pilotctl', 'pilot-daemon', 'pilot-gateway', 'pilot-updater'] as const;
+
+function platformLib(): string {
+ return runtime._internals.platformLibName();
+}
+
+function makeFakePkgBin(parentTmp: string, version: string): string {
+ const pkg = join(parentTmp, 'pkg-bin');
+ mkdirSync(pkg, { recursive: true });
+ for (const n of BIN_NAMES) {
+ const p = join(pkg, n);
+ writeFileSync(p, `#!/bin/sh\necho ${n} ${version}\n`);
+ chmodSync(p, 0o755);
+ }
+ const lib = join(pkg, platformLib());
+ writeFileSync(lib, `LIB ${version}\n`);
+ chmodSync(lib, 0o755);
+ writeFileSync(join(pkg, '.pilot-version'), version + '\n');
+ return pkg;
+}
+
+let tmpRoot: string;
+let fakeHome: string;
+let pkgBin: string;
+let restoreEnv: { home: string | undefined; pkg: string | undefined };
+
+beforeEach(() => {
+ // Use a *short* tmp root so AF_UNIX paths fit in 104 chars on macOS.
+ tmpRoot = mkdtempSync(join('/tmp', 'pilot-rt-'));
+ fakeHome = join(tmpRoot, 'home', '.pilot');
+ mkdirSync(fakeHome, { recursive: true });
+ pkgBin = makeFakePkgBin(tmpRoot, '1.9.1');
+
+ restoreEnv = {
+ home: process.env['PILOT_HOME'],
+ pkg: process.env['PILOT_PKG_BIN_DIR'],
+ };
+ process.env['PILOT_HOME'] = fakeHome;
+ process.env['PILOT_PKG_BIN_DIR'] = pkgBin;
+
+ runtime._resetSeededMarker();
+});
+
+afterEach(() => {
+ vi.restoreAllMocks();
+ if (restoreEnv.home === undefined) delete process.env['PILOT_HOME'];
+ else process.env['PILOT_HOME'] = restoreEnv.home;
+ if (restoreEnv.pkg === undefined) delete process.env['PILOT_PKG_BIN_DIR'];
+ else process.env['PILOT_PKG_BIN_DIR'] = restoreEnv.pkg;
+ rmSync(tmpRoot, { recursive: true, force: true });
+ runtime._resetSeededMarker();
+});
+
+function setPkgBin(p: string): void {
+ process.env['PILOT_PKG_BIN_DIR'] = p;
+}
+
+// ---------------------------------------------------------------------------
+// State machine
+// ---------------------------------------------------------------------------
+
+describe('seeder state machine', () => {
+ it('seeds everything when the runtime dir is empty', () => {
+ const report = runtime.runSeeder();
+ expect(report.action).toBe('seed');
+ const expected = new Set([...BIN_NAMES, platformLib()]);
+ for (const f of report.copied) expected.delete(f);
+ expect(expected.size).toBe(0);
+
+ for (const n of [...BIN_NAMES, platformLib()]) {
+ expect(existsSync(join(fakeHome, 'bin', n))).toBe(true);
+ }
+ const v = readFileSync(join(fakeHome, 'bin', '.pilot-version'), 'utf8').trim();
+ expect(v).toBe('1.9.1');
+ });
+
+ it('is a noop when versions match', () => {
+ runtime.runSeeder();
+ runtime._resetSeededMarker();
+ const r2 = runtime.runSeeder();
+ expect(r2.action).toBe('noop');
+ expect(r2.copied).toEqual([]);
+ });
+
+ it('does not downgrade when bundled version is older', () => {
+ runtime.runSeeder();
+ runtime._resetSeededMarker();
+
+ // Replace the package with an older version.
+ const olderPkg = makeFakePkgBin(join(tmpRoot, 'older'), '1.8.0');
+ setPkgBin(olderPkg);
+
+ const r = runtime.runSeeder();
+ expect(r.action).toBe('noop');
+ expect(r.copied).toEqual([]);
+ const v = readFileSync(join(fakeHome, 'bin', '.pilot-version'), 'utf8').trim();
+ expect(v).toBe('1.9.1');
+ });
+
+ it('upgrades to a newer bundled version', () => {
+ runtime.runSeeder();
+ runtime._resetSeededMarker();
+
+ const newerPkg = makeFakePkgBin(join(tmpRoot, 'newer'), '2.0.0');
+ setPkgBin(newerPkg);
+
+ const r = runtime.runSeeder();
+ expect(r.action).toBe('upgrade');
+ expect(r.copied.length).toBeGreaterThan(0);
+ const v = readFileSync(join(fakeHome, 'bin', '.pilot-version'), 'utf8').trim();
+ expect(v).toBe('2.0.0');
+ const ctlContents = readFileSync(join(fakeHome, 'bin', 'pilotctl'), 'utf8');
+ expect(ctlContents).toContain('2.0.0');
+ });
+
+ it('re-seeds files that disappeared from a same-version runtime', () => {
+ runtime.runSeeder();
+ rmSync(join(fakeHome, 'bin', 'pilotctl'));
+ runtime._resetSeededMarker();
+ const r = runtime.runSeeder();
+ expect(r.copied).toContain('pilotctl');
+ expect(existsSync(join(fakeHome, 'bin', 'pilotctl'))).toBe(true);
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Atomic install
+// ---------------------------------------------------------------------------
+
+describe('atomic install', () => {
+ it('survives an in-flight reader of the target file', () => {
+ runtime.runSeeder();
+ const target = join(fakeHome, 'bin', 'pilotctl');
+ const before = readFileSync(target, 'utf8');
+
+ // Atomic-replace with new content.
+ const newSrc = join(tmpRoot, 'newctl');
+ writeFileSync(newSrc, 'DIFFERENT\n');
+ runtime._internals.atomicInstall(newSrc, target);
+
+ const after = readFileSync(target, 'utf8');
+ expect(after).toBe('DIFFERENT\n');
+ expect(after).not.toBe(before);
+ });
+
+ it('leaves no .tmp.* files behind', () => {
+ runtime.runSeeder();
+ const dir = join(fakeHome, 'bin');
+ const stat = statSync(dir);
+ expect(stat.isDirectory()).toBe(true);
+ // No leftover tmp files.
+ const fs = require('node:fs');
+ const entries: string[] = fs.readdirSync(dir);
+ const leftovers = entries.filter((e: string) => e.includes('.tmp.'));
+ expect(leftovers).toEqual([]);
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Config bootstrap
+// ---------------------------------------------------------------------------
+
+describe('config bootstrap', () => {
+ it('writes a default config.json when missing', () => {
+ runtime.runSeeder();
+ const cfgPath = join(fakeHome, 'config.json');
+ const cfg = JSON.parse(readFileSync(cfgPath, 'utf8'));
+ expect(cfg.registry).toBe(runtime.DEFAULT_REGISTRY);
+ expect(cfg.beacon).toBe(runtime.DEFAULT_BEACON);
+ expect(cfg.socket).toBe(runtime.DEFAULT_SOCKET);
+ expect(cfg.encrypt).toBe(true);
+ // We never auto-set an email.
+ expect('email' in cfg).toBe(false);
+ });
+
+ it('preserves an existing config.json', () => {
+ const cfgPath = join(fakeHome, 'config.json');
+ writeFileSync(cfgPath, JSON.stringify({ email: 'foo@bar.com', preserved: true }));
+ runtime.runSeeder();
+ const cfg = JSON.parse(readFileSync(cfgPath, 'utf8'));
+ expect(cfg.preserved).toBe(true);
+ expect(cfg.email).toBe('foo@bar.com');
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Public entry points
+// ---------------------------------------------------------------------------
+
+describe('public entry points', () => {
+ it('runtimeBinaryPath seeds and returns the path', () => {
+ const p = runtime.runtimeBinaryPath('pilotctl');
+ expect(p).toBe(join(fakeHome, 'bin', 'pilotctl'));
+ expect(existsSync(p)).toBe(true);
+ });
+
+ it('runtimeLibraryPath seeds and returns the path', () => {
+ const p = runtime.runtimeLibraryPath();
+ expect(p).toBe(join(fakeHome, 'bin', platformLib()));
+ expect(existsSync(p)).toBe(true);
+ });
+
+ it('runtimeBinaryPath throws for unknown name', () => {
+ expect(() => runtime.runtimeBinaryPath('bogus')).toThrow(/bogus/);
+ });
+
+ it('ensureRuntimeSeeded short-circuits subsequent calls', () => {
+ runtime.ensureRuntimeSeeded();
+ const before = statSync(join(fakeHome, 'bin', '.pilot-version')).mtimeMs;
+ // Sleep briefly to ensure mtime would change if it ran again.
+ const t = Date.now() + 30;
+ while (Date.now() < t) {
+ // tight wait
+ }
+ runtime.ensureRuntimeSeeded();
+ const after = statSync(join(fakeHome, 'bin', '.pilot-version')).mtimeMs;
+ expect(after).toBe(before);
+ });
+});
+
+// ---------------------------------------------------------------------------
+// SemVer compare
+// ---------------------------------------------------------------------------
+
+describe('semver compare', () => {
+ const t = runtime._internals.semverTuple;
+ const cmp = runtime._internals.compareSemver;
+
+ it('parses common forms', () => {
+ expect(t('1.9.1')).toEqual([1, 9, 1]);
+ expect(t('v1.9.1')).toEqual([1, 9, 1]);
+ expect(t('1.9.1-rc4')).toEqual([1, 9, 1]);
+ expect(t('1.9.1+meta')).toEqual([1, 9, 1]);
+ expect(t('')).toBeNull();
+ expect(t('garbage')).toBeNull();
+ });
+
+ it('orders correctly', () => {
+ expect(cmp(t('2.0.0'), t('1.9.99'))).toBe(1);
+ expect(cmp(t('1.9.0'), t('1.9.1'))).toBe(-1);
+ expect(cmp(t('1.9.1'), t('1.9.1'))).toBe(0);
+ // null < anything
+ expect(cmp(null, t('0.0.0'))).toBe(-1);
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Wrong-platform handling
+// ---------------------------------------------------------------------------
+
+describe('wrong-platform package', () => {
+ it('seeder skips missing files cleanly', () => {
+ // Build a pkg without the platform lib.
+ const incomplete = join(tmpRoot, 'incomplete');
+ mkdirSync(incomplete, { recursive: true });
+ for (const n of BIN_NAMES) {
+ const p = join(incomplete, n);
+ writeFileSync(p, '#!/bin/sh\n');
+ chmodSync(p, 0o755);
+ }
+ writeFileSync(join(incomplete, '.pilot-version'), '1.9.1\n');
+ setPkgBin(incomplete);
+
+ const r = runtime.runSeeder();
+ expect(r.copied).not.toContain(platformLib());
+
+ // runtimeLibraryPath should raise a clear error since lib is absent
+ // from both runtime dir and package.
+ expect(() => runtime.runtimeLibraryPath()).toThrow(/libpilot/);
+ });
+});
diff --git a/sdk/node/tests/smoke_list_agents.mjs b/sdk/node/tests/smoke_list_agents.mjs
new file mode 100644
index 00000000..2361df8b
--- /dev/null
+++ b/sdk/node/tests/smoke_list_agents.mjs
@@ -0,0 +1,136 @@
+#!/usr/bin/env node
+/**
+ * End-to-end smoke test for the Node SDK against a real daemon.
+ *
+ * Identical contract to the Python smoke script (sdk/python/scripts/
+ * smoke_list_agents.py): construct Driver → info → handshake list-agents
+ * → send_message('/data {...}') → poll ~/.pilot/inbox/ for the reply.
+ *
+ * Run with the just-built SDK:
+ * cd sdk/node && npx tsc && node scripts/smoke_list_agents.mjs
+ */
+
+import { homedir } from 'node:os';
+import { join } from 'node:path';
+import { readdirSync, readFileSync, statSync } from 'node:fs';
+
+import { Driver, PilotError } from '../dist/client.js';
+
+const LIST_AGENTS_HOST = 'list-agents';
+const LIST_AGENTS_NODE_ID = 16398;
+const INBOX_DIR = join(homedir(), '.pilot', 'inbox');
+const WAIT_MS = 8_000;
+
+function newestInboxFileSince(afterMtime) {
+ let best = null;
+ let bestMtime = 0;
+ for (const name of readdirSync(INBOX_DIR)) {
+ if (!name.endsWith('.json')) continue;
+ const p = join(INBOX_DIR, name);
+ const st = statSync(p);
+ if (st.mtimeMs > afterMtime && st.mtimeMs > bestMtime) {
+ best = p;
+ bestMtime = st.mtimeMs;
+ }
+ }
+ return best;
+}
+
+async function sleep(ms) {
+ return new Promise((r) => setTimeout(r, ms));
+}
+
+async function main() {
+ console.log('[1/5] Constructing Driver…');
+ let d;
+ try {
+ d = new Driver();
+ } catch (e) {
+ if (e instanceof PilotError) {
+ console.log(` FAIL: cannot reach daemon: ${e.message}`);
+ process.exit(2);
+ }
+ throw e;
+ }
+ console.log(' OK');
+
+ console.log('[2/5] Calling info()…');
+ const info = d.info();
+ console.log(
+ ` node_id=${info.node_id} addr=${info.address} peers=${info.peers}`,
+ );
+
+ console.log(`[3/5] Handshake list-agents (node ${LIST_AGENTS_NODE_ID})…`);
+ try {
+ const h = d.handshake(LIST_AGENTS_NODE_ID, 'node sdk smoke test');
+ console.log(` OK: ${JSON.stringify(h)}`);
+ } catch (e) {
+ const msg = String(e?.message ?? e).toLowerCase();
+ if (msg.includes('already') || msg.includes('trust')) {
+ console.log(` OK (already trusted): ${e}`);
+ } else {
+ console.log(` FAIL: ${e}`);
+ process.exit(3);
+ }
+ }
+
+ console.log('[4/5] sendMessage → list-agents …');
+ const tStart = Date.now() / 1000 - 1;
+ let result;
+ try {
+ result = d.sendMessage(
+ LIST_AGENTS_HOST,
+ '/data {"search":"","limit":1}',
+ 'text',
+ );
+ } catch (e) {
+ console.log(` FAIL: sendMessage: ${e}`);
+ process.exit(4);
+ }
+ console.log(` sent: ${JSON.stringify(result)}`);
+
+ console.log(`[5/5] Waiting up to ${WAIT_MS / 1000}s for inbox reply…`);
+ const deadline = Date.now() + WAIT_MS;
+ let replyFile = null;
+ while (Date.now() < deadline) {
+ replyFile = newestInboxFileSince(tStart * 1000);
+ if (replyFile) break;
+ await sleep(500);
+ }
+ if (!replyFile) {
+ console.log(' FAIL: no inbox reply within window');
+ process.exit(5);
+ }
+ console.log(` reply file: ${replyFile}`);
+
+ let envelope;
+ try {
+ envelope = JSON.parse(readFileSync(replyFile, 'utf8'));
+ } catch (e) {
+ console.log(` FAIL: cannot parse reply: ${e}`);
+ process.exit(6);
+ }
+ console.log(
+ ` agent=${envelope.agent} command=${envelope.command} ok=${envelope.ok}`,
+ );
+
+ if (typeof envelope.data === 'string') {
+ try {
+ const payload = JSON.parse(envelope.data);
+ const total =
+ payload.total ?? payload.count ?? (payload.tiers?.free?.items?.length ?? null);
+ if (total !== null) console.log(` list-agents total: ${total}`);
+ } catch {
+ console.log(' (data not JSON; envelope OK)');
+ }
+ }
+
+ d.close();
+ console.log('\nSMOKE TEST PASSED (node)');
+ process.exit(0);
+}
+
+main().catch((e) => {
+ console.error(`unhandled: ${e}`);
+ process.exit(99);
+});
diff --git a/sdk/python/MANIFEST.in b/sdk/python/MANIFEST.in
index 84f4e5c6..fe134aee 100644
--- a/sdk/python/MANIFEST.in
+++ b/sdk/python/MANIFEST.in
@@ -3,8 +3,11 @@ include README.md
include LICENSE
include CHANGELOG.md
-# Include all binaries in bin/ directory
+# Include all binaries in bin/ directory (the seed cache).
+# Dotfiles like .pilot-version need an explicit pattern because some
+# setuptools versions skip them under recursive-include.
recursive-include pilotprotocol/bin *
+include pilotprotocol/bin/.pilot-version
# Include type stubs if any
recursive-include pilotprotocol *.pyi
diff --git a/sdk/python/README.md b/sdk/python/README.md
index 9ac610f1..988793f2 100644
--- a/sdk/python/README.md
+++ b/sdk/python/README.md
@@ -273,7 +273,7 @@ See `examples/python_sdk/` for comprehensive examples:
- **`basic_usage.py`** — Connection, identity, trust management
- **`data_exchange_demo.py`** — Send messages, files, JSON
- **`event_stream_demo.py`** — Pub/sub patterns
-- **`task_submit_demo.py`** — Task delegation and polo score
+- **`task_submit_demo.py`** — Task delegation
- **`pydantic_ai_agent.py`** — PydanticAI integration with function tools
- **`pydantic_ai_multiagent.py`** — Multi-agent collaboration system
diff --git a/sdk/python/pilotprotocol/_runtime.py b/sdk/python/pilotprotocol/_runtime.py
new file mode 100644
index 00000000..73be4c2a
--- /dev/null
+++ b/sdk/python/pilotprotocol/_runtime.py
@@ -0,0 +1,382 @@
+"""Runtime environment seeder for the Pilot Protocol Python SDK.
+
+Both the CLI shims (``cli.py``) and the FFI loader (``client._load_lib``)
+funnel through :func:`ensure_runtime_seeded`, which idempotently mirrors
+the binaries shipped inside the wheel into ``~/.pilot/bin/``.
+
+Design goals:
+- The wheel is the *seed cache*; ``~/.pilot/bin/`` is the canonical runtime.
+- No install-time code runs; seeding happens lazily on first SDK use.
+- Concurrency-safe (flock) and crash-safe (atomic rename).
+- Never downgrades; never replaces a running daemon binary.
+- Coexists with ``install.sh`` (same layout, same ``.pilot-version`` marker).
+"""
+
+from __future__ import annotations
+
+import errno
+import json
+import os
+import platform
+import shutil
+import socket
+import sys
+import tempfile
+import threading
+from pathlib import Path
+from typing import Optional
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+_BIN_NAMES = ("pilotctl", "pilot-daemon", "pilot-gateway", "pilot-updater")
+_LIB_NAMES = {
+ "Darwin": "libpilot.dylib",
+ "Linux": "libpilot.so",
+ "Windows": "libpilot.dll",
+}
+
+DEFAULT_REGISTRY = "34.71.57.205:9000"
+DEFAULT_BEACON = "34.71.57.205:9001"
+DEFAULT_SOCKET = "/tmp/pilot.sock"
+
+
+# ---------------------------------------------------------------------------
+# Path helpers
+# ---------------------------------------------------------------------------
+
+def _pkg_bin_dir() -> Path:
+ """Where the wheel ships its bundled binaries (the seed cache)."""
+ return Path(__file__).resolve().parent / "bin"
+
+
+def _runtime_root() -> Path:
+ """Canonical runtime dir. Honours ``PILOT_HOME`` for CI / multi-tenant."""
+ override = os.environ.get("PILOT_HOME")
+ if override:
+ return Path(override).expanduser()
+ return Path.home() / ".pilot"
+
+
+def _runtime_bin() -> Path:
+ return _runtime_root() / "bin"
+
+
+def _platform_lib_name() -> str:
+ name = _LIB_NAMES.get(platform.system())
+ if name is None:
+ raise OSError(f"unsupported platform: {platform.system()}")
+ return name
+
+
+# ---------------------------------------------------------------------------
+# Version helpers
+# ---------------------------------------------------------------------------
+
+def _semver_tuple(v: str) -> tuple[int, ...]:
+ """Parse a SemVer-ish string into a comparable tuple. Unparseable → ()."""
+ s = (v or "").strip().lstrip("v").split("-", 1)[0].split("+", 1)[0]
+ if not s:
+ return ()
+ parts = []
+ for p in s.split("."):
+ try:
+ parts.append(int(p))
+ except ValueError:
+ return ()
+ return tuple(parts)
+
+
+def _bundled_version() -> str:
+ """Version of the binaries bundled in this wheel."""
+ f = _pkg_bin_dir() / ".pilot-version"
+ if f.is_file():
+ try:
+ return f.read_text().strip()
+ except OSError:
+ pass
+ # Fall back to the package metadata if the marker file is missing.
+ try:
+ from importlib.metadata import version as _pkg_version
+ return _pkg_version("pilotprotocol")
+ except Exception:
+ return ""
+
+
+def _runtime_version(rt: Path) -> str:
+ f = rt / ".pilot-version"
+ if f.is_file():
+ try:
+ return f.read_text().strip()
+ except OSError:
+ return ""
+ return ""
+
+
+# ---------------------------------------------------------------------------
+# Daemon liveness probe
+# ---------------------------------------------------------------------------
+
+def _daemon_running() -> bool:
+ """True if a pilot daemon is reachable on its IPC socket."""
+ sock_path = DEFAULT_SOCKET
+ try:
+ with open(_runtime_root() / "config.json") as f:
+ cfg = json.load(f)
+ sock_path = cfg.get("socket", sock_path) or sock_path
+ except (OSError, ValueError):
+ pass
+
+ if not Path(sock_path).exists():
+ return False
+ s = socket.socket(socket.AF_UNIX)
+ s.settimeout(0.2)
+ try:
+ s.connect(sock_path)
+ return True
+ except OSError:
+ return False
+ finally:
+ try:
+ s.close()
+ except OSError:
+ pass
+
+
+# ---------------------------------------------------------------------------
+# Atomic file ops
+# ---------------------------------------------------------------------------
+
+def _atomic_install(src: Path, dst: Path) -> None:
+ """Copy *src* → *dst* atomically, surviving in-flight execs.
+
+ Writes to ``.tmp.`` then ``os.replace()`` over the target.
+ On POSIX this unlinks the old inode while leaving any running process
+ that mapped it untouched.
+ """
+ tmp = dst.with_name(f"{dst.name}.tmp.{os.getpid()}.{threading.get_ident()}")
+ if tmp.exists():
+ tmp.unlink()
+ shutil.copy2(src, tmp)
+ try:
+ tmp.chmod(0o755)
+ os.replace(tmp, dst)
+ except OSError:
+ try:
+ tmp.unlink()
+ except OSError:
+ pass
+ raise
+
+
+def _ensure_dir_writable(p: Path) -> None:
+ """Create *p* if it does not exist; raise a clear error if we cannot
+ write to it (e.g. owned by root after a botched install)."""
+ p.mkdir(parents=True, exist_ok=True)
+ if not os.access(p, os.W_OK):
+ raise PermissionError(
+ f"{p} is not writable by user {os.getuid()}. "
+ f"Repair with: chown -R $USER {p}"
+ )
+
+
+# ---------------------------------------------------------------------------
+# Config seeding
+# ---------------------------------------------------------------------------
+
+def _ensure_default_config() -> Path:
+ """Make sure ``~/.pilot/config.json`` exists. Never overwrites an
+ existing one — install.sh or the user may have set an email.
+ """
+ root = _runtime_root()
+ _ensure_dir_writable(root)
+ cfg_path = root / "config.json"
+ if cfg_path.is_file():
+ return cfg_path
+ cfg = {
+ "registry": DEFAULT_REGISTRY,
+ "beacon": DEFAULT_BEACON,
+ "socket": DEFAULT_SOCKET,
+ "encrypt": True,
+ "identity": str(root / "identity.json"),
+ }
+ tmp = cfg_path.with_name(
+ f"config.json.tmp.{os.getpid()}.{threading.get_ident()}"
+ )
+ tmp.write_text(json.dumps(cfg, indent=2) + "\n")
+ try:
+ os.replace(tmp, cfg_path)
+ except FileNotFoundError:
+ # Another thread won the race; that's fine.
+ if tmp.exists():
+ try:
+ tmp.unlink()
+ except OSError:
+ pass
+ return cfg_path
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+class SeedReport:
+ """Summary of what a seeder pass did. Useful for tests + diagnostics."""
+
+ def __init__(self) -> None:
+ self.copied: list[str] = []
+ self.skipped: list[str] = []
+ self.action: str = "noop" # one of: noop, seed, upgrade, daemon-skip
+ self.bundled_version: str = ""
+ self.installed_version: str = ""
+ self.runtime_dir: Path = _runtime_bin()
+
+
+_SEEDED_ONCE = False
+
+
+def ensure_runtime_seeded(force: bool = False) -> Path:
+ """Idempotently mirror bundled binaries into ``~/.pilot/bin/``.
+
+ Returns the runtime bin dir. Safe to call on every CLI invocation and
+ every Driver() construction; the steady state is a single stat() +
+ string compare.
+
+ Set ``force=True`` to re-run even if this process has already seeded.
+ """
+ global _SEEDED_ONCE
+ if _SEEDED_ONCE and not force:
+ return _runtime_bin()
+
+ report = run_seeder()
+ _SEEDED_ONCE = True
+ return report.runtime_dir
+
+
+def run_seeder() -> SeedReport:
+ """Run one seeder pass and return a structured report."""
+ report = SeedReport()
+ rt_root = _runtime_root()
+ rt = _runtime_bin()
+ pkg = _pkg_bin_dir()
+
+ # Make sure ~/.pilot/ exists and is writable.
+ _ensure_dir_writable(rt_root)
+ _ensure_dir_writable(rt)
+ _ensure_default_config()
+
+ # Cross-platform fcntl shim. flock is POSIX-only; on Windows we use
+ # msvcrt.locking. Tests run on POSIX so the Windows path is best-effort.
+ lock_path = rt / ".seed.lock"
+ lock_path.touch(exist_ok=True)
+ lock_fd = os.open(lock_path, os.O_RDWR)
+ try:
+ if os.name == "posix":
+ import fcntl
+ fcntl.flock(lock_fd, fcntl.LOCK_EX)
+ else: # pragma: no cover - Windows
+ import msvcrt
+ msvcrt.locking(lock_fd, msvcrt.LK_LOCK, 1)
+
+ bundled_str = _bundled_version()
+ installed_str = _runtime_version(rt)
+ report.bundled_version = bundled_str
+ report.installed_version = installed_str
+
+ bundled = _semver_tuple(bundled_str)
+ installed = _semver_tuple(installed_str)
+
+ # Decide overall action.
+ force = os.environ.get("PILOT_FORCE_SEED") == "1"
+ if not force and installed and bundled and bundled <= installed:
+ # Same or newer already installed. Still verify each file exists.
+ need_seed = False
+ for name in _BIN_NAMES + (_platform_lib_name(),):
+ if not (rt / name).is_file():
+ need_seed = True
+ break
+ if not need_seed:
+ report.action = "noop"
+ return report
+
+ report.action = "upgrade" if installed else "seed"
+ daemon_busy = _daemon_running()
+
+ for name in _BIN_NAMES + (_platform_lib_name(),):
+ src = pkg / name
+ if not src.is_file():
+ # Wrong-platform wheel or partial bundle. Skip — caller will
+ # surface a clear error when the missing binary is needed.
+ continue
+ dst = rt / name
+ if name == "pilot-daemon" and daemon_busy and dst.is_file():
+ report.skipped.append(name)
+ report.action = "daemon-skip"
+ continue
+ try:
+ _atomic_install(src, dst)
+ report.copied.append(name)
+ except OSError as e:
+ # ETXTBSY can hit Linux despite atomic rename if a tool has
+ # the file mmap'd. Skip with a notice; caller can retry.
+ if e.errno in (errno.ETXTBSY, errno.EBUSY):
+ report.skipped.append(name)
+ continue
+ raise
+
+ # Update the marker last; a partial seed leaves the old marker.
+ if bundled_str:
+ ver_path = rt / ".pilot-version"
+ tmp = ver_path.with_name(f".pilot-version.tmp.{os.getpid()}")
+ tmp.write_text(bundled_str + "\n")
+ os.replace(tmp, ver_path)
+
+ return report
+ finally:
+ try:
+ if os.name == "posix":
+ import fcntl
+ fcntl.flock(lock_fd, fcntl.LOCK_UN)
+ finally:
+ os.close(lock_fd)
+
+
+def runtime_binary(name: str) -> Path:
+ """Resolve a binary by name, seeding if needed.
+
+ Use this from CLI shims; it returns the path to exec.
+ """
+ rt = ensure_runtime_seeded()
+ p = rt / name
+ if not p.is_file():
+ # Last-ditch fallback: run from the wheel itself.
+ fallback = _pkg_bin_dir() / name
+ if fallback.is_file():
+ return fallback
+ raise FileNotFoundError(
+ f"Binary {name!r} not found in {rt} or {_pkg_bin_dir()}. "
+ f"This wheel may be for a different platform."
+ )
+ return p
+
+
+def runtime_library() -> Path:
+ """Resolve libpilot.{so,dylib,dll}, seeding if needed."""
+ rt = ensure_runtime_seeded()
+ name = _platform_lib_name()
+ p = rt / name
+ if p.is_file():
+ return p
+ fallback = _pkg_bin_dir() / name
+ if fallback.is_file():
+ return fallback
+ raise FileNotFoundError(
+ f"libpilot ({name}) not found in {rt} or {_pkg_bin_dir()}."
+ )
+
+
+def reset_seeded_marker() -> None:
+ """Test helper: forget that this process has already seeded."""
+ global _SEEDED_ONCE
+ _SEEDED_ONCE = False
diff --git a/sdk/python/pilotprotocol/cli.py b/sdk/python/pilotprotocol/cli.py
index 85452f91..8a5eb658 100644
--- a/sdk/python/pilotprotocol/cli.py
+++ b/sdk/python/pilotprotocol/cli.py
@@ -1,169 +1,55 @@
-"""Command-line interface wrappers for Pilot Protocol binaries.
-
-This module provides entry points for the bundled Go binaries:
-- pilotctl: CLI tool for managing the daemon
-- pilot-daemon: Background service
-- pilot-gateway: IP traffic bridge
-
-Each wrapper:
-1. Ensures ~/.pilot/ directory exists
-2. Creates default config.json if missing
-3. Executes the bundled binary with all arguments passed through
+"""Command-line entry points for the Pilot Protocol CLI binaries.
+
+The wheel ships pre-built Go binaries inside ``pilotprotocol/bin/``. On
+first call, :mod:`pilotprotocol._runtime` mirrors those into
+``~/.pilot/bin/`` (the canonical runtime directory shared with
+``install.sh``) and these wrappers exec the seeded copy.
+
+This means:
+- pip-installed and curl-installed users converge on the same daemon.
+- Multiple venvs, multiple SDK versions: highest version wins, no
+ parallel binary trees.
+- Uninstalling the wheel never deletes ``~/.pilot/`` (identity, config,
+ daemon state are preserved).
"""
-import json
-import os
import subprocess
import sys
-from pathlib import Path
-
-
-def _ensure_pilot_env():
- """Ensure ~/.pilot/ directory and config.json exist.
-
- Creates:
- - ~/.pilot/ directory
- - ~/.pilot/config.json with default settings (if not present)
-
- This function is called before every binary execution to ensure
- the runtime environment is properly initialized.
- """
- # Get user's home directory
- home = Path.home()
- pilot_dir = home / ".pilot"
- config_file = pilot_dir / "config.json"
-
- # Create ~/.pilot/ if it doesn't exist
- pilot_dir.mkdir(parents=True, exist_ok=True)
-
- # Create default config.json if it doesn't exist
- if not config_file.exists():
- default_config = {
- "registry": "34.71.57.205:9000",
- "beacon": "34.71.57.205:9001",
- "socket": "/tmp/pilot.sock",
- "encrypt": True,
- "identity": str(pilot_dir / "identity.json")
- }
-
- with open(config_file, 'w') as f:
- json.dump(default_config, f, indent=2)
-
-
-def _get_binary_path(binary_name: str) -> Path:
- """Get absolute path to a bundled binary.
-
- Args:
- binary_name: Name of the binary (e.g., 'pilotctl', 'pilot-daemon')
-
- Returns:
- Absolute path to the binary
-
- Raises:
- FileNotFoundError: If binary not found in package
- """
- # Find the bin/ directory relative to this file
- package_dir = Path(__file__).resolve().parent
- bin_dir = package_dir / "bin"
- binary_path = bin_dir / binary_name
-
- if not binary_path.exists():
- raise FileNotFoundError(
- f"Binary '{binary_name}' not found at {binary_path}\n"
- f"Expected location: {bin_dir}\n"
- "The wheel may not have been built correctly."
- )
-
- return binary_path
-
-
-def run_pilotctl():
- """Entry point for pilotctl CLI tool.
-
- This is called when the user runs 'pilotctl' from the command line.
- All arguments are passed through to the Go binary.
-
- Example:
- $ pilotctl daemon start --hostname my-agent
- $ pilotctl info
- $ pilotctl ping other-agent
- """
- # Ensure environment is set up
- _ensure_pilot_env()
-
- # Get path to bundled binary
- binary = _get_binary_path("pilotctl")
-
- # Execute the binary with all arguments
- # subprocess.call() returns the exit code directly
- exit_code = subprocess.call([str(binary)] + sys.argv[1:])
-
- # Exit with the same code as the binary
- sys.exit(exit_code)
-
-
-def run_daemon():
- """Entry point for pilot-daemon background service.
-
- This is called when the user runs 'pilot-daemon' from the command line.
- All arguments are passed through to the Go binary.
-
- Example:
- $ pilot-daemon -registry 34.71.57.205:9000 -beacon 34.71.57.205:9001
- $ pilot-daemon -hostname my-agent -public
- """
- # Ensure environment is set up
- _ensure_pilot_env()
-
- # Get path to bundled binary
- binary = _get_binary_path("pilot-daemon")
-
- # Execute the binary with all arguments
- exit_code = subprocess.call([str(binary)] + sys.argv[1:])
-
- # Exit with the same code as the binary
- sys.exit(exit_code)
-
-
-def run_gateway():
- """Entry point for pilot-gateway IP traffic bridge.
-
- This is called when the user runs 'pilot-gateway' from the command line.
- All arguments are passed through to the Go binary.
-
- Example:
- $ pilot-gateway --ports 80,3000
- """
- # Ensure environment is set up
- _ensure_pilot_env()
- # Get path to bundled binary
- binary = _get_binary_path("pilot-gateway")
+from ._runtime import ensure_runtime_seeded, runtime_binary
+
+
+def _exec_runtime_binary(name: str) -> None:
+ """Seed ``~/.pilot/bin/`` if needed, then exec the named binary."""
+ ensure_runtime_seeded()
+ binary = runtime_binary(name)
+ sys.exit(subprocess.call([str(binary)] + sys.argv[1:]))
- # Execute the binary with all arguments
- exit_code = subprocess.call([str(binary)] + sys.argv[1:])
- # Exit with the same code as the binary
- sys.exit(exit_code)
+def run_pilotctl() -> None:
+ """Entry point for the ``pilotctl`` console script."""
+ _exec_runtime_binary("pilotctl")
-def run_updater():
- """Entry point for pilot-updater auto-update sidecar.
+def run_daemon() -> None:
+ """Entry point for the ``pilot-daemon`` console script.
- This is called when the user runs 'pilot-updater' from the command line.
- All arguments are passed through to the Go binary.
+ Note: the daemon needs an email address (passed via ``--email`` or
+ set in ``~/.pilot/config.json``) to register at the registry. The
+ SDK does not auto-prompt for one — call::
- Example:
- $ pilot-updater -install-dir ~/.pilot/bin
+ pilotctl daemon start --email you@example.com
+
+ on first launch, after which the email is cached in ``config.json``.
"""
- # Ensure environment is set up
- _ensure_pilot_env()
+ _exec_runtime_binary("pilot-daemon")
+
- # Get path to bundled binary
- binary = _get_binary_path("pilot-updater")
+def run_gateway() -> None:
+ """Entry point for the ``pilot-gateway`` console script."""
+ _exec_runtime_binary("pilot-gateway")
- # Execute the binary with all arguments
- exit_code = subprocess.call([str(binary)] + sys.argv[1:])
- # Exit with the same code as the binary
- sys.exit(exit_code)
+def run_updater() -> None:
+ """Entry point for the ``pilot-updater`` console script."""
+ _exec_runtime_binary("pilot-updater")
diff --git a/sdk/python/pilotprotocol/client.py b/sdk/python/pilotprotocol/client.py
index 556d032c..a256bf19 100644
--- a/sdk/python/pilotprotocol/client.py
+++ b/sdk/python/pilotprotocol/client.py
@@ -101,8 +101,24 @@ def _find_library() -> str:
def _load_lib() -> ctypes.CDLL: # pragma: no cover
- path = _find_library()
- return ctypes.CDLL(path)
+ """Load libpilot.
+
+ Order:
+ 1. ``PILOT_LIB_PATH`` (explicit override) — bypasses the seeder.
+ 2. The seeded library at ``~/.pilot/bin/`` (canonical runtime).
+ 3. Legacy fallback via :func:`_find_library` (system search etc.).
+ """
+ env = os.environ.get("PILOT_LIB_PATH")
+ if env:
+ return ctypes.CDLL(_find_library())
+
+ try:
+ from ._runtime import runtime_library
+ return ctypes.CDLL(str(runtime_library()))
+ except Exception:
+ # Seeder failed (read-only home, etc.) — fall back to legacy lookup
+ # so the SDK still loads from the wheel-bundled location.
+ return ctypes.CDLL(_find_library())
_lib: Optional[ctypes.CDLL] = None
@@ -168,8 +184,10 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover
# JSON-RPC (single *C.char return → c_void_p)
for name in (
- "PilotInfo", "PilotPendingHandshakes", "PilotTrustedPeers",
+ "PilotInfo", "PilotHealth", "PilotRotateKey",
+ "PilotPendingHandshakes", "PilotTrustedPeers",
"PilotDeregister", "PilotRecvFrom",
+ "PilotNetworkList", "PilotNetworkPollInvites",
):
fn = getattr(lib, name)
fn.argtypes = [ctypes.c_uint64]
@@ -209,6 +227,9 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover
lib.PilotDial.argtypes = [ctypes.c_uint64, ctypes.c_char_p]
lib.PilotDial.restype = _HandleErr
+ lib.PilotDialTimeout.argtypes = [ctypes.c_uint64, ctypes.c_char_p, ctypes.c_uint64]
+ lib.PilotDialTimeout.restype = _HandleErr
+
# Listen: (handle, uint16) -> struct{handle, err}
lib.PilotListen.argtypes = [ctypes.c_uint64, ctypes.c_uint16]
lib.PilotListen.restype = _HandleErr
@@ -220,7 +241,7 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover
lib.PilotListenerClose.argtypes = [ctypes.c_uint64]
lib.PilotListenerClose.restype = ctypes.c_void_p
- # Conn Read / Write / Close
+ # Conn Read / Write / Close / SetReadDeadline
lib.PilotConnRead.argtypes = [ctypes.c_uint64, ctypes.c_int]
lib.PilotConnRead.restype = _ReadResult
@@ -230,10 +251,69 @@ def _setup_signatures(lib: ctypes.CDLL) -> None: # pragma: no cover
lib.PilotConnClose.argtypes = [ctypes.c_uint64]
lib.PilotConnClose.restype = ctypes.c_void_p
+ lib.PilotConnSetReadDeadline.argtypes = [ctypes.c_uint64, ctypes.c_int64]
+ lib.PilotConnSetReadDeadline.restype = ctypes.c_void_p
+
# SendTo: (handle, string, void*, int) -> *char
lib.PilotSendTo.argtypes = [ctypes.c_uint64, ctypes.c_char_p, ctypes.c_void_p, ctypes.c_int]
lib.PilotSendTo.restype = ctypes.c_void_p
+ # Broadcast: (handle, uint16 net, uint16 port, void* data, int len, *char token) -> *char
+ lib.PilotBroadcast.argtypes = [
+ ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint16,
+ ctypes.c_void_p, ctypes.c_int, ctypes.c_char_p,
+ ]
+ lib.PilotBroadcast.restype = ctypes.c_void_p
+
+ # Networks (handle, uint16) -> *char
+ for name in ("PilotNetworkLeave", "PilotNetworkMembers"):
+ fn = getattr(lib, name)
+ fn.argtypes = [ctypes.c_uint64, ctypes.c_uint16]
+ fn.restype = ctypes.c_void_p
+
+ # PilotNetworkJoin: (handle, uint16, *char token) -> *char
+ lib.PilotNetworkJoin.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_char_p]
+ lib.PilotNetworkJoin.restype = ctypes.c_void_p
+
+ # PilotNetworkInvite: (handle, uint16, uint32) -> *char
+ lib.PilotNetworkInvite.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32]
+ lib.PilotNetworkInvite.restype = ctypes.c_void_p
+
+ # PilotNetworkRespondInvite: (handle, uint16, int) -> *char
+ lib.PilotNetworkRespondInvite.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_int]
+ lib.PilotNetworkRespondInvite.restype = ctypes.c_void_p
+
+ # Managed (handle, uint16) -> *char
+ for name in (
+ "PilotManagedStatus", "PilotManagedRankings",
+ "PilotManagedForceCycle", "PilotManagedReconcile",
+ "PilotPolicyGet",
+ ):
+ fn = getattr(lib, name)
+ fn.argtypes = [ctypes.c_uint64, ctypes.c_uint16]
+ fn.restype = ctypes.c_void_p
+
+ # PilotManagedScore: (handle, uint16 net, uint32 node, int32 delta, *char topic)
+ lib.PilotManagedScore.argtypes = [
+ ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32,
+ ctypes.c_int32, ctypes.c_char_p,
+ ]
+ lib.PilotManagedScore.restype = ctypes.c_void_p
+
+ # PilotPolicySet: (handle, uint16, *char json)
+ lib.PilotPolicySet.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_char_p]
+ lib.PilotPolicySet.restype = ctypes.c_void_p
+
+ # PilotMemberTagsGet: (handle, uint16 net, uint32 node) -> *char
+ lib.PilotMemberTagsGet.argtypes = [ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32]
+ lib.PilotMemberTagsGet.restype = ctypes.c_void_p
+
+ # PilotMemberTagsSet: (handle, uint16 net, uint32 node, *char tagsJson) -> *char
+ lib.PilotMemberTagsSet.argtypes = [
+ ctypes.c_uint64, ctypes.c_uint16, ctypes.c_uint32, ctypes.c_char_p,
+ ]
+ lib.PilotMemberTagsSet.restype = ctypes.c_void_p
+
# ---------------------------------------------------------------------------
# Error helpers
@@ -351,6 +431,23 @@ def close(self) -> None:
if "error" in obj:
raise PilotError(obj["error"])
+ def set_read_deadline(self, deadline: Optional[float]) -> None:
+ """Set the read deadline.
+
+ ``deadline`` is a Unix timestamp in seconds (e.g. ``time.time() + 5``)
+ or ``None`` to clear. After the deadline passes, ``read()`` returns
+ a ``PilotError`` with a "deadline exceeded" message.
+ """
+ if self._closed:
+ raise PilotError("connection closed")
+ if deadline is None:
+ nanos = 0
+ else:
+ nanos = int(deadline * 1_000_000_000)
+ lib = _get_lib()
+ ptr = lib.PilotConnSetReadDeadline(self._h, ctypes.c_int64(nanos))
+ _check_err(ptr)
+
def __enter__(self) -> "Conn":
return self
@@ -472,6 +569,14 @@ def info(self) -> dict[str, Any]:
"""Return the daemon's status information."""
return self._call_json("PilotInfo")
+ def health(self) -> dict[str, Any]:
+ """Lightweight health check from the daemon."""
+ return self._call_json("PilotHealth")
+
+ def rotate_key(self) -> dict[str, Any]:
+ """Rotate the daemon's Ed25519 identity at the registry."""
+ return self._call_json("PilotRotateKey")
+
# -- Handshake / Trust --
def handshake(self, node_id: int, justification: str = "") -> dict[str, Any]:
@@ -540,10 +645,18 @@ def disconnect(self, conn_id: int) -> None:
# -- Streams --
- def dial(self, addr: str) -> Conn:
- """Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT")."""
+ def dial(self, addr: str, timeout: Optional[float] = None) -> Conn:
+ """Open a stream connection to addr (format: "N:XXXX.YYYY.YYYY:PORT").
+
+ If ``timeout`` is given (seconds), the dial is cancelled if the daemon
+ does not respond within that window.
+ """
lib = _get_lib()
- res = lib.PilotDial(self._h, addr.encode())
+ if timeout is None:
+ res = lib.PilotDial(self._h, addr.encode())
+ else:
+ ms = max(0, int(timeout * 1000))
+ res = lib.PilotDialTimeout(self._h, addr.encode(), ctypes.c_uint64(ms))
if res.err:
raw = ctypes.string_at(res.err)
lib.FreeString(res.err)
@@ -576,6 +689,152 @@ def recv_from(self) -> dict[str, Any]:
"""
return self._call_json("PilotRecvFrom")
+ def broadcast(
+ self,
+ network_id: int,
+ port: int,
+ data: bytes,
+ admin_token: str,
+ ) -> None:
+ """Broadcast an unreliable datagram to every member of a network.
+
+ Requires the daemon's admin token; an empty or mismatched token is
+ rejected. Permitted on every network including network 0 (backbone).
+ """
+ lib = _get_lib()
+ buf = ctypes.create_string_buffer(data)
+ ptr = lib.PilotBroadcast(
+ self._h,
+ ctypes.c_uint16(network_id),
+ ctypes.c_uint16(port),
+ buf,
+ ctypes.c_int(len(data)),
+ admin_token.encode(),
+ )
+ _check_err(ptr)
+
+ # -- Networks --
+
+ def network_list(self) -> dict[str, Any]:
+ """List all networks known to the registry."""
+ return self._call_json("PilotNetworkList")
+
+ def network_join(self, network_id: int, token: str = "") -> dict[str, Any]:
+ """Join a network by ID, optionally with a token for token-gated networks."""
+ return self._call_json(
+ "PilotNetworkJoin", ctypes.c_uint16(network_id), token.encode()
+ )
+
+ def network_leave(self, network_id: int) -> dict[str, Any]:
+ """Leave a network by ID."""
+ return self._call_json("PilotNetworkLeave", ctypes.c_uint16(network_id))
+
+ def network_members(self, network_id: int) -> dict[str, Any]:
+ """List all members of a network."""
+ return self._call_json("PilotNetworkMembers", ctypes.c_uint16(network_id))
+
+ def network_invite(self, network_id: int, target_node_id: int) -> dict[str, Any]:
+ """Invite a target node to a network (requires admin token on daemon)."""
+ return self._call_json(
+ "PilotNetworkInvite",
+ ctypes.c_uint16(network_id),
+ ctypes.c_uint32(target_node_id),
+ )
+
+ def network_poll_invites(self) -> dict[str, Any]:
+ """Return pending network invites for this node."""
+ return self._call_json("PilotNetworkPollInvites")
+
+ def network_respond_invite(self, network_id: int, accept: bool) -> dict[str, Any]:
+ """Accept or reject a pending network invite."""
+ return self._call_json(
+ "PilotNetworkRespondInvite",
+ ctypes.c_uint16(network_id),
+ ctypes.c_int(1 if accept else 0),
+ )
+
+ # -- Managed networks --
+
+ def managed_score(
+ self,
+ network_id: int,
+ node_id: int,
+ delta: int,
+ topic: str = "",
+ ) -> dict[str, Any]:
+ """Adjust a peer's score in a managed network."""
+ return self._call_json(
+ "PilotManagedScore",
+ ctypes.c_uint16(network_id),
+ ctypes.c_uint32(node_id),
+ ctypes.c_int32(delta),
+ topic.encode(),
+ )
+
+ def managed_status(self, network_id: int) -> dict[str, Any]:
+ """Return the status of a managed network engine."""
+ return self._call_json("PilotManagedStatus", ctypes.c_uint16(network_id))
+
+ def managed_rankings(self, network_id: int) -> dict[str, Any]:
+ """Return ranked peers in a managed network."""
+ return self._call_json("PilotManagedRankings", ctypes.c_uint16(network_id))
+
+ def managed_force_cycle(self, network_id: int) -> dict[str, Any]:
+ """Force a prune/fill cycle in a managed network."""
+ return self._call_json("PilotManagedForceCycle", ctypes.c_uint16(network_id))
+
+ def managed_reconcile(self, network_id: int) -> dict[str, Any]:
+ """Refresh the managed network's peer set without running a policy cycle."""
+ return self._call_json("PilotManagedReconcile", ctypes.c_uint16(network_id))
+
+ # -- Policy --
+
+ def policy_get(self, network_id: int) -> dict[str, Any]:
+ """Retrieve the active policy for a network."""
+ return self._call_json("PilotPolicyGet", ctypes.c_uint16(network_id))
+
+ def policy_set(self, network_id: int, policy: Any) -> dict[str, Any]:
+ """Apply a policy document to a network.
+
+ ``policy`` may be a dict, a JSON string, or pre-encoded bytes.
+ """
+ if isinstance(policy, (bytes, bytearray)):
+ payload = bytes(policy)
+ elif isinstance(policy, str):
+ payload = policy.encode()
+ else:
+ payload = json.dumps(policy).encode()
+ return self._call_json(
+ "PilotPolicySet", ctypes.c_uint16(network_id), payload
+ )
+
+ # -- Member tags --
+
+ def member_tags_get(self, network_id: int, node_id: int) -> dict[str, Any]:
+ """Retrieve admin-assigned member tags for a node in a network."""
+ return self._call_json(
+ "PilotMemberTagsGet",
+ ctypes.c_uint16(network_id),
+ ctypes.c_uint32(node_id),
+ )
+
+ def member_tags_set(
+ self, network_id: int, node_id: int, tags: list[str]
+ ) -> dict[str, Any]:
+ """Set admin-assigned member tags for a node in a network."""
+ return self._call_json(
+ "PilotMemberTagsSet",
+ ctypes.c_uint16(network_id),
+ ctypes.c_uint32(node_id),
+ json.dumps(tags).encode(),
+ )
+
+ # -- Identity --
+
+ def rotate_identity(self) -> dict[str, Any]:
+ """Alias for :meth:`rotate_key`."""
+ return self.rotate_key()
+
# -- High-level service methods --
def send_message(self, target: str, data: bytes, msg_type: str = "text") -> dict[str, Any]:
diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml
index 52eb9a03..bf89dcca 100644
--- a/sdk/python/pyproject.toml
+++ b/sdk/python/pyproject.toml
@@ -9,12 +9,13 @@ include-package-data = true
[tool.setuptools.package-data]
pilotprotocol = [
"bin/*",
+ "bin/.pilot-version",
"py.typed"
]
[project]
name = "pilotprotocol"
-version = "0.1.1" # Auto-updated by CI workflow
+version = "1.9.1" # Auto-updated by CI workflow
description = "Python SDK for Pilot Protocol - the network stack for AI agents"
readme = "README.md"
requires-python = ">=3.10"
@@ -68,7 +69,6 @@ Documentation = "https://pilotprotocol.network/docs/"
Repository = "https://github.com/TeoSlayer/pilotprotocol"
"Bug Tracker" = "https://github.com/TeoSlayer/pilotprotocol/issues"
Changelog = "https://github.com/TeoSlayer/pilotprotocol/blob/main/sdk/python/CHANGELOG.md"
-"Live Dashboard" = "https://polo.pilotprotocol.network"
[project.optional-dependencies]
dev = [
diff --git a/sdk/python/scripts/build-binaries.sh b/sdk/python/scripts/build-binaries.sh
index 2665cbcf..137c7ae1 100755
--- a/sdk/python/scripts/build-binaries.sh
+++ b/sdk/python/scripts/build-binaries.sh
@@ -6,6 +6,9 @@ set -euo pipefail
cd "$(dirname "$0")/../../.." # Go to repo root
+# Read SDK version from pyproject.toml so the seeder marker matches it.
+SDK_VERSION=$(awk -F\" '/^version = /{print $2; exit}' sdk/python/pyproject.toml)
+
# Detect platform
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)
@@ -63,6 +66,27 @@ cd ../..
echo " ✓ Built: $OUTPUT_DIR/libpilot.$EXT"
echo ""
+# 6. Write .pilot-version marker so the runtime seeder can compare against
+# whatever's already installed at ~/.pilot/bin/.
+echo "$SDK_VERSION" > "$OUTPUT_DIR/.pilot-version"
+echo "6. Wrote $OUTPUT_DIR/.pilot-version → $SDK_VERSION"
+echo ""
+
+# 7. macOS ad-hoc codesign + strip quarantine. Mirrors the main release
+# workflow so SDK-shipped binaries don't trigger Gatekeeper "killed: 9"
+# or "cannot be opened because Apple cannot check it for malicious
+# software" when downloaded via pip.
+if [ "$OS" = "darwin" ]; then
+ echo "7. macOS ad-hoc codesign + strip quarantine..."
+ for bin in "$OUTPUT_DIR/pilot-daemon" "$OUTPUT_DIR/pilotctl" "$OUTPUT_DIR/pilot-gateway" "$OUTPUT_DIR/pilot-updater" "$OUTPUT_DIR/libpilot.$EXT"; do
+ codesign --force --deep --sign - "$bin"
+ xattr -cr "$bin" || true
+ codesign -dv "$bin" 2>&1 | grep -E "Signature|Authority|TeamIdentifier" | head -1 || true
+ done
+ echo " ✓ codesigned ${OS} binaries"
+ echo ""
+fi
+
# Show sizes
echo "================================================================"
echo "Build Summary:"
diff --git a/sdk/python/tests/smoke_list_agents.py b/sdk/python/tests/smoke_list_agents.py
new file mode 100644
index 00000000..60bbbbeb
--- /dev/null
+++ b/sdk/python/tests/smoke_list_agents.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""End-to-end smoke test for the Python SDK against a real daemon.
+
+Test plan (run against the locally running pilot daemon):
+1. Construct ``Driver`` — proves the seeder wired ``libpilot.dylib`` correctly.
+2. Call ``info()`` — confirms the JSON-RPC path works.
+3. Idempotently handshake the list-agents host (already trusted is OK).
+4. ``send_message(target='list-agents', data='/data {...}', msg_type='text')``
+ — exercises hostname resolve + dial + frame protocol.
+5. Wait for the asynchronous reply to land in ``~/.pilot/inbox/`` and print
+ a digest of the highest-tier specialist count.
+
+The script exits 0 on success, non-zero on any failure. It writes the
+reply file path to stdout so a caller can grep for it.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import time
+from pathlib import Path
+
+# Allow running straight from a source checkout.
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE.parent))
+
+from pilotprotocol import Driver, PilotError # noqa: E402
+
+LIST_AGENTS_HOST = "list-agents"
+LIST_AGENTS_NODE_ID = 16398
+INBOX_DIR = Path.home() / ".pilot" / "inbox"
+WAIT_SECONDS = 8
+
+
+def _newest_inbox_file(after_mtime: float) -> Path | None:
+ if not INBOX_DIR.is_dir():
+ return None
+ candidates = []
+ for f in INBOX_DIR.glob("*.json"):
+ try:
+ st = f.stat()
+ except OSError:
+ continue
+ if st.st_mtime > after_mtime:
+ candidates.append((st.st_mtime, f))
+ if not candidates:
+ return None
+ candidates.sort(reverse=True)
+ return candidates[0][1]
+
+
+def main() -> int:
+ print("[1/5] Constructing Driver…")
+ try:
+ d = Driver()
+ except PilotError as e:
+ print(f" FAIL: cannot reach daemon: {e}")
+ return 2
+ print(" OK")
+
+ print("[2/5] Calling info()…")
+ info = d.info()
+ print(f" node_id={info.get('node_id')} addr={info.get('address')} peers={info.get('peers')}")
+
+ print(f"[3/5] Handshake list-agents (node {LIST_AGENTS_NODE_ID})…")
+ try:
+ h = d.handshake(LIST_AGENTS_NODE_ID, "python sdk smoke test")
+ print(f" OK: {h}")
+ except PilotError as e:
+ # Already trusted is acceptable.
+ msg = str(e).lower()
+ if "already" in msg or "trust" in msg:
+ print(f" OK (already trusted): {e}")
+ else:
+ print(f" FAIL: {e}")
+ return 3
+
+ print("[4/5] send_message → list-agents …")
+ record_mtime = time.time() - 1
+ try:
+ result = d.send_message(
+ LIST_AGENTS_HOST,
+ b'/data {"search":"","limit":1}',
+ msg_type="text",
+ )
+ except PilotError as e:
+ print(f" FAIL: send_message: {e}")
+ return 4
+ print(f" sent: {result}")
+
+ print(f"[5/5] Waiting up to {WAIT_SECONDS}s for inbox reply…")
+ deadline = time.time() + WAIT_SECONDS
+ reply_file: Path | None = None
+ while time.time() < deadline:
+ reply_file = _newest_inbox_file(record_mtime)
+ if reply_file is not None:
+ break
+ time.sleep(0.5)
+ if reply_file is None:
+ print(" FAIL: no inbox reply within window")
+ return 5
+
+ print(f" reply file: {reply_file}")
+ try:
+ envelope = json.loads(reply_file.read_text())
+ except (OSError, ValueError) as e:
+ print(f" FAIL: cannot parse reply: {e}")
+ return 6
+
+ print(f" agent={envelope.get('agent')} command={envelope.get('command')} ok={envelope.get('ok')}")
+
+ # Try to extract the total count if the payload is a list-agents response.
+ raw = envelope.get("data")
+ if isinstance(raw, str):
+ try:
+ payload = json.loads(raw)
+ total = payload.get("total") or payload.get("count")
+ if total is None:
+ items = payload.get("tiers", {}).get("free", {}).get("items", [])
+ total = len(items)
+ print(f" list-agents total: {total}")
+ except (ValueError, AttributeError):
+ print(" (data not JSON; envelope OK)")
+
+ d.close()
+ print("\nSMOKE TEST PASSED (python)")
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py
index e8def76d..ad4be836 100644
--- a/sdk/python/tests/test_client.py
+++ b/sdk/python/tests/test_client.py
@@ -66,6 +66,17 @@ def _mock_write_result(n: int = 0, err: bytes | None = None):
return types.SimpleNamespace(n=n, err=err)
+def _unwrap(x):
+ """Coerce a ctypes-wrapped scalar into its plain Python value.
+
+ The Driver wraps ints in ctypes types (c_uint16, c_int32, etc.) before
+ calling into the C library. Real ctypes converts those to plain ints at
+ the FFI boundary, but our FakeLib receives them as objects, so we strip
+ the wrapper here for clean assertions.
+ """
+ return x.value if hasattr(x, "value") else x
+
+
class FakeLib:
"""Mimics the ctypes.CDLL object with controllable return values."""
@@ -161,6 +172,101 @@ def PilotConnClose(self, ch):
def PilotSendTo(self, h, addr, data, data_len):
return None
+ # --- 1.9.1 additions ---
+
+ def PilotHealth(self, h):
+ return self._json_returns.get("PilotHealth", _json_ok({"ok": True, "uptime_s": 42}))
+
+ def PilotRotateKey(self, h):
+ return self._json_returns.get("PilotRotateKey", _json_ok({"new_pubkey": "abc"}))
+
+ def PilotDialTimeout(self, h, addr, timeout_ms):
+ # capture for assertions
+ self._last_dial_timeout = (addr, _unwrap(timeout_ms))
+ return _HandleErr(handle=11, err=None)
+
+ def PilotConnSetReadDeadline(self, h, deadline_unix_nanos):
+ # capture deadline for assertions
+ self._last_set_read_deadline = _unwrap(deadline_unix_nanos)
+ return None
+
+ def PilotBroadcast(self, h, network_id, port, data, data_len, admin_token):
+ self._last_broadcast = {
+ "network_id": _unwrap(network_id),
+ "port": _unwrap(port),
+ "data_len": _unwrap(data_len),
+ "admin_token": admin_token,
+ }
+ return self._json_returns.get("PilotBroadcast", None)
+
+ def PilotNetworkList(self, h):
+ return self._json_returns.get("PilotNetworkList", _json_ok({"networks": [{"id": 0}]}))
+
+ def PilotNetworkJoin(self, h, network_id, token):
+ self._last_network_join = (_unwrap(network_id), token)
+ return self._json_returns.get("PilotNetworkJoin", _json_ok({"status": "joined"}))
+
+ def PilotNetworkLeave(self, h, network_id):
+ return self._json_returns.get("PilotNetworkLeave", _json_ok({"status": "left"}))
+
+ def PilotNetworkMembers(self, h, network_id):
+ return self._json_returns.get("PilotNetworkMembers", _json_ok({"members": []}))
+
+ def PilotNetworkInvite(self, h, network_id, target_node_id):
+ self._last_network_invite = (_unwrap(network_id), _unwrap(target_node_id))
+ return self._json_returns.get("PilotNetworkInvite", _json_ok({"status": "invited"}))
+
+ def PilotNetworkPollInvites(self, h):
+ return self._json_returns.get("PilotNetworkPollInvites", _json_ok({"invites": []}))
+
+ def PilotNetworkRespondInvite(self, h, network_id, accept):
+ self._last_network_respond = (_unwrap(network_id), _unwrap(accept))
+ return self._json_returns.get(
+ "PilotNetworkRespondInvite", _json_ok({"status": "responded"})
+ )
+
+ def PilotManagedScore(self, h, network_id, node_id, delta, topic):
+ self._last_managed_score = (
+ _unwrap(network_id), _unwrap(node_id), _unwrap(delta), topic,
+ )
+ return self._json_returns.get("PilotManagedScore", _json_ok({"status": "ok"}))
+
+ def PilotManagedStatus(self, h, network_id):
+ return self._json_returns.get(
+ "PilotManagedStatus", _json_ok({"network_id": _unwrap(network_id)})
+ )
+
+ def PilotManagedRankings(self, h, network_id):
+ return self._json_returns.get("PilotManagedRankings", _json_ok({"rankings": []}))
+
+ def PilotManagedForceCycle(self, h, network_id):
+ return self._json_returns.get("PilotManagedForceCycle", _json_ok({"status": "cycled"}))
+
+ def PilotManagedReconcile(self, h, network_id):
+ return self._json_returns.get(
+ "PilotManagedReconcile",
+ _json_ok({"network_id": _unwrap(network_id), "peers": []}),
+ )
+
+ def PilotPolicyGet(self, h, network_id):
+ return self._json_returns.get(
+ "PilotPolicyGet",
+ _json_ok({"network_id": _unwrap(network_id), "policy": {}}),
+ )
+
+ def PilotPolicySet(self, h, network_id, policy_json):
+ self._last_policy_set = (_unwrap(network_id), policy_json)
+ return self._json_returns.get("PilotPolicySet", _json_ok({"status": "applied"}))
+
+ def PilotMemberTagsGet(self, h, network_id, node_id):
+ return self._json_returns.get("PilotMemberTagsGet", _json_ok({"tags": []}))
+
+ def PilotMemberTagsSet(self, h, network_id, node_id, tags_json):
+ self._last_member_tags_set = (
+ _unwrap(network_id), _unwrap(node_id), tags_json,
+ )
+ return self._json_returns.get("PilotMemberTagsSet", _json_ok({"status": "ok"}))
+
@pytest.fixture(autouse=True)
def _mock_lib(monkeypatch):
@@ -624,8 +730,301 @@ def test_del_calls_close(self, fake_lib):
def test_del_catches_exceptions(self, fake_lib):
"""Test Listener.__del__ catches close() exceptions."""
fake_lib.PilotListenerClose = lambda h: _json_err("error")
-
+
ln = client_mod.Listener(20)
# Should not raise even though close() would raise
ln.__del__()
assert ln._closed
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: health / rotate-key
+# ---------------------------------------------------------------------------
+
+class TestDriverHealth:
+ def test_health_success(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.health()
+ assert r["ok"] is True
+ assert r["uptime_s"] == 42
+
+ def test_health_error(self, fake_lib):
+ fake_lib._json_returns["PilotHealth"] = _json_err("daemon down")
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="daemon down"):
+ d.health()
+
+
+class TestDriverRotateKey:
+ def test_rotate_key(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.rotate_key()
+ assert r["new_pubkey"] == "abc"
+
+ def test_rotate_identity_alias(self, fake_lib):
+ d = client_mod.Driver()
+ # rotate_identity should delegate to rotate_key
+ r = d.rotate_identity()
+ assert r["new_pubkey"] == "abc"
+
+ def test_rotate_key_error(self, fake_lib):
+ fake_lib._json_returns["PilotRotateKey"] = _json_err("registry rejected")
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="registry rejected"):
+ d.rotate_key()
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: dial timeout
+# ---------------------------------------------------------------------------
+
+class TestDriverDialTimeout:
+ def test_dial_without_timeout_uses_pilot_dial(self, fake_lib):
+ # No timeout → original PilotDial path (handle=10)
+ d = client_mod.Driver()
+ conn = d.dial("0:0001.0000.0002:8080")
+ assert conn._h == 10
+
+ def test_dial_with_timeout_uses_pilot_dial_timeout(self, fake_lib):
+ d = client_mod.Driver()
+ conn = d.dial("0:0001.0000.0002:8080", timeout=2.5)
+ # Timeout path returns handle=11
+ assert conn._h == 11
+ # 2.5 s = 2500 ms
+ assert fake_lib._last_dial_timeout == (b"0:0001.0000.0002:8080", 2500)
+
+ def test_dial_timeout_zero_floor(self, fake_lib):
+ d = client_mod.Driver()
+ d.dial("0:0001.0000.0002:8080", timeout=-1.0)
+ # Negative → clamped to 0 ms
+ _, ms = fake_lib._last_dial_timeout
+ assert ms == 0
+
+ def test_dial_timeout_error(self, fake_lib):
+ fake_lib.PilotDialTimeout = lambda h, addr, ms: _mock_handle_err(
+ handle=0, err=_json_err("dial timeout")
+ )
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="dial timeout"):
+ d.dial("bad:addr", timeout=1.0)
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: Conn.set_read_deadline
+# ---------------------------------------------------------------------------
+
+class TestConnReadDeadline:
+ def test_clear_deadline_with_none(self, fake_lib):
+ conn = client_mod.Conn(10)
+ conn.set_read_deadline(None)
+ assert fake_lib._last_set_read_deadline == 0
+
+ def test_set_deadline_seconds_to_nanos(self, fake_lib):
+ conn = client_mod.Conn(10)
+ # 1700000000.5 s → 1_700_000_000_500_000_000 ns
+ conn.set_read_deadline(1_700_000_000.5)
+ assert fake_lib._last_set_read_deadline == 1_700_000_000_500_000_000
+
+ def test_set_deadline_on_closed_conn_raises(self, fake_lib):
+ conn = client_mod.Conn(10)
+ conn.close()
+ with pytest.raises(PilotError, match="closed"):
+ conn.set_read_deadline(0.0)
+
+ def test_set_deadline_propagates_error(self, fake_lib):
+ fake_lib.PilotConnSetReadDeadline = lambda h, d: _json_err("bad handle")
+ conn = client_mod.Conn(10)
+ with pytest.raises(PilotError, match="bad handle"):
+ conn.set_read_deadline(None)
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: broadcast
+# ---------------------------------------------------------------------------
+
+class TestDriverBroadcast:
+ def test_broadcast_passes_args(self, fake_lib):
+ d = client_mod.Driver()
+ d.broadcast(7, 1234, b"hello", "secret")
+ captured = fake_lib._last_broadcast
+ assert captured["network_id"] == 7
+ assert captured["port"] == 1234
+ assert captured["data_len"] == 5
+ assert captured["admin_token"] == b"secret"
+
+ def test_broadcast_propagates_error(self, fake_lib):
+ fake_lib._json_returns["PilotBroadcast"] = _json_err("admin token required")
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="admin token required"):
+ d.broadcast(0, 9000, b"x", "")
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: networks
+# ---------------------------------------------------------------------------
+
+class TestDriverNetworks:
+ def test_network_list(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.network_list()
+ assert "networks" in r
+
+ def test_network_join_passes_args(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.network_join(7, "joinme")
+ assert r["status"] == "joined"
+ assert fake_lib._last_network_join == (7, b"joinme")
+
+ def test_network_join_default_empty_token(self, fake_lib):
+ d = client_mod.Driver()
+ d.network_join(2)
+ assert fake_lib._last_network_join == (2, b"")
+
+ def test_network_leave(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.network_leave(7)
+ assert r["status"] == "left"
+
+ def test_network_members(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.network_members(7)
+ assert "members" in r
+
+ def test_network_invite(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.network_invite(7, 4242)
+ assert r["status"] == "invited"
+ assert fake_lib._last_network_invite == (7, 4242)
+
+ def test_network_poll_invites(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.network_poll_invites()
+ assert "invites" in r
+
+ def test_network_respond_invite_accept(self, fake_lib):
+ d = client_mod.Driver()
+ d.network_respond_invite(7, True)
+ assert fake_lib._last_network_respond == (7, 1)
+
+ def test_network_respond_invite_reject(self, fake_lib):
+ d = client_mod.Driver()
+ d.network_respond_invite(7, False)
+ assert fake_lib._last_network_respond == (7, 0)
+
+ def test_network_join_error(self, fake_lib):
+ fake_lib._json_returns["PilotNetworkJoin"] = _json_err("token rejected")
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="token rejected"):
+ d.network_join(7, "wrong")
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: managed networks
+# ---------------------------------------------------------------------------
+
+class TestDriverManaged:
+ def test_managed_score_passes_args(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.managed_score(7, 4242, -3, "spam")
+ assert r["status"] == "ok"
+ assert fake_lib._last_managed_score == (7, 4242, -3, b"spam")
+
+ def test_managed_score_default_topic(self, fake_lib):
+ d = client_mod.Driver()
+ d.managed_score(0, 1, 5)
+ assert fake_lib._last_managed_score == (0, 1, 5, b"")
+
+ def test_managed_score_negative_delta_preserved(self, fake_lib):
+ # int32 delta — make sure negative numbers survive
+ d = client_mod.Driver()
+ d.managed_score(0, 1, -100000, "x")
+ assert fake_lib._last_managed_score[2] == -100000
+
+ def test_managed_status(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.managed_status(42)
+ assert r["network_id"] == 42
+
+ def test_managed_rankings(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.managed_rankings(42)
+ assert "rankings" in r
+
+ def test_managed_force_cycle(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.managed_force_cycle(42)
+ assert r["status"] == "cycled"
+
+ def test_managed_reconcile(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.managed_reconcile(42)
+ assert r["network_id"] == 42
+ assert r["peers"] == []
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: policy
+# ---------------------------------------------------------------------------
+
+class TestDriverPolicy:
+ def test_policy_get(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.policy_get(7)
+ assert r["network_id"] == 7
+
+ def test_policy_set_dict_serializes_to_json(self, fake_lib):
+ d = client_mod.Driver()
+ d.policy_set(7, {"min_score": 3, "tags": ["good"]})
+ net_id, payload = fake_lib._last_policy_set
+ assert net_id == 7
+ # The payload was JSON-serialized
+ assert json.loads(payload) == {"min_score": 3, "tags": ["good"]}
+
+ def test_policy_set_string_passthrough(self, fake_lib):
+ d = client_mod.Driver()
+ d.policy_set(0, '{"raw":true}')
+ _, payload = fake_lib._last_policy_set
+ assert payload == b'{"raw":true}'
+
+ def test_policy_set_bytes_passthrough(self, fake_lib):
+ d = client_mod.Driver()
+ d.policy_set(0, b'{"raw":1}')
+ _, payload = fake_lib._last_policy_set
+ assert payload == b'{"raw":1}'
+
+ def test_policy_set_error(self, fake_lib):
+ fake_lib._json_returns["PilotPolicySet"] = _json_err("invalid policy")
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="invalid policy"):
+ d.policy_set(0, {})
+
+
+# ---------------------------------------------------------------------------
+# 1.9.1 additions: member tags
+# ---------------------------------------------------------------------------
+
+class TestDriverMemberTags:
+ def test_member_tags_get(self, fake_lib):
+ d = client_mod.Driver()
+ r = d.member_tags_get(7, 4242)
+ assert "tags" in r
+
+ def test_member_tags_set_serializes_list(self, fake_lib):
+ d = client_mod.Driver()
+ d.member_tags_set(7, 4242, ["gpu", "fast"])
+ net_id, node_id, tags_json = fake_lib._last_member_tags_set
+ assert net_id == 7
+ assert node_id == 4242
+ assert json.loads(tags_json) == ["gpu", "fast"]
+
+ def test_member_tags_set_empty_list(self, fake_lib):
+ d = client_mod.Driver()
+ d.member_tags_set(7, 4242, [])
+ _, _, tags_json = fake_lib._last_member_tags_set
+ assert json.loads(tags_json) == []
+
+ def test_member_tags_set_error(self, fake_lib):
+ fake_lib._json_returns["PilotMemberTagsSet"] = _json_err("not admin")
+ d = client_mod.Driver()
+ with pytest.raises(PilotError, match="not admin"):
+ d.member_tags_set(7, 1, ["x"])
diff --git a/sdk/python/tests/test_runtime.py b/sdk/python/tests/test_runtime.py
new file mode 100644
index 00000000..7ba31b2f
--- /dev/null
+++ b/sdk/python/tests/test_runtime.py
@@ -0,0 +1,403 @@
+"""Unit tests for the runtime seeder (pilotprotocol/_runtime.py).
+
+These tests exercise the 5 seeder states (missing, older, equal, newer,
+corrupt), the daemon-running guard, the lock contention path, and the
+atomic-rename behavior. They do NOT require a real daemon or libpilot.so;
+the bundled "binaries" are stub files written into a tmpdir.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import platform as platform_mod
+import socket
+import sys
+import tempfile
+import threading
+import time
+from pathlib import Path
+
+import pytest
+
+import pilotprotocol._runtime as rt
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_fake_pkg_bin(tmp: Path, version: str, names: list[str]) -> Path:
+ """Build a fake bundled bin/ directory with stub executables and marker."""
+ pkg = tmp / "pkg-bin"
+ pkg.mkdir(parents=True, exist_ok=True)
+ for n in names:
+ (pkg / n).write_text(f"#!/bin/sh\necho {n} {version}\n")
+ (pkg / n).chmod(0o755)
+ (pkg / ".pilot-version").write_text(version + "\n")
+ return pkg
+
+
+def _platform_lib() -> str:
+ return rt._LIB_NAMES[platform_mod.system()]
+
+
+@pytest.fixture(autouse=True)
+def _isolate(tmp_path, monkeypatch):
+ """Redirect ~/.pilot/ to a tmpdir and the package bin/ to another.
+
+ Also stubs the daemon-liveness probe to "not running" so tests do not
+ pick up the real pilot daemon that may be running on the developer
+ machine. Tests that need the probe enabled re-monkeypatch ``_daemon_running``.
+ """
+ fake_home = tmp_path / "home"
+ fake_home.mkdir()
+ monkeypatch.setenv("PILOT_HOME", str(fake_home / ".pilot"))
+
+ pkg = _make_fake_pkg_bin(
+ tmp_path,
+ "1.9.1",
+ list(rt._BIN_NAMES) + [_platform_lib()],
+ )
+ monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg)
+ monkeypatch.setattr(rt, "_daemon_running", lambda: False)
+ rt.reset_seeded_marker()
+ yield {"home": fake_home, "pkg": pkg, "tmp": tmp_path, "monkeypatch": monkeypatch}
+ rt.reset_seeded_marker()
+
+
+# ---------------------------------------------------------------------------
+# State machine
+# ---------------------------------------------------------------------------
+
+class TestSeederStates:
+ def test_missing_seeds_everything(self, _isolate):
+ report = rt.run_seeder()
+ assert report.action == "seed"
+ # All four executables + libpilot should be copied
+ assert set(report.copied) == set(rt._BIN_NAMES) | {_platform_lib()}
+ assert report.skipped == []
+
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ for name in report.copied:
+ assert (rtbin / name).is_file(), f"{name} not seeded"
+ assert (rtbin / ".pilot-version").read_text().strip() == "1.9.1"
+
+ def test_equal_version_is_noop(self, _isolate):
+ # First pass seeds.
+ rt.run_seeder()
+ rt.reset_seeded_marker()
+
+ # Second pass with identical bundled version → noop.
+ report = rt.run_seeder()
+ assert report.action == "noop"
+ assert report.copied == []
+
+ def test_older_bundle_does_not_downgrade(self, _isolate, tmp_path, monkeypatch):
+ # Seed at 1.9.1
+ rt.run_seeder()
+ rt.reset_seeded_marker()
+
+ # Replace the package bin/ with a 1.8.0 build.
+ pkg = _make_fake_pkg_bin(
+ tmp_path / "older",
+ "1.8.0",
+ list(rt._BIN_NAMES) + [_platform_lib()],
+ )
+ monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg)
+
+ report = rt.run_seeder()
+ assert report.action == "noop"
+ assert report.copied == []
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ assert (rtbin / ".pilot-version").read_text().strip() == "1.9.1"
+
+ def test_newer_bundle_upgrades(self, _isolate, tmp_path, monkeypatch):
+ rt.run_seeder()
+ rt.reset_seeded_marker()
+
+ pkg = _make_fake_pkg_bin(
+ tmp_path / "newer",
+ "2.0.0",
+ list(rt._BIN_NAMES) + [_platform_lib()],
+ )
+ monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg)
+
+ report = rt.run_seeder()
+ assert report.action == "upgrade"
+ assert set(report.copied) == set(rt._BIN_NAMES) | {_platform_lib()}
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ assert (rtbin / ".pilot-version").read_text().strip() == "2.0.0"
+ # Content actually replaced
+ assert "2.0.0" in (rtbin / "pilotctl").read_text()
+
+ def test_corrupt_runtime_re_seeds_missing_files(self, _isolate):
+ rt.run_seeder()
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ # Simulate corruption: delete pilotctl but leave the marker.
+ (rtbin / "pilotctl").unlink()
+ rt.reset_seeded_marker()
+
+ report = rt.run_seeder()
+ # Same version, but a file was missing → seeder noticed and re-seeded.
+ assert "pilotctl" in report.copied
+ assert (rtbin / "pilotctl").is_file()
+
+
+# ---------------------------------------------------------------------------
+# Daemon-running guard
+# ---------------------------------------------------------------------------
+
+class TestDaemonGuard:
+ def test_skips_pilot_daemon_when_socket_live(self, _isolate, monkeypatch, tmp_path):
+ # First seed normally so pilot-daemon exists.
+ rt.run_seeder()
+ rt.reset_seeded_marker()
+
+ # Replace package with a newer version.
+ pkg = _make_fake_pkg_bin(
+ tmp_path / "newer",
+ "2.0.0",
+ list(rt._BIN_NAMES) + [_platform_lib()],
+ )
+ monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg)
+
+ # Stub _daemon_running → True.
+ monkeypatch.setattr(rt, "_daemon_running", lambda: True)
+
+ report = rt.run_seeder()
+ assert "pilot-daemon" in report.skipped
+ assert "pilot-daemon" not in report.copied
+ # Other binaries still upgrade.
+ assert "pilotctl" in report.copied
+ assert report.action == "daemon-skip"
+
+ def test_first_install_seeds_daemon_even_if_socket_present(
+ self, _isolate, monkeypatch
+ ):
+ # No prior install. Even with daemon "running" (somehow), there's
+ # no existing pilot-daemon to preserve, so we seed fresh.
+ monkeypatch.setattr(rt, "_daemon_running", lambda: True)
+ report = rt.run_seeder()
+ assert "pilot-daemon" in report.copied
+
+
+class TestDaemonProbe:
+ """Direct tests of _daemon_running. The fixture stubs it to False, so
+ these tests un-stub by importing the module fresh and re-resolving."""
+
+ def _real_daemon_running(self, _isolate):
+ # Replace config to point socket somewhere we control.
+ cfg_path = _isolate["home"] / ".pilot" / "config.json"
+ return cfg_path
+
+ def test_no_socket_means_not_running(self, _isolate):
+ cfg = self._real_daemon_running(_isolate)
+ cfg.parent.mkdir(parents=True, exist_ok=True)
+ cfg.write_text(json.dumps({"socket": str(_isolate["tmp"] / "no.sock")}))
+ # Importlib-reload to bypass the autouse monkeypatch on the symbol.
+ # Easier: call the original via __wrapped__ — but we don't have it.
+ # Cleanest: import the function from the module directly under
+ # a different binding.
+ import pilotprotocol._runtime as rt_mod
+ # Save and restore.
+ stub = rt_mod._daemon_running
+ orig = type(stub).__name__ # noqa: F841 — debug breadcrumb
+ # Recover the original from the module dict (we never deleted it).
+ # The fixture set rt._daemon_running to a lambda; the function is
+ # still bound at module import time only via attribute access. To
+ # get the original, we need to undo the monkeypatch.
+ _isolate["monkeypatch"].setattr(rt_mod, "_daemon_running", _orig_daemon_running)
+ assert rt_mod._daemon_running() is False
+
+ def test_unconnectable_socket_means_not_running(self, _isolate, tmp_path):
+ cfg = self._real_daemon_running(_isolate)
+ cfg.parent.mkdir(parents=True, exist_ok=True)
+ sock_path = tmp_path / "fake.sock"
+ sock_path.touch()
+ cfg.write_text(json.dumps({"socket": str(sock_path)}))
+ _isolate["monkeypatch"].setattr(rt, "_daemon_running", _orig_daemon_running)
+ assert rt._daemon_running() is False
+
+ def test_listening_socket_means_running(self, _isolate):
+ cfg = self._real_daemon_running(_isolate)
+ cfg.parent.mkdir(parents=True, exist_ok=True)
+ # AF_UNIX has a ~104 char path limit on macOS, so use a short
+ # tmpdir under /tmp rather than the very long pytest tmp_path.
+ short = Path(tempfile.mkdtemp(prefix="psk", dir="/tmp"))
+ sock_path = short / "live.sock"
+ cfg.write_text(json.dumps({"socket": str(sock_path)}))
+
+ srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ srv.bind(str(sock_path))
+ srv.listen(1)
+ try:
+ _isolate["monkeypatch"].setattr(rt, "_daemon_running", _orig_daemon_running)
+ assert rt._daemon_running() is True
+ finally:
+ srv.close()
+ sock_path.unlink(missing_ok=True)
+ short.rmdir()
+
+
+# Capture the original _daemon_running once, before any fixture monkeypatches it.
+_orig_daemon_running = rt._daemon_running
+
+
+# ---------------------------------------------------------------------------
+# Atomic install + concurrent seeders
+# ---------------------------------------------------------------------------
+
+class TestAtomicInstall:
+ def test_atomic_replace_survives_existing_target(self, _isolate, tmp_path):
+ rt.run_seeder()
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ # Pretend pilotctl is "running": grab a file handle and overwrite.
+ target = rtbin / "pilotctl"
+ with open(target, "rb") as f:
+ initial = f.read()
+ # Now atomic-install something different.
+ src = tmp_path / "newctl"
+ src.write_text("DIFFERENT\n")
+ rt._atomic_install(src, target)
+ # The held handle still sees the old content (Unix semantics).
+ f.seek(0)
+ assert f.read() == initial
+ # And the on-disk file is the new one.
+ assert target.read_text() == "DIFFERENT\n"
+
+ def test_no_tmp_files_left_behind(self, _isolate):
+ rt.run_seeder()
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ leftovers = list(rtbin.glob("*.tmp.*"))
+ assert leftovers == []
+
+
+class TestConcurrentSeeders:
+ def test_two_threads_only_one_writes(self, _isolate):
+ # Both threads see "missing" state; both attempt to seed; flock
+ # serializes them so the second sees the freshly-seeded marker
+ # and ends up doing a noop. The final state is consistent.
+ results: list[rt.SeedReport] = []
+ barrier = threading.Barrier(2)
+
+ def worker():
+ barrier.wait()
+ rt.reset_seeded_marker()
+ results.append(rt.run_seeder())
+
+ threads = [threading.Thread(target=worker) for _ in range(2)]
+ for t in threads:
+ t.start()
+ for t in threads:
+ t.join(timeout=5)
+
+ # Exactly one thread did the actual seeding; the other was a noop.
+ actions = sorted(r.action for r in results)
+ assert actions in (["noop", "seed"], ["seed", "seed"])
+ # Either way, the runtime is intact.
+ rtbin = _isolate["home"] / ".pilot" / "bin"
+ for name in rt._BIN_NAMES:
+ assert (rtbin / name).is_file()
+
+
+# ---------------------------------------------------------------------------
+# Config + directory bootstrap
+# ---------------------------------------------------------------------------
+
+class TestConfigBootstrap:
+ def test_creates_default_config_when_missing(self, _isolate):
+ rt.run_seeder()
+ cfg_path = _isolate["home"] / ".pilot" / "config.json"
+ assert cfg_path.is_file()
+ cfg = json.loads(cfg_path.read_text())
+ assert cfg["registry"] == rt.DEFAULT_REGISTRY
+ assert cfg["beacon"] == rt.DEFAULT_BEACON
+ assert cfg["socket"] == rt.DEFAULT_SOCKET
+ assert cfg["encrypt"] is True
+ # No email — we never auto-set one; user supplies via daemon start.
+ assert "email" not in cfg
+
+ def test_preserves_existing_config(self, _isolate):
+ cfg_path = _isolate["home"] / ".pilot" / "config.json"
+ cfg_path.parent.mkdir(parents=True, exist_ok=True)
+ cfg_path.write_text(json.dumps({"email": "foo@bar.com", "preserved": True}))
+ rt.run_seeder()
+ cfg = json.loads(cfg_path.read_text())
+ assert cfg.get("preserved") is True
+ assert cfg.get("email") == "foo@bar.com"
+
+
+# ---------------------------------------------------------------------------
+# Wrong-platform package
+# ---------------------------------------------------------------------------
+
+class TestWrongPlatform:
+ def test_missing_lib_does_not_crash_seeder(self, _isolate, tmp_path, monkeypatch):
+ # Build a pkg with executables but no platform lib.
+ pkg = tmp_path / "no-lib"
+ pkg.mkdir()
+ for n in rt._BIN_NAMES:
+ (pkg / n).write_text("stub")
+ (pkg / n).chmod(0o755)
+ (pkg / ".pilot-version").write_text("1.9.1\n")
+ monkeypatch.setattr(rt, "_pkg_bin_dir", lambda: pkg)
+
+ # Seeder runs without exception; the lib name is just absent from copied.
+ report = rt.run_seeder()
+ assert _platform_lib() not in report.copied
+
+ # runtime_library() raises a clear error, since the lib isn't anywhere.
+ with pytest.raises(FileNotFoundError, match="libpilot"):
+ rt.runtime_library()
+
+
+# ---------------------------------------------------------------------------
+# Public entry points
+# ---------------------------------------------------------------------------
+
+class TestPublicEntryPoints:
+ def test_runtime_binary_returns_seeded_path(self, _isolate):
+ p = rt.runtime_binary("pilotctl")
+ assert p == _isolate["home"] / ".pilot" / "bin" / "pilotctl"
+ assert p.is_file()
+
+ def test_runtime_binary_unknown_name_raises(self, _isolate):
+ with pytest.raises(FileNotFoundError, match="bogus"):
+ rt.runtime_binary("bogus")
+
+ def test_runtime_library_seeds_and_returns_path(self, _isolate):
+ p = rt.runtime_library()
+ assert p == _isolate["home"] / ".pilot" / "bin" / _platform_lib()
+ assert p.is_file()
+
+ def test_ensure_runtime_seeded_idempotent_in_process(self, _isolate):
+ rt.ensure_runtime_seeded()
+ # Subsequent calls are short-circuited by the in-process flag.
+ rtbin_marker = _isolate["home"] / ".pilot" / "bin" / ".pilot-version"
+ first_mtime = rtbin_marker.stat().st_mtime
+ time.sleep(0.01)
+ rt.ensure_runtime_seeded()
+ assert rtbin_marker.stat().st_mtime == first_mtime
+
+
+# ---------------------------------------------------------------------------
+# SemVer comparison
+# ---------------------------------------------------------------------------
+
+class TestSemverTuple:
+ def test_basic_parsing(self):
+ assert rt._semver_tuple("1.9.1") == (1, 9, 1)
+ assert rt._semver_tuple("v1.9.1") == (1, 9, 1)
+ assert rt._semver_tuple("1.9.1-rc4") == (1, 9, 1)
+ assert rt._semver_tuple("1.9.1+meta") == (1, 9, 1)
+
+ def test_unparseable_returns_empty_tuple(self):
+ assert rt._semver_tuple("") == ()
+ assert rt._semver_tuple("garbage") == ()
+ assert rt._semver_tuple("1.x.0") == ()
+
+ def test_ordering(self):
+ assert rt._semver_tuple("1.9.1") > rt._semver_tuple("1.9.0")
+ assert rt._semver_tuple("2.0.0") > rt._semver_tuple("1.9.99")
+ assert rt._semver_tuple("1.9.1") == rt._semver_tuple("1.9.1")
diff --git a/tests/bench_concurrent_test.go b/tests/bench_concurrent_test.go
index 4aa17dd5..4a5cca73 100644
--- a/tests/bench_concurrent_test.go
+++ b/tests/bench_concurrent_test.go
@@ -9,7 +9,7 @@ import (
)
// BenchmarkConcurrentStreams5 measures aggregate throughput across 5 simultaneous streams.
-func BenchmarkConcurrentStreams5(b *testing.B) { runConcurrentBench(b, 5) }
+func BenchmarkConcurrentStreams5(b *testing.B) { runConcurrentBench(b, 5) }
// BenchmarkConcurrentStreams10 measures aggregate throughput across 10 simultaneous streams.
func BenchmarkConcurrentStreams10(b *testing.B) { runConcurrentBench(b, 10) }
@@ -35,7 +35,10 @@ func runConcurrentBench(b *testing.B, n int) {
// Pre-create all n listeners on B, one per stream port.
// Ports benchPort … benchPort+n-1 (benchPort=9201, max n=25 → 9225).
type listenerState struct {
- ln interface{ Accept() (net.Conn, error); Close() error }
+ ln interface {
+ Accept() (net.Conn, error)
+ Close() error
+ }
}
listeners := make([]*listenerState, n)
diff --git a/tests/bench_recovery_test.go b/tests/bench_recovery_test.go
index ca15342b..5a65a4e5 100644
--- a/tests/bench_recovery_test.go
+++ b/tests/bench_recovery_test.go
@@ -15,10 +15,10 @@ import (
// packets in the A→B direction on demand, then resume forwarding normally.
// B→A traffic is always forwarded without drops.
type burstProxy struct {
- toB *net.UDPConn
- toA *net.UDPConn
- realA *net.UDPAddr
- realB *net.UDPAddr
+ toB *net.UDPConn
+ toA *net.UDPConn
+ realA *net.UDPAddr
+ realB *net.UDPAddr
dropN atomic.Int64
stopped atomic.Bool
}