diff --git a/VERSION.txt b/VERSION.txt index 604e786f2b495..237c0b66ad7cd 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -1.90.3 +1.90.8 diff --git a/client/local/local.go b/client/local/local.go index 582c7b8487957..2382a12252a20 100644 --- a/client/local/local.go +++ b/client/local/local.go @@ -596,6 +596,19 @@ func (lc *Client) DebugResultJSON(ctx context.Context, action string) (any, erro return x, nil } +// QueryOptionalFeatures queries the optional features supported by the Tailscale daemon. +func (lc *Client) QueryOptionalFeatures(ctx context.Context) (*apitype.OptionalFeatures, error) { + body, err := lc.send(ctx, "POST", "/localapi/v0/debug-optional-features", 200, nil) + if err != nil { + return nil, fmt.Errorf("error %w: %s", err, body) + } + var x apitype.OptionalFeatures + if err := json.Unmarshal(body, &x); err != nil { + return nil, err + } + return &x, nil +} + // SetDevStoreKeyValue set a statestore key/value. It's only meant for development. // The schema (including when keys are re-read) is not a stable interface. func (lc *Client) SetDevStoreKeyValue(ctx context.Context, key, value string) error { diff --git a/client/tailscale/apitype/apitype.go b/client/tailscale/apitype/apitype.go index 58cdcecc78d4f..6d239d082cd95 100644 --- a/client/tailscale/apitype/apitype.go +++ b/client/tailscale/apitype/apitype.go @@ -94,3 +94,13 @@ type DNSQueryResponse struct { // Resolvers is the list of resolvers that the forwarder deemed able to resolve the query. Resolvers []*dnstype.Resolver } + +// OptionalFeatures describes which optional features are enabled in the build. +type OptionalFeatures struct { + // Features is the map of optional feature names to whether they are + // enabled. + // + // Disabled features may be absent from the map. (That is, false values + // are not guaranteed to be present.) + Features map[string]bool +} diff --git a/cmd/k8s-operator/generate/main.go b/cmd/k8s-operator/generate/main.go index 6904f1df02ec0..5fd5d551b5e02 100644 --- a/cmd/k8s-operator/generate/main.go +++ b/cmd/k8s-operator/generate/main.go @@ -144,7 +144,7 @@ func generate(baseDir string) error { if _, err := file.Write([]byte(helmConditionalEnd)); err != nil { return fmt.Errorf("error writing helm if-statement end: %w", err) } - return nil + return file.Close() } for _, crd := range []struct { crdPath, templatePath string diff --git a/cmd/k8s-operator/generate/main_test.go b/cmd/k8s-operator/generate/main_test.go index c7956dcdbef8f..5ea7fec80971a 100644 --- a/cmd/k8s-operator/generate/main_test.go +++ b/cmd/k8s-operator/generate/main_test.go @@ -7,26 +7,50 @@ package main import ( "bytes" + "context" + "net" "os" "os/exec" "path/filepath" "strings" "testing" + "time" + + "tailscale.com/tstest/nettest" + "tailscale.com/util/cibuild" ) func Test_generate(t *testing.T) { + nettest.SkipIfNoNetwork(t) + + ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) + defer cancel() + if _, err := net.DefaultResolver.LookupIPAddr(ctx, "get.helm.sh"); err != nil { + // https://github.com/helm/helm/issues/31434 + t.Skipf("get.helm.sh seems down or unreachable; skipping test") + } + base, err := os.Getwd() base = filepath.Join(base, "../../../") if err != nil { t.Fatalf("error getting current working directory: %v", err) } defer cleanup(base) + + helmCLIPath := filepath.Join(base, "tool/helm") + if out, err := exec.Command(helmCLIPath, "version").CombinedOutput(); err != nil && cibuild.On() { + // It's not just DNS. 
Azure is generating bogus certs within GitHub Actions at least for + // helm. So try to run it and see if we can even fetch it. + // + // https://github.com/helm/helm/issues/31434 + t.Skipf("error fetching helm; skipping test in CI: %v, %s", err, out) + } + if err := generate(base); err != nil { t.Fatalf("CRD template generation: %v", err) } tempDir := t.TempDir() - helmCLIPath := filepath.Join(base, "tool/helm") helmChartTemplatesPath := filepath.Join(base, "cmd/k8s-operator/deploy/chart") helmPackageCmd := exec.Command(helmCLIPath, "package", helmChartTemplatesPath, "--destination", tempDir, "--version", "0.0.1") helmPackageCmd.Stderr = os.Stderr diff --git a/cmd/tailscale/cli/configure-jetkvm.go b/cmd/tailscale/cli/configure-jetkvm.go index a8e0a7cb542ef..c80bf673605cf 100644 --- a/cmd/tailscale/cli/configure-jetkvm.go +++ b/cmd/tailscale/cli/configure-jetkvm.go @@ -48,9 +48,12 @@ func runConfigureJetKVM(ctx context.Context, args []string) error { if runtime.GOOS != "linux" || distro.Get() != distro.JetKVM { return errors.New("only implemented on JetKVM") } - err := os.WriteFile("/etc/init.d/S22tailscale", bytes.TrimLeft([]byte(` + if err := os.MkdirAll("/userdata/init.d", 0755); err != nil { + return errors.New("unable to create /userdata/init.d") + } + err := os.WriteFile("/userdata/init.d/S22tailscale", bytes.TrimLeft([]byte(` #!/bin/sh -# /etc/init.d/S22tailscale +# /userdata/init.d/S22tailscale # Start/stop tailscaled case "$1" in diff --git a/cmd/tailscale/cli/up.go b/cmd/tailscale/cli/up.go index 91a6b60878a93..61cade8de68d0 100644 --- a/cmd/tailscale/cli/up.go +++ b/cmd/tailscale/cli/up.go @@ -818,6 +818,7 @@ func upWorthyWarning(s string) bool { strings.Contains(s, healthmsg.WarnAcceptRoutesOff) || strings.Contains(s, healthmsg.LockedOut) || strings.Contains(s, healthmsg.WarnExitNodeUsage) || + strings.Contains(s, healthmsg.InMemoryTailnetLockState) || strings.Contains(strings.ToLower(s), "update available: ") } diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index 63a12b2495fd8..fe7cc235b05f8 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -7,8 +7,6 @@ import ( "bytes" "cmp" "context" - "crypto" - "crypto/sha256" "encoding/binary" "encoding/json" "errors" @@ -948,26 +946,6 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap ConnectionHandleForTest: connectionHandleForTest, } - // If we have a hardware attestation key, sign the node key with it and send - // the key & signature in the map request. 
- if buildfeatures.HasTPM { - if k := persist.AsStruct().AttestationKey; k != nil && !k.IsZero() { - hwPub := key.HardwareAttestationPublicFromPlatformKey(k) - request.HardwareAttestationKey = hwPub - - t := c.clock.Now() - msg := fmt.Sprintf("%d|%s", t.Unix(), nodeKey.String()) - digest := sha256.Sum256([]byte(msg)) - sig, err := k.Sign(nil, digest[:], crypto.SHA256) - if err != nil { - c.logf("failed to sign node key with hardware attestation key: %v", err) - } else { - request.HardwareAttestationKeySignature = sig - request.HardwareAttestationKeySignatureTimestamp = t - } - } - } - var extraDebugFlags []string if buildfeatures.HasAdvertiseRoutes && hi != nil && c.netMon != nil && !c.skipIPForwardingCheck && ipForwardingBroken(hi.RoutableIPs, c.netMon.InterfaceState()) { diff --git a/feature/feature.go b/feature/feature.go index 0d383b398ab60..110b104daae00 100644 --- a/feature/feature.go +++ b/feature/feature.go @@ -13,6 +13,12 @@ var ErrUnavailable = errors.New("feature not included in this build") var in = map[string]bool{} +// Registered reports the set of registered features. +// +// The returned map should not be modified by the caller, +// not accessed concurrently with calls to Register. +func Registered() map[string]bool { return in } + // Register notes that the named feature is linked into the binary. func Register(name string) { if _, ok := in[name]; ok { diff --git a/feature/identityfederation/identityfederation.go b/feature/identityfederation/identityfederation.go index a4470fc27eaea..ab1b65f1217d1 100644 --- a/feature/identityfederation/identityfederation.go +++ b/feature/identityfederation/identityfederation.go @@ -42,12 +42,12 @@ func resolveAuthKey(ctx context.Context, baseURL, clientID, idToken string, tags baseURL = ipn.DefaultControlURL } - ephemeral, preauth, err := parseOptionalAttributes(clientID) + strippedID, ephemeral, preauth, err := parseOptionalAttributes(clientID) if err != nil { return "", fmt.Errorf("failed to parse optional config attributes: %w", err) } - accessToken, err := exchangeJWTForToken(ctx, baseURL, clientID, idToken) + accessToken, err := exchangeJWTForToken(ctx, baseURL, strippedID, idToken) if err != nil { return "", fmt.Errorf("failed to exchange JWT for access token: %w", err) } @@ -79,15 +79,15 @@ func resolveAuthKey(ctx context.Context, baseURL, clientID, idToken string, tags return authkey, nil } -func parseOptionalAttributes(clientID string) (ephemeral bool, preauthorized bool, err error) { - _, attrs, found := strings.Cut(clientID, "?") +func parseOptionalAttributes(clientID string) (strippedID string, ephemeral bool, preauthorized bool, err error) { + strippedID, attrs, found := strings.Cut(clientID, "?") if !found { - return true, false, nil + return clientID, true, false, nil } parsed, err := url.ParseQuery(attrs) if err != nil { - return false, false, fmt.Errorf("failed to parse optional config attributes: %w", err) + return "", false, false, fmt.Errorf("failed to parse optional config attributes: %w", err) } for k := range parsed { @@ -97,11 +97,14 @@ func parseOptionalAttributes(clientID string) (ephemeral bool, preauthorized boo case "preauthorized": preauthorized, err = strconv.ParseBool(parsed.Get(k)) default: - return false, false, fmt.Errorf("unknown optional config attribute %q", k) + return "", false, false, fmt.Errorf("unknown optional config attribute %q", k) } } + if err != nil { + return "", false, false, err + } - return ephemeral, preauthorized, err + return strippedID, ephemeral, preauthorized, nil } // 
exchangeJWTForToken exchanges a JWT for a Tailscale access token. diff --git a/feature/identityfederation/identityfederation_test.go b/feature/identityfederation/identityfederation_test.go index 7b75852a819a1..a673a42982706 100644 --- a/feature/identityfederation/identityfederation_test.go +++ b/feature/identityfederation/identityfederation_test.go @@ -87,6 +87,7 @@ func TestParseOptionalAttributes(t *testing.T) { tests := []struct { name string clientID string + wantClientID string wantEphemeral bool wantPreauth bool wantErr string @@ -94,6 +95,7 @@ func TestParseOptionalAttributes(t *testing.T) { { name: "default values", clientID: "client-123", + wantClientID: "client-123", wantEphemeral: true, wantPreauth: false, wantErr: "", @@ -101,6 +103,7 @@ func TestParseOptionalAttributes(t *testing.T) { { name: "custom values", clientID: "client-123?ephemeral=false&preauthorized=true", + wantClientID: "client-123", wantEphemeral: false, wantPreauth: true, wantErr: "", @@ -108,6 +111,7 @@ func TestParseOptionalAttributes(t *testing.T) { { name: "unknown attribute", clientID: "client-123?unknown=value", + wantClientID: "", wantEphemeral: false, wantPreauth: false, wantErr: `unknown optional config attribute "unknown"`, @@ -115,6 +119,7 @@ func TestParseOptionalAttributes(t *testing.T) { { name: "invalid value", clientID: "client-123?ephemeral=invalid", + wantClientID: "", wantEphemeral: false, wantPreauth: false, wantErr: `strconv.ParseBool: parsing "invalid": invalid syntax`, @@ -123,7 +128,7 @@ func TestParseOptionalAttributes(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ephemeral, preauth, err := parseOptionalAttributes(tt.clientID) + strippedID, ephemeral, preauth, err := parseOptionalAttributes(tt.clientID) if tt.wantErr != "" { if err == nil { t.Errorf("parseOptionalAttributes() error = nil, want %q", tt.wantErr) @@ -138,6 +143,9 @@ func TestParseOptionalAttributes(t *testing.T) { return } } + if strippedID != tt.wantClientID { + t.Errorf("parseOptionalAttributes() strippedID = %v, want %v", strippedID, tt.wantClientID) + } if ephemeral != tt.wantEphemeral { t.Errorf("parseOptionalAttributes() ephemeral = %v, want %v", ephemeral, tt.wantEphemeral) } diff --git a/feature/portmapper/portmapper.go b/feature/portmapper/portmapper.go index e7be00ad17d8c..d1b903cb69c20 100644 --- a/feature/portmapper/portmapper.go +++ b/feature/portmapper/portmapper.go @@ -6,6 +6,7 @@ package portmapper import ( + "tailscale.com/feature" "tailscale.com/net/netmon" "tailscale.com/net/portmapper" "tailscale.com/net/portmapper/portmappertype" @@ -14,6 +15,7 @@ import ( ) func init() { + feature.Register("portmapper") portmappertype.HookNewPortMapper.Set(newPortMapper) } diff --git a/feature/relayserver/relayserver.go b/feature/relayserver/relayserver.go index df2fb4cb7c165..868d5f61a2fa7 100644 --- a/feature/relayserver/relayserver.go +++ b/feature/relayserver/relayserver.go @@ -21,8 +21,10 @@ import ( "tailscale.com/ipn/ipnext" "tailscale.com/ipn/localapi" "tailscale.com/net/udprelay" + "tailscale.com/net/udprelay/endpoint" "tailscale.com/net/udprelay/status" "tailscale.com/tailcfg" + "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/types/ptr" "tailscale.com/util/eventbus" @@ -68,25 +70,41 @@ func servePeerRelayDebugSessions(h *localapi.Handler, w http.ResponseWriter, r * // extension. It is registered with [ipnext.RegisterExtension] if the package is // imported. 
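For orientation, a condensed sketch of the eventbus wiring pattern the constructor below adopts, using only calls that appear elsewhere in this diff (Bus.Client, eventbus.Publish, eventbus.SubscribeFunc, Publisher.Publish); the extension fields and error handling are elided:

    ec := sb.Sys().Bus.Get().Client("relayserver.extension")    // per-component bus client
    respPub := eventbus.Publish[magicsock.UDPRelayAllocResp](ec) // typed publisher, created up front
    eventbus.SubscribeFunc(ec, func(req magicsock.UDPRelayAllocReq) {
        // Handle the request, then answer on the typed publisher (fields elided).
        respPub.Publish(magicsock.UDPRelayAllocResp{})
    })
    // ec.Close() later tears the publisher and subscriptions down together,
    // blocking until in-flight SubscribeFunc callbacks return.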
func newExtension(logf logger.Logf, sb ipnext.SafeBackend) (ipnext.Extension, error) { - return &extension{ + e := &extension{ + newServerFn: func(logf logger.Logf, port int, overrideAddrs []netip.Addr) (relayServer, error) { + return udprelay.NewServer(logf, port, overrideAddrs) + }, logf: logger.WithPrefix(logf, featureName+": "), - bus: sb.Sys().Bus.Get(), - }, nil + } + e.ec = sb.Sys().Bus.Get().Client("relayserver.extension") + e.respPub = eventbus.Publish[magicsock.UDPRelayAllocResp](e.ec) + eventbus.SubscribeFunc(e.ec, e.onDERPMapView) + eventbus.SubscribeFunc(e.ec, e.onAllocReq) + return e, nil +} + +// relayServer is an interface for [udprelay.Server]. +type relayServer interface { + Close() error + AllocateEndpoint(discoA, discoB key.DiscoPublic) (endpoint.ServerEndpoint, error) + GetSessions() []status.ServerSession + SetDERPMapView(tailcfg.DERPMapView) } // extension is an [ipnext.Extension] managing the relay server on platforms // that import this package. type extension struct { - logf logger.Logf - bus *eventbus.Bus + newServerFn func(logf logger.Logf, port int, overrideAddrs []netip.Addr) (relayServer, error) // swappable for tests + logf logger.Logf + ec *eventbus.Client + respPub *eventbus.Publisher[magicsock.UDPRelayAllocResp] - mu sync.Mutex // guards the following fields - shutdown bool - - port *int // ipn.Prefs.RelayServerPort, nil if disabled - eventSubs *eventbus.Monitor // nil if not connected to eventbus - debugSessionsCh chan chan []status.ServerSession // non-nil if consumeEventbusTopics is running - hasNodeAttrDisableRelayServer bool // tailcfg.NodeAttrDisableRelayServer + mu sync.Mutex // guards the following fields + shutdown bool // true if Shutdown() has been called + rs relayServer // nil when disabled + port *int // ipn.Prefs.RelayServerPort, nil if disabled + derpMapView tailcfg.DERPMapView // latest seen over the eventbus + hasNodeAttrDisableRelayServer bool // [tailcfg.NodeAttrDisableRelayServer] } // Name implements [ipnext.Extension]. @@ -104,26 +122,83 @@ func (e *extension) Init(host ipnext.Host) error { return nil } -// handleBusLifetimeLocked handles the lifetime of consumeEventbusTopics. 
-func (e *extension) handleBusLifetimeLocked() { - busShouldBeRunning := !e.shutdown && e.port != nil && !e.hasNodeAttrDisableRelayServer - if !busShouldBeRunning { - e.disconnectFromBusLocked() +func (e *extension) onDERPMapView(view tailcfg.DERPMapView) { + e.mu.Lock() + defer e.mu.Unlock() + e.derpMapView = view + if e.rs != nil { + e.rs.SetDERPMapView(view) + } +} + +func (e *extension) onAllocReq(req magicsock.UDPRelayAllocReq) { + e.mu.Lock() + defer e.mu.Unlock() + if e.shutdown { + return + } + if e.rs == nil { + if !e.relayServerShouldBeRunningLocked() { + return + } + e.tryStartRelayServerLocked() + if e.rs == nil { + return + } + } + se, err := e.rs.AllocateEndpoint(req.Message.ClientDisco[0], req.Message.ClientDisco[1]) + if err != nil { + e.logf("error allocating endpoint: %v", err) + return + } + e.respPub.Publish(magicsock.UDPRelayAllocResp{ + ReqRxFromNodeKey: req.RxFromNodeKey, + ReqRxFromDiscoKey: req.RxFromDiscoKey, + Message: &disco.AllocateUDPRelayEndpointResponse{ + Generation: req.Message.Generation, + UDPRelayEndpoint: disco.UDPRelayEndpoint{ + ServerDisco: se.ServerDisco, + ClientDisco: se.ClientDisco, + LamportID: se.LamportID, + VNI: se.VNI, + BindLifetime: se.BindLifetime.Duration, + SteadyStateLifetime: se.SteadyStateLifetime.Duration, + AddrPorts: se.AddrPorts, + }, + }, + }) +} + +func (e *extension) tryStartRelayServerLocked() { + rs, err := e.newServerFn(e.logf, *e.port, overrideAddrs()) + if err != nil { + e.logf("error initializing server: %v", err) return - } else if e.eventSubs != nil { - return // already running } + e.rs = rs + e.rs.SetDERPMapView(e.derpMapView) +} - ec := e.bus.Client("relayserver.extension") - e.debugSessionsCh = make(chan chan []status.ServerSession) - e.eventSubs = ptr.To(ec.Monitor(e.consumeEventbusTopics(ec, *e.port))) +func (e *extension) relayServerShouldBeRunningLocked() bool { + return !e.shutdown && e.port != nil && !e.hasNodeAttrDisableRelayServer +} + +// handleRelayServerLifetimeLocked handles the lifetime of [e.rs]. +func (e *extension) handleRelayServerLifetimeLocked() { + if !e.relayServerShouldBeRunningLocked() { + e.stopRelayServerLocked() + return + } else if e.rs != nil { + return // already running + } + e.tryStartRelayServerLocked() } func (e *extension) selfNodeViewChanged(nodeView tailcfg.NodeView) { e.mu.Lock() defer e.mu.Unlock() e.hasNodeAttrDisableRelayServer = nodeView.HasCap(tailcfg.NodeAttrDisableRelayServer) - e.handleBusLifetimeLocked() + e.handleRelayServerLifetimeLocked() } func (e *extension) profileStateChanged(_ ipn.LoginProfileView, prefs ipn.PrefsView, sameNode bool) { @@ -133,13 +208,13 @@ func (e *extension) profileStateChanged(_ ipn.LoginProfileView, prefs ipn.PrefsV enableOrDisableServer := ok != (e.port != nil) portChanged := ok && e.port != nil && newPort != *e.port if enableOrDisableServer || portChanged || !sameNode { - e.disconnectFromBusLocked() + e.stopRelayServerLocked() e.port = nil if ok { e.port = ptr.To(newPort) } } - e.handleBusLifetimeLocked() + e.handleRelayServerLifetimeLocked() } // overrideAddrs returns TS_DEBUG_RELAY_SERVER_ADDRS as []netip.Addr, if set. It @@ -162,88 +237,23 @@ var overrideAddrs = sync.OnceValue(func() (ret []netip.Addr) { return }) -// consumeEventbusTopics serves endpoint allocation requests over the eventbus. -// It also serves [relayServer] debug information on a channel. 
-// consumeEventbusTopics must never acquire [extension.mu], which can be held -// by other goroutines while waiting to receive on [extension.eventSubs] or the -// inner [extension.debugSessionsCh] channel. -func (e *extension) consumeEventbusTopics(ec *eventbus.Client, port int) func(*eventbus.Client) { - reqSub := eventbus.Subscribe[magicsock.UDPRelayAllocReq](ec) - respPub := eventbus.Publish[magicsock.UDPRelayAllocResp](ec) - debugSessionsCh := e.debugSessionsCh - - return func(ec *eventbus.Client) { - rs, err := udprelay.NewServer(e.logf, port, overrideAddrs()) - if err != nil { - e.logf("error initializing server: %v", err) - } - - defer func() { - if rs != nil { - rs.Close() - } - }() - for { - select { - case <-ec.Done(): - return - case respCh := <-debugSessionsCh: - if rs == nil { - respCh <- nil - continue - } - sessions := rs.GetSessions() - respCh <- sessions - case req := <-reqSub.Events(): - if rs == nil { - // The server may have previously failed to initialize if - // the configured port was in use, try again. - rs, err = udprelay.NewServer(e.logf, port, overrideAddrs()) - if err != nil { - e.logf("error initializing server: %v", err) - continue - } - } - se, err := rs.AllocateEndpoint(req.Message.ClientDisco[0], req.Message.ClientDisco[1]) - if err != nil { - e.logf("error allocating endpoint: %v", err) - continue - } - respPub.Publish(magicsock.UDPRelayAllocResp{ - ReqRxFromNodeKey: req.RxFromNodeKey, - ReqRxFromDiscoKey: req.RxFromDiscoKey, - Message: &disco.AllocateUDPRelayEndpointResponse{ - Generation: req.Message.Generation, - UDPRelayEndpoint: disco.UDPRelayEndpoint{ - ServerDisco: se.ServerDisco, - ClientDisco: se.ClientDisco, - LamportID: se.LamportID, - VNI: se.VNI, - BindLifetime: se.BindLifetime.Duration, - SteadyStateLifetime: se.SteadyStateLifetime.Duration, - AddrPorts: se.AddrPorts, - }, - }, - }) - } - } - } -} - -func (e *extension) disconnectFromBusLocked() { - if e.eventSubs != nil { - e.eventSubs.Close() - e.eventSubs = nil - e.debugSessionsCh = nil +func (e *extension) stopRelayServerLocked() { + if e.rs != nil { + e.rs.Close() } + e.rs = nil } // Shutdown implements [ipnlocal.Extension]. func (e *extension) Shutdown() error { + // [extension.mu] must not be held when closing the [eventbus.Client]. Close + // blocks until all [eventbus.SubscribeFunc]'s have returned, and the ones + // used in this package also acquire [extension.mu]. See #17894. 
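+	// Illustrative ordering note (not in the original change): if e.mu were
+	// taken before closing, Shutdown could deadlock:
+	//
+	//   Shutdown:   e.mu.Lock() -> e.ec.Close() -> waits for onAllocReq
+	//   onAllocReq: blocked in e.mu.Lock()      -> never returns
+	//
+	// so the client is closed before the lock is taken below.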
+ e.ec.Close() e.mu.Lock() defer e.mu.Unlock() - e.disconnectFromBusLocked() e.shutdown = true + e.stopRelayServerLocked() return nil } @@ -253,23 +263,14 @@ func (e *extension) Shutdown() error { func (e *extension) serverStatus() status.ServerStatus { e.mu.Lock() defer e.mu.Unlock() - st := status.ServerStatus{ UDPPort: nil, Sessions: nil, } - if e.port == nil || e.eventSubs == nil { + if e.rs == nil { return st } st.UDPPort = ptr.To(*e.port) - - ch := make(chan []status.ServerSession) - select { - case e.debugSessionsCh <- ch: - resp := <-ch - st.Sessions = resp - return st - case <-e.eventSubs.Done(): - return st - } + st.Sessions = e.rs.GetSessions() + return st } diff --git a/feature/relayserver/relayserver_test.go b/feature/relayserver/relayserver_test.go index 65c503524c5de..2184b51759b61 100644 --- a/feature/relayserver/relayserver_test.go +++ b/feature/relayserver/relayserver_test.go @@ -4,13 +4,20 @@ package relayserver import ( + "errors" + "net/netip" + "reflect" "testing" "tailscale.com/ipn" + "tailscale.com/net/udprelay/endpoint" + "tailscale.com/net/udprelay/status" + "tailscale.com/tailcfg" "tailscale.com/tsd" + "tailscale.com/tstime" + "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/types/ptr" - "tailscale.com/util/eventbus" ) func Test_extension_profileStateChanged(t *testing.T) { @@ -19,29 +26,33 @@ func Test_extension_profileStateChanged(t *testing.T) { type fields struct { port *int + rs relayServer } type args struct { prefs ipn.PrefsView sameNode bool } tests := []struct { - name string - fields fields - args args - wantPort *int - wantBusRunning bool + name string + fields fields + args args + wantPort *int + wantRelayServerFieldNonNil bool + wantRelayServerFieldMutated bool }{ { - name: "no changes non-nil port", + name: "no changes non-nil port previously running", fields: fields{ port: ptr.To(1), + rs: mockRelayServerNotZeroVal(), }, args: args{ prefs: prefsWithPortOne.View(), sameNode: true, }, - wantPort: ptr.To(1), - wantBusRunning: true, + wantPort: ptr.To(1), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: false, }, { name: "prefs port nil", @@ -52,8 +63,23 @@ func Test_extension_profileStateChanged(t *testing.T) { prefs: prefsWithNilPort.View(), sameNode: true, }, - wantPort: nil, - wantBusRunning: false, + wantPort: nil, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: false, + }, + { + name: "prefs port nil previously running", + fields: fields{ + port: ptr.To(1), + rs: mockRelayServerNotZeroVal(), + }, + args: args{ + prefs: prefsWithNilPort.View(), + sameNode: true, + }, + wantPort: nil, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: true, }, { name: "prefs port changed", @@ -64,8 +90,23 @@ func Test_extension_profileStateChanged(t *testing.T) { prefs: prefsWithPortOne.View(), sameNode: true, }, - wantPort: ptr.To(1), - wantBusRunning: true, + wantPort: ptr.To(1), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, + }, + { + name: "prefs port changed previously running", + fields: fields{ + port: ptr.To(2), + rs: mockRelayServerNotZeroVal(), + }, + args: args{ + prefs: prefsWithPortOne.View(), + sameNode: true, + }, + wantPort: ptr.To(1), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, }, { name: "sameNode false", @@ -76,8 +117,23 @@ func Test_extension_profileStateChanged(t *testing.T) { prefs: prefsWithPortOne.View(), sameNode: false, }, - wantPort: ptr.To(1), - wantBusRunning: true, + wantPort: ptr.To(1), + 
wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, + }, + { + name: "sameNode false previously running", + fields: fields{ + port: ptr.To(1), + rs: mockRelayServerNotZeroVal(), + }, + args: args{ + prefs: prefsWithPortOne.View(), + sameNode: false, + }, + wantPort: ptr.To(1), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, }, { name: "prefs port non-nil extension port nil", @@ -88,85 +144,165 @@ func Test_extension_profileStateChanged(t *testing.T) { prefs: prefsWithPortOne.View(), sameNode: false, }, - wantPort: ptr.To(1), - wantBusRunning: true, + wantPort: ptr.To(1), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { sys := tsd.NewSystem() - bus := sys.Bus.Get() - e := &extension{ - logf: logger.Discard, - port: tt.fields.port, - bus: bus, + ipne, err := newExtension(logger.Discard, mockSafeBackend{sys}) + if err != nil { + t.Fatal(err) + } + e := ipne.(*extension) + e.newServerFn = func(logf logger.Logf, port int, overrideAddrs []netip.Addr) (relayServer, error) { + return &mockRelayServer{}, nil } - defer e.disconnectFromBusLocked() + e.port = tt.fields.port + e.rs = tt.fields.rs + defer e.Shutdown() e.profileStateChanged(ipn.LoginProfileView{}, tt.args.prefs, tt.args.sameNode) - if tt.wantBusRunning != (e.eventSubs != nil) { - t.Errorf("wantBusRunning: %v != (e.eventSubs != nil): %v", tt.wantBusRunning, e.eventSubs != nil) + if tt.wantRelayServerFieldNonNil != (e.rs != nil) { + t.Errorf("wantRelayServerFieldNonNil: %v != (e.rs != nil): %v", tt.wantRelayServerFieldNonNil, e.rs != nil) } if (tt.wantPort == nil) != (e.port == nil) { t.Errorf("(tt.wantPort == nil): %v != (e.port == nil): %v", tt.wantPort == nil, e.port == nil) } else if tt.wantPort != nil && *tt.wantPort != *e.port { t.Errorf("wantPort: %d != *e.port: %d", *tt.wantPort, *e.port) } + if tt.wantRelayServerFieldMutated != !reflect.DeepEqual(tt.fields.rs, e.rs) { + t.Errorf("wantRelayServerFieldMutated: %v != !reflect.DeepEqual(tt.fields.rs, e.rs): %v", tt.wantRelayServerFieldMutated, !reflect.DeepEqual(tt.fields.rs, e.rs)) + } }) } } -func Test_extension_handleBusLifetimeLocked(t *testing.T) { +func mockRelayServerNotZeroVal() *mockRelayServer { + return &mockRelayServer{true} +} + +type mockRelayServer struct { + set bool +} + +func (mockRelayServer) Close() error { return nil } +func (mockRelayServer) AllocateEndpoint(_, _ key.DiscoPublic) (endpoint.ServerEndpoint, error) { + return endpoint.ServerEndpoint{}, errors.New("not implemented") +} +func (mockRelayServer) GetSessions() []status.ServerSession { return nil } +func (mockRelayServer) SetDERPMapView(tailcfg.DERPMapView) { return } + +type mockSafeBackend struct { + sys *tsd.System +} + +func (m mockSafeBackend) Sys() *tsd.System { return m.sys } +func (mockSafeBackend) Clock() tstime.Clock { return nil } +func (mockSafeBackend) TailscaleVarRoot() string { return "" } + +func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { tests := []struct { name string shutdown bool port *int - eventSubs *eventbus.Monitor + rs relayServer hasNodeAttrDisableRelayServer bool - wantBusRunning bool + wantRelayServerFieldNonNil bool + wantRelayServerFieldMutated bool }{ { name: "want running", shutdown: false, port: ptr.To(1), hasNodeAttrDisableRelayServer: false, - wantBusRunning: true, + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, + }, + { + name: "want running previously running", + shutdown: false, + port: 
ptr.To(1), + rs: mockRelayServerNotZeroVal(), + hasNodeAttrDisableRelayServer: false, + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: false, }, { name: "shutdown true", shutdown: true, port: ptr.To(1), hasNodeAttrDisableRelayServer: false, - wantBusRunning: false, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: false, + }, + { + name: "shutdown true previously running", + shutdown: true, + port: ptr.To(1), + rs: mockRelayServerNotZeroVal(), + hasNodeAttrDisableRelayServer: false, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: true, }, { name: "port nil", shutdown: false, port: nil, hasNodeAttrDisableRelayServer: false, - wantBusRunning: false, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: false, + }, + { + name: "port nil previously running", + shutdown: false, + port: nil, + rs: mockRelayServerNotZeroVal(), + hasNodeAttrDisableRelayServer: false, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: true, }, { name: "hasNodeAttrDisableRelayServer true", shutdown: false, port: nil, hasNodeAttrDisableRelayServer: true, - wantBusRunning: false, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: false, + }, + { + name: "hasNodeAttrDisableRelayServer true previously running", + shutdown: false, + port: nil, + rs: mockRelayServerNotZeroVal(), + hasNodeAttrDisableRelayServer: true, + wantRelayServerFieldNonNil: false, + wantRelayServerFieldMutated: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - e := &extension{ - logf: logger.Discard, - bus: eventbus.New(), - shutdown: tt.shutdown, - port: tt.port, - eventSubs: tt.eventSubs, - hasNodeAttrDisableRelayServer: tt.hasNodeAttrDisableRelayServer, + sys := tsd.NewSystem() + ipne, err := newExtension(logger.Discard, mockSafeBackend{sys}) + if err != nil { + t.Fatal(err) + } + e := ipne.(*extension) + e.newServerFn = func(logf logger.Logf, port int, overrideAddrs []netip.Addr) (relayServer, error) { + return &mockRelayServer{}, nil + } + e.shutdown = tt.shutdown + e.port = tt.port + e.rs = tt.rs + e.hasNodeAttrDisableRelayServer = tt.hasNodeAttrDisableRelayServer + e.handleRelayServerLifetimeLocked() + defer e.Shutdown() + if tt.wantRelayServerFieldNonNil != (e.rs != nil) { + t.Errorf("wantRelayServerFieldNonNil: %v != (e.rs != nil): %v", tt.wantRelayServerFieldNonNil, e.rs != nil) } - e.handleBusLifetimeLocked() - defer e.disconnectFromBusLocked() - if tt.wantBusRunning != (e.eventSubs != nil) { - t.Errorf("wantBusRunning: %v != (e.eventSubs != nil): %v", tt.wantBusRunning, e.eventSubs != nil) + if tt.wantRelayServerFieldMutated != !reflect.DeepEqual(tt.rs, e.rs) { + t.Errorf("wantRelayServerFieldMutated: %v != !reflect.DeepEqual(tt.rs, e.rs): %v", tt.wantRelayServerFieldMutated, !reflect.DeepEqual(tt.rs, e.rs)) } }) } diff --git a/flake.nix b/flake.nix index 726757f7a76b7..e8b5420c55cef 100644 --- a/flake.nix +++ b/flake.nix @@ -151,5 +151,5 @@ }); }; } -# nix-direnv cache busting line: sha256-rV3C2Vi48FCifGt58OdEO4+Av0HRIs8sUJVvp/gEBLw= +# nix-direnv cache busting line: sha256-AUOjLomba75qfzb9Vxc0Sktyeces6hBSuOMgboWcDnE= diff --git a/go.mod b/go.mod index 3c281fa7a34bf..96df00f65044f 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da github.com/golang/snappy v0.0.4 github.com/golangci/golangci-lint v1.57.1 - github.com/google/go-cmp v0.6.0 + github.com/google/go-cmp v0.7.0 github.com/google/go-containerregistry v0.20.3 
github.com/google/go-tpm v0.9.4 github.com/google/gopacket v1.1.19 diff --git a/go.mod.sri b/go.mod.sri index f94054422c6d7..790d851a19a94 100644 --- a/go.mod.sri +++ b/go.mod.sri @@ -1 +1 @@ -sha256-rV3C2Vi48FCifGt58OdEO4+Av0HRIs8sUJVvp/gEBLw= +sha256-AUOjLomba75qfzb9Vxc0Sktyeces6hBSuOMgboWcDnE= diff --git a/go.sum b/go.sum index bc386d1fdb37f..9864eab71aa9a 100644 --- a/go.sum +++ b/go.sum @@ -490,8 +490,8 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-containerregistry v0.20.3 h1:oNx7IdTI936V8CQRveCjaxOiegWwvM7kqkbXTpyiovI= github.com/google/go-containerregistry v0.20.3/go.mod h1:w00pIgBRDVUDFM6bq+Qx8lwNWK+cxgCuX1vd3PIBDNI= github.com/google/go-github/v66 v66.0.0 h1:ADJsaXj9UotwdgK8/iFZtv7MLc8E8WBl62WLd/D/9+M= diff --git a/health/healthmsg/healthmsg.go b/health/healthmsg/healthmsg.go index 2384103738cf3..5ea1c736d8851 100644 --- a/health/healthmsg/healthmsg.go +++ b/health/healthmsg/healthmsg.go @@ -8,9 +8,10 @@ package healthmsg const ( - WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false" - TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller - LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out" - WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible" - DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310" + WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false" + TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller + LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out" + WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible" + DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310" + InMemoryTailnetLockState = "Tailnet Lock state is only being stored in-memory. Set --statedir to store state on disk, which is more secure. 
See https://tailscale.com/kb/1226/tailnet-lock#tailnet-lock-state" ) diff --git a/ipn/ipnlocal/hwattest.go b/ipn/ipnlocal/hwattest.go deleted file mode 100644 index 2c93cad4c97ff..0000000000000 --- a/ipn/ipnlocal/hwattest.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) Tailscale Inc & AUTHORS -// SPDX-License-Identifier: BSD-3-Clause - -//go:build !ts_omit_tpm - -package ipnlocal - -import ( - "errors" - - "tailscale.com/feature" - "tailscale.com/types/key" - "tailscale.com/types/logger" - "tailscale.com/types/persist" -) - -func init() { - feature.HookGenerateAttestationKeyIfEmpty.Set(generateAttestationKeyIfEmpty) } - -// generateAttestationKeyIfEmpty generates a new hardware attestation key if -// none exists. It returns true if a new key was generated and stored in -// p.AttestationKey. -func generateAttestationKeyIfEmpty(p *persist.Persist, logf logger.Logf) (bool, error) { - // attempt to generate a new hardware attestation key if none exists - var ak key.HardwareAttestationKey - if p != nil { - ak = p.AttestationKey - } - - if ak == nil || ak.IsZero() { - var err error - ak, err = key.NewHardwareAttestationKey() - if err != nil { - if !errors.Is(err, key.ErrUnsupported) { - logf("failed to create hardware attestation key: %v", err) - } - } else if ak != nil { - logf("using new hardware attestation key: %v", ak.Public()) - if p == nil { - p = &persist.Persist{} - } - p.AttestationKey = ak - return true, nil - } - } - return false, nil -} diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index a1fefedc2d1e3..1ffbbbca624a3 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -87,6 +87,7 @@ import ( "tailscale.com/util/clientmetric" "tailscale.com/util/dnsname" "tailscale.com/util/eventbus" + "tailscale.com/util/execqueue" "tailscale.com/util/goroutines" "tailscale.com/util/mak" "tailscale.com/util/osuser" @@ -187,6 +188,7 @@ type LocalBackend struct { statsLogf logger.Logf // for printing peers stats on change sys *tsd.System eventSubs eventbus.Monitor + appcTask execqueue.ExecQueue // handles updates from appc health *health.Tracker // always non-nil polc policyclient.Client // always non-nil @@ -642,12 +644,14 @@ func (b *LocalBackend) consumeEventbusTopics(ec *eventbus.Client) func(*eventbus // We need to find a way to ensure that changes to the backend state are applied // consistently in the presence of profile changes, which currently may not happen in // a single atomic step. See: https://github.com/tailscale/tailscale/issues/17414 - if err := b.AdvertiseRoute(ru.Advertise...); err != nil { - b.logf("appc: failed to advertise routes: %v: %v", ru.Advertise, err) - } - if err := b.UnadvertiseRoute(ru.Unadvertise...); err != nil { - b.logf("appc: failed to unadvertise routes: %v: %v", ru.Unadvertise, err) - } + b.appcTask.Add(func() { + if err := b.AdvertiseRoute(ru.Advertise...); err != nil { + b.logf("appc: failed to advertise routes: %v: %v", ru.Advertise, err) + } + if err := b.UnadvertiseRoute(ru.Unadvertise...); err != nil { + b.logf("appc: failed to unadvertise routes: %v: %v", ru.Unadvertise, err) + } + }) case ri := <-storeRoutesSub.Events(): // Whether or not routes should be stored can change over time. shouldStoreRoutes := b.ControlKnobs().AppCStoreRoutes.Load() @@ -1113,6 +1117,7 @@ func (b *LocalBackend) Shutdown() { // they can deadlock with c.Shutdown(). // 2. LocalBackend.consumeEventbusTopics event handlers may not guard against // undesirable post/in-progress LocalBackend.Shutdown() behaviors. 
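+	// A sketch of the ExecQueue contract relied on above (ordered, asynchronous
+	// execution; this aside is not part of the original change):
+	//
+	//   b.appcTask.Add(func() { ... }) // queued; runs FIFO, off the event goroutine
+	//   b.appcTask.Shutdown()          // stops the queue during backend shutdown
+	//
+	// so AdvertiseRoute/UnadvertiseRoute never block eventbus consumption.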
+ b.appcTask.Shutdown() b.eventSubs.Close() b.em.close() @@ -1216,7 +1221,6 @@ func stripKeysFromPrefs(p ipn.PrefsView) ipn.PrefsView { p2.Persist.PrivateNodeKey = key.NodePrivate{} p2.Persist.OldPrivateNodeKey = key.NodePrivate{} p2.Persist.NetworkLockKey = key.NLPrivate{} - p2.Persist.AttestationKey = nil return p2.View() } diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go index 4990824453c47..c769e242d4405 100644 --- a/ipn/ipnlocal/network-lock.go +++ b/ipn/ipnlocal/network-lock.go @@ -23,6 +23,7 @@ import ( "slices" "time" + "tailscale.com/health" "tailscale.com/health/healthmsg" "tailscale.com/ipn" "tailscale.com/ipn/ipnstate" @@ -54,7 +55,7 @@ var ( type tkaState struct { profile ipn.ProfileID authority *tka.Authority - storage *tka.FS + storage tka.CompactableChonk filtered []ipnstate.TKAPeer } @@ -75,7 +76,7 @@ func (b *LocalBackend) initTKALocked() error { root := b.TailscaleVarRoot() if root == "" { b.tka = nil - b.logf("network-lock unavailable; no state directory") + b.logf("cannot fetch existing TKA state; no state directory for network-lock") return nil } @@ -90,6 +91,7 @@ func (b *LocalBackend) initTKALocked() error { if err != nil { return fmt.Errorf("initializing tka: %v", err) } + if err := authority.Compact(storage, tkaCompactionDefaults); err != nil { b.logf("tka compaction failed: %v", err) } @@ -105,6 +107,16 @@ func (b *LocalBackend) initTKALocked() error { return nil } +// noNetworkLockStateDirWarnable is a Warnable to warn the user that Tailnet Lock data +// (in particular, the list of AUMs in the TKA state) is being stored in memory and will +// be lost when tailscaled restarts. +var noNetworkLockStateDirWarnable = health.Register(&health.Warnable{ + Code: "no-tailnet-lock-state-dir", + Title: "No statedir for Tailnet Lock", + Severity: health.SeverityMedium, + Text: health.StaticMessage(healthmsg.InMemoryTailnetLockState), +}) + // tkaFilterNetmapLocked checks the signatures on each node key, dropping // nodes from the netmap whose signature does not verify. // @@ -442,7 +454,7 @@ func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error { // b.mu must be held & TKA must be initialized. 
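The tkaState.storage field above is now the tka.CompactableChonk interface rather than the concrete *tka.FS, so an in-memory implementation can stand in when no --statedir is configured. A compile-time sketch of that contract (FS's RemoveAll implementation falls outside the excerpted portion of this diff):

    // Both storage backends must satisfy the widened interface;
    // RemoveAll is added to CompactableChonk later in this diff.
    var _ tka.CompactableChonk = (*tka.FS)(nil)  // on-disk state, via tka.ChonkDir
    var _ tka.CompactableChonk = (*tka.Mem)(nil) // in-memory fallback, lost on restart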
func (b *LocalBackend) tkaApplyDisablementLocked(secret []byte) error { if b.tka.authority.ValidDisablement(secret) { - if err := os.RemoveAll(b.chonkPathLocked()); err != nil { + if err := b.tka.storage.RemoveAll(); err != nil { return err } b.tka = nil @@ -486,19 +498,21 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per } } - chonkDir := b.chonkPathLocked() - if err := os.Mkdir(filepath.Dir(chonkDir), 0755); err != nil && !os.IsExist(err) { - return fmt.Errorf("creating chonk root dir: %v", err) - } - if err := os.Mkdir(chonkDir, 0755); err != nil && !os.IsExist(err) { - return fmt.Errorf("mkdir: %v", err) - } - - chonk, err := tka.ChonkDir(chonkDir) - if err != nil { - return fmt.Errorf("chonk: %v", err) + root := b.TailscaleVarRoot() + var storage tka.CompactableChonk + if root == "" { + b.health.SetUnhealthy(noNetworkLockStateDirWarnable, nil) + b.logf("network-lock using in-memory storage; no state directory") + storage = &tka.Mem{} + } else { + chonkDir := b.chonkPathLocked() + chonk, err := tka.ChonkDir(chonkDir) + if err != nil { + return fmt.Errorf("chonk: %v", err) + } + storage = chonk } - authority, err := tka.Bootstrap(chonk, genesis) + authority, err := tka.Bootstrap(storage, genesis) if err != nil { return fmt.Errorf("tka bootstrap: %v", err) } @@ -506,7 +520,7 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per b.tka = &tkaState{ profile: b.pm.CurrentProfile().ID(), authority: authority, - storage: chonk, + storage: storage, } return nil } @@ -519,10 +533,6 @@ func (b *LocalBackend) CanSupportNetworkLock() error { return nil } - if b.TailscaleVarRoot() == "" { - return errors.New("network-lock is not supported in this configuration, try setting --statedir") - } - // There's a var root (aka --statedir), so if network lock gets // initialized we have somewhere to store our AUMs. That's all // we need. @@ -642,6 +652,7 @@ func tkaStateFromPeer(p tailcfg.NodeView) ipnstate.TKAPeer { // needing signatures is returned as a response. // The Finish RPC submits signatures for all these nodes, at which point // Control has everything it needs to atomically enable network lock. +// TODO(alexc): Only with persistent backend func (b *LocalBackend) NetworkLockInit(keys []tka.Key, disablementValues [][]byte, supportDisablement []byte) error { if err := b.CanSupportNetworkLock(); err != nil { return err @@ -762,7 +773,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error { return fmt.Errorf("saving prefs: %w", err) } - if err := os.RemoveAll(b.chonkPathLocked()); err != nil { + if err := b.tka.storage.RemoveAll(); err != nil { return fmt.Errorf("deleting TKA state: %w", err) } b.tka = nil @@ -771,6 +782,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error { // NetworkLockSign signs the given node-key and submits it to the control plane. // rotationPublic, if specified, must be an ed25519 public key. 
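Condensing the storage selection added to tkaBootstrapFromGenesisLocked above into a standalone sketch; the warnable and logging are elided, and pickTKAStorage, stateDir, and chonkPath are illustrative names rather than identifiers from this diff:

    func pickTKAStorage(stateDir, chonkPath string) (tka.CompactableChonk, error) {
        if stateDir == "" {
            // No --statedir: AUMs are held only in memory and are lost when
            // tailscaled restarts, hence the InMemoryTailnetLockState warning.
            return &tka.Mem{}, nil
        }
        return tka.ChonkDir(chonkPath) // *tka.FS, persisted on disk
    }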
+// TODO(alexc): in-memory only func (b *LocalBackend) NetworkLockSign(nodeKey key.NodePublic, rotationPublic []byte) error { ourNodeKey, sig, err := func(nodeKey key.NodePublic, rotationPublic []byte) (key.NodePublic, tka.NodeKeySignature, error) { b.mu.Lock() diff --git a/ipn/ipnlocal/node_backend.go b/ipn/ipnlocal/node_backend.go index 3408d4cbb325d..dbe23e4d5245a 100644 --- a/ipn/ipnlocal/node_backend.go +++ b/ipn/ipnlocal/node_backend.go @@ -75,10 +75,11 @@ type nodeBackend struct { filterAtomic atomic.Pointer[filter.Filter] // initialized once and immutable - eventClient *eventbus.Client - filterPub *eventbus.Publisher[magicsock.FilterUpdate] - nodeViewsPub *eventbus.Publisher[magicsock.NodeViewsUpdate] - nodeMutsPub *eventbus.Publisher[magicsock.NodeMutationsUpdate] + eventClient *eventbus.Client + filterPub *eventbus.Publisher[magicsock.FilterUpdate] + nodeViewsPub *eventbus.Publisher[magicsock.NodeViewsUpdate] + nodeMutsPub *eventbus.Publisher[magicsock.NodeMutationsUpdate] + derpMapViewPub *eventbus.Publisher[tailcfg.DERPMapView] // TODO(nickkhyl): maybe use sync.RWMutex? mu sync.Mutex // protects the following fields @@ -121,6 +122,7 @@ func newNodeBackend(ctx context.Context, logf logger.Logf, bus *eventbus.Bus) *n nb.filterPub = eventbus.Publish[magicsock.FilterUpdate](nb.eventClient) nb.nodeViewsPub = eventbus.Publish[magicsock.NodeViewsUpdate](nb.eventClient) nb.nodeMutsPub = eventbus.Publish[magicsock.NodeMutationsUpdate](nb.eventClient) + nb.derpMapViewPub = eventbus.Publish[tailcfg.DERPMapView](nb.eventClient) nb.filterPub.Publish(magicsock.FilterUpdate{Filter: nb.filterAtomic.Load()}) return nb } @@ -435,6 +437,9 @@ func (nb *nodeBackend) SetNetMap(nm *netmap.NetworkMap) { if nm != nil { nv.SelfNode = nm.SelfNode nv.Peers = nm.Peers + nb.derpMapViewPub.Publish(nm.DERPMap.View()) + } else { + nb.derpMapViewPub.Publish(tailcfg.DERPMapView{}) } nb.nodeViewsPub.Publish(nv) } diff --git a/ipn/ipnlocal/profiles.go b/ipn/ipnlocal/profiles.go index 9c217637890cc..3e80cdaa93d1f 100644 --- a/ipn/ipnlocal/profiles.go +++ b/ipn/ipnlocal/profiles.go @@ -19,9 +19,7 @@ import ( "tailscale.com/ipn" "tailscale.com/ipn/ipnext" "tailscale.com/tailcfg" - "tailscale.com/types/key" "tailscale.com/types/logger" - "tailscale.com/types/persist" "tailscale.com/util/clientmetric" "tailscale.com/util/eventbus" ) @@ -656,14 +654,6 @@ func (pm *profileManager) loadSavedPrefs(k ipn.StateKey) (ipn.PrefsView, error) return ipn.PrefsView{}, err } savedPrefs := ipn.NewPrefs() - - // if supported by the platform, create an empty hardware attestation key to use when deserializing - // to avoid type exceptions from json.Unmarshaling into an interface{}. 
- hw, _ := key.NewEmptyHardwareAttestationKey() - savedPrefs.Persist = &persist.Persist{ - AttestationKey: hw, - } - if err := ipn.PrefsFromBytes(bs, savedPrefs); err != nil { return ipn.PrefsView{}, fmt.Errorf("parsing saved prefs: %v", err) } diff --git a/ipn/ipnlocal/profiles_test.go b/ipn/ipnlocal/profiles_test.go index deeab2ade9b15..60c92ff8d3493 100644 --- a/ipn/ipnlocal/profiles_test.go +++ b/ipn/ipnlocal/profiles_test.go @@ -151,7 +151,6 @@ func TestProfileDupe(t *testing.T) { ID: tailcfg.UserID(user), LoginName: fmt.Sprintf("user%d@example.com", user), }, - AttestationKey: nil, } } user1Node1 := newPersist(1, 1) diff --git a/ipn/localapi/debug.go b/ipn/localapi/debug.go index b3b919d31ede2..8aca7f0093f7d 100644 --- a/ipn/localapi/debug.go +++ b/ipn/localapi/debug.go @@ -19,6 +19,7 @@ import ( "sync" "time" + "tailscale.com/client/tailscale/apitype" "tailscale.com/feature" "tailscale.com/feature/buildfeatures" "tailscale.com/ipn" @@ -39,6 +40,7 @@ func init() { Register("debug-packet-filter-matches", (*Handler).serveDebugPacketFilterMatches) Register("debug-packet-filter-rules", (*Handler).serveDebugPacketFilterRules) Register("debug-peer-endpoint-changes", (*Handler).serveDebugPeerEndpointChanges) + Register("debug-optional-features", (*Handler).serveDebugOptionalFeatures) } func (h *Handler) serveDebugPeerEndpointChanges(w http.ResponseWriter, r *http.Request) { @@ -463,3 +465,11 @@ func (h *Handler) serveDebugLog(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } + +func (h *Handler) serveDebugOptionalFeatures(w http.ResponseWriter, r *http.Request) { + of := &apitype.OptionalFeatures{ + Features: feature.Registered(), + } + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(of) +} diff --git a/ipn/prefs_test.go b/ipn/prefs_test.go index 2336164096c14..3339a631ce827 100644 --- a/ipn/prefs_test.go +++ b/ipn/prefs_test.go @@ -501,7 +501,7 @@ func TestPrefsPretty(t *testing.T) { }, }, "linux", - `Prefs{ra=false dns=false want=false routes=[] nf=off update=off Persist{o=, n=[B1VKl] u="" ak=-}}`, + `Prefs{ra=false dns=false want=false routes=[] nf=off update=off Persist{o=, n=[B1VKl] u=""}}`, }, { Prefs{ diff --git a/logtail/logtail.go b/logtail/logtail.go index 52823fedf4309..675422890149c 100644 --- a/logtail/logtail.go +++ b/logtail/logtail.go @@ -124,7 +124,6 @@ func NewLogger(cfg Config, logf tslogger.Logf) *Logger { if cfg.Bus != nil { l.eventClient = cfg.Bus.Client("logtail.Logger") - l.changeDeltaSub = eventbus.Subscribe[netmon.ChangeDelta](l.eventClient) } l.SetSockstatsLabel(sockstats.LabelLogtailLogger) l.compressLogs = cfg.CompressLogs @@ -163,7 +162,6 @@ type Logger struct { httpDoCalls atomic.Int32 sockstatsLabel atomicSocktatsLabel eventClient *eventbus.Client - changeDeltaSub *eventbus.Subscriber[netmon.ChangeDelta] procID uint32 includeProcSequence bool @@ -429,23 +427,8 @@ func (l *Logger) internetUp() bool { func (l *Logger) awaitInternetUp(ctx context.Context) { if l.eventClient != nil { - for { - if l.internetUp() { - return - } - select { - case <-ctx.Done(): - return // give up - case <-l.changeDeltaSub.Done(): - return // give up (closing down) - case delta := <-l.changeDeltaSub.Events(): - if delta.New.AnyInterfaceUp() || l.internetUp() { - fmt.Fprintf(l.stderr, "logtail: internet back up\n") - return - } - fmt.Fprintf(l.stderr, "logtail: network changed, but is not up") - } - } + l.awaitInternetUpBus(ctx) + return } upc := make(chan bool, 1) defer l.netMonitor.RegisterChangeCallback(func(delta 
*netmon.ChangeDelta) { @@ -466,6 +449,24 @@ func (l *Logger) awaitInternetUp(ctx context.Context) { } } +func (l *Logger) awaitInternetUpBus(ctx context.Context) { + if l.internetUp() { + return + } + sub := eventbus.Subscribe[netmon.ChangeDelta](l.eventClient) + defer sub.Close() + select { + case delta := <-sub.Events(): + if delta.New.AnyInterfaceUp() { + fmt.Fprintf(l.stderr, "logtail: internet back up\n") + return + } + fmt.Fprintf(l.stderr, "logtail: network changed, but is not up") + case <-ctx.Done(): + return + } +} + // upload uploads body to the log server. // origlen indicates the pre-compression body length. // origlen of -1 indicates that the body is not compressed. diff --git a/net/netmon/loghelper.go b/net/netmon/loghelper.go index 2e28e8cda7895..675762cd10b18 100644 --- a/net/netmon/loghelper.go +++ b/net/netmon/loghelper.go @@ -18,13 +18,13 @@ import ( // done. func LinkChangeLogLimiter(ctx context.Context, logf logger.Logf, nm *Monitor) logger.Logf { var formatSeen sync.Map // map[string]bool - nm.b.Monitor(nm.changeDeltaWatcher(nm.b, ctx, func(cd ChangeDelta) { + sub := eventbus.SubscribeFunc(nm.b, func(cd ChangeDelta) { // If we're in a major change or a time jump, clear the seen map. if cd.Major || cd.TimeJumped { formatSeen.Clear() } - })) - + }) + context.AfterFunc(ctx, sub.Close) return func(format string, args ...any) { // We only store 'true' in the map, so if it's present then it // means we've already logged this format string. @@ -42,19 +42,3 @@ func LinkChangeLogLimiter(ctx context.Context, logf logger.Logf, nm *Monitor) lo logf(format, args...) } } - -func (nm *Monitor) changeDeltaWatcher(ec *eventbus.Client, ctx context.Context, fn func(ChangeDelta)) func(*eventbus.Client) { - sub := eventbus.Subscribe[ChangeDelta](ec) - return func(ec *eventbus.Client) { - for { - select { - case <-ctx.Done(): - return - case <-sub.Done(): - return - case change := <-sub.Events(): - fn(change) - } - } - } -} diff --git a/net/udprelay/server.go b/net/udprelay/server.go index 83831dd698164..86dee18e12f67 100644 --- a/net/udprelay/server.go +++ b/net/udprelay/server.go @@ -21,7 +21,6 @@ import ( "go4.org/mem" "golang.org/x/net/ipv6" - "tailscale.com/client/local" "tailscale.com/disco" "tailscale.com/net/batching" "tailscale.com/net/netaddr" @@ -32,6 +31,7 @@ import ( "tailscale.com/net/stun" "tailscale.com/net/udprelay/endpoint" "tailscale.com/net/udprelay/status" + "tailscale.com/tailcfg" "tailscale.com/tstime" "tailscale.com/types/key" "tailscale.com/types/logger" @@ -72,16 +72,23 @@ type Server struct { closeCh chan struct{} netChecker *netcheck.Client - mu sync.Mutex // guards the following fields + mu sync.Mutex // guards the following fields + derpMap *tailcfg.DERPMap addrDiscoveryOnce bool // addrDiscovery completed once (successfully or unsuccessfully) addrPorts []netip.AddrPort // the ip:port pairs returned as candidate endpoints closed bool lamportID uint64 - vniPool []uint32 // the pool of available VNIs + nextVNI uint32 byVNI map[uint32]*serverEndpoint byDisco map[key.SortedPairOfDiscoPublic]*serverEndpoint } +const ( + minVNI = uint32(1) + maxVNI = uint32(1<<24 - 1) + totalPossibleVNI = maxVNI - minVNI + 1 +) + // serverEndpoint contains Server-internal [endpoint.ServerEndpoint] state. // serverEndpoint methods are not thread-safe. 
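The nextVNI field above replaces the old vniPool slice, which pre-allocated all ~16.7 million possible VNIs up front. As a self-contained sketch of the circular scan that getNextVNILocked performs later in this diff:

    // nextFree advances a cursor through [minVNI, maxVNI], wrapping at the top,
    // and returns the first value not present in inUse. It gives up after
    // totalPossibleVNI probes, so exhaustion is reported rather than spun on.
    func nextFree(cursor *uint32, inUse map[uint32]bool) (uint32, bool) {
        for i := uint32(0); i < totalPossibleVNI; i++ {
            vni := *cursor
            if vni == maxVNI {
                *cursor = minVNI
            } else {
                *cursor++
            }
            if !inUse[vni] {
                return vni, true
            }
        }
        return 0, false // every VNI in the range is allocated
    }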
type serverEndpoint struct { @@ -281,15 +288,10 @@ func NewServer(logf logger.Logf, port int, overrideAddrs []netip.Addr) (s *Serve steadyStateLifetime: defaultSteadyStateLifetime, closeCh: make(chan struct{}), byDisco: make(map[key.SortedPairOfDiscoPublic]*serverEndpoint), + nextVNI: minVNI, byVNI: make(map[uint32]*serverEndpoint), } s.discoPublic = s.disco.Public() - // TODO: instead of allocating 10s of MBs for the full pool, allocate - // smaller chunks and increase as needed - s.vniPool = make([]uint32, 0, 1<<24-1) - for i := 1; i < 1<<24; i++ { - s.vniPool = append(s.vniPool, uint32(i)) - } // TODO(creachadair): Find a way to plumb this in during initialization. // As-written, messages published here will not be seen by other components @@ -373,15 +375,12 @@ func (s *Server) addrDiscoveryLoop() { } } - // fetch DERPMap to feed to netcheck - derpMapCtx, derpMapCancel := context.WithTimeout(context.Background(), time.Second) - defer derpMapCancel() - localClient := &local.Client{} - // TODO(jwhited): We are in-process so use eventbus or similar. - // local.Client gets us going. - dm, err := localClient.CurrentDERPMap(derpMapCtx) - if err != nil { - return nil, err + dm := s.getDERPMap() + if dm == nil { + // We don't have a DERPMap which is required to dynamically + // discover external addresses, but we can return the endpoints we + // do have. + return addrPorts.Slice(), nil } // get addrPorts as visible from DERP @@ -557,7 +556,6 @@ func (s *Server) Close() error { defer s.mu.Unlock() clear(s.byVNI) clear(s.byDisco) - s.vniPool = nil s.closed = true s.bus.Close() }) @@ -579,7 +577,6 @@ func (s *Server) endpointGCLoop() { if v.isExpired(now, s.bindLifetime, s.steadyStateLifetime) { delete(s.byDisco, k) delete(s.byVNI, v.vni) - s.vniPool = append(s.vniPool, v.vni) } } } @@ -714,6 +711,27 @@ func (e ErrServerNotReady) Error() string { return fmt.Sprintf("server not ready, retry after %v", e.RetryAfter) } +// getNextVNILocked returns the next available VNI. It implements the +// "Traditional BSD Port Selection Algorithm" from RFC6056. This algorithm does +// not attempt to obfuscate the selection, i.e. the selection is predictable. +// For now, we favor simplicity and reducing VNI re-use over more complex +// ephemeral port (VNI) selection algorithms. +func (s *Server) getNextVNILocked() (uint32, error) { + for i := uint32(0); i < totalPossibleVNI; i++ { + vni := s.nextVNI + if vni == maxVNI { + s.nextVNI = minVNI + } else { + s.nextVNI++ + } + _, ok := s.byVNI[vni] + if !ok { + return vni, nil + } + } + return 0, errors.New("VNI pool exhausted") +} + // AllocateEndpoint allocates an [endpoint.ServerEndpoint] for the provided pair // of [key.DiscoPublic]'s. If an allocation already exists for discoA and discoB // it is returned without modification/reallocation. 
AllocateEndpoint returns @@ -762,8 +780,9 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (endpoint.Serv }, nil } - if len(s.vniPool) == 0 { - return endpoint.ServerEndpoint{}, errors.New("VNI pool exhausted") + vni, err := s.getNextVNILocked() + if err != nil { + return endpoint.ServerEndpoint{}, err } s.lamportID++ @@ -771,10 +790,10 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (endpoint.Serv discoPubKeys: pair, lamportID: s.lamportID, allocatedAt: time.Now(), + vni: vni, } e.discoSharedSecrets[0] = s.disco.Shared(e.discoPubKeys.Get()[0]) e.discoSharedSecrets[1] = s.disco.Shared(e.discoPubKeys.Get()[1]) - e.vni, s.vniPool = s.vniPool[0], s.vniPool[1:] s.byDisco[pair] = e s.byVNI[e.vni] = e @@ -828,3 +847,21 @@ func (s *Server) GetSessions() []status.ServerSession { } return sessions } + +// SetDERPMapView sets the [tailcfg.DERPMapView] to use for future netcheck +// reports. +func (s *Server) SetDERPMapView(view tailcfg.DERPMapView) { + s.mu.Lock() + defer s.mu.Unlock() + if !view.Valid() { + s.derpMap = nil + return + } + s.derpMap = view.AsStruct() +} + +func (s *Server) getDERPMap() *tailcfg.DERPMap { + s.mu.Lock() + defer s.mu.Unlock() + return s.derpMap +} diff --git a/net/udprelay/server_test.go b/net/udprelay/server_test.go index 8fc4a4f78cb47..bf7f0a9b5f1de 100644 --- a/net/udprelay/server_test.go +++ b/net/udprelay/server_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + qt "github.com/frankban/quicktest" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "go4.org/mem" @@ -319,3 +320,25 @@ func TestServer(t *testing.T) { }) } } + +func TestServer_getNextVNILocked(t *testing.T) { + t.Parallel() + c := qt.New(t) + s := &Server{ + nextVNI: minVNI, + byVNI: make(map[uint32]*serverEndpoint), + } + for i := uint64(0); i < uint64(totalPossibleVNI); i++ { + vni, err := s.getNextVNILocked() + if err != nil { // using quicktest here triples test time + t.Fatal(err) + } + s.byVNI[vni] = nil + } + c.Assert(s.nextVNI, qt.Equals, minVNI) + _, err := s.getNextVNILocked() + c.Assert(err, qt.IsNotNil) + delete(s.byVNI, minVNI) + _, err = s.getNextVNILocked() + c.Assert(err, qt.IsNil) +} diff --git a/sessionrecording/connect.go b/sessionrecording/connect.go index 8abf9dd7e9142..9d20b41f9b31a 100644 --- a/sessionrecording/connect.go +++ b/sessionrecording/connect.go @@ -405,10 +405,7 @@ func clientHTTP2(dialCtx context.Context, dial netx.DialFunc) *http.Client { return &http.Client{ Transport: &http.Transport{ Protocols: &p, - // Pretend like we're using TLS, but actually use the provided - // DialFunc underneath. This is necessary to convince the transport - // to actually dial. 
- DialTLSContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { perAttemptCtx, cancel := context.WithTimeout(ctx, perDialAttemptTimeout) defer cancel() go func() { diff --git a/sessionrecording/connect_test.go b/sessionrecording/connect_test.go index cacf061d79b79..e834828f5a6cc 100644 --- a/sessionrecording/connect_test.go +++ b/sessionrecording/connect_test.go @@ -21,6 +21,7 @@ import ( "golang.org/x/net/http2" "golang.org/x/net/http2/h2c" + "tailscale.com/net/memnet" ) func TestConnectToRecorder(t *testing.T) { @@ -145,7 +146,14 @@ func TestConnectToRecorder(t *testing.T) { t.Run(tt.desc, func(t *testing.T) { mux, uploadHash := tt.setup(t) - srv := httptest.NewUnstartedServer(mux) + memNet := &memnet.Network{} + ln := memNet.NewLocalTCPListener() + + srv := &httptest.Server{ + Config: &http.Server{Handler: mux}, + Listener: ln, + } + if tt.http2 { // Wire up h2c-compatible HTTP/2 server. This is optional // because the v1 recorder didn't support HTTP/2 and we try to @@ -159,10 +167,8 @@ func TestConnectToRecorder(t *testing.T) { srv.Start() t.Cleanup(srv.Close) - d := new(net.Dialer) - ctx := context.Background() - w, _, errc, err := ConnectToRecorder(ctx, []netip.AddrPort{netip.MustParseAddrPort(srv.Listener.Addr().String())}, d.DialContext) + w, _, errc, err := ConnectToRecorder(ctx, []netip.AddrPort{netip.MustParseAddrPort(ln.Addr().String())}, memNet.Dial) if err != nil { t.Fatalf("ConnectToRecorder: %v", err) } diff --git a/shell.nix b/shell.nix index ec345998afe30..99fc7fa4de547 100644 --- a/shell.nix +++ b/shell.nix @@ -16,4 +16,4 @@ ) { src = ./.; }).shellNix -# nix-direnv cache busting line: sha256-rV3C2Vi48FCifGt58OdEO4+Av0HRIs8sUJVvp/gEBLw= +# nix-direnv cache busting line: sha256-AUOjLomba75qfzb9Vxc0Sktyeces6hBSuOMgboWcDnE= diff --git a/tka/tailchonk.go b/tka/tailchonk.go index cb683c273d033..616abaf2b4190 100644 --- a/tka/tailchonk.go +++ b/tka/tailchonk.go @@ -9,6 +9,8 @@ import ( "bytes" "errors" "fmt" + "log" + "maps" "os" "path/filepath" "slices" @@ -74,38 +76,51 @@ type CompactableChonk interface { // PurgeAUMs permanently and irrevocably deletes the specified // AUMs from storage. PurgeAUMs(hashes []AUMHash) error + + // RemoveAll permanently and completely clears the TKA state. This should + // be called when the user disables Tailnet Lock. + RemoveAll() error } // Mem implements in-memory storage of TKA state, suitable for -// tests. +// tests or cases where filesystem storage is unavailable. // // Mem implements the Chonk interface. +// +// Mem is thread-safe. type Mem struct { - l sync.RWMutex + mu sync.RWMutex aums map[AUMHash]AUM + commitTimes map[AUMHash]time.Time + + // parentIndex is a map of AUMs to the AUMs for which they are + // the parent. + // + // For example, if parent index is {1 -> {2, 3, 4}}, that means + // that AUMs 2, 3, 4 all have aum.PrevAUMHash = 1. parentIndex map[AUMHash][]AUMHash lastActiveAncestor *AUMHash } func (c *Mem) SetLastActiveAncestor(hash AUMHash) error { - c.l.Lock() - defer c.l.Unlock() + c.mu.Lock() + defer c.mu.Unlock() c.lastActiveAncestor = &hash return nil } func (c *Mem) LastActiveAncestor() (*AUMHash, error) { - c.l.RLock() - defer c.l.RUnlock() + c.mu.RLock() + defer c.mu.RUnlock() return c.lastActiveAncestor, nil } // Heads returns AUMs for which there are no children. In other // words, the latest AUM in all chains (the 'leaf'). 
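 //
 // With the parentIndex example above ({1 -> {2, 3, 4}}), AUM 1 is not a head,
 // while AUMs 2, 3, and 4 are heads unless they in turn appear as parentIndex
 // keys. A hedged sketch of the check, not the literal implementation:
 //
 //	_, hasChildren := c.parentIndex[h]
 //	isHead := !hasChildren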
func (c *Mem) Heads() ([]AUM, error) { - c.l.RLock() - defer c.l.RUnlock() + c.mu.RLock() + defer c.mu.RUnlock() out := make([]AUM, 0, 6) // An AUM is a 'head' if there are no nodes for which it is the parent. @@ -119,8 +134,8 @@ func (c *Mem) Heads() ([]AUM, error) { // AUM returns the AUM with the specified digest. func (c *Mem) AUM(hash AUMHash) (AUM, error) { - c.l.RLock() - defer c.l.RUnlock() + c.mu.RLock() + defer c.mu.RUnlock() aum, ok := c.aums[hash] if !ok { return AUM{}, os.ErrNotExist @@ -131,8 +146,8 @@ func (c *Mem) AUM(hash AUMHash) (AUM, error) { // ChildAUMs returns all AUMs with a specified previous // AUM hash. func (c *Mem) ChildAUMs(prevAUMHash AUMHash) ([]AUM, error) { - c.l.RLock() - defer c.l.RUnlock() + c.mu.RLock() + defer c.mu.RUnlock() out := make([]AUM, 0, 6) for _, entry := range c.parentIndex[prevAUMHash] { out = append(out, c.aums[entry]) @@ -146,17 +161,19 @@ func (c *Mem) ChildAUMs(prevAUMHash AUMHash) ([]AUM, error) { // as the rest of the TKA implementation assumes that only // verified AUMs are stored. func (c *Mem) CommitVerifiedAUMs(updates []AUM) error { - c.l.Lock() - defer c.l.Unlock() + c.mu.Lock() + defer c.mu.Unlock() if c.aums == nil { c.parentIndex = make(map[AUMHash][]AUMHash, 64) c.aums = make(map[AUMHash]AUM, 64) + c.commitTimes = make(map[AUMHash]time.Time, 64) } updateLoop: for _, aum := range updates { aumHash := aum.Hash() c.aums[aumHash] = aum + c.commitTimes[aumHash] = time.Now() parent, ok := aum.Parent() if ok { @@ -172,6 +189,71 @@ updateLoop: return nil } +// RemoveAll permanently and completely clears the TKA state. +func (c *Mem) RemoveAll() error { + c.mu.Lock() + defer c.mu.Unlock() + c.aums = nil + c.commitTimes = nil + c.parentIndex = nil + c.lastActiveAncestor = nil + return nil +} + +// AllAUMs returns all AUMs stored in the chonk. +func (c *Mem) AllAUMs() ([]AUMHash, error) { + c.mu.RLock() + defer c.mu.RUnlock() + + return slices.Collect(maps.Keys(c.aums)), nil +} + +// CommitTime returns the time at which the AUM was committed. +// +// If the AUM does not exist, then os.ErrNotExist is returned. +func (c *Mem) CommitTime(h AUMHash) (time.Time, error) { + c.mu.RLock() + defer c.mu.RUnlock() + + t, ok := c.commitTimes[h] + if ok { + return t, nil + } else { + return time.Time{}, os.ErrNotExist + } +} + +// PurgeAUMs marks the specified AUMs for deletion from storage. +func (c *Mem) PurgeAUMs(hashes []AUMHash) error { + c.mu.Lock() + defer c.mu.Unlock() + + for _, h := range hashes { + // Remove the deleted AUM from the list of its parents' children. + // + // However, we leave the list of this AUM's children in parentIndex, + // so we can find them later in ChildAUMs(). + if aum, ok := c.aums[h]; ok { + parent, hasParent := aum.Parent() + if hasParent { + c.parentIndex[parent] = slices.DeleteFunc( + c.parentIndex[parent], + func(other AUMHash) bool { return bytes.Equal(h[:], other[:]) }, + ) + if len(c.parentIndex[parent]) == 0 { + delete(c.parentIndex, parent) + } + } + } + + // Delete this AUM from the list of AUMs and commit times. + delete(c.aums, h) + delete(c.commitTimes, h) + } + + return nil +} + // FS implements filesystem storage of TKA state. // // FS implements the Chonk interface. @@ -183,6 +265,10 @@ type FS struct { // ChonkDir returns an implementation of Chonk which uses the // given directory to store TKA state. 
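 //
 // A hedged usage sketch (stateDir is illustrative): the directory is created
 // if needed, and RemoveAll wipes the state when the user disables Tailnet
 // Lock.
 //
 //	chonk, err := ChonkDir(filepath.Join(stateDir, "tka"))
 //	if err != nil {
 //		return err
 //	}
 //	// ...later, when Tailnet Lock is disabled:
 //	if err := chonk.RemoveAll(); err != nil {
 //		return err
 //	}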
 func ChonkDir(dir string) (*FS, error) {
+	if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) {
+		return nil, fmt.Errorf("creating chonk root dir: %v", err)
+	}
+
 	stat, err := os.Stat(dir)
 	if err != nil {
 		return nil, err
@@ -375,6 +461,11 @@ func (c *FS) Heads() ([]AUM, error) {
 	return out, nil
 }
 
+// RemoveAll permanently and completely clears the TKA state.
+func (c *FS) RemoveAll() error {
+	return os.RemoveAll(c.base)
+}
+
 // AllAUMs returns all AUMs stored in the chonk.
 func (c *FS) AllAUMs() ([]AUMHash, error) {
 	c.mu.RLock()
@@ -403,9 +494,16 @@ func (c *FS) scanHashes(eachHashInfo func(*fsHashInfo)) error {
 		return fmt.Errorf("reading prefix dir: %v", err)
 	}
 	for _, file := range files {
+		// Ignore files whose names aren't valid AUM hashes; these may be
+		// temporary files partway through being written, or other files
+		// added by the OS (like .DS_Store).
+		// TODO(alexc): it might be useful to append a suffix like `.aum` to
+		// filenames, so we can more easily distinguish between AUMs and
+		// arbitrary other files.
 		var h AUMHash
 		if err := h.UnmarshalText([]byte(file.Name())); err != nil {
-			return fmt.Errorf("invalid aum file: %s: %w", file.Name(), err)
+			log.Printf("ignoring unexpected non-AUM: %s: %v", file.Name(), err)
+			continue
 		}
 		info, err := c.get(h)
 		if err != nil {
diff --git a/tka/tailchonk_test.go b/tka/tailchonk_test.go
index 08686598033b8..70b7dc9a72fbb 100644
--- a/tka/tailchonk_test.go
+++ b/tka/tailchonk_test.go
@@ -7,6 +7,7 @@ import (
 	"bytes"
 	"os"
 	"path/filepath"
+	"slices"
 	"sync"
 	"testing"
 	"time"
@@ -83,6 +84,86 @@ func TestTailchonkFS_CommitTime(t *testing.T) {
 	}
 }
 
+// If we were interrupted while writing a temporary file, AllAUMs()
+// should ignore it when scanning the AUM directory.
+func TestTailchonkFS_IgnoreTempFile(t *testing.T) {
+	base := t.TempDir()
+	chonk := must.Get(ChonkDir(base))
+	parentHash := randHash(t, 1)
+	aum := AUM{MessageKind: AUMNoOp, PrevAUMHash: parentHash[:]}
+	must.Do(chonk.CommitVerifiedAUMs([]AUM{aum}))
+
+	writeAUMFile := func(filename, contents string) {
+		t.Helper()
+		if err := os.MkdirAll(filepath.Join(base, filename[0:2]), os.ModePerm); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(filepath.Join(base, filename[0:2], filename), []byte(contents), 0600); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Check that calling AllAUMs() returns the single committed AUM.
+	got, err := chonk.AllAUMs()
+	if err != nil {
+		t.Fatalf("AllAUMs() failed: %v", err)
+	}
+	want := []AUMHash{aum.Hash()}
+	if !slices.Equal(got, want) {
+		t.Fatalf("AllAUMs() is wrong: got %v, want %v", got, want)
+	}
+
+	// Write some temporary files named like partially-committed AUMs,
+	// then check that AllAUMs() still returns only the committed AUM.
+	writeAUMFile("AUM1234.tmp", "incomplete AUM\n")
+	writeAUMFile("AUM1234.tmp_123", "second incomplete AUM\n")
+
+	got, err = chonk.AllAUMs()
+	if err != nil {
+		t.Fatalf("AllAUMs() failed: %v", err)
+	}
+	if !slices.Equal(got, want) {
+		t.Fatalf("AllAUMs() is wrong: got %v, want %v", got, want)
+	}
+}
+
+// If we use a non-existent directory with filesystem Chonk storage,
+// it's automatically created.
+func TestTailchonkFS_CreateChonkDir(t *testing.T) { + base := filepath.Join(t.TempDir(), "a", "b", "c") + + chonk, err := ChonkDir(base) + if err != nil { + t.Fatalf("ChonkDir: %v", err) + } + + aum := AUM{MessageKind: AUMNoOp} + must.Do(chonk.CommitVerifiedAUMs([]AUM{aum})) + + got, err := chonk.AUM(aum.Hash()) + if err != nil { + t.Errorf("Chonk.AUM: %v", err) + } + if diff := cmp.Diff(got, aum); diff != "" { + t.Errorf("wrong AUM; (-got+want):%v", diff) + } + + if _, err := os.Stat(base); err != nil { + t.Errorf("os.Stat: %v", err) + } +} + +// You can't use a file as the root of your filesystem Chonk storage. +func TestTailchonkFS_CannotUseFile(t *testing.T) { + base := filepath.Join(t.TempDir(), "tka_storage.txt") + must.Do(os.WriteFile(base, []byte("this won't work"), 0644)) + + _, err := ChonkDir(base) + if err == nil { + t.Fatal("ChonkDir succeeded; expected an error") + } +} + func TestMarkActiveChain(t *testing.T) { type aumTemplate struct { AUM AUM @@ -452,7 +533,7 @@ func (c *compactingChonkFake) PurgeAUMs(hashes []AUMHash) error { // Avoid go vet complaining about copying a lock value func cloneMem(src, dst *Mem) { - dst.l = sync.RWMutex{} + dst.mu = sync.RWMutex{} dst.aums = src.aums dst.parentIndex = src.parentIndex dst.lastActiveAncestor = src.lastActiveAncestor diff --git a/tstest/chonktest/chonktest.go b/tstest/chonktest/chonktest.go index bfe394b28fcaf..404f1ec47f16c 100644 --- a/tstest/chonktest/chonktest.go +++ b/tstest/chonktest/chonktest.go @@ -9,6 +9,7 @@ package chonktest import ( "bytes" "encoding/binary" + "errors" "math/rand" "os" "testing" @@ -253,4 +254,50 @@ func RunCompactableChonkTests(t *testing.T, newChonk func(t *testing.T) tka.Comp t.Fatalf("ChildAUMs() output differs (-want, +got):\n%s", diff) } }) + + t.Run("RemoveAll", func(t *testing.T) { + t.Parallel() + chonk := newChonk(t) + parentHash := randHash(t, 1) + data := []tka.AUM{ + { + MessageKind: tka.AUMRemoveKey, + KeyID: []byte{1, 2}, + PrevAUMHash: parentHash[:], + }, + { + MessageKind: tka.AUMRemoveKey, + KeyID: []byte{3, 4}, + PrevAUMHash: parentHash[:], + }, + } + + if err := chonk.CommitVerifiedAUMs(data); err != nil { + t.Fatalf("CommitVerifiedAUMs failed: %v", err) + } + + // Check we can retrieve the AUMs we just stored + for _, want := range data { + got, err := chonk.AUM(want.Hash()) + if err != nil { + t.Fatalf("could not get %s: %v", want.Hash(), err) + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("stored AUM %s differs (-want, +got):\n%s", want.Hash(), diff) + } + } + + // Call RemoveAll() to drop all the AUM state + if err := chonk.RemoveAll(); err != nil { + t.Fatalf("RemoveAll failed: %v", err) + } + + // Check we can no longer retrieve the previously-stored AUMs + for _, want := range data { + aum, err := chonk.AUM(want.Hash()) + if !errors.Is(err, os.ErrNotExist) { + t.Fatalf("expected os.ErrNotExist for %s, instead got aum=%v, err=%v", want.Hash(), aum, err) + } + } + }) } diff --git a/tstest/chonktest/tailchonk_test.go b/tstest/chonktest/tailchonk_test.go index ce6b043248de1..6dfab798ed11f 100644 --- a/tstest/chonktest/tailchonk_test.go +++ b/tstest/chonktest/tailchonk_test.go @@ -39,6 +39,12 @@ func TestImplementsCompactableChonk(t *testing.T) { name string newChonk func(t *testing.T) tka.CompactableChonk }{ + { + name: "Mem", + newChonk: func(t *testing.T) tka.CompactableChonk { + return &tka.Mem{} + }, + }, { name: "FS", newChonk: func(t *testing.T) tka.CompactableChonk { diff --git a/tstest/integration/integration_test.go 
b/tstest/integration/integration_test.go index 234bb8c6ec11a..64f49c7b80afd 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -175,6 +175,28 @@ func TestControlKnobs(t *testing.T) { } } +func TestExpectedFeaturesLinked(t *testing.T) { + tstest.Shard(t) + tstest.Parallel(t) + env := NewTestEnv(t) + n1 := NewTestNode(t, env) + + d1 := n1.StartDaemon() + n1.AwaitResponding() + lc := n1.LocalClient() + got, err := lc.QueryOptionalFeatures(t.Context()) + if err != nil { + t.Fatal(err) + } + if !got.Features["portmapper"] { + t.Errorf("optional feature portmapper unexpectedly not found: got %v", got.Features) + } + + d1.MustCleanShutdown(t) + + t.Logf("number of HTTP logcatcher requests: %v", env.LogCatcher.numRequests()) +} + func TestCollectPanic(t *testing.T) { flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15865") tstest.Shard(t) diff --git a/types/persist/persist.go b/types/persist/persist.go index 4b62c79ddd186..d888a6afb6af5 100644 --- a/types/persist/persist.go +++ b/types/persist/persist.go @@ -26,7 +26,6 @@ type Persist struct { UserProfile tailcfg.UserProfile NetworkLockKey key.NLPrivate NodeID tailcfg.StableNodeID - AttestationKey key.HardwareAttestationKey `json:",omitempty"` // DisallowedTKAStateIDs stores the tka.State.StateID values which // this node will not operate network lock on. This is used to @@ -85,20 +84,11 @@ func (p *Persist) Equals(p2 *Persist) bool { return false } - var pub, p2Pub key.HardwareAttestationPublic - if p.AttestationKey != nil && !p.AttestationKey.IsZero() { - pub = key.HardwareAttestationPublicFromPlatformKey(p.AttestationKey) - } - if p2.AttestationKey != nil && !p2.AttestationKey.IsZero() { - p2Pub = key.HardwareAttestationPublicFromPlatformKey(p2.AttestationKey) - } - return p.PrivateNodeKey.Equal(p2.PrivateNodeKey) && p.OldPrivateNodeKey.Equal(p2.OldPrivateNodeKey) && p.UserProfile.Equal(&p2.UserProfile) && p.NetworkLockKey.Equal(p2.NetworkLockKey) && p.NodeID == p2.NodeID && - pub.Equal(p2Pub) && reflect.DeepEqual(nilIfEmpty(p.DisallowedTKAStateIDs), nilIfEmpty(p2.DisallowedTKAStateIDs)) } @@ -106,16 +96,12 @@ func (p *Persist) Pretty() string { var ( ok, nk key.NodePublic ) - akString := "-" if !p.OldPrivateNodeKey.IsZero() { ok = p.OldPrivateNodeKey.Public() } if !p.PrivateNodeKey.IsZero() { nk = p.PublicNodeKey() } - if p.AttestationKey != nil && !p.AttestationKey.IsZero() { - akString = fmt.Sprintf("%v", p.AttestationKey.Public()) - } - return fmt.Sprintf("Persist{o=%v, n=%v u=%#v ak=%s}", - ok.ShortString(), nk.ShortString(), p.UserProfile.LoginName, akString) + return fmt.Sprintf("Persist{o=%v, n=%v u=%#v}", + ok.ShortString(), nk.ShortString(), p.UserProfile.LoginName) } diff --git a/types/persist/persist_clone.go b/types/persist/persist_clone.go index 9dbe7e0f6fa6d..680419ff2f30b 100644 --- a/types/persist/persist_clone.go +++ b/types/persist/persist_clone.go @@ -19,9 +19,6 @@ func (src *Persist) Clone() *Persist { } dst := new(Persist) *dst = *src - if src.AttestationKey != nil { - dst.AttestationKey = src.AttestationKey.Clone() - } dst.DisallowedTKAStateIDs = append(src.DisallowedTKAStateIDs[:0:0], src.DisallowedTKAStateIDs...) 
return dst } @@ -34,6 +31,5 @@ var _PersistCloneNeedsRegeneration = Persist(struct { UserProfile tailcfg.UserProfile NetworkLockKey key.NLPrivate NodeID tailcfg.StableNodeID - AttestationKey key.HardwareAttestationKey DisallowedTKAStateIDs []string }{}) diff --git a/types/persist/persist_test.go b/types/persist/persist_test.go index 713114b74dcd5..dbf2a6d8c7662 100644 --- a/types/persist/persist_test.go +++ b/types/persist/persist_test.go @@ -21,7 +21,7 @@ func fieldsOf(t reflect.Type) (fields []string) { } func TestPersistEqual(t *testing.T) { - persistHandles := []string{"PrivateNodeKey", "OldPrivateNodeKey", "UserProfile", "NetworkLockKey", "NodeID", "AttestationKey", "DisallowedTKAStateIDs"} + persistHandles := []string{"PrivateNodeKey", "OldPrivateNodeKey", "UserProfile", "NetworkLockKey", "NodeID", "DisallowedTKAStateIDs"} if have := fieldsOf(reflect.TypeFor[Persist]()); !reflect.DeepEqual(have, persistHandles) { t.Errorf("Persist.Equal check might be out of sync\nfields: %q\nhandled: %q\n", have, persistHandles) diff --git a/types/persist/persist_view.go b/types/persist/persist_view.go index dbf8294ef5a7a..7d1507468fc65 100644 --- a/types/persist/persist_view.go +++ b/types/persist/persist_view.go @@ -89,11 +89,10 @@ func (v *PersistView) UnmarshalJSONFrom(dec *jsontext.Decoder) error { func (v PersistView) PrivateNodeKey() key.NodePrivate { return v.ж.PrivateNodeKey } // needed to request key rotation -func (v PersistView) OldPrivateNodeKey() key.NodePrivate { return v.ж.OldPrivateNodeKey } -func (v PersistView) UserProfile() tailcfg.UserProfile { return v.ж.UserProfile } -func (v PersistView) NetworkLockKey() key.NLPrivate { return v.ж.NetworkLockKey } -func (v PersistView) NodeID() tailcfg.StableNodeID { return v.ж.NodeID } -func (v PersistView) AttestationKey() tailcfg.StableNodeID { panic("unsupported") } +func (v PersistView) OldPrivateNodeKey() key.NodePrivate { return v.ж.OldPrivateNodeKey } +func (v PersistView) UserProfile() tailcfg.UserProfile { return v.ж.UserProfile } +func (v PersistView) NetworkLockKey() key.NLPrivate { return v.ж.NetworkLockKey } +func (v PersistView) NodeID() tailcfg.StableNodeID { return v.ж.NodeID } // DisallowedTKAStateIDs stores the tka.State.StateID values which // this node will not operate network lock on. This is used to @@ -111,6 +110,5 @@ var _PersistViewNeedsRegeneration = Persist(struct { UserProfile tailcfg.UserProfile NetworkLockKey key.NLPrivate NodeID tailcfg.StableNodeID - AttestationKey key.HardwareAttestationKey DisallowedTKAStateIDs []string }{}) diff --git a/util/eventbus/bus.go b/util/eventbus/bus.go index d1507d8e67587..b1639136a5133 100644 --- a/util/eventbus/bus.go +++ b/util/eventbus/bus.go @@ -5,10 +5,12 @@ package eventbus import ( "context" + "log" "reflect" "slices" "sync" + "tailscale.com/types/logger" "tailscale.com/util/set" ) @@ -30,6 +32,7 @@ type Bus struct { write chan PublishedEvent snapshot chan chan []PublishedEvent routeDebug hook[RoutedEvent] + logf logger.Logf topicsMu sync.Mutex topics map[reflect.Type][]*subscribeState @@ -40,19 +43,42 @@ type Bus struct { clients set.Set[*Client] } -// New returns a new bus. Use [Publish] to make event publishers, -// and [Subscribe] and [SubscribeFunc] to make event subscribers. -func New() *Bus { +// New returns a new bus with default options. It is equivalent to +// calling [NewWithOptions] with zero [BusOptions]. +func New() *Bus { return NewWithOptions(BusOptions{}) } + +// NewWithOptions returns a new [Bus] with the specified [BusOptions]. 
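+//
+// A hedged usage sketch (the logger choice is illustrative):
+//
+//	bus := NewWithOptions(BusOptions{Logf: logger.Discard})
+//	defer bus.Close()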
+// Use [Bus.Client] to construct clients on the bus. +// Use [Publish] to make event publishers. +// Use [Subscribe] and [SubscribeFunc] to make event subscribers. +func NewWithOptions(opts BusOptions) *Bus { ret := &Bus{ write: make(chan PublishedEvent), snapshot: make(chan chan []PublishedEvent), topics: map[reflect.Type][]*subscribeState{}, clients: set.Set[*Client]{}, + logf: opts.logger(), } ret.router = runWorker(ret.pump) return ret } +// BusOptions are optional parameters for a [Bus]. A zero value is ready for +// use and provides defaults as described. +type BusOptions struct { + // Logf, if non-nil, is used for debug logs emitted by the bus and clients, + // publishers, and subscribers under its care. If it is nil, logs are sent + // to [log.Printf]. + Logf logger.Logf +} + +func (o BusOptions) logger() logger.Logf { + if o.Logf == nil { + return log.Printf + } + return o.Logf +} + // Client returns a new client with no subscriptions. Use [Subscribe] // to receive events, and [Publish] to emit events. // @@ -166,6 +192,9 @@ func (b *Bus) pump(ctx context.Context) { } } +// logger returns a [logger.Logf] to which logs related to bus activity should be written. +func (b *Bus) logger() logger.Logf { return b.logf } + func (b *Bus) dest(t reflect.Type) []*subscribeState { b.topicsMu.Lock() defer b.topicsMu.Unlock() diff --git a/util/eventbus/bus_test.go b/util/eventbus/bus_test.go index de292cf1adb5b..1e0cd8abf2cff 100644 --- a/util/eventbus/bus_test.go +++ b/util/eventbus/bus_test.go @@ -4,8 +4,11 @@ package eventbus_test import ( + "bytes" "errors" "fmt" + "log" + "regexp" "testing" "testing/synctest" "time" @@ -436,6 +439,76 @@ func TestMonitor(t *testing.T) { t.Run("Wait", testMon(t, func(c *eventbus.Client, m eventbus.Monitor) { c.Close(); m.Wait() })) } +func TestSlowSubs(t *testing.T) { + swapLogBuf := func(t *testing.T) *bytes.Buffer { + logBuf := new(bytes.Buffer) + save := log.Writer() + log.SetOutput(logBuf) + t.Cleanup(func() { log.SetOutput(save) }) + return logBuf + } + + t.Run("Subscriber", func(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + buf := swapLogBuf(t) + + b := eventbus.New() + defer b.Close() + + pc := b.Client("pub") + p := eventbus.Publish[EventA](pc) + + sc := b.Client("sub") + s := eventbus.Subscribe[EventA](sc) + + go func() { + time.Sleep(6 * time.Second) // trigger the slow check at 5s. + t.Logf("Subscriber accepted %v", <-s.Events()) + }() + + p.Publish(EventA{12345}) + + time.Sleep(7 * time.Second) // advance time... + synctest.Wait() // subscriber is done + + want := regexp.MustCompile(`^.* tailscale.com/util/eventbus_test bus_test.go:\d+: ` + + `subscriber for eventbus_test.EventA is slow.*`) + if got := buf.String(); !want.MatchString(got) { + t.Errorf("Wrong log output\ngot: %q\nwant: %s", got, want) + } + }) + }) + + t.Run("SubscriberFunc", func(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + buf := swapLogBuf(t) + + b := eventbus.New() + defer b.Close() + + pc := b.Client("pub") + p := eventbus.Publish[EventB](pc) + + sc := b.Client("sub") + eventbus.SubscribeFunc[EventB](sc, func(e EventB) { + time.Sleep(6 * time.Second) // trigger the slow check at 5s. + t.Logf("SubscriberFunc processed %v", e) + }) + + p.Publish(EventB{67890}) + + time.Sleep(7 * time.Second) // advance time... 
+ synctest.Wait() // subscriber is done + + want := regexp.MustCompile(`^.* tailscale.com/util/eventbus_test bus_test.go:\d+: ` + + `subscriber for eventbus_test.EventB is slow.*`) + if got := buf.String(); !want.MatchString(got) { + t.Errorf("Wrong log output\ngot: %q\nwant: %s", got, want) + } + }) + }) +} + func TestRegression(t *testing.T) { bus := eventbus.New() t.Cleanup(bus.Close) diff --git a/util/eventbus/client.go b/util/eventbus/client.go index 9e3f3ee76cc31..c119c67a939c2 100644 --- a/util/eventbus/client.go +++ b/util/eventbus/client.go @@ -7,6 +7,7 @@ import ( "reflect" "sync" + "tailscale.com/types/logger" "tailscale.com/util/set" ) @@ -29,6 +30,8 @@ type Client struct { func (c *Client) Name() string { return c.name } +func (c *Client) logger() logger.Logf { return c.bus.logger() } + // Close closes the client. It implicitly closes all publishers and // subscribers obtained from this client. func (c *Client) Close() { @@ -142,7 +145,7 @@ func Subscribe[T any](c *Client) *Subscriber[T] { } r := c.subscribeStateLocked() - s := newSubscriber[T](r) + s := newSubscriber[T](r, logfForCaller(c.logger())) r.addSubscriber(s) return s } @@ -165,7 +168,7 @@ func SubscribeFunc[T any](c *Client, f func(T)) *SubscriberFunc[T] { } r := c.subscribeStateLocked() - s := newSubscriberFunc[T](r, f) + s := newSubscriberFunc[T](r, f, logfForCaller(c.logger())) r.addSubscriber(s) return s } diff --git a/util/eventbus/debug.go b/util/eventbus/debug.go index 6d5463bece7b2..2f2c9589ad0e2 100644 --- a/util/eventbus/debug.go +++ b/util/eventbus/debug.go @@ -6,12 +6,22 @@ package eventbus import ( "cmp" "fmt" + "path/filepath" "reflect" + "runtime" "slices" + "strings" "sync" "sync/atomic" + "time" + + "tailscale.com/types/logger" ) +// slowSubscriberTimeout is a timeout after which a subscriber that does not +// accept a pending event will be flagged as being slow. +const slowSubscriberTimeout = 5 * time.Second + // A Debugger offers access to a bus's privileged introspection and // debugging facilities. // @@ -204,3 +214,29 @@ type DebugTopic struct { Publisher string Subscribers []string } + +// logfForCaller returns a [logger.Logf] that prefixes its output with the +// package, filename, and line number of the caller's caller. +// If logf == nil, it returns [logger.Discard]. +// If the caller location could not be determined, it returns logf unmodified. 
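+//
+// The resulting prefix looks like "tailscale.com/util/eventbus_test
+// bus_test.go:42: " (the line number here is illustrative); TestSlowSubs
+// asserts this format.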
+func logfForCaller(logf logger.Logf) logger.Logf { + if logf == nil { + return logger.Discard + } + pc, fpath, line, _ := runtime.Caller(2) // +1 for my caller, +1 for theirs + if f := runtime.FuncForPC(pc); f != nil { + return logger.WithPrefix(logf, fmt.Sprintf("%s %s:%d: ", funcPackageName(f.Name()), filepath.Base(fpath), line)) + } + return logf +} + +func funcPackageName(funcName string) string { + ls := max(strings.LastIndex(funcName, "/"), 0) + for { + i := strings.LastIndex(funcName, ".") + if i <= ls { + return funcName + } + funcName = funcName[:i] + } +} diff --git a/util/eventbus/subscribe.go b/util/eventbus/subscribe.go index c35c7e7f05682..0b821b3f51586 100644 --- a/util/eventbus/subscribe.go +++ b/util/eventbus/subscribe.go @@ -8,6 +8,9 @@ import ( "fmt" "reflect" "sync" + "time" + + "tailscale.com/types/logger" ) type DeliveredEvent struct { @@ -182,12 +185,18 @@ type Subscriber[T any] struct { stop stopFlag read chan T unregister func() + logf logger.Logf + slow *time.Timer // used to detect slow subscriber service } -func newSubscriber[T any](r *subscribeState) *Subscriber[T] { +func newSubscriber[T any](r *subscribeState, logf logger.Logf) *Subscriber[T] { + slow := time.NewTimer(0) + slow.Stop() // reset in dispatch return &Subscriber[T]{ read: make(chan T), unregister: func() { r.deleteSubscriber(reflect.TypeFor[T]()) }, + logf: logf, + slow: slow, } } @@ -212,6 +221,11 @@ func (s *Subscriber[T]) monitor(debugEvent T) { func (s *Subscriber[T]) dispatch(ctx context.Context, vals *queue[DeliveredEvent], acceptCh func() chan DeliveredEvent, snapshot chan chan []DeliveredEvent) bool { t := vals.Peek().Event.(T) + + start := time.Now() + s.slow.Reset(slowSubscriberTimeout) + defer s.slow.Stop() + for { // Keep the cases in this select in sync with subscribeState.pump // above. The only difference should be that this select @@ -226,6 +240,9 @@ func (s *Subscriber[T]) dispatch(ctx context.Context, vals *queue[DeliveredEvent return false case ch := <-snapshot: ch <- vals.Snapshot() + case <-s.slow.C: + s.logf("subscriber for %T is slow (%v elapsed)", t, time.Since(start)) + s.slow.Reset(slowSubscriberTimeout) } } } @@ -260,12 +277,18 @@ type SubscriberFunc[T any] struct { stop stopFlag read func(T) unregister func() + logf logger.Logf + slow *time.Timer // used to detect slow subscriber service } -func newSubscriberFunc[T any](r *subscribeState, f func(T)) *SubscriberFunc[T] { +func newSubscriberFunc[T any](r *subscribeState, f func(T), logf logger.Logf) *SubscriberFunc[T] { + slow := time.NewTimer(0) + slow.Stop() // reset in dispatch return &SubscriberFunc[T]{ read: f, unregister: func() { r.deleteSubscriber(reflect.TypeFor[T]()) }, + logf: logf, + slow: slow, } } @@ -285,6 +308,11 @@ func (s *SubscriberFunc[T]) dispatch(ctx context.Context, vals *queue[DeliveredE t := vals.Peek().Event.(T) callDone := make(chan struct{}) go s.runCallback(t, callDone) + + start := time.Now() + s.slow.Reset(slowSubscriberTimeout) + defer s.slow.Stop() + // Keep the cases in this select in sync with subscribeState.pump // above. The only difference should be that this select // delivers a value by calling s.read. 
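The same slow-subscriber watchdog recurs in SubscriberFunc's dispatch in the hunk below. A self-contained, hedged sketch of the pattern (watchSend and all other names are illustrative; the real dispatch loops also service queue snapshots and shutdown in the same select):

```go
package main

import (
	"log"
	"time"
)

// watchSend blocks sending v on ch, logging each time the receiver takes
// longer than timeout to accept it, then resetting the timer and waiting
// again. This mirrors the slow-subscriber detection above in simplified form.
func watchSend[T any](ch chan<- T, v T, timeout time.Duration) {
	slow := time.NewTimer(timeout)
	defer slow.Stop()
	start := time.Now()
	for {
		select {
		case ch <- v:
			return
		case <-slow.C:
			log.Printf("receiver for %T is slow (%v elapsed)", v, time.Since(start))
			slow.Reset(timeout)
		}
	}
}

func main() {
	ch := make(chan int)
	done := make(chan struct{})
	go func() {
		defer close(done)
		time.Sleep(120 * time.Millisecond) // a deliberately slow receiver
		log.Printf("received %d", <-ch)
	}()
	watchSend(ch, 42, 50*time.Millisecond) // logs "slow" roughly twice, then delivers
	<-done
}
```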
@@ -299,6 +327,9 @@ func (s *SubscriberFunc[T]) dispatch(ctx context.Context, vals *queue[DeliveredE
 			return false
 		case ch := <-snapshot:
 			ch <- vals.Snapshot()
+		case <-s.slow.C:
+			s.logf("subscriber for %T is slow (%v elapsed)", t, time.Since(start))
+			s.slow.Reset(slowSubscriberTimeout)
 		}
 	}
 }
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index e3c2d478e9882..6ee14164d0a99 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -651,7 +651,9 @@ func (c *Conn) onUDPRelayAllocResp(allocResp UDPRelayAllocResp) {
 	ep.mu.Lock()
 	defer ep.mu.Unlock()
 	derpAddr := ep.derpAddr
-	go c.sendDiscoMessage(epAddr{ap: derpAddr}, ep.publicKey, disco.key, allocResp.Message, discoVerboseLog)
+	if derpAddr.IsValid() {
+		go c.sendDiscoMessage(epAddr{ap: derpAddr}, ep.publicKey, disco.key, allocResp.Message, discoVerboseLog)
+	}
 }
 
 // Synchronize waits for all [eventbus] events published
@@ -719,9 +721,13 @@ func NewConn(opts Options) (*Conn, error) {
 	newPortMapper, ok := portmappertype.HookNewPortMapper.GetOk()
 	if ok {
 		c.portMapper = newPortMapper(portmapperLogf, opts.EventBus, opts.NetMon, disableUPnP, c.onlyTCP443.Load)
-	} else if !testenv.InTest() {
-		panic("unexpected: HookNewPortMapper not set")
 	}
+	// If !ok, the HookNewPortMapper hook is not set (so feature/portmapper
+	// isn't linked), but the build tag to explicitly omit the portmapper
+	// isn't set either. This should only happen on js/wasm builds, where
+	// the portmapper is a no-op even when linked (and it's no longer linked,
+	// since the move to feature/portmapper), or when people wire up their
+	// own Tailscale build from pieces.
 	}
 
 	c.netMon = opts.NetMon
@@ -2438,7 +2444,10 @@ func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshake
 	if !nodeHasCap(c.filt, c.peers.At(peerI), c.self, tailcfg.PeerCapabilityRelay) {
 		return
 	}
-	c.allocRelayEndpointPub.Publish(UDPRelayAllocReq{
+	// [Conn.mu] must not be held while publishing, or [Conn.onUDPRelayAllocResp]
+	// can deadlock, because the request subscriber and the response publisher
+	// run on the same goroutine. See #17830.
+	go c.allocRelayEndpointPub.Publish(UDPRelayAllocReq{
 		RxFromDiscoKey: sender,
 		RxFromNodeKey:  nodeKey,
 		Message:        req,