From 1804e6db673731210ef48bf37e472b80fffc4cec Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Wed, 14 Jan 2026 17:17:55 +0700 Subject: [PATCH 01/22] fix(windows): improve DNS server discovery for domain-joined machines Add DNS suffix matching for non-physical adapters when domain-joined. This allows interfaces with matching DNS suffix to be considered valid even if not in validInterfacesMap, improving DNS server discovery for remote VPN scenarios. While at it, also replacing context.Background() with proper ctx parameter throughout the function for consistent context propagation. --- nameservers_windows.go | 86 ++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/nameservers_windows.go b/nameservers_windows.go index 68173931..e02b1f5b 100644 --- a/nameservers_windows.go +++ b/nameservers_windows.go @@ -17,6 +17,7 @@ import ( "github.com/microsoft/wmi/pkg/base/query" "github.com/microsoft/wmi/pkg/constant" "github.com/microsoft/wmi/pkg/hardware/network/netadapter" + "github.com/miekg/dns" "golang.org/x/sys/windows" "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" "tailscale.com/net/netmon" @@ -128,13 +129,15 @@ func getDNSServers(ctx context.Context) ([]string, error) { // Try to get domain controller info if domain-joined var dcServers []string + var adDomain string isDomain := checkDomainJoined() if isDomain { domainName, err := system.GetActiveDirectoryDomain() if err != nil { - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Failed to get local AD domain: %v", err) } else { + adDomain = domainName // Load netapi32.dll netapi32 := windows.NewLazySystemDLL("netapi32.dll") dsDcName := netapi32.NewProc("DsGetDcNameW") @@ -144,10 +147,9 @@ func getDNSServers(ctx context.Context) ([]string, error) { domainUTF16, err := windows.UTF16PtrFromString(domainName) if err != nil { - Log(context.Background(), logger.Debug(), - "Failed to convert domain name to UTF16: %v", err) + Log(ctx, 
logger.Debug(), "Failed to convert domain name to UTF16: %v", err) } else { - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Attempting to get DC for domain: %s with flags: 0x%x", domainName, flags) // Call DsGetDcNameW with domain name @@ -162,19 +164,19 @@ func getDNSServers(ctx context.Context) ([]string, error) { if ret != 0 { switch ret { case 1355: // ERROR_NO_SUCH_DOMAIN - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Domain not found: %s (%d)", domainName, ret) case 1311: // ERROR_NO_LOGON_SERVERS - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "No logon servers available for domain: %s (%d)", domainName, ret) case 1004: // ERROR_DC_NOT_FOUND - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Domain controller not found for domain: %s (%d)", domainName, ret) case 1722: // RPC_S_SERVER_UNAVAILABLE - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "RPC server unavailable for domain: %s (%d)", domainName, ret) default: - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Failed to get domain controller info for domain %s: %d, %v", domainName, ret, err) } } else if info != nil { @@ -183,17 +185,16 @@ func getDNSServers(ctx context.Context) ([]string, error) { if info.DomainControllerAddress != nil { dcAddr := windows.UTF16PtrToString(info.DomainControllerAddress) dcAddr = strings.TrimPrefix(dcAddr, "\\\\") - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Found domain controller address: %s", dcAddr) if ip := net.ParseIP(dcAddr); ip != nil { dcServers = append(dcServers, ip.String()) - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Added domain controller DNS servers: %v", dcServers) } } else { - Log(context.Background(), logger.Debug(), - "No domain controller address found") + Log(ctx, logger.Debug(), "No domain controller address found") } } } @@ -208,7 +209,7 @@ func getDNSServers(ctx 
context.Context) ([]string, error) { // Collect all local IPs for _, aa := range aas { if aa.OperStatus != winipcfg.IfOperStatusUp { - Log(context.Background(), logger.Debug(), + Log(ctx, logger.Debug(), "Skipping adapter %s - not up, status: %d", aa.FriendlyName(), aa.OperStatus) continue } @@ -216,24 +217,25 @@ func getDNSServers(ctx context.Context) ([]string, error) { // Skip if software loopback or other non-physical types // This is to avoid the "Loopback Pseudo-Interface 1" issue we see on windows if aa.IfType == winipcfg.IfTypeSoftwareLoopback { - Log(context.Background(), logger.Debug(), - "Skipping %s (software loopback)", aa.FriendlyName()) + Log(ctx, logger.Debug(), "Skipping %s (software loopback)", aa.FriendlyName()) continue } - Log(context.Background(), logger.Debug(), - "Processing adapter %s", aa.FriendlyName()) + Log(ctx, logger.Debug(), "Processing adapter %s", aa.FriendlyName()) for a := aa.FirstUnicastAddress; a != nil; a = a.Next { ip := a.Address.IP().String() addressMap[ip] = struct{}{} - Log(context.Background(), logger.Debug(), - "Added local IP %s from adapter %s", ip, aa.FriendlyName()) + Log(ctx, logger.Debug(), "Added local IP %s from adapter %s", ip, aa.FriendlyName()) } } validInterfacesMap := validInterfaces() + if isDomain && adDomain == "" { + Log(ctx, logger.Warn(), "The machine is joined domain, but domain name is empty") + } + checkDnsSuffix := isDomain && adDomain != "" // Collect DNS servers for _, aa := range aas { if aa.OperStatus != winipcfg.IfOperStatusUp { @@ -243,23 +245,33 @@ func getDNSServers(ctx context.Context) ([]string, error) { // Skip if software loopback or other non-physical types // This is to avoid the "Loopback Pseudo-Interface 1" issue we see on windows if aa.IfType == winipcfg.IfTypeSoftwareLoopback { - Log(context.Background(), logger.Debug(), - "Skipping %s (software loopback)", aa.FriendlyName()) + Log(ctx, logger.Debug(), "Skipping %s (software loopback)", aa.FriendlyName()) continue } - // if not 
in the validInterfacesMap, skip - if _, ok := validInterfacesMap[aa.FriendlyName()]; !ok { - Log(context.Background(), logger.Debug(), - "Skipping %s (not in validInterfacesMap)", aa.FriendlyName()) + _, valid := validInterfacesMap[aa.FriendlyName()] + if !valid && checkDnsSuffix { + for suffix := aa.FirstDNSSuffix; suffix != nil; suffix = suffix.Next { + // For non-physical adapters but have the DNS suffix that matches the domain name, + // (or vice versa) consider it valid. This can happen when remote VPN machines. + ds := strings.TrimSpace(suffix.String()) + if dns.IsSubDomain(adDomain, ds) || dns.IsSubDomain(ds, adDomain) { + Log(ctx, logger.Debug(), "Found valid interface %s with DNS suffix %s", aa.FriendlyName(), suffix.String()) + valid = true + break + } + } + } + // if not a valid interface, skip it + if !valid { + Log(ctx, logger.Debug(), "Skipping %s (not in validInterfacesMap)", aa.FriendlyName()) continue } for dns := aa.FirstDNSServerAddress; dns != nil; dns = dns.Next { ip := dns.Address.IP() if ip == nil { - Log(context.Background(), logger.Debug(), - "Skipping nil IP from adapter %s", aa.FriendlyName()) + Log(ctx, logger.Debug(), "Skipping nil IP from adapter %s", aa.FriendlyName()) continue } @@ -292,28 +304,23 @@ func getDNSServers(ctx context.Context) ([]string, error) { if !seen[dcServer] { seen[dcServer] = true ns = append(ns, dcServer) - Log(context.Background(), logger.Debug(), - "Added additional domain controller DNS server: %s", dcServer) + Log(ctx, logger.Debug(), "Added additional domain controller DNS server: %s", dcServer) } } // if we have static DNS servers saved for the current default route, we should add them to the list drIfaceName, err := netmon.DefaultRouteInterface() if err != nil { - Log(context.Background(), logger.Debug(), - "Failed to get default route interface: %v", err) + Log(ctx, logger.Debug(), "Failed to get default route interface: %v", err) } else { drIface, err := net.InterfaceByName(drIfaceName) if err != nil { 
- Log(context.Background(), logger.Debug(), - "Failed to get interface by name %s: %v", drIfaceName, err) + Log(ctx, logger.Debug(), "Failed to get interface by name %s: %v", drIfaceName, err) } else { staticNs, file := SavedStaticNameservers(drIface) - Log(context.Background(), logger.Debug(), - "static dns servers from %s: %v", file, staticNs) + Log(ctx, logger.Debug(), "static dns servers from %s: %v", file, staticNs) if len(staticNs) > 0 { - Log(context.Background(), logger.Debug(), - "Adding static DNS servers from %s: %v", drIfaceName, staticNs) + Log(ctx, logger.Debug(), "Adding static DNS servers from %s: %v", drIfaceName, staticNs) ns = append(ns, staticNs...) } } @@ -323,8 +330,7 @@ func getDNSServers(ctx context.Context) ([]string, error) { return nil, fmt.Errorf("no valid DNS servers found") } - Log(context.Background(), logger.Debug(), - "DNS server discovery completed, count=%d, servers=%v (including %d DC servers)", + Log(ctx, logger.Debug(), "DNS server discovery completed, count=%d, servers=%v (including %d DC servers)", len(ns), ns, len(dcServers)) return ns, nil } From f05519d1c8b0a233c455bbdf50d9bb15e9d7aad4 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Tue, 20 Jan 2026 17:32:06 +0700 Subject: [PATCH 02/22] refactor(network): consolidate network change monitoring Remove separate watchLinkState function and integrate link state change handling directly into monitorNetworkChanges. This consolidates network monitoring logic into a single place and simplifies the codebase. Update netlink dependency from v1.2.1-beta.2 to v1.3.1 and netns from v0.0.4 to v0.0.5 to use stable versions. 
--- cmd/cli/dns_proxy.go | 5 +++++ go.mod | 4 ++-- go.sum | 13 ++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index ba9d5af5..9474b505 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -1280,6 +1280,11 @@ func (p *prog) monitorNetworkChanges() error { return } + mainLog.Load().Debug().Msg("Link state changed, re-bootstrapping") + for _, uc := range p.cfg.Upstream { + uc.ReBootstrap() + } + // Get IPs from default route interface in new state selfIP := defaultRouteIP() diff --git a/go.mod b/go.mod index 56015734..ac73339b 100644 --- a/go.mod +++ b/go.mod @@ -35,7 +35,7 @@ require ( github.com/spf13/pflag v1.0.6 github.com/spf13/viper v1.16.0 github.com/stretchr/testify v1.11.1 - github.com/vishvananda/netlink v1.2.1-beta.2 + github.com/vishvananda/netlink v1.3.1 golang.org/x/net v0.43.0 golang.org/x/sync v0.16.0 golang.org/x/sys v0.35.0 @@ -88,7 +88,7 @@ require ( github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/subosito/gotenv v1.4.2 // indirect github.com/u-root/uio v0.0.0-20240118234441-a3c409a6018e // indirect - github.com/vishvananda/netns v0.0.4 // indirect + github.com/vishvananda/netns v0.0.5 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go4.org/mem v0.0.0-20220726221520-4f986261bf13 // indirect go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect diff --git a/go.sum b/go.sum index 266a916a..3bd91a78 100644 --- a/go.sum +++ b/go.sum @@ -317,11 +317,10 @@ github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8 github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= github.com/u-root/uio v0.0.0-20240118234441-a3c409a6018e h1:BA9O3BmlTmpjbvajAwzWx4Wo2TRVdpPXZEeemGQcajw= github.com/u-root/uio v0.0.0-20240118234441-a3c409a6018e/go.mod h1:eLL9Nub3yfAho7qB0MzZizFhTU2QkLeoVsWdHtDW264= -github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= 
-github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= -github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= -github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= -github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= +github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= +github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= +github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY= +github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -462,7 +461,6 @@ golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -471,7 +469,6 @@ golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys 
v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -490,8 +487,10 @@ golang.org/x/sys v0.0.0-20220622161953-175b2fd9d664/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220817070843-5a390386f1f2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.4.1-0.20230131160137-e7d7f63158de/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= From 8d63a755ba06f45895d78fc1f538aedf3847d099 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Thu, 22 Jan 2026 13:01:59 +0700 Subject: [PATCH 03/22] Removing outdated netlink codes --- cmd/cli/netlink_linux.go | 34 ---------------------------------- cmd/cli/netlink_others.go | 7 
------- cmd/cli/prog.go | 1 - 3 files changed, 42 deletions(-) delete mode 100644 cmd/cli/netlink_linux.go delete mode 100644 cmd/cli/netlink_others.go diff --git a/cmd/cli/netlink_linux.go b/cmd/cli/netlink_linux.go deleted file mode 100644 index d757f8b7..00000000 --- a/cmd/cli/netlink_linux.go +++ /dev/null @@ -1,34 +0,0 @@ -package cli - -import ( - "context" - - "github.com/vishvananda/netlink" - "golang.org/x/sys/unix" -) - -func (p *prog) watchLinkState(ctx context.Context) { - ch := make(chan netlink.LinkUpdate) - done := make(chan struct{}) - defer close(done) - if err := netlink.LinkSubscribe(ch, done); err != nil { - mainLog.Load().Warn().Err(err).Msg("could not subscribe link") - return - } - for { - select { - case <-ctx.Done(): - return - case lu := <-ch: - if lu.Change == 0xFFFFFFFF { - continue - } - if lu.Change&unix.IFF_UP != 0 { - mainLog.Load().Debug().Msgf("link state changed, re-bootstrapping") - for _, uc := range p.cfg.Upstream { - uc.ReBootstrap() - } - } - } - } -} diff --git a/cmd/cli/netlink_others.go b/cmd/cli/netlink_others.go deleted file mode 100644 index 5a298b99..00000000 --- a/cmd/cli/netlink_others.go +++ /dev/null @@ -1,7 +0,0 @@ -//go:build !linux - -package cli - -import "context" - -func (p *prog) watchLinkState(ctx context.Context) {} diff --git a/cmd/cli/prog.go b/cmd/cli/prog.go index 42e2efe0..c499f847 100644 --- a/cmd/cli/prog.go +++ b/cmd/cli/prog.go @@ -537,7 +537,6 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) { defer wg.Done() p.runClientInfoDiscover(ctx) }() - go p.watchLinkState(ctx) } if !reload { From e8d1a4604ecbcf24447df96313fefdf3db847a31 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Tue, 6 Jan 2026 14:46:00 +0700 Subject: [PATCH 04/22] perf(doq): implement connection pooling for improved performance Implement QUIC connection pooling for DoQ resolver to match DoH3 performance. Previously, DoQ created a new QUIC connection for every DNS query, incurring significant handshake overhead. 
Now connections are reused across queries, eliminating this overhead for subsequent requests. The implementation follows the same pattern as DoH3, using parallel dialing and connection pooling to achieve comparable performance characteristics. --- config.go | 32 +++++- config_quic.go | 25 +++++ doq.go | 259 +++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 286 insertions(+), 30 deletions(-) diff --git a/config.go b/config.go index 73484d70..1ef58fb1 100644 --- a/config.go +++ b/config.go @@ -276,6 +276,9 @@ type UpstreamConfig struct { http3RoundTripper http.RoundTripper http3RoundTripper4 http.RoundTripper http3RoundTripper6 http.RoundTripper + doqConnPool *doqConnPool + doqConnPool4 *doqConnPool + doqConnPool6 *doqConnPool certPool *x509.CertPool u *url.URL fallbackOnce sync.Once @@ -490,7 +493,7 @@ func (uc *UpstreamConfig) SetupBootstrapIP() { // ReBootstrap re-setup the bootstrap IP and the transport. func (uc *UpstreamConfig) ReBootstrap() { switch uc.Type { - case ResolverTypeDOH, ResolverTypeDOH3: + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ: default: return } @@ -510,6 +513,27 @@ func (uc *UpstreamConfig) SetupTransport() { uc.setupDOHTransport() case ResolverTypeDOH3: uc.setupDOH3Transport() + case ResolverTypeDOQ: + uc.setupDOQTransport() + } +} + +func (uc *UpstreamConfig) setupDOQTransport() { + switch uc.IPStack { + case IpStackBoth, "": + uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs) + case IpStackV4: + uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs4) + case IpStackV6: + uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs6) + case IpStackSplit: + uc.doqConnPool4 = uc.newDOQConnPool(uc.bootstrapIPs4) + if HasIPv6() { + uc.doqConnPool6 = uc.newDOQConnPool(uc.bootstrapIPs6) + } else { + uc.doqConnPool6 = uc.doqConnPool4 + } + uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs) } } @@ -595,7 +619,7 @@ func (uc *UpstreamConfig) ErrorPing() error { func (uc *UpstreamConfig) ping() error { switch uc.Type { - case 
ResolverTypeDOH, ResolverTypeDOH3: + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ: default: return nil } @@ -629,6 +653,10 @@ func (uc *UpstreamConfig) ping() error { if err := ping(uc.doh3Transport(typ)); err != nil { return err } + case ResolverTypeDOQ: + // For DoQ, we just ensure transport is set up by calling doqTransport + // DoQ doesn't use HTTP, so we can't ping it the same way + _ = uc.doqTransport(typ) } } diff --git a/config_quic.go b/config_quic.go index 33f56b92..97624c07 100644 --- a/config_quic.go +++ b/config_quic.go @@ -91,6 +91,27 @@ func (uc *UpstreamConfig) doh3Transport(dnsType uint16) http.RoundTripper { return uc.http3RoundTripper } +func (uc *UpstreamConfig) doqTransport(dnsType uint16) *doqConnPool { + uc.transportOnce.Do(func() { + uc.SetupTransport() + }) + if uc.rebootstrap.CompareAndSwap(true, false) { + uc.SetupTransport() + } + switch uc.IPStack { + case IpStackBoth, IpStackV4, IpStackV6: + return uc.doqConnPool + case IpStackSplit: + switch dnsType { + case dns.TypeA: + return uc.doqConnPool4 + default: + return uc.doqConnPool6 + } + } + return uc.doqConnPool +} + // Putting the code for quic parallel dialer here: // // - quic dialer is different with net.Dialer @@ -158,3 +179,7 @@ func (d *quicParallelDialer) Dial(ctx context.Context, addrs []string, tlsCfg *t return nil, errors.Join(errs...) 
} + +func (uc *UpstreamConfig) newDOQConnPool(addrs []string) *doqConnPool { + return newDOQConnPool(uc, addrs) +} diff --git a/doq.go b/doq.go index 0903411c..3191d74f 100644 --- a/doq.go +++ b/doq.go @@ -5,8 +5,11 @@ package ctrld import ( "context" "crypto/tls" + "errors" "io" "net" + "runtime" + "sync" "time" "github.com/miekg/dns" @@ -18,27 +21,73 @@ type doqResolver struct { } func (r *doqResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { - endpoint := r.uc.Endpoint - tlsConfig := &tls.Config{NextProtos: []string{"doq"}} - ip := r.uc.BootstrapIP - if ip == "" { - dnsTyp := uint16(0) - if msg != nil && len(msg.Question) > 0 { - dnsTyp = msg.Question[0].Qtype - } - ip = r.uc.bootstrapIPForDNSType(dnsTyp) + // Get the appropriate connection pool based on DNS type and IP stack + dnsTyp := uint16(0) + if msg != nil && len(msg.Question) > 0 { + dnsTyp = msg.Question[0].Qtype + } + + pool := r.uc.doqTransport(dnsTyp) + if pool == nil { + return nil, errors.New("DoQ connection pool is not available") } - tlsConfig.ServerName = r.uc.Domain - _, port, _ := net.SplitHostPort(endpoint) - endpoint = net.JoinHostPort(ip, port) - return resolve(ctx, msg, endpoint, tlsConfig) + + return pool.Resolve(ctx, msg) +} + +// doqConnPool manages a pool of QUIC connections for DoQ queries. 
+type doqConnPool struct { + uc *UpstreamConfig + addrs []string + port string + tlsConfig *tls.Config + mu sync.RWMutex + conns map[string]*doqConn + closed bool +} + +type doqConn struct { + conn *quic.Conn + lastUsed time.Time + refCount int + mu sync.Mutex +} + +func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { + _, port, _ := net.SplitHostPort(uc.Endpoint) + if port == "" { + port = "853" + } + + tlsConfig := &tls.Config{ + NextProtos: []string{"doq"}, + RootCAs: uc.certPool, + ServerName: uc.Domain, + } + + pool := &doqConnPool{ + uc: uc, + addrs: addrs, + port: port, + tlsConfig: tlsConfig, + conns: make(map[string]*doqConn), + } + + // Use SetFinalizer here because we need to call a method on the pool itself. + // AddCleanup would require passing the pool as arg (which panics) or capturing + // it in a closure (which prevents GC). SetFinalizer is appropriate for this case. + runtime.SetFinalizer(pool, func(p *doqConnPool) { + p.CloseIdleConnections() + }) + + return pool } -func resolve(ctx context.Context, msg *dns.Msg, endpoint string, tlsConfig *tls.Config) (*dns.Msg, error) { - // DoQ quic-go server returns io.EOF error after running for a long time, - // even for a good stream. So retrying the query for 5 times before giving up. +// Resolve performs a DNS query using a pooled QUIC connection. +func (p *doqConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + // Retry logic for io.EOF errors (as per original implementation) for i := 0; i < 5; i++ { - answer, err := doResolve(ctx, msg, endpoint, tlsConfig) + answer, err := p.doResolve(ctx, msg) if err == io.EOF { continue } @@ -47,57 +96,72 @@ func resolve(ctx context.Context, msg *dns.Msg, endpoint string, tlsConfig *tls. 
} return answer, nil } - return nil, &quic.ApplicationError{ErrorCode: quic.ApplicationErrorCode(quic.InternalError), ErrorMessage: quic.InternalError.Message()} + return nil, &quic.ApplicationError{ + ErrorCode: quic.ApplicationErrorCode(quic.InternalError), + ErrorMessage: quic.InternalError.Message(), + } } -func doResolve(ctx context.Context, msg *dns.Msg, endpoint string, tlsConfig *tls.Config) (*dns.Msg, error) { - session, err := quic.DialAddr(ctx, endpoint, tlsConfig, nil) +func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + conn, addr, err := p.getConn(ctx) if err != nil { return nil, err } - defer session.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + // Pack the DNS message msgBytes, err := msg.Pack() if err != nil { + p.putConn(addr, conn, false) return nil, err } - stream, err := session.OpenStream() + // Open a new stream for this query + stream, err := conn.OpenStream() if err != nil { + p.putConn(addr, conn, false) return nil, err } + // Set deadline deadline, ok := ctx.Deadline() if !ok { deadline = time.Now().Add(5 * time.Second) } _ = stream.SetDeadline(deadline) + // Write message length (2 bytes) followed by message var msgLen = uint16(len(msgBytes)) var msgLenBytes = []byte{byte(msgLen >> 8), byte(msgLen & 0xFF)} if _, err := stream.Write(msgLenBytes); err != nil { + stream.Close() + p.putConn(addr, conn, false) return nil, err } if _, err := stream.Write(msgBytes); err != nil { + stream.Close() + p.putConn(addr, conn, false) return nil, err } + // Read response buf, err := io.ReadAll(stream) + stream.Close() + + // Return connection to pool (mark as potentially bad if error occurred) + isGood := err == nil && len(buf) > 0 + p.putConn(addr, conn, isGood) + if err != nil { return nil, err } - _ = stream.Close() - - // io.ReadAll hide the io.EOF error returned by quic-go server. 
- // Once we figure out why quic-go server sends io.EOF after running - // for a long time, we can have a better way to handle this. For now, - // make sure io.EOF error returned, so the caller can handle it cleanly. + // io.ReadAll hides io.EOF error, so check for empty buffer if len(buf) == 0 { return nil, io.EOF } + // Unpack DNS response (skip 2-byte length prefix) answer := new(dns.Msg) if err := answer.Unpack(buf[2:]); err != nil { return nil, err @@ -105,3 +169,142 @@ func doResolve(ctx context.Context, msg *dns.Msg, endpoint string, tlsConfig *tl answer.SetReply(msg) return answer, nil } + +// getConn gets a QUIC connection from the pool or creates a new one. +func (p *doqConnPool) getConn(ctx context.Context) (*quic.Conn, string, error) { + p.mu.Lock() + defer p.mu.Unlock() + + if p.closed { + return nil, "", io.EOF + } + + // Try to reuse an existing connection + for addr, doqConn := range p.conns { + doqConn.mu.Lock() + if doqConn.refCount == 0 && doqConn.conn != nil { + // Check if connection is still alive + select { + case <-doqConn.conn.Context().Done(): + // Connection is closed, remove it + doqConn.mu.Unlock() + delete(p.conns, addr) + continue + default: + } + + doqConn.refCount++ + doqConn.lastUsed = time.Now() + conn := doqConn.conn + doqConn.mu.Unlock() + return conn, addr, nil + } + doqConn.mu.Unlock() + } + + // No available connection, create a new one + addr, conn, err := p.dialConn(ctx) + if err != nil { + return nil, "", err + } + + doqConn := &doqConn{ + conn: conn, + lastUsed: time.Now(), + refCount: 1, + } + p.conns[addr] = doqConn + + return conn, addr, nil +} + +// putConn returns a connection to the pool. 
+func (p *doqConnPool) putConn(addr string, conn *quic.Conn, isGood bool) { + p.mu.Lock() + defer p.mu.Unlock() + + doqConn, ok := p.conns[addr] + if !ok { + return + } + + doqConn.mu.Lock() + defer doqConn.mu.Unlock() + + doqConn.refCount-- + if doqConn.refCount < 0 { + doqConn.refCount = 0 + } + + // If connection is bad or closed, remove it from pool + if !isGood || conn.Context().Err() != nil { + delete(p.conns, addr) + conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + return + } + + doqConn.lastUsed = time.Now() +} + +// dialConn creates a new QUIC connection using parallel dialing like DoH3. +func (p *doqConnPool) dialConn(ctx context.Context) (string, *quic.Conn, error) { + logger := ProxyLogger.Load() + + // If we have a bootstrap IP, use it directly + if p.uc.BootstrapIP != "" { + addr := net.JoinHostPort(p.uc.BootstrapIP, p.port) + Log(ctx, logger.Debug(), "Sending DoQ request to: %s", addr) + udpConn, err := net.ListenUDP("udp", nil) + if err != nil { + return "", nil, err + } + remoteAddr, err := net.ResolveUDPAddr("udp", addr) + if err != nil { + udpConn.Close() + return "", nil, err + } + conn, err := quic.DialEarly(ctx, udpConn, remoteAddr, p.tlsConfig, nil) + if err != nil { + udpConn.Close() + return "", nil, err + } + return addr, conn, nil + } + + // Use parallel dialing like DoH3 + dialAddrs := make([]string, len(p.addrs)) + for i := range p.addrs { + dialAddrs[i] = net.JoinHostPort(p.addrs[i], p.port) + } + + pd := &quicParallelDialer{} + conn, err := pd.Dial(ctx, dialAddrs, p.tlsConfig, nil) + if err != nil { + return "", nil, err + } + + addr := conn.RemoteAddr().String() + Log(ctx, logger.Debug(), "Sending DoQ request to: %s", addr) + return addr, conn, nil +} + +// CloseIdleConnections closes all idle connections in the pool. +// When called during cleanup (e.g., from finalizer), it closes all connections +// regardless of refCount to prevent resource leaks. 
+func (p *doqConnPool) CloseIdleConnections() { + p.mu.Lock() + defer p.mu.Unlock() + + p.closed = true + + for addr, dc := range p.conns { + dc.mu.Lock() + if dc.conn != nil { + // Close all connections to ensure proper cleanup, even if in use + // This prevents resource leaks when the pool is being destroyed + dc.conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + } + dc.mu.Unlock() + delete(p.conns, addr) + } +} From 1f4c47318e3778e16c3a6d0ade317cbbd4058e5f Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Tue, 6 Jan 2026 18:50:13 +0700 Subject: [PATCH 05/22] refactor(config): consolidate transport setup and eliminate duplication Consolidate DoH/DoH3/DoQ transport initialization into a single SetupTransport method and introduce generic helper functions to eliminate duplicated IP stack selection logic across transport getters. This reduces code duplication by ~77 lines while maintaining the same functionality. --- config.go | 119 ++++++++++++++++++------------------------------- config_quic.go | 47 +++++-------------- doq.go | 2 +- 3 files changed, 54 insertions(+), 114 deletions(-) diff --git a/config.go b/config.go index 1ef58fb1..0975d222 100644 --- a/config.go +++ b/config.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "io" - "math/rand" "net" "net/http" "net/netip" @@ -509,54 +508,49 @@ func (uc *UpstreamConfig) ReBootstrap() { // For now, only DoH upstream is supported. 
func (uc *UpstreamConfig) SetupTransport() { switch uc.Type { - case ResolverTypeDOH: - uc.setupDOHTransport() - case ResolverTypeDOH3: - uc.setupDOH3Transport() - case ResolverTypeDOQ: - uc.setupDOQTransport() + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ: + default: + return } -} - -func (uc *UpstreamConfig) setupDOQTransport() { + ips := uc.bootstrapIPs switch uc.IPStack { - case IpStackBoth, "": - uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs) case IpStackV4: - uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs4) + ips = uc.bootstrapIPs4 case IpStackV6: - uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs6) - case IpStackSplit: + ips = uc.bootstrapIPs6 + } + uc.transport = uc.newDOHTransport(ips) + uc.http3RoundTripper = uc.newDOH3Transport(ips) + uc.doqConnPool = uc.newDOQConnPool(ips) + if uc.IPStack == IpStackSplit { + uc.transport4 = uc.newDOHTransport(uc.bootstrapIPs4) + uc.http3RoundTripper4 = uc.newDOH3Transport(uc.bootstrapIPs4) uc.doqConnPool4 = uc.newDOQConnPool(uc.bootstrapIPs4) if HasIPv6() { + uc.transport6 = uc.newDOHTransport(uc.bootstrapIPs6) + uc.http3RoundTripper6 = uc.newDOH3Transport(uc.bootstrapIPs6) uc.doqConnPool6 = uc.newDOQConnPool(uc.bootstrapIPs6) } else { + uc.transport6 = uc.transport4 + uc.http3RoundTripper6 = uc.http3RoundTripper4 uc.doqConnPool6 = uc.doqConnPool4 } - uc.doqConnPool = uc.newDOQConnPool(uc.bootstrapIPs) } } -func (uc *UpstreamConfig) setupDOHTransport() { - switch uc.IPStack { - case IpStackBoth, "": - uc.transport = uc.newDOHTransport(uc.bootstrapIPs) - case IpStackV4: - uc.transport = uc.newDOHTransport(uc.bootstrapIPs4) - case IpStackV6: - uc.transport = uc.newDOHTransport(uc.bootstrapIPs6) - case IpStackSplit: - uc.transport4 = uc.newDOHTransport(uc.bootstrapIPs4) - if HasIPv6() { - uc.transport6 = uc.newDOHTransport(uc.bootstrapIPs6) - } else { - uc.transport6 = uc.transport4 - } - uc.transport = uc.newDOHTransport(uc.bootstrapIPs) +func (uc *UpstreamConfig) ensureSetupTransport() { + 
uc.transportOnce.Do(func() { + uc.SetupTransport() + }) + if uc.rebootstrap.CompareAndSwap(true, false) { + uc.SetupTransport() } } func (uc *UpstreamConfig) newDOHTransport(addrs []string) *http.Transport { + if uc.Type != ResolverTypeDOH { + return nil + } transport := http.DefaultTransport.(*http.Transport).Clone() transport.MaxIdleConnsPerHost = 100 transport.TLSClientConfig = &tls.Config{ @@ -690,46 +684,8 @@ func (uc *UpstreamConfig) isNextDNS() bool { } func (uc *UpstreamConfig) dohTransport(dnsType uint16) http.RoundTripper { - uc.transportOnce.Do(func() { - uc.SetupTransport() - }) - if uc.rebootstrap.CompareAndSwap(true, false) { - uc.SetupTransport() - } - switch uc.IPStack { - case IpStackBoth, IpStackV4, IpStackV6: - return uc.transport - case IpStackSplit: - switch dnsType { - case dns.TypeA: - return uc.transport4 - default: - return uc.transport6 - } - } - return uc.transport -} - -func (uc *UpstreamConfig) bootstrapIPForDNSType(dnsType uint16) string { - switch uc.IPStack { - case IpStackBoth: - return pick(uc.bootstrapIPs) - case IpStackV4: - return pick(uc.bootstrapIPs4) - case IpStackV6: - return pick(uc.bootstrapIPs6) - case IpStackSplit: - switch dnsType { - case dns.TypeA: - return pick(uc.bootstrapIPs4) - default: - if HasIPv6() { - return pick(uc.bootstrapIPs6) - } - return pick(uc.bootstrapIPs4) - } - } - return pick(uc.bootstrapIPs) + uc.ensureSetupTransport() + return transportByIpStack(uc.IPStack, dnsType, uc.transport, uc.transport4, uc.transport6) } func (uc *UpstreamConfig) netForDNSType(dnsType uint16) (string, string) { @@ -974,10 +930,6 @@ func ResolverTypeFromEndpoint(endpoint string) string { return ResolverTypeDOT } -func pick(s []string) string { - return s[rand.Intn(len(s))] -} - // upstreamUID generates an unique identifier for an upstream. 
func upstreamUID() string { b := make([]byte, 4) @@ -1013,3 +965,18 @@ func bootstrapIPsFromControlDDomain(domain string) []string { } return nil } + +func transportByIpStack[T any](ipStack string, dnsType uint16, transport, transport4, transport6 T) T { + switch ipStack { + case IpStackBoth, IpStackV4, IpStackV6: + return transport + case IpStackSplit: + switch dnsType { + case dns.TypeA: + return transport4 + default: + return transport6 + } + } + return transport +} diff --git a/config_quic.go b/config_quic.go index 97624c07..f6192d53 100644 --- a/config_quic.go +++ b/config_quic.go @@ -9,7 +9,6 @@ import ( "runtime" "sync" - "github.com/miekg/dns" "github.com/quic-go/quic-go" "github.com/quic-go/quic-go/http3" ) @@ -34,6 +33,9 @@ func (uc *UpstreamConfig) setupDOH3Transport() { } func (uc *UpstreamConfig) newDOH3Transport(addrs []string) http.RoundTripper { + if uc.Type != ResolverTypeDOH3 { + return nil + } rt := &http3.Transport{} rt.TLSClientConfig = &tls.Config{RootCAs: uc.certPool} rt.Dial = func(ctx context.Context, addr string, tlsCfg *tls.Config, cfg *quic.Config) (*quic.Conn, error) { @@ -71,45 +73,13 @@ func (uc *UpstreamConfig) newDOH3Transport(addrs []string) http.RoundTripper { } func (uc *UpstreamConfig) doh3Transport(dnsType uint16) http.RoundTripper { - uc.transportOnce.Do(func() { - uc.SetupTransport() - }) - if uc.rebootstrap.CompareAndSwap(true, false) { - uc.SetupTransport() - } - switch uc.IPStack { - case IpStackBoth, IpStackV4, IpStackV6: - return uc.http3RoundTripper - case IpStackSplit: - switch dnsType { - case dns.TypeA: - return uc.http3RoundTripper4 - default: - return uc.http3RoundTripper6 - } - } - return uc.http3RoundTripper + uc.ensureSetupTransport() + return transportByIpStack(uc.IPStack, dnsType, uc.http3RoundTripper, uc.http3RoundTripper4, uc.http3RoundTripper6) } func (uc *UpstreamConfig) doqTransport(dnsType uint16) *doqConnPool { - uc.transportOnce.Do(func() { - uc.SetupTransport() - }) - if 
uc.rebootstrap.CompareAndSwap(true, false) { - uc.SetupTransport() - } - switch uc.IPStack { - case IpStackBoth, IpStackV4, IpStackV6: - return uc.doqConnPool - case IpStackSplit: - switch dnsType { - case dns.TypeA: - return uc.doqConnPool4 - default: - return uc.doqConnPool6 - } - } - return uc.doqConnPool + uc.ensureSetupTransport() + return transportByIpStack(uc.IPStack, dnsType, uc.doqConnPool, uc.doqConnPool4, uc.doqConnPool6) } // Putting the code for quic parallel dialer here: @@ -181,5 +151,8 @@ func (d *quicParallelDialer) Dial(ctx context.Context, addrs []string, tlsCfg *t } func (uc *UpstreamConfig) newDOQConnPool(addrs []string) *doqConnPool { + if uc.Type != ResolverTypeDOQ { + return nil + } return newDOQConnPool(uc, addrs) } diff --git a/doq.go b/doq.go index 3191d74f..2b74f83b 100644 --- a/doq.go +++ b/doq.go @@ -86,7 +86,7 @@ func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { // Resolve performs a DNS query using a pooled QUIC connection. func (p *doqConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { // Retry logic for io.EOF errors (as per original implementation) - for i := 0; i < 5; i++ { + for range 5 { answer, err := p.doResolve(ctx, msg) if err == io.EOF { continue From 2e8a0f00a09caad5abb150755f3fb66485ef562f Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Wed, 7 Jan 2026 17:11:38 +0700 Subject: [PATCH 06/22] fix(config): use three-state atomic for rebootstrap to prevent data race Replace boolean rebootstrap flag with a three-state atomic integer to prevent concurrent SetupTransport calls during rebootstrap. The atomic state machine ensures only one goroutine can proceed from "started" to "in progress", eliminating the need for a mutex while maintaining thread safety. States: NotStarted -> Started -> InProgress -> NotStarted Note that the race condition is still acceptable because any additional transports created during the race are functional. 
Once the connection is established, the unused transports are safely handled by the garbage collector. --- config.go | 11 +++++++--- config_internal_test.go | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/config.go b/config.go index 0975d222..ad55dada 100644 --- a/config.go +++ b/config.go @@ -82,6 +82,10 @@ const ( endpointPrefixQUIC = "quic://" endpointPrefixH3 = "h3://" endpointPrefixSdns = "sdns://" + + rebootstrapNotStarted = 0 + rebootstrapStarted = 1 + rebootstrapInProgress = 2 ) var ( @@ -264,7 +268,7 @@ type UpstreamConfig struct { Discoverable *bool `mapstructure:"discoverable" toml:"discoverable"` g singleflight.Group - rebootstrap atomic.Bool + rebootstrap atomic.Int64 bootstrapIPs []string bootstrapIPs4 []string bootstrapIPs6 []string @@ -497,7 +501,7 @@ func (uc *UpstreamConfig) ReBootstrap() { return } _, _, _ = uc.g.Do("ReBootstrap", func() (any, error) { - if uc.rebootstrap.CompareAndSwap(false, true) { + if uc.rebootstrap.CompareAndSwap(rebootstrapNotStarted, rebootstrapStarted) { ProxyLogger.Load().Debug().Msgf("re-bootstrapping upstream ip for %v", uc) } return true, nil @@ -542,8 +546,9 @@ func (uc *UpstreamConfig) ensureSetupTransport() { uc.transportOnce.Do(func() { uc.SetupTransport() }) - if uc.rebootstrap.CompareAndSwap(true, false) { + if uc.rebootstrap.CompareAndSwap(rebootstrapStarted, rebootstrapInProgress) { uc.SetupTransport() + uc.rebootstrap.Store(rebootstrapNotStarted) } } diff --git a/config_internal_test.go b/config_internal_test.go index b37e982f..ca2b381a 100644 --- a/config_internal_test.go +++ b/config_internal_test.go @@ -2,6 +2,7 @@ package ctrld import ( "net/url" + "sync" "testing" "github.com/stretchr/testify/assert" @@ -505,6 +506,50 @@ func TestUpstreamConfig_IsDiscoverable(t *testing.T) { } } +func TestRebootstrapRace(t *testing.T) { + uc := &UpstreamConfig{ + Name: "test-doh", + Type: ResolverTypeDOH, + Endpoint: "https://example.com/dns-query", + Domain: 
"example.com", + bootstrapIPs: []string{"1.1.1.1", "1.0.0.1"}, + } + + uc.SetupTransport() + + if uc.transport == nil { + t.Fatal("initial transport should be set") + } + + const goroutines = 100 + + uc.ReBootstrap() + + started := make(chan struct{}) + go func() { + close(started) + for { + switch uc.rebootstrap.Load() { + case rebootstrapStarted, rebootstrapInProgress: + uc.ReBootstrap() + default: + return + } + } + }() + + <-started + + var wg sync.WaitGroup + for range goroutines { + wg.Go(func() { + uc.ensureSetupTransport() + }) + } + + wg.Wait() +} + func ptrBool(b bool) *bool { return &b } From acbebcf7c22852889a840f41413da37b2a7c8671 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Thu, 8 Jan 2026 20:00:15 +0700 Subject: [PATCH 07/22] perf(dot): implement connection pooling for improved performance Implement TCP/TLS connection pooling for DoT resolver to match DoQ performance. Previously, DoT created a new TCP/TLS connection for every DNS query, incurring significant TLS handshake overhead. Now connections are reused across queries, eliminating this overhead for subsequent requests. The implementation follows the same pattern as DoQ, using parallel dialing and connection pooling to achieve comparable performance characteristics. --- config.go | 20 +++- config_quic.go | 31 ++--- doh.go | 4 + doq.go | 4 + dot.go | 301 ++++++++++++++++++++++++++++++++++++++++++++++--- resolver.go | 20 ++++ 6 files changed, 340 insertions(+), 40 deletions(-) diff --git a/config.go b/config.go index ad55dada..bdfa389e 100644 --- a/config.go +++ b/config.go @@ -282,6 +282,9 @@ type UpstreamConfig struct { doqConnPool *doqConnPool doqConnPool4 *doqConnPool doqConnPool6 *doqConnPool + dotClientPool *dotConnPool + dotClientPool4 *dotConnPool + dotClientPool6 *dotConnPool certPool *x509.CertPool u *url.URL fallbackOnce sync.Once @@ -496,7 +499,7 @@ func (uc *UpstreamConfig) SetupBootstrapIP() { // ReBootstrap re-setup the bootstrap IP and the transport. 
func (uc *UpstreamConfig) ReBootstrap() { switch uc.Type { - case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ: + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ, ResolverTypeDOT: default: return } @@ -508,11 +511,11 @@ func (uc *UpstreamConfig) ReBootstrap() { }) } -// SetupTransport initializes the network transport used to connect to upstream server. -// For now, only DoH upstream is supported. +// SetupTransport initializes the network transport used to connect to upstream servers. +// For now, DoH/DoH3/DoQ/DoT upstreams are supported. func (uc *UpstreamConfig) SetupTransport() { switch uc.Type { - case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ: + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ, ResolverTypeDOT: default: return } @@ -523,21 +526,26 @@ func (uc *UpstreamConfig) SetupTransport() { case IpStackV6: ips = uc.bootstrapIPs6 } + uc.transport = uc.newDOHTransport(ips) uc.http3RoundTripper = uc.newDOH3Transport(ips) uc.doqConnPool = uc.newDOQConnPool(ips) + uc.dotClientPool = uc.newDOTClientPool(ips) if uc.IPStack == IpStackSplit { uc.transport4 = uc.newDOHTransport(uc.bootstrapIPs4) uc.http3RoundTripper4 = uc.newDOH3Transport(uc.bootstrapIPs4) uc.doqConnPool4 = uc.newDOQConnPool(uc.bootstrapIPs4) + uc.dotClientPool4 = uc.newDOTClientPool(uc.bootstrapIPs4) if HasIPv6() { uc.transport6 = uc.newDOHTransport(uc.bootstrapIPs6) uc.http3RoundTripper6 = uc.newDOH3Transport(uc.bootstrapIPs6) uc.doqConnPool6 = uc.newDOQConnPool(uc.bootstrapIPs6) + uc.dotClientPool6 = uc.newDOTClientPool(uc.bootstrapIPs6) } else { uc.transport6 = uc.transport4 uc.http3RoundTripper6 = uc.http3RoundTripper4 uc.doqConnPool6 = uc.doqConnPool4 + uc.dotClientPool6 = uc.dotClientPool4 } } } @@ -656,6 +664,10 @@ func (uc *UpstreamConfig) ping() error { // For DoQ, we just ensure transport is set up by calling doqTransport // DoQ doesn't use HTTP, so we can't ping it the same way _ = uc.doqTransport(typ) + case ResolverTypeDOT: + // For DoT, we just ensure 
transport is set up by calling dotTransport + // DoT doesn't use HTTP, so we can't ping it the same way + _ = uc.dotTransport(typ) } } diff --git a/config_quic.go b/config_quic.go index f6192d53..237bb82d 100644 --- a/config_quic.go +++ b/config_quic.go @@ -13,25 +13,6 @@ import ( "github.com/quic-go/quic-go/http3" ) -func (uc *UpstreamConfig) setupDOH3Transport() { - switch uc.IPStack { - case IpStackBoth, "": - uc.http3RoundTripper = uc.newDOH3Transport(uc.bootstrapIPs) - case IpStackV4: - uc.http3RoundTripper = uc.newDOH3Transport(uc.bootstrapIPs4) - case IpStackV6: - uc.http3RoundTripper = uc.newDOH3Transport(uc.bootstrapIPs6) - case IpStackSplit: - uc.http3RoundTripper4 = uc.newDOH3Transport(uc.bootstrapIPs4) - if HasIPv6() { - uc.http3RoundTripper6 = uc.newDOH3Transport(uc.bootstrapIPs6) - } else { - uc.http3RoundTripper6 = uc.http3RoundTripper4 - } - uc.http3RoundTripper = uc.newDOH3Transport(uc.bootstrapIPs) - } -} - func (uc *UpstreamConfig) newDOH3Transport(addrs []string) http.RoundTripper { if uc.Type != ResolverTypeDOH3 { return nil @@ -82,6 +63,11 @@ func (uc *UpstreamConfig) doqTransport(dnsType uint16) *doqConnPool { return transportByIpStack(uc.IPStack, dnsType, uc.doqConnPool, uc.doqConnPool4, uc.doqConnPool6) } +func (uc *UpstreamConfig) dotTransport(dnsType uint16) *dotConnPool { + uc.ensureSetupTransport() + return transportByIpStack(uc.IPStack, dnsType, uc.dotClientPool, uc.dotClientPool4, uc.dotClientPool6) +} + // Putting the code for quic parallel dialer here: // // - quic dialer is different with net.Dialer @@ -156,3 +142,10 @@ func (uc *UpstreamConfig) newDOQConnPool(addrs []string) *doqConnPool { } return newDOQConnPool(uc, addrs) } + +func (uc *UpstreamConfig) newDOTClientPool(addrs []string) *dotConnPool { + if uc.Type != ResolverTypeDOT { + return nil + } + return newDOTClientPool(uc, addrs) +} diff --git a/doh.go b/doh.go index 58aaf165..6b41c116 100644 --- a/doh.go +++ b/doh.go @@ -85,6 +85,10 @@ type dohResolver struct { // Resolve 
performs DNS query with given DNS message using DOH protocol. func (r *dohResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if err := validateMsg(msg); err != nil { + return nil, err + } + data, err := msg.Pack() if err != nil { return nil, err diff --git a/doq.go b/doq.go index 2b74f83b..8d8a4e86 100644 --- a/doq.go +++ b/doq.go @@ -21,6 +21,10 @@ type doqResolver struct { } func (r *doqResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if err := validateMsg(msg); err != nil { + return nil, err + } + // Get the appropriate connection pool based on DNS type and IP stack dnsTyp := uint16(0) if msg != nil && len(msg.Question) > 0 { diff --git a/dot.go b/dot.go index 295134c9..fe65089e 100644 --- a/dot.go +++ b/dot.go @@ -3,7 +3,12 @@ package ctrld import ( "context" "crypto/tls" + "errors" + "io" "net" + "runtime" + "sync" + "time" "github.com/miekg/dns" ) @@ -13,30 +18,292 @@ type dotResolver struct { } func (r *dotResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if err := validateMsg(msg); err != nil { + return nil, err + } + + dnsTyp := uint16(0) + if msg != nil && len(msg.Question) > 0 { + dnsTyp = msg.Question[0].Qtype + } + + pool := r.uc.dotTransport(dnsTyp) + if pool == nil { + return nil, errors.New("DoT client pool is not available") + } + + return pool.Resolve(ctx, msg) +} + +// dotConnPool manages a pool of TCP/TLS connections for DoT queries. +type dotConnPool struct { + uc *UpstreamConfig + addrs []string + port string + tlsConfig *tls.Config + dialer *net.Dialer + mu sync.RWMutex + conns map[string]*dotConn + closed bool +} + +type dotConn struct { + conn net.Conn + lastUsed time.Time + refCount int + mu sync.Mutex +} + +func newDOTClientPool(uc *UpstreamConfig, addrs []string) *dotConnPool { + _, port, _ := net.SplitHostPort(uc.Endpoint) + if port == "" { + port = "853" + } + // The dialer is used to prevent bootstrapping cycle. 
- // If r.endpoint is set to dns.controld.dev, we need to resolve + // If endpoint is set to dns.controld.dev, we need to resolve // dns.controld.dev first. By using a dialer with custom resolver, // we ensure that we can always resolve the bootstrap domain // regardless of the machine DNS status. dialer := newDialer(net.JoinHostPort(controldPublicDns, "53")) - dnsTyp := uint16(0) - if msg != nil && len(msg.Question) > 0 { - dnsTyp = msg.Question[0].Qtype + + tlsConfig := &tls.Config{ + RootCAs: uc.certPool, + } + + if uc.BootstrapIP != "" { + tlsConfig.ServerName = uc.Domain + } + + pool := &dotConnPool{ + uc: uc, + addrs: addrs, + port: port, + tlsConfig: tlsConfig, + dialer: dialer, + conns: make(map[string]*dotConn), + } + + // Use SetFinalizer here because we need to call a method on the pool itself. + // AddCleanup would require passing the pool as arg (which panics) or capturing + // it in a closure (which prevents GC). SetFinalizer is appropriate for this case. + runtime.SetFinalizer(pool, func(p *dotConnPool) { + p.CloseIdleConnections() + }) + + return pool +} + +// Resolve performs a DNS query using a pooled TCP/TLS connection. 
+func (p *dotConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if msg == nil { + return nil, errors.New("nil DNS message") } - tcpNet, _ := r.uc.netForDNSType(dnsTyp) - dnsClient := &dns.Client{ - Net: tcpNet, - Dialer: dialer, - TLSConfig: &tls.Config{RootCAs: r.uc.certPool}, + + conn, addr, err := p.getConn(ctx) + if err != nil { + return nil, wrapCertificateVerificationError(err) } - endpoint := r.uc.Endpoint - if r.uc.BootstrapIP != "" { - dnsClient.TLSConfig.ServerName = r.uc.Domain - dnsClient.Net = "tcp-tls" - _, port, _ := net.SplitHostPort(endpoint) - endpoint = net.JoinHostPort(r.uc.BootstrapIP, port) + + // Set deadline + deadline, ok := ctx.Deadline() + if !ok { + deadline = time.Now().Add(5 * time.Second) } + _ = conn.SetDeadline(deadline) + + client := dns.Client{Net: "tcp-tls"} + answer, _, err := client.ExchangeWithConnContext(ctx, msg, &dns.Conn{Conn: conn}) + isGood := err == nil + p.putConn(addr, conn, isGood) + + if err != nil { + return nil, wrapCertificateVerificationError(err) + } + + return answer, nil +} - answer, _, err := dnsClient.ExchangeContext(ctx, msg, endpoint) - return answer, wrapCertificateVerificationError(err) +// getConn gets a TCP/TLS connection from the pool or creates a new one. 
+func (p *dotConnPool) getConn(ctx context.Context) (net.Conn, string, error) { + p.mu.Lock() + defer p.mu.Unlock() + + if p.closed { + return nil, "", io.EOF + } + + // Try to reuse an existing connection + for addr, dotConn := range p.conns { + dotConn.mu.Lock() + if dotConn.refCount == 0 && dotConn.conn != nil { + dotConn.refCount++ + dotConn.lastUsed = time.Now() + conn := dotConn.conn + dotConn.mu.Unlock() + return conn, addr, nil + } + dotConn.mu.Unlock() + } + + // No available connection, create a new one + addr, conn, err := p.dialConn(ctx) + if err != nil { + return nil, "", err + } + + dotConn := &dotConn{ + conn: conn, + lastUsed: time.Now(), + refCount: 1, + } + p.conns[addr] = dotConn + + return conn, addr, nil +} + +// putConn returns a connection to the pool. +func (p *dotConnPool) putConn(addr string, conn net.Conn, isGood bool) { + p.mu.Lock() + defer p.mu.Unlock() + + dotConn, ok := p.conns[addr] + if !ok { + return + } + + dotConn.mu.Lock() + defer dotConn.mu.Unlock() + + dotConn.refCount-- + if dotConn.refCount < 0 { + dotConn.refCount = 0 + } + + // If connection is bad, remove it from pool + if !isGood { + delete(p.conns, addr) + if conn != nil { + conn.Close() + } + return + } + + dotConn.lastUsed = time.Now() +} + +// dialConn creates a new TCP/TLS connection. 
+func (p *dotConnPool) dialConn(ctx context.Context) (string, net.Conn, error) { + logger := ProxyLogger.Load() + var endpoint string + + if p.uc.BootstrapIP != "" { + endpoint = net.JoinHostPort(p.uc.BootstrapIP, p.port) + Log(ctx, logger.Debug(), "Sending DoT request to: %s", endpoint) + conn, err := p.dialer.DialContext(ctx, "tcp", endpoint) + if err != nil { + return "", nil, err + } + tlsConn := tls.Client(conn, p.tlsConfig) + if err := tlsConn.HandshakeContext(ctx); err != nil { + conn.Close() + return "", nil, err + } + return endpoint, tlsConn, nil + } + + // Try bootstrap IPs in parallel + if len(p.addrs) > 0 { + type result struct { + conn net.Conn + addr string + err error + } + + ch := make(chan result, len(p.addrs)) + done := make(chan struct{}) + defer close(done) + + for _, addr := range p.addrs { + go func(addr string) { + endpoint := net.JoinHostPort(addr, p.port) + conn, err := p.dialer.DialContext(ctx, "tcp", endpoint) + if err != nil { + select { + case ch <- result{conn: nil, addr: endpoint, err: err}: + case <-done: + } + return + } + tlsConfig := p.tlsConfig.Clone() + tlsConfig.ServerName = p.uc.Domain + tlsConn := tls.Client(conn, tlsConfig) + if err := tlsConn.HandshakeContext(ctx); err != nil { + conn.Close() + select { + case ch <- result{conn: nil, addr: endpoint, err: err}: + case <-done: + } + return + } + select { + case ch <- result{conn: tlsConn, addr: endpoint, err: nil}: + case <-done: + if conn != nil { + conn.Close() + } + } + }(addr) + } + + errs := make([]error, 0, len(p.addrs)) + for range len(p.addrs) { + select { + case res := <-ch: + if res.err == nil && res.conn != nil { + Log(ctx, logger.Debug(), "Sending DoT request to: %s", res.addr) + return res.addr, res.conn, nil + } + if res.err != nil { + errs = append(errs, res.err) + } + case <-ctx.Done(): + return "", nil, ctx.Err() + } + } + + return "", nil, errors.Join(errs...) 
+ } + + // Fallback to endpoint resolution + endpoint = p.uc.Endpoint + Log(ctx, logger.Debug(), "Sending DoT request to: %s", endpoint) + conn, err := p.dialer.DialContext(ctx, "tcp", endpoint) + if err != nil { + return "", nil, err + } + tlsConn := tls.Client(conn, p.tlsConfig) + if err := tlsConn.HandshakeContext(ctx); err != nil { + conn.Close() + return "", nil, err + } + return endpoint, tlsConn, nil +} + +// CloseIdleConnections closes all connections in the pool. +func (p *dotConnPool) CloseIdleConnections() { + p.mu.Lock() + defer p.mu.Unlock() + if p.closed { + return + } + p.closed = true + for addr, dotConn := range p.conns { + dotConn.mu.Lock() + if dotConn.conn != nil { + dotConn.conn.Close() + } + dotConn.mu.Unlock() + delete(p.conns, addr) + } } diff --git a/resolver.go b/resolver.go index 3aeddd0d..914233d7 100644 --- a/resolver.go +++ b/resolver.go @@ -291,6 +291,9 @@ const hotCacheTTL = time.Second // for a short period (currently 1 second), reducing unnecessary traffics // sent to upstreams. func (o *osResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if err := validateMsg(msg); err != nil { + return nil, err + } if len(msg.Question) == 0 { return nil, errors.New("no question found") } @@ -509,6 +512,10 @@ type legacyResolver struct { } func (r *legacyResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if err := validateMsg(msg); err != nil { + return nil, err + } + // See comment in (*dotResolver).resolve method. 
dialer := newDialer(net.JoinHostPort(controldPublicDns, "53")) dnsTyp := uint16(0) @@ -534,6 +541,9 @@ func (r *legacyResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, e type dummyResolver struct{} func (d dummyResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { + if err := validateMsg(msg); err != nil { + return nil, err + } ans := new(dns.Msg) ans.SetReply(msg) return ans, nil @@ -769,3 +779,13 @@ func isLanAddr(addr netip.Addr) bool { addr.IsLinkLocalUnicast() || tsaddr.CGNATRange().Contains(addr) } + +func validateMsg(msg *dns.Msg) error { + if msg == nil { + return errors.New("nil DNS message") + } + if len(msg.Question) == 0 { + return errors.New("no question found") + } + return nil +} From 209c9211b97af7e25fa455d04f241261ee5f6c6c Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Mon, 26 Jan 2026 16:56:46 +0700 Subject: [PATCH 08/22] fix(dns): handle empty and invalid IP addresses gracefully Add guard checks to prevent panics when processing client info with empty IP addresses. Replace netip.MustParseAddr with ParseAddr to handle invalid IP addresses gracefully instead of panicking. Add test to verify queryFromSelf handles IP addresses safely. --- cmd/cli/dns_proxy.go | 14 ++++++++++++-- cmd/cli/dns_proxy_test.go | 10 ++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index 9474b505..3d8cc308 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -910,7 +910,12 @@ func (p *prog) getClientInfo(remoteIP string, msg *dns.Msg) *ctrld.ClientInfo { } else { ci.Hostname = p.ciTable.LookupHostname(ci.IP, ci.Mac) } - ci.Self = p.queryFromSelf(ci.IP) + + if ci.IP == "" { + mainLog.Load().Debug().Msgf("client info entry with empty IP address: %v", ci) + } else { + ci.Self = p.queryFromSelf(ci.IP) + } // If this is a query from self, but ci.IP is not loopback IP, // try using hostname mapping for lookback IP if presents. 
if ci.Self { @@ -1026,7 +1031,12 @@ func (p *prog) queryFromSelf(ip string) bool { if val, ok := p.queryFromSelfMap.Load(ip); ok { return val.(bool) } - netIP := netip.MustParseAddr(ip) + netIP, err := netip.ParseAddr(ip) + if err != nil { + mainLog.Load().Debug().Err(err).Msgf("could not parse IP: %q", ip) + return false + } + regularIPs, loopbackIPs, err := netmon.LocalAddresses() if err != nil { mainLog.Load().Warn().Err(err).Msg("could not get local addresses") diff --git a/cmd/cli/dns_proxy_test.go b/cmd/cli/dns_proxy_test.go index 4a4e5b4e..f909e960 100644 --- a/cmd/cli/dns_proxy_test.go +++ b/cmd/cli/dns_proxy_test.go @@ -464,3 +464,13 @@ func Test_isWanClient(t *testing.T) { }) } } + +func Test_prog_queryFromSelf(t *testing.T) { + p := &prog{} + require.NotPanics(t, func() { + p.queryFromSelf("") + }) + require.NotPanics(t, func() { + p.queryFromSelf("foo") + }) +} From da3ea0576328866f5c4e3834ba2ce4d1f562e6a2 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Wed, 28 Jan 2026 17:54:01 +0700 Subject: [PATCH 09/22] fix(dot): validate connections before reuse to prevent io.EOF errors Add connection health check in getConn to validate TLS connections before reusing them from the pool. This prevents io.EOF errors when reusing connections that were closed by the server (e.g., due to idle timeout). 
--- dot.go | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/dot.go b/dot.go index fe65089e..36531697 100644 --- a/dot.go +++ b/dot.go @@ -48,7 +48,7 @@ type dotConnPool struct { } type dotConn struct { - conn net.Conn + conn *tls.Conn lastUsed time.Time refCount int mu sync.Mutex @@ -105,13 +105,6 @@ func (p *dotConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro return nil, wrapCertificateVerificationError(err) } - // Set deadline - deadline, ok := ctx.Deadline() - if !ok { - deadline = time.Now().Add(5 * time.Second) - } - _ = conn.SetDeadline(deadline) - client := dns.Client{Net: "tcp-tls"} answer, _, err := client.ExchangeWithConnContext(ctx, msg, &dns.Conn{Conn: conn}) isGood := err == nil @@ -136,7 +129,7 @@ func (p *dotConnPool) getConn(ctx context.Context) (net.Conn, string, error) { // Try to reuse an existing connection for addr, dotConn := range p.conns { dotConn.mu.Lock() - if dotConn.refCount == 0 && dotConn.conn != nil { + if dotConn.refCount == 0 && dotConn.conn != nil && isAlive(dotConn.conn) { dotConn.refCount++ dotConn.lastUsed = time.Now() conn := dotConn.conn @@ -193,7 +186,7 @@ func (p *dotConnPool) putConn(addr string, conn net.Conn, isGood bool) { } // dialConn creates a new TCP/TLS connection. 
-func (p *dotConnPool) dialConn(ctx context.Context) (string, net.Conn, error) { +func (p *dotConnPool) dialConn(ctx context.Context) (string, *tls.Conn, error) { logger := ProxyLogger.Load() var endpoint string @@ -215,7 +208,7 @@ func (p *dotConnPool) dialConn(ctx context.Context) (string, net.Conn, error) { // Try bootstrap IPs in parallel if len(p.addrs) > 0 { type result struct { - conn net.Conn + conn *tls.Conn addr string err error } @@ -307,3 +300,28 @@ func (p *dotConnPool) CloseIdleConnections() { delete(p.conns, addr) } } + +func isAlive(c *tls.Conn) bool { + // Set a very short deadline for the read + c.SetReadDeadline(time.Now().Add(1 * time.Millisecond)) + + // Try to read 1 byte without consuming it (using a small buffer) + one := make([]byte, 1) + _, err := c.Read(one) + + // Reset the deadline for future operations + c.SetReadDeadline(time.Time{}) + + if err == io.EOF { + return false // Connection is definitely closed + } + + // If we get a timeout, it means no data is waiting, + // but the connection is likely still "up." + var netErr net.Error + if errors.As(err, &netErr) && netErr.Timeout() { + return true + } + + return err == nil +} From 4790eb2c88143b9700a933a03c6a7cef49dd13b3 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Wed, 28 Jan 2026 23:52:09 +0700 Subject: [PATCH 10/22] refactor(dot): simplify DoT connection pool implementation Replace the map-based pool and refCount bookkeeping with a channel-based pool. Drop the closed state, per-connection address tracking, and extra mutexes so the pool relies on the channel for concurrency and lifecycle. 
--- dot.go | 124 ++++++++++++++++++++------------------------------------- 1 file changed, 44 insertions(+), 80 deletions(-) diff --git a/dot.go b/dot.go index 36531697..654fa865 100644 --- a/dot.go +++ b/dot.go @@ -7,7 +7,6 @@ import ( "io" "net" "runtime" - "sync" "time" "github.com/miekg/dns" @@ -35,23 +34,20 @@ func (r *dotResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro return pool.Resolve(ctx, msg) } -// dotConnPool manages a pool of TCP/TLS connections for DoT queries. +const dotPoolSize = 16 + +// dotConnPool manages a pool of TCP/TLS connections for DoT queries using a buffered channel. type dotConnPool struct { uc *UpstreamConfig addrs []string port string tlsConfig *tls.Config dialer *net.Dialer - mu sync.RWMutex - conns map[string]*dotConn - closed bool + conns chan *dotConn } type dotConn struct { - conn *tls.Conn - lastUsed time.Time - refCount int - mu sync.Mutex + conn *tls.Conn } func newDOTClientPool(uc *UpstreamConfig, addrs []string) *dotConnPool { @@ -81,7 +77,7 @@ func newDOTClientPool(uc *UpstreamConfig, addrs []string) *dotConnPool { port: port, tlsConfig: tlsConfig, dialer: dialer, - conns: make(map[string]*dotConn), + conns: make(chan *dotConn, dotPoolSize), } // Use SetFinalizer here because we need to call a method on the pool itself. 
@@ -100,7 +96,7 @@ func (p *dotConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro return nil, errors.New("nil DNS message") } - conn, addr, err := p.getConn(ctx) + conn, err := p.getConn(ctx) if err != nil { return nil, wrapCertificateVerificationError(err) } @@ -108,7 +104,7 @@ func (p *dotConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro client := dns.Client{Net: "tcp-tls"} answer, _, err := client.ExchangeWithConnContext(ctx, msg, &dns.Conn{Conn: conn}) isGood := err == nil - p.putConn(addr, conn, isGood) + p.putConn(conn, isGood) if err != nil { return nil, wrapCertificateVerificationError(err) @@ -118,71 +114,42 @@ func (p *dotConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro } // getConn gets a TCP/TLS connection from the pool or creates a new one. -func (p *dotConnPool) getConn(ctx context.Context) (net.Conn, string, error) { - p.mu.Lock() - defer p.mu.Unlock() - - if p.closed { - return nil, "", io.EOF - } - - // Try to reuse an existing connection - for addr, dotConn := range p.conns { - dotConn.mu.Lock() - if dotConn.refCount == 0 && dotConn.conn != nil && isAlive(dotConn.conn) { - dotConn.refCount++ - dotConn.lastUsed = time.Now() - conn := dotConn.conn - dotConn.mu.Unlock() - return conn, addr, nil +// A connection is taken from the channel while in use; putConn returns it. 
+func (p *dotConnPool) getConn(ctx context.Context) (net.Conn, error) { + for { + select { + case dc := <-p.conns: + if dc.conn != nil && isAlive(dc.conn) { + return dc.conn, nil + } + if dc.conn != nil { + dc.conn.Close() + } + default: + _, conn, err := p.dialConn(ctx) + if err != nil { + return nil, err + } + return conn, nil } - dotConn.mu.Unlock() } - - // No available connection, create a new one - addr, conn, err := p.dialConn(ctx) - if err != nil { - return nil, "", err - } - - dotConn := &dotConn{ - conn: conn, - lastUsed: time.Now(), - refCount: 1, - } - p.conns[addr] = dotConn - - return conn, addr, nil } -// putConn returns a connection to the pool. -func (p *dotConnPool) putConn(addr string, conn net.Conn, isGood bool) { - p.mu.Lock() - defer p.mu.Unlock() - - dotConn, ok := p.conns[addr] - if !ok { - return - } - - dotConn.mu.Lock() - defer dotConn.mu.Unlock() - - dotConn.refCount-- - if dotConn.refCount < 0 { - dotConn.refCount = 0 - } - - // If connection is bad, remove it from pool - if !isGood { - delete(p.conns, addr) +// putConn returns a connection to the pool for reuse by other goroutines. +func (p *dotConnPool) putConn(conn net.Conn, isGood bool) { + if !isGood || conn == nil { if conn != nil { conn.Close() } return } - - dotConn.lastUsed = time.Now() + dc := &dotConn{conn: conn.(*tls.Conn)} + select { + case p.conns <- dc: + default: + // Channel full, close the connection + dc.conn.Close() + } } // dialConn creates a new TCP/TLS connection. @@ -284,20 +251,17 @@ func (p *dotConnPool) dialConn(ctx context.Context) (string, *tls.Conn, error) { } // CloseIdleConnections closes all connections in the pool. +// Connections currently checked out (in use) are not closed. 
func (p *dotConnPool) CloseIdleConnections() { - p.mu.Lock() - defer p.mu.Unlock() - if p.closed { - return - } - p.closed = true - for addr, dotConn := range p.conns { - dotConn.mu.Lock() - if dotConn.conn != nil { - dotConn.conn.Close() + for { + select { + case dc := <-p.conns: + if dc.conn != nil { + dc.conn.Close() + } + default: + return } - dotConn.mu.Unlock() - delete(p.conns, addr) } } From 3f30ec30d8961f853358868d9a2ea17b06507e4b Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Wed, 28 Jan 2026 23:52:33 +0700 Subject: [PATCH 11/22] refactor(doq): simplify DoQ connection pool implementation Replace the map-based pool and refCount bookkeeping with a channel-based pool. Drop the closed state, per-connection address tracking, and extra mutexes so the pool relies on the channel for concurrency and lifecycle, matching the approach used in the DoT pool. --- doq.go | 147 ++++++++++++++++++++------------------------------------- 1 file changed, 50 insertions(+), 97 deletions(-) diff --git a/doq.go b/doq.go index 8d8a4e86..eb7ed1c8 100644 --- a/doq.go +++ b/doq.go @@ -9,7 +9,6 @@ import ( "io" "net" "runtime" - "sync" "time" "github.com/miekg/dns" @@ -39,22 +38,19 @@ func (r *doqResolver) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro return pool.Resolve(ctx, msg) } -// doqConnPool manages a pool of QUIC connections for DoQ queries. +const doqPoolSize = 16 + +// doqConnPool manages a pool of QUIC connections for DoQ queries using a buffered channel. 
type doqConnPool struct { uc *UpstreamConfig addrs []string port string tlsConfig *tls.Config - mu sync.RWMutex - conns map[string]*doqConn - closed bool + conns chan *doqConn } type doqConn struct { - conn *quic.Conn - lastUsed time.Time - refCount int - mu sync.Mutex + conn *quic.Conn } func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { @@ -74,7 +70,7 @@ func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { addrs: addrs, port: port, tlsConfig: tlsConfig, - conns: make(map[string]*doqConn), + conns: make(chan *doqConn, doqPoolSize), } // Use SetFinalizer here because we need to call a method on the pool itself. @@ -107,7 +103,7 @@ func (p *doqConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro } func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { - conn, addr, err := p.getConn(ctx) + conn, err := p.getConn(ctx) if err != nil { return nil, err } @@ -115,14 +111,14 @@ func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, er // Pack the DNS message msgBytes, err := msg.Pack() if err != nil { - p.putConn(addr, conn, false) + p.putConn(conn, false) return nil, err } // Open a new stream for this query stream, err := conn.OpenStream() if err != nil { - p.putConn(addr, conn, false) + p.putConn(conn, false) return nil, err } @@ -138,13 +134,13 @@ func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, er var msgLenBytes = []byte{byte(msgLen >> 8), byte(msgLen & 0xFF)} if _, err := stream.Write(msgLenBytes); err != nil { stream.Close() - p.putConn(addr, conn, false) + p.putConn(conn, false) return nil, err } if _, err := stream.Write(msgBytes); err != nil { stream.Close() - p.putConn(addr, conn, false) + p.putConn(conn, false) return nil, err } @@ -154,7 +150,7 @@ func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, er // Return connection to pool (mark as potentially bad if error occurred) isGood := err == nil && 
len(buf) > 0 - p.putConn(addr, conn, isGood) + p.putConn(conn, isGood) if err != nil { return nil, err @@ -175,79 +171,42 @@ func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, er } // getConn gets a QUIC connection from the pool or creates a new one. -func (p *doqConnPool) getConn(ctx context.Context) (*quic.Conn, string, error) { - p.mu.Lock() - defer p.mu.Unlock() - - if p.closed { - return nil, "", io.EOF - } - - // Try to reuse an existing connection - for addr, doqConn := range p.conns { - doqConn.mu.Lock() - if doqConn.refCount == 0 && doqConn.conn != nil { - // Check if connection is still alive - select { - case <-doqConn.conn.Context().Done(): - // Connection is closed, remove it - doqConn.mu.Unlock() - delete(p.conns, addr) - continue - default: +// A connection is taken from the channel while in use; putConn returns it. +func (p *doqConnPool) getConn(ctx context.Context) (*quic.Conn, error) { + for { + select { + case dc := <-p.conns: + if dc.conn != nil && dc.conn.Context().Err() == nil { + return dc.conn, nil } - - doqConn.refCount++ - doqConn.lastUsed = time.Now() - conn := doqConn.conn - doqConn.mu.Unlock() - return conn, addr, nil + if dc.conn != nil { + dc.conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + } + default: + _, conn, err := p.dialConn(ctx) + if err != nil { + return nil, err + } + return conn, nil } - doqConn.mu.Unlock() } - - // No available connection, create a new one - addr, conn, err := p.dialConn(ctx) - if err != nil { - return nil, "", err - } - - doqConn := &doqConn{ - conn: conn, - lastUsed: time.Now(), - refCount: 1, - } - p.conns[addr] = doqConn - - return conn, addr, nil } -// putConn returns a connection to the pool. -func (p *doqConnPool) putConn(addr string, conn *quic.Conn, isGood bool) { - p.mu.Lock() - defer p.mu.Unlock() - - doqConn, ok := p.conns[addr] - if !ok { +// putConn returns a connection to the pool for reuse by other goroutines. 
+func (p *doqConnPool) putConn(conn *quic.Conn, isGood bool) { + if !isGood || conn == nil || conn.Context().Err() != nil { + if conn != nil { + conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + } return } - - doqConn.mu.Lock() - defer doqConn.mu.Unlock() - - doqConn.refCount-- - if doqConn.refCount < 0 { - doqConn.refCount = 0 - } - - // If connection is bad or closed, remove it from pool - if !isGood || conn.Context().Err() != nil { - delete(p.conns, addr) - conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") - return + dc := &doqConn{conn: conn} + select { + case p.conns <- dc: + default: + // Channel full, close the connection + dc.conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") } - - doqConn.lastUsed = time.Now() } // dialConn creates a new QUIC connection using parallel dialing like DoH3. @@ -292,23 +251,17 @@ func (p *doqConnPool) dialConn(ctx context.Context) (string, *quic.Conn, error) return addr, conn, nil } -// CloseIdleConnections closes all idle connections in the pool. -// When called during cleanup (e.g., from finalizer), it closes all connections -// regardless of refCount to prevent resource leaks. +// CloseIdleConnections closes all connections in the pool. +// Connections currently checked out (in use) are not closed. 
func (p *doqConnPool) CloseIdleConnections() { - p.mu.Lock() - defer p.mu.Unlock() - - p.closed = true - - for addr, dc := range p.conns { - dc.mu.Lock() - if dc.conn != nil { - // Close all connections to ensure proper cleanup, even if in use - // This prevents resource leaks when the pool is being destroyed - dc.conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + for { + select { + case dc := <-p.conns: + if dc.conn != nil { + dc.conn.CloseWithError(quic.ApplicationErrorCode(quic.NoError), "") + } + default: + return } - dc.mu.Unlock() - delete(p.conns, addr) } } From 40c68a13a117ff5aef0325c532b056d8758a09d0 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Tue, 10 Feb 2026 17:03:33 +0700 Subject: [PATCH 12/22] fix(metadata): detect login user via logname when running under sudo On Darwin 26.2+, sudo no longer preserves SUDO_USER, LOGNAME, and USER (CVE-2025-43416), so env-based detection fails. Use the logname(1) command on Unix first, then fall back to environment variables and user.Current() so the real login user is still reported correctly. --- metadata.go | 45 ++++++++++++++++++++++++++++++--------------- metadata_test.go | 4 ++++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/metadata.go b/metadata.go index 11e8fb51..42bb38a6 100644 --- a/metadata.go +++ b/metadata.go @@ -3,7 +3,10 @@ package ctrld import ( "context" "os" + "os/exec" "os/user" + "runtime" + "strings" "github.com/cuonglm/osinfo" @@ -49,30 +52,42 @@ func SystemMetadata(ctx context.Context) map[string]string { // currentLoginUser attempts to find the actual login user, even if the process is running as root. func currentLoginUser(ctx context.Context) string { - // 1. Check SUDO_USER: This is the most reliable way to find the original user - // when a script is run via 'sudo'. 
- if sudoUser := os.Getenv("SUDO_USER"); sudoUser != "" { - return sudoUser + // On Darwin 26.2+, sudo no longer preserves SUDO_USER, LOGNAME, USER etc., so we cannot + // rely on environment variables when running under sudo. See CVE-2025-43416. + // We use the logname(1) command on Unix, which reports the login name from the session + // (e.g. utmp); there is no portable syscall equivalent in Go, so we exec logname. + if runtime.GOOS != "windows" { + if name := runLogname(ctx); name != "" { + return name + } } - // 2. Check general user login variables. LOGNAME is often preferred over USER. - if logName := os.Getenv("LOGNAME"); logName != "" { - return logName + // Fallback: env vars (still set on older systems or when not using sudo) + if u := os.Getenv("SUDO_USER"); u != "" { + return u } - - // 3. Fallback to USER variable. - if userEnv := os.Getenv("USER"); userEnv != "" { - return userEnv + if u := os.Getenv("LOGNAME"); u != "" { + return u + } + if u := os.Getenv("USER"); u != "" { + return u } - // 4. Final fallback: Use the standard library function to get the *effective* user. - // This will return "root" if the process is running as root. currentUser, err := user.Current() if err != nil { - // Handle error gracefully, returning a placeholder ProxyLogger.Load().Debug().Err(err).Msg("Failed to get current user") return "unknown" } - return currentUser.Username } + +// runLogname runs the logname(1) command and returns the trimmed output, or "" on failure. 
+func runLogname(ctx context.Context) string { + cmd := exec.CommandContext(ctx, "logname") + out, err := cmd.Output() + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("Failed to run logname") + return "" + } + return strings.TrimSpace(string(out)) +} diff --git a/metadata_test.go b/metadata_test.go index b832c7e8..b57e0201 100644 --- a/metadata_test.go +++ b/metadata_test.go @@ -2,10 +2,14 @@ package ctrld import ( "context" + "os" "testing" ) func Test_metadata(t *testing.T) { m := SystemMetadata(context.Background()) t.Logf("metadata: %v", m) + t.Logf("SUDO_USER: %s", os.Getenv("SUDO_USER")) + t.Logf("LOGNAME: %s", os.Getenv("LOGNAME")) + t.Logf("USER: %s", os.Getenv("USER")) } From a4f04188112b6314fbc72ee897cf09a400fe2853 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Mon, 9 Feb 2026 17:27:24 +0700 Subject: [PATCH 13/22] fix(darwin): handle mDNSResponder on port 53 to avoid bind conflicts When mDNSResponder is using port 53 on macOS, adjust listener config to use 0.0.0.0:53, stop mDNSResponder before binding, and run cleanup on install and uninstall so the DNS server can start reliably. 
--- cmd/cli/cli.go | 21 ++++ cmd/cli/commands.go | 16 +++ cmd/cli/dns_proxy.go | 12 +++ cmd/cli/mdnsresponder_hack_darwin.go | 154 +++++++++++++++++++++++++++ cmd/cli/mdnsresponder_hack_others.go | 21 ++++ 5 files changed, 224 insertions(+) create mode 100644 cmd/cli/mdnsresponder_hack_darwin.go create mode 100644 cmd/cli/mdnsresponder_hack_others.go diff --git a/cmd/cli/cli.go b/cmd/cli/cli.go index 49d8b674..70c23123 100644 --- a/cmd/cli/cli.go +++ b/cmd/cli/cli.go @@ -901,6 +901,9 @@ func selfCheckStatus(ctx context.Context, s service.Service, sockDir string) (bo lc := cfg.FirstListener() addr := net.JoinHostPort(lc.IP, strconv.Itoa(lc.Port)) + if needMdnsResponderHack { + addr = "127.0.0.1:53" + } mainLog.Load().Debug().Msgf("performing listener test, sending queries to %s", addr) @@ -1113,6 +1116,10 @@ func uninstall(p *prog, s service.Service) { // Stop already did router.Cleanup and report any error if happens, // ignoring error here to prevent false positive. _ = p.router.Cleanup() + + // Run mDNS responder cleanup if necessary + doMdnsResponderCleanup() + mainLog.Load().Notice().Msg("Service uninstalled") return } @@ -1230,6 +1237,8 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti nextdnsMode := nextdns != "" // For Windows server with local Dns server running, we can only try on random local IP. hasLocalDnsServer := hasLocalDnsServerRunning() + // For macOS with mDNSResponder running on port 53, we must use 0.0.0.0 to prevent conflicts.
+ needMdnsResponderHack := needMdnsResponderHack notRouter := router.Name() == "" isDesktop := ctrld.IsDesktopPlatform() for n, listener := range cfg.Listener { @@ -1263,6 +1272,12 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti lcc[n].Port = false } } + if needMdnsResponderHack { + listener.IP = "0.0.0.0" + listener.Port = 53 + lcc[n].IP = false + lcc[n].Port = false + } updated = updated || lcc[n].IP || lcc[n].Port } @@ -1295,6 +1310,9 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti // Created listeners will be kept in listeners slice above, and close // before function finished. tryListen := func(addr string) error { + if needMdnsResponderHack { + killMdnsResponder() + } udpLn, udpErr := net.ListenPacket("udp", addr) if udpLn != nil { closers = append(closers, udpLn) @@ -1358,6 +1376,9 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti } attempts := 0 maxAttempts := 10 + if needMdnsResponderHack { + maxAttempts = 1 + } for { if attempts == maxAttempts { notifyFunc() diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index a1074f29..dbd13bfd 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -359,6 +359,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c initInteractiveLogging() tasks := []task{ + {func() error { + doMdnsResponderCleanup() + return nil + }, false, "Cleanup service before installation"}, {func() error { // Save current DNS so we can restore later. 
withEachPhysicalInterfaces("", "saveCurrentStaticDNS", func(i *net.Interface) error { @@ -374,6 +378,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c }, false, "Configure service failure actions"}, {s.Start, true, "Start"}, {noticeWritingControlDConfig, false, "Notice writing ControlD config"}, + {func() error { + doMdnsResponderHackPostInstall() + return nil + }, false, "Configure service post installation"}, } mainLog.Load().Notice().Msg("Starting existing ctrld service") if doTasks(tasks) { @@ -437,6 +445,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c } tasks := []task{ + {func() error { + doMdnsResponderCleanup() + return nil + }, false, "Cleanup service before installation"}, {s.Stop, false, "Stop"}, {func() error { return doGenerateNextDNSConfig(nextdns) }, true, "Checking config"}, {func() error { return ensureUninstall(s) }, false, "Ensure uninstall"}, @@ -459,6 +471,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c // Note that startCmd do not actually write ControlD config, but the config file was // generated after s.Start, so we notice users here for consistent with nextdns mode. {noticeWritingControlDConfig, false, "Notice writing ControlD config"}, + {func() error { + doMdnsResponderHackPostInstall() + return nil + }, false, "Configure service post installation"}, } mainLog.Load().Notice().Msg("Starting service") if doTasks(tasks) { diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index 3d8cc308..60dfd490 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -101,6 +101,15 @@ func (p *prog) serveDNS(listenerNum string) error { _ = w.WriteMsg(answer) return } + // When mDNSResponder hack has been done, ctrld was listening on 0.0.0.0:53, but only requests + // to 127.0.0.1:53 are accepted. 
Since binding to 0.0.0.0 will make the IP info of the local address + // hidden (appeared as [::]), we checked for requests originated from 127.0.0.1 instead. + if needMdnsResponderHack && !strings.HasPrefix(w.RemoteAddr().String(), "127.0.0.1:") { + answer := new(dns.Msg) + answer.SetRcode(m, dns.RcodeRefused) + _ = w.WriteMsg(answer) + return + } listenerConfig := p.cfg.Listener[listenerNum] reqId := requestID() ctx := context.WithValue(context.Background(), ctrld.ReqIdCtxKey{}, reqId) @@ -854,6 +863,9 @@ func runDNSServer(addr, network string, handler dns.Handler) (*dns.Server, <-cha errCh := make(chan error) go func() { defer close(errCh) + if needMdnsResponderHack { + killMdnsResponder() + } if err := s.ListenAndServe(); err != nil { s.NotifyStartedFunc() mainLog.Load().Error().Err(err).Msgf("could not listen and serve on: %s", s.Addr) diff --git a/cmd/cli/mdnsresponder_hack_darwin.go b/cmd/cli/mdnsresponder_hack_darwin.go new file mode 100644 index 00000000..6687bc5c --- /dev/null +++ b/cmd/cli/mdnsresponder_hack_darwin.go @@ -0,0 +1,154 @@ +package cli + +import ( + "bufio" + "errors" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + + "tailscale.com/net/netmon" +) + +// On macOS, the system daemon mDNSResponder (used for proxy/mDNS/Bonjour discovery) +// listens on UDP and TCP port 53. That conflicts with ctrld when it needs to +// run a DNS proxy on port 53. The kernel does not allow two processes to bind +// the same address/port, so ctrld would fail with "address already in use" if we +// did nothing. +// +// If ctrld started before mDNSResponder and listened only on 127.0.0.1, mDNSResponder +// would bind port 53 on other interfaces, so system processes would use it as the +// DNS resolver instead of ctrld, leading to inconsistent behavior. +// +// This file implements a Darwin-only workaround: +// +// - We detect at startup whether mDNSResponder is using port 53 (or a +// persisted marker file exists from a previous run). 
+// - When the workaround is active, we force the listener to 0.0.0.0:53 and, +// before binding, run killall mDNSResponder so that ctrld can bind to port 53. +// - We use SO_REUSEPORT (see listener setup) so that the socket can be bound +// even when the port was recently used. +// - On install we create a marker file in the user's home directory so that +// the workaround is applied on subsequent starts; on uninstall we remove +// that file and bounce the en0 interface to restore normal mDNSResponder +// behavior. +// +// Without this, users on macOS would be unable to run ctrld as the system DNS +// on port 53 when mDNSResponder is active. + +var ( + + // needMdnsResponderHack determines if a system-specific workaround for mDNSResponder is necessary at runtime. + needMdnsResponderHack = mDNSResponderHack() + mDNSResponderHackFilename = ".mdnsResponderHack" +) + +// mDNSResponderHack checks if the mDNSResponder process and its environments meet specific criteria for operation. +func mDNSResponderHack() bool { + if st, err := os.Stat(mDNSResponderFile()); err == nil && st.Mode().IsRegular() { + return true + } + out, err := lsofCheckPort53() + if err != nil { + return false + } + if !isMdnsResponderListeningPort53(strings.NewReader(out)) { + return false + } + return true +} + +// mDNSResponderFile constructs and returns the absolute path to the mDNSResponder hack file in the user's home directory. +func mDNSResponderFile() string { + if d, err := userHomeDir(); err == nil && d != "" { + return filepath.Join(d, mDNSResponderHackFilename) + } + return "" +} + +// doMdnsResponderCleanup performs cleanup tasks for the mDNSResponder hack file and resets the network interface "en0". 
+func doMdnsResponderCleanup() { + fn := mDNSResponderFile() + if fn == "" { + return + } + if st, err := os.Stat(fn); err != nil || !st.Mode().IsRegular() { + return + } + if err := os.Remove(fn); err != nil { + mainLog.Load().Error().Err(err).Msg("failed to remove mDNSResponder hack file") + } + + ifName := "en0" + if din, err := netmon.DefaultRouteInterface(); err == nil { + ifName = din + } + if err := exec.Command("ifconfig", ifName, "down").Run(); err != nil { + mainLog.Load().Error().Err(err).Msg("failed to disable en0") + } + if err := exec.Command("ifconfig", ifName, "up").Run(); err != nil { + mainLog.Load().Error().Err(err).Msg("failed to enable en0") + } +} + +// doMdnsResponderHackPostInstall creates a hack file for mDNSResponder if required and logs debug or error messages. +func doMdnsResponderHackPostInstall() { + if !needMdnsResponderHack { + return + } + fn := mDNSResponderFile() + if fn == "" { + return + } + if f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0400); err != nil { + mainLog.Load().Warn().Err(err).Msgf("Could not create %s", fn) + } else { + if err := f.Close(); err != nil { + mainLog.Load().Warn().Err(err).Msgf("Could not close %s", fn) + } else { + mainLog.Load().Debug().Msgf("Created %s", fn) + } + } +} + +// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times. +// Logs any accumulated errors if the attempts to terminate the process fail. +func killMdnsResponder() { + numAttempts := 10 + errs := make([]error, 0, numAttempts) + for range numAttempts { + if err := exec.Command("killall", "mDNSResponder").Run(); err != nil { + // Exit code 1 means the process not found, do not log it. 
+ if !strings.Contains(err.Error(), "exit status 1") { + errs = append(errs, err) + } + } + } + if len(errs) > 0 { + mainLog.Load().Debug().Err(errors.Join(errs...)).Msg("failed to kill mDNSResponder") + } +} + +// lsofCheckPort53 executes the lsof command to check if any process is listening on port 53 and returns the output. +func lsofCheckPort53() (string, error) { + cmd := exec.Command("lsof", "+c0", "-i:53", "-n", "-P") + out, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + return string(out), nil +} + +// isMdnsResponderListeningPort53 checks if the output provided by the reader contains an mDNSResponder process. +func isMdnsResponderListeningPort53(r io.Reader) bool { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + fields := strings.Fields(scanner.Text()) + if len(fields) > 0 && strings.EqualFold(fields[0], "mDNSResponder") { + return true + } + } + return false +} diff --git a/cmd/cli/mdnsresponder_hack_others.go b/cmd/cli/mdnsresponder_hack_others.go new file mode 100644 index 00000000..5d6ada59 --- /dev/null +++ b/cmd/cli/mdnsresponder_hack_others.go @@ -0,0 +1,21 @@ +//go:build !darwin + +package cli + +// needMdnsResponderHack determines if a system-specific workaround for mDNSResponder is necessary at runtime. +var needMdnsResponderHack = mDNSResponderHack() + +// mDNSResponderHack checks if the mDNSResponder process and its environments meet specific criteria for operation. +func mDNSResponderHack() bool { + return false +} + +// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times. +// Logs any accumulated errors if the attempts to terminate the process fail. +func killMdnsResponder() {} + +// doMdnsResponderCleanup performs cleanup tasks for the mDNSResponder hack file and resets the network interface "en0". 
+func doMdnsResponderCleanup() {} + +// doMdnsResponderHackPostInstall creates a hack file for mDNSResponder if required and logs debug or error messages. +func doMdnsResponderHackPostInstall() {} From 147106f2b9a6c5668d4817c0d8df7a37d8880f8f Mon Sep 17 00:00:00 2001 From: Codescribe Date: Wed, 11 Feb 2026 23:19:30 -0500 Subject: [PATCH 14/22] fix(darwin): use scutil for provisioning hostname (#485) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macOS Sequoia with Private Wi-Fi Address enabled causes os.Hostname() to return generic names like "Mac.lan" from DHCP instead of the real computer name. The /utility provisioning endpoint sends this raw, resulting in devices named "Mac-lan" in the dashboard. Fallback chain: ComputerName → LocalHostName → os.Hostname() LocalHostName can also be affected by DHCP. ComputerName is the user-set display name from System Settings, fully immune to network state. --- internal/controld/config.go | 4 ++-- internal/controld/hostname_darwin.go | 26 ++++++++++++++++++++++++++ internal/controld/hostname_others.go | 10 ++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 internal/controld/hostname_darwin.go create mode 100644 internal/controld/hostname_others.go diff --git a/internal/controld/config.go b/internal/controld/config.go index b6e7047f..a2a1c74f 100644 --- a/internal/controld/config.go +++ b/internal/controld/config.go @@ -10,7 +10,6 @@ import ( "io" "net" "net/http" - "os" "runtime" "strings" "time" @@ -116,7 +115,8 @@ func FetchResolverUID(req *UtilityOrgRequest, version string, cdDev bool) (*Reso return nil, errors.New("invalid request") } if req.Hostname == "" { - hostname, _ := os.Hostname() + hostname, _ := preferredHostname() + ctrld.ProxyLogger.Load().Debug().Msgf("Using system hostname: %s", hostname) req.Hostname = hostname } diff --git a/internal/controld/hostname_darwin.go b/internal/controld/hostname_darwin.go new file mode 100644 index 
00000000..107b4cdf --- /dev/null +++ b/internal/controld/hostname_darwin.go @@ -0,0 +1,26 @@ +package controld + +import ( + "os" + "os/exec" + "strings" +) + +// preferredHostname returns the best available hostname on macOS. +// It prefers scutil --get ComputerName which is the user-configured name +// from System Settings → General → About → Name. This is immune to +// DHCP/network state that can cause os.Hostname() and even LocalHostName +// to return generic names like "Mac.lan" on Sequoia with Private Wi-Fi +// Address enabled. +// +// Fallback chain: ComputerName → LocalHostName → os.Hostname() +func preferredHostname() (string, error) { + for _, key := range []string{"ComputerName", "LocalHostName"} { + if out, err := exec.Command("scutil", "--get", key).Output(); err == nil { + if name := strings.TrimSpace(string(out)); name != "" { + return name, nil + } + } + } + return os.Hostname() +} diff --git a/internal/controld/hostname_others.go b/internal/controld/hostname_others.go new file mode 100644 index 00000000..9ae10263 --- /dev/null +++ b/internal/controld/hostname_others.go @@ -0,0 +1,10 @@ +//go:build !darwin + +package controld + +import "os" + +// preferredHostname returns the system hostname on non-Darwin platforms. +func preferredHostname() (string, error) { + return os.Hostname() +} From 12715e6f247da446d229a3017a1ed8c3bd8150cf Mon Sep 17 00:00:00 2001 From: Codescribe Date: Thu, 12 Feb 2026 12:41:25 -0500 Subject: [PATCH 15/22] fix: include hostname hints in metadata for API-side fallback Send all available hostname sources (ComputerName, LocalHostName, HostName, os.Hostname) in the metadata map when provisioning. This allows the API to detect and repair generic hostnames like 'Mac' by picking the best available source server-side. Belt and suspenders: preferredHostname() picks the right one client-side, but metadata gives the API a second chance. 
--- internal/controld/config.go | 10 ++++++++++ internal/controld/hostname_darwin.go | 18 ++++++++++++++++++ internal/controld/hostname_others.go | 9 +++++++++ 3 files changed, 37 insertions(+) diff --git a/internal/controld/config.go b/internal/controld/config.go index a2a1c74f..436d22db 100644 --- a/internal/controld/config.go +++ b/internal/controld/config.go @@ -120,6 +120,16 @@ func FetchResolverUID(req *UtilityOrgRequest, version string, cdDev bool) (*Reso req.Hostname = hostname } + // Include all hostname sources in metadata so the API can pick the + // best one if the primary looks generic (e.g., "Mac", "Mac.lan"). + if req.Metadata == nil { + req.Metadata = make(map[string]string) + } + for k, v := range hostnameHints() { + req.Metadata["hostname_"+k] = v + } + ctrld.ProxyLogger.Load().Debug().Msgf("Sending UID request to ControlD API") + body, _ := json.Marshal(req) return postUtilityAPI(version, cdDev, false, bytes.NewReader(body)) } diff --git a/internal/controld/hostname_darwin.go b/internal/controld/hostname_darwin.go index 107b4cdf..0b8eb52c 100644 --- a/internal/controld/hostname_darwin.go +++ b/internal/controld/hostname_darwin.go @@ -24,3 +24,21 @@ func preferredHostname() (string, error) { } return os.Hostname() } + +// hostnameHints returns all available hostname sources on macOS for +// diagnostic/fallback purposes. The API can use these to pick the +// best hostname if the primary one looks generic (e.g., "Mac"). 
+func hostnameHints() map[string]string { + hints := make(map[string]string) + for _, key := range []string{"ComputerName", "LocalHostName", "HostName"} { + if out, err := exec.Command("scutil", "--get", key).Output(); err == nil { + if name := strings.TrimSpace(string(out)); name != "" { + hints[key] = name + } + } + } + if h, err := os.Hostname(); err == nil { + hints["os.Hostname"] = h + } + return hints +} diff --git a/internal/controld/hostname_others.go b/internal/controld/hostname_others.go index 9ae10263..8aa03bc3 100644 --- a/internal/controld/hostname_others.go +++ b/internal/controld/hostname_others.go @@ -8,3 +8,12 @@ import "os" func preferredHostname() (string, error) { return os.Hostname() } + +// hostnameHints returns available hostname sources for diagnostic purposes. +func hostnameHints() map[string]string { + hints := make(map[string]string) + if h, err := os.Hostname(); err == nil { + hints["os.Hostname"] = h + } + return hints +} From 1e8240bd1c1e2237ce0914ab40cb29f1e3c3c990 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 3 Mar 2026 02:06:49 -0500 Subject: [PATCH 16/22] feat: introduce DNS intercept mode infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add --intercept-mode flag (dns/hard/off) with configuration support, recovery bypass for captive portals, probe-based interception verification, VPN DNS coexistence in the proxy layer, and IPv6 loopback listener guard. Remove standalone mDNSResponder hack files — the port 53 binding logic is now handled within the intercept mode infrastructure. Squashed from intercept mode development on v1.0 branch (#497). 
--- .gitignore | 2 + cmd/cli/cli.go | 126 ++++-- cmd/cli/commands.go | 170 +++++++-- cmd/cli/dns_intercept_others.go | 39 ++ cmd/cli/dns_proxy.go | 386 ++++++++++++++++--- cmd/cli/dns_proxy_test.go | 24 +- cmd/cli/main.go | 37 ++ cmd/cli/mdnsresponder_hack_darwin.go | 154 -------- cmd/cli/mdnsresponder_hack_others.go | 21 - cmd/cli/prog.go | 119 +++++- cmd/cli/service_args_darwin.go | 134 +++++++ cmd/cli/service_args_others.go | 38 ++ cmd/cli/service_args_windows.go | 153 ++++++++ config.go | 71 ++++ config_internal_test.go | 6 +- docs/dns-intercept-mode.md | 551 +++++++++++++++++++++++++++ resolver.go | 73 ++++ 17 files changed, 1813 insertions(+), 291 deletions(-) create mode 100644 cmd/cli/dns_intercept_others.go delete mode 100644 cmd/cli/mdnsresponder_hack_darwin.go delete mode 100644 cmd/cli/mdnsresponder_hack_others.go create mode 100644 cmd/cli/service_args_darwin.go create mode 100644 cmd/cli/service_args_others.go create mode 100644 cmd/cli/service_args_windows.go create mode 100644 docs/dns-intercept-mode.md diff --git a/.gitignore b/.gitignore index 8e70cc6b..799011f6 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ ctrld-* # generated file cmd/cli/rsrc_*.syso +ctrld +ctrld.exe diff --git a/cmd/cli/cli.go b/cmd/cli/cli.go index 70c23123..691d308c 100644 --- a/cmd/cli/cli.go +++ b/cmd/cli/cli.go @@ -345,6 +345,16 @@ func run(appCallback *AppCallback, stopCh chan struct{}) { processLogAndCacheFlags(v, &cfg) } + // Persist intercept_mode to config when provided via CLI flag on full install. + // This ensures the config file reflects the actual running mode for RMM/MDM visibility. 
+ if interceptMode == "dns" || interceptMode == "hard" { + if cfg.Service.InterceptMode != interceptMode { + cfg.Service.InterceptMode = interceptMode + updated = true + mainLog.Load().Info().Msgf("writing intercept_mode = %q to config", interceptMode) + } + } + if updated { if err := writeConfigFile(&cfg); err != nil { notifyExitToLogServer() @@ -647,7 +657,7 @@ func processCDFlags(cfg *ctrld.Config) (*controld.ResolverConfig, error) { req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(ctx), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } resolverConfig, err := controld.FetchResolverConfig(req, cdDev) for { @@ -901,9 +911,6 @@ func selfCheckStatus(ctx context.Context, s service.Service, sockDir string) (bo lc := cfg.FirstListener() addr := net.JoinHostPort(lc.IP, strconv.Itoa(lc.Port)) - if needMdnsResponderHack { - addr = "127.0.0.1:53" - } mainLog.Load().Debug().Msgf("performing listener test, sending queries to %s", addr) @@ -1116,10 +1123,6 @@ func uninstall(p *prog, s service.Service) { // Stop already did router.Cleanup and report any error if happens, // ignoring error here to prevent false positive. _ = p.router.Cleanup() - - // Run mDNS responder cleanup if necessary - doMdnsResponderCleanup() - mainLog.Load().Notice().Msg("Service uninstalled") return } @@ -1227,18 +1230,105 @@ func updateListenerConfig(cfg *ctrld.Config, notifyToLogServerFunc func()) bool return updated } +// tryUpdateListenerConfigIntercept handles listener binding for dns-intercept mode on macOS. +// In intercept mode, pf redirects all outbound port-53 traffic to ctrld's listener, +// so ctrld can safely listen on a non-standard port if port 53 is unavailable +// (e.g., mDNSResponder holds *:53). +// +// Flow: +// 1. If config has explicit (non-default) IP:port → use exactly that, no fallback +// 2. 
Otherwise → try 127.0.0.1:53, then 127.0.0.1:5354, then fatal +func tryUpdateListenerConfigIntercept(cfg *ctrld.Config, notifyFunc func(), fatal bool) (updated, ok bool) { + ok = true + lc := cfg.FirstListener() + if lc == nil { + return false, true + } + + hasExplicitConfig := lc.IP != "" && lc.IP != "0.0.0.0" && lc.Port != 0 + if !hasExplicitConfig { + // Set defaults for intercept mode + if lc.IP == "" || lc.IP == "0.0.0.0" { + lc.IP = "127.0.0.1" + updated = true + } + if lc.Port == 0 { + lc.Port = 53 + updated = true + } + } + + tryListen := func(ip string, port int) bool { + addr := net.JoinHostPort(ip, strconv.Itoa(port)) + udpLn, udpErr := net.ListenPacket("udp", addr) + if udpLn != nil { + udpLn.Close() + } + tcpLn, tcpErr := net.Listen("tcp", addr) + if tcpLn != nil { + tcpLn.Close() + } + return udpErr == nil && tcpErr == nil + } + + addr := net.JoinHostPort(lc.IP, strconv.Itoa(lc.Port)) + if tryListen(lc.IP, lc.Port) { + mainLog.Load().Debug().Msgf("DNS intercept: listener available at %s", addr) + return updated, true + } + + mainLog.Load().Info().Msgf("DNS intercept: cannot bind %s", addr) + + if hasExplicitConfig { + // User specified explicit address — don't guess, just fail + if fatal { + notifyFunc() + mainLog.Load().Fatal().Msgf("DNS intercept: cannot listen on configured address %s", addr) + } + return updated, false + } + + // Fallback: try port 5354 (mDNSResponder likely holds *:53) + if tryListen("127.0.0.1", 5354) { + mainLog.Load().Info().Msg("DNS intercept: port 53 unavailable (likely mDNSResponder), using 127.0.0.1:5354") + lc.IP = "127.0.0.1" + lc.Port = 5354 + return true, true + } + + if fatal { + notifyFunc() + mainLog.Load().Fatal().Msg("DNS intercept: cannot bind 127.0.0.1:53 or 127.0.0.1:5354") + } + return updated, false +} + // tryUpdateListenerConfig tries updating listener config with a working one. // If fatal is true, and there's listen address conflicted, the function do // fatal error. 
func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, notifyFunc func(), fatal bool) (updated, ok bool) { + // In intercept mode (macOS), pf redirects all port-53 traffic to ctrld's listener, + // so ctrld can safely listen on a non-standard port. Use a simple two-attempt flow: + // 1. If config has explicit non-default IP:port, use exactly that + // 2. Otherwise: try 127.0.0.1:53, then 127.0.0.1:5354, then fatal + // This bypasses the full cd-mode listener probing loop entirely. + // Check interceptMode (CLI flag) first, then fall back to config value. + // dnsIntercept bool is derived later in prog.run(), but we need to know + // the intercept mode here to select the right listener probing strategy. + im := interceptMode + if im == "" || im == "off" { + im = cfg.Service.InterceptMode + } + if (im == "dns" || im == "hard") && runtime.GOOS == "darwin" { + return tryUpdateListenerConfigIntercept(cfg, notifyFunc, fatal) + } + ok = true lcc := make(map[string]*listenerConfigCheck) cdMode := cdUID != "" nextdnsMode := nextdns != "" // For Windows server with local Dns server running, we can only try on random local IP. hasLocalDnsServer := hasLocalDnsServerRunning() - // For Macos with mDNSResponder running on port 53, we must use 0.0.0.0 to prevent conflicting. - needMdnsResponderHack := needMdnsResponderHack notRouter := router.Name() == "" isDesktop := ctrld.IsDesktopPlatform() for n, listener := range cfg.Listener { @@ -1272,12 +1362,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti lcc[n].Port = false } } - if needMdnsResponderHack { - listener.IP = "0.0.0.0" - listener.Port = 53 - lcc[n].IP = false - lcc[n].Port = false - } updated = updated || lcc[n].IP || lcc[n].Port } @@ -1310,9 +1394,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti // Created listeners will be kept in listeners slice above, and close // before function finished. 
tryListen := func(addr string) error { - if needMdnsResponderHack { - killMdnsResponder() - } udpLn, udpErr := net.ListenPacket("udp", addr) if udpLn != nil { closers = append(closers, udpLn) @@ -1376,9 +1457,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti } attempts := 0 maxAttempts := 10 - if needMdnsResponderHack { - maxAttempts = 1 - } for { if attempts == maxAttempts { notifyFunc() @@ -1889,10 +1967,12 @@ func runningIface(s service.Service) *ifaceResponse { // doValidateCdRemoteConfig fetches and validates custom config for cdUID. func doValidateCdRemoteConfig(cdUID string, fatal bool) error { + // Username is only sent during initial provisioning (cdUIDFromProvToken). + // All subsequent calls use lightweight metadata to avoid EDR triggers. req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } rc, err := controld.FetchResolverConfig(req, cdDev) if err != nil { diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index dbd13bfd..eaee8129 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -190,6 +190,7 @@ func initRunCmd() *cobra.Command { _ = runCmd.Flags().MarkHidden("iface") runCmd.Flags().StringVarP(&cdUpstreamProto, "proto", "", ctrld.ResolverTypeDOH, `Control D upstream type, either "doh" or "doh3"`) runCmd.Flags().BoolVarP(&rfc1918, "rfc1918", "", false, "Listen on RFC1918 addresses when 127.0.0.1 is the only listener") + runCmd.Flags().StringVarP(&interceptMode, "intercept-mode", "", "", "OS-level DNS interception mode: 'dns' (with VPN split routing) or 'hard' (all DNS through ctrld, no VPN split routing)") runCmd.FParseErrWhitelist = cobra.FParseErrWhitelist{UnknownFlags: true} rootCmd.AddCommand(runCmd) @@ -229,6 +230,14 @@ NOTE: running "ctrld start" without any arguments will start already installed c setDependencies(sc) sc.Arguments = 
append([]string{"run"}, osArgs...) + // Validate --intercept-mode early, before installing the service. + // Without this, a typo like "--intercept-mode fds" would install the service, + // the child process would Fatal() on the invalid value, and the parent would + // then uninstall — confusing and destructive. + if interceptMode != "" && !validInterceptMode(interceptMode) { + mainLog.Load().Fatal().Msgf("invalid --intercept-mode value %q: must be 'off', 'dns', or 'hard'", interceptMode) + } + p := &prog{ router: router.New(&cfg, cdUID != ""), cfg: &cfg, @@ -247,6 +256,49 @@ NOTE: running "ctrld start" without any arguments will start already installed c // Get current running iface, if any. var currentIface *ifaceResponse + // Handle "ctrld start --intercept-mode dns|hard" on an existing + // service BEFORE the pin check. Adding intercept mode is an enhancement, not + // deactivation, so it doesn't require the deactivation pin. We modify the + // plist/registry directly and restart the service via the OS service manager. + osArgsEarly := os.Args[2:] + if os.Args[1] == "service" { + osArgsEarly = os.Args[3:] + } + osArgsEarly = filterEmptyStrings(osArgsEarly) + interceptOnly := onlyInterceptFlags(osArgsEarly) + svcExists := serviceConfigFileExists() + mainLog.Load().Debug().Msgf("intercept upgrade check: args=%v interceptOnly=%v svcConfigExists=%v interceptMode=%q", osArgsEarly, interceptOnly, svcExists, interceptMode) + if interceptOnly && svcExists { + // Remove any existing intercept flags before applying the new value. + _ = removeServiceFlag("--intercept-mode") + + if interceptMode == "off" { + // "off" = remove intercept mode entirely (just the removal above). + mainLog.Load().Notice().Msg("Existing service detected — removing --intercept-mode from service arguments") + } else { + // Add the new mode value. 
+ mainLog.Load().Notice().Msgf("Existing service detected — appending --intercept-mode %s to service arguments", interceptMode) + if err := appendServiceFlag("--intercept-mode"); err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to append intercept flag to service arguments") + } + if err := appendServiceFlag(interceptMode); err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to append intercept mode value to service arguments") + } + } + + // Stop the service if running (bypasses ctrld pin — this is an + // enhancement, not deactivation). Then fall through to the normal + // startOnly path which handles start, self-check, and reporting. + if isCtrldRunning { + mainLog.Load().Notice().Msg("Stopping service for intercept mode upgrade") + _ = s.Stop() + isCtrldRunning = false + } + startOnly = true + isCtrldInstalled = true + // Fall through to startOnly path below. + } + // If pin code was set, do not allow running start command. if isCtrldRunning { if err := checkDeactivationPin(s, nil); isCheckDeactivationPinErr(err) { @@ -271,20 +323,31 @@ NOTE: running "ctrld start" without any arguments will start already installed c return } if res.OK { - name := res.Name - if iff, err := net.InterfaceByName(name); err == nil { - _, _ = patchNetIfaceName(iff) - name = iff.Name - } - logger := mainLog.Load().With().Str("iface", name).Logger() - logger.Debug().Msg("setting DNS successfully") - if res.All { - // Log that DNS is set for other interfaces. - withEachPhysicalInterfaces( - name, - "set DNS", - func(i *net.Interface) error { return nil }, - ) + // In intercept mode, show intercept-specific status instead of + // per-interface DNS messages (which are irrelevant). 
+ if res.InterceptMode != "" { + switch res.InterceptMode { + case "hard": + mainLog.Load().Notice().Msg("DNS hard intercept mode active — all DNS traffic intercepted, no VPN split routing") + default: + mainLog.Load().Notice().Msg("DNS intercept mode active — all DNS traffic intercepted via OS packet filter") + } + } else { + name := res.Name + if iff, err := net.InterfaceByName(name); err == nil { + _, _ = patchNetIfaceName(iff) + name = iff.Name + } + logger := mainLog.Load().With().Str("iface", name).Logger() + logger.Debug().Msg("setting DNS successfully") + if res.All { + // Log that DNS is set for other interfaces. + withEachPhysicalInterfaces( + name, + "set DNS", + func(i *net.Interface) error { return nil }, + ) + } } } } @@ -344,6 +407,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c if !startOnly { startOnly = len(osArgs) == 0 } + // If user run "ctrld start" and ctrld is already installed, starting existing service. if startOnly && isCtrldInstalled { tryReadingConfigWithNotice(false, true) @@ -359,10 +423,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c initInteractiveLogging() tasks := []task{ - {func() error { - doMdnsResponderCleanup() - return nil - }, false, "Cleanup service before installation"}, {func() error { // Save current DNS so we can restore later. 
withEachPhysicalInterfaces("", "saveCurrentStaticDNS", func(i *net.Interface) error { @@ -378,10 +438,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c }, false, "Configure service failure actions"}, {s.Start, true, "Start"}, {noticeWritingControlDConfig, false, "Notice writing ControlD config"}, - {func() error { - doMdnsResponderHackPostInstall() - return nil - }, false, "Configure service post installation"}, } mainLog.Load().Notice().Msg("Starting existing ctrld service") if doTasks(tasks) { @@ -392,6 +448,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c os.Exit(1) } reportSetDnsOk(sockDir) + // Verify service registration after successful start. + if err := verifyServiceRegistration(); err != nil { + mainLog.Load().Warn().Err(err).Msg("Service registry verification failed") + } } else { mainLog.Load().Error().Err(err).Msg("Failed to start existing ctrld service") os.Exit(1) @@ -400,7 +460,8 @@ NOTE: running "ctrld start" without any arguments will start already installed c } if cdUID != "" { - _ = doValidateCdRemoteConfig(cdUID, true) + // Skip doValidateCdRemoteConfig() here - run command will handle + // validation and config fetch via processCDFlags(). 
} else if uid := cdUIDFromProvToken(); uid != "" { cdUID = uid mainLog.Load().Debug().Msg("using uid from provision token") @@ -445,10 +506,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c } tasks := []task{ - {func() error { - doMdnsResponderCleanup() - return nil - }, false, "Cleanup service before installation"}, {s.Stop, false, "Stop"}, {func() error { return doGenerateNextDNSConfig(nextdns) }, true, "Checking config"}, {func() error { return ensureUninstall(s) }, false, "Ensure uninstall"}, @@ -471,10 +528,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c // Note that startCmd do not actually write ControlD config, but the config file was // generated after s.Start, so we notice users here for consistent with nextdns mode. {noticeWritingControlDConfig, false, "Notice writing ControlD config"}, - {func() error { - doMdnsResponderHackPostInstall() - return nil - }, false, "Configure service post installation"}, } mainLog.Load().Notice().Msg("Starting service") if doTasks(tasks) { @@ -525,6 +578,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c os.Exit(1) } reportSetDnsOk(sockDir) + // Verify service registration after successful start. 
+ if err := verifyServiceRegistration(); err != nil { + mainLog.Load().Warn().Err(err).Msg("Service registry verification failed") + } } }, } @@ -549,6 +606,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c startCmd.Flags().BoolVarP(&startOnly, "start_only", "", false, "Do not install new service") _ = startCmd.Flags().MarkHidden("start_only") startCmd.Flags().BoolVarP(&rfc1918, "rfc1918", "", false, "Listen on RFC1918 addresses when 127.0.0.1 is the only listener") + startCmd.Flags().StringVarP(&interceptMode, "intercept-mode", "", "", "OS-level DNS interception mode: 'dns' (with VPN split routing) or 'hard' (all DNS through ctrld, no VPN split routing)") routerCmd := &cobra.Command{ Use: "setup", @@ -1411,3 +1469,53 @@ func filterEmptyStrings(slice []string) []string { return s == "" }) } + +// validInterceptMode reports whether the given value is a recognized --intercept-mode. +// This is the single source of truth for mode validation — used by the early start +// command check, the runtime validation in prog.go, and onlyInterceptFlags below. +// Add new modes here to have them recognized everywhere. +func validInterceptMode(mode string) bool { + switch mode { + case "off", "dns", "hard": + return true + } + return false +} + +// onlyInterceptFlags reports whether args contain only intercept mode +// flags (--intercept-mode ) and flags that are auto-added by the +// start command alias (--iface). This is used to detect "ctrld start --intercept-mode dns" +// (or "off" to disable) on an existing installation, where the intent is to modify the +// intercept flag on the existing service without replacing other arguments. +// +// Note: the startCmdAlias appends "--iface=auto" to os.Args when --iface isn't +// explicitly provided, so we must allow it here. 
+func onlyInterceptFlags(args []string) bool { + hasIntercept := false + for i := 0; i < len(args); i++ { + arg := args[i] + switch { + case arg == "--intercept-mode": + // Next arg must be a valid mode value. + if i+1 < len(args) && validInterceptMode(args[i+1]) { + hasIntercept = true + i++ // skip the value + } else { + return false + } + case strings.HasPrefix(arg, "--intercept-mode="): + val := strings.TrimPrefix(arg, "--intercept-mode=") + if validInterceptMode(val) { + hasIntercept = true + } else { + return false + } + case arg == "--iface=auto" || arg == "--iface" || arg == "auto": + // Auto-added by startCmdAlias or its value; safe to ignore. + continue + default: + return false + } + } + return hasIntercept +} diff --git a/cmd/cli/dns_intercept_others.go b/cmd/cli/dns_intercept_others.go new file mode 100644 index 00000000..9f3c9030 --- /dev/null +++ b/cmd/cli/dns_intercept_others.go @@ -0,0 +1,39 @@ +//go:build !windows && !darwin + +package cli + +import ( + "fmt" +) + +// startDNSIntercept is not supported on this platform. +// DNS intercept mode is only available on Windows (via WFP) and macOS (via pf). +func (p *prog) startDNSIntercept() error { + return fmt.Errorf("dns intercept: not supported on this platform (only Windows and macOS)") +} + +// stopDNSIntercept is a no-op on unsupported platforms. +func (p *prog) stopDNSIntercept() error { + return nil +} + +// exemptVPNDNSServers is a no-op on unsupported platforms. +func (p *prog) exemptVPNDNSServers(exemptions []vpnDNSExemption) error { + return nil +} + +// ensurePFAnchorActive is a no-op on unsupported platforms. +func (p *prog) ensurePFAnchorActive() bool { + return false +} + +// checkTunnelInterfaceChanges is a no-op on unsupported platforms. +func (p *prog) checkTunnelInterfaceChanges() bool { + return false +} + +// scheduleDelayedRechecks is a no-op on unsupported platforms. +func (p *prog) scheduleDelayedRechecks() {} + +// pfInterceptMonitor is a no-op on unsupported platforms. 
+func (p *prog) pfInterceptMonitor() {} diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index 60dfd490..ac9d10b6 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -101,19 +101,10 @@ func (p *prog) serveDNS(listenerNum string) error { _ = w.WriteMsg(answer) return } - // When mDNSResponder hack has been done, ctrld was listening on 0.0.0.0:53, but only requests - // to 127.0.0.1:53 are accepted. Since binding to 0.0.0.0 will make the IP info of the local address - // hidden (appeared as [::]), we checked for requests originated from 127.0.0.1 instead. - if needMdnsResponderHack && !strings.HasPrefix(w.RemoteAddr().String(), "127.0.0.1:") { - answer := new(dns.Msg) - answer.SetRcode(m, dns.RcodeRefused) - _ = w.WriteMsg(answer) - return - } listenerConfig := p.cfg.Listener[listenerNum] reqId := requestID() ctx := context.WithValue(context.Background(), ctrld.ReqIdCtxKey{}, reqId) - if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) { + if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) && !isIPv6LoopbackListener(w.LocalAddr()) { ctrld.Log(ctx, mainLog.Load().Debug(), "query refused, listener does not allow WAN clients: %s", w.RemoteAddr().String()) answer := new(dns.Msg) answer.SetRcode(m, dns.RcodeRefused) @@ -135,6 +126,23 @@ func (p *prog) serveDNS(listenerNum string) error { return } + // Interception probe: if we're expecting a probe query and this matches, + // signal the prober and respond NXDOMAIN. Used by both macOS pf probes + // (_pf-probe-*) and Windows NRPT probes (_nrpt-probe-*) to verify that + // DNS interception is actually routing queries to ctrld's listener. 
+ if probeID, ok := p.pfProbeExpected.Load().(string); ok && probeID != "" && domain == probeID { + if chPtr, ok := p.pfProbeCh.Load().(*chan struct{}); ok && chPtr != nil { + select { + case *chPtr <- struct{}{}: + default: + } + } + answer := new(dns.Msg) + answer.SetRcode(m, dns.RcodeNameError) // NXDOMAIN + _ = w.WriteMsg(answer) + return + } + if _, ok := p.cacheFlushDomainsMap[domain]; ok && p.cache != nil { p.cache.Purge() ctrld.Log(ctx, mainLog.Load().Debug(), "received query %q, local cache is purged", domain) @@ -201,7 +209,7 @@ func (p *prog) serveDNS(listenerNum string) error { g, ctx := errgroup.WithContext(context.Background()) for _, proto := range []string{"udp", "tcp"} { proto := proto - if needLocalIPv6Listener() { + if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { g.Go(func() error { s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, handler) defer s.Shutdown() @@ -430,6 +438,24 @@ func (p *prog) proxyLanHostnameQuery(ctx context.Context, msg *dns.Msg) *dns.Msg } func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { + // DNS intercept recovery bypass: forward all queries to OS/DHCP resolver. + // This runs when upstreams are unreachable (e.g., captive portal network) + // and allows the network's DNS to handle authentication pages. 
+ if dnsIntercept && p.recoveryBypass.Load() { + ctrld.Log(ctx, mainLog.Load().Debug(), "Recovery bypass active: forwarding to OS resolver") + resolver, err := ctrld.NewResolver(osUpstreamConfig) + if err == nil { + resolveCtx, cancel := osUpstreamConfig.Context(ctx) + defer cancel() + answer, _ := resolver.Resolve(resolveCtx, req.msg) + if answer != nil { + return &proxyResponse{answer: answer} + } + } + ctrld.Log(ctx, mainLog.Load().Debug(), "OS resolver failed during recovery bypass") + // Fall through to normal flow as last resort + } + var staleAnswer *dns.Msg upstreams := req.ufr.upstreams serveStaleCache := p.cache != nil && p.cfg.Service.CacheServeStale @@ -442,9 +468,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { // However, on Active Directory Domain Controller, where it has local DNS server // running and listening on local addresses, these local addresses must be used // as nameservers, so queries for ADDC could be resolved as expected. - if p.isAdDomainQuery(req.msg) { + if p.isAdDomainQuery(req.msg) && p.hasLocalDNS { ctrld.Log(ctx, mainLog.Load().Debug(), - "AD domain query detected for %s in domain %s", + "AD domain query detected for %s in domain %s, using local DNS server", req.msg.Question[0].Name, p.adDomain) upstreamConfigs = []*ctrld.UpstreamConfig{localUpstreamConfig} upstreams = []string{upstreamOSLocal} @@ -515,6 +541,92 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { staleAnswer = answer } } + + // VPN DNS split routing (only in dns-intercept mode) + if dnsIntercept && p.vpnDNS != nil && len(req.msg.Question) > 0 { + domain := req.msg.Question[0].Name + if vpnServers := p.vpnDNS.UpstreamForDomain(domain); len(vpnServers) > 0 { + ctrld.Log(ctx, mainLog.Load().Debug(), "VPN DNS route matched for domain %s, using servers: %v", domain, vpnServers) + + for _, server := range vpnServers { + upstreamConfig := p.vpnDNS.upstreamConfigFor(server) + ctrld.Log(ctx, 
mainLog.Load().Debug(), "Querying VPN DNS server: %s", server) + + dnsResolver, err := ctrld.NewResolver(upstreamConfig) + if err != nil { + ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create VPN DNS resolver") + continue + } + resolveCtx, cancel := upstreamConfig.Context(ctx) + answer, err := dnsResolver.Resolve(resolveCtx, req.msg) + cancel() + if answer != nil { + ctrld.Log(ctx, mainLog.Load().Debug(), "VPN DNS query successful") + if p.cache != nil { + ttl := 60 * time.Second + if len(answer.Answer) > 0 { + ttl = time.Duration(answer.Answer[0].Header().Ttl) * time.Second + } + for _, upstream := range upstreams { + p.cache.Add(dnscache.NewKey(req.msg, upstream), dnscache.NewValue(answer, time.Now().Add(ttl))) + } + } + return &proxyResponse{answer: answer} + } + ctrld.Log(ctx, mainLog.Load().Debug().Err(err), "VPN DNS server %s failed", server) + } + + ctrld.Log(ctx, mainLog.Load().Debug(), "All VPN DNS servers failed, falling back to normal upstreams") + } + } + + // Domain-less VPN DNS fallback: when a query is going to upstream.os via a + // split-rule (matched policy) and we have VPN DNS servers with no associated + // domains, try those servers for this query. This handles cases like F5 VPN + // where the VPN doesn't advertise DNS search domains but its DNS servers + // know the internal zones referenced by split-rules (e.g., *.provisur.local). + // These servers are NOT used for general OS resolver queries to avoid + // polluting captive portal / DHCP flows. 
+ if dnsIntercept && p.vpnDNS != nil && req.ufr.matched && + len(upstreams) > 0 && upstreams[0] == upstreamOS && + len(req.msg.Question) > 0 && !p.isAdDomainQuery(req.msg) { + if dlServers := p.vpnDNS.DomainlessServers(); len(dlServers) > 0 { + domain := req.msg.Question[0].Name + ctrld.Log(ctx, mainLog.Load().Debug(), + "Split-rule query %s going to upstream.os, trying %d domain-less VPN DNS servers first: %v", + domain, len(dlServers), dlServers) + + for _, server := range dlServers { + upstreamCfg := p.vpnDNS.upstreamConfigFor(server) + ctrld.Log(ctx, mainLog.Load().Debug(), "Querying domain-less VPN DNS server: %s", server) + + dnsResolver, err := ctrld.NewResolver(upstreamCfg) + if err != nil { + ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create domain-less VPN DNS resolver") + continue + } + resolveCtx, cancel := upstreamCfg.Context(ctx) + answer, err := dnsResolver.Resolve(resolveCtx, req.msg) + cancel() + if answer != nil && answer.Rcode == dns.RcodeSuccess { + ctrld.Log(ctx, mainLog.Load().Debug(), + "Domain-less VPN DNS server %s answered %s successfully", server, domain) + return &proxyResponse{answer: answer} + } + if answer != nil { + ctrld.Log(ctx, mainLog.Load().Debug(), + "Domain-less VPN DNS server %s returned %s for %s, trying next", + server, dns.RcodeToString[answer.Rcode], domain) + } else { + ctrld.Log(ctx, mainLog.Load().Debug().Err(err), + "Domain-less VPN DNS server %s failed for %s", server, domain) + } + } + ctrld.Log(ctx, mainLog.Load().Debug(), + "All domain-less VPN DNS servers failed for %s, falling back to OS resolver", domain) + } + } + resolve1 := func(upstream string, upstreamConfig *ctrld.UpstreamConfig, msg *dns.Msg) (*dns.Msg, error) { ctrld.Log(ctx, mainLog.Load().Debug(), "sending query to %s: %s", upstream, upstreamConfig.Name) dnsResolver, err := ctrld.NewResolver(upstreamConfig) @@ -780,10 +892,30 @@ func ttlFromMsg(msg *dns.Msg) uint32 { return 0 } -func needLocalIPv6Listener() bool { +func 
needLocalIPv6Listener(interceptMode string) bool { + if !ctrldnet.SupportsIPv6ListenLocal() { + mainLog.Load().Debug().Msg("IPv6 listener: not needed — SupportsIPv6ListenLocal() is false") + return false + } // On Windows, there's no easy way for disabling/removing IPv6 DNS resolver, so we check whether we can // listen on ::1, then spawn a listener for receiving DNS requests. - return ctrldnet.SupportsIPv6ListenLocal() && runtime.GOOS == "windows" + if runtime.GOOS == "windows" { + mainLog.Load().Debug().Msg("IPv6 listener: enabled (Windows)") + return true + } + // On macOS in intercept mode, pf can't redirect IPv6 DNS to an IPv4 listener (cross-AF rdr + // not supported), and blocking IPv6 DNS causes ~1s timeouts (BSD doesn't deliver ICMP errors + // to unconnected UDP sockets). Listening on [::1] lets us intercept IPv6 DNS directly. + // + // NOTE: We accept the intercept mode string as a parameter instead of reading the global + // dnsIntercept bool, because dnsIntercept is derived later in prog.run() — after the + // listener goroutines are already spawned. Same pattern as the port 5354 fallback fix (MR !860). + if (interceptMode == "dns" || interceptMode == "hard") && runtime.GOOS == "darwin" { + mainLog.Load().Debug().Msg("IPv6 listener: enabled (macOS intercept mode)") + return true + } + mainLog.Load().Debug().Str("os", runtime.GOOS).Str("interceptMode", interceptMode).Msg("IPv6 listener: not needed") + return false } // ipAndMacFromMsg extracts IP and MAC information included in a DNS message, if any. 
@@ -863,9 +995,6 @@ func runDNSServer(addr, network string, handler dns.Handler) (*dns.Server, <-cha errCh := make(chan error) go func() { defer close(errCh) - if needMdnsResponderHack { - killMdnsResponder() - } if err := s.ListenAndServe(); err != nil { s.NotifyStartedFunc() mainLog.Load().Error().Err(err).Msgf("could not listen and serve on: %s", s.Addr) @@ -928,12 +1057,30 @@ func (p *prog) getClientInfo(remoteIP string, msg *dns.Msg) *ctrld.ClientInfo { } else { ci.Self = p.queryFromSelf(ci.IP) } + + // In DNS intercept mode, ALL queries are from the local machine — pf/WFP + // intercepts outbound DNS and redirects to ctrld. The source IP may be a + // virtual interface (Tailscale, VPN) that has no ARP/MAC entry, causing + // missing x-cd-mac, x-cd-host, and x-cd-os headers. Force Self=true and + // populate from the primary physical interface info. + if dnsIntercept && !ci.Self { + ci.Self = true + } + // If this is a query from self, but ci.IP is not loopback IP, // try using hostname mapping for lookback IP if presents. if ci.Self { if name := p.ciTable.LocalHostname(); name != "" { ci.Hostname = name } + // If MAC is still empty (e.g., query arrived via virtual interface IP + // like Tailscale), fall back to the loopback MAC mapping which addSelf() + // populates from the primary physical interface. 
+ if ci.Mac == "" { + if mac := p.ciTable.LookupMac("127.0.0.1"); mac != "" { + ci.Mac = mac + } + } } p.spoofLoopbackIpInClientInfo(ci) return ci @@ -975,7 +1122,7 @@ func (p *prog) doSelfUninstall(answer *dns.Msg) { req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } _, err := controld.FetchResolverConfig(req, cdDev) logger.Debug().Msg("maximum number of refused queries reached, checking device status") @@ -1169,6 +1316,18 @@ func isWanClient(na net.Addr) bool { !tsaddr.CGNATRange().Contains(ip) } +// isIPv6LoopbackListener reports whether the listener address is [::1]. +// The [::1] listener only serves locally-redirected traffic (via pf on macOS +// or system DNS on Windows), so queries arriving on it are always from this +// machine — even when the source IP is a global IPv6 address (pf preserves the +// original source IP during rdr). +func isIPv6LoopbackListener(na net.Addr) bool { + if ap, err := netip.ParseAddrPort(na.String()); err == nil { + return ap.Addr() == netip.IPv6Loopback() + } + return false +} + // resolveInternalDomainTestQuery resolves internal test domain query, returning the answer to the caller. func resolveInternalDomainTestQuery(ctx context.Context, domain string, m *dns.Msg) *dns.Msg { ctrld.Log(ctx, mainLog.Load().Debug(), "internal domain test query") @@ -1294,6 +1453,65 @@ func (p *prog) monitorNetworkChanges() error { mainLog.Load().Debug().Msg("Ignoring interface change - no valid interfaces affected") // check if the default IPs are still on an interface that is up ValidateDefaultLocalIPsFromDelta(delta.New) + // Even minor interface changes can trigger macOS pf reloads — verify anchor. 
+ // We check immediately AND schedule delayed re-checks (2s + 4s) to catch + // programs like Windscribe that modify pf rules and DNS settings + // asynchronously after the network change event fires. + if dnsIntercept && p.dnsInterceptState != nil { + if !p.pfStabilizing.Load() { + p.ensurePFAnchorActive() + } + // Check tunnel interfaces unconditionally — it decides internally + // whether to enter stabilization or rebuild immediately. + p.checkTunnelInterfaceChanges() + // Schedule delayed re-checks to catch async VPN teardown changes. + // These also refresh the OS resolver and VPN DNS routes. + p.scheduleDelayedRechecks() + + // Detect interface appearance/disappearance — hypervisors (Parallels, + // VMware, VirtualBox) reload pf when creating/destroying virtual network + // interfaces, which can corrupt pf's internal translation state. The rdr + // rules survive in text form (watchdog says "intact") but stop evaluating. + // Spawn an async monitor that probes pf interception with backoff and + // forces a full pf reload if broken. + if delta.Old != nil { + interfaceChanged := false + var changedIface string + for ifaceName := range delta.Old.Interface { + if ifaceName == "lo0" { + continue + } + if _, exists := delta.New.Interface[ifaceName]; !exists { + interfaceChanged = true + changedIface = ifaceName + break + } + } + if !interfaceChanged { + for ifaceName := range delta.New.Interface { + if ifaceName == "lo0" { + continue + } + if _, exists := delta.Old.Interface[ifaceName]; !exists { + interfaceChanged = true + changedIface = ifaceName + break + } + } + } + if interfaceChanged { + mainLog.Load().Info().Str("interface", changedIface). + Msg("DNS intercept: interface appeared/disappeared — starting interception probe monitor") + go p.pfInterceptMonitor() + } + } + } + // Refresh VPN DNS on tunnel interface changes (e.g., Tailscale connect/disconnect) + // even though the physical interface didn't change. 
Runs after tunnel checks + // so the pf anchor rebuild includes current VPN DNS exemptions. + if dnsIntercept && p.vpnDNS != nil { + p.vpnDNS.Refresh(true) + } return } @@ -1367,6 +1585,26 @@ func (p *prog) monitorNetworkChanges() error { if router.Name() == "" { p.handleRecovery(RecoveryReasonNetworkChange) } + + // After network changes, verify our pf anchor is still active and + // refresh VPN DNS state. Order matters: tunnel checks first (may rebuild + // anchor), then VPN DNS refresh (updates exemptions in anchor), then + // delayed re-checks for async VPN teardown. + if dnsIntercept && p.dnsInterceptState != nil { + if !p.pfStabilizing.Load() { + p.ensurePFAnchorActive() + } + // Check tunnel interfaces unconditionally — it decides internally + // whether to enter stabilization or rebuild immediately. + p.checkTunnelInterfaceChanges() + // Refresh VPN DNS routes — runs after tunnel checks so the anchor + // rebuild includes current VPN DNS exemptions. + if p.vpnDNS != nil { + p.vpnDNS.Refresh(true) + } + // Schedule delayed re-checks to catch async VPN teardown changes. + p.scheduleDelayedRechecks() + } }) mon.Start() @@ -1491,22 +1729,57 @@ func (p *prog) handleRecovery(reason RecoveryReason) { p.recoveryCancel = cancel p.recoveryCancelMu.Unlock() - // Immediately remove our DNS settings from the interface. // set recoveryRunning to true to prevent watchdogs from putting the listener back on the interface p.recoveryRunning.Store(true) - // we do not want to restore any static DNS settings - // we must try to get the DHCP values, any static DNS settings - // will be appended to nameservers from the saved interface values - p.resetDNS(false, false) - - // For an OS failure, reinitialize OS resolver nameservers immediately. 
- if reason == RecoveryReasonOSFailure { - mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers") - ns := ctrld.InitializeOsResolver(true) - if len(ns) == 0 { - mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values") + + // In DNS intercept mode, don't tear down WFP/pf filters. + // Instead, enable recovery bypass so proxy() forwards queries to + // the OS/DHCP resolver. This handles captive portal authentication + // without the overhead of filter teardown/rebuild. + if dnsIntercept && p.dnsInterceptState != nil { + p.recoveryBypass.Store(true) + mainLog.Load().Info().Msg("DNS intercept recovery: enabling DHCP bypass (filters stay active)") + + // Reinitialize OS resolver to discover DHCP servers on the new network. + mainLog.Load().Debug().Msg("DNS intercept recovery: discovering DHCP nameservers") + dhcpServers := ctrld.InitializeOsResolver(true) + if len(dhcpServers) == 0 { + mainLog.Load().Warn().Msg("DNS intercept recovery: no DHCP nameservers found") } else { - mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + mainLog.Load().Info().Msgf("DNS intercept recovery: found DHCP nameservers: %v", dhcpServers) + } + + // Exempt DHCP nameservers from intercept filters so the OS resolver + // can actually reach them on port 53. + if len(dhcpServers) > 0 { + // Build exemptions without an Interface — DHCP servers are not VPN-specific, + // so they only generate group-scoped pf rules (ctrld process only). 
+ exemptions := make([]vpnDNSExemption, 0, len(dhcpServers)) + for _, s := range dhcpServers { + host := s + if h, _, err := net.SplitHostPort(s); err == nil { + host = h + } + exemptions = append(exemptions, vpnDNSExemption{Server: host}) + } + mainLog.Load().Info().Msgf("DNS intercept recovery: exempting DHCP nameservers from filters: %v", exemptions) + if err := p.exemptVPNDNSServers(exemptions); err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept recovery: failed to exempt DHCP nameservers — recovery queries may fail") + } + } + } else { + // Traditional flow: remove DNS settings to expose DHCP nameservers + p.resetDNS(false, false) + + // For an OS failure, reinitialize OS resolver nameservers immediately. + if reason == RecoveryReasonOSFailure { + mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers") + ns := ctrld.InitializeOsResolver(true) + if len(ns) == 0 { + mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values") + } else { + mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + } } } @@ -1527,22 +1800,45 @@ func (p *prog) handleRecovery(reason RecoveryReason) { // reset the upstream failure count and down state p.um.reset(recovered) - // For network changes we also reinitialize the OS resolver. - if reason == RecoveryReasonNetworkChange { - ns := ctrld.InitializeOsResolver(true) - if len(ns) == 0 { - mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values") - } else { - mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + // In DNS intercept mode, just disable the bypass — filters are still active. 
+ if dnsIntercept && p.dnsInterceptState != nil { + p.recoveryBypass.Store(false) + mainLog.Load().Info().Msg("DNS intercept recovery complete: disabling DHCP bypass, resuming normal flow") + + // Refresh VPN DNS routes in case VPN state changed during recovery. + if p.vpnDNS != nil { + p.vpnDNS.Refresh(true) + } + + // Reinitialize OS resolver for the recovered state. + if reason == RecoveryReasonNetworkChange { + ns := ctrld.InitializeOsResolver(true) + if len(ns) == 0 { + mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values") + } else { + mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + } } - } - // Apply our DNS settings back and log the interface state. - p.setDNS() - p.logInterfacesState() + p.recoveryRunning.Store(false) + } else { + // For network changes we also reinitialize the OS resolver. + if reason == RecoveryReasonNetworkChange { + ns := ctrld.InitializeOsResolver(true) + if len(ns) == 0 { + mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values") + } else { + mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + } + } + + // Apply our DNS settings back and log the interface state. + p.setDNS() + p.logInterfacesState() - // allow watchdogs to put the listener back on the interface if its changed for any reason - p.recoveryRunning.Store(false) + // allow watchdogs to put the listener back on the interface if its changed for any reason + p.recoveryRunning.Store(false) + } // Clear the recovery cancellation for a clean slate. 
p.recoveryCancelMu.Lock() diff --git a/cmd/cli/dns_proxy_test.go b/cmd/cli/dns_proxy_test.go index f909e960..7d94dbd1 100644 --- a/cmd/cli/dns_proxy_test.go +++ b/cmd/cli/dns_proxy_test.go @@ -22,15 +22,15 @@ func Test_wildcardMatches(t *testing.T) { domain string match bool }{ - {"domain - prefix parent should not match", "*.windscribe.com", "windscribe.com", false}, - {"domain - prefix", "*.windscribe.com", "anything.windscribe.com", true}, - {"domain - prefix not match other s", "*.windscribe.com", "example.com", false}, - {"domain - prefix not match s in name", "*.windscribe.com", "wwindscribe.com", false}, - {"domain - suffix", "suffix.*", "suffix.windscribe.com", true}, - {"domain - suffix not match other", "suffix.*", "suffix1.windscribe.com", false}, - {"domain - both", "suffix.*.windscribe.com", "suffix.anything.windscribe.com", true}, - {"domain - both not match", "suffix.*.windscribe.com", "suffix1.suffix.windscribe.com", false}, - {"domain - case-insensitive", "*.WINDSCRIBE.com", "anything.windscribe.com", true}, + {"domain - prefix parent should not match", "*.example.com", "example.com", false}, + {"domain - prefix", "*.example.com", "anything.example.com", true}, + {"domain - prefix not match other s", "*.example.com", "other.org", false}, + {"domain - prefix not match s in name", "*.example.com", "eexample.com", false}, + {"domain - suffix", "suffix.*", "suffix.example.com", true}, + {"domain - suffix not match other", "suffix.*", "suffix1.example.com", false}, + {"domain - both", "suffix.*.example.com", "suffix.anything.example.com", true}, + {"domain - both not match", "suffix.*.example.com", "suffix1.suffix.example.com", false}, + {"domain - case-insensitive", "*.EXAMPLE.com", "anything.example.com", true}, {"mac - prefix", "*:98:05:b4:2b", "d4:67:98:05:b4:2b", true}, {"mac - prefix not match other s", "*:98:05:b4:2b", "0d:ba:54:09:94:2c", false}, {"mac - prefix not match s in name", "*:98:05:b4:2b", "e4:67:97:05:b4:2b", false}, @@ -57,9 +57,9 @@ 
func Test_canonicalName(t *testing.T) { domain string canonical string }{ - {"fqdn to canonical", "windscribe.com.", "windscribe.com"}, - {"already canonical", "windscribe.com", "windscribe.com"}, - {"case insensitive", "Windscribe.Com.", "windscribe.com"}, + {"fqdn to canonical", "example.com.", "example.com"}, + {"already canonical", "example.com", "example.com"}, + {"case insensitive", "Example.Com.", "example.com"}, } for _, tc := range tests { diff --git a/cmd/cli/main.go b/cmd/cli/main.go index 07839756..972c308d 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -1,7 +1,9 @@ package cli import ( + "encoding/hex" "io" + "net" "os" "path/filepath" "sync/atomic" @@ -40,6 +42,9 @@ var ( cleanup bool startOnly bool rfc1918 bool + interceptMode string // "", "dns", or "hard" — set via --intercept-mode flag or config + dnsIntercept bool // derived: interceptMode == "dns" || interceptMode == "hard" + hardIntercept bool // derived: interceptMode == "hard" mainLog atomic.Pointer[zerolog.Logger] consoleWriter zerolog.ConsoleWriter @@ -59,6 +64,16 @@ func init() { } func Main() { + // Fast path for pf interception probe subprocess. This runs before cobra + // initialization to minimize startup time. The parent process spawns us with + // "pf-probe-send " and a non-_ctrld GID so pf + // intercepts the DNS query. If pf rdr is working, the query reaches ctrld's + // listener; if not, it goes to the real DNS server and ctrld detects the miss. + if len(os.Args) >= 4 && os.Args[1] == "pf-probe-send" { + pfProbeSend(os.Args[2], os.Args[3]) + return + } + ctrld.InitConfig(v, "ctrld") initCLI() if err := rootCmd.Execute(); err != nil { @@ -189,3 +204,25 @@ func initCache() { cfg.Service.CacheSize = 4096 } } + +// pfProbeSend is a minimal subprocess that sends a pre-built DNS query packet +// to the specified host on port 53. It's invoked by probePFIntercept() with a +// non-_ctrld GID so pf interception applies to the query. 
+// +// Usage: ctrld pf-probe-send +func pfProbeSend(host, hexPacket string) { + packet, err := hex.DecodeString(hexPacket) + if err != nil { + os.Exit(1) + } + conn, err := net.DialTimeout("udp", net.JoinHostPort(host, "53"), time.Second) + if err != nil { + os.Exit(1) + } + defer conn.Close() + conn.SetDeadline(time.Now().Add(time.Second)) + _, _ = conn.Write(packet) + // Read response (don't care about result, just need the send to happen) + buf := make([]byte, 512) + _, _ = conn.Read(buf) +} diff --git a/cmd/cli/mdnsresponder_hack_darwin.go b/cmd/cli/mdnsresponder_hack_darwin.go deleted file mode 100644 index 6687bc5c..00000000 --- a/cmd/cli/mdnsresponder_hack_darwin.go +++ /dev/null @@ -1,154 +0,0 @@ -package cli - -import ( - "bufio" - "errors" - "io" - "os" - "os/exec" - "path/filepath" - "strings" - - "tailscale.com/net/netmon" -) - -// On macOS, the system daemon mDNSResponder (used for proxy/mDNS/Bonjour discovery) -// listens on UDP and TCP port 53. That conflicts with ctrld when it needs to -// run a DNS proxy on port 53. The kernel does not allow two processes to bind -// the same address/port, so ctrld would fail with "address already in use" if we -// did nothing. -// -// If ctrld started before mDNSResponder and listened only on 127.0.0.1, mDNSResponder -// would bind port 53 on other interfaces, so system processes would use it as the -// DNS resolver instead of ctrld, leading to inconsistent behavior. -// -// This file implements a Darwin-only workaround: -// -// - We detect at startup whether mDNSResponder is using port 53 (or a -// persisted marker file exists from a previous run). -// - When the workaround is active, we force the listener to 0.0.0.0:53 and, -// before binding, run killall mDNSResponder so that ctrld can bind to port 53. -// - We use SO_REUSEPORT (see listener setup) so that the socket can be bound -// even when the port was recently used. 
-// - On install we create a marker file in the user's home directory so that -// the workaround is applied on subsequent starts; on uninstall we remove -// that file and bounce the en0 interface to restore normal mDNSResponder -// behavior. -// -// Without this, users on macOS would be unable to run ctrld as the system DNS -// on port 53 when mDNSResponder is active. - -var ( - - // needMdnsResponderHack determines if a system-specific workaround for mDNSResponder is necessary at runtime. - needMdnsResponderHack = mDNSResponderHack() - mDNSResponderHackFilename = ".mdnsResponderHack" -) - -// mDNSResponderHack checks if the mDNSResponder process and its environments meet specific criteria for operation. -func mDNSResponderHack() bool { - if st, err := os.Stat(mDNSResponderFile()); err == nil && st.Mode().IsRegular() { - return true - } - out, err := lsofCheckPort53() - if err != nil { - return false - } - if !isMdnsResponderListeningPort53(strings.NewReader(out)) { - return false - } - return true -} - -// mDNSResponderFile constructs and returns the absolute path to the mDNSResponder hack file in the user's home directory. -func mDNSResponderFile() string { - if d, err := userHomeDir(); err == nil && d != "" { - return filepath.Join(d, mDNSResponderHackFilename) - } - return "" -} - -// doMdnsResponderCleanup performs cleanup tasks for the mDNSResponder hack file and resets the network interface "en0". 
-func doMdnsResponderCleanup() { - fn := mDNSResponderFile() - if fn == "" { - return - } - if st, err := os.Stat(fn); err != nil || !st.Mode().IsRegular() { - return - } - if err := os.Remove(fn); err != nil { - mainLog.Load().Error().Err(err).Msg("failed to remove mDNSResponder hack file") - } - - ifName := "en0" - if din, err := netmon.DefaultRouteInterface(); err == nil { - ifName = din - } - if err := exec.Command("ifconfig", ifName, "down").Run(); err != nil { - mainLog.Load().Error().Err(err).Msg("failed to disable en0") - } - if err := exec.Command("ifconfig", ifName, "up").Run(); err != nil { - mainLog.Load().Error().Err(err).Msg("failed to enable en0") - } -} - -// doMdnsResponderHackPostInstall creates a hack file for mDNSResponder if required and logs debug or error messages. -func doMdnsResponderHackPostInstall() { - if !needMdnsResponderHack { - return - } - fn := mDNSResponderFile() - if fn == "" { - return - } - if f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0400); err != nil { - mainLog.Load().Warn().Err(err).Msgf("Could not create %s", fn) - } else { - if err := f.Close(); err != nil { - mainLog.Load().Warn().Err(err).Msgf("Could not close %s", fn) - } else { - mainLog.Load().Debug().Msgf("Created %s", fn) - } - } -} - -// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times. -// Logs any accumulated errors if the attempts to terminate the process fail. -func killMdnsResponder() { - numAttempts := 10 - errs := make([]error, 0, numAttempts) - for range numAttempts { - if err := exec.Command("killall", "mDNSResponder").Run(); err != nil { - // Exit code 1 means the process not found, do not log it. 
- if !strings.Contains(err.Error(), "exit status 1") { - errs = append(errs, err) - } - } - } - if len(errs) > 0 { - mainLog.Load().Debug().Err(errors.Join(errs...)).Msg("failed to kill mDNSResponder") - } -} - -// lsofCheckPort53 executes the lsof command to check if any process is listening on port 53 and returns the output. -func lsofCheckPort53() (string, error) { - cmd := exec.Command("lsof", "+c0", "-i:53", "-n", "-P") - out, err := cmd.CombinedOutput() - if err != nil { - return "", err - } - return string(out), nil -} - -// isMdnsResponderListeningPort53 checks if the output provided by the reader contains an mDNSResponder process. -func isMdnsResponderListeningPort53(r io.Reader) bool { - scanner := bufio.NewScanner(r) - for scanner.Scan() { - fields := strings.Fields(scanner.Text()) - if len(fields) > 0 && strings.EqualFold(fields[0], "mDNSResponder") { - return true - } - } - return false -} diff --git a/cmd/cli/mdnsresponder_hack_others.go b/cmd/cli/mdnsresponder_hack_others.go deleted file mode 100644 index 5d6ada59..00000000 --- a/cmd/cli/mdnsresponder_hack_others.go +++ /dev/null @@ -1,21 +0,0 @@ -//go:build !darwin - -package cli - -// needMdnsResponderHack determines if a system-specific workaround for mDNSResponder is necessary at runtime. -var needMdnsResponderHack = mDNSResponderHack() - -// mDNSResponderHack checks if the mDNSResponder process and its environments meet specific criteria for operation. -func mDNSResponderHack() bool { - return false -} - -// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times. -// Logs any accumulated errors if the attempts to terminate the process fail. -func killMdnsResponder() {} - -// doMdnsResponderCleanup performs cleanup tasks for the mDNSResponder hack file and resets the network interface "en0". 
-func doMdnsResponderCleanup() {} - -// doMdnsResponderHackPostInstall creates a hack file for mDNSResponder if required and logs debug or error messages. -func doMdnsResponderHackPostInstall() {} diff --git a/cmd/cli/prog.go b/cmd/cli/prog.go index c499f847..c579a801 100644 --- a/cmd/cli/prog.go +++ b/cmd/cli/prog.go @@ -131,6 +131,7 @@ type prog struct { runningIface string requiredMultiNICsConfig bool adDomain string + hasLocalDNS bool runningOnDomainController bool selfUninstallMu sync.Mutex @@ -145,6 +146,55 @@ type prog struct { recoveryCancel context.CancelFunc recoveryRunning atomic.Bool + // recoveryBypass is set when dns-intercept mode enters recovery. + // When true, proxy() forwards all queries to OS/DHCP resolver + // instead of using the normal upstream flow. + recoveryBypass atomic.Bool + + // DNS intercept mode state (platform-specific). + // On Windows: *wfpState, on macOS: *pfState, nil on other platforms. + dnsInterceptState any + + // lastTunnelIfaces tracks the set of active VPN/tunnel interfaces (utun*, ipsec*, etc.) + // discovered during the last pf anchor rule build. When the set changes (e.g., a VPN + // connects and creates utun420), we rebuild the pf anchor to add interface-specific + // intercept rules for the new interface. Protected by mu. + lastTunnelIfaces []string //lint:ignore U1000 used on darwin + + // pfStabilizing is true while we're waiting for a VPN's pf ruleset to settle. + // While true, the watchdog and network change callbacks do NOT restore our rules. + pfStabilizing atomic.Bool + + // pfStabilizeCancel cancels the active stabilization goroutine, if any. + // Protected by mu. + pfStabilizeCancel context.CancelFunc //lint:ignore U1000 used on darwin + + // pfLastRestoreTime records when we last restored our anchor (unix millis). + // Used to detect immediate re-wipes (VPN reconnect cycle). 
+ pfLastRestoreTime atomic.Int64 //lint:ignore U1000 used on darwin + + // pfBackoffMultiplier tracks exponential backoff for stabilization. + // Resets to 0 when rules survive for >60s. + pfBackoffMultiplier atomic.Int32 //lint:ignore U1000 used on darwin + + // pfMonitorRunning ensures only one pfInterceptMonitor goroutine runs at a time. + // When an interface appears/disappears, we spawn a monitor that probes pf + // interception with exponential backoff and auto-heals if broken. + pfMonitorRunning atomic.Bool //lint:ignore U1000 used on darwin + + // pfProbeExpected holds the domain name of a pending pf interception probe. + // When non-empty, the DNS handler checks incoming queries against this value + // and signals pfProbeCh if matched. The probe verifies that pf's rdr rules + // are actually translating packets (not just present in rule text). + pfProbeExpected atomic.Value // string + + // pfProbeCh is signaled when the DNS handler receives the expected probe query. + // The channel is created by probePFIntercept() and closed when the probe arrives. + pfProbeCh atomic.Value // *chan struct{} + + // VPN DNS manager for split DNS routing when intercept mode is active. 
+ vpnDNS *vpnDNSManager + started chan struct{} onStartedDone chan struct{} onStarted []func() @@ -328,7 +378,7 @@ func (p *prog) apiConfigReload() { req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } resolverConfig, err := controld.FetchResolverConfig(req, cdDev) selfUninstallCheck(err, p, logger) @@ -491,9 +541,13 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) { } } } - if domain, err := getActiveDirectoryDomain(); err == nil && domain != "" && hasLocalDnsServerRunning() { + if domain, err := getActiveDirectoryDomain(); err == nil && domain != "" { mainLog.Load().Debug().Msgf("active directory domain: %s", domain) p.adDomain = domain + if hasLocalDnsServerRunning() { + mainLog.Load().Debug().Msg("local DNS server detected (Domain Controller)") + p.hasLocalDNS = true + } } var wg sync.WaitGroup @@ -724,6 +778,54 @@ func (p *prog) setDNS() { p.csSetDnsOk = setDnsOK }() + // Validate and resolve intercept mode. + // CLI flag (--intercept-mode) takes priority over config file. + // Valid values: "" (off), "dns" (with VPN split routing), "hard" (all DNS through ctrld). + if interceptMode != "" && !validInterceptMode(interceptMode) { + mainLog.Load().Fatal().Msgf("invalid --intercept-mode value %q: must be 'off', 'dns', or 'hard'", interceptMode) + } + if interceptMode == "" || interceptMode == "off" { + interceptMode = cfg.Service.InterceptMode + if interceptMode != "" && interceptMode != "off" { + mainLog.Load().Info().Msgf("Intercept mode enabled via config (intercept_mode = %q)", interceptMode) + } + } + + // Derive convenience bools from interceptMode. + switch interceptMode { + case "dns": + dnsIntercept = true + case "hard": + dnsIntercept = true + hardIntercept = true + } + + // DNS intercept mode: use OS-level packet interception (WFP/pf) instead of + // modifying interface DNS settings. 
This eliminates race conditions with VPN + // software that also manages DNS. See issue #489. + if dnsIntercept { + if err := p.startDNSIntercept(); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept mode failed — falling back to interface DNS settings") + // Fall through to traditional setDNS behavior. + } else { + if hardIntercept { + mainLog.Load().Info().Msg("Hard intercept mode active — all DNS through ctrld, no VPN split routing") + } else { + mainLog.Load().Info().Msg("DNS intercept mode active — skipping interface DNS configuration and watchdog") + + // Initialize VPN DNS manager for split DNS routing. + // Discovers search domains from virtual/VPN interfaces and forwards + // matching queries to the DNS server on that interface. + // Skipped in --intercept-mode hard where all DNS goes through ctrld. + p.vpnDNS = newVPNDNSManager(p.exemptVPNDNSServers) + p.vpnDNS.Refresh(true) + } + + setDnsOK = true + return + } + } + if cfg.Listener == nil { return } @@ -750,7 +852,7 @@ func (p *prog) setDNS() { if needRFC1918Listeners(lc) { nameservers = append(nameservers, ctrld.Rfc1918Addresses()...) } - if needLocalIPv6Listener() { + if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { nameservers = append(nameservers, "::1") } @@ -945,7 +1047,18 @@ func (p *prog) dnsWatchdog(iface *net.Interface, nameservers []string) { } // resetDNS performs a DNS reset for all interfaces. +// In DNS intercept mode, this tears down the WFP/pf filters instead. 
func (p *prog) resetDNS(isStart bool, restoreStatic bool) { + if dnsIntercept && p.dnsInterceptState != nil { + if err := p.stopDNSIntercept(); err != nil { + mainLog.Load().Error().Err(err).Msg("Failed to stop DNS intercept mode during reset") + } + + // Clean up VPN DNS manager + p.vpnDNS = nil + + return + } netIfaceName := "" if netIface := p.resetDNSForRunningIface(isStart, restoreStatic); netIface != nil { netIfaceName = netIface.Name diff --git a/cmd/cli/service_args_darwin.go b/cmd/cli/service_args_darwin.go new file mode 100644 index 00000000..d5889601 --- /dev/null +++ b/cmd/cli/service_args_darwin.go @@ -0,0 +1,134 @@ +//go:build darwin + +package cli + +import ( + "fmt" + "os" + "os/exec" + "strings" +) + +const launchdPlistPath = "/Library/LaunchDaemons/ctrld.plist" + +// serviceConfigFileExists returns true if the launchd plist for ctrld exists on disk. +// This is more reliable than checking launchctl status, which may report "not found" +// if the service was unloaded but the plist file still exists. +func serviceConfigFileExists() bool { + _, err := os.Stat(launchdPlistPath) + return err == nil +} + +// appendServiceFlag appends a CLI flag (e.g., "--intercept-mode") to the installed +// service's launch arguments. This is used when upgrading an existing installation +// to intercept mode without losing the existing --cd flag and other arguments. +// +// On macOS, this modifies the launchd plist at /Library/LaunchDaemons/ctrld.plist +// using the "defaults" command, which is the standard way to edit plists. +// +// The function is idempotent: if the flag already exists, it's a no-op. +func appendServiceFlag(flag string) error { + // Read current ProgramArguments from plist. 
+ out, err := exec.Command("defaults", "read", launchdPlistPath, "ProgramArguments").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to read plist ProgramArguments: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + + // Check if the flag is already present (idempotent). + args := string(out) + if strings.Contains(args, flag) { + mainLog.Load().Debug().Msgf("Service flag %q already present in plist, skipping", flag) + return nil + } + + // Use PlistBuddy to append the flag to ProgramArguments array. + // PlistBuddy is more reliable than "defaults" for array manipulation. + addCmd := exec.Command( + "/usr/libexec/PlistBuddy", + "-c", fmt.Sprintf("Add :ProgramArguments: string %s", flag), + launchdPlistPath, + ) + if out, err := addCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to append %q to plist ProgramArguments: %w (output: %s)", flag, err, strings.TrimSpace(string(out))) + } + + mainLog.Load().Info().Msgf("Appended %q to service launch arguments", flag) + return nil +} + +// verifyServiceRegistration is a no-op on macOS (launchd plist verification not needed). +func verifyServiceRegistration() error { + return nil +} + +// removeServiceFlag removes a CLI flag (and its value, if the next argument is not +// a flag) from the installed service's launch arguments. For example, removing +// "--intercept-mode" also removes the following "dns" or "hard" value argument. +// +// The function is idempotent: if the flag doesn't exist, it's a no-op. +func removeServiceFlag(flag string) error { + // Read current ProgramArguments to find the index. + out, err := exec.Command("/usr/libexec/PlistBuddy", "-c", "Print :ProgramArguments", launchdPlistPath).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to read plist ProgramArguments: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + + // Parse the PlistBuddy output to find the flag's index. 
+ // PlistBuddy prints arrays as: + // Array { + // /path/to/ctrld + // run + // --cd=xxx + // --intercept-mode + // dns + // } + lines := strings.Split(string(out), "\n") + var entries []string + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "Array {" || trimmed == "}" || trimmed == "" { + continue + } + entries = append(entries, trimmed) + } + + index := -1 + for i, entry := range entries { + if entry == flag { + index = i + break + } + } + + if index < 0 { + mainLog.Load().Debug().Msgf("Service flag %q not present in plist, skipping removal", flag) + return nil + } + + // Check if the next entry is a value (not a flag). If so, delete it first + // (deleting by index shifts subsequent entries down, so delete value before flag). + hasValue := index+1 < len(entries) && !strings.HasPrefix(entries[index+1], "-") + if hasValue { + delVal := exec.Command( + "/usr/libexec/PlistBuddy", + "-c", fmt.Sprintf("Delete :ProgramArguments:%d", index+1), + launchdPlistPath, + ) + if out, err := delVal.CombinedOutput(); err != nil { + return fmt.Errorf("failed to remove value for %q from plist: %w (output: %s)", flag, err, strings.TrimSpace(string(out))) + } + } + + // Delete the flag itself. 
+ delCmd := exec.Command( + "/usr/libexec/PlistBuddy", + "-c", fmt.Sprintf("Delete :ProgramArguments:%d", index), + launchdPlistPath, + ) + if out, err := delCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to remove %q from plist ProgramArguments: %w (output: %s)", flag, err, strings.TrimSpace(string(out))) + } + + mainLog.Load().Info().Msgf("Removed %q from service launch arguments", flag) + return nil +} diff --git a/cmd/cli/service_args_others.go b/cmd/cli/service_args_others.go new file mode 100644 index 00000000..07edda21 --- /dev/null +++ b/cmd/cli/service_args_others.go @@ -0,0 +1,38 @@ +//go:build !darwin && !windows + +package cli + +import ( + "fmt" + "os" +) + +// serviceConfigFileExists checks common service config file locations on Linux. +func serviceConfigFileExists() bool { + // systemd unit file + if _, err := os.Stat("/etc/systemd/system/ctrld.service"); err == nil { + return true + } + // SysV init script + if _, err := os.Stat("/etc/init.d/ctrld"); err == nil { + return true + } + return false +} + +// appendServiceFlag is not yet implemented on this platform. +// Linux services (systemd) store args in unit files; intercept mode +// should be set via the config file (intercept_mode) on these platforms. +func appendServiceFlag(flag string) error { + return fmt.Errorf("appending service flags is not supported on this platform; use intercept_mode in config instead") +} + +// verifyServiceRegistration is a no-op on this platform. +func verifyServiceRegistration() error { + return nil +} + +// removeServiceFlag is not yet implemented on this platform. 
+func removeServiceFlag(flag string) error { + return fmt.Errorf("removing service flags is not supported on this platform; use intercept_mode in config instead") +} diff --git a/cmd/cli/service_args_windows.go b/cmd/cli/service_args_windows.go new file mode 100644 index 00000000..246a009e --- /dev/null +++ b/cmd/cli/service_args_windows.go @@ -0,0 +1,153 @@ +//go:build windows + +package cli + +import ( + "fmt" + "strings" + + "golang.org/x/sys/windows/svc/mgr" +) + +// serviceConfigFileExists returns true if the ctrld Windows service is registered. +func serviceConfigFileExists() bool { + m, err := mgr.Connect() + if err != nil { + return false + } + defer m.Disconnect() + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return false + } + s.Close() + return true +} + +// appendServiceFlag appends a CLI flag (e.g., "--intercept-mode") to the installed +// Windows service's BinPath arguments. This is used when upgrading an existing +// installation to intercept mode without losing the existing --cd flag. +// +// The function is idempotent: if the flag already exists, it's a no-op. +func appendServiceFlag(flag string) error { + m, err := mgr.Connect() + if err != nil { + return fmt.Errorf("failed to connect to Windows SCM: %w", err) + } + defer m.Disconnect() + + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err) + } + defer s.Close() + + config, err := s.Config() + if err != nil { + return fmt.Errorf("failed to read service config: %w", err) + } + + // Check if flag already present (idempotent). + if strings.Contains(config.BinaryPathName, flag) { + mainLog.Load().Debug().Msgf("Service flag %q already present in BinPath, skipping", flag) + return nil + } + + // Append the flag to BinPath. 
+ config.BinaryPathName = strings.TrimSpace(config.BinaryPathName) + " " + flag + + if err := s.UpdateConfig(config); err != nil { + return fmt.Errorf("failed to update service config with %q: %w", flag, err) + } + + mainLog.Load().Info().Msgf("Appended %q to service BinPath", flag) + return nil +} + +// verifyServiceRegistration opens the Windows Service Control Manager and verifies +// that the ctrld service is correctly registered: logs the BinaryPathName, checks +// that --intercept-mode is present if expected, and verifies SERVICE_AUTO_START. +func verifyServiceRegistration() error { + m, err := mgr.Connect() + if err != nil { + return fmt.Errorf("failed to connect to Windows SCM: %w", err) + } + defer m.Disconnect() + + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err) + } + defer s.Close() + + config, err := s.Config() + if err != nil { + return fmt.Errorf("failed to read service config: %w", err) + } + + mainLog.Load().Debug().Msgf("Service registry: BinaryPathName = %q", config.BinaryPathName) + + // If intercept mode is set, verify the flag is present in BinPath. + if interceptMode == "dns" || interceptMode == "hard" { + if !strings.Contains(config.BinaryPathName, "--intercept-mode") { + return fmt.Errorf("service registry: --intercept-mode flag missing from BinaryPathName (expected mode %q)", interceptMode) + } + mainLog.Load().Debug().Msgf("Service registry: --intercept-mode flag present in BinaryPathName") + } + + // Verify auto-start. mgr.StartAutomatic == 2 == SERVICE_AUTO_START. + if config.StartType != mgr.StartAutomatic { + return fmt.Errorf("service registry: StartType is %d, expected SERVICE_AUTO_START (%d)", config.StartType, mgr.StartAutomatic) + } + + return nil +} + +// removeServiceFlag removes a CLI flag (and its value, if present) from the installed +// Windows service's BinPath. 
For example, removing "--intercept-mode" also removes +// the following "dns" or "hard" value. The function is idempotent. +func removeServiceFlag(flag string) error { + m, err := mgr.Connect() + if err != nil { + return fmt.Errorf("failed to connect to Windows SCM: %w", err) + } + defer m.Disconnect() + + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err) + } + defer s.Close() + + config, err := s.Config() + if err != nil { + return fmt.Errorf("failed to read service config: %w", err) + } + + if !strings.Contains(config.BinaryPathName, flag) { + mainLog.Load().Debug().Msgf("Service flag %q not present in BinPath, skipping removal", flag) + return nil + } + + // Split BinPath into parts, find and remove the flag + its value (if any). + parts := strings.Fields(config.BinaryPathName) + var newParts []string + for i := 0; i < len(parts); i++ { + if parts[i] == flag { + // Skip the flag. Also skip the next part if it's a value (not a flag). 
+ if i+1 < len(parts) && !strings.HasPrefix(parts[i+1], "-") { + i++ // skip value too + } + continue + } + newParts = append(newParts, parts[i]) + } + config.BinaryPathName = strings.Join(newParts, " ") + + if err := s.UpdateConfig(config); err != nil { + return fmt.Errorf("failed to update service config: %w", err) + } + + mainLog.Load().Info().Msgf("Removed %q from service BinPath", flag) + return nil +} diff --git a/config.go b/config.go index bdfa389e..edba183d 100644 --- a/config.go +++ b/config.go @@ -240,6 +240,7 @@ type ServiceConfig struct { RefetchTime *int `mapstructure:"refetch_time" toml:"refetch_time,omitempty"` ForceRefetchWaitTime *int `mapstructure:"force_refetch_wait_time" toml:"force_refetch_wait_time,omitempty"` LeakOnUpstreamFailure *bool `mapstructure:"leak_on_upstream_failure" toml:"leak_on_upstream_failure,omitempty"` + InterceptMode string `mapstructure:"intercept_mode" toml:"intercept_mode,omitempty" validate:"omitempty,oneof=off dns hard"` Daemon bool `mapstructure:"-" toml:"-"` AllocateIP bool `mapstructure:"-" toml:"-"` } @@ -511,6 +512,69 @@ func (uc *UpstreamConfig) ReBootstrap() { }) } +// ForceReBootstrap immediately replaces the upstream transport, closing old +// connections and creating new ones synchronously. Unlike ReBootstrap() which +// sets a lazy flag (new transport created on next query), this ensures the +// transport is ready before any queries arrive. Use when external events +// (e.g. firewall state flush) are known to have killed existing connections. +func (uc *UpstreamConfig) ForceReBootstrap() { + switch uc.Type { + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ, ResolverTypeDOT: + default: + return + } + ProxyLogger.Load().Debug().Msgf("force re-bootstrapping upstream transport for %v", uc) + uc.SetupTransport() + // Clear any pending lazy re-bootstrap flag so ensureSetupTransport() + // doesn't redundantly recreate the transport we just built. 
+ uc.rebootstrap.Store(rebootstrapNotStarted) +} + +// closeTransports closes idle connections on all existing transports. +// This is called before creating new transports during re-bootstrap to +// force in-flight requests on stale connections to fail quickly, rather +// than waiting for the full context deadline (e.g. 5s) after a firewall +// state table flush kills the underlying TCP/QUIC connections. +func (uc *UpstreamConfig) closeTransports() { + if t := uc.transport; t != nil { + t.CloseIdleConnections() + } + if t := uc.transport4; t != nil { + t.CloseIdleConnections() + } + if t := uc.transport6; t != nil { + t.CloseIdleConnections() + } + if p := uc.doqConnPool; p != nil { + p.CloseIdleConnections() + } + if p := uc.doqConnPool4; p != nil { + p.CloseIdleConnections() + } + if p := uc.doqConnPool6; p != nil { + p.CloseIdleConnections() + } + if p := uc.dotClientPool; p != nil { + p.CloseIdleConnections() + } + if p := uc.dotClientPool4; p != nil { + p.CloseIdleConnections() + } + if p := uc.dotClientPool6; p != nil { + p.CloseIdleConnections() + } + // http3RoundTripper is stored as http.RoundTripper but the concrete type + // (*http3.Transport) exposes CloseIdleConnections via this interface. + type idleCloser interface { + CloseIdleConnections() + } + for _, rt := range []http.RoundTripper{uc.http3RoundTripper, uc.http3RoundTripper4, uc.http3RoundTripper6} { + if c, ok := rt.(idleCloser); ok { + c.CloseIdleConnections() + } + } +} + // SetupTransport initializes the network transport used to connect to upstream servers. // For now, DoH/DoH3/DoQ/DoT upstreams are supported. func (uc *UpstreamConfig) SetupTransport() { @@ -519,6 +583,13 @@ func (uc *UpstreamConfig) SetupTransport() { default: return } + + // Close existing transport connections before creating new ones. + // This forces in-flight requests on stale connections (e.g. after a + // firewall state table flush) to fail fast instead of waiting for + // the full context deadline timeout. 
+ uc.closeTransports() + ips := uc.bootstrapIPs switch uc.IPStack { case IpStackV4: diff --git a/config_internal_test.go b/config_internal_test.go index ca2b381a..d470a142 100644 --- a/config_internal_test.go +++ b/config_internal_test.go @@ -541,10 +541,12 @@ func TestRebootstrapRace(t *testing.T) { <-started var wg sync.WaitGroup + wg.Add(goroutines) for range goroutines { - wg.Go(func() { + go func() { + defer wg.Done() uc.ensureSetupTransport() - }) + }() } wg.Wait() diff --git a/docs/dns-intercept-mode.md b/docs/dns-intercept-mode.md new file mode 100644 index 00000000..41dae1f2 --- /dev/null +++ b/docs/dns-intercept-mode.md @@ -0,0 +1,551 @@ +# DNS Intercept Mode + +## Overview + +DNS intercept mode is an alternative approach to DNS management that uses OS-level packet interception instead of modifying network interface DNS settings. This eliminates race conditions with VPN software, endpoint security tools, and other programs that also manage DNS. + +## The Problem + +By default, ctrld sets DNS to `127.0.0.1` on network interfaces so all queries go through ctrld's local listener. However, VPN software (F5 BIG-IP, Cisco AnyConnect, Palo Alto GlobalProtect, etc.) also overwrites interface DNS settings, creating conflicts: + +1. **DNS Setting War**: ctrld sets DNS to `127.0.0.1`, VPN overwrites to its DNS servers, ctrld's watchdog detects the change and restores `127.0.0.1`, VPN overwrites again — infinitely. + +2. **Bypass Window**: During the watchdog polling interval (up to 20 seconds), DNS queries may go to the VPN's DNS servers, bypassing ctrld's filtering profiles (malware blocking, content filtering, etc.). + +3. **Resolution Failures**: During the brief moments when DNS is being rewritten, queries may fail entirely, causing intermittent connectivity loss. 
+ +## The Solution + +DNS intercept mode works at a lower level than interface settings: + +- **Windows**: Uses NRPT (Name Resolution Policy Table) to route all DNS queries to `127.0.0.1` (ctrld's listener) via the Windows DNS Client service. In `hard` mode, additionally uses WFP (Windows Filtering Platform) to block all outbound DNS (port 53) except to localhost and private ranges, preventing any bypass. VPN software can set interface DNS freely — NRPT's most-specific-match ensures VPN-specific domains still resolve correctly while ctrld handles everything else. + +- **macOS**: Uses pf (packet filter) to redirect all outbound DNS (port 53) traffic to ctrld's listener at `127.0.0.1:53`. Any DNS query, regardless of which DNS server the OS thinks it's using, gets transparently redirected to ctrld. + +## Usage + +```bash +# Start ctrld with DNS intercept mode (auto-detects VPN search domains) +ctrld start --intercept-mode dns --cd + +# Hard intercept: all DNS through ctrld, no VPN split routing +ctrld start --intercept-mode hard --cd + +# Or with a config file +ctrld start --intercept-mode dns -c /path/to/ctrld.toml + +# Run in foreground (debug) +ctrld run --intercept-mode dns --cd +ctrld run --intercept-mode hard --cd +``` + +### Intercept Modes + +| Flag | DNS Interception | VPN Split Routing | Captive Portal Recovery | +|------|-----------------|-------------------|------------------------| +| `--intercept-mode dns` | ✅ WFP/pf | ✅ Auto-detect & forward | ✅ Active | +| `--intercept-mode hard` | ✅ WFP/pf | ❌ All through ctrld | ✅ Active | + +**`--intercept-mode dns`** (recommended): Intercepts all DNS via WFP/pf, but automatically discovers search domains from VPN and virtual network adapters (Tailscale, F5, Cisco AnyConnect, etc.) and forwards matching queries to the DNS server on that interface. This allows VPN internal resources (e.g., `*.corp.local`) to resolve correctly while ctrld handles everything else. 
+ +**`--intercept-mode hard`**: Same OS-level interception, but does NOT forward any queries to VPN DNS servers. Every DNS query goes through ctrld's configured upstreams. Use this when you want total DNS control and don't need VPN internal domain resolution. Captive portal recovery still works — network authentication pages are handled automatically. + +## How It Works + +### Windows (NRPT + WFP) + +Windows DNS intercept uses a two-tier architecture with mode-dependent enforcement: + +- **`dns` mode**: NRPT only — graceful DNS routing through the Windows DNS Client service. At worst, a VPN overwrites NRPT and queries bypass ctrld temporarily. DNS never breaks. +- **`hard` mode**: NRPT + WFP — same NRPT routing, plus WFP kernel-level block filters that prevent any outbound DNS bypass. Equivalent enforcement to macOS pf. + +#### Why This Design? + +WFP can only **block** or **permit** connections — it **cannot redirect** them (redirection requires kernel-mode callout drivers). Without NRPT, WFP blocks outbound DNS but doesn't tell applications where to send queries instead — they see DNS failures. NRPT provides the "positive routing" while WFP provides enforcement. + +Separating them into modes means most users get `dns` mode (safe, can never break DNS) while high-security deployments use `hard` mode (full enforcement, same guarantees as macOS pf). + +#### Startup Sequence (dns mode) + +1. Creates NRPT catch-all registry rule (`.` → `127.0.0.1`) under `HKLM\...\DnsPolicyConfig\CtrldCatchAll` +2. Triggers Group Policy refresh via `RefreshPolicyEx` (userenv.dll) so DNS Client loads NRPT immediately +3. Flushes DNS cache to clear stale entries +4. Starts NRPT health monitor (30s periodic check) +5. Launches async NRPT probe-and-heal to verify NRPT is actually routing queries + +#### Startup Sequence (hard mode) + +1. Creates NRPT catch-all rule + GP refresh + DNS flush (same as dns mode) +2. Opens WFP engine with `RPC_C_AUTHN_DEFAULT` (0xFFFFFFFF) +3. 
Cleans up any stale sublayer from a previous unclean shutdown +4. Creates sublayer with maximum weight (0xFFFF) +5. Adds **permit** filters (weight 10) for DNS to localhost (`127.0.0.1`/`::1` port 53) +6. Adds **permit** filters (weight 10) for DNS to RFC1918 + CGNAT subnets (10/8, 172.16/12, 192.168/16, 100.64/10) +7. Adds **block** filters (weight 1) for all other outbound DNS (port 53 UDP+TCP) +8. Starts NRPT health monitor (also verifies WFP sublayer in hard mode) +9. Launches async NRPT probe-and-heal + +**Atomic guarantee:** NRPT must succeed before WFP starts. If NRPT fails, WFP is not attempted. If WFP fails, NRPT is rolled back. This prevents DNS blackholes where WFP blocks everything but nothing routes to ctrld. + +On shutdown: stops health monitor, removes NRPT rule, flushes DNS, then (hard mode only) removes all WFP filters and closes engine. + +#### NRPT Details + +The **Name Resolution Policy Table** is a Windows feature (originally for DirectAccess) that tells the DNS Client service to route queries matching specific namespace patterns to specific DNS servers. ctrld adds a catch-all rule: + +| Registry Value | Type | Value | Purpose | +|---|---|---|---| +| `Name` | REG_MULTI_SZ | `.` | Namespace pattern (`.` = catch-all, matches everything) | +| `GenericDNSServers` | REG_SZ | `127.0.0.1` | DNS server to use for matching queries | +| `ConfigOptions` | REG_DWORD | `0x8` | Standard DNS resolution (no DirectAccess) | +| `Version` | REG_DWORD | `0x2` | NRPT rule version 2 | + +**Registry path**: `HKLM\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig\CtrldCatchAll` + +**Group Policy refresh**: The DNS Client service only reads NRPT from registry during Group Policy processing cycles (default: every 90 minutes). ctrld calls `RefreshPolicyEx(bMachine=TRUE, dwOptions=RP_FORCE)` from `userenv.dll` to trigger an immediate refresh. Falls back to `gpupdate /target:computer /force` if the DLL call fails. 
+ +#### WFP Filter Architecture + +**Filter priority**: Permit filters have weight 10, block filters have weight 1. WFP evaluates higher-weight filters first, so localhost and private-range DNS is always permitted. + +**RFC1918 + CGNAT permits**: Static subnet permit filters allow DNS to private IP ranges (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 100.64.0.0/10). This means VPN DNS servers on private IPs (Tailscale MagicDNS on 100.100.100.100, corporate VPN DNS on 10.x.x.x, etc.) work without needing dynamic per-server exemptions. + +**VPN coexistence**: VPN software can set DNS to whatever it wants on the interface — for public IPs, the WFP block filter prevents those servers from being reached on port 53. For private IPs, the subnet permits allow it. ctrld handles all DNS routing through NRPT and can forward VPN-specific domains to VPN DNS servers through its own upstream mechanism. + +#### NRPT Probe and Auto-Heal + +`RefreshPolicyEx` returns immediately — it does NOT wait for the DNS Client service to actually load the NRPT rule. On cold machines (first boot, fresh install), the DNS Client may take several seconds to process the policy refresh. During this window, the NRPT rule exists in the registry but isn't active. + +ctrld verifies NRPT is actually working by sending a probe DNS query (`_nrpt-probe-.nrpt-probe.ctrld.test`) through Go's `net.Resolver` (which calls `GetAddrInfoW` → DNS Client → NRPT path). If ctrld receives the probe on its listener, NRPT is active. + +**Startup probe (async, non-blocking):** After NRPT setup, an async goroutine probes with escalating remediation: (1) immediate probe, (2) GP refresh + retry, (3) DNS Client service restart + retry, (4) final retry. Only one probe sequence runs at a time. + +**DNS Client restart (nuclear option):** If GP refresh alone isn't enough, ctrld restarts the `Dnscache` service to force full NRPT re-initialization. 
This briefly interrupts all DNS (~100ms) but only fires when NRPT is already not working. + +#### NRPT Health Monitor + +A dedicated background goroutine (`nrptHealthMonitor`) runs every 30 seconds and now performs active probing: + +1. **Registry check:** If the NRPT catch-all rule is missing from the registry, restore it + GP refresh + probe-and-heal +2. **Active probe:** If the rule exists, send a probe query to verify it's actually routing — catches cases where the registry key is present but DNS Client hasn't loaded it +3. **(hard mode)** Verify WFP sublayer exists; full restart on loss + +This is periodic (not just network-event-driven) because VPN software can clear NRPT at any time. Additionally, `scheduleDelayedRechecks()` (called on network change events) performs immediate NRPT verification at 2s and 4s after changes. + +#### Known Caveats + +- **`nslookup` bypasses NRPT**: `nslookup.exe` uses its own DNS resolver implementation and does NOT go through the Windows DNS Client service, so it ignores NRPT rules entirely. Use `Resolve-DnsName` (PowerShell) or `ping` to verify DNS resolution through NRPT. This is a well-known Windows behavior, not a ctrld bug. +- **`RPC_C_AUTHN_DEFAULT`**: `FwpmEngineOpen0` requires `RPC_C_AUTHN_DEFAULT` (0xFFFFFFFF) for the authentication service parameter. Using `RPC_C_AUTHN_NONE` (0) returns `ERROR_NOT_SUPPORTED` on some configurations (e.g., Parallels VMs). +- **FWP_DATA_TYPE enum**: The `FWP_DATA_TYPE` enum starts at `FWP_EMPTY=0`, making `FWP_UINT8=1`, `FWP_UINT16=2`, etc. Some documentation examples incorrectly start at 0. + +### macOS (pf) + +1. ctrld writes a pf anchor file at `/etc/pf.anchors/com.controld.ctrld` +2. Adds the anchor reference to `/etc/pf.conf` (if not present) +3. Loads the anchor with `pfctl -a com.controld.ctrld -f ` +4. Enables pf with `pfctl -e` (if not already enabled) +5. The anchor redirects all outbound DNS (port 53) on non-loopback interfaces to `127.0.0.1:53` +6. 
On shutdown, the anchor is flushed, the file removed, and references cleaned from `pf.conf` + +**ctrld's own traffic**: ctrld's upstream queries use DoH (HTTPS on port 443), not plain DNS on port 53, so the pf redirect does not create a loop for DoH upstreams. **Warning:** If an "os" upstream is configured (which uses plain DNS on port 53 to external servers), the pf redirect will capture ctrld's own outbound queries and create a loop. ctrld will log a warning at startup if this is detected. Use DoH upstreams when DNS intercept mode is active. + +## What Changes vs Default Mode + +| Behavior | Default Mode | DNS Intercept Mode | +|----------|-------------|-------------------| +| Interface DNS settings | Set to `127.0.0.1` | **Not modified** | +| DNS watchdog | Active (polls every 20s) | **Disabled** | +| VPN DNS conflict | Race condition possible | **Eliminated** | +| Profile bypass window | Up to 20 seconds | **Zero** | +| Requires admin/root | Yes | Yes | +| Additional OS requirements | None | WFP (Windows), pf (macOS) | + +## Logging + +DNS intercept mode produces detailed logs for troubleshooting: + +``` +DNS intercept: initializing Windows Filtering Platform (WFP) +DNS intercept: WFP engine opened (handle: 0x1a2b3c) +DNS intercept: WFP sublayer created (weight: 0xFFFF — maximum priority) +DNS intercept: added permit filter "Permit DNS to localhost (IPv4/UDP)" (ID: 12345) +DNS intercept: added block filter "Block outbound DNS (IPv4/UDP)" (ID: 12349) +DNS intercept: WFP filters active — all outbound DNS (port 53) blocked except to localhost +``` + +On macOS: +``` +DNS intercept: initializing macOS packet filter (pf) redirect +DNS intercept: wrote pf anchor file: /etc/pf.anchors/com.controld.ctrld +DNS intercept: loaded pf anchor "com.controld.ctrld" +DNS intercept: pf anchor "com.controld.ctrld" active with 3 rules +DNS intercept: pf redirect active — all outbound DNS (port 53) redirected to 127.0.0.1:53 +``` + +## Troubleshooting + +### Windows + +```powershell 
+# Check NRPT rules (should show CtrldCatchAll with . → 127.0.0.1) +Get-DnsClientNrptRule + +# Check NRPT registry directly +Get-ChildItem "HKLM:\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig" + +# Force Group Policy refresh (if NRPT not taking effect) +gpupdate /target:computer /force + +# Check if WFP filters are active +netsh wfp show filters + +# Check ctrld's specific filters (look for "ctrld" in output) +netsh wfp show filters | Select-String "ctrld" + +# Test DNS resolution (use Resolve-DnsName, NOT nslookup!) +# nslookup bypasses DNS Client / NRPT — it will NOT reflect NRPT routing +Resolve-DnsName example.com +ping example.com + +# If you must use nslookup, specify localhost explicitly: +nslookup example.com 127.0.0.1 +``` + +### macOS + +```bash +# Check if pf is enabled +sudo pfctl -si + +# Check ctrld's anchor rules +sudo pfctl -a com.controld.ctrld -sr +sudo pfctl -a com.controld.ctrld -sn + +# Check pf.conf for anchor reference +cat /etc/pf.conf | grep ctrld + +# Test DNS is going through ctrld +dig @127.0.0.1 example.com +``` + +## Limitations + +- **Linux**: Not supported. Linux uses `systemd-resolved` or `/etc/resolv.conf` which don't have the same VPN conflict issues. If needed in the future, `iptables`/`nftables` REDIRECT could be used. + +- **Split DNS for VPN internal domains**: In `--intercept-mode dns` mode, VPN search domains are auto-detected from virtual network adapters and forwarded to the VPN's DNS servers automatically. In `--intercept-mode hard` mode, VPN internal domains (e.g., `*.corp.local`) will NOT resolve unless configured as explicit upstream rules in ctrld's configuration. + +- **macOS mDNSResponder interaction**: On macOS, ctrld uses a workaround ("mDNSResponder hack") that binds to `0.0.0.0:53` instead of `127.0.0.1:53` and refuses queries from non-localhost sources. In dns-intercept mode, pf's `rdr` rewrites the destination IP to `127.0.0.1:53` but preserves the original source IP (e.g., `192.168.2.73`). 
The mDNSResponder source-IP check is automatically bypassed in dns-intercept mode because the pf/WFP rules already ensure only legitimate intercepted DNS traffic reaches ctrld's listener. + +- **Other WFP/pf users**: If other software (VPN, firewall, endpoint security) also uses WFP or pf for DNS interception, there may be priority conflicts. ctrld uses maximum sublayer weight on Windows and a named anchor on macOS to minimize this risk. See "VPN App Coexistence" below for macOS-specific defenses. + +## VPN App Coexistence (macOS) + +VPN apps (Windscribe, Cisco AnyConnect, F5 BIG-IP, etc.) often manage pf rules themselves, which can interfere with ctrld's DNS intercept. ctrld uses a multi-layered defense strategy: + +### 1. Anchor Priority Enforcement + +When injecting our anchor reference into the running pf ruleset, ctrld **prepends** both the `rdr-anchor` and `anchor` references before all other anchors. pf evaluates rules top-to-bottom, so our DNS intercept `quick` rules match port 53 traffic before a VPN app's broader rules in their own anchor. + +### 2. Interface-Specific Tunnel Rules + +VPN apps commonly add rules like `pass out quick on ipsec0 inet all` that match ALL traffic on the VPN interface. If their anchor is evaluated before ours (e.g., after a ruleset reload), these broad rules capture DNS. ctrld counters this by adding explicit DNS intercept rules for each active tunnel interface (ipsec*, utun*, ppp*, tap*, tun*). These interface-specific rules match port 53 only, so they take priority over the VPN app's broader "all" match even within the same anchor evaluation pass. + +### 3. Dynamic Tunnel Interface Detection + +The network change monitor (`validInterfacesMap()`) only tracks physical hardware ports (en0, bridge0, etc.) — it doesn't see tunnel interfaces (utun*, ipsec*, etc.) created by VPN software. 
When a VPN connects and creates a new interface (e.g., utun420 for WireGuard), ctrld detects this through a separate tunnel interface change check and rebuilds the pf anchor to include explicit intercept rules for the new interface. This runs on every network change event, even if no physical interface changed. + +### 4. pf Watchdog + Network Change Hooks + +A background watchdog (30s interval) plus immediate checks on network change events detect when another program replaces the entire pf ruleset (e.g., Windscribe's `pfctl -f /etc/pf.conf`). When detected, ctrld rebuilds its anchor with up-to-date tunnel interface rules and re-injects the anchor reference at the top of the ruleset. A 2-second delayed re-check catches race conditions where the other program clears rules slightly after the network event. + +### 4a. Active Interception Probe (pf Translation State Corruption) + +Programs like Parallels Desktop reload `/etc/pf.conf` when creating/destroying virtual network interfaces (bridge100, vmenet0). This can corrupt pf's internal translation engine — rdr rules survive in text form but stop evaluating, causing DNS interception to silently fail while the watchdog reports "intact." + +ctrld detects interface appearance/disappearance and spawns an async probe monitor: + +1. **Probe mechanism:** A subprocess runs with GID=0 (wheel, not `_ctrld`) and sends a DNS query to the OS resolver. If pf interception is working, the query gets redirected to ctrld (127.0.0.1:53) and is detected in the DNS handler. If broken, it times out after 1s. +2. **Backoff schedule:** Probes at 0, 0.5, 1, 2, 4 seconds (~8s window) to win the race against async pf reloads by the hypervisor. Only one monitor runs at a time (atomic singleton). +3. **Auto-heal:** On probe failure, `forceReloadPFMainRuleset()` dumps the running ruleset and pipes it back through `pfctl -f -`, resetting pf's translation engine. VPN-safe because it reassembles from the current running state. +4. 
**Watchdog integration:** The 30s watchdog also runs the probe when rule text checks pass, as a safety net for unknown corruption causes. + +This approach detects **actual broken DNS** rather than guessing from trigger events, making it robust against future unknown corruption scenarios. + +### 5. Proactive DoH Connection Pool Reset + +When the watchdog detects a pf ruleset replacement, it force-rebootstraps all upstream transports via `ForceReBootstrap()`. This is necessary because `pfctl -f` flushes the entire pf state table, which kills existing TCP connections (including ctrld's DoH connections to upstream DNS servers like 76.76.2.22:443). + +The force-rebootstrap does two things that the lazy `ReBootstrap()` cannot: +1. **Closes idle connections on the old transport** (`CloseIdleConnections()`), causing in-flight HTTP/2 requests on dead connections to fail immediately instead of waiting for the 5s context deadline +2. **Creates the new transport synchronously**, so it's ready before any DNS queries arrive post-wipe + +Without this, Go's `http.Transport` keeps trying dead connections until each request's context deadline expires (~5s), then the lazy rebootstrap creates a new transport for the *next* request. With force-rebootstrap, the blackout is reduced from ~5s to ~100ms (one fresh TLS handshake). + +### 6. Blanket Process Exemption (group _ctrld) + +ctrld creates a macOS system group (`_ctrld`) and sets its effective GID at startup via `syscall.Setegid()`. The pf anchor includes a blanket rule: + +``` +pass out quick group _ctrld +``` + +This exempts **all** outbound traffic from the ctrld process — not just DNS (port 53), but also DoH (TCP 443), DoT (TCP 853), health checks, and any other connections. This is essential because VPN firewalls like Windscribe load `block drop all` rulesets that would otherwise block ctrld's upstream connections even after the pf anchor is restored. 
+ +Because ctrld's anchor is prepended before all other anchors, and this rule uses `quick`, it evaluates before any VPN firewall rules. The result: ctrld's traffic is never blocked regardless of what other pf rulesets are loaded. + +The per-IP exemptions (OS resolver, VPN DNS) remain as defense-in-depth for the DNS redirect loop prevention — the blanket rule handles everything else. + +### 7. Loopback Outbound Pass Rule + +When `route-to lo0` redirects a DNS packet to loopback, pf re-evaluates the packet **outbound on lo0**. None of the existing route-to rules match on lo0 (they're all `on ! lo0` or `on utunX`), so without an explicit pass rule, the packet falls through to the main ruleset where VPN firewalls' `block drop all` drops it — before it ever reaches the inbound rdr rule. + +``` +pass out quick on lo0 inet proto udp from any to ! 127.0.0.1 port 53 +pass out quick on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 +``` + +This bridges the route-to → rdr gap: route-to sends outbound on lo0 → this rule passes it → loopback reflects it inbound → rdr rewrites destination to 127.0.0.1:53 → ctrld receives the query. Without this rule, DNS intercept fails whenever a `block drop all` firewall (Windscribe, etc.) is active. + +### 8. Response Routing via `reply-to lo0` + +After rdr redirects DNS to 127.0.0.1:53, ctrld responds to the original client source IP (e.g., 100.94.163.168 — a VPN tunnel IP). Without intervention, the kernel routes this response through the VPN tunnel interface (utun420) based on its routing table, and the response is lost. + +``` +pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53 +``` + +`reply-to lo0` tells pf to force response packets for this connection back through lo0, overriding the kernel routing table. 
The response stays local, rdr reverse NAT rewrites the source from 127.0.0.1 back to the original DNS server IP (e.g., 10.255.255.3), and the client process receives a correctly-addressed response.
+
+### 9. VPN DNS Split Routing and Exit Mode Detection
+
+When a VPN like Tailscale MagicDNS is active, two distinct modes require different pf handling:
+
+#### The Problem: DNS Proxy Loop
+
+VPN DNS handlers like Tailscale's MagicDNS run as macOS Network Extensions. MagicDNS
+listens on 100.100.100.100 and forwards queries to internal upstream nameservers
+(e.g., 10.0.0.11, 10.0.0.12) via the VPN tunnel interface (utun13).
+
+Without special handling, pf's generic `pass out quick on ! lo0 route-to lo0` rule
+intercepts MagicDNS's upstream queries on the tunnel interface, routing them back
+to ctrld → which matches VPN DNS split routing → forwards to MagicDNS → loop:
+
+```
+┌──────────────────────────────────────────────────────────────────────┐
+│ THE LOOP (without passthrough rules) │
+│ │
+│ 1. dig vpn-internal.example.com │
+│ → pf intercepts → route-to lo0 → rdr → ctrld (127.0.0.1:53) │
+│ │
+│ 2. ctrld: VPN DNS match → forward to 100.100.100.100:53 │
+│ → group _ctrld exempts → reaches MagicDNS │
+│ │
+│ 3. MagicDNS: forward to upstream 10.0.0.11:53 via utun13 │
+│ → pf generic rule matches (utun13 ≠ lo0, 10.0.0.11 ≠ skip) │
+│ → route-to lo0 → rdr → back to ctrld ← LOOP! │
+└──────────────────────────────────────────────────────────────────────┘
+```
+
+#### The Fix: Interface Passthrough + Exit Mode Detection
+
+**Split DNS mode** (VPN handles only specific domains):
+
+ctrld adds passthrough rules for VPN DNS interfaces that let MagicDNS's upstream
+queries flow without interception. A `<vpn_dns>` table contains the VPN DNS server
+IPs (e.g., 100.100.100.100) — traffic TO those IPs is NOT passed through (still
+intercepted by pf → ctrld enforces profile):
+
+```
+table <vpn_dns> { 100.100.100.100 }
+
+# MagicDNS upstream queries (to 10.0.0.11 etc.)
— pass through
+pass out quick on utun13 inet proto udp from any to ! <vpn_dns> port 53
+pass out quick on utun13 inet proto tcp from any to ! <vpn_dns> port 53
+
+# Queries TO MagicDNS (100.100.100.100) — not matched above,
+# falls through to generic rule → intercepted → ctrld → profile enforced
+```
+
+```
+┌──────────────────────────────────────────────────────────────────────┐
+│ SPLIT DNS MODE (with passthrough rules) │
+│ │
+│ Non-VPN domain (popads.net): │
+│ dig popads.net → system routes to 100.100.100.100 on utun13 │
+│ → passthrough rule: dest IS in <vpn_dns> → NOT matched │
+│ → generic rule: route-to lo0 → rdr → ctrld → profile blocks it ✅ │
+│ │
+│ VPN domain (vpn-internal.example.com): │
+│ dig vpn-internal.example.com → pf intercepts → ctrld │
+│ → VPN DNS match → forward to 100.100.100.100 (group exempt) │
+│ → MagicDNS → upstream 10.0.0.11 on utun13 │
+│ → passthrough rule: dest NOT in <vpn_dns> → MATCHED → passes ✅ │
+│ → 10.0.0.11 returns correct internal answer (10.0.0.113) │
+└──────────────────────────────────────────────────────────────────────┘
+```
+
+**Exit mode** (all traffic through VPN):
+
+When Tailscale exit node is enabled, MagicDNS becomes the system's **default**
+resolver (not just supplemental). If we added passthrough rules, ALL DNS would
+bypass ctrld — losing profile enforcement.
+
+Exit mode is detected using two independent signals (either triggers exit mode):
+
+**1. Default route detection (primary, most reliable):**
+Uses `netmon.DefaultRouteInterface()` to check if the system's default route
+(0.0.0.0/0) goes through a VPN DNS interface. If `DefaultRouteInterface` matches
+a VPN DNS interface name (e.g., utun13), the VPN owns the default route — it's
+exit mode. This is the ground truth: the routing table directly reflects whether
+all traffic flows through the VPN, regardless of how the VPN presents itself in
+scutil.
+
+**2. 
scutil flag detection (secondary, fallback):** +If the VPN DNS server IP appears in a `scutil --dns` resolver entry that has +**no search domains** and **no Supplemental flag**, it's acting as the system's +default resolver (exit mode). This catches edge cases where the default route +hasn't changed yet but scutil already shows the VPN as the default DNS. + +``` +# Non-exit mode — default route on en0, 100.100.100.100 is Supplemental: +$ route -n get 0.0.0.0 | grep interface + interface: en0 ← physical NIC, not VPN +resolver #1 + search domain[0] : vpn.example.com + nameserver[0] : 100.100.100.100 + flags : Supplemental, Request A records + +# Exit mode — default route on utun13, 100.100.100.100 is default resolver: +$ route -n get 0.0.0.0 | grep interface + interface: utun13 ← VPN interface! +resolver #2 + nameserver[0] : 100.100.100.100 ← MagicDNS is default + flags : Request A records ← no Supplemental! +``` + +In exit mode, NO passthrough rules are generated. pf intercepts all DNS → ctrld +enforces its profile on everything. VPN search domains still resolve correctly +via ctrld's VPN DNS split routing (forwarded to MagicDNS through the group +exemption). + +#### Summary Table + +| Scenario | Passthrough | Profile Enforced | VPN Domains | +|----------|-------------|-----------------|-------------| +| No VPN | None | ✅ All traffic | N/A | +| Split DNS (Tailscale non-exit) | ✅ VPN interface | ✅ Non-VPN domains | ✅ Via MagicDNS | +| Exit mode (Tailscale exit node) | ❌ None | ✅ All traffic | ✅ Via ctrld split routing | +| Windscribe | None (different flow) | ✅ All traffic | N/A | +| Hard intercept | None | ✅ All traffic | ❌ Not forwarded | + +### Nuclear Option (Future) + +If anchor ordering + interface rules prove insufficient, an alternative approach is available: inject DNS intercept rules directly into the **main pf ruleset** (not inside an anchor). 
Main ruleset rules are evaluated before ALL anchors, making them impossible for another app to override without explicitly removing them. This is more invasive and not currently implemented, but documented here as a known escalation path. + +## Known VPN Conflicts + +### F5 BIG-IP APM + +F5 BIG-IP APM VPN is a known source of DNS conflicts with ctrld (a known support scenario). The conflict occurs because F5's VPN client aggressively manages DNS: + +**How the conflict manifests:** + +1. ctrld sets system DNS to `127.0.0.1` / `::1` for local forwarding +2. F5 VPN connects and **overwrites DNS on all interfaces** by prepending its own servers (e.g., `10.20.30.1`, `10.20.30.2`) +3. F5 enforces split DNS patterns (e.g., `*.corp.example.com`) and activates its DNS Relay Proxy (`F5FltSrv.exe` / `F5FltSrv.sys`) +4. ctrld's watchdog detects the change and restores `127.0.0.1` — F5 overwrites again +5. This loop causes intermittent resolution failures, slow responses, and VPN disconnects + +**Why `--intercept-mode dns` solves this:** + +- ctrld no longer modifies interface DNS settings — there is nothing for F5 to overwrite +- WFP (Windows) blocks all outbound DNS except to localhost, so F5's prepended DNS servers are unreachable on port 53 +- F5's DNS Relay Proxy (`F5FltSrv`) becomes irrelevant since no queries reach it +- In `--intercept-mode dns` mode, F5's split DNS domains (e.g., `*.corp.example.com`) are auto-detected from the VPN adapter and forwarded to F5's DNS servers through ctrld's upstream mechanism + +**F5-side mitigations (if `--intercept-mode dns` is not available):** + +- In APM Network Access DNS settings, enable **"Allow Local DNS Servers"** (`AllowLocalDNSServersAccess = 1`) +- Disable **"Enforce DNS Name Resolution Order"** +- Switch to IP-based split tunneling instead of DNS-pattern-based to avoid activating F5's relay proxy +- Update F5 to version 17.x+ which includes DNS handling fixes (see F5 KB K80231353) + +**Additional considerations:** + +- 
CrowdStrike Falcon and similar endpoint security with network inspection can compound the conflict (three-way DNS stomping) +- F5's relay proxy (`F5FltSrv`) performs similar functions to ctrld — they are in direct conflict when both active +- The seemingly random failure pattern is caused by timing-dependent race conditions between ctrld's watchdog, F5's DNS enforcement, and (optionally) endpoint security inspection + +### Cisco AnyConnect + +Cisco AnyConnect exhibits similar DNS override behavior. `--intercept-mode dns` mode prevents the conflict by operating at the packet filter level rather than competing for interface DNS settings. + +### Windscribe Desktop App + +Windscribe's macOS firewall implementation (`FirewallController_mac`) replaces the entire pf ruleset when connecting/disconnecting via `pfctl -f`, which wipes ctrld's anchor references and flushes the pf state table (killing active DoH connections). ctrld handles this with multiple defenses: + +1. **pf watchdog** detects the wipe and restores anchor rules immediately on network change events (or within 30s via periodic check) +2. **DoH transport force-reset** immediately replaces upstream transports when a pf wipe is detected (closing old connections + creating new ones synchronously), reducing the DNS blackout from ~5s to ~100ms +3. **Tunnel interface detection** adds explicit intercept rules for Windscribe's WireGuard interface (e.g., utun420) when it appears +4. **Dual delayed re-checks** (2s + 4s after network event) catch race conditions where VPN apps modify pf rules and DNS settings asynchronously after the initial network change +5. **Deferred pf restore** waits for VPN to finish its pf modifications before restoring ctrld's rules, preventing the reconnect death spiral +6. **Blanket group exemption** (`pass out quick group _ctrld`) ensures all ctrld traffic (including DoH on port 443) passes through VPN firewalls like Windscribe's `block drop all` + +## 7. 
VPN DNS Lifecycle + +When VPN software connects or disconnects, ctrld must track DNS state changes to ensure correct routing and avoid stale state. + +### Network Change Event Flow (macOS) + +``` +Network change detected (netmon callback) + │ + ├─ Immediate actions: + │ ├─ ensurePFAnchorActive() — verify/restore pf anchor references + │ ├─ checkTunnelInterfaceChanges() — detect new/removed VPN interfaces + │ │ ├─ New tunnel → pfStartStabilization() (wait for VPN to finish pf changes) + │ │ └─ Removed tunnel → rebuild anchor immediately (with VPN DNS exemptions) + │ └─ vpnDNS.Refresh() — re-discover VPN DNS from scutil --dns + │ + ├─ Delayed re-check at 2s: + │ ├─ ensurePFAnchorActive() — catch async pf wipes + │ ├─ checkTunnelInterfaceChanges() + │ ├─ InitializeOsResolver() — clear stale DNS from scutil + │ └─ vpnDNS.Refresh() — clear stale VPN DNS routes + │ + └─ Delayed re-check at 4s: + └─ (same as 2s — catches slower VPN teardowns) +``` + +### VPN Connect Sequence + +1. VPN creates tunnel interface (e.g., utun420) +2. Network change fires → `checkTunnelInterfaceChanges()` detects new tunnel +3. **Stabilization mode** activates — suppresses pf restores while VPN modifies rules +4. Stabilization loop polls `pfctl -sr` hash every 1.5s +5. When hash stable for 6s → VPN finished → restore ctrld's pf anchor +6. `vpnDNS.Refresh()` discovers VPN's search domains and DNS servers from `scutil --dns` +7. Anchor rebuild includes VPN DNS exemptions (so ctrld can reach VPN DNS on port 53) + +### VPN Disconnect Sequence + +1. VPN removes tunnel interface +2. Network change fires → `checkTunnelInterfaceChanges()` detects removal +3. Anchor rebuilt immediately (no stabilization needed for removals) +4. VPN app may asynchronously wipe pf rules (`pfctl -f /etc/pf.conf`) +5. VPN app may asynchronously clean up DNS settings from `scutil --dns` +6. **2s delayed re-check**: restores pf anchor if wiped, refreshes OS resolver +7. 
**4s delayed re-check**: catches slower VPN teardowns +8. `vpnDNS.Refresh()` returns empty → `onServersChanged(nil)` clears stale exemptions +9. `InitializeOsResolver()` re-reads `scutil --dns` → clears stale LAN nameservers + +### Key Design Decisions + +- **`buildPFAnchorRules()` receives VPN DNS servers**: All call sites (tunnel rebuild, watchdog restore, stabilization exit) pass `vpnDNS.CurrentServers()` so exemptions are preserved for still-active VPNs. +- **`onServersChanged` called even when server list is empty**: Ensures stale pf exemptions from a previous VPN session are cleaned up on disconnect. +- **OS resolver refresh in delayed re-checks**: VPN apps often finish DNS cleanup 1-3s after the network change event. The delayed `InitializeOsResolver()` call ensures stale LAN nameservers (e.g., a VPN's DNS IP (e.g., 10.255.255.3)) don't cause 2s query timeouts. +- **Ordering: tunnel checks → VPN DNS refresh → delayed re-checks**: Ensures anchor rebuilds from tunnel changes include current VPN DNS exemptions. + +## Related + +- F5 BIG-IP APM VPN DNS conflict (a known support scenario) diff --git a/resolver.go b/resolver.go index 914233d7..fbd2ad61 100644 --- a/resolver.go +++ b/resolver.go @@ -234,6 +234,79 @@ type publicResponse struct { server string } +// OsResolverNameservers returns the current OS resolver nameservers (host:port format). +// Returns nil if the OS resolver has not been initialized. +func OsResolverNameservers() []string { + resolverMutex.Lock() + r := or + resolverMutex.Unlock() + if r == nil { + return nil + } + var nss []string + if lan := r.lanServers.Load(); lan != nil { + nss = append(nss, *lan...) + } + if pub := r.publicServers.Load(); pub != nil { + nss = append(nss, *pub...) + } + return nss +} + +// AppendOsResolverNameservers adds additional nameservers to the existing OS resolver +// without reinitializing it. This is used for late-arriving nameservers such as AD +// domain controller IPs discovered via background retry. 
+// Returns true if nameservers were actually added. +func AppendOsResolverNameservers(servers []string) bool { + if len(servers) == 0 { + return false + } + resolverMutex.Lock() + defer resolverMutex.Unlock() + if or == nil { + return false + } + + // Collect existing nameservers to avoid duplicates. + existing := make(map[string]bool) + if lan := or.lanServers.Load(); lan != nil { + for _, s := range *lan { + existing[s] = true + } + } + if pub := or.publicServers.Load(); pub != nil { + for _, s := range *pub { + existing[s] = true + } + } + + var added bool + for _, s := range servers { + // Normalize to host:port format. + if _, _, err := net.SplitHostPort(s); err != nil { + s = net.JoinHostPort(s, "53") + } + if existing[s] { + continue + } + existing[s] = true + added = true + + ip, _, _ := net.SplitHostPort(s) + addr, _ := netip.ParseAddr(ip) + if isLanAddr(addr) { + lan := or.lanServers.Load() + newLan := append(append([]string{}, (*lan)...), s) + or.lanServers.Store(&newLan) + } else { + pub := or.publicServers.Load() + newPub := append(append([]string{}, (*pub)...), s) + or.publicServers.Store(&newPub) + } + } + return added +} + // SetDefaultLocalIPv4 updates the stored local IPv4. 
func SetDefaultLocalIPv4(ip net.IP) { Log(context.Background(), ProxyLogger.Load().Debug(), "SetDefaultLocalIPv4: %s", ip) From 289a46dc2c35fbc6d9d63c6aad0cfd28158b4245 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 3 Mar 2026 02:07:11 -0500 Subject: [PATCH 17/22] feat: add macOS pf DNS interception Implement DNS interception on macOS using pf (packet filter): - Anchor injection into running ruleset (not /etc/pf.conf) - route-to lo0 + rdr rules for locally-originated DNS capture - _ctrld group exemption so ctrld's own queries bypass interception - Watchdog to detect and restore wiped anchor rules - Probe-based auto-heal for Parallels VM pf corruption - IPv6 DNS blocking and block-return for clean timeouts - Interface-specific tunnel detection for VPN coexistence - Port 5354 fallback in intercept mode Includes pf technical reference docs and test scripts. Squashed from intercept mode development on v1.0 branch (#497). --- cmd/cli/dns_intercept_darwin.go | 1872 +++++++++++++++++ cmd/cli/dns_intercept_darwin_test.go | 127 ++ docs/pf-dns-intercept.md | 380 ++++ test-scripts/README.md | 47 + test-scripts/darwin/diag-lo0-capture.sh | 40 + test-scripts/darwin/diag-pf-poll.sh | 62 + test-scripts/darwin/diag-vpn-connect.sh | 183 ++ test-scripts/darwin/test-dns-intercept.sh | 556 +++++ .../darwin/test-pf-group-exemption.sh | 147 ++ test-scripts/darwin/test-recovery-bypass.sh | 301 +++ test-scripts/darwin/validate-pf-rules.sh | 272 +++ test-scripts/windows/test-dns-intercept.ps1 | 544 +++++ test-scripts/windows/test-recovery-bypass.ps1 | 289 +++ 13 files changed, 4820 insertions(+) create mode 100644 cmd/cli/dns_intercept_darwin.go create mode 100644 cmd/cli/dns_intercept_darwin_test.go create mode 100644 docs/pf-dns-intercept.md create mode 100644 test-scripts/README.md create mode 100644 test-scripts/darwin/diag-lo0-capture.sh create mode 100644 test-scripts/darwin/diag-pf-poll.sh create mode 100755 test-scripts/darwin/diag-vpn-connect.sh create mode 100644 
test-scripts/darwin/test-dns-intercept.sh create mode 100644 test-scripts/darwin/test-pf-group-exemption.sh create mode 100755 test-scripts/darwin/test-recovery-bypass.sh create mode 100644 test-scripts/darwin/validate-pf-rules.sh create mode 100644 test-scripts/windows/test-dns-intercept.ps1 create mode 100644 test-scripts/windows/test-recovery-bypass.ps1 diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go new file mode 100644 index 00000000..95fc8a09 --- /dev/null +++ b/cmd/cli/dns_intercept_darwin.go @@ -0,0 +1,1872 @@ +//go:build darwin + +package cli + +import ( + "context" + "crypto/sha256" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync/atomic" + "syscall" + "time" + + "github.com/Control-D-Inc/ctrld" +) + +const ( + // pfWatchdogInterval is how often the periodic pf watchdog checks + // that our anchor references are still present in the running ruleset. + pfWatchdogInterval = 30 * time.Second + + // pfConsecutiveMissThreshold is the number of consecutive watchdog cycles + // where the anchor was found missing before escalating to ERROR level. + // This indicates something is persistently fighting our pf rules. + pfConsecutiveMissThreshold = 3 + + // pfAnchorRecheckDelay is how long to wait after a network change before + // performing a second pf anchor check. This catches race conditions where + // another program (e.g., Windscribe desktop) clears pf rules slightly + // after our network change handler runs. + pfAnchorRecheckDelay = 2 * time.Second + + // pfAnchorRecheckDelayLong is a second, longer delayed re-check after network + // changes. Some VPNs (e.g., Windscribe) take 3-4s to fully tear down their pf + // rules and DNS settings on disconnect. This catches slower teardowns that the + // 2s re-check misses. + pfAnchorRecheckDelayLong = 4 * time.Second + + // pfVPNInterfacePrefixes lists interface name prefixes that indicate VPN/tunnel + // interfaces on macOS. 
Used to add interface-specific DNS intercept rules so that + // VPN software with "pass out quick on " rules cannot bypass our intercept. + // Common prefixes: + // ipsec* - IKEv2/IPsec VPNs (Windscribe, macOS built-in) + // utun* - TUN interfaces (WireGuard, Tailscale, OpenVPN, etc.) + // ppp* - PPTP/L2TP VPNs + // tap* - TAP interfaces (OpenVPN in bridge mode) + // tun* - Legacy TUN interfaces + // lo0 is excluded since our rules already handle loopback. + pfVPNInterfacePrefixes = "ipsec,utun,ppp,tap,tun" +) + +const ( + // pfProbeDomain is the suffix used for pf interception probe queries. + // The full probe domain is "_pf-probe-.". + // These queries are sent by a subprocess WITHOUT the _ctrld group GID, + // so pf should intercept them and redirect to ctrld. If ctrld receives + // the query, pf interception is working. If not (timeout), rdr is broken. + // No trailing dot — canonicalName() in the DNS handler strips trailing dots. + pfProbeDomain = "pf-probe.ctrld.test" + + // pfProbeTimeout is how long to wait for a probe query to arrive at ctrld. + pfProbeTimeout = 1 * time.Second + + // pfGroupName is the macOS system group used to scope pf exemption rules. + // Only processes running with this effective GID can bypass the DNS redirect, + // preventing other applications from circumventing ctrld by querying exempted IPs directly. + pfGroupName = "_ctrld" + + // pfAnchorName is the pf anchor name used by ctrld for DNS interception. + // Using reverse-DNS convention to avoid conflicts with other software. + pfAnchorName = "com.controld.ctrld" + + // pfAnchorDir is the directory where pf anchor files are stored on macOS. + pfAnchorDir = "/etc/pf.anchors" + + // pfAnchorFile is the full path to ctrld's pf anchor configuration file. + pfAnchorFile = "/etc/pf.anchors/com.controld.ctrld" +) + +// pfState holds the state of the pf DNS interception on macOS. 
type pfState struct {
	anchorFile string // path of the anchor rules file written to disk (pfAnchorFile)
	anchorName string // pf anchor name the rules were loaded under (pfAnchorName)
}

// ensureCtrldGroup creates the _ctrld system group if it doesn't exist and returns its GID.
// Uses dscl (macOS Directory Services) to manage the group. This function is idempotent —
// safe to call multiple times across restarts. The group is intentionally never removed
// on shutdown to avoid race conditions during rapid restart cycles.
//
// Returns the group's GID, or an error if dscl output cannot be parsed or no
// free GID is available in the 350-450 range.
func ensureCtrldGroup() (int, error) {
	// Check if the group already exists.
	out, err := exec.Command("dscl", ".", "-read", "/Groups/"+pfGroupName, "PrimaryGroupID").CombinedOutput()
	if err == nil {
		// Group exists — parse and return its GID.
		// Output format: "PrimaryGroupID: 350"
		line := strings.TrimSpace(string(out))
		parts := strings.SplitN(line, ":", 2)
		if len(parts) == 2 {
			gid, err := strconv.Atoi(strings.TrimSpace(parts[1]))
			if err != nil {
				return 0, fmt.Errorf("failed to parse existing group GID from %q: %w", line, err)
			}
			mainLog.Load().Debug().Msgf("DNS intercept: group %s already exists with GID %d", pfGroupName, gid)
			return gid, nil
		}
		return 0, fmt.Errorf("unexpected dscl output for existing group: %q", line)
	}

	// Group doesn't exist — find an unused GID in the 350-450 range (system group range on macOS,
	// above Apple's reserved range but below typical user groups).
	listOut, err := exec.Command("dscl", ".", "-list", "/Groups", "PrimaryGroupID").CombinedOutput()
	if err != nil {
		return 0, fmt.Errorf("failed to list existing groups: %w (output: %s)", err, strings.TrimSpace(string(listOut)))
	}

	// Each output line ends with the numeric GID; non-numeric trailing fields
	// (assumed not to occur for PrimaryGroupID listings — TODO confirm) are skipped.
	usedGIDs := make(map[int]bool)
	for _, line := range strings.Split(string(listOut), "\n") {
		fields := strings.Fields(line)
		if len(fields) >= 2 {
			if gid, err := strconv.Atoi(fields[len(fields)-1]); err == nil {
				usedGIDs[gid] = true
			}
		}
	}

	// Pick the first free GID in range.
	chosenGID := 0
	for gid := 350; gid <= 450; gid++ {
		if !usedGIDs[gid] {
			chosenGID = gid
			break
		}
	}
	if chosenGID == 0 {
		return 0, fmt.Errorf("no unused GID found in range 350-450")
	}

	// Create the group record. Handle eDSRecordAlreadyExists gracefully in case of a
	// race with another ctrld instance.
	createOut, err := exec.Command("dscl", ".", "-create", "/Groups/"+pfGroupName).CombinedOutput()
	if err != nil {
		outStr := strings.TrimSpace(string(createOut))
		if !strings.Contains(outStr, "eDSRecordAlreadyExists") {
			return 0, fmt.Errorf("failed to create group record: %w (output: %s)", err, outStr)
		}
	}

	// Set the GID. This is idempotent — dscl overwrites the attribute if it already exists.
	if out, err := exec.Command("dscl", ".", "-create", "/Groups/"+pfGroupName, "PrimaryGroupID", strconv.Itoa(chosenGID)).CombinedOutput(); err != nil {
		return 0, fmt.Errorf("failed to set group GID: %w (output: %s)", err, strings.TrimSpace(string(out)))
	}

	// Human-readable name shown in Directory Utility / group listings.
	if out, err := exec.Command("dscl", ".", "-create", "/Groups/"+pfGroupName, "RealName", "ctrld DNS Intercept Group").CombinedOutput(); err != nil {
		return 0, fmt.Errorf("failed to set group RealName: %w (output: %s)", err, strings.TrimSpace(string(out)))
	}

	mainLog.Load().Info().Msgf("DNS intercept: created system group %s with GID %d", pfGroupName, chosenGID)
	return chosenGID, nil
}

// setCtrldGroupID sets the process's effective GID to the _ctrld group.
// This must be called before any outbound DNS sockets are created so that
// pf's "group _ctrld" matching applies to ctrld's own DNS queries.
// Only ctrld (running as root with this effective GID) will match the exemption rules,
// preventing other processes from bypassing the DNS redirect.
func setCtrldGroupID(gid int) error {
	if err := syscall.Setegid(gid); err != nil {
		return fmt.Errorf("syscall.Setegid(%d) failed: %w", gid, err)
	}
	mainLog.Load().Info().Msgf("DNS intercept: set process effective GID to %d (%s)", gid, pfGroupName)
	return nil
}

// startDNSIntercept activates pf-based DNS interception on macOS.
// It creates a pf anchor that redirects all outbound DNS (port 53) traffic
// to ctrld's local listener at 127.0.0.1:53. This eliminates the race condition
// with VPN software that overwrites interface DNS settings.
//
// The approach:
//  1. Write a pf anchor file with redirect rules for all non-loopback interfaces
//  2. Load the anchor into pf
//  3. Ensure pf is enabled
//
// ctrld's own upstream queries use DoH (port 443), so they are NOT affected
// by the port 53 redirect. If an "os" upstream is configured (which uses port 53),
// we skip the redirect for traffic from the ctrld process itself.
//
// Returns an error only for failures that leave interception unusable
// (group setup, anchor file write, anchor load); later pfctl verification
// steps merely log warnings.
func (p *prog) startDNSIntercept() error {
	mainLog.Load().Info().Msg("DNS intercept: initializing macOS packet filter (pf) redirect")

	if err := p.validateDNSIntercept(); err != nil {
		return err
	}

	// Set up _ctrld group for pf exemption scoping. This ensures that only ctrld's
	// own DNS queries (matching "group _ctrld" in pf rules) can bypass the redirect.
	// Must happen BEFORE loading pf rules so the effective GID is set when sockets are created.
	gid, err := ensureCtrldGroup()
	if err != nil {
		return fmt.Errorf("dns intercept: failed to create %s group: %w", pfGroupName, err)
	}
	if err := setCtrldGroupID(gid); err != nil {
		return fmt.Errorf("dns intercept: failed to set process GID to %s: %w", pfGroupName, err)
	}

	// Clean up any stale state from a previous crash.
	// Best-effort: the flush/remove errors are deliberately ignored.
	if _, err := os.Stat(pfAnchorFile); err == nil {
		mainLog.Load().Warn().Msg("DNS intercept: found stale pf anchor file from previous run — cleaning up")
		exec.Command("pfctl", "-a", pfAnchorName, "-F", "all").CombinedOutput()
		os.Remove(pfAnchorFile)
	}

	// Pre-discover VPN DNS configurations before building initial rules.
	// Without this, there's a startup gap where the initial anchor has no VPN DNS
	// exemptions, causing queries to be intercepted and routed to ctrld. The
	// vpnDNSManager.Refresh() call later would add the exemptions, but stale pf
	// state entries from the gap persist and keep routing packets to lo0.
	// By discovering upfront, the initial rules exclude VPN DNS interfaces from interception.
	var initialExemptions []vpnDNSExemption
	if !hardIntercept {
		initialConfigs := ctrld.DiscoverVPNDNS(context.Background())
		// De-duplicate (server, interface) pairs across discovered configs.
		type key struct{ server, iface string }
		seen := make(map[key]bool)
		for _, config := range initialConfigs {
			for _, server := range config.Servers {
				k := key{server, config.InterfaceName}
				if !seen[k] {
					seen[k] = true
					initialExemptions = append(initialExemptions, vpnDNSExemption{
						Server:    server,
						Interface: config.InterfaceName,
					})
				}
			}
		}
		if len(initialExemptions) > 0 {
			mainLog.Load().Info().Msgf("DNS intercept: pre-discovered %d VPN DNS exemptions for initial rules", len(initialExemptions))
		}
	}

	rules := p.buildPFAnchorRules(initialExemptions)

	// Persist the anchor rules, then load them into pf under our anchor name.
	if err := os.MkdirAll(pfAnchorDir, 0755); err != nil {
		return fmt.Errorf("dns intercept: failed to create pf anchor directory %s: %w", pfAnchorDir, err)
	}
	if err := os.WriteFile(pfAnchorFile, []byte(rules), 0644); err != nil {
		return fmt.Errorf("dns intercept: failed to write pf anchor file %s: %w", pfAnchorFile, err)
	}
	mainLog.Load().Debug().Msgf("DNS intercept: wrote pf anchor file: %s", pfAnchorFile)

	out, err := exec.Command("pfctl", "-a", pfAnchorName, "-f", pfAnchorFile).CombinedOutput()
	if err != nil {
		// Don't leave a file on disk that was never loaded.
		os.Remove(pfAnchorFile)
		return fmt.Errorf("dns intercept: failed to load pf anchor: %w (output: %s)", err, strings.TrimSpace(string(out)))
	}
	mainLog.Load().Debug().Msgf("DNS intercept: loaded pf anchor %q from %s", pfAnchorName, pfAnchorFile)

	// Inject anchor references into the running ruleset; non-fatal if it fails
	// (the watchdog below will keep retrying via the same mechanism).
	if err := p.ensurePFAnchorReference(); err != nil {
		mainLog.Load().Warn().Err(err).Msg("DNS intercept: could not add anchor references to running pf ruleset — anchor may not be active")
	}

	// Enable pf. "already enabled" is the normal case on many systems and not a warning.
	out, err = exec.Command("pfctl", "-e").CombinedOutput()
	if err != nil {
		outStr := strings.TrimSpace(string(out))
		if !strings.Contains(outStr, "already enabled") {
			mainLog.Load().Warn().Msgf("DNS intercept: pfctl -e returned: %s (err: %v) — pf may not be enabled", outStr, err)
		}
	}

	// Log the filter rules that actually loaded (verification is best-effort).
	out, err = exec.Command("pfctl", "-a", pfAnchorName, "-sr").CombinedOutput()
	if err != nil {
		mainLog.Load().Warn().Msgf("DNS intercept: could not verify anchor rules: %v", err)
	} else {
		ruleCount := strings.Count(strings.TrimSpace(string(out)), "\n") + 1
		mainLog.Load().Info().Msgf("DNS intercept: pf anchor %q active with %d rules", pfAnchorName, ruleCount)
		mainLog.Load().Debug().Msgf("DNS intercept: active pf rules:\n%s", strings.TrimSpace(string(out)))
	}

	// Also log the NAT/rdr rules, which carry the actual port-53 redirect.
	out, err = exec.Command("pfctl", "-a", pfAnchorName, "-sn").CombinedOutput()
	if err == nil && len(strings.TrimSpace(string(out))) > 0 {
		mainLog.Load().Debug().Msgf("DNS intercept: active pf NAT/redirect rules:\n%s", strings.TrimSpace(string(out)))
	}

	// Post-load verification: confirm everything actually took effect.
	p.verifyPFState()

	p.dnsInterceptState = &pfState{
		anchorFile: pfAnchorFile,
		anchorName: pfAnchorName,
	}

	// Store the initial set of tunnel interfaces so we can detect changes later.
	p.mu.Lock()
	p.lastTunnelIfaces = discoverTunnelInterfaces()
	p.mu.Unlock()

	lc := p.cfg.FirstListener()
	if lc != nil {
		mainLog.Load().Info().Msgf("DNS intercept: pf redirect active — all outbound DNS (port 53) redirected to %s:%d via anchor %q", lc.IP, lc.Port, pfAnchorName)
	} else {
		mainLog.Load().Info().Msgf("DNS intercept: pf redirect active — all outbound DNS (port 53) redirected via anchor %q", pfAnchorName)
	}

	// Start the pf watchdog to detect and restore rules if another program
	// (e.g., Windscribe desktop, macOS configd) replaces the pf ruleset.
	go p.pfWatchdog()

	return nil
}

// ensurePFAnchorReference ensures the running pf ruleset includes our anchor
// declarations. We dump the RUNNING ruleset via "pfctl -sr" (filter+scrub rules)
// and "pfctl -sn" (NAT/rdr rules), check if our references exist, and if not,
// inject them and reload the combined ruleset via stdin.
//
// pf enforces strict rule ordering:
//
//	options → normalization (scrub) → queueing → translation (nat/rdr) → filtering (pass/block/anchor)
//
// "pfctl -sr" returns BOTH scrub-anchor (normalization) AND anchor/pass/block (filter) rules.
// "pfctl -sn" returns nat-anchor AND rdr-anchor (translation) rules.
// Both commands emit "No ALTQ support in kernel" warnings on stderr.
//
// We must reassemble in correct order: scrub → nat/rdr → filter.
//
// The anchor reference does not survive a reboot, but ctrld re-adds it on every start.
//
// Returns an error only when the running ruleset cannot be dumped or reloaded;
// a suboptimal-but-present anchor ordering is logged, not treated as an error.
func (p *prog) ensurePFAnchorReference() error {
	natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName)
	rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName)
	anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName)

	// Dump running rules. Use CombinedOutput but filter out stderr warnings.
	natOut, err := exec.Command("pfctl", "-sn").CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to dump running NAT rules: %w (output: %s)", err, strings.TrimSpace(string(natOut)))
	}

	filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to dump running filter rules: %w (output: %s)", err, strings.TrimSpace(string(filterOut)))
	}

	// Filter pfctl output into actual pf rules, stripping stderr warnings.
	natLines := pfFilterRuleLines(string(natOut))
	filterLines := pfFilterRuleLines(string(filterOut))

	// Note: rdrAnchorRef contains anchorRef as a substring, so the checks below
	// rely on nat/rdr refs only ever appearing in the "-sn" dump and the plain
	// anchor ref in the "-sr" dump.
	hasNatAnchor := pfContainsRule(natLines, natAnchorRef)
	hasRdrAnchor := pfContainsRule(natLines, rdrAnchorRef)
	hasAnchor := pfContainsRule(filterLines, anchorRef)

	if hasNatAnchor && hasRdrAnchor && hasAnchor {
		// Verify anchor ordering: our anchor should appear before other anchors
		// for reliable DNS interception priority. Log a warning if out of order,
		// but don't force a reload (the interface-specific rules in our anchor
		// provide a secondary safety net even if ordering is suboptimal).
		p.checkAnchorOrdering(filterLines, anchorRef)
		mainLog.Load().Debug().Msg("DNS intercept: anchor references already present in running ruleset")
		return nil
	}

	mainLog.Load().Info().Msg("DNS intercept: injecting anchor references into running pf ruleset")

	// Separate scrub rules from filter rules (pfctl -sr returns both).
	// scrub/scrub-anchor = normalization, must come BEFORE translation.
	var scrubLines, pureFilterLines []string
	for _, line := range filterLines {
		if strings.HasPrefix(line, "scrub") {
			scrubLines = append(scrubLines, line)
		} else {
			pureFilterLines = append(pureFilterLines, line)
		}
	}

	// Inject our references if missing. PREPEND both references to ensure our
	// anchor is evaluated BEFORE any other anchors (e.g., Windscribe's
	// "vpn_app_traffic"). pf evaluates rules top-to-bottom, so "quick"
	// rules in whichever anchor appears first win. By prepending, our DNS
	// intercept rules match port 53 traffic before a VPN app's broader
	// "pass out quick on all" rules in their anchor.
	// newRefs keeps nat-anchor ahead of rdr-anchor, matching pf's nat-before-rdr
	// evaluation within the translation phase.
	if !hasNatAnchor || !hasRdrAnchor {
		var newRefs []string
		if !hasNatAnchor {
			newRefs = append(newRefs, natAnchorRef)
		}
		if !hasRdrAnchor {
			newRefs = append(newRefs, rdrAnchorRef)
		}
		natLines = append(newRefs, natLines...)
	}
	if !hasAnchor {
		pureFilterLines = append([]string{anchorRef}, pureFilterLines...)
	}

	// Dump and clean pf options. VPN apps (e.g., Windscribe) set "set skip on { lo0 }"
	// which disables pf processing on loopback, breaking our route-to + rdr mechanism.
	// We strip lo0 and tunnel interfaces from the skip list before reloading.
	cleanedOptions, hadLoopbackSkip := pfGetCleanedOptions()
	if hadLoopbackSkip {
		mainLog.Load().Info().Msg("DNS intercept: will reload pf options without lo0 in skip list")
	}

	// Reassemble in pf's required order: options → scrub → translation → filtering.
	// NOTE(review): "pfctl -f -" replaces the whole main ruleset with this
	// reassembly — presumably queueing rules are absent on macOS; verify no
	// other rule class is dropped by the dump/reload round-trip.
	var combined strings.Builder
	if cleanedOptions != "" {
		combined.WriteString(cleanedOptions)
	}
	for _, line := range scrubLines {
		combined.WriteString(line + "\n")
	}
	for _, line := range natLines {
		combined.WriteString(line + "\n")
	}
	for _, line := range pureFilterLines {
		combined.WriteString(line + "\n")
	}

	cmd := exec.Command("pfctl", "-f", "-")
	cmd.Stdin = strings.NewReader(combined.String())
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to load pf ruleset with anchor references: %w (output: %s)", err, strings.TrimSpace(string(out)))
	}

	mainLog.Load().Info().Msg("DNS intercept: anchor references active in running pf ruleset")
	return nil
}

// checkAnchorOrdering logs a warning if our anchor reference is not the first
// anchor in the filter ruleset. When another anchor (e.g., Windscribe's
// "vpn_app_traffic") appears before ours, its "quick" rules may match
// DNS traffic first. The interface-specific tunnel rules in our anchor provide
// a secondary defense, but first position is still preferred.
func (p *prog) checkAnchorOrdering(filterLines []string, ourAnchorRef string) {
	// Only the FIRST "anchor " line matters: either it is ours (ideal) or some
	// other anchor outranks us (warn once and stop).
	for _, line := range filterLines {
		if strings.HasPrefix(line, "anchor ") {
			if strings.Contains(line, ourAnchorRef) {
				// Our anchor is first — ideal ordering.
				return
			}
			// Another anchor appears before ours.
			mainLog.Load().Warn().Msgf("DNS intercept: anchor ordering suboptimal — %q appears before our anchor %q. "+
				"Interface-specific rules provide fallback protection, but prepending is preferred.", line, pfAnchorName)
			return
		}
	}
}

// pfGetCleanedOptions dumps the running pf options via "pfctl -sO" and returns
// them with lo0 removed from any "set skip on" directive. VPN apps like Windscribe
// set "set skip on { lo0 }" which tells pf to bypass ALL processing on
// loopback — this breaks our route-to + rdr interception mechanism which depends on
// lo0.
We strip lo0 (and any known VPN tunnel interfaces) from the skip list so our +// rdr rules on lo0 can fire. Other options (timeouts, limits, etc.) are preserved. +// +// Returns the cleaned options as a string suitable for prepending to a pfctl -f reload, +// and a boolean indicating whether lo0 was found in the skip list (i.e., we needed to fix it). +func pfGetCleanedOptions() (string, bool) { + out, err := exec.Command("pfctl", "-sO").CombinedOutput() + if err != nil { + mainLog.Load().Debug().Err(err).Msg("DNS intercept: could not dump pf options") + return "", false + } + + var cleaned strings.Builder + hadLoopbackSkip := false + + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.Contains(line, "ALTQ") { + continue + } + + // Parse "set skip on { lo0 ipsec0 }" or "set skip on lo0" + if strings.HasPrefix(line, "set skip on") { + // Extract interface list from the skip directive. + skipPart := strings.TrimPrefix(line, "set skip on") + skipPart = strings.TrimSpace(skipPart) + skipPart = strings.Trim(skipPart, "{}") + skipPart = strings.TrimSpace(skipPart) + + ifaces := strings.Fields(skipPart) + var kept []string + for _, iface := range ifaces { + if iface == "lo0" { + hadLoopbackSkip = true + continue // Remove lo0 — we need pf to process lo0 for our rdr rules. + } + // Also remove VPN tunnel interfaces — we have explicit intercept + // rules for them in our anchor, so skipping defeats the purpose. 
+ isTunnel := false + for _, prefix := range strings.Split(pfVPNInterfacePrefixes, ",") { + if strings.HasPrefix(iface, strings.TrimSpace(prefix)) { + isTunnel = true + break + } + } + if isTunnel { + mainLog.Load().Debug().Msgf("DNS intercept: removing tunnel interface %q from pf skip list", iface) + continue + } + kept = append(kept, iface) + } + + if len(kept) > 0 { + cleaned.WriteString(fmt.Sprintf("set skip on { %s }\n", strings.Join(kept, " "))) + } + // If no interfaces left, omit the skip directive entirely. + continue + } + + // Preserve all other options (timeouts, limits, etc.). + cleaned.WriteString(line + "\n") + } + + if hadLoopbackSkip { + mainLog.Load().Warn().Msg("DNS intercept: detected 'set skip on lo0' — another program (likely VPN software) " + + "disabled pf processing on loopback, which breaks our DNS interception. Removing lo0 from skip list.") + } + + return cleaned.String(), hadLoopbackSkip +} + +// pfFilterRuleLines filters pfctl output into actual pf rule lines, +// stripping stderr warnings (e.g., "No ALTQ support in kernel") and empty lines. +func pfFilterRuleLines(output string) []string { + var rules []string + for _, line := range strings.Split(output, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + // Skip pfctl stderr warnings that appear in CombinedOutput. + if strings.Contains(line, "ALTQ") { + continue + } + rules = append(rules, line) + } + return rules +} + +// pfContainsRule checks if any line in the slice contains the given rule string. +// Uses substring matching because pfctl may append extra tokens like " all" to rules +// (e.g., `rdr-anchor "com.controld.ctrld" all`), which would fail exact matching. +func pfContainsRule(lines []string, rule string) bool { + for _, line := range lines { + if strings.Contains(line, rule) { + return true + } + } + return false +} + +// stopDNSIntercept removes all pf rules and cleans up the DNS interception. 
+func (p *prog) stopDNSIntercept() error { + if p.dnsInterceptState == nil { + mainLog.Load().Debug().Msg("DNS intercept: no pf state to clean up") + return nil + } + + mainLog.Load().Info().Msg("DNS intercept: shutting down pf redirect") + + out, err := exec.Command("pfctl", "-a", p.dnsInterceptState.(*pfState).anchorName, "-F", "all").CombinedOutput() + if err != nil { + mainLog.Load().Warn().Msgf("DNS intercept: failed to flush pf anchor %q: %v (output: %s)", + p.dnsInterceptState.(*pfState).anchorName, err, strings.TrimSpace(string(out))) + } else { + mainLog.Load().Debug().Msgf("DNS intercept: flushed pf anchor %q", p.dnsInterceptState.(*pfState).anchorName) + } + + if err := os.Remove(p.dnsInterceptState.(*pfState).anchorFile); err != nil && !os.IsNotExist(err) { + mainLog.Load().Warn().Msgf("DNS intercept: failed to remove anchor file %s: %v", p.dnsInterceptState.(*pfState).anchorFile, err) + } else { + mainLog.Load().Debug().Msgf("DNS intercept: removed anchor file %s", p.dnsInterceptState.(*pfState).anchorFile) + } + + if err := p.removePFAnchorReference(); err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to remove anchor references from running pf ruleset") + } + + p.dnsInterceptState = nil + mainLog.Load().Info().Msg("DNS intercept: pf shutdown complete") + return nil +} + +// removePFAnchorReference removes our anchor references from the running pf ruleset. +// Uses the same dump → filter → reassemble approach as ensurePFAnchorReference. +// The anchor itself is already flushed by stopDNSIntercept, so even if removal +// fails, the empty anchor is a no-op. 
+func (p *prog) removePFAnchorReference() error { + natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) + rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) + anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) + + natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to dump running NAT rules: %w (output: %s)", err, strings.TrimSpace(string(natOut))) + } + filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to dump running filter rules: %w (output: %s)", err, strings.TrimSpace(string(filterOut))) + } + + // Filter and remove our lines. + natLines := pfFilterRuleLines(string(natOut)) + filterLines := pfFilterRuleLines(string(filterOut)) + + var cleanNat []string + for _, line := range natLines { + if !strings.Contains(line, rdrAnchorRef) && !strings.Contains(line, natAnchorRef) { + cleanNat = append(cleanNat, line) + } + } + + // Separate scrub from filter, remove our anchor ref. + var scrubLines, cleanFilter []string + for _, line := range filterLines { + if strings.Contains(line, anchorRef) { + continue + } + if strings.HasPrefix(line, "scrub") { + scrubLines = append(scrubLines, line) + } else { + cleanFilter = append(cleanFilter, line) + } + } + + // Reassemble in correct order: scrub → translation → filtering. 
+ var combined strings.Builder + for _, line := range scrubLines { + combined.WriteString(line + "\n") + } + for _, line := range cleanNat { + combined.WriteString(line + "\n") + } + for _, line := range cleanFilter { + combined.WriteString(line + "\n") + } + + cmd := exec.Command("pfctl", "-f", "-") + cmd.Stdin = strings.NewReader(combined.String()) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to reload pf ruleset without anchor references: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + + mainLog.Load().Debug().Msg("DNS intercept: removed anchor references from running pf ruleset") + return nil +} + +// pfAddressFamily returns "inet" for IPv4 addresses and "inet6" for IPv6 addresses. +// Used to generate pf rules with the correct address family for each IP. +// flushPFStates flushes ALL pf state entries. Called after anchor reloads to ensure +// packets are re-evaluated against the new rules instead of matching stale state +// entries from the old ruleset. This is necessary because pf checks its state table +// BEFORE rule evaluation — a state entry created by a route-to rule will keep +// routing packets to lo0 even after VPN DNS interfaces are excluded from interception. +// +// We flush all states (not just port 53) because: +// 1. pfctl doesn't support port-based state killing +// 2. 
State flush is fast and brief — existing TCP connections (DoH) will +// re-establish quickly, and UDP connections are stateless at the transport level +func flushPFStates() { + if out, err := exec.Command("pfctl", "-F", "states").CombinedOutput(); err != nil { + mainLog.Load().Warn().Err(err).Msgf("DNS intercept: failed to flush pf states (output: %s)", strings.TrimSpace(string(out))) + } else { + mainLog.Load().Debug().Msg("DNS intercept: flushed pf states after anchor reload") + } +} + +func pfAddressFamily(ip string) string { + if addr := net.ParseIP(ip); addr != nil && addr.To4() == nil { + return "inet6" + } + return "inet" +} + +// discoverTunnelInterfaces returns the names of active VPN/tunnel network interfaces. +// These interfaces may have pf rules from VPN software (e.g., Windscribe's "pass out quick +// on ipsec0") that would match DNS traffic before our anchor rules. By discovering them, +// we can add interface-specific intercept rules that take priority. +func discoverTunnelInterfaces() []string { + ifaces, err := net.Interfaces() + if err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to enumerate network interfaces") + return nil + } + + prefixes := strings.Split(pfVPNInterfacePrefixes, ",") + var tunnels []string + + for _, iface := range ifaces { + // Only consider interfaces that are up — down interfaces can't carry DNS traffic. + if iface.Flags&net.FlagUp == 0 { + continue + } + for _, prefix := range prefixes { + if strings.HasPrefix(iface.Name, strings.TrimSpace(prefix)) { + tunnels = append(tunnels, iface.Name) + break + } + } + } + + if len(tunnels) > 0 { + mainLog.Load().Debug().Msgf("DNS intercept: discovered active tunnel interfaces: %v", tunnels) + } + return tunnels +} + +// dnsInterceptSupported reports whether DNS intercept mode is supported on this platform. 
+func dnsInterceptSupported() bool { + _, err := exec.LookPath("pfctl") + return err == nil +} + +// validateDNSIntercept checks that the system meets requirements for DNS intercept mode. +func (p *prog) validateDNSIntercept() error { + if !dnsInterceptSupported() { + return fmt.Errorf("dns intercept: pfctl not found — pf is required for DNS intercept on macOS") + } + + if os.Geteuid() != 0 { + return fmt.Errorf("dns intercept: root privileges required for pf filter management") + } + + if err := os.MkdirAll(filepath.Dir(pfAnchorFile), 0755); err != nil { + return fmt.Errorf("dns intercept: cannot create anchor directory: %w", err) + } + + if p.cfg != nil { + for name, uc := range p.cfg.Upstream { + if uc.Type == "os" || uc.Type == "" { + return fmt.Errorf("dns intercept: upstream %q uses OS resolver (port 53) which would create "+ + "a redirect loop with pf. Use DoH upstreams (--proto doh) with dns-intercept mode", name) + } + } + } + + return nil +} + +// buildPFAnchorRules generates the pf anchor rules for DNS interception. +// vpnExemptions are VPN DNS server+interface pairs to exempt from interception. +// +// macOS pf "rdr" rules only apply to forwarded traffic, NOT locally-originated +// packets. To intercept DNS from the machine itself, we use a two-step approach: +// 1. "pass out route-to lo0" forces outbound DNS through the loopback interface +// 2. "rdr on lo0" catches it on loopback and redirects to our listener +// +// STATE AND ROUTING (critical for VPN firewall coexistence): +// - route-to rules: keep state (default). State is floating (matches on any interface), +// but "pass out on lo0 no state" ensures no state exists on the lo0 outbound path, +// so rdr still fires on the lo0 inbound pass. +// - pass out on lo0: NO STATE — prevents state from being created on lo0 outbound, +// which would match inbound and bypass rdr. +// - rdr: no "pass" keyword — packet goes through filter so "pass in" creates state. 
+// - pass in on lo0: keep state + REPLY-TO lo0 — creates state for response routing +// AND forces the response back through lo0. Without reply-to, the response to a +// VPN client IP gets routed through the VPN tunnel and is lost. +// +// ctrld's own OS resolver nameservers (used for bootstrap DNS) must be exempted +// from the redirect to prevent ctrld from querying itself in a loop. +// +// pf requires strict rule ordering: translation (rdr) BEFORE filtering (pass). +func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { + // Read the actual listener address from config. In intercept mode, ctrld may + // be on a non-standard port (e.g., 127.0.0.1:5354) if mDNSResponder holds *:53. + // The pf rdr rules must redirect to wherever ctrld is actually listening. + listenerIP := "127.0.0.1" + listenerPort := 53 + if lc := p.cfg.FirstListener(); lc != nil { + if lc.IP != "" && lc.IP != "0.0.0.0" && lc.IP != "::" { + listenerIP = lc.IP + } else if lc.IP == "0.0.0.0" || lc.IP == "::" { + mainLog.Load().Warn().Str("configured_ip", lc.IP). + Msg("DNS intercept: listener configured with wildcard IP, using 127.0.0.1 for pf rules") + } + if lc.Port != 0 { + listenerPort = lc.Port + } + } + listenerAddr := fmt.Sprintf("%s port %d", listenerIP, listenerPort) + + var rules strings.Builder + rules.WriteString("# ctrld DNS Intercept Mode\n") + rules.WriteString("# Intercepts locally-originated DNS (port 53) via route-to + rdr on lo0.\n") + rules.WriteString("#\n") + rules.WriteString("# How it works:\n") + rules.WriteString("# 1. \"pass out route-to lo0\" forces outbound DNS through the loopback interface\n") + rules.WriteString(fmt.Sprintf("# 2. 
\"rdr on lo0\" catches it on loopback and redirects to ctrld at %s\n", listenerAddr)) + rules.WriteString("#\n") + rules.WriteString("# All ctrld traffic is blanket-exempted via \"pass out quick group " + pfGroupName + "\",\n") + rules.WriteString("# ensuring ctrld's DoH/DoT upstream connections and DNS queries are never\n") + rules.WriteString("# blocked by VPN firewalls (e.g., Windscribe's \"block drop all\").\n") + rules.WriteString("#\n") + rules.WriteString("# pf requires strict rule ordering: translation (rdr) BEFORE filtering (pass).\n\n") + + // --- Translation rules (must come first per pf ordering) --- + // Uses "rdr" without "pass" so the redirected packet continues to filter evaluation. + // The filter rule "pass in on lo0 ... to 127.0.0.1 port 53 keep state" then creates + // a stateful entry that handles response routing. Using "rdr pass" would skip filter + // evaluation, and its implicit state alone is insufficient for response delivery — + // proven by commit 51cf029 where responses were silently dropped. + rules.WriteString("# --- Translation rules (nat + rdr) ---\n") + + // NAT source to ::1 for IPv6 DNS on loopback. macOS/BSD rejects sendmsg from + // [::1] to a global unicast IPv6 address (EINVAL), unlike IPv4 where sendmsg from + // 127.0.0.1 to local private IPs works fine. The rdr rewrites the destination but + // preserves the original source (machine's global IPv6). Without nat, ctrld cannot + // reply. pf reverses both translations on the response path. + // Note: nat must appear before rdr (pf evaluates nat first in translation phase). + listenerAddr6 := fmt.Sprintf("::1 port %d", listenerPort) + rules.WriteString("nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1\n") + rules.WriteString("nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1\n") + + rules.WriteString("# Redirect DNS on loopback to ctrld's listener.\n") + rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto udp from any to ! 
%s port 53 -> %s\n", listenerIP, listenerAddr)) + rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto tcp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) + rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n", listenerAddr6)) + rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6)) + + // --- Filtering rules --- + rules.WriteString("# --- Filtering rules (pass) ---\n\n") + + // Blanket exemption: allow ALL outbound traffic from ctrld (group _ctrld) through + // without any pf filtering or redirection. This is critical for VPN coexistence — + // VPN apps like Windscribe load "block drop all" rulesets that would otherwise block + // ctrld's DoH connections (TCP 443) to upstream DNS servers (e.g., 76.76.2.22). + // Because our anchor is prepended before other anchors, this rule evaluates first, + // ensuring ctrld's traffic is never blocked by downstream firewall rules. + // + // The per-IP exemptions below (OS resolver, VPN DNS) remain as defense-in-depth: + // they prevent DNS redirect loops for ctrld's own port-53 queries specifically, + // while this rule handles everything else (DoH, DoT, health checks, etc.). + rules.WriteString("# Blanket exemption: let all ctrld traffic through regardless of other pf rules.\n") + rules.WriteString("# VPN firewalls (e.g., Windscribe's \"block drop all\") would otherwise block\n") + rules.WriteString("# ctrld's DoH (TCP 443) connections to upstream DNS servers.\n") + rules.WriteString(fmt.Sprintf("pass out quick group %s\n\n", pfGroupName)) + + // Exempt OS resolver nameservers (read live from the global OS resolver) + // so ctrld's bootstrap DNS queries don't get redirected back to itself. + // IPv4 addresses use "inet", IPv6 addresses use "inet6". 
+ osNS := ctrld.OsResolverNameservers() + if len(osNS) > 0 { + rules.WriteString("# Exempt OS resolver nameservers (ctrld bootstrap DNS) from redirect.\n") + rules.WriteString("# Scoped to group " + pfGroupName + " so only ctrld's own queries are exempted,\n") + rules.WriteString("# preventing other processes from bypassing the redirect by querying these IPs.\n") + for _, ns := range osNS { + host, _, _ := net.SplitHostPort(ns) + if host == "" { + host = ns + } + af := pfAddressFamily(host) + rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 %s proto { udp, tcp } from any to %s port 53 group %s\n", af, host, pfGroupName)) + } + rules.WriteString("\n") + } + + // Build sets of VPN DNS interfaces and server IPs for exclusion from intercept rules. + // + // VPN DNS handlers that use macOS Network Extensions (Tailscale MagicDNS, modern + // Cisco AnyConnect, F5 BIG-IP) intercept packets at the NE layer — BEFORE pf sees + // them on the return path, but AFTER pf's outbound rules fire. Any pf rule that + // touches packets on a VPN DNS interface (even "pass" with "keep state") interferes + // with the NE's packet handling, causing timeouts. + // + // Solution: exclude VPN DNS interfaces from tunnel intercept rules entirely, and + // exclude VPN DNS server IPs from the generic intercept rule. This lets all DNS + // traffic to/from VPN DNS flow naturally without any pf interference. + // + // EXIT MODE EXCEPTION: When a VPN is in exit/full-tunnel mode (VPN DNS server is + // also the system default resolver), we do NOT exempt the interface. In exit mode, + // all traffic routes through the VPN, so exempting the interface would bypass ctrld + // for ALL DNS — losing profile enforcement (blocking, filtering). Instead, we keep + // intercepting and let ctrld's VPN DNS split routing + group exemption handle it. 
+ vpnDNSIfaces := make(map[string]bool) // non-exit interfaces to skip in tunnel intercept + vpnDNSIfacePassthrough := make(map[string]bool) // non-exit interfaces needing passthrough rules + vpnDNSServerIPs := make(map[string]bool) // IPs to exclude from generic intercept + for _, ex := range vpnExemptions { + if ex.Interface != "" && !ex.IsExitMode { + vpnDNSIfaces[ex.Interface] = true + vpnDNSIfacePassthrough[ex.Interface] = true + } + vpnDNSServerIPs[ex.Server] = true + } + + // Group-scoped exemptions for ctrld's own VPN DNS queries. + // When ctrld's proxy() VPN DNS split routing sends queries to VPN DNS servers, + // these rules let ctrld's traffic through without being intercepted by the + // generic route-to rule. Scoped to group _ctrld so only ctrld benefits. + if len(vpnExemptions) > 0 { + rules.WriteString("# Exempt VPN DNS servers: ctrld's own queries (group-scoped).\n") + seen := make(map[string]bool) + for _, ex := range vpnExemptions { + if !seen[ex.Server] { + seen[ex.Server] = true + af := pfAddressFamily(ex.Server) + rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 %s proto { udp, tcp } from any to %s port 53 group %s\n", af, ex.Server, pfGroupName)) + } + } + rules.WriteString("\n") + } + + // NOTE: IPv6 DNS is now intercepted (not blocked). ctrld listens on [::1] and pf + // redirects IPv6 DNS the same way as IPv4. This eliminates the ~1s timeout caused by + // blocking IPv6 DNS (BSD doesn't deliver ICMP errors to unconnected UDP sockets). + + // --- VPN DNS interface passthrough (split DNS mode only) --- + // + // In split DNS mode, the VPN's DNS handler (e.g., Tailscale MagicDNS) runs as a + // Network Extension that intercepts packets on its tunnel interface. MagicDNS then + // forwards queries to its own upstream nameservers (e.g., 10.0.0.11) — IPs we + // can't know in advance. Without these rules, pf's generic "on !lo0" intercept + // catches MagicDNS's upstream queries, routing them back to ctrld in a loop. 
+	//
+	// These "pass" rules (no route-to) let MagicDNS's upstream queries pass through.
+	// Traffic TO the VPN DNS server itself (e.g., 100.100.100.100) is excluded so those
+	// queries get intercepted → ctrld enforces its profile on non-search-domain queries.
+	//
+	// NOT applied in exit mode — in exit mode, all traffic routes through the VPN
+	// interface, so exempting it would bypass ctrld's profile enforcement entirely.
+	if len(vpnDNSIfacePassthrough) > 0 {
+		// Build table of VPN DNS server IPs to exclude from passthrough.
+		// Queries TO these IPs must still be intercepted (profile enforcement).
+		// Only MagicDNS's upstream queries to other IPs should pass through.
+		var vpnDNSTableMembers []string
+		for ip := range vpnDNSServerIPs {
+			if net.ParseIP(ip) != nil && net.ParseIP(ip).To4() != nil {
+				vpnDNSTableMembers = append(vpnDNSTableMembers, ip)
+			}
+		}
+		if len(vpnDNSTableMembers) > 0 {
+			rules.WriteString("# Table of VPN DNS server IPs — queries to these must be intercepted.\n")
+			rules.WriteString(fmt.Sprintf("table <vpn_dns_servers> { %s }\n", strings.Join(vpnDNSTableMembers, ", ")))
+		}
+		rules.WriteString("# --- VPN DNS interface passthrough (split DNS mode) ---\n")
+		rules.WriteString("# Pass MagicDNS upstream queries; intercept queries TO MagicDNS itself.\n")
+		for iface := range vpnDNSIfacePassthrough {
+			if len(vpnDNSTableMembers) > 0 {
+				rules.WriteString(fmt.Sprintf("pass out quick on %s inet proto udp from any to ! <vpn_dns_servers> port 53\n", iface))
+				rules.WriteString(fmt.Sprintf("pass out quick on %s inet proto tcp from any to ! <vpn_dns_servers> port 53\n", iface))
+			} else {
+				rules.WriteString(fmt.Sprintf("pass out quick on %s inet proto udp from any to any port 53\n", iface))
+				rules.WriteString(fmt.Sprintf("pass out quick on %s inet proto tcp from any to any port 53\n", iface))
+			}
+		}
+		rules.WriteString("\n")
+	}
+
+	// --- Interface-specific VPN/tunnel intercept rules ---
+	// VPN apps (e.g., Windscribe, Cisco AnyConnect) often add pf rules like:
+	// pass out quick on ipsec0 inet all flags S/SA keep state
+	// inside their own anchors. If their anchor is evaluated before ours, their
+	// "quick" match on the VPN interface captures DNS traffic before our generic
+	// "on ! lo0" rule can intercept it. To counter this, we add explicit intercept
+	// rules for each active tunnel interface. These use "quick" and match port 53
+	// specifically, so they take priority over the VPN app's broader "all" rules
+	// regardless of anchor ordering.
+	//
+	// NOTE: If anchor ordering alone proves insufficient in the future, a "nuclear
+	// option" is available: inject DNS intercept rules directly into the MAIN pf
+	// ruleset (not inside our anchor). Main ruleset rules are evaluated before ALL
+	// anchors, making them impossible for another app to override without explicitly
+	// removing them. See docs/dns-intercept-mode.md for details.
+	tunnelIfaces := discoverTunnelInterfaces()
+	if len(tunnelIfaces) > 0 {
+		rules.WriteString("# --- VPN/tunnel interface intercept rules ---\n")
+		rules.WriteString("# Explicit intercept on tunnel interfaces prevents VPN apps from capturing\n")
+		rules.WriteString("# DNS traffic with their own broad \"pass out quick on <iface>\" rules.\n")
+		rules.WriteString("# These port-53-specific rules take priority over broader \"all\" matches.\n")
+		rules.WriteString("#\n")
+		rules.WriteString("# Interfaces with VPN DNS servers (from scutil) are EXCLUDED — those carry\n")
+		rules.WriteString("# DNS traffic for Network Extension-based VPNs (e.g., Tailscale MagicDNS)\n")
+		rules.WriteString("# that must flow without any pf interference.\n")
+		for _, iface := range tunnelIfaces {
+			if vpnDNSIfaces[iface] {
+				rules.WriteString(fmt.Sprintf("# Skipped %s — VPN DNS interface (Network Extension needs unintercepted flow)\n", iface))
+				continue
+			}
+			rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto udp from any to ! %s port 53\n", iface, listenerIP))
+			rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto tcp from any to ! %s port 53\n", iface, listenerIP))
+			rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto udp from any to ! ::1 port 53\n", iface))
+			rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n", iface))
+		}
+		rules.WriteString("\n")
+	}
+
+	// Force all remaining outbound IPv4 DNS through loopback for interception.
+	// VPN DNS server IPs are excluded — those must reach their VPN DNS handler
+	// without pf interference (especially for Network Extension-based VPNs).
+	//
+	// IMPORTANT: pf expands negated lists like { !a, !b } into separate rules where
+	// each rule matches everything the other excludes — effectively matching ALL addresses.
+	// This is a well-documented pf pitfall (OpenBSD FAQ, "negated lists").
+ // Fix: use a pf table with a single negated match: "to ! ". + // Force all remaining outbound IPv4 DNS through loopback for interception. + // Only loopback (127.0.0.1) is excluded — ctrld's own outbound queries to VPN DNS + // servers are handled by the group-scoped exemption rules above (group _ctrld). + rules.WriteString("# Force remaining outbound IPv4 DNS through loopback for interception.\n") + rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! %s port 53\n", listenerIP)) + rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! %s port 53\n\n", listenerIP)) + + // Force remaining outbound IPv6 DNS through loopback for interception. + rules.WriteString("# Force remaining outbound IPv6 DNS through loopback for interception.\n") + rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53\n") + rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n\n") + + // Allow route-to'd DNS packets to pass outbound on lo0. + // Without this, VPN firewalls with "block drop all" (e.g., Windscribe) drop the packet + // after route-to redirects it to lo0 but before it can reflect inbound for rdr processing. + // + // CRITICAL: This rule MUST use "no state". If it created state, that state would match + // the packet when it reflects inbound on lo0, causing pf to fast-path it and bypass + // rdr entirely. With "no state", the inbound packet gets fresh evaluation and rdr fires. + rules.WriteString("# Pass route-to'd DNS outbound on lo0 — no state to avoid bypassing rdr inbound.\n") + rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto udp from any to ! %s port 53 no state\n", listenerIP)) + rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto tcp from any to ! %s port 53 no state\n", listenerIP)) + rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! 
::1 port 53 no state\n") + rules.WriteString("pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state\n\n") + + // Allow the redirected traffic through on loopback (inbound after rdr). + // + // "reply-to lo0" is CRITICAL for VPN coexistence. Without it, ctrld's response to a + // VPN client IP (e.g., 100.94.163.168) gets routed via the VPN tunnel interface + // (utun420) by the kernel routing table — the response enters the tunnel and is lost. + // "reply-to lo0" forces pf to route the response back through lo0 regardless of the + // kernel routing table, ensuring it stays local and reaches the client process. + // + // "keep state" (the default) creates the stateful entry used by reply-to to route + // the response. The rdr NAT state handles the address rewrite on the response + // (source 127.0.0.1 → original DNS server IP, e.g., 10.255.255.3). + rules.WriteString("# Accept redirected DNS — reply-to lo0 forces response through loopback.\n") + rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to %s\n", listenerAddr)) + rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to %s\n", listenerAddr6)) + + return rules.String() +} + +// verifyPFState checks that the pf ruleset is correctly configured after loading. +// It verifies both the anchor references in the main ruleset and the rules within +// our anchor. Failures are logged at ERROR level to make them impossible to miss. +func (p *prog) verifyPFState() { + natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) + rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) + anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) + verified := true + + // Check main ruleset for anchor references (nat-anchor + rdr-anchor in translation rules). 
+ natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump NAT rules") + verified = false + } else { + natStr := string(natOut) + if !strings.Contains(natStr, rdrAnchorRef) { + mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — rdr-anchor reference missing from running NAT rules") + verified = false + } + if !strings.Contains(natStr, natAnchorRef) { + mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — nat-anchor reference missing from running NAT rules") + verified = false + } + } + + filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump filter rules") + verified = false + } else if !strings.Contains(string(filterOut), anchorRef) { + mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — anchor reference missing from running filter rules") + verified = false + } + + // Check our anchor has rules loaded. 
+ anchorFilter, err := exec.Command("pfctl", "-a", pfAnchorName, "-sr").CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump anchor filter rules") + verified = false + } else if len(strings.TrimSpace(string(anchorFilter))) == 0 { + mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — anchor has no filter rules loaded") + verified = false + } + + anchorNat, err := exec.Command("pfctl", "-a", pfAnchorName, "-sn").CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump anchor NAT rules") + verified = false + } else if len(strings.TrimSpace(string(anchorNat))) == 0 { + mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — anchor has no NAT/redirect rules loaded") + verified = false + } + + // Check that lo0 is not in the skip list — if it is, our rdr rules are dead. + optOut, err := exec.Command("pfctl", "-sO").CombinedOutput() + if err == nil { + for _, line := range strings.Split(string(optOut), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "set skip on") && strings.Contains(line, "lo0") { + mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — 'set skip on lo0' is active, rdr rules on loopback will not fire") + verified = false + break + } + } + } + + if verified { + mainLog.Load().Info().Msg("DNS intercept: post-load verification passed — all pf rules confirmed active") + } +} + +// resetUpstreamTransports forces all DoH/DoT/DoQ upstreams to re-bootstrap their +// network transports. This is called when the pf watchdog detects that the pf state +// table was flushed (e.g., by Windscribe running "pfctl -f"), which kills all existing +// TCP connections including ctrld's DoH connections to upstream DNS servers. +// +// Without this, Go's http.Transport keeps trying to use dead connections until each +// request hits its 5s context deadline — causing a ~5s DNS blackout. 
+// +// ForceReBootstrap() immediately creates a new transport (closing old idle +// connections), so new queries use fresh connections without waiting for the +// lazy re-bootstrap flag. This reduces the blackout from ~5s to ~100ms. +func (p *prog) resetUpstreamTransports() { + if p.cfg == nil { + return + } + count := 0 + for _, uc := range p.cfg.Upstream { + if uc == nil { + continue + } + uc.ForceReBootstrap() + count++ + } + if count > 0 { + mainLog.Load().Info().Msgf("DNS intercept watchdog: force-reset %d upstream transport(s) — pf state flush likely killed existing DoH connections", count) + } +} + +// checkTunnelInterfaceChanges compares the current set of active tunnel interfaces +// against the last known set. If they differ (e.g., a VPN connected and created utun420), +// it rebuilds and reloads the pf anchor rules to include interface-specific intercept +// rules for the new interface. +// +// Returns true if the anchor was rebuilt, false if no changes detected. +// This is called from the network change callback even when validInterfacesMap() +// reports no changes — because validInterfacesMap() only tracks physical hardware +// ports (en0, bridge0, etc.) and ignores tunnel interfaces (utun*, ipsec*, etc.). +func (p *prog) checkTunnelInterfaceChanges() bool { + if p.dnsInterceptState == nil { + return false + } + + current := discoverTunnelInterfaces() + + p.mu.Lock() + prev := p.lastTunnelIfaces + changed := !stringSlicesEqual(prev, current) + if changed { + p.lastTunnelIfaces = current + } + p.mu.Unlock() + + if !changed { + return false + } + + // Detect NEW tunnel interfaces (not just any change). 
+ prevSet := make(map[string]bool, len(prev)) + for _, iface := range prev { + prevSet[iface] = true + } + hasNewTunnel := false + for _, iface := range current { + if !prevSet[iface] { + hasNewTunnel = true + mainLog.Load().Info().Msgf("DNS intercept: new tunnel interface detected: %s", iface) + break + } + } + + if hasNewTunnel { + // A new VPN tunnel appeared. Enter stabilization mode — the VPN may be + // about to wipe our pf rules (Windscribe does this ~500ms after tunnel creation). + // We can't check pfAnchorIsWiped() here because the wipe hasn't happened yet. + // The stabilization loop will detect whether pf actually gets wiped: + // - If rules change (VPN touches pf): wait for stability, then restore. + // - If rules stay stable for the full wait (Tailscale): exit early and rebuild immediately. + p.pfStartStabilization() + return true + } + + mainLog.Load().Info().Msgf("DNS intercept: tunnel interfaces changed (was %v, now %v) — rebuilding pf anchor rules", prev, current) + + // Rebuild anchor rules with the updated tunnel interface list. + // Pass current VPN DNS servers so exemptions are preserved for still-active VPNs. + var vpnExemptions []vpnDNSExemption + if p.vpnDNS != nil { + vpnExemptions = p.vpnDNS.CurrentExemptions() + } + rulesStr := p.buildPFAnchorRules(vpnExemptions) + if err := os.WriteFile(pfAnchorFile, []byte(rulesStr), 0644); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to write rebuilt anchor file") + return true + } + out, err := exec.Command("pfctl", "-a", pfAnchorName, "-f", pfAnchorFile).CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msgf("DNS intercept: failed to reload rebuilt anchor (output: %s)", strings.TrimSpace(string(out))) + return true + } + + flushPFStates() // Clear stale states so new rules (incl. 
VPN DNS exemptions) take effect + mainLog.Load().Info().Msgf("DNS intercept: rebuilt pf anchor with %d tunnel interfaces", len(current)) + return true +} + +// stringSlicesEqual reports whether two string slices have the same elements in the same order. +func stringSlicesEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// pfAnchorIsWiped checks if our pf anchor references have been removed from the +// running ruleset. This is a read-only check — it does NOT attempt to restore. +// Used to distinguish VPNs that wipe pf (Windscribe) from those that don't (Tailscale). +func (p *prog) pfAnchorIsWiped() bool { + natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) + rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) + anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) + + natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() + if err != nil { + return true // Can't check — assume wiped (safer) + } + natStr := string(natOut) + if !strings.Contains(natStr, rdrAnchorRef) || !strings.Contains(natStr, natAnchorRef) { + return true + } + + filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() + if err != nil { + return true + } + return !strings.Contains(string(filterOut), anchorRef) +} + +// pfStartStabilization enters stabilization mode, suppressing all pf restores +// until the VPN's ruleset stops changing. This prevents a death spiral where +// ctrld and the VPN repeatedly overwrite each other's pf rules. +func (p *prog) pfStartStabilization() { + if p.pfStabilizing.Load() { + // Already stabilizing — extending is handled by backoff. 
+ return + } + p.pfStabilizing.Store(true) + + multiplier := max(int(p.pfBackoffMultiplier.Load()), 1) + baseStableTime := 6000 * time.Millisecond // 4 polls at 1.5s + stableRequired := time.Duration(multiplier) * baseStableTime + if stableRequired > 45*time.Second { + stableRequired = 45 * time.Second + } + + mainLog.Load().Info().Msgf("DNS intercept: VPN connecting — entering stabilization mode (waiting %s for pf to settle)", stableRequired) + + ctx, cancel := context.WithCancel(context.Background()) + p.mu.Lock() + if p.pfStabilizeCancel != nil { + p.pfStabilizeCancel() // Cancel any previous stabilization + } + p.pfStabilizeCancel = cancel + p.mu.Unlock() + + go p.pfStabilizationLoop(ctx, stableRequired) +} + +// pfStabilizationLoop polls pfctl -sr hash until the ruleset is stable for the +// required duration, then restores our anchor rules. +func (p *prog) pfStabilizationLoop(ctx context.Context, stableRequired time.Duration) { + defer p.pfStabilizing.Store(false) + + pollInterval := 1500 * time.Millisecond + var lastHash string + stableSince := time.Time{} + + for { + select { + case <-ctx.Done(): + mainLog.Load().Debug().Msg("DNS intercept: stabilization cancelled") + return + case <-p.stopCh: + return + case <-time.After(pollInterval): + } + + // Hash the current filter ruleset. + out, err := exec.Command("pfctl", "-sr").CombinedOutput() + if err != nil { + continue + } + hash := fmt.Sprintf("%x", sha256.Sum256(out)) + + if hash != lastHash { + // Rules changed — reset stability timer + lastHash = hash + stableSince = time.Now() + mainLog.Load().Debug().Msg("DNS intercept: pf rules changed during stabilization — resetting timer") + continue + } + + if stableSince.IsZero() { + stableSince = time.Now() + continue + } + + if time.Since(stableSince) >= stableRequired { + // Stable long enough — restore our rules. + // Clear stabilizing flag BEFORE calling ensurePFAnchorActive so + // the guard inside that function doesn't suppress our restore. 
+ p.pfStabilizing.Store(false) + mainLog.Load().Info().Msgf("DNS intercept: pf stable for %s — restoring anchor rules", stableRequired) + p.ensurePFAnchorActive() + p.pfLastRestoreTime.Store(time.Now().UnixMilli()) + return + } + } +} + +// ensurePFAnchorActive checks that our pf anchor references and rules are still +// present in the running ruleset. If anything is missing (e.g., another program +// like Windscribe desktop or macOS itself reloaded pf.conf), it restores them. +// +// Returns true if restoration was needed, false if everything was already intact. +// Called both on network changes (immediate) and by the periodic pfWatchdog. +func (p *prog) ensurePFAnchorActive() bool { + if p.dnsInterceptState == nil { + return false + } + + // While stabilizing (VPN connecting), suppress all restores. + // The stabilization loop will restore once pf settles. + if p.pfStabilizing.Load() { + mainLog.Load().Debug().Msg("DNS intercept watchdog: suppressed — VPN stabilization in progress") + return false + } + + // Check if our last restore was very recent and got wiped again. + // This indicates a VPN reconnect cycle — enter stabilization with backoff. + if lastRestore := p.pfLastRestoreTime.Load(); lastRestore > 0 { + elapsed := time.Since(time.UnixMilli(lastRestore)) + if elapsed < 10*time.Second { + // Rules were wiped within 10s of our last restore — VPN is fighting us. 
+ p.pfBackoffMultiplier.Add(1) + mainLog.Load().Warn().Msgf("DNS intercept: rules wiped %s after restore — entering stabilization (backoff multiplier: %d)", + elapsed, p.pfBackoffMultiplier.Load()) + p.pfStartStabilization() + return false + } + // Rules survived >10s — reset backoff + if p.pfBackoffMultiplier.Load() > 0 { + p.pfBackoffMultiplier.Store(0) + } + } + + natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) + rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) + anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) + needsRestore := false + + // Check 1: anchor references in the main ruleset. + natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() + if err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept watchdog: could not dump NAT rules") + return false + } + natStr := string(natOut) + if !strings.Contains(natStr, rdrAnchorRef) { + mainLog.Load().Warn().Msg("DNS intercept watchdog: rdr-anchor reference missing from running ruleset") + needsRestore = true + } + if !strings.Contains(natStr, natAnchorRef) { + mainLog.Load().Warn().Msg("DNS intercept watchdog: nat-anchor reference missing from running ruleset") + needsRestore = true + } + + if !needsRestore { + filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() + if err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept watchdog: could not dump filter rules") + return false + } + if !strings.Contains(string(filterOut), anchorRef) { + mainLog.Load().Warn().Msg("DNS intercept watchdog: anchor reference missing from running filter rules") + needsRestore = true + } + } + + // Check 2: anchor content (rules inside our anchor). + // Verify BOTH filter rules (-sr) AND rdr/NAT rules (-sn). Programs like Parallels' + // internet-sharing can flush our anchor's rdr rules while leaving filter rules intact. 
+ // Without rdr, route-to sends packets to lo0 but they never get redirected to 127.0.0.1:53, + // causing an infinite packet loop on lo0 and complete DNS failure. + if !needsRestore { + anchorFilter, err := exec.Command("pfctl", "-a", pfAnchorName, "-sr").CombinedOutput() + if err != nil || len(strings.TrimSpace(string(anchorFilter))) == 0 { + mainLog.Load().Warn().Msg("DNS intercept watchdog: anchor has no filter rules — content was flushed") + needsRestore = true + } + } + if !needsRestore { + anchorNat, err := exec.Command("pfctl", "-a", pfAnchorName, "-sn").CombinedOutput() + if err != nil || len(strings.TrimSpace(string(anchorNat))) == 0 { + mainLog.Load().Warn().Msg("DNS intercept watchdog: anchor has no rdr rules — translation was flushed (will cause packet loop on lo0)") + needsRestore = true + } + } + + // Check 3: "set skip on lo0" — VPN apps (e.g., Windscribe) load a complete pf.conf + // with "set skip on { lo0 }" which disables ALL pf processing on loopback. + // Our entire interception mechanism (route-to lo0 + rdr on lo0) depends on lo0 being + // processed by pf. This check detects the skip and triggers a restore that removes it. + if !needsRestore { + optOut, err := exec.Command("pfctl", "-sO").CombinedOutput() + if err == nil { + optStr := string(optOut) + // Check if lo0 appears in any "set skip on" directive. + for _, line := range strings.Split(optStr, "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "set skip on") && strings.Contains(line, "lo0") { + mainLog.Load().Warn().Msg("DNS intercept watchdog: 'set skip on lo0' detected — loopback bypass breaks our rdr rules") + needsRestore = true + break + } + } + } + } + + if !needsRestore { + mainLog.Load().Debug().Msg("DNS intercept watchdog: pf anchor intact") + return false + } + + // Restore: re-inject anchor references into the main ruleset. 
+ mainLog.Load().Info().Msg("DNS intercept watchdog: restoring pf anchor references") + if err := p.ensurePFAnchorReference(); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept watchdog: failed to restore anchor references") + return true + } + + // Restore: always rebuild anchor rules from scratch to ensure tunnel interface + // rules are up-to-date (VPN interfaces may have appeared/disappeared since the + // anchor file was last written). + mainLog.Load().Info().Msg("DNS intercept watchdog: rebuilding anchor rules with current network state") + var vpnExemptions []vpnDNSExemption + if p.vpnDNS != nil { + vpnExemptions = p.vpnDNS.CurrentExemptions() + } + rulesStr := p.buildPFAnchorRules(vpnExemptions) + if err := os.WriteFile(pfAnchorFile, []byte(rulesStr), 0644); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept watchdog: failed to write anchor file") + } else if out, err := exec.Command("pfctl", "-a", pfAnchorName, "-f", pfAnchorFile).CombinedOutput(); err != nil { + mainLog.Load().Error().Err(err).Msgf("DNS intercept watchdog: failed to load rebuilt anchor (output: %s)", strings.TrimSpace(string(out))) + } else { + flushPFStates() + mainLog.Load().Info().Msg("DNS intercept watchdog: rebuilt and loaded anchor rules") + } + + // Update tracked tunnel interfaces after rebuild so checkTunnelInterfaceChanges() + // has an accurate baseline for subsequent comparisons. + p.mu.Lock() + p.lastTunnelIfaces = discoverTunnelInterfaces() + p.mu.Unlock() + + // Verify the restoration worked. + p.verifyPFState() + + // Proactively reset upstream transports. When another program replaces the pf + // ruleset with "pfctl -f", it flushes the entire state table — killing all + // existing TCP connections including our DoH connections to upstream DNS servers. + // Without this reset, Go's http.Transport keeps trying dead connections until + // the 5s context deadline, causing a DNS blackout. 
Re-bootstrapping forces fresh
+	// TLS handshakes on the next query (~200ms vs ~5s recovery).
+	p.resetUpstreamTransports()
+
+	p.pfLastRestoreTime.Store(time.Now().UnixMilli())
+	mainLog.Load().Info().Msg("DNS intercept watchdog: pf anchor restored successfully")
+	return true
+}
+
+// scheduleDelayedRechecks schedules delayed re-checks after a network change event.
+// VPN apps often modify pf rules and DNS settings asynchronously after the network
+// change that triggered our handler. These delayed checks catch:
+// - pf anchor wipes by VPN disconnect (Windscribe's firewallOff)
+// - Stale OS resolver nameservers (VPN DNS not yet cleaned from scutil)
+// - Stale VPN DNS routes in vpnDNSManager
+// - Tunnel interface additions/removals not yet visible
+//
+// Two delays (2s and 4s) cover both fast and slow VPN teardowns.
+func (p *prog) scheduleDelayedRechecks() {
+	for _, delay := range []time.Duration{pfAnchorRecheckDelay, pfAnchorRecheckDelayLong} {
+		time.AfterFunc(delay, func() {
+			if p.dnsInterceptState == nil || p.pfStabilizing.Load() {
+				return
+			}
+			p.ensurePFAnchorActive()
+			p.checkTunnelInterfaceChanges()
+			// Refresh OS resolver — VPN may have finished DNS cleanup since the
+			// immediate handler ran. This clears stale LAN nameservers (e.g.,
+			// a VPN's DNS IP (e.g., 10.255.255.3) lingering in scutil --dns).
+			ctrld.InitializeOsResolver(true)
+			if p.vpnDNS != nil {
+				p.vpnDNS.Refresh(true)
+			}
+		})
+	}
+}
+
+// pfWatchdog periodically checks that our pf anchor is still active.
+// Other programs (e.g., Windscribe desktop app, macOS configd) can replace
+// the entire pf ruleset with pfctl -f, which wipes our anchor references.
+// This watchdog detects and restores them.
+func (p *prog) pfWatchdog() { + mainLog.Load().Info().Msgf("DNS intercept: starting pf watchdog (interval: %s)", pfWatchdogInterval) + + var consecutiveMisses atomic.Int32 + ticker := time.NewTicker(pfWatchdogInterval) + defer ticker.Stop() + + for { + select { + case <-p.stopCh: + mainLog.Load().Debug().Msg("DNS intercept: pf watchdog stopped") + return + case <-ticker.C: + if p.dnsInterceptState == nil { + mainLog.Load().Debug().Msg("DNS intercept: pf watchdog exiting — intercept state is nil") + return + } + + restored := p.ensurePFAnchorActive() + if !restored { + // Rules are intact in text form — also probe actual interception. + // This catches cases where rules survive but pf's internal translation + // state is corrupted (e.g., after a hypervisor reloads pf.conf). + if !p.pfStabilizing.Load() && !p.pfMonitorRunning.Load() { + if !p.probePFIntercept() { + mainLog.Load().Warn().Msg("DNS intercept watchdog: rules intact but probe FAILED — forcing full reload") + p.forceReloadPFMainRuleset() + restored = true // treat as a restore for logging + } + } + + // Check if backoff should be reset. 
+ if p.pfBackoffMultiplier.Load() > 0 && p.pfLastRestoreTime.Load() > 0 { + elapsed := time.Since(time.UnixMilli(p.pfLastRestoreTime.Load())) + if elapsed > 60*time.Second { + p.pfBackoffMultiplier.Store(0) + mainLog.Load().Info().Msg("DNS intercept watchdog: rules stable for >60s — reset backoff") + } + } + } + if restored { + misses := consecutiveMisses.Add(1) + if misses >= pfConsecutiveMissThreshold { + mainLog.Load().Error().Msgf("DNS intercept watchdog: pf anchor has been missing for %d consecutive checks — something is persistently overwriting pf rules", misses) + } else { + mainLog.Load().Warn().Msgf("DNS intercept watchdog: pf anchor was missing and restored (consecutive misses: %d)", misses) + } + } else { + if old := consecutiveMisses.Swap(0); old > 0 { + mainLog.Load().Info().Msgf("DNS intercept watchdog: pf anchor stable again after %d consecutive restores", old) + } + } + } + } +} + +// exemptVPNDNSServers rebuilds the pf anchor rules to exclude VPN DNS interfaces +// and server IPs from interception. VPN DNS handlers using Network Extensions +// (e.g., Tailscale MagicDNS) need DNS traffic to flow without any pf interference. +// +// Called by vpnDNSManager.Refresh() whenever VPN DNS servers change. +func (p *prog) exemptVPNDNSServers(exemptions []vpnDNSExemption) error { + if p.dnsInterceptState == nil { + return fmt.Errorf("pf state not available") + } + + rulesStr := p.buildPFAnchorRules(exemptions) + + if err := os.WriteFile(pfAnchorFile, []byte(rulesStr), 0644); err != nil { + return fmt.Errorf("dns intercept: failed to rewrite pf anchor: %w", err) + } + + out, err := exec.Command("pfctl", "-a", pfAnchorName, "-f", pfAnchorFile).CombinedOutput() + if err != nil { + return fmt.Errorf("dns intercept: failed to reload pf anchor: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + + // Flush pf states after anchor reload so packets are re-evaluated against new rules. 
+ // Stale state entries from previous rules would keep routing packets via route-to + // even after VPN DNS interfaces/IPs are excluded from interception. + flushPFStates() + + // Ensure the anchor reference still exists in the main ruleset. + // Another program may have replaced the ruleset since we last checked. + if err := p.ensurePFAnchorReference(); err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to verify anchor reference during VPN DNS update") + } + + // Count unique excluded interfaces for logging. + excludedIfaces := make(map[string]bool) + for _, ex := range exemptions { + if ex.Interface != "" { + excludedIfaces[ex.Interface] = true + } + } + mainLog.Load().Info().Msgf("DNS intercept: updated pf rules — %d VPN DNS servers (%d interfaces excluded from intercept), %d OS resolver servers", + len(exemptions), len(excludedIfaces), len(ctrld.OsResolverNameservers())) + return nil +} + +// probePFIntercept tests whether pf's rdr translation is actually working by +// sending a DNS query through the interception path from a subprocess that does +// NOT have the _ctrld group GID. If pf interception is working, the query gets +// redirected to 127.0.0.1:53 (ctrld), and the DNS handler signals us. If broken +// (rdr rules present but not evaluating), the query goes to the real DNS server +// and we time out. +// +// Returns true if interception is working, false if broken or indeterminate. 
+func (p *prog) probePFIntercept() bool { + if p.dnsInterceptState == nil { + return true + } + + nsIPs := ctrld.OsResolverNameservers() + if len(nsIPs) == 0 { + mainLog.Load().Debug().Msg("DNS intercept probe: no OS resolver nameservers available") + return true // can't probe without a target + } + host, _, _ := net.SplitHostPort(nsIPs[0]) + if host == "" || host == "127.0.0.1" || host == "::1" { + mainLog.Load().Debug().Msg("DNS intercept probe: OS resolver is localhost, skipping probe") + return true // can't probe through localhost + } + + // Generate unique probe domain + probeID := fmt.Sprintf("_pf-probe-%x.%s", time.Now().UnixNano()&0xFFFFFFFF, pfProbeDomain) + + // Register probe so DNS handler can detect and signal it + probeCh := make(chan struct{}, 1) + p.pfProbeExpected.Store(probeID) + p.pfProbeCh.Store(&probeCh) + defer func() { + p.pfProbeExpected.Store("") + p.pfProbeCh.Store((*chan struct{})(nil)) + }() + + // Build a minimal DNS query packet for the probe domain. + // We use exec.Command to send from a subprocess with GID=0 (wheel), + // so pf's _ctrld group exemption does NOT apply and the query gets intercepted. + dnsPacket := buildDNSQueryPacket(probeID) + + // Send via a helper subprocess that drops the _ctrld group + cmd := exec.Command(os.Args[0], "pf-probe-send", host, fmt.Sprintf("%x", dnsPacket)) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Credential: &syscall.Credential{ + Uid: 0, + Gid: 0, // wheel group — NOT _ctrld, so pf intercepts it + }, + } + + if err := cmd.Start(); err != nil { + mainLog.Load().Debug().Err(err).Msg("DNS intercept probe: failed to start probe subprocess") + return true // can't probe, assume OK + } + + // Don't leak the subprocess + go func() { + _ = cmd.Wait() + }() + + select { + case <-probeCh: + return true + case <-time.After(pfProbeTimeout): + return false + } +} + +// buildDNSQueryPacket constructs a minimal DNS query packet (wire format) for the given domain. 
+func buildDNSQueryPacket(domain string) []byte { + // DNS header: ID=0x1234, QR=0, OPCODE=0, RD=1, QDCOUNT=1 + header := []byte{ + 0x12, 0x34, // ID + 0x01, 0x00, // Flags: RD=1 + 0x00, 0x01, // QDCOUNT=1 + 0x00, 0x00, // ANCOUNT=0 + 0x00, 0x00, // NSCOUNT=0 + 0x00, 0x00, // ARCOUNT=0 + } + + // Encode domain name in DNS wire format (label-length encoding) + // Remove trailing dot if present + d := strings.TrimSuffix(domain, ".") + var qname []byte + for _, label := range strings.Split(d, ".") { + qname = append(qname, byte(len(label))) + qname = append(qname, []byte(label)...) + } + qname = append(qname, 0x00) // root label + + // QTYPE=A (1), QCLASS=IN (1) + question := append(qname, 0x00, 0x01, 0x00, 0x01) + + return append(header, question...) +} + +// pfInterceptMonitor runs asynchronously after interface changes are detected. +// It probes pf interception with exponential backoff and forces a full pf reload +// if the probe fails. Only one instance runs at a time (singleton via atomic.Bool). +// +// The backoff schedule provides both fast detection (immediate + 500ms) and extended +// coverage (up to ~8s) to win the race against async pf reloads by hypervisors. +func (p *prog) pfInterceptMonitor() { + if !p.pfMonitorRunning.CompareAndSwap(false, true) { + mainLog.Load().Debug().Msg("DNS intercept monitor: already running, skipping") + return + } + defer p.pfMonitorRunning.Store(false) + + mainLog.Load().Info().Msg("DNS intercept monitor: starting interception probe sequence") + + // Backoff schedule: probe quickly first, then space out. 
+ // Total monitoring window: ~0 + 0.5 + 1 + 2 + 4 = ~7.5s + delays := []time.Duration{0, 500 * time.Millisecond, time.Second, 2 * time.Second, 4 * time.Second} + + for i, delay := range delays { + if delay > 0 { + time.Sleep(delay) + } + if p.dnsInterceptState == nil || p.pfStabilizing.Load() { + mainLog.Load().Debug().Msg("DNS intercept monitor: aborting — intercept disabled or stabilizing") + return + } + + if p.probePFIntercept() { + mainLog.Load().Debug().Msgf("DNS intercept monitor: probe %d/%d passed", i+1, len(delays)) + continue // working now — keep monitoring in case it breaks later in the window + } + + // Probe failed — pf translation is broken. Force full reload. + mainLog.Load().Warn().Msgf("DNS intercept monitor: probe %d/%d FAILED — pf translation broken, forcing full ruleset reload", i+1, len(delays)) + p.forceReloadPFMainRuleset() + + // Verify the reload fixed it + time.Sleep(200 * time.Millisecond) + if p.probePFIntercept() { + mainLog.Load().Info().Msg("DNS intercept monitor: probe passed after reload — interception restored") + // Continue monitoring in case the hypervisor reloads pf again + } else { + mainLog.Load().Error().Msg("DNS intercept monitor: probe still failing after reload — pf may need manual intervention") + } + } + + mainLog.Load().Info().Msg("DNS intercept monitor: probe sequence completed") +} + +// forceReloadPFMainRuleset unconditionally reloads the entire pf ruleset via +// "pfctl -f -". This resets pf's internal translation engine, fixing cases where +// rdr rules exist in text form but aren't being evaluated (e.g., after a hypervisor +// like Parallels reloads /etc/pf.conf as a side effect of creating/destroying +// virtual network interfaces). +// +// Unlike ensurePFAnchorReference() which returns early when anchor references are +// already present, this function always performs the full reload. 
+// +// The reload is safe for VPN interop because it reassembles from the current running +// ruleset (pfctl -sr/-sn), preserving all existing anchors and rules. +func (p *prog) forceReloadPFMainRuleset() { + natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) + rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) + anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) + + // Dump running rules. + natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: force reload — failed to dump NAT rules") + return + } + + filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: force reload — failed to dump filter rules") + return + } + + natLines := pfFilterRuleLines(string(natOut)) + filterLines := pfFilterRuleLines(string(filterOut)) + + // Separate scrub rules from filter rules. + var scrubLines, pureFilterLines []string + for _, line := range filterLines { + if strings.HasPrefix(line, "scrub") { + scrubLines = append(scrubLines, line) + } else { + pureFilterLines = append(pureFilterLines, line) + } + } + + // Ensure our anchor references are present (they may have been wiped). + if !pfContainsRule(natLines, natAnchorRef) { + natLines = append([]string{natAnchorRef}, natLines...) + } + if !pfContainsRule(natLines, rdrAnchorRef) { + natLines = append([]string{rdrAnchorRef}, natLines...) + } + if !pfContainsRule(pureFilterLines, anchorRef) { + pureFilterLines = append([]string{anchorRef}, pureFilterLines...) + } + + // Clean pf options (remove "set skip on lo0" if present). + cleanedOptions, _ := pfGetCleanedOptions() + + // Reassemble in pf's required order: options → scrub → translation → filtering. 
+ var combined strings.Builder + if cleanedOptions != "" { + combined.WriteString(cleanedOptions) + } + for _, line := range scrubLines { + combined.WriteString(line + "\n") + } + for _, line := range natLines { + combined.WriteString(line + "\n") + } + for _, line := range pureFilterLines { + combined.WriteString(line + "\n") + } + + cmd := exec.Command("pfctl", "-f", "-") + cmd.Stdin = strings.NewReader(combined.String()) + out, err := cmd.CombinedOutput() + if err != nil { + mainLog.Load().Error().Err(err).Msgf("DNS intercept: force reload — pfctl -f - failed (output: %s)", strings.TrimSpace(string(out))) + return + } + + // Also reload the anchor rules to ensure they're fresh. + var vpnExemptions []vpnDNSExemption + if p.vpnDNS != nil { + vpnExemptions = p.vpnDNS.CurrentExemptions() + } + rulesStr := p.buildPFAnchorRules(vpnExemptions) + if err := os.WriteFile(pfAnchorFile, []byte(rulesStr), 0644); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: force reload — failed to write anchor file") + } else if out, err := exec.Command("pfctl", "-a", pfAnchorName, "-f", pfAnchorFile).CombinedOutput(); err != nil { + mainLog.Load().Error().Err(err).Msgf("DNS intercept: force reload — failed to load anchor (output: %s)", strings.TrimSpace(string(out))) + } + + // Reset upstream transports — pf reload flushes state table, killing DoH connections. 
+ p.resetUpstreamTransports() + + mainLog.Load().Info().Msg("DNS intercept: force reload — pf ruleset and anchor reloaded successfully") +} diff --git a/cmd/cli/dns_intercept_darwin_test.go b/cmd/cli/dns_intercept_darwin_test.go new file mode 100644 index 00000000..822f2c5d --- /dev/null +++ b/cmd/cli/dns_intercept_darwin_test.go @@ -0,0 +1,127 @@ +//go:build darwin + +package cli + +import ( + "strings" + "testing" +) + +// ============================================================================= +// buildPFAnchorRules tests +// ============================================================================= + +func TestPFBuildAnchorRules_Basic(t *testing.T) { + p := &prog{} + rules := p.buildPFAnchorRules(nil) + + // rdr (translation) must come before pass (filtering) + rdrIdx := strings.Index(rules, "rdr pass on lo0") + passRouteIdx := strings.Index(rules, "pass out quick on ! lo0 route-to lo0") + passInIdx := strings.Index(rules, "pass in quick on lo0") + + if rdrIdx < 0 { + t.Fatal("missing rdr rule") + } + if passRouteIdx < 0 { + t.Fatal("missing pass out route-to rule") + } + if passInIdx < 0 { + t.Fatal("missing pass in on lo0 rule") + } + if rdrIdx >= passRouteIdx { + t.Error("rdr rules must come before pass out route-to rules") + } + if passRouteIdx >= passInIdx { + t.Error("pass out route-to must come before pass in on lo0") + } + + // Both UDP and TCP rdr rules + if !strings.Contains(rules, "proto udp") || !strings.Contains(rules, "proto tcp") { + t.Error("must have both UDP and TCP rdr rules") + } +} + +func TestPFBuildAnchorRules_WithVPNServers(t *testing.T) { + p := &prog{} + vpnServers := []string{"10.8.0.1", "10.8.0.2"} + rules := p.buildPFAnchorRules(vpnServers) + + // VPN exemption rules must appear + for _, s := range vpnServers { + if !strings.Contains(rules, s) { + t.Errorf("missing VPN exemption for %s", s) + } + } + + // VPN exemptions must come before route-to + exemptIdx := strings.Index(rules, "10.8.0.1") + routeIdx := 
strings.Index(rules, "route-to lo0") + if exemptIdx >= routeIdx { + t.Error("VPN exemptions must come before route-to rules") + } +} + +func TestPFBuildAnchorRules_IPv4AndIPv6VPN(t *testing.T) { + p := &prog{} + vpnServers := []string{"10.8.0.1", "fd00::1"} + rules := p.buildPFAnchorRules(vpnServers) + + // IPv4 server should use "inet" + lines := strings.Split(rules, "\n") + for _, line := range lines { + if strings.Contains(line, "10.8.0.1") { + if !strings.Contains(line, "inet ") { + t.Error("IPv4 VPN server rule should contain 'inet'") + } + if strings.Contains(line, "inet6") { + t.Error("IPv4 VPN server rule should not contain 'inet6'") + } + } + if strings.Contains(line, "fd00::1") { + if !strings.Contains(line, "inet6") { + t.Error("IPv6 VPN server rule should contain 'inet6'") + } + } + } +} + +func TestPFBuildAnchorRules_Ordering(t *testing.T) { + p := &prog{} + vpnServers := []string{"10.8.0.1"} + rules := p.buildPFAnchorRules(vpnServers) + + // Verify ordering: rdr → exemptions → route-to → pass in on lo0 + rdrIdx := strings.Index(rules, "rdr pass on lo0") + exemptIdx := strings.Index(rules, "pass out quick on ! lo0 inet proto { udp, tcp } from any to 10.8.0.1") + routeIdx := strings.Index(rules, "pass out quick on ! lo0 route-to lo0") + passInIdx := strings.Index(rules, "pass in quick on lo0") + + if rdrIdx < 0 || exemptIdx < 0 || routeIdx < 0 || passInIdx < 0 { + t.Fatalf("missing expected rules: rdr=%d exempt=%d route=%d passIn=%d", rdrIdx, exemptIdx, routeIdx, passInIdx) + } + + if !(rdrIdx < exemptIdx && exemptIdx < routeIdx && routeIdx < passInIdx) { + t.Errorf("incorrect rule ordering: rdr(%d) < exempt(%d) < route(%d) < passIn(%d)", rdrIdx, exemptIdx, routeIdx, passInIdx) + } +} + +// TestPFAddressFamily tests the pfAddressFamily helper. 
+func TestPFAddressFamily(t *testing.T) { + tests := []struct { + ip string + want string + }{ + {"10.0.0.1", "inet"}, + {"192.168.1.1", "inet"}, + {"127.0.0.1", "inet"}, + {"::1", "inet6"}, + {"fd00::1", "inet6"}, + {"2001:db8::1", "inet6"}, + } + for _, tt := range tests { + if got := pfAddressFamily(tt.ip); got != tt.want { + t.Errorf("pfAddressFamily(%q) = %q, want %q", tt.ip, got, tt.want) + } + } +} diff --git a/docs/pf-dns-intercept.md b/docs/pf-dns-intercept.md new file mode 100644 index 00000000..6c19925f --- /dev/null +++ b/docs/pf-dns-intercept.md @@ -0,0 +1,380 @@ +# macOS pf DNS Interception — Technical Reference + +## Overview + +ctrld uses macOS's built-in packet filter (pf) to intercept all DNS traffic at the kernel level, redirecting it to ctrld's local listeners at `127.0.0.1:53` (IPv4) and `[::1]:53` (IPv6). This operates below interface DNS settings, making it immune to VPN software (F5, Cisco, GlobalProtect, etc.) that overwrites DNS on network interfaces. + +## How pf Works (Relevant Basics) + +pf is a stateful packet filter built into macOS (and BSD). It processes packets through a pipeline with **strict rule ordering**: + +``` +options (set) → normalization (scrub) → queueing → translation (nat/rdr) → filtering (pass/block) +``` + +**Anchors** are named rule containers that allow programs to manage their own rules without modifying the global ruleset. Each anchor type must appear in the correct section: + +| Anchor Type | Section | Purpose | +|-------------|---------|---------| +| `scrub-anchor` | Normalization | Packet normalization | +| `nat-anchor` | Translation | NAT rules | +| `rdr-anchor` | Translation | Redirect rules | +| `anchor` | Filtering | Pass/block rules | + +**Critical constraint:** If you place a `rdr-anchor` line after an `anchor` line, pf rejects the entire config with "Rules must be in order." + +## Why We Can't Just Use `rdr on ! lo0` + +The obvious approach: +``` +rdr pass on ! 
lo0 proto udp from any to any port 53 -> 127.0.0.1 port 53
+```
+
+**This doesn't work.** macOS pf `rdr` rules only apply to *forwarded/routed* traffic — packets passing through the machine to another destination. DNS queries originating from the machine itself (locally-originated) are never matched by `rdr` on non-loopback interfaces.
+
+This is a well-known pf limitation on macOS/BSD. It means the VPN client's DNS queries would be redirected (if routed through the machine), but the user's own applications querying DNS directly would not.
+
+## Our Approach: route-to + rdr (Four-Step)
+
+We use a four-step technique to intercept locally-originated DNS:
+
+```
+Step 1: Force outbound DNS through loopback
+  pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! 127.0.0.1 port 53
+
+Step 2: Pass the packet outbound on lo0 (needed when VPN firewalls have "block drop all")
+  pass out quick on lo0 inet proto udp from any to ! 127.0.0.1 port 53 no state
+
+Step 3: Redirect it on loopback to ctrld's listener
+  rdr on lo0 inet proto udp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53
+
+Step 4: Accept and create state for response routing
+  pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53
+```
+
+> **State handling is critical for VPN firewall coexistence:**
+> - **route-to**: `keep state` (default). State is interface-bound on macOS — doesn't match on lo0.
+> - **pass out lo0**: `no state`. If this created state, it would match inbound on lo0 and bypass rdr.
+> - **rdr**: no `pass` keyword. Packet must go through filter so `pass in` can create response state.
+> - **pass in lo0**: `keep state` (default). Creates the ONLY state on lo0 — handles response routing.
+
+### Packet Flow
+
+```
+Application queries 10.255.255.3:53 (e.g., VPN DNS server)
+ ↓
+Kernel: outbound on en0 (or utun420 for VPN)
+ ↓
+pf filter: "pass out route-to lo0 ... 
port 53" → redirects to lo0, creates state on en0
+ ↓
+pf filter (outbound lo0): "pass out on lo0 ... no state" → passes, NO state created
+ ↓
+Loopback reflects packet inbound on lo0
+ ↓
+pf rdr (inbound lo0): "rdr on lo0 ... port 53 -> 127.0.0.1:53" → rewrites destination
+ ↓
+pf filter (inbound lo0): "pass in reply-to lo0 ... to 127.0.0.1:53" → creates state + reply route
+ ↓
+ctrld receives query on 127.0.0.1:53
+ ↓
+ctrld resolves via DoH (port 443, exempted by group _ctrld)
+ ↓
+Response from ctrld: 127.0.0.1:53 → 100.94.163.168:54851
+ ↓
+reply-to lo0: forces response through lo0 (without this, kernel routes via utun420 → lost in VPN tunnel)
+ ↓
+pf applies rdr reverse NAT: src 127.0.0.1 → 10.255.255.3
+ ↓
+Application receives response from 10.255.255.3:53 ✓
+```
+
+### Why This Works
+
+1. `route-to lo0` forces the packet onto loopback at the filter stage
+2. `pass out on lo0 no state` gets past VPN "block drop all" without creating state
+3. No state on lo0 means rdr gets fresh evaluation on the inbound pass
+4. `reply-to lo0` on `pass in` forces the response through lo0 — without it, the kernel routes the response to VPN tunnel IPs via the VPN interface and it's lost
+5. `rdr` (without `pass`) redirects then hands off to filter rules
+6. `pass in keep state` creates the response state — the only state on the lo0 path
+7. Traffic already destined for `127.0.0.1` is excluded (`to ! 127.0.0.1`) to prevent loops
+8. ctrld's own upstream queries use DoH (port 443), bypassing port 53 rules entirely
+
+### Why Each State Decision Matters
+
+| Rule | State | Why |
+|------|-------|-----|
+| route-to on en0/utun | keep state | Needed for return routing. Interface-bound, won't match on lo0. |
+| pass out on lo0 | **no state** | If stateful, it would match inbound lo0 → bypass rdr → DNS broken |
+| rdr on lo0 | N/A (no pass) | Must go through filter so pass-in creates response state |
+| pass in on lo0 | keep state + reply-to lo0 | Creates lo0 state. 
`reply-to` forces response through lo0 (not VPN tunnel). | + +## IPv6 DNS Interception + +macOS systems with IPv6 nameservers (common — `scutil --dns` often shows an IPv6 nameserver at index 0) send DNS queries over IPv6. Without IPv6 interception, these queries bypass ctrld, causing ~1s delays (the IPv6 query times out, then the app falls back to IPv4). + +### Why IPv6 Needs Special Handling + +Three problems prevent a simple "mirror the IPv4 rules" approach: + +1. **Cross-AF redirect is impossible**: pf cannot `rdr on lo0 inet6 ... -> 127.0.0.1` (redirecting IPv6 to IPv4). ctrld must listen on `[::1]` to handle IPv6 DNS. + +2. **`block return` is ineffective for IPv6 DNS**: BSD doesn't deliver ICMPv6 unreachable errors to unconnected UDP sockets (which `dig` and most resolvers use). So `block return out inet6 ... port 53` generates the ICMP error, but the application never receives it — it waits for the full timeout (~1s). + +3. **sendmsg from `[::1]` to global unicast fails**: Unlike IPv4 where the kernel allows `sendmsg` from `127.0.0.1` to local private IPs (e.g., `10.x.x.x`), macOS/BSD rejects `sendmsg` from `[::1]` to a global unicast IPv6 address with `EINVAL`. Since pf's `rdr` preserves the original source IP (the machine's global IPv6 address), ctrld's reply would fail. + +### Solution: nat + rdr + [::1] Listener + +``` +# NAT: rewrite source to ::1 so ctrld can reply +nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1 +nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1 + +# RDR: redirect destination to ctrld's IPv6 listener +rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> ::1 port 53 +rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> ::1 port 53 + +# Filter: route-to forces IPv6 DNS to loopback (mirrors IPv4 rules) +pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53 +pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! 
::1 port 53 + +# Pass on lo0 without state (mirrors IPv4) +pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state +pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state + +# Accept redirected IPv6 DNS with reply-to (mirrors IPv4) +pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to ::1 port 53 +``` + +### IPv6 Packet Flow + +``` +Application queries [2607:f0c8:8000:8210::1]:53 (IPv6 DNS server) + ↓ +pf filter: "pass out route-to lo0 inet6 ... port 53" → redirects to lo0 + ↓ +pf (outbound lo0): "pass out on lo0 inet6 ... no state" → passes + ↓ +Loopback reflects packet inbound on lo0 + ↓ +pf nat: rewrites source 2607:f0c8:...:ec6e → ::1 +pf rdr: rewrites dest [2607:f0c8:8000:8210::1]:53 → [::1]:53 + ↓ +ctrld receives query from [::1]:port → [::1]:53 + ↓ +ctrld resolves via DoH, replies to [::1]:port (kernel accepts ::1 → ::1) + ↓ +pf reverses both translations: + - nat reverse: dest ::1 → 2607:f0c8:...:ec6e (original client) + - rdr reverse: src ::1 → 2607:f0c8:8000:8210::1 (original DNS server) + ↓ +Application receives response from [2607:f0c8:8000:8210::1]:53 ✓ +``` + +### Client IP Recovery + +The `nat` rewrites the source to `::1`, so ctrld sees the client as `::1` (loopback). The existing `spoofLoopbackIpInClientInfo()` logic detects this and replaces it with the machine's real RFC1918 IPv4 address (e.g., `10.0.10.211`). This is the same mechanism used when queries arrive from `127.0.0.1` — no client identity is lost. + +### IPv6 Listener + +The `[::1]` listener reuses the existing infrastructure from Windows (where it was added for the same reason — can't suppress IPv6 DNS resolvers from the system config). The `needLocalIPv6Listener()` function gates it, returning `true` on: +- **Windows**: Always (if IPv6 is available) +- **macOS**: Only in intercept mode + +If the `[::1]` listener fails to bind, it logs a warning and continues — the IPv4 listener is primary. 
+ +### nat-anchor Requirement + +The `nat` rules in our anchor require a `nat-anchor "com.controld.ctrld"` reference in the main pf ruleset, in addition to the existing `rdr-anchor` and `anchor` references. All pf management functions (inject, remove, verify, watchdog, force-reload) handle all three anchor types. + +## Rule Ordering Within the Anchor + +pf requires translation rules before filter rules, even within an anchor: + +```pf +# === Translation rules (MUST come first) === +rdr on lo0 inet proto udp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53 +rdr on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53 + +# === Exemptions (filter phase, scoped to _ctrld group) === +pass out quick on ! lo0 inet proto { udp, tcp } from any to port 53 group _ctrld +pass out quick on ! lo0 inet proto { udp, tcp } from any to port 53 group _ctrld + +# === Main intercept (filter phase) === +pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! 127.0.0.1 port 53 +pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! 127.0.0.1 port 53 + +# === Allow redirected traffic on loopback === +pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53 +``` + +### Exemption Mechanism (Group-Scoped) + +Some IPs must bypass the redirect: + +- **OS resolver nameservers** (e.g., DHCP-assigned DNS): ctrld's recovery/bootstrap path may query these on port 53. Without exemption, these queries loop back to ctrld. +- **VPN DNS servers**: When ctrld forwards VPN-specific domains (split DNS) to the VPN's internal DNS, those queries must reach the VPN DNS server directly. + +Exemptions use `pass out quick` with `group _ctrld` **before** the `route-to` rule. The `group _ctrld` constraint ensures that **only ctrld's own process** can bypass the redirect — other applications cannot circumvent DNS interception by querying the exempted IPs directly. 
Because pf evaluates filter rules in order and `quick` terminates evaluation, the exempted packet goes directly out the real interface and never hits the `route-to` or `rdr`. + +### The `_ctrld` Group + +To scope pf exemptions to ctrld's process only, we use a dedicated macOS system group: + +1. **Creation**: On startup, `ensureCtrldGroup()` creates a `_ctrld` system group via `dscl` (macOS Directory Services) if it doesn't already exist. The GID is chosen from the 350-450 range to avoid conflicts with Apple's reserved ranges. The function is idempotent. + +2. **Process GID**: Before loading pf rules, ctrld sets its effective GID to `_ctrld` via `syscall.Setegid()`. All sockets created by ctrld after this point are tagged with this GID. + +3. **pf matching**: Exemption rules include `group _ctrld`, so pf only allows bypass for packets from processes with this effective GID. Other processes querying the same exempt IPs are still redirected to ctrld. + +4. **Lifecycle**: The group is **never removed** on shutdown or uninstall. It's a harmless system group, and leaving it avoids race conditions during rapid restart cycles. It is recreated (no-op if exists) on every start. + +## Anchor Injection into pf.conf + +The trickiest part. macOS only processes anchors declared in the active pf ruleset. We must inject our anchor references into the running config. + +### What We Do + +1. Read `/etc/pf.conf` +2. If our anchor reference already exists, reload as-is +3. Otherwise, inject `nat-anchor "com.controld.ctrld"` and `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section +4. Write to a **temp file** and load with `pfctl -f ` +5. **We never modify `/etc/pf.conf` on disk** — changes are runtime-only and don't survive reboot (ctrld re-injects on every start) + +### Injection Logic + +Finding the right insertion point requires understanding the existing pf.conf structure. The algorithm: + +1. 
**Scan** for existing `rdr-anchor`/`nat-anchor`/`binat-anchor` lines (translation section) and `anchor` lines (filter section) +2. **Insert `rdr-anchor`**: + - Before the first existing `rdr-anchor` line (if any exist) + - Else before the first `anchor` line (translation must come before filtering) + - Else before the first `pass`/`block` line + - Last resort: append (but this should never happen with a valid pf.conf) +3. **Insert `anchor`**: + - Before the first existing `anchor` line (if any) + - Else before the first `pass`/`block` line + - Last resort: append + +### Real-World pf.conf Scenarios + +We test against these configurations: + +#### Default macOS (Sequoia/Sonoma) +``` +scrub-anchor "com.apple/*" +nat-anchor "com.apple/*" +rdr-anchor "com.apple/*" +anchor "com.apple/*" +load anchor "com.apple" from "/etc/pf.anchors/com.apple" +``` +Our `rdr-anchor` goes before `rdr-anchor "com.apple/*"`, our `anchor` goes before `anchor "com.apple/*"`. + +#### Little Snitch +Adds `rdr-anchor "com.obdev.littlesnitch"` and `anchor "com.obdev.littlesnitch"` in the appropriate sections. Our anchors coexist — pf processes multiple anchors in order. + +#### Lulu Firewall (Objective-See) +Adds `anchor "com.objective-see.lulu"`. We insert `rdr-anchor` before it (translation before filtering) and `anchor` before it. + +#### Cisco AnyConnect +Adds `nat-anchor "com.cisco.anyconnect"`, `rdr-anchor "com.cisco.anyconnect"`, `anchor "com.cisco.anyconnect"`. Our anchors insert alongside Cisco's in their respective sections. + +#### Minimal pf.conf (no anchors) +Just `set skip on lo0` and `pass all`. We insert `rdr-anchor` and `anchor` before the `pass` line. + +#### Empty pf.conf +Both anchors appended. This is a degenerate case that shouldn't occur in practice. + +## Failure Modes and Safety + +### What happens if our injection fails? 
+- `ensurePFAnchorReference` returns an error, logged as a warning +- ctrld continues running but DNS interception may not work +- The anchor file and rules are cleaned up on shutdown +- **No damage to existing pf config** — we never modify files on disk + +### What happens if ctrld crashes (SIGKILL)? +- pf anchor rules persist in kernel memory +- DNS is redirected to 127.0.0.1:53 but nothing is listening → DNS breaks +- On next `ctrld start`, we detect the stale anchor file, flush the anchor, and start fresh +- Without ctrld restart: `sudo pfctl -a com.controld.ctrld -F all` manually clears it + +### What if another program flushes all pf rules? +- Our anchor references are removed from the running config +- DNS interception stops (traffic goes direct again — fails open, not closed) +- The periodic watchdog (30s) detects missing rules and restores them +- ctrld continues working for queries sent to 127.0.0.1 directly + +### What if another program reloads pf.conf (corrupting translation state)? +Programs like Parallels Desktop reload `/etc/pf.conf` when creating or destroying +virtual network interfaces (bridge100, vmenet0). This can corrupt pf's internal +translation engine — **rdr rules survive in text form but stop evaluating**. +The watchdog's rule-text checks say "intact" while DNS is silently broken. + +**Detection:** ctrld detects interface appearance/disappearance in the network +change handler and spawns an asynchronous interception probe monitor: + +1. A subprocess sends a DNS query WITHOUT the `_ctrld` group GID, so pf + intercept rules apply to it +2. If ctrld receives the query → pf interception is working +3. If the query times out (1s) → pf translation is broken +4. On failure: `forceReloadPFMainRuleset()` does `pfctl -f -` with the current + running ruleset, resetting pf's translation engine + +The monitor probes with exponential backoff (0, 0.5, 1, 2, 4s) to win the race +against async pf reloads. Only one monitor runs at a time (singleton). 
The +watchdog also runs the probe every 30s as a safety net. + +The full pf reload is VPN-safe: it reassembles from `pfctl -sr` + `pfctl -sn` +(the current running state), preserving all existing anchors and rules. + +### What if another program adds conflicting rdr rules? +- pf processes anchors in declaration order +- If another program redirects port 53 before our anchor, their redirect wins +- If after, ours wins (first match with `quick` or `rdr pass`) +- Our maximum-weight sublayer approach on Windows (WFP) doesn't apply to pf — pf uses rule ordering, not weights + +### What about `set skip on lo0`? +Some pf.conf files include `set skip on lo0` which tells pf to skip ALL processing on loopback. **This would break our approach** since both the `rdr on lo0` and `pass in on lo0` rules would be skipped. + +**Mitigation:** When injecting anchor references via `ensurePFAnchorReference()`, +we strip `lo0` from any `set skip on` directives before reloading. The watchdog +also checks for `set skip on lo0` and triggers a restore if detected. The +interception probe provides an additional safety net — if `set skip on lo0` gets +re-applied by another program, the probe will fail and trigger a full reload. + +## Cleanup + +On shutdown (`stopDNSIntercept`): +1. `pfctl -a com.controld.ctrld -F all` — flush all rules from our anchor +2. Remove `/etc/pf.anchors/com.controld.ctrld` anchor file +3. `pfctl -f /etc/pf.conf` — reload original pf.conf, removing our injected anchor references from the running config + +This is clean: no files modified on disk, no residual rules. + +## Comparison with Other Approaches + +| Approach | Intercepts local DNS? | Survives VPN DNS override? | Risk of loops? | Complexity | +|----------|----------------------|---------------------------|----------------|------------| +| `rdr on ! 
lo0` | ❌ No | Yes | Low | Low |
+| `route-to lo0` + `rdr on lo0` | ✅ Yes | Yes | Medium (need exemptions) | Medium |
+| `/etc/resolver/` | Partial (per-domain only) | No (VPN can overwrite) | Low | Low |
+| `NEDNSProxyProvider` | ✅ Yes | Yes | Low | High (needs app bundle) |
+| NRPT (Windows only) | N/A | Partial | Low | Medium |
+
+We chose `route-to + rdr` as the best balance of effectiveness and deployability (no app bundle needed, no kernel extension, works with existing ctrld binary).
+
+## Key pf Nuances Learned
+
+1. **`rdr` doesn't match locally-originated traffic** — this is the biggest gotcha
+2. **Rule ordering is enforced** — translation before filtering, always
+3. **Anchors must be declared in the main ruleset** — just loading an anchor file isn't enough
+4. **`rdr` without `pass`** — redirected packets must go through filter rules so `pass in keep state` can create response state. `rdr pass` alone is insufficient for response delivery.
+5. **State handling is nuanced** — route-to uses `keep state` (state is interface-bound on macOS, so it won't match on lo0). `pass out on lo0` must use `no state` (prevents rdr bypass). `pass in on lo0` uses `keep state` + `reply-to lo0` (creates response state AND forces response through loopback instead of VPN tunnel). Getting any of these wrong breaks either the forward or return path.
+6. **`quick` terminates evaluation** — exemption rules must use `quick` and appear before the route-to rule
+7. **Piping to `pfctl -f -` can fail** — special characters in pf.conf content cause issues; use temp files
+8. **`set skip on lo0` would break us** — but it's not in default macOS pf.conf
+9. **`pass out quick` exemptions work with route-to** — they fire in the same phase (filter), so `quick` + rule ordering means exempted packets never hit the route-to rule
+10. **pf cannot cross-AF redirect** — `rdr on lo0 inet6 ... -> 127.0.0.1` is invalid. IPv6 DNS must be handled by an `[::1]` listener.
+11. 
**`block return` doesn't work for IPv6 DNS** — BSD doesn't deliver ICMPv6 unreachable to unconnected UDP sockets (`sendto`). Apps timeout waiting for a response that never comes. +12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. `nat` on lo0 is required to rewrite the source. +13. **`nat-anchor` is separate from `rdr-anchor`** — pf requires both in the main ruleset for nat and rdr rules in an anchor to be evaluated. `rdr-anchor` alone does not cover nat rules. diff --git a/test-scripts/README.md b/test-scripts/README.md new file mode 100644 index 00000000..a2461bd8 --- /dev/null +++ b/test-scripts/README.md @@ -0,0 +1,47 @@ +# DNS Intercept Test Scripts + +Manual test scripts for verifying DNS intercept mode behavior. These require root/admin privileges and a running ctrld instance. + +## Structure + +``` +test-scripts/ +├── darwin/ +│ ├── test-recovery-bypass.sh # Captive portal recovery simulation +│ ├── test-dns-intercept.sh # Basic pf intercept verification +│ ├── test-pf-group-exemption.sh # Group-based pf exemption test +│ ├── validate-pf-rules.sh # Dry-run pf rule validation +│ ├── diag-lo0-capture.sh # Capture DNS on lo0 for pf debugging +│ ├── diag-pf-poll.sh # Poll pf rules/states every 2s +│ └── diag-vpn-connect.sh # VPN connect/disconnect diagnostic +└── windows/ + ├── test-recovery-bypass.ps1 # Captive portal recovery simulation + └── test-dns-intercept.ps1 # Basic WFP intercept verification +``` + +## Prerequisites + +- ctrld running with `--intercept-mode dns` (or `--intercept-mode hard`) +- Verbose logging: `-v 1 --log /tmp/dns.log` (macOS) or `--log C:\temp\dns.log` (Windows) +- Root (macOS) or Administrator (Windows) +- For recovery tests: disconnect VPNs (e.g., Tailscale) that provide alternative routes + +## Recovery Bypass Test + +Simulates a captive portal by blackholing ctrld's upstream DoH IPs and cycling 
wifi. Verifies that ctrld's recovery bypass activates, discovers DHCP nameservers, and forwards queries to them until the upstream recovers. + +### macOS +```bash +sudo bash test-scripts/darwin/test-recovery-bypass.sh en0 +``` + +### Windows (PowerShell as Administrator) +```powershell +.\test-scripts\windows\test-recovery-bypass.ps1 -WifiAdapter "Wi-Fi" +``` + +## Safety + +All scripts clean up on exit (including Ctrl+C): +- **macOS**: Removes route blackholes, re-enables wifi +- **Windows**: Removes firewall rules, re-enables adapter diff --git a/test-scripts/darwin/diag-lo0-capture.sh b/test-scripts/darwin/diag-lo0-capture.sh new file mode 100644 index 00000000..3a446c71 --- /dev/null +++ b/test-scripts/darwin/diag-lo0-capture.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# diag-lo0-capture.sh — Capture DNS on lo0 to see where the pf chain breaks +# Usage: sudo bash diag-lo0-capture.sh +# Run while VPN + ctrld are both active, then dig from another terminal + +set -u +PCAP="/tmp/lo0-dns-$(date +%s).pcap" +echo "=== lo0 DNS Packet Capture ===" +echo "Capturing to: $PCAP" +echo "" + +# Show current rules (verify build) +echo "--- ctrld anchor rdr rules ---" +pfctl -a com.controld.ctrld -sn 2>/dev/null +echo "" +echo "--- ctrld anchor filter rules (lo0 only) ---" +pfctl -a com.controld.ctrld -sr 2>/dev/null | grep lo0 +echo "" + +# Check pf state table for port 53 before +echo "--- port 53 states BEFORE dig ---" +pfctl -ss 2>/dev/null | grep ':53' | head -10 +echo "(total: $(pfctl -ss 2>/dev/null | grep -c ':53'))" +echo "" + +# Start capture on lo0 +echo "Starting tcpdump on lo0 port 53..." +echo ">>> In another terminal, run: dig example.com" +echo ">>> Then press Ctrl-C here" +echo "" +tcpdump -i lo0 -n -v port 53 -w "$PCAP" 2>&1 & +TCPDUMP_PID=$! + +# Also show live output +tcpdump -i lo0 -n port 53 2>&1 & +LIVE_PID=$! 
+ +# Wait for Ctrl-C +trap "kill $TCPDUMP_PID $LIVE_PID 2>/dev/null; echo ''; echo '--- port 53 states AFTER dig ---'; pfctl -ss 2>/dev/null | grep ':53' | head -20; echo '(total: '$(pfctl -ss 2>/dev/null | grep -c ':53')')'; echo ''; echo 'Capture saved to: $PCAP'; echo 'Read with: tcpdump -r $PCAP -n -v'; exit 0" INT +wait diff --git a/test-scripts/darwin/diag-pf-poll.sh b/test-scripts/darwin/diag-pf-poll.sh new file mode 100644 index 00000000..63e354cf --- /dev/null +++ b/test-scripts/darwin/diag-pf-poll.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# diag-pf-poll.sh — Polls pf rules, options, states, and DNS every 2s +# Usage: sudo bash diag-pf-poll.sh | tee /tmp/pf-poll.log +# Steps: 1) Run script 2) Connect VPN 3) Start ctrld 4) Ctrl-C when done + +set -u +LOG="/tmp/pf-poll-$(date +%s).log" +echo "=== PF Poll Diagnostic — logging to $LOG ===" +echo "Press Ctrl-C to stop" +echo "" + +poll() { + local ts=$(date '+%H:%M:%S.%3N') + echo "======== [$ts] POLL ========" + + # 1. pf options — looking for "set skip on lo0" + echo "--- pf options ---" + pfctl -so 2>/dev/null | grep -i skip || echo "(no skip rules)" + + # 2. Main ruleset anchors — where is ctrld relative to block drop all? + echo "--- main filter rules (summary) ---" + pfctl -sr 2>/dev/null | head -30 + + # 3. Main NAT/rdr rules + echo "--- main nat/rdr rules (summary) ---" + pfctl -sn 2>/dev/null | head -20 + + # 4. ctrld anchor content + echo "--- ctrld anchor (filter) ---" + pfctl -a com.apple.internet-sharing/ctrld -sr 2>/dev/null || echo "(no anchor)" + echo "--- ctrld anchor (nat/rdr) ---" + pfctl -a com.apple.internet-sharing/ctrld -sn 2>/dev/null || echo "(no anchor)" + + # 5. State count for rdr target (10.255.255.3) and loopback + echo "--- states summary ---" + local total=$(pfctl -ss 2>/dev/null | wc -l | tr -d ' ') + local rdr=$(pfctl -ss 2>/dev/null | grep -c '10\.255\.255\.3' || true) + local lo0=$(pfctl -ss 2>/dev/null | grep -c 'lo0' || true) + echo "total=$total rdr_target=$rdr lo0=$lo0" + + # 6. 
Quick DNS test (1s timeout) + echo "--- DNS tests ---" + local direct=$(dig +short +time=1 +tries=1 example.com @127.0.0.1 2>&1 | head -1) + local system=$(dig +short +time=1 +tries=1 example.com 2>&1 | head -1) + echo "direct @127.0.0.1: $direct" + echo "system DNS: $system" + + # 7. VPN tunnel interface + echo "--- tunnel interfaces ---" + ifconfig -l | tr ' ' '\n' | grep -E '^utun' | while read iface; do + echo -n "$iface: " + ifconfig "$iface" 2>/dev/null | grep 'inet ' | awk '{print $2}' || echo "no ip" + done + + echo "" +} + +# Main loop +while true; do + poll 2>&1 | tee -a "$LOG" + sleep 2 +done diff --git a/test-scripts/darwin/diag-vpn-connect.sh b/test-scripts/darwin/diag-vpn-connect.sh new file mode 100755 index 00000000..7ea8b283 --- /dev/null +++ b/test-scripts/darwin/diag-vpn-connect.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# diag-vpn-connect.sh — Diagnostic script for testing ctrld dns-intercept +# during VPN connection on macOS. +# +# Usage: sudo ./diag-vpn-connect.sh +# +# Run this BEFORE connecting VPN. It polls every 0.5s and captures: +# 1. pf anchor state (are ctrld anchors present?) +# 2. pf state table entries (rdr interception working?) +# 3. ctrld log events (watchdog, rebootstrap, errors) +# 4. scutil DNS resolver state +# 5. Active tunnel interfaces +# 6. dig test query results +# +# Output goes to /tmp/diag-vpn-<timestamp>/ +# Press Ctrl-C to stop. A summary is printed at the end. + +set -e + +if [ "$(id -u)" -ne 0 ]; then + echo "ERROR: Must run as root (sudo)" + exit 1 +fi + +CTRLD_LOG="${CTRLD_LOG:-/tmp/dns.log}" +TIMESTAMP=$(date +%Y%m%d-%H%M%S) +OUTDIR="/tmp/diag-vpn-${TIMESTAMP}" +mkdir -p "$OUTDIR" + +echo "=== VPN + ctrld DNS Intercept Diagnostic ===" +echo "Output: $OUTDIR" +echo "ctrld log: $CTRLD_LOG" +echo "" +echo "1. Start this script" +echo "2. Connect VPN" +echo "3. Wait ~30 seconds" +echo "4. Try: dig popads.net / dig @127.0.0.1 popads.net" +echo "5. Ctrl-C to stop and see summary" +echo "" +echo "Polling every 0.5s... 
Press Ctrl-C to stop." +echo "" + +# Track ctrld log position +if [ -f "$CTRLD_LOG" ]; then + LOG_START_LINE=$(wc -l < "$CTRLD_LOG") +else + LOG_START_LINE=0 +fi + +ITER=0 +DIG_FAIL=0 +DIG_OK=0 +ANCHOR_MISSING=0 +ANCHOR_PRESENT=0 +PF_WIPE_COUNT=0 +FORCE_REBOOT_COUNT=0 +LAST_TUNNEL_IFACES="" + +cleanup() { + echo "" + echo "=== Stopping diagnostic ===" + + # Capture final state + echo "--- Final pf state ---" > "$OUTDIR/final-pfctl.txt" + pfctl -sa 2>/dev/null >> "$OUTDIR/final-pfctl.txt" 2>&1 || true + + echo "--- Final scutil ---" > "$OUTDIR/final-scutil.txt" + scutil --dns >> "$OUTDIR/final-scutil.txt" 2>&1 || true + + # Extract ctrld log events since start + if [ -f "$CTRLD_LOG" ]; then + tail -n +$((LOG_START_LINE + 1)) "$CTRLD_LOG" > "$OUTDIR/ctrld-events.log" 2>/dev/null || true + + # Extract key events + echo "--- Watchdog events ---" > "$OUTDIR/summary-watchdog.txt" + grep -i "watchdog\|anchor.*missing\|anchor.*restored\|force-reset\|re-bootstrapping\|force re-bootstrapping" "$OUTDIR/ctrld-events.log" >> "$OUTDIR/summary-watchdog.txt" 2>/dev/null || true + + echo "--- Errors ---" > "$OUTDIR/summary-errors.txt" + grep '"level":"error"' "$OUTDIR/ctrld-events.log" >> "$OUTDIR/summary-errors.txt" 2>/dev/null || true + + echo "--- Network changes ---" > "$OUTDIR/summary-network.txt" + grep -i "Network change\|tunnel interface\|Ignoring interface" "$OUTDIR/ctrld-events.log" >> "$OUTDIR/summary-network.txt" 2>/dev/null || true + + echo "--- Transport resets ---" > "$OUTDIR/summary-transport.txt" + grep -i "re-bootstrap\|force.*bootstrap\|dialing to\|connected to" "$OUTDIR/ctrld-events.log" >> "$OUTDIR/summary-transport.txt" 2>/dev/null || true + + # Count key events + PF_WIPE_COUNT=$(grep -c "anchor.*missing\|restoring pf" "$OUTDIR/ctrld-events.log" 2>/dev/null || echo 0) + FORCE_REBOOT_COUNT=$(grep -c "force re-bootstrapping\|force-reset" "$OUTDIR/ctrld-events.log" 2>/dev/null || echo 0) + DEADLINE_COUNT=$(grep -c "context deadline exceeded" 
"$OUTDIR/ctrld-events.log" 2>/dev/null || echo 0) + FALLBACK_COUNT=$(grep -c "OS resolver retry query successful" "$OUTDIR/ctrld-events.log" 2>/dev/null || echo 0) + fi + + echo "" + echo "=========================================" + echo " DIAGNOSTIC SUMMARY" + echo "=========================================" + echo "Duration: $ITER iterations (~$((ITER / 2))s)" + echo "" + echo "pf Anchor Status:" + echo " Present: $ANCHOR_PRESENT times" + echo " Missing: $ANCHOR_MISSING times" + echo "" + echo "dig Tests (popads.net):" + echo " Success: $DIG_OK" + echo " Failed: $DIG_FAIL" + echo "" + echo "ctrld Log Events:" + echo " pf wipes detected: $PF_WIPE_COUNT" + echo " Force rebootstraps: $FORCE_REBOOT_COUNT" + echo " Context deadline errors: ${DEADLINE_COUNT:-0}" + echo " OS resolver fallbacks: ${FALLBACK_COUNT:-0}" + echo "" + echo "Last tunnel interfaces: ${LAST_TUNNEL_IFACES:-none}" + echo "" + echo "Files saved to: $OUTDIR/" + echo " final-pfctl.txt — full pfctl -sa at exit" + echo " final-scutil.txt — scutil --dns at exit" + echo " ctrld-events.log — ctrld log during test" + echo " summary-watchdog.txt — watchdog events" + echo " summary-errors.txt — errors" + echo " summary-transport.txt — transport reset events" + echo " timeline.log — per-iteration state" + echo "=========================================" + exit 0 +} + +trap cleanup INT TERM + +while true; do + ITER=$((ITER + 1)) + NOW=$(date '+%H:%M:%S.%3N' 2>/dev/null || date '+%H:%M:%S') + + # 1. Check pf anchor presence + ANCHOR_STATUS="MISSING" + if pfctl -sr 2>/dev/null | grep -q "com.controld.ctrld"; then + ANCHOR_STATUS="PRESENT" + ANCHOR_PRESENT=$((ANCHOR_PRESENT + 1)) + else + ANCHOR_MISSING=$((ANCHOR_MISSING + 1)) + fi + + # 2. 
Check tunnel interfaces + TUNNEL_IFACES=$(ifconfig -l 2>/dev/null | tr ' ' '\n' | grep -E '^(utun|ipsec|ppp|tap|tun)' | \ + while read iface; do + # Only list interfaces that are UP and have an IP + if ifconfig "$iface" 2>/dev/null | grep -q "inet "; then + echo -n "$iface " + fi + done) + TUNNEL_IFACES=$(echo "$TUNNEL_IFACES" | xargs) # trim + if [ -n "$TUNNEL_IFACES" ]; then + LAST_TUNNEL_IFACES="$TUNNEL_IFACES" + fi + + # 3. Count rdr states (three-part = intercepted) + RDR_COUNT=$(pfctl -ss 2>/dev/null | grep -c "127.0.0.1:53 <-" || echo 0) + + # 4. Quick dig test (0.5s timeout) + DIG_RESULT="SKIP" + if [ $((ITER % 4)) -eq 0 ]; then # every 2 seconds + if dig +time=1 +tries=1 popads.net A @127.0.0.1 +short >/dev/null 2>&1; then + DIG_RESULT="OK" + DIG_OK=$((DIG_OK + 1)) + else + DIG_RESULT="FAIL" + DIG_FAIL=$((DIG_FAIL + 1)) + fi + fi + + # 5. Check latest ctrld log for recent errors + RECENT_ERR="" + if [ -f "$CTRLD_LOG" ]; then + RECENT_ERR=$(tail -5 "$CTRLD_LOG" 2>/dev/null | grep -o '"message":"[^"]*deadline[^"]*"' | tail -1 || true) + fi + + # Output timeline + LINE="[$NOW] anchor=$ANCHOR_STATUS rdr_states=$RDR_COUNT tunnels=[$TUNNEL_IFACES] dig=$DIG_RESULT $RECENT_ERR" + echo "$LINE" + echo "$LINE" >> "$OUTDIR/timeline.log" + + sleep 0.5 +done diff --git a/test-scripts/darwin/test-dns-intercept.sh b/test-scripts/darwin/test-dns-intercept.sh new file mode 100644 index 00000000..b54e9c15 --- /dev/null +++ b/test-scripts/darwin/test-dns-intercept.sh @@ -0,0 +1,556 @@ +#!/bin/bash +# ============================================================================= +# DNS Intercept Mode Test Script — macOS (pf) +# ============================================================================= +# Run as root: sudo bash test-dns-intercept-mac.sh +# +# Tests the dns-intercept feature end-to-end with validation at each step. +# Logs are read from /tmp/dns.log (ctrld log location on test machine). +# +# Manual steps marked with [MANUAL] require human interaction. 
+# ============================================================================= + +set -euo pipefail + +CTRLD_LOG="/tmp/dns.log" +PF_ANCHOR="com.controld.ctrld" +PASS=0 +FAIL=0 +WARN=0 +RESULTS=() + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +BOLD='\033[1m' +NC='\033[0m' + +header() { echo -e "\n${CYAN}${BOLD}━━━ $1 ━━━${NC}"; } +info() { echo -e " ${BOLD}ℹ${NC} $1"; } +pass() { echo -e " ${GREEN}✅ PASS${NC}: $1"; PASS=$((PASS+1)); RESULTS+=("PASS: $1"); } +fail() { echo -e " ${RED}❌ FAIL${NC}: $1"; FAIL=$((FAIL+1)); RESULTS+=("FAIL: $1"); } +warn() { echo -e " ${YELLOW}⚠️ WARN${NC}: $1"; WARN=$((WARN+1)); RESULTS+=("WARN: $1"); } +manual() { echo -e " ${YELLOW}[MANUAL]${NC} $1"; } +separator() { echo -e "${CYAN}─────────────────────────────────────────────────────${NC}"; } + +check_root() { + if [[ $EUID -ne 0 ]]; then + echo -e "${RED}This script must be run as root (sudo).${NC}" + exit 1 + fi +} + +wait_for_key() { + echo -e "\n Press ${BOLD}Enter${NC} to continue..." + read -r +} + +# Grep recent log entries (last N lines) +log_grep() { + local pattern="$1" + local lines="${2:-200}" + tail -n "$lines" "$CTRLD_LOG" 2>/dev/null | grep -i "$pattern" 2>/dev/null || true +} + +log_grep_count() { + local pattern="$1" + local lines="${2:-200}" + tail -n "$lines" "$CTRLD_LOG" 2>/dev/null | grep -ci "$pattern" 2>/dev/null || echo "0" +} + +# ============================================================================= +# TEST SECTIONS +# ============================================================================= + +test_prereqs() { + header "0. 
Prerequisites" + + if command -v pfctl &>/dev/null; then + pass "pfctl available" + else + fail "pfctl not found" + exit 1 + fi + + if [[ -f "$CTRLD_LOG" ]]; then + pass "ctrld log exists at $CTRLD_LOG" + else + warn "ctrld log not found at $CTRLD_LOG — log checks will be skipped" + fi + + if command -v dig &>/dev/null; then + pass "dig available" + else + fail "dig not found — install bind tools" + exit 1 + fi + + info "Default route interface: $(route -n get default 2>/dev/null | grep interface | awk '{print $2}' || echo 'unknown')" + info "Current DNS servers:" + scutil --dns | grep "nameserver\[" | head -5 | sed 's/^/ /' +} + +test_pf_state() { + header "1. PF State Validation" + + # Is pf enabled? + local pf_status + pf_status=$(pfctl -si 2>&1 | grep "Status:" || true) + if echo "$pf_status" | grep -q "Enabled"; then + pass "pf is enabled" + else + fail "pf is NOT enabled (status: $pf_status)" + fi + + # Is our anchor referenced in the running ruleset? + local sr_match sn_match + sr_match=$(pfctl -sr 2>&1 | grep "$PF_ANCHOR" || true) + sn_match=$(pfctl -sn 2>&1 | grep "$PF_ANCHOR" || true) + + if [[ -n "$sr_match" ]]; then + pass "anchor '$PF_ANCHOR' found in filter rules (pfctl -sr)" + info " $sr_match" + else + fail "anchor '$PF_ANCHOR' NOT in filter rules — main ruleset doesn't reference it" + fi + + if [[ -n "$sn_match" ]]; then + pass "rdr-anchor '$PF_ANCHOR' found in NAT rules (pfctl -sn)" + info " $sn_match" + else + fail "rdr-anchor '$PF_ANCHOR' NOT in NAT rules — redirect won't work" + fi + + # Check anchor rules + separator + info "Anchor filter rules (pfctl -a '$PF_ANCHOR' -sr):" + local anchor_sr + anchor_sr=$(pfctl -a "$PF_ANCHOR" -sr 2>&1 | grep -v "ALTQ" || true) + if [[ -n "$anchor_sr" ]]; then + echo "$anchor_sr" | sed 's/^/ /' + # Check for route-to rules + if echo "$anchor_sr" | grep -q "route-to"; then + pass "route-to lo0 rules present (needed for local traffic interception)" + else + warn "No route-to rules found — local DNS may not be 
intercepted" + fi + else + fail "No filter rules in anchor" + fi + + info "Anchor redirect rules (pfctl -a '$PF_ANCHOR' -sn):" + local anchor_sn + anchor_sn=$(pfctl -a "$PF_ANCHOR" -sn 2>&1 | grep -v "ALTQ" || true) + if [[ -n "$anchor_sn" ]]; then + echo "$anchor_sn" | sed 's/^/ /' + if echo "$anchor_sn" | grep -q "rdr.*lo0.*port = 53"; then + pass "rdr rules on lo0 present (redirect DNS to ctrld)" + else + warn "rdr rules don't match expected pattern" + fi + else + fail "No redirect rules in anchor" + fi + + # Check anchor file exists + if [[ -f "/etc/pf.anchors/$PF_ANCHOR" ]]; then + pass "Anchor file exists: /etc/pf.anchors/$PF_ANCHOR" + else + fail "Anchor file missing: /etc/pf.anchors/$PF_ANCHOR" + fi + + # Check pf.conf was NOT modified + if grep -q "$PF_ANCHOR" /etc/pf.conf 2>/dev/null; then + warn "pf.conf contains '$PF_ANCHOR' reference — should NOT be modified on disk" + else + pass "pf.conf NOT modified on disk (anchor injected at runtime only)" + fi +} + +test_dns_interception() { + header "2. DNS Interception Tests" + + # Mark position in log + local log_lines_before=0 + if [[ -f "$CTRLD_LOG" ]]; then + log_lines_before=$(wc -l < "$CTRLD_LOG") + fi + + # Test 1: Query to external resolver should be intercepted + info "Test: dig @8.8.8.8 example.com (should be intercepted by ctrld)" + local dig_result + dig_result=$(dig @8.8.8.8 example.com +short +timeout=5 2>&1 || true) + + if [[ -n "$dig_result" ]] && ! 
echo "$dig_result" | grep -q "timed out"; then + pass "dig @8.8.8.8 returned result: $dig_result" + else + fail "dig @8.8.8.8 failed or timed out" + fi + + # Check if ctrld logged the query + sleep 1 + if [[ -f "$CTRLD_LOG" ]]; then + local intercepted + intercepted=$(tail -n +$((log_lines_before+1)) "$CTRLD_LOG" | grep -c "example.com" || echo "0") + if [[ "$intercepted" -gt 0 ]]; then + pass "ctrld logged the intercepted query for example.com" + else + fail "ctrld did NOT log query for example.com — interception may not be working" + fi + fi + + # Check dig reports ctrld answered (not 8.8.8.8) + local full_dig + full_dig=$(dig @8.8.8.8 example.com +timeout=5 2>&1 || true) + local server_line + server_line=$(echo "$full_dig" | grep "SERVER:" || true) + info "dig SERVER line: $server_line" + if echo "$server_line" | grep -q "127.0.0.1"; then + pass "Response came from 127.0.0.1 (ctrld intercepted)" + elif echo "$server_line" | grep -q "8.8.8.8"; then + fail "Response came from 8.8.8.8 directly — NOT intercepted" + else + warn "Could not determine response server from dig output" + fi + + separator + + # Test 2: Query to another external resolver + info "Test: dig @1.1.1.1 cloudflare.com (should also be intercepted)" + local dig2 + dig2=$(dig @1.1.1.1 cloudflare.com +short +timeout=5 2>&1 || true) + if [[ -n "$dig2" ]] && ! echo "$dig2" | grep -q "timed out"; then + pass "dig @1.1.1.1 returned result" + else + fail "dig @1.1.1.1 failed or timed out" + fi + + separator + + # Test 3: Query to localhost should work (not double-redirected) + info "Test: dig @127.0.0.1 example.org (direct to ctrld, should NOT be redirected)" + local dig3 + dig3=$(dig @127.0.0.1 example.org +short +timeout=5 2>&1 || true) + if [[ -n "$dig3" ]] && ! 
echo "$dig3" | grep -q "timed out"; then + pass "dig @127.0.0.1 works (no loop)" + else + fail "dig @127.0.0.1 failed — possible redirect loop" + fi + + separator + + # Test 4: System DNS resolution + info "Test: host example.net (system resolver, should go through ctrld)" + local host_result + host_result=$(host example.net 2>&1 || true) + if echo "$host_result" | grep -q "has address"; then + pass "System DNS resolution works via host command" + else + fail "System DNS resolution failed" + fi + + separator + + # Test 5: TCP DNS query + info "Test: dig @9.9.9.9 example.com +tcp (TCP DNS should also be intercepted)" + local dig_tcp + dig_tcp=$(dig @9.9.9.9 example.com +tcp +short +timeout=5 2>&1 || true) + if [[ -n "$dig_tcp" ]] && ! echo "$dig_tcp" | grep -q "timed out"; then + pass "TCP DNS query intercepted and resolved" + else + warn "TCP DNS query failed (may not be critical if UDP works)" + fi +} + +test_non_dns_unaffected() { + header "3. Non-DNS Traffic Unaffected" + + # HTTPS should work fine + info "Test: curl https://example.com (HTTPS port 443 should NOT be affected)" + local curl_result + curl_result=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 https://example.com 2>&1 || echo "000") + if [[ "$curl_result" == "200" ]] || [[ "$curl_result" == "301" ]] || [[ "$curl_result" == "302" ]]; then + pass "HTTPS works (HTTP $curl_result)" + else + fail "HTTPS failed (HTTP $curl_result) — pf may be affecting non-DNS traffic" + fi + + # SSH-style connection test (port 22 should be unaffected) + info "Test: nc -z -w5 github.com 22 (SSH port should NOT be affected)" + if nc -z -w5 github.com 22 2>/dev/null; then + pass "SSH port reachable (non-DNS traffic unaffected)" + else + warn "SSH port unreachable (may be firewall, not necessarily our fault)" + fi +} + +test_ctrld_log_health() { + header "4. ctrld Log Health Check" + + if [[ ! 
-f "$CTRLD_LOG" ]]; then + warn "Skipping log checks — $CTRLD_LOG not found" + return + fi + + # Check for intercept initialization + if log_grep "DNS intercept.*initializing" 500 | grep -q "."; then + pass "DNS intercept initialization logged" + else + fail "No DNS intercept initialization in recent logs" + fi + + # Check for successful anchor load + if log_grep "pf anchor.*active" 500 | grep -q "."; then + pass "PF anchor reported as active" + else + fail "PF anchor not reported as active" + fi + + # Check for anchor reference injection + if log_grep "anchor reference active" 500 | grep -q "."; then + pass "Anchor reference injected into running ruleset" + else + fail "Anchor reference NOT injected — this is the critical step" + fi + + # Check for errors + separator + info "Recent errors/warnings in ctrld log:" + local errors + errors=$(log_grep '"level":"error"' 500) + if [[ -n "$errors" ]]; then + echo "$errors" | tail -5 | sed 's/^/ /' + warn "Errors found in recent logs (see above)" + else + pass "No errors in recent logs" + fi + + local warnings + warnings=$(log_grep '"level":"warn"' 500 | grep -v "skipping self-upgrade" || true) + if [[ -n "$warnings" ]]; then + echo "$warnings" | tail -5 | sed 's/^/ /' + info "(warnings above may be expected)" + fi + + # Check for recovery bypass state + if log_grep "recoveryBypass\|recovery bypass\|prepareForRecovery" 500 | grep -q "."; then + info "Recovery bypass activity detected in logs" + log_grep "recovery" 500 | tail -3 | sed 's/^/ /' + fi + + # Check for VPN DNS detection + if log_grep "VPN DNS" 500 | grep -q "."; then + info "VPN DNS activity in logs:" + log_grep "VPN DNS" 500 | tail -5 | sed 's/^/ /' + else + info "No VPN DNS activity (expected if no VPN is connected)" + fi +} + +test_pf_counters() { + header "5. PF Statistics & Counters" + + info "PF info (pfctl -si):" + pfctl -si 2>&1 | grep -v "ALTQ" | head -15 | sed 's/^/ /' + + info "PF state table entries:" + pfctl -ss 2>&1 | grep -c "." 
| sed 's/^/ States: /' + + # Count evaluations of our anchor + info "Anchor-specific stats (if available):" + local anchor_info + anchor_info=$(pfctl -a "$PF_ANCHOR" -si 2>&1 | grep -v "ALTQ" || true) + if [[ -n "$anchor_info" ]]; then + echo "$anchor_info" | head -10 | sed 's/^/ /' + else + info " (no per-anchor stats available)" + fi +} + +test_cleanup_on_stop() { + header "6. Cleanup Validation (After ctrld Stop)" + + manual "Stop ctrld now (Ctrl+C or 'ctrld stop'), then press Enter" + wait_for_key + + # Check anchor is flushed + local anchor_rules_after + anchor_rules_after=$(pfctl -a "$PF_ANCHOR" -sr 2>&1 | grep -v "ALTQ" | grep -v "^$" || true) + if [[ -z "$anchor_rules_after" ]]; then + pass "Anchor filter rules flushed after stop" + else + fail "Anchor filter rules still present after stop" + echo "$anchor_rules_after" | sed 's/^/ /' + fi + + local anchor_rdr_after + anchor_rdr_after=$(pfctl -a "$PF_ANCHOR" -sn 2>&1 | grep -v "ALTQ" | grep -v "^$" || true) + if [[ -z "$anchor_rdr_after" ]]; then + pass "Anchor redirect rules flushed after stop" + else + fail "Anchor redirect rules still present after stop" + fi + + # Check anchor file removed + if [[ ! -f "/etc/pf.anchors/$PF_ANCHOR" ]]; then + pass "Anchor file removed after stop" + else + fail "Anchor file still exists: /etc/pf.anchors/$PF_ANCHOR" + fi + + # Check pf.conf is clean + if ! grep -q "$PF_ANCHOR" /etc/pf.conf 2>/dev/null; then + pass "pf.conf is clean (no ctrld references)" + else + fail "pf.conf still has ctrld references after stop" + fi + + # DNS should work normally without ctrld + info "Test: dig example.com (should resolve via system DNS)" + local dig_after + dig_after=$(dig example.com +short +timeout=5 2>&1 || true) + if [[ -n "$dig_after" ]] && ! echo "$dig_after" | grep -q "timed out"; then + pass "DNS works after ctrld stop" + else + fail "DNS broken after ctrld stop — cleanup may have failed" + fi +} + +test_restart_resilience() { + header "7. 
Restart Resilience" + + manual "Start ctrld again with --dns-intercept, then press Enter" + wait_for_key + + sleep 3 + + # Re-run pf state checks + local sr_match sn_match + sr_match=$(pfctl -sr 2>&1 | grep "$PF_ANCHOR" || true) + sn_match=$(pfctl -sn 2>&1 | grep "$PF_ANCHOR" || true) + + if [[ -n "$sr_match" ]] && [[ -n "$sn_match" ]]; then + pass "Anchor references restored after restart" + else + fail "Anchor references NOT restored after restart" + fi + + # Quick interception test + local dig_after_restart + dig_after_restart=$(dig @8.8.8.8 example.com +short +timeout=5 2>&1 || true) + if [[ -n "$dig_after_restart" ]] && ! echo "$dig_after_restart" | grep -q "timed out"; then + pass "DNS interception works after restart" + else + fail "DNS interception broken after restart" + fi +} + +test_network_change() { + header "8. Network Change Recovery" + + info "This test verifies recovery after network changes." + manual "Switch Wi-Fi networks (or disconnect/reconnect Ethernet), then press Enter" + wait_for_key + + sleep 5 + + # Check pf rules still active + local sr_after sn_after + sr_after=$(pfctl -sr 2>&1 | grep "$PF_ANCHOR" || true) + sn_after=$(pfctl -sn 2>&1 | grep "$PF_ANCHOR" || true) + + if [[ -n "$sr_after" ]] && [[ -n "$sn_after" ]]; then + pass "Anchor references survived network change" + else + fail "Anchor references lost after network change" + fi + + # Check interception still works + local dig_after_net + dig_after_net=$(dig @8.8.8.8 example.com +short +timeout=10 2>&1 || true) + if [[ -n "$dig_after_net" ]] && ! 
echo "$dig_after_net" | grep -q "timed out"; then + pass "DNS interception works after network change" + else + fail "DNS interception broken after network change" + fi + + # Check logs for recovery bypass activity + if [[ -f "$CTRLD_LOG" ]]; then + local recovery_logs + recovery_logs=$(log_grep "recovery\|network change\|network monitor" 100) + if [[ -n "$recovery_logs" ]]; then + info "Recovery/network change log entries:" + echo "$recovery_logs" | tail -5 | sed 's/^/ /' + fi + fi +} + +# ============================================================================= +# SUMMARY +# ============================================================================= + +print_summary() { + header "TEST SUMMARY" + echo "" + for r in "${RESULTS[@]}"; do + if [[ "$r" == PASS* ]]; then + echo -e " ${GREEN}✅${NC} ${r#PASS: }" + elif [[ "$r" == FAIL* ]]; then + echo -e " ${RED}❌${NC} ${r#FAIL: }" + elif [[ "$r" == WARN* ]]; then + echo -e " ${YELLOW}⚠️${NC} ${r#WARN: }" + fi + done + echo "" + separator + echo -e " ${GREEN}Passed: $PASS${NC} | ${RED}Failed: $FAIL${NC} | ${YELLOW}Warnings: $WARN${NC}" + separator + + if [[ $FAIL -gt 0 ]]; then + echo -e "\n ${RED}${BOLD}Some tests failed.${NC} Check output above for details." 
+ echo -e " Useful debug commands:" + echo -e " pfctl -a '$PF_ANCHOR' -sr # anchor filter rules" + echo -e " pfctl -a '$PF_ANCHOR' -sn # anchor redirect rules" + echo -e " pfctl -sr | grep controld # main ruleset references" + echo -e " tail -100 $CTRLD_LOG # recent ctrld logs" + else + echo -e "\n ${GREEN}${BOLD}All tests passed!${NC}" + fi +} + +# ============================================================================= +# MAIN +# ============================================================================= + +echo -e "${BOLD}╔═══════════════════════════════════════════════════════╗${NC}" +echo -e "${BOLD}║ ctrld DNS Intercept Mode — macOS Test Suite ║${NC}" +echo -e "${BOLD}║ Tests pf-based DNS interception (route-to + rdr) ║${NC}" +echo -e "${BOLD}╚═══════════════════════════════════════════════════════╝${NC}" + +check_root + +echo "" +echo "Make sure ctrld is running with --dns-intercept before starting." +echo "Log location: $CTRLD_LOG" +wait_for_key + +test_prereqs +test_pf_state +test_dns_interception +test_non_dns_unaffected +test_ctrld_log_health +test_pf_counters + +separator +echo "" +echo "The next tests require manual steps (stop/start ctrld, network changes)." +echo "Press Enter to continue, or Ctrl+C to skip and see results so far." +wait_for_key + +test_cleanup_on_stop +test_restart_resilience +test_network_change + +print_summary diff --git a/test-scripts/darwin/test-pf-group-exemption.sh b/test-scripts/darwin/test-pf-group-exemption.sh new file mode 100644 index 00000000..9f47805b --- /dev/null +++ b/test-scripts/darwin/test-pf-group-exemption.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# Test: pf group-based exemption for DNS intercept +# Run as root: sudo bash test-pf-group-exemption.sh + +set -e + +GROUP_NAME="_ctrld" +ANCHOR="com.controld.test" +TEST_DNS="1.1.1.1" + +echo "=== Step 1: Create test group ===" +if dscl . 
-read /Groups/$GROUP_NAME PrimaryGroupID &>/dev/null; then + echo "Group $GROUP_NAME already exists" +else + # Find an unused GID in 350-450 range + USED_GIDS=$(dscl . -list /Groups PrimaryGroupID 2>/dev/null | awk '{print $2}' | sort -n) + GROUP_ID="" + for gid in $(seq 350 450); do + if ! echo "$USED_GIDS" | grep -q "^${gid}$"; then + GROUP_ID=$gid + break + fi + done + if [ -z "$GROUP_ID" ]; then + echo "ERROR: Could not find unused GID in 350-450 range" + exit 1 + fi + dscl . -create /Groups/$GROUP_NAME + dscl . -create /Groups/$GROUP_NAME PrimaryGroupID $GROUP_ID + dscl . -create /Groups/$GROUP_NAME RealName "Control D DNS Intercept" + echo "Created group $GROUP_NAME (GID $GROUP_ID)" +fi + +ACTUAL_GID=$(dscl . -read /Groups/$GROUP_NAME PrimaryGroupID | awk '{print $2}') +echo "GID: $ACTUAL_GID" + +echo "" +echo "=== Step 2: Enable pf ===" +pfctl -e 2>&1 || true + +echo "" +echo "=== Step 3: Set up pf anchor with group exemption ===" + +cat > /tmp/pf-group-test-anchor.conf << RULES +# Translation: redirect DNS on loopback to our listener +rdr pass on lo0 inet proto udp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53 +rdr pass on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53 + +# Exemption: only group _ctrld can talk to $TEST_DNS directly +pass out quick on ! lo0 inet proto { udp, tcp } from any to $TEST_DNS port 53 group $GROUP_NAME + +# Intercept everything else +pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! 127.0.0.1 port 53 +pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! 
127.0.0.1 port 53 +pass in quick on lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53 +RULES + +pfctl -a $ANCHOR -f /tmp/pf-group-test-anchor.conf 2>/dev/null +echo "Loaded anchor $ANCHOR" + +# Inject anchor refs into running ruleset +NAT_RULES=$(pfctl -sn 2>/dev/null | grep -v "ALTQ" | grep -v "^$") +FILTER_RULES=$(pfctl -sr 2>/dev/null | grep -v "ALTQ" | grep -v "^$") +SCRUB_RULES=$(echo "$FILTER_RULES" | grep "^scrub" || true) +PURE_FILTER=$(echo "$FILTER_RULES" | grep -v "^scrub" | grep -v "com.controld.test" || true) +CLEAN_NAT=$(echo "$NAT_RULES" | grep -v "com.controld.test" || true) + +{ + [ -n "$SCRUB_RULES" ] && echo "$SCRUB_RULES" + [ -n "$CLEAN_NAT" ] && echo "$CLEAN_NAT" + echo "rdr-anchor \"$ANCHOR\"" + echo "anchor \"$ANCHOR\"" + [ -n "$PURE_FILTER" ] && echo "$PURE_FILTER" +} | pfctl -f - 2>/dev/null + +echo "Injected anchor references (no duplicates)" + +echo "" +echo "=== Step 4: Verify rules ===" +echo "NAT rules:" +pfctl -sn 2>/dev/null | grep -v ALTQ +echo "" +echo "Anchor filter rules:" +pfctl -a $ANCHOR -sr 2>/dev/null | grep -v ALTQ +echo "" +echo "Anchor NAT rules:" +pfctl -a $ANCHOR -sn 2>/dev/null | grep -v ALTQ + +echo "" +echo "=== Step 5: Build setgid test binary ===" +# We need a binary that runs with effective group _ctrld. +# sudo -g doesn't work on macOS, so we use a setgid binary. +cat > /tmp/test-dns-group.c << 'EOF' +#include +int main() { + char *args[] = {"dig", "+short", "+timeout=3", "+tries=1", "@1.1.1.1", "popads.net", NULL}; + execvp("dig", args); + return 1; +} +EOF +cc -o /tmp/test-dns-group /tmp/test-dns-group.c +chgrp $GROUP_NAME /tmp/test-dns-group +chmod g+s /tmp/test-dns-group +echo "Built setgid binary /tmp/test-dns-group (group: $GROUP_NAME)" + +echo "" +echo "=== Step 6: Test as regular user (should be INTERCEPTED) ===" +echo "Running: dig @$TEST_DNS popads.net (as root / group wheel — no group exemption)" +echo "If nothing listens on 127.0.0.1:53, this should timeout." 
+DIG_RESULT=$(dig +short +timeout=3 +tries=1 @$TEST_DNS popads.net 2>&1 || true) +echo "Result: ${DIG_RESULT:-TIMEOUT/INTERCEPTED}" + +echo "" +echo "=== Step 7: Test as group _ctrld (should BYPASS) ===" +echo "Running: setgid binary (effective group: $GROUP_NAME)" +BYPASS_RESULT=$(/tmp/test-dns-group 2>&1 || true) +echo "Result: ${BYPASS_RESULT:-TIMEOUT/BLOCKED}" + +echo "" +echo "=== Results ===" +PASS=true +if [[ -z "$DIG_RESULT" || "$DIG_RESULT" == *"timed out"* || "$DIG_RESULT" == *"connection refused"* ]]; then + echo "✅ Regular query INTERCEPTED (redirected away from $TEST_DNS)" +else + echo "❌ Regular query NOT intercepted — got: $DIG_RESULT" + PASS=false +fi + +if [[ -n "$BYPASS_RESULT" && "$BYPASS_RESULT" != *"timed out"* && "$BYPASS_RESULT" != *"connection refused"* && "$BYPASS_RESULT" != *"TIMEOUT"* ]]; then + echo "✅ Group _ctrld query BYPASSED — got: $BYPASS_RESULT" +else + echo "❌ Group _ctrld query was also intercepted — got: ${BYPASS_RESULT:-TIMEOUT}" + PASS=false +fi + +if $PASS; then + echo "" + echo "🎉 GROUP EXEMPTION WORKS — this approach is viable for dns-intercept mode" +fi + +echo "" +echo "=== Cleanup ===" +pfctl -a $ANCHOR -F all 2>/dev/null +pfctl -f /etc/pf.conf 2>/dev/null +rm -f /tmp/pf-group-test-anchor.conf /tmp/test-dns-group /tmp/test-dns-group.c +echo "Cleaned up. Group $GROUP_NAME left in place." +echo "To remove: sudo dscl . -delete /Groups/$GROUP_NAME" diff --git a/test-scripts/darwin/test-recovery-bypass.sh b/test-scripts/darwin/test-recovery-bypass.sh new file mode 100755 index 00000000..f5aad7e7 --- /dev/null +++ b/test-scripts/darwin/test-recovery-bypass.sh @@ -0,0 +1,301 @@ +#!/bin/bash +# test-recovery-bypass.sh — Test DNS intercept recovery bypass (captive portal simulation) +# +# Simulates a captive portal by: +# 1. Discovering ctrld's upstream IPs from active connections +# 2. Blackholing ALL of them via route table +# 3. Cycling wifi to trigger network change → recovery flow +# 4. 
Verifying recovery bypass forwards to OS/DHCP resolver +# 5. Unblocking and verifying normal operation resumes +# +# SAFE: Uses route add/delete + networksetup — cleaned up on exit (including Ctrl+C). +# +# Usage: sudo bash test-recovery-bypass.sh [wifi_interface] +# wifi_interface defaults to en0 +# +# Prerequisites: +# - ctrld running with --dns-intercept and -v 1 --log /tmp/dns.log +# - Run as root (sudo) + +set -euo pipefail + +WIFI_IFACE="${1:-en0}" +CTRLD_LOG="/tmp/dns.log" +BLOCKED_IPS=() + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m' +log() { echo -e "${CYAN}[$(date +%H:%M:%S)]${NC} $*"; } +pass() { echo -e "${GREEN}[PASS]${NC} $*"; } +fail() { echo -e "${RED}[FAIL]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } + +# ── Safety: always clean up on exit ────────────────────────────────────────── +cleanup() { + echo "" + log "═══ CLEANUP ═══" + + # Ensure wifi is on + log "Ensuring wifi is on..." + networksetup -setairportpower "$WIFI_IFACE" on 2>/dev/null || true + + # Remove all blackhole routes + for ip in "${BLOCKED_IPS[@]}"; do + route delete -host "$ip" 2>/dev/null && log "Removed route for $ip" || true + done + + log "Cleanup complete. Internet should be restored." + log "(If not, run: sudo networksetup -setairportpower $WIFI_IFACE on)" +} +trap cleanup EXIT INT TERM + +# ── Pre-checks ─────────────────────────────────────────────────────────────── +if [[ $EUID -ne 0 ]]; then + echo "Run as root: sudo bash $0 $*" + exit 1 +fi + +if [[ ! -f "$CTRLD_LOG" ]]; then + fail "ctrld log not found at $CTRLD_LOG" + echo "Start ctrld with: ctrld run --dns-intercept --cd -v 1 --log $CTRLD_LOG" + exit 1 +fi + +# Check wifi interface exists +if ! 
networksetup -getairportpower "$WIFI_IFACE" >/dev/null 2>&1; then + fail "Wifi interface $WIFI_IFACE not found" + echo "Try: networksetup -listallhardwareports" + exit 1 +fi + +log "═══════════════════════════════════════════════════════════" +log " Recovery Bypass Test (Captive Portal Simulation)" +log "═══════════════════════════════════════════════════════════" +log "Wifi interface: $WIFI_IFACE" +log "ctrld log: $CTRLD_LOG" +echo "" + +# ── Phase 1: Discover upstream IPs ────────────────────────────────────────── +log "Phase 1: Discovering ctrld upstream IPs from active connections" + +# Find ctrld's established connections (DoH uses port 443) +CTRLD_CONNS=$(lsof -i -n -P 2>/dev/null | grep -i ctrld | grep ESTABLISHED || true) +if [[ -z "$CTRLD_CONNS" ]]; then + warn "No established ctrld connections found via lsof" + warn "Trying: ss/netstat fallback..." + CTRLD_CONNS=$(netstat -an 2>/dev/null | grep "\.443 " | grep ESTABLISHED || true) +fi + +echo "$CTRLD_CONNS" | head -10 | while read -r line; do + log " $line" +done + +# Extract unique remote IPs from ctrld connections +UPSTREAM_IPS=() +while IFS= read -r ip; do + [[ -n "$ip" ]] && UPSTREAM_IPS+=("$ip") +done < <(echo "$CTRLD_CONNS" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -u | while read -r ip; do + # Filter out local/private IPs — we only want the upstream DoH server IPs + if [[ ! "$ip" =~ ^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.) ]]; then + echo "$ip" + fi +done) + +# Also try to resolve known Control D DoH endpoints +for host in dns.controld.com freedns.controld.com; do + for ip in $(dig +short "$host" 2>/dev/null || true); do + if [[ "$ip" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + UPSTREAM_IPS+=("$ip") + fi + done +done + +# Deduplicate +UPSTREAM_IPS=($(printf '%s\n' "${UPSTREAM_IPS[@]}" | sort -u)) + +if [[ ${#UPSTREAM_IPS[@]} -eq 0 ]]; then + fail "Could not discover any upstream IPs!" 
+ echo "Check: lsof -i -n -P | grep ctrld" + exit 1 +fi + +log "Found ${#UPSTREAM_IPS[@]} upstream IP(s):" +for ip in "${UPSTREAM_IPS[@]}"; do + log " $ip" +done +echo "" + +# ── Phase 2: Baseline check ───────────────────────────────────────────────── +log "Phase 2: Baseline — verify DNS works normally" +BASELINE=$(dig +short +timeout=5 example.com @127.0.0.1 2>/dev/null || true) +if [[ -z "$BASELINE" ]]; then + fail "DNS not working before test!" + exit 1 +fi +pass "Baseline: example.com → $BASELINE" + +LOG_LINES_BEFORE=$(wc -l < "$CTRLD_LOG" | tr -d ' ') +log "Log position: line $LOG_LINES_BEFORE" +echo "" + +# ── Phase 3: Block all upstream IPs ───────────────────────────────────────── +log "Phase 3: Blackholing all upstream IPs" +for ip in "${UPSTREAM_IPS[@]}"; do + route delete -host "$ip" 2>/dev/null || true # clean slate + route add -host "$ip" 127.0.0.1 2>/dev/null + BLOCKED_IPS+=("$ip") + log " Blocked: $ip → 127.0.0.1" +done +pass "All ${#UPSTREAM_IPS[@]} upstream IPs blackholed" +echo "" + +# ── Phase 4: Cycle wifi to trigger network change ─────────────────────────── +log "Phase 4: Cycling wifi to trigger network change event" +log " Turning wifi OFF..." +networksetup -setairportpower "$WIFI_IFACE" off +sleep 3 + +log " Turning wifi ON..." +networksetup -setairportpower "$WIFI_IFACE" on + +log " Waiting for wifi to reconnect (up to 15s)..." +WIFI_UP=false +for i in $(seq 1 15); do + # Check if we have an IP on the wifi interface + IF_IP=$(ipconfig getifaddr "$WIFI_IFACE" 2>/dev/null || true) + if [[ -n "$IF_IP" ]]; then + WIFI_UP=true + pass "Wifi reconnected: $WIFI_IFACE → $IF_IP" + break + fi + sleep 1 +done + +if [[ "$WIFI_UP" == "false" ]]; then + fail "Wifi did not reconnect in 15s!" + warn "Cleaning up and exiting..." + exit 1 +fi + +log " Waiting 5s for ctrld network monitor to fire..." 
+sleep 5 +echo "" + +# ── Phase 5: Query and watch for recovery ──────────────────────────────────── +log "Phase 5: Sending queries — upstream is blocked, recovery should activate" +log " (ctrld should detect upstream failure → enable recovery bypass → use DHCP DNS)" +echo "" + +RECOVERY_DETECTED=false +BYPASS_ACTIVE=false +DNS_DURING_BYPASS=false +QUERY_COUNT=0 + +for i in $(seq 1 30); do + QUERY_COUNT=$((QUERY_COUNT + 1)) + RESULT=$(dig +short +timeout=3 "example.com" @127.0.0.1 2>/dev/null || true) + + if [[ -n "$RESULT" ]]; then + log " Query #$QUERY_COUNT: example.com → $RESULT ✓" + else + log " Query #$QUERY_COUNT: example.com → FAIL ✗" + fi + + # Check logs + NEW_LOGS=$(tail -n +$((LOG_LINES_BEFORE + 1)) "$CTRLD_LOG" 2>/dev/null || true) + + if [[ "$RECOVERY_DETECTED" == "false" ]] && echo "$NEW_LOGS" | grep -qiE "enabling DHCP bypass|triggering recovery|No healthy"; then + echo "" + pass "🎯 Recovery flow triggered!" + RECOVERY_DETECTED=true + echo "$NEW_LOGS" | grep -iE "recovery|bypass|DHCP|No healthy|network change" | tail -8 | while read -r line; do + echo " 📋 $line" + done + echo "" + fi + + if [[ "$BYPASS_ACTIVE" == "false" ]] && echo "$NEW_LOGS" | grep -qi "Recovery bypass active"; then + pass "🔄 Recovery bypass is forwarding queries to OS/DHCP resolver" + BYPASS_ACTIVE=true + fi + + if [[ "$RECOVERY_DETECTED" == "true" && -n "$RESULT" ]]; then + pass "✅ DNS resolves during recovery bypass: example.com → $RESULT" + DNS_DURING_BYPASS=true + break + fi + + sleep 2 +done + +# ── Phase 6: Show all recovery-related log entries ────────────────────────── +echo "" +log "Phase 6: All recovery-related ctrld log entries" +log "────────────────────────────────────────────────" +NEW_LOGS=$(tail -n +$((LOG_LINES_BEFORE + 1)) "$CTRLD_LOG" 2>/dev/null || true) +RELEVANT=$(echo "$NEW_LOGS" | grep -iE "recovery|bypass|DHCP|unhealthy|upstream.*fail|No healthy|network change|network monitor|OS resolver" || true) +if [[ -n "$RELEVANT" ]]; then + echo "$RELEVANT" | head 
-40 | while read -r line; do + echo " $line" + done +else + warn "No recovery-related log entries found!" + log "Last 15 lines of ctrld log:" + tail -15 "$CTRLD_LOG" | while read -r line; do + echo " $line" + done +fi + +# ── Phase 7: Unblock and verify full recovery ─────────────────────────────── +echo "" +log "Phase 7: Unblocking upstream IPs" +for ip in "${BLOCKED_IPS[@]}"; do + route delete -host "$ip" 2>/dev/null && log " Unblocked: $ip" || true +done +BLOCKED_IPS=() # clear so cleanup doesn't double-delete +pass "All upstream IPs unblocked" + +log "Waiting for ctrld to recover (up to 30s)..." +LOG_LINES_UNBLOCK=$(wc -l < "$CTRLD_LOG" | tr -d ' ') +RECOVERY_COMPLETE=false + +for i in $(seq 1 15); do + dig +short +timeout=3 example.com @127.0.0.1 >/dev/null 2>&1 || true + POST_LOGS=$(tail -n +$((LOG_LINES_UNBLOCK + 1)) "$CTRLD_LOG" 2>/dev/null || true) + + if echo "$POST_LOGS" | grep -qiE "recovery complete|disabling DHCP bypass|Upstream.*recovered"; then + RECOVERY_COMPLETE=true + pass "ctrld recovered — normal operation resumed" + echo "$POST_LOGS" | grep -iE "recovery|recovered|bypass|disabling" | head -5 | while read -r line; do + echo " 📋 $line" + done + break + fi + sleep 2 +done + +[[ "$RECOVERY_COMPLETE" == "false" ]] && warn "Recovery completion not detected (may need more time)" + +# Final check +echo "" +log "Phase 8: Final DNS verification" +sleep 2 +FINAL=$(dig +short +timeout=5 example.com @127.0.0.1 2>/dev/null || true) +if [[ -n "$FINAL" ]]; then + pass "DNS working: example.com → $FINAL" +else + fail "DNS not resolving" +fi + +# ── Summary ────────────────────────────────────────────────────────────────── +echo "" +log "═══════════════════════════════════════════════════════════" +log " Test Summary" +log "═══════════════════════════════════════════════════════════" +[[ "$RECOVERY_DETECTED" == "true" ]] && pass "Recovery bypass activated" || fail "Recovery bypass NOT activated" +[[ "$BYPASS_ACTIVE" == "true" ]] && pass "Queries forwarded to 
OS/DHCP resolver" || warn "OS resolver forwarding not confirmed" +[[ "$DNS_DURING_BYPASS" == "true" ]] && pass "DNS resolved during bypass (proof of OS resolver leak)" || warn "DNS during bypass not confirmed" +[[ "$RECOVERY_COMPLETE" == "true" ]] && pass "Normal operation resumed after unblock" || warn "Recovery completion not confirmed" +[[ -n "${FINAL:-}" ]] && pass "DNS functional at end of test" || fail "DNS broken at end of test" +echo "" +log "Full log since test: tail -n +$LOG_LINES_BEFORE $CTRLD_LOG" +log "Recovery entries: tail -n +$LOG_LINES_BEFORE $CTRLD_LOG | grep -i recovery" diff --git a/test-scripts/darwin/validate-pf-rules.sh b/test-scripts/darwin/validate-pf-rules.sh new file mode 100644 index 00000000..7cd0d0ac --- /dev/null +++ b/test-scripts/darwin/validate-pf-rules.sh @@ -0,0 +1,272 @@ +#!/bin/bash +# validate-pf-rules.sh +# Standalone test of the pf redirect rules for dns-intercept mode. +# Does NOT require ctrld. Loads the pf anchor, validates interception, cleans up. +# Run as root (sudo). 
+ +set -e + +GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m' +ok() { echo -e "${GREEN}[OK]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; FAILURES=$((FAILURES+1)); } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +FAILURES=0 + +ANCHOR="com.controld.ctrld.test" +ANCHOR_FILE="/tmp/pf-dns-intercept-test.conf" +# Use a local DNS listener to prove redirect works (python one-liner) +LISTENER_PID="" + +cleanup() { + echo "" + echo -e "${CYAN}--- Cleanup ---${NC}" + # Remove anchor rules + pfctl -a "$ANCHOR" -F all 2>/dev/null && echo " Flushed anchor $ANCHOR" || true + # Remove anchor file + rm -f "$ANCHOR_FILE" "/tmp/pf-combined-test.conf" && echo " Removed temp files" || true + # Reload original pf.conf to remove anchor reference + pfctl -f /etc/pf.conf 2>/dev/null && echo " Reloaded original pf.conf" || true + # Kill test listener + if [ -n "$LISTENER_PID" ]; then + kill "$LISTENER_PID" 2>/dev/null && echo " Stopped test DNS listener" || true + fi + echo " Cleanup complete" +} +trap cleanup EXIT + +resolve() { + dig "@${1}" "$2" A +short +timeout=3 +tries=1 2>/dev/null | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 +} + +echo -e "${CYAN}=== pf DNS Redirect Rule Validation ===${NC}" +echo " This loads the exact pf rules from the dns-intercept MR," +echo " starts a tiny DNS listener on 127.0.0.1:53, and verifies" +echo " that queries to external IPs get redirected." +echo "" + +# 0. Check we're root +if [ "$(id -u)" -ne 0 ]; then + fail "Must run as root (sudo)" + exit 1 +fi + +# 1. Start a minimal DNS listener on 127.0.0.1:53 +# Uses socat to echo a fixed response — enough to prove redirect works. +# If port 53 is already in use (mDNSResponder), we'll use that instead. +echo "--- Step 1: DNS Listener on 127.0.0.1:53 ---" +if lsof -i :53 -sTCP:LISTEN 2>/dev/null | grep -q "." 
|| lsof -i UDP:53 2>/dev/null | grep -q "."; then + ok "Something already listening on port 53 (likely mDNSResponder or ctrld)" + HAVE_LISTENER=true +else + # Start a simple Python DNS proxy that forwards to 1.1.1.1 + python3 -c " +import socket, threading, sys +def proxy(data, addr, sock): + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.settimeout(3) + s.sendto(data, ('1.1.1.1', 53)) + resp, _ = s.recvfrom(4096) + sock.sendto(resp, addr) + s.close() + except: pass + +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +sock.bind(('127.0.0.1', 53)) +print('READY', flush=True) +while True: + data, addr = sock.recvfrom(4096) + threading.Thread(target=proxy, args=(data, addr, sock), daemon=True).start() +" & + LISTENER_PID=$! + sleep 1 + if kill -0 "$LISTENER_PID" 2>/dev/null; then + ok "Started test DNS proxy on 127.0.0.1:53 (PID $LISTENER_PID, forwards to 1.1.1.1)" + HAVE_LISTENER=true + else + fail "Could not start DNS listener on port 53 — port may be in use" + HAVE_LISTENER=false + fi +fi +echo "" + +# 2. Verify baseline: direct query to 8.8.8.8 works (before pf rules) +echo "--- Step 2: Baseline (before pf rules) ---" +IP=$(resolve "8.8.8.8" "example.com") +if [ -n "$IP" ]; then + ok "Direct DNS to 8.8.8.8 works (baseline): $IP" +else + warn "Direct DNS to 8.8.8.8 failed — may be blocked by existing firewall" +fi +echo "" + +# 3. Write and load the pf anchor (exact rules from MR) +echo "--- Step 3: Load pf Anchor Rules ---" +TEST_UPSTREAM="1.1.1.1" +cat > "$ANCHOR_FILE" << PFRULES +# ctrld DNS Intercept Mode (test anchor) +# Two-step: route-to lo0 + rdr on lo0 +# +# In production, ctrld uses DoH (port 443) for upstreams so they're not +# affected by port 53 rules. For this test, we exempt our upstream ($TEST_UPSTREAM) +# explicitly — same mechanism ctrld uses for OS resolver exemptions. + +# --- Translation rules (rdr) --- +rdr pass on lo0 inet proto udp from any to ! 
127.0.0.1 port 53 -> 127.0.0.1 port 53 +rdr pass on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53 + +# --- Filtering rules (pass) --- +# Exempt test upstream (in production: ctrld uses DoH, so this isn't needed). +pass out quick on ! lo0 inet proto { udp, tcp } from any to $TEST_UPSTREAM port 53 + +# Force remaining outbound DNS through loopback for interception. +pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! 127.0.0.1 port 53 no state +pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! 127.0.0.1 port 53 no state + +# Allow redirected traffic through on loopback. +pass in quick on lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53 no state +PFRULES + +ok "Wrote anchor file: $ANCHOR_FILE" +cat "$ANCHOR_FILE" | sed 's/^/ /' +echo "" + +# Load anchor +OUTPUT=$(pfctl -a "$ANCHOR" -f "$ANCHOR_FILE" 2>&1) || { + fail "Failed to load anchor: $OUTPUT" + exit 1 +} +ok "Loaded anchor: $ANCHOR" + +# Inject anchor references into running pf config. +# pf enforces strict rule ordering: options, normalization, queueing, translation, filtering. +# We must insert rdr-anchor with other rdr-anchors and anchor with other anchors. 
+TMPCONF="/tmp/pf-combined-test.conf" +python3 -c " +import sys +lines = open('/etc/pf.conf').read().splitlines() +anchor = '$ANCHOR' +rdr_ref = 'rdr-anchor \"' + anchor + '\"' +anchor_ref = 'anchor \"' + anchor + '\"' +out = [] +rdr_done = False +anc_done = False +for line in lines: + s = line.strip() + # Insert our rdr-anchor before the first existing rdr-anchor + if not rdr_done and s.startswith('rdr-anchor'): + out.append(rdr_ref) + rdr_done = True + # Insert our anchor before the first existing anchor (filter-phase) + if not anc_done and s.startswith('anchor') and not s.startswith('anchor \"com.apple'): + out.append(anchor_ref) + anc_done = True + out.append(line) +# Fallback if no existing anchors found +if not rdr_done: + # Insert before first non-comment, non-blank after any 'set' or 'scrub' lines + out.insert(0, rdr_ref) +if not anc_done: + out.append(anchor_ref) +open('$TMPCONF', 'w').write('\n'.join(out) + '\n') +" || { fail "Failed to build combined pf config"; exit 1; } + +INJECT_OUT=$(pfctl -f "$TMPCONF" 2>&1) || { + fail "Failed to inject anchor reference: $INJECT_OUT" + rm -f "$TMPCONF" + exit 1 +} +rm -f "$TMPCONF" +ok "Injected anchor references into running pf ruleset" + +# Enable pf +pfctl -e 2>/dev/null || true + +# Show loaded rules +echo "" +echo " Active NAT rules:" +pfctl -a "$ANCHOR" -sn 2>/dev/null | sed 's/^/ /' +echo " Active filter rules:" +pfctl -a "$ANCHOR" -sr 2>/dev/null | sed 's/^/ /' +echo "" + +# 4. 
Test: DNS to 8.8.8.8 should now be redirected to 127.0.0.1:53 +echo "--- Step 4: Redirect Test ---" +if [ "$HAVE_LISTENER" = true ]; then + IP=$(resolve "8.8.8.8" "example.com" 5) + if [ -n "$IP" ]; then + ok "DNS to 8.8.8.8 redirected through 127.0.0.1:53: $IP" + else + fail "DNS to 8.8.8.8 failed — redirect may not be working" + fi + + # Also test another random IP + IP2=$(resolve "9.9.9.9" "example.com" 5) + if [ -n "$IP2" ]; then + ok "DNS to 9.9.9.9 also redirected: $IP2" + else + fail "DNS to 9.9.9.9 failed" + fi +else + warn "No listener on port 53 — cannot test redirect" +fi +echo "" + +# 5. Test: DNS to 127.0.0.1 still works (not double-redirected) +echo "--- Step 5: Localhost DNS (no loop) ---" +if [ "$HAVE_LISTENER" = true ]; then + IP=$(resolve "127.0.0.1" "example.com" 5) + if [ -n "$IP" ]; then + ok "DNS to 127.0.0.1 works normally (not caught by redirect): $IP" + else + fail "DNS to 127.0.0.1 failed — possible redirect loop" + fi +fi +echo "" + +# 6. Simulate VPN DNS override +echo "--- Step 6: VPN DNS Override Simulation ---" +IFACE=$(route -n get default 2>/dev/null | awk '/interface:/{print $2}') +SVC="" +for try_svc in "Wi-Fi" "Ethernet" "Thunderbolt Ethernet"; do + if networksetup -getdnsservers "$try_svc" 2>/dev/null >/dev/null; then + SVC="$try_svc" + break + fi +done + +if [ -n "$SVC" ] && [ "$HAVE_LISTENER" = true ]; then + ORIG_DNS=$(networksetup -getdnsservers "$SVC" 2>/dev/null || echo "") + echo " Service: $SVC" + echo " Current DNS: $ORIG_DNS" + + networksetup -setdnsservers "$SVC" 10.50.10.77 + dscacheutil -flushcache 2>/dev/null || true + killall -HUP mDNSResponder 2>/dev/null || true + echo " Set DNS to 10.50.10.77 (simulating F5 VPN)" + sleep 2 + + IP=$(resolve "10.50.10.77" "google.com" 5) + if [ -n "$IP" ]; then + ok "Query to fake VPN DNS (10.50.10.77) redirected to ctrld: $IP" + else + fail "Query to fake VPN DNS failed" + fi + + # Restore + if echo "$ORIG_DNS" | grep -q "There aren't any DNS Servers"; then + networksetup 
-setdnsservers "$SVC" Empty + else + networksetup -setdnsservers "$SVC" $ORIG_DNS + fi + echo " Restored DNS" +else + warn "Skipping VPN simulation (no service found or no listener)" +fi + +echo "" +if [ "$FAILURES" -eq 0 ]; then + echo -e "${GREEN}=== All tests passed ===${NC}" +else + echo -e "${RED}=== $FAILURES test(s) failed ===${NC}" +fi diff --git a/test-scripts/windows/test-dns-intercept.ps1 b/test-scripts/windows/test-dns-intercept.ps1 new file mode 100644 index 00000000..fc4cc3fd --- /dev/null +++ b/test-scripts/windows/test-dns-intercept.ps1 @@ -0,0 +1,544 @@ +# ============================================================================= +# DNS Intercept Mode Test Script — Windows (WFP) +# ============================================================================= +# Run as Administrator: powershell -ExecutionPolicy Bypass -File test-dns-intercept-win.ps1 +# +# Tests the dns-intercept feature end-to-end with validation at each step. +# Logs are read from C:\tmp\dns.log (ctrld log location on test machine). +# +# Manual steps marked with [MANUAL] require human interaction. 
+# ============================================================================= + +$ErrorActionPreference = "Continue" + +$CtrldLog = "C:\tmp\dns.log" +$WfpSubLayerName = "ctrld DNS Intercept" +$Pass = 0 +$Fail = 0 +$Warn = 0 +$Results = @() + +# --- Helpers --- + +function Header($text) { Write-Host "`n━━━ $text ━━━" -ForegroundColor Cyan } +function Info($text) { Write-Host " ℹ $text" } +function Manual($text) { Write-Host " [MANUAL] $text" -ForegroundColor Yellow } +function Separator() { Write-Host "─────────────────────────────────────────────────────" -ForegroundColor Cyan } + +function Pass($text) { + Write-Host " ✅ PASS: $text" -ForegroundColor Green + $script:Pass++ + $script:Results += "PASS: $text" +} + +function Fail($text) { + Write-Host " ❌ FAIL: $text" -ForegroundColor Red + $script:Fail++ + $script:Results += "FAIL: $text" +} + +function Warn($text) { + Write-Host " ⚠️ WARN: $text" -ForegroundColor Yellow + $script:Warn++ + $script:Results += "WARN: $text" +} + +function WaitForKey { + Write-Host "`n Press Enter to continue..." -NoNewline + Read-Host +} + +function LogGrep($pattern, $lines = 200) { + if (Test-Path $CtrldLog) { + Get-Content $CtrldLog -Tail $lines -ErrorAction SilentlyContinue | + Select-String -Pattern $pattern -ErrorAction SilentlyContinue + } +} + +function LogGrepCount($pattern, $lines = 200) { + $matches = LogGrep $pattern $lines + if ($matches) { return @($matches).Count } else { return 0 } +} + +# --- Check Admin --- + +function Check-Admin { + $identity = [Security.Principal.WindowsIdentity]::GetCurrent() + $principal = New-Object Security.Principal.WindowsPrincipal($identity) + if (-not $principal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)) { + Write-Host "This script must be run as Administrator." 
-ForegroundColor Red + exit 1 + } +} + +# ============================================================================= +# TEST SECTIONS +# ============================================================================= + +function Test-Prereqs { + Header "0. Prerequisites" + + if (Get-Command nslookup -ErrorAction SilentlyContinue) { + Pass "nslookup available" + } else { + Fail "nslookup not found" + } + + if (Get-Command netsh -ErrorAction SilentlyContinue) { + Pass "netsh available" + } else { + Fail "netsh not found" + } + + if (Test-Path $CtrldLog) { + Pass "ctrld log exists at $CtrldLog" + } else { + Warn "ctrld log not found at $CtrldLog — log checks will be skipped" + } + + # Show current DNS config + Info "Current DNS servers:" + Get-DnsClientServerAddress -AddressFamily IPv4 | + Where-Object { $_.ServerAddresses.Count -gt 0 } | + Format-Table InterfaceAlias, ServerAddresses -AutoSize | + Out-String | ForEach-Object { $_.Trim() } | Write-Host +} + +function Test-WfpState { + Header "1. WFP State Validation" + + # Export WFP filters and check for ctrld's sublayer/filters + $wfpExport = "$env:TEMP\wfp_filters.xml" + Info "Exporting WFP filters (this may take a few seconds)..." 
+ + try { + netsh wfp show filters file=$wfpExport 2>$null | Out-Null + + if (Test-Path $wfpExport) { + $wfpContent = Get-Content $wfpExport -Raw -ErrorAction SilentlyContinue + + # Check for ctrld sublayer + if ($wfpContent -match "ctrld") { + Pass "WFP filters contain 'ctrld' references" + + # Count filters + $filterMatches = ([regex]::Matches($wfpContent, "ctrld")).Count + Info "Found $filterMatches 'ctrld' references in WFP export" + } else { + Fail "No 'ctrld' references found in WFP filters" + } + + # Check for DNS port 53 filters + if ($wfpContent -match "port.*53" -or $wfpContent -match "0x0035") { + Pass "Port 53 filter conditions found in WFP" + } else { + Warn "Could not confirm port 53 filters in WFP export" + } + + Remove-Item $wfpExport -ErrorAction SilentlyContinue + } else { + Warn "WFP export file not created" + } + } catch { + Warn "Could not export WFP filters: $_" + } + + Separator + + # Alternative: Check via PowerShell WFP cmdlets if available + Info "Checking WFP via netsh wfp show state..." + $wfpState = netsh wfp show state 2>$null + if ($wfpState) { + Info "WFP state export completed (check $env:TEMP for details)" + } + + # Check Windows Firewall service is running + $fwService = Get-Service -Name "mpssvc" -ErrorAction SilentlyContinue + if ($fwService -and $fwService.Status -eq "Running") { + Pass "Windows Firewall service (BFE/WFP) is running" + } else { + Fail "Windows Firewall service not running — WFP won't work" + } + + # Check BFE (Base Filtering Engine) + $bfeService = Get-Service -Name "BFE" -ErrorAction SilentlyContinue + if ($bfeService -and $bfeService.Status -eq "Running") { + Pass "Base Filtering Engine (BFE) is running" + } else { + Fail "BFE not running — WFP requires this service" + } +} + +function Test-DnsInterception { + Header "2. 
DNS Interception Tests" + + # Mark log position + $logLinesBefore = 0 + if (Test-Path $CtrldLog) { + $logLinesBefore = @(Get-Content $CtrldLog -ErrorAction SilentlyContinue).Count + } + + # Test 1: Query to external resolver should be intercepted + Info "Test: nslookup example.com 8.8.8.8 (should be intercepted by ctrld)" + $result = $null + try { + $result = nslookup example.com 8.8.8.8 2>&1 | Out-String + } catch { } + + if ($result -and $result -match "\d+\.\d+\.\d+\.\d+") { + Pass "nslookup @8.8.8.8 returned a result" + + # Check which server answered + if ($result -match "Server:\s+(\S+)") { + $server = $Matches[1] + Info "Answered by server: $server" + if ($server -match "127\.0\.0\.1|localhost") { + Pass "Response came from localhost (ctrld intercepted)" + } elseif ($server -match "8\.8\.8\.8") { + Fail "Response came from 8.8.8.8 directly — NOT intercepted" + } + } + } else { + Fail "nslookup @8.8.8.8 failed or returned no address" + } + + # Check ctrld logged it + Start-Sleep -Seconds 1 + if (Test-Path $CtrldLog) { + $newLines = Get-Content $CtrldLog -ErrorAction SilentlyContinue | + Select-Object -Skip $logLinesBefore + $intercepted = $newLines | Select-String "example.com" -ErrorAction SilentlyContinue + if ($intercepted) { + Pass "ctrld logged the intercepted query for example.com" + } else { + Fail "ctrld did NOT log query for example.com" + } + } + + Separator + + # Test 2: Another external resolver + Info "Test: nslookup cloudflare.com 1.1.1.1 (should also be intercepted)" + try { + $result2 = nslookup cloudflare.com 1.1.1.1 2>&1 | Out-String + if ($result2 -match "\d+\.\d+\.\d+\.\d+") { + Pass "nslookup @1.1.1.1 returned result" + } else { + Fail "nslookup @1.1.1.1 failed" + } + } catch { + Fail "nslookup @1.1.1.1 threw exception" + } + + Separator + + # Test 3: Query to localhost should work (no loop) + Info "Test: nslookup example.org 127.0.0.1 (direct to ctrld, no loop)" + try { + $result3 = nslookup example.org 127.0.0.1 2>&1 | Out-String + if 
($result3 -match "\d+\.\d+\.\d+\.\d+") { + Pass "nslookup @127.0.0.1 works (no loop)" + } else { + Fail "nslookup @127.0.0.1 failed — possible loop" + } + } catch { + Fail "nslookup @127.0.0.1 exception — possible loop" + } + + Separator + + # Test 4: System DNS via Resolve-DnsName + Info "Test: Resolve-DnsName example.net (system resolver)" + try { + $result4 = Resolve-DnsName example.net -Type A -ErrorAction Stop + if ($result4) { + Pass "System DNS resolution works (Resolve-DnsName)" + } + } catch { + Fail "System DNS resolution failed: $_" + } + + Separator + + # Test 5: TCP DNS + Info "Test: nslookup -vc example.com 9.9.9.9 (TCP DNS)" + try { + $result5 = nslookup -vc example.com 9.9.9.9 2>&1 | Out-String + if ($result5 -match "\d+\.\d+\.\d+\.\d+") { + Pass "TCP DNS query intercepted and resolved" + } else { + Warn "TCP DNS query may not have been intercepted" + } + } catch { + Warn "TCP DNS test inconclusive" + } +} + +function Test-NonDnsUnaffected { + Header "3. Non-DNS Traffic Unaffected" + + # HTTPS + Info "Test: Invoke-WebRequest https://example.com (HTTPS should NOT be affected)" + try { + $web = Invoke-WebRequest -Uri "https://example.com" -UseBasicParsing -TimeoutSec 10 -ErrorAction Stop + if ($web.StatusCode -eq 200) { + Pass "HTTPS works (HTTP 200)" + } else { + Pass "HTTPS returned HTTP $($web.StatusCode)" + } + } catch { + Fail "HTTPS failed: $_" + } + + # Test non-53 port connectivity + Info "Test: Test-NetConnection to github.com:443 (non-DNS port)" + try { + $nc = Test-NetConnection -ComputerName "github.com" -Port 443 -WarningAction SilentlyContinue + if ($nc.TcpTestSucceeded) { + Pass "Port 443 reachable (non-DNS traffic unaffected)" + } else { + Warn "Port 443 unreachable (may be firewall)" + } + } catch { + Warn "Test-NetConnection failed: $_" + } +} + +function Test-CtrldLogHealth { + Header "4. 
ctrld Log Health Check" + + if (-not (Test-Path $CtrldLog)) { + Warn "Skipping log checks — $CtrldLog not found" + return + } + + # Check for WFP initialization + if (LogGrepCount "initializing Windows Filtering Platform" 500) { + Pass "WFP initialization logged" + } else { + Fail "No WFP initialization in recent logs" + } + + # Check for successful WFP engine open + if (LogGrepCount "WFP engine opened" 500) { + Pass "WFP engine opened successfully" + } else { + Fail "WFP engine open not found in logs" + } + + # Check for sublayer creation + if (LogGrepCount "WFP sublayer created" 500) { + Pass "WFP sublayer created" + } else { + Fail "WFP sublayer creation not logged" + } + + # Check for filter creation + $filterCount = LogGrepCount "added WFP.*filter" 500 + if ($filterCount -gt 0) { + Pass "WFP filters added ($filterCount filter log entries)" + } else { + Fail "No WFP filter creation logged" + } + + # Check for permit-localhost filters + if (LogGrepCount "permit.*localhost\|permit.*127\.0\.0\.1" 500) { + Pass "Localhost permit filters logged" + } else { + Warn "Localhost permit filters not explicitly logged" + } + + Separator + + # Check for errors + Info "Recent errors in ctrld log:" + $errors = LogGrep '"level":"error"' 500 + if ($errors) { + $errors | Select-Object -Last 5 | ForEach-Object { Write-Host " $_" } + Warn "Errors found in recent logs" + } else { + Pass "No errors in recent logs" + } + + # Warnings (excluding expected ones) + $warnings = LogGrep '"level":"warn"' 500 | Where-Object { + $_ -notmatch "skipping self-upgrade" + } + if ($warnings) { + Info "Warnings:" + $warnings | Select-Object -Last 5 | ForEach-Object { Write-Host " $_" } + } + + # VPN DNS detection + $vpnLogs = LogGrep "VPN DNS" 500 + if ($vpnLogs) { + Info "VPN DNS activity:" + $vpnLogs | Select-Object -Last 5 | ForEach-Object { Write-Host " $_" } + } else { + Info "No VPN DNS activity (expected if no VPN connected)" + } +} + +function Test-CleanupOnStop { + Header "5. 
Cleanup Validation (After ctrld Stop)" + + Manual "Stop ctrld now (ctrld stop or Ctrl+C), then press Enter" + WaitForKey + + Start-Sleep -Seconds 2 + + # Check WFP filters are removed + $wfpExport = "$env:TEMP\wfp_after_stop.xml" + try { + netsh wfp show filters file=$wfpExport 2>$null | Out-Null + if (Test-Path $wfpExport) { + $content = Get-Content $wfpExport -Raw -ErrorAction SilentlyContinue + if ($content -match "ctrld") { + Fail "WFP still contains 'ctrld' filters after stop" + } else { + Pass "WFP filters cleaned up after stop" + } + Remove-Item $wfpExport -ErrorAction SilentlyContinue + } + } catch { + Warn "Could not verify WFP cleanup" + } + + # DNS should work normally + Info "Test: nslookup example.com (should work via system DNS)" + try { + $result = nslookup example.com 2>&1 | Out-String + if ($result -match "\d+\.\d+\.\d+\.\d+") { + Pass "DNS works after ctrld stop" + } else { + Fail "DNS broken after ctrld stop" + } + } catch { + Fail "DNS exception after ctrld stop" + } +} + +function Test-RestartResilience { + Header "6. Restart Resilience" + + Manual "Start ctrld again with --dns-intercept, then press Enter" + WaitForKey + + Start-Sleep -Seconds 3 + + # Quick interception test + Info "Test: nslookup example.com 8.8.8.8 (should be intercepted after restart)" + try { + $result = nslookup example.com 8.8.8.8 2>&1 | Out-String + if ($result -match "\d+\.\d+\.\d+\.\d+") { + Pass "DNS interception works after restart" + } else { + Fail "DNS interception broken after restart" + } + } catch { + Fail "DNS test failed after restart" + } + + # Check WFP filters restored + if (LogGrepCount "WFP engine opened" 100) { + Pass "WFP re-initialized after restart" + } +} + +function Test-NetworkChange { + Header "7. Network Change Recovery" + + Info "This test verifies recovery after network changes." 
+ Manual "Switch Wi-Fi networks, or disable/re-enable network adapter, then press Enter" + WaitForKey + + Start-Sleep -Seconds 5 + + # Test interception still works + Info "Test: nslookup example.com 8.8.8.8 (should still be intercepted)" + try { + $result = nslookup example.com 8.8.8.8 2>&1 | Out-String + if ($result -match "\d+\.\d+\.\d+\.\d+") { + Pass "DNS interception works after network change" + } else { + Fail "DNS interception broken after network change" + } + } catch { + Fail "DNS test failed after network change" + } + + # Check logs for recovery/network events + if (Test-Path $CtrldLog) { + $recoveryLogs = LogGrep "recovery|network change|network monitor" 100 + if ($recoveryLogs) { + Info "Recovery/network log entries:" + $recoveryLogs | Select-Object -Last 5 | ForEach-Object { Write-Host " $_" } + } + } +} + +# ============================================================================= +# SUMMARY +# ============================================================================= + +function Print-Summary { + Header "TEST SUMMARY" + Write-Host "" + foreach ($r in $Results) { + if ($r.StartsWith("PASS")) { + Write-Host " ✅ $($r.Substring(6))" -ForegroundColor Green + } elseif ($r.StartsWith("FAIL")) { + Write-Host " ❌ $($r.Substring(6))" -ForegroundColor Red + } elseif ($r.StartsWith("WARN")) { + Write-Host " ⚠️ $($r.Substring(6))" -ForegroundColor Yellow + } + } + Write-Host "" + Separator + Write-Host " Passed: $Pass | Failed: $Fail | Warnings: $Warn" + Separator + + if ($Fail -gt 0) { + Write-Host "`n Some tests failed. Debug commands:" -ForegroundColor Red + Write-Host " netsh wfp show filters # dump all WFP filters" + Write-Host " Get-Content $CtrldLog -Tail 100 # recent ctrld logs" + Write-Host " Get-DnsClientServerAddress # current DNS config" + Write-Host " netsh wfp show state # WFP state dump" + } else { + Write-Host "`n All tests passed!" 
-ForegroundColor Green + } +} + +# ============================================================================= +# MAIN +# ============================================================================= + +Write-Host "╔═══════════════════════════════════════════════════════╗" -ForegroundColor White +Write-Host "║ ctrld DNS Intercept Mode — Windows Test Suite ║" -ForegroundColor White +Write-Host "║ Tests WFP-based DNS interception ║" -ForegroundColor White +Write-Host "╚═══════════════════════════════════════════════════════╝" -ForegroundColor White + +Check-Admin + +Write-Host "" +Write-Host "Make sure ctrld is running with --dns-intercept before starting." +Write-Host "Log location: $CtrldLog" +WaitForKey + +Test-Prereqs +Test-WfpState +Test-DnsInterception +Test-NonDnsUnaffected +Test-CtrldLogHealth + +Separator +Write-Host "" +Write-Host "The next tests require manual steps (stop/start ctrld, network changes)." +Write-Host "Press Enter to continue, or Ctrl+C to skip and see results so far." +WaitForKey + +Test-CleanupOnStop +Test-RestartResilience +Test-NetworkChange + +Print-Summary diff --git a/test-scripts/windows/test-recovery-bypass.ps1 b/test-scripts/windows/test-recovery-bypass.ps1 new file mode 100644 index 00000000..005a7feb --- /dev/null +++ b/test-scripts/windows/test-recovery-bypass.ps1 @@ -0,0 +1,289 @@ +# test-recovery-bypass.ps1 — Test DNS intercept recovery bypass (captive portal simulation) +# +# Simulates a captive portal by: +# 1. Discovering ctrld's upstream IPs from active connections +# 2. Blocking them via Windows Firewall rules +# 3. Disabling/re-enabling the wifi adapter to trigger network change +# 4. Verifying recovery bypass forwards to OS/DHCP resolver +# 5. Removing firewall rules and verifying normal operation resumes +# +# SAFE: Uses named firewall rules that are cleaned up on exit. 
+# +# Usage (run as Administrator): +# .\test-recovery-bypass.ps1 [-WifiAdapter "Wi-Fi"] [-CtrldLog "C:\temp\dns.log"] +# +# Prerequisites: +# - ctrld running with --dns-intercept and -v 1 --log C:\temp\dns.log +# - Run as Administrator + +param( + [string]$WifiAdapter = "Wi-Fi", + [string]$CtrldLog = "C:\temp\dns.log", + [int]$BlockDurationSec = 60 +) + +$ErrorActionPreference = "Stop" +$FwRulePrefix = "ctrld-test-recovery-block" +$BlockedIPs = @() + +function Log($msg) { Write-Host "[$(Get-Date -Format 'HH:mm:ss')] $msg" -ForegroundColor Cyan } +function Pass($msg) { Write-Host "[PASS] $msg" -ForegroundColor Green } +function Fail($msg) { Write-Host "[FAIL] $msg" -ForegroundColor Red } +function Warn($msg) { Write-Host "[WARN] $msg" -ForegroundColor Yellow } + +# ── Safety: cleanup function ───────────────────────────────────────────────── +function Cleanup { + Log "═══ CLEANUP ═══" + + # Ensure wifi is enabled + Log "Ensuring wifi adapter is enabled..." + try { Enable-NetAdapter -Name $WifiAdapter -Confirm:$false -ErrorAction SilentlyContinue } catch {} + + # Remove all test firewall rules + Log "Removing test firewall rules..." + Get-NetFirewallRule -DisplayName "$FwRulePrefix*" -ErrorAction SilentlyContinue | + Remove-NetFirewallRule -ErrorAction SilentlyContinue + Log "Cleanup complete." +} + +# Register cleanup on script exit +$null = Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action { Cleanup } -ErrorAction SilentlyContinue +trap { Cleanup; break } + +# ── Pre-checks ─────────────────────────────────────────────────────────────── +$isAdmin = ([Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator) +if (-not $isAdmin) { + Fail "Run as Administrator!" 
+ exit 1 +} + +if (-not (Test-Path $CtrldLog)) { + Fail "ctrld log not found at $CtrldLog" + Write-Host "Start ctrld with: ctrld run --dns-intercept --cd -v 1 --log $CtrldLog" + exit 1 +} + +# Check wifi adapter exists +$adapter = Get-NetAdapter -Name $WifiAdapter -ErrorAction SilentlyContinue +if (-not $adapter) { + Fail "Wifi adapter '$WifiAdapter' not found" + Write-Host "Available adapters:" + Get-NetAdapter | Format-Table Name, Status, InterfaceDescription + exit 1 +} + +Log "═══════════════════════════════════════════════════════════" +Log " Recovery Bypass Test (Captive Portal Simulation)" +Log "═══════════════════════════════════════════════════════════" +Log "Wifi adapter: $WifiAdapter" +Log "ctrld log: $CtrldLog" +Write-Host "" + +# ── Phase 1: Discover upstream IPs ────────────────────────────────────────── +Log "Phase 1: Discovering ctrld upstream IPs from active connections" + +$ctrldConns = Get-NetTCPConnection -OwningProcess (Get-Process ctrld* -ErrorAction SilentlyContinue).Id -ErrorAction SilentlyContinue | + Where-Object { $_.State -eq "Established" -and $_.RemotePort -eq 443 } + +$upstreamIPs = @() +if ($ctrldConns) { + $upstreamIPs = $ctrldConns | Select-Object -ExpandProperty RemoteAddress -Unique | + Where-Object { $_ -notmatch "^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.)" } + + foreach ($conn in $ctrldConns) { + Log " $($conn.LocalAddress):$($conn.LocalPort) -> $($conn.RemoteAddress):$($conn.RemotePort)" + } +} + +# Also resolve known Control D endpoints +foreach ($host_ in @("dns.controld.com", "freedns.controld.com")) { + try { + $resolved = Resolve-DnsName $host_ -Type A -ErrorAction SilentlyContinue + $resolved | ForEach-Object { if ($_.IPAddress) { $upstreamIPs += $_.IPAddress } } + } catch {} +} + +$upstreamIPs = $upstreamIPs | Sort-Object -Unique + +if ($upstreamIPs.Count -eq 0) { + Fail "Could not discover any upstream IPs!" 
+ exit 1 +} + +Log "Found $($upstreamIPs.Count) upstream IP(s):" +foreach ($ip in $upstreamIPs) { Log " $ip" } +Write-Host "" + +# ── Phase 2: Baseline ─────────────────────────────────────────────────────── +Log "Phase 2: Baseline — verify DNS works normally" +$baseline = Resolve-DnsName example.com -Server 127.0.0.1 -Type A -ErrorAction SilentlyContinue +if ($baseline) { + Pass "Baseline: example.com -> $($baseline[0].IPAddress)" +} else { + Fail "DNS not working!" + exit 1 +} + +$logLinesBefore = (Get-Content $CtrldLog).Count +Log "Log position: line $logLinesBefore" +Write-Host "" + +# ── Phase 3: Block upstream IPs via Windows Firewall ──────────────────────── +Log "Phase 3: Blocking upstream IPs via Windows Firewall" +foreach ($ip in $upstreamIPs) { + $ruleName = "$FwRulePrefix-$ip" + # Remove existing rule if any + Remove-NetFirewallRule -DisplayName $ruleName -ErrorAction SilentlyContinue + # Block outbound to this IP + New-NetFirewallRule -DisplayName $ruleName -Direction Outbound -Action Block ` + -RemoteAddress $ip -Protocol TCP -RemotePort 443 ` + -Description "Temporary test rule for ctrld recovery bypass test" | Out-Null + $BlockedIPs += $ip + Log " Blocked: $ip (outbound TCP 443)" +} +Pass "All $($upstreamIPs.Count) upstream IPs blocked" +Write-Host "" + +# ── Phase 4: Cycle wifi ───────────────────────────────────────────────────── +Log "Phase 4: Cycling wifi to trigger network change event" +Log " Disabling $WifiAdapter..." +Disable-NetAdapter -Name $WifiAdapter -Confirm:$false +Start-Sleep -Seconds 3 + +Log " Enabling $WifiAdapter..." +Enable-NetAdapter -Name $WifiAdapter -Confirm:$false + +Log " Waiting for wifi to reconnect (up to 20s)..." 
+$wifiUp = $false +for ($i = 0; $i -lt 20; $i++) { + $status = (Get-NetAdapter -Name $WifiAdapter).Status + if ($status -eq "Up") { + # Check for IP + $ipAddr = (Get-NetIPAddress -InterfaceAlias $WifiAdapter -AddressFamily IPv4 -ErrorAction SilentlyContinue).IPAddress + if ($ipAddr) { + $wifiUp = $true + Pass "Wifi reconnected: $WifiAdapter -> $ipAddr" + break + } + } + Start-Sleep -Seconds 1 +} + +if (-not $wifiUp) { + Fail "Wifi did not reconnect in 20s!" + Cleanup + exit 1 +} + +Log " Waiting 5s for ctrld network monitor..." +Start-Sleep -Seconds 5 +Write-Host "" + +# ── Phase 5: Query and watch for recovery ──────────────────────────────────── +Log "Phase 5: Sending queries — upstream blocked, recovery should activate" +Write-Host "" + +$recoveryDetected = $false +$bypassActive = $false +$dnsDuringBypass = $false + +for ($q = 1; $q -le 30; $q++) { + $result = $null + try { + $result = Resolve-DnsName "example.com" -Server 127.0.0.1 -Type A -DnsOnly -ErrorAction SilentlyContinue + } catch {} + + if ($result) { + Log " Query #$q`: example.com -> $($result[0].IPAddress) ✓" + } else { + Log " Query #$q`: example.com -> FAIL ✗" + } + + # Check ctrld log for recovery + $newLogs = Get-Content $CtrldLog | Select-Object -Skip $logLinesBefore + $logText = $newLogs -join "`n" + + if (-not $recoveryDetected -and ($logText -match "enabling DHCP bypass|triggering recovery|No healthy")) { + Write-Host "" + Pass "🎯 Recovery flow triggered!" 
+ $recoveryDetected = $true + } + + if (-not $bypassActive -and ($logText -match "Recovery bypass active")) { + Pass "🔄 Recovery bypass forwarding to OS/DHCP resolver" + $bypassActive = $true + } + + if ($recoveryDetected -and $result) { + Pass "✅ DNS resolves during recovery: example.com -> $($result[0].IPAddress)" + $dnsDuringBypass = $true + break + } + + Start-Sleep -Seconds 2 +} + +# ── Phase 6: Show log entries ──────────────────────────────────────────────── +Write-Host "" +Log "Phase 6: Recovery-related ctrld log entries" +Log "────────────────────────────────────────────" +$newLogs = Get-Content $CtrldLog | Select-Object -Skip $logLinesBefore +$relevant = $newLogs | Where-Object { $_ -match "recovery|bypass|DHCP|unhealthy|upstream.*fail|No healthy|network change|OS resolver" } +if ($relevant) { + $relevant | Select-Object -First 30 | ForEach-Object { Write-Host " $_" } +} else { + Warn "No recovery-related log entries found" + Get-Content $CtrldLog | Select-Object -Last 10 | ForEach-Object { Write-Host " $_" } +} + +# ── Phase 7: Unblock and verify ───────────────────────────────────────────── +Write-Host "" +Log "Phase 7: Removing firewall blocks" +Get-NetFirewallRule -DisplayName "$FwRulePrefix*" -ErrorAction SilentlyContinue | + Remove-NetFirewallRule -ErrorAction SilentlyContinue +$BlockedIPs = @() +Pass "Firewall rules removed" + +Log "Waiting for recovery (up to 30s)..." 
+$logLinesUnblock = (Get-Content $CtrldLog).Count +$recoveryComplete = $false + +for ($i = 0; $i -lt 15; $i++) { + try { Resolve-DnsName example.com -Server 127.0.0.1 -Type A -DnsOnly -ErrorAction SilentlyContinue } catch {} + $postLogs = (Get-Content $CtrldLog | Select-Object -Skip $logLinesUnblock) -join "`n" + if ($postLogs -match "recovery complete|disabling DHCP bypass|Upstream.*recovered") { + $recoveryComplete = $true + Pass "ctrld recovered — normal operation resumed" + break + } + Start-Sleep -Seconds 2 +} + +if (-not $recoveryComplete) { Warn "Recovery completion not detected (may need more time)" } + +# ── Phase 8: Final check ──────────────────────────────────────────────────── +Write-Host "" +Log "Phase 8: Final DNS verification" +Start-Sleep -Seconds 2 +$final = Resolve-DnsName example.com -Server 127.0.0.1 -Type A -ErrorAction SilentlyContinue +if ($final) { + Pass "DNS working: example.com -> $($final[0].IPAddress)" +} else { + Fail "DNS not resolving" +} + +# ── Summary ────────────────────────────────────────────────────────────────── +Write-Host "" +Log "═══════════════════════════════════════════════════════════" +Log " Test Summary" +Log "═══════════════════════════════════════════════════════════" +if ($recoveryDetected) { Pass "Recovery bypass activated" } else { Fail "Recovery bypass NOT activated" } +if ($bypassActive) { Pass "Queries forwarded to OS/DHCP" } else { Warn "OS resolver forwarding not confirmed" } +if ($dnsDuringBypass) { Pass "DNS resolved during bypass" } else { Warn "DNS during bypass not confirmed" } +if ($recoveryComplete) { Pass "Normal operation resumed" } else { Warn "Recovery completion not confirmed" } +if ($final) { Pass "DNS functional at end of test" } else { Fail "DNS broken at end of test" } +Write-Host "" +Log "Full log: Get-Content $CtrldLog | Select-Object -Skip $logLinesBefore" + +# Cleanup runs via trap +Cleanup From 768cc8185552c41ee9dd6a60017fe4ed8c4505ea Mon Sep 17 00:00:00 2001 From: Codescribe Date: 
Tue, 3 Mar 2026 02:07:11 -0500 Subject: [PATCH 18/22] feat: add Windows NRPT and WFP DNS interception Implement DNS interception on Windows with dual-mode support: - NRPT for --intercept-mode=dns: catch-all rule redirecting all DNS to ctrld's listener, with GP vs local path detection - WFP for --intercept-mode=hard: sublayer with callout filters intercepting port 53 traffic - NRPT probe-and-heal for async Group Policy refresh race - Service registry verification for intercept mode persistence - NRPT diagnostics script for troubleshooting Includes WFP technical reference docs and Windows test scripts. Squashed from intercept mode development on v1.0 branch (#497). --- cmd/cli/dns_intercept_windows.go | 1674 ++++++++++++++++++++++++++++++ cmd/cli/os_windows.go | 2 +- docs/wfp-dns-intercept.md | 449 ++++++++ nameservers_windows.go | 182 +++- scripts/nrpt-diag.ps1 | 132 +++ 5 files changed, 2430 insertions(+), 9 deletions(-) create mode 100644 cmd/cli/dns_intercept_windows.go create mode 100644 docs/wfp-dns-intercept.md create mode 100644 scripts/nrpt-diag.ps1 diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go new file mode 100644 index 00000000..5a07d46c --- /dev/null +++ b/cmd/cli/dns_intercept_windows.go @@ -0,0 +1,1674 @@ +//go:build windows + +package cli + +import ( + "context" + "fmt" + "math/rand" + "net" + "os/exec" + "runtime" + "sync/atomic" + "time" + "unsafe" + + "golang.org/x/sys/windows" + "golang.org/x/sys/windows/registry" + + "github.com/Control-D-Inc/ctrld" +) + +// DNS Intercept Mode — Windows Implementation (WFP) +// +// This file implements DNS interception using Windows Filtering Platform (WFP). +// WFP is a kernel-level network filtering framework that allows applications to +// inspect and modify network traffic at various layers of the TCP/IP stack. 
+// +// Strategy: +// - Create a WFP sublayer at maximum priority (weight 0xFFFF) +// - Add PERMIT filters (weight 10) for DNS to localhost (ctrld's listener) +// - Add BLOCK filters (weight 1) for all other outbound DNS +// - Dynamically add/remove PERMIT filters for VPN DNS server exemptions +// +// This means even if VPN software overwrites adapter DNS settings, the OS +// cannot reach those DNS servers on port 53 — all DNS must flow through ctrld. +// +// Key advantages over macOS pf: +// - WFP filters are per-process kernel objects — other apps can't wipe them +// - No watchdog or stabilization needed +// - Connection-level filtering — no packet state/return-path complications +// - Full IPv4 + IPv6 support +// +// See docs/wfp-dns-intercept.md for architecture diagrams and debugging tips. + +// WFP GUIDs and constants for DNS interception. +// These are defined by Microsoft's Windows Filtering Platform API. +var ( + // ctrldSubLayerGUID is a unique GUID for ctrld's WFP sublayer. + // Generated specifically for ctrld DNS intercept mode. + ctrldSubLayerGUID = windows.GUID{ + Data1: 0x7a4e5b6c, + Data2: 0x3d2f, + Data3: 0x4a1e, + Data4: [8]byte{0x9b, 0x8c, 0x1d, 0x2e, 0x3f, 0x4a, 0x5b, 0x6c}, + } + + // Well-known WFP layer GUIDs from Microsoft documentation. + // FWPM_LAYER_ALE_AUTH_CONNECT_V4: filters outbound IPv4 connection attempts. + fwpmLayerALEAuthConnectV4 = windows.GUID{ + Data1: 0xc38d57d1, + Data2: 0x05a7, + Data3: 0x4c33, + Data4: [8]byte{0x90, 0x4f, 0x7f, 0xbc, 0xee, 0xe6, 0x0e, 0x82}, + } + // FWPM_LAYER_ALE_AUTH_CONNECT_V6: filters outbound IPv6 connection attempts. + fwpmLayerALEAuthConnectV6 = windows.GUID{ + Data1: 0x4a72393b, + Data2: 0x319f, + Data3: 0x44bc, + Data4: [8]byte{0x84, 0xc3, 0xba, 0x54, 0xdc, 0xb3, 0xb6, 0xb4}, + } + + // FWPM_CONDITION_IP_REMOTE_PORT: condition matching on remote port. 
+ fwpmConditionIPRemotePort = windows.GUID{ + Data1: 0xc35a604d, + Data2: 0xd22b, + Data3: 0x4e1a, + Data4: [8]byte{0x91, 0xb4, 0x68, 0xf6, 0x74, 0xee, 0x67, 0x4b}, + } + // FWPM_CONDITION_IP_REMOTE_ADDRESS: condition matching on remote address. + fwpmConditionIPRemoteAddress = windows.GUID{ + Data1: 0xb235ae9a, + Data2: 0x1d64, + Data3: 0x49b8, + Data4: [8]byte{0xa4, 0x4c, 0x5f, 0xf3, 0xd9, 0x09, 0x50, 0x45}, + } + // FWPM_CONDITION_IP_PROTOCOL: condition matching on IP protocol. + fwpmConditionIPProtocol = windows.GUID{ + Data1: 0x3971ef2b, + Data2: 0x623e, + Data3: 0x4f9a, + Data4: [8]byte{0x8c, 0xb1, 0x6e, 0x79, 0xb8, 0x06, 0xb9, 0xa7}, + } +) + +const ( + // WFP action constants. These combine a base action with the TERMINATING flag. + // See: https://docs.microsoft.com/en-us/windows/win32/api/fwptypes/ne-fwptypes-fwp_action_type + fwpActionFlagTerminating uint32 = 0x00001000 + fwpActionBlock uint32 = 0x00000001 | fwpActionFlagTerminating // 0x00001001 + fwpActionPermit uint32 = 0x00000002 | fwpActionFlagTerminating // 0x00001002 + + // FWP_MATCH_EQUAL is the match type for exact value comparison. + fwpMatchEqual uint32 = 0 // FWP_MATCH_EQUAL + + // FWP_DATA_TYPE constants for condition values. + // Enum starts at FWP_EMPTY=0, so FWP_UINT8=1, etc. + // See: https://learn.microsoft.com/en-us/windows/win32/api/fwptypes/ne-fwptypes-fwp_data_type + fwpUint8 uint32 = 1 // FWP_UINT8 + fwpUint16 uint32 = 2 // FWP_UINT16 + fwpUint32 uint32 = 3 // FWP_UINT32 + fwpByteArray16Type uint32 = 11 // FWP_BYTE_ARRAY16_TYPE + fwpV4AddrMask uint32 = 0x100 // FWP_V4_ADDR_MASK (after FWP_SINGLE_DATA_TYPE_MAX=0xff) + + // IP protocol numbers. + ipprotoUDP uint8 = 17 + ipprotoTCP uint8 = 6 + + // DNS port. + dnsPort uint16 = 53 +) + +// WFP API structures. These mirror the C structures from fwpmtypes.h and fwptypes.h. +// We define them here because golang.org/x/sys/windows doesn't include WFP types. 
+// +// IMPORTANT: These struct layouts must match the C ABI exactly (64-bit Windows). +// Field alignment and padding are critical. Any mismatch will cause access violations +// or silent corruption. The layouts below are for AMD64 only. +// If issues arise, verify against the Windows SDK headers with offsetof() checks. + +// fwpmSession0 represents FWPM_SESSION0 for opening a WFP engine handle. +type fwpmSession0 struct { + sessionKey windows.GUID + displayData fwpmDisplayData0 + flags uint32 + txnWaitTimeoutInMSec uint32 + processId uint32 + sid *windows.SID + username *uint16 + kernelMode int32 // Windows BOOL is int32, not Go bool + _ [4]byte // padding to next 8-byte boundary +} + +// fwpmDisplayData0 represents FWPM_DISPLAY_DATA0 for naming WFP objects. +type fwpmDisplayData0 struct { + name *uint16 + description *uint16 +} + +// fwpmSublayer0 represents FWPM_SUBLAYER0 for creating a WFP sublayer. +type fwpmSublayer0 struct { + subLayerKey windows.GUID + displayData fwpmDisplayData0 + flags uint32 + _ [4]byte // padding + providerKey *windows.GUID + providerData fwpByteBlob + weight uint16 + _ [6]byte // padding +} + +// fwpByteBlob represents FWP_BYTE_BLOB for raw data blobs. +type fwpByteBlob struct { + size uint32 + _ [4]byte // padding + data *byte +} + +// fwpmFilter0 represents FWPM_FILTER0 for adding WFP filters. +type fwpmFilter0 struct { + filterKey windows.GUID + displayData fwpmDisplayData0 + flags uint32 + _ [4]byte // padding + providerKey *windows.GUID + providerData fwpByteBlob + layerKey windows.GUID + subLayerKey windows.GUID + weight fwpValue0 + numFilterConds uint32 + _ [4]byte // padding + filterCondition *fwpmFilterCondition0 + action fwpmAction0 + // After action is a union of UINT64 (rawContext) and GUID (providerContextKey). + // GUID is 16 bytes, UINT64 is 8 bytes. Union size = 16 bytes. 
+ rawContext uint64 // first 8 bytes of the union + _rawContextPad uint64 // remaining 8 bytes (unused, for GUID alignment) + reserved *windows.GUID + filterId uint64 + effectiveWeight fwpValue0 +} + +// fwpValue0 represents FWP_VALUE0, a tagged union for filter weights and values. +type fwpValue0 struct { + valueType uint32 + _ [4]byte // padding + value uint64 // union: uint8/uint16/uint32/uint64/pointer +} + +// fwpmFilterCondition0 represents FWPM_FILTER_CONDITION0 for filter match conditions. +type fwpmFilterCondition0 struct { + fieldKey windows.GUID + matchType uint32 + _ [4]byte // padding + condValue fwpConditionValue0 +} + +// fwpConditionValue0 represents FWP_CONDITION_VALUE0, the value to match against. +type fwpConditionValue0 struct { + valueType uint32 + _ [4]byte // padding + value uint64 // union +} + +// fwpV4AddrAndMask represents FWP_V4_ADDR_AND_MASK for subnet matching. +// Both addr and mask are in host byte order. +type fwpV4AddrAndMask struct { + addr uint32 + mask uint32 +} + +// fwpmAction0 represents FWPM_ACTION0 for specifying what happens on match. +// Size: 20 bytes (uint32 + GUID). No padding needed — GUID has 4-byte alignment. +type fwpmAction0 struct { + actionType uint32 + filterType windows.GUID // union: filterType or calloutKey +} + +// wfpState holds the state of the WFP DNS interception filters. +// It tracks the engine handle and all filter IDs for cleanup on shutdown. +// All filter IDs are stored so we can remove them individually without +// needing to enumerate the sublayer's filters via WFP API. +// +// The engine handle is opened once at startup and kept for the lifetime +// of the ctrld process. Filter additions/removals happen through this handle. +type wfpState struct { + engineHandle uintptr + filterIDv4UDP uint64 + filterIDv4TCP uint64 + filterIDv6UDP uint64 + filterIDv6TCP uint64 + // Permit filter IDs for localhost traffic (prevent blocking ctrld's own listener). 
+ permitIDv4UDP uint64 + permitIDv4TCP uint64 + permitIDv6UDP uint64 + permitIDv6TCP uint64 + // Dynamic permit filter IDs for VPN DNS server IPs. + vpnPermitFilterIDs []uint64 + // Static permit filter IDs for RFC1918/CGNAT subnet ranges. + // These allow VPN DNS servers on private IPs to work without dynamic exemptions. + subnetPermitFilterIDs []uint64 + // nrptActive tracks whether the NRPT catch-all rule was successfully added. + // Used by stopDNSIntercept to know whether cleanup is needed. + nrptActive bool + // listenerIP is the actual IP address ctrld is listening on (e.g., "127.0.0.1" + // or "127.0.0.2" on AD DC). Used by NRPT rule creation and health monitor to + // ensure NRPT points to the correct address. + listenerIP string + // stopCh is used to shut down the NRPT health monitor goroutine. + stopCh chan struct{} +} + +// Lazy-loaded WFP DLL procedures. +var ( + fwpuclntDLL = windows.NewLazySystemDLL("fwpuclnt.dll") + procFwpmEngineOpen0 = fwpuclntDLL.NewProc("FwpmEngineOpen0") + procFwpmEngineClose0 = fwpuclntDLL.NewProc("FwpmEngineClose0") + procFwpmSubLayerAdd0 = fwpuclntDLL.NewProc("FwpmSubLayerAdd0") + procFwpmSubLayerDeleteByKey0 = fwpuclntDLL.NewProc("FwpmSubLayerDeleteByKey0") + procFwpmFilterAdd0 = fwpuclntDLL.NewProc("FwpmFilterAdd0") + procFwpmFilterDeleteById0 = fwpuclntDLL.NewProc("FwpmFilterDeleteById0") + procFwpmSubLayerGetByKey0 = fwpuclntDLL.NewProc("FwpmSubLayerGetByKey0") + procFwpmFreeMemory0 = fwpuclntDLL.NewProc("FwpmFreeMemory0") +) + +// Lazy-loaded dnsapi.dll for flushing the DNS Client cache after NRPT changes. +var ( + dnsapiDLL = windows.NewLazySystemDLL("dnsapi.dll") + procDnsFlushResolverCache = dnsapiDLL.NewProc("DnsFlushResolverCache") +) + +// Lazy-loaded userenv.dll for triggering Group Policy refresh so DNS Client +// picks up new NRPT registry entries without waiting for the next GP cycle. 
+var ( + userenvDLL = windows.NewLazySystemDLL("userenv.dll") + procRefreshPolicyEx = userenvDLL.NewProc("RefreshPolicyEx") +) + +// NRPT (Name Resolution Policy Table) Registry Constants +// +// NRPT tells the Windows DNS Client service where to send queries for specific +// namespaces. We add a catch-all rule ("." matches everything) that directs all +// DNS queries to ctrld's listener (typically 127.0.0.1, but may be 127.0.0.x on AD DC). +// +// This complements the WFP block filters: +// - NRPT: tells Windows DNS Client to send queries to ctrld (positive routing) +// - WFP: blocks any DNS that somehow bypasses NRPT (enforcement backstop) +// +// Without NRPT, WFP blocks outbound DNS but doesn't redirect it — applications +// would just see DNS failures instead of getting answers from ctrld. +const ( + // nrptBaseKey is the GP registry path where Windows stores NRPT policy rules. + nrptBaseKey = `SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig` + // nrptDirectKey is the local service store path. The DNS Client reads NRPT + // from both locations, but on some machines (including stock Win11) it only + // honors the direct path. This is the same path Add-DnsClientNrptRule uses. + nrptDirectKey = `SYSTEM\CurrentControlSet\Services\Dnscache\Parameters\DnsPolicyConfig` + // nrptRuleName is the name of our specific rule key under the GP path. + nrptRuleName = `CtrldCatchAll` + // nrptDirectRuleName is the key name for the direct service store path. + // The DNS Client requires direct-path rules to use GUID-in-braces format. + // Using a plain name like "CtrldCatchAll" makes the rule visible in + // Get-DnsClientNrptRule but DNS Client won't apply it for resolution + // (Get-DnsClientNrptPolicy returns empty). This is a deterministic GUID + // so we can reliably find and clean up our own rule. 
+ nrptDirectRuleName = `{B2E9A3C1-7F4D-4A8E-9D6B-5C1E0F3A2B8D}` +) + +// addNRPTCatchAllRule creates an NRPT catch-all rule that directs all DNS queries +// to the specified listener IP. +// +// Windows NRPT has two registry paths with all-or-nothing precedence: +// - GP path: SOFTWARE\Policies\...\DnsPolicyConfig (Group Policy) +// - Local path: SYSTEM\CurrentControlSet\...\DnsPolicyConfig (service store) +// +// If ANY rules exist in the GP path (from IT policy, VPN, MDM, etc.), DNS Client +// enters "GP mode" and ignores ALL local-path rules entirely. Conversely, if the +// GP path is empty/absent, DNS Client reads from the local path only. +// +// Strategy (matching Tailscale's approach): +// - Always write to the local path (baseline for non-domain machines). +// - Check if OTHER software has GP rules. If yes, also write to the GP path +// so our rule isn't invisible. If no, clean our stale GP rules and delete the +// empty GP key to stay in "local mode". +// - After GP writes, call RefreshPolicyEx to activate. +func addNRPTCatchAllRule(listenerIP string) error { + // Always write to local/direct service store path. + if err := writeNRPTRule(nrptDirectKey+`\`+nrptDirectRuleName, listenerIP); err != nil { + return fmt.Errorf("failed to write NRPT local path rule: %w", err) + } + + // Check if other software has GP NRPT rules. If so, we must also write + // to the GP path — otherwise DNS Client's "GP mode" hides our local rule. + if otherGPRulesExist() { + mainLog.Load().Info().Msg("DNS intercept: other GP NRPT rules detected — also writing to GP path") + if err := writeNRPTRule(nrptBaseKey+`\`+nrptRuleName, listenerIP); err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to write NRPT GP rule (local rule still active if GP clears)") + } + } else { + // No other GP rules — clean our stale GP entry and delete the empty + // GP parent key so DNS Client stays in "local mode". 
+ cleanGPPath() + } + return nil +} + +// otherGPRulesExist checks if non-ctrld NRPT rules exist in the GP path. +// When other software (IT policy, VPN, MDM) has GP rules, DNS Client enters +// "GP mode" and ignores ALL local-path rules. +func otherGPRulesExist() bool { + k, err := registry.OpenKey(registry.LOCAL_MACHINE, nrptBaseKey, registry.ENUMERATE_SUB_KEYS) + if err != nil { + return false // GP key doesn't exist — no GP rules. + } + names, err := k.ReadSubKeyNames(-1) + k.Close() + if err != nil { + return false + } + for _, name := range names { + if name != nrptRuleName { // Not our CtrldCatchAll + return true + } + } + return false +} + +// cleanGPPath removes our CtrldCatchAll rule from the GP path and deletes +// the GP DnsPolicyConfig parent key if no other rules remain. Removing the +// empty GP key is critical: its mere existence forces DNS Client into "GP mode" +// where local-path rules are ignored. +func cleanGPPath() { + // Delete our specific rule. + registry.DeleteKey(registry.LOCAL_MACHINE, nrptBaseKey+`\`+nrptRuleName) + + // If the GP parent key is now empty, delete it entirely to exit "GP mode". + k, err := registry.OpenKey(registry.LOCAL_MACHINE, nrptBaseKey, registry.ENUMERATE_SUB_KEYS) + if err != nil { + return // Key doesn't exist — clean state. + } + names, err := k.ReadSubKeyNames(-1) + k.Close() + if err != nil || len(names) > 0 { + if len(names) > 0 { + mainLog.Load().Debug().Strs("remaining", names).Msg("DNS intercept: GP path has other rules, leaving parent key") + } + return + } + // Empty — delete it to exit "GP mode". + if err := registry.DeleteKey(registry.LOCAL_MACHINE, nrptBaseKey); err == nil { + mainLog.Load().Info().Msg("DNS intercept: deleted empty GP DnsPolicyConfig key (exits GP mode)") + } +} + +// writeNRPTRule writes a single NRPT catch-all rule at the given registry keyPath. 
+func writeNRPTRule(keyPath, listenerIP string) error { + k, _, err := registry.CreateKey(registry.LOCAL_MACHINE, keyPath, registry.SET_VALUE) + if err != nil { + return fmt.Errorf("failed to create NRPT registry key %q: %w", keyPath, err) + } + defer k.Close() + + // Name (REG_MULTI_SZ): namespace patterns to match. "." = catch-all. + if err := k.SetStringsValue("Name", []string{"."}); err != nil { + return fmt.Errorf("failed to set NRPT Name value: %w", err) + } + // GenericDNSServers (REG_SZ): DNS server(s) to use for matching queries. + if err := k.SetStringValue("GenericDNSServers", listenerIP); err != nil { + return fmt.Errorf("failed to set NRPT GenericDNSServers value: %w", err) + } + // ConfigOptions (REG_DWORD): 0x8 = use standard DNS resolution (no DirectAccess). + if err := k.SetDWordValue("ConfigOptions", 0x8); err != nil { + return fmt.Errorf("failed to set NRPT ConfigOptions value: %w", err) + } + // Version (REG_DWORD): 0x2 = NRPT rule version 2. + if err := k.SetDWordValue("Version", 0x2); err != nil { + return fmt.Errorf("failed to set NRPT Version value: %w", err) + } + // Match the exact fields Add-DnsClientNrptRule creates. The DNS Client CIM + // provider writes these as empty strings; their absence may cause the service + // to skip the rule on some Windows builds. + k.SetStringValue("Comment", "") + k.SetStringValue("DisplayName", "") + k.SetStringValue("IPSECCARestriction", "") + return nil +} + +// removeNRPTCatchAllRule deletes the ctrld NRPT catch-all registry key and +// cleans up the empty parent key if no other NRPT rules remain. +// +// The empty parent cleanup is critical: an empty DnsPolicyConfig key causes +// DNS Client to cache a "no rules" state. On next start, DNS Client ignores +// newly written rules because it still has the cached empty state. By deleting +// the empty parent on stop, we ensure a clean slate for the next start. +func removeNRPTCatchAllRule() error { + // Remove our GUID-named rule from local/direct path. 
+ if err := registry.DeleteKey(registry.LOCAL_MACHINE, nrptDirectKey+`\`+nrptDirectRuleName); err != nil { + if err != registry.ErrNotExist { + return fmt.Errorf("failed to delete NRPT local rule: %w", err) + } + } + deleteEmptyParentKey(nrptDirectKey) + // Clean up legacy rules from earlier builds (plain name in direct path, GP path rules). + registry.DeleteKey(registry.LOCAL_MACHINE, nrptDirectKey+`\`+nrptRuleName) + cleanGPPath() + return nil +} + +// deleteEmptyParentKey removes a registry key if it exists but has no subkeys. +func deleteEmptyParentKey(keyPath string) { + k, err := registry.OpenKey(registry.LOCAL_MACHINE, keyPath, registry.ENUMERATE_SUB_KEYS) + if err != nil { + return + } + names, err := k.ReadSubKeyNames(-1) + k.Close() + if err != nil || len(names) > 0 { + return + } + registry.DeleteKey(registry.LOCAL_MACHINE, keyPath) +} + +// nrptCatchAllRuleExists checks whether our NRPT catch-all rule exists +// in either the local or GP path. +func nrptCatchAllRuleExists() bool { + for _, path := range []string{ + nrptDirectKey + `\` + nrptDirectRuleName, + nrptBaseKey + `\` + nrptRuleName, + } { + k, err := registry.OpenKey(registry.LOCAL_MACHINE, path, registry.QUERY_VALUE) + if err == nil { + k.Close() + return true + } + } + return false +} + +// refreshNRPTPolicy triggers a machine Group Policy refresh so the DNS Client +// service picks up new/changed NRPT registry entries immediately. Without this, +// NRPT changes only take effect on the next GP cycle (default: 90 minutes). +// +// Uses RefreshPolicyEx(bMachine=TRUE, dwOptions=RP_FORCE=1) from userenv.dll. 
// See: https://learn.microsoft.com/en-us/windows/win32/api/userenv/nf-userenv-refreshpolicyex
func refreshNRPTPolicy() {
	// Preferred path: RefreshPolicyEx from userenv.dll. If the DLL or the
	// export is unavailable, shell out to gpupdate instead.
	if err := userenvDLL.Load(); err != nil {
		mainLog.Load().Debug().Err(err).Msg("DNS intercept: userenv.dll not available, falling back to gpupdate")
		if out, err := exec.Command("gpupdate", "/target:computer", "/force").CombinedOutput(); err != nil {
			mainLog.Load().Debug().Msgf("DNS intercept: gpupdate failed: %v: %s", err, string(out))
		} else {
			mainLog.Load().Debug().Msg("DNS intercept: triggered GP refresh via gpupdate")
		}
		return
	}
	if err := procRefreshPolicyEx.Find(); err != nil {
		mainLog.Load().Debug().Err(err).Msg("DNS intercept: RefreshPolicyEx not found, falling back to gpupdate")
		// Best-effort fallback; the command error is deliberately ignored.
		exec.Command("gpupdate", "/target:computer", "/force").Run()
		return
	}
	// RefreshPolicyEx(BOOL bMachine, DWORD dwOptions)
	// bMachine=1 (TRUE) = refresh computer policy, dwOptions=1 (RP_FORCE) = force refresh
	ret, _, _ := procRefreshPolicyEx.Call(1, 1)
	if ret != 0 {
		// Non-zero BOOL return means Windows accepted the refresh request.
		mainLog.Load().Debug().Msg("DNS intercept: triggered machine GP refresh via RefreshPolicyEx")
	} else {
		mainLog.Load().Debug().Msg("DNS intercept: RefreshPolicyEx returned FALSE, falling back to gpupdate")
		exec.Command("gpupdate", "/target:computer", "/force").Run()
	}
}

// flushDNSCache flushes the Windows DNS Client resolver cache and triggers a
// Group Policy refresh so NRPT changes take effect immediately.
// Uses DnsFlushResolverCache from dnsapi.dll + RefreshPolicyEx from userenv.dll.
func flushDNSCache() {
	// Step 1: Refresh GP so DNS Client loads the new NRPT rules from registry.
	refreshNRPTPolicy()

	// Step 2: Flush the DNS cache so stale entries from pre-NRPT resolution are cleared.
	if err := dnsapiDLL.Load(); err == nil {
		if err := procDnsFlushResolverCache.Find(); err == nil {
			ret, _, _ := procDnsFlushResolverCache.Call()
			if ret != 0 {
				// Non-zero BOOL return indicates the flush succeeded.
				mainLog.Load().Debug().Msg("DNS intercept: flushed DNS resolver cache via DnsFlushResolverCache")
				return
			}
		}
	}
	// Fallback: use ipconfig /flushdns.
	if out, err := exec.Command("ipconfig", "/flushdns").CombinedOutput(); err != nil {
		mainLog.Load().Debug().Msgf("DNS intercept: ipconfig /flushdns failed: %v: %s", err, string(out))
	} else {
		mainLog.Load().Debug().Msg("DNS intercept: flushed DNS resolver cache via ipconfig /flushdns")
	}
}

// startDNSIntercept activates WFP-based DNS interception on Windows.
// It creates a WFP sublayer and adds filters that block all outbound DNS (port 53)
// traffic except to localhost (127.0.0.1/::1), ensuring all DNS queries must go
// through ctrld's local listener. This eliminates the race condition with VPN
// software that overwrites interface DNS settings.
//
// The approach:
// 1. Permit outbound DNS to 127.0.0.1/::1 (ctrld's listener)
// 2. Block all other outbound DNS (port 53 UDP+TCP)
//
// This means even if a VPN overwrites DNS settings to its own servers,
// the OS cannot reach those servers on port 53 — queries fail and fall back
// to ctrld via the loopback address.
//
// Returns a non-nil error when the NRPT rule cannot be written, or (hard mode
// only) when the WFP setup fails; in the latter case the NRPT rule is rolled
// back before returning.
func (p *prog) startDNSIntercept() error {
	// Resolve the actual listener IP. On AD DC / Windows Server with a local DNS
	// server, ctrld may have fallen back to 127.0.0.x:53 instead of 127.0.0.1:53.
	// NRPT must point to whichever address ctrld is actually listening on.
	listenerIP := "127.0.0.1"
	if lc := p.cfg.FirstListener(); lc != nil && lc.IP != "" && lc.IP != "0.0.0.0" && lc.IP != "::" {
		listenerIP = lc.IP
	} else if lc != nil && (lc.IP == "0.0.0.0" || lc.IP == "::") {
		// Wildcard listeners are reachable via loopback, so 127.0.0.1 is safe.
		mainLog.Load().Warn().Str("configured_ip", lc.IP).
			Msg("DNS intercept: listener configured with wildcard IP, using 127.0.0.1 for NRPT rules")
	}

	state := &wfpState{
		stopCh:     make(chan struct{}),
		listenerIP: listenerIP,
	}

	// Step 1: Add NRPT catch-all rule (both dns and hard modes).
	// NRPT must succeed before proceeding with WFP in hard mode.
	mainLog.Load().Info().Msgf("DNS intercept: initializing (mode: %s)", interceptMode)
	logNRPTParentKeyState("pre-write")

	// Two-phase empty parent key recovery: if the GP DnsPolicyConfig key exists
	// but is empty, DNS Client has cached a "no rules" state and won't accept
	// new rules even after they're written. Delete the empty key and signal DNS
	// Client to reset before writing our rule.
	// Two-phase recovery handles its own 2s signaling burst internally.
	cleanEmptyNRPTParent()

	if err := addNRPTCatchAllRule(listenerIP); err != nil {
		return fmt.Errorf("dns intercept: failed to add NRPT catch-all rule: %w", err)
	}
	logNRPTParentKeyState("post-write")
	state.nrptActive = true
	refreshNRPTPolicy()
	sendParamChange()
	flushDNSCache()
	mainLog.Load().Info().Msgf("DNS intercept: NRPT catch-all rule active — all DNS queries directed to %s", listenerIP)

	// Step 2: In hard mode, also set up WFP filters to block non-local DNS.
	if hardIntercept {
		if err := p.startWFPFilters(state); err != nil {
			// Roll back NRPT since WFP failed.
			mainLog.Load().Error().Err(err).Msg("DNS intercept: WFP setup failed, rolling back NRPT")
			_ = removeNRPTCatchAllRule()
			flushDNSCache()
			state.nrptActive = false
			return fmt.Errorf("dns intercept: WFP setup failed: %w", err)
		}
	} else {
		mainLog.Load().Info().Msg("DNS intercept: dns mode — NRPT only, no WFP filters (graceful)")
	}

	p.dnsInterceptState = state

	// Start periodic NRPT health monitor.
	go p.nrptHealthMonitor(state)

	// Verify NRPT is actually working (async — doesn't block startup).
	// This catches the race condition where RefreshPolicyEx returns before
	// the DNS Client service has loaded the NRPT rule from registry.
	go p.nrptProbeAndHeal()

	return nil
}

// startWFPFilters opens the WFP engine and adds all block/permit filters.
// Called only in hard intercept mode.
func (p *prog) startWFPFilters(state *wfpState) error {
	mainLog.Load().Info().Msg("DNS intercept: initializing Windows Filtering Platform (WFP)")

	var engineHandle uintptr
	session := fwpmSession0{}
	sessionName, _ := windows.UTF16PtrFromString("ctrld DNS Intercept")
	session.displayData.name = sessionName

	// RPC_C_AUTHN_DEFAULT (0xFFFFFFFF) lets the system pick the appropriate
	// authentication service. RPC_C_AUTHN_NONE (0) returns ERROR_NOT_SUPPORTED
	// on some Windows configurations (e.g., Parallels VMs).
	const rpcCAuthnDefault = 0xFFFFFFFF
	r1, _, _ := procFwpmEngineOpen0.Call(
		0,
		uintptr(rpcCAuthnDefault),
		0,
		uintptr(unsafe.Pointer(&session)),
		uintptr(unsafe.Pointer(&engineHandle)),
	)
	if r1 != 0 {
		return fmt.Errorf("FwpmEngineOpen0 failed: HRESULT 0x%x", r1)
	}
	mainLog.Load().Info().Msgf("DNS intercept: WFP engine opened (handle: 0x%x)", engineHandle)

	// Clean up any stale sublayer from a previous unclean shutdown.
	// If ctrld crashed or was killed, the non-dynamic WFP session may have left
	// orphaned filters. Deleting the sublayer removes all its child filters.
	r1, _, _ = procFwpmSubLayerDeleteByKey0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&ctrldSubLayerGUID)),
	)
	if r1 == 0 {
		mainLog.Load().Info().Msg("DNS intercept: cleaned up stale WFP sublayer from previous session")
	}
	// r1 != 0 means sublayer didn't exist — that's fine, nothing to clean up.

	sublayer := fwpmSublayer0{
		subLayerKey: ctrldSubLayerGUID,
		weight:      0xFFFF,
	}
	sublayerName, _ := windows.UTF16PtrFromString("ctrld DNS Intercept Sublayer")
	sublayerDesc, _ := windows.UTF16PtrFromString("Blocks outbound DNS except to ctrld listener. Prevents VPN DNS conflicts.")
	sublayer.displayData.name = sublayerName
	sublayer.displayData.description = sublayerDesc

	r1, _, _ = procFwpmSubLayerAdd0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&sublayer)),
		0,
	)
	if r1 != 0 {
		// Sublayer creation failed — close the engine before bailing out.
		procFwpmEngineClose0.Call(engineHandle)
		return fmt.Errorf("FwpmSubLayerAdd0 failed: HRESULT 0x%x", r1)
	}
	mainLog.Load().Info().Msg("DNS intercept: WFP sublayer created (weight: 0xFFFF — maximum priority)")

	state.engineHandle = engineHandle

	// Permit filters are added FIRST so ctrld's own listener traffic is never
	// blocked by a window where only block filters exist.
	permitFilters := []struct {
		name    string
		layer   windows.GUID
		proto   uint8
		idField *uint64
	}{
		{"Permit DNS to localhost (IPv4/UDP)", fwpmLayerALEAuthConnectV4, ipprotoUDP, &state.permitIDv4UDP},
		{"Permit DNS to localhost (IPv4/TCP)", fwpmLayerALEAuthConnectV4, ipprotoTCP, &state.permitIDv4TCP},
		{"Permit DNS to localhost (IPv6/UDP)", fwpmLayerALEAuthConnectV6, ipprotoUDP, &state.permitIDv6UDP},
		{"Permit DNS to localhost (IPv6/TCP)", fwpmLayerALEAuthConnectV6, ipprotoTCP, &state.permitIDv6TCP},
	}

	for _, pf := range permitFilters {
		filterID, err := p.addWFPPermitLocalhostFilter(engineHandle, pf.name, pf.layer, pf.proto)
		if err != nil {
			p.cleanupWFPFilters(state)
			return fmt.Errorf("failed to add permit filter %q: %w", pf.name, err)
		}
		*pf.idField = filterID
		mainLog.Load().Debug().Msgf("DNS intercept: added permit filter %q (ID: %d)", pf.name, filterID)
	}

	blockFilters := []struct {
		name    string
		layer   windows.GUID
		proto   uint8
		idField *uint64
	}{
		{"Block outbound DNS (IPv4/UDP)", fwpmLayerALEAuthConnectV4, ipprotoUDP, &state.filterIDv4UDP},
		{"Block outbound DNS (IPv4/TCP)", fwpmLayerALEAuthConnectV4, ipprotoTCP, &state.filterIDv4TCP},
		{"Block outbound DNS (IPv6/UDP)", fwpmLayerALEAuthConnectV6, ipprotoUDP, &state.filterIDv6UDP},
		{"Block outbound DNS (IPv6/TCP)", fwpmLayerALEAuthConnectV6, ipprotoTCP, &state.filterIDv6TCP},
	}

	for _, bf := range blockFilters {
		filterID, err := p.addWFPBlockDNSFilter(engineHandle, bf.name, bf.layer, bf.proto)
		if err != nil {
			p.cleanupWFPFilters(state)
			return fmt.Errorf("failed to add block filter %q: %w", bf.name, err)
		}
		*bf.idField = filterID
		mainLog.Load().Debug().Msgf("DNS intercept: added block filter %q (ID: %d)", bf.name, filterID)
	}

	// Add static permit filters for RFC1918 + CGNAT ranges (UDP + TCP).
	// This allows VPN DNS servers on private IPs (MagicDNS upstreams, F5, Windscribe, etc.)
	// to work without dynamic per-server exemptions.
	privateRanges := []struct {
		name string
		addr uint32 // host byte order
		mask uint32 // host byte order
	}{
		{"10.0.0.0/8", 0x0A000000, 0xFF000000},
		{"172.16.0.0/12", 0xAC100000, 0xFFF00000},
		{"192.168.0.0/16", 0xC0A80000, 0xFFFF0000},
		{"100.64.0.0/10", 0x64400000, 0xFFC00000}, // CGNAT (includes Tailscale)
	}
	for _, r := range privateRanges {
		for _, proto := range []struct {
			num  uint8
			name string
		}{{ipprotoUDP, "UDP"}, {ipprotoTCP, "TCP"}} {
			filterName := fmt.Sprintf("Permit DNS to %s (%s)", r.name, proto.name)
			filterID, err := p.addWFPPermitSubnetFilter(engineHandle, filterName, proto.num, r.addr, r.mask)
			if err != nil {
				// Subnet permits are best-effort: a failure degrades (private-range
				// DNS is blocked) but does not abort the whole setup.
				mainLog.Load().Warn().Err(err).Msgf("DNS intercept: failed to add subnet permit for %s/%s", r.name, proto.name)
				continue
			}
			state.subnetPermitFilterIDs = append(state.subnetPermitFilterIDs, filterID)
			mainLog.Load().Debug().Msgf("DNS intercept: added subnet permit %q (ID: %d)", filterName, filterID)
		}
	}
	mainLog.Load().Info().Msgf("DNS intercept: %d subnet permit filters active (RFC1918 + CGNAT)", len(state.subnetPermitFilterIDs))

	mainLog.Load().Info().Msgf("DNS intercept: WFP filters active — all outbound DNS (port 53) blocked except to localhost and private ranges. "+
		"Filter IDs: v4UDP=%d, v4TCP=%d, v6UDP=%d, v6TCP=%d (block), "+
		"v4UDP=%d, v4TCP=%d, v6UDP=%d, v6TCP=%d (permit localhost)",
		state.filterIDv4UDP, state.filterIDv4TCP, state.filterIDv6UDP, state.filterIDv6TCP,
		state.permitIDv4UDP, state.permitIDv4TCP, state.permitIDv6UDP, state.permitIDv6TCP)

	return nil
}

// addWFPBlockDNSFilter adds a WFP filter that blocks outbound DNS traffic (port 53)
// for the given protocol (UDP or TCP) on the specified layer (V4 or V6).
// Returns the runtime ID of the new filter, used later for removal.
func (p *prog) addWFPBlockDNSFilter(engineHandle uintptr, name string, layerKey windows.GUID, proto uint8) (uint64, error) {
	filterName, _ := windows.UTF16PtrFromString("ctrld: " + name)

	conditions := make([]fwpmFilterCondition0, 2)

	// Condition 1: IP protocol equals proto (UDP or TCP).
	conditions[0] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPProtocol,
		matchType: fwpMatchEqual,
	}
	conditions[0].condValue.valueType = fwpUint8
	conditions[0].condValue.value = uint64(proto)

	// Condition 2: remote port equals 53.
	conditions[1] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemotePort,
		matchType: fwpMatchEqual,
	}
	conditions[1].condValue.valueType = fwpUint16
	conditions[1].condValue.value = uint64(dnsPort)

	filter := fwpmFilter0{
		layerKey:        layerKey,
		subLayerKey:     ctrldSubLayerGUID,
		numFilterConds:  2,
		filterCondition: &conditions[0],
	}
	filter.displayData.name = filterName
	// Weight 1: below the permit filters (weight 10), so permits win.
	filter.weight.valueType = fwpUint8
	filter.weight.value = 1
	filter.action.actionType = fwpActionBlock

	var filterID uint64
	r1, _, _ := procFwpmFilterAdd0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&filter)),
		0,
		uintptr(unsafe.Pointer(&filterID)),
	)
	// Keep the condition array alive across the raw syscall; the GC must not
	// collect it while native code still dereferences filterCondition.
	runtime.KeepAlive(conditions)
	if r1 != 0 {
		return 0, fmt.Errorf("FwpmFilterAdd0 failed: HRESULT 0x%x", r1)
	}
	return filterID, nil
}

// addWFPPermitLocalhostFilter adds a WFP filter that permits outbound DNS to localhost.
// This ensures ctrld's listener at 127.0.0.1/::1 can receive DNS queries.
//
// TODO: On AD DC where ctrld listens on 127.0.0.x, this filter should match
// the actual listener IP instead of hardcoded 127.0.0.1. Currently hard mode
// is unlikely on AD DC (NRPT dns mode is preferred), but if needed, this must
// be parameterized like addNRPTCatchAllRule.
// These filters have higher weight than block filters so they're matched first.
func (p *prog) addWFPPermitLocalhostFilter(engineHandle uintptr, name string, layerKey windows.GUID, proto uint8) (uint64, error) {
	filterName, _ := windows.UTF16PtrFromString("ctrld: " + name)

	// ::1 — the IPv6 loopback address, used for the V6 layer condition.
	ipv6Loopback := [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}

	conditions := make([]fwpmFilterCondition0, 3)

	// Condition 1: IP protocol equals proto (UDP or TCP).
	conditions[0] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPProtocol,
		matchType: fwpMatchEqual,
	}
	conditions[0].condValue.valueType = fwpUint8
	conditions[0].condValue.value = uint64(proto)

	// Condition 2: remote port equals 53.
	conditions[1] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemotePort,
		matchType: fwpMatchEqual,
	}
	conditions[1].condValue.valueType = fwpUint16
	conditions[1].condValue.value = uint64(dnsPort)

	// Condition 3: remote address equals the loopback for this layer.
	conditions[2] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemoteAddress,
		matchType: fwpMatchEqual,
	}
	if layerKey == fwpmLayerALEAuthConnectV4 {
		conditions[2].condValue.valueType = fwpUint32
		conditions[2].condValue.value = 0x7F000001 // 127.0.0.1 in host byte order
	} else {
		// For V6 the value is a pointer to a 16-byte array; KeepAlive below
		// keeps the array alive until the syscall has consumed it.
		conditions[2].condValue.valueType = fwpByteArray16Type
		conditions[2].condValue.value = uint64(uintptr(unsafe.Pointer(&ipv6Loopback)))
	}

	filter := fwpmFilter0{
		layerKey:        layerKey,
		subLayerKey:     ctrldSubLayerGUID,
		numFilterConds:  3,
		filterCondition: &conditions[0],
	}
	filter.displayData.name = filterName
	// Weight 10: above block filters (weight 1), so this permit wins.
	filter.weight.valueType = fwpUint8
	filter.weight.value = 10
	filter.action.actionType = fwpActionPermit

	var filterID uint64
	r1, _, _ := procFwpmFilterAdd0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&filter)),
		0,
		uintptr(unsafe.Pointer(&filterID)),
	)

	runtime.KeepAlive(&ipv6Loopback)
	runtime.KeepAlive(conditions)
	if r1 != 0 {
		return 0, fmt.Errorf("FwpmFilterAdd0 failed: HRESULT 0x%x", r1)
	}
	return filterID, nil
}

// addWFPPermitSubnetFilter adds a WFP filter that permits outbound DNS to a given
// IPv4 subnet (addr/mask in host byte order). Used to exempt RFC1918 and CGNAT ranges
// so VPN DNS servers on private IPs are not blocked.
// Returns the runtime ID of the new filter, used later for removal.
func (p *prog) addWFPPermitSubnetFilter(engineHandle uintptr, name string, proto uint8, addr, mask uint32) (uint64, error) {
	filterName, _ := windows.UTF16PtrFromString("ctrld: " + name)

	addrMask := fwpV4AddrAndMask{addr: addr, mask: mask}

	conditions := make([]fwpmFilterCondition0, 3)

	// Condition 1: IP protocol equals proto (UDP or TCP).
	conditions[0] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPProtocol,
		matchType: fwpMatchEqual,
	}
	conditions[0].condValue.valueType = fwpUint8
	conditions[0].condValue.value = uint64(proto)

	// Condition 2: remote port equals 53.
	conditions[1] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemotePort,
		matchType: fwpMatchEqual,
	}
	conditions[1].condValue.valueType = fwpUint16
	conditions[1].condValue.value = uint64(dnsPort)

	// Condition 3: remote address within the addr/mask subnet.
	conditions[2] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemoteAddress,
		matchType: fwpMatchEqual,
	}
	conditions[2].condValue.valueType = fwpV4AddrMask
	conditions[2].condValue.value = uint64(uintptr(unsafe.Pointer(&addrMask)))

	filter := fwpmFilter0{
		layerKey:        fwpmLayerALEAuthConnectV4,
		subLayerKey:     ctrldSubLayerGUID,
		numFilterConds:  3,
		filterCondition: &conditions[0],
	}
	filter.displayData.name = filterName
	// Weight 10: above block filters (weight 1), so this permit wins.
	filter.weight.valueType = fwpUint8
	filter.weight.value = 10
	filter.action.actionType = fwpActionPermit

	var filterID uint64
	r1, _, _ := procFwpmFilterAdd0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&filter)),
		0,
		uintptr(unsafe.Pointer(&filterID)),
	)
	runtime.KeepAlive(&addrMask)
	runtime.KeepAlive(conditions)
	if r1 != 0 {
		return 0, fmt.Errorf("FwpmFilterAdd0 failed: HRESULT 0x%x", r1)
	}
	return filterID, nil
}

// wfpSublayerExists checks whether our WFP sublayer still exists in the engine.
// Used by the watchdog to detect if another program removed our filters.
func wfpSublayerExists(engineHandle uintptr) bool {
	var sublayerPtr uintptr
	r1, _, _ := procFwpmSubLayerGetByKey0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&ctrldSubLayerGUID)),
		uintptr(unsafe.Pointer(&sublayerPtr)),
	)
	if r1 != 0 {
		return false
	}
	// Free the returned sublayer struct.
	if sublayerPtr != 0 {
		procFwpmFreeMemory0.Call(uintptr(unsafe.Pointer(&sublayerPtr)))
	}
	return true
}

// cleanupWFPFilters removes all WFP filters and the sublayer, then closes the engine.
// It logs each step and continues cleanup even if individual removals fail,
// to ensure maximum cleanup on shutdown.
func (p *prog) cleanupWFPFilters(state *wfpState) {
	// Nothing to do when the engine was never opened (dns mode / early failure).
	if state == nil || state.engineHandle == 0 {
		return
	}

	// Dynamic per-VPN-server permits first.
	for _, filterID := range state.vpnPermitFilterIDs {
		r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(filterID))
		if r1 != 0 {
			mainLog.Load().Warn().Msgf("DNS intercept: failed to remove VPN permit filter (ID: %d, code: 0x%x)", filterID, r1)
		} else {
			mainLog.Load().Debug().Msgf("DNS intercept: removed VPN permit filter (ID: %d)", filterID)
		}
	}

	// Static RFC1918/CGNAT subnet permits.
	for _, filterID := range state.subnetPermitFilterIDs {
		r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(filterID))
		if r1 != 0 {
			mainLog.Load().Warn().Msgf("DNS intercept: failed to remove subnet permit filter (ID: %d, code: 0x%x)", filterID, r1)
		} else {
			mainLog.Load().Debug().Msgf("DNS intercept: removed subnet permit filter (ID: %d)", filterID)
		}
	}

	// Fixed localhost permit + block filters.
	filterIDs := []struct {
		name string
		id   uint64
	}{
		{"permit v4 UDP", state.permitIDv4UDP},
		{"permit v4 TCP", state.permitIDv4TCP},
		{"permit v6 UDP", state.permitIDv6UDP},
		{"permit v6 TCP", state.permitIDv6TCP},
		{"block v4 UDP", state.filterIDv4UDP},
		{"block v4 TCP", state.filterIDv4TCP},
		{"block v6 UDP", state.filterIDv6UDP},
		{"block v6 TCP", state.filterIDv6TCP},
	}

	for _, f := range filterIDs {
		// ID 0 means the filter was never added — skip it.
		if f.id == 0 {
			continue
		}
		r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(f.id))
		if r1 != 0 {
			mainLog.Load().Warn().Msgf("DNS intercept: failed to remove WFP filter %q (ID: %d, code: 0x%x)", f.name, f.id, r1)
		} else {
			mainLog.Load().Debug().Msgf("DNS intercept: removed WFP filter %q (ID: %d)", f.name, f.id)
		}
	}

	r1, _, _ := procFwpmSubLayerDeleteByKey0.Call(
		state.engineHandle,
		uintptr(unsafe.Pointer(&ctrldSubLayerGUID)),
	)
	if r1 != 0 {
		mainLog.Load().Warn().Msgf("DNS intercept: failed to remove WFP sublayer (code: 0x%x)", r1)
	} else {
		mainLog.Load().Debug().Msg("DNS intercept: removed WFP sublayer")
	}

	r1, _, _ = procFwpmEngineClose0.Call(state.engineHandle)
	if r1 != 0 {
		mainLog.Load().Warn().Msgf("DNS intercept: failed to close WFP engine (code: 0x%x)", r1)
	} else {
		mainLog.Load().Debug().Msg("DNS intercept: WFP engine closed")
	}
}

// stopDNSIntercept removes all WFP filters and shuts down the DNS interception.
// Safe to call when interception was never started (no-op).
func (p *prog) stopDNSIntercept() error {
	if p.dnsInterceptState == nil {
		mainLog.Load().Debug().Msg("DNS intercept: no state to clean up")
		return nil
	}

	state := p.dnsInterceptState.(*wfpState)

	// Stop the health monitor goroutine.
	if state.stopCh != nil {
		close(state.stopCh)
	}

	// Remove NRPT rule BEFORE WFP cleanup — restore normal DNS resolution
	// before removing the block filters that enforce it.
	if state.nrptActive {
		if err := removeNRPTCatchAllRule(); err != nil {
			mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to remove NRPT catch-all rule")
		} else {
			mainLog.Load().Info().Msg("DNS intercept: removed NRPT catch-all rule")
		}
		flushDNSCache()
		state.nrptActive = false
	}

	// Only clean up WFP if we actually opened the engine (hard mode).
	if state.engineHandle != 0 {
		mainLog.Load().Info().Msg("DNS intercept: shutting down WFP filters")
		p.cleanupWFPFilters(state)
		mainLog.Load().Info().Msg("DNS intercept: WFP shutdown complete")
	}

	p.dnsInterceptState = nil
	mainLog.Load().Info().Msg("DNS intercept: shutdown complete")
	return nil
}

// dnsInterceptSupported reports whether DNS intercept mode is supported on this platform.
// It requires fwpuclnt.dll (the WFP user-mode client library) to be loadable.
func dnsInterceptSupported() bool {
	if err := fwpuclntDLL.Load(); err != nil {
		return false
	}
	return true
}

// validateDNSIntercept checks that the system meets requirements for DNS intercept mode.
// Returns a descriptive error when a hard-mode prerequisite is missing.
func (p *prog) validateDNSIntercept() error {
	// Hard mode requires WFP and elevation for filter management.
	if hardIntercept {
		if !dnsInterceptSupported() {
			return fmt.Errorf("dns intercept: fwpuclnt.dll not available — WFP requires Windows Vista or later")
		}
		if !isElevated() {
			return fmt.Errorf("dns intercept: administrator privileges required for WFP filter management in hard mode")
		}
	}
	// dns mode only needs NRPT (HKLM registry writes), which services can do
	// without explicit elevation checks.
	return nil
}

// isElevated checks if the current process has administrator privileges.
func isElevated() bool {
	token := windows.GetCurrentProcessToken()
	return token.IsElevated()
}

// exemptVPNDNSServers updates the WFP filters to permit outbound DNS to the given
// VPN DNS server IPs. This prevents the block filters from intercepting ctrld's own
// forwarded queries to VPN DNS servers (split DNS routing).
//
// The function is idempotent: it first removes ALL existing VPN permit filters,
// then adds new ones for the current server list. When called with nil/empty
// servers (VPN disconnected), it just removes the old permits — leaving only
// the localhost permits and block-all filters active.
//
// Supports both IPv4 and IPv6 VPN DNS servers.
//
// Called by vpnDNSManager.onServersChanged() whenever VPN DNS servers change.
func (p *prog) exemptVPNDNSServers(exemptions []vpnDNSExemption) error {
	state, ok := p.dnsInterceptState.(*wfpState)
	if !ok || state == nil {
		return fmt.Errorf("DNS intercept state not available")
	}
	// In dns mode (no WFP), VPN DNS exemptions are not needed — there are no
	// block filters to exempt from.
	if state.engineHandle == 0 {
		mainLog.Load().Debug().Msg("DNS intercept: dns mode — skipping VPN DNS exemptions (no WFP filters)")
		return nil
	}

	// Drop all previously added VPN permits (idempotency).
	for _, filterID := range state.vpnPermitFilterIDs {
		r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(filterID))
		if r1 != 0 {
			mainLog.Load().Warn().Msgf("DNS intercept: failed to remove old VPN permit filter (ID: %d, code: 0x%x)", filterID, r1)
		}
	}
	state.vpnPermitFilterIDs = nil

	// Extract unique server IPs (WFP doesn't need interface info).
	seen := make(map[string]bool)
	var servers []string
	for _, ex := range exemptions {
		if !seen[ex.Server] {
			seen[ex.Server] = true
			servers = append(servers, ex.Server)
		}
	}

	for _, server := range servers {
		// parseIPv4AsUint32 returns 0 for non-IPv4 input, which selects the
		// IPv6 path below.
		ipv4 := parseIPv4AsUint32(server)
		isIPv6 := ipv4 == 0

		for _, proto := range []uint8{ipprotoUDP, ipprotoTCP} {
			protoName := "UDP"
			if proto == ipprotoTCP {
				protoName = "TCP"
			}
			filterName := fmt.Sprintf("ctrld: Permit VPN DNS to %s (%s)", server, protoName)

			var filterID uint64
			var err error
			if isIPv6 {
				ipv6Bytes := parseIPv6AsBytes(server)
				if ipv6Bytes == nil {
					// Neither valid IPv4 nor IPv6 — skip rather than fail the batch.
					mainLog.Load().Warn().Msgf("DNS intercept: skipping invalid VPN DNS server: %s", server)
					continue
				}
				filterID, err = p.addWFPPermitIPv6Filter(state.engineHandle, filterName, fwpmLayerALEAuthConnectV6, proto, ipv6Bytes)
			} else {
				filterID, err = p.addWFPPermitIPFilter(state.engineHandle, filterName, fwpmLayerALEAuthConnectV4, proto, ipv4)
			}
			if err != nil {
				return fmt.Errorf("failed to add VPN DNS permit filter for %s/%s: %w", server, protoName, err)
			}
			state.vpnPermitFilterIDs = append(state.vpnPermitFilterIDs, filterID)
			mainLog.Load().Debug().Msgf("DNS intercept: added VPN DNS permit filter for %s/%s (ID: %d)", server, protoName, filterID)
		}
	}

	mainLog.Load().Info().Msgf("DNS intercept: exempted %d VPN DNS servers from WFP block (%d filters)", len(servers), len(state.vpnPermitFilterIDs))
	return nil
}

// addWFPPermitIPFilter adds a WFP permit filter for outbound DNS to a specific IPv4 address.
// Unlike the other helpers, name is expected to already carry the "ctrld: " prefix.
func (p *prog) addWFPPermitIPFilter(engineHandle uintptr, name string, layerKey windows.GUID, proto uint8, ipAddr uint32) (uint64, error) {
	filterName, _ := windows.UTF16PtrFromString(name)

	conditions := make([]fwpmFilterCondition0, 3)

	// Condition 1: IP protocol equals proto (UDP or TCP).
	conditions[0] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPProtocol,
		matchType: fwpMatchEqual,
	}
	conditions[0].condValue.valueType = fwpUint8
	conditions[0].condValue.value = uint64(proto)

	// Condition 2: remote port equals 53.
	conditions[1] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemotePort,
		matchType: fwpMatchEqual,
	}
	conditions[1].condValue.valueType = fwpUint16
	conditions[1].condValue.value = uint64(dnsPort)

	// Condition 3: remote address equals the VPN DNS server (host byte order).
	conditions[2] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemoteAddress,
		matchType: fwpMatchEqual,
	}
	conditions[2].condValue.valueType = fwpUint32
	conditions[2].condValue.value = uint64(ipAddr)

	filter := fwpmFilter0{
		layerKey:        layerKey,
		subLayerKey:     ctrldSubLayerGUID,
		numFilterConds:  3,
		filterCondition: &conditions[0],
	}
	filter.displayData.name = filterName
	// Weight 10: above block filters (weight 1), so this permit wins.
	filter.weight.valueType = fwpUint8
	filter.weight.value = 10
	filter.action.actionType = fwpActionPermit

	var filterID uint64
	r1, _, _ := procFwpmFilterAdd0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&filter)),
		0,
		uintptr(unsafe.Pointer(&filterID)),
	)
	runtime.KeepAlive(conditions)
	if r1 != 0 {
		return 0, fmt.Errorf("FwpmFilterAdd0 failed: HRESULT 0x%x", r1)
	}
	return filterID, nil
}

// addWFPPermitIPv6Filter adds a WFP permit filter for outbound DNS to a specific IPv6 address.
func (p *prog) addWFPPermitIPv6Filter(engineHandle uintptr, name string, layerKey windows.GUID, proto uint8, ipAddr *[16]byte) (uint64, error) {
	filterName, _ := windows.UTF16PtrFromString(name)

	conditions := make([]fwpmFilterCondition0, 3)

	// Condition 1: IP protocol equals proto (UDP or TCP).
	conditions[0] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPProtocol,
		matchType: fwpMatchEqual,
	}
	conditions[0].condValue.valueType = fwpUint8
	conditions[0].condValue.value = uint64(proto)

	// Condition 2: remote port equals 53.
	conditions[1] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemotePort,
		matchType: fwpMatchEqual,
	}
	conditions[1].condValue.valueType = fwpUint16
	conditions[1].condValue.value = uint64(dnsPort)

	// Condition 3: remote address equals the VPN DNS server. The value is a
	// pointer to the 16-byte array; KeepAlive below keeps it alive across the
	// raw syscall.
	conditions[2] = fwpmFilterCondition0{
		fieldKey:  fwpmConditionIPRemoteAddress,
		matchType: fwpMatchEqual,
	}
	conditions[2].condValue.valueType = fwpByteArray16Type
	conditions[2].condValue.value = uint64(uintptr(unsafe.Pointer(ipAddr)))

	filter := fwpmFilter0{
		layerKey:        layerKey,
		subLayerKey:     ctrldSubLayerGUID,
		numFilterConds:  3,
		filterCondition: &conditions[0],
	}
	filter.displayData.name = filterName
	// Weight 10: above block filters (weight 1), so this permit wins.
	filter.weight.valueType = fwpUint8
	filter.weight.value = 10
	filter.action.actionType = fwpActionPermit

	var filterID uint64
	r1, _, _ := procFwpmFilterAdd0.Call(
		engineHandle,
		uintptr(unsafe.Pointer(&filter)),
		0,
		uintptr(unsafe.Pointer(&filterID)),
	)
	runtime.KeepAlive(ipAddr)
	runtime.KeepAlive(conditions)
	if r1 != 0 {
		return 0, fmt.Errorf("FwpmFilterAdd0 failed: HRESULT 0x%x", r1)
	}
	return filterID, nil
}

// parseIPv6AsBytes parses an IPv6 address string into a 16-byte array for WFP.
// Returns nil if the string is not a valid IPv6 address.
+func parseIPv6AsBytes(ipStr string) *[16]byte { + ip := net.ParseIP(ipStr) + if ip == nil { + return nil + } + ip = ip.To16() + if ip == nil || ip.To4() != nil { + // It's IPv4, not IPv6 + return nil + } + var result [16]byte + copy(result[:], ip) + return &result +} + +// parseIPv4AsUint32 converts an IPv4 string to a uint32 in host byte order for WFP. +func parseIPv4AsUint32(ipStr string) uint32 { + parts := [4]byte{} + n := 0 + val := uint32(0) + for i := 0; i < len(ipStr) && n < 4; i++ { + if ipStr[i] == '.' { + parts[n] = byte(val) + n++ + val = 0 + } else if ipStr[i] >= '0' && ipStr[i] <= '9' { + val = val*10 + uint32(ipStr[i]-'0') + } else { + return 0 + } + } + if n == 3 { + parts[3] = byte(val) + return uint32(parts[0])<<24 | uint32(parts[1])<<16 | uint32(parts[2])<<8 | uint32(parts[3]) + } + return 0 +} + +// ensurePFAnchorActive is a no-op on Windows (WFP handles intercept differently). +func (p *prog) ensurePFAnchorActive() bool { + return false +} + +// pfAnchorIsWiped is a no-op on Windows (WFP handles intercept differently). +func (p *prog) pfAnchorIsWiped() bool { + return false +} + +// checkTunnelInterfaceChanges is a no-op on Windows (WFP handles intercept differently). +func (p *prog) checkTunnelInterfaceChanges() bool { + return false +} + +// pfAnchorRecheckDelay is the delay for deferred pf anchor re-checks. +// Defined here as a stub for Windows (referenced from dns_proxy.go). +const pfAnchorRecheckDelay = 2 * time.Second + +// pfAnchorRecheckDelayLong is the longer delayed re-check for slower VPN teardowns. +const pfAnchorRecheckDelayLong = 4 * time.Second + +// scheduleDelayedRechecks schedules delayed OS resolver and VPN DNS refreshes after +// network change events. While WFP filters don't get wiped like pf anchors, the OS +// resolver and VPN DNS state can still be stale after VPN disconnect (same issue as macOS). 
+func (p *prog) scheduleDelayedRechecks() { + for _, delay := range []time.Duration{pfAnchorRecheckDelay, pfAnchorRecheckDelayLong} { + time.AfterFunc(delay, func() { + if p.dnsInterceptState == nil { + return + } + // Refresh OS resolver — VPN may have finished DNS cleanup since the + // immediate handler ran. + ctrld.InitializeOsResolver(true) + if p.vpnDNS != nil { + p.vpnDNS.Refresh(true) + } + + // NRPT watchdog: some VPN software clears NRPT policy rules on + // connect/disconnect. Re-add our catch-all rule if it was removed. + state, ok := p.dnsInterceptState.(*wfpState) + if ok && state.nrptActive && !nrptCatchAllRuleExists() { + mainLog.Load().Warn().Msg("DNS intercept: NRPT catch-all rule was removed externally — re-adding") + if err := addNRPTCatchAllRule(state.listenerIP); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to re-add NRPT catch-all rule") + state.nrptActive = false + } else { + flushDNSCache() + mainLog.Load().Info().Msg("DNS intercept: NRPT catch-all rule restored") + } + } + + // WFP watchdog: verify our sublayer still exists. If another program + // or a crash removed it, the block filters are gone too. + if ok && state.engineHandle != 0 && !wfpSublayerExists(state.engineHandle) { + mainLog.Load().Warn().Msg("DNS intercept: WFP sublayer was removed externally — re-creating all filters") + // Full teardown + re-init. stopDNSIntercept clears state, + // then startDNSIntercept creates everything fresh. + _ = p.stopDNSIntercept() + if err := p.startDNSIntercept(); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to re-create WFP filters") + } + } + }) + } +} + +// nrptHealthMonitor periodically checks that the NRPT catch-all rule is still +// present and re-adds it if removed by VPN software or Group Policy updates. +// In hard mode, it also verifies the WFP sublayer exists and re-initializes +// all filters if they were removed. 
+func (p *prog) nrptHealthMonitor(state *wfpState) { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + for { + select { + case <-state.stopCh: + return + case <-ticker.C: + if !state.nrptActive { + continue + } + + // Step 1: Check registry key exists. + if !nrptCatchAllRuleExists() { + mainLog.Load().Warn().Msg("DNS intercept: NRPT health check — catch-all rule missing, restoring") + if err := addNRPTCatchAllRule(state.listenerIP); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to restore NRPT catch-all rule") + state.nrptActive = false + continue + } + refreshNRPTPolicy() + flushDNSCache() + mainLog.Load().Info().Msg("DNS intercept: NRPT catch-all rule restored by health monitor") + // After restoring, verify it's actually working. + go p.nrptProbeAndHeal() + continue + } + + // Step 2: Registry key exists — verify NRPT is actually routing + // queries to ctrld (catches the async GP refresh race). + if !p.probeNRPT() { + mainLog.Load().Warn().Msg("DNS intercept: NRPT health check — rule present but probe failed, running heal cycle") + go p.nrptProbeAndHeal() + } + + // Step 3: In hard mode, also verify WFP sublayer. + if state.engineHandle != 0 && !wfpSublayerExists(state.engineHandle) { + mainLog.Load().Warn().Msg("DNS intercept: WFP health check — sublayer missing, re-initializing all filters") + _ = p.stopDNSIntercept() + if err := p.startDNSIntercept(); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to re-initialize after WFP sublayer loss") + } else { + mainLog.Load().Info().Msg("DNS intercept: WFP filters restored by health monitor") + } + return // stopDNSIntercept closed our stopCh; startDNSIntercept started a new monitor + } + } + } +} + +// pfInterceptMonitor is a no-op on Windows — WFP filters are kernel objects +// and don't suffer from the pf translation state corruption that macOS has. 
+func (p *prog) pfInterceptMonitor() {} + +const ( + // nrptProbeDomain is the suffix used for NRPT verification probe queries. + // Probes use "_nrpt-probe-<hex>." + nrptProbeDomain — ctrld recognizes the + // "_nrpt-probe-" prefix in the DNS handler and responds immediately without upstream forwarding. + nrptProbeDomain = "nrpt-probe.ctrld.test" + + // nrptProbeTimeout is how long to wait for a single probe query to arrive. + nrptProbeTimeout = 2 * time.Second +) + +// nrptProbeRunning ensures only one NRPT probe sequence runs at a time. +// Prevents the health monitor and startup from overlapping. +var nrptProbeRunning atomic.Bool + +// probeNRPT tests whether the NRPT catch-all rule is actually routing DNS queries +// to ctrld's listener. It sends a DNS query for a synthetic probe domain through +// the Windows DNS Client service (via Go's net.Resolver / GetAddrInfoW). If ctrld +// receives the query on its listener, NRPT is working. +// +// Returns true if NRPT is verified working, false if the probe timed out. +func (p *prog) probeNRPT() bool { + if p.dnsInterceptState == nil { + return true + } + + // Generate unique probe domain to defeat DNS caching. + probeID := fmt.Sprintf("_nrpt-probe-%x.%s", rand.Uint32(), nrptProbeDomain) + + // Register probe so DNS handler can detect and signal it. + // Reuse the same mechanism as macOS pf probes (pfProbeExpected/pfProbeCh). + probeCh := make(chan struct{}, 1) + p.pfProbeExpected.Store(probeID) + p.pfProbeCh.Store(&probeCh) + defer func() { + p.pfProbeExpected.Store("") + p.pfProbeCh.Store((*chan struct{})(nil)) + }() + + mainLog.Load().Debug().Str("domain", probeID).Msg("DNS intercept: sending NRPT verification probe") + + // Use Go's default resolver which calls GetAddrInfoW → DNS Client service → NRPT. + // If NRPT is active, the DNS Client routes this to 127.0.0.1 → ctrld receives it. + // If NRPT isn't loaded, the query goes to interface DNS → times out or NXDOMAIN. 
+ ctx, cancel := context.WithTimeout(context.Background(), nrptProbeTimeout) + defer cancel() + + go func() { + resolver := &net.Resolver{} + // We don't care about the result — only whether ctrld's handler receives it. + _, _ = resolver.LookupHost(ctx, probeID) + }() + + select { + case <-probeCh: + mainLog.Load().Debug().Str("domain", probeID).Msg("DNS intercept: NRPT probe received — interception verified") + return true + case <-ctx.Done(): + mainLog.Load().Debug().Str("domain", probeID).Msg("DNS intercept: NRPT probe timed out — interception not working") + return false + } +} + +// sendParamChange sends SERVICE_CONTROL_PARAMCHANGE to the DNS Client (Dnscache) +// service, signaling it to re-read its configuration including NRPT rules from +// the registry. This is the standard mechanism used by FortiClient, Tailscale, +// and other DNS-aware software — it's reliable and non-disruptive unlike +// restarting the Dnscache service (which always fails on modern Windows because +// Dnscache is a protected shared svchost service). +func sendParamChange() { + if out, err := exec.Command("sc", "control", "dnscache", "paramchange").CombinedOutput(); err != nil { + mainLog.Load().Debug().Err(err).Str("output", string(out)).Msg("DNS intercept: sc control dnscache paramchange failed") + } else { + mainLog.Load().Debug().Msg("DNS intercept: sent paramchange to Dnscache service") + } +} + +// cleanEmptyNRPTParent removes empty NRPT parent keys that block activation. +// An empty DnsPolicyConfig key (exists but no subkeys) causes DNS Client to +// cache "no rules" and ignore subsequently-added rules. +// +// Also cleans the GP path entirely if it has no non-ctrld rules, since the GP +// path's existence forces DNS Client into "GP mode" where it ignores the local +// service store path. +// +// Returns true if cleanup was performed (caller should add a delay). 
+func cleanEmptyNRPTParent() bool { + cleaned := false + + // Always clean the GP path — its existence blocks local path activation. + cleanGPPath() + + // Clean empty local/direct path parent key. + k, err := registry.OpenKey(registry.LOCAL_MACHINE, nrptDirectKey, registry.ENUMERATE_SUB_KEYS) + if err != nil { + return false + } + names, err := k.ReadSubKeyNames(-1) + k.Close() + if err != nil || len(names) > 0 { + return false + } + + mainLog.Load().Warn().Msg("DNS intercept: found empty NRPT local parent key (blocks activation) — removing") + if err := registry.DeleteKey(registry.LOCAL_MACHINE, nrptDirectKey); err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to delete empty NRPT local parent key") + return false + } + cleaned = true + + // Signal DNS Client to process the deletion and reset its internal cache. + mainLog.Load().Info().Msg("DNS intercept: empty NRPT parent key removed — signaling DNS Client") + sendParamChange() + flushDNSCache() + return cleaned +} + +// logNRPTParentKeyState logs the state of both NRPT registry paths for diagnostics. +func logNRPTParentKeyState(context string) { + for _, path := range []struct { + name string + key string + }{ + {"GP", nrptBaseKey}, + {"local", nrptDirectKey}, + } { + k, err := registry.OpenKey(registry.LOCAL_MACHINE, path.key, registry.ENUMERATE_SUB_KEYS) + if err != nil { + mainLog.Load().Debug().Str("context", context).Str("path", path.name). + Msg("DNS intercept: NRPT parent key does not exist") + continue + } + names, err := k.ReadSubKeyNames(-1) + k.Close() + if err != nil { + continue + } + if len(names) == 0 { + mainLog.Load().Warn().Str("context", context).Str("path", path.name). + Msg("DNS intercept: NRPT parent key exists but is EMPTY — blocks activation") + } else { + mainLog.Load().Debug().Str("context", context).Str("path", path.name). + Int("subkeys", len(names)).Strs("names", names). 
+ Msg("DNS intercept: NRPT parent key state") + } + } +} + +// nrptProbeAndHeal runs the NRPT probe with retries and escalating remediation. +// Called asynchronously after startup and from the health monitor. +// +// Retry sequence (each attempt: GP refresh + paramchange + flush → sleep → probe): +// 1. Immediate probe +// 2. GP refresh + paramchange + flush → 1s → probe +// 3. GP refresh + paramchange + flush → 2s → probe +// 4. GP refresh + paramchange + flush → 4s → probe +func (p *prog) nrptProbeAndHeal() { + if !nrptProbeRunning.CompareAndSwap(false, true) { + mainLog.Load().Debug().Msg("DNS intercept: NRPT probe already running, skipping") + return + } + defer nrptProbeRunning.Store(false) + + mainLog.Load().Info().Msg("DNS intercept: starting NRPT verification probe sequence") + + // Log parent key state for diagnostics. + logNRPTParentKeyState("probe-start") + + // Attempt 1: immediate probe + if p.probeNRPT() { + mainLog.Load().Info().Msg("DNS intercept: NRPT verified working") + return + } + + // Attempts 2-4: GP refresh + paramchange + flush with increasing backoff + delays := []time.Duration{1 * time.Second, 2 * time.Second, 4 * time.Second} + for i, delay := range delays { + attempt := i + 2 + mainLog.Load().Info().Int("attempt", attempt).Dur("delay", delay). + Msg("DNS intercept: NRPT probe failed, retrying with GP refresh + paramchange") + logNRPTParentKeyState(fmt.Sprintf("probe-attempt-%d", attempt)) + refreshNRPTPolicy() + sendParamChange() + flushDNSCache() + time.Sleep(delay) + if p.probeNRPT() { + mainLog.Load().Info().Int("attempt", attempt). + Msg("DNS intercept: NRPT verified working") + return + } + } + + // Nuclear option: two-phase delete → re-add cycle. + // DNS Client may have cached a stale "no rules" state. Delete our rule, + // signal DNS Client to forget it, wait, then re-add and signal again. 
+ mainLog.Load().Warn().Msg("DNS intercept: all probes failed — attempting two-phase NRPT recovery (delete → signal → re-add)") + listenerIP := "127.0.0.1" + if state, ok := p.dnsInterceptState.(*wfpState); ok { + listenerIP = state.listenerIP + } + + // Phase 1: Remove our rule and the parent key if now empty. + _ = removeNRPTCatchAllRule() + // If parent key is now empty after removing our rule, delete it too. + cleanEmptyNRPTParent() + refreshNRPTPolicy() + sendParamChange() + flushDNSCache() + logNRPTParentKeyState("nuclear-after-delete") + + // Wait for DNS Client to process the deletion. + time.Sleep(1 * time.Second) + + // Phase 2: Re-add the rule. + if err := addNRPTCatchAllRule(listenerIP); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to re-add NRPT after nuclear recovery") + return + } + refreshNRPTPolicy() + sendParamChange() + flushDNSCache() + logNRPTParentKeyState("nuclear-after-readd") + + // Final probe after recovery. + time.Sleep(1 * time.Second) + if p.probeNRPT() { + mainLog.Load().Info().Msg("DNS intercept: NRPT verified working after two-phase recovery") + return + } + + logNRPTParentKeyState("probe-failed-final") + mainLog.Load().Error().Msg("DNS intercept: NRPT verification failed after all retries including two-phase recovery — " + + "DNS queries may not be routed through ctrld. 
A network interface toggle may be needed.") +} diff --git a/cmd/cli/os_windows.go b/cmd/cli/os_windows.go index 7ebc54a8..1959ff04 100644 --- a/cmd/cli/os_windows.go +++ b/cmd/cli/os_windows.go @@ -55,7 +55,7 @@ func setDNS(iface *net.Interface, nameservers []string) error { mainLog.Load().Debug().Msgf("Existing forwarders content: %s", string(oldForwardersContent)) } - hasLocalIPv6Listener := needLocalIPv6Listener() + hasLocalIPv6Listener := needLocalIPv6Listener(interceptMode) mainLog.Load().Debug().Bool("has_ipv6_listener", hasLocalIPv6Listener).Msg("IPv6 listener status") forwarders := slices.DeleteFunc(slices.Clone(nameservers), func(s string) bool { diff --git a/docs/wfp-dns-intercept.md b/docs/wfp-dns-intercept.md new file mode 100644 index 00000000..6b9c3b50 --- /dev/null +++ b/docs/wfp-dns-intercept.md @@ -0,0 +1,449 @@ +# Windows DNS Intercept — Technical Reference + +## Overview + +On Windows, DNS intercept mode uses a two-layer architecture: + +- **`dns` mode (default)**: NRPT only — graceful DNS routing via the Windows DNS Client service +- **`hard` mode**: NRPT + WFP — full enforcement with kernel-level block filters + +This dual-mode design ensures that `dns` mode can never break DNS (at worst, a VPN +overwrites NRPT and queries bypass ctrld temporarily), while `hard` mode provides +the same enforcement guarantees as macOS pf. + +## Architecture: dns vs hard Mode + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ dns mode (NRPT only) │ +│ │ +│ App DNS query → DNS Client service → NRPT lookup │ +│ → "." 
catch-all matches → forward to 127.0.0.1 (ctrld) │ +│ │ +│ If VPN clears NRPT: health monitor re-adds within 30s │ +│ Worst case: queries go to VPN DNS until NRPT restored │ +│ DNS never breaks — graceful degradation │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ hard mode (NRPT + WFP) │ +│ │ +│ App DNS query → DNS Client service → NRPT → 127.0.0.1 (ctrld)│ +│ │ +│ Bypass attempt (raw 8.8.8.8:53) → WFP BLOCK filter │ +│ VPN DNS on private IP → WFP subnet PERMIT filter → allowed │ +│ │ +│ NRPT must be active before WFP starts (atomic guarantee) │ +│ If NRPT fails → WFP not started (avoids DNS blackhole) │ +│ If WFP fails → NRPT rolled back (all-or-nothing) │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## NRPT (Name Resolution Policy Table) + +### What It Does + +NRPT is a Windows feature (originally for DirectAccess) that tells the DNS Client +service to route queries matching specific namespace patterns to specific DNS servers. 
+ctrld adds a catch-all rule that routes ALL DNS to `127.0.0.1`: + +| Registry Value | Type | Value | Purpose | +|---|---|---|---| +| `Name` | REG_MULTI_SZ | `.` | Namespace (`.` = catch-all) | +| `GenericDNSServers` | REG_SZ | `127.0.0.1` | Target DNS server | +| `ConfigOptions` | REG_DWORD | `0x8` | Standard DNS resolution | +| `Version` | REG_DWORD | `0x2` | NRPT rule version 2 | +| `Comment` | REG_SZ | `` | Empty (matches PowerShell behavior) | +| `DisplayName` | REG_SZ | `` | Empty (matches PowerShell behavior) | +| `IPSECCARestriction` | REG_SZ | `` | Empty (matches PowerShell behavior) | + +### Registry Paths — GP vs Local (Critical) + +Windows NRPT has two registry paths with **all-or-nothing** precedence: + +| Path | Name | Mode | +|---|---|---| +| `HKLM\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig` | **GP path** | Group Policy mode | +| `HKLM\SYSTEM\CurrentControlSet\Services\Dnscache\Parameters\DnsPolicyConfig` | **Local path** | Local/service store mode | + +**Precedence rule**: If ANY rules exist in the GP path (from IT policy, VPN, MDM, +or our own earlier builds), DNS Client enters "GP mode" and **ignores ALL local-path +rules entirely**. This is not per-rule — it's a binary switch. + +**Consequence**: On non-domain-joined (WORKGROUP) machines, `RefreshPolicyEx` is +unreliable. If we write to the GP path, DNS Client enters GP mode but the rules +never activate — resulting in `Get-DnsClientNrptPolicy` returning empty even though +`Get-DnsClientNrptRule` shows the rule in registry. + +ctrld uses an adaptive strategy (matching [Tailscale's approach](https://github.com/tailscale/tailscale/blob/main/net/dns/nrpt_windows.go)): + +1. **Always write to the local path** using a deterministic GUID key name + (`{B2E9A3C1-7F4D-4A8E-9D6B-5C1E0F3A2B8D}`). This is the baseline that works + on all non-domain machines. +2. **Check if other software has GP NRPT rules** (`otherGPRulesExist()`). 
If + foreign GP rules are present (IT policy, VPN), DNS Client is already in GP mode + and our local rule would be invisible — so we also write to the GP path. +3. **If no foreign GP rules exist**, clean any stale ctrld GP rules and delete + the empty GP parent key. This ensures DNS Client stays in "local mode" where + the local-path rule activates immediately via `paramchange`. + +### VPN Coexistence + +NRPT uses most-specific-match. VPN NRPT rules for specific domains (e.g., +`*.corp.local` → `10.20.30.1`) take priority over ctrld's `.` catch-all. +This means VPN split DNS works naturally — VPN-specific domains go to VPN DNS, +everything else goes to ctrld. No exemptions or special handling needed. + +### DNS Client Notification + +After writing NRPT rules, DNS Client must be notified to reload: + +1. **`paramchange`**: `sc control dnscache paramchange` — signals DNS Client to + re-read configuration. Works for local-path rules on most machines. +2. **`RefreshPolicyEx`**: `RefreshPolicyEx(bMachine=TRUE, dwOptions=RP_FORCE)` from + `userenv.dll` — triggers GP refresh for GP-path rules. Unreliable on non-domain + machines (WORKGROUP). Fallback: `gpupdate /target:computer /force`. +3. **DNS cache flush**: `DnsFlushResolverCache` from `dnsapi.dll` or `ipconfig /flushdns` + — clears stale cached results from before NRPT was active. + +### DNS Cache Flush + +After NRPT changes, stale DNS cache entries could bypass the new routing. ctrld flushes: + +1. **Primary**: `DnsFlushResolverCache` from `dnsapi.dll` +2. **Fallback**: `ipconfig /flushdns` (subprocess) + +### Known Limitation: nslookup + +`nslookup.exe` implements its own DNS resolver and does NOT use the Windows DNS Client +service. It ignores NRPT entirely. Use `Resolve-DnsName` (PowerShell) or `ping` to +verify DNS resolution through NRPT. This is a well-known Windows behavior. 
+ +## WFP (Windows Filtering Platform) — hard Mode Only + +### Filter Stack + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Sublayer: "ctrld DNS Intercept" (weight 0xFFFF — max priority) │ +│ │ +│ ┌─ Permit Filters (weight 10) ─────────────────────────────┐ │ +│ │ • IPv4/UDP to 127.0.0.1:53 → PERMIT │ │ +│ │ • IPv4/TCP to 127.0.0.1:53 → PERMIT │ │ +│ │ • IPv6/UDP to ::1:53 → PERMIT │ │ +│ │ • IPv6/TCP to ::1:53 → PERMIT │ │ +│ │ • RFC1918 + CGNAT subnets:53 → PERMIT (VPN DNS) │ │ +│ │ • VPN DNS exemptions (dynamic) → PERMIT │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─ Block Filters (weight 1) ───────────────────────────────┐ │ +│ │ • All IPv4/UDP to *:53 → BLOCK │ │ +│ │ • All IPv4/TCP to *:53 → BLOCK │ │ +│ │ • All IPv6/UDP to *:53 → BLOCK │ │ +│ │ • All IPv6/TCP to *:53 → BLOCK │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ │ +│ Filter evaluation: higher weight wins → permits checked first │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Why WFP Can't Work Alone + +WFP operates at the connection authorization layer (`FWPM_LAYER_ALE_AUTH_CONNECT`). +It can only **block** or **permit** connections — it **cannot redirect** them. +Redirection requires kernel-mode callout drivers (`FwpsCalloutRegister` in +`fwpkclnt.lib`) using `FWPM_LAYER_ALE_CONNECT_REDIRECT_V4/V6`, which are not +accessible from userspace. + +Without NRPT, WFP blocks outbound DNS but doesn't tell applications where to send +queries instead — they just see DNS failures. This is why `hard` mode requires NRPT +to be active first, and why WFP is rolled back if NRPT setup fails. + +### Sublayer Priority + +Weight `0xFFFF` (maximum) ensures ctrld's filters take priority over any other WFP +sublayers from VPN software, endpoint security, or Windows Defender Firewall. 
+ +### RFC1918 + CGNAT Subnet Permits + +Static permit filters for private IP ranges (10.0.0.0/8, 172.16.0.0/12, +192.168.0.0/16, 100.64.0.0/10) allow VPN DNS servers on private IPs to work +without dynamic per-server exemptions. This covers Tailscale MagicDNS +(100.100.100.100), corporate VPN DNS (10.x.x.x), and similar. + +### VPN DNS Exemption Updates + +When `vpnDNSManager.Refresh()` discovers VPN DNS servers on public IPs: + +1. Delete all existing VPN permit filters (by stored IDs) +2. For each VPN DNS server IP: + - IPv4: `addWFPPermitIPFilter()` on `ALE_AUTH_CONNECT_V4` + - IPv6: `addWFPPermitIPv6Filter()` on `ALE_AUTH_CONNECT_V6` + - Both UDP and TCP for each IP +3. Store new filter IDs for next cleanup cycle + +**In `dns` mode, VPN DNS exemptions are skipped** — there are no WFP block +filters to exempt from. + +### Session Lifecycle + +**Startup (hard mode):** +``` +1. Add NRPT catch-all rule + GP refresh + DNS flush +2. FwpmEngineOpen0() with RPC_C_AUTHN_DEFAULT (0xFFFFFFFF) +3. Delete stale sublayer (crash recovery) +4. FwpmSubLayerAdd0() — weight 0xFFFF +5. Add 4 localhost permit filters +6. Add 4 block filters +7. Add RFC1918 + CGNAT subnet permits +8. Start NRPT health monitor goroutine +``` + +**Startup (dns mode):** +``` +1. Add NRPT catch-all rule + GP refresh + DNS flush +2. Start NRPT health monitor goroutine +3. (No WFP — done) +``` + +**Shutdown:** +``` +1. Stop NRPT health monitor +2. Remove NRPT catch-all rule + DNS flush +3. (hard mode only) Clean up all WFP filters, sublayer, close engine +``` + +**Crash Recovery:** +On startup, `FwpmSubLayerDeleteByKey0` removes any stale sublayer from a previous +unclean shutdown, including all its child filters (deterministic GUID ensures we +only clean up our own). + +## NRPT Probe and Auto-Heal + +### The Problem: Async GP Refresh Race + +`RefreshPolicyEx` triggers a Group Policy refresh but returns immediately — it does +NOT wait for the DNS Client service to actually reload NRPT from the registry. 
On +cold machines (first boot, fresh install, long sleep), the DNS Client may take +several seconds to process the policy refresh. During this window, NRPT rules exist +in the registry but the DNS Client hasn't loaded them — queries bypass ctrld. + +### The Solution: Active Probing + +After writing NRPT to the registry, ctrld sends a probe DNS query through the +Windows DNS Client path to verify NRPT is actually working: + +1. Generate a unique probe domain: `_nrpt-probe-<hex>.nrpt-probe.ctrld.test` +2. Send it via Go's `net.Resolver` (calls `GetAddrInfoW` → DNS Client → NRPT) +3. If NRPT is active, DNS Client routes it to 127.0.0.1 → ctrld receives it +4. ctrld's DNS handler recognizes the probe prefix and signals success +5. If the probe times out (2s), NRPT isn't loaded yet → retry with remediation + +### Startup Probe (Async) + +After NRPT setup, an async goroutine runs the probe-and-heal sequence without +blocking startup: + +``` +Probe attempt 1 (2s timeout) + ├─ Success → "NRPT verified working", done + └─ Timeout → GP refresh + DNS flush, sleep 1s + Probe attempt 2 (2s timeout) + ├─ Success → done + └─ Timeout → Restart DNS Client service (nuclear), sleep 2s + Re-add NRPT + GP refresh + DNS flush + Probe attempt 3 (2s timeout) + ├─ Success → done + └─ Timeout → GP refresh + DNS flush, sleep 4s + Probe attempt 4 (2s timeout) + ├─ Success → done + └─ Timeout → log error, continue +``` + +### DNS Client Restart (Nuclear Option) + +If GP refresh alone isn't enough, ctrld restarts the Windows DNS Client service +(`Dnscache`). This forces the DNS Client to fully re-initialize, including +re-reading all NRPT rules from the registry. This is the equivalent of macOS +`forceReloadPFMainRuleset()`. 
+ +**Trade-offs:** +- Briefly interrupts ALL DNS resolution (few hundred ms during restart) +- Clears the system DNS cache (all apps need to re-resolve) +- VPN NRPT rules survive (they're in registry, re-read on restart) +- Enterprise security tools may log the service restart event + +This only fires as attempt #3 after two GP refresh attempts fail — at that point +DNS isn't working through ctrld anyway, so a brief DNS blip is acceptable. + +### Health Monitor Integration + +The 30s periodic health monitor now does actual probing, not just registry checks: + +``` +Every 30s: + ├─ Registry check: nrptCatchAllRuleExists()? + │ ├─ Missing → re-add + GP refresh + flush + probe-and-heal + │ └─ Present → probe to verify it's actually routing + │ ├─ Probe success → OK + │ └─ Probe failure → probe-and-heal cycle + │ + └─ (hard mode only) Check: wfpSublayerExists()? + ├─ Missing → full restart (stopDNSIntercept + startDNSIntercept) + └─ Present → OK +``` + +**Singleton guard:** Only one probe-and-heal sequence runs at a time (atomic bool). +The startup probe and health monitor cannot overlap. + +**Why periodic, not just network-event?** VPN software or Group Policy updates can +clear NRPT at any time, not just during network changes. A 30s periodic check ensures +recovery within a bounded window. + +**Hard mode safety:** The health monitor verifies NRPT before checking WFP. If NRPT +is gone, it's restored first. WFP is never running without NRPT — this prevents +DNS blackholes where WFP blocks everything but NRPT isn't routing to ctrld. + +## DNS Flow Diagrams + +### Normal Resolution (both modes) + +``` +App → DNS Client → NRPT lookup → "." matches → 127.0.0.1 → ctrld + → Control D DoH (port 443, not affected by WFP port-53 rules) + → response flows back +``` + +### VPN Split DNS (both modes) + +``` +App → DNS Client → NRPT lookup: + VPN domain (*.corp.local) → VPN's NRPT rule wins → VPN DNS server + Everything else → ctrld's "." 
catch-all → 127.0.0.1 → ctrld + → VPN domain match → forward to VPN DNS (port 53) + → (hard mode: WFP subnet permit allows private IP DNS) +``` + +### Bypass Attempt (hard mode only) + +``` +App → raw socket to 8.8.8.8:53 → WFP ALE_AUTH_CONNECT → BLOCK +``` + +In `dns` mode, this query would succeed (no WFP) — the tradeoff for never +breaking DNS. + +## Key Differences from macOS (pf) + +| Aspect | macOS (pf) | Windows dns mode | Windows hard mode | +|--------|-----------|------------------|-------------------| +| **Routing** | `rdr` redirect | NRPT policy | NRPT policy | +| **Enforcement** | `route-to` + block rules | None (graceful) | WFP block filters | +| **Can break DNS?** | Yes (pf corruption) | No | Yes (if NRPT lost) | +| **VPN coexistence** | Watchdog + stabilization | NRPT most-specific-match | Same + WFP permits | +| **Bypass protection** | pf catches all packets | None | WFP catches all connections | +| **Recovery** | Probe + auto-heal | Health monitor re-adds | Full restart on sublayer loss | + +## WFP API Notes + +### Struct Layouts + +WFP C API structures are manually defined in Go (`golang.org/x/sys/windows` doesn't +include WFP types). Field alignment must match the C ABI exactly — any mismatch +causes access violations or silent corruption. + +### FWP_DATA_TYPE Enum + +``` +FWP_EMPTY = 0 +FWP_UINT8 = 1 +FWP_UINT16 = 2 +FWP_UINT32 = 3 +FWP_UINT64 = 4 +... +``` + +**⚠️** Some documentation examples incorrectly start at 1. The enum starts at 0 +(`FWP_EMPTY`), making all subsequent values offset by 1 from what you might expect. + +### GC Safety + +When passing Go heap objects to WFP syscalls via `unsafe.Pointer`, use +`runtime.KeepAlive()` to prevent garbage collection during the call: + +```go +conditions := make([]fwpmFilterCondition0, 3) +filter.filterCondition = &conditions[0] +r1, _, _ := procFwpmFilterAdd0.Call(...) 
+runtime.KeepAlive(conditions) +``` + +### Authentication + +`FwpmEngineOpen0` requires `RPC_C_AUTHN_DEFAULT` (0xFFFFFFFF) for the authentication +service parameter. `RPC_C_AUTHN_NONE` (0) returns `ERROR_NOT_SUPPORTED` on some +configurations (e.g., Parallels VMs). + +### Elevation + +WFP requires admin/SYSTEM privileges. `FwpmEngineOpen0` fails with HRESULT 0x32 +when run non-elevated. Services running as SYSTEM have this automatically. + +## Debugging + +### Check NRPT Rules + +```powershell +# PowerShell — show active NRPT rules +Get-DnsClientNrptRule + +# Check registry directly +Get-ChildItem "HKLM:\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig" +``` + +### Check WFP Filters (hard mode) + +```powershell +# Show all WFP filters (requires admin) — output is XML +netsh wfp show filters + +# Search for ctrld's filters +Select-String "ctrld" filters.xml +``` + +### Verify DNS Resolution + +```powershell +# Use Resolve-DnsName, NOT nslookup (nslookup bypasses NRPT) +Resolve-DnsName example.com +ping example.com + +# If you must use nslookup, specify localhost: +nslookup example.com 127.0.0.1 + +# Force GP refresh (if NRPT not loading) +gpupdate /target:computer /force + +# Verify service registration +sc qc ctrld +``` + +### Service Verification + +After install, verify the Windows service is correctly registered: + +```powershell +# Check binary path and start type +sc qc ctrld + +# Should show: +# BINARY_PATH_NAME: "C:\...\ctrld.exe" run --cd xxxxx --intercept-mode dns +# START_TYPE: AUTO_START +``` + +## Related + +- [DNS Intercept Mode Overview](dns-intercept-mode.md) — cross-platform documentation +- [pf DNS Intercept](pf-dns-intercept.md) — macOS technical reference +- [Microsoft WFP Documentation](https://docs.microsoft.com/en-us/windows/win32/fwp/windows-filtering-platform-start-page) +- [Microsoft NRPT Documentation](https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-server-2012-r2-and-2012/dn593632(v=ws.11)) diff 
--git a/nameservers_windows.go b/nameservers_windows.go index e02b1f5b..75fdf8e0 100644 --- a/nameservers_windows.go +++ b/nameservers_windows.go @@ -8,6 +8,7 @@ import ( "net" "os" "strings" + "sync" "syscall" "time" "unsafe" @@ -37,6 +38,28 @@ const ( DS_IP_REQUIRED = 0x00000200 DS_IS_DNS_NAME = 0x00020000 DS_RETURN_DNS_NAME = 0x40000000 + + // AD DC retry constants + dcRetryInitialDelay = 1 * time.Second + dcRetryMaxDelay = 30 * time.Second + dcRetryMaxAttempts = 10 + + // DsGetDcName error codes + errNoSuchDomain uintptr = 1355 + errNoLogonServers uintptr = 1311 + errDCNotFound uintptr = 1004 + errRPCUnavailable uintptr = 1722 + errConnReset uintptr = 10054 + errNetUnreachable uintptr = 1231 +) + +var ( + dcRetryMu sync.Mutex + dcRetryCancel context.CancelFunc + + // Lazy-loaded netapi32 for DsGetDcNameW calls. + netapi32DLL = windows.NewLazySystemDLL("netapi32.dll") + dsGetDcNameW = netapi32DLL.NewProc("DsGetDcNameW") ) type DomainControllerInfo struct { @@ -110,6 +133,9 @@ func dnsFromAdapter() []string { func getDNSServers(ctx context.Context) ([]string, error) { logger := *ProxyLogger.Load() + // Cancel any in-flight DC retry from a previous network state. 
+ cancelDCRetry() + // Check context before making the call if ctx.Err() != nil { return nil, ctx.Err() @@ -139,9 +165,6 @@ func getDNSServers(ctx context.Context) ([]string, error) { } else { adDomain = domainName // Load netapi32.dll - netapi32 := windows.NewLazySystemDLL("netapi32.dll") - dsDcName := netapi32.NewProc("DsGetDcNameW") - var info *DomainControllerInfo flags := uint32(DS_RETURN_DNS_NAME | DS_IP_REQUIRED | DS_IS_DNS_NAME) @@ -153,7 +176,7 @@ func getDNSServers(ctx context.Context) ([]string, error) { "Attempting to get DC for domain: %s with flags: 0x%x", domainName, flags) // Call DsGetDcNameW with domain name - ret, _, err := dsDcName.Call( + ret, _, err := dsGetDcNameW.Call( 0, // ComputerName - can be NULL uintptr(unsafe.Pointer(domainUTF16)), // DomainName 0, // DomainGuid - not needed @@ -163,22 +186,28 @@ func getDNSServers(ctx context.Context) ([]string, error) { if ret != 0 { switch ret { - case 1355: // ERROR_NO_SUCH_DOMAIN + case errNoSuchDomain: Log(ctx, logger.Debug(), "Domain not found: %s (%d)", domainName, ret) - case 1311: // ERROR_NO_LOGON_SERVERS + case errNoLogonServers: Log(ctx, logger.Debug(), "No logon servers available for domain: %s (%d)", domainName, ret) - case 1004: // ERROR_DC_NOT_FOUND + case errDCNotFound: Log(ctx, logger.Debug(), "Domain controller not found for domain: %s (%d)", domainName, ret) - case 1722: // RPC_S_SERVER_UNAVAILABLE + case errRPCUnavailable: Log(ctx, logger.Debug(), "RPC server unavailable for domain: %s (%d)", domainName, ret) default: Log(ctx, logger.Debug(), "Failed to get domain controller info for domain %s: %d, %v", domainName, ret, err) } + // Start background retry for transient DC errors. 
+ if isTransientDCError(ret) { + Log(ctx, logger.Info(), + "AD DC detection failed with transient error %d for %s, starting background retry", ret, domainName) + startDCRetry(domainName) + } } else if info != nil { defer windows.NetApiBufferFree((*byte)(unsafe.Pointer(info))) @@ -357,6 +386,143 @@ func checkDomainJoined() bool { return isDomain } +// isTransientDCError returns true if the DsGetDcName error code indicates +// a transient failure that may succeed on retry. +func isTransientDCError(code uintptr) bool { + switch code { + case errConnReset, errRPCUnavailable, errNoLogonServers, errDCNotFound, errNetUnreachable: + return true + default: + return false + } +} + +// cancelDCRetry cancels any in-flight DC retry goroutine. +func cancelDCRetry() { + dcRetryMu.Lock() + defer dcRetryMu.Unlock() + if dcRetryCancel != nil { + dcRetryCancel() + dcRetryCancel = nil + } +} + +// startDCRetry spawns a background goroutine that retries DsGetDcName with +// exponential backoff. On success it appends the DC IP to the OS resolver. +func startDCRetry(domainName string) { + dcRetryMu.Lock() + // Cancel any previous retry. 
+ if dcRetryCancel != nil { + dcRetryCancel() + } + ctx, cancel := context.WithCancel(context.Background()) + dcRetryCancel = cancel + dcRetryMu.Unlock() + + go func() { + logger := *ProxyLogger.Load() + delay := dcRetryInitialDelay + + for attempt := 1; attempt <= dcRetryMaxAttempts; attempt++ { + select { + case <-ctx.Done(): + Log(context.Background(), logger.Debug(), "AD DC retry cancelled for domain %s", domainName) + return + case <-time.After(delay): + } + + Log(ctx, logger.Debug(), + "AD DC retry attempt %d/%d for domain %s (delay was %v)", + attempt, dcRetryMaxAttempts, domainName, delay) + + dcIP, errCode := tryGetDCAddress(domainName) + if dcIP != "" { + Log(context.Background(), logger.Info(), + "AD DC retry succeeded: found DC at %s for domain %s (attempt %d)", + dcIP, domainName, attempt) + if AppendOsResolverNameservers([]string{dcIP}) { + Log(context.Background(), logger.Info(), + "Added DC %s to OS resolver nameservers", dcIP) + } else { + Log(context.Background(), logger.Warn(), + "AD DC retry: OS resolver not initialized, DC IP %s was not added", dcIP) + } + return + } + + // Permanent error or unexpected empty result — stop retrying. + if errCode != 0 && !isTransientDCError(errCode) { + Log(context.Background(), logger.Debug(), + "AD DC retry stopping: permanent error %d for domain %s", errCode, domainName) + return + } + if errCode == 0 { + // DsGetDcName returned success but no usable address — don't retry. + Log(context.Background(), logger.Debug(), + "AD DC retry stopping: DsGetDcName returned no address for domain %s", domainName) + return + } + + // Exponential backoff. + delay *= 2 + if delay > dcRetryMaxDelay { + delay = dcRetryMaxDelay + } + } + + Log(ctx, logger.Warn(), + "AD DC retry exhausted %d attempts for domain %s", dcRetryMaxAttempts, domainName) + }() +} + +// tryGetDCAddress attempts a single DsGetDcName call and returns the DC IP on success, +// or empty string and the error code on failure. 
+func tryGetDCAddress(domainName string) (string, uintptr) { + logger := *ProxyLogger.Load() + + var info *DomainControllerInfo + // Use DS_FORCE_REDISCOVERY on retries to bypass the DC locator cache, + // which may have cached the initial transient failure. + flags := uint32(DS_RETURN_DNS_NAME | DS_IP_REQUIRED | DS_IS_DNS_NAME | DS_FORCE_REDISCOVERY) + + domainUTF16, err := windows.UTF16PtrFromString(domainName) + if err != nil { + Log(context.Background(), logger.Debug(), + "Failed to convert domain name to UTF16: %v", err) + return "", 0 + } + + ret, _, _ := dsGetDcNameW.Call( + 0, + uintptr(unsafe.Pointer(domainUTF16)), + 0, + 0, + uintptr(flags), + uintptr(unsafe.Pointer(&info))) + + if ret != 0 { + Log(context.Background(), logger.Debug(), + "DsGetDcName retry failed for %s: error %d", domainName, ret) + return "", ret + } + + if info == nil { + return "", 0 + } + defer windows.NetApiBufferFree((*byte)(unsafe.Pointer(info))) + + if info.DomainControllerAddress == nil { + return "", 0 + } + + dcAddr := windows.UTF16PtrToString(info.DomainControllerAddress) + dcAddr = strings.TrimPrefix(dcAddr, "\\\\") + if ip := net.ParseIP(dcAddr); ip != nil { + return ip.String(), 0 + } + return "", 0 +} + // validInterfaces returns a list of all physical interfaces. // this is a duplicate of what is in net_windows.go, we should // clean this up so there is only one version diff --git a/scripts/nrpt-diag.ps1 b/scripts/nrpt-diag.ps1 new file mode 100644 index 00000000..230ae94f --- /dev/null +++ b/scripts/nrpt-diag.ps1 @@ -0,0 +1,132 @@ +#Requires -RunAsAdministrator +<# +.SYNOPSIS + NRPT diagnostic script for ctrld DNS intercept troubleshooting. +.DESCRIPTION + Captures the full NRPT state: registry keys (both GP and direct paths), + effective policy, active rules, DNS Client service status, and resolver + config. Run as Administrator. 
+.EXAMPLE + .\nrpt-diag.ps1 + .\nrpt-diag.ps1 | Out-File nrpt-diag-output.txt +#> + +$ErrorActionPreference = 'SilentlyContinue' + +Write-Host "=== NRPT Diagnostic Report ===" -ForegroundColor Cyan +Write-Host "Date: $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')" +Write-Host "Computer: $env:COMPUTERNAME" +Write-Host "OS: $((Get-CimInstance Win32_OperatingSystem).Caption) $((Get-CimInstance Win32_OperatingSystem).BuildNumber)" +Write-Host "" + +# --- 1. DNS Client Service --- +Write-Host "=== 1. DNS Client (Dnscache) Service ===" -ForegroundColor Yellow +$svc = Get-Service Dnscache +Write-Host "Status: $($svc.Status) StartType: $($svc.StartType)" +Write-Host "" + +# --- 2. GP Path (Policy store) --- +$gpPath = "HKLM:\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig" +Write-Host "=== 2. GP Path: $gpPath ===" -ForegroundColor Yellow +$gpKey = Get-Item $gpPath 2>$null +if ($gpKey) { + Write-Host "Key EXISTS" + $subkeys = Get-ChildItem $gpPath 2>$null + if ($subkeys) { + foreach ($sk in $subkeys) { + Write-Host "" + Write-Host " Subkey: $($sk.PSChildName)" -ForegroundColor Green + foreach ($prop in $sk.Property) { + $val = $sk.GetValue($prop) + $kind = $sk.GetValueKind($prop) + Write-Host " $prop ($kind) = $val" + } + } + } else { + Write-Host " ** EMPTY (no subkeys) — this blocks NRPT activation! **" -ForegroundColor Red + } +} else { + Write-Host "Key does NOT exist (clean state)" +} +Write-Host "" + +# --- 3. Direct Path (Service store) --- +$directPath = "HKLM:\SYSTEM\CurrentControlSet\Services\Dnscache\Parameters\DnsPolicyConfig" +Write-Host "=== 3. 
Direct Path: $directPath ===" -ForegroundColor Yellow +$directKey = Get-Item $directPath 2>$null +if ($directKey) { + Write-Host "Key EXISTS" + $subkeys = Get-ChildItem $directPath 2>$null + if ($subkeys) { + foreach ($sk in $subkeys) { + Write-Host "" + Write-Host " Subkey: $($sk.PSChildName)" -ForegroundColor Green + foreach ($prop in $sk.Property) { + $val = $sk.GetValue($prop) + $kind = $sk.GetValueKind($prop) + Write-Host " $prop ($kind) = $val" + } + } + } else { + Write-Host " ** EMPTY (no subkeys) **" -ForegroundColor Red + } +} else { + Write-Host "Key does NOT exist" +} +Write-Host "" + +# --- 4. Effective NRPT Rules (what Windows sees) --- +Write-Host "=== 4. Get-DnsClientNrptRule ===" -ForegroundColor Yellow +$rules = Get-DnsClientNrptRule 2>$null +if ($rules) { + $rules | Format-List Name, Version, Namespace, NameServers, NameEncoding, DnsSecEnabled +} else { + Write-Host "(none)" +} +Write-Host "" + +# --- 5. Effective NRPT Policy (what DNS Client actually applies) --- +Write-Host "=== 5. Get-DnsClientNrptPolicy ===" -ForegroundColor Yellow +$policy = Get-DnsClientNrptPolicy 2>$null +if ($policy) { + $policy | Format-List Namespace, NameServers, NameEncoding, QueryPolicy +} else { + Write-Host "(none — DNS Client is NOT honoring any NRPT rules)" -ForegroundColor Red +} +Write-Host "" + +# --- 6. Interface DNS servers --- +Write-Host "=== 6. Interface DNS Configuration ===" -ForegroundColor Yellow +Get-DnsClientServerAddress -AddressFamily IPv4 | Where-Object { $_.ServerAddresses } | + Format-Table InterfaceAlias, InterfaceIndex, ServerAddresses -AutoSize +Write-Host "" + +# --- 7. DNS resolution test --- +Write-Host "=== 7. 
DNS Resolution Test ===" -ForegroundColor Yellow +Write-Host "Resolve-DnsName example.com (uses DNS Client / NRPT):" +try { + $result = Resolve-DnsName example.com -Type A -DnsOnly -ErrorAction Stop + $result | Format-Table Name, Type, IPAddress -AutoSize +} catch { + Write-Host " FAILED: $_" -ForegroundColor Red +} +Write-Host "" +Write-Host "nslookup example.com 127.0.0.1 (direct to ctrld, bypasses NRPT):" +$ns = nslookup example.com 127.0.0.1 2>&1 +$ns | ForEach-Object { Write-Host " $_" } +Write-Host "" + +# --- 8. Domain join status --- +Write-Host "=== 8. Domain Status ===" -ForegroundColor Yellow +$cs = Get-CimInstance Win32_ComputerSystem +Write-Host "Domain: $($cs.Domain) PartOfDomain: $($cs.PartOfDomain)" +Write-Host "" + +# --- 9. Group Policy NRPT --- +Write-Host "=== 9. GP Result (NRPT section) ===" -ForegroundColor Yellow +Write-Host "(Running gpresult — may take a few seconds...)" +$gp = gpresult /r 2>&1 +$gp | Select-String -Pattern "DNS|NRPT|Policy" | ForEach-Object { Write-Host " $_" } +Write-Host "" + +Write-Host "=== End of Diagnostic Report ===" -ForegroundColor Cyan From e7040bd9f956ac03ea156138e2eaac161ee184df Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 3 Mar 2026 02:07:11 -0500 Subject: [PATCH 19/22] feat: add VPN DNS split routing Implement VPN DNS discovery and split routing for intercept mode: - Discover VPN DNS servers from F5 BIG-IP, Tailscale, Network Extension VPNs, and traditional VPN adapters - Exit mode detection (split vs full tunnel) via routing table - Interface-scoped pf exemptions for VPN DNS traffic (macOS) - Windows VPN adapter filtering with routable address check - AD domain controller detection with retry on transient failure - Cleanup of stale exemptions on VPN disconnect Squashed from intercept mode development on v1.0 branch (#497). 
--- cmd/cli/vpn_dns.go | 255 +++++++++++++++++++++++++++++++++++++++++++++ vpn_dns_config.go | 11 ++ vpn_dns_darwin.go | 243 ++++++++++++++++++++++++++++++++++++++++++ vpn_dns_linux.go | 211 +++++++++++++++++++++++++++++++++++++ vpn_dns_others.go | 15 +++ vpn_dns_windows.go | 130 +++++++++++++++++++++++ 6 files changed, 865 insertions(+) create mode 100644 cmd/cli/vpn_dns.go create mode 100644 vpn_dns_config.go create mode 100644 vpn_dns_darwin.go create mode 100644 vpn_dns_linux.go create mode 100644 vpn_dns_others.go create mode 100644 vpn_dns_windows.go diff --git a/cmd/cli/vpn_dns.go b/cmd/cli/vpn_dns.go new file mode 100644 index 00000000..03b54786 --- /dev/null +++ b/cmd/cli/vpn_dns.go @@ -0,0 +1,255 @@ +package cli + +import ( + "context" + "strings" + "sync" + + "tailscale.com/net/netmon" + + "github.com/Control-D-Inc/ctrld" +) + +// vpnDNSExemption represents a VPN DNS server that needs pf/WFP exemption, +// including the interface it was discovered on. The interface is used on macOS +// to create interface-scoped pf exemptions that allow the VPN's local DNS +// handler (e.g., Tailscale's MagicDNS Network Extension) to receive queries +// from all processes — not just ctrld. +type vpnDNSExemption struct { + Server string // DNS server IP (e.g., "100.100.100.100") + Interface string // Interface name from scutil (e.g., "utun11"), may be empty + IsExitMode bool // True if this VPN is in exit/full-tunnel mode (all traffic routed through VPN) +} + +// vpnDNSExemptFunc is called when VPN DNS servers change, to update +// the intercept layer (WFP/pf) to permit VPN DNS traffic. +type vpnDNSExemptFunc func(exemptions []vpnDNSExemption) error + +// vpnDNSManager tracks active VPN DNS configurations and provides +// domain-to-upstream routing for VPN split DNS. 
+type vpnDNSManager struct { + mu sync.RWMutex + configs []ctrld.VPNDNSConfig + // Map of domain suffix → DNS servers for fast lookup + routes map[string][]string + // DNS servers from VPN interfaces that have no domain/suffix config. + // These are NOT added to the global OS resolver. They're only used + // as additional nameservers for queries that match split-DNS rules + // (from ctrld config, AD domain, or VPN suffix config). + domainlessServers []string + // Called when VPN DNS server list changes, to update intercept exemptions. + onServersChanged vpnDNSExemptFunc +} + +// newVPNDNSManager creates a new manager. Only call when dnsIntercept is active. +// exemptFunc is called whenever VPN DNS servers are discovered/changed, to update +// the OS-level intercept rules to permit ctrld's outbound queries to those IPs. +func newVPNDNSManager(exemptFunc vpnDNSExemptFunc) *vpnDNSManager { + return &vpnDNSManager{ + routes: make(map[string][]string), + onServersChanged: exemptFunc, + } +} + +// Refresh re-discovers VPN DNS configs from the OS. +// Called on network change events. +func (m *vpnDNSManager) Refresh(guardAgainstNoNameservers bool) { + logger := mainLog.Load() + + logger.Debug().Msg("Refreshing VPN DNS configurations") + configs := ctrld.DiscoverVPNDNS(context.Background()) + + // Detect exit mode: if the default route goes through a VPN DNS interface, + // the VPN is routing ALL traffic (exit node / full tunnel). This is more + // reliable than scutil flag parsing because the routing table is the ground + // truth for traffic flow, regardless of how the VPN presents itself in scutil. 
+ if dri, err := netmon.DefaultRouteInterface(); err == nil && dri != "" { + for i := range configs { + if configs[i].InterfaceName == dri { + if !configs[i].IsExitMode { + logger.Info().Msgf("VPN DNS on %s: default route interface match — EXIT MODE (route-based detection)", dri) + } + configs[i].IsExitMode = true + } + } + } + + m.mu.Lock() + defer m.mu.Unlock() + + m.configs = configs + m.routes = make(map[string][]string) + + // Build domain -> DNS servers mapping + for _, config := range configs { + logger.Debug().Msgf("Processing VPN interface %s with %d domains and %d servers", + config.InterfaceName, len(config.Domains), len(config.Servers)) + + for _, domain := range config.Domains { + // Normalize domain: remove leading dot, Linux routing domain prefix (~), + // and convert to lowercase. + domain = strings.TrimPrefix(domain, "~") + domain = strings.TrimPrefix(domain, ".") + domain = strings.ToLower(domain) + + if domain != "" { + m.routes[domain] = append([]string{}, config.Servers...) + logger.Debug().Msgf("Added VPN DNS route: %s -> %v", domain, config.Servers) + } + } + } + + // Collect unique VPN DNS exemptions (server + interface) for pf/WFP rules. + type exemptionKey struct{ server, iface string } + seen := make(map[exemptionKey]bool) + var exemptions []vpnDNSExemption + for _, config := range configs { + for _, server := range config.Servers { + key := exemptionKey{server, config.InterfaceName} + if !seen[key] { + seen[key] = true + exemptions = append(exemptions, vpnDNSExemption{ + Server: server, + Interface: config.InterfaceName, + IsExitMode: config.IsExitMode, + }) + } + } + } + + // Collect domain-less VPN DNS servers. These are NOT added to the global + // OS resolver (that would pollute captive portal / DHCP flows). Instead, + // they're stored separately and only used for queries that match existing + // split-DNS rules (from ctrld config, AD domain, or VPN suffix config). 
+ var domainlessServers []string + seen2 := make(map[string]bool) + for _, config := range configs { + if len(config.Domains) == 0 && len(config.Servers) > 0 { + logger.Debug().Msgf("VPN interface %s has DNS servers but no domains, storing as split-rule fallback: %v", + config.InterfaceName, config.Servers) + for _, s := range config.Servers { + if !seen2[s] { + seen2[s] = true + domainlessServers = append(domainlessServers, s) + } + } + } + } + m.domainlessServers = domainlessServers + + logger.Debug().Msgf("VPN DNS refresh completed: %d configs, %d routes, %d domainless servers, %d unique exemptions", + len(m.configs), len(m.routes), len(m.domainlessServers), len(exemptions)) + + // Update intercept rules to permit VPN DNS traffic. + // Always call onServersChanged — including when exemptions is empty — so that + // stale exemptions from a previous VPN session get cleared on disconnect. + if m.onServersChanged != nil { + if err := m.onServersChanged(exemptions); err != nil { + logger.Error().Err(err).Msg("Failed to update intercept exemptions for VPN DNS servers") + } + } +} + +// UpstreamForDomain checks if the domain matches any VPN search domain. +// Returns VPN DNS servers if matched, nil otherwise. +func (m *vpnDNSManager) UpstreamForDomain(domain string) []string { + if domain == "" { + return nil + } + + m.mu.RLock() + defer m.mu.RUnlock() + + domain = strings.TrimSuffix(domain, ".") + domain = strings.ToLower(domain) + + if servers, ok := m.routes[domain]; ok { + return append([]string{}, servers...) + } + + for vpnDomain, servers := range m.routes { + if strings.HasSuffix(domain, "."+vpnDomain) { + return append([]string{}, servers...) + } + } + + return nil +} + +// DomainlessServers returns VPN DNS servers that have no associated domains. +// These should only be used for queries matching split-DNS rules, not for +// general OS resolver queries (to avoid polluting captive portal / DHCP flows). 
+func (m *vpnDNSManager) DomainlessServers() []string {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	return append([]string{}, m.domainlessServers...)
+}
+
+// CurrentServers returns the unique VPN DNS server IPs that have associated domain routes (domain-less servers are excluded; see DomainlessServers).
+func (m *vpnDNSManager) CurrentServers() []string {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	seen := make(map[string]bool)
+	var servers []string
+	for _, ss := range m.routes {
+		for _, s := range ss {
+			if !seen[s] {
+				seen[s] = true
+				servers = append(servers, s)
+			}
+		}
+	}
+	return servers
+}
+
+// CurrentExemptions returns VPN DNS server + interface pairs for pf exemption rules.
+func (m *vpnDNSManager) CurrentExemptions() []vpnDNSExemption {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	type key struct{ server, iface string }
+	seen := make(map[key]bool)
+	var exemptions []vpnDNSExemption
+	for _, config := range m.configs {
+		for _, server := range config.Servers {
+			k := key{server, config.InterfaceName}
+			if !seen[k] {
+				seen[k] = true
+				exemptions = append(exemptions, vpnDNSExemption{
+					Server:     server,
+					Interface:  config.InterfaceName,
+					IsExitMode: config.IsExitMode,
+				})
+			}
+		}
+	}
+	return exemptions
+}
+
+// Routes returns a copy of the current VPN DNS routes for debugging.
+func (m *vpnDNSManager) Routes() map[string][]string {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	routes := make(map[string][]string)
+	for domain, servers := range m.routes {
+		routes[domain] = append([]string{}, servers...)
+	}
+	return routes
+}
+
+// upstreamConfigFor creates a legacy upstream configuration for the given VPN DNS server. 
+func (m *vpnDNSManager) upstreamConfigFor(server string) *ctrld.UpstreamConfig { + endpoint := server + if !strings.Contains(server, ":") { + endpoint = server + ":53" + } + + return &ctrld.UpstreamConfig{ + Name: "VPN DNS", + Type: ctrld.ResolverTypeLegacy, + Endpoint: endpoint, + Timeout: 2000, + } +} diff --git a/vpn_dns_config.go b/vpn_dns_config.go new file mode 100644 index 00000000..3482dd9a --- /dev/null +++ b/vpn_dns_config.go @@ -0,0 +1,11 @@ +package ctrld + +// VPNDNSConfig represents DNS configuration discovered from a VPN interface. +// Used by the dns-intercept mode to detect VPN split DNS settings and +// route matching queries to VPN DNS servers automatically. +type VPNDNSConfig struct { + InterfaceName string // VPN adapter name (e.g., "F5 Networks VPN") + Servers []string // DNS server IPs (e.g., ["10.20.30.1"]) + Domains []string // Search/match domains (e.g., ["corp.example.com"]) + IsExitMode bool // True if this VPN is also the system default resolver (exit node mode) +} diff --git a/vpn_dns_darwin.go b/vpn_dns_darwin.go new file mode 100644 index 00000000..863ecea8 --- /dev/null +++ b/vpn_dns_darwin.go @@ -0,0 +1,243 @@ +//go:build darwin + +package ctrld + +import ( + "bufio" + "context" + "net" + "os/exec" + "regexp" + "strconv" + "strings" +) + +// DiscoverVPNDNS discovers DNS servers and search domains from VPN interfaces on macOS. +// Parses `scutil --dns` output to find VPN resolver configurations. +func DiscoverVPNDNS(ctx context.Context) []VPNDNSConfig { + logger := *ProxyLogger.Load() + + Log(ctx, logger.Debug(), "Discovering VPN DNS configurations on macOS") + + cmd := exec.CommandContext(ctx, "scutil", "--dns") + output, err := cmd.Output() + if err != nil { + Log(ctx, logger.Error().Err(err), "Failed to execute scutil --dns") + return nil + } + + return parseScutilOutput(ctx, string(output)) +} + +// parseScutilOutput parses the output of `scutil --dns` to extract VPN DNS configurations. 
+func parseScutilOutput(ctx context.Context, output string) []VPNDNSConfig { + logger := *ProxyLogger.Load() + + Log(ctx, logger.Debug(), "Parsing scutil --dns output") + + resolverBlockRe := regexp.MustCompile(`resolver #(\d+)`) + searchDomainRe := regexp.MustCompile(`search domain\[\d+\] : (.+)`) + // Matches singular "domain : value" entries (e.g., Tailscale per-domain resolvers). + singleDomainRe := regexp.MustCompile(`^domain\s+:\s+(.+)`) + nameserverRe := regexp.MustCompile(`nameserver\[\d+\] : (.+)`) + ifIndexRe := regexp.MustCompile(`if_index : (\d+) \((.+)\)`) + + var vpnConfigs []VPNDNSConfig + var currentResolver *resolverInfo + var allResolvers []resolverInfo + + scanner := bufio.NewScanner(strings.NewReader(output)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + if match := resolverBlockRe.FindStringSubmatch(line); match != nil { + if currentResolver != nil { + allResolvers = append(allResolvers, *currentResolver) + } + resolverNum, _ := strconv.Atoi(match[1]) + currentResolver = &resolverInfo{ + Number: resolverNum, + } + continue + } + + if currentResolver == nil { + continue + } + + if match := searchDomainRe.FindStringSubmatch(line); match != nil { + domain := strings.TrimSpace(match[1]) + if domain != "" { + currentResolver.Domains = append(currentResolver.Domains, domain) + } + continue + } + + // Parse singular "domain : value" (used by Tailscale per-domain resolvers). 
+ if match := singleDomainRe.FindStringSubmatch(line); match != nil { + domain := strings.TrimSpace(match[1]) + if domain != "" { + currentResolver.Domains = append(currentResolver.Domains, domain) + } + continue + } + + if match := nameserverRe.FindStringSubmatch(line); match != nil { + server := strings.TrimSpace(match[1]) + if ip := net.ParseIP(server); ip != nil && !ip.IsLoopback() { + currentResolver.Servers = append(currentResolver.Servers, server) + } + continue + } + + if match := ifIndexRe.FindStringSubmatch(line); match != nil { + currentResolver.InterfaceName = strings.TrimSpace(match[2]) + continue + } + + if strings.HasPrefix(line, "flags") { + if idx := strings.Index(line, ":"); idx >= 0 { + currentResolver.Flags = strings.TrimSpace(line[idx+1:]) + } + continue + } + } + + if currentResolver != nil { + allResolvers = append(allResolvers, *currentResolver) + } + + for _, resolver := range allResolvers { + if isSplitDNSResolver(ctx, &resolver) { + ifaceName := resolver.InterfaceName + + // When scutil doesn't provide if_index (common with Tailscale MagicDNS + // per-domain resolvers), look up the outbound interface from the routing + // table. This is needed for interface-scoped pf exemptions — without the + // interface name, we can't generate rules that let the VPN's Network + // Extension handle DNS queries from all processes. 
+ if ifaceName == "" && len(resolver.Servers) > 0 { + if routeIface := resolveInterfaceForIP(ctx, resolver.Servers[0]); routeIface != "" { + ifaceName = routeIface + Log(ctx, logger.Debug(), "Resolver #%d: resolved interface %q from routing table for %s", + resolver.Number, routeIface, resolver.Servers[0]) + } + } + + config := VPNDNSConfig{ + InterfaceName: ifaceName, + Servers: resolver.Servers, + Domains: resolver.Domains, + } + + vpnConfigs = append(vpnConfigs, config) + + Log(ctx, logger.Debug(), "Found VPN DNS config - Interface: %s, Servers: %v, Domains: %v", + config.InterfaceName, config.Servers, config.Domains) + } + } + + // Detect exit mode: if a VPN DNS server IP also appears as the system's default + // resolver (no search domains, no Supplemental flag), the VPN is routing ALL traffic + // (not just specific domains). In exit mode, ctrld must continue intercepting DNS + // on the VPN interface to enforce its profile on all queries. + defaultResolverIPs := make(map[string]bool) + for _, resolver := range allResolvers { + if len(resolver.Servers) > 0 && len(resolver.Domains) == 0 && + !strings.Contains(resolver.Flags, "Supplemental") && + !strings.Contains(resolver.Flags, "Scoped") { + for _, server := range resolver.Servers { + defaultResolverIPs[server] = true + } + } + } + for i := range vpnConfigs { + for _, server := range vpnConfigs[i].Servers { + if defaultResolverIPs[server] { + vpnConfigs[i].IsExitMode = true + Log(ctx, logger.Info(), "VPN DNS config on %s detected as EXIT MODE — server %s is also the system default resolver", + vpnConfigs[i].InterfaceName, server) + break + } + } + } + + Log(ctx, logger.Debug(), "VPN DNS discovery completed: found %d VPN interfaces", len(vpnConfigs)) + return vpnConfigs +} + +// resolveInterfaceForIP uses the macOS routing table to determine which network +// interface would be used to reach the given IP address. 
This is a fallback for +// when scutil --dns doesn't include if_index in the resolver entry (common with +// Tailscale MagicDNS per-domain resolvers). +// +// Runs: route -n get and parses the "interface:" line from the output. +// Returns empty string on any error (callers should treat as "unknown interface"). +func resolveInterfaceForIP(ctx context.Context, ip string) string { + logger := *ProxyLogger.Load() + + cmd := exec.CommandContext(ctx, "route", "-n", "get", ip) + output, err := cmd.Output() + if err != nil { + Log(ctx, logger.Debug(), "route -n get %s failed: %v", ip, err) + return "" + } + + // Parse "interface: utun11" from route output. + for _, line := range strings.Split(string(output), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "interface:") { + iface := strings.TrimSpace(strings.TrimPrefix(line, "interface:")) + if iface != "" && iface != "lo0" { + return iface + } + } + } + return "" +} + +type resolverInfo struct { + Number int + InterfaceName string + Servers []string + Domains []string + Flags string // Raw flags line (e.g., "Supplemental, Request A records") +} + +// isSplitDNSResolver reports whether a scutil --dns resolver entry represents a +// split DNS configuration that ctrld should forward to. Any resolver with both +// non-loopback DNS servers and search domains qualifies — this covers VPN adapters +// (F5, Tailscale, Cisco AnyConnect, etc.) and any other virtual interface that +// registers search domains (e.g., corporate proxies, containers). +// +// We intentionally avoid heuristics about interface names or domain suffixes: +// if an interface declares "these domains resolve via these servers," we honor it. +// The only exclusions are mDNS entries (bare ".local" without an interface binding). +// +// Note: loopback servers are already filtered out during parsing in parseScutilOutput. 
+func isSplitDNSResolver(ctx context.Context, resolver *resolverInfo) bool { + logger := *ProxyLogger.Load() + + // Must have both DNS servers and search domains to be a useful split DNS route. + if len(resolver.Servers) == 0 || len(resolver.Domains) == 0 { + Log(ctx, logger.Debug(), "Resolver #%d: skipping — no servers (%d) or no domains (%d)", + resolver.Number, len(resolver.Servers), len(resolver.Domains)) + return false + } + + // Skip multicast DNS entries. scutil --dns shows a resolver for ".local" that + // handles mDNS — it has no interface binding and the sole domain is "local". + // Real VPN entries with ".local" suffix (e.g., "corp.example.com") will have an + // interface name or additional domains. + if len(resolver.Domains) == 1 { + domain := strings.ToLower(strings.TrimSpace(resolver.Domains[0])) + if domain == "local" || domain == ".local" { + Log(ctx, logger.Debug(), "Resolver #%d: skipping — mDNS resolver", resolver.Number) + return false + } + } + + Log(ctx, logger.Debug(), "Resolver #%d: split DNS resolver — interface: %q, servers: %v, domains: %v", + resolver.Number, resolver.InterfaceName, resolver.Servers, resolver.Domains) + return true +} diff --git a/vpn_dns_linux.go b/vpn_dns_linux.go new file mode 100644 index 00000000..d8e9729b --- /dev/null +++ b/vpn_dns_linux.go @@ -0,0 +1,211 @@ +//go:build linux + +package ctrld + +import ( + "bufio" + "context" + "net" + "os/exec" + "regexp" + "strings" +) + +// DiscoverVPNDNS discovers DNS servers and search domains from VPN interfaces on Linux. +// Uses resolvectl status to find per-link DNS configurations. 
+func DiscoverVPNDNS(ctx context.Context) []VPNDNSConfig { + logger := *ProxyLogger.Load() + + Log(ctx, logger.Debug(), "Discovering VPN DNS configurations on Linux") + + if configs := parseResolvectlStatus(ctx); len(configs) > 0 { + return configs + } + + Log(ctx, logger.Debug(), "resolvectl not available or no results, trying fallback method") + return parseVPNInterfacesDNS(ctx) +} + +func parseResolvectlStatus(ctx context.Context) []VPNDNSConfig { + logger := *ProxyLogger.Load() + + cmd := exec.CommandContext(ctx, "resolvectl", "status") + output, err := cmd.Output() + if err != nil { + Log(ctx, logger.Debug(), "Failed to execute resolvectl status: %v", err) + return nil + } + + Log(ctx, logger.Debug(), "Parsing resolvectl status output") + + linkRe := regexp.MustCompile(`^Link (\d+) \((.+)\):`) + dnsServersRe := regexp.MustCompile(`^\s+DNS Servers?: (.+)`) + dnsDomainsRe := regexp.MustCompile(`^\s+DNS Domain: (.+)`) + + var vpnConfigs []VPNDNSConfig + var currentLink *linkInfo + + scanner := bufio.NewScanner(strings.NewReader(string(output))) + for scanner.Scan() { + line := scanner.Text() + + if match := linkRe.FindStringSubmatch(line); match != nil { + if currentLink != nil && isVPNLink(ctx, currentLink) { + config := VPNDNSConfig{ + InterfaceName: currentLink.InterfaceName, + Servers: currentLink.Servers, + Domains: currentLink.Domains, + } + vpnConfigs = append(vpnConfigs, config) + + Log(ctx, logger.Debug(), "Found VPN DNS config - Interface: %s, Servers: %v, Domains: %v", + config.InterfaceName, config.Servers, config.Domains) + } + + currentLink = &linkInfo{ + InterfaceName: strings.TrimSpace(match[2]), + } + continue + } + + if currentLink == nil { + continue + } + + if match := dnsServersRe.FindStringSubmatch(line); match != nil { + serverList := strings.TrimSpace(match[1]) + for _, server := range strings.Fields(serverList) { + if ip := net.ParseIP(server); ip != nil && !ip.IsLoopback() { + currentLink.Servers = append(currentLink.Servers, server) + } 
+ } + continue + } + + if match := dnsDomainsRe.FindStringSubmatch(line); match != nil { + domainList := strings.TrimSpace(match[1]) + for _, domain := range strings.Fields(domainList) { + domain = strings.TrimSpace(domain) + if domain != "" { + currentLink.Domains = append(currentLink.Domains, domain) + } + } + continue + } + } + + if currentLink != nil && isVPNLink(ctx, currentLink) { + config := VPNDNSConfig{ + InterfaceName: currentLink.InterfaceName, + Servers: currentLink.Servers, + Domains: currentLink.Domains, + } + vpnConfigs = append(vpnConfigs, config) + + Log(ctx, logger.Debug(), "Found VPN DNS config - Interface: %s, Servers: %v, Domains: %v", + config.InterfaceName, config.Servers, config.Domains) + } + + Log(ctx, logger.Debug(), "resolvectl parsing completed: found %d VPN interfaces", len(vpnConfigs)) + return vpnConfigs +} + +func parseVPNInterfacesDNS(ctx context.Context) []VPNDNSConfig { + logger := *ProxyLogger.Load() + + Log(ctx, logger.Debug(), "Using fallback method to detect VPN DNS") + + interfaces, err := net.Interfaces() + if err != nil { + Log(ctx, logger.Error().Err(err), "Failed to get network interfaces") + return nil + } + + var vpnConfigs []VPNDNSConfig + + for _, iface := range interfaces { + if !isVPNInterfaceName(iface.Name) { + continue + } + if iface.Flags&net.FlagUp == 0 { + continue + } + + Log(ctx, logger.Debug(), "Found potential VPN interface: %s", iface.Name) + Log(ctx, logger.Debug(), "Fallback DNS detection not implemented for interface: %s", iface.Name) + } + + Log(ctx, logger.Debug(), "Fallback method completed: found %d VPN interfaces", len(vpnConfigs)) + return vpnConfigs +} + +type linkInfo struct { + InterfaceName string + Servers []string + Domains []string +} + +func isVPNLink(ctx context.Context, link *linkInfo) bool { + logger := *ProxyLogger.Load() + + if len(link.Servers) == 0 || len(link.Domains) == 0 { + Log(ctx, logger.Debug(), "Link %s: insufficient config (servers: %d, domains: %d)", + 
link.InterfaceName, len(link.Servers), len(link.Domains)) + return false + } + + if isVPNInterfaceName(link.InterfaceName) { + Log(ctx, logger.Debug(), "Link %s: identified as VPN based on interface name", link.InterfaceName) + return true + } + + hasRoutingDomain := false + for _, domain := range link.Domains { + if strings.HasPrefix(domain, "~") { + hasRoutingDomain = true + break + } + } + + if hasRoutingDomain { + Log(ctx, logger.Debug(), "Link %s: identified as VPN based on routing domain", link.InterfaceName) + return true + } + + hasPrivateDNS := false + for _, server := range link.Servers { + if ip := net.ParseIP(server); ip != nil && ip.IsPrivate() { + hasPrivateDNS = true + break + } + } + + hasVPNDomains := false + for _, domain := range link.Domains { + domain = strings.ToLower(strings.TrimPrefix(domain, "~")) + if strings.HasSuffix(domain, ".local") || + strings.HasSuffix(domain, ".corp") || + strings.HasSuffix(domain, ".internal") || + strings.Contains(domain, "vpn") { + hasVPNDomains = true + break + } + } + + if hasPrivateDNS && hasVPNDomains { + Log(ctx, logger.Debug(), "Link %s: identified as VPN based on private DNS + VPN domains", link.InterfaceName) + return true + } + + Log(ctx, logger.Debug(), "Link %s: not identified as VPN link", link.InterfaceName) + return false +} + +func isVPNInterfaceName(name string) bool { + name = strings.ToLower(name) + return strings.HasPrefix(name, "tun") || + strings.HasPrefix(name, "tap") || + strings.HasPrefix(name, "ppp") || + strings.HasPrefix(name, "vpn") || + strings.Contains(name, "vpn") +} diff --git a/vpn_dns_others.go b/vpn_dns_others.go new file mode 100644 index 00000000..a01a9a9a --- /dev/null +++ b/vpn_dns_others.go @@ -0,0 +1,15 @@ +//go:build !windows && !darwin && !linux + +package ctrld + +import ( + "context" +) + +// DiscoverVPNDNS is a stub implementation for unsupported platforms. +// Returns nil to indicate no VPN DNS configurations found. 
+func DiscoverVPNDNS(ctx context.Context) []VPNDNSConfig { + logger := *ProxyLogger.Load() + Log(ctx, logger.Debug(), "VPN DNS discovery not implemented for this platform") + return nil +} diff --git a/vpn_dns_windows.go b/vpn_dns_windows.go new file mode 100644 index 00000000..fd0673eb --- /dev/null +++ b/vpn_dns_windows.go @@ -0,0 +1,130 @@ +//go:build windows + +package ctrld + +import ( + "context" + "strings" + "syscall" + + "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" +) + +// DiscoverVPNDNS discovers DNS servers and search domains from non-physical (VPN) interfaces. +// Only called when dnsIntercept is active. +func DiscoverVPNDNS(ctx context.Context) []VPNDNSConfig { + logger := *ProxyLogger.Load() + + Log(ctx, logger.Debug(), "Discovering VPN DNS configurations on Windows") + + flags := winipcfg.GAAFlagIncludeGateways | winipcfg.GAAFlagIncludePrefix + aas, err := winipcfg.GetAdaptersAddresses(syscall.AF_UNSPEC, flags) + if err != nil { + Log(ctx, logger.Error().Err(err), "Failed to get adapters addresses") + return nil + } + + Log(ctx, logger.Debug(), "Found %d network adapters", len(aas)) + + // Get valid (physical/hardware) interfaces to filter them out + validInterfacesMap := validInterfaces() + + var vpnConfigs []VPNDNSConfig + + for _, aa := range aas { + if aa.OperStatus != winipcfg.IfOperStatusUp { + Log(ctx, logger.Debug(), "Skipping adapter %s - not up, status: %d", + aa.FriendlyName(), aa.OperStatus) + continue + } + + if aa.IfType == winipcfg.IfTypeSoftwareLoopback { + Log(ctx, logger.Debug(), "Skipping %s (software loopback)", aa.FriendlyName()) + continue + } + + // INVERT the validInterfaces filter: we want non-physical/non-hardware adapters + _, isValidPhysical := validInterfacesMap[aa.FriendlyName()] + if isValidPhysical { + Log(ctx, logger.Debug(), "Skipping %s (physical/hardware adapter)", aa.FriendlyName()) + continue + } + + // Skip adapters that have no routable unicast addresses. 
An adapter + // with only link-local (fe80::) or APIPA (169.254.x.x) addresses is + // not actually connected — its DNS servers are stale. This prevents + // picking up e.g. Tailscale's adapter when the app is installed but + // disconnected (OperStatus reports Up but only APIPA addresses exist). + hasRoutableAddr := false + for a := aa.FirstUnicastAddress; a != nil; a = a.Next { + ip := a.Address.IP() + if ip == nil { + continue + } + if !ip.IsLinkLocalUnicast() { + hasRoutableAddr = true + break + } + } + if !hasRoutableAddr { + Log(ctx, logger.Debug(), "Skipping %s - no routable addresses (likely disconnected)", aa.FriendlyName()) + continue + } + + var servers []string + for dns := aa.FirstDNSServerAddress; dns != nil; dns = dns.Next { + ip := dns.Address.IP() + if ip == nil { + continue + } + + ipStr := ip.String() + if ip.IsLoopback() { + continue + } + + servers = append(servers, ipStr) + } + + // Check adapter-specific (connection-specific) DNS suffix first, + // since we want to map per-adapter DNS servers to per-adapter suffixes. + // This is what most traditional VPNs set (F5, Cisco AnyConnect, GlobalProtect). + var domains []string + if connSuffix := strings.TrimSpace(aa.DNSSuffix()); connSuffix != "" { + domains = append(domains, connSuffix) + Log(ctx, logger.Debug(), "Using connection-specific DNS suffix for %s: %s", + aa.FriendlyName(), connSuffix) + } + + // Then check supplemental DNS suffix list (used by Tailscale and + // VPN clients that register search domains via the DNS Client API). + for suffix := aa.FirstDNSSuffix; suffix != nil; suffix = suffix.Next { + domain := strings.TrimSpace(suffix.String()) + if domain != "" { + domains = append(domains, domain) + } + } + + // Accept VPN adapters with DNS servers even without domains. + // Domain-less configs still provide useful DNS server IPs that + // can serve existing split-rules and OS resolver queries. 
+ if len(servers) > 0 { + config := VPNDNSConfig{ + InterfaceName: aa.FriendlyName(), + Servers: servers, + Domains: domains, + } + + vpnConfigs = append(vpnConfigs, config) + + Log(ctx, logger.Debug(), "Found VPN DNS config - Interface: %s, Servers: %v, Domains: %v", + config.InterfaceName, config.Servers, config.Domains) + } else { + Log(ctx, logger.Debug(), "Skipping %s - no DNS servers found", + aa.FriendlyName()) + } + } + + Log(ctx, logger.Debug(), "VPN DNS discovery completed: found %d VPN interfaces", len(vpnConfigs)) + return vpnConfigs +} From 9b2e51f53a132407c2bcd4ccf71fae9cc09b92ee Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 3 Mar 2026 02:07:11 -0500 Subject: [PATCH 20/22] feat: robust username detection and CI updates Add platform-specific username detection for Control D metadata: - macOS: directory services (dscl) with console user fallback - Linux: systemd loginctl, utmp, /etc/passwd traversal - Windows: WTS session enumeration, registry, token lookup --- cmd/cli/control_server.go | 13 +- discover_user_darwin.go | 135 +++++++++++++++++ discover_user_linux.go | 238 ++++++++++++++++++++++++++++++ discover_user_others.go | 13 ++ discover_user_windows.go | 294 +++++++++++++++++++++++++++++++++++++ docs/username-detection.md | 126 ++++++++++++++++ metadata.go | 66 +++------ 7 files changed, 832 insertions(+), 53 deletions(-) create mode 100644 discover_user_darwin.go create mode 100644 discover_user_linux.go create mode 100644 discover_user_others.go create mode 100644 discover_user_windows.go create mode 100644 docs/username-detection.md diff --git a/cmd/cli/control_server.go b/cmd/cli/control_server.go index 3db444e2..064e7fe5 100644 --- a/cmd/cli/control_server.go +++ b/cmd/cli/control_server.go @@ -32,9 +32,10 @@ const ( ) type ifaceResponse struct { - Name string `json:"name"` - All bool `json:"all"` - OK bool `json:"ok"` + Name string `json:"name"` + All bool `json:"all"` + OK bool `json:"ok"` + InterceptMode string 
`json:"intercept_mode,omitempty"` // "dns", "hard", or "" (not intercepting) } type controlServer struct { @@ -220,7 +221,7 @@ func (p *prog) registerControlServerHandler() { rcReq := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } if rc, err := controld.FetchResolverConfig(rcReq, cdDev); rc != nil { if rc.DeactivationPin != nil { @@ -276,6 +277,10 @@ func (p *prog) registerControlServerHandler() { res.Name = p.runningIface res.All = p.requiredMultiNICsConfig res.OK = true + // Report intercept mode to the start command for proper log output. + if interceptMode == "dns" || interceptMode == "hard" { + res.InterceptMode = interceptMode + } } } if err := json.NewEncoder(w).Encode(res); err != nil { diff --git a/discover_user_darwin.go b/discover_user_darwin.go new file mode 100644 index 00000000..b1d08593 --- /dev/null +++ b/discover_user_darwin.go @@ -0,0 +1,135 @@ +//go:build darwin + +package ctrld + +import ( + "context" + "os/exec" + "strconv" + "strings" +) + +// DiscoverMainUser attempts to find the primary user on macOS systems. +// This is designed to work reliably under RMM deployments where traditional +// environment variables and session detection may not be available. +// +// Priority chain (deterministic, lowest UID wins among candidates): +// 1. Console user from stat -f %Su /dev/console +// 2. Active console session user via scutil +// 3. 
First user with UID >= 501 from dscl (standard macOS user range) +func DiscoverMainUser(ctx context.Context) string { + logger := ProxyLogger.Load().Debug() + + // Method 1: Check console owner via stat + logger.Msg("attempting to discover user via console stat") + if user := getConsoleUser(ctx); user != "" && user != "root" { + logger.Str("method", "stat").Str("user", user).Msg("found user via console stat") + return user + } + + // Method 2: Check active console session via scutil + logger.Msg("attempting to discover user via scutil ConsoleUser") + if user := getScutilConsoleUser(ctx); user != "" && user != "root" { + logger.Str("method", "scutil").Str("user", user).Msg("found user via scutil ConsoleUser") + return user + } + + // Method 3: Find lowest UID >= 501 from directory services + logger.Msg("attempting to discover user via dscl directory scan") + if user := getLowestRegularUser(ctx); user != "" { + logger.Str("method", "dscl").Str("user", user).Msg("found user via dscl scan") + return user + } + + logger.Msg("all user discovery methods failed") + return "unknown" +} + +// getConsoleUser uses stat to find the owner of /dev/console +func getConsoleUser(ctx context.Context) string { + cmd := exec.CommandContext(ctx, "stat", "-f", "%Su", "/dev/console") + out, err := cmd.Output() + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("failed to stat /dev/console") + return "" + } + return strings.TrimSpace(string(out)) +} + +// getScutilConsoleUser uses scutil to get the current console user +func getScutilConsoleUser(ctx context.Context) string { + cmd := exec.CommandContext(ctx, "scutil", "-r", "ConsoleUser") + out, err := cmd.Output() + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("failed to get ConsoleUser via scutil") + return "" + } + + lines := strings.Split(string(out), "\n") + for _, line := range lines { + if strings.Contains(line, "Name :") { + parts := strings.Fields(line) + if len(parts) >= 3 { + return 
strings.TrimSpace(parts[2]) + } + } + } + return "" +} + +// getLowestRegularUser finds the user with the lowest UID >= 501 +func getLowestRegularUser(ctx context.Context) string { + // Get list of all users with UID >= 501 + cmd := exec.CommandContext(ctx, "dscl", ".", "list", "/Users", "UniqueID") + out, err := cmd.Output() + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("failed to list users via dscl") + return "" + } + + var candidates []struct { + name string + uid int + } + + lines := strings.Split(string(out), "\n") + for _, line := range lines { + fields := strings.Fields(line) + if len(fields) != 2 { + continue + } + + username := fields[0] + uidStr := fields[1] + + uid, err := strconv.Atoi(uidStr) + if err != nil { + continue + } + + // Only consider regular users (UID >= 501 on macOS) + if uid >= 501 { + candidates = append(candidates, struct { + name string + uid int + }{username, uid}) + } + } + + if len(candidates) == 0 { + return "" + } + + // Find the candidate with the lowest UID (deterministic choice) + lowestUID := candidates[0].uid + result := candidates[0].name + + for _, candidate := range candidates[1:] { + if candidate.uid < lowestUID { + lowestUID = candidate.uid + result = candidate.name + } + } + + return result +} \ No newline at end of file diff --git a/discover_user_linux.go b/discover_user_linux.go new file mode 100644 index 00000000..0cf8923b --- /dev/null +++ b/discover_user_linux.go @@ -0,0 +1,238 @@ +//go:build linux + +package ctrld + +import ( + "bufio" + "context" + "os" + "os/exec" + "strconv" + "strings" +) + +// DiscoverMainUser attempts to find the primary user on Linux systems. +// This is designed to work reliably under RMM deployments where traditional +// environment variables and session detection may not be available. +// +// Priority chain (deterministic, lowest UID wins among candidates): +// 1. Active users from loginctl list-users +// 2. 
Parse /etc/passwd for users with UID >= 1000, prefer admin group members +// 3. Fallback to lowest UID >= 1000 from /etc/passwd +func DiscoverMainUser(ctx context.Context) string { + logger := ProxyLogger.Load().Debug() + + // Method 1: Check active users via loginctl + logger.Msg("attempting to discover user via loginctl") + if user := getLoginctlUser(ctx); user != "" { + logger.Str("method", "loginctl").Str("user", user).Msg("found user via loginctl") + return user + } + + // Method 2: Parse /etc/passwd and find admin users first + logger.Msg("attempting to discover user via /etc/passwd with admin preference") + if user := getPasswdUserWithAdminPreference(ctx); user != "" { + logger.Str("method", "passwd+admin").Str("user", user).Msg("found admin user via /etc/passwd") + return user + } + + // Method 3: Fallback to lowest UID >= 1000 from /etc/passwd + logger.Msg("attempting to discover user via /etc/passwd lowest UID") + if user := getLowestPasswdUser(ctx); user != "" { + logger.Str("method", "passwd").Str("user", user).Msg("found user via /etc/passwd") + return user + } + + logger.Msg("all user discovery methods failed") + return "unknown" +} + +// getLoginctlUser uses loginctl to find active users +func getLoginctlUser(ctx context.Context) string { + cmd := exec.CommandContext(ctx, "loginctl", "list-users", "--no-legend") + out, err := cmd.Output() + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("failed to run loginctl list-users") + return "" + } + + var candidates []struct { + name string + uid int + } + + lines := strings.Split(string(out), "\n") + for _, line := range lines { + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + + uidStr := fields[0] + username := fields[1] + + uid, err := strconv.Atoi(uidStr) + if err != nil { + continue + } + + // Only consider regular users (UID >= 1000 on Linux) + if uid >= 1000 { + candidates = append(candidates, struct { + name string + uid int + }{username, uid}) + } + } + + if 
len(candidates) == 0 { + return "" + } + + // Return user with lowest UID (deterministic choice) + lowestUID := candidates[0].uid + result := candidates[0].name + + for _, candidate := range candidates[1:] { + if candidate.uid < lowestUID { + lowestUID = candidate.uid + result = candidate.name + } + } + + return result +} + +// getPasswdUserWithAdminPreference parses /etc/passwd and prefers admin group members +func getPasswdUserWithAdminPreference(ctx context.Context) string { + users := parsePasswdFile() + if len(users) == 0 { + return "" + } + + var adminUsers []struct { + name string + uid int + } + var regularUsers []struct { + name string + uid int + } + + // Separate admin and regular users + for _, user := range users { + if isUserInAdminGroups(ctx, user.name) { + adminUsers = append(adminUsers, user) + } else { + regularUsers = append(regularUsers, user) + } + } + + // Prefer admin users, then regular users + candidates := adminUsers + if len(candidates) == 0 { + candidates = regularUsers + } + + if len(candidates) == 0 { + return "" + } + + // Return user with lowest UID (deterministic choice) + lowestUID := candidates[0].uid + result := candidates[0].name + + for _, candidate := range candidates[1:] { + if candidate.uid < lowestUID { + lowestUID = candidate.uid + result = candidate.name + } + } + + return result +} + +// getLowestPasswdUser returns the user with lowest UID >= 1000 from /etc/passwd +func getLowestPasswdUser(ctx context.Context) string { + users := parsePasswdFile() + if len(users) == 0 { + return "" + } + + // Return user with lowest UID (deterministic choice) + lowestUID := users[0].uid + result := users[0].name + + for _, user := range users[1:] { + if user.uid < lowestUID { + lowestUID = user.uid + result = user.name + } + } + + return result +} + +// parsePasswdFile parses /etc/passwd and returns users with UID >= 1000 +func parsePasswdFile() []struct { + name string + uid int +} { + file, err := os.Open("/etc/passwd") + if err != nil 
{ + ProxyLogger.Load().Debug().Err(err).Msg("failed to open /etc/passwd") + return nil + } + defer file.Close() + + var users []struct { + name string + uid int + } + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + fields := strings.Split(line, ":") + if len(fields) < 3 { + continue + } + + username := fields[0] + uidStr := fields[2] + + uid, err := strconv.Atoi(uidStr) + if err != nil { + continue + } + + // Only consider regular users (UID >= 1000 on Linux) + if uid >= 1000 { + users = append(users, struct { + name string + uid int + }{username, uid}) + } + } + + return users +} + +// isUserInAdminGroups checks if a user is in common admin groups +func isUserInAdminGroups(ctx context.Context, username string) bool { + adminGroups := []string{"sudo", "wheel", "admin"} + + for _, group := range adminGroups { + cmd := exec.CommandContext(ctx, "groups", username) + out, err := cmd.Output() + if err != nil { + continue + } + + if strings.Contains(string(out), group) { + return true + } + } + + return false +} \ No newline at end of file diff --git a/discover_user_others.go b/discover_user_others.go new file mode 100644 index 00000000..68bb2528 --- /dev/null +++ b/discover_user_others.go @@ -0,0 +1,13 @@ +//go:build !windows && !linux && !darwin + +package ctrld + +import "context" + +// DiscoverMainUser returns "unknown" for unsupported platforms. +// This is a stub implementation for platforms where username detection +// is not yet implemented. 
+func DiscoverMainUser(ctx context.Context) string { + ProxyLogger.Load().Debug().Msg("username discovery not implemented for this platform") + return "unknown" +} diff --git a/discover_user_windows.go b/discover_user_windows.go new file mode 100644 index 00000000..3e5db11e --- /dev/null +++ b/discover_user_windows.go @@ -0,0 +1,294 @@ +//go:build windows + +package ctrld + +import ( + "context" + "strconv" + "strings" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" + "golang.org/x/sys/windows/registry" +) + +var ( + kernel32 = windows.NewLazySystemDLL("kernel32.dll") + wtsapi32 = windows.NewLazySystemDLL("wtsapi32.dll") + procGetConsoleWindow = kernel32.NewProc("GetConsoleWindow") + procWTSGetActiveConsoleSessionId = wtsapi32.NewProc("WTSGetActiveConsoleSessionId") + procWTSQuerySessionInformation = wtsapi32.NewProc("WTSQuerySessionInformationW") + procWTSFreeMemory = wtsapi32.NewProc("WTSFreeMemory") +) + +const ( + WTSUserName = 5 +) + +// DiscoverMainUser attempts to find the primary user on Windows systems. +// This is designed to work reliably under RMM deployments where traditional +// environment variables and session detection may not be available. +// +// Priority chain (deterministic, lowest RID wins among candidates): +// 1. Active console session user via WTSGetActiveConsoleSessionId +// 2. Registry ProfileList scan for Administrators group members +// 3. 
Fallback to lowest RID from ProfileList +func DiscoverMainUser(ctx context.Context) string { + logger := ProxyLogger.Load().Debug() + + // Method 1: Check active console session + logger.Msg("attempting to discover user via active console session") + if user := getActiveConsoleUser(ctx); user != "" { + logger.Str("method", "console").Str("user", user).Msg("found user via active console session") + return user + } + + // Method 2: Scan registry for admin users + logger.Msg("attempting to discover user via registry with admin preference") + if user := getRegistryUserWithAdminPreference(ctx); user != "" { + logger.Str("method", "registry+admin").Str("user", user).Msg("found admin user via registry") + return user + } + + // Method 3: Fallback to lowest RID from registry + logger.Msg("attempting to discover user via registry lowest RID") + if user := getLowestRegistryUser(ctx); user != "" { + logger.Str("method", "registry").Str("user", user).Msg("found user via registry") + return user + } + + logger.Msg("all user discovery methods failed") + return "unknown" +} + +// getActiveConsoleUser gets the username of the active console session +func getActiveConsoleUser(ctx context.Context) string { + // Guard against missing WTS procedures (e.g., Windows Server Core). 
+ if err := procWTSGetActiveConsoleSessionId.Find(); err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("WTSGetActiveConsoleSessionId not available, skipping console session check") + return "" + } + sessionId, _, _ := procWTSGetActiveConsoleSessionId.Call() + if sessionId == 0xFFFFFFFF { // Invalid session + ProxyLogger.Load().Debug().Msg("no active console session found") + return "" + } + + var buffer uintptr + var bytesReturned uint32 + + if err := procWTSQuerySessionInformation.Find(); err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("WTSQuerySessionInformationW not available") + return "" + } + ret, _, _ := procWTSQuerySessionInformation.Call( + 0, // WTS_CURRENT_SERVER_HANDLE + sessionId, + uintptr(WTSUserName), + uintptr(unsafe.Pointer(&buffer)), + uintptr(unsafe.Pointer(&bytesReturned)), + ) + + if ret == 0 { + ProxyLogger.Load().Debug().Msg("failed to query session information") + return "" + } + defer procWTSFreeMemory.Call(buffer) + + // Convert buffer to string + username := windows.UTF16PtrToString((*uint16)(unsafe.Pointer(buffer))) + if username == "" { + return "" + } + + return username +} + +// getRegistryUserWithAdminPreference scans registry profiles and prefers admin users +func getRegistryUserWithAdminPreference(ctx context.Context) string { + profiles := getRegistryProfiles() + if len(profiles) == 0 { + return "" + } + + var adminProfiles []registryProfile + var regularProfiles []registryProfile + + // Separate admin and regular users + for _, profile := range profiles { + if isUserInAdministratorsGroup(profile.username) { + adminProfiles = append(adminProfiles, profile) + } else { + regularProfiles = append(regularProfiles, profile) + } + } + + // Prefer admin users, then regular users + candidates := adminProfiles + if len(candidates) == 0 { + candidates = regularProfiles + } + + if len(candidates) == 0 { + return "" + } + + // Return user with lowest RID (deterministic choice) + lowestRID := candidates[0].rid + result := 
candidates[0].username + + for _, candidate := range candidates[1:] { + if candidate.rid < lowestRID { + lowestRID = candidate.rid + result = candidate.username + } + } + + return result +} + +// getLowestRegistryUser returns the user with lowest RID from registry +func getLowestRegistryUser(ctx context.Context) string { + profiles := getRegistryProfiles() + if len(profiles) == 0 { + return "" + } + + // Return user with lowest RID (deterministic choice) + lowestRID := profiles[0].rid + result := profiles[0].username + + for _, profile := range profiles[1:] { + if profile.rid < lowestRID { + lowestRID = profile.rid + result = profile.username + } + } + + return result +} + +type registryProfile struct { + username string + rid uint32 + sid string +} + +// getRegistryProfiles scans the registry ProfileList for user profiles +func getRegistryProfiles() []registryProfile { + key, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProfileList`, registry.ENUMERATE_SUB_KEYS) + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("failed to open ProfileList registry key") + return nil + } + defer key.Close() + + subkeys, err := key.ReadSubKeyNames(-1) + if err != nil { + ProxyLogger.Load().Debug().Err(err).Msg("failed to read ProfileList subkeys") + return nil + } + + var profiles []registryProfile + + for _, subkey := range subkeys { + // Only process SIDs that start with S-1-5-21 (domain/local user accounts) + if !strings.HasPrefix(subkey, "S-1-5-21-") { + continue + } + + profileKey, err := registry.OpenKey(key, subkey, registry.QUERY_VALUE) + if err != nil { + continue + } + + profileImagePath, _, err := profileKey.GetStringValue("ProfileImagePath") + profileKey.Close() + if err != nil { + continue + } + + // Extract username from profile path (e.g., C:\Users\username) + pathParts := strings.Split(profileImagePath, `\`) + if len(pathParts) == 0 { + continue + } + username := pathParts[len(pathParts)-1] + + // Extract RID 
from SID (last component after final hyphen) + sidParts := strings.Split(subkey, "-") + if len(sidParts) == 0 { + continue + } + ridStr := sidParts[len(sidParts)-1] + rid, err := strconv.ParseUint(ridStr, 10, 32) + if err != nil { + continue + } + + // Only consider regular users (RID >= 1000, excludes built-in accounts). + // rid == 500 is the default Administrator account (DOMAIN_USER_RID_ADMIN). + // See: https://learn.microsoft.com/en-us/windows/win32/secauthz/well-known-sids + if rid == 500 || rid >= 1000 { + profiles = append(profiles, registryProfile{ + username: username, + rid: uint32(rid), + sid: subkey, + }) + } + } + + return profiles +} + +// isUserInAdministratorsGroup checks if a user is in the Administrators group +func isUserInAdministratorsGroup(username string) bool { + // Open the user account + usernamePtr, err := syscall.UTF16PtrFromString(username) + if err != nil { + return false + } + + var userSID *windows.SID + var domain *uint16 + var userSIDSize, domainSize uint32 + var use uint32 + + // First call to get buffer sizes + err = windows.LookupAccountName(nil, usernamePtr, userSID, &userSIDSize, domain, &domainSize, &use) + if err != nil && err != windows.ERROR_INSUFFICIENT_BUFFER { + return false + } + + // Allocate buffers and make actual call + userSID = (*windows.SID)(unsafe.Pointer(&make([]byte, userSIDSize)[0])) + domain = (*uint16)(unsafe.Pointer(&make([]uint16, domainSize)[0])) + + err = windows.LookupAccountName(nil, usernamePtr, userSID, &userSIDSize, domain, &domainSize, &use) + if err != nil { + return false + } + + // Check if user is member of Administrators group (S-1-5-32-544) + adminSID, err := windows.CreateWellKnownSid(windows.WinBuiltinAdministratorsSid) + if err != nil { + return false + } + + // Open user token (this is a simplified check) + var token windows.Token + err = windows.OpenProcessToken(windows.CurrentProcess(), windows.TOKEN_QUERY, &token) + if err != nil { + return false + } + defer token.Close() + + // 
Check group membership + member, err := token.IsMember(adminSID) + if err != nil { + return false + } + + return member +} diff --git a/docs/username-detection.md b/docs/username-detection.md new file mode 100644 index 00000000..18cd77ff --- /dev/null +++ b/docs/username-detection.md @@ -0,0 +1,126 @@ +# Username Detection in ctrld + +## Overview + +The ctrld client needs to detect the primary user of a system for telemetry and configuration purposes. This is particularly challenging in RMM (Remote Monitoring and Management) deployments where traditional session-based detection methods fail. + +## The Problem + +In traditional desktop environments, username detection is straightforward using environment variables like `$USER`, `$LOGNAME`, or `$SUDO_USER`. However, RMM deployments present unique challenges: + +- **No active login session**: RMM agents often run as system services without an associated user session +- **Missing environment variables**: Common user environment variables are not available in service contexts +- **Root/SYSTEM execution**: The ctrld process may run with elevated privileges, masking the actual user + +## Solution Approach + +ctrld implements a multi-tier, deterministic username detection system through the `DiscoverMainUser()` function with platform-specific implementations: + +### Key Principles + +1. **Deterministic selection**: No randomness - always returns the same result for the same system state +2. **Priority chain**: Multiple detection methods with clear fallback order +3. **Lowest UID/RID wins**: Among multiple candidates, select the user with the lowest identifier (typically the first user created) +4. **Fast execution**: All operations complete in <100ms using local system resources +5. **Debug logging**: Each decision point logs its rationale for troubleshooting + +## Platform-Specific Implementation + +### macOS (`discover_user_darwin.go`) + +**Detection chain:** +1. 
**Console owner** (`stat -f %Su /dev/console`) - Most reliable for active GUI sessions
+2. **scutil ConsoleUser** - Alternative session detection via System Configuration framework
+3. **Directory Services scan** (`dscl . list /Users UniqueID`) - Scan all users with UID ≥ 501, select lowest
+
+**Rationale**: macOS systems typically have a primary user who owns the console. Service contexts can still access device ownership information.
+
+### Linux (`discover_user_linux.go`)
+
+**Detection chain:**
+1. **loginctl active users** (`loginctl list-users`) - systemd's session management
+2. **Admin user preference** - Parse `/etc/passwd` for UID ≥ 1000, prefer sudo/wheel/admin group members
+3. **Lowest UID fallback** - From `/etc/passwd`, select user with UID ≥ 1000 and lowest UID
+
+**Rationale**: Linux systems may have multiple regular users. Prioritize users in administrative groups as they're more likely to be primary system users.
+
+### Windows (`discover_user_windows.go`)
+
+**Detection chain:**
+1. **Active console session** (`WTSGetActiveConsoleSessionId` + `WTSQuerySessionInformation`) - Direct Windows API for active user
+2. **Registry admin preference** - Scan `HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProfileList`, prefer Administrators group members
+3. **Lowest RID fallback** - From ProfileList, select the lowest-RID user among RID 500 (the built-in Administrator account) and RIDs ≥ 1000 (regular accounts)
+
+**Rationale**: Windows has well-defined APIs for session management. Registry ProfileList provides a complete view of all user accounts when no active session exists.
+
+### Other Platforms (`discover_user_others.go`)
+
+Returns `"unknown"` - placeholder for unsupported platforms.
+ +## Implementation Details + +### Error Handling + +- Individual detection methods log failures at Debug level and continue to next method +- Only final failure (all methods failed) is noteworthy +- Graceful degradation ensures the system continues operating with `"unknown"` user + +### Performance Considerations + +- Registry/file parsing uses native Go where possible +- External command execution limited to necessary cases +- No network calls or blocking operations +- Timeout context honored for all operations + +### Security + +- No privilege escalation required +- Read-only operations on system resources +- No user data collected beyond username +- Respects system access controls + +## Testing Scenarios + +This implementation addresses these common RMM scenarios: + +1. **Windows Service context**: No interactive user session, service running as SYSTEM +2. **Linux systemd service**: No login session, running as root daemon +3. **macOS LaunchDaemon**: No GUI user context, running as root +4. **Multi-user systems**: Multiple valid candidates, deterministic selection +5. **Minimalist systems**: Limited user accounts, fallback to available options + +## Metadata Submission Strategy + +System metadata (OS, chassis, username, domain) is sent to the Control D API via POST `/utility`. To avoid duplicate submissions and minimize EDR-triggering user discovery, ctrld uses a tiered approach: + +### When metadata is sent + +| Scenario | Metadata sent? | Username included? | +|---|---|---| +| `ctrld start` with `--cd-org` (provisioning via `cdUIDFromProvToken`) | ✅ Full | ✅ Yes | +| `ctrld run` startup (config validation / processCDFlags) | ✅ Lightweight | ❌ No | +| Runtime config reload (`doReloadApiConfig`) | ✅ Lightweight | ❌ No | +| Runtime self-uninstall check | ✅ Lightweight | ❌ No | +| Runtime deactivation pin refresh | ✅ Lightweight | ❌ No | + +Username is only collected and sent once — during initial provisioning via `cdUIDFromProvToken()`. 
All other API calls use `SystemMetadataRuntime()` which omits username discovery entirely. + +### Runtime metadata (`SystemMetadataRuntime`) + +Runtime API calls (config reload, self-uninstall check, deactivation pin refresh) use `SystemMetadataRuntime()` which includes OS and chassis info but **skips username discovery**. This avoids: + +- **EDR false positives**: Repeated user enumeration (registry scans, WTS queries, loginctl calls) can trigger endpoint detection and response alerts +- **Unnecessary work**: Username is unlikely to change while the service is running + +## Migration Notes + +The previous `currentLoginUser()` function has been replaced by `DiscoverMainUser()` with these changes: + +- **Removed dependencies**: No longer uses `logname(1)`, environment variables as primary detection +- **Added platform specificity**: Separate files for each OS with optimized detection logic +- **Improved RMM compatibility**: Designed specifically for service/daemon contexts +- **Maintained compatibility**: Returns same format (string username or "unknown") + +## Future Extensions + +This architecture allows easy addition of new platforms by creating additional `discover_user_<platform>.go` files following the same interface pattern. \ No newline at end of file diff --git a/metadata.go b/metadata.go index 42bb38a6..5de110fd 100644 --- a/metadata.go +++ b/metadata.go @@ -2,11 +2,6 @@ package ctrld import ( "context" - "os" - "os/exec" - "os/user" - "runtime" - "strings" "github.com/cuonglm/osinfo" @@ -27,8 +22,21 @@ var ( chassisVendor string ) -// SystemMetadata collects system and user-related SystemMetadata and returns it as a map. +// SystemMetadata collects full system metadata including username discovery. +// Use for initial provisioning and first-run config validation where full +// device identification is needed. 
func SystemMetadata(ctx context.Context) map[string]string { + return systemMetadata(ctx, true) +} + +// SystemMetadataRuntime collects system metadata without username discovery. +// Use for runtime API calls (config reload, self-uninstall check, deactivation +// pin refresh) to avoid repeated user enumeration that can trigger EDR alerts. +func SystemMetadataRuntime(ctx context.Context) map[string]string { + return systemMetadata(ctx, false) +} + +func systemMetadata(ctx context.Context, includeUsername bool) map[string]string { m := make(map[string]string) oi := osinfo.New() m[metadataOsKey] = oi.String() @@ -39,7 +47,9 @@ func SystemMetadata(ctx context.Context) map[string]string { } m[metadataChassisTypeKey] = chassisType m[metadataChassisVendorKey] = chassisVendor - m[metadataUsernameKey] = currentLoginUser(ctx) + if includeUsername { + m[metadataUsernameKey] = DiscoverMainUser(ctx) + } m[metadataDomainOrWorkgroupKey] = partOfDomainOrWorkgroup(ctx) domain, err := system.GetActiveDirectoryDomain() if err != nil { @@ -49,45 +59,3 @@ func SystemMetadata(ctx context.Context) map[string]string { return m } - -// currentLoginUser attempts to find the actual login user, even if the process is running as root. -func currentLoginUser(ctx context.Context) string { - // On Darwin 26.2+, sudo no longer preserves SUDO_USER, LOGNAME, USER etc., so we cannot - // rely on environment variables when running under sudo. See CVE-2025-43416. - // We use the logname(1) command on Unix, which reports the login name from the session - // (e.g. utmp); there is no portable syscall equivalent in Go, so we exec logname. 
- if runtime.GOOS != "windows" { - if name := runLogname(ctx); name != "" { - return name - } - } - - // Fallback: env vars (still set on older systems or when not using sudo) - if u := os.Getenv("SUDO_USER"); u != "" { - return u - } - if u := os.Getenv("LOGNAME"); u != "" { - return u - } - if u := os.Getenv("USER"); u != "" { - return u - } - - currentUser, err := user.Current() - if err != nil { - ProxyLogger.Load().Debug().Err(err).Msg("Failed to get current user") - return "unknown" - } - return currentUser.Username -} - -// runLogname runs the logname(1) command and returns the trimmed output, or "" on failure. -func runLogname(ctx context.Context) string { - cmd := exec.CommandContext(ctx, "logname") - out, err := cmd.Output() - if err != nil { - ProxyLogger.Load().Debug().Err(err).Msg("Failed to run logname") - return "" - } - return strings.TrimSpace(string(out)) -} From 9be15aeec8a815f68b1443822d8b5ddfa635df4a Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Tue, 3 Mar 2026 15:11:29 +0700 Subject: [PATCH 21/22] fix(windows): make staticcheck happy --- cmd/cli/dns_intercept_windows.go | 35 -------------------------------- discover_user_windows.go | 2 -- 2 files changed, 37 deletions(-) diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go index 5a07d46c..fb567823 100644 --- a/cmd/cli/dns_intercept_windows.go +++ b/cmd/cli/dns_intercept_windows.go @@ -1062,36 +1062,6 @@ func (p *prog) stopDNSIntercept() error { return nil } -// dnsInterceptSupported reports whether DNS intercept mode is supported on this platform. -func dnsInterceptSupported() bool { - if err := fwpuclntDLL.Load(); err != nil { - return false - } - return true -} - -// validateDNSIntercept checks that the system meets requirements for DNS intercept mode. -func (p *prog) validateDNSIntercept() error { - // Hard mode requires WFP and elevation for filter management. 
- if hardIntercept { - if !dnsInterceptSupported() { - return fmt.Errorf("dns intercept: fwpuclnt.dll not available — WFP requires Windows Vista or later") - } - if !isElevated() { - return fmt.Errorf("dns intercept: administrator privileges required for WFP filter management in hard mode") - } - } - // dns mode only needs NRPT (HKLM registry writes), which services can do - // without explicit elevation checks. - return nil -} - -// isElevated checks if the current process has administrator privileges. -func isElevated() bool { - token := windows.GetCurrentProcessToken() - return token.IsElevated() -} - // exemptVPNDNSServers updates the WFP filters to permit outbound DNS to the given // VPN DNS server IPs. This prevents the block filters from intercepting ctrld's own // forwarded queries to VPN DNS servers (split DNS routing). @@ -1319,11 +1289,6 @@ func (p *prog) ensurePFAnchorActive() bool { return false } -// pfAnchorIsWiped is a no-op on Windows (WFP handles intercept differently). -func (p *prog) pfAnchorIsWiped() bool { - return false -} - // checkTunnelInterfaceChanges is a no-op on Windows (WFP handles intercept differently). 
func (p *prog) checkTunnelInterfaceChanges() bool { return false diff --git a/discover_user_windows.go b/discover_user_windows.go index 3e5db11e..d4dcd1e9 100644 --- a/discover_user_windows.go +++ b/discover_user_windows.go @@ -14,9 +14,7 @@ import ( ) var ( - kernel32 = windows.NewLazySystemDLL("kernel32.dll") wtsapi32 = windows.NewLazySystemDLL("wtsapi32.dll") - procGetConsoleWindow = kernel32.NewProc("GetConsoleWindow") procWTSGetActiveConsoleSessionId = wtsapi32.NewProc("WTSGetActiveConsoleSessionId") procWTSQuerySessionInformation = wtsapi32.NewProc("WTSQuerySessionInformationW") procWTSFreeMemory = wtsapi32.NewProc("WTSFreeMemory") From fe08f00746e3b7a91983cf8a1d77a5396e96ed44 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Tue, 3 Mar 2026 15:36:46 +0700 Subject: [PATCH 22/22] fix(darwin): correct pf rules tests --- cmd/cli/dns_intercept_darwin.go | 24 ------------ cmd/cli/dns_intercept_darwin_test.go | 56 ++++++++++++++++++---------- 2 files changed, 36 insertions(+), 44 deletions(-) diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go index 95fc8a09..b761b3ab 100644 --- a/cmd/cli/dns_intercept_darwin.go +++ b/cmd/cli/dns_intercept_darwin.go @@ -1229,30 +1229,6 @@ func stringSlicesEqual(a, b []string) bool { return true } -// pfAnchorIsWiped checks if our pf anchor references have been removed from the -// running ruleset. This is a read-only check — it does NOT attempt to restore. -// Used to distinguish VPNs that wipe pf (Windscribe) from those that don't (Tailscale). 
-func (p *prog) pfAnchorIsWiped() bool { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) - rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) - anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) - - natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() - if err != nil { - return true // Can't check — assume wiped (safer) - } - natStr := string(natOut) - if !strings.Contains(natStr, rdrAnchorRef) || !strings.Contains(natStr, natAnchorRef) { - return true - } - - filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() - if err != nil { - return true - } - return !strings.Contains(string(filterOut), anchorRef) -} - // pfStartStabilization enters stabilization mode, suppressing all pf restores // until the VPN's ruleset stops changing. This prevents a death spiral where // ctrld and the VPN repeatedly overwrite each other's pf rules. diff --git a/cmd/cli/dns_intercept_darwin_test.go b/cmd/cli/dns_intercept_darwin_test.go index 822f2c5d..d0834d7f 100644 --- a/cmd/cli/dns_intercept_darwin_test.go +++ b/cmd/cli/dns_intercept_darwin_test.go @@ -5,6 +5,8 @@ package cli import ( "strings" "testing" + + "github.com/Control-D-Inc/ctrld" ) // ============================================================================= @@ -12,13 +14,13 @@ import ( // ============================================================================= func TestPFBuildAnchorRules_Basic(t *testing.T) { - p := &prog{} + p := &prog{cfg: &ctrld.Config{Listener: map[string]*ctrld.ListenerConfig{"0": {IP: "127.0.0.1", Port: 53}}}} rules := p.buildPFAnchorRules(nil) // rdr (translation) must come before pass (filtering) - rdrIdx := strings.Index(rules, "rdr pass on lo0") - passRouteIdx := strings.Index(rules, "pass out quick on ! lo0 route-to lo0") - passInIdx := strings.Index(rules, "pass in quick on lo0") + rdrIdx := strings.Index(rules, "rdr on lo0 inet proto udp") + passRouteIdx := strings.Index(rules, "pass out quick on ! 
lo0 route-to lo0 inet proto udp") + passInIdx := strings.Index(rules, "pass in quick on lo0 reply-to lo0") if rdrIdx < 0 { t.Fatal("missing rdr rule") @@ -43,34 +45,46 @@ func TestPFBuildAnchorRules_Basic(t *testing.T) { } func TestPFBuildAnchorRules_WithVPNServers(t *testing.T) { - p := &prog{} - vpnServers := []string{"10.8.0.1", "10.8.0.2"} + p := &prog{cfg: &ctrld.Config{Listener: map[string]*ctrld.ListenerConfig{"0": {IP: "127.0.0.1", Port: 53}}}} + vpnServers := []vpnDNSExemption{ + {Server: "10.8.0.1"}, + {Server: "10.8.0.2"}, + } rules := p.buildPFAnchorRules(vpnServers) // VPN exemption rules must appear for _, s := range vpnServers { - if !strings.Contains(rules, s) { - t.Errorf("missing VPN exemption for %s", s) + if !strings.Contains(rules, s.Server) { + t.Errorf("missing VPN exemption for %s", s.Server) } } // VPN exemptions must come before route-to - exemptIdx := strings.Index(rules, "10.8.0.1") - routeIdx := strings.Index(rules, "route-to lo0") + exemptIdx := strings.Index(rules, "10.8.0.1 port 53 group") + routeIdx := strings.Index(rules, "pass out quick on ! 
lo0 route-to lo0 inet proto udp") + if exemptIdx < 0 { + t.Fatal("missing VPN exemption rule for 10.8.0.1") + } + if routeIdx < 0 { + t.Fatal("missing route-to rule") + } if exemptIdx >= routeIdx { t.Error("VPN exemptions must come before route-to rules") } } func TestPFBuildAnchorRules_IPv4AndIPv6VPN(t *testing.T) { - p := &prog{} - vpnServers := []string{"10.8.0.1", "fd00::1"} + p := &prog{cfg: &ctrld.Config{Listener: map[string]*ctrld.ListenerConfig{"0": {IP: "127.0.0.1", Port: 53}}}} + vpnServers := []vpnDNSExemption{ + {Server: "10.8.0.1"}, + {Server: "fd00::1"}, + } rules := p.buildPFAnchorRules(vpnServers) // IPv4 server should use "inet" lines := strings.Split(rules, "\n") for _, line := range lines { - if strings.Contains(line, "10.8.0.1") { + if strings.Contains(line, "10.8.0.1") && strings.HasPrefix(line, "pass") { if !strings.Contains(line, "inet ") { t.Error("IPv4 VPN server rule should contain 'inet'") } @@ -78,7 +92,7 @@ func TestPFBuildAnchorRules_IPv4AndIPv6VPN(t *testing.T) { t.Error("IPv4 VPN server rule should not contain 'inet6'") } } - if strings.Contains(line, "fd00::1") { + if strings.Contains(line, "fd00::1") && strings.HasPrefix(line, "pass") { if !strings.Contains(line, "inet6") { t.Error("IPv6 VPN server rule should contain 'inet6'") } @@ -87,15 +101,17 @@ func TestPFBuildAnchorRules_IPv4AndIPv6VPN(t *testing.T) { } func TestPFBuildAnchorRules_Ordering(t *testing.T) { - p := &prog{} - vpnServers := []string{"10.8.0.1"} + p := &prog{cfg: &ctrld.Config{Listener: map[string]*ctrld.ListenerConfig{"0": {IP: "127.0.0.1", Port: 53}}}} + vpnServers := []vpnDNSExemption{ + {Server: "10.8.0.1"}, + } rules := p.buildPFAnchorRules(vpnServers) // Verify ordering: rdr → exemptions → route-to → pass in on lo0 - rdrIdx := strings.Index(rules, "rdr pass on lo0") - exemptIdx := strings.Index(rules, "pass out quick on ! lo0 inet proto { udp, tcp } from any to 10.8.0.1") - routeIdx := strings.Index(rules, "pass out quick on ! 
lo0 route-to lo0") - passInIdx := strings.Index(rules, "pass in quick on lo0") + rdrIdx := strings.Index(rules, "rdr on lo0 inet proto udp") + exemptIdx := strings.Index(rules, "pass out quick on ! lo0 inet proto { udp, tcp } from any to 10.8.0.1 port 53 group _ctrld") + routeIdx := strings.Index(rules, "pass out quick on ! lo0 route-to lo0 inet proto udp") + passInIdx := strings.Index(rules, "pass in quick on lo0 reply-to lo0") if rdrIdx < 0 || exemptIdx < 0 || routeIdx < 0 || passInIdx < 0 { t.Fatalf("missing expected rules: rdr=%d exempt=%d route=%d passIn=%d", rdrIdx, exemptIdx, routeIdx, passInIdx)