diff --git a/README.md b/README.md index 5b048ca8..312352ce 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ docker build -t controldns/ctrld . -f docker/Dockerfile # Usage -The cli is self documenting, so free free to run `--help` on any sub-command to get specific usages. +The cli is self-documenting, so feel free to run `--help` on any sub-command to get specific usages. ## Arguments ``` @@ -266,5 +266,67 @@ The above will start a foreground process and: - Excluding `*.company.int` and `very-secure.local` matching queries, that are forwarded to `10.0.10.1:53` - Write a debug log to `/path/to/log.log` +## DNS Intercept Mode +When running `ctrld` alongside VPN software, DNS conflicts can cause intermittent failures, bypassed filtering, or configuration loops. DNS Intercept Mode prevents these issues by transparently capturing all DNS traffic on the system and routing it through `ctrld`, without modifying network adapter DNS settings. + +### When to Use +Enable DNS Intercept Mode if you: +- Use corporate VPN software (F5, Cisco AnyConnect, Palo Alto GlobalProtect, Zscaler) +- Run overlay networks like Tailscale or WireGuard +- Experience random DNS failures when VPN connects/disconnects +- See gaps in your Control D analytics when VPN is active +- Have endpoint security software that also manages DNS + +### Command + +Windows (Admin Shell) +```shell +ctrld.exe start --intercept-mode dns --cd RESOLVER_ID_HERE +``` + +macOS +```shell +sudo ctrld start --intercept-mode dns --cd RESOLVER_ID_HERE +``` + +`--intercept-mode dns` automatically detects VPN internal domains and routes them to the VPN's DNS server, while Control D handles everything else. + +To disable intercept mode on a service that already has it enabled: + +Windows (Admin Shell) +```shell +ctrld.exe start --intercept-mode off +``` + +macOS +```shell +sudo ctrld start --intercept-mode off +``` + +This removes the intercept rules and reverts to standard interface-based DNS configuration. 
+ +### Platform Support +| Platform | Supported | Mechanism | +|----------|-----------|-----------| +| Windows | ✅ | NRPT (Name Resolution Policy Table) | +| macOS | ✅ | pf (packet filter) redirect | +| Linux | ❌ | Not currently supported | + +### Features +- **VPN split routing** — VPN-specific domains are automatically detected and forwarded to the VPN's DNS server +- **Captive portal recovery** — Wi-Fi login pages (hotels, airports, coffee shops) work automatically +- **No network adapter changes** — DNS settings stay untouched, eliminating conflicts entirely +- **Automatic port 53 conflict resolution** — if another process (e.g., `mDNSResponder` on macOS) is already using port 53, `ctrld` automatically listens on a different port. OS-level packet interception redirects all DNS traffic to `ctrld` transparently, so no manual configuration is needed. This only applies to intercept mode. + +### Tested VPN Software +- F5 BIG-IP APM +- Cisco AnyConnect +- Palo Alto GlobalProtect +- Tailscale (including Exit Nodes) +- Windscribe +- WireGuard + +For more details, see the [DNS Intercept Mode documentation](https://docs.controld.com/docs/dns-intercept). + ## Contributing See [Contribution Guideline](./docs/contributing.md) diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index eaee8129..a03745fb 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -11,12 +11,14 @@ import ( "net/http" "os" "os/exec" + "os/signal" "path/filepath" "runtime" "slices" "sort" "strconv" "strings" + "syscall" "time" "github.com/docker/go-units" @@ -146,6 +148,88 @@ func initLogCmd() *cobra.Command { fmt.Println(logs.Data) }, } + var tailLines int + logTailCmd := &cobra.Command{ + Use: "tail", + Short: "Tail live runtime debug logs", + Long: "Stream live runtime debug logs to the terminal, similar to tail -f. 
Press Ctrl+C to stop.", + Args: cobra.NoArgs, + PreRun: func(cmd *cobra.Command, args []string) { + checkHasElevatedPrivilege() + }, + Run: func(cmd *cobra.Command, args []string) { + + p := &prog{router: router.New(&cfg, false)} + s, _ := newService(p, svcConfig) + + status, err := s.Status() + if errors.Is(err, service.ErrNotInstalled) { + mainLog.Load().Warn().Msg("service not installed") + return + } + if status == service.StatusStopped { + mainLog.Load().Warn().Msg("service is not running") + return + } + + dir, err := socketDir() + if err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to find ctrld home dir") + } + cc := newControlClient(filepath.Join(dir, ctrldControlUnixSock)) + tailPath := fmt.Sprintf("%s?lines=%d", tailLogsPath, tailLines) + resp, err := cc.postStream(tailPath, nil) + if err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to connect for log tailing") + } + defer resp.Body.Close() + + switch resp.StatusCode { + case http.StatusMovedPermanently: + warnRuntimeLoggingNotEnabled() + return + case http.StatusOK: + default: + mainLog.Load().Fatal().Msgf("unexpected response status: %d", resp.StatusCode) + return + } + + // Set up signal handling for clean shutdown. + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + + done := make(chan struct{}) + go func() { + defer close(done) + // Stream output to stdout. 
+ buf := make([]byte, 4096) + for { + n, readErr := resp.Body.Read(buf) + if n > 0 { + os.Stdout.Write(buf[:n]) + } + if readErr != nil { + if readErr != io.EOF { + mainLog.Load().Error().Err(readErr).Msg("error reading log stream") + } + return + } + } + }() + + select { + case <-ctx.Done(): + if errors.Is(ctx.Err(), context.Canceled) { + msg := fmt.Sprintf("\nexiting: %s\n", context.Cause(ctx).Error()) + os.Stdout.WriteString(msg) + } + case <-done: + } + + }, + } + logTailCmd.Flags().IntVarP(&tailLines, "lines", "n", 10, "Number of historical lines to show on connect") + logCmd := &cobra.Command{ Use: "log", Short: "Manage runtime debug logs", @@ -156,6 +240,7 @@ func initLogCmd() *cobra.Command { } logCmd.AddCommand(logSendCmd) logCmd.AddCommand(logViewCmd) + logCmd.AddCommand(logTailCmd) rootCmd.AddCommand(logCmd) return logCmd diff --git a/cmd/cli/control_client.go b/cmd/cli/control_client.go index 7382d4e8..0e3f90f0 100644 --- a/cmd/cli/control_client.go +++ b/cmd/cli/control_client.go @@ -32,6 +32,12 @@ func (c *controlClient) post(path string, data io.Reader) (*http.Response, error return c.c.Post("http://unix"+path, contentTypeJson, data) } +// postStream sends a POST request with no timeout, suitable for long-lived streaming connections. +func (c *controlClient) postStream(path string, data io.Reader) (*http.Response, error) { + c.c.Timeout = 0 + return c.c.Post("http://unix"+path, contentTypeJson, data) +} + // deactivationRequest represents request for validating deactivation pin. 
type deactivationRequest struct { Pin int64 `json:"pin"` diff --git a/cmd/cli/control_server.go b/cmd/cli/control_server.go index 064e7fe5..6f4388a1 100644 --- a/cmd/cli/control_server.go +++ b/cmd/cli/control_server.go @@ -10,6 +10,7 @@ import ( "os" "reflect" "sort" + "strconv" "time" "github.com/kardianos/service" @@ -29,6 +30,7 @@ const ( ifacePath = "/iface" viewLogsPath = "/log/view" sendLogsPath = "/log/send" + tailLogsPath = "/log/tail" ) type ifaceResponse struct { @@ -344,6 +346,170 @@ func (p *prog) registerControlServerHandler() { } p.internalLogSent = time.Now() })) + p.cs.register(tailLogsPath, http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) { + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "streaming unsupported", http.StatusInternalServerError) + return + } + + // Determine logging mode and validate before starting the stream. + var lw *logWriter + useInternalLog := p.needInternalLogging() + if useInternalLog { + p.mu.Lock() + lw = p.internalLogWriter + p.mu.Unlock() + if lw == nil { + w.WriteHeader(http.StatusMovedPermanently) + return + } + } else if p.cfg.Service.LogPath == "" { + // No logging configured at all. + w.WriteHeader(http.StatusMovedPermanently) + return + } + + // Parse optional "lines" query param for initial context. + numLines := 10 + if v := request.URL.Query().Get("lines"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 0 { + numLines = n + } + } + + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("Transfer-Encoding", "chunked") + w.Header().Set("X-Content-Type-Options", "nosniff") + w.WriteHeader(http.StatusOK) + + if useInternalLog { + // Internal logging mode: subscribe to the logWriter. + + // Send last N lines as initial context. 
+ if numLines > 0 { + if tail := lw.tailLastLines(numLines); len(tail) > 0 { + w.Write(tail) + flusher.Flush() + } + } + + ch, unsub := lw.Subscribe() + defer unsub() + for { + select { + case data, ok := <-ch: + if !ok { + return + } + if _, err := w.Write(data); err != nil { + return + } + flusher.Flush() + case <-request.Context().Done(): + return + } + } + } else { + // File-based logging mode: tail the log file. + logFile := normalizeLogFilePath(p.cfg.Service.LogPath) + f, err := os.Open(logFile) + if err != nil { + // Already committed 200, just return. + return + } + defer f.Close() + + // Seek to show last N lines. + if numLines > 0 { + if tail := tailFileLastLines(f, numLines); len(tail) > 0 { + w.Write(tail) + flusher.Flush() + } + } else { + // Seek to end. + f.Seek(0, io.SeekEnd) + } + + // Poll for new data. + buf := make([]byte, 4096) + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-ticker.C: + n, err := f.Read(buf) + if n > 0 { + if _, werr := w.Write(buf[:n]); werr != nil { + return + } + flusher.Flush() + } + if err != nil && err != io.EOF { + return + } + case <-request.Context().Done(): + return + } + } + } + })) +} + +// tailFileLastLines reads the last n lines from a file and returns them. +// The file position is left at the end of the file after this call. +func tailFileLastLines(f *os.File, n int) []byte { + stat, err := f.Stat() + if err != nil || stat.Size() == 0 { + return nil + } + + // Read from the end in chunks to find the last n lines. + const chunkSize = 4096 + fileSize := stat.Size() + var lines []byte + offset := fileSize + count := 0 + + for offset > 0 && count <= n { + readSize := int64(chunkSize) + if readSize > offset { + readSize = offset + } + offset -= readSize + buf := make([]byte, readSize) + nRead, err := f.ReadAt(buf, offset) + if err != nil && err != io.EOF { + break + } + buf = buf[:nRead] + lines = append(buf, lines...) + + // Count newlines in this chunk. 
+ for _, b := range buf { + if b == '\n' { + count++ + } + } + } + + // Trim to last n lines. + idx := 0 + nlCount := 0 + for i := len(lines) - 1; i >= 0; i-- { + if lines[i] == '\n' { + nlCount++ + if nlCount == n+1 { + idx = i + 1 + break + } + } + } + lines = lines[idx:] + + // Seek to end of file for subsequent reads. + f.Seek(0, io.SeekEnd) + return lines } func jsonResponse(next http.Handler) http.Handler { diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go index b761b3ab..62fc73f6 100644 --- a/cmd/cli/dns_intercept_darwin.go +++ b/cmd/cli/dns_intercept_darwin.go @@ -321,14 +321,13 @@ func (p *prog) startDNSIntercept() error { // options → normalization (scrub) → queueing → translation (nat/rdr) → filtering (pass/block/anchor) // // "pfctl -sr" returns BOTH scrub-anchor (normalization) AND anchor/pass/block (filter) rules. -// "pfctl -sn" returns nat-anchor AND rdr-anchor (translation) rules. +// "pfctl -sn" returns rdr-anchor (translation) rules. // Both commands emit "No ALTQ support in kernel" warnings on stderr. // // We must reassemble in correct order: scrub → nat/rdr → filter. // // The anchor reference does not survive a reboot, but ctrld re-adds it on every start. func (p *prog) ensurePFAnchorReference() error { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) @@ -347,11 +346,10 @@ func (p *prog) ensurePFAnchorReference() error { natLines := pfFilterRuleLines(string(natOut)) filterLines := pfFilterRuleLines(string(filterOut)) - hasNatAnchor := pfContainsRule(natLines, natAnchorRef) hasRdrAnchor := pfContainsRule(natLines, rdrAnchorRef) hasAnchor := pfContainsRule(filterLines, anchorRef) - if hasNatAnchor && hasRdrAnchor && hasAnchor { + if hasRdrAnchor && hasAnchor { // Verify anchor ordering: our anchor should appear before other anchors // for reliable DNS interception priority. 
Log a warning if out of order, // but don't force a reload (the interface-specific rules in our anchor @@ -380,15 +378,8 @@ func (p *prog) ensurePFAnchorReference() error { // rules in whichever anchor appears first win. By prepending, our DNS // intercept rules match port 53 traffic before a VPN app's broader // "pass out quick on all" rules in their anchor. - if !hasNatAnchor || !hasRdrAnchor { - var newRefs []string - if !hasNatAnchor { - newRefs = append(newRefs, natAnchorRef) - } - if !hasRdrAnchor { - newRefs = append(newRefs, rdrAnchorRef) - } - natLines = append(newRefs, natLines...) + if !hasRdrAnchor { + natLines = append([]string{rdrAnchorRef}, natLines...) } if !hasAnchor { pureFilterLines = append([]string{anchorRef}, pureFilterLines...) @@ -590,7 +581,6 @@ func (p *prog) stopDNSIntercept() error { // The anchor itself is already flushed by stopDNSIntercept, so even if removal // fails, the empty anchor is a no-op. func (p *prog) removePFAnchorReference() error { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) @@ -609,7 +599,7 @@ func (p *prog) removePFAnchorReference() error { var cleanNat []string for _, line := range natLines { - if !strings.Contains(line, rdrAnchorRef) && !strings.Contains(line, natAnchorRef) { + if !strings.Contains(line, rdrAnchorRef) { cleanNat = append(cleanNat, line) } } @@ -804,23 +794,13 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // a stateful entry that handles response routing. Using "rdr pass" would skip filter // evaluation, and its implicit state alone is insufficient for response delivery — // proven by commit 51cf029 where responses were silently dropped. - rules.WriteString("# --- Translation rules (nat + rdr) ---\n") - - // NAT source to ::1 for IPv6 DNS on loopback. 
macOS/BSD rejects sendmsg from - // [::1] to a global unicast IPv6 address (EINVAL), unlike IPv4 where sendmsg from - // 127.0.0.1 to local private IPs works fine. The rdr rewrites the destination but - // preserves the original source (machine's global IPv6). Without nat, ctrld cannot - // reply. pf reverses both translations on the response path. - // Note: nat must appear before rdr (pf evaluates nat first in translation phase). - listenerAddr6 := fmt.Sprintf("::1 port %d", listenerPort) - rules.WriteString("nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1\n") - rules.WriteString("nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1\n") + rules.WriteString("# --- Translation rules (rdr) ---\n") rules.WriteString("# Redirect DNS on loopback to ctrld's listener.\n") rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto udp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto tcp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) - rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n", listenerAddr6)) - rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6)) + // No IPv6 rdr — IPv6 DNS is blocked at the filter level (see below). + rules.WriteString("\n") // --- Filtering rules --- rules.WriteString("# --- Filtering rules (pass) ---\n\n") @@ -982,8 +962,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { } rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto udp from any to ! %s port 53\n", iface, listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto tcp from any to ! %s port 53\n", iface, listenerIP)) - rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto udp from any to ! 
::1 port 53\n", iface)) - rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n", iface)) + // No IPv6 route-to — IPv6 DNS is blocked, not intercepted. } rules.WriteString("\n") } @@ -1003,10 +982,14 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! %s port 53\n", listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! %s port 53\n\n", listenerIP)) - // Force remaining outbound IPv6 DNS through loopback for interception. - rules.WriteString("# Force remaining outbound IPv6 DNS through loopback for interception.\n") - rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53\n") - rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n\n") + // Block all outbound IPv6 DNS. ctrld only intercepts IPv4 DNS via the loopback + // redirect. IPv6 DNS interception on macOS is not feasible because the kernel rejects + // sendmsg from [::1] to global unicast IPv6 (EINVAL), and pf's nat-on-lo0 doesn't + // fire for route-to'd packets. Blocking forces macOS to fall back to IPv4 DNS, + // which is fully intercepted. See docs/pf-dns-intercept.md for details. + rules.WriteString("# Block outbound IPv6 DNS — ctrld intercepts IPv4 only.\n") + rules.WriteString("# macOS falls back to IPv4 DNS automatically.\n") + rules.WriteString("block out quick on ! lo0 inet6 proto { udp, tcp } from any to any port 53\n\n") // Allow route-to'd DNS packets to pass outbound on lo0. 
// Without this, VPN firewalls with "block drop all" (e.g., Windscribe) drop the packet @@ -1018,8 +1001,8 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString("# Pass route-to'd DNS outbound on lo0 — no state to avoid bypassing rdr inbound.\n") rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto udp from any to ! %s port 53 no state\n", listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto tcp from any to ! %s port 53 no state\n", listenerIP)) - rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state\n") - rules.WriteString("pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state\n\n") + // No IPv6 lo0 pass — IPv6 DNS is blocked, not routed through lo0. + rules.WriteString("\n") // Allow the redirected traffic through on loopback (inbound after rdr). // @@ -1034,7 +1017,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // (source 127.0.0.1 → original DNS server IP, e.g., 10.255.255.3). rules.WriteString("# Accept redirected DNS — reply-to lo0 forces response through loopback.\n") rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to %s\n", listenerAddr)) - rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to %s\n", listenerAddr6)) + // No IPv6 pass-in — IPv6 DNS is blocked, not redirected to [::1]. return rules.String() } @@ -1043,12 +1026,11 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // It verifies both the anchor references in the main ruleset and the rules within // our anchor. Failures are logged at ERROR level to make them impossible to miss. 
func (p *prog) verifyPFState() { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) verified := true - // Check main ruleset for anchor references (nat-anchor + rdr-anchor in translation rules). + // Check main ruleset for anchor references (rdr-anchor in translation rules). natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() if err != nil { mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump NAT rules") @@ -1059,10 +1041,6 @@ func (p *prog) verifyPFState() { mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — rdr-anchor reference missing from running NAT rules") verified = false } - if !strings.Contains(natStr, natAnchorRef) { - mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — nat-anchor reference missing from running NAT rules") - verified = false - } } filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() @@ -1229,6 +1207,7 @@ func stringSlicesEqual(a, b []string) bool { return true } + // pfStartStabilization enters stabilization mode, suppressing all pf restores // until the VPN's ruleset stops changing. This prevents a death spiral where // ctrld and the VPN repeatedly overwrite each other's pf rules. 
@@ -1347,7 +1326,6 @@ func (p *prog) ensurePFAnchorActive() bool { } } - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) needsRestore := false @@ -1363,10 +1341,6 @@ func (p *prog) ensurePFAnchorActive() bool { mainLog.Load().Warn().Msg("DNS intercept watchdog: rdr-anchor reference missing from running ruleset") needsRestore = true } - if !strings.Contains(natStr, natAnchorRef) { - mainLog.Load().Warn().Msg("DNS intercept watchdog: nat-anchor reference missing from running ruleset") - needsRestore = true - } if !needsRestore { filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() @@ -1762,7 +1736,6 @@ func (p *prog) pfInterceptMonitor() { // The reload is safe for VPN interop because it reassembles from the current running // ruleset (pfctl -sr/-sn), preserving all existing anchors and rules. func (p *prog) forceReloadPFMainRuleset() { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) @@ -1793,9 +1766,6 @@ func (p *prog) forceReloadPFMainRuleset() { } // Ensure our anchor references are present (they may have been wiped). - if !pfContainsRule(natLines, natAnchorRef) { - natLines = append([]string{natAnchorRef}, natLines...) - } if !pfContainsRule(natLines, rdrAnchorRef) { natLines = append([]string{rdrAnchorRef}, natLines...) 
} diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index ac9d10b6..f1aa8103 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -903,16 +903,13 @@ func needLocalIPv6Listener(interceptMode string) bool { mainLog.Load().Debug().Msg("IPv6 listener: enabled (Windows)") return true } - // On macOS in intercept mode, pf can't redirect IPv6 DNS to an IPv4 listener (cross-AF rdr - // not supported), and blocking IPv6 DNS causes ~1s timeouts (BSD doesn't deliver ICMP errors - // to unconnected UDP sockets). Listening on [::1] lets us intercept IPv6 DNS directly. - // - // NOTE: We accept the intercept mode string as a parameter instead of reading the global - // dnsIntercept bool, because dnsIntercept is derived later in prog.run() — after the - // listener goroutines are already spawned. Same pattern as the port 5354 fallback fix (MR !860). - if (interceptMode == "dns" || interceptMode == "hard") && runtime.GOOS == "darwin" { - mainLog.Load().Debug().Msg("IPv6 listener: enabled (macOS intercept mode)") - return true + // macOS: IPv6 DNS is blocked at the pf level (not intercepted). The [::1] listener + // is not needed — macOS falls back to IPv4 DNS automatically. See #507 and + // docs/pf-dns-intercept.md for why IPv6 interception on macOS is not feasible + // (sendmsg EINVAL from ::1 to global unicast, nat-on-lo0 doesn't fire for route-to). 
+ if runtime.GOOS == "darwin" { + mainLog.Load().Debug().Msg("IPv6 listener: not needed (macOS — IPv6 DNS blocked at pf, fallback to IPv4)") + return false } mainLog.Load().Debug().Str("os", runtime.GOOS).Str("interceptMode", interceptMode).Msg("IPv6 listener: not needed") return false diff --git a/cmd/cli/log_tail_test.go b/cmd/cli/log_tail_test.go new file mode 100644 index 00000000..37ad4110 --- /dev/null +++ b/cmd/cli/log_tail_test.go @@ -0,0 +1,339 @@ +package cli + +import ( + "io" + "os" + "strings" + "sync" + "testing" + "time" +) + +// ============================================================================= +// logWriter.tailLastLines tests +// ============================================================================= + +func Test_logWriter_tailLastLines_Empty(t *testing.T) { + lw := newLogWriterWithSize(4096) + if got := lw.tailLastLines(10); got != nil { + t.Fatalf("expected nil for empty buffer, got %q", got) + } +} + +func Test_logWriter_tailLastLines_ZeroLines(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\n")) + if got := lw.tailLastLines(0); got != nil { + t.Fatalf("expected nil for n=0, got %q", got) + } +} + +func Test_logWriter_tailLastLines_NegativeLines(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\n")) + if got := lw.tailLastLines(-1); got != nil { + t.Fatalf("expected nil for n=-1, got %q", got) + } +} + +func Test_logWriter_tailLastLines_FewerThanN(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\n")) + got := string(lw.tailLastLines(10)) + want := "line1\nline2\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_ExactN(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\nline3\n")) + got := string(lw.tailLastLines(3)) + want := "line1\nline2\nline3\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func 
Test_logWriter_tailLastLines_MoreThanN(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\nline3\nline4\nline5\n")) + got := string(lw.tailLastLines(2)) + want := "line4\nline5\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_NoTrailingNewline(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\nline3")) + // Without trailing newline, "line3" is a partial line. + // Asking for 1 line returns the last newline-terminated line plus the partial. + got := string(lw.tailLastLines(1)) + want := "line2\nline3" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_SingleLineNoNewline(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("only line")) + got := string(lw.tailLastLines(5)) + want := "only line" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_SingleLineWithNewline(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("only line\n")) + got := string(lw.tailLastLines(1)) + want := "only line\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +// ============================================================================= +// logWriter.Subscribe tests +// ============================================================================= + +func Test_logWriter_Subscribe_Basic(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch, unsub := lw.Subscribe() + defer unsub() + + msg := []byte("hello world\n") + lw.Write(msg) + + select { + case got := <-ch: + if string(got) != string(msg) { + t.Fatalf("got %q, want %q", got, msg) + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for subscriber data") + } +} + +func Test_logWriter_Subscribe_MultipleSubscribers(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch1, unsub1 := lw.Subscribe() + defer unsub1() + ch2, unsub2 := 
lw.Subscribe() + defer unsub2() + + msg := []byte("broadcast\n") + lw.Write(msg) + + for i, ch := range []<-chan []byte{ch1, ch2} { + select { + case got := <-ch: + if string(got) != string(msg) { + t.Fatalf("subscriber %d: got %q, want %q", i, got, msg) + } + case <-time.After(time.Second): + t.Fatalf("subscriber %d: timed out", i) + } + } +} + +func Test_logWriter_Subscribe_Unsubscribe(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch, unsub := lw.Subscribe() + + // Verify subscribed. + lw.Write([]byte("before unsub\n")) + select { + case <-ch: + case <-time.After(time.Second): + t.Fatal("timed out before unsub") + } + + unsub() + + // Channel should be closed after unsub. + if _, ok := <-ch; ok { + t.Fatal("channel should be closed after unsubscribe") + } + + // Verify subscriber list is empty. + lw.mu.Lock() + count := len(lw.subscribers) + lw.mu.Unlock() + if count != 0 { + t.Fatalf("expected 0 subscribers after unsub, got %d", count) + } +} + +func Test_logWriter_Subscribe_UnsubscribeIdempotent(t *testing.T) { + lw := newLogWriterWithSize(4096) + _, unsub := lw.Subscribe() + unsub() + // Second unsub should not panic. + unsub() +} + +func Test_logWriter_Subscribe_SlowSubscriberDropped(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch, unsub := lw.Subscribe() + defer unsub() + + // Fill the subscriber channel (buffer size is 256). + for i := 0; i < 300; i++ { + lw.Write([]byte("msg\n")) + } + + // Should have 256 buffered messages, rest dropped. 
+ count := 0 + for { + select { + case <-ch: + count++ + default: + goto done + } + } +done: + if count != 256 { + t.Fatalf("expected 256 buffered messages, got %d", count) + } +} + +func Test_logWriter_Subscribe_ConcurrentWriteAndRead(t *testing.T) { + lw := newLogWriterWithSize(64 * 1024) + ch, unsub := lw.Subscribe() + defer unsub() + + const numWrites = 100 + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < numWrites; i++ { + lw.Write([]byte("concurrent write\n")) + } + }() + + received := 0 + timeout := time.After(5 * time.Second) + for received < numWrites { + select { + case <-ch: + received++ + case <-timeout: + t.Fatalf("timed out after receiving %d/%d messages", received, numWrites) + } + } + wg.Wait() +} + +// ============================================================================= +// tailFileLastLines tests +// ============================================================================= + +func writeTempFile(t *testing.T, content string) *os.File { + t.Helper() + f, err := os.CreateTemp(t.TempDir(), "tail-test-*") + if err != nil { + t.Fatal(err) + } + if _, err := f.WriteString(content); err != nil { + t.Fatal(err) + } + return f +} + +func Test_tailFileLastLines_Empty(t *testing.T) { + f := writeTempFile(t, "") + defer f.Close() + if got := tailFileLastLines(f, 10); got != nil { + t.Fatalf("expected nil for empty file, got %q", got) + } +} + +func Test_tailFileLastLines_FewerThanN(t *testing.T) { + f := writeTempFile(t, "line1\nline2\n") + defer f.Close() + got := string(tailFileLastLines(f, 10)) + want := "line1\nline2\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_ExactN(t *testing.T) { + f := writeTempFile(t, "a\nb\nc\n") + defer f.Close() + got := string(tailFileLastLines(f, 3)) + want := "a\nb\nc\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_MoreThanN(t *testing.T) { + f := writeTempFile(t, 
"line1\nline2\nline3\nline4\nline5\n") + defer f.Close() + got := string(tailFileLastLines(f, 2)) + want := "line4\nline5\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_NoTrailingNewline(t *testing.T) { + f := writeTempFile(t, "line1\nline2\nline3") + defer f.Close() + // Without trailing newline, partial last line comes with the previous line. + got := string(tailFileLastLines(f, 1)) + want := "line2\nline3" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_LargerThanChunk(t *testing.T) { + // Build content larger than the 4096 chunk size to exercise multi-chunk reads. + var sb strings.Builder + for i := 0; i < 200; i++ { + sb.WriteString(strings.Repeat("x", 50)) + sb.WriteByte('\n') + } + f := writeTempFile(t, sb.String()) + defer f.Close() + got := string(tailFileLastLines(f, 3)) + lines := strings.Split(strings.TrimRight(got, "\n"), "\n") + if len(lines) != 3 { + t.Fatalf("expected 3 lines, got %d: %q", len(lines), got) + } + expectedLine := strings.Repeat("x", 50) + for _, line := range lines { + if line != expectedLine { + t.Fatalf("unexpected line content: %q", line) + } + } +} + +func Test_tailFileLastLines_SeeksToEnd(t *testing.T) { + f := writeTempFile(t, "line1\nline2\nline3\n") + defer f.Close() + tailFileLastLines(f, 1) + + // After tailFileLastLines, file position should be at the end. + pos, err := f.Seek(0, io.SeekCurrent) + if err != nil { + t.Fatal(err) + } + stat, err := f.Stat() + if err != nil { + t.Fatal(err) + } + if pos != stat.Size() { + t.Fatalf("expected file position at end (%d), got %d", stat.Size(), pos) + } +} diff --git a/cmd/cli/log_writer.go b/cmd/cli/log_writer.go index ab6b855f..f84dcdf1 100644 --- a/cmd/cli/log_writer.go +++ b/cmd/cli/log_writer.go @@ -38,11 +38,17 @@ type logReader struct { size int64 } +// logSubscriber represents a subscriber to live log output. 
+type logSubscriber struct { + ch chan []byte +} + // logWriter is an internal buffer to keep track of runtime log when no logging is enabled. type logWriter struct { - mu sync.Mutex - buf bytes.Buffer - size int + mu sync.Mutex + buf bytes.Buffer + size int + subscribers []*logSubscriber } // newLogWriter creates an internal log writer. @@ -61,10 +67,70 @@ func newLogWriterWithSize(size int) *logWriter { return lw } +// Subscribe returns a channel that receives new log data as it's written, +// and an unsubscribe function to clean up when done. +func (lw *logWriter) Subscribe() (<-chan []byte, func()) { + lw.mu.Lock() + defer lw.mu.Unlock() + sub := &logSubscriber{ch: make(chan []byte, 256)} + lw.subscribers = append(lw.subscribers, sub) + unsub := func() { + lw.mu.Lock() + defer lw.mu.Unlock() + for i, s := range lw.subscribers { + if s == sub { + lw.subscribers = append(lw.subscribers[:i], lw.subscribers[i+1:]...) + close(sub.ch) + break + } + } + } + return sub.ch, unsub +} + +// tailLastLines returns the last n lines from the current buffer. +func (lw *logWriter) tailLastLines(n int) []byte { + lw.mu.Lock() + defer lw.mu.Unlock() + data := lw.buf.Bytes() + if n <= 0 || len(data) == 0 { + return nil + } + // Find the last n newlines from the end. + count := 0 + pos := len(data) + for pos > 0 { + pos-- + if data[pos] == '\n' { + count++ + if count == n+1 { + pos++ // move past this newline + break + } + } + } + result := make([]byte, len(data)-pos) + copy(result, data[pos:]) + return result +} + func (lw *logWriter) Write(p []byte) (int, error) { lw.mu.Lock() defer lw.mu.Unlock() + // Fan-out to subscribers (non-blocking). + if len(lw.subscribers) > 0 { + cp := make([]byte, len(p)) + copy(cp, p) + for _, sub := range lw.subscribers { + select { + case sub.ch <- cp: + default: + // Drop if subscriber is slow to avoid blocking the logger. + } + } + } + // If writing p causes overflows, discard old data. 
if lw.buf.Len()+len(p) > lw.size { buf := lw.buf.Bytes() diff --git a/cmd/cli/service_windows.go b/cmd/cli/service_windows.go index fddb0ef8..b12982cf 100644 --- a/cmd/cli/service_windows.go +++ b/cmd/cli/service_windows.go @@ -160,6 +160,7 @@ func hasLocalDnsServerRunning() bool { if e != nil { return false } + defer windows.CloseHandle(h) p := windows.ProcessEntry32{Size: processEntrySize} for { e := windows.Process32Next(h, &p) diff --git a/cmd/cli/vpn_dns.go b/cmd/cli/vpn_dns.go index 03b54786..bc4b1a13 100644 --- a/cmd/cli/vpn_dns.go +++ b/cmd/cli/vpn_dns.go @@ -2,6 +2,7 @@ package cli import ( "context" + "net" "strings" "sync" @@ -241,10 +242,12 @@ func (m *vpnDNSManager) Routes() map[string][]string { // upstreamConfigFor creates a legacy upstream configuration for the given VPN DNS server. func (m *vpnDNSManager) upstreamConfigFor(server string) *ctrld.UpstreamConfig { - endpoint := server - if !strings.Contains(server, ":") { - endpoint = server + ":53" - } + // Use net.JoinHostPort to correctly handle both IPv4 and IPv6 addresses. + // Previously, the strings.Contains(":") check would skip appending ":53" + // for IPv6 addresses (they contain colons), leaving a bare address like + // "2a0d:6fc0:9b0:3600::1" which net.Dial rejects with "too many colons". + // net.JoinHostPort produces "[2a0d:6fc0:9b0:3600::1]:53" as required. + endpoint := net.JoinHostPort(server, "53") return &ctrld.UpstreamConfig{ Name: "VPN DNS", diff --git a/docs/known-issues.md b/docs/known-issues.md index 0d13bccf..e9a897b1 100644 --- a/docs/known-issues.md +++ b/docs/known-issues.md @@ -22,6 +22,34 @@ This document outlines known issues with ctrld and their current status, workaro --- +## Merlin Issues + +### Daemon Crashing on `Ctrl+C` + +**Issue**: `ctrld` daemon terminates unexpectedly after stopping a log tailing command. This typically occurs when running the daemon and the log viewer within the same SSH session on ASUSWRT-Merlin routers. 
+ +**Description** + +The issue is caused by `Signal Propagation` within a shared `Process Group (PGID)`. + +Steps to reproduce: + +1. You start the daemon manually: `ctrld start --cd=RESOLVER_ID_HERE`. +2. You view internal logs in the same terminal: `ctrld log tail`. +3. You press `Ctrl+C` to stop viewing logs. +4. The `ctrld` daemon service stops immediately along with the log command. + +When you execute commands sequentially in a single interactive SSH session on Merlin, the shell often assigns them to the same Process Group. In Linux, the `SIGINT` signal (triggered by `Ctrl+C`) is not sent only to the application you are interacting with; the terminal delivers it to every process in its foreground process group. + +Because the `ctrld` daemon remains "attached" to the terminal session's process group, it "hears" the interrupt signal intended for the `log tail` command and shuts down. + +**Workarounds**: + +To isolate the signals, avoid running the log viewer in the same window as the daemon: +* **Window A:** Start the daemon and leave it running. +* **Window B:** Open a new SSH connection to run `ctrld log tail`. +Because Window B has a different **Session ID** and **Process Group ID**, pressing `Ctrl+C` in Window B will not affect the process in Window A. 
+ ## Contributing to Known Issues If you encounter an issue not listed here, please: diff --git a/docs/pf-dns-intercept.md b/docs/pf-dns-intercept.md index 6c19925f..f8cbb423 100644 --- a/docs/pf-dns-intercept.md +++ b/docs/pf-dns-intercept.md @@ -17,7 +17,7 @@ options (set) → normalization (scrub) → queueing → translation (nat/rdr) | Anchor Type | Section | Purpose | |-------------|---------|---------| | `scrub-anchor` | Normalization | Packet normalization | -| `nat-anchor` | Translation | NAT rules | +| `nat-anchor` | Translation | NAT rules (not used by ctrld) | | `rdr-anchor` | Translation | Redirect rules | | `anchor` | Filtering | Pass/block rules | @@ -122,69 +122,31 @@ Three problems prevent a simple "mirror the IPv4 rules" approach: 3. **sendmsg from `[::1]` to global unicast fails**: Unlike IPv4 where the kernel allows `sendmsg` from `127.0.0.1` to local private IPs (e.g., `10.x.x.x`), macOS/BSD rejects `sendmsg` from `[::1]` to a global unicast IPv6 address with `EINVAL`. Since pf's `rdr` preserves the original source IP (the machine's global IPv6 address), ctrld's reply would fail. -### Solution: nat + rdr + [::1] Listener +### Solution: Block IPv6 DNS, Fallback to IPv4 -``` -# NAT: rewrite source to ::1 so ctrld can reply -nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1 -nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1 - -# RDR: redirect destination to ctrld's IPv6 listener -rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> ::1 port 53 -rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> ::1 port 53 - -# Filter: route-to forces IPv6 DNS to loopback (mirrors IPv4 rules) -pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53 -pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53 - -# Pass on lo0 without state (mirrors IPv4) -pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state -pass out quick on lo0 inet6 proto tcp from any to ! 
::1 port 53 no state - -# Accept redirected IPv6 DNS with reply-to (mirrors IPv4) -pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to ::1 port 53 -``` - -### IPv6 Packet Flow +After extensive testing (#507), IPv6 DNS interception on macOS is not feasible with current pf capabilities. The solution is to block all outbound IPv6 DNS: ``` -Application queries [2607:f0c8:8000:8210::1]:53 (IPv6 DNS server) - ↓ -pf filter: "pass out route-to lo0 inet6 ... port 53" → redirects to lo0 - ↓ -pf (outbound lo0): "pass out on lo0 inet6 ... no state" → passes - ↓ -Loopback reflects packet inbound on lo0 - ↓ -pf nat: rewrites source 2607:f0c8:...:ec6e → ::1 -pf rdr: rewrites dest [2607:f0c8:8000:8210::1]:53 → [::1]:53 - ↓ -ctrld receives query from [::1]:port → [::1]:53 - ↓ -ctrld resolves via DoH, replies to [::1]:port (kernel accepts ::1 → ::1) - ↓ -pf reverses both translations: - - nat reverse: dest ::1 → 2607:f0c8:...:ec6e (original client) - - rdr reverse: src ::1 → 2607:f0c8:8000:8210::1 (original DNS server) - ↓ -Application receives response from [2607:f0c8:8000:8210::1]:53 ✓ +block out quick on ! lo0 inet6 proto { udp, tcp } from any to any port 53 ``` -### Client IP Recovery - -The `nat` rewrites the source to `::1`, so ctrld sees the client as `::1` (loopback). The existing `spoofLoopbackIpInClientInfo()` logic detects this and replaces it with the machine's real RFC1918 IPv4 address (e.g., `10.0.10.211`). This is the same mechanism used when queries arrive from `127.0.0.1` — no client identity is lost. +macOS automatically retries DNS over IPv4 when the IPv6 path is blocked. The IPv4 path is fully intercepted via the normal route-to + rdr mechanism. Impact is minimal — at most ~1s latency on the very first DNS query while the IPv6 attempt is blocked. 
-### IPv6 Listener +### What Was Tried and Why It Failed -The `[::1]` listener reuses the existing infrastructure from Windows (where it was added for the same reason — can't suppress IPv6 DNS resolvers from the system config). The `needLocalIPv6Listener()` function gates it, returning `true` on: -- **Windows**: Always (if IPv6 is available) -- **macOS**: Only in intercept mode +| Approach | Result | +|----------|--------| +| `nat on lo0 inet6` to rewrite source to `::1` | pf skips translation on second interface pass — nat doesn't fire for route-to'd packets arriving on lo0 | +| ULA address on lo0 (`fd00:53::1`) | Kernel rejects: `EHOSTUNREACH` — lo0's routing table is segregated from global unicast | +| Raw IPv6 socket (`SOCK_RAW` + `IPPROTO_UDP`) | Bypasses sendmsg validation, but pf doesn't match raw socket packets against rdr state — response arrives from `::1` not the original server | +| `DIOCNATLOOK` to get original dest + raw socket from that addr | Can't `bind()` to a non-local address (`EADDRNOTAVAIL`) — macOS has no `IPV6_HDRINCL` for source spoofing | +| BPF packet injection on lo0 | Theoretically possible but extremely complex — not justified for the marginal benefit | -If the `[::1]` listener fails to bind, it logs a warning and continues — the IPv4 listener is primary. - -### nat-anchor Requirement +### IPv6 Listener -The `nat` rules in our anchor require a `nat-anchor "com.controld.ctrld"` reference in the main pf ruleset, in addition to the existing `rdr-anchor` and `anchor` references. All pf management functions (inject, remove, verify, watchdog, force-reload) handle all three anchor types. +The `[::1]` listener is used on: +- **Windows**: Always (if IPv6 is available) — Windows can't easily suppress IPv6 DNS resolvers +- **macOS**: **Not used** — IPv6 DNS is blocked at pf, no listener needed ## Rule Ordering Within the Anchor @@ -236,7 +198,7 @@ The trickiest part. macOS only processes anchors declared in the active pf rules 1. 
Read `/etc/pf.conf` 2. If our anchor reference already exists, reload as-is -3. Otherwise, inject `nat-anchor "com.controld.ctrld"` and `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section +3. Otherwise, inject `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section 4. Write to a **temp file** and load with `pfctl -f ` 5. **We never modify `/etc/pf.conf` on disk** — changes are runtime-only and don't survive reboot (ctrld re-injects on every start) @@ -376,5 +338,8 @@ We chose `route-to + rdr` as the best balance of effectiveness and deployability 9. **`pass out quick` exemptions work with route-to** — they fire in the same phase (filter), so `quick` + rule ordering means exempted packets never hit the route-to rule 10. **pf cannot cross-AF redirect** — `rdr on lo0 inet6 ... -> 127.0.0.1` is invalid. IPv6 DNS must be handled by an `[::1]` listener. 11. **`block return` doesn't work for IPv6 DNS** — BSD doesn't deliver ICMPv6 unreachable to unconnected UDP sockets (`sendto`). Apps timeout waiting for a response that never comes. -12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. `nat` on lo0 is required to rewrite the source. -13. **`nat-anchor` is separate from `rdr-anchor`** — pf requires both in the main ruleset for nat and rdr rules in an anchor to be evaluated. `rdr-anchor` alone does not cover nat rules. +12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address (`EINVAL`). This is the fundamental asymmetry that makes IPv6 DNS interception infeasible. +13. 
**`nat on lo0` doesn't fire for `route-to`'d packets** — pf runs translation on the original outbound interface (en0), then skips it on lo0's outbound pass. `rdr` works because lo0 inbound is a genuinely new direction. Any lo0 address (including ULAs) can't route to global unicast — the kernel segregates lo0's routing table. +14. **Raw IPv6 sockets bypass routing validation but pf doesn't match them** — `SOCK_RAW` can send from `::1` to global unicast, but pf treats raw socket packets as new connections (not matching rdr state), so reverse-translation doesn't happen. The client sees `::1` as the source, not the original DNS server. +15. **`DIOCNATLOOK` can find the original dest but you can't use it** — The ioctl returns the pre-rdr destination, but `bind()` fails with `EADDRNOTAVAIL` because it's not a local address. macOS IPv6 raw sockets don't support `IPV6_HDRINCL` for source spoofing. +16. **Blocking IPv6 DNS is the pragmatic solution** — macOS automatically retries over IPv4. The ~1s penalty on the first blocked query is negligible compared to the complexity of working around the kernel's IPv6 loopback restrictions. diff --git a/nameservers_unix.go b/nameservers_unix.go index d8e6035e..1cbad68c 100644 --- a/nameservers_unix.go +++ b/nameservers_unix.go @@ -4,6 +4,7 @@ package ctrld import ( "net" + "net/netip" "slices" "time" @@ -17,6 +18,31 @@ func currentNameserversFromResolvconf() []string { return resolvconffile.NameServers() } +// localNameservers filters a list of nameserver strings, returning only those +// that are not loopback or local machine IP addresses. 
+func localNameservers(nss []string, regularIPs, loopbackIPs []netip.Addr) []string { + var result []string + seen := make(map[string]bool) + + for _, ns := range nss { + if ip := net.ParseIP(ns); ip != nil { + // skip loopback and local IPs + isLocal := false + for _, v := range slices.Concat(regularIPs, loopbackIPs) { + if ip.String() == v.String() { + isLocal = true + break + } + } + if !isLocal && !seen[ip.String()] { + seen[ip.String()] = true + result = append(result, ip.String()) + } + } + } + return result +} + // dnsFromResolvConf reads usable nameservers from /etc/resolv.conf file. // A nameserver is usable if it's not one of current machine's IP addresses // and loopback IP addresses. @@ -35,24 +61,7 @@ func dnsFromResolvConf() []string { } nss := resolvconffile.NameServers() - var localDNS []string - seen := make(map[string]bool) - - for _, ns := range nss { - if ip := net.ParseIP(ns); ip != nil { - // skip loopback IPs - for _, v := range slices.Concat(regularIPs, loopbackIPs) { - ipStr := v.String() - if ip.String() == ipStr { - continue - } - } - if !seen[ip.String()] { - seen[ip.String()] = true - localDNS = append(localDNS, ip.String()) - } - } - } + localDNS := localNameservers(nss, regularIPs, loopbackIPs) // If we successfully read the file and found nameservers, return them if len(localDNS) > 0 { diff --git a/nameservers_unix_test.go b/nameservers_unix_test.go new file mode 100644 index 00000000..a771dc12 --- /dev/null +++ b/nameservers_unix_test.go @@ -0,0 +1,105 @@ +//go:build unix + +package ctrld + +import ( + "net/netip" + "testing" +) + +func Test_localNameservers(t *testing.T) { + loopbackIPs := []netip.Addr{ + netip.MustParseAddr("127.0.0.1"), + netip.MustParseAddr("::1"), + } + regularIPs := []netip.Addr{ + netip.MustParseAddr("192.168.1.100"), + netip.MustParseAddr("10.0.0.5"), + } + + tests := []struct { + name string + nss []string + regularIPs []netip.Addr + loopbackIPs []netip.Addr + want []string + }{ + { + name: "filters 
loopback IPv4", + nss: []string{"127.0.0.1", "8.8.8.8"}, + regularIPs: nil, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8"}, + }, + { + name: "filters loopback IPv6", + nss: []string{"::1", "1.1.1.1"}, + regularIPs: nil, + loopbackIPs: loopbackIPs, + want: []string{"1.1.1.1"}, + }, + { + name: "filters local machine IPs", + nss: []string{"192.168.1.100", "8.8.4.4"}, + regularIPs: regularIPs, + loopbackIPs: nil, + want: []string{"8.8.4.4"}, + }, + { + name: "filters both loopback and local IPs", + nss: []string{"127.0.0.1", "192.168.1.100", "8.8.8.8"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8"}, + }, + { + name: "deduplicates results", + nss: []string{"8.8.8.8", "8.8.8.8", "1.1.1.1"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8", "1.1.1.1"}, + }, + { + name: "all filtered returns nil", + nss: []string{"127.0.0.1", "::1", "192.168.1.100"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: nil, + }, + { + name: "empty input returns nil", + nss: nil, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: nil, + }, + { + name: "skips unparseable entries", + nss: []string{"not-an-ip", "8.8.8.8"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8"}, + }, + { + name: "no local IPs filters nothing", + nss: []string{"8.8.8.8", "1.1.1.1"}, + regularIPs: nil, + loopbackIPs: nil, + want: []string{"8.8.8.8", "1.1.1.1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := localNameservers(tt.nss, tt.regularIPs, tt.loopbackIPs) + if len(got) != len(tt.want) { + t.Fatalf("localNameservers() = %v, want %v", got, tt.want) + } + for i := range got { + if got[i] != tt.want[i] { + t.Errorf("localNameservers()[%d] = %q, want %q", i, got[i], tt.want[i]) + } + } + }) + } +}