diff --git a/deployments/k8s-sidecar/base/configmap.yaml b/deployments/k8s-sidecar/base/configmap.yaml index bf92871..221e7e6 100644 --- a/deployments/k8s-sidecar/base/configmap.yaml +++ b/deployments/k8s-sidecar/base/configmap.yaml @@ -109,16 +109,15 @@ data: } membership { - service-name "drl" + service-name "drl-headless.drl.svc.cluster.local" port 7946 bind-addr "0.0.0.0" - join-addr "drl-headless.drl.svc.cluster.local" startup-delay "3s" } logging { - level "info" - format "json" + level "debug" + format "text" } internal-api { diff --git a/deployments/k8s-sidecar/base/deployment.yaml b/deployments/k8s-sidecar/base/deployment.yaml index dde2ed7..255f1dc 100644 --- a/deployments/k8s-sidecar/base/deployment.yaml +++ b/deployments/k8s-sidecar/base/deployment.yaml @@ -15,13 +15,6 @@ spec: labels: app: echo-server spec: - # Ensure DRL sidecar is ready before Envoy (init container approach avoids - # Kubernetes sidecar ordering limitations on older clusters). - initContainers: - - name: wait-for-drl - image: busybox:1.36 - command: ["sh", "-c", "until wget -q --spider http://127.0.0.1:9091/health; do echo 'waiting for DRL...'; sleep 2; done"] - containers: # ── echo-server ──────────────────────────────────────────────────────── - name: echo-server @@ -63,8 +56,8 @@ spec: # ── drl sidecar ──────────────────────────────────────────────────────── - name: drl - image: ghcr.io/your-org/drl:latest # replace with your image - args: ["serve"] + image: ghcr.io/gchiesa/drl:latest + args: [] env: - name: DRL_CONFIG_PATH value: /etc/drl/config.kdl diff --git a/deployments/k8s-sidecar/base/kustomization.yaml b/deployments/k8s-sidecar/base/kustomization.yaml index 0d74223..fddb55a 100644 --- a/deployments/k8s-sidecar/base/kustomization.yaml +++ b/deployments/k8s-sidecar/base/kustomization.yaml @@ -4,14 +4,13 @@ kind: Kustomization namespace: drl resources: - - namespace.yaml - configmap.yaml - deployment.yaml - service.yaml # Replace with your actual DRL image images: - - name: 
ghcr.io/your-org/drl + - name: ghcr.io/gchiesa/drl newTag: latest # Generate the DRL secrets from a local .env file (for local dev only). diff --git a/deployments/k8s-sidecar/base/namespace.yaml b/deployments/k8s-sidecar/base/namespace.yaml deleted file mode 100644 index 62ab8d9..0000000 --- a/deployments/k8s-sidecar/base/namespace.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: drl - labels: - app.kubernetes.io/managed-by: kustomize diff --git a/internal/membership/membership.go b/internal/membership/membership.go index a37fe2d..a2e599e 100644 --- a/internal/membership/membership.go +++ b/internal/membership/membership.go @@ -1,6 +1,7 @@ package membership import ( + "context" "fmt" "log/slog" "net" @@ -212,19 +213,61 @@ func (c *Cluster) markReady() { } } -// discoverPeers resolves the discovery service name to get peer IPs +// privateIPv4Nets are the RFC 1918 ranges plus link-local. Pod IPs in most +// Kubernetes CNIs are expected to fall inside one of these; public addresses +// (e.g. AWS Global Accelerator) never will. +var privateIPv4Nets = func() []*net.IPNet { + var nets []*net.IPNet + for _, cidr := range []string{ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + "169.254.0.0/16", + } { + _, n, _ := net.ParseCIDR(cidr) + nets = append(nets, n) + } + return nets +}() + +// discoverPeers resolves the discovery service name to get peer IPs. +// +// net.LookupIP is intentionally avoided: with CGO_ENABLED=0 the pure-Go +// resolver does not always apply /etc/resolv.conf search domains and ndots +// in the same order as the system libc resolver, which can cause the name to +// be resolved by an upstream public DNS server instead of the cluster DNS, +// returning unexpected public IPs. +// +// Using net.DefaultResolver.LookupIPAddr with a context is intended to keep +// the lookup on the pure-Go resolver path with full resolv.conf semantics. 
Results are also +// filtered to the RFC 1918 private and link-local ranges so a misconfigured DNS can never inject +// a public address into the peer list. func (c *Cluster) discoverPeers() ([]string, error) { - ips, err := net.LookupIP(c.config.Membership.ServiceName) + addrs, err := net.DefaultResolver.LookupIPAddr(context.Background(), c.config.Membership.ServiceName) if err != nil { return nil, fmt.Errorf("DNS lookup failed: %w", err) } var peers []string - for _, ip := range ips { - // Only use IPv4 addresses - if ip.To4() != nil { - peers = append(peers, ip.String()) + for _, addr := range addrs { + ip := addr.IP.To4() + if ip == nil { + continue // skip IPv6 + } + private := false + for _, n := range privateIPv4Nets { + if n.Contains(ip) { + private = true + break + } + } + if !private { + c.logger.Warn("discoverPeers: ignoring non-private IP returned by DNS", + slog.String("service", c.config.Membership.ServiceName), + slog.String("ip", ip.String())) + continue } + peers = append(peers, ip.String()) } return peers, nil