2 changes: 1 addition & 1 deletion test/integration/lrp/lrp_fqdn_test.go
@@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) {
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
- testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases)
+ testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, promAddress)
})
}
}
301 changes: 293 additions & 8 deletions test/integration/lrp/lrp_test.go
@@ -13,11 +13,16 @@ import (
"github.com/Azure/azure-container-networking/test/integration/prometheus"
"github.com/Azure/azure-container-networking/test/internal/kubernetes"
"github.com/Azure/azure-container-networking/test/internal/retry"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
"github.com/pkg/errors"
"github.com/stretchr/testify/require"
"golang.org/x/exp/rand"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8sclient "k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"sigs.k8s.io/yaml"
)

const (
@@ -154,7 +159,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
}

func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, clientCmd []string, expectResponse, expectErrMsg string,
- shouldError, countShouldIncrease bool) {
+ shouldError, countShouldIncrease bool, prometheusAddress string) {

config := kubernetes.MustGetRestConfig()
cs := kubernetes.MustGetClientset()
@@ -167,9 +172,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
"zone": ".",
}

- // curl localhost:9253/metrics
- beforeMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+ // curl to the specified prometheus address
+ beforeMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
require.NoError(t, err)
+ beforeValue := beforeMetric.GetCounter().GetValue()
+ t.Logf("Before DNS request - metric count: %.0f", beforeValue)

t.Log("calling command from client")

@@ -187,13 +194,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
time.Sleep(500 * time.Millisecond)

// curl again and see count diff
- afterMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+ afterMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
require.NoError(t, err)
+ afterValue := afterMetric.GetCounter().GetValue()
+ t.Logf("After DNS request - metric count: %.0f (diff: %.0f)", afterValue, afterValue-beforeValue)

if countShouldIncrease {
- require.Greater(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count did not increase after command")
+ require.Greater(t, afterValue, beforeValue, "dns metric count did not increase after command - before: %.0f, after: %.0f", beforeValue, afterValue)
} else {
- require.Equal(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count increased after command")
+ require.Equal(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)
Copilot AI commented on Oct 17, 2025:
The testify assertions use formatted messages with placeholders but require.Greater and require.Equal do not perform format substitution; use require.Greaterf / require.Equalf or pre-format the message with fmt.Sprintf to ensure values appear in failure output.

Suggested change:
- require.Equal(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)
+ require.Equalf(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)

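For reference, a minimal sketch of the f-variant form (assuming testify v1; the test name and values are illustrative). Greaterf/Equalf take an explicit format string, which is the vet-friendly way to get the counter values into the failure output:

```go
package lrp_test

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestMetricDiffMessage(t *testing.T) {
	beforeValue, afterValue := 3.0, 5.0
	// The format string and trailing args are substituted into the failure
	// message, so both counter values appear if the assertion fails.
	require.Greaterf(t, afterValue, beforeValue,
		"dns metric count did not increase after command - before: %.0f, after: %.0f",
		beforeValue, afterValue)
}
```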

}
}

@@ -210,9 +219,285 @@ func TestLRP(t *testing.T) {
defer cleanupFn()
require.NotNil(t, selectedPod)

// Get the kube-dns service IP for DNS requests
cs := kubernetes.MustGetClientset()
svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
require.NoError(t, err)
kubeDNS := svc.Spec.ClusterIP

t.Logf("LRP Test Starting...")

// Basic LRP test
testLRPCase(t, ctx, *selectedPod, []string{
"nslookup", "google.com", "10.0.0.10",
}, "", "", false, true)
"nslookup", "google.com", kubeDNS,
}, "", "", false, true, promAddress)

t.Logf("LRP Test Completed")

t.Logf("Negative LRP Test Starting")

// Run negative LRP test
testNegativeLRP(t, ctx, *selectedPod, kubeDNS)

t.Logf("Negative LRP Test Completed")
}

// testNegativeLRP exercises Local Redirect Policy functionality across
// pod restarts, resource recreation, and cilium command validation.
// It focuses on negative testing scenarios and edge cases.
func testNegativeLRP(t *testing.T, ctx context.Context, clientPod corev1.Pod, kubeDNS string) {
config := kubernetes.MustGetRestConfig()
cs := kubernetes.MustGetClientset()

// Step 1: Initial DNS test to verify LRP is working
t.Log("Step 1: Initial DNS test - verifying LRP functionality")
testLRPCase(t, ctx, clientPod, []string{
"nslookup", "google.com", kubeDNS,
}, "", "", false, true, promAddress)

// Step 2: Validate LRP using cilium commands
t.Log("Step 2: Validating LRP using cilium commands")
validateCiliumLRP(t, ctx, cs, config)

// Step 3: Restart busybox pods and verify LRP still works
t.Log("Step 3: Restarting client pods to test persistence")
restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)

// Step 4: Verify metrics after restart
t.Log("Step 4: Verifying LRP functionality after pod restart")
testLRPCase(t, ctx, restartedPod, []string{
"nslookup", "google.com", kubeDNS,
}, "", "", false, true, promAddress)

// Step 5: Validate cilium commands still show LRP
t.Log("Step 5: Re-validating cilium LRP after restart")
validateCiliumLRP(t, ctx, cs, config)

// Step 6: Delete and recreate resources & restart nodelocaldns daemonset
t.Log("Step 6: Testing resource deletion and recreation")
recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)

// Step 7: Final verification after recreation
t.Log("Step 7: Final verification after resource recreation - skipping basic DNS test, will validate with metrics in Step 8")

// Step 8: Re-establish port forward to new node-local-dns pod and validate metrics
t.Log("Step 8: Re-establishing port forward to new node-local-dns pod for metrics validation")

// Get the new node-local-dns pod on the same node as our recreated client pod
nodeName := recreatedPod.Spec.NodeName
newNodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, nodeName)
require.NoError(t, err)
require.NotEmpty(t, newNodeLocalDNSPods.Items, "No node-local-dns pod found on node %s after restart", nodeName)

newNodeLocalDNSPod := TakeOne(newNodeLocalDNSPods.Items)
t.Logf("Setting up port forward to new node-local-dns pod: %s", newNodeLocalDNSPod.Name)

// Setup new port forward to the new node-local-dns pod
newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
Namespace: newNodeLocalDNSPod.Namespace,
PodName: newNodeLocalDNSPod.Name,
LocalPort: 9254, // Use different port to avoid conflicts
DestPort: 9253,
})
require.NoError(t, err)

newPortForwardCtx, newCancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelay)
defer newCancel()

err = defaultRetrier.Do(newPortForwardCtx, func() error {
t.Logf("attempting port forward to new node-local-dns pod %s...", newNodeLocalDNSPod.Name)
return errors.Wrap(newPf.Forward(newPortForwardCtx), "could not start port forward to new pod")
})
require.NoError(t, err, "could not start port forward to new node-local-dns pod")
defer newPf.Stop()

t.Log("Port forward to new node-local-dns pod established")

// Now test metrics with the new port forward using port 9254
newPromAddress := "http://localhost:9254/metrics"

// Use testLRPCase function with the new prometheus address
t.Log("Validating metrics with new node-local-dns pod")
testLRPCase(t, ctx, recreatedPod, []string{
"nslookup", "github.com", kubeDNS,
}, "", "", false, true, newPromAddress)

t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)

// Step 9: Final cilium validation after node-local-dns restart
t.Log("Step 9: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
validateCiliumLRP(t, ctx, cs, config)

t.Log("Negative LRP test completed successfully")
}

// validateCiliumLRP checks that LRP is properly configured in cilium
func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, config *rest.Config) {
ciliumPods, err := cs.CoreV1().Pods(kubeSystemNamespace).List(ctx, metav1.ListOptions{
LabelSelector: "k8s-app=cilium",
})
require.NoError(t, err)
require.NotEmpty(t, ciliumPods.Items)
ciliumPod := TakeOne(ciliumPods.Items)

// Get kube-dns service IP for validation
svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
require.NoError(t, err)
kubeDNSIP := svc.Spec.ClusterIP

// IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
selectedNode := ciliumPod.Spec.NodeName
t.Logf("Using cilium pod %s on node %s for validation", ciliumPod.Name, selectedNode)

// Get node-local-dns pod specifically on the same node as our cilium pod
nodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, selectedNode)
require.NoError(t, err)
require.NotEmpty(t, nodeLocalDNSPods.Items, "No node-local-dns pod found on node %s", selectedNode)

// Use the first (and should be only) node-local-dns pod on this node
nodeLocalDNSPod := nodeLocalDNSPods.Items[0]
nodeLocalDNSIP := nodeLocalDNSPod.Status.PodIP
require.NotEmpty(t, nodeLocalDNSIP, "node-local-dns pod %s has no IP address", nodeLocalDNSPod.Name)

t.Logf("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s",
kubeDNSIP, nodeLocalDNSIP, nodeLocalDNSPod.Name, selectedNode)

// Check cilium lrp list
lrpListCmd := []string{"cilium", "lrp", "list"}
lrpOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", lrpListCmd, config, false)
require.NoError(t, err)

// Validate the LRP output structure more thoroughly
lrpOutputStr := string(lrpOutput)
require.Contains(t, lrpOutputStr, "nodelocaldns", "LRP not found in cilium lrp list")

// Parse LRP list output to validate structure
lrpLines := strings.Split(lrpOutputStr, "\n")
nodelocaldnsFound := false

for _, line := range lrpLines {
line = strings.TrimSpace(line)
if strings.Contains(line, "nodelocaldns") && strings.Contains(line, "kube-system") {
// Validate that the line contains expected components
require.Contains(t, line, "kube-system", "LRP line should contain kube-system namespace")
require.Contains(t, line, "nodelocaldns", "LRP line should contain nodelocaldns name")
require.Contains(t, line, "kube-dns", "LRP line should reference kube-dns service")
nodelocaldnsFound = true
t.Logf("Found nodelocaldns LRP entry: %s", line)
break
}
}

require.True(t, nodelocaldnsFound, "nodelocaldns LRP entry not found with expected structure in output: %s", lrpOutputStr)

// Check cilium service list for localredirect
serviceListCmd := []string{"cilium", "service", "list"}
serviceOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", serviceListCmd, config, false)
require.NoError(t, err)
require.Contains(t, string(serviceOutput), "LocalRedirect", "LocalRedirect not found in cilium service list")

// Validate LocalRedirect entries
serviceLines := strings.Split(string(serviceOutput), "\n")
tcpFound := false
udpFound := false

for _, line := range serviceLines {
if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) {
// Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
if strings.Contains(line, nodeLocalDNSIP) {
if strings.Contains(line, "/TCP") {
tcpFound = true
t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
}
if strings.Contains(line, "/UDP") {
udpFound = true
t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
}
}
}
}

// Verify both TCP and UDP LocalRedirect entries exist
require.True(t, tcpFound, "TCP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
require.True(t, udpFound, "UDP LocalRedirect entry not found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)

t.Logf("Cilium LRP List Output:\n%s", string(lrpOutput))
t.Logf("Cilium Service List Output:\n%s", string(serviceOutput))
}

// restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
// Find the daemonset name by looking up the pod's owner
podDetails, err := cs.CoreV1().Pods(originalPod.Namespace).Get(ctx, originalPod.Name, metav1.GetOptions{})
require.NoError(t, err)

// Get the node name for consistent testing
nodeName := podDetails.Spec.NodeName

// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
err = kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
require.NoError(t, err)

// Wait for the daemonset to be ready
kubernetes.WaitForPodDaemonset(ctx, cs, originalPod.Namespace, "lrp-test", clientLabelSelector)

// Get the new pod on the same node
clientPods, err := kubernetes.GetPodsByNode(ctx, cs, originalPod.Namespace, clientLabelSelector, nodeName)
require.NoError(t, err)
require.NotEmpty(t, clientPods.Items)

return TakeOne(clientPods.Items)
}

// deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
config := kubernetes.MustGetRestConfig()
ciliumCS, err := ciliumClientset.NewForConfig(config)
require.NoError(t, err)

nodeName := originalPod.Spec.NodeName

// Delete client daemonset
dsClient := cs.AppsV1().DaemonSets(originalPod.Namespace)
clientDS := kubernetes.MustParseDaemonSet(clientPath)
kubernetes.MustDeleteDaemonset(ctx, dsClient, clientDS)

// Delete LRP
lrpContent, err := os.ReadFile(lrpPath)
require.NoError(t, err)
var lrp ciliumv2.CiliumLocalRedirectPolicy
err = yaml.Unmarshal(lrpContent, &lrp)
require.NoError(t, err)

lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)

// Wait for deletion to complete
time.Sleep(10 * time.Second)
Copilot AI commented on Oct 17, 2025 (on lines +475 to +476):

Fixed sleep introduces unnecessary delay and potential flakiness; replace with a poll-based wait (e.g., repeatedly checking for resource absence/recreation with a timeout) to reduce test duration and improve reliability.

Suggested change:
- // Wait for deletion to complete
- time.Sleep(10 * time.Second)
+ // Wait for deletion to complete (poll for absence of client DaemonSet and LRP)
+ retry.DoWithTimeout(ctx, "wait for client DaemonSet deletion", 30*time.Second, func(ctx context.Context) (bool, error) {
+     _, err := dsClient.Get(ctx, clientDS.Name, metav1.GetOptions{})
+     if err != nil {
+         // DaemonSet not found
+         return true, nil
+     }
+     // Still exists
+     return false, nil
+ })
+ retry.DoWithTimeout(ctx, "wait for LRP deletion", 30*time.Second, func(ctx context.Context) (bool, error) {
+     _, err := lrpClient.Get(ctx, lrp.Name, metav1.GetOptions{})
+     if err != nil {
+         // LRP not found
+         return true, nil
+     }
+     // Still exists
+     return false, nil
+ })

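Note that the `retry.DoWithTimeout` helper in the suggestion is an assumed signature, not one confirmed to exist in this repo's retry package. A roughly equivalent poll using k8s.io/apimachinery's wait package, with illustrative names, might look like:

```go
package lrp

import (
	"context"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	appsv1client "k8s.io/client-go/kubernetes/typed/apps/v1"
)

// waitForDaemonSetDeletion polls once per second until the DaemonSet is gone
// or the 30-second timeout expires.
func waitForDaemonSetDeletion(ctx context.Context, dsClient appsv1client.DaemonSetInterface, name string) error {
	return wait.PollUntilContextTimeout(ctx, time.Second, 30*time.Second, true,
		func(ctx context.Context) (bool, error) {
			_, err := dsClient.Get(ctx, name, metav1.GetOptions{})
			if apierrors.IsNotFound(err) {
				return true, nil // deleted
			}
			return false, err // still present; a non-nil err aborts the poll
		})
}
```

The same shape would work for the CiliumLocalRedirectPolicy client.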


// Recreate LRP
_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
t.Cleanup(cleanupLRP)

// Restart node-local-dns pods to pick up new LRP configuration
t.Log("Restarting node-local-dns pods after LRP recreation")
err = kubernetes.MustRestartDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns")
require.NoError(t, err)
kubernetes.WaitForPodDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns", nodeLocalDNSLabelSelector)

// Recreate client daemonset
_, cleanupClient := kubernetes.MustSetupDaemonset(ctx, cs, clientPath)
t.Cleanup(cleanupClient)

// Wait for pods to be ready
kubernetes.WaitForPodDaemonset(ctx, cs, clientDS.Namespace, clientDS.Name, clientLabelSelector)

// Get new pod on the same node
clientPods, err := kubernetes.GetPodsByNode(ctx, cs, clientDS.Namespace, clientLabelSelector, nodeName)
require.NoError(t, err)
require.NotEmpty(t, clientPods.Items)

return TakeOne(clientPods.Items)
}

// TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type