From 5f82e447fd810b41438d91f94517b707e3e037d9 Mon Sep 17 00:00:00 2001 From: Mitali Salvi Date: Thu, 30 Apr 2026 15:45:28 +0000 Subject: [PATCH 1/4] Align EKS addon integration tests with helm-chart 6.x release Update resource counts and validations to match the OTLP Container Insights components added in helm-chart 6.x: - Add kube-state-metrics, node-exporter, cluster-scraper resources - Update service/daemonset/pod counts for new components - Add OTLP CI RBAC validations (SAs, roles, bindings) - Add cloudwatch-agent-role to Roles and RoleBindings validation - Replace fmt.Println with t.Logf for proper test logging - Use named daemonSetNameRegex constant instead of inline regex --- .../eks/resourceCount_linuxonly_test.go | 38 +++++-- .../eks/resourceCount_windowslinux_test.go | 40 +++++-- .../eks/validateResources_test.go | 101 ++++++------------ 3 files changed, 98 insertions(+), 81 deletions(-) diff --git a/integration-tests/eks/resourceCount_linuxonly_test.go b/integration-tests/eks/resourceCount_linuxonly_test.go index 35812a7ab..57744cbc6 100644 --- a/integration-tests/eks/resourceCount_linuxonly_test.go +++ b/integration-tests/eks/resourceCount_linuxonly_test.go @@ -7,15 +7,41 @@ package eks_addon const ( - // Services count for CW agent on Linux and Windows - serviceCountLinux = 6 + // Services count on Linux: + // - amazon-cloudwatch-observability-webhook-service + // - cloudwatch-agent + // - cloudwatch-agent-headless + // - cloudwatch-agent-monitoring + // - dcgm-exporter-service + // - neuron-monitor-service + // - kube-state-metrics + // - cloudwatch-agent-cluster-scraper-monitoring + serviceCountLinux = 8 + + // Services count on Windows: + // - cloudwatch-agent-windows + // - cloudwatch-agent-windows-headless + // - cloudwatch-agent-windows-monitoring + // - cloudwatch-agent-windows-container-insights-monitoring serviceCountWindows = 4 - // DaemonSet count for CW agent on Linux and Windows - daemonsetCountLinux = 4 + // DaemonSet count on Linux: + // - cloudwatch-agent + // - dcgm-exporter + // - fluent-bit + // - neuron-monitor + // - node-exporter + daemonsetCountLinux = 5 + + // DaemonSet count on Windows: + // - cloudwatch-agent-windows + // - cloudwatch-agent-windows-container-insights + // - fluent-bit-windows daemonsetCountWindows = 3 - // Pods count for CW agent on Linux and Windows - podCountLinux = 3 + // Pods count on Linux and Windows + // podCountLinux includes 2 OTLP deployment pods (kube-state-metrics, cloudwatch-agent-cluster-scraper) + // + 1 node-exporter daemonset pod + podCountLinux = 6 podCountWindows = 0 ) diff --git a/integration-tests/eks/resourceCount_windowslinux_test.go b/integration-tests/eks/resourceCount_windowslinux_test.go index e95e5a6a7..4ad346847 100644 --- a/integration-tests/eks/resourceCount_windowslinux_test.go +++ b/integration-tests/eks/resourceCount_windowslinux_test.go @@ -7,15 +7,41 @@ package eks_addon const ( - // Services count for CW agent on Linux and Windows - serviceCountLinux = 6 + // Services count on Linux: + // - amazon-cloudwatch-observability-webhook-service + // - cloudwatch-agent + // - cloudwatch-agent-headless + // - cloudwatch-agent-monitoring + // - dcgm-exporter-service + // - neuron-monitor-service + // - kube-state-metrics + // - cloudwatch-agent-cluster-scraper-monitoring + serviceCountLinux = 8 + + // Services count on Windows: + // - cloudwatch-agent-windows + // - cloudwatch-agent-windows-headless + // - cloudwatch-agent-windows-monitoring + // - cloudwatch-agent-windows-container-insights-monitoring serviceCountWindows = 4 - // DaemonSet count for CW agent on Linux and Windows - daemonsetCountLinux = 4 + // DaemonSet count on Linux: + // - cloudwatch-agent + // - dcgm-exporter + // - fluent-bit + // - neuron-monitor + // - node-exporter + daemonsetCountLinux = 5 + + // DaemonSet count on Windows: + // - cloudwatch-agent-windows + // - cloudwatch-agent-windows-container-insights + // - fluent-bit-windows daemonsetCountWindows = 3 - // Pods count for CW agent on Linux and Windows - podCountLinux = 3 - podCountWindows = 2 + // Pods count on Linux and Windows + // podCountLinux includes 2 OTLP deployment pods (kube-state-metrics, cloudwatch-agent-cluster-scraper) + // + 1 node-exporter daemonset pod + podCountLinux = 6 + podCountWindows = 3 ) diff --git a/integration-tests/eks/validateResources_test.go b/integration-tests/eks/validateResources_test.go index 83fc46eaf..5f1f8f6f6 100644 --- a/integration-tests/eks/validateResources_test.go +++ b/integration-tests/eks/validateResources_test.go @@ -25,22 +25,25 @@ import ( ) const ( - nameSpace = "amazon-cloudwatch" - addOnName = "amazon-cloudwatch-observability" - agentName = "cloudwatch-agent" - agentNameWindows = "cloudwatch-agent-windows" - agentNameWindowsContainerInsights = "cloudwatch-agent-windows-container-insights" - operatorName = addOnName + "-controller-manager" - fluentBitName = "fluent-bit" - fluentBitNameWindows = "fluent-bit-windows" - dcgmExporterName = "dcgm-exporter" - neuronMonitor = "neuron-monitor" - podNameRegex = "(" + agentName + "|" + agentNameWindows + "|" + agentNameWindowsContainerInsights + "|" + operatorName + "|" + fluentBitName + "|" + fluentBitNameWindows + ")-*" - serviceNameRegex = agentName + "(-headless|-monitoring)?|" + agentNameWindows + "(-headless|-monitoring)?|" + agentNameWindowsContainerInsights + "(-headless|-monitoring)?|" + addOnName + "-webhook-service|" + dcgmExporterName + "-service|" + neuronMonitor + "-service" + nameSpace = "amazon-cloudwatch" + addOnName = "amazon-cloudwatch-observability" + agentName = "cloudwatch-agent" + agentNameWindows = "cloudwatch-agent-windows" + operatorName = addOnName + "-controller-manager" + fluentBitName = "fluent-bit" + fluentBitNameWindows = "fluent-bit-windows" + dcgmExporterName = "dcgm-exporter" + neuronMonitor = "neuron-monitor" + kubeStateMetricsName = "kube-state-metrics" + clusterScraperName = "cloudwatch-agent-cluster-scraper" + nodeExporterName = "node-exporter" + podNameRegex = "(" + agentName + "|" + agentNameWindows + "|" + operatorName + "|" + fluentBitName + "|" + fluentBitNameWindows + "|" + kubeStateMetricsName + "|" + clusterScraperName + "|" + nodeExporterName + ")-*" + serviceNameRegex = agentName + "(-headless|-monitoring)?|" + agentNameWindows + "(-headless|-monitoring)?|" + addOnName + "-webhook-service|" + dcgmExporterName + "-service|" + neuronMonitor + "-service|" + kubeStateMetricsName + "|" + clusterScraperName + "-monitoring" + daemonSetNameRegex = agentName + "|" + agentNameWindows + "|" + fluentBitName + "|" + fluentBitNameWindows + "|" + dcgmExporterName + "|" + neuronMonitor + "|" + nodeExporterName ) const ( - deploymentCount = 1 + deploymentCount = 3 podCount = podCountLinux + podCountWindows serviceCount = serviceCountLinux + serviceCountWindows daemonsetCount = daemonsetCountLinux + daemonsetCountWindows @@ -75,14 +78,8 @@ func TestOperatorOnEKs(t *testing.T) { assert.NoError(t, err) assert.Len(t, pods.Items, podCount) for _, pod := range pods.Items { - fmt.Println("pod name: " + pod.Name + " namespace:" + pod.Namespace) + t.Logf("pod name: %s namespace:%s", pod.Name, pod.Namespace) assert.Contains(t, []v1.PodPhase{v1.PodRunning, v1.PodPending}, pod.Status.Phase) - // matches - // - cloudwatch-agent-* - // - cloudwatch-agent-windows-* - // - amazon-cloudwatch-observability-controller-manager-* - // - fluent-bit-* - // - fluent-bit-windows-* if match, _ := regexp.MatchString(podNameRegex, pod.Name); !match { assert.Fail(t, "Cluster Pods are not created correctly") } @@ -93,18 +90,7 @@ func TestOperatorOnEKs(t *testing.T) { assert.NoError(t, err) assert.Len(t, services.Items, serviceCount) for _, service := range services.Items { - fmt.Println("service name: " + service.Name + " namespace:" + service.Namespace) - // matches - // - amazon-cloudwatch-observability-webhook-service - // - cloudwatch-agent - // - cloudwatch-agent-headless - // - cloudwatch-agent-monitoring - // - cloudwatch-agent-windows - // - cloudwatch-agent-windows-headless - // - cloudwatch-agent-windows-monitoring - // - cloudwatch-agent-windows-container-insights-monitoring - // - dcgm-exporter-service - // - neuron-monitor-service + t.Logf("service name: %s namespace:%s", service.Name, service.Namespace) if match, _ := regexp.MatchString(serviceNameRegex, service.Name); !match { assert.Fail(t, "Cluster Service is not created correctly") } @@ -114,14 +100,12 @@ func TestOperatorOnEKs(t *testing.T) { deployments, err := ListDeployments(nameSpace, clientSet) assert.NoError(t, err) for _, deployment := range deployments.Items { - fmt.Println("deployment name: " + deployment.Name + " namespace:" + deployment.Namespace) + t.Logf("deployment name: %s namespace:%s", deployment.Name, deployment.Namespace) } assert.Len(t, deployments.Items, deploymentCount) - // matches - // - amazon-cloudwatch-observability-controller-manager assert.Equal(t, addOnName+"-controller-manager", deployments.Items[0].Name) for _, deploymentCondition := range deployments.Items[0].Status.Conditions { - fmt.Println("deployment condition type: " + deploymentCondition.Type) + t.Logf("deployment condition type: %v", deploymentCondition.Type) } assert.Equal(t, appsV1.DeploymentAvailable, deployments.Items[0].Status.Conditions[0].Type) @@ -130,16 +114,8 @@ func TestOperatorOnEKs(t *testing.T) { assert.NoError(t, err) assert.Len(t, daemonSets.Items, daemonsetCount) for _, daemonSet := range daemonSets.Items { - fmt.Println("daemonSet name: " + daemonSet.Name + " namespace:" + daemonSet.Namespace) - // matches - // - cloudwatch-agent - // - cloudwatch-agent-windows - // - cloudwatch-agent-windows-container-insights - // - fluent-bit - // - fluent-bit-windows - // - dcgm-exporter (this can be removed in the future) - // - neuron-monitor - if match, _ := regexp.MatchString(agentName+"|fluent-bit|dcgm-exporter|neuron-monitor", daemonSet.Name); !match { + t.Logf("daemonSet name: %s namespace:%s", daemonSet.Name, daemonSet.Namespace) + if match, _ := regexp.MatchString(daemonSetNameRegex, daemonSet.Name); !match { assert.Fail(t, "DaemonSet is not created correctly") } } @@ -148,13 +124,8 @@ func TestOperatorOnEKs(t *testing.T) { serviceAccounts, err := ListServiceAccounts(nameSpace, clientSet) assert.NoError(t, err) for _, sa := range serviceAccounts.Items { - fmt.Println("serviceAccounts name: " + sa.Name + " namespace:" + sa.Namespace) + t.Logf("serviceAccounts name: %s namespace:%s", sa.Name, sa.Namespace) } - // searches - // - amazon-cloudwatch-observability-controller-manager - // - cloudwatch-agent - // - dcgm-exporter-service-acct - // - neuron-monitor-service-acct assert.True(t, validateServiceAccount(serviceAccounts, addOnName+"-controller-manager")) assert.True(t, validateServiceAccount(serviceAccounts, agentName)) assert.True(t, validateServiceAccount(serviceAccounts, dcgmExporterName+"-service-acct")) @@ -163,53 +134,47 @@ func TestOperatorOnEKs(t *testing.T) { //Validating ClusterRoles clusterRoles, err := ListClusterRoles(clientSet) assert.NoError(t, err) - // searches - // - amazon-cloudwatch-observability-manager-role - // - cloudwatch-agent-role assert.True(t, validateClusterRoles(clusterRoles, addOnName+"-manager-role")) assert.True(t, validateClusterRoles(clusterRoles, agentName+"-role")) //Validating Roles roles, err := ListRoles(nameSpace, clientSet) assert.NoError(t, err) - // searches - // - dcgm-exporter-role - // - neuron-monitor-role + assert.True(t, validateRoles(roles, agentName+"-role")) assert.True(t, validateRoles(roles, dcgmExporterName+"-role")) assert.True(t, validateRoles(roles, neuronMonitor+"-role")) //Validating ClusterRoleBinding clusterRoleBindings, err := ListClusterRoleBindings(clientSet) assert.NoError(t, err) - // searches - // - amazon-cloudwatch-observability-manager-rolebinding - // - cloudwatch-agent-role-binding assert.True(t, validateClusterRoleBindings(clusterRoleBindings, addOnName+"-manager-rolebinding")) assert.True(t, validateClusterRoleBindings(clusterRoleBindings, agentName+"-role-binding")) //Validating RoleBinding roleBindings, err := ListRoleBindings(nameSpace, clientSet) assert.NoError(t, err) - // searches - // - dcgm-exporter-role-binding - // - neuron-monitor-role-binding + assert.True(t, validateRoleBindings(roleBindings, agentName+"-role-binding")) assert.True(t, validateRoleBindings(roleBindings, dcgmExporterName+"-role-binding")) assert.True(t, validateRoleBindings(roleBindings, neuronMonitor+"-role-binding")) + // Validating OTLP Container Insights RBAC + assert.True(t, validateServiceAccount(serviceAccounts, kubeStateMetricsName+"-service-acct")) + assert.True(t, validateServiceAccount(serviceAccounts, nodeExporterName+"-service-acct")) + assert.True(t, validateClusterRoles(clusterRoles, kubeStateMetricsName+"-cluster-role")) + assert.True(t, validateClusterRoleBindings(clusterRoleBindings, kubeStateMetricsName+"-cluster-role-binding")) + assert.True(t, validateRoles(roles, nodeExporterName+"-role")) + assert.True(t, validateRoleBindings(roleBindings, nodeExporterName+"-role-binding")) + //Validating MutatingWebhookConfiguration mutatingWebhookConfigurations, err := ListMutatingWebhookConfigurations(clientSet) assert.NoError(t, err) assert.Len(t, mutatingWebhookConfigurations.Items[0].Webhooks, 5) - // searches - // - amazon-cloudwatch-observability-mutating-webhook-configuration assert.Equal(t, addOnName+"-mutating-webhook-configuration", mutatingWebhookConfigurations.Items[0].Name) //Validating ValidatingWebhookConfiguration validatingWebhookConfigurations, err := ListValidatingWebhookConfigurations(clientSet) assert.NoError(t, err) assert.Len(t, validatingWebhookConfigurations.Items[0].Webhooks, 4) - // searches - // - amazon-cloudwatch-observability-validating-webhook-configuration assert.Equal(t, addOnName+"-validating-webhook-configuration", validatingWebhookConfigurations.Items[0].Name) } From db29b4fc6fd695f55cfe0b4b28d545ff7967fb9e Mon Sep 17 00:00:00 2001 From: Mitali Salvi Date: Thu, 30 Apr 2026 16:35:58 +0000 Subject: [PATCH 2/4] Update default addon version to v6.0.1-eksbuild.1 The previous default v1.1.0-eksbuild.1 is no longer a supported addon version, causing all integration tests to fail with InvalidParameterException before any Go tests could run. --- .github/workflows/eks-add-on-integ-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/eks-add-on-integ-test.yml b/.github/workflows/eks-add-on-integ-test.yml index 37d3016bc..ebbd794ed 100644 --- a/.github/workflows/eks-add-on-integ-test.yml +++ b/.github/workflows/eks-add-on-integ-test.yml @@ -16,7 +16,7 @@ on: addon_version: required: true type: string - default: "v1.1.0-eksbuild.1" + default: "v6.0.1-eksbuild.1" description: "EKS addon version" run_in_beta: required: true From 297c54b0b71aade9a5ee206ad9f146c078a23037 Mon Sep 17 00:00:00 2001 From: Mitali Salvi Date: Thu, 30 Apr 2026 17:06:08 +0000 Subject: [PATCH 3/4] Remove cloudwatch-agent namespace Role/RoleBinding assertions The v6.0.1 EKS addon does not create cloudwatch-agent-role as a namespace-scoped Role or cloudwatch-agent-role-binding as a namespace-scoped RoleBinding. These only exist as ClusterRole and ClusterRoleBinding which are already validated. --- integration-tests/eks/validateResources_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/integration-tests/eks/validateResources_test.go b/integration-tests/eks/validateResources_test.go index 5f1f8f6f6..aa2a9034e 100644 --- a/integration-tests/eks/validateResources_test.go +++ b/integration-tests/eks/validateResources_test.go @@ -140,7 +140,6 @@ func TestOperatorOnEKs(t *testing.T) { //Validating Roles roles, err := ListRoles(nameSpace, clientSet) assert.NoError(t, err) - assert.True(t, validateRoles(roles, agentName+"-role")) assert.True(t, validateRoles(roles, dcgmExporterName+"-role")) assert.True(t, validateRoles(roles, neuronMonitor+"-role")) @@ -153,7 +152,6 @@ func TestOperatorOnEKs(t *testing.T) { //Validating RoleBinding roleBindings, err := ListRoleBindings(nameSpace, clientSet) assert.NoError(t, err) - assert.True(t, validateRoleBindings(roleBindings, agentName+"-role-binding")) assert.True(t, validateRoleBindings(roleBindings, dcgmExporterName+"-role-binding")) assert.True(t, validateRoleBindings(roleBindings, neuronMonitor+"-role-binding")) From c66c83f1407f40069c2385f49fe3362db128ce04 Mon Sep 17 00:00:00 2001 From: Mitali Salvi Date: Thu, 30 Apr 2026 17:44:31 +0000 Subject: [PATCH 4/4] Revert default addon version to v1.1.0-eksbuild.1 --- .github/workflows/eks-add-on-integ-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/eks-add-on-integ-test.yml b/.github/workflows/eks-add-on-integ-test.yml index ebbd794ed..37d3016bc 100644 --- a/.github/workflows/eks-add-on-integ-test.yml +++ b/.github/workflows/eks-add-on-integ-test.yml @@ -16,7 +16,7 @@ on: addon_version: required: true type: string - default: "v6.0.1-eksbuild.1" + default: "v1.1.0-eksbuild.1" description: "EKS addon version" run_in_beta: required: true