diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index 5ad50df0d..72c68a645 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -647,6 +647,13 @@ type ServiceDiscoveryFeatureConfig struct { // +optional Enabled *bool `json:"enabled,omitempty"` + // Indicates that the operator enabled this feature automatically rather than in response to an + // explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + // back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + // older agent images. This field is managed by the operator and must not be set by users. + // +optional + EnabledByDefault *bool `json:"enabledByDefault,omitempty"` + // DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28. // +deprecated // +optional diff --git a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go index 4733c0d92..0962f85f1 100644 --- a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go +++ b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go @@ -3453,6 +3453,11 @@ func (in *ServiceDiscoveryFeatureConfig) DeepCopyInto(out *ServiceDiscoveryFeatu *out = new(bool) **out = **in } + if in.EnabledByDefault != nil { + in, out := &in.EnabledByDefault, &out.EnabledByDefault + *out = new(bool) + **out = **in + } if in.NetworkStats != nil { in, out := &in.NetworkStats, &out.NetworkStats *out = new(ServiceDiscoveryNetworkStatsConfig) diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml index 43993cffb..c650cb909 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml @@ -2488,6 +2488,13 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. + type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: @@ -10905,6 +10912,13 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. + type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json index ba086e025..ca71c4e05 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json @@ -2588,6 +2588,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", @@ -10742,6 +10746,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml index aa5590b08..ba0ed0981 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml @@ -2488,6 +2488,13 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. + type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json index 4dbcc7317..b4ae8fa9e 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json @@ -2592,6 +2592,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index 4dde7e8f7..0cc1976a1 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -2488,6 +2488,13 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. + type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: @@ -10955,6 +10962,13 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. + type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json index e7b912323..ad91ec166 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json @@ -2588,6 +2588,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", @@ -10807,6 +10811,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 9640c91d4..d3005524c 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,6 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | +| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | | features.serviceDiscovery.networkStats.enabled | DEPRECATED: this field is ignored. | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | diff --git a/docs/configuration_public.md b/docs/configuration_public.md index c536cfd61..3fd7f241c 100644 --- a/docs/configuration_public.md +++ b/docs/configuration_public.md @@ -384,6 +384,9 @@ spec: `features.serviceDiscovery.enabled` : Enables the service discovery check. Default: false +`features.serviceDiscovery.enabledByDefault` +: Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. + `features.serviceDiscovery.networkStats.enabled` : DEPRECATED: this field is ignored. diff --git a/internal/controller/datadogagent/controller_v2_test.go b/internal/controller/datadogagent/controller_v2_test.go index af15dbfdf..b23b5f6af 100644 --- a/internal/controller/datadogagent/controller_v2_test.go +++ b/internal/controller/datadogagent/controller_v2_test.go @@ -1752,7 +1752,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { wantFunc: func(t *testing.T, c client.Client) { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "ccac39a3a007bad81d7baf8febc6445f", + constants.MD5DDAIDeploymentAnnotationKey: "62d2822cc8547055dc8e2fca6f222a17", } verifyDDAI(t, c, []v1alpha1.DatadogAgentInternal{expectedDDAI}) @@ -1784,7 +1784,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { baseDDAI := getBaseDDAI(dda) expectedDDAI := baseDDAI.DeepCopy() expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "f2aa21d0ecced63c091ca2df3d31e451", + constants.MD5DDAIDeploymentAnnotationKey: "c362b9a0aa0e2ad1a1d60f4ee8575c8f", } expectedDDAI.Spec.Features.ClusterChecks.UseClusterChecksRunners = apiutils.NewBoolPointer(true) expectedDDAI.Spec.Global.Credentials = &v2alpha1.DatadogCredentials{ @@ -1860,7 +1860,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { profileDDAI := getBaseDDAI(dda) profileDDAI.Name = "foo-profile" profileDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "73e0cc1e445001e326507ac23654104e", + constants.MD5DDAIDeploymentAnnotationKey: "2c3c1664f08fb6d6591294f2c878d1dd", } profileDDAI.Labels[constants.ProfileLabelKey] = "foo-profile" profileDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ @@ -2094,7 +2094,7 @@ func getBaseDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { func getDefaultDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "f98c0497c66e2747f6d116970ab8f0b1", + constants.MD5DDAIDeploymentAnnotationKey: "7e6c12e645247762609327ab80b63d9e", } expectedDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ v2alpha1.NodeAgentComponentName: { diff --git a/internal/controller/datadogagent/defaults/datadogagent_default.go b/internal/controller/datadogagent/defaults/datadogagent_default.go index 1e50c5637..24d807412 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default.go @@ -41,7 +41,8 @@ const ( defaultGPUMonitoringEnabled bool = false - defaultServiceDiscoveryEnabled bool = false + defaultServiceDiscoveryEnabled bool = false + defaultServiceDiscoveryEnabledByDefault bool = false defaultAPMEnabled bool = true defaultAPMHostPortEnabled bool = false @@ -302,7 +303,12 @@ func defaultFeaturesConfig(ddaSpec *v2alpha1.DatadogAgentSpec) { if ddaSpec.Features.ServiceDiscovery == nil { ddaSpec.Features.ServiceDiscovery = &v2alpha1.ServiceDiscoveryFeatureConfig{} } - apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) + apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.EnabledByDefault, defaultServiceDiscoveryEnabledByDefault) + // Only default Enabled to false when not enabled-by-default, so that Enabled=nil remains + // distinguishable from Enabled=false (explicit user opt-out) when EnabledByDefault=true. + if !apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.EnabledByDefault) { + apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) + } // GPU monitoring feature if ddaSpec.Features.GPU == nil { diff --git a/internal/controller/datadogagent/defaults/datadogagent_default_test.go b/internal/controller/datadogagent/defaults/datadogagent_default_test.go index 19d9ff838..46b945fdd 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default_test.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default_test.go @@ -212,7 +212,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -361,7 +362,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -460,7 +462,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -596,7 +599,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -757,7 +761,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -913,7 +918,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1069,7 +1075,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1234,7 +1241,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1390,7 +1398,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1549,7 +1558,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1751,7 +1761,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1876,7 +1887,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2033,7 +2045,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2213,7 +2226,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2372,7 +2386,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2544,7 +2559,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(valueTrue), + Enabled: apiutils.NewBoolPointer(valueTrue), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature.go b/internal/controller/datadogagent/feature/servicediscovery/feature.go index a123b7914..6227d2c1a 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature.go @@ -6,6 +6,8 @@ package servicediscovery import ( + "fmt" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -29,6 +31,11 @@ func buildFeature(*feature.Options) feature.Feature { } type serviceDiscoveryFeature struct { + userExplicitlyEnabled bool + // features holds a pointer to the live DDA features struct so that ManageNodeAgent + // can re-evaluate hasOtherSystemProbeFeatures after Remote Config state has been + // merged by other features' Configure calls (e.g. USM merges RC state into the spec). + features *v2alpha1.DatadogFeatures } // ID returns the ID of the Feature @@ -38,17 +45,61 @@ func (f *serviceDiscoveryFeature) ID() feature.IDType { // Configure is used to configure the feature from a v2alpha1.DatadogAgent instance. func (f *serviceDiscoveryFeature) Configure(_ metav1.Object, ddaSpec *v2alpha1.DatadogAgentSpec, _ *v2alpha1.RemoteConfigConfiguration) (reqComp feature.RequiredComponents) { - if ddaSpec.Features != nil && ddaSpec.Features.ServiceDiscovery != nil && apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.Enabled) { - reqComp.Agent = feature.RequiredComponent{ - IsRequired: apiutils.NewBoolPointer(true), - Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, - } + if ddaSpec.Features == nil || ddaSpec.Features.ServiceDiscovery == nil { + return reqComp + } + sd := ddaSpec.Features.ServiceDiscovery + + // Explicit Enabled=false always disables the feature, even if EnabledByDefault=true. + if sd.Enabled != nil && !*sd.Enabled { + return reqComp + } + // Feature requires either an explicit opt-in or a default enablement. + if !apiutils.BoolValue(sd.Enabled) && !apiutils.BoolValue(sd.EnabledByDefault) { + return reqComp } + reqComp.Agent = feature.RequiredComponent{ + IsRequired: apiutils.NewBoolPointer(true), + Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, + } + + f.features = ddaSpec.Features + f.userExplicitlyEnabled = apiutils.BoolValue(sd.Enabled) + return reqComp } +// systemProbeLiteCommand returns the shell command for the system-probe container when +// system-probe-lite is preferred. If userOptedIn is true (user explicitly enabled discovery), +// system-probe is used as the fallback — the user has accepted the resource cost. +// Otherwise (enabled by default), the fallback is sleep infinity to avoid unexpectedly +// running system-probe on older agent images where the discovery feature may not be supported. +func systemProbeLiteCommand(socketPath string, userOptedIn bool) string { + fallback := "sleep infinity" + if userOptedIn { + fallback = "system-probe --config=/etc/datadog-agent/system-probe.yaml" + } + return fmt.Sprintf("system-probe-lite run --socket %s --log-level ${DD_LOG_LEVEL:-info} || %s", socketPath, fallback) +} + +// hasOtherSystemProbeFeatures returns true if any feature besides service discovery +// requires the full system-probe binary. When true, system-probe-lite cannot be used. +func hasOtherSystemProbeFeatures(features *v2alpha1.DatadogFeatures) bool { + if features == nil { + return false + } + return (features.NPM != nil && apiutils.BoolValue(features.NPM.Enabled)) || + (features.CWS != nil && apiutils.BoolValue(features.CWS.Enabled)) || + (features.CSPM != nil && apiutils.BoolValue(features.CSPM.Enabled) && apiutils.BoolValue(features.CSPM.RunInSystemProbe)) || + (features.USM != nil && apiutils.BoolValue(features.USM.Enabled)) || + (features.OOMKill != nil && apiutils.BoolValue(features.OOMKill.Enabled)) || + (features.TCPQueueLength != nil && apiutils.BoolValue(features.TCPQueueLength.Enabled)) || + (features.EBPFCheck != nil && apiutils.BoolValue(features.EBPFCheck.Enabled)) || + (features.GPU != nil && apiutils.BoolValue(features.GPU.Enabled) && apiutils.BoolValue(features.GPU.PrivilegedMode)) +} + // ManageDependencies allows a feature to manage its dependencies. // Feature's dependencies should be added in the store. func (f *serviceDiscoveryFeature) ManageDependencies(managers feature.ResourceManagers, provider string) error { @@ -104,6 +155,20 @@ func (f *serviceDiscoveryFeature) ManageNodeAgent(managers feature.PodTemplateMa managers.EnvVar().AddEnvVarToContainer(apicommon.CoreAgentContainerName, socketEnvVar) managers.EnvVar().AddEnvVarToContainer(apicommon.SystemProbeContainerName, socketEnvVar) + // Direct PodTemplateSpec mutation: no managers API for command overrides. + // Re-evaluate here (not cached from Configure) so that RC state merged by other + // features' Configure calls (e.g. USM) is taken into account. + if !hasOtherSystemProbeFeatures(f.features) { + for i := range managers.PodTemplateSpec().Spec.Containers { + c := &managers.PodTemplateSpec().Spec.Containers[i] + if c.Name == string(apicommon.SystemProbeContainerName) { + c.Command = []string{"/bin/sh", "-c"} + c.Args = []string{systemProbeLiteCommand(common.DefaultSystemProbeSocketPath, f.userExplicitlyEnabled)} + break + } + } + } + return nil } diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go index eb54c0180..fae1bc70b 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go @@ -35,6 +35,42 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { ddaServiceDiscoveryEnabled := ddaServiceDiscoveryDisabled.DeepCopy() ddaServiceDiscoveryEnabled.Spec.Features.ServiceDiscovery.Enabled = apiutils.NewBoolPointer(true) + ddaWithNPM := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + NPM: &v2alpha1.NPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + }, + } + + ddaWithCWS := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + CWS: &v2alpha1.CWSFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + }, + } + + ddaEnabledByDefault := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + EnabledByDefault: apiutils.NewBoolPointer(true), + }, + }, + }, + } + tests := test.FeatureTestSuite{ { Name: "service discovery not enabled", @@ -45,14 +81,163 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { Name: "service discovery enabled", DDA: ddaServiceDiscoveryEnabled, WantConfigure: true, - Agent: test.NewDefaultComponentTest().WithWantFunc(getWantFunc()), + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(true, true)), + }, + { + Name: "system-probe-lite not used when NPM also enabled", + DDA: &ddaWithNPM, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(false, true)), + }, + { + Name: "system-probe-lite not used when CWS also enabled", + DDA: &ddaWithCWS, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(false, true)), + }, + { + Name: "system-probe-lite enabled by default - no system-probe fallback", + DDA: &ddaEnabledByDefault, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(true, false)), }, } tests.Run(t, buildFeature) } -func getWantFunc() func(t testing.TB, mgrInterface feature.PodTemplateManagers) { +func Test_hasOtherSystemProbeFeatures(t *testing.T) { + tests := []struct { + name string + features *v2alpha1.DatadogFeatures + want bool + }{ + { + name: "nil features", + features: nil, + want: false, + }, + { + name: "no other features", + features: &v2alpha1.DatadogFeatures{}, + want: false, + }, + { + name: "NPM enabled", + features: &v2alpha1.DatadogFeatures{ + NPM: &v2alpha1.NPMFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "CWS enabled", + features: &v2alpha1.DatadogFeatures{ + CWS: &v2alpha1.CWSFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "USM enabled", + features: &v2alpha1.DatadogFeatures{ + USM: &v2alpha1.USMFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "OOMKill enabled", + features: &v2alpha1.DatadogFeatures{ + OOMKill: &v2alpha1.OOMKillFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "TCPQueueLength enabled", + features: &v2alpha1.DatadogFeatures{ + TCPQueueLength: &v2alpha1.TCPQueueLengthFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "EBPFCheck enabled", + features: &v2alpha1.DatadogFeatures{ + EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "CSPM enabled with RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + RunInSystemProbe: apiutils.NewBoolPointer(true), + }, + }, + want: true, + }, + { + name: "CSPM enabled without RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + want: false, + }, + { + name: "GPU enabled with PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + PrivilegedMode: apiutils.NewBoolPointer(true), + }, + }, + want: true, + }, + { + name: "GPU enabled without PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, hasOtherSystemProbeFeatures(tt.features)) + }) + } +} + +func createFuncWithSystemProbeContainer() func(testing.TB) (feature.PodTemplateManagers, string) { + return func(t testing.TB) (feature.PodTemplateManagers, string) { + newPTS := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: string(apicommon.CoreAgentContainerName), + }, + { + Name: string(apicommon.SystemProbeContainerName), + }, + }, + }, + } + return fake.NewPodTemplateManagers(t, newPTS), "" + } +} + +func getWantFunc(useSPL bool, userOptedIn bool) func(t testing.TB, mgrInterface feature.PodTemplateManagers) { return func(t testing.TB, mgrInterface feature.PodTemplateManagers) { mgr := mgrInterface.(*fake.PodTemplateManagers) @@ -155,5 +340,19 @@ func getWantFunc() func(t testing.TB, mgrInterface feature.PodTemplateManagers) systemProbeEnvVars := mgr.EnvVarMgr.EnvVarsByC[apicommon.SystemProbeContainerName] assert.True(t, apiutils.IsEqualStruct(systemProbeEnvVars, wantSPEnvVars), "System Probe envvars \ndiff = %s", cmp.Diff(systemProbeEnvVars, wantSPEnvVars)) + + // check system-probe container command override + for _, c := range mgr.PodTemplateSpec().Spec.Containers { + if c.Name == string(apicommon.SystemProbeContainerName) { + if useSPL { + assert.Equal(t, []string{"/bin/sh", "-c"}, c.Command, "System Probe command should be overridden for system-probe-lite") + assert.Equal(t, []string{systemProbeLiteCommand(common.DefaultSystemProbeSocketPath, userOptedIn)}, c.Args, "System Probe args mismatch") + } else { + assert.Empty(t, c.Command, "System Probe command should not be overridden") + assert.Empty(t, c.Args, "System Probe args should not be overridden") + } + break + } + } } }