From 515532045dab19728638966165ac1a5a7ce760ee Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Tue, 17 Feb 2026 16:07:33 +0100 Subject: [PATCH 1/5] discovery: add system-probe-lite support --- api/datadoghq/v2alpha1/datadogagent_types.go | 8 + .../v2alpha1/zz_generated.deepcopy.go | 5 + .../datadoghq.com_datadogagentinternals.yaml | 16 ++ ...hq.com_datadogagentinternals_v1alpha1.json | 8 + .../datadoghq.com_datadogagentprofiles.yaml | 8 + ...ghq.com_datadogagentprofiles_v1alpha1.json | 4 + .../bases/v1/datadoghq.com_datadogagents.yaml | 16 ++ .../datadoghq.com_datadogagents_v2alpha1.json | 8 + docs/configuration.v2alpha1.md | 1 + docs/configuration_public.md | 3 + .../datadogagent/controller_v2_test.go | 8 +- .../defaults/datadogagent_default.go | 4 +- .../defaults/datadogagent_default_test.go | 48 ++-- .../feature/servicediscovery/feature.go | 81 ++++++- .../feature/servicediscovery/feature_test.go | 208 +++++++++++++++++- 15 files changed, 391 insertions(+), 35 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index caa0f99a9..e505f7f63 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -647,6 +647,14 @@ type ServiceDiscoveryFeatureConfig struct { // +optional Enabled *bool `json:"enabled,omitempty"` + // EnabledByDefault is set by the operator when it enables this feature via default configuration, + // as opposed to an explicit user choice. When true, the system-probe binary is not used as a + // fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + // instead, to avoid unexpected resource usage on older agent images. + // This field is managed by the operator and should not be set by users. + // +optional + EnabledByDefault *bool `json:"enabledByDefault,omitempty"` + // Enables the service discovery network stats collection. // Default: true // +optional diff --git a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go index 4733c0d92..0962f85f1 100644 --- a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go +++ b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go @@ -3453,6 +3453,11 @@ func (in *ServiceDiscoveryFeatureConfig) DeepCopyInto(out *ServiceDiscoveryFeatu *out = new(bool) **out = **in } + if in.EnabledByDefault != nil { + in, out := &in.EnabledByDefault, &out.EnabledByDefault + *out = new(bool) + **out = **in + } if in.NetworkStats != nil { in, out := &in.NetworkStats, &out.NetworkStats *out = new(ServiceDiscoveryNetworkStatsConfig) diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml index e58c5325d..bf0ec26be 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml @@ -2488,6 +2488,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. @@ -10909,6 +10917,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json index ea311f2d5..c4bb96e3f 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json @@ -2588,6 +2588,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", @@ -10742,6 +10746,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml index d6908b943..fbaa47722 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml @@ -2488,6 +2488,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json index ad4153ce5..24085c65f 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json @@ -2592,6 +2592,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index 032573381..d52a723ef 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -2488,6 +2488,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. @@ -10959,6 +10967,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json index a1e193416..db93361c7 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json @@ -2588,6 +2588,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", @@ -10807,6 +10811,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 07cc8e4a5..9dab0ca53 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,6 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | +| features.serviceDiscovery.enabledByDefault | EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. | | features.serviceDiscovery.networkStats.enabled | Enables the Service Discovery Network Stats feature. Default: true | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | diff --git a/docs/configuration_public.md b/docs/configuration_public.md index c01420697..d4832b8f8 100644 --- a/docs/configuration_public.md +++ b/docs/configuration_public.md @@ -384,6 +384,9 @@ spec: `features.serviceDiscovery.enabled` : Enables the service discovery check. Default: false +`features.serviceDiscovery.enabledByDefault` +: EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. + `features.serviceDiscovery.networkStats.enabled` : Enables the Service Discovery Network Stats feature. Default: true diff --git a/internal/controller/datadogagent/controller_v2_test.go b/internal/controller/datadogagent/controller_v2_test.go index af15dbfdf..b23b5f6af 100644 --- a/internal/controller/datadogagent/controller_v2_test.go +++ b/internal/controller/datadogagent/controller_v2_test.go @@ -1752,7 +1752,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { wantFunc: func(t *testing.T, c client.Client) { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "ccac39a3a007bad81d7baf8febc6445f", + constants.MD5DDAIDeploymentAnnotationKey: "62d2822cc8547055dc8e2fca6f222a17", } verifyDDAI(t, c, []v1alpha1.DatadogAgentInternal{expectedDDAI}) @@ -1784,7 +1784,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { baseDDAI := getBaseDDAI(dda) expectedDDAI := baseDDAI.DeepCopy() expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "f2aa21d0ecced63c091ca2df3d31e451", + constants.MD5DDAIDeploymentAnnotationKey: "c362b9a0aa0e2ad1a1d60f4ee8575c8f", } expectedDDAI.Spec.Features.ClusterChecks.UseClusterChecksRunners = apiutils.NewBoolPointer(true) expectedDDAI.Spec.Global.Credentials = &v2alpha1.DatadogCredentials{ @@ -1860,7 +1860,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { profileDDAI := getBaseDDAI(dda) profileDDAI.Name = "foo-profile" profileDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "73e0cc1e445001e326507ac23654104e", + constants.MD5DDAIDeploymentAnnotationKey: "2c3c1664f08fb6d6591294f2c878d1dd", } profileDDAI.Labels[constants.ProfileLabelKey] = "foo-profile" profileDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ @@ -2094,7 +2094,7 @@ func getBaseDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { func getDefaultDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "f98c0497c66e2747f6d116970ab8f0b1", + constants.MD5DDAIDeploymentAnnotationKey: "7e6c12e645247762609327ab80b63d9e", } expectedDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ v2alpha1.NodeAgentComponentName: { diff --git a/internal/controller/datadogagent/defaults/datadogagent_default.go b/internal/controller/datadogagent/defaults/datadogagent_default.go index 3c1324834..20f787872 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default.go @@ -42,6 +42,7 @@ const ( defaultGPUMonitoringEnabled bool = false defaultServiceDiscoveryEnabled bool = false + defaultServiceDiscoveryEnabledByDefault bool = false defaultServiceDiscoveryNetworkStatsEnabled bool = true defaultAPMEnabled bool = true @@ -304,8 +305,9 @@ func defaultFeaturesConfig(ddaSpec *v2alpha1.DatadogAgentSpec) { ddaSpec.Features.ServiceDiscovery = &v2alpha1.ServiceDiscoveryFeatureConfig{} } apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) + apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.EnabledByDefault, defaultServiceDiscoveryEnabledByDefault) - if *ddaSpec.Features.ServiceDiscovery.Enabled { + if *ddaSpec.Features.ServiceDiscovery.Enabled || *ddaSpec.Features.ServiceDiscovery.EnabledByDefault { if ddaSpec.Features.ServiceDiscovery.NetworkStats == nil { ddaSpec.Features.ServiceDiscovery.NetworkStats = &v2alpha1.ServiceDiscoveryNetworkStatsConfig{} } diff --git a/internal/controller/datadogagent/defaults/datadogagent_default_test.go b/internal/controller/datadogagent/defaults/datadogagent_default_test.go index 890ae6adf..ab4752823 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default_test.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default_test.go @@ -212,7 +212,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -361,7 +362,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -460,7 +462,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -596,7 +599,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -757,7 +761,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -913,7 +918,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1069,7 +1075,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1234,7 +1241,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1390,7 +1398,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1549,7 +1558,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1751,7 +1761,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1876,7 +1887,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2033,7 +2045,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2213,7 +2226,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2372,7 +2386,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2544,7 +2559,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(valueTrue), + Enabled: apiutils.NewBoolPointer(valueTrue), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), NetworkStats: &v2alpha1.ServiceDiscoveryNetworkStatsConfig{ Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryNetworkStatsEnabled), }, diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature.go b/internal/controller/datadogagent/feature/servicediscovery/feature.go index 214d5100c..285d5b5b3 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature.go @@ -6,6 +6,8 @@ package servicediscovery import ( + "fmt" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -30,7 +32,9 @@ func buildFeature(*feature.Options) feature.Feature { } type serviceDiscoveryFeature struct { - networkStatsEnabled bool + networkStatsEnabled bool + useSystemProbeLite bool + userExplicitlyEnabled bool } // ID returns the ID of the Feature @@ -40,21 +44,66 @@ func (f *serviceDiscoveryFeature) ID() feature.IDType { // Configure is used to configure the feature from a v2alpha1.DatadogAgent instance. func (f *serviceDiscoveryFeature) Configure(_ metav1.Object, ddaSpec *v2alpha1.DatadogAgentSpec, _ *v2alpha1.RemoteConfigConfiguration) (reqComp feature.RequiredComponents) { - if ddaSpec.Features != nil && ddaSpec.Features.ServiceDiscovery != nil && apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.Enabled) { - reqComp.Agent = feature.RequiredComponent{ - IsRequired: apiutils.NewBoolPointer(true), - Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, - } + if ddaSpec.Features == nil || ddaSpec.Features.ServiceDiscovery == nil { + return reqComp + } - f.networkStatsEnabled = true - if ddaSpec.Features.ServiceDiscovery.NetworkStats != nil { - f.networkStatsEnabled = apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.NetworkStats.Enabled) - } + sd := ddaSpec.Features.ServiceDiscovery + + // Explicit Enabled=false always disables the feature, even if EnabledByDefault=true. + if sd.Enabled != nil && !*sd.Enabled { + return reqComp + } + // Feature requires either an explicit opt-in or a default enablement. + if !apiutils.BoolValue(sd.Enabled) && !apiutils.BoolValue(sd.EnabledByDefault) { + return reqComp + } + + reqComp.Agent = feature.RequiredComponent{ + IsRequired: apiutils.NewBoolPointer(true), + Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, + } + + f.networkStatsEnabled = true + if sd.NetworkStats != nil { + f.networkStatsEnabled = apiutils.BoolValue(sd.NetworkStats.Enabled) } + f.useSystemProbeLite = !hasOtherSystemProbeFeatures(ddaSpec.Features) + f.userExplicitlyEnabled = apiutils.BoolValue(sd.Enabled) + return reqComp } +// systemProbeLiteCommand returns the shell command for the system-probe container when +// system-probe-lite is preferred. If userOptedIn is true (user explicitly enabled discovery), +// system-probe is used as the fallback — the user has accepted the resource cost. +// Otherwise (enabled by default), the fallback is sleep infinity to avoid unexpectedly +// running system-probe on older agent images where the discovery feature may not be supported. +func systemProbeLiteCommand(socketPath string, userOptedIn bool) string { + fallback := "sleep infinity" + if userOptedIn { + fallback = "system-probe --config=/etc/datadog-agent/system-probe.yaml" + } + return fmt.Sprintf("system-probe-lite run --socket %s --log-level ${DD_LOG_LEVEL:-info} || %s", socketPath, fallback) +} + +// hasOtherSystemProbeFeatures returns true if any feature besides service discovery +// requires the full system-probe binary. When true, system-probe-lite cannot be used. +func hasOtherSystemProbeFeatures(features *v2alpha1.DatadogFeatures) bool { + if features == nil { + return false + } + return (features.NPM != nil && apiutils.BoolValue(features.NPM.Enabled)) || + (features.CWS != nil && apiutils.BoolValue(features.CWS.Enabled)) || + (features.CSPM != nil && apiutils.BoolValue(features.CSPM.Enabled) && apiutils.BoolValue(features.CSPM.RunInSystemProbe)) || + (features.USM != nil && apiutils.BoolValue(features.USM.Enabled)) || + (features.OOMKill != nil && apiutils.BoolValue(features.OOMKill.Enabled)) || + (features.TCPQueueLength != nil && apiutils.BoolValue(features.TCPQueueLength.Enabled)) || + (features.EBPFCheck != nil && apiutils.BoolValue(features.EBPFCheck.Enabled)) || + (features.GPU != nil && apiutils.BoolValue(features.GPU.Enabled) && apiutils.BoolValue(features.GPU.PrivilegedMode)) +} + // ManageDependencies allows a feature to manage its dependencies. // Feature's dependencies should be added in the store. func (f *serviceDiscoveryFeature) ManageDependencies(managers feature.ResourceManagers, provider string) error { @@ -136,6 +185,18 @@ func (f *serviceDiscoveryFeature) ManageNodeAgent(managers feature.PodTemplateMa managers.EnvVar().AddEnvVarToContainer(apicommon.CoreAgentContainerName, socketEnvVar) managers.EnvVar().AddEnvVarToContainer(apicommon.SystemProbeContainerName, socketEnvVar) + // Direct PodTemplateSpec mutation: no managers API for command overrides. + if f.useSystemProbeLite { + for i := range managers.PodTemplateSpec().Spec.Containers { + c := &managers.PodTemplateSpec().Spec.Containers[i] + if c.Name == string(apicommon.SystemProbeContainerName) { + c.Command = []string{"/bin/sh", "-c"} + c.Args = []string{systemProbeLiteCommand(common.DefaultSystemProbeSocketPath, f.userExplicitlyEnabled)} + break + } + } + } + return nil } diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go index 8cddad23b..06fa110c7 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go @@ -44,6 +44,42 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { ddaServiceDiscoveryEnabledWithNetStats.Spec.Features.ServiceDiscovery.NetworkStats.Enabled = apiutils.NewBoolPointer(true) } + ddaWithNPM := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + NPM: &v2alpha1.NPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + }, + } + + ddaWithCWS := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + CWS: &v2alpha1.CWSFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + }, + } + + ddaEnabledByDefault := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + EnabledByDefault: apiutils.NewBoolPointer(true), + }, + }, + }, + } + tests := test.FeatureTestSuite{ { Name: "service discovery not enabled", @@ -54,25 +90,176 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { Name: "service discovery enabled - no network stats", DDA: ddaServiceDiscoveryEnabledNoNetStats, WantConfigure: true, - Agent: test.NewDefaultComponentTest().WithWantFunc(getWantFunc(noNetStats)), + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(noNetStats, true, true)), }, { Name: "service discovery enabled - with network stats", DDA: ddaServiceDiscoveryEnabledWithNetStats, WantConfigure: true, - Agent: test.NewDefaultComponentTest().WithWantFunc(getWantFunc(withNetStats)), + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, true, true)), + }, + { + Name: "system-probe-lite not used when NPM also enabled", + DDA: &ddaWithNPM, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, false, true)), + }, + { + Name: "system-probe-lite not used when CWS also enabled", + DDA: &ddaWithCWS, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, false, true)), + }, + { + Name: "system-probe-lite enabled by default - no system-probe fallback", + DDA: &ddaEnabledByDefault, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, true, false)), }, } tests.Run(t, buildFeature) } +func Test_hasOtherSystemProbeFeatures(t *testing.T) { + tests := []struct { + name string + features *v2alpha1.DatadogFeatures + want bool + }{ + { + name: "nil features", + features: nil, + want: false, + }, + { + name: "no other features", + features: &v2alpha1.DatadogFeatures{}, + want: false, + }, + { + name: "NPM enabled", + features: &v2alpha1.DatadogFeatures{ + NPM: &v2alpha1.NPMFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "CWS enabled", + features: &v2alpha1.DatadogFeatures{ + CWS: &v2alpha1.CWSFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "USM enabled", + features: &v2alpha1.DatadogFeatures{ + USM: &v2alpha1.USMFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "OOMKill enabled", + features: &v2alpha1.DatadogFeatures{ + OOMKill: &v2alpha1.OOMKillFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "TCPQueueLength enabled", + features: &v2alpha1.DatadogFeatures{ + TCPQueueLength: &v2alpha1.TCPQueueLengthFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "EBPFCheck enabled", + features: &v2alpha1.DatadogFeatures{ + EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "CSPM enabled with RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + RunInSystemProbe: apiutils.NewBoolPointer(true), + }, + }, + want: true, + }, + { + name: "CSPM enabled without RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + want: false, + }, + { + name: "GPU enabled with PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + PrivilegedMode: apiutils.NewBoolPointer(true), + }, + }, + want: true, + }, + { + name: "GPU enabled without PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, hasOtherSystemProbeFeatures(tt.features)) + }) + } +} + const ( noNetStats = false withNetStats = true ) -func getWantFunc(withNetStats bool) func(t testing.TB, mgrInterface feature.PodTemplateManagers) { +func createFuncWithSystemProbeContainer() func(testing.TB) (feature.PodTemplateManagers, string) { + return func(t testing.TB) (feature.PodTemplateManagers, string) { + newPTS := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: string(apicommon.CoreAgentContainerName), + }, + { + Name: string(apicommon.SystemProbeContainerName), + }, + }, + }, + } + return fake.NewPodTemplateManagers(t, newPTS), "" + } +} + +func getWantFunc(withNetStats bool, useSPL bool, userOptedIn bool) func(t testing.TB, mgrInterface feature.PodTemplateManagers) { return func(t testing.TB, mgrInterface feature.PodTemplateManagers) { mgr := mgrInterface.(*fake.PodTemplateManagers) @@ -200,7 +387,6 @@ func getWantFunc(withNetStats bool) func(t testing.TB, mgrInterface feature.PodT }, } - // check env vars wantSPEnvVars := []*corev1.EnvVar{ { Name: DDServiceDiscoveryEnabled, @@ -221,6 +407,20 @@ func getWantFunc(withNetStats bool) func(t testing.TB, mgrInterface feature.PodT systemProbeEnvVars := mgr.EnvVarMgr.EnvVarsByC[apicommon.SystemProbeContainerName] assert.True(t, apiutils.IsEqualStruct(systemProbeEnvVars, wantSPEnvVars), "System Probe envvars \ndiff = %s", cmp.Diff(systemProbeEnvVars, wantSPEnvVars)) + + // check system-probe container command override + for _, c := range mgr.PodTemplateSpec().Spec.Containers { + if c.Name == string(apicommon.SystemProbeContainerName) { + if useSPL { + assert.Equal(t, []string{"/bin/sh", "-c"}, c.Command, "System Probe command should be overridden for system-probe-lite") + assert.Equal(t, []string{systemProbeLiteCommand(common.DefaultSystemProbeSocketPath, userOptedIn)}, c.Args, "System Probe args mismatch") + } else { + assert.Empty(t, c.Command, "System Probe command should not be overridden") + assert.Empty(t, c.Args, "System Probe args should not be overridden") + } + break + } + } } } From 14334d12168a223fefd761c183142838250b261f Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Thu, 26 Mar 2026 11:29:30 +0100 Subject: [PATCH 2/5] docs: applied suggestions --- docs/configuration.v2alpha1.md | 2 +- docs/configuration_public.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 9dab0ca53..81e015e9e 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,7 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | -| features.serviceDiscovery.enabledByDefault | EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. | +| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | | features.serviceDiscovery.networkStats.enabled | Enables the Service Discovery Network Stats feature. Default: true | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | diff --git a/docs/configuration_public.md b/docs/configuration_public.md index d4832b8f8..48e82b8a7 100644 --- a/docs/configuration_public.md +++ b/docs/configuration_public.md @@ -385,7 +385,7 @@ spec: : Enables the service discovery check. Default: false `features.serviceDiscovery.enabledByDefault` -: EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. +: Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. `features.serviceDiscovery.networkStats.enabled` : Enables the Service Discovery Network Stats feature. Default: true From cf6e1d3be2df8d1bea1e835fa248b723f958f8a4 Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Thu, 26 Mar 2026 11:31:01 +0100 Subject: [PATCH 3/5] fix unset enabled case --- .../datadogagent/defaults/datadogagent_default.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/controller/datadogagent/defaults/datadogagent_default.go b/internal/controller/datadogagent/defaults/datadogagent_default.go index 20f787872..aacb56cfc 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default.go @@ -304,10 +304,14 @@ func defaultFeaturesConfig(ddaSpec *v2alpha1.DatadogAgentSpec) { if ddaSpec.Features.ServiceDiscovery == nil { ddaSpec.Features.ServiceDiscovery = &v2alpha1.ServiceDiscoveryFeatureConfig{} } - apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.EnabledByDefault, defaultServiceDiscoveryEnabledByDefault) + // Only default Enabled to false when not enabled-by-default, so that Enabled=nil remains + // distinguishable from Enabled=false (explicit user opt-out) when EnabledByDefault=true. + if !apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.EnabledByDefault) { + apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) + } - if *ddaSpec.Features.ServiceDiscovery.Enabled || *ddaSpec.Features.ServiceDiscovery.EnabledByDefault { + if apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.Enabled) || apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.EnabledByDefault) { if ddaSpec.Features.ServiceDiscovery.NetworkStats == nil { ddaSpec.Features.ServiceDiscovery.NetworkStats = &v2alpha1.ServiceDiscoveryNetworkStatsConfig{} } From 8b7ca14f4a4c4c62394511485dbdce43c27d4f3e Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Fri, 27 Mar 2026 10:15:08 +0100 Subject: [PATCH 4/5] fix codex CR --- .../datadogagent/feature/servicediscovery/feature.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature.go b/internal/controller/datadogagent/feature/servicediscovery/feature.go index 285d5b5b3..b91b474c3 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature.go @@ -33,8 +33,11 @@ func buildFeature(*feature.Options) feature.Feature { type serviceDiscoveryFeature struct { networkStatsEnabled bool - useSystemProbeLite bool userExplicitlyEnabled bool + // features holds a pointer to the live DDA features struct so that ManageNodeAgent + // can re-evaluate hasOtherSystemProbeFeatures after Remote Config state has been + // merged by other features' Configure calls (e.g. USM merges RC state into the spec). + features *v2alpha1.DatadogFeatures } // ID returns the ID of the Feature @@ -69,7 +72,7 @@ func (f *serviceDiscoveryFeature) Configure(_ metav1.Object, ddaSpec *v2alpha1.D f.networkStatsEnabled = apiutils.BoolValue(sd.NetworkStats.Enabled) } - f.useSystemProbeLite = !hasOtherSystemProbeFeatures(ddaSpec.Features) + f.features = ddaSpec.Features f.userExplicitlyEnabled = apiutils.BoolValue(sd.Enabled) return reqComp @@ -186,7 +189,9 @@ func (f *serviceDiscoveryFeature) ManageNodeAgent(managers feature.PodTemplateMa managers.EnvVar().AddEnvVarToContainer(apicommon.SystemProbeContainerName, socketEnvVar) // Direct PodTemplateSpec mutation: no managers API for command overrides. - if f.useSystemProbeLite { + // Re-evaluate here (not cached from Configure) so that RC state merged by other + // features' Configure calls (e.g. USM) is taken into account. + if !hasOtherSystemProbeFeatures(f.features) { for i := range managers.PodTemplateSpec().Spec.Containers { c := &managers.PodTemplateSpec().Spec.Containers[i] if c.Name == string(apicommon.SystemProbeContainerName) { From 550f2e79b40ad018087d23529c071a0f16acb2cc Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Fri, 27 Mar 2026 11:07:59 +0100 Subject: [PATCH 5/5] regenerate docs --- api/datadoghq/v2alpha1/datadogagent_types.go | 9 ++++----- .../datadoghq.com_datadogagentinternals.yaml | 18 ++++++++---------- ...ghq.com_datadogagentinternals_v1alpha1.json | 4 ++-- .../v1/datadoghq.com_datadogagentprofiles.yaml | 9 ++++----- ...oghq.com_datadogagentprofiles_v1alpha1.json | 2 +- .../bases/v1/datadoghq.com_datadogagents.yaml | 18 ++++++++---------- .../datadoghq.com_datadogagents_v2alpha1.json | 4 ++-- docs/configuration.v2alpha1.md | 2 +- 8 files changed, 30 insertions(+), 36 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index e505f7f63..e99e95bbc 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -647,11 +647,10 @@ type ServiceDiscoveryFeatureConfig struct { // +optional Enabled *bool `json:"enabled,omitempty"` - // EnabledByDefault is set by the operator when it enables this feature via default configuration, - // as opposed to an explicit user choice. When true, the system-probe binary is not used as a - // fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - // instead, to avoid unexpected resource usage on older agent images. - // This field is managed by the operator and should not be set by users. + // Indicates that the operator enabled this feature automatically rather than in response to an + // explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + // back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + // older agent images. This field is managed by the operator and must not be set by users. // +optional EnabledByDefault *bool `json:"enabledByDefault,omitempty"` diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml index bf0ec26be..97b8db5c9 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml @@ -2490,11 +2490,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- @@ -10919,11 +10918,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json index c4bb96e3f..3feea4293 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json @@ -2589,7 +2589,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { @@ -10747,7 +10747,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml index fbaa47722..2e67e2f1f 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml @@ -2490,11 +2490,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json index 24085c65f..afc7ff06e 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json @@ -2593,7 +2593,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index d52a723ef..faad8df42 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -2490,11 +2490,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- @@ -10969,11 +10968,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json index db93361c7..115e2bb07 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json @@ -2589,7 +2589,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { @@ -10812,7 +10812,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 81e015e9e..1ac3acbe7 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,7 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | -| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | +| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | | features.serviceDiscovery.networkStats.enabled | Enables the Service Discovery Network Stats feature. Default: true | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false |