Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions api/datadoghq/v2alpha1/datadogagent_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2319,6 +2319,49 @@ type RemoteConfigConfiguration struct {
Features *DatadogFeatures `json:"features,omitempty"`
}

// ExperimentPhase names the phase a Fleet Automation experiment is currently in.
// +kubebuilder:validation:Enum=running;rollback;promoted;aborted;timeout
type ExperimentPhase string

const (
	// ExperimentPhaseRunning means a startExperiment signal was received and the experiment is active.
	ExperimentPhaseRunning ExperimentPhase = "running"
	// ExperimentPhaseRollback means a stopExperiment signal was received and a rollback is in progress.
	ExperimentPhaseRollback ExperimentPhase = "rollback"
	// ExperimentPhasePromoted means a promoteExperiment signal was received and the experiment is now permanent.
	ExperimentPhasePromoted ExperimentPhase = "promoted"
	// ExperimentPhaseAborted means the experiment was aborted because of an external DDA spec change.
	ExperimentPhaseAborted ExperimentPhase = "aborted"
	// ExperimentPhaseTimeout means the operator rolled back automatically once the timeout elapsed.
	ExperimentPhaseTimeout ExperimentPhase = "timeout"
)

// NOTE(maintainers): the doc comments on the fields below are mirrored as the
// field descriptions in the generated CRD manifests under config/crd/bases/v1
// (datadoghq.com_datadogagents.yaml and datadoghq.com_datadogagents_v2alpha1.json).
// Regenerate those manifests whenever a comment, JSON tag, or marker here changes.
// This note is deliberately detached from the doc comment below.

// ExperimentStatus tracks the state of a Fleet Automation experiment.
// +k8s:openapi-gen=true
type ExperimentStatus struct {
// Phase is the current state of the experiment.
// +optional
Phase ExperimentPhase `json:"phase,omitempty"`
// StartedAt is the timestamp when the experiment began.
// Used by the reconciler to compute elapsed time for auto-rollback.
// +optional
StartedAt *metav1.Time `json:"startedAt,omitempty"`
// BaselineRevision is the name of the ControllerRevision capturing the pre-experiment spec.
// Locked at startExperiment and never shifted, even if subsequent edits occur.
// +optional
BaselineRevision string `json:"baselineRevision,omitempty"`
// ID is the unique experiment ID sent by Fleet Automation.
// Optional in the CRD schema, but required by the RC signal handler.
// +optional
ID string `json:"id,omitempty"`
// ExpectedSpecHash is the truncated MD5 hash of the spec that FA sent in
// startExperiment. The reconciler uses this on the first reconcile to verify
// the current spec matches what FA intended, detecting user edits that land
// between the RC spec patch and the first reconcile.
// +optional
ExpectedSpecHash string `json:"expectedSpecHash,omitempty"`
}

// DatadogAgentStatus defines the observed state of DatadogAgent.
// +k8s:openapi-gen=true
type DatadogAgentStatus struct {
Expand Down Expand Up @@ -2346,6 +2389,15 @@ type DatadogAgentStatus struct {
// RemoteConfigConfiguration stores the configuration received from RemoteConfig.
// +optional
RemoteConfigConfiguration *RemoteConfigConfiguration `json:"remoteConfigConfiguration,omitempty"`
// CurrentRevision is the name of the ControllerRevision for the spec currently applied.
// +optional
CurrentRevision string `json:"currentRevision,omitempty"`
// PreviousRevision is the name of the ControllerRevision for the spec just before the current one.
// +optional
PreviousRevision string `json:"previousRevision,omitempty"`
// Experiment tracks the state of an active or recent Fleet Automation experiment.
// +optional
Experiment *ExperimentStatus `json:"experiment,omitempty"`
}

// DatadogAgent defines Agent configuration, see reference https://github.com/DataDog/datadog-operator/blob/main/docs/configuration.v2alpha1.md
Expand Down
24 changes: 24 additions & 0 deletions api/datadoghq/v2alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 71 additions & 1 deletion api/datadoghq/v2alpha1/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,11 @@ spec:
resources:
- controllerrevisions
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- apps
Expand Down
40 changes: 40 additions & 0 deletions config/crd/bases/v1/datadoghq.com_datadogagents.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8446,6 +8446,43 @@ spec:
x-kubernetes-list-map-keys:
- type
x-kubernetes-list-type: map
currentRevision:
description: CurrentRevision is the name of the ControllerRevision for the spec currently applied.
type: string
experiment:
description: Experiment tracks the state of an active or recent Fleet Automation experiment.
properties:
baselineRevision:
description: |-
BaselineRevision is the name of the ControllerRevision capturing the pre-experiment spec.
Locked at startExperiment and never shifted, even if subsequent edits occur.
type: string
expectedSpecHash:
description: |-
ExpectedSpecHash is the truncated MD5 hash of the spec that FA sent in
startExperiment. The reconciler uses this on the first reconcile to verify
the current spec matches what FA intended, detecting user edits that land
between the RC spec patch and the first reconcile.
type: string
id:
description: |-
ID is the unique experiment ID sent by Fleet Automation.
Optional in the CRD schema, but required by the RC signal handler.
type: string
phase:
description: Phase is the current state of the experiment.
enum:
- running
- rollback
- promoted
- aborted
- timeout
type: string
startedAt:
description: |-
StartedAt is the timestamp when the experiment began.
Used by the reconciler to compute elapsed time for auto-rollback.
format: date-time
type: string
type: object
otelAgentGateway:
description: The actual state of the OTel Agent Gateway as a deployment.
properties:
Expand Down Expand Up @@ -8494,6 +8531,9 @@ spec:
format: int32
type: integer
type: object
previousRevision:
description: PreviousRevision is the name of the ControllerRevision for the spec just before the current one.
type: string
remoteConfigConfiguration:
description: RemoteConfigConfiguration stores the configuration received from RemoteConfig.
properties:
Expand Down
43 changes: 43 additions & 0 deletions config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json
Original file line number Diff line number Diff line change
Expand Up @@ -8150,6 +8150,45 @@
],
"x-kubernetes-list-type": "map"
},
"currentRevision": {
"description": "CurrentRevision is the name of the ControllerRevision for the spec currently applied.",
"type": "string"
},
"experiment": {
"additionalProperties": false,
"description": "Experiment tracks the state of an active or recent Fleet Automation experiment.",
"properties": {
"baselineRevision": {
"description": "BaselineRevision is the name of the ControllerRevision capturing the pre-experiment spec.\nLocked at startExperiment and never shifted, even if subsequent edits occur.",
"type": "string"
},
"expectedSpecHash": {
"description": "ExpectedSpecHash is the truncated MD5 hash of the spec that FA sent in\nstartExperiment. The reconciler uses this on the first reconcile to verify\nthe current spec matches what FA intended, detecting user edits that land\nbetween the RC spec patch and the first reconcile.",
"type": "string"
},
"id": {
"description": "ID is the unique experiment ID sent by Fleet Automation.\nOptional in the CRD schema, but required by the RC signal handler.",
"type": "string"
},
"phase": {
"description": "Phase is the current state of the experiment.",
"enum": [
"running",
"rollback",
"promoted",
"aborted",
"timeout"
],
"type": "string"
},
"startedAt": {
"description": "StartedAt is the timestamp when the experiment began.\nUsed by the reconciler to compute elapsed time for auto-rollback.",
"format": "date-time",
"type": "string"
}
},
"type": "object"
},
"otelAgentGateway": {
"additionalProperties": false,
"description": "The actual state of the OTel Agent Gateway as a deployment.",
Expand Down Expand Up @@ -8207,6 +8246,10 @@
},
"type": "object"
},
"previousRevision": {
"description": "PreviousRevision is the name of the ControllerRevision for the spec just before the current one.",
"type": "string"
},
"remoteConfigConfiguration": {
"additionalProperties": false,
"description": "RemoteConfigConfiguration stores the configuration received from RemoteConfig.",
Expand Down
4 changes: 4 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ rules:
resources:
- controllerrevisions
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- apps
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ require (
)

require (
dario.cat/mergo v1.0.2
github.com/DataDog/datadog-agent/pkg/config/model v0.59.0-rc.5
github.com/DataDog/datadog-agent/pkg/config/remote v0.59.0-rc.5
github.com/DataDog/datadog-agent/pkg/remoteconfig/state v0.59.0-rc.5
Expand Down Expand Up @@ -74,7 +75,6 @@ require (

require (
cel.dev/expr v0.24.0 // indirect
dario.cat/mergo v1.0.2 // indirect
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/BurntSushi/toml v1.5.0 // indirect
github.com/DataDog/appsec-internal-go v1.7.0 // indirect
Expand Down
15 changes: 14 additions & 1 deletion internal/controller/datadogagent/controller_reconcile_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/common"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/component"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/defaults"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/experiment"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature"
"github.com/DataDog/datadog-operator/pkg/agentprofile"
"github.com/DataDog/datadog-operator/pkg/condition"
Expand Down Expand Up @@ -48,7 +49,19 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, instance *datadogh
instanceCopy := instance.DeepCopy()
defaults.DefaultDatadogAgentSpec(&instanceCopy.Spec)

// 4. Delegate to the main reconcile function.
// 4. Handle experiment lifecycle (ControllerRevision management, timeout, conflict detection).
// When HandleExperimentLifecycle returns shouldReturn=true (rollback/timeout),
// it has already persisted the status directly (re-fetching after spec restore
// to avoid resourceVersion conflicts). The caller just returns.
if shouldReturn, res, err := experiment.HandleExperimentLifecycle(
ctx, r.client, instanceCopy, r.scheme, time.Now(), experiment.DefaultExperimentTimeout,
); err != nil {
return res, err
} else if shouldReturn {
return res, nil
}

// 5. Delegate to the main reconcile function.
if r.options.DatadogAgentInternalEnabled {
return r.reconcileInstanceV3(ctx, reqLogger, instanceCopy)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -826,5 +826,13 @@ func IsEqualStatus(current *v2alpha1.DatadogAgentStatus, newStatus *v2alpha1.Dat
return false
}

if current.CurrentRevision != newStatus.CurrentRevision ||
current.PreviousRevision != newStatus.PreviousRevision {
return false
}
if !apiequality.Semantic.DeepEqual(current.Experiment, newStatus.Experiment) {
return false
}

return condition.IsEqualConditions(current.Conditions, newStatus.Conditions)
}
Loading
Loading