Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions config/prometheus/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,11 @@ spec:
annotations:
description: "Workload is not protected for disaster recovery (DRPC: {{ $labels.obj_name }}, Namespace: {{ $labels.obj_namespace }}). Inspect DRPC status.conditions for details."
alert_type: "DisasterRecovery"
# Warning-level alert raised once ramen_unsupported_consistency_grouping_enabled
# has stayed at 1 for 10 minutes; the description names the DRPC and namespace
# taken from the obj_name and obj_namespace labels.
- alert: UnsupportedConsistencyGroupingEnabled
expr: ramen_unsupported_consistency_grouping_enabled == 1
for: 10m
labels:
severity: warning
annotations:
description: "Unsupported consistency grouping is enabled for disaster recovery (DRPC: {{ $labels.obj_name }}, Namespace: {{ $labels.obj_namespace }})."
alert_type: "DisasterRecovery"
39 changes: 39 additions & 0 deletions internal/controller/drplacementcontrol_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,28 @@ func (r *DRPlacementControlReconciler) setWorkloadProtectionMetric(workloadProte
workloadProtectionMetrics.WorkloadProtectionStatus.Set(float64(protected))
}

// setCGEnabledMetric publishes the consistency grouping gauge for the given DRPC.
// The gauge is set to 1 when the DRPC carries the rmnutil.IsCGEnabledAnnotation
// annotation with the value "true", and to 0 in every other case.
// A nil cgEnabledMetrics makes the call a no-op.
func (r *DRPlacementControlReconciler) setCGEnabledMetric(drpc *rmn.DRPlacementControl,
	cgEnabledMetrics *CGEnabledMetrics, log logr.Logger,
) {
	if cgEnabledMetrics == nil {
		return
	}

	log.Info(fmt.Sprintf("setting metric: (%s)", CGEnabled))

	// Indexing a nil map yields the zero value, so a DRPC without any
	// annotations falls through and reports 0.
	var value float64
	if drpc.GetAnnotations()[rmnutil.IsCGEnabledAnnotation] == "true" {
		value = 1
	}

	cgEnabledMetrics.CGEnabled.Set(value)
}

//nolint:funlen
func (r *DRPlacementControlReconciler) createDRPCInstance(
ctx context.Context,
Expand Down Expand Up @@ -461,6 +483,17 @@ func (r *DRPlacementControlReconciler) createWorkloadProtectionMetricsInstance(
}
}

// createCGEnabledMetricsInstance returns a CGEnabledMetrics whose gauge is the
// CGEnabled series labelled for the given DRPC (see CGEnabledMetricLabels).
func (r *DRPlacementControlReconciler) createCGEnabledMetricsInstance(
	drpc *rmn.DRPlacementControl,
) *CGEnabledMetrics {
	gauge := NewCGEnabledMetric(CGEnabledMetricLabels(drpc)).CGEnabled

	return &CGEnabledMetrics{CGEnabled: gauge}
}

// isBeingDeleted returns true if either DRPC, user placement, or both are being deleted
func isBeingDeleted(drpc *rmn.DRPlacementControl, usrPl client.Object) bool {
return rmnutil.ResourceIsDeleted(drpc) ||
Expand Down Expand Up @@ -710,6 +743,9 @@ func (r *DRPlacementControlReconciler) finalizeDRPC(ctx context.Context, drpc *r
workloadProtectionLabels := WorkloadProtectionStatusLabels(drpc)
DeleteWorkloadProtectionStatusMetric(workloadProtectionLabels)

cgEnabledMetricLabels := CGEnabledMetricLabels(drpc)
DeleteCGEnabledMetric(cgEnabledMetricLabels)

return nil
}

Expand Down Expand Up @@ -1584,6 +1620,9 @@ func (r *DRPlacementControlReconciler) setDRPCMetrics(ctx context.Context,
workloadProtectionMetrics := r.createWorkloadProtectionMetricsInstance(drpc)
r.setWorkloadProtectionMetric(workloadProtectionMetrics, drpc.Status.Conditions, log)

cgEnabledMetrics := r.createCGEnabledMetricsInstance(drpc)
r.setCGEnabledMetric(drpc, cgEnabledMetrics, log)

drPolicy, err := GetDRPolicy(ctx, r.Client, drpc, log)
if err != nil {
return fmt.Errorf("failed to get DRPolicy %w", err)
Expand Down
39 changes: 39 additions & 0 deletions internal/controller/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
LastSyncDurationSeconds = "last_sync_duration_seconds"
LastSyncDataBytes = "last_sync_data_bytes"
WorkloadProtectionStatus = "workload_protection_status"
CGEnabled = "unsupported_consistency_grouping_enabled"
)

type SyncTimeMetrics struct {
Expand All @@ -43,6 +44,9 @@ type SyncDataBytesMetrics struct {
// WorkloadProtectionMetrics holds the Prometheus gauge used to report the
// workload protection status metric.
type WorkloadProtectionMetrics struct {
WorkloadProtectionStatus prometheus.Gauge
}
// CGEnabledMetrics holds the Prometheus gauge used to report the
// unsupported consistency grouping enabled metric.
type CGEnabledMetrics struct {
CGEnabled prometheus.Gauge
}

type SyncMetrics struct {
SyncTimeMetrics
Expand Down Expand Up @@ -90,6 +94,12 @@ var (
ObjName, // Name of the resoure [drpc-name]
ObjNamespace, // DRPC namespace
}

// cgEnabledMetricLabels lists the label names used by the cgEnabled gauge vector.
cgEnabledMetricLabels = []string{
ObjType, // Name of the type of the resource [drpc]
ObjName, // Name of the resource [drpc-name]
ObjNamespace, // DRPC namespace
}
)

var (
Expand Down Expand Up @@ -137,6 +147,15 @@ var (
},
workloadProtectionStatusLabels,
)

// cgEnabled is the gauge vector behind the CGEnabled metric; each series is
// keyed by the label names in cgEnabledMetricLabels.
cgEnabled = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: CGEnabled,
Namespace: metricNamespace,
Help: "Unsupported consistency grouping enabled status",
},
cgEnabledMetricLabels,
)
)

// lastSyncTime metrics reports value from lastGroupSyncTime taken from DRPC status
Expand Down Expand Up @@ -234,11 +253,31 @@ func DeleteWorkloadProtectionStatusMetric(labels prometheus.Labels) bool {
return workloadProtectionStatus.Delete(labels)
}

// CGEnabledMetricLabels builds the label set that identifies the CGEnabled
// metric series of the given DRPC: a fixed object type of "DRPlacementControl"
// plus the DRPC's name and namespace.
func CGEnabledMetricLabels(drpc *rmn.DRPlacementControl) prometheus.Labels {
	labels := prometheus.Labels{}
	labels[ObjType] = "DRPlacementControl"
	labels[ObjName] = drpc.Name
	labels[ObjNamespace] = drpc.Namespace

	return labels
}

// NewCGEnabledMetric returns a CGEnabledMetrics wrapping the cgEnabled gauge
// for the given label values. The gauge is obtained through GaugeVec.With,
// which panics when the labels do not match the vector's label names.
func NewCGEnabledMetric(labels prometheus.Labels) CGEnabledMetrics {
	gauge := cgEnabled.With(labels)

	return CGEnabledMetrics{CGEnabled: gauge}
}

// DeleteCGEnabledMetric removes the cgEnabled series matching the given labels
// and returns the result reported by the gauge vector's Delete call.
func DeleteCGEnabledMetric(labels prometheus.Labels) bool {
	removed := cgEnabled.Delete(labels)

	return removed
}

// init registers every custom metric collector declared in this file.
func init() {
	// Register custom metrics with the global prometheus registry.
	// MustRegister is variadic and registers the collectors in the order given.
	metrics.Registry.MustRegister(
		dRPolicySyncInterval,
		lastSyncTime,
		lastSyncDuration,
		lastSyncDataBytes,
		workloadProtectionStatus,
		cgEnabled,
	)
}
Loading