From f058990ad9c5c92150de1af11d45613a73cc778d Mon Sep 17 00:00:00 2001 From: rahulguptajss Date: Fri, 3 Apr 2026 15:03:31 +0530 Subject: [PATCH 1/3] feat: add callhome.data.outage.detected to EMS config --- conf/ems/9.6.0/ems.yaml | 7 ++++++- container/prometheus/ems_alert_rules.yml | 24 ++++++++++++++++++++++++ docs/resources/ems-alert-runbook.md | 14 ++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/conf/ems/9.6.0/ems.yaml b/conf/ems/9.6.0/ems.yaml index 4ebfe64ce..b128a169b 100644 --- a/conf/ems/9.6.0/ems.yaml +++ b/conf/ems/9.6.0/ems.yaml @@ -1041,4 +1041,9 @@ events: - name: smbc.pfo.completed exports: - - parameters.dstpath => dst_path \ No newline at end of file + - parameters.dstpath => dst_path + + - name: callhome.data.outage.detected + exports: + - ^^node.name => node + - parameters.subject => subject \ No newline at end of file diff --git a/container/prometheus/ems_alert_rules.yml b/container/prometheus/ems_alert_rules.yml index d5e3085d7..d1184041f 100644 --- a/container/prometheus/ems_alert_rules.yml +++ b/container/prometheus/ems_alert_rules.yml @@ -531,6 +531,30 @@ groups: impact: "Availability" runbook: "https://netapp.github.io/harvest/nightly/resources/ems-alert-runbook/#nvram-battery-low" + - alert: Data Outage Detected + expr: last_over_time(ems_events{message="callhome.data.outage.detected"}[1d]) == 1 + labels: + severity: > + {{- if $labels.severity -}} + {{- if eq $labels.severity "alert" -}} + critical + {{- else if eq $labels.severity "error" -}} + warning + {{- else if eq $labels.severity "emergency" -}} + critical + {{- else if eq $labels.severity "notice" -}} + info + {{- else if eq $labels.severity "informational" -}} + info + {{- else -}} + {{ $labels.severity }} + {{- end -}} + {{- end -}} + annotations: + summary: "Call home for {{ $labels.subject }} on node {{ $labels.node }}" + impact: "Availability" + runbook: "https://netapp.github.io/harvest/nightly/resources/ems-alert-runbook/#data-outage-detected" + - alert: HA Interconnect Down expr: last_over_time(ems_events{message="callhome.hainterconnect.down"}[1d]) == 1 labels: diff --git a/docs/resources/ems-alert-runbook.md b/docs/resources/ems-alert-runbook.md index 74981f4ee..4c2ea6b87 100644 --- a/docs/resources/ems-alert-runbook.md +++ b/docs/resources/ems-alert-runbook.md @@ -401,6 +401,20 @@ Perform the following corrective actions: 2. If the battery was replaced recently or the system was non-operational for an extended period of time, monitor the battery to verify that it is charging properly. 3. Contact NetApp technical support if the battery runtime continues to decrease below critical levels, and the storage system shuts down automatically. +### Data Outage Detected + +**Impact**: Availability + +**EMS Event**: `callhome.data.outage.detected` + +This message occurs when the system detects that it has encountered an outage prior to this boot. +If your system is configured to do so, it generates and transmits an AutoSupport (or 'call home') message to NetApp technical support and to the configured destinations. +Successful delivery of an AutoSupport message significantly improves problem determination and resolution. + +**Remediation** + +Contact NetApp technical support. + ### NetBIOS Name Conflict **Impact**: Availability From b2811bf95debac4de114da5423840a2718615bed Mon Sep 17 00:00:00 2001 From: rahulguptajss Date: Fri, 3 Apr 2026 16:42:53 +0530 Subject: [PATCH 2/3] feat: add callhome.data.outage.detected to EMS config --- integration/test/alert_rule_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/test/alert_rule_test.go b/integration/test/alert_rule_test.go index 357643ea4..1fdd73013 100644 --- a/integration/test/alert_rule_test.go +++ b/integration/test/alert_rule_test.go @@ -185,8 +185,8 @@ func parseEmsLabels(exports *node.Node) string { var labels []string if exports != nil { for _, export := range exports.GetAllChildContentS() { - name, display, _, _ := template.ParseMetric(export) - if strings.HasPrefix(name, "parameters") { + _, display, _, _ := template.ParseMetric(export) + if display != "" { labels = append(labels, display) } } From 78a995a84417295a183254c6da74eff39b3a93e7 Mon Sep 17 00:00:00 2001 From: rahulguptajss Date: Fri, 3 Apr 2026 16:46:00 +0530 Subject: [PATCH 3/3] feat: add callhome.data.outage.detected to EMS config --- conf/ems/9.6.0/ems.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/ems/9.6.0/ems.yaml b/conf/ems/9.6.0/ems.yaml index b128a169b..64167df6c 100644 --- a/conf/ems/9.6.0/ems.yaml +++ b/conf/ems/9.6.0/ems.yaml @@ -1045,5 +1045,5 @@ events: - name: callhome.data.outage.detected exports: - - ^^node.name => node + - ^^node.name => node - parameters.subject => subject \ No newline at end of file