From f6d2f16fe9372366c93c91c7a6c7a1f7950f32ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luis=20D=C3=ADaz?= <ldiazn98@gmail.com>
Date: Fri, 17 Oct 2025 11:44:41 +0200
Subject: [PATCH 1/4] Test reporting the IP (ec2_host) of a down instance in the alert summary

---
 ansible/roles/prometheus/files/alert_rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/prometheus/files/alert_rules.yml b/ansible/roles/prometheus/files/alert_rules.yml
index d94ccc17..0f2309f8 100644
--- a/ansible/roles/prometheus/files/alert_rules.yml
+++ b/ansible/roles/prometheus/files/alert_rules.yml
@@ -12,7 +12,7 @@ groups:
     expr: up != 1
     for: 5m
     annotations:
-      summary: '{{ $labels.instance }} is not `up`'
+      summary: '{{ $labels.instance }} ({{$labels.ec2_host}}) is not `up`'
 
   - alert: systemd # yes, just "systemd", it's unclear what's going wrong :-)
     expr: node_systemd_system_running != 1 # that's basically output of `systemctl is-system-running`

From 74944d3ca58208c2fc08911c046f34eaaa06bfb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luis=20D=C3=ADaz?= <ldiazn98@gmail.com>
Date: Fri, 17 Oct 2025 12:05:13 +0200
Subject: [PATCH 2/4] Set InstanceDown `for` duration to 1m to make testing easier

---
 ansible/roles/prometheus/files/alert_rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/prometheus/files/alert_rules.yml b/ansible/roles/prometheus/files/alert_rules.yml
index 0f2309f8..92af7e6c 100644
--- a/ansible/roles/prometheus/files/alert_rules.yml
+++ b/ansible/roles/prometheus/files/alert_rules.yml
@@ -10,7 +10,7 @@ groups:
   # including http scraping failure
   - alert: InstanceDown
     expr: up != 1
-    for: 5m
+    for: 1m # TODO set back to 5m when we finish testing
     annotations:
       summary: '{{ $labels.instance }} ({{$labels.ec2_host}}) is not `up`'
 

From ecdecc3903c0721d7ab46530c4891537553c7603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luis=20D=C3=ADaz?= <ldiazn98@gmail.com>
Date: Fri, 17 Oct 2025 12:16:02 +0200
Subject: [PATCH 3/4] improve legibility of alert message

Only render the parenthesised ec2_host when the label is set, so the
summary does not show an empty "()" for targets without it. The
separating space lives inside the conditional, so the summary has
exactly one space between words whether or not ec2_host is present.

---
 ansible/roles/prometheus/files/alert_rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/prometheus/files/alert_rules.yml b/ansible/roles/prometheus/files/alert_rules.yml
index 92af7e6c..2bdeca66 100644
--- a/ansible/roles/prometheus/files/alert_rules.yml
+++ b/ansible/roles/prometheus/files/alert_rules.yml
@@ -12,7 +12,7 @@ groups:
     expr: up != 1
     for: 1m # TODO set back to 5m when we finish testing
     annotations:
-      summary: '{{ $labels.instance }} ({{$labels.ec2_host}}) is not `up`'
+      summary: '{{ $labels.instance }}{{ if $labels.ec2_host }} ({{ $labels.ec2_host }}){{ end }} is not `up`'
 
   - alert: systemd # yes, just "systemd", it's unclear what's going wrong :-)
     expr: node_systemd_system_running != 1 # that's basically output of `systemctl is-system-running`

From ce670a842e768705cd42528e8fc0b972bec9d416 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luis=20D=C3=ADaz?= <ldiazn98@gmail.com>
Date: Fri, 17 Oct 2025 12:30:50 +0200
Subject: [PATCH 4/4] roll back short alert interval

Testing is finished, so restore the 5m `for` duration on InstanceDown
and drop the TODO that asked for it.

---
 ansible/roles/prometheus/files/alert_rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/prometheus/files/alert_rules.yml b/ansible/roles/prometheus/files/alert_rules.yml
index 2bdeca66..81ca3da5 100644
--- a/ansible/roles/prometheus/files/alert_rules.yml
+++ b/ansible/roles/prometheus/files/alert_rules.yml
@@ -10,7 +10,7 @@ groups:
   # including http scraping failure
   - alert: InstanceDown
     expr: up != 1
-    for: 1m # TODO set back to 5m when we finish testing
+    for: 5m
     annotations:
       summary: '{{ $labels.instance }}{{ if $labels.ec2_host }} ({{ $labels.ec2_host }}){{ end }} is not `up`'