diff --git a/terraform/gold-eb75ad-team/alerts.tf b/terraform/gold-eb75ad-team/alerts.tf index 099b1f8..4664eb8 100644 --- a/terraform/gold-eb75ad-team/alerts.tf +++ b/terraform/gold-eb75ad-team/alerts.tf @@ -504,116 +504,6 @@ resource "sysdig_monitor_alert_promql" "prod_kc_disk_log_pv_usage_sixty" { } } -# NO LONGER WORKING IN SYSDIG -# # The Keycloak deployment has a max surge of 3, and the keycloak pods -# # require 2 cores. Setting limit at (3*2+1) cores -# ## Max surge as of Aug 2024 1, Pod count 3, CPU limit 4 (1*4+1) = 5 -# resource "sysdig_monitor_alert_promql" "prod_gold_cpu_limit" { -# name = "[GOLD PROD] CPU limit of pods nearing namespace limit" -# description = "There is no longer CPU room to accomodate the keycloak max surge of 1 pod" -# severity = 2 -# enabled = true - -# promql = "(kube_resourcequota_sysdig_limits_cpu_hard -kube_resourcequota_sysdig_limits_cpu_used{namespace=\"eb75ad-prod\",kube_cluster_name=\"gold\",kube_resourcequota_label_scope_notterminating='true'}) < 5" -# trigger_after_minutes = 2 - -# notification_channels = [132277, 57336] -# custom_notification { -# title = "{{__alert_name__}} is {{__alert_status__}}" -# } -# } - -# # The Keycloak deployment has a max surge of 3, and the keycloak pods -# # require 2 cores. Setting limit at (3*2+1) cores -# ## Max surge as of Aug 2024 1, Pod count 3, CPU limit 4 (1*4+1) = 5 -# resource "sysdig_monitor_alert_promql" "test_gold_cpu_limit" { -# name = "[GOLD TEST] CPU limit of pods nearing namespace limit" -# description = "There is no longer CPU room to accomodate the keycloak max surge of 1 pod" -# severity = 4 -# enabled = true - -# promql = "(kube_resourcequota_sysdig_limits_cpu_hard -kube_resourcequota_sysdig_limits_cpu_used{namespace=\"eb75ad-test\",kube_cluster_name=\"gold\",kube_resourcequota_label_scope_notterminating='true'}) < 5" -# trigger_after_minutes = 2 - -# notification_channels = [132277, 57336] -# custom_notification { -# title = "{{__alert_name__}} is {{__alert_status__}}" -# } -# } - -# # The Keycloak deployment has a max surge of 3, and the keycloak pods -# # require 2 cores. Setting limit at (3*2+1) cores -# ## Max surge as of Aug 2024 1, Pod count 3, CPU limit 4 (1*4+1) = 5 -# resource "sysdig_monitor_alert_promql" "dev_gold_cpu_limit" { -# name = "[GOLD DEV] CPU limit of pods nearing namespace limit" -# description = "There is no longer CPU room to accomodate the keycloak max surge of 1 pod" -# severity = 4 -# enabled = true - -# promql = "(kube_resourcequota_sysdig_limits_cpu_hard -kube_resourcequota_sysdig_limits_cpu_used{namespace=\"eb75ad-dev\",kube_cluster_name=\"gold\",kube_resourcequota_label_scope_notterminating='true'}) < 5" -# trigger_after_minutes = 2 - -# notification_channels = [132277, 57336] -# custom_notification { -# title = "{{__alert_name__}} is {{__alert_status__}}" -# } -# } - -# # The Keycloak deployment has a max surge of 3, and the keycloak pods -# # require 4Gi of memory. Setting limit at (3*4+4)Gi -# ## Max surge as of Aug 2024 1, Setting limit at (1*4+4)Gi -# resource "sysdig_monitor_alert_promql" "prod_gold_memory_limit" { -# name = "[GOLD PROD] Memory limit of pods nearing namespace limit" -# description = "There is almost no Memory room to accomodate the keycloak max surge of 1 pod" -# severity = 2 -# enabled = true - -# promql = "(kube_resourcequota_sysdig_limits_memory_hard -kube_resourcequota_sysdig_limits_memory_used{namespace=\"eb75ad-prod\",kube_cluster_name=\"gold\",kube_resourcequota_label_scope_notterminating='true'}) < 8000000000" -# trigger_after_minutes = 2 - -# notification_channels = [132277, 57336] -# custom_notification { -# title = "{{__alert_name__}} is {{__alert_status__}}" -# } -# } - -# # The Keycloak deployment has a max surge of 3, and the keycloak pods -# # require 4Gi of memory. Setting limit at (3*4+4)Gi -# ## Max surge as of Aug 2024 1, Setting limit at (1*4+4)Gi -# resource "sysdig_monitor_alert_promql" "dev_gold_memory_limit" { -# name = "[GOLD DEV] Memory limit of pods nearing namespace limit" -# description = "There is almost no Memory room to accomodate the keycloak max surge of 1 pod" -# severity = 2 -# enabled = true - -# promql = "(kube_resourcequota_sysdig_limits_memory_hard -kube_resourcequota_sysdig_limits_memory_used{namespace=\"eb75ad-dev\",kube_cluster_name=\"gold\",kube_resourcequota_label_scope_notterminating='true'}) < 8000000000" -# trigger_after_minutes = 2 - -# notification_channels = [132277, 57336] -# custom_notification { -# title = "{{__alert_name__}} is {{__alert_status__}}" -# } -# } - -# # The Keycloak deployment has a max surge of 3, and the keycloak pods -# # require 4Gi of memory. Setting limit at (3*4+4)Gi -# ## Max surge as of Aug 2024 1, Setting limit at (1*4+4)Gi - -# resource "sysdig_monitor_alert_promql" "test_gold_memory_limit" { -# name = "[GOLD TEST] Memory limit of pods nearing namespace limit" -# description = "There is almost no Memory room to accomodate the keycloak max surge of 1 pod" -# severity = 2 -# enabled = true - -# promql = "(kube_resourcequota_sysdig_limits_memory_hard -kube_resourcequota_sysdig_limits_memory_used{namespace=\"eb75ad-test\",kube_cluster_name=\"gold\",kube_resourcequota_label_scope_notterminating='true'}) < 8000000000" -# trigger_after_minutes = 2 - -# notification_channels = [132277, 57336] -# custom_notification { -# title = "{{__alert_name__}} is {{__alert_status__}}" -# } -# } - resource "sysdig_monitor_alert_v2_change" "prod_pvcuse_spike" { name = "[PROD] the prod PVC usage surged"