diff --git a/config/prometheus/hack.prometheus.values.yaml b/config/prometheus/hack.prometheus.values.yaml index a6e481f5..4bdf6fb1 100644 --- a/config/prometheus/hack.prometheus.values.yaml +++ b/config/prometheus/hack.prometheus.values.yaml @@ -410,9 +410,9 @@ serverFiles: regex: true # ONLY scrape DCGM exporter pods to reduce load on Prometheus # This prevents scraping random pods that have prometheus.io/scrape=true - - source_labels: [ __meta_kubernetes_pod_label_app ] - action: keep - regex: nvidia-dcgm-exporter + - action: keep + regex: '.*dcgm-exporter.*' + source_labels: [ __meta_kubernetes_pod_label_app, __meta_kubernetes_pod_label_app_kubernetes_io_name ] - source_labels: [ __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow ] action: drop regex: true diff --git a/dist/backend-install.yaml b/dist/backend-install.yaml index e457ebde..ed38cd41 100644 --- a/dist/backend-install.yaml +++ b/dist/backend-install.yaml @@ -141,9 +141,10 @@ data: source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: keep - regex: nvidia-dcgm-exporter + regex: .*dcgm-exporter.* source_labels: - __meta_kubernetes_pod_label_app + - __meta_kubernetes_pod_label_app_kubernetes_io_name - action: drop regex: true source_labels: diff --git a/dist/install.yaml b/dist/install.yaml index c11d97f0..daf594d3 100644 --- a/dist/install.yaml +++ b/dist/install.yaml @@ -142,9 +142,10 @@ data: source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: keep - regex: nvidia-dcgm-exporter + regex: .*dcgm-exporter.* source_labels: - __meta_kubernetes_pod_label_app + - __meta_kubernetes_pod_label_app_kubernetes_io_name - action: drop regex: true source_labels: diff --git a/dist/installer_updater.yaml b/dist/installer_updater.yaml index 66ad663e..0c9cd3d6 100644 --- a/dist/installer_updater.yaml +++ b/dist/installer_updater.yaml @@ -141,9 +141,10 @@ data: source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: keep - regex: nvidia-dcgm-exporter + regex: .*dcgm-exporter.* source_labels: - __meta_kubernetes_pod_label_app + - __meta_kubernetes_pod_label_app_kubernetes_io_name - action: drop regex: true source_labels: diff --git a/dist/prometheus.yaml b/dist/prometheus.yaml index 35c1da0f..15b13d52 100644 --- a/dist/prometheus.yaml +++ b/dist/prometheus.yaml @@ -126,9 +126,10 @@ data: source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: keep - regex: nvidia-dcgm-exporter + regex: .*dcgm-exporter.* source_labels: - __meta_kubernetes_pod_label_app + - __meta_kubernetes_pod_label_app_kubernetes_io_name - action: drop regex: true source_labels: diff --git a/helm-chart/zxporter/templates/prometheus-configmap.yaml b/helm-chart/zxporter/templates/prometheus-configmap.yaml index f24acae6..6fc320f5 100644 --- a/helm-chart/zxporter/templates/prometheus-configmap.yaml +++ b/helm-chart/zxporter/templates/prometheus-configmap.yaml @@ -90,9 +90,10 @@ data: source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: keep - regex: nvidia-dcgm-exporter + regex: '.*dcgm-exporter.*' source_labels: - __meta_kubernetes_pod_label_app + - __meta_kubernetes_pod_label_app_kubernetes_io_name - action: drop regex: true source_labels: