From 4ddad47dbfb676cbdd3b1a3ae218ddfd467771f5 Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 7 Oct 2025 16:35:38 -0400 Subject: [PATCH 1/4] update apache mesos mixin to use modern libraries --- .../{alerts => }/alerts.libsonnet | 22 +- apache-mesos-mixin/config.libsonnet | 45 +- apache-mesos-mixin/dashboards.libsonnet | 77 + .../apache-mesos-overview.libsonnet | 1400 ---------------- .../dashboards/dashboards.libsonnet | 1 - .../dashboards_out/apache-mesos-logs.json | 317 ++++ .../dashboards_out/apache-mesos-overview.json | 1458 ++++------------- apache-mesos-mixin/g.libsonnet | 1 + apache-mesos-mixin/jsonnetfile.json | 45 +- apache-mesos-mixin/links.libsonnet | 16 + apache-mesos-mixin/main.libsonnet | 46 + apache-mesos-mixin/mixin.libsonnet | 36 +- apache-mesos-mixin/panels.libsonnet | 162 ++ apache-mesos-mixin/rows.libsonnet | 40 + apache-mesos-mixin/signals/agent.libsonnet | 44 + apache-mesos-mixin/signals/master.libsonnet | 239 +++ 16 files changed, 1406 insertions(+), 2543 deletions(-) rename apache-mesos-mixin/{alerts => }/alerts.libsonnet (90%) create mode 100644 apache-mesos-mixin/dashboards.libsonnet delete mode 100644 apache-mesos-mixin/dashboards/apache-mesos-overview.libsonnet delete mode 100644 apache-mesos-mixin/dashboards/dashboards.libsonnet create mode 100644 apache-mesos-mixin/dashboards_out/apache-mesos-logs.json create mode 100644 apache-mesos-mixin/g.libsonnet create mode 100644 apache-mesos-mixin/links.libsonnet create mode 100644 apache-mesos-mixin/main.libsonnet create mode 100644 apache-mesos-mixin/panels.libsonnet create mode 100644 apache-mesos-mixin/rows.libsonnet create mode 100644 apache-mesos-mixin/signals/agent.libsonnet create mode 100644 apache-mesos-mixin/signals/master.libsonnet diff --git a/apache-mesos-mixin/alerts/alerts.libsonnet b/apache-mesos-mixin/alerts.libsonnet similarity index 90% rename from apache-mesos-mixin/alerts/alerts.libsonnet rename to apache-mesos-mixin/alerts.libsonnet index 04d7056e0..b559348ae 100644 --- a/apache-mesos-mixin/alerts/alerts.libsonnet +++ b/apache-mesos-mixin/alerts.libsonnet @@ -1,5 +1,5 @@ { - prometheusAlerts+:: { + new(this): { groups+: [ { name: 'apache-mesos', @@ -8,7 +8,7 @@ alert: 'ApacheMesosHighMemoryUsage', expr: ||| min without(instance, job, type) (mesos_master_mem{type="percent"}) > %(alertsWarningMemoryUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -19,14 +19,14 @@ ( '{{ printf "%%.0f" $value }} percent memory usage on {{$labels.mesos_cluster}}, ' + 'which is above the threshold of %(alertsWarningMemoryUsage)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'ApacheMesosHighDiskUsage', expr: ||| min without(instance, job, type) (mesos_master_disk{type="percent"}) > %(alertsCriticalDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -37,14 +37,14 @@ ( '{{ printf "%%.0f" $value }} percent disk usage on {{$labels.mesos_cluster}}, ' + 'which is above the threshold of %(alertsCriticalDiskUsage)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'ApacheMesosUnreachableTasks', expr: ||| max without(instance, job, state) (mesos_master_task_states_current{state="unreachable"}) > %(alertsWarningUnreachableTask)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -55,14 +55,14 @@ ( '{{ printf "%%.0f" $value }} unreachable tasks on {{$labels.mesos_cluster}}, ' + 'which is above the threshold of %(alertsWarningUnreachableTask)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'ApacheMesosNoLeaderElected', expr: ||| max without(instance, job) (mesos_master_elected) == 0 - ||| % $._config, + ||| % this.config, 'for': '1m', labels: { severity: 'critical', @@ -72,14 +72,14 @@ description: ( 'There is no cluster coordinator on {{$labels.mesos_cluster}}.' - ) % $._config, + ) % this.config, }, }, { alert: 'ApacheMesosInactiveAgents', expr: ||| max without(instance, job, state) (mesos_master_slaves_state{state=~"connected_inactive|disconnected_inactive"}) > 1 - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -89,7 +89,7 @@ description: ( '{{ printf "%%.0f" $value }} inactive agent clients over the last 5m which is above the threshold of 1.' - ) % $._config, + ) % this.config, }, }, ], diff --git a/apache-mesos-mixin/config.libsonnet b/apache-mesos-mixin/config.libsonnet index ea5b66174..73d6323c4 100644 --- a/apache-mesos-mixin/config.libsonnet +++ b/apache-mesos-mixin/config.libsonnet @@ -1,17 +1,36 @@ { - _config+:: { - dashboardTags: ['apache-mesos-mixin'], - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - // alerts thresholds - alertsWarningMemoryUsage: 90, - alertsCriticalDiskUsage: 90, - alertsWarningUnreachableTask: 3, - enableLokiLogs: true, - enableMultiCluster: false, - mesosSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"', - multiclusterSelector: 'job=~"$job"', + local this = self, + filteringSelector: 'job="integrations/apache-mesos"', + groupLabels: ['job', 'mesos_cluster', 'cluster'], + instanceLabels: ['instance'], + + dashboardTags: [self.uid + '-mixin'], + uid: 'apache-mesos', + dashboardNamePrefix: 'Apache Mesos', + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + + // Logging configuration + enableLokiLogs: true, + logLabels: ['job', 'cluster', 'instance'], + extraLogLabels: ['level'], // Required by logs-lib + logsVolumeGroupBy: 'level', + showLogsVolume: true, + + // alerts thresholds + alertsWarningMemoryUsage: 90, + alertsCriticalDiskUsage: 90, + alertsWarningUnreachableTask: 3, + + // metrics source for signals library + metricsSource: 'prometheus', + + // signals configuration + signals+: { + overview: (import './signals/overview.libsonnet')(this), + master: (import './signals/master.libsonnet')(this), + agent: (import './signals/agent.libsonnet')(this), }, } diff --git a/apache-mesos-mixin/dashboards.libsonnet b/apache-mesos-mixin/dashboards.libsonnet new file mode 100644 index 000000000..8cafb1b9e --- /dev/null +++ b/apache-mesos-mixin/dashboards.libsonnet @@ -0,0 +1,77 @@ +local g = import './g.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; + +{ + local root = self, + new(this):: + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = g.util.string.slugify(this.config.uid); + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local prefix = this.config.dashboardNamePrefix; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + + + { + + 'apache-mesos-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.masterOverview, + this.grafana.rows.agentOverview, + ]) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '_overview', + tags, + links { mesosOverview+:: {} }, + annotations, + timezone, + refresh, + period, + ), + } + + if this.config.enableLokiLogs then { + 'apache-mesos-logs.json': + logslib.new( + prefix + ' logs', + datasourceName=this.grafana.variables.datasources.loki.name, + datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.groupLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + ) { + dashboards+: { + logs+: + root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: { + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + toArray+: [ + this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars), +} diff --git a/apache-mesos-mixin/dashboards/apache-mesos-overview.libsonnet b/apache-mesos-mixin/dashboards/apache-mesos-overview.libsonnet deleted file mode 100644 index ce22b0d0d..000000000 --- a/apache-mesos-mixin/dashboards/apache-mesos-overview.libsonnet +++ /dev/null @@ -1,1400 +0,0 @@ -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'apache-mesos-overview'; - -local promDatasourceName = 'prometheus_datasource'; -local lokiDatasourceName = 'loki_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; local lokiDatasource = { - uid: '${%s}' % lokiDatasourceName, -}; - -local getMatcher(cfg) = '%(mesosSelector)s, instance=~"$instance", mesos_cluster=~"$mesos_cluster"' % cfg; - -local masterUptimePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_uptime_seconds{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Master uptime', - description: 'Uptime of the Mesos master process.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.5.2-cloud.2.0cb5a501', -}; - -local cpusAvailablePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_cpus{' + matcher + ', type="total"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'CPUs available', - description: 'CPUs available in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: '', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.5.2-cloud.2.0cb5a501', -}; - -local memoryAvailablePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_mem{' + matcher + ', type="total"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Memory available', - description: 'Amount of memory available in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.5.2-cloud.2.0cb5a501', -}; - -local gpusAvailablePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_gpus{' + matcher + ', type="total"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'GPUs available', - description: 'Total number of GPUs available in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: '', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.5.2-cloud.2.0cb5a501', -}; - -local diskAvailablePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_disk{' + matcher + ', type="total"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Disk available', - description: 'Current amount of bytes inside the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.5.2-cloud.2.0cb5a501', -}; - -local memoryUtilizationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_mem{' + matcher + ', type="percent"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Memory utilization', - description: 'The percentage of allocated memory in use by the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local diskUtilizationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_disk{' + matcher + ', type="percent"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk utilization', - description: 'The percentage of allocated disk storage in use by the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local eventsInQueuePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster, type) (mesos_master_event_queue_length{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}} - {{type}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Events in queue', - description: 'The number of events in the event queue.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'events', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local messagesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster, type) (increase(mesos_master_messages{' + matcher + '}[$__interval:])) > 0', - datasource=promDatasource, - legendFormat='{{mesos_cluster}} - {{type}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Messages', - description: 'The rate of messages being processed.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'messages', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local registrarStatePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_registrar_state_store_ms{' + matcher + ', type="mean"})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}} - store', - format='time_series', - ), - prometheus.target( - 'max by(mesos_cluster) (mesos_registrar_state_fetch_ms{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}} - fetch', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Registrar state', - description: 'Duration of fetching and storing the Mesos agent registrar state.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local registrarLogRecoveredPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_registrar_log_recovered{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Registrar log recovered', - description: 'Whether or not the registrar log was properly recovered.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [ - { - options: { - '0': { - color: 'red', - index: 1, - text: 'Not OK', - }, - '1': { - color: 'green', - index: 0, - text: 'OK', - }, - }, - type: 'value', - }, - { - options: { - match: 'null', - result: { - color: 'text', - index: 2, - text: '-', - }, - }, - type: 'special', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: '', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - text: {}, - textMode: 'auto', - }, - pluginVersion: '9.5.2-cloud.2.0cb5a501', -}; - -local allocatorRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - ), - ], - type: 'row', - title: 'Allocator', - collapsed: false, -}; - -local allocationRunsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (rate(mesos_master_allocation_run_ms_count{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Allocation runs', - description: 'The rate of how often the allocator is performing allocations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'allocs/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local allocationDurationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_allocation_run_ms{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Allocation duration', - description: 'Time spent in the allocation algorithm in ms.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local allocationLatencyPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Allocation latency', - description: 'Allocation batch latency in ms', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local eventQueueDispatchesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (mesos_master_event_queue_dispatches{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Event queue dispatches', - description: 'The number of dispatch events in the allocator mesos event queue.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'events', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local agentsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - ), - ], - type: 'row', - title: 'Agents', - collapsed: false, -}; - -local agentMemoryUtilizationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{' + matcher + '} / clamp_min(mesos_slave_mem_bytes{' + matcher + '},1))', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Agent memory utilization', - description: 'The percentage of allocated memory in use by the agent.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local agentDiskUtilizationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{' + matcher + '} / clamp_min(mesos_slave_disk_bytes{' + matcher + '},1))', - datasource=promDatasource, - legendFormat='{{mesos_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Agent disk utilization', - description: 'The percentage of allocated disk storage in use by the agent.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local logsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - ), - ], - type: 'row', - title: 'Logs', - collapsed: false, -}; - -local masterLogsPanel(matcher) = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{' + matcher + '} |= `` | (filename=~"/var/log/mesos/master/.*" or log_type="master")', - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'Master logs', - description: 'The application logs for the Mesos master node.', - options: { - dedupStrategy: 'none', - enableLogDetails: true, - prettifyLogMessage: false, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, -}; - -local agentLogsPanel(matcher) = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{' + matcher + '} |= `` | (filename=~"/var/log/mesos/agent/.*" or log_type="agent")', - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'Agent logs', - description: 'The application logs for the Mesos agent node.', - options: { - dedupStrategy: 'none', - enableLogDetails: true, - prettifyLogMessage: false, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, -}; - -{ - grafanaDashboards+:: { - 'apache-mesos-overview.json': - dashboard.new( - 'Apache Mesos overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addTemplates( - std.flattenArrays([ - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Prometheus data source', - refresh='load' - ), - ], - if $._config.enableLokiLogs then [ - template.datasource( - lokiDatasourceName, - 'loki', - null, - label='Loki data source', - refresh='load' - ), - ] else [], - [ - template.new( - 'job', - promDatasource, - 'label_values(mesos_exporter_build_info,job)', - label='Job', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(mesos_exporter_build_info{%(multiclusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.*', - hide=if $._config.enableMultiCluster then '' else 'variable', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mesos_exporter_build_info{%(mesosSelector)s}, instance)' % $._config, - label='Instance', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - template.new( - 'mesos_cluster', - promDatasource, - 'label_values(mesos_exporter_build_info{%(mesosSelector)s, instance=~"$instance"}, mesos_cluster)' % $._config, - label='Mesos cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - ], - ]) - ) - .addPanels( - std.flattenArrays([ - [ - masterUptimePanel(getMatcher($._config)) { gridPos: { h: 6, w: 4, x: 0, y: 0 } }, - cpusAvailablePanel(getMatcher($._config)) { gridPos: { h: 6, w: 5, x: 4, y: 0 } }, - memoryAvailablePanel(getMatcher($._config)) { gridPos: { h: 6, w: 5, x: 9, y: 0 } }, - gpusAvailablePanel(getMatcher($._config)) { gridPos: { h: 6, w: 5, x: 14, y: 0 } }, - diskAvailablePanel(getMatcher($._config)) { gridPos: { h: 6, w: 5, x: 19, y: 0 } }, - memoryUtilizationPanel(getMatcher($._config)) { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, - diskUtilizationPanel(getMatcher($._config)) { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, - eventsInQueuePanel(getMatcher($._config)) { gridPos: { h: 6, w: 12, x: 0, y: 12 } }, - messagesPanel(getMatcher($._config)) { gridPos: { h: 6, w: 12, x: 12, y: 12 } }, - registrarStatePanel(getMatcher($._config)) { gridPos: { h: 6, w: 18, x: 0, y: 18 } }, - registrarLogRecoveredPanel(getMatcher($._config)) { gridPos: { h: 6, w: 6, x: 18, y: 18 } }, - allocatorRow { gridPos: { h: 1, w: 24, x: 0, y: 24 } }, - allocationRunsPanel(getMatcher($._config)) { gridPos: { h: 6, w: 6, x: 0, y: 25 } }, - allocationDurationPanel(getMatcher($._config)) { gridPos: { h: 6, w: 6, x: 6, y: 25 } }, - allocationLatencyPanel(getMatcher($._config)) { gridPos: { h: 6, w: 6, x: 12, y: 25 } }, - eventQueueDispatchesPanel(getMatcher($._config)) { gridPos: { h: 6, w: 6, x: 18, y: 25 } }, - agentsRow { gridPos: { h: 1, w: 24, x: 0, y: 31 } }, - agentMemoryUtilizationPanel(getMatcher($._config)) { gridPos: { h: 6, w: 12, x: 0, y: 32 } }, - agentDiskUtilizationPanel(getMatcher($._config)) { gridPos: { h: 6, w: 12, x: 12, y: 32 } }, - ], - if $._config.enableLokiLogs then [ - logsRow { gridPos: { h: 1, w: 24, x: 0, y: 38 } }, - ] else [], - [ - ], - if $._config.enableLokiLogs then [ - masterLogsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 24, x: 0, y: 39 } }, - ] else [], - [ - ], - if $._config.enableLokiLogs then [ - agentLogsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 24, x: 0, y: 47 } }, - ] else [], - [ - ], - ]) - ), - }, -} diff --git a/apache-mesos-mixin/dashboards/dashboards.libsonnet b/apache-mesos-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index 4c06550ed..000000000 --- a/apache-mesos-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1 +0,0 @@ -(import 'apache-mesos-overview.libsonnet') diff --git a/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json b/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json new file mode 100644 index 000000000..913ca51e7 --- /dev/null +++ b/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json @@ -0,0 +1,317 @@ +{ + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Apache Mesos overview", + "type": "link", + "url": "/d/apachemesos_overview" + } + ], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Logs volume grouped by \"level\" label.", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 50, + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)(rr.*|RR.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(W|w)(arn.*|ARN.*|rn|RN)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(T|t)(race|RACE)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "logs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "maxDataPoints": 100, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "sum by (level) (count_over_time({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "legendFormat": "{{ level }}" + } + ], + "title": "Logs volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value", + "renamePattern": "logs" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showTime": false, + "wrapLogMessage": false + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "apache-mesos-mixin" + ], + "templating": { + "list": [ + { + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values({job=\"integrations/apache-mesos\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Mesos cluster", + "multi": true, + "name": "mesos_cluster", + "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\"}, mesos_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Level", + "multi": true, + "name": "level", + "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\"}, level)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "Regex search", + "name": "regex_search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "type": "textbox" + }, + { + "hide": 2, + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "default", + "title": "Apache Mesos logs", + "uid": "apachemesos-logs" + } \ No newline at end of file diff --git a/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json b/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json index fee81d381..29e721e39 100644 --- a/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json +++ b/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json @@ -1,72 +1,64 @@ { - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "description": "", "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, "id": null, - "links": [ ], + "links": [ + { + "keepTime": true, + "title": "Apache Mesos logs", + "type": "link", + "url": "/d/apachemesos-logs" + }, + { + "keepTime": true, + "title": "Apache Mesos overview", + "type": "link", + "url": "/d/apachemesos_overview" + } + ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Master overview", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Uptime of the Mesos master process.", + "description": "Master uptime in seconds", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 4, "x": 0, - "y": 0 + "y": 1 }, "id": 2, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2-cloud.2.0cb5a501", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_uptime_seconds{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster) (mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Master uptime" } ], "title": "Master uptime", @@ -74,119 +66,69 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "CPUs available in the cluster.", + "description": "CPUs available in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 5, "x": 4, - "y": 0 + "y": 1 }, "id": 3, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2-cloud.2.0cb5a501", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_cpus{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"total\"})", + "expr": "max by(mesos_cluster) (mesos_master_cpus{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "CPUs available" } ], - "title": "CPUs available", + "title": "CPUS available", "type": "stat" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Amount of memory available in the cluster.", + "description": "Memory available in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "bytes" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 5, "x": 9, - "y": 0 + "y": 1 }, "id": 4, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2-cloud.2.0cb5a501", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_mem{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"total\"})", + "expr": "max by(mesos_cluster) (mesos_master_mem{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Memory available" } ], "title": "Memory available", @@ -194,59 +136,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Total number of GPUs available in the cluster.", + "description": "GPUs available in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 5, "x": 14, - "y": 0 + "y": 1 }, "id": 5, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2-cloud.2.0cb5a501", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_gpus{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"total\"})", + "expr": "max by(mesos_cluster) (mesos_master_gpus{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "GPUs available" } ], "title": "GPUs available", @@ -254,59 +171,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Current amount of bytes inside the cluster.", + "description": "Disk available in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "bytes" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 5, "x": 19, - "y": 0 + "y": 1 }, "id": 6, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2-cloud.2.0cb5a501", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_disk{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"total\"})", + "expr": "max by(mesos_cluster) (mesos_master_disk{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Disk available" } ], "title": "Disk available", @@ -314,86 +206,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The percentage of allocated memory in use by the cluster.", + "description": "Memory utilization in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 9 }, "id": 7, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_mem{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"percent\"})", + "expr": "max by(mesos_cluster) (mesos_master_mem{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"percent\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Memory utilization" } ], "title": "Memory utilization", @@ -401,86 +258,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The percentage of allocated disk storage in use by the cluster.", + "description": "Disk utilization in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 9 }, "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_disk{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"percent\"})", + "expr": "max by(mesos_cluster) (mesos_master_disk{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"percent\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Disk utilization" } ], "title": "Disk utilization", @@ -488,86 +310,50 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of events in the event queue.", + "description": "Events in queue in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "events" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 17 }, "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster, type) (mesos_master_event_queue_length{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster, type) (mesos_master_event_queue_length{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}} - {{type}}" + "instant": false, + "legendFormat": "{{mesos_cluster}} - {{type}}", + "refId": "Events in queue" } ], "title": "Events in queue", @@ -575,87 +361,50 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The rate of messages being processed.", + "description": "Messages in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "messages" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 17 }, "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster, type) (increase(mesos_master_messages{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"}[$__interval:])) > 0", + "expr": "max by(mesos_cluster, type) (increase(mesos_master_messages{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset $__interval)) > 0", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}} - {{type}}" + "instant": false, + "legendFormat": "{{mesos_cluster}} - {{type}}", + "refId": "Messages" } ], "title": "Messages", @@ -663,95 +412,62 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Duration of fetching and storing the Mesos agent registrar state.", + "description": "Registrar state store and fetch in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 18, "x": 0, - "y": 18 + "y": 25 }, "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_registrar_state_store_ms{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\", type=\"mean\"})", + "expr": "max by(mesos_cluster) (mesos_registrar_state_store_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"mean\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}} - store" + "instant": false, + "legendFormat": "{{mesos_cluster}} - store", + "refId": "Registrar state store" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_registrar_state_fetch_ms{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster) (mesos_registrar_state_fetch_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}} - fetch" + "instant": false, + "legendFormat": "{{mesos_cluster}} - fetch", + "refId": "Registrar state fetch" } ], "title": "Registrar state", @@ -759,200 +475,80 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Whether or not the registrar log was properly recovered.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "0": { - "color": "red", - "index": 1, - "text": "Not OK" - }, - "1": { - "color": "green", - "index": 0, - "text": "OK" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "text", - "index": 2, - "text": "-" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "" - }, - "overrides": [ ] + "type": "datasource", + "uid": "-- Mixed --" }, + "description": "Registrar log recovered in the cluster", "gridPos": { - "h": 6, + "h": 8, "w": 6, "x": 18, - "y": 18 + "y": 25 }, "id": 12, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2-cloud.2.0cb5a501", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_registrar_log_recovered{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster) (mesos_registrar_log_recovered{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Registrar log recovered" } ], "title": "Registrar log recovered", "type": "stat" }, { - "collapsed": false, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 13, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Allocator", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The rate of how often the allocator is performing allocations.", + "description": "Allocation runs in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "allocs/s" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 6, "x": 0, - "y": 25 + "y": 33 }, - "id": 14, + "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (rate(mesos_master_allocation_run_ms_count{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"}[$__rate_interval]))", + "expr": "max by(mesos_cluster) (rate(mesos_master_allocation_run_ms_count{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Allocation runs" } ], "title": "Allocation runs", @@ -960,86 +556,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Time spent in the allocation algorithm in ms.", + "description": "Allocation duration in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 6, "x": 6, - "y": 25 + "y": 33 }, - "id": 15, + "id": 14, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_allocation_run_ms{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster) (mesos_master_allocation_run_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Allocation duration" } ], "title": "Allocation duration", @@ -1047,86 +608,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Allocation batch latency in ms", + "description": "Allocation latency in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 6, "x": 12, - "y": 25 + "y": 33 }, - "id": 16, + "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Allocation latency" } ], "title": "Allocation latency", @@ -1134,86 +660,50 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of dispatch events in the allocator mesos event queue.", + "description": "Event queue dispatches in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "events" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 6, "x": 18, - "y": 25 + "y": 33 }, - "id": 17, + "id": 16, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (mesos_master_event_queue_dispatches{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"})", + "expr": "max by(mesos_cluster) (mesos_master_event_queue_dispatches{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Event queue dispatches" } ], "title": "Event queue dispatches", @@ -1221,112 +711,64 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 31 + "y": 41 }, - "id": 18, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Agents", + "id": 17, + "panels": [ ], + "title": "Agent overview", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The percentage of allocated memory in use by the agent.", + "description": "Memory utilization in the cluster", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 32 + "y": 42 }, - "id": 19, + "id": 18, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"} / clamp_min(mesos_slave_mem_bytes{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"},1))", + "expr": "max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"} / clamp_min(mesos_slave_mem_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"},1))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Memory utilization" } ], "title": "Agent memory utilization", @@ -1334,342 +776,146 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The percentage of allocated disk storage in use by the agent.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 32 + "y": 42 }, - "id": 20, + "id": 19, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"} / clamp_min(mesos_slave_disk_bytes{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"},1))", + "expr": "max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"} / clamp_min(mesos_slave_disk_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"},1))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mesos_cluster}}" + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Disk utilization" } ], "title": "Agent disk utilization", "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 38 - }, - "id": 21, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Logs", - "type": "row" - }, - { - "datasource": { - "uid": "${loki_datasource}" - }, - "description": "The application logs for the Mesos master node.", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 39 - }, - "id": 22, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - "editorMode": "code", - "expr": "{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"} |= `` | (filename=~\"/var/log/mesos/master/.*\" or log_type=\"master\")", - "queryType": "range", - "refId": "A" - } - ], - "title": "Master logs", - "type": "logs" - }, - { - "datasource": { - "uid": "${loki_datasource}" - }, - "description": "The application logs for the Mesos agent node.", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 47 - }, - "id": 23, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - "editorMode": "code", - "expr": "{job=~\"$job\", instance=~\"$instance\", mesos_cluster=~\"$mesos_cluster\"} |= `` | (filename=~\"/var/log/mesos/agent/.*\" or log_type=\"agent\")", - "queryType": "range", - "refId": "A" - } - ], - "title": "Agent logs", - "type": "logs" } ], - "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "refresh": "30s", + "schemaVersion": 39, "tags": [ "apache-mesos-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "current": { }, - "hide": 0, - "label": "Loki data source", - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", "type": "datasource" }, { "allValue": ".+", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mesos_exporter_build_info,job)", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\"}, job)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { - "allValue": ".*", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 2, "includeAll": true, - "label": "Cluster", + "label": "Mesos cluster", "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(mesos_exporter_build_info{job=~\"$job\"}, cluster)", + "name": "mesos_cluster", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\"}, mesos_cluster)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": ".+", - "current": { }, + "allValue": ".*", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, - "label": "Instance", + "label": "Cluster", "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(mesos_exporter_build_info{job=~\"$job\"}, instance)", + "name": "cluster", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\"}, cluster)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, - "label": "Mesos cluster", + "label": "Instance", "multi": true, - "name": "mesos_cluster", - "options": [ ], - "query": "label_values(mesos_exporter_build_info{job=~\"$job\", instance=~\"$instance\"}, mesos_cluster)", + "name": "instance", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\"}, instance)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" } ] }, "time": { - "from": "now-1h", + "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "Apache Mesos overview", - "uid": "apache-mesos-overview", - "version": 0 + "uid": "apachemesos_overview" } \ No newline at end of file diff --git a/apache-mesos-mixin/g.libsonnet b/apache-mesos-mixin/g.libsonnet new file mode 100644 index 000000000..e6a2060ee --- /dev/null +++ b/apache-mesos-mixin/g.libsonnet @@ -0,0 +1 @@ +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/apache-mesos-mixin/jsonnetfile.json b/apache-mesos-mixin/jsonnetfile.json index b61f7e64c..6354d0e12 100644 --- a/apache-mesos-mixin/jsonnetfile.json +++ b/apache-mesos-mixin/jsonnetfile.json @@ -1,15 +1,42 @@ { "version": 1, "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet-lib.git", + "subdir": "grafonnet" + } + }, + "version": "master" }, - "version": "master" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "logs-lib" + } + }, + "version": "master" + } ], "legacyImports": true -} \ No newline at end of file +} diff --git a/apache-mesos-mixin/links.libsonnet b/apache-mesos-mixin/links.libsonnet new file mode 100644 index 000000000..51e1cce73 --- /dev/null +++ b/apache-mesos-mixin/links.libsonnet @@ -0,0 +1,16 @@ +local g = import './g.libsonnet'; + +{ + local link = g.dashboard.link, + new(this): + { + overview: + link.link.new('Apache Mesos overview', '/d/' + this.grafana.dashboards['apache-mesos-overview.json'].uid) + + link.link.options.withKeepTime(true), + } + + if this.config.enableLokiLogs then { + logs: + link.link.new('Apache Mesos logs', '/d/' + this.grafana.dashboards['apache-mesos-logs.json'].uid) + + link.link.options.withKeepTime(true), + } else {}, +} diff --git a/apache-mesos-mixin/main.libsonnet b/apache-mesos-mixin/main.libsonnet new file mode 100644 index 000000000..a4de7dfe7 --- /dev/null +++ b/apache-mesos-mixin/main.libsonnet @@ -0,0 +1,46 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local g = import './g.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + local this = self, + config: config, + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='mesos_master_uptime_seconds', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/apache-mesos-mixin/mixin.libsonnet b/apache-mesos-mixin/mixin.libsonnet index 4d987cf31..fde40c262 100644 --- a/apache-mesos-mixin/mixin.libsonnet +++ b/apache-mesos-mixin/mixin.libsonnet @@ -1,3 +1,33 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local mixinlib = import './main.libsonnet'; +local config = (import './config.libsonnet'); +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + +local mixin = mixinlib.new() + + mixinlib.withConfigMixin( + { + filteringSelecter: config.filteringSelector, + uid: config.uid, + enableLokiLogs: true, + } + ); +local k8s_patch = { + mesos_cluster+: { + label: 'Mesos cluster', + }, + cluster+: { + allValue: '.*', + }, +}; + +{ + grafanaDashboards+:: { + local tags = config.dashboardTags, + [fname]: + local dashboard = util.decorate_dashboard(mixin.grafana.dashboards[fname], tags=tags); + dashboard + util.patch_variables(dashboard, k8s_patch) + + for fname in std.objectFields(mixin.grafana.dashboards) + }, + prometheusAlerts+:: mixin.prometheus.alerts, + prometheusRules+:: mixin.prometheus.recordingRules, +} diff --git a/apache-mesos-mixin/panels.libsonnet b/apache-mesos-mixin/panels.libsonnet new file mode 100644 index 000000000..d1749f563 --- /dev/null +++ b/apache-mesos-mixin/panels.libsonnet @@ -0,0 +1,162 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this): + { + local signals = this.signals, + + masterUptimePanel: + g.panel.stat.new('Master uptime') + + g.panel.stat.queryOptions.withTargets( + signals.master.masterUptime.asTarget() + ) + + g.panel.stat.standardOptions.withUnit('s') + + g.panel.stat.panelOptions.withDescription('Master uptime in seconds'), + cpusAvailablePanel: + g.panel.stat.new('CPUS available') + + g.panel.stat.queryOptions.withTargets( + signals.master.cpusAvailable.asTarget() + ) + + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.panelOptions.withDescription('CPUs available in the cluster'), + memoryAvailablePanel: + g.panel.stat.new('Memory available') + + g.panel.stat.queryOptions.withTargets( + signals.master.memoryAvailable.asTarget() + ) + + g.panel.stat.standardOptions.withUnit('bytes') + + g.panel.stat.panelOptions.withDescription('Memory available in the cluster'), + gpusAvailablePanel: + g.panel.stat.new('GPUs available') + + g.panel.stat.queryOptions.withTargets( + signals.master.gpusAvailable.asTarget() + ) + + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.panelOptions.withDescription('GPUs available in the cluster'), + + diskAvailablePanel: + g.panel.stat.new('Disk available') + + g.panel.stat.queryOptions.withTargets( + signals.master.diskAvailable.asTarget() + ) + + g.panel.stat.standardOptions.withUnit('bytes') + + g.panel.stat.panelOptions.withDescription('Disk available in the cluster'), + + memoryUtilizationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Memory utilization', + targets=[ + signals.master.memoryUtilization.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.panelOptions.withDescription('Memory utilization in the cluster'), + + diskUtilizationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Disk utilization', + targets=[ + signals.master.diskUtilization.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.panelOptions.withDescription('Disk utilization in the cluster'), + + eventsInQueuePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Events in queue', + targets=[ + signals.master.eventsInQueue.asTarget(), + ] + ) + + g.panel.timeSeries.panelOptions.withDescription('Events in queue in the cluster'), + + messagesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Messages', + targets=[ + signals.master.messages.asTarget(), + ] + ) + + g.panel.timeSeries.panelOptions.withDescription('Messages in the cluster'), + + registrarStatePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Registrar state', + targets=[ + signals.master.registrarStateStore.asTarget(), + signals.master.registrarStateFetch.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.panelOptions.withDescription('Registrar state store and fetch in the cluster'), + + registrarLogRecoveredPanel: + g.panel.stat.new('Registrar log recovered') + + g.panel.stat.queryOptions.withTargets( + signals.master.registrarLogRecovered.asTarget() + ) + + g.panel.stat.panelOptions.withDescription('Registrar log recovered in the cluster'), + + allocationRunsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Allocation runs', + targets=[ + signals.master.allocationRuns.asTarget(), + ] + ) + + g.panel.timeSeries.panelOptions.withDescription('Allocation runs in the cluster'), + + allocationDurationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Allocation duration', + targets=[ + signals.master.allocationDuration.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.panelOptions.withDescription('Allocation duration in the cluster'), + + allocationLatencyPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Allocation latency', + targets=[ + signals.master.allocationLatency.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.panelOptions.withDescription('Allocation latency in the cluster'), + + eventQueueDispatchesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Event queue dispatches', + targets=[ + signals.master.eventQueueDispatches.asTarget(), + ] + ) + + g.panel.timeSeries.panelOptions.withDescription('Event queue dispatches in the cluster'), + + // Agent panels + agentMemoryUtilizationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Agent memory utilization', + targets=[ + signals.agent.memoryUtilization.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.panelOptions.withDescription('Memory utilization in the cluster'), + + agentDiskUtilizationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Agent disk utilization', + targets=[ + signals.agent.diskUtilization.asTarget(), + ] + ) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.panelOptions.withDescription('The percentage of allocated disk storage in use by the agent.'), + + }, +} diff --git a/apache-mesos-mixin/rows.libsonnet b/apache-mesos-mixin/rows.libsonnet new file mode 100644 index 000000000..d0cd66e92 --- /dev/null +++ b/apache-mesos-mixin/rows.libsonnet @@ -0,0 +1,40 @@ +local g = import './g.libsonnet'; + +{ + new(this): + { + local panels = this.grafana.panels, + + masterOverview: + g.panel.row.new('Master overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + panels.masterUptimePanel + g.panel.timeSeries.gridPos.withW(4), + panels.cpusAvailablePanel + g.panel.timeSeries.gridPos.withW(5), + panels.memoryAvailablePanel + g.panel.timeSeries.gridPos.withW(5), + panels.gpusAvailablePanel + g.panel.timeSeries.gridPos.withW(5), + panels.diskAvailablePanel + g.panel.timeSeries.gridPos.withW(5), + panels.memoryUtilizationPanel + g.panel.timeSeries.gridPos.withW(12), + panels.diskUtilizationPanel + g.panel.timeSeries.gridPos.withW(12), + panels.eventsInQueuePanel + g.panel.timeSeries.gridPos.withW(12), + panels.messagesPanel + g.panel.timeSeries.gridPos.withW(12), + panels.registrarStatePanel + g.panel.timeSeries.gridPos.withW(18), + panels.registrarLogRecoveredPanel + g.panel.timeSeries.gridPos.withW(6), + panels.allocationRunsPanel + g.panel.timeSeries.gridPos.withW(6), + panels.allocationDurationPanel + g.panel.timeSeries.gridPos.withW(6), + panels.allocationLatencyPanel + g.panel.timeSeries.gridPos.withW(6), + panels.eventQueueDispatchesPanel + g.panel.timeSeries.gridPos.withW(6), + ]), + + + agentOverview: + g.panel.row.new('Agent overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + panels.agentMemoryUtilizationPanel + g.panel.timeSeries.gridPos.withW(12), + panels.agentDiskUtilizationPanel + g.panel.timeSeries.gridPos.withW(12), + ]), + + + }, +} diff --git a/apache-mesos-mixin/signals/agent.libsonnet b/apache-mesos-mixin/signals/agent.libsonnet new file mode 100644 index 000000000..873fdabb8 --- /dev/null +++ b/apache-mesos-mixin/signals/agent.libsonnet @@ -0,0 +1,44 @@ +function(this) + { + + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)) + ' - {{mesos_cluster}}', + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'mesos_slave_mem_used_bytes', + }, + signals: { + memoryUtilization: { + name: 'Memory utilization', + nameShort: 'Memory %', + type: 'raw', + description: 'Memory utilization in the cluster', + unit: 'percent', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{%(queriesSelector)s} / clamp_min(mesos_slave_mem_bytes{%(queriesSelector)s},1))', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + diskUtilization: { + name: 'Disk utilization', + nameShort: 'Disk %', + type: 'raw', + description: 'The percentage of allocated disk storage in use by the agent.', + unit: 'percent', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{%(queriesSelector)s} / clamp_min(mesos_slave_disk_bytes{%(queriesSelector)s},1))', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + }, + } diff --git a/apache-mesos-mixin/signals/master.libsonnet b/apache-mesos-mixin/signals/master.libsonnet new file mode 100644 index 000000000..496194d43 --- /dev/null +++ b/apache-mesos-mixin/signals/master.libsonnet @@ -0,0 +1,239 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)) + ' - {{mesos_cluster}}', + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'mesos_master_uptime_seconds', + }, + + signals: { + masterUptime: { + name: 'Master uptime', + nameShort: 'Master uptime', + type: 'raw', + description: 'Master uptime in seconds', + unit: 's', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_uptime_seconds{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + cpusAvailable: { + name: 'CPUs available', + nameShort: 'CPUs', + type: 'raw', + description: 'CPUs available in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_cpus{%(queriesSelector)s, type="total"})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + memoryAvailable: { + name: 'Memory available', + nameShort: 'Memory', + type: 'raw', + description: 'Memory available in the cluster', + unit: 'bytes', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_mem{%(queriesSelector)s, type="total"})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + gpusAvailable: { + name: 'GPUs available', + nameShort: 'GPUs', + type: 'raw', + description: 'GPUs available in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_gpus{%(queriesSelector)s, type="total"})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + diskAvailable: { + name: 'Disk available', + nameShort: 'Disk', + type: 'raw', + description: 'Disk available in the cluster', + unit: 'bytes', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_disk{%(queriesSelector)s, type="total"})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + memoryUtilization: { + name: 'Memory utilization', + nameShort: 'Memory %', + type: 'raw', + description: 'Memory utilization in the cluster', + unit: 'percent', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_mem{%(queriesSelector)s, type="percent"})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + diskUtilization: { + name: 'Disk utilization', + nameShort: 'Disk %', + type: 'raw', + description: 'Disk utilization in the cluster', + unit: 'percent', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_disk{%(queriesSelector)s, type="percent"})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + eventsInQueue: { + name: 'Events in queue', + nameShort: 'Events', + type: 'raw', + description: 'Events in queue in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster, type) (mesos_master_event_queue_length{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}} - {{type}}', + }, + }, + }, + + messages: { + name: 'Messages', + nameShort: 'Messages', + type: 'raw', + description: 'Messages in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster, type) (increase(mesos_master_messages{%(queriesSelector)s}[$__interval:] offset $__interval)) > 0', + legendCustomTemplate: '{{mesos_cluster}} - {{type}}', + }, + }, + }, + + registrarStateStore: { + name: 'Registrar state store', + nameShort: 'Registrar state store', + type: 'raw', + description: 'Registrar state store in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_registrar_state_store_ms{%(queriesSelector)s, type="mean"})', + legendCustomTemplate: '{{mesos_cluster}} - store', + }, + }, + }, + + registrarStateFetch: { + name: 'Registrar state fetch', + nameShort: 'Registrar state fetch', + type: 'raw', + description: 'Registrar state fetch in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_registrar_state_fetch_ms{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}} - fetch', + }, + }, + }, + + registrarLogRecovered: { + name: 'Registrar log recovered', + nameShort: 'Registrar log recovered', + type: 'raw', + description: 'Registrar log recovered in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_registrar_log_recovered{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + allocationRuns: { + name: 'Allocation runs', + nameShort: 'Allocation runs', + type: 'raw', + description: 'Allocation runs in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (rate(mesos_master_allocation_run_ms_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + allocationDuration: { + name: 'Allocation duration', + nameShort: 'Allocation duration', + type: 'raw', + description: 'Allocation duration in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_allocation_run_ms{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + allocationLatency: { + name: 'Allocation latency', + nameShort: 'Allocation latency', + type: 'raw', + description: 'Allocation latency in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + + eventQueueDispatches: { + name: 'Event queue dispatches', + nameShort: 'Event queue dispatches', + type: 'raw', + description: 'Event queue dispatches in the cluster', + unit: 'none', + sources: { + prometheus: { + expr: 'max by(mesos_cluster) (mesos_master_event_queue_dispatches{%(queriesSelector)s})', + legendCustomTemplate: '{{mesos_cluster}}', + }, + }, + }, + }, + } From 16d712151c52721c629a84008fa1fba4523dd59a Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 8 Oct 2025 11:57:57 -0400 Subject: [PATCH 2/4] touch up based off testing --- apache-mesos-mixin/dashboards.libsonnet | 2 +- .../dashboards_out/apache-mesos-logs.json | 317 ------ .../dashboards_out/apache-mesos-overview.json | 921 ------------------ apache-mesos-mixin/links.libsonnet | 2 +- apache-mesos-mixin/mixin.libsonnet | 1 + apache-mesos-mixin/panels.libsonnet | 19 +- apache-mesos-mixin/rows.libsonnet | 10 +- apache-mesos-mixin/signals/master.libsonnet | 2 +- 8 files changed, 26 insertions(+), 1248 deletions(-) delete mode 100644 apache-mesos-mixin/dashboards_out/apache-mesos-logs.json delete mode 100644 apache-mesos-mixin/dashboards_out/apache-mesos-overview.json diff --git a/apache-mesos-mixin/dashboards.libsonnet b/apache-mesos-mixin/dashboards.libsonnet index 8cafb1b9e..2df1a3b40 100644 --- a/apache-mesos-mixin/dashboards.libsonnet +++ b/apache-mesos-mixin/dashboards.libsonnet @@ -30,7 +30,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; vars.multiInstance, uid + '_overview', tags, - links { mesosOverview+:: {} }, + links { apacheMesosOverview+:: {} }, annotations, timezone, refresh, diff --git a/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json b/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json deleted file mode 100644 index 913ca51e7..000000000 --- a/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json +++ /dev/null @@ -1,317 +0,0 @@ -{ - "editable": false, - "id": null, - "links": [ - { - "keepTime": true, - "title": "Apache Mesos overview", - "type": "link", - "url": "/d/apachemesos_overview" - } - ], - "panels": [ - { - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "description": "Logs volume grouped by \"level\" label.", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 50, - "stacking": { - "mode": "normal" - } - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "purple", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "(E|e)(rr.*|RR.*)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "(W|w)(arn.*|ARN.*|rn|RN)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "(T|t)(race|RACE)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "light-blue", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "logs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "text", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "maxDataPoints": 100, - "options": { - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "expr": "sum by (level) (count_over_time({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", - "legendFormat": "{{ level }}" - } - ], - "title": "Logs volume", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value", - "renamePattern": "logs" - } - } - ], - "type": "timeseries" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "gridPos": { - "h": 18, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 2, - "options": { - "dedupStrategy": "exact", - "enableLogDetails": true, - "prettifyLogMessage": true, - "showTime": false, - "wrapLogMessage": false - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "expr": "{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n" - } - ], - "title": "Logs", - "type": "logs" - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "apache-mesos-mixin" - ], - "templating": { - "list": [ - { - "label": "Loki data source", - "name": "loki_datasource", - "query": "loki", - "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", - "type": "datasource" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Job", - "multi": true, - "name": "job", - "query": "label_values({job=\"integrations/apache-mesos\"}, job)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Mesos cluster", - "multi": true, - "name": "mesos_cluster", - "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\"}, mesos_cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\"}, cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Level", - "multi": true, - "name": "level", - "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\"}, level)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "current": { - "selected": false, - "text": "", - "value": "" - }, - "label": "Regex search", - "name": "regex_search", - "options": [ - { - "selected": true, - "text": "", - "value": "" - } - ], - "query": "", - "type": "textbox" - }, - { - "hide": 2, - "label": "Prometheus data source", - "name": "prometheus_datasource", - "query": "prometheus", - "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timezone": "default", - "title": "Apache Mesos logs", - "uid": "apachemesos-logs" - } \ No newline at end of file diff --git a/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json b/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json deleted file mode 100644 index 29e721e39..000000000 --- a/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json +++ /dev/null @@ -1,921 +0,0 @@ -{ - "editable": false, - "id": null, - "links": [ - { - "keepTime": true, - "title": "Apache Mesos logs", - "type": "link", - "url": "/d/apachemesos-logs" - }, - { - "keepTime": true, - "title": "Apache Mesos overview", - "type": "link", - "url": "/d/apachemesos_overview" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 0, - "x": 0, - "y": 0 - }, - "id": 1, - "panels": [ ], - "title": "Master overview", - "type": "row" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Master uptime in seconds", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 2, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Master uptime" - } - ], - "title": "Master uptime", - "type": "stat" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "CPUs available in the cluster", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 4, - "y": 1 - }, - "id": 3, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_cpus{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "CPUs available" - } - ], - "title": "CPUS available", - "type": "stat" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Memory available in the cluster", - "fieldConfig": { - "defaults": { - "unit": "bytes" - } - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 9, - "y": 1 - }, - "id": 4, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_mem{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Memory available" - } - ], - "title": "Memory available", - "type": "stat" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "GPUs available in the cluster", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 14, - "y": 1 - }, - "id": 5, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_gpus{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "GPUs available" - } - ], - "title": "GPUs available", - "type": "stat" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Disk available in the cluster", - "fieldConfig": { - "defaults": { - "unit": "bytes" - } - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 19, - "y": 1 - }, - "id": 6, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_disk{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Disk available" - } - ], - "title": "Disk available", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Memory utilization in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "percent" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_mem{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"percent\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Memory utilization" - } - ], - "title": "Memory utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Disk utilization in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "percent" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_disk{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"percent\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Disk utilization" - } - ], - "title": "Disk utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Events in queue in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster, type) (mesos_master_event_queue_length{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}} - {{type}}", - "refId": "Events in queue" - } - ], - "title": "Events in queue", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Messages in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster, type) (increase(mesos_master_messages{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset $__interval)) > 0", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}} - {{type}}", - "refId": "Messages" - } - ], - "title": "Messages", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Registrar state store and fetch in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "ms" - } - }, - "gridPos": { - "h": 8, - "w": 18, - "x": 0, - "y": 25 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_registrar_state_store_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"mean\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}} - store", - "refId": "Registrar state store" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_registrar_state_fetch_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}} - fetch", - "refId": "Registrar state fetch" - } - ], - "title": "Registrar state", - "type": "timeseries" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Registrar log recovered in the cluster", - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 25 - }, - "id": 12, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_registrar_log_recovered{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Registrar log recovered" - } - ], - "title": "Registrar log recovered", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Allocation runs in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 33 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (rate(mesos_master_allocation_run_ms_count{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Allocation runs" - } - ], - "title": "Allocation runs", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Allocation duration in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "ms" - } - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 33 - }, - "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_allocation_run_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Allocation duration" - } - ], - "title": "Allocation duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Allocation latency in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "ms" - } - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 12, - "y": 33 - }, - "id": 15, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Allocation latency" - } - ], - "title": "Allocation latency", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Event queue dispatches in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 33 - }, - "id": 16, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (mesos_master_event_queue_dispatches{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Event queue dispatches" - } - ], - "title": "Event queue dispatches", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 0, - "x": 0, - "y": 41 - }, - "id": 17, - "panels": [ ], - "title": "Agent overview", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Memory utilization in the cluster", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "percent" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"} / clamp_min(mesos_slave_mem_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"},1))", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Memory utilization" - } - ], - "title": "Agent memory utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "The percentage of allocated disk storage in use by the agent.", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "unit": "percent" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"} / clamp_min(mesos_slave_disk_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"},1))", - "format": "time_series", - "instant": false, - "legendFormat": "{{mesos_cluster}}", - "refId": "Disk utilization" - } - ], - "title": "Agent disk utilization", - "type": "timeseries" - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "apache-mesos-mixin" - ], - "templating": { - "list": [ - { - "label": "Prometheus data source", - "name": "prometheus_datasource", - "query": "prometheus", - "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", - "type": "datasource" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Job", - "multi": true, - "name": "job", - "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\"}, job)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Mesos cluster", - "multi": true, - "name": "mesos_cluster", - "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\"}, mesos_cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\"}, cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Instance", - "multi": true, - "name": "instance", - "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\"}, instance)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "hide": 2, - "label": "Loki data source", - "name": "loki_datasource", - "query": "loki", - "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timezone": "default", - "title": "Apache Mesos overview", - "uid": "apachemesos_overview" - } \ No newline at end of file diff --git a/apache-mesos-mixin/links.libsonnet b/apache-mesos-mixin/links.libsonnet index 51e1cce73..48e5d80b2 100644 --- a/apache-mesos-mixin/links.libsonnet +++ b/apache-mesos-mixin/links.libsonnet @@ -4,7 +4,7 @@ local g = import './g.libsonnet'; local link = g.dashboard.link, new(this): { - overview: + apacheMesosOverview: link.link.new('Apache Mesos overview', '/d/' + this.grafana.dashboards['apache-mesos-overview.json'].uid) + link.link.options.withKeepTime(true), } diff --git a/apache-mesos-mixin/mixin.libsonnet b/apache-mesos-mixin/mixin.libsonnet index fde40c262..d51e911b0 100644 --- a/apache-mesos-mixin/mixin.libsonnet +++ b/apache-mesos-mixin/mixin.libsonnet @@ -13,6 +13,7 @@ local mixin = mixinlib.new() local k8s_patch = { mesos_cluster+: { label: 'Mesos cluster', + allValue: '.*', }, cluster+: { allValue: '.*', diff --git a/apache-mesos-mixin/panels.libsonnet b/apache-mesos-mixin/panels.libsonnet index d1749f563..81135b0b8 100644 --- a/apache-mesos-mixin/panels.libsonnet +++ b/apache-mesos-mixin/panels.libsonnet @@ -12,6 +12,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.master.masterUptime.asTarget() ) + g.panel.stat.standardOptions.withUnit('s') + + g.panel.stat.standardOptions.color.withMode('fixed') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none') + g.panel.stat.panelOptions.withDescription('Master uptime in seconds'), cpusAvailablePanel: g.panel.stat.new('CPUS available') @@ -19,6 +22,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.master.cpusAvailable.asTarget() ) + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withMode('fixed') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none') + g.panel.stat.panelOptions.withDescription('CPUs available in the cluster'), memoryAvailablePanel: g.panel.stat.new('Memory available') @@ -26,6 +32,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.master.memoryAvailable.asTarget() ) + g.panel.stat.standardOptions.withUnit('bytes') + + g.panel.stat.standardOptions.color.withMode('fixed') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none') + g.panel.stat.panelOptions.withDescription('Memory available in the cluster'), gpusAvailablePanel: g.panel.stat.new('GPUs available') @@ -33,6 +42,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.master.gpusAvailable.asTarget() ) + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withMode('fixed') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none') + g.panel.stat.panelOptions.withDescription('GPUs available in the cluster'), diskAvailablePanel: @@ -41,6 +53,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.master.diskAvailable.asTarget() ) + g.panel.stat.standardOptions.withUnit('bytes') + + g.panel.stat.standardOptions.color.withMode('fixed') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none') + g.panel.stat.panelOptions.withDescription('Disk available in the cluster'), memoryUtilizationPanel: @@ -76,7 +91,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Messages', targets=[ - signals.master.messages.asTarget(), + signals.master.messages.asTarget() { interval: '2m' }, ] ) + g.panel.timeSeries.panelOptions.withDescription('Messages in the cluster'), @@ -103,7 +118,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Allocation runs', targets=[ - signals.master.allocationRuns.asTarget(), + signals.master.allocationRuns.asTarget() { interval: '2m' }, ] ) + g.panel.timeSeries.panelOptions.withDescription('Allocation runs in the cluster'), diff --git a/apache-mesos-mixin/rows.libsonnet b/apache-mesos-mixin/rows.libsonnet index d0cd66e92..e133ddcfb 100644 --- a/apache-mesos-mixin/rows.libsonnet +++ b/apache-mesos-mixin/rows.libsonnet @@ -9,11 +9,11 @@ local g = import './g.libsonnet'; g.panel.row.new('Master overview') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.masterUptimePanel + g.panel.timeSeries.gridPos.withW(4), - panels.cpusAvailablePanel + g.panel.timeSeries.gridPos.withW(5), - panels.memoryAvailablePanel + g.panel.timeSeries.gridPos.withW(5), - panels.gpusAvailablePanel + g.panel.timeSeries.gridPos.withW(5), - panels.diskAvailablePanel + g.panel.timeSeries.gridPos.withW(5), + panels.masterUptimePanel + g.panel.timeSeries.gridPos.withW(4) + g.panel.timeSeries.gridPos.withH(6), + panels.cpusAvailablePanel + g.panel.timeSeries.gridPos.withW(5) + g.panel.timeSeries.gridPos.withH(6), + panels.memoryAvailablePanel + g.panel.timeSeries.gridPos.withW(5) + g.panel.timeSeries.gridPos.withH(6), + panels.gpusAvailablePanel + g.panel.timeSeries.gridPos.withW(5) + g.panel.timeSeries.gridPos.withH(6), + panels.diskAvailablePanel + g.panel.timeSeries.gridPos.withW(5) + g.panel.timeSeries.gridPos.withH(6), panels.memoryUtilizationPanel + g.panel.timeSeries.gridPos.withW(12), panels.diskUtilizationPanel + g.panel.timeSeries.gridPos.withW(12), panels.eventsInQueuePanel + g.panel.timeSeries.gridPos.withW(12), diff --git a/apache-mesos-mixin/signals/master.libsonnet b/apache-mesos-mixin/signals/master.libsonnet index 496194d43..52495624b 100644 --- a/apache-mesos-mixin/signals/master.libsonnet +++ b/apache-mesos-mixin/signals/master.libsonnet @@ -188,7 +188,7 @@ function(this) unit: 'none', sources: { prometheus: { - expr: 'max by(mesos_cluster) (rate(mesos_master_allocation_run_ms_count{%(queriesSelector)s}[$__rate_interval]))', + expr: 'max by(mesos_cluster) (increase(mesos_master_allocation_run_ms_count{%(queriesSelector)s}[$__interval] offset $__interval))', legendCustomTemplate: '{{mesos_cluster}}', }, }, From e037c20cb98dd2bc4183cdb06a149ce89023ab2f Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 9 Oct 2025 10:52:54 -0400 Subject: [PATCH 3/4] forgot to commit dashboards_out --- .../dashboards_out/apache-mesos-logs.json | 317 ++++++ .../dashboards_out/apache-mesos-overview.json | 952 ++++++++++++++++++ apache-mesos-mixin/signals/agent.libsonnet | 6 +- apache-mesos-mixin/signals/master.libsonnet | 25 +- 4 files changed, 1278 insertions(+), 22 deletions(-) create mode 100644 apache-mesos-mixin/dashboards_out/apache-mesos-logs.json create mode 100644 apache-mesos-mixin/dashboards_out/apache-mesos-overview.json diff --git a/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json b/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json new file mode 100644 index 000000000..913ca51e7 --- /dev/null +++ b/apache-mesos-mixin/dashboards_out/apache-mesos-logs.json @@ -0,0 +1,317 @@ +{ + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Apache Mesos overview", + "type": "link", + "url": "/d/apachemesos_overview" + } + ], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Logs volume grouped by \"level\" label.", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 50, + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)(rr.*|RR.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(W|w)(arn.*|ARN.*|rn|RN)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(T|t)(race|RACE)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "logs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "maxDataPoints": 100, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "sum by (level) (count_over_time({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "legendFormat": "{{ level }}" + } + ], + "title": "Logs volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value", + "renamePattern": "logs" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showTime": false, + "wrapLogMessage": false + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "apache-mesos-mixin" + ], + "templating": { + "list": [ + { + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values({job=\"integrations/apache-mesos\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Mesos cluster", + "multi": true, + "name": "mesos_cluster", + "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\"}, mesos_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Level", + "multi": true, + "name": "level", + "query": "label_values({job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\"}, level)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "Regex search", + "name": "regex_search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "type": "textbox" + }, + { + "hide": 2, + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "default", + "title": "Apache Mesos logs", + "uid": "apachemesos-logs" + } \ No newline at end of file diff --git a/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json b/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json new file mode 100644 index 000000000..c050820af --- /dev/null +++ b/apache-mesos-mixin/dashboards_out/apache-mesos-overview.json @@ -0,0 +1,952 @@ +{ + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Apache Mesos logs", + "type": "link", + "url": "/d/apachemesos-logs" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Master overview", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Master uptime in seconds", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-green", + "mode": "fixed" + }, + "unit": "s" + } + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "graphMode": "none" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Master uptime" + } + ], + "title": "Master uptime", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "CPUs available in the cluster", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-green", + "mode": "fixed" + }, + "unit": "none" + } + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 4, + "y": 1 + }, + "id": 3, + "options": { + "graphMode": "none" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_cpus{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "CPUs available" + } + ], + "title": "CPUS available", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Memory available in the cluster", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-green", + "mode": "fixed" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 9, + "y": 1 + }, + "id": 4, + "options": { + "graphMode": "none" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_mem{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Memory available" + } + ], + "title": "Memory available", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "GPUs available in the cluster", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-green", + "mode": "fixed" + }, + "unit": "none" + } + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 14, + "y": 1 + }, + "id": 5, + "options": { + "graphMode": "none" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_gpus{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "GPUs available" + } + ], + "title": "GPUs available", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Disk available in the cluster", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-green", + "mode": "fixed" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 19, + "y": 1 + }, + "id": 6, + "options": { + "graphMode": "none" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_disk{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"total\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Disk available" + } + ], + "title": "Disk available", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory utilization in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_mem{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"percent\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Memory utilization" + } + ], + "title": "Memory utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Disk utilization in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_disk{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"percent\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Disk utilization" + } + ], + "title": "Disk utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Events in queue in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster, type) (mesos_master_event_queue_length{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}} - {{type}}", + "refId": "Events in queue" + } + ], + "title": "Events in queue", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Messages in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster, type) (increase(mesos_master_messages{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset $__interval)) > 0", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{mesos_cluster}} - {{type}}", + "refId": "Messages" + } + ], + "title": "Messages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Registrar state store and fetch in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 18, + "x": 0, + "y": 25 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_registrar_state_store_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", type=\"mean\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}} - store", + "refId": "Registrar state store" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_registrar_state_fetch_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}} - fetch", + "refId": "Registrar state fetch" + } + ], + "title": "Registrar state", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Registrar log recovered in the cluster", + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 25 + }, + "id": 12, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_registrar_log_recovered{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Registrar log recovered" + } + ], + "title": "Registrar log recovered", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Allocation runs in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 33 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (increase(mesos_master_allocation_run_ms_count{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval] offset $__interval))", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{mesos_cluster}}", + "refId": "Allocation runs" + } + ], + "title": "Allocation runs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Allocation duration in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 33 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_allocation_run_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Allocation duration" + } + ], + "title": "Allocation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Allocation latency in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 33 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Allocation latency" + } + ], + "title": "Allocation latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Event queue dispatches in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 33 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (mesos_master_event_queue_dispatches{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Event queue dispatches" + } + ], + "title": "Event queue dispatches", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 41 + }, + "id": 17, + "panels": [ ], + "title": "Agent overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory utilization in the cluster", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"} / clamp_min(mesos_slave_mem_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"},1))", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Memory utilization" + } + ], + "title": "Agent memory utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "The percentage of allocated disk storage in use by the agent.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"} / clamp_min(mesos_slave_disk_bytes{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"},1))", + "format": "time_series", + "instant": false, + "legendFormat": "{{mesos_cluster}}", + "refId": "Disk utilization" + } + ], + "title": "Agent disk utilization", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "apache-mesos-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Mesos cluster", + "multi": true, + "name": "mesos_cluster", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\"}, mesos_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": "label_values(mesos_master_uptime_seconds{job=\"integrations/apache-mesos\",job=~\"$job\",mesos_cluster=~\"$mesos_cluster\",cluster=~\"$cluster\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "default", + "title": "Apache Mesos overview", + "uid": "apachemesos_overview" + } \ No newline at end of file diff --git a/apache-mesos-mixin/signals/agent.libsonnet b/apache-mesos-mixin/signals/agent.libsonnet index 873fdabb8..3cbfe655e 100644 --- a/apache-mesos-mixin/signals/agent.libsonnet +++ b/apache-mesos-mixin/signals/agent.libsonnet @@ -1,11 +1,11 @@ function(this) + local legendCustomTemplate = '{{mesos_cluster}}'; { - filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, instanceLabels: this.instanceLabels, enableLokiLogs: this.enableLokiLogs, - legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)) + ' - {{mesos_cluster}}', + legendCustomTemplate: legendCustomTemplate, aggLevel: 'none', aggFunction: 'avg', alertsInterval: '2m', @@ -22,7 +22,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (100 * mesos_slave_mem_used_bytes{%(queriesSelector)s} / clamp_min(mesos_slave_mem_bytes{%(queriesSelector)s},1))', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -36,7 +35,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (100 * mesos_slave_disk_used_bytes{%(queriesSelector)s} / clamp_min(mesos_slave_disk_bytes{%(queriesSelector)s},1))', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, diff --git a/apache-mesos-mixin/signals/master.libsonnet b/apache-mesos-mixin/signals/master.libsonnet index 52495624b..0c8106339 100644 --- a/apache-mesos-mixin/signals/master.libsonnet +++ b/apache-mesos-mixin/signals/master.libsonnet @@ -1,10 +1,11 @@ function(this) + local legendCustomTemplate = '{{mesos_cluster}}'; { filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, instanceLabels: this.instanceLabels, enableLokiLogs: this.enableLokiLogs, - legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)) + ' - {{mesos_cluster}}', + legendCustomTemplate: legendCustomTemplate, aggLevel: 'none', aggFunction: 'avg', alertsInterval: '2m', @@ -22,7 +23,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_uptime_seconds{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -36,7 +36,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_cpus{%(queriesSelector)s, type="total"})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -49,7 +48,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_mem{%(queriesSelector)s, type="total"})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -63,7 +61,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_gpus{%(queriesSelector)s, type="total"})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -77,21 +74,19 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_disk{%(queriesSelector)s, type="total"})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, memoryUtilization: { name: 'Memory utilization', - nameShort: 'Memory %', + nameShort: 'Memory utilization', type: 'raw', description: 'Memory utilization in the cluster', unit: 'percent', sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_mem{%(queriesSelector)s, type="percent"})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -105,7 +100,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_disk{%(queriesSelector)s, type="percent"})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -119,7 +113,7 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster, type) (mesos_master_event_queue_length{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}} - {{type}}', + legendCustomTemplate: legendCustomTemplate + ' - {{type}}', }, }, }, @@ -133,7 +127,7 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster, type) (increase(mesos_master_messages{%(queriesSelector)s}[$__interval:] offset $__interval)) > 0', - legendCustomTemplate: '{{mesos_cluster}} - {{type}}', + legendCustomTemplate: legendCustomTemplate + ' - {{type}}', }, }, }, @@ -147,7 +141,7 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_registrar_state_store_ms{%(queriesSelector)s, type="mean"})', - legendCustomTemplate: '{{mesos_cluster}} - store', + legendCustomTemplate: legendCustomTemplate + ' - store', }, }, }, @@ -161,7 +155,7 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_registrar_state_fetch_ms{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}} - fetch', + legendCustomTemplate: legendCustomTemplate + ' - fetch', }, }, }, @@ -175,7 +169,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_registrar_log_recovered{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -189,7 +182,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (increase(mesos_master_allocation_run_ms_count{%(queriesSelector)s}[$__interval] offset $__interval))', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -203,7 +195,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_allocation_run_ms{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -217,7 +208,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_allocation_run_latency_ms{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, @@ -231,7 +221,6 @@ function(this) sources: { prometheus: { expr: 'max by(mesos_cluster) (mesos_master_event_queue_dispatches{%(queriesSelector)s})', - legendCustomTemplate: '{{mesos_cluster}}', }, }, }, From daadd991d25d8c0840a15b42c7c50a711b359bad Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 9 Oct 2025 11:12:50 -0400 Subject: [PATCH 4/4] remove percent in name --- apache-mesos-mixin/signals/master.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache-mesos-mixin/signals/master.libsonnet b/apache-mesos-mixin/signals/master.libsonnet index 0c8106339..7243ee42a 100644 --- a/apache-mesos-mixin/signals/master.libsonnet +++ b/apache-mesos-mixin/signals/master.libsonnet @@ -93,7 +93,7 @@ function(this) diskUtilization: { name: 'Disk utilization', - nameShort: 'Disk %', + nameShort: 'Disk utilization', type: 'raw', description: 'Disk utilization in the cluster', unit: 'percent',