Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
prometheusAlerts+:: {
new(this): {
groups+: [
{
name: 'apache-mesos',
Expand All @@ -8,7 +8,7 @@
alert: 'ApacheMesosHighMemoryUsage',
expr: |||
min without(instance, job, type) (mesos_master_mem{type="percent"}) > %(alertsWarningMemoryUsage)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -19,14 +19,14 @@
(
'{{ printf "%%.0f" $value }} percent memory usage on {{$labels.mesos_cluster}}, ' +
'which is above the threshold of %(alertsWarningMemoryUsage)s.'
) % $._config,
) % this.config,
},
},
{
alert: 'ApacheMesosHighDiskUsage',
expr: |||
min without(instance, job, type) (mesos_master_disk{type="percent"}) > %(alertsCriticalDiskUsage)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -37,14 +37,14 @@
(
'{{ printf "%%.0f" $value }} percent disk usage on {{$labels.mesos_cluster}}, ' +
'which is above the threshold of %(alertsCriticalDiskUsage)s.'
) % $._config,
) % this.config,
},
},
{
alert: 'ApacheMesosUnreachableTasks',
expr: |||
max without(instance, job, state) (mesos_master_task_states_current{state="unreachable"}) > %(alertsWarningUnreachableTask)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -55,14 +55,14 @@
(
'{{ printf "%%.0f" $value }} unreachable tasks on {{$labels.mesos_cluster}}, ' +
'which is above the threshold of %(alertsWarningUnreachableTask)s.'
) % $._config,
) % this.config,
},
},
{
alert: 'ApacheMesosNoLeaderElected',
expr: |||
max without(instance, job) (mesos_master_elected) == 0
||| % $._config,
||| % this.config,
'for': '1m',
labels: {
severity: 'critical',
Expand All @@ -72,14 +72,14 @@
description:
(
'There is no cluster coordinator on {{$labels.mesos_cluster}}.'
) % $._config,
) % this.config,
},
},
{
alert: 'ApacheMesosInactiveAgents',
expr: |||
max without(instance, job, state) (mesos_master_slaves_state{state=~"connected_inactive|disconnected_inactive"}) > 1
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -89,7 +89,7 @@
description:
(
'{{ printf "%%.0f" $value }} inactive agent clients over the last 5m which is above the threshold of 1.'
) % $._config,
) % this.config,
},
},
],
Expand Down
45 changes: 32 additions & 13 deletions apache-mesos-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,17 +1,36 @@
{
_config+:: {
dashboardTags: ['apache-mesos-mixin'],
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

// alerts thresholds
alertsWarningMemoryUsage: 90,
alertsCriticalDiskUsage: 90,
alertsWarningUnreachableTask: 3,
enableLokiLogs: true,
enableMultiCluster: false,
mesosSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
multiclusterSelector: 'job=~"$job"',
local this = self,
filteringSelector: 'job="integrations/apache-mesos"',
groupLabels: ['job', 'mesos_cluster', 'cluster'],
instanceLabels: ['instance'],

dashboardTags: [self.uid + '-mixin'],
uid: 'apache-mesos',
dashboardNamePrefix: 'Apache Mesos',
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

// Logging configuration
enableLokiLogs: true,
logLabels: ['job', 'cluster', 'instance'],
extraLogLabels: ['level'], // Required by logs-lib
logsVolumeGroupBy: 'level',
showLogsVolume: true,

// alerts thresholds
alertsWarningMemoryUsage: 90,
alertsCriticalDiskUsage: 90,
alertsWarningUnreachableTask: 3,

// metrics source for signals library
metricsSource: 'prometheus',

// signals configuration
signals+: {
overview: (import './signals/overview.libsonnet')(this),
master: (import './signals/master.libsonnet')(this),
agent: (import './signals/agent.libsonnet')(this),
},
}
77 changes: 77 additions & 0 deletions apache-mesos-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
local g = import './g.libsonnet';
local logslib = import 'logs-lib/logs/main.libsonnet';

{
local root = self,
// Build this mixin's dashboards as an object keyed by output file name.
// Reads `this.config.*` (uid, tags, name prefix, time settings, log settings)
// and `this.grafana.*` (links, variables, annotations, rows).
// The overview dashboard is always present; the logs dashboard is added only
// when `this.config.enableLokiLogs` is true.
new(this)::
  local cfg = this.config;
  local grafana = this.grafana;
  local baseUid = g.util.string.slugify(cfg.uid);
  local lokiDatasource = grafana.variables.datasources.loki;

  // Overview rows wrapped onto the grid, then passed through
  // resolveCollapsedFlagOnRows.
  local overviewPanels =
    g.util.panel.resolveCollapsedFlagOnRows(
      g.util.grid.wrapPanels([
        grafana.rows.masterOverview,
        grafana.rows.agentOverview,
      ])
    );

  local overviewDashboards = {
    'apache-mesos-overview.json':
      g.dashboard.new(cfg.dashboardNamePrefix + ' overview')
      + g.dashboard.withPanels(overviewPanels)
      + root.applyCommon(
        grafana.variables.multiInstance,
        baseUid + '_overview',
        cfg.dashboardTags,
        // `+::` marks the field hidden, so std.objectValues(links) in
        // applyCommon leaves this entry out of the overview's own links.
        grafana.links { apacheMesosOverview+:: {} },
        grafana.annotations,
        cfg.dashboardTimezone,
        cfg.dashboardRefresh,
        cfg.dashboardPeriod,
      ),
  };

  local logsDashboards =
    if cfg.enableLokiLogs then {
      'apache-mesos-logs.json':
        logslib.new(
          cfg.dashboardNamePrefix + ' logs',
          datasourceName=lokiDatasource.name,
          datasourceRegex=lokiDatasource.regex,
          filterSelector=cfg.filteringSelector,
          labels=cfg.groupLabels + cfg.extraLogLabels,
          formatParser=null,
          showLogsVolume=cfg.showLogsVolume,
        ) {
          dashboards+: {
            logs+:
              // `super` here is the logs-lib `dashboards` object; its own
              // variable list is reused for the logs dashboard.
              root.applyCommon(
                super.logs.templating.list,
                baseUid + '-logs',
                cfg.dashboardTags,
                grafana.links { logs+:: {} },
                grafana.annotations,
                cfg.dashboardTimezone,
                cfg.dashboardRefresh,
                cfg.dashboardPeriod,
              ),
          },
          panels+: {
            logs+:
              g.panel.logs.options.withEnableLogDetails(true)
              + g.panel.logs.options.withShowTime(false)
              + g.panel.logs.options.withWrapLogMessage(false),
          },
          variables+: {
            toArray+: [
              grafana.variables.datasources.prometheus { hide: 2 },
            ],
          },
        }.dashboards.logs,
    } else {};

  overviewDashboards + logsDashboards,

// Dashboard settings shared by every dashboard built in `new`.
// Returns a grafonnet dashboard fragment to be added (`+`) onto a dashboard.
//   vars        - template variables passed to withVariables
//   uid         - dashboard uid
//   tags        - dashboard tags
//   links       - object of links; only visible field values are used
//   annotations - object of annotations; only visible field values are used
//                 (assumes an object like `links` - confirm against
//                 this.grafana.annotations)
//   timezone    - dashboard timezone
//   refresh     - dashboard refresh interval
//   period      - start of the default time range (e.g. 'now-1h')
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
  g.dashboard.withTags(tags)
  + g.dashboard.withUid(uid)
  + g.dashboard.withLinks(std.objectValues(links))
  + g.dashboard.withTimezone(timezone)
  + g.dashboard.withRefresh(refresh)
  + g.dashboard.time.withFrom(period)
  + g.dashboard.withVariables(vars)
  // `annotations` was previously accepted but never applied, so callers'
  // annotations were silently dropped from the generated dashboards.
  + g.dashboard.withAnnotations(std.objectValues(annotations)),
}
Loading
Loading