Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
{
prometheusAlerts+:: {
groups+: [
new(this): {
local config = this.config,

groups: [
{
name: 'varnish-cache',
rules: [
{
alert: 'VarnishCacheLowCacheHitRate',
expr: |||
increase(varnish_main_cache_hit[10m]) / (clamp_min((increase(varnish_main_cache_hit[10m]) + increase(varnish_main_cache_miss[10m])), 1)) * 100 < %(alertsWarningCacheHitRate)s and (increase(varnish_main_cache_hit[10m]) + increase(varnish_main_cache_miss[10m]) > 0)
||| % $._config,
increase(varnish_main_cache_hit{%(filteringSelector)s}[10m]) / (clamp_min((increase(varnish_main_cache_hit{%(filteringSelector)s}[10m]) + increase(varnish_main_cache_miss{%(filteringSelector)s}[10m])), 1)) * 100 < %(alertsWarningCacheHitRate)s and (increase(varnish_main_cache_hit{%(filteringSelector)s}[10m]) + increase(varnish_main_cache_miss{%(filteringSelector)s}[10m]) > 0)
||| % config,
'for': '10m',
labels: {
severity: 'warning',
Expand All @@ -19,14 +21,14 @@
(
'The Cache hit rate is {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
'which is below the threshold of %(alertsWarningCacheHitRate)s percent.'
) % $._config,
) % config,
},
},
{
alert: 'VarnishCacheHighMemoryUsage',
expr: |||
(varnish_sma_g_bytes{type="s0"} / (varnish_sma_g_bytes{type="s0"} + varnish_sma_g_space{type="s0"})) * 100 > %(alertsWarningHighMemoryUsage)s
||| % $._config,
(varnish_sma_g_bytes{%(filteringSelector)s,type="s0"} / (varnish_sma_g_bytes{%(filteringSelector)s,type="s0"} + varnish_sma_g_space{%(filteringSelector)s,type="s0"})) * 100 > %(alertsWarningHighMemoryUsage)s
||| % config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -37,14 +39,14 @@
(
'Current Memory Usage is {{ printf "%%.0f" $value }} percent on {{$labels.instance}}, ' +
'which is above the threshold of %(alertsWarningHighMemoryUsage)s percent.'
) % $._config,
) % config,
},
},
{
alert: 'VarnishCacheHighCacheEvictionRate',
expr: |||
increase(varnish_main_n_lru_nuked[5m]) > %(alertsCriticalCacheEviction)s
||| % $._config,
increase(varnish_main_n_lru_nuked{%(filteringSelector)s}[5m]) > %(alertsCriticalCacheEviction)s
||| % config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -55,14 +57,14 @@
(
'The Cache has evicted {{ printf "%%.0f" $value }} objects over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(alertsCriticalCacheEviction)s.'
) % $._config,
) % config,
},
},
{
alert: 'VarnishCacheHighSaturation',
expr: |||
varnish_main_thread_queue_len > %(alertsWarningHighSaturation)s
||| % $._config,
varnish_main_thread_queue_len{%(filteringSelector)s} > %(alertsWarningHighSaturation)s
||| % config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -73,14 +75,14 @@
(
'The thread queue length is {{ printf "%%.0f" $value }} over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(alertsWarningHighSaturation)s.'
) % $._config,
) % config,
},
},
{
alert: 'VarnishCacheSessionsDropping',
expr: |||
increase(varnish_main_sessions{type="dropped"}[5m]) > %(alertsCriticalSessionsDropped)s
||| % $._config,
increase(varnish_main_sessions{%(filteringSelector)s,type="dropped"}[5m]) > %(alertsCriticalSessionsDropped)s
||| % config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -91,14 +93,14 @@
(
'The amount of sessions dropped is {{ printf "%%.0f" $value }} over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(alertsCriticalSessionsDropped)s.'
) % $._config,
) % config,
},
},
{
alert: 'VarnishCacheBackendUnhealthy',
expr: |||
increase(varnish_main_backend_unhealthy[5m]) > %(alertsCriticalBackendUnhealthy)s
||| % $._config,
increase(varnish_main_backend_unhealthy{%(filteringSelector)s}[5m]) > %(alertsCriticalBackendUnhealthy)s
||| % config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -109,7 +111,7 @@
(
'The amount of unhealthy backend statuses detected is {{ printf "%%.0f" $value }} over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(alertsCriticalBackendUnhealthy)s.'
) % $._config,
) % config,
},
},
],
Expand Down
55 changes: 39 additions & 16 deletions varnish-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,20 +1,43 @@
{
_config+:: {
dashboardTags: ['varnish-cache-mixin'],
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
local this = self,
filteringSelector: 'job="integrations/varnish-cache"',
groupLabels: ['job', 'cluster'],
instanceLabels: ['instance'],
dashboardTags: ['varnish-mixin'],
uid: 'varnish',
dashboardNamePrefix: 'Varnish',

//alert thresholds
alertsWarningCacheHitRate: 80, //%
alertsWarningHighMemoryUsage: 90, //%
alertsCriticalCacheEviction: 0,
alertsWarningHighSaturation: 0,
alertsCriticalSessionsDropped: 0,
alertsCriticalBackendUnhealthy: 0,
enableLokiLogs: true,
enableMultiCluster: false,
multiclusterSelector: 'job=~"$job"',
varnishSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
// additional params
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

// logs lib related
enableLokiLogs: true,
logLabels: ['job', 'instance', 'cluster', 'level'],
extraLogLabels: [], // Required by logs-lib
logsVolumeGroupBy: 'level',
showLogsVolume: true,

// alert thresholds
alertsWarningCacheHitRate: 80, //%
alertsWarningHighMemoryUsage: 90, //%
alertsCriticalCacheEviction: 0,
alertsWarningHighSaturation: 0,
alertsCriticalSessionsDropped: 0,
alertsCriticalBackendUnhealthy: 0,

// metrics source for signals library
metricsSource: 'prometheus',

legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)),
signals+: {
cache: (import './signals/cache.libsonnet')(this),
requests: (import './signals/requests.libsonnet')(this),
sessions: (import './signals/sessions.libsonnet')(this),
memory: (import './signals/memory.libsonnet')(this),
network: (import './signals/network.libsonnet')(this),
threads: (import './signals/threads.libsonnet')(this),
backend: (import './signals/backend.libsonnet')(this),
},
}
88 changes: 88 additions & 0 deletions varnish-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
local g = import './g.libsonnet';
local logslib = import 'logs-lib/logs/main.libsonnet';
{
  // Handle on the outermost object, so that nested objects (where `self`
  // means something else) can still reach applyCommon.
  local lib = self,

  // Builds the dashboards for the mixin instance `this`.
  //
  // Reads `this.config` (name prefix, tags, uid, refresh/period/timezone and
  // the logs settings) and `this.grafana` (links, variables, annotations, rows).
  //
  // Returns an object keyed by dashboard file name. 'varnish-overview.json' is
  // always present; 'varnish-logs.json' is added only when
  // `this.config.enableLokiLogs` is true.
  new(this)::
    local cfg = this.config;
    local grafana = this.grafana;

    // Settings shared by every dashboard produced below.
    local titlePrefix = cfg.dashboardNamePrefix;
    local baseUid = g.util.string.slugify(cfg.uid);
    local dashTags = cfg.dashboardTags;
    local dashLinks = grafana.links;
    local dashAnnotations = grafana.annotations;
    local tz = cfg.dashboardTimezone;
    local refreshEvery = cfg.dashboardRefresh;
    local timeFrom = cfg.dashboardPeriod;

    // Rows of the overview dashboard, laid out on the grid.
    local overviewPanels =
      g.util.panel.resolveCollapsedFlagOnRows(
        g.util.grid.wrapPanels([
          grafana.rows.varnishStats,
          grafana.rows.varnishOverview,
        ])
      );

    local overviewDashboards = {
      'varnish-overview.json':
        g.dashboard.new(titlePrefix + ' overview')
        + g.dashboard.withPanels(overviewPanels)
        + lib.applyCommon(
          vars=grafana.variables.multiInstance,
          uid=baseUid + '_overview',
          tags=dashTags,
          // `+::` hides the field, so std.objectValues(links) in applyCommon
          // leaves it out (presumably to avoid a link from the overview
          // dashboard to itself).
          links=dashLinks { varnishOverview+:: {} },
          annotations=dashAnnotations,
          timezone=tz,
          refresh=refreshEvery,
          period=timeFrom,
        ),
    };

    local logsDashboards =
      if cfg.enableLokiLogs then
        {
          'varnish-logs.json':
            logslib.new(
              titlePrefix + ' logs',
              datasourceName=grafana.variables.datasources.loki.name,
              datasourceRegex=grafana.variables.datasources.loki.regex,
              filterSelector=cfg.filteringSelector,
              labels=cfg.groupLabels + cfg.extraLogLabels,
              formatParser=null,
              showLogsVolume=cfg.showLogsVolume,
            )
            {
              dashboards+: {
                logs+:
                  // Keep the variables logs-lib already generated
                  // (super.logs.templating.list) and layer the common
                  // dashboard settings from applyCommon on top.
                  lib.applyCommon(
                    vars=super.logs.templating.list,
                    uid=baseUid + '-logs',
                    tags=dashTags,
                    // Same hidden-field trick as above, for the logs link.
                    links=dashLinks { logs+:: {} },
                    annotations=dashAnnotations,
                    timezone=tz,
                    refresh=refreshEvery,
                    period=timeFrom,
                  ),
              },
              panels+: {
                // Adjust the log panel options.
                logs+:
                  g.panel.logs.options.withEnableLogDetails(true)
                  + g.panel.logs.options.withShowTime(false)
                  + g.panel.logs.options.withWrapLogMessage(false),
              },
              variables+: {
                // Add the prometheus datasource variable, needed for
                // annotations processing.
                toArray+: [
                  grafana.variables.datasources.prometheus { hide: 2 },
                ],
              },
            }.dashboards.logs,
        }
      else
        {};

    overviewDashboards + logsDashboards,

  // Returns the dashboard mixin shared by all dashboards built in this file:
  // tags, uid, links, timezone, refresh interval, time range start, template
  // variables and annotations.
  //
  // `links` and `annotations` are objects; only the values of their visible
  // fields are passed on (std.objectValues skips hidden fields).
  applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
    local identity =
      g.dashboard.withTags(tags)
      + g.dashboard.withUid(uid)
      + g.dashboard.withLinks(std.objectValues(links));
    local timeSettings =
      g.dashboard.withTimezone(timezone)
      + g.dashboard.withRefresh(refresh)
      + g.dashboard.time.withFrom(period);
    local templating =
      g.dashboard.withVariables(vars)
      + g.dashboard.withAnnotations(std.objectValues(annotations));

    identity + timeSettings + templating,
}
Loading
Loading