Commit 0ae142a

feat: drop scrape_errors_total metric with a configurable interval (burningalchemist#551)
* feat: add scrape_error_drop_interval param to clear error metrics
* refactor: extract logic from main
* style: wording
1 parent: d8e4c6f

File tree: 5 files changed, 64 additions (+), 30 deletions (-)

  cmd/sql_exporter/main.go
  config/global_config.go
  documentation/sql_exporter.yml
  exporter.go
  reload.go

cmd/sql_exporter/main.go

Lines changed: 41 additions & 21 deletions
```diff
@@ -16,6 +16,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	info "github.com/prometheus/client_golang/prometheus/collectors/version"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
+	"github.com/prometheus/common/model"
 	"github.com/prometheus/common/promlog"
 	"github.com/prometheus/common/version"
 	"github.com/prometheus/exporter-toolkit/web"
@@ -87,12 +88,17 @@ func main() {
 	}

 	klog.Warningf("Starting SQL exporter %s %s", version.Info(), version.BuildContext())
-
 	exporter, err := sql_exporter.NewExporter(*configFile)
 	if err != nil {
 		klog.Fatalf("Error creating exporter: %s", err)
 	}

+	// Start the scrape_errors_total metric drop ticker if configured.
+	startScrapeErrorsDropTicker(exporter, exporter.Config().Globals.ScrapeErrorDropInterval)
+
+	// Start signal handler to reload collector and target data.
+	signalHandler(exporter, *configFile)
+
 	// Setup and start webserver.
 	http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { http.Error(w, "OK", http.StatusOK) })
 	http.HandleFunc("/", HomeHandlerFunc(*metricsPath))
@@ -102,24 +108,9 @@ func main() {
 	http.Handle("/sql_exporter_metrics", promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{}))
 	// Expose refresh handler to reload collectors and targets
 	if *enableReload {
-		http.HandleFunc("/reload", reloadHandler(exporter))
+		http.HandleFunc("/reload", reloadHandler(exporter, *configFile))
 	}

-	// Handle SIGHUP for reloading the configuration
-	go func() {
-		c := make(chan os.Signal, 1)
-		signal.Notify(c, syscall.SIGHUP)
-		for {
-			<-c
-			err := sql_exporter.Reload(exporter, configFile)
-			if err != nil {
-				klog.Error(err)
-				continue
-			}
-		}
-	}()
-
-	// Start the web server
 	server := &http.Server{Addr: *listenAddress, ReadHeaderTimeout: httpReadHeaderTimeout}
 	if err := web.ListenAndServe(server, &web.FlagConfig{
 		WebListenAddresses: &([]string{*listenAddress}),
@@ -129,14 +120,43 @@ func main() {
 	}
 }

-// reloadHandler returns a handler that reloads collectors and targets
-func reloadHandler(e sql_exporter.Exporter) func(http.ResponseWriter, *http.Request) {
+// reloadHandler returns a handler that reloads collector and target data.
+func reloadHandler(e sql_exporter.Exporter, configFile string) func(http.ResponseWriter, *http.Request) {
 	return func(w http.ResponseWriter, r *http.Request) {
-		err := sql_exporter.Reload(e, configFile)
-		if err != nil {
+		if err := sql_exporter.Reload(e, &configFile); err != nil {
+			klog.Error(err)
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.WriteHeader(http.StatusOK)
 	}
 }
+
+// signalHandler listens for SIGHUP signals and reloads the collector and target data.
+func signalHandler(e sql_exporter.Exporter, configFile string) {
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, syscall.SIGHUP)
+	go func() {
+		for range c {
+			if err := sql_exporter.Reload(e, &configFile); err != nil {
+				klog.Error(err)
+			}
+		}
+	}()
+}
+
+// startScrapeErrorsDropTicker starts a ticker that periodically drops scrape error metrics.
+func startScrapeErrorsDropTicker(exporter sql_exporter.Exporter, interval model.Duration) {
+	if interval <= 0 {
+		return
+	}
+
+	ticker := time.NewTicker(time.Duration(interval))
+	klog.Warning("Started scrape_errors_total metrics drop ticker: ", interval)
+	go func() {
+		defer ticker.Stop()
+		for range ticker.C {
+			exporter.DropErrorMetrics()
+		}
+	}()
+}
```
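The `interval <= 0` guard above is not just a convenience: `time.NewTicker` panics on a non-positive duration, so a zero value cleanly disables the feature. Here is a minimal, self-contained sketch of the same pattern; `startDropTicker` and its `reset` callback are hypothetical stand-ins for `startScrapeErrorsDropTicker` and `exporter.DropErrorMetrics()`, and only the ticker plumbing mirrors the committed code.

```go
package main

import (
	"log"
	"time"
)

// startDropTicker is a hypothetical stand-in for startScrapeErrorsDropTicker;
// reset stands in for exporter.DropErrorMetrics().
func startDropTicker(interval time.Duration, reset func()) {
	if interval <= 0 {
		return // time.NewTicker panics on d <= 0, so 0 means "disabled"
	}
	ticker := time.NewTicker(interval)
	go func() {
		defer ticker.Stop()
		for range ticker.C {
			reset()
		}
	}()
}

func main() {
	startDropTicker(2*time.Second, func() { log.Println("scrape error metrics dropped") })
	time.Sleep(5 * time.Second) // keep the process alive long enough to observe two ticks
}
```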

config/global_config.go

Lines changed: 10 additions & 6 deletions
```diff
@@ -9,12 +9,14 @@ import (

 // GlobalConfig contains globally applicable defaults.
 type GlobalConfig struct {
-	MinInterval     model.Duration `yaml:"min_interval" env:"MIN_INTERVAL"`                       // minimum interval between query executions, default is 0
-	ScrapeTimeout   model.Duration `yaml:"scrape_timeout" env:"SCRAPE_TIMEOUT"`                   // per-scrape timeout, global
-	TimeoutOffset   model.Duration `yaml:"scrape_timeout_offset" env:"SCRAPE_TIMEOUT_OFFSET"`     // offset to subtract from timeout in seconds
-	MaxConnLifetime time.Duration  `yaml:"max_connection_lifetime" env:"MAX_CONNECTION_LIFETIME"` // maximum amount of time a connection may be reused to any one target
-	MaxConns        int            `yaml:"max_connections" env:"MAX_CONNECTIONS"`                 // maximum number of open connections to any one target
-	MaxIdleConns    int            `yaml:"max_idle_connections" env:"MAX_IDLE_CONNECTIONS"`       // maximum number of idle connections to any one target
+	MinInterval             model.Duration `yaml:"min_interval" env:"MIN_INTERVAL"`                             // minimum interval between query executions, default is 0
+	ScrapeTimeout           model.Duration `yaml:"scrape_timeout" env:"SCRAPE_TIMEOUT"`                         // per-scrape timeout, global
+	TimeoutOffset           model.Duration `yaml:"scrape_timeout_offset" env:"SCRAPE_TIMEOUT_OFFSET"`           // offset to subtract from timeout in seconds
+	ScrapeErrorDropInterval model.Duration `yaml:"scrape_error_drop_interval" env:"SCRAPE_ERROR_DROP_INTERVAL"` // interval to drop scrape errors from the error counter, default is 0
+	MaxConnLifetime         time.Duration  `yaml:"max_connection_lifetime" env:"MAX_CONNECTION_LIFETIME"`       // maximum amount of time a connection may be reused to any one target
+
+	MaxConns     int `yaml:"max_connections" env:"MAX_CONNECTIONS"`           // maximum number of open connections to any one target
+	MaxIdleConns int `yaml:"max_idle_connections" env:"MAX_IDLE_CONNECTIONS"` // maximum number of idle connections to any one target

 	// Catches all undefined fields and must be empty after parsing.
 	XXX map[string]any `yaml:",inline" json:"-"`
@@ -26,6 +28,8 @@ func (g *GlobalConfig) UnmarshalYAML(unmarshal func(any) error) error {
 	g.MinInterval = model.Duration(0)
 	// Default to 10 seconds, since Prometheus has a 10 second scrape timeout default.
 	g.ScrapeTimeout = model.Duration(10 * time.Second)
+	// Default to 0 for scrape error drop interval.
+	g.ScrapeErrorDropInterval = model.Duration(0)
 	// Default to .5 seconds.
 	g.TimeoutOffset = model.Duration(500 * time.Millisecond)
 	g.MaxConns = 3
```
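Going by the `yaml` and `env` struct tags above, the new knob should be settable from the config file like the existing duration fields. A hedged sketch, with the 1h value chosen purely for illustration:

```yaml
global:
  scrape_timeout: 10s
  scrape_timeout_offset: 500ms
  # Reset scrape_errors_total once per hour; 0 (the default) keeps the counters forever.
  scrape_error_drop_interval: 1h
```

The env tag suggests the same setting can also come from the SCRAPE_ERROR_DROP_INTERVAL environment variable, and model.Duration accepts Prometheus-style duration strings (15m, 1h, and so on).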

documentation/sql_exporter.yml

Lines changed: 2 additions & 0 deletions
```diff
@@ -13,6 +13,8 @@ global:
   #
   # Must be strictly positive. The default is 500ms.
   scrape_timeout_offset: 500ms
+  # Interval between dropping scrape_errors_total metric: by default (0s) metrics are persistent.
+  scrape_error_drop_interval: 0s
   # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
   min_interval: 0s
   # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
```

exporter.go

Lines changed: 8 additions & 0 deletions
```diff
@@ -33,6 +33,8 @@ type Exporter interface {
 	UpdateTarget([]Target)
 	// SetJobFilters sets the jobFilters field
 	SetJobFilters([]string)
+	// DropErrorMetrics resets the scrape_errors_total metric
+	DropErrorMetrics()
 }

 type exporter struct {
@@ -210,6 +212,12 @@ func (e *exporter) SetJobFilters(filters []string) {
 	e.jobFilters = filters
 }

+// DropErrorMetrics implements Exporter.
+func (e *exporter) DropErrorMetrics() {
+	scrapeErrorsMetric.Reset()
+	klog.Info("Dropped scrape_errors_total metric")
+}
+
 // registerScrapeErrorMetric registers the metrics for the exporter itself.
 func registerScrapeErrorMetric() *prometheus.CounterVec {
 	scrapeErrors := prometheus.NewCounterVec(prometheus.CounterOpts{
```
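`Reset()` is what makes this a true drop rather than a zeroing: on a CounterVec it deletes every labeled child, so the series vanish from the `/metrics` output instead of reappearing at 0. A small self-contained illustration using the standard client_golang API; the metric shape and label names (`job`, `target`) are assumptions for the example, not copied from the exporter:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Assumed shape: a counter vector keyed by job and target, like scrape_errors_total.
	scrapeErrors := prometheus.NewCounterVec(
		prometheus.CounterOpts{Name: "scrape_errors_total", Help: "Example error counter."},
		[]string{"job", "target"},
	)
	scrapeErrors.WithLabelValues("db1", "primary").Inc()
	scrapeErrors.WithLabelValues("db2", "replica").Add(3)
	fmt.Println(testutil.CollectAndCount(scrapeErrors)) // 2: two labeled series exist

	// Reset deletes all children; the series disappear entirely from exposition.
	scrapeErrors.Reset()
	fmt.Println(testutil.CollectAndCount(scrapeErrors)) // 0: nothing left to collect
}
```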

reload.go

Lines changed: 3 additions & 3 deletions
```diff
@@ -47,7 +47,7 @@ func Reload(e Exporter, configFile *string) error {
 }

 func reloadTarget(e Exporter, nc, cc *cfg.Config) error {
-	klog.Warning("Recreating targets collectors...")
+	klog.Warning("Recreating target...")

 	// We want to preserve DSN from the previous config revision to avoid any connection changes
 	nc.Target.DSN = cc.Target.DSN
@@ -63,7 +63,7 @@ func reloadTarget(e Exporter, nc, cc *cfg.Config) error {

 	// Populate the target list
 	e.UpdateTarget([]Target{target})
-	klog.Warning("Collectors have been successfully reloaded for target")
+	klog.Warning("Collectors have been successfully updated for the target")
 	return nil
 }

@@ -98,6 +98,6 @@ func reloadJobs(e Exporter, nc, cc *cfg.Config) error {
 	}

 	e.UpdateTarget(targets)
-	klog.Warning("Query collectors have been successfully reloaded for jobs")
+	klog.Warning("Collectors have been successfully updated for the jobs")
 	return nil
 }
```
