@@ -14,54 +14,178 @@ limitations under the License.
1414package app
1515
1616import (
17+ "context"
18+ "fmt"
19+ "log/slog"
1720 "net"
1821 "net/http"
22+ "os"
23+ "time"
1924
2025 "github.com/coredns/coredns/plugin"
21- clog "github.com/coredns/coredns/plugin/pkg/log"
26+ "github.com/coredns/coredns/plugin/pkg/reuseport"
27+
28+ "github.com/coredns/coredns/plugin/pkg/uniq"
2229 "github.com/prometheus/client_golang/prometheus"
2330 "github.com/prometheus/client_golang/prometheus/promhttp"
31+ "github.com/prometheus/exporter-toolkit/web"
32+ )
33+
34+ var (
35+ u = uniq .New ()
36+
37+ // ListenAddr is assigned the address of the prometheus listener. Its use is mainly in tests where
38+ // we listen on "localhost:0" and need to retrieve the actual address.
39+ ListenAddr string
2440)
2541
// shutdownTimeout bounds how long the metrics plugin waits for the metrics
// server to close before the shutdown attempt is reported as an error.
const shutdownTimeout = 5 * time.Second
45+
2646var setupErrCount = prometheus .NewCounterVec (prometheus.CounterOpts {
2747 Namespace : plugin .Namespace ,
2848 Subsystem : "nodecache" ,
2949 Name : "setup_errors_total" ,
3050 Help : "The number of errors during periodic network setup for node-cache" ,
3151}, []string {"errortype" })
3252
33- func initMetrics (ipport string ) {
34- if err := serveMetrics (ipport ); err != nil {
35- clog .Errorf ("Failed to start metrics handler: %s" , err )
36- return
37- }
38- registerMetrics ()
39- }
53+ // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics .
54+ type Metrics struct {
55+ Next plugin.Handler
56+ Addr string
57+ Reg * prometheus.Registry
4058
41- func registerMetrics () {
42- prometheus . MustRegister ( setupErrCount )
43- setupErrCount . WithLabelValues ( "iptables" ). Add ( 0 )
44- setupErrCount . WithLabelValues ( "iptables_lock" ). Add ( 0 )
45- setupErrCount . WithLabelValues ( "interface_add" ). Add ( 0 )
46- setupErrCount . WithLabelValues ( "interface_check" ). Add ( 0 )
47- setupErrCount . WithLabelValues ( "configmap" ). Add ( 0 )
59+ ln net. Listener
60+ lnSetup bool
61+
62+ mux * http. ServeMux
63+ srv * http. Server
64+
65+ tlsConfigPath string
4866}
4967
50- func publishErrorMetric (label string ) {
51- setupErrCount .WithLabelValues (label ).Inc ()
68+ // New returns a new instance of Metrics with the given address.
69+ func New (addr string ) * Metrics {
70+ met := & Metrics {
71+ Addr : addr ,
72+ Reg : prometheus .DefaultRegisterer .(* prometheus.Registry ),
73+ tlsConfigPath : "" ,
74+ }
75+
76+ return met
5277}
5378
54- func serveMetrics (ipport string ) error {
55- ln , err := net .Listen ("tcp" , ipport )
79+ // OnStartup sets up the metrics on startup.
80+ func (m * Metrics ) OnStartup () error {
81+ ln , err := reuseport .Listen ("tcp" , m .Addr )
5682 if err != nil {
57- return err
83+ return fmt . Errorf ( "Failed to start metrics handler: %s" , err )
5884 }
5985
60- mux := http .NewServeMux ()
61- mux .Handle ("/metrics" , promhttp .Handler ())
62- srv := & http.Server {Handler : mux }
86+ m .ln = ln
87+ m .lnSetup = true
88+
89+ m .mux = http .NewServeMux ()
90+ m .mux .Handle ("/metrics" , promhttp .HandlerFor (m .Reg , promhttp.HandlerOpts {}))
91+
92+ server := & http.Server {
93+ Addr : m .Addr ,
94+ Handler : m .mux ,
95+ ReadTimeout : 5 * time .Second ,
96+ WriteTimeout : 5 * time .Second ,
97+ IdleTimeout : 5 * time .Second ,
98+ }
99+ m .srv = server
100+
101+ // No TLS config file given, start without TLS
102+ if m .tlsConfigPath == "" {
103+ go func () {
104+ if err := server .Serve (ln ); err != nil && err != http .ErrServerClosed {
105+ slog .Error ("Failed to start HTTP metrics server" , "error" , err )
106+ }
107+ }()
108+ ListenAddr = ln .Addr ().String () // For tests.
109+ return nil
110+ }
111+
112+ // Check TLS config file existence
113+ if _ , err := os .Stat (m .tlsConfigPath ); os .IsNotExist (err ) {
114+ return fmt .Errorf ("TLS config file does not exist: %s" , m .tlsConfigPath )
115+ }
116+
117+ // Create web config for ListenAndServe
118+ webConfig := & web.FlagConfig {
119+ WebListenAddresses : & []string {m .Addr },
120+ WebSystemdSocket : new (bool ), // false by default
121+ WebConfigFile : & m .tlsConfigPath ,
122+ }
123+
124+ logger := slog .New (slog .NewTextHandler (os .Stderr , nil ))
125+ // Create channels for synchronization
126+ startResult := make (chan error , 1 )
127+
63128 go func () {
64- srv .Serve (ln )
129+ // Try to start the server and immediately report result
130+ err := web .Serve (m .ln , server , webConfig , logger )
131+ if err != nil && err != http .ErrServerClosed {
132+ slog .Error ("Failed to start HTTPS metrics server" , "error" , err )
133+ startResult <- err
134+ }
135+ // If we get here without error, server is running
65136 }()
137+
138+ // Wait for startup errors
139+ select {
140+ case err := <- startResult :
141+ return err
142+ case <- time .After (200 * time .Millisecond ):
143+ // No immediate error, server likely started succesfully
144+ // web.Serve() validates TLS config at startup
145+ }
146+
147+ registerMetrics ()
148+ ListenAddr = ln .Addr ().String () // For tests.
149+ return nil
150+ }
151+
152+ // OnRestart stops the listener on reload.
153+ func (m * Metrics ) OnRestart () error {
154+ if ! m .lnSetup {
155+ return nil
156+ }
157+ u .Unset (m .Addr )
158+ return m .stopServer ()
159+ }
160+
161+ func (m * Metrics ) stopServer () error {
162+ if ! m .lnSetup {
163+ return nil
164+ }
165+ ctx , cancel := context .WithTimeout (context .Background (), shutdownTimeout )
166+ defer cancel ()
167+ if err := m .srv .Shutdown (ctx ); err != nil {
168+ slog .Error ("Failed to stop prometheus http server" , "error" , err )
169+ return err
170+ }
171+ m .lnSetup = false
172+ m .ln .Close ()
173+ prometheus .Unregister (setupErrCount )
66174 return nil
67175}
176+
177+ // OnFinalShutdown tears down the metrics listener on shutdown and restart.
178+ func (m * Metrics ) OnFinalShutdown () error { return m .stopServer () }
179+
180+ func publishErrorMetric (label string ) {
181+ setupErrCount .WithLabelValues (label ).Inc ()
182+ }
183+
184+ func registerMetrics () {
185+ prometheus .MustRegister (setupErrCount )
186+ setupErrCount .WithLabelValues ("iptables" ).Add (0 )
187+ setupErrCount .WithLabelValues ("iptables_lock" ).Add (0 )
188+ setupErrCount .WithLabelValues ("interface_add" ).Add (0 )
189+ setupErrCount .WithLabelValues ("interface_check" ).Add (0 )
190+ setupErrCount .WithLabelValues ("configmap" ).Add (0 )
191+ }
0 commit comments