From ba47a52ea0a72de155e2205ab1d3ed0923c85047 Mon Sep 17 00:00:00 2001 From: Joel Verezhak Date: Wed, 5 Feb 2025 16:16:37 +0100 Subject: [PATCH 1/5] Only set ready at the end of loading TSDB Drop all notReady functions --- cmd/thanos/receive.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index fffcf6bbdeb..5a50f2e4349 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -322,7 +322,7 @@ func runReceive( statusProber.Healthy() return srv.ListenAndServe() }, func(err error) { - statusProber.NotReady(err) + // statusProber.NotReady(err) defer statusProber.NotHealthy(err) srv.Shutdown(err) @@ -394,7 +394,7 @@ func runReceive( return srv.ListenAndServe() }, func(err error) { - statusProber.NotReady(err) + // statusProber.NotReady(err) defer statusProber.NotHealthy(err) srv.Shutdown(err) @@ -488,6 +488,8 @@ func runReceive( }) } + statusProber.Ready() + level.Info(logger).Log("msg", "starting receiver") return nil } @@ -591,7 +593,7 @@ func setupHashring(g *run.Group, hashringChangedChan <- struct{}{} } else { // If not, just signal we are ready (this is important during first hashring load) - statusProber.Ready() + // statusProber.Ready() } case <-cancel: return nil @@ -674,7 +676,7 @@ func startTSDBAndUpload(g *run.Group, flushHead := !initialized || hashringAlgorithm != receive.AlgorithmKetama if flushHead { msg := "hashring has changed; server is not ready to receive requests" - statusProber.NotReady(errors.New(msg)) + // statusProber.NotReady(errors.New(msg)) level.Info(logger).Log("msg", msg) level.Info(logger).Log("msg", "updating storage") @@ -690,7 +692,7 @@ func startTSDBAndUpload(g *run.Group, <-uploadDone } dbUpdatesCompleted.Inc() - statusProber.Ready() + // statusProber.Ready() level.Info(logger).Log("msg", "storage started, and server is ready to receive requests") dbUpdatesCompleted.Inc() } From cc5b46137edafd28e34633e57a4db5b6c1bdea81 Mon Sep 17 00:00:00 2001 From: Joel Verezhak Date: Wed, 5 Feb 2025 17:05:17 +0100 Subject: [PATCH 2/5] revert change --- cmd/thanos/receive.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 5a50f2e4349..07724fa2bb2 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -322,7 +322,7 @@ func runReceive( statusProber.Healthy() return srv.ListenAndServe() }, func(err error) { - // statusProber.NotReady(err) + statusProber.NotReady(err) defer statusProber.NotHealthy(err) srv.Shutdown(err) @@ -394,7 +394,7 @@ func runReceive( return srv.ListenAndServe() }, func(err error) { - // statusProber.NotReady(err) + statusProber.NotReady(err) defer statusProber.NotHealthy(err) srv.Shutdown(err) @@ -676,7 +676,7 @@ func startTSDBAndUpload(g *run.Group, flushHead := !initialized || hashringAlgorithm != receive.AlgorithmKetama if flushHead { msg := "hashring has changed; server is not ready to receive requests" - // statusProber.NotReady(errors.New(msg)) + statusProber.NotReady(errors.New(msg)) level.Info(logger).Log("msg", msg) level.Info(logger).Log("msg", "updating storage") From 0de13a7ac5c04665402cb9e5e1bd145ab148d1d5 Mon Sep 17 00:00:00 2001 From: Joel Verezhak Date: Wed, 5 Feb 2025 17:26:39 +0100 Subject: [PATCH 3/5] revert change --- cmd/thanos/receive.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 07724fa2bb2..fffcf6bbdeb 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -488,8 +488,6 @@ func runReceive( }) } - statusProber.Ready() - level.Info(logger).Log("msg", "starting receiver") return nil } @@ -593,7 +591,7 @@ func setupHashring(g *run.Group, hashringChangedChan <- struct{}{} } else { // If not, just signal we are ready (this is important during first hashring load) - // statusProber.Ready() + statusProber.Ready() } case <-cancel: return nil @@ -692,7 +690,7 @@ func startTSDBAndUpload(g *run.Group, <-uploadDone } dbUpdatesCompleted.Inc() - // statusProber.Ready() + statusProber.Ready() level.Info(logger).Log("msg", "storage started, and server is ready to receive requests") dbUpdatesCompleted.Inc() } From 7b8f72026871a98489b2d71b6e7a01df7952b2b7 Mon Sep 17 00:00:00 2001 From: Fabian Kohn Date: Tue, 11 Feb 2025 09:36:20 +0100 Subject: [PATCH 4/5] add debugging statements and stack trace for HTTP prober and TSDB logic --- pkg/prober/http.go | 2 ++ pkg/receive/multitsdb.go | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/pkg/prober/http.go b/pkg/prober/http.go index 18f9c98af17..d9c771e181a 100644 --- a/pkg/prober/http.go +++ b/pkg/prober/http.go @@ -6,6 +6,7 @@ package prober import ( "io" "net/http" + "runtime/debug" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -61,6 +62,7 @@ func (p *HTTPProbe) isHealthy() bool { // Ready sets components status to ready. func (p *HTTPProbe) Ready() { + debug.PrintStack() p.ready.Swap(1) } diff --git a/pkg/receive/multitsdb.go b/pkg/receive/multitsdb.go index 9c9954d1bd8..9a63e1ddfda 100644 --- a/pkg/receive/multitsdb.go +++ b/pkg/receive/multitsdb.go @@ -341,11 +341,16 @@ func (t *MultiTSDB) Open() error { } g.Go(func() error { + fmt.Println("Loading Tenant", f.Name()) + defer fmt.Println("Done Loading Tenant", f.Name()) + _, err := t.getOrLoadTenant(f.Name(), true) return err }) } + fmt.Println("Waiting for all tenants") + defer fmt.Println("Done waiting for all tenants") return g.Wait() } From 6be479ed75d981580bdb2780b06f51a0f3da5cc6 Mon Sep 17 00:00:00 2001 From: Fabian Kohn Date: Tue, 11 Feb 2025 10:40:31 +0100 Subject: [PATCH 5/5] add prometheus mempostings fix backport --- go.mod | 1 + go.sum | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 9a725cb167e..4cfea91f719 100644 --- a/go.mod +++ b/go.mod @@ -286,6 +286,7 @@ replace ( // Using a 3rd-party branch for custom dialer - see https://github.com/bradfitz/gomemcache/pull/86. // Required by Cortex https://github.com/cortexproject/cortex/pull/3051. github.com/bradfitz/gomemcache => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab + github.com/prometheus/prometheus => github.com/verejoel/prometheus v0.0.0-20241127143159-d13c2a23550f // Pin kuberesolver/v5 to support new grpc version. Need to upgrade kuberesolver version on weaveworks/common. github.com/sercand/kuberesolver/v4 => github.com/sercand/kuberesolver/v5 v5.1.1 diff --git a/go.sum b/go.sum index 9933abda403..37245a19f9c 100644 --- a/go.sum +++ b/go.sum @@ -2179,8 +2179,6 @@ github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0ua github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/prometheus/prometheus v0.55.1-0.20241102120812-a6fd22b9d2c8 h1:hCxAh6+hxwy7dqUPE5ndnilMeCWrqQkJVjPDXtiYRVo= -github.com/prometheus/prometheus v0.55.1-0.20241102120812-a6fd22b9d2c8/go.mod h1:GGS7QlWKCqCbcEzWsVahYIfQwiGhcExkarHyLJTsv6I= github.com/redis/rueidis v1.0.45-alpha.1 h1:69Bu1l7gVC/qDYuGGwMwGg2rjOjSyxESol/Zila62gY= github.com/redis/rueidis v1.0.45-alpha.1/go.mod h1:q7BfhDaPt7xxwy2nv2RqQO12/mmHflDjebpcNwWFjms= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -2275,6 +2273,8 @@ github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMW github.com/uber/jaeger-lib v2.2.0+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= +github.com/verejoel/prometheus v0.0.0-20241127143159-d13c2a23550f h1:oGlsRXv7u3pUNjGDj/1sADlo/liExneaVp/0sSe6ybQ= +github.com/verejoel/prometheus v0.0.0-20241127143159-d13c2a23550f/go.mod h1:GGS7QlWKCqCbcEzWsVahYIfQwiGhcExkarHyLJTsv6I= github.com/vultr/govultr/v2 v2.17.2 h1:gej/rwr91Puc/tgh+j33p/BLR16UrIPnSr+AIwYWZQs= github.com/vultr/govultr/v2 v2.17.2/go.mod h1:ZFOKGWmgjytfyjeyAdhQlSWwTjh2ig+X49cAp50dzXI= github.com/weaveworks/common v0.0.0-20230728070032-dd9e68f319d5 h1:nORobjToZAvi54wcuUXLq+XG2Rsr0XEizy5aHBHvqWQ=