From 3ca158b90ce0902b7f8f9b9656f85e1b479ff95e Mon Sep 17 00:00:00 2001 From: Justin Azoff Date: Thu, 12 Mar 2026 15:49:06 -0400 Subject: [PATCH] improve logging around unhealthy clocks We have seen this clock synchronization error: this node is more than 500ms away from at least half of the known nodes but when this happens it's not clear what the real issue is. Are the clocks 501ms away, or 5000ms? This logs an additional error any time a remote node is unhealthy: E260312 20:20:10.978114 15 2@rpc/clock_offset.go:256 [-] 3 node 3 is not healthy: clock offset is off=91ns, err=31ns, at=1970-01-01 00:00:00 +0000 UTC --- pkg/rpc/clock_offset.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/rpc/clock_offset.go b/pkg/rpc/clock_offset.go index 0d21f7265c1..008b632131c 100644 --- a/pkg/rpc/clock_offset.go +++ b/pkg/rpc/clock_offset.go @@ -252,6 +252,8 @@ func (r *RemoteClockMonitor) VerifyClockOffset(ctx context.Context) error { offsets = append(offsets, float64(offset.Offset-offset.Uncertainty)) if offset.isHealthy(ctx, maxOffset) { healthyOffsetCount++ + } else { + log.Health.Errorf(ctx, "node %s is not healthy: clock offset is %s", addr, offset) } } numClocks := len(r.mu.offsets)