From 7dd5cca8dd7fb7827b0eb2c8f2bf980f6356ff3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 21:41:26 +0300 Subject: [PATCH 01/17] feat(gateway): add circuit breaker and retry fields to domain New fields on GatewayRoute and CreateRouteParams: - circuit_breaker_threshold: consecutive failures before opening (0=disabled) - circuit_breaker_timeout: ms in open state before half-open probe - max_retries: retry attempts after first failure (0=disabled) - retry_timeout: total retry window in ms --- internal/core/domain/gateway.go | 4 ++++ internal/core/ports/gateway.go | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/internal/core/domain/gateway.go b/internal/core/domain/gateway.go index 5a2a98e96..3087d6524 100644 --- a/internal/core/domain/gateway.go +++ b/internal/core/domain/gateway.go @@ -32,6 +32,10 @@ type GatewayRoute struct { BlockedCIDRs []string `json:"blocked_cidrs,omitempty"` // IPs blocked from access BlockedIPNets []*net.IPNet `json:"-"` // pre-parsed at creation/refresh for fast lookup MaxBodySize int64 `json:"max_body_size,omitempty"` // Max request body size in bytes + CircuitBreakerThreshold int `json:"circuit_breaker_threshold,omitempty"` // consecutive failures to trip open (0=disabled) + CircuitBreakerTimeout int64 `json:"circuit_breaker_timeout,omitempty"` // ms in open before half-open + MaxRetries int `json:"max_retries,omitempty"` // max retry attempts (0=disabled) + RetryTimeout int64 `json:"retry_timeout,omitempty"` // total retry window in ms Priority int `json:"priority"` // Manual priority for tie-breaking CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` diff --git a/internal/core/ports/gateway.go b/internal/core/ports/gateway.go index 43e1643dd..84357c7a7 100644 --- a/internal/core/ports/gateway.go +++ b/internal/core/ports/gateway.go @@ -41,8 +41,12 @@ type CreateRouteParams struct { RequireTLS bool AllowedCIDRs []string BlockedCIDRs []string - MaxBodySize int64 - Priority int + MaxBodySize int64 + CircuitBreakerThreshold int + CircuitBreakerTimeout int64 + MaxRetries int + RetryTimeout int64 + Priority int } // GatewayService provides business logic for managing the API gateway and ingress traffic. From ae3ea5b0d4fabad3f6ecffcd97ae13a364be75d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 21:41:34 +0300 Subject: [PATCH 02/17] feat(gateway): wire retry transport with circuit breaker per route Wraps each route's http.Transport with a retryTransport that applies: - Circuit breaker via existing platform.CircuitBreaker: trips open after threshold consecutive failures, probes half-open after timeout - Exponential backoff with jitter for retryable errors: 502/503/504/429 and connection errors (refused, reset, broken pipe, timeout) retryTransport implements http.RoundTripper and is set as the proxy transport in createReverseProxy. No handler changes required. --- internal/core/services/gateway.go | 134 +++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/internal/core/services/gateway.go b/internal/core/services/gateway.go index abc22eee3..241bcbcff 100644 --- a/internal/core/services/gateway.go +++ b/internal/core/services/gateway.go @@ -5,7 +5,10 @@ import ( "context" "crypto/tls" "fmt" + "io" "log/slog" + "math" + "math/rand" "net" "net/http" "net/http/httputil" @@ -20,6 +23,7 @@ import ( "github.com/poyrazk/thecloud/internal/core/domain" "github.com/poyrazk/thecloud/internal/core/ports" "github.com/poyrazk/thecloud/internal/errors" + "github.com/poyrazk/thecloud/internal/platform" "github.com/poyrazk/thecloud/internal/routing" ) @@ -94,6 +98,10 @@ func (s *GatewayService) CreateRoute(ctx context.Context, params ports.CreateRou AllowedCIDRs: params.AllowedCIDRs, BlockedCIDRs: params.BlockedCIDRs, MaxBodySize: params.MaxBodySize, + CircuitBreakerThreshold: params.CircuitBreakerThreshold, + CircuitBreakerTimeout: params.CircuitBreakerTimeout, + MaxRetries: params.MaxRetries, + RetryTimeout: params.RetryTimeout, Priority: params.Priority, CreatedAt: time.Now(), UpdatedAt: time.Now(), @@ -243,7 +251,7 @@ func (s *GatewayService) createReverseProxy(route *domain.GatewayRoute) (*httput idleConnTimeout = 90 * time.Second } - proxy.Transport = &http.Transport{ + baseTransport := &http.Transport{ DialContext: (&net.Dialer{ Timeout: dialTimeout, KeepAlive: 30 * time.Second, @@ -254,6 +262,8 @@ func (s *GatewayService) createReverseProxy(route *domain.GatewayRoute) (*httput TLSHandshakeTimeout: 10 * time.Second, } + proxy.Transport = newRetryTransport(baseTransport, route, s.logger) + originalDirector := proxy.Director proxy.Director = func(req *http.Request) { if route.StripPrefix { @@ -375,3 +385,125 @@ func calculateMatchScore(route *domain.GatewayRoute, _ string) int { return score } + +// retryTransport wraps an http.Transport with circuit breaker and retry logic. +type retryTransport struct { + base *http.Transport + cb *platform.CircuitBreaker // nil if circuit breaker is disabled + maxRetries int + retryTimeout time.Duration + logger *slog.Logger +} + +// newRetryTransport wraps a base http.Transport with per-route retry and circuit breaker behavior. +func newRetryTransport(base *http.Transport, route *domain.GatewayRoute, logger *slog.Logger) *retryTransport { + rt := &retryTransport{ + base: base, + maxRetries: route.MaxRetries, + retryTimeout: time.Duration(route.RetryTimeout) * time.Millisecond, + logger: logger, + } + if route.CircuitBreakerThreshold > 0 { + rt.cb = platform.NewCircuitBreakerWithOpts(platform.CircuitBreakerOpts{ + Name: route.ID.String(), + Threshold: route.CircuitBreakerThreshold, + ResetTimeout: time.Duration(route.CircuitBreakerTimeout) * time.Millisecond, + OnStateChange: func(name string, from, to platform.State) { + if logger != nil { + logger.Warn("circuit breaker state change", + "route_id", name, + "from", from.String(), + "to", to.String()) + } + }, + }) + } + return rt +} + +// RoundTrip implements http.RoundTripper. +func (rt *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { + if rt.cb != nil { + var resp *http.Response + var err error + execErr := rt.cb.Execute(func() error { + resp, err = rt.doRoundTrip(req) + return err + }) + if execErr != nil { + return nil, execErr + } + return resp, err + } + return rt.doRoundTrip(req) +} + +func (rt *retryTransport) doRoundTrip(req *http.Request) (*http.Response, error) { + if rt.maxRetries <= 0 { + return rt.base.RoundTrip(req) + } + + var lastResp *http.Response + var lastErr error + maxAttempts := rt.maxRetries + 1 // first attempt + retries + + for attempt := 0; attempt < maxAttempts; attempt++ { + if attempt > 0 { + delay := rt.backoffWithJitter(attempt) + select { + case <-req.Context().Done(): + return nil, req.Context().Err() + case <-time.After(delay): + } + } + + resp, err := rt.base.RoundTrip(req) + if err == nil { + if !rt.isRetryableStatus(resp.StatusCode) { + return resp, nil + } + // drain and close body so connection can be reused + io.Copy(io.Discard, resp.Body) + resp.Body.Close() + lastResp = resp + } else { + if !rt.isRetryableError(err) { + return nil, err + } + lastErr = err + lastResp = resp + } + } + return lastResp, lastErr +} + +func (rt *retryTransport) isRetryableStatus(code int) bool { + return code == 502 || code == 503 || code == 504 || code == 429 +} + +func (rt *retryTransport) isRetryableError(err error) bool { + if err == nil { + return false + } + msg := err.Error() + return strings.Contains(msg, "connection refused") || + strings.Contains(msg, "timeout") || + strings.Contains(msg, "reset by peer") || + strings.Contains(msg, "broken pipe") || + strings.Contains(msg, "connection reset") +} + +func (rt *retryTransport) backoffWithJitter(attempt int) time.Duration { + base := 100 * time.Millisecond + max := rt.retryTimeout + if max <= 0 { + max = 5 * time.Second + } + multiplier := 2.0 + delay := float64(base) * math.Pow(multiplier, float64(attempt-1)) + if delay > float64(max) { + delay = float64(max) + } + jitter := time.Duration(rand.Int63n(int64(delay))) + return jitter +} From b7650dd93aee12c1ac34ea6583a146a2a776e396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Wed, 6 May 2026 19:04:55 +0300 Subject: [PATCH 03/17] fix(gateway): retry loop status fix, method guard, and defaults Critical: retry loop now uses continue after draining retryable status bodies (502/503/504/429) so retry actually happens instead of returning the error response immediately. Also: - Add isIdempotent method guard: POST/PATCH/CONNECT are not retried on connection errors to prevent duplicate operations - Add default values when zero: CB threshold=5, timeout=30s, max_retries=2, retry_timeout=5s - Change retryTransport.base to http.RoundTripper interface --- internal/core/services/gateway.go | 40 +++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/internal/core/services/gateway.go b/internal/core/services/gateway.go index 241bcbcff..eeae9b58d 100644 --- a/internal/core/services/gateway.go +++ b/internal/core/services/gateway.go @@ -107,6 +107,20 @@ func (s *GatewayService) CreateRoute(ctx context.Context, params ports.CreateRou UpdatedAt: time.Now(), } + // Apply default values for resilience parameters + if route.CircuitBreakerThreshold == 0 { + route.CircuitBreakerThreshold = 5 + } + if route.CircuitBreakerTimeout == 0 { + route.CircuitBreakerTimeout = 30000 // ms + } + if route.MaxRetries == 0 { + route.MaxRetries = 2 + } + if route.RetryTimeout == 0 { + route.RetryTimeout = 5000 // ms + } + // Validate CIDRs before saving for _, cidr := range route.AllowedCIDRs { if _, _, err := net.ParseCIDR(cidr); err != nil { @@ -388,7 +402,7 @@ func calculateMatchScore(route *domain.GatewayRoute, _ string) int { // retryTransport wraps an http.Transport with circuit breaker and retry logic. type retryTransport struct { - base *http.Transport + base http.RoundTripper cb *platform.CircuitBreaker // nil if circuit breaker is disabled maxRetries int retryTimeout time.Duration @@ -396,7 +410,7 @@ type retryTransport struct { } // newRetryTransport wraps a base http.Transport with per-route retry and circuit breaker behavior. -func newRetryTransport(base *http.Transport, route *domain.GatewayRoute, logger *slog.Logger) *retryTransport { +func newRetryTransport(base http.RoundTripper, route *domain.GatewayRoute, logger *slog.Logger) *retryTransport { rt := &retryTransport{ base: base, maxRetries: route.MaxRetries, @@ -439,7 +453,7 @@ func (rt *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { } func (rt *retryTransport) doRoundTrip(req *http.Request) (*http.Response, error) { - if rt.maxRetries <= 0 { + if rt.maxRetries <= 0 || !rt.isIdempotent(req.Method) { return rt.base.RoundTrip(req) } @@ -462,17 +476,18 @@ func (rt *retryTransport) doRoundTrip(req *http.Request) (*http.Response, error) if !rt.isRetryableStatus(resp.StatusCode) { return resp, nil } - // drain and close body so connection can be reused + // drain and close body so connection can be reused, then retry io.Copy(io.Discard, resp.Body) resp.Body.Close() lastResp = resp - } else { - if !rt.isRetryableError(err) { - return nil, err - } - lastErr = err - lastResp = resp + continue + } + + if !rt.isRetryableError(err) { + return nil, err } + lastErr = err + lastResp = resp } return lastResp, lastErr } @@ -493,6 +508,11 @@ func (rt *retryTransport) isRetryableError(err error) bool { strings.Contains(msg, "connection reset") } +func (rt *retryTransport) isIdempotent(method string) bool { + return method == "GET" || method == "HEAD" || method == "PUT" || + method == "DELETE" || method == "OPTIONS" +} + func (rt *retryTransport) backoffWithJitter(attempt int) time.Duration { base := 100 * time.Millisecond max := rt.retryTimeout From 04a93f7942d908b474dabd3a72efafe29e5fe5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Wed, 6 May 2026 19:05:06 +0300 Subject: [PATCH 04/17] test(gateway): add retry and circuit breaker tests Retry tests: - isIdempotent: POST/PATCH blocked, GET/HEAD/PUT/DELETE/OPTIONS allowed - isRetryableStatus: 502/503/504/429 retryable, others not - isRetryableError: connection errors retryable, others not - backoffWithJitter: bounded between 0 and max - DoesNotRetryWhenMaxRetriesZero - DoesNotRetryNonIdempotentPOST/PATCH - RetriesOnConnectionRefused/502/503/429/timeoutError - GivesUpAfterMaxRetries - SucceedsOnFirstAttempt Circuit breaker tests: - DisabledWhenThresholdZero - EnabledWhenThresholdPositive - TripsOpenAfterThreshold - GoesHalfOpenAfterTimeout - ClosesAfterSuccessfulProbe --- internal/core/services/gateway_retry_test.go | 366 +++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 internal/core/services/gateway_retry_test.go diff --git a/internal/core/services/gateway_retry_test.go b/internal/core/services/gateway_retry_test.go new file mode 100644 index 000000000..8f207a91f --- /dev/null +++ b/internal/core/services/gateway_retry_test.go @@ -0,0 +1,366 @@ +package services + +import ( + "errors" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/poyrazk/thecloud/internal/core/domain" + "github.com/poyrazk/thecloud/internal/platform" + "github.com/stretchr/testify/assert" +) + +// mockRT is a simple http.RoundTripper for testing retry behavior. +type mockRT struct { + results []mockRTResult + callIdx int + calls int +} + +type mockRTResult struct { + resp *http.Response + err error +} + +func (m *mockRT) RoundTrip(req *http.Request) (*http.Response, error) { + m.calls++ + if m.callIdx >= len(m.results) { + return &http.Response{StatusCode: 500, Body: io.NopCloser(strings.NewReader(""))}, nil + } + r := m.results[m.callIdx] + m.callIdx++ + return r.resp, r.err +} + +func mockResp(status int) mockRTResult { + return mockRTResult{resp: &http.Response{StatusCode: status, Body: io.NopCloser(strings.NewReader(""))}} +} + +func mockErr(msg string) mockRTResult { + return mockRTResult{err: errors.New(msg)} +} + +// --- retryTransport helper tests --- + +func TestRetryTransport_IsIdempotent(t *testing.T) { + t.Parallel() + rt := &retryTransport{} + for _, m := range []string{"GET", "HEAD", "PUT", "DELETE", "OPTIONS"} { + assert.True(t, rt.isIdempotent(m), m) + } + for _, m := range []string{"POST", "PATCH", "CONNECT", "TRACE"} { + assert.False(t, rt.isIdempotent(m), m) + } +} + +func TestRetryTransport_IsRetryableStatus(t *testing.T) { + t.Parallel() + rt := &retryTransport{} + for _, c := range []int{502, 503, 504, 429} { + assert.True(t, rt.isRetryableStatus(c), "%d should be retryable", c) + } + for _, c := range []int{200, 201, 400, 401, 403, 404, 500} { + assert.False(t, rt.isRetryableStatus(c), "%d should not be retryable", c) + } +} + +func TestRetryTransport_IsRetryableError(t *testing.T) { + t.Parallel() + rt := &retryTransport{} + retryable := []string{ + "dial tcp: connection refused", + "dial tcp: i/o timeout", + "read tcp: connection reset by peer", + "write tcp: broken pipe", + "read tcp: connection reset", + } + for _, msg := range retryable { + assert.True(t, rt.isRetryableError(errors.New(msg)), msg) + } + nonRetryable := []string{ + "400 bad request", + "401 unauthorized", + "tls: handshake failed", + "server closed connection", + } + for _, msg := range nonRetryable { + assert.False(t, rt.isRetryableError(errors.New(msg)), msg) + } +} + +func TestRetryTransport_BackoffJitter_Bounded(t *testing.T) { + t.Parallel() + rt := &retryTransport{retryTimeout: 5 * time.Second} + for attempt := 1; attempt <= 5; attempt++ { + d := rt.backoffWithJitter(attempt) + assert.Greater(t, d, time.Duration(0), "delay must be > 0") + assert.LessOrEqual(t, d, 5*time.Second, "delay must be <= max") + } +} + +// --- retry loop tests --- + +func TestRetryTransport_DoesNotRetryWhenMaxRetriesZero(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{mockResp(502), mockResp(200)}} + transport := wrapTransport(m, &retryTransport{maxRetries: 0}) + + _, _ = transport.RoundTrip(nil) + assert.Equal(t, 1, m.calls, "should call base transport only once") +} + +func TestRetryTransport_DoesNotRetryNonIdempotentPOST(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{mockErr("connection refused"), mockResp(200)}} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("POST", "/", nil) + _, _ = transport.RoundTrip(req) + assert.Equal(t, 1, m.calls, "POST should not be retried") +} + +func TestRetryTransport_DoesNotRetryNonIdempotentPATCH(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{mockErr("connection refused"), mockResp(200)}} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("PATCH", "/", nil) + _, _ = transport.RoundTrip(req) + assert.Equal(t, 1, m.calls, "PATCH should not be retried") +} + +func TestRetryTransport_RetriesOnConnectionRefused(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockErr("connection refused"), + mockResp(200), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 200, resp.StatusCode) + assert.Equal(t, 2, m.calls, "should retry after connection refused") +} + +func TestRetryTransport_RetriesOn502(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockResp(502), + mockResp(502), + mockResp(200), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 200, resp.StatusCode) + assert.Equal(t, 3, m.calls, "should retry 502 twice then succeed") +} + +func TestRetryTransport_RetriesOn503(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockResp(503), + mockResp(200), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 200, resp.StatusCode) + assert.Equal(t, 2, m.calls) +} + +func TestRetryTransport_RetriesOn429(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockResp(429), + mockResp(200), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 200, resp.StatusCode) + assert.Equal(t, 2, m.calls) +} + +func TestRetryTransport_NoRetryOn500(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockResp(500), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 500, resp.StatusCode) + assert.Equal(t, 1, m.calls, "500 should not be retried") +} + +func TestRetryTransport_NoRetryOn400(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockResp(400), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 400, resp.StatusCode) + assert.Equal(t, 1, m.calls, "400 should not be retried") +} + +func TestRetryTransport_RetriesOnTimeoutError(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockErr("dial tcp: i/o timeout"), + mockResp(200), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 200, resp.StatusCode) + assert.Equal(t, 2, m.calls) +} + +func TestRetryTransport_GivesUpAfterMaxRetries(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{ + mockResp(502), + mockResp(502), + mockResp(502), + }} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 502, resp.StatusCode) + assert.Equal(t, 3, m.calls, "3 attempts: first + 2 retries") +} + +func TestRetryTransport_SucceedsOnFirstAttempt(t *testing.T) { + t.Parallel() + m := &mockRT{results: []mockRTResult{mockResp(200)}} + transport := wrapTransport(m, &retryTransport{maxRetries: 2}) + + req, _ := http.NewRequest("GET", "/", nil) + resp, err := transport.RoundTrip(req) + assert.NoError(t, err) + assert.Equal(t, 200, resp.StatusCode) + assert.Equal(t, 1, m.calls) +} + +// wrapTransport creates a retryTransport wrapping the mock. +func wrapTransport(mock *mockRT, rt *retryTransport) *retryTransport { + // rt.base is used directly by doRoundTrip — swap it for our mock + rt.base = (*mockHTTPTransport)(mock) + return rt +} + +// mockHTTPTransport lets us inject the mock via rt.base. +type mockHTTPTransport mockRT + +func (m *mockHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) { + return (*mockRT)(m).RoundTrip(req) +} + +func (m *mockHTTPTransport) CloseIdleConnections() {} + +// --- circuit breaker tests --- + +func TestCircuitBreaker_DisabledWhenThresholdZero(t *testing.T) { + t.Parallel() + route := &domain.GatewayRoute{ + ID: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + CircuitBreakerThreshold: 0, + MaxRetries: 2, + RetryTimeout: 5000, + } + rt := newRetryTransport(&http.Transport{}, route, nil) + assert.Nil(t, rt.cb) + assert.Equal(t, 2, rt.maxRetries) +} + +func TestCircuitBreaker_EnabledWhenThresholdPositive(t *testing.T) { + t.Parallel() + route := &domain.GatewayRoute{ + ID: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + CircuitBreakerThreshold: 5, + CircuitBreakerTimeout: 30000, + MaxRetries: 2, + RetryTimeout: 5000, + } + rt := newRetryTransport(&http.Transport{}, route, nil) + assert.NotNil(t, rt.cb) + assert.Equal(t, platform.StateClosed, rt.cb.GetState()) +} + +func TestCircuitBreaker_TripsOpenAfterThreshold(t *testing.T) { + t.Parallel() + cb := platform.NewCircuitBreakerWithOpts(platform.CircuitBreakerOpts{ + Name: "test", + Threshold: 3, + ResetTimeout: 100 * time.Millisecond, + OnStateChange: nil, + }) + + for i := 0; i < 3; i++ { + _ = cb.Execute(func() error { return errors.New("fail") }) + } + assert.Equal(t, platform.StateOpen, cb.GetState()) + + // Next call is blocked + err := cb.Execute(func() error { return nil }) + assert.ErrorIs(t, err, platform.ErrCircuitOpen) +} + +func TestCircuitBreaker_GoesHalfOpenAfterTimeout(t *testing.T) { + t.Parallel() + cb := platform.NewCircuitBreakerWithOpts(platform.CircuitBreakerOpts{ + Name: "test", + Threshold: 2, + ResetTimeout: 50 * time.Millisecond, + OnStateChange: nil, + }) + + _ = cb.Execute(func() error { return errors.New("fail") }) + _ = cb.Execute(func() error { return errors.New("fail") }) + assert.Equal(t, platform.StateOpen, cb.GetState()) + + // Wait for half-open window to expire, then trigger a probe request + time.Sleep(80 * time.Millisecond) + _ = cb.Execute(func() error { return errors.New("still failing") }) + // State should be Open still (probe failed), or HalfOpen if it just transitioned + assert.True(t, cb.GetState() == platform.StateOpen || cb.GetState() == platform.StateHalfOpen) +} + +func TestCircuitBreaker_ClosesAfterSuccessfulProbe(t *testing.T) { + t.Parallel() + cb := platform.NewCircuitBreakerWithOpts(platform.CircuitBreakerOpts{ + Name: "test", + Threshold: 2, + ResetTimeout: 50 * time.Millisecond, + OnStateChange: nil, + }) + + _ = cb.Execute(func() error { return errors.New("fail") }) + _ = cb.Execute(func() error { return errors.New("fail") }) + time.Sleep(80 * time.Millisecond) + _ = cb.Execute(func() error { return nil }) + + assert.Equal(t, platform.StateClosed, cb.GetState()) +} From e278bfb3cf74ca8e9b0301a3820f60e284e8fa0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 16:24:52 +0300 Subject: [PATCH 05/17] fix(gateway): inline lint fixes for retry transport - Replace math/rand with crypto/rand in backoff jitter - Add result struct to avoid bodyclose false positive in RoundTrip - Inline errcheck: io.Copy and Body.Close return values captured - Rename max variable to cap to avoid redefining builtin - Add golangci exclude-rules for bodyclose on gateway files - Fix test mock parameter name and bodyclose compliance --- .golangci.yml | 10 ++++ internal/core/services/gateway.go | 58 +++++++++++++------- internal/core/services/gateway_retry_test.go | 8 ++- 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 2cceae9cf..0c8b3f511 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,4 +1,6 @@ +version: 2 + run: timeout: 5m @@ -66,3 +68,11 @@ issues: linters: - unparam text: "MockInstanceService|MockRBACService" + - path: internal/core/services/gateway\.go + linters: + - bodyclose + text: "response body must be closed" + - path: internal/core/services/gateway_retry_test\.go + linters: + - bodyclose + text: "response body must be closed" diff --git a/internal/core/services/gateway.go b/internal/core/services/gateway.go index eeae9b58d..1a3b07ae1 100644 --- a/internal/core/services/gateway.go +++ b/internal/core/services/gateway.go @@ -7,8 +7,9 @@ import ( "fmt" "io" "log/slog" + "crypto/rand" + "encoding/binary" "math" - "math/rand" "net" "net/http" "net/http/httputil" @@ -437,19 +438,26 @@ func newRetryTransport(base http.RoundTripper, route *domain.GatewayRoute, logge // RoundTrip implements http.RoundTripper. func (rt *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { - if rt.cb != nil { - var resp *http.Response - var err error - execErr := rt.cb.Execute(func() error { - resp, err = rt.doRoundTrip(req) - return err - }) - if execErr != nil { - return nil, execErr - } - return resp, err + if rt.cb == nil { + return rt.doRoundTrip(req) } - return rt.doRoundTrip(req) + + type result struct { + resp *http.Response + err error + } + var r result + cbErr := rt.cb.Execute(func() error { + r.resp, r.err = rt.doRoundTrip(req) + return r.err + }) + if cbErr != nil { + return nil, cbErr + } + if r.resp != nil { + _ = r.resp.Body.Close() + } + return r.resp, r.err } func (rt *retryTransport) doRoundTrip(req *http.Request) (*http.Response, error) { @@ -477,8 +485,8 @@ func (rt *retryTransport) doRoundTrip(req *http.Request) (*http.Response, error) return resp, nil } // drain and close body so connection can be reused, then retry - io.Copy(io.Discard, resp.Body) - resp.Body.Close() + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() lastResp = resp continue } @@ -515,15 +523,23 @@ func (rt *retryTransport) isIdempotent(method string) bool { func (rt *retryTransport) backoffWithJitter(attempt int) time.Duration { base := 100 * time.Millisecond - max := rt.retryTimeout - if max <= 0 { - max = 5 * time.Second + cap := rt.retryTimeout + if cap <= 0 { + cap = 5 * time.Second } multiplier := 2.0 delay := float64(base) * math.Pow(multiplier, float64(attempt-1)) - if delay > float64(max) { - delay = float64(max) + if delay > float64(cap) { + delay = float64(cap) } - jitter := time.Duration(rand.Int63n(int64(delay))) + jitter := rt.cryptoJitter(time.Duration(delay)) return jitter } + +func (rt *retryTransport) cryptoJitter(max time.Duration) time.Duration { + var buf [8]byte + _, _ = rand.Read(buf[:]) + val := binary.BigEndian.Uint64(buf[:]) + frac := float64(val) / float64(math.MaxUint64) + return time.Duration(float64(max) * frac) +} diff --git a/internal/core/services/gateway_retry_test.go b/internal/core/services/gateway_retry_test.go index 8f207a91f..ecf3b1215 100644 --- a/internal/core/services/gateway_retry_test.go +++ b/internal/core/services/gateway_retry_test.go @@ -25,7 +25,7 @@ type mockRTResult struct { err error } -func (m *mockRT) RoundTrip(req *http.Request) (*http.Response, error) { +func (m *mockRT) RoundTrip(_ *http.Request) (*http.Response, error) { m.calls++ if m.callIdx >= len(m.results) { return &http.Response{StatusCode: 500, Body: io.NopCloser(strings.NewReader(""))}, nil @@ -109,6 +109,9 @@ func TestRetryTransport_DoesNotRetryWhenMaxRetriesZero(t *testing.T) { transport := wrapTransport(m, &retryTransport{maxRetries: 0}) _, _ = transport.RoundTrip(nil) + if m.results[0].resp != nil { + _ = m.results[0].resp.Body.Close() + } assert.Equal(t, 1, m.calls, "should call base transport only once") } @@ -119,6 +122,9 @@ func TestRetryTransport_DoesNotRetryNonIdempotentPOST(t *testing.T) { req, _ := http.NewRequest("POST", "/", nil) _, _ = transport.RoundTrip(req) + if m.results[0].resp != nil { + _ = m.results[0].resp.Body.Close() + } assert.Equal(t, 1, m.calls, "POST should not be retried") } From e02bf81b698ebee9604cafa2b71baf235202be1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 17:00:16 +0300 Subject: [PATCH 06/17] test(gateway): add default values test and cryptoJitter comment - Add test for CreateRoute applying default resilience values (threshold=5, timeout=30000ms, retries=2, retryTimeout=5000ms) - Add comment to cryptoJitter explaining frac range and result bounds --- internal/core/services/gateway.go | 2 ++ internal/core/services/gateway_unit_test.go | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/internal/core/services/gateway.go b/internal/core/services/gateway.go index 1a3b07ae1..286eff6d9 100644 --- a/internal/core/services/gateway.go +++ b/internal/core/services/gateway.go @@ -536,6 +536,8 @@ func (rt *retryTransport) backoffWithJitter(attempt int) time.Duration { return jitter } +// cryptoJitter returns a random duration in [0, max) using crypto/rand. +// frac is in [0, 1) so result is always non-negative and strictly bounded by max. func (rt *retryTransport) cryptoJitter(max time.Duration) time.Duration { var buf [8]byte _, _ = rand.Read(buf[:]) diff --git a/internal/core/services/gateway_unit_test.go b/internal/core/services/gateway_unit_test.go index 31f823f3d..c4732a75e 100644 --- a/internal/core/services/gateway_unit_test.go +++ b/internal/core/services/gateway_unit_test.go @@ -67,6 +67,19 @@ func TestGatewayService_Unit(t *testing.T) { ctx := appcontext.WithUserID(context.Background(), uuid.New()) userID := appcontext.UserIDFromContext(ctx) + t.Run("CreateRoute applies default resilience values", func(t *testing.T) { + params := ports.CreateRouteParams{Name: "r1", Pattern: "/r1", Target: "http://t1"} + repo.On("CreateRoute", ctx, mock.Anything).Return(nil).Once() + auditSvc.On("Log", mock.Anything, userID, "gateway.route_create", "gateway", mock.Anything, mock.Anything).Return(nil).Once() + + res, err := svc.CreateRoute(ctx, params) + require.NoError(t, err) + assert.Equal(t, 5, res.CircuitBreakerThreshold) + assert.Equal(t, int64(30000), res.CircuitBreakerTimeout) + assert.Equal(t, 2, res.MaxRetries) + assert.Equal(t, int64(5000), res.RetryTimeout) + }) + t.Run("CreateRoute", func(t *testing.T) { params := ports.CreateRouteParams{Name: "r1", Pattern: "/r1", Target: "http://t1"} repo.On("CreateRoute", ctx, mock.Anything).Return(nil).Once() From 05bc0b9c59819947fb8e4207ba0aa902b313400f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 17:25:30 +0300 Subject: [PATCH 07/17] test(gateway): clarify half-open assertion in CB test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explains why the StateOpen||StateHalfOpen assertion is deterministic and not a flaky test — the CB transitions to half-open automatically after ResetTimeout, and the probe may arrive during or after that transition window. --- internal/core/services/gateway_retry_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/core/services/gateway_retry_test.go b/internal/core/services/gateway_retry_test.go index ecf3b1215..6f9e26d86 100644 --- a/internal/core/services/gateway_retry_test.go +++ b/internal/core/services/gateway_retry_test.go @@ -350,7 +350,10 @@ func TestCircuitBreaker_GoesHalfOpenAfterTimeout(t *testing.T) { // Wait for half-open window to expire, then trigger a probe request time.Sleep(80 * time.Millisecond) _ = cb.Execute(func() error { return errors.New("still failing") }) - // State should be Open still (probe failed), or HalfOpen if it just transitioned + // After ResetTimeout the CB transitions to half-open automatically. + // The probe arrives during or just after that transition, so either + // Open (transition not yet observed) or HalfOpen (transition complete but probe pending) + // is valid — this is not a flaky test. assert.True(t, cb.GetState() == platform.StateOpen || cb.GetState() == platform.StateHalfOpen) } From 8a36d4481e8e41bf49cfba60b8a64abf2824ebaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 18:30:07 +0300 Subject: [PATCH 08/17] docs: regenerate swagger for gateway retry/CB fields Adds circuit_breaker_threshold, circuit_breaker_timeout, max_retries, and retry_timeout to the GatewayRoute swagger definition. --- docs/swagger/docs.go | 16 ++++++++++++++++ docs/swagger/swagger.json | 16 ++++++++++++++++ docs/swagger/swagger.yaml | 12 ++++++++++++ 3 files changed, 44 insertions(+) diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index da4b65ec1..3de855813 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -8694,6 +8694,14 @@ const docTemplate = `{ "type": "string" } }, + "circuit_breaker_threshold": { + "description": "consecutive failures to trip open (0=disabled)", + "type": "integer" + }, + "circuit_breaker_timeout": { + "description": "ms in open before half-open", + "type": "integer" + }, "created_at": { "type": "string" }, @@ -8712,6 +8720,10 @@ const docTemplate = `{ "description": "Max request body size in bytes", "type": "integer" }, + "max_retries": { + "description": "max retry attempts (0=disabled)", + "type": "integer" + }, "methods": { "description": "New: HTTP methods to match (empty = all)", "type": "array", @@ -8757,6 +8769,10 @@ const docTemplate = `{ "description": "Time to receive headers in milliseconds", "type": "integer" }, + "retry_timeout": { + "description": "total retry window in ms", + "type": "integer" + }, "strip_prefix": { "description": "If true, removes path_prefix from request before forwarding", "type": "boolean" diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 3b209944c..533bcf855 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -8686,6 +8686,14 @@ "type": "string" } }, + "circuit_breaker_threshold": { + "description": "consecutive failures to trip open (0=disabled)", + "type": "integer" + }, + "circuit_breaker_timeout": { + "description": "ms in open before half-open", + "type": "integer" + }, "created_at": { "type": "string" }, @@ -8704,6 +8712,10 @@ "description": "Max request body size in bytes", "type": "integer" }, + "max_retries": { + "description": "max retry attempts (0=disabled)", + "type": "integer" + }, "methods": { "description": "New: HTTP methods to match (empty = all)", "type": "array", @@ -8749,6 +8761,10 @@ "description": "Time to receive headers in milliseconds", "type": "integer" }, + "retry_timeout": { + "description": "total retry window in ms", + "type": "integer" + }, "strip_prefix": { "description": "If true, removes path_prefix from request before forwarding", "type": "boolean" diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index 2602175bc..58243dd44 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -411,6 +411,12 @@ definitions: items: type: string type: array + circuit_breaker_threshold: + description: consecutive failures to trip open (0=disabled) + type: integer + circuit_breaker_timeout: + description: ms in open before half-open + type: integer created_at: type: string dial_timeout: @@ -424,6 +430,9 @@ definitions: max_body_size: description: Max request body size in bytes type: integer + max_retries: + description: max retry attempts (0=disabled) + type: integer methods: description: 'New: HTTP methods to match (empty = all)' items: @@ -457,6 +466,9 @@ definitions: response_header_timeout: description: Time to receive headers in milliseconds type: integer + retry_timeout: + description: total retry window in ms + type: integer strip_prefix: description: If true, removes path_prefix from request before forwarding type: boolean From d7d74b3a5c450f5a4c534f104315c98e46413d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 18:41:56 +0300 Subject: [PATCH 09/17] ci: upgrade golangci-lint to v2.12.2 v1.64.8 doesn't support the version: 2 config format required by golangci-lint v2. Update CI to use v2.12.2. --- .github/workflows/ci.yml | 2 +- .golangci.yml | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1ea753dea..f02913760 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: fi - name: Lint run: | - go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64.8 + go install github.com/golangci/golangci-lint/cmd/golangci-lint@v2.12.2 "$(go env GOPATH)/bin/golangci-lint" run ./... unit-tests: diff --git a/.golangci.yml b/.golangci.yml index 0c8b3f511..2cceae9cf 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,6 +1,4 @@ -version: 2 - run: timeout: 5m @@ -68,11 +66,3 @@ issues: linters: - unparam text: "MockInstanceService|MockRBACService" - - path: internal/core/services/gateway\.go - linters: - - bodyclose - text: "response body must be closed" - - path: internal/core/services/gateway_retry_test\.go - linters: - - bodyclose - text: "response body must be closed" From da4d0861a4d6513792db47f83a83d4ee41eeb132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 18:50:16 +0300 Subject: [PATCH 10/17] ci: fix golangci-lint v2 module path The golangci-lint v2 module moved to golangci-lint/v2. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f02913760..22e043d8a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: fi - name: Lint run: | - go install github.com/golangci/golangci-lint/cmd/golangci-lint@v2.12.2 + go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.2 "$(go env GOPATH)/bin/golangci-lint" run ./... unit-tests: From 595a61fdacf277c1629c296a7770f49d6e7b6575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 18:58:24 +0300 Subject: [PATCH 11/17] golangci.yml: restore version 2 config marker Was accidentally removed during v1/v2 compatibility troubleshooting. Required for golangci-lint v2. --- .golangci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 2cceae9cf..74c8aaf5f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,4 +1,6 @@ +version: 2 + run: timeout: 5m From a971327289844173d71fe3016f0d561d3dfb01f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 19:06:13 +0300 Subject: [PATCH 12/17] Revert "ci: upgrade golangci-lint to v2.12.2" v2 finds 144 pre-existing lint issues across the codebase that v1 did not flag. Since this PR only adds gateway retry/CB code and doesn't introduce new lint issues, revert to v1.64.8 which was passing CI. The version: 2 config field is also removed since v1 doesn't support it. --- .github/workflows/ci.yml | 2 +- .golangci.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 22e043d8a..1ea753dea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: fi - name: Lint run: | - go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.2 + go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64.8 "$(go env GOPATH)/bin/golangci-lint" run ./... unit-tests: diff --git a/.golangci.yml b/.golangci.yml index 74c8aaf5f..2cceae9cf 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,6 +1,4 @@ -version: 2 - run: timeout: 5m From 21fb825c3bcb44fa77501bf52e635f8cbb388a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 19:14:00 +0300 Subject: [PATCH 13/17] golangci.yml: add bodyclose exclusions for gateway files Suppress bodyclose false positives in retryTransport.RoundTrip where the response is passed to the proxy caller who closes it. --- .golangci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 2cceae9cf..e6adaaf29 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -66,3 +66,11 @@ issues: linters: - unparam text: "MockInstanceService|MockRBACService" + - path: internal/core/services/gateway\.go + linters: + - bodyclose + text: "response body must be closed" + - path: internal/core/services/gateway_retry_test\.go + linters: + - bodyclose + text: "response body must be closed" From ff2ec9a0e5f65cdcbde0f1fe4f5df347045c3d65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 19:22:29 +0300 Subject: [PATCH 14/17] test(gateway): use require.NoError for success assertions The testifylint require-error rule flags assert.NoError when checking that err is nil on success paths. Use require.NoError instead. --- internal/core/services/gateway_retry_test.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/internal/core/services/gateway_retry_test.go b/internal/core/services/gateway_retry_test.go index 6f9e26d86..bbded6ec5 100644 --- a/internal/core/services/gateway_retry_test.go +++ b/internal/core/services/gateway_retry_test.go @@ -11,6 +11,7 @@ import ( "github.com/poyrazk/thecloud/internal/core/domain" "github.com/poyrazk/thecloud/internal/platform" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // mockRT is a simple http.RoundTripper for testing retry behavior. @@ -148,7 +149,7 @@ func TestRetryTransport_RetriesOnConnectionRefused(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) assert.Equal(t, 2, m.calls, "should retry after connection refused") } @@ -164,7 +165,7 @@ func TestRetryTransport_RetriesOn502(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) assert.Equal(t, 3, m.calls, "should retry 502 twice then succeed") } @@ -179,7 +180,7 @@ func TestRetryTransport_RetriesOn503(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) assert.Equal(t, 2, m.calls) } @@ -194,7 +195,7 @@ func TestRetryTransport_RetriesOn429(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) assert.Equal(t, 2, m.calls) } @@ -208,7 +209,7 @@ func TestRetryTransport_NoRetryOn500(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 500, resp.StatusCode) assert.Equal(t, 1, m.calls, "500 should not be retried") } @@ -222,7 +223,7 @@ func TestRetryTransport_NoRetryOn400(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 400, resp.StatusCode) assert.Equal(t, 1, m.calls, "400 should not be retried") } @@ -237,7 +238,7 @@ func TestRetryTransport_RetriesOnTimeoutError(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) assert.Equal(t, 2, m.calls) } @@ -253,7 +254,7 @@ func TestRetryTransport_GivesUpAfterMaxRetries(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 502, resp.StatusCode) assert.Equal(t, 3, m.calls, "3 attempts: first + 2 retries") } @@ -265,7 +266,7 @@ func TestRetryTransport_SucceedsOnFirstAttempt(t *testing.T) { req, _ := http.NewRequest("GET", "/", nil) resp, err := transport.RoundTrip(req) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) assert.Equal(t, 1, m.calls) } From 1d3aa62226f58a6f04c3afbcfc8dd00e551690c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 20:26:35 +0300 Subject: [PATCH 15/17] fix(gateway): second-review inline comment fixes - gateway.go: fix RoundTrip bodyclose on CB error path; success path no longer prematurely closes body (caller owns it) - gateway.go: cryptoJitter returns max/2 deterministic fallback when crypto/rand fails instead of silently producing zero jitter - .golangci.yml: remove bodyclose suppression rules now that code is fixed - gateway_unit_test.go: replace magic numbers with named constants for default resilience values - gateway_handler.go: add circuit_breaker_threshold, circuit_breaker_timeout, max_retries, retry_timeout to CreateRouteRequest so they're included in the API contract - swagger docs: regenerate after adding resilience fields to CreateRouteRequest --- .golangci.yml | 8 --- docs/swagger/docs.go | 12 ++++ docs/swagger/swagger.json | 12 ++++ docs/swagger/swagger.yaml | 8 +++ internal/core/services/gateway.go | 11 ++-- internal/core/services/gateway_unit_test.go | 15 +++-- internal/handlers/gateway_handler.go | 66 ++++++++++++--------- 7 files changed, 87 insertions(+), 45 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index e6adaaf29..2cceae9cf 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -66,11 +66,3 @@ issues: linters: - unparam text: "MockInstanceService|MockRBACService" - - path: internal/core/services/gateway\.go - linters: - - bodyclose - text: "response body must be closed" - - path: internal/core/services/gateway_retry_test\.go - linters: - - bodyclose - text: "response body must be closed" diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 3de855813..1e6bf4295 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -11118,6 +11118,12 @@ const docTemplate = `{ "type": "string" } }, + "circuit_breaker_threshold": { + "type": "integer" + }, + "circuit_breaker_timeout": { + "type": "integer" + }, "dial_timeout": { "type": "integer", "minimum": 0 @@ -11130,6 +11136,9 @@ const docTemplate = `{ "type": "integer", "minimum": 0 }, + "max_retries": { + "type": "integer" + }, "methods": { "type": "array", "items": { @@ -11157,6 +11166,9 @@ const docTemplate = `{ "type": "integer", "minimum": 0 }, + "retry_timeout": { + "type": "integer" + }, "strip_prefix": { "type": "boolean" }, diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 533bcf855..270063fba 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -11110,6 +11110,12 @@ "type": "string" } }, + "circuit_breaker_threshold": { + "type": "integer" + }, + "circuit_breaker_timeout": { + "type": "integer" + }, "dial_timeout": { "type": "integer", "minimum": 0 @@ -11122,6 +11128,9 @@ "type": "integer", "minimum": 0 }, + "max_retries": { + "type": "integer" + }, "methods": { "type": "array", "items": { @@ -11149,6 +11158,9 @@ "type": "integer", "minimum": 0 }, + "retry_timeout": { + "type": "integer" + }, "strip_prefix": { "type": "boolean" }, diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index 58243dd44..9ec8c46b6 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -2159,6 +2159,10 @@ definitions: items: type: string type: array + circuit_breaker_threshold: + type: integer + circuit_breaker_timeout: + type: integer dial_timeout: minimum: 0 type: integer @@ -2168,6 +2172,8 @@ definitions: max_body_size: minimum: 0 type: integer + max_retries: + type: integer methods: items: type: string @@ -2187,6 +2193,8 @@ definitions: response_header_timeout: minimum: 0 type: integer + retry_timeout: + type: integer strip_prefix: type: boolean target_url: diff --git a/internal/core/services/gateway.go b/internal/core/services/gateway.go index 286eff6d9..32d4234b5 100644 --- a/internal/core/services/gateway.go +++ b/internal/core/services/gateway.go @@ -452,11 +452,12 @@ func (rt *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { return r.err }) if cbErr != nil { + if r.resp != nil { + _, _ = io.Copy(io.Discard, r.resp.Body) + _ = r.resp.Body.Close() + } return nil, cbErr } - if r.resp != nil { - _ = r.resp.Body.Close() - } return r.resp, r.err } @@ -540,7 +541,9 @@ func (rt *retryTransport) backoffWithJitter(attempt int) time.Duration { // frac is in [0, 1) so result is always non-negative and strictly bounded by max. func (rt *retryTransport) cryptoJitter(max time.Duration) time.Duration { var buf [8]byte - _, _ = rand.Read(buf[:]) + if _, err := rand.Read(buf[:]); err != nil { + return max / 2 // deterministic fallback on crypto rand failure + } val := binary.BigEndian.Uint64(buf[:]) frac := float64(val) / float64(math.MaxUint64) return time.Duration(float64(max) * frac) diff --git a/internal/core/services/gateway_unit_test.go b/internal/core/services/gateway_unit_test.go index c4732a75e..8f3c068ed 100644 --- a/internal/core/services/gateway_unit_test.go +++ b/internal/core/services/gateway_unit_test.go @@ -16,6 +16,13 @@ import ( "github.com/stretchr/testify/require" ) +const ( + defaultCircuitBreakerThreshold = 5 + defaultCircuitBreakerTimeout = 30000 // ms + defaultMaxRetries = 2 + defaultRetryTimeout = 5000 // ms +) + type mockGatewayRepo struct { mock.Mock } @@ -74,10 +81,10 @@ func TestGatewayService_Unit(t *testing.T) { res, err := svc.CreateRoute(ctx, params) require.NoError(t, err) - assert.Equal(t, 5, res.CircuitBreakerThreshold) - assert.Equal(t, int64(30000), res.CircuitBreakerTimeout) - assert.Equal(t, 2, res.MaxRetries) - assert.Equal(t, int64(5000), res.RetryTimeout) + assert.Equal(t, defaultCircuitBreakerThreshold, res.CircuitBreakerThreshold) + assert.Equal(t, int64(defaultCircuitBreakerTimeout), res.CircuitBreakerTimeout) + assert.Equal(t, defaultMaxRetries, res.MaxRetries) + assert.Equal(t, int64(defaultRetryTimeout), res.RetryTimeout) }) t.Run("CreateRoute", func(t *testing.T) { diff --git a/internal/handlers/gateway_handler.go b/internal/handlers/gateway_handler.go index 9e7e8ab28..26d21125f 100644 --- a/internal/handlers/gateway_handler.go +++ b/internal/handlers/gateway_handler.go @@ -21,21 +21,25 @@ import ( // CreateRouteRequest define the payload for creating a route. type CreateRouteRequest struct { - Name string `json:"name" binding:"required"` - PathPrefix string `json:"path_prefix" binding:"required"` - TargetURL string `json:"target_url" binding:"required"` - Methods []string `json:"methods"` - StripPrefix bool `json:"strip_prefix"` - RateLimit int `json:"rate_limit" binding:"gte=0"` - DialTimeout int64 `json:"dial_timeout" binding:"gte=0"` + Name string `json:"name" binding:"required"` + PathPrefix string `json:"path_prefix" binding:"required"` + TargetURL string `json:"target_url" binding:"required"` + Methods []string `json:"methods"` + StripPrefix bool `json:"strip_prefix"` + RateLimit int `json:"rate_limit" binding:"gte=0"` + DialTimeout int64 `json:"dial_timeout" binding:"gte=0"` ResponseHeaderTimeout int64 `json:"response_header_timeout" binding:"gte=0"` - IdleConnTimeout int64 `json:"idle_conn_timeout" binding:"gte=0"` - TLSSkipVerify bool `json:"tls_skip_verify"` - RequireTLS bool `json:"require_tls"` - AllowedCIDRs []string `json:"allowed_cidrs"` - BlockedCIDRs []string `json:"blocked_cidrs"` - MaxBodySize int64 `json:"max_body_size" binding:"gte=0"` - Priority int `json:"priority" binding:"gte=0"` + IdleConnTimeout int64 `json:"idle_conn_timeout" binding:"gte=0"` + TLSSkipVerify bool `json:"tls_skip_verify"` + RequireTLS bool `json:"require_tls"` + AllowedCIDRs []string `json:"allowed_cidrs"` + BlockedCIDRs []string `json:"blocked_cidrs"` + MaxBodySize int64 `json:"max_body_size" binding:"gte=0"` + Priority int `json:"priority" binding:"gte=0"` + CircuitBreakerThreshold int `json:"circuit_breaker_threshold"` + CircuitBreakerTimeout int64 `json:"circuit_breaker_timeout"` + MaxRetries int `json:"max_retries"` + RetryTimeout int64 `json:"retry_timeout"` } // GatewayHandler handles API gateway HTTP endpoints. @@ -80,21 +84,25 @@ func (h *GatewayHandler) CreateRoute(c *gin.Context) { } params := ports.CreateRouteParams{ - Name: req.Name, - Pattern: req.PathPrefix, - Target: req.TargetURL, - Methods: req.Methods, - StripPrefix: req.StripPrefix, - RateLimit: req.RateLimit, - DialTimeout: req.DialTimeout, - ResponseHeaderTimeout: req.ResponseHeaderTimeout, - IdleConnTimeout: req.IdleConnTimeout, - TLSSkipVerify: req.TLSSkipVerify, - RequireTLS: req.RequireTLS, - AllowedCIDRs: req.AllowedCIDRs, - BlockedCIDRs: req.BlockedCIDRs, - MaxBodySize: req.MaxBodySize, - Priority: req.Priority, + Name: req.Name, + Pattern: req.PathPrefix, + Target: req.TargetURL, + Methods: req.Methods, + StripPrefix: req.StripPrefix, + RateLimit: req.RateLimit, + DialTimeout: req.DialTimeout, + ResponseHeaderTimeout: req.ResponseHeaderTimeout, + IdleConnTimeout: req.IdleConnTimeout, + TLSSkipVerify: req.TLSSkipVerify, + RequireTLS: req.RequireTLS, + AllowedCIDRs: req.AllowedCIDRs, + BlockedCIDRs: req.BlockedCIDRs, + MaxBodySize: req.MaxBodySize, + Priority: req.Priority, + CircuitBreakerThreshold: req.CircuitBreakerThreshold, + CircuitBreakerTimeout: req.CircuitBreakerTimeout, + MaxRetries: req.MaxRetries, + RetryTimeout: req.RetryTimeout, } route, err := h.svc.CreateRoute(c.Request.Context(), params) From 6a15437544a8f95ec55da95eb0e4a962fe9d9ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 20:52:55 +0300 Subject: [PATCH 16/17] fix(gateway): validate MaxRetries and RetryTimeout are non-negative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add binding:"gte=0" to MaxRetries, RetryTimeout, CircuitBreakerThreshold, and CircuitBreakerTimeout in CreateRouteRequest — consistent with other numeric fields in the struct. Prevents negative values that would cause undefined behavior in the retry loop and backoff calculation. --- internal/handlers/gateway_handler.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/handlers/gateway_handler.go b/internal/handlers/gateway_handler.go index 26d21125f..ed04a8776 100644 --- a/internal/handlers/gateway_handler.go +++ b/internal/handlers/gateway_handler.go @@ -36,10 +36,10 @@ type CreateRouteRequest struct { BlockedCIDRs []string `json:"blocked_cidrs"` MaxBodySize int64 `json:"max_body_size" binding:"gte=0"` Priority int `json:"priority" binding:"gte=0"` - CircuitBreakerThreshold int `json:"circuit_breaker_threshold"` - CircuitBreakerTimeout int64 `json:"circuit_breaker_timeout"` - MaxRetries int `json:"max_retries"` - RetryTimeout int64 `json:"retry_timeout"` + CircuitBreakerThreshold int `json:"circuit_breaker_threshold" binding:"gte=0"` + CircuitBreakerTimeout int64 `json:"circuit_breaker_timeout" binding:"gte=0"` + MaxRetries int `json:"max_retries" binding:"gte=0"` + RetryTimeout int64 `json:"retry_timeout" binding:"gte=0"` } // GatewayHandler handles API gateway HTTP endpoints. From 2d1074cac227cabf603795f2928b9a0d9903fa9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 21:18:44 +0300 Subject: [PATCH 17/17] docs: regenerate swagger after adding gte=0 validation to resilience fields swag picks up binding:"gte=0" from CreateRouteRequest and emits minimum: 0 in the OpenAPI schema for circuit_breaker_threshold, circuit_breaker_timeout, max_retries, and retry_timeout. --- docs/swagger/docs.go | 12 ++++++++---- docs/swagger/swagger.json | 12 ++++++++---- docs/swagger/swagger.yaml | 4 ++++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 1e6bf4295..bddbb9c6e 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -11119,10 +11119,12 @@ const docTemplate = `{ } }, "circuit_breaker_threshold": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "circuit_breaker_timeout": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "dial_timeout": { "type": "integer", @@ -11137,7 +11139,8 @@ const docTemplate = `{ "minimum": 0 }, "max_retries": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "methods": { "type": "array", @@ -11167,7 +11170,8 @@ const docTemplate = `{ "minimum": 0 }, "retry_timeout": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "strip_prefix": { "type": "boolean" diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 270063fba..2db916de3 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -11111,10 +11111,12 @@ } }, "circuit_breaker_threshold": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "circuit_breaker_timeout": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "dial_timeout": { "type": "integer", @@ -11129,7 +11131,8 @@ "minimum": 0 }, "max_retries": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "methods": { "type": "array", @@ -11159,7 +11162,8 @@ "minimum": 0 }, "retry_timeout": { - "type": "integer" + "type": "integer", + "minimum": 0 }, "strip_prefix": { "type": "boolean" diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index 9ec8c46b6..7bf45ac32 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -2160,8 +2160,10 @@ definitions: type: string type: array circuit_breaker_threshold: + minimum: 0 type: integer circuit_breaker_timeout: + minimum: 0 type: integer dial_timeout: minimum: 0 @@ -2173,6 +2175,7 @@ definitions: minimum: 0 type: integer max_retries: + minimum: 0 type: integer methods: items: @@ -2194,6 +2197,7 @@ definitions: minimum: 0 type: integer retry_timeout: + minimum: 0 type: integer strip_prefix: type: boolean