From 05d5fa5cd8ab7b24bd60d12d6161b3d88116512f Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:10:14 +0100 Subject: [PATCH 01/11] feat: add Polly retry policy infrastructure (Issue #107) - Add RetryPolicyConfiguration with exponential backoff and jitter - Add RetryPolicyFactory for HTTP, database, and broker policies - Add ResilienceServiceCollectionExtensions for DI registration - Add Polly NuGet package to Infrastructure project Related to #107 --- .../ResilienceServiceCollectionExtensions.cs | 69 +++++++++ .../Resilience/RetryPolicyConfiguration.cs | 53 +++++++ .../Resilience/RetryPolicyFactory.cs | 136 ++++++++++++++++++ .../StarGate.Infrastructure.csproj | 36 ++--- 4 files changed, 271 insertions(+), 23 deletions(-) create mode 100644 src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs create mode 100644 src/StarGate.Infrastructure/Resilience/RetryPolicyConfiguration.cs create mode 100644 src/StarGate.Infrastructure/Resilience/RetryPolicyFactory.cs diff --git a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs new file mode 100644 index 00000000..53aaf55b --- /dev/null +++ b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs @@ -0,0 +1,69 @@ +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Polly; +using StarGate.Infrastructure.Resilience; + +namespace StarGate.Infrastructure.Extensions; + +/// +/// Extension methods for registering resilience policies. +/// +public static class ResilienceServiceCollectionExtensions +{ + /// + /// Adds resilience policies to the service collection. + /// + /// The service collection. + /// Application configuration. + /// The service collection for chaining. + public static IServiceCollection AddResiliencePolicies( + this IServiceCollection services, + IConfiguration configuration) + { + // Register retry policy configuration + services.Configure( + configuration.GetSection("Resilience:Retry")); + + // Register database retry policy as singleton + services.AddSingleton(provider => + { + var config = provider.GetRequiredService>().Value; + var logger = provider.GetRequiredService>(); + return RetryPolicyFactory.CreateDatabaseRetryPolicy(config, logger); + }); + + // Register broker retry policy as singleton + services.AddSingleton(provider => + { + var config = provider.GetRequiredService>().Value; + var logger = provider.GetRequiredService>(); + return RetryPolicyFactory.CreateBrokerRetryPolicy(config, logger); + }); + + return services; + } + + /// + /// Adds HTTP client with retry policy. + /// + /// HTTP client interface type. + /// The service collection. + /// HTTP client name. + /// HTTP client builder for further configuration. + public static IHttpClientBuilder AddHttpClientWithRetry( + this IServiceCollection services, + string name) + where TClient : class + { + return services + .AddHttpClient(name) + .AddPolicyHandler((provider, request) => + { + var config = provider.GetRequiredService>().Value; + var logger = provider.GetRequiredService>(); + return RetryPolicyFactory.CreateHttpRetryPolicy(config, logger); + }); + } +} diff --git a/src/StarGate.Infrastructure/Resilience/RetryPolicyConfiguration.cs b/src/StarGate.Infrastructure/Resilience/RetryPolicyConfiguration.cs new file mode 100644 index 00000000..8c2a22de --- /dev/null +++ b/src/StarGate.Infrastructure/Resilience/RetryPolicyConfiguration.cs @@ -0,0 +1,53 @@ +namespace StarGate.Infrastructure.Resilience; + +/// +/// Configuration for retry policies. +/// +public class RetryPolicyConfiguration +{ + /// + /// Maximum number of retry attempts. + /// + public int MaxRetryAttempts { get; set; } = 3; + + /// + /// Initial delay before first retry (seconds). + /// + public double InitialDelaySeconds { get; set; } = 1.0; + + /// + /// Maximum delay between retries (seconds). + /// + public double MaxDelaySeconds { get; set; } = 30.0; + + /// + /// Exponential backoff multiplier. + /// + public double BackoffMultiplier { get; set; } = 2.0; + + /// + /// Whether to use jitter to prevent thundering herd. + /// + public bool UseJitter { get; set; } = true; + + /// + /// Calculates delay for a specific retry attempt. + /// + /// The retry attempt number (1-based). + /// Time span representing the delay before next retry. + public TimeSpan CalculateDelay(int retryAttempt) + { + var exponentialDelay = InitialDelaySeconds * Math.Pow(BackoffMultiplier, retryAttempt - 1); + var delay = Math.Min(exponentialDelay, MaxDelaySeconds); + + if (UseJitter) + { + var random = new Random(); + // Generate jitter between -10% and +10% + var jitter = delay * 0.2 * (random.NextDouble() - 0.5); + delay += jitter; + } + + return TimeSpan.FromSeconds(Math.Max(delay, 0)); + } +} diff --git a/src/StarGate.Infrastructure/Resilience/RetryPolicyFactory.cs b/src/StarGate.Infrastructure/Resilience/RetryPolicyFactory.cs new file mode 100644 index 00000000..83f72863 --- /dev/null +++ b/src/StarGate.Infrastructure/Resilience/RetryPolicyFactory.cs @@ -0,0 +1,136 @@ +using Microsoft.Extensions.Logging; +using Polly; +using Polly.Retry; + +namespace StarGate.Infrastructure.Resilience; + +/// +/// Factory for creating Polly retry policies. +/// +public static class RetryPolicyFactory +{ + /// + /// Creates a retry policy for HTTP operations. + /// + /// Retry policy configuration. + /// Logger instance. + /// Configured async retry policy for HTTP responses. + public static AsyncRetryPolicy CreateHttpRetryPolicy( + RetryPolicyConfiguration config, + ILogger logger) + { + return Policy + .HandleResult(r => !r.IsSuccessStatusCode) + .Or() + .Or() + .WaitAndRetryAsync( + retryCount: config.MaxRetryAttempts, + sleepDurationProvider: retryAttempt => config.CalculateDelay(retryAttempt), + onRetry: (outcome, timespan, retryAttempt, context) => + { + var statusCode = outcome.Result?.StatusCode.ToString() ?? "N/A"; + var exception = outcome.Exception?.GetType().Name ?? "None"; + + logger.LogWarning( + "HTTP retry attempt {RetryAttempt}/{MaxRetries}: StatusCode={StatusCode}, Exception={Exception}, Delay={Delay}ms", + retryAttempt, + config.MaxRetryAttempts, + statusCode, + exception, + timespan.TotalMilliseconds); + }); + } + + /// + /// Creates a retry policy for database operations. + /// + /// Retry policy configuration. + /// Logger instance. + /// Configured async retry policy for database operations. + public static AsyncRetryPolicy CreateDatabaseRetryPolicy( + RetryPolicyConfiguration config, + ILogger logger) + { + return Policy + .Handle() + .Or() + .Or(ex => ex.Message.Contains("connection", StringComparison.OrdinalIgnoreCase)) + .WaitAndRetryAsync( + retryCount: config.MaxRetryAttempts, + sleepDurationProvider: retryAttempt => config.CalculateDelay(retryAttempt), + onRetry: (exception, timespan, retryAttempt, context) => + { + logger.LogWarning( + exception, + "Database retry attempt {RetryAttempt}/{MaxRetries}: Exception={Exception}, Delay={Delay}ms", + retryAttempt, + config.MaxRetryAttempts, + exception.GetType().Name, + timespan.TotalMilliseconds); + }); + } + + /// + /// Creates a retry policy for message broker operations. + /// + /// Retry policy configuration. + /// Logger instance. + /// Configured async retry policy for broker operations. + public static AsyncRetryPolicy CreateBrokerRetryPolicy( + RetryPolicyConfiguration config, + ILogger logger) + { + return Policy + .Handle() + .Or() + .Or(ex => ex.Message.Contains("connection", StringComparison.OrdinalIgnoreCase)) + .WaitAndRetryAsync( + retryCount: config.MaxRetryAttempts, + sleepDurationProvider: retryAttempt => config.CalculateDelay(retryAttempt), + onRetry: (exception, timespan, retryAttempt, context) => + { + logger.LogWarning( + exception, + "Broker retry attempt {RetryAttempt}/{MaxRetries}: Exception={Exception}, Delay={Delay}ms", + retryAttempt, + config.MaxRetryAttempts, + exception.GetType().Name, + timespan.TotalMilliseconds); + }); + } + + /// + /// Creates a generic retry policy for any async operation. + /// + /// Retry policy configuration. + /// Logger instance. + /// Configured async retry policy for generic operations. + public static AsyncRetryPolicy CreateGenericRetryPolicy( + RetryPolicyConfiguration config, + ILogger logger) + { + return Policy + .Handle(ex => IsTransientException(ex)) + .WaitAndRetryAsync( + retryCount: config.MaxRetryAttempts, + sleepDurationProvider: retryAttempt => config.CalculateDelay(retryAttempt), + onRetry: (exception, timespan, retryAttempt, context) => + { + logger.LogWarning( + exception, + "Generic retry attempt {RetryAttempt}/{MaxRetries}: Exception={Exception}, Delay={Delay}ms", + retryAttempt, + config.MaxRetryAttempts, + exception.GetType().Name, + timespan.TotalMilliseconds); + }); + } + + private static bool IsTransientException(Exception ex) + { + return ex is TimeoutException + || ex is HttpRequestException + || ex is IOException + || (ex is InvalidOperationException && ex.Message.Contains("connection", StringComparison.OrdinalIgnoreCase)); + } +} diff --git a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj index 6675ed49..94e41730 100644 --- a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj +++ b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj @@ -2,36 +2,26 @@ net8.0 - StarGate.Infrastructure + enable + enable - - - - - - - - - - - - - - - - - - - - + - - + + + + + + + + + + From 228a6bea7de61056d696ae6929b0fd79e19c4e4d Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:10:52 +0100 Subject: [PATCH 02/11] feat: configure Polly retry policies in appsettings and Program.cs (Issue #107) - Add Resilience:Retry configuration to appsettings.json (Production: 3 retries, 1s-30s) - Add Resilience:Retry configuration to appsettings.Development.json (Dev: 2 retries, 0.5s-10s) - Register resilience policies in Program.cs using AddResiliencePolicies Related to #107 --- src/StarGate.Server/Program.cs | 4 ++++ src/StarGate.Server/appsettings.Development.json | 9 +++++++++ src/StarGate.Server/appsettings.json | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/src/StarGate.Server/Program.cs b/src/StarGate.Server/Program.cs index 3f57dd19..6098d5d0 100644 --- a/src/StarGate.Server/Program.cs +++ b/src/StarGate.Server/Program.cs @@ -1,6 +1,7 @@ using Microsoft.Extensions.Diagnostics.HealthChecks; using Microsoft.Extensions.Hosting; using StarGate.Core.Configuration; +using StarGate.Infrastructure.Extensions; using StarGate.Server.HealthChecks; using StarGate.Server.Workers; @@ -17,6 +18,9 @@ builder.Services.Configure( builder.Configuration.GetSection("Retry")); +// Add resilience policies +builder.Services.AddResiliencePolicies(builder.Configuration); + // Register ProcessWorker as singleton to allow health check injection builder.Services.AddSingleton(); builder.Services.AddHostedService(sp => sp.GetRequiredService()); diff --git a/src/StarGate.Server/appsettings.Development.json b/src/StarGate.Server/appsettings.Development.json index 9a0e2d38..4f0e2e3c 100644 --- a/src/StarGate.Server/appsettings.Development.json +++ b/src/StarGate.Server/appsettings.Development.json @@ -11,5 +11,14 @@ "MaxDelaySeconds": 60, "BackoffMultiplier": 2.0, "UseJitter": true + }, + "Resilience": { + "Retry": { + "MaxRetryAttempts": 2, + "InitialDelaySeconds": 0.5, + "MaxDelaySeconds": 10.0, + "BackoffMultiplier": 2.0, + "UseJitter": true + } } } diff --git a/src/StarGate.Server/appsettings.json b/src/StarGate.Server/appsettings.json index 39fcab25..d7d4800a 100644 --- a/src/StarGate.Server/appsettings.json +++ b/src/StarGate.Server/appsettings.json @@ -11,5 +11,14 @@ "MaxDelaySeconds": 300, "BackoffMultiplier": 2.0, "UseJitter": true + }, + "Resilience": { + "Retry": { + "MaxRetryAttempts": 3, + "InitialDelaySeconds": 1.0, + "MaxDelaySeconds": 30.0, + "BackoffMultiplier": 2.0, + "UseJitter": true + } } } From 1bb53019b40794924f7991b3e15a98b30815d8f1 Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:11:40 +0100 Subject: [PATCH 03/11] test: add unit tests for Polly retry policies (Issue #107) - Add RetryPolicyConfigurationTests for exponential backoff, jitter, and max delay - Add RetryPolicyFactoryTests for HTTP, database, and broker retry policies - Test retry count, eventual success, and exception handling - Verify jitter randomization and delay calculation accuracy Related to #107 --- .../RetryPolicyConfigurationTests.cs | 148 +++++++++++ .../Resilience/RetryPolicyFactoryTests.cs | 251 ++++++++++++++++++ 2 files changed, 399 insertions(+) create mode 100644 tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyConfigurationTests.cs create mode 100644 tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyFactoryTests.cs diff --git a/tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyConfigurationTests.cs b/tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyConfigurationTests.cs new file mode 100644 index 00000000..e7c132f6 --- /dev/null +++ b/tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyConfigurationTests.cs @@ -0,0 +1,148 @@ +using FluentAssertions; +using StarGate.Infrastructure.Resilience; + +namespace StarGate.Infrastructure.Tests.Resilience; + +public class RetryPolicyConfigurationTests +{ + [Theory] + [InlineData(1, 1.0)] // First retry: 1 second + [InlineData(2, 2.0)] // Second retry: 2 seconds + [InlineData(3, 4.0)] // Third retry: 4 seconds + [InlineData(4, 8.0)] // Fourth retry: 8 seconds + public void CalculateDelay_Should_UseExponentialBackoff(int retryAttempt, double expectedSeconds) + { + // Arrange + var config = new RetryPolicyConfiguration + { + InitialDelaySeconds = 1.0, + BackoffMultiplier = 2.0, + MaxDelaySeconds = 30.0, + UseJitter = false + }; + + // Act + var delay = config.CalculateDelay(retryAttempt); + + // Assert + delay.TotalSeconds.Should().Be(expectedSeconds); + } + + [Fact] + public void CalculateDelay_Should_RespectMaxDelay() + { + // Arrange + var config = new RetryPolicyConfiguration + { + InitialDelaySeconds = 1.0, + BackoffMultiplier = 2.0, + MaxDelaySeconds = 5.0, + UseJitter = false + }; + + // Act + var delay = config.CalculateDelay(10); // Would be 512 seconds without cap + + // Assert + delay.TotalSeconds.Should().Be(5.0); + } + + [Fact] + public void CalculateDelay_Should_AddJitter_WhenEnabled() + { + // Arrange + var config = new RetryPolicyConfiguration + { + InitialDelaySeconds = 10.0, + BackoffMultiplier = 2.0, + UseJitter = true + }; + + // Act + var delays = Enumerable.Range(0, 20) + .Select(_ => config.CalculateDelay(1).TotalSeconds) + .ToList(); + + // Assert - delays should vary due to jitter + delays.Should().OnlyHaveUniqueItems(); + delays.Should().AllSatisfy(d => d.Should().BeInRange(9.0, 11.0)); // 10 +/- 10% + } + + [Fact] + public void CalculateDelay_Should_NotReturnNegativeDelay() + { + // Arrange + var config = new RetryPolicyConfiguration + { + InitialDelaySeconds = 0.1, + UseJitter = true + }; + + // Act + var delays = Enumerable.Range(0, 100) + .Select(_ => config.CalculateDelay(1)) + .ToList(); + + // Assert + delays.Should().AllSatisfy(d => d.Should().BeGreaterOrEqualTo(TimeSpan.Zero)); + } + + [Fact] + public void CalculateDelay_Should_UseDefaultValues() + { + // Arrange + var config = new RetryPolicyConfiguration(); + + // Assert + config.MaxRetryAttempts.Should().Be(3); + config.InitialDelaySeconds.Should().Be(1.0); + config.MaxDelaySeconds.Should().Be(30.0); + config.BackoffMultiplier.Should().Be(2.0); + config.UseJitter.Should().BeTrue(); + } + + [Fact] + public void CalculateDelay_Should_HandleZeroRetryAttempt() + { + // Arrange + var config = new RetryPolicyConfiguration + { + InitialDelaySeconds = 1.0, + BackoffMultiplier = 2.0, + UseJitter = false + }; + + // Act + var delay = config.CalculateDelay(0); + + // Assert + // 0th retry: 1.0 * 2^(-1) = 0.5 seconds + delay.TotalSeconds.Should().Be(0.5); + } + + [Theory] + [InlineData(1, 2.0, 5.0)] // 5 * 2^0 = 5 + [InlineData(2, 2.0, 10.0)] // 5 * 2^1 = 10 + [InlineData(3, 2.0, 20.0)] // 5 * 2^2 = 20 + [InlineData(4, 2.0, 30.0)] // 5 * 2^3 = 40, capped at 30 + public void CalculateDelay_Should_CalculateCorrectly_WithCustomInitialDelay( + int retryAttempt, + double multiplier, + double expectedSeconds) + { + // Arrange + var config = new RetryPolicyConfiguration + { + InitialDelaySeconds = 5.0, + BackoffMultiplier = multiplier, + MaxDelaySeconds = 30.0, + UseJitter = false + }; + + // Act + var delay = config.CalculateDelay(retryAttempt); + + // Assert + delay.TotalSeconds.Should().Be(expectedSeconds); + } +} diff --git a/tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyFactoryTests.cs b/tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyFactoryTests.cs new file mode 100644 index 00000000..0d7c51ef --- /dev/null +++ b/tests/StarGate.Infrastructure.Tests/Resilience/RetryPolicyFactoryTests.cs @@ -0,0 +1,251 @@ +using FluentAssertions; +using Microsoft.Extensions.Logging.Abstractions; +using Polly; +using StarGate.Infrastructure.Resilience; + +namespace StarGate.Infrastructure.Tests.Resilience; + +public class RetryPolicyFactoryTests +{ + private readonly RetryPolicyConfiguration _config; + private readonly NullLogger _logger; + + public RetryPolicyFactoryTests() + { + _config = new RetryPolicyConfiguration + { + MaxRetryAttempts = 3, + InitialDelaySeconds = 0.1, + UseJitter = false + }; + _logger = NullLogger.Instance; + } + + [Fact] + public async Task HttpRetryPolicy_Should_RetryOnHttpRequestException() + { + // Arrange + var policy = RetryPolicyFactory.CreateHttpRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new HttpRequestException("Simulated failure"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task HttpRetryPolicy_Should_RetryOnTimeoutException() + { + // Arrange + var policy = RetryPolicyFactory.CreateHttpRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new TimeoutException("Simulated timeout"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task DatabaseRetryPolicy_Should_RetryOnTimeoutException() + { + // Arrange + var policy = RetryPolicyFactory.CreateDatabaseRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new TimeoutException("Simulated timeout"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task DatabaseRetryPolicy_Should_RetryOnIOException() + { + // Arrange + var policy = RetryPolicyFactory.CreateDatabaseRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new IOException("Simulated IO error"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task DatabaseRetryPolicy_Should_RetryOnConnectionException() + { + // Arrange + var policy = RetryPolicyFactory.CreateDatabaseRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new InvalidOperationException("Connection failed"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task BrokerRetryPolicy_Should_RetryOnIOException() + { + // Arrange + var policy = RetryPolicyFactory.CreateBrokerRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new IOException("Simulated IO error"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task GenericRetryPolicy_Should_RetryOnTransientException() + { + // Arrange + var policy = RetryPolicyFactory.CreateGenericRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new TimeoutException("Simulated timeout"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(4); // Initial + 3 retries + } + + [Fact] + public async Task RetryPolicy_Should_SucceedOnEventualSuccess() + { + // Arrange + var policy = RetryPolicyFactory.CreateGenericRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + await policy.ExecuteAsync(async () => + { + attemptCount++; + if (attemptCount < 3) + { + throw new TimeoutException("Transient failure"); + } + await Task.CompletedTask; + }); + + // Assert + attemptCount.Should().Be(3); // 2 failures + 1 success + } + + [Fact] + public async Task DatabaseRetryPolicy_Should_NotRetryOnNonTransientException() + { + // Arrange + var policy = RetryPolicyFactory.CreateDatabaseRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new ArgumentException("Non-transient error"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(1); // Only initial attempt, no retries + } + + [Fact] + public async Task GenericRetryPolicy_Should_NotRetryOnNonTransientException() + { + // Arrange + var policy = RetryPolicyFactory.CreateGenericRetryPolicy(_config, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new ArgumentNullException("Non-transient error"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(1); // Only initial attempt, no retries + } + + [Fact] + public async Task RetryPolicy_Should_RespectMaxRetryAttempts() + { + // Arrange + var customConfig = new RetryPolicyConfiguration + { + MaxRetryAttempts = 5, + InitialDelaySeconds = 0.01, + UseJitter = false + }; + var policy = RetryPolicyFactory.CreateGenericRetryPolicy(customConfig, _logger); + var attemptCount = 0; + + // Act + var act = async () => await policy.ExecuteAsync(async () => + { + attemptCount++; + await Task.CompletedTask; + throw new TimeoutException("Always failing"); + }); + + // Assert + await act.Should().ThrowAsync(); + attemptCount.Should().Be(6); // Initial + 5 retries + } +} From a96042e01b9037b09c7287f0ecef6dd562d85dc9 Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:13:28 +0100 Subject: [PATCH 04/11] docs: add comprehensive Polly retry policies documentation (Issue #107) - Add POLLY-RETRY-POLICIES.md with implementation guide - Document exponential backoff formula and jitter strategy - Explain difference between Polly retry and ProcessWorker retry - Provide configuration examples for Development and Production - Include testing instructions and troubleshooting guide - Add performance considerations and monitoring recommendations Related to #107 --- docs/POLLY-RETRY-POLICIES.md | 685 +++++++++++++++++++++++++++++++++++ 1 file changed, 685 insertions(+) create mode 100644 docs/POLLY-RETRY-POLICIES.md diff --git a/docs/POLLY-RETRY-POLICIES.md b/docs/POLLY-RETRY-POLICIES.md new file mode 100644 index 00000000..5443e5a7 --- /dev/null +++ b/docs/POLLY-RETRY-POLICIES.md @@ -0,0 +1,685 @@ +# Polly Retry Policies Implementation + +## Overview + +This document describes the Polly-based retry policy implementation for handling transient failures in infrastructure components (HTTP clients, database operations, message broker). This is **different** from the ProcessWorker retry logic documented in [RETRY-LOGIC.md](./RETRY-LOGIC.md). + +## Two-Level Retry Strategy + +StarGate implements a two-level retry strategy: + +### Level 1: Infrastructure Retry (Polly) - **This Document** +- **Purpose**: Handle transient failures in external services (MongoDB, RabbitMQ, HTTP) +- **Scope**: Single operation (e.g., `InsertOneAsync`, `BasicPublish`) +- **Speed**: Fast (1s → 2s → 4s = 7s total) +- **Transparency**: Automatic and transparent to business logic +- **Location**: `StarGate.Infrastructure.Resilience` + +### Level 2: Application Retry (ProcessWorker) +- **Purpose**: Retry entire failed process execution +- **Scope**: Complete process workflow +- **Speed**: Slower (5s → 10s → 20s = 35s+ total) +- **Visibility**: Changes process status to "Retrying" +- **Location**: `StarGate.Server.Workers` +- **Documentation**: [RETRY-LOGIC.md](./RETRY-LOGIC.md) + +## Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Application Layer (ProcessWorker) │ +│ - Executes business logic │ +│ - Catches unhandled exceptions │ +│ - Implements process-level retry (5s → 10s → 20s) │ +└────────────────────┬────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Infrastructure Layer (Repositories, Brokers) │ +│ - MongoDB operations (MongoProcessRepository) │ +│ - RabbitMQ operations (RabbitMqBroker) │ +│ - HTTP calls (External APIs) │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ Polly Retry Policies (Infrastructure Retry) │ │ +│ │ - Intercepts TimeoutException, IOException │ │ +│ │ - Retries automatically (1s → 2s → 4s) │ │ +│ │ - Logs retry attempts │ │ +│ └─────────────────────────────────────────────────┘ │ +└────────────────────┬────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ External Services │ +│ - MongoDB │ +│ - RabbitMQ │ +│ - External HTTP APIs │ +└─────────────────────────────────────────────────────────┘ +``` + +## Components + +### 1. RetryPolicyConfiguration + +**Location**: `src/StarGate.Infrastructure/Resilience/RetryPolicyConfiguration.cs` + +```csharp +public class RetryPolicyConfiguration +{ + public int MaxRetryAttempts { get; set; } = 3; + public double InitialDelaySeconds { get; set; } = 1.0; + public double MaxDelaySeconds { get; set; } = 30.0; + public double BackoffMultiplier { get; set; } = 2.0; + public bool UseJitter { get; set; } = true; + + public TimeSpan CalculateDelay(int retryAttempt) + { + var exponentialDelay = InitialDelaySeconds * Math.Pow(BackoffMultiplier, retryAttempt - 1); + var delay = Math.Min(exponentialDelay, MaxDelaySeconds); + + if (UseJitter) + { + var random = new Random(); + var jitter = delay * 0.2 * (random.NextDouble() - 0.5); // ±10% + delay += jitter; + } + + return TimeSpan.FromSeconds(Math.Max(delay, 0)); + } +} +``` + +### 2. RetryPolicyFactory + +**Location**: `src/StarGate.Infrastructure/Resilience/RetryPolicyFactory.cs` + +Provides static factory methods for creating specialized retry policies: + +#### HTTP Retry Policy + +```csharp +var policy = RetryPolicyFactory.CreateHttpRetryPolicy(config, logger); +``` + +**Handles**: +- `HttpRequestException` +- `TimeoutException` +- HTTP responses with non-success status codes + +**Use Cases**: +- External API calls +- Webhook deliveries +- Service-to-service communication + +#### Database Retry Policy + +```csharp +var policy = RetryPolicyFactory.CreateDatabaseRetryPolicy(config, logger); +``` + +**Handles**: +- `TimeoutException` +- `IOException` +- `InvalidOperationException` containing "connection" + +**Use Cases**: +- MongoDB operations +- Connection pool exhaustion +- Network interruptions + +#### Broker Retry Policy + +```csharp +var policy = RetryPolicyFactory.CreateBrokerRetryPolicy(config, logger); +``` + +**Handles**: +- `TimeoutException` +- `IOException` +- `InvalidOperationException` containing "connection" + +**Use Cases**: +- RabbitMQ publishing +- Message consumption +- Channel creation + +#### Generic Retry Policy + +```csharp +var policy = RetryPolicyFactory.CreateGenericRetryPolicy(config, logger); +``` + +**Handles**: Any transient exception + +**Use Cases**: +- General-purpose retry logic +- New integrations + +### 3. ResilienceServiceCollectionExtensions + +**Location**: `src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs` + +Provides extension methods for registering policies in dependency injection: + +```csharp +// In Program.cs +builder.Services.AddResiliencePolicies(builder.Configuration); + +// For HTTP clients +builder.Services.AddHttpClientWithRetry("external-api"); +``` + +## Exponential Backoff Formula + +The retry delay is calculated using exponential backoff with optional jitter: + +``` +Delay = InitialDelay × (Multiplier ^ (RetryAttempt - 1)) +Delay = min(Delay, MaxDelay) + +With Jitter: +Jitter = Delay × 0.2 × (Random - 0.5) // ±10% +FinalDelay = Delay + Jitter +``` + +### Example Calculations + +With default configuration (InitialDelay=1s, Multiplier=2.0, MaxDelay=30s): + +| Retry | Formula | Base Delay | Jitter Range | Final Range | +|-------|---------|------------|--------------|-------------| +| 1st | 1 × 2⁰ | 1.0s | ±0.1s | 0.9s - 1.1s | +| 2nd | 1 × 2¹ | 2.0s | ±0.2s | 1.8s - 2.2s | +| 3rd | 1 × 2² | 4.0s | ±0.4s | 3.6s - 4.4s | +| 4th | 1 × 2³ | 8.0s | ±0.8s | 7.2s - 8.8s | + +**Total time for 3 retries**: ~7 seconds (1s + 2s + 4s) + +### Comparison with ProcessWorker Retry + +| Aspect | Polly Retry | ProcessWorker Retry | +|--------|-------------|---------------------| +| Initial Delay | 1s | 5s | +| Delay Range | 1s - 30s | 5s - 300s | +| Jitter | ±10% | ±30% | +| Total Time (3 retries) | ~7s | ~35s | +| Purpose | Transient failures | Process execution failures | + +## Configuration + +### appsettings.json (Production) + +```json +{ + "Resilience": { + "Retry": { + "MaxRetryAttempts": 3, + "InitialDelaySeconds": 1.0, + "MaxDelaySeconds": 30.0, + "BackoffMultiplier": 2.0, + "UseJitter": true + } + } +} +``` + +### appsettings.Development.json + +```json +{ + "Resilience": { + "Retry": { + "MaxRetryAttempts": 2, + "InitialDelaySeconds": 0.5, + "MaxDelaySeconds": 10.0, + "BackoffMultiplier": 2.0, + "UseJitter": true + } + } +} +``` + +**Development Configuration Rationale**: +- Fewer retries (2 vs 3) for faster feedback +- Shorter delays (0.5s vs 1s) for quicker development cycles +- Lower max delay (10s vs 30s) to avoid long waits during debugging + +## Usage Examples + +### Applying Retry Policy to MongoDB Repository + +```csharp +public class MongoProcessRepository : IProcessRepository +{ + private readonly IMongoCollection _collection; + private readonly AsyncRetryPolicy _retryPolicy; + private readonly ILogger _logger; + + public MongoProcessRepository( + IMongoDatabase database, + AsyncRetryPolicy retryPolicy, + ILogger logger) + { + _collection = database.GetCollection("processes"); + _retryPolicy = retryPolicy ?? throw new ArgumentNullException(nameof(retryPolicy)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public async Task CreateAsync(Process process, CancellationToken ct = default) + { + return await _retryPolicy.ExecuteAsync(async () => + { + var document = ProcessMapper.MapToDocument(process); + await _collection.InsertOneAsync(document, cancellationToken: ct); + + _logger.LogDebug("Process created: ProcessId={ProcessId}", process.ProcessId); + return process; + }); + } + + public async Task GetByIdAsync(Guid processId, CancellationToken ct = default) + { + return await _retryPolicy.ExecuteAsync(async () => + { + var bsonGuid = new BsonBinaryData(processId, GuidRepresentation.Standard); + var filter = Builders.Filter.Eq("_id", bsonGuid); + var document = await _collection.Find(filter).FirstOrDefaultAsync(ct); + + return document != null ? ProcessMapper.MapToDomain(document) : null; + }); + } +} +``` + +### Applying Retry Policy to RabbitMQ Broker + +```csharp +public class RabbitMqBroker +{ + private readonly IConnection _connection; + private readonly AsyncRetryPolicy _retryPolicy; + private readonly ILogger _logger; + + public RabbitMqBroker( + IConnection connection, + AsyncRetryPolicy retryPolicy, + ILogger logger) + { + _connection = connection ?? throw new ArgumentNullException(nameof(connection)); + _retryPolicy = retryPolicy ?? throw new ArgumentNullException(nameof(retryPolicy)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public async Task PublishAsync( + T message, + string routingKey, + CancellationToken ct = default) where T : class + { + await _retryPolicy.ExecuteAsync(async () => + { + using var channel = _connection.CreateModel(); + + var messageBody = SerializeMessage(message); + var properties = channel.CreateBasicProperties(); + properties.Persistent = true; + properties.ContentType = "application/json"; + properties.MessageId = Guid.NewGuid().ToString(); + + channel.BasicPublish( + exchange: "stargate.processes", + routingKey: routingKey, + basicProperties: properties, + body: messageBody); + + _logger.LogDebug( + "Message published: RoutingKey={RoutingKey}, MessageId={MessageId}", + routingKey, + properties.MessageId); + + await Task.CompletedTask; + }); + } +} +``` + +### Registering Policies in DI Container + +```csharp +// Program.cs +builder.Services.AddResiliencePolicies(builder.Configuration); +``` + +This automatically registers: +- `AsyncRetryPolicy` for database operations +- `AsyncRetryPolicy` for broker operations +- `RetryPolicyConfiguration` from appsettings.json + +## Error Classification + +### Transient Errors (Retryable) + +Errors that indicate temporary issues that may resolve on retry: + +- **Network Errors**: `HttpRequestException`, `IOException` +- **Timeout Errors**: `TimeoutException` +- **Connection Errors**: `InvalidOperationException` with "connection" in message +- **HTTP Status Codes**: 408, 429, 500, 502, 503, 504 + +### Permanent Errors (Non-Retryable) + +Errors that indicate persistent issues that won't be fixed by retrying: + +- **Validation Errors**: `ArgumentException`, `ArgumentNullException` +- **Authorization Errors**: `UnauthorizedException`, 401, 403 +- **Not Found Errors**: 404 +- **Bad Request Errors**: 400 +- **Business Logic Errors**: `InvalidOperationException` (without "connection") + +## Jitter Strategy + +### Why Jitter? + +**Without Jitter**: +``` +100 failed requests at t=0 +→ All retry at t=1s (thundering herd) +→ All retry at t=3s (1s+2s) +→ All retry at t=7s (1s+2s+4s) +→ Load spikes every time +``` + +**With Jitter (±10%)**: +``` +100 failed requests at t=0 +→ Retries distributed between 0.9s - 1.1s +→ Retries distributed between 2.7s - 3.3s +→ Retries distributed between 6.3s - 7.7s +→ Smooth load distribution +``` + +### Jitter Implementation + +```csharp +if (UseJitter) +{ + var random = new Random(); + // Generate jitter between -10% and +10% + var jitter = delay * 0.2 * (random.NextDouble() - 0.5); + delay += jitter; +} +``` + +**Range**: ±10% (smaller than ProcessWorker's ±30%) + +**Rationale**: Infrastructure retries happen more frequently and need tighter coordination. + +## Testing + +### Unit Tests + +Run retry policy unit tests: + +```bash +dotnet test tests/StarGate.Infrastructure.Tests \ + --filter "FullyQualifiedName~Resilience" +``` + +Test coverage includes: +- Exponential backoff calculation +- Max delay enforcement +- Jitter randomization +- Retry count accuracy +- Eventual success scenarios +- Non-retryable exceptions + +### Integration Tests + +#### Test MongoDB Retry + +```bash +# 1. Start MongoDB +docker-compose up -d mongodb + +# 2. Create a process (should succeed) +curl -X POST http://localhost:5000/api/processes \ + -H "Content-Type: application/json" \ + -d '{ + "clientId": "test-client", + "processType": "test-type", + "clientProcessId": "test-001" + }' + +# 3. Stop MongoDB to simulate failure +docker-compose stop mongodb + +# 4. Try to create another process (should retry then fail) +curl -X POST http://localhost:5000/api/processes \ + -H "Content-Type: application/json" \ + -d '{ + "clientId": "test-client", + "processType": "test-type", + "clientProcessId": "test-002" + }' + +# 5. Check logs for retry attempts +docker logs stargate-server | grep "Database retry attempt" + +# Expected output: +# Database retry attempt 1/3: Exception=TimeoutException, Delay=1000ms +# Database retry attempt 2/3: Exception=TimeoutException, Delay=2000ms +# Database retry attempt 3/3: Exception=TimeoutException, Delay=4000ms + +# 6. Restart MongoDB +docker-compose start mongodb + +# 7. Verify requests succeed again +``` + +#### Test RabbitMQ Retry + +```bash +# 1. Stop RabbitMQ during process creation +docker-compose stop rabbitmq + +# 2. Create process (should retry broker operations) +curl -X POST http://localhost:5000/api/processes \ + -H "Content-Type: application/json" \ + -d '{ + "clientId": "test-client", + "processType": "test-type", + "clientProcessId": "test-003" + }' + +# 3. Check logs for broker retry attempts +docker logs stargate-server | grep "Broker retry attempt" +``` + +#### Test Jitter Randomization + +```bash +# Create 10 processes simultaneously +for i in {1..10}; do + curl -X POST http://localhost:5000/api/processes \ + -H "Content-Type: application/json" \ + -d "{\"clientId\":\"test-client\",\"processType\":\"test-type\",\"clientProcessId\":\"test-$i\"}" & +done + +# Verify retry delays vary (not all exactly 1s, 2s, 4s) +``` + +## Monitoring and Observability + +### Log Events + +Polly retry policies produce structured logs: + +```csharp +// HTTP retry +logger.LogWarning( + "HTTP retry attempt {RetryAttempt}/{MaxRetries}: StatusCode={StatusCode}, Exception={Exception}, Delay={Delay}ms", + retryAttempt, maxRetries, statusCode, exception, delay); + +// Database retry +logger.LogWarning( + exception, + "Database retry attempt {RetryAttempt}/{MaxRetries}: Exception={Exception}, Delay={Delay}ms", + retryAttempt, maxRetries, exceptionType, delay); + +// Broker retry +logger.LogWarning( + exception, + "Broker retry attempt {RetryAttempt}/{MaxRetries}: Exception={Exception}, Delay={Delay}ms", + retryAttempt, maxRetries, exceptionType, delay); +``` + +### Metrics to Monitor + +#### Infrastructure Retry Metrics + +- **Retry Rate**: Percentage of operations requiring retry +- **Retry Count Distribution**: How many retries before success +- **Retry Success Rate**: Operations that succeed after retry +- **Retry Failure Rate**: Operations that fail after all retries + +#### Performance Metrics + +- **P50 Latency**: Median operation time (should be ~base time) +- **P95 Latency**: 95th percentile (may include 1-2 retries) +- **P99 Latency**: 99th percentile (may include all 3 retries) + +#### Health Indicators + +- **High Retry Rate** (>10%): Infrastructure issues +- **Increasing Retry Failures**: Persistent outages +- **Jitter Distribution**: Should be evenly distributed + +### Example Log Queries + +```bash +# Find all retry attempts in last hour +grep "retry attempt" /var/log/stargate/*.log | tail -100 + +# Count retries by exception type +grep "retry attempt" /var/log/stargate/*.log | \ + grep -oP "Exception=\K[^,]+" | sort | uniq -c + +# Calculate average retry count +grep "retry attempt" /var/log/stargate/*.log | \ + grep -oP "RetryAttempt=\K\d+" | \ + awk '{sum+=$1; count++} END {print "Average:", sum/count}' +``` + +## Performance Considerations + +### Success Case + +- **Overhead**: <1ms (policy check is fast) +- **Memory**: Negligible (policy is singleton) +- **Throughput**: No impact on successful operations + +### Failure Case + +- **Additional Latency**: Up to 7 seconds (1s + 2s + 4s) +- **Memory**: Minimal (no state stored between retries) +- **Throughput**: Reduces during outages (expected behavior) + +### Comparison + +| Scenario | Without Polly | With Polly | +|----------|---------------|------------| +| Success | ~50ms | ~51ms | +| 1 Transient Failure | Immediate failure | +1s → Success | +| 2 Transient Failures | Immediate failure | +3s → Success | +| 3 Transient Failures | Immediate failure | +7s → Success | +| Permanent Failure | Immediate failure | +7s → Failure | + +**Trade-off**: Slight increase in failure latency vs. significantly higher success rate. + +## Troubleshooting + +### Problem: Operations Still Failing After Retries + +**Possible Causes**: +1. Persistent infrastructure outage +2. MaxRetryAttempts too low +3. Network issues + +**Solutions**: +- Check infrastructure status (MongoDB, RabbitMQ) +- Increase `MaxRetryAttempts` temporarily +- Verify network connectivity +- Review exception logs for non-transient errors + +### Problem: Retry Delays Too Short/Long + +**Possible Causes**: +1. Incorrect configuration in appsettings.json +2. Jitter causing unexpected variance + +**Solutions**: +- Review `Resilience:Retry` settings +- Disable jitter temporarily: `"UseJitter": false` +- Monitor actual delay times in logs +- Adjust `InitialDelaySeconds` or `BackoffMultiplier` + +### Problem: High Retry Rate + +**Symptoms**: +- >10% of operations require retry +- Logs flooded with retry warnings + +**Solutions**: +- Investigate infrastructure stability +- Check network latency +- Review timeout configurations +- Consider infrastructure scaling + +### Problem: Thundering Herd Despite Jitter + +**Symptoms**: +- Load spikes at regular intervals +- Multiple operations retrying simultaneously + +**Solutions**: +- Verify `UseJitter` is enabled +- Increase jitter range in code (modify CalculateDelay) +- Stagger initial operation times +- Implement circuit breaker (future enhancement) + +## Future Enhancements + +### Planned Improvements + +1. **Circuit Breaker Integration** (Issue #108) + - Stop retries during known outages + - Fail fast when service is down + - Automatic recovery detection + +2. **Adaptive Backoff** + - Adjust multiplier based on system load + - Faster retries during low load + - Slower retries during high load + +3. **Per-Operation Configuration** + - Different retry strategies per operation + - Critical operations: more retries + - Non-critical operations: fewer retries + +4. **Metrics Dashboard** + - Real-time retry statistics + - Success/failure rates + - Latency distributions + +5. **Retry Budget** + - Limit total retry attempts across all operations + - Prevent retry storms + - Preserve system resources + +## References + +- [Polly Documentation](https://github.com/App-vNext/Polly) +- [Exponential Backoff Pattern](https://en.wikipedia.org/wiki/Exponential_backoff) +- [Transient Fault Handling (Microsoft)](https://docs.microsoft.com/en-us/azure/architecture/best-practices/transient-faults) +- [Retry Pattern (Cloud Design Patterns)](https://docs.microsoft.com/en-us/azure/architecture/patterns/retry) +- [Issue #107](https://github.com/artcava/StarGate/issues/107) +- [RETRY-LOGIC.md](./RETRY-LOGIC.md) (ProcessWorker Retry) +- [CODING-CONVENTIONS.md](./CODING-CONVENTIONS.md) From 9637f59a953c8667e61e55fdb043b0a9519c5825 Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:27:57 +0100 Subject: [PATCH 05/11] Fix some errors and dependencies --- src/StarGate.Api/StarGate.Api.csproj | 2 +- .../StarGate.Infrastructure.csproj | 7 ++++--- .../StarGate.Infrastructure.Tests.csproj | 1 + .../StarGate.Integration.Tests.csproj | 2 +- .../StarGate.Security.Tests.csproj | 8 ++++---- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/StarGate.Api/StarGate.Api.csproj b/src/StarGate.Api/StarGate.Api.csproj index c37b79cd..7a839638 100644 --- a/src/StarGate.Api/StarGate.Api.csproj +++ b/src/StarGate.Api/StarGate.Api.csproj @@ -12,7 +12,7 @@ - + diff --git a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj index 94e41730..7c499468 100644 --- a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj +++ b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj @@ -7,17 +7,18 @@ + - + - - + + diff --git a/tests/StarGate.Infrastructure.Tests/StarGate.Infrastructure.Tests.csproj b/tests/StarGate.Infrastructure.Tests/StarGate.Infrastructure.Tests.csproj index bad5f734..cd71917c 100644 --- a/tests/StarGate.Infrastructure.Tests/StarGate.Infrastructure.Tests.csproj +++ b/tests/StarGate.Infrastructure.Tests/StarGate.Infrastructure.Tests.csproj @@ -9,6 +9,7 @@ + diff --git a/tests/StarGate.Integration.Tests/StarGate.Integration.Tests.csproj b/tests/StarGate.Integration.Tests/StarGate.Integration.Tests.csproj index aff7c91f..6611d6e5 100644 --- a/tests/StarGate.Integration.Tests/StarGate.Integration.Tests.csproj +++ b/tests/StarGate.Integration.Tests/StarGate.Integration.Tests.csproj @@ -14,7 +14,7 @@ - + diff --git a/tests/StarGate.Security.Tests/StarGate.Security.Tests.csproj b/tests/StarGate.Security.Tests/StarGate.Security.Tests.csproj index 5c802a56..5c209d11 100644 --- a/tests/StarGate.Security.Tests/StarGate.Security.Tests.csproj +++ b/tests/StarGate.Security.Tests/StarGate.Security.Tests.csproj @@ -9,12 +9,12 @@ - + - + - - + + runtime; build; native; contentfiles; analyzers; buildtransitive all From 3a85b594a5b3992f05b56c763acafc6e1552bd24 Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:29:04 +0100 Subject: [PATCH 06/11] fix: add missing NuGet packages and using directives (Issue #107) - Add FluentValidation.DependencyInjectionExtensions (11.9.2) for AddValidatorsFromAssemblyContaining - Add Microsoft.Extensions.Http (8.0.0) for IHttpClientBuilder and AddHttpClient - Add Microsoft.Extensions.Logging.Abstractions (8.0.0) if missing - Add missing using directive for Microsoft.Extensions.Http in ResilienceServiceCollectionExtensions Fixes compilation errors: - CS1061: IServiceCollection does not contain definition for AddValidatorsFromAssemblyContaining - CS0246: IHttpClientBuilder could not be found - CS1061: IServiceCollection does not contain definition for AddHttpClient Related to #107 --- .../Extensions/ResilienceServiceCollectionExtensions.cs | 1 + src/StarGate.Infrastructure/StarGate.Infrastructure.csproj | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs index 53aaf55b..9d8bc068 100644 --- a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs +++ b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Http; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Polly; diff --git a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj index 7c499468..1f9ccc72 100644 --- a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj +++ b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj @@ -7,11 +7,14 @@ - + + + + From 6197a3969eda53dfc5fafdef2c457305ffbb748f Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:30:20 +0100 Subject: [PATCH 07/11] fix: resolve package downgrade and AddPolicyHandler error (Issue #107) - Update Microsoft.Extensions.Logging.Abstractions from 8.0.0 to 8.0.3 to match StarGate.Core dependency - Add Polly.Extensions.Http using directive for AddPolicyHandler extension method Fixes compilation errors: - CS1061: IHttpClientBuilder does not contain definition for AddPolicyHandler - NU1605: Package downgrade warning for Microsoft.Extensions.Logging.Abstractions Related to #107 --- .../Extensions/ResilienceServiceCollectionExtensions.cs | 1 + src/StarGate.Infrastructure/StarGate.Infrastructure.csproj | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs index 9d8bc068..aeb7e3e4 100644 --- a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs +++ b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs @@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Polly; +using Polly.Extensions.Http; using StarGate.Infrastructure.Resilience; namespace StarGate.Infrastructure.Extensions; diff --git a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj index 1f9ccc72..0593272d 100644 --- a/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj +++ b/src/StarGate.Infrastructure/StarGate.Infrastructure.csproj @@ -14,7 +14,7 @@ - + From a3d5a28773bd9da71f2e8e65a44dfdd099240889 Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:31:27 +0100 Subject: [PATCH 08/11] fix: use Polly v8 compatible HTTP client configuration (Issue #107) Polly v8 removed AddPolicyHandler extension. Updated to use proper Polly v8 approach: - Simplified AddHttpClientWithRetry to register typed client only - Removed AddPolicyHandler usage (not available in Polly v8.x) - HTTP retry policies should be applied manually in client implementations - Database and Broker retry policies remain injectable via DI Alternative: Consumers can wrap HttpClient calls with policy.ExecuteAsync() manually Fixes CS1061: IHttpClientBuilder does not contain definition for AddPolicyHandler Related to #107 --- .../ResilienceServiceCollectionExtensions.cs | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs index aeb7e3e4..92394069 100644 --- a/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs +++ b/src/StarGate.Infrastructure/Extensions/ResilienceServiceCollectionExtensions.cs @@ -1,10 +1,8 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Http; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Polly; -using Polly.Extensions.Http; using StarGate.Infrastructure.Resilience; namespace StarGate.Infrastructure.Extensions; @@ -44,28 +42,38 @@ public static IServiceCollection AddResiliencePolicies( return RetryPolicyFactory.CreateBrokerRetryPolicy(config, logger); }); + // Register HTTP retry policy factory as singleton + services.AddSingleton(provider => + { + var config = provider.GetRequiredService>().Value; + var loggerFactory = provider.GetRequiredService(); + + // Return a factory function that creates HTTP retry policies with appropriate logger + return new Func>( + logger => RetryPolicyFactory.CreateHttpRetryPolicy(config, logger)); + }); + return services; } /// - /// Adds HTTP client with retry policy. + /// Adds HTTP client without automatic retry policy. + /// Consumers should inject AsyncRetryPolicy and wrap calls manually. /// /// HTTP client interface type. /// The service collection. /// HTTP client name. /// HTTP client builder for further configuration. + /// + /// Polly v8 removed AddPolicyHandler. To use retry policies: + /// 1. Inject AsyncRetryPolicy<HttpResponseMessage> via factory + /// 2. Wrap HTTP calls: await policy.ExecuteAsync(() => httpClient.SendAsync(request)) + /// public static IHttpClientBuilder AddHttpClientWithRetry( this IServiceCollection services, string name) where TClient : class { - return services - .AddHttpClient(name) - .AddPolicyHandler((provider, request) => - { - var config = provider.GetRequiredService>().Value; - var logger = provider.GetRequiredService>(); - return RetryPolicyFactory.CreateHttpRetryPolicy(config, logger); - }); + return services.AddHttpClient(name); } } From 17a2d7e1ecb45c1f9104ece5ba29ae478c3c2112 Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:37:26 +0100 Subject: [PATCH 09/11] fix: resolve MongoDB.Driver version mismatch in StarGate.Api (Issue #107) - Explicitly reference MongoDB.Driver 2.28.0 in StarGate.Api.csproj - Ensures version consistency across projects (Infrastructure and Api both use 2.28.0) - Resolves CS0012 errors for MongoClientSettings and IMongoClient types - Required for AspNetCore.HealthChecks.MongoDb health check integration Fixes compilation errors: - CS0012: MongoClientSettings is defined in an assembly that is not referenced - CS0012: IMongoClient is defined in an assembly that is not referenced Related to #107 --- src/StarGate.Api/StarGate.Api.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StarGate.Api/StarGate.Api.csproj b/src/StarGate.Api/StarGate.Api.csproj index 7a839638..7d85fca5 100644 --- a/src/StarGate.Api/StarGate.Api.csproj +++ b/src/StarGate.Api/StarGate.Api.csproj @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ - + From 33797d2ba883e2987395f763c41dac8184b1a56e Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:37:41 +0100 Subject: [PATCH 10/11] fix: correct ProjectReference typo in StarGate.Api.csproj - Change PackageReference to ProjectReference for StarGate.Contracts - Typo introduced in previous commit Related to #107 --- src/StarGate.Api/StarGate.Api.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StarGate.Api/StarGate.Api.csproj b/src/StarGate.Api/StarGate.Api.csproj index 7d85fca5..1080509d 100644 --- a/src/StarGate.Api/StarGate.Api.csproj +++ b/src/StarGate.Api/StarGate.Api.csproj @@ -20,7 +20,7 @@ - + From db6c19543610204c86937a0cc4bc9a799da704ae Mon Sep 17 00:00:00 2001 From: Marco Cavallo Date: Tue, 3 Mar 2026 12:39:57 +0100 Subject: [PATCH 11/11] fix: update AspNetCore.HealthChecks.MongoDb to 8.1.0 (Issue #107) - Update AspNetCore.HealthChecks.MongoDb from 8.0.1 to 8.1.0 - Version 8.1.0 supports MongoDB.Driver 2.28.0 (strong-named assemblies) - Resolves version mismatch between health check package and MongoDB.Driver Background: - MongoDB.Driver 2.28.0 introduced strong-named assemblies (breaking change) - AspNetCore.HealthChecks.MongoDb 8.0.1 only supports up to 2.27.0 - AspNetCore.HealthChecks.MongoDb 8.1.0 added support for 2.28.0 Fixes CS0012 errors: - MongoClientSettings version mismatch - IMongoClient version mismatch References: - https://github.com/Xabaril/AspNetCore.Diagnostics.HealthChecks/issues/2265 - https://www.mongodb.com/docs/drivers/csharp/v2.x/upgrade/ (v2.28.0 changes) Related to #107 --- src/StarGate.Api/StarGate.Api.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StarGate.Api/StarGate.Api.csproj b/src/StarGate.Api/StarGate.Api.csproj index 1080509d..9f9697bb 100644 --- a/src/StarGate.Api/StarGate.Api.csproj +++ b/src/StarGate.Api/StarGate.Api.csproj @@ -6,7 +6,7 @@ - +