From 6dcd3e90d1e85d69993dadcb2cb05bebce612192 Mon Sep 17 00:00:00 2001 From: Alessandra Romero Date: Fri, 22 Aug 2025 14:19:22 -0400 Subject: [PATCH 1/3] Add standard retries and unit tests --- .../smithy-core/src/smithy_core/aio/client.py | 6 +- .../src/smithy_core/interfaces/retries.py | 6 +- .../smithy-core/src/smithy_core/retries.py | 160 ++++++++++++- .../smithy-core/tests/unit/test_retries.py | 213 ++++++++++++++++-- 4 files changed, 362 insertions(+), 23 deletions(-) diff --git a/packages/smithy-core/src/smithy_core/aio/client.py b/packages/smithy-core/src/smithy_core/aio/client.py index bf27c440c..e446f2406 100644 --- a/packages/smithy-core/src/smithy_core/aio/client.py +++ b/packages/smithy-core/src/smithy_core/aio/client.py @@ -330,7 +330,7 @@ async def _retry[I: SerializeableShape, O: DeserializeableShape]( return await self._handle_attempt(call, request_context, request_future) retry_strategy = call.retry_strategy - retry_token = retry_strategy.acquire_initial_retry_token( + retry_token = await retry_strategy.acquire_initial_retry_token( token_scope=call.retry_scope ) @@ -349,7 +349,7 @@ async def _retry[I: SerializeableShape, O: DeserializeableShape]( if isinstance(output_context.response, Exception): try: - retry_strategy.refresh_retry_token_for_retry( + retry_token = await retry_strategy.refresh_retry_token_for_retry( token_to_renew=retry_token, error=output_context.response, ) @@ -364,7 +364,7 @@ async def _retry[I: SerializeableShape, O: DeserializeableShape]( await seek(request_context.transport_request.body, 0) else: - retry_strategy.record_success(token=retry_token) + await retry_strategy.record_success(token=retry_token) return output_context async def _handle_attempt[I: SerializeableShape, O: DeserializeableShape]( diff --git a/packages/smithy-core/src/smithy_core/interfaces/retries.py b/packages/smithy-core/src/smithy_core/interfaces/retries.py index a5c9d428b..ab7bbdeed 100644 --- a/packages/smithy-core/src/smithy_core/interfaces/retries.py +++ b/packages/smithy-core/src/smithy_core/interfaces/retries.py @@ -61,7 +61,7 @@ class RetryStrategy(Protocol): max_attempts: int """Upper limit on total attempt count (initial attempt plus retries).""" - def acquire_initial_retry_token( + async def acquire_initial_retry_token( self, *, token_scope: str | None = None ) -> RetryToken: """Called before any retries (for the first attempt at the operation). @@ -74,7 +74,7 @@ def acquire_initial_retry_token( """ ... - def refresh_retry_token_for_retry( + async def refresh_retry_token_for_retry( self, *, token_to_renew: RetryToken, error: Exception ) -> RetryToken: """Replace an existing retry token from a failed attempt with a new token. @@ -91,7 +91,7 @@ def refresh_retry_token_for_retry( """ ... - def record_success(self, *, token: RetryToken) -> None: + async def record_success(self, *, token: RetryToken) -> None: """Return token after successful completion of an operation. Upon successful completion of the operation, a user calls this function to diff --git a/packages/smithy-core/src/smithy_core/retries.py b/packages/smithy-core/src/smithy_core/retries.py index 06bf6f988..c79d6b3ac 100644 --- a/packages/smithy-core/src/smithy_core/retries.py +++ b/packages/smithy-core/src/smithy_core/retries.py @@ -1,5 +1,6 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +import asyncio import random from collections.abc import Callable from dataclasses import dataclass @@ -204,7 +205,7 @@ def __init__( self.backoff_strategy = backoff_strategy or ExponentialRetryBackoffStrategy() self.max_attempts = max_attempts - def acquire_initial_retry_token( + async def acquire_initial_retry_token( self, *, token_scope: str | None = None ) -> SimpleRetryToken: """Called before any retries (for the first attempt at the operation). @@ -214,7 +215,7 @@ def acquire_initial_retry_token( retry_delay = self.backoff_strategy.compute_next_backoff_delay(0) return SimpleRetryToken(retry_count=0, retry_delay=retry_delay) - def refresh_retry_token_for_retry( + async def refresh_retry_token_for_retry( self, *, token_to_renew: retries_interface.RetryToken, @@ -240,5 +241,158 @@ def refresh_retry_token_for_retry( else: raise RetryError(f"Error is not retryable: {error}") from error - def record_success(self, *, token: retries_interface.RetryToken) -> None: + async def record_success(self, *, token: retries_interface.RetryToken) -> None: """Not used by this retry strategy.""" + + +@dataclass(kw_only=True) +class StandardRetryToken: + retry_count: int + """Retry count is the total number of attempts minus the initial attempt.""" + + retry_delay: float + """Delay in seconds to wait before the retry attempt.""" + + quota_consumed: int = 0 + """The total amount of quota consumed.""" + + last_quota_acquired: int = 0 + """The amount of last quota acquired.""" + + +class StandardRetryStrategy(retries_interface.RetryStrategy): + def __init__(self, *, max_attempts: int = 3): + """Standard retry strategy using truncated binary exponential backoff with full + jitter. + + :param max_attempts: Upper limit on total number of attempts made, including + initial attempt and retries. + """ + self.backoff_strategy = ExponentialRetryBackoffStrategy( + backoff_scale_value=1, + jitter_type=ExponentialBackoffJitterType.FULL, + ) + self.max_attempts = max_attempts + self._retry_quota = StandardRetryQuota() + + async def acquire_initial_retry_token( + self, *, token_scope: str | None = None + ) -> StandardRetryToken: + """Called before any retries (for the first attempt at the operation). + + :param token_scope: This argument is ignored by this retry strategy. + """ + retry_delay = self.backoff_strategy.compute_next_backoff_delay(0) + return StandardRetryToken(retry_count=0, retry_delay=retry_delay) + + async def refresh_retry_token_for_retry( + self, + *, + token_to_renew: StandardRetryToken, + error: Exception, + ) -> StandardRetryToken: + """Replace an existing retry token from a failed attempt with a new token. + + This retry strategy always returns a token until the attempt count stored in + the new token exceeds the ``max_attempts`` value. + + :param token_to_renew: The token used for the previous failed attempt. + :param error: The error that triggered the need for a retry. + :raises RetryError: If no further retry attempts are allowed. + """ + if isinstance(error, retries_interface.ErrorRetryInfo) and error.is_retry_safe: + retry_count = token_to_renew.retry_count + 1 + if retry_count >= self.max_attempts: + raise RetryError( + f"Reached maximum number of allowed attempts: {self.max_attempts}" + ) from error + + # Acquire additional quota for this retry attempt + # (may raise a RetryError if none is available) + quota_acquired = await self._retry_quota.acquire(error=error) + total_quota = token_to_renew.quota_consumed + quota_acquired + + if error.retry_after is not None: + retry_delay = error.retry_after + else: + retry_delay = self.backoff_strategy.compute_next_backoff_delay( + retry_count + ) + + return StandardRetryToken( + retry_count=retry_count, + retry_delay=retry_delay, + quota_consumed=total_quota, + last_quota_acquired=quota_acquired, + ) + else: + raise RetryError(f"Error is not retryable: {error}") from error + + async def record_success(self, *, token: StandardRetryToken) -> None: + """Return token after successful completion of an operation. + + Releases retry tokens back to the retry quota based on the previous amount + consumed. + + :param token: The token used for the previous successful attempt. + """ + await self._retry_quota.release(release_amount=token.last_quota_acquired) + + +class StandardRetryQuota: + """Retry quota used by :py:class:`StandardRetryStrategy`.""" + + INITIAL_RETRY_TOKENS = 500 + RETRY_COST = 5 + NO_RETRY_INCREMENT = 1 + TIMEOUT_RETRY_COST = 10 + + def __init__(self): + self._max_capacity = self.INITIAL_RETRY_TOKENS + self._available_capacity = self.INITIAL_RETRY_TOKENS + self._lock = asyncio.Lock() + + async def acquire(self, *, error: Exception) -> int: + """Attempt to acquire a certain amount of capacity. + + If there's no sufficient amount of capacity available, raise an exception. + Otherwise, we return the amount of capacity successfully allocated. + """ + # TODO: update `is_timeout` when `is_timeout_error` is implemented + is_timeout = False + capacity_amount = self.TIMEOUT_RETRY_COST if is_timeout else self.RETRY_COST + + async with self._lock: + if capacity_amount > self._available_capacity: + raise RetryError("Retry quota exceeded") + self._available_capacity -= capacity_amount + return capacity_amount + + async def release(self, *, release_amount: int) -> None: + """Release capacity back to the retry quota. + + The capacity being released will be truncated if necessary to ensure the max + capacity is never exceeded. + """ + increment = self.NO_RETRY_INCREMENT if release_amount == 0 else release_amount + + if self._available_capacity == self._max_capacity: + return + + async with self._lock: + self._available_capacity = min( + self._available_capacity + increment, self._max_capacity + ) + + +class RetryStrategyMode(Enum): + """Enumeration of available retry strategies.""" + + SIMPLE = "simple" + STANDARD = "standard" + + +RETRY_MODE_MAP = { + RetryStrategyMode.SIMPLE: SimpleRetryStrategy, + RetryStrategyMode.STANDARD: StandardRetryStrategy, +} diff --git a/packages/smithy-core/tests/unit/test_retries.py b/packages/smithy-core/tests/unit/test_retries.py index 0b3c23be4..48b3b9286 100644 --- a/packages/smithy-core/tests/unit/test_retries.py +++ b/packages/smithy-core/tests/unit/test_retries.py @@ -4,7 +4,12 @@ import pytest from smithy_core.exceptions import CallError, RetryError from smithy_core.retries import ExponentialBackoffJitterType as EBJT -from smithy_core.retries import ExponentialRetryBackoffStrategy, SimpleRetryStrategy +from smithy_core.retries import ( + ExponentialRetryBackoffStrategy, + SimpleRetryStrategy, + StandardRetryQuota, + StandardRetryStrategy, +) @pytest.mark.parametrize( @@ -54,49 +59,229 @@ def test_exponential_backoff_strategy( assert delay_actual == pytest.approx(delay_expected) # type: ignore +@pytest.mark.asyncio @pytest.mark.parametrize("max_attempts", [2, 3, 10]) -def test_simple_retry_strategy(max_attempts: int) -> None: +async def test_simple_retry_strategy(max_attempts: int) -> None: strategy = SimpleRetryStrategy( backoff_strategy=ExponentialRetryBackoffStrategy(backoff_scale_value=5), max_attempts=max_attempts, ) error = CallError(is_retry_safe=True) - token = strategy.acquire_initial_retry_token() + token = await strategy.acquire_initial_retry_token() for _ in range(max_attempts - 1): - token = strategy.refresh_retry_token_for_retry( + token = await strategy.refresh_retry_token_for_retry( token_to_renew=token, error=error ) with pytest.raises(RetryError): - strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) -def test_simple_retry_does_not_retry_unclassified() -> None: +@pytest.mark.asyncio +async def test_simple_retry_does_not_retry_unclassified() -> None: strategy = SimpleRetryStrategy( backoff_strategy=ExponentialRetryBackoffStrategy(backoff_scale_value=5), max_attempts=2, ) - token = strategy.acquire_initial_retry_token() + token = await strategy.acquire_initial_retry_token() with pytest.raises(RetryError): - strategy.refresh_retry_token_for_retry(token_to_renew=token, error=Exception()) + await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=Exception() + ) -def test_simple_retry_does_not_retry_when_safety_unknown() -> None: +@pytest.mark.asyncio +async def test_simple_retry_does_not_retry_when_safety_unknown() -> None: strategy = SimpleRetryStrategy( backoff_strategy=ExponentialRetryBackoffStrategy(backoff_scale_value=5), max_attempts=2, ) error = CallError(is_retry_safe=None) - token = strategy.acquire_initial_retry_token() + token = await strategy.acquire_initial_retry_token() with pytest.raises(RetryError): - strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) -def test_simple_retry_does_not_retry_unsafe() -> None: +@pytest.mark.asyncio +async def test_simple_retry_does_not_retry_unsafe() -> None: strategy = SimpleRetryStrategy( backoff_strategy=ExponentialRetryBackoffStrategy(backoff_scale_value=5), max_attempts=2, ) error = CallError(fault="client", is_retry_safe=False) - token = strategy.acquire_initial_retry_token() + token = await strategy.acquire_initial_retry_token() + with pytest.raises(RetryError): + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("max_attempts", [2, 3, 10]) +async def test_standard_retry_strategy(max_attempts: int) -> None: + strategy = StandardRetryStrategy(max_attempts=max_attempts) + error = CallError(is_retry_safe=True) + token = await strategy.acquire_initial_retry_token() + for _ in range(max_attempts - 1): + token = await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=error + ) + with pytest.raises(RetryError): + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + + +@pytest.mark.asyncio +async def test_standard_retry_does_not_retry_unclassified() -> None: + strategy = StandardRetryStrategy() + token = await strategy.acquire_initial_retry_token() with pytest.raises(RetryError): - strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=Exception() + ) + + +@pytest.mark.asyncio +async def test_standard_retry_does_not_retry_when_safety_unknown() -> None: + strategy = StandardRetryStrategy() + error = CallError(is_retry_safe=None) + token = await strategy.acquire_initial_retry_token() + with pytest.raises(RetryError): + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + + +@pytest.mark.asyncio +async def test_standard_retry_does_not_retry_unsafe() -> None: + strategy = StandardRetryStrategy() + error = CallError(fault="client", is_retry_safe=False) + token = await strategy.acquire_initial_retry_token() + with pytest.raises(RetryError): + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + + +@pytest.mark.asyncio +async def test_standard_retry_strategy_respects_max_attempts() -> None: + strategy = StandardRetryStrategy() + error = CallError(is_retry_safe=True) + token = await strategy.acquire_initial_retry_token() + token = await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=error + ) + token = await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=error + ) + with pytest.raises(RetryError): + await strategy.refresh_retry_token_for_retry(token_to_renew=token, error=error) + + +@pytest.mark.asyncio +async def test_retry_after_overrides_backoff() -> None: + strategy = StandardRetryStrategy() + error = CallError(is_retry_safe=True, retry_after=5) + token = await strategy.acquire_initial_retry_token() + token = await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=error + ) + assert token.retry_delay == 5 + + +@pytest.mark.asyncio +async def test_retry_quota_acquire_when_exhausted(monkeypatch) -> None: + monkeypatch.setattr(StandardRetryQuota, "INITIAL_RETRY_TOKENS", 5, raising=False) + monkeypatch.setattr(StandardRetryQuota, "RETRY_COST", 2, raising=False) + + quota = StandardRetryQuota() + assert quota._available_capacity == 5 + + # First acquire: 5 -> 3 + assert await quota.acquire(error=Exception()) == 2 + assert quota._available_capacity == 3 + + # Second acquire: 3 -> 1 + assert await quota.acquire(error=Exception()) == 2 + assert quota._available_capacity == 1 + + # Third acquire needs 2 but only 1 remains -> should raise + with pytest.raises(RetryError): + await quota.acquire(error=Exception()) + assert quota._available_capacity == 1 + + +@pytest.mark.asyncio +async def test_retry_quota_release_zero_adds_increment(monkeypatch) -> None: + monkeypatch.setattr(StandardRetryQuota, "INITIAL_RETRY_TOKENS", 5, raising=False) + monkeypatch.setattr(StandardRetryQuota, "RETRY_COST", 2, raising=False) + monkeypatch.setattr(StandardRetryQuota, "NO_RETRY_INCREMENT", 1, raising=False) + + quota = StandardRetryQuota() + assert quota._available_capacity == 5 + + # First acquire: 5 -> 3 + assert await quota.acquire(error=Exception()) == 2 + assert quota._available_capacity == 3 + + # release 0 should add NO_RETRY_INCREMENT: 3 -> 4 + await quota.release(release_amount=0) + assert quota._available_capacity == 4 + + # Next acquire should still work: 4 -> 2 + assert await quota.acquire(error=Exception()) == 2 + assert quota._available_capacity == 2 + + +@pytest.mark.asyncio +async def test_retry_quota_release_caps_at_max(monkeypatch) -> None: + monkeypatch.setattr(StandardRetryQuota, "INITIAL_RETRY_TOKENS", 10, raising=False) + monkeypatch.setattr(StandardRetryQuota, "RETRY_COST", 3, raising=False) + + quota = StandardRetryQuota() + assert quota._available_capacity == 10 + + # Drain some capacity: 10 -> 7 -> 4 + assert await quota.acquire(error=Exception()) == 3 + assert quota._available_capacity == 7 + assert await quota.acquire(error=Exception()) == 3 + assert quota._available_capacity == 4 + + # Release more than needed: 4 + 8 = 12. Should cap at max = 10 + await quota.release(release_amount=8) + assert quota._available_capacity == 10 + + # Another acquire should succeed from max: 10 -> 7 + assert await quota.acquire(error=Exception()) == 3 + assert quota._available_capacity == 7 + + +@pytest.mark.asyncio +async def test_retry_quota_releases_last_acquired_amount(monkeypatch) -> None: + monkeypatch.setattr(StandardRetryQuota, "INITIAL_RETRY_TOKENS", 10, raising=False) + monkeypatch.setattr(StandardRetryQuota, "RETRY_COST", 5, raising=False) + + strategy = StandardRetryStrategy() + err = CallError(is_retry_safe=True) + token = await strategy.acquire_initial_retry_token() + + # Two retries: 10 -> 5 -> 0 + token = await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=err + ) + assert strategy._retry_quota._available_capacity == 5 + token = await strategy.refresh_retry_token_for_retry( + token_to_renew=token, error=err + ) + assert strategy._retry_quota._available_capacity == 0 + + # Success returns ONLY the last acquired amount -> 5 + await strategy.record_success(token=token) + assert strategy._retry_quota._available_capacity == 5 + + +@pytest.mark.asyncio +async def test_retry_quota_release_when_no_retry(monkeypatch) -> None: + monkeypatch.setattr(StandardRetryQuota, "INITIAL_RETRY_TOKENS", 10, raising=False) + quota = StandardRetryQuota() + + await quota.acquire(error=Exception()) + assert quota._available_capacity == 5 + before = quota._available_capacity + + await quota.release(release_amount=0) + # Should increment by NO_RETRY_INCREMENT = 1 + assert quota._available_capacity == min(before + 1, quota._max_capacity) + assert quota._available_capacity == 6 From bd3e28a65f1334772321d8fb91baad4730bc5c43 Mon Sep 17 00:00:00 2001 From: Alessandra Romero Date: Fri, 22 Aug 2025 15:20:04 -0400 Subject: [PATCH 2/3] Update codegen to use StandardRetryStrategy as the default mode --- .../smithy/python/codegen/generators/ConfigGenerator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codegen/core/src/main/java/software/amazon/smithy/python/codegen/generators/ConfigGenerator.java b/codegen/core/src/main/java/software/amazon/smithy/python/codegen/generators/ConfigGenerator.java index 4aae8893f..b5fa537ea 100644 --- a/codegen/core/src/main/java/software/amazon/smithy/python/codegen/generators/ConfigGenerator.java +++ b/codegen/core/src/main/java/software/amazon/smithy/python/codegen/generators/ConfigGenerator.java @@ -62,8 +62,8 @@ public final class ConfigGenerator implements Runnable { .nullable(false) .initialize(writer -> { writer.addDependency(SmithyPythonDependency.SMITHY_CORE); - writer.addImport("smithy_core.retries", "SimpleRetryStrategy"); - writer.write("self.retry_strategy = retry_strategy or SimpleRetryStrategy()"); + writer.addImport("smithy_core.retries", "StandardRetryStrategy"); + writer.write("self.retry_strategy = retry_strategy or StandardRetryStrategy()"); }) .build(), ConfigProperty.builder() From 56af01bc1047161484c0dab6240a3e0fdc98de9f Mon Sep 17 00:00:00 2001 From: Alessandra Romero Date: Fri, 22 Aug 2025 15:45:58 -0400 Subject: [PATCH 3/3] Update changelog --- packages/smithy-aws-core/CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/smithy-aws-core/CHANGES.md b/packages/smithy-aws-core/CHANGES.md index bad47df65..6cff38194 100644 --- a/packages/smithy-aws-core/CHANGES.md +++ b/packages/smithy-aws-core/CHANGES.md @@ -12,6 +12,7 @@ ### Features * Added a hand-written implmentation for the `restJson1` protocol. +* Added a new retry mode `standard` and made it the default retry strategy. ## v0.0.3