Skip to content

Commit 8e47e50

Browse files
committed
fix: make retry examples deterministic using seeded random
1 parent d5e3feb commit 8e47e50

File tree

7 files changed

+100
-28
lines changed

7 files changed

+100
-28
lines changed

examples/src/step/step_with_retry.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from random import random
1+
import random
22
from typing import Any
33

44
from aws_durable_execution_sdk_python.config import StepConfig
@@ -14,12 +14,18 @@
1414
)
1515

1616

17+
# Seed random at module level for deterministic behavior across retries
18+
random.seed(42)
19+
20+
1721
@durable_step
1822
def unreliable_operation(
1923
_step_context: StepContext,
2024
) -> str:
25+
# Use seeded random for deterministic behavior
26+
# With seed 42, this will fail once then succeed
2127
failure_threshold = 0.5
22-
if random() > failure_threshold: # noqa: S311
28+
if random.random() > failure_threshold: # noqa: S311
2329
msg = "Random error occurred"
2430
raise RuntimeError(msg)
2531
return "Operation succeeded"

examples/src/step/steps_with_retry.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,31 @@
11
"""Example demonstrating multiple steps with retry logic."""
22

3-
from random import random
3+
import random
44
from typing import Any
55

66
from aws_durable_execution_sdk_python.config import StepConfig
7-
from aws_durable_execution_sdk_python.context import DurableContext
7+
from aws_durable_execution_sdk_python.context import DurableContext, StepContext
88
from aws_durable_execution_sdk_python.execution import durable_execution
99
from aws_durable_execution_sdk_python.retries import (
1010
RetryStrategyConfig,
1111
create_retry_strategy,
1212
)
1313

1414

15-
def simulated_get_item(name: str) -> dict[str, Any] | None:
16-
"""Simulate getting an item that may fail randomly."""
15+
# Seed random at module level for deterministic behavior across retries
16+
random.seed(42)
17+
18+
19+
def simulated_get_item(_step_context: StepContext, name: str) -> dict[str, Any] | None:
20+
"""Simulate getting an item with deterministic seeded random behavior."""
21+
# Use seeded random for deterministic behavior
1722
# Fail 50% of the time
18-
if random() < 0.5: # noqa: S311
23+
if random.random() < 0.5: # noqa: S311
1924
msg = "Random failure"
2025
raise RuntimeError(msg)
2126

2227
# Simulate finding item after some attempts
23-
if random() > 0.3: # noqa: S311
28+
if random.random() > 0.3: # noqa: S311
2429
return {"id": name, "data": "item data"}
2530

2631
return None
@@ -49,7 +54,7 @@ def handler(event: Any, context: DurableContext) -> dict[str, Any]:
4954

5055
# Try to get the item with retry
5156
get_response = context.step(
52-
lambda _, n=name: simulated_get_item(n),
57+
lambda _, n=name: simulated_get_item(_, n),
5358
name=f"get_item_poll_{poll_count}",
5459
config=step_config,
5560
)

examples/test/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
import logging
66
import os
7+
import random
78
import sys
89
from enum import StrEnum
910
from pathlib import Path
@@ -29,6 +30,18 @@
2930
logger = logging.getLogger(__name__)
3031

3132

33+
@pytest.fixture(autouse=True)
34+
def reset_random_seed():
35+
"""Reset random seed before each test for deterministic behavior.
36+
37+
This ensures that tests using module-level random.seed(42) get consistent
38+
behavior even when run in different orders or combinations.
39+
"""
40+
random.seed(42)
41+
yield
42+
# No cleanup needed - next test will reset
43+
44+
3245
def deserialize_operation_payload(
3346
payload: OperationPayload | None, serdes: ExtendedTypeSerDes | None = None
3447
) -> Any:

examples/test/step/test_step_with_retry.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,28 @@
1414
lambda_function_name="step with retry",
1515
)
1616
def test_step_with_retry(durable_runner):
17-
"""Test step with retry configuration."""
17+
"""Test step with retry configuration.
18+
19+
With seed 42 at module level, the random state persists across retries:
20+
- Attempt 1: random() = 0.639... > 0.5 → raises RuntimeError ❌
21+
- Attempt 2: random() = 0.025... ≤ 0.5 → no error raised, succeeds ✓
22+
23+
The function deterministically fails once then succeeds on the second attempt.
24+
"""
1825
with durable_runner:
1926
result = durable_runner.run(input="test", timeout=30)
2027

21-
# The function uses random() so it may succeed or fail
22-
# We just verify it completes and has retry configuration
23-
assert result.status in [InvocationStatus.SUCCEEDED, InvocationStatus.FAILED]
28+
# With seeded random (seed=42) at module level, succeeds on attempt 2
29+
assert result.status is InvocationStatus.SUCCEEDED
30+
assert deserialize_operation_payload(result.result) == "Operation succeeded"
2431

25-
# Verify step operation exists
32+
# Verify step operation exists with retry details
2633
step_ops = [
2734
op for op in result.operations if op.operation_type == OperationType.STEP
2835
]
29-
assert len(step_ops) >= 1
36+
assert len(step_ops) == 1
3037

31-
# If it succeeded, verify the result
32-
if result.status is InvocationStatus.SUCCEEDED:
33-
assert deserialize_operation_payload(result.result) == "Operation succeeded"
38+
# The step should have succeeded on attempt 2 (after 1 failure)
39+
# Attempt numbering: 1 (initial attempt), 2 (first retry)
40+
step_op = step_ops[0]
41+
assert step_op.attempt == 2 # Succeeded on first retry (1-indexed: 2=first retry)

examples/test/step/test_steps_with_retry.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,42 @@
1414
lambda_function_name="steps with retry",
1515
)
1616
def test_steps_with_retry(durable_runner):
17-
"""Test steps_with_retry pattern."""
17+
"""Test steps_with_retry pattern.
18+
19+
With seed 42 set by conftest fixture, the random state persists:
20+
- Poll 1, Attempt 1: random() = 0.639 ≥ 0.5 (passes), random() = 0.025 ≤ 0.3 → returns None
21+
- Poll 2, Attempt 1: random() = 0.275 < 0.5 → raises RuntimeError ❌
22+
- Poll 2, Attempt 2: random() = 0.736 ≥ 0.5 (passes), random() = 0.676 > 0.3 → returns item ✓
23+
24+
The function finds the item on poll 2 after 1 retry.
25+
"""
1826
with durable_runner:
1927
result = durable_runner.run(input={"name": "test-item"}, timeout=30)
2028

2129
assert result.status is InvocationStatus.SUCCEEDED
2230

23-
# Result should be either success with item or error
24-
assert isinstance(deserialize_operation_payload(result.result), dict)
25-
assert "success" in deserialize_operation_payload(
26-
result.result
27-
) or "error" in deserialize_operation_payload(result.result)
31+
# With seeded random (seed=42) at module level, finds item on poll 2
32+
result_data = deserialize_operation_payload(result.result)
33+
assert isinstance(result_data, dict)
34+
assert result_data.get("success") is True
35+
assert result_data.get("pollsRequired") == 2
36+
assert "item" in result_data
37+
assert result_data["item"]["id"] == "test-item"
2838

29-
# Verify step operations exist (polling steps)
39+
# Verify step operations exist
3040
step_ops = [
3141
op for op in result.operations if op.operation_type == OperationType.STEP
3242
]
33-
assert len(step_ops) >= 1
43+
# Should have exactly 2 step operations (poll 1 and poll 2)
44+
assert len(step_ops) == 2
45+
46+
# Poll 1: succeeded immediately (returned None)
47+
assert step_ops[0].name == "get_item_poll_1"
48+
assert step_ops[0].result == "null"
49+
assert step_ops[0].attempt == 1 # No retries needed (1-indexed: 1=initial)
50+
51+
# Poll 2: succeeded after 1 retry (returned item)
52+
assert step_ops[1].name == "get_item_poll_2"
53+
assert (
54+
step_ops[1].attempt == 2
55+
) # Exactly 1 retry occurred (1-indexed: 2=first retry)

src/aws_durable_execution_sdk_python_testing/checkpoint/processors/base.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
ContextDetails,
1313
ExecutionDetails,
1414
Operation,
15+
OperationAction,
1516
OperationStatus,
1617
OperationType,
1718
OperationUpdate,
@@ -72,9 +73,15 @@ def _create_context_details(self, update: OperationUpdate) -> ContextDetails | N
7273
)
7374

7475
def _create_step_details(
75-
self, update: OperationUpdate, current_operation: Operation | None = None
76+
self,
77+
update: OperationUpdate,
78+
current_operation: Operation | None = None,
7679
) -> StepDetails | None:
77-
"""Create StepDetails from OperationUpdate."""
80+
"""Create StepDetails from OperationUpdate.
81+
82+
Automatically increments attempt count for RETRY, SUCCEED, and FAIL actions.
83+
"""
84+
7885
attempt: int = 0
7986
next_attempt_timestamp: datetime.datetime | None = None
8087

@@ -84,6 +91,13 @@ def _create_step_details(
8491
next_attempt_timestamp = (
8592
current_operation.step_details.next_attempt_timestamp
8693
)
94+
# Increment attempt for RETRY, SUCCEED, and FAIL actions
95+
if update.action in {
96+
OperationAction.RETRY,
97+
OperationAction.SUCCEED,
98+
OperationAction.FAIL,
99+
}:
100+
attempt += 1
87101
return StepDetails(
88102
attempt=attempt,
89103
next_attempt_timestamp=next_attempt_timestamp,

tests/checkpoint/processors/step_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ def test_process_succeed_action_with_current_operation():
230230

231231
current_op = Mock()
232232
current_op.start_timestamp = datetime.now(UTC)
233+
current_op.step_details = StepDetails()
233234

234235
update = OperationUpdate(
235236
operation_id="step-123",
@@ -243,6 +244,7 @@ def test_process_succeed_action_with_current_operation():
243244

244245
assert result.start_timestamp == current_op.start_timestamp
245246
assert result.status == OperationStatus.SUCCEEDED
247+
assert result.step_details.attempt == 1
246248

247249

248250
def test_process_fail_action():
@@ -274,6 +276,7 @@ def test_process_fail_action_with_current_operation():
274276

275277
current_op = Mock()
276278
current_op.start_timestamp = datetime.now(UTC)
279+
current_op.step_details = StepDetails()
277280

278281
error = ErrorObject.from_message("step failed")
279282
update = OperationUpdate(
@@ -288,6 +291,7 @@ def test_process_fail_action_with_current_operation():
288291

289292
assert result.start_timestamp == current_op.start_timestamp
290293
assert result.status == OperationStatus.FAILED
294+
assert result.step_details.attempt == 1
291295

292296

293297
def test_process_invalid_action():

0 commit comments

Comments
 (0)