From c62e405e9f432500dfda9c35423209efc87f3ffc Mon Sep 17 00:00:00 2001 From: SoroTask Keeper Date: Tue, 24 Mar 2026 07:49:48 +0100 Subject: [PATCH] feat(keeper): implement retry logic with exponential backoff for failed submissions Implements a fault-tolerant retry mechanism for transaction submissions that handles transient network failures, RPC timeouts, and fee-related rejections without causing duplicate execution or infinite loops. Features: - Generic withRetry() higher-order async wrapper with configurable options - Exponential backoff with jitter: baseDelay * 2^attempt + random(0, baseDelay) - Error classification into retryable, non-retryable, and duplicate categories - DUPLICATE_TRANSACTION responses treated as success (no retry) - Comprehensive error code support for Soroban RPC responses - Retry count tracking and inclusion in execution logs - MAX_RETRIES_EXCEEDED warning event emission - Environment variable configuration (MAX_RETRIES, RETRY_BASE_DELAY_MS, MAX_RETRY_DELAY_MS) Error Classifications: - Retryable: TIMEOUT, NETWORK_ERROR, RATE_LIMITED, SERVER_ERROR, etc. - Non-retryable: INVALID_ARGS, INSUFFICIENT_GAS, CONTRACT_PANIC, TX_BAD_AUTH, etc. - Duplicate: DUPLICATE_TRANSACTION, TX_ALREADY_IN_LEDGER Testing: - Unit tests covering success on 2nd attempt - Non-retryable error bail scenarios - Max retries exceeded handling - Duplicate transaction detection - Exponential backoff with jitter verification - Network error detection Closes #37 --- keeper/.env.example | 5 + keeper/__tests__/retry.test.js | 385 +++++++++++++++++++++++++++++++++ keeper/src/config.js | 4 + keeper/src/executor.js | 66 +++++- keeper/src/retry.js | 295 ++++++++++++++++++++++++- 5 files changed, 741 insertions(+), 14 deletions(-) create mode 100644 keeper/__tests__/retry.test.js diff --git a/keeper/.env.example b/keeper/.env.example index 253a7b6..59098cb 100644 --- a/keeper/.env.example +++ b/keeper/.env.example @@ -19,3 +19,8 @@ START_LEDGER=0 # Execution Configuration WAIT_FOR_CONFIRMATION=true + +# Retry Configuration +MAX_RETRIES=3 +RETRY_BASE_DELAY_MS=1000 +MAX_RETRY_DELAY_MS=30000 diff --git a/keeper/__tests__/retry.test.js b/keeper/__tests__/retry.test.js new file mode 100644 index 0000000..8e13730 --- /dev/null +++ b/keeper/__tests__/retry.test.js @@ -0,0 +1,385 @@ +/** + * Unit tests for retry.js - Retry logic with exponential backoff + */ + +const { + withRetry, + retry, + isRetryableError, + isDuplicateTransactionError, + ErrorClassification +} = require('../src/retry.js'); + +describe('withRetry', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe('success cases', () => { + it('should succeed on first attempt', async () => { + const fn = jest.fn().mockResolvedValue('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(result.result).toBe('success'); + expect(result.attempts).toBe(1); + expect(result.retries).toBe(0); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should succeed on second attempt after retryable error', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('NETWORK_ERROR')) + .mockResolvedValueOnce('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(result.result).toBe('success'); + expect(result.attempts).toBe(2); + expect(result.retries).toBe(1); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should succeed on third attempt', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('TIMEOUT')) + .mockRejectedValueOnce(new Error('RATE_LIMITED')) + .mockResolvedValueOnce('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(result.attempts).toBe(3); + expect(result.retries).toBe(2); + expect(fn).toHaveBeenCalledTimes(3); + }); + }); + + describe('duplicate transaction handling', () => { + it('should treat DUPLICATE_TRANSACTION as success', async () => { + const fn = jest.fn().mockRejectedValue(new Error('DUPLICATE_TRANSACTION')); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(result.duplicate).toBe(true); + expect(result.attempts).toBe(1); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should treat TX_ALREADY_IN_LEDGER as success', async () => { + const fn = jest.fn().mockRejectedValue({ + code: 'TX_ALREADY_IN_LEDGER', + message: 'Transaction already in ledger' + }); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(result.duplicate).toBe(true); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should call onDuplicate callback when duplicate detected', async () => { + const onDuplicate = jest.fn(); + const fn = jest.fn().mockRejectedValue(new Error('DUPLICATE_TRANSACTION')); + + await withRetry(fn, { + maxRetries: 3, + baseDelayMs: 10, + onDuplicate + }); + + expect(onDuplicate).toHaveBeenCalledTimes(1); + }); + }); + + describe('non-retryable errors', () => { + it('should bail immediately on non-retryable error', async () => { + const fn = jest.fn().mockRejectedValue(new Error('INVALID_ARGS')); + + await expect(withRetry(fn, { maxRetries: 3, baseDelayMs: 10 })) + .rejects.toMatchObject({ + success: false, + classification: ErrorClassification.NON_RETRYABLE, + attempts: 1, + retries: 0 + }); + + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should bail on CONTRACT_PANIC', async () => { + const fn = jest.fn().mockRejectedValue({ + code: 'CONTRACT_PANIC', + message: 'Contract panicked' + }); + + await expect(withRetry(fn, { maxRetries: 3, baseDelayMs: 10 })) + .rejects.toMatchObject({ + success: false, + classification: ErrorClassification.NON_RETRYABLE + }); + + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should bail on INSUFFICIENT_GAS', async () => { + const fn = jest.fn().mockRejectedValue(new Error('INSUFFICIENT_GAS')); + + await expect(withRetry(fn, { maxRetries: 3, baseDelayMs: 10 })) + .rejects.toMatchObject({ + success: false, + classification: ErrorClassification.NON_RETRYABLE + }); + + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should bail on TX_BAD_AUTH', async () => { + const fn = jest.fn().mockRejectedValue(new Error('TX_BAD_AUTH')); + + await expect(withRetry(fn, { maxRetries: 3, baseDelayMs: 10 })) + .rejects.toMatchObject({ + success: false, + classification: ErrorClassification.NON_RETRYABLE + }); + + expect(fn).toHaveBeenCalledTimes(1); + }); + }); + + describe('max retries exceeded', () => { + it('should throw MAX_RETRIES_EXCEEDED after exhausting retries', async () => { + const fn = jest.fn().mockRejectedValue(new Error('NETWORK_ERROR')); + + await expect(withRetry(fn, { maxRetries: 2, baseDelayMs: 10 })) + .rejects.toMatchObject({ + success: false, + maxRetriesExceeded: true, + attempts: 3, + retries: 2 + }); + + expect(fn).toHaveBeenCalledTimes(3); + }); + + it('should call onMaxRetries callback when max retries exceeded', async () => { + const onMaxRetries = jest.fn(); + const error = new Error('TIMEOUT'); + const fn = jest.fn().mockRejectedValue(error); + + await expect(withRetry(fn, { + maxRetries: 2, + baseDelayMs: 10, + onMaxRetries + })).rejects.toBeDefined(); + + expect(onMaxRetries).toHaveBeenCalledTimes(1); + expect(onMaxRetries).toHaveBeenCalledWith(error, 3); + }); + }); + + describe('retry callbacks', () => { + it('should call onRetry callback on each retry', async () => { + const onRetry = jest.fn(); + const fn = jest.fn() + .mockRejectedValueOnce(new Error('TIMEOUT')) + .mockRejectedValueOnce(new Error('RATE_LIMITED')) + .mockResolvedValueOnce('success'); + + await withRetry(fn, { + maxRetries: 3, + baseDelayMs: 10, + onRetry + }); + + expect(onRetry).toHaveBeenCalledTimes(2); + expect(onRetry).toHaveBeenNthCalledWith(1, expect.any(Error), 1, expect.any(Number)); + expect(onRetry).toHaveBeenNthCalledWith(2, expect.any(Error), 2, expect.any(Number)); + }); + }); + + describe('exponential backoff', () => { + it('should increase delay exponentially', async () => { + const delays = []; + const onRetry = jest.fn((_, attempt, delay) => { + delays.push({ attempt, delay }); + }); + + const fn = jest.fn() + .mockRejectedValueOnce(new Error('TIMEOUT')) + .mockRejectedValueOnce(new Error('TIMEOUT')) + .mockRejectedValueOnce(new Error('TIMEOUT')) + .mockResolvedValueOnce('success'); + + await withRetry(fn, { + maxRetries: 4, + baseDelayMs: 100, + maxDelayMs: 10000, + onRetry + }); + + // Delays should be increasing (with jitter) + expect(delays[0].delay).toBeGreaterThanOrEqual(100); + expect(delays[1].delay).toBeGreaterThanOrEqual(200); + expect(delays[2].delay).toBeGreaterThanOrEqual(400); + }); + + it('should cap delay at maxDelayMs', async () => { + const delays = []; + const onRetry = jest.fn((_, attempt, delay) => { + delays.push(delay); + }); + + const fn = jest.fn().mockRejectedValue(new Error('TIMEOUT')); + + await expect(withRetry(fn, { + maxRetries: 10, + baseDelayMs: 1000, + maxDelayMs: 5000, + onRetry + })).rejects.toBeDefined(); + + // All delays should be capped at maxDelayMs + jitter + delays.forEach(delay => { + expect(delay).toBeLessThanOrEqual(5000 + 1000); // maxDelayMs + baseDelayMs (jitter) + }); + }); + }); + + describe('network error detection', () => { + it('should retry on timeout message', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('Request timeout')) + .mockResolvedValueOnce('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should retry on network error message', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('Network error occurred')) + .mockResolvedValueOnce('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should retry on ECONNREFUSED', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('ECONNREFUSED')) + .mockResolvedValueOnce('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should retry on fetch failed', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('fetch failed')) + .mockResolvedValueOnce('success'); + + const result = await withRetry(fn, { maxRetries: 3, baseDelayMs: 10 }); + + expect(result.success).toBe(true); + expect(fn).toHaveBeenCalledTimes(2); + }); + }); + + describe('environment variable defaults', () => { + const originalEnv = process.env; + + beforeEach(() => { + process.env = { ...originalEnv }; + }); + + afterEach(() => { + process.env = originalEnv; + }); + + it('should use MAX_RETRIES from environment', async () => { + process.env.MAX_RETRIES = '5'; + + // Need to re-import to pick up new env var + jest.resetModules(); + const { withRetry: withRetryFresh } = require('../src/retry.js'); + + const fn = jest.fn().mockRejectedValue(new Error('TIMEOUT')); + + await expect(withRetryFresh(fn, { baseDelayMs: 10 })) + .rejects.toMatchObject({ attempts: 6 }); // 5 retries + 1 initial + + expect(fn).toHaveBeenCalledTimes(6); + }); + }); +}); + +describe('retry (legacy function)', () => { + it('should work with legacy retry interface', async () => { + const fn = jest.fn().mockResolvedValue('success'); + + const result = await retry(fn, 3, 10); + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should retry with legacy interface', async () => { + const fn = jest.fn() + .mockRejectedValueOnce(new Error('TIMEOUT')) + .mockResolvedValueOnce('success'); + + const result = await retry(fn, 3, 10); + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); +}); + +describe('isRetryableError', () => { + it('should return true for retryable errors', () => { + expect(isRetryableError(new Error('TIMEOUT'))).toBe(true); + expect(isRetryableError(new Error('NETWORK_ERROR'))).toBe(true); + expect(isRetryableError(new Error('RATE_LIMITED'))).toBe(true); + }); + + it('should return false for non-retryable errors', () => { + expect(isRetryableError(new Error('INVALID_ARGS'))).toBe(false); + expect(isRetryableError(new Error('CONTRACT_PANIC'))).toBe(false); + expect(isRetryableError(new Error('INSUFFICIENT_GAS'))).toBe(false); + }); + + it('should return false for duplicate errors', () => { + expect(isRetryableError(new Error('DUPLICATE_TRANSACTION'))).toBe(false); + }); +}); + +describe('isDuplicateTransactionError', () => { + it('should return true for duplicate errors', () => { + expect(isDuplicateTransactionError(new Error('DUPLICATE_TRANSACTION'))).toBe(true); + expect(isDuplicateTransactionError({ code: 'TX_ALREADY_IN_LEDGER' })).toBe(true); + }); + + it('should return false for non-duplicate errors', () => { + expect(isDuplicateTransactionError(new Error('TIMEOUT'))).toBe(false); + expect(isDuplicateTransactionError(new Error('INVALID_ARGS'))).toBe(false); + }); +}); + +describe('ErrorClassification', () => { + it('should have correct values', () => { + expect(ErrorClassification.RETRYABLE).toBe('retryable'); + expect(ErrorClassification.NON_RETRYABLE).toBe('non_retryable'); + expect(ErrorClassification.DUPLICATE).toBe('duplicate'); + }); +}); diff --git a/keeper/src/config.js b/keeper/src/config.js index 381b972..2656eae 100644 --- a/keeper/src/config.js +++ b/keeper/src/config.js @@ -26,5 +26,9 @@ export function loadConfig() { contractId: process.env.CONTRACT_ID, pollIntervalMs: parseInt(process.env.POLLING_INTERVAL_MS, 10) || 10000, + // Retry configuration + maxRetries: parseInt(process.env.MAX_RETRIES, 10) || 3, + retryBaseDelayMs: parseInt(process.env.RETRY_BASE_DELAY_MS, 10) || 1000, + maxRetryDelayMs: parseInt(process.env.MAX_RETRY_DELAY_MS, 10) || 30000, }; } \ No newline at end of file diff --git a/keeper/src/executor.js b/keeper/src/executor.js index 1a01631..0deea8b 100644 --- a/keeper/src/executor.js +++ b/keeper/src/executor.js @@ -1,12 +1,66 @@ -import { retry } from "./retry.js"; +import { withRetry, ErrorClassification } from "./retry.js"; -export function createExecutor({ logger }) { +/** + * Create an executor for task execution with retry logic + * @param {Object} deps - Dependencies + * @param {Object} deps.logger - Logger instance + * @param {Object} deps.config - Configuration object with retry settings + * @returns {Object} - Executor instance + */ +export function createExecutor({ logger, config }) { return { async execute(task) { - await retry(async () => { - logger.info("Executing task", { task }); - // TODO: build and submit Soroban transaction - }); + const retryCount = { value: 0 }; + + const result = await withRetry( + async () => { + logger.info("Executing task", { task, attempt: retryCount.value + 1 }); + // TODO: build and submit Soroban transaction + // For now, this is a placeholder that will be replaced with actual implementation + return { taskId: task.id, status: "executed" }; + }, + { + maxRetries: config?.maxRetries || 3, + baseDelayMs: config?.retryBaseDelayMs || 1000, + maxDelayMs: config?.maxRetryDelayMs || 30000, + onRetry: (error, attempt, delay) => { + retryCount.value = attempt; + logger.info("Retrying task execution", { + taskId: task.id, + attempt, + delay, + error: error.message || error.code, + }); + }, + onMaxRetries: (error, attempts) => { + logger.warn("MAX_RETRIES_EXCEEDED", { + taskId: task.id, + attempts, + error: error.message || error.code, + }); + }, + onDuplicate: () => { + logger.info("Transaction already accepted (duplicate)", { + taskId: task.id, + }); + }, + } + ); + + // Log execution result with retry count + if (result.success) { + logger.info("Task execution completed", { + taskId: task.id, + attempts: result.attempts, + retries: result.retries, + duplicate: result.duplicate || false, + }); + } + + return result; }, }; } + +// Re-export error classification for consumers +export { ErrorClassification }; diff --git a/keeper/src/retry.js b/keeper/src/retry.js index d4d4c3e..70492ee 100644 --- a/keeper/src/retry.js +++ b/keeper/src/retry.js @@ -1,16 +1,295 @@ -export async function retry(fn, attempts = 3, delay = 1000) { +/** + * Error classifications for retry logic + */ +const ErrorClassification = { + RETRYABLE: 'retryable', + NON_RETRYABLE: 'non_retryable', + DUPLICATE: 'duplicate', +}; + +/** + * Soroban RPC error codes that indicate retryable conditions + */ +const RETRYABLE_ERROR_CODES = [ + 'TIMEOUT', // Request timeout + 'NETWORK_ERROR', // Network connectivity issues + 'RATE_LIMITED', // Rate limiting from RPC + 'SERVER_ERROR', // Transient server errors + 'SERVICE_UNAVAILABLE', // Service temporarily unavailable + 'TIMEOUT_ERROR', // Transaction submission timeout + 'TX_BAD_SEQ', // Bad sequence number (can retry with fresh sequence) + 'TX_INSUFFICIENT_BALANCE', // Might be transient if funding is pending +]; + +/** + * Soroban RPC error codes that indicate non-retryable conditions + */ +const NON_RETRYABLE_ERROR_CODES = [ + 'INVALID_ARGS', // Invalid arguments to contract + 'INSUFFICIENT_GAS', // Not enough gas for execution + 'CONTRACT_PANIC', // Contract execution panic + 'INVALID_TRANSACTION', // Malformed transaction + 'TX_INSUFFICIENT_FEE', // Fee too low (would need rebuild) + 'TX_BAD_AUTH', // Bad authorization + 'TX_BAD_AUTH_EXTRA', // Extra authorization issues + 'TX_TOO_EARLY', // Transaction valid before current ledger + 'TX_TOO_LATE', // Transaction valid until passed + 'TX_MISSING_OPERATION', // No operations in transaction + 'TX_NOT_SUPPORTED', // Operation not supported +]; + +/** + * Error codes indicating duplicate transaction (already accepted) + */ +const DUPLICATE_ERROR_CODES = [ + 'DUPLICATE_TRANSACTION', + 'TX_ALREADY_IN_LEDGER', + 'TX_DUPLICATE', +]; + +/** + * Classify an error based on its code or message + * @param {Error} error - The error to classify + * @returns {string} - ErrorClassification value + */ +function classifyError(error) { + if (!error) return ErrorClassification.NON_RETRYABLE; + + const errorCode = error.code || error.errorCode || extractErrorCode(error); + const errorMessage = error.message || error.resultXdr || ''; + + // Check for duplicate transaction indicators + if (DUPLICATE_ERROR_CODES.some(code => + errorCode === code || + errorMessage.includes(code) || + errorMessage.includes('duplicate') || + errorMessage.includes('already in ledger') + )) { + return ErrorClassification.DUPLICATE; + } + + // Check for non-retryable errors + if (NON_RETRYABLE_ERROR_CODES.some(code => + errorCode === code || + errorMessage.includes(code) + )) { + return ErrorClassification.NON_RETRYABLE; + } + + // Check for retryable errors + if (RETRYABLE_ERROR_CODES.some(code => + errorCode === code || + errorMessage.includes(code) + )) { + return ErrorClassification.RETRYABLE; + } + + // Default to retryable for unknown network/transient errors + // This includes generic network errors, timeouts, etc. + if (errorMessage.includes('timeout') || + errorMessage.includes('network') || + errorMessage.includes('ECONNREFUSED') || + errorMessage.includes('ETIMEDOUT') || + errorMessage.includes('ENOTFOUND') || + errorMessage.includes('socket hang up') || + errorMessage.includes('fetch failed')) { + return ErrorClassification.RETRYABLE; + } + + return ErrorClassification.NON_RETRYABLE; +} + +/** + * Extract error code from various error formats + * @param {Error} error - The error object + * @returns {string|null} - Extracted error code or null + */ +function extractErrorCode(error) { + if (error.resultXdr) { + // Try to extract from resultXdr if available + const xdrStr = error.resultXdr.toString ? error.resultXdr.toString() : String(error.resultXdr); + // Common XDR error patterns + const patterns = ['txBadSeq', 'txInsufficientBalance', 'txInsufficientFee', 'txBadAuth']; + for (const pattern of patterns) { + if (xdrStr.includes(pattern)) return pattern.toUpperCase(); + } + } + return null; +} + +/** + * Calculate delay with exponential backoff and jitter + * @param {number} attempt - Current attempt number (0-indexed) + * @param {number} baseDelay - Base delay in milliseconds + * @param {number} maxDelay - Maximum delay in milliseconds + * @returns {number} - Calculated delay with jitter + */ +function calculateDelay(attempt, baseDelay, maxDelay) { + // Exponential backoff: baseDelay * 2^attempt + const exponentialDelay = baseDelay * Math.pow(2, attempt); + + // Cap at maxDelay + const cappedDelay = Math.min(exponentialDelay, maxDelay); + + // Add jitter: random value between 0 and baseDelay + // This prevents thundering herd on shared RPC nodes + const jitter = Math.random() * baseDelay; + + return Math.floor(cappedDelay + jitter); +} + +/** + * Sleep for a given number of milliseconds + * @param {number} ms - Milliseconds to sleep + * @returns {Promise} + */ +function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** + * Default options for withRetry + */ +const DEFAULT_OPTIONS = { + maxRetries: parseInt(process.env.MAX_RETRIES, 10) || 3, + baseDelayMs: parseInt(process.env.RETRY_BASE_DELAY_MS, 10) || 1000, + maxDelayMs: parseInt(process.env.MAX_RETRY_DELAY_MS, 10) || 30000, + onRetry: null, // Callback(error, attempt) called on each retry + onMaxRetries: null, // Callback(error, attempts) called when max retries exceeded + onDuplicate: null, // Callback() called when duplicate transaction detected +}; + +/** + * Generic higher-order async retry wrapper with exponential backoff and jitter + * + * @param {Function} fn - The async function to wrap + * @param {Object} options - Retry configuration options + * @param {number} options.maxRetries - Maximum number of retry attempts (default: 3) + * @param {number} options.baseDelayMs - Base delay in milliseconds (default: 1000) + * @param {number} options.maxDelayMs - Maximum delay cap in milliseconds (default: 30000) + * @param {Function} options.onRetry - Callback(error, attempt) called on each retry + * @param {Function} options.onMaxRetries - Callback(error, attempts) called when max retries exceeded + * @param {Function} options.onDuplicate - Callback() called when duplicate transaction detected + * @returns {Promise<*>} - Result of the wrapped function + * @throws {Error} - Last error if all retries are exhausted + */ +export async function withRetry(fn, options = {}) { + const opts = { ...DEFAULT_OPTIONS, ...options }; let lastError; + let attempt = 0; - for (let i = 0; i < attempts; i++) { + while (attempt <= opts.maxRetries) { try { - return await fn(); - } catch (err) { - lastError = err; - if (i < attempts - 1) { - await new Promise((res) => setTimeout(res, delay)); + const result = await fn(); + return { + success: true, + result, + attempts: attempt + 1, + retries: attempt, + }; + } catch (error) { + lastError = error; + const classification = classifyError(error); + + // Handle duplicate transaction - treat as success + if (classification === ErrorClassification.DUPLICATE) { + if (opts.onDuplicate) { + opts.onDuplicate(); + } + return { + success: true, + result: null, + attempts: attempt + 1, + retries: attempt, + duplicate: true, + }; + } + + // Handle non-retryable errors - bail immediately + if (classification === ErrorClassification.NON_RETRYABLE) { + throw { + success: false, + error, + attempts: attempt + 1, + retries: attempt, + classification: ErrorClassification.NON_RETRYABLE, + }; + } + + // Check if we've exhausted retries + if (attempt >= opts.maxRetries) { + if (opts.onMaxRetries) { + opts.onMaxRetries(error, attempt + 1); + } + throw { + success: false, + error, + attempts: attempt + 1, + retries: attempt, + classification: ErrorClassification.RETRYABLE, + maxRetriesExceeded: true, + }; + } + + // Calculate delay with exponential backoff and jitter + const delay = calculateDelay(attempt, opts.baseDelayMs, opts.maxDelayMs); + + // Call retry callback if provided + if (opts.onRetry) { + opts.onRetry(error, attempt + 1, delay); } + + // Wait before retrying + await sleep(delay); + + attempt++; } } - throw lastError; + // Should never reach here, but just in case + throw { + success: false, + error: lastError, + attempts: attempt + 1, + retries: attempt, + maxRetriesExceeded: true, + }; +} + +/** + * Legacy retry function for backward compatibility + * Simple retry with fixed delay + * + * @param {Function} fn - The async function to retry + * @param {number} attempts - Maximum number of attempts + * @param {number} delay - Delay between attempts in milliseconds + * @returns {Promise<*>} + */ +export async function retry(fn, attempts = 3, delay = 1000) { + return withRetry(fn, { + maxRetries: attempts - 1, + baseDelayMs: delay, + maxDelayMs: delay, + }).then(result => result.result); } + +/** + * Check if an error is retryable + * @param {Error} error - The error to check + * @returns {boolean} - True if the error is retryable + */ +export function isRetryableError(error) { + return classifyError(error) === ErrorClassification.RETRYABLE; +} + +/** + * Check if an error indicates a duplicate transaction + * @param {Error} error - The error to check + * @returns {boolean} - True if the error indicates a duplicate + */ +export function isDuplicateTransactionError(error) { + return classifyError(error) === ErrorClassification.DUPLICATE; +} + +// Export error classifications for external use +export { ErrorClassification };