diff --git a/src/polling/pollScraperResults.ts b/src/polling/pollScraperResults.ts index ed8af96..570c90c 100644 --- a/src/polling/pollScraperResults.ts +++ b/src/polling/pollScraperResults.ts @@ -12,8 +12,18 @@ export type PollResult = ScrapeRun & { data?: unknown[]; }; +const MAX_POLL_FAILURES = 5; + +/** + * Builds a PollResult marking a run as failed. + */ +function failedResult(run: ScrapeRun): PollResult { + return { runId: run.runId, datasetId: run.datasetId, status: "FAILED" }; +} + /** * Polls each scraper run in parallel until all are completed (SUCCEEDED or FAILED). + * Marks a run as FAILED after MAX_POLL_FAILURES consecutive poll errors. * Returns an array of results for each run. */ export async function pollScraperResults( @@ -23,6 +33,7 @@ export async function pollScraperResults( const pendingRuns = new Map( runs.map((run) => [run.runId, run]) ); + const failureCounts = new Map(); while (pendingRuns.size > 0) { // Poll all pending runs in parallel @@ -30,10 +41,26 @@ export async function pollScraperResults( const result = await getScraperResults(run.runId); if (!result) { - logger.warn("Failed to get scraper result", { runId: run.runId }); + const failures = (failureCounts.get(run.runId) ?? 0) + 1; + failureCounts.set(run.runId, failures); + logger.warn("Failed to get scraper result", { + runId: run.runId, + consecutiveFailures: failures, + }); + + if (failures >= MAX_POLL_FAILURES) { + logger.error("Max poll failures reached, marking run as FAILED", { + runId: run.runId, + }); + return { run, pollResult: failedResult(run) }; + } + return null; } + // Reset failure count on successful poll + failureCounts.delete(run.runId); + if (result.status === "SUCCEEDED") { const completedResult = result as { status: string; @@ -47,17 +74,10 @@ export async function pollScraperResults( datasetId: completedResult.datasetId, status: completedResult.status, data: completedResult.data, - }, + } as PollResult, }; } else if (result.status === "FAILED") { - return { - run, - pollResult: { - runId: run.runId, - datasetId: result.datasetId, - status: result.status, - }, - }; + return { run, pollResult: failedResult(run) }; } return null; // Still running