From 642a6111b676c0426de619cbe6d4fb854aa39a0a Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Tue, 31 Mar 2026 10:16:40 -0700 Subject: [PATCH 1/7] [core] Fail runs when replays take longer than 120s Signed-off-by: Peter Wielander --- .changeset/quiet-plums-speak.md | 9 +++++ .../builders/src/vercel-build-output-api.ts | 2 +- packages/core/src/runtime.ts | 38 ++++++++++++++++++- packages/core/src/runtime/constants.ts | 7 ++++ packages/errors/src/error-codes.ts | 2 + packages/next/src/builder-deferred.ts | 2 +- packages/next/src/builder-eager.ts | 2 +- packages/sveltekit/src/index.ts | 2 +- 8 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 .changeset/quiet-plums-speak.md diff --git a/.changeset/quiet-plums-speak.md b/.changeset/quiet-plums-speak.md new file mode 100644 index 0000000000..f625bb5ded --- /dev/null +++ b/.changeset/quiet-plums-speak.md @@ -0,0 +1,9 @@ +--- +"@workflow/sveltekit": patch +"@workflow/builders": patch +"@workflow/errors": patch +"@workflow/core": patch +"@workflow/next": patch +--- + +Increase flow route limit to 180s and fail run if a single replay exceeds 120s diff --git a/packages/builders/src/vercel-build-output-api.ts b/packages/builders/src/vercel-build-output-api.ts index e8dd1768d6..e842a40bce 100644 --- a/packages/builders/src/vercel-build-output-api.ts +++ b/packages/builders/src/vercel-build-output-api.ts @@ -110,7 +110,7 @@ export class VercelBuildOutputAPIBuilder extends BaseBuilder { // Create package.json and .vc-config.json for workflows function await this.createPackageJson(workflowsFuncDir, 'commonjs'); await this.createVcConfig(workflowsFuncDir, { - maxDuration: 60, + maxDuration: 180, experimentalTriggers: [WORKFLOW_QUEUE_TRIGGER], runtime: this.config.runtime, }); diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index da7a407bd3..3ac54ca7d1 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -5,7 +5,10 @@ import { WorkflowRuntimeError, } from 
'@workflow/errors'; import { classifyRunError } from './classify-error.js'; -import { MAX_QUEUE_DELIVERIES } from './runtime/constants.js'; +import { + MAX_QUEUE_DELIVERIES, + REPLAY_TIMEOUT_MS, +} from './runtime/constants.js'; import { parseWorkflowName } from '@workflow/utils/parse-name'; import { type Event, @@ -161,6 +164,37 @@ export function workflowEntrypoint( const spanLinks = await linkToCurrentContext(); + // --- Replay timeout guard --- + // If the replay takes longer than the timeout, fail the run and exit. + // This must be lower than the function's maxDuration (180s) to ensure + // the failure is recorded before the platform kills the function. + const replayTimeout = setTimeout(async () => { + runtimeLogger.error('Workflow replay exceeded timeout', { + workflowRunId: runId, + timeoutMs: REPLAY_TIMEOUT_MS, + }); + try { + const world = getWorld(); + await world.events.create( + runId, + { + eventType: 'run_failed', + specVersion: SPEC_VERSION_CURRENT, + eventData: { + error: { + message: `Workflow replay exceeded maximum duration (${REPLAY_TIMEOUT_MS / 1000}s)`, + }, + errorCode: RUN_ERROR_CODES.REPLAY_TIMEOUT, + }, + }, + { requestId } + ); + } catch { + // Best effort — process exits regardless + } + process.exit(1); + }, REPLAY_TIMEOUT_MS); + // Invoke user workflow within the propagated trace context and baggage return await withTraceContext(traceContext, async () => { // Set workflow context as baggage for automatic propagation @@ -525,6 +559,8 @@ export function workflowEntrypoint( ); // End trace } ); // End withWorkflowBaggage + }).finally(() => { + clearTimeout(replayTimeout); }); // End withTraceContext } ); diff --git a/packages/core/src/runtime/constants.ts b/packages/core/src/runtime/constants.ts index 398177d053..8fdd806485 100644 --- a/packages/core/src/runtime/constants.ts +++ b/packages/core/src/runtime/constants.ts @@ -11,3 +11,10 @@ // At 48 attempts the total elapsed time is approximately 20 hours, which is // safely under the 
24-hour message visibility limit. export const MAX_QUEUE_DELIVERIES = 48; + +// Maximum time allowed for a single workflow replay execution (in ms). +// If a replay exceeds this duration, the run is failed and the process exits. +// This must be lower than the function's maxDuration (180s) to ensure the +// timeout handler has time to post the run_failed event before the platform +// kills the function. +export const REPLAY_TIMEOUT_MS = 120_000; diff --git a/packages/errors/src/error-codes.ts b/packages/errors/src/error-codes.ts index f83b1baadf..b945303e93 100644 --- a/packages/errors/src/error-codes.ts +++ b/packages/errors/src/error-codes.ts @@ -10,6 +10,8 @@ export const RUN_ERROR_CODES = { RUNTIME_ERROR: 'RUNTIME_ERROR', /** Run exceeded the maximum number of queue deliveries */ MAX_DELIVERIES_EXCEEDED: 'MAX_DELIVERIES_EXCEEDED', + /** Workflow replay exceeded the maximum allowed duration */ + REPLAY_TIMEOUT: 'REPLAY_TIMEOUT', } as const; export type RunErrorCode = diff --git a/packages/next/src/builder-deferred.ts b/packages/next/src/builder-deferred.ts index 86b9eb1bc2..3c8c0eb2c3 100644 --- a/packages/next/src/builder-deferred.ts +++ b/packages/next/src/builder-deferred.ts @@ -1109,7 +1109,7 @@ export async function getNextBuilderDeferred() { experimentalTriggers: [STEP_QUEUE_TRIGGER], }, workflows: { - maxDuration: 60, + maxDuration: 180, experimentalTriggers: [WORKFLOW_QUEUE_TRIGGER], }, }; diff --git a/packages/next/src/builder-eager.ts b/packages/next/src/builder-eager.ts index 11aa2a5c08..3298d5a0d2 100644 --- a/packages/next/src/builder-eager.ts +++ b/packages/next/src/builder-eager.ts @@ -420,7 +420,7 @@ export async function getNextBuilderEager() { experimentalTriggers: [STEP_QUEUE_TRIGGER], }, workflows: { - maxDuration: 60, + maxDuration: 180, experimentalTriggers: [WORKFLOW_QUEUE_TRIGGER], }, }; diff --git a/packages/sveltekit/src/index.ts b/packages/sveltekit/src/index.ts index 0bb0af3518..e0490d75ac 100644 --- a/packages/sveltekit/src/index.ts 
+++ b/packages/sveltekit/src/index.ts @@ -19,7 +19,7 @@ process.on('beforeExit', () => { { file: '.vercel/output/functions/.well-known/workflow/v1/flow.func/.vc-config.json', config: { - maxDuration: 60, + maxDuration: 180, experimentalTriggers: [ { type: 'queue/v2beta', From d997a10334f238fc9265e6e2807bad4a0a1a42f9 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Tue, 31 Mar 2026 13:00:27 -0700 Subject: [PATCH 2/7] Update Signed-off-by: Peter Wielander --- .changeset/quiet-plums-speak.md | 2 +- packages/builders/src/vercel-build-output-api.ts | 2 +- packages/core/src/runtime/constants.ts | 4 ++-- packages/next/src/builder-deferred.ts | 2 +- packages/next/src/builder-eager.ts | 2 +- packages/sveltekit/src/index.ts | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.changeset/quiet-plums-speak.md b/.changeset/quiet-plums-speak.md index f625bb5ded..6de20c22db 100644 --- a/.changeset/quiet-plums-speak.md +++ b/.changeset/quiet-plums-speak.md @@ -6,4 +6,4 @@ "@workflow/next": patch --- -Increase flow route limit to 180s and fail run if a single replay exceeds 120s +Increase flow route limit to max fluid duration and fail run if a single replay exceeds 300s diff --git a/packages/builders/src/vercel-build-output-api.ts b/packages/builders/src/vercel-build-output-api.ts index e842a40bce..38ecaa2ae9 100644 --- a/packages/builders/src/vercel-build-output-api.ts +++ b/packages/builders/src/vercel-build-output-api.ts @@ -110,7 +110,7 @@ export class VercelBuildOutputAPIBuilder extends BaseBuilder { // Create package.json and .vc-config.json for workflows function await this.createPackageJson(workflowsFuncDir, 'commonjs'); await this.createVcConfig(workflowsFuncDir, { - maxDuration: 180, + maxDuration: 'max', experimentalTriggers: [WORKFLOW_QUEUE_TRIGGER], runtime: this.config.runtime, }); diff --git a/packages/core/src/runtime/constants.ts b/packages/core/src/runtime/constants.ts index 8fdd806485..3d287c9354 100644 --- 
a/packages/core/src/runtime/constants.ts +++ b/packages/core/src/runtime/constants.ts @@ -14,7 +14,7 @@ export const MAX_QUEUE_DELIVERIES = 48; // Maximum time allowed for a single workflow replay execution (in ms). // If a replay exceeds this duration, the run is failed and the process exits. -// This must be lower than the function's maxDuration (180s) to ensure the +// This must be lower than the function's maxDuration to ensure the // timeout handler has time to post the run_failed event before the platform // kills the function. -export const REPLAY_TIMEOUT_MS = 120_000; +export const REPLAY_TIMEOUT_MS = 300_000; diff --git a/packages/next/src/builder-deferred.ts b/packages/next/src/builder-deferred.ts index 3c8c0eb2c3..86e1e58c86 100644 --- a/packages/next/src/builder-deferred.ts +++ b/packages/next/src/builder-deferred.ts @@ -1109,7 +1109,7 @@ export async function getNextBuilderDeferred() { experimentalTriggers: [STEP_QUEUE_TRIGGER], }, workflows: { - maxDuration: 180, + maxDuration: 'max', experimentalTriggers: [WORKFLOW_QUEUE_TRIGGER], }, }; diff --git a/packages/next/src/builder-eager.ts b/packages/next/src/builder-eager.ts index 3298d5a0d2..c6d6fac0e2 100644 --- a/packages/next/src/builder-eager.ts +++ b/packages/next/src/builder-eager.ts @@ -420,7 +420,7 @@ export async function getNextBuilderEager() { experimentalTriggers: [STEP_QUEUE_TRIGGER], }, workflows: { - maxDuration: 180, + maxDuration: 'max', experimentalTriggers: [WORKFLOW_QUEUE_TRIGGER], }, }; diff --git a/packages/sveltekit/src/index.ts b/packages/sveltekit/src/index.ts index e0490d75ac..3923ecd621 100644 --- a/packages/sveltekit/src/index.ts +++ b/packages/sveltekit/src/index.ts @@ -19,7 +19,7 @@ process.on('beforeExit', () => { { file: '.vercel/output/functions/.well-known/workflow/v1/flow.func/.vc-config.json', config: { - maxDuration: 180, + maxDuration: 'max', experimentalTriggers: [ { type: 'queue/v2beta', From daba9ae569639bbed5f3a7050e2e8019a2d8a337 Mon Sep 17 00:00:00 2001 From: 
Peter Wielander Date: Tue, 31 Mar 2026 13:49:31 -0700 Subject: [PATCH 3/7] adjust to 240s for hobby plan Signed-off-by: Peter Wielander --- packages/core/src/runtime/constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/runtime/constants.ts b/packages/core/src/runtime/constants.ts index 3d287c9354..b63211c06e 100644 --- a/packages/core/src/runtime/constants.ts +++ b/packages/core/src/runtime/constants.ts @@ -17,4 +17,4 @@ export const MAX_QUEUE_DELIVERIES = 48; // This must be lower than the function's maxDuration to ensure the // timeout handler has time to post the run_failed event before the platform // kills the function. -export const REPLAY_TIMEOUT_MS = 300_000; +export const REPLAY_TIMEOUT_MS = 240_000; From 6fd0fa4b0dc5be0e72902f8ac850764e8b899dbc Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Wed, 1 Apr 2026 08:21:46 -0700 Subject: [PATCH 4/7] adjust comment Signed-off-by: Peter Wielander --- packages/core/src/runtime.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 3ac54ca7d1..162b693d95 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -4,11 +4,6 @@ import { RunExpiredError, WorkflowRuntimeError, } from '@workflow/errors'; -import { classifyRunError } from './classify-error.js'; -import { - MAX_QUEUE_DELIVERIES, - REPLAY_TIMEOUT_MS, -} from './runtime/constants.js'; import { parseWorkflowName } from '@workflow/utils/parse-name'; import { type Event, @@ -16,9 +11,14 @@ import { WorkflowInvokePayloadSchema, type WorkflowRun, } from '@workflow/world'; +import { classifyRunError } from './classify-error.js'; import { importKey } from './encryption.js'; import { WorkflowSuspension } from './global.js'; import { runtimeLogger } from './logger.js'; +import { + MAX_QUEUE_DELIVERIES, + REPLAY_TIMEOUT_MS, +} from './runtime/constants.js'; import { getAllWorkflowRunEvents, getQueueOverhead, 
@@ -166,7 +166,7 @@ export function workflowEntrypoint( // --- Replay timeout guard --- // If the replay takes longer than the timeout, fail the run and exit. - // This must be lower than the function's maxDuration (180s) to ensure + // This must be lower than the function's maxDuration to ensure // the failure is recorded before the platform kills the function. const replayTimeout = setTimeout(async () => { runtimeLogger.error('Workflow replay exceeded timeout', { From a915b7a28e5e9b05cb489b30dabe6afc8d8c488d Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Wed, 1 Apr 2026 08:28:13 -0700 Subject: [PATCH 5/7] wording changes Signed-off-by: Peter Wielander --- .changeset/quiet-plums-speak.md | 2 +- packages/core/src/runtime/constants.ts | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.changeset/quiet-plums-speak.md b/.changeset/quiet-plums-speak.md index 6de20c22db..11b0a27e20 100644 --- a/.changeset/quiet-plums-speak.md +++ b/.changeset/quiet-plums-speak.md @@ -6,4 +6,4 @@ "@workflow/next": patch --- -Increase flow route limit to max fluid duration and fail run if a single replay exceeds 300s +Increase flow route limit to max fluid duration and fail run if a single replay takes too long diff --git a/packages/core/src/runtime/constants.ts b/packages/core/src/runtime/constants.ts index b63211c06e..adedfcd883 100644 --- a/packages/core/src/runtime/constants.ts +++ b/packages/core/src/runtime/constants.ts @@ -17,4 +17,6 @@ export const MAX_QUEUE_DELIVERIES = 48; // This must be lower than the function's maxDuration to ensure the // timeout handler has time to post the run_failed event before the platform // kills the function. +// Note that on hobby plan, the maxDuration is 60s, so this barrier will not be hit, +// and the queue will re-try until the visibility window expires. 
export const REPLAY_TIMEOUT_MS = 240_000; From 6a4defd33e924084852bbe99e9b970f9c65f5301 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Wed, 1 Apr 2026 08:28:23 -0700 Subject: [PATCH 6/7] ensure only vercel does this check Signed-off-by: Peter Wielander --- packages/core/src/runtime.ts | 57 ++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 162b693d95..0786f7bef2 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -168,32 +168,37 @@ export function workflowEntrypoint( // If the replay takes longer than the timeout, fail the run and exit. // This must be lower than the function's maxDuration to ensure // the failure is recorded before the platform kills the function. - const replayTimeout = setTimeout(async () => { - runtimeLogger.error('Workflow replay exceeded timeout', { - workflowRunId: runId, - timeoutMs: REPLAY_TIMEOUT_MS, - }); - try { - const world = getWorld(); - await world.events.create( - runId, - { - eventType: 'run_failed', - specVersion: SPEC_VERSION_CURRENT, - eventData: { - error: { - message: `Workflow replay exceeded maximum duration (${REPLAY_TIMEOUT_MS / 1000}s)`, + let replayTimeout: NodeJS.Timeout | undefined; + if (process.env.VERCEL_URL !== undefined) { + replayTimeout = setTimeout(async () => { + runtimeLogger.error('Workflow replay exceeded timeout', { + workflowRunId: runId, + timeoutMs: REPLAY_TIMEOUT_MS, + }); + try { + const world = getWorld(); + await world.events.create( + runId, + { + eventType: 'run_failed', + specVersion: SPEC_VERSION_CURRENT, + eventData: { + error: { + message: `Workflow replay exceeded maximum duration (${REPLAY_TIMEOUT_MS / 1000}s)`, + }, + errorCode: RUN_ERROR_CODES.REPLAY_TIMEOUT, }, - errorCode: RUN_ERROR_CODES.REPLAY_TIMEOUT, }, - }, - { requestId } - ); - } catch { - // Best effort — process exits regardless - } - process.exit(1); - }, REPLAY_TIMEOUT_MS); + { 
requestId } + ); + } catch { + // Best effort — process exits regardless + } + // Note that this also prevents the runtime from acking the queue message, + // so the queue will call back once, after which a 410 will get it to exit early. + process.exit(1); + }, REPLAY_TIMEOUT_MS); + } // Invoke user workflow within the propagated trace context and baggage return await withTraceContext(traceContext, async () => { @@ -560,7 +565,9 @@ export function workflowEntrypoint( } ); // End withWorkflowBaggage }).finally(() => { - clearTimeout(replayTimeout); + if (replayTimeout) { + clearTimeout(replayTimeout); + } }); // End withTraceContext } ); From e34e4761064fb7cb134de8550060cb5c9b842057 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Wed, 1 Apr 2026 08:29:03 -0700 Subject: [PATCH 7/7] unref Signed-off-by: Peter Wielander --- packages/core/src/runtime.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 0786f7bef2..46ad5fa4f5 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -198,6 +198,7 @@ export function workflowEntrypoint( // so the queue will call back once, after which a 410 will get it to exit early. process.exit(1); }, REPLAY_TIMEOUT_MS); + replayTimeout.unref(); } // Invoke user workflow within the propagated trace context and baggage