vercel · pranaygp · Mar 26, 2026 · Copilot · Mar 26, 2026 · Copilot
@@ -0,0 +1,5 @@
+---
+"@workflow/core": patch
+---
+
+Fix false-positive unconsumed event error when a `for await` loop over a hook calls a step for each payload
@@ -114,23 +114,33 @@ export class EventsConsumer {
     // is still unconsumed after the queue drains, it's truly orphaned.
     if (currentEvent !== null) {
       const checkVersion = ++this.unconsumedCheckVersion;
-      this.pendingUnconsumedCheck = this.getPromiseQueue().then(() => {
-        // Use a delayed setTimeout after the queue drains. The delay must be
-        // long enough for promise chains to propagate across the VM boundary
-        // (from resolve() in the host context through to the workflow code
-        // calling subscribe() in the VM context). Node.js does not guarantee
-        // that setTimeout(0) fires after all cross-context microtasks settle,
-        // so we use a small but non-zero delay. Any subscribe() call that
-        // arrives during this window will cancel the check via version
-        // invalidation + clearTimeout.
-        this.pendingUnconsumedTimeout = setTimeout(() => {
-          this.pendingUnconsumedTimeout = null;
-          if (this.unconsumedCheckVersion === checkVersion) {
-            this.pendingUnconsumedCheck = null;
-            this.onUnconsumedEvent(currentEvent);
-          }
-        }, 100);
-      });
+      this.pendingUnconsumedCheck = this.getPromiseQueue()
+        .then(
+          // Yield to the event loop after the first queue drain. This allows
+          // microtask chains triggered by the preceding resolve() (e.g., a
+          // step result delivery that resumes a for-await loop, which then
+          // calls createHookPromise and appends a second round of async work
+          // to the promise queue) to propagate before we re-check the queue.
+          () => new Promise<void>((resolve) => setTimeout(resolve, 0))
+        )
+        .then(() => this.getPromiseQueue())
+        .then(() => {
-        .then(
-          // Yield to the event loop after the first queue drain. This allows
-          // microtask chains triggered by the preceding resolve() (e.g., a
-          // step result delivery that resumes a for-await loop, which then
-          // calls createHookPromise and appends a second round of async work
-          // to the promise queue) to propagate before we re-check the queue.
-          () => new Promise<void>((resolve) => setTimeout(resolve, 0))
-        )
-        .then(() => this.getPromiseQueue())
-        .then(() => {
+        .then(() => {
+          // If a newer subscribe() has already invalidated this check, bail out
+          // before scheduling the extra yield timer.
+          if (this.unconsumedCheckVersion !== checkVersion) {
+            return;
+          }
+          // Yield to the event loop after the first queue drain. This allows
+          // microtask chains triggered by the preceding resolve() (e.g., a
+          // step result delivery that resumes a for-await loop, which then
+          // calls createHookPromise and appends a second round of async work
+          // to the promise queue) to propagate before we re-check the queue.
+          return new Promise<void>((resolve) => setTimeout(resolve, 0));
+        })
+        .then(() => {
+          // Short-circuit stale checks before performing a second queue drain.
+          if (this.unconsumedCheckVersion !== checkVersion) {
+            return;
+          }
+          return this.getPromiseQueue();
+        })
+        .then(() => {
+          // If a later subscribe() has invalidated this check by the time both
+          // queue drains (and the yield) have completed, do not schedule the
+          // final timeout at all.
+          if (this.unconsumedCheckVersion !== checkVersion) {
+            return;
+          }
-        .then(
-          // Yield to the event loop after the first queue drain. This allows
-          // microtask chains triggered by the preceding resolve() (e.g., a
-          // step result delivery that resumes a for-await loop, which then
-          // calls createHookPromise and appends a second round of async work
-          // to the promise queue) to propagate before we re-check the queue.
-          () => new Promise<void>((resolve) => setTimeout(resolve, 0))
-        )
-        .then(() => this.getPromiseQueue())
-        .then(() => {
+        .then(() => {
+          // If a newer subscribe() has already invalidated this check, bail out
+          // before scheduling the extra yield timer.
+          if (this.unconsumedCheckVersion !== checkVersion) {
+            return;
+          }
+          // Yield to the event loop after the first queue drain. This allows
+          // microtask chains triggered by the preceding resolve() (e.g., a
+          // step result delivery that resumes a for-await loop, which then
+          // calls createHookPromise and appends a second round of async work
+          // to the promise queue) to propagate before we re-check the queue.
+          return new Promise<void>((resolve) => setTimeout(resolve, 0));
+        })
+        .then(() => {
+          // Short-circuit stale checks before performing a second queue drain.
+          if (this.unconsumedCheckVersion !== checkVersion) {
+            return;
+          }
+          return this.getPromiseQueue();
+        })
+        .then(() => {
+          // If a later subscribe() has invalidated this check by the time both
+          // queue drains (and the yield) have completed, do not schedule the
+          // final timeout at all.
+          if (this.unconsumedCheckVersion !== checkVersion) {
+            return;
+          }
+          // Use a delayed setTimeout after the queue drains. The delay must be
+          // long enough for promise chains to propagate across the VM boundary
+          // (from resolve() in the host context through to the workflow code
+          // calling subscribe() in the VM context). Node.js does not guarantee
+          // that setTimeout(0) fires after all cross-context microtasks settle,
+          // so we use a small but non-zero delay. Any subscribe() call that
+          // arrives during this window will cancel the check via version
+          // invalidation + clearTimeout.
+          this.pendingUnconsumedTimeout = setTimeout(() => {
+            this.pendingUnconsumedTimeout = null;
+            if (this.unconsumedCheckVersion === checkVersion) {
+              this.pendingUnconsumedCheck = null;
+              this.onUnconsumedEvent(currentEvent);
+            }
+          }, 100);
+        });
     }
   };
 }
@@ -599,6 +599,156 @@ function defineTests(mode: 'sync' | 'async') {
     });
   });
 
+  describe(`hook + sleep with step per payload ${label}`, () => {
+    it('should not trigger unconsumed event error when for-await loop calls a step per hook payload', async () => {
+      // Regression test for a CI failure: the hookWithSleepWorkflow event log
+      // alternates hook_received with step lifecycle events. During replay,
+      // the EventsConsumer advances past the second step_created before the
+      // for-await loop has called processPayload (and registered the step
+      // consumer). The deferred unconsumed check must wait for the new async
+      // work (hook payload deserialization) before declaring the event orphaned.
+      await setupHydrateMock();
+      const ops: Promise<any>[] = []; // handed to every dehydrate call below; not read again in this test
+      const [payload1, payload2, stepResult1, stepResult2] = await Promise.all([
+        dehydrateStepReturnValue(
+          { type: 'subscribe', id: 1 },
+          'wrun_test',
+          undefined,
+          ops
+        ),
+        dehydrateStepReturnValue(
+          { type: 'done', done: true },
+          'wrun_test',
+          undefined,
+          ops
+        ),
+        dehydrateStepReturnValue(
+          { processed: true, type: 'subscribe', id: 1 },
+          'wrun_test',
+          undefined,
+          ops
+        ),
+        dehydrateStepReturnValue(
+          { processed: true, type: 'done' },
+          'wrun_test',
+          undefined,
+          ops
+        ),
+      ]);
+
+      const ctx = setupWorkflowContext([
+        {
+          eventId: 'evnt_0',
+          runId: 'wrun_test',
+          eventType: 'hook_created',
+          correlationId: `hook_${CORR_IDS[0]}`,
+          eventData: { token: 'test-token', isWebhook: false },
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_1',
+          runId: 'wrun_test',
+          eventType: 'wait_created',
+          correlationId: `wait_${CORR_IDS[1]}`,
+          eventData: { resumeAt: new Date('2099-01-01') },
+          createdAt: new Date(),
+        },
+        // First hook payload (evnt_2), then its step lifecycle (evnt_3 to evnt_5)
+        {
+          eventId: 'evnt_2',
+          runId: 'wrun_test',
+          eventType: 'hook_received',
+          correlationId: `hook_${CORR_IDS[0]}`,
+          eventData: { payload: payload1 },
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_3',
+          runId: 'wrun_test',
+          eventType: 'step_created',
+          correlationId: `step_${CORR_IDS[2]}`,
+          eventData: { stepName: 'processPayload', input: payload1 },
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_4',
+          runId: 'wrun_test',
+          eventType: 'step_started',
+          correlationId: `step_${CORR_IDS[2]}`,
+          eventData: {},
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_5',
+          runId: 'wrun_test',
+          eventType: 'step_completed',
+          correlationId: `step_${CORR_IDS[2]}`,
+          eventData: { result: stepResult1 },
+          createdAt: new Date(),
+        },
+        // Second hook payload (evnt_6), then its step lifecycle (evnt_7 to evnt_9)
+        {
+          eventId: 'evnt_6',
+          runId: 'wrun_test',
+          eventType: 'hook_received',
+          correlationId: `hook_${CORR_IDS[0]}`,
+          eventData: { payload: payload2 },
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_7',
+          runId: 'wrun_test',
+          eventType: 'step_created',
+          correlationId: `step_${CORR_IDS[3]}`,
+          eventData: { stepName: 'processPayload', input: payload2 },
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_8',
+          runId: 'wrun_test',
+          eventType: 'step_started',
+          correlationId: `step_${CORR_IDS[3]}`,
+          eventData: {},
+          createdAt: new Date(),
+        },
+        {
+          eventId: 'evnt_9',
+          runId: 'wrun_test',
+          eventType: 'step_completed',
+          correlationId: `step_${CORR_IDS[3]}`,
+          eventData: { result: stepResult2 },
+          createdAt: new Date(),
+        },
+      ]);
+
+      const createHook = createCreateHook(ctx);
+      const sleep = createSleep(ctx);
+      const useStep = createUseStep(ctx);
+
+      const { result, error } = await runWithDiscontinuation(ctx, async () => {
+        const hook = createHook();
+        void sleep('1d'); // started but never awaited; the log holds only wait_created for it
+
+        const processPayload = useStep<[any], any>('processPayload');
+        const results: any[] = [];
+
+        for await (const payload of hook) {
+          const processed = await processPayload(payload);
+          results.push(processed);
+          if ((payload as any).done) break; // the second fixture payload sets done: true
+        }
+
+        return results;
+      });
+
+      expect(error).toBeUndefined();
+      expect(result).toEqual([
+        { processed: true, type: 'subscribe', id: 1 },
+        { processed: true, type: 'done' },
+      ]);
+    });
+  });
+
   describe(`hook only (no concurrent pending entity) ${label}`, () => {
     it('should deliver all hook payloads and reach step when no sleep or incomplete step exists', async () => {
       await setupHydrateMock();