mongrel-intelligence · zbigniewsobiecki · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/src/agents/definitions/review.yaml b/src/agents/definitions/review.yaml
@@ -36,13 +36,13 @@ triggers:
         description: Filter PRs by author type
         options: [own, external, all]
         defaultValue: own
-    contextPipeline: [prContext, contextFiles]
+    contextPipeline: [prContext, contextFiles, workItem]
   - event: scm:review-requested
     label: On Review Requested
     description: Trigger review when a CASCADE persona is explicitly requested as reviewer
     defaultEnabled: false
     providers: [github]
-    contextPipeline: [prContext, contextFiles]
+    contextPipeline: [prContext, contextFiles, workItem]
   - event: scm:pr-opened
     label: PR Opened
     description: Trigger review when a new PR is opened (without waiting for CI)
@@ -55,7 +55,7 @@ triggers:
         description: Filter PRs by author type
         options: [own, external, all]
         defaultValue: own
-    contextPipeline: [prContext, contextFiles]
+    contextPipeline: [prContext, contextFiles, workItem]
 strategies: {}
 
 prompts:
@@ -81,6 +81,13 @@ prompts:
     You are not expected to review every skipped file — only fetch when the PR
     description or another file points you at it.
 
+    ## Work item context
+
+    If a **ReadWorkItem** pre-fetch is present, it contains the linked PM work item
+    (card/issue) with its description, acceptance criteria, and checklists. Use it
+    to verify that the PR fully addresses the work item's requirements — not just
+    what the PR description claims.
+
     ## Your task
 
     Examine the code changes carefully and submit your review using CreatePRReview.

diff --git a/src/agents/prompts/templates/review.eta b/src/agents/prompts/templates/review.eta
@@ -19,7 +19,7 @@ CRITICAL:
 
 ### Phase 1: Strategic Analysis
 
-1. **Understand the change**: Read the PR description and all modified files. Understand WHAT changed and WHY. An initial comment has already been posted on the PR acknowledging the review is in progress.
+1. **Understand the change**: Read the PR description and all modified files. Understand WHAT changed and WHY. An initial comment has already been posted on the PR acknowledging the review is in progress. If a **ReadWorkItem** injection is present in the pre-fetched context, read the work item's title, description, acceptance criteria, and checklists to understand the original requirements behind this PR.
 
 2. **Explore the codebase** using `ListDirectory`, `ReadFile`, `RipGrep`, and `Tmux` to understand the context of the changes. Read CLAUDE.md and README.md for project conventions.
 
@@ -47,6 +47,8 @@ CRITICAL:
 
 5. **Verify against PR description**: Compare what the PR description claims against what the code actually does. Flag any mismatches between described behavior and actual implementation (e.g., PR says "bottom right" but CSS centers the element).
 
+5a. **Verify against work item** (if ReadWorkItem context is present): Cross-reference the PR changes against the work item's acceptance criteria and checklists. Flag if the PR addresses only part of the work item requirements, or if the implementation deviates from the planned approach described in the work item. A PR description can claim anything — the work item is the ground truth for what was agreed upon.
+
 6. **Trace execution paths**: For each significant code change, mentally trace what happens at runtime. Don't assume bugs exist - verify them.
 
 7. **Validate your claims**: Before stating "X is a problem":

diff --git a/src/triggers/github/check-polling.ts b/src/triggers/github/check-polling.ts
diff --git a/src/triggers/github/check-suite-success.ts b/src/triggers/github/check-suite-success.ts
@@ -1,4 +1,4 @@
-import { type CheckSuiteStatus, githubClient } from '../../github/client.js';
+import { githubClient } from '../../github/client.js';
 import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/index.js';
 import { logger } from '../../utils/logging.js';
 import { parseRepoFullName } from '../../utils/repo.js';
@@ -19,61 +19,34 @@ import {
 	resolveWorkItemId,
 } from './utils.js';
 
-const MAX_RETRIES = 12;
-const RETRY_DELAY_MS = 10_000;
-
 export { recentlyDispatched } from './review-dispatch-dedup.js';
 
-/**
- * Wait for all check suites to complete, retrying when some are still in-progress.
- * Returns immediately if all checks have completed (whether passing or failing).
- *
- * Called by the worker before starting the review agent (not in the trigger handler).
- */
-export async function waitForChecks(
-	owner: string,
-	repo: string,
-	headSha: string,
-	prNumber: number,
-): Promise<CheckSuiteStatus> {
-	let checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha);
-	if (checkStatus.allPassing) return checkStatus;
-
-	const hasInProgress = checkStatus.checkRuns.some((c) => c.status !== 'completed');
-	if (!hasInProgress) return checkStatus;
-
-	for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
-		logger.info('Some checks still in progress, retrying', {
-			prNumber,
-			attempt,
-			maxRetries: MAX_RETRIES,
-			pending: checkStatus.checkRuns.filter((c) => c.status !== 'completed').map((c) => c.name),
-		});
-		await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS));
-		checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha);
-		if (checkStatus.allPassing) break;
-
-		// If all completed but some failed, no point retrying
-		const stillRunning = checkStatus.checkRuns.some((c) => c.status !== 'completed');
-		if (!stillRunning) break;
-	}
-
-	return checkStatus;
-}
-
 /**
  * Dispatches an outcome agent when a check_suite completes with success
  * conclusion on a PR authored by the implementer persona.
  *
- * Two outcomes — chosen from aggregate state across ALL check_runs on the
- * head SHA, not just this suite's:
- * - `review`              — every completed check passes, OR some are still
- *                           in-progress (defer to worker via `waitForChecks`).
- * - `respond-to-ci`       — every check is complete AND at least one failed.
- *                           Closes the gap where GitHub fires the success
- *                           event last after a fast-failing sibling suite,
- *                           and no later `conclusion=failure` event will fire
- *                           to wake `check-suite-failure`.
+ * Three outcomes, chosen from aggregate state across ALL check_runs on the
+ * head SHA — never just from this individual suite. The LAST check_suite
+ * event for the SHA (regardless of polarity) is the one that makes the
+ * dispatch decision. Mirrors `check-suite-failure.handle`'s defer-on-
+ * incomplete shape.
+ *
+ * - `respond-to-ci` — every check is complete AND at least one failed.
+ *                     Closes the gap where GitHub fires the success
+ *                     event last after a fast-failing sibling suite, and no
+ *                     later `conclusion=failure` event will fire to wake
+ *                     `check-suite-failure`.
+ * - `review`        — every completed check passes.
+ * - skip            — some checks still in-progress. The next check_suite
+ *                     event for the SHA will re-evaluate aggregate state.
+ *                     No worker spawned; no dedup claimed; no orphan ack
+ *                     comment posted.
+ *
+ * Why no worker-side polling: PR #1245 (2026-05-01) shipped a doomed worker
+ * because the success handler dispatched on the FIRST event (CodeQL completing)
+ * while CI was still running. The worker polled 12×10s and bailed; the dedup
+ * then blocked the legitimate later success event from re-dispatching.
+ * Deferring at the handler layer makes that whole class of bug impossible.
  *
  * The trigger fires when:
  * 1. A check_suite completes with success conclusion
@@ -175,14 +148,17 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
 		const workItemId = await resolveWorkItemId(ctx.project.id, prNumber);
 		const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId);
 
-		// Mixed-state SHA fork: GitHub fires check_suite.completed once per
-		// workflow. When workflow A's suite succeeds but workflow B's suite on
-		// the same SHA failed earlier (and workflow B's failure handler skipped
-		// with "not all complete yet"), this success event is the one that
-		// closes the picture. If aggregate state has any failure, dispatch
-		// respond-to-ci instead of review — otherwise respond-to-ci is lost
-		// because no later check_suite event with conclusion=failure will fire.
-		// See PR #176 / 2026-04-30 for the live incident.
+		// Aggregate-state fork. GitHub fires check_suite.completed once per
+		// workflow; the LAST event to arrive (regardless of polarity) is the
+		// one that makes the dispatch decision. Three outcomes:
+		//   - !allComplete       → defer (skip). Next event re-evaluates.
+		//   - allComplete + any failed → respond-to-ci (closes the gap where
+		//                          a fast-failing sibling suite would lose
+		//                          dispatch — see PR #176 incident).
+		//   - allComplete + all passing → review.
+		// No `waitForChecks: true` worker-side polling — that path was deleted
+		// after PR #1245 (2026-05-01) where the worker polled 2 min, bailed,
+		// and the cross-process dedup blocked all retries.
 		const checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha);
 		const allComplete = checkStatus.checkRuns.every((cr) => cr.status === 'completed');
 		const anyFailed = checkStatus.checkRuns.some(
@@ -191,7 +167,24 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
 				cr.conclusion === 'timed_out' ||
 				cr.conclusion === 'action_required',
 		);
-		if (allComplete && anyFailed) {
+
+		if (!allComplete) {
+			const incomplete = checkStatus.checkRuns
+				.filter((cr) => cr.status !== 'completed')
+				.map((cr) => cr.name);
+			logger.info('Not all checks complete yet, waiting for next check_suite event', {
+				handler: this.name,
+				prNumber,
+				totalChecks: checkStatus.totalCount,
+				incompleteChecks: incomplete,
+			});
+			return skip(
+				this.name,
+				`Not all checks complete yet (${incomplete.length}/${checkStatus.totalCount} still running): ${incomplete.join(', ')}`,
+			);
+		}
+
+		if (anyFailed) {
 			return dispatchRespondToCi({
 				ctx,
 				prNumber,
@@ -205,7 +198,8 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
 			});
 		}
 
-		// Skip if the reviewer persona's latest review already covers the current HEAD SHA
+		// allComplete && !anyFailed → review path. Skip if the reviewer
+		// persona's latest review already covers the current HEAD SHA.
 		const reviews = await githubClient.getPRReviews(owner, repo, prNumber);
 
 		// Use persona identities to identify reviewer bot's reviews
@@ -250,12 +244,9 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
 			);
 		}
 
-		// The trigger decision is made — the review agent should run.
-		// Actual check polling (waitForChecks) is deferred to the worker via the flag.
-		// GitHub fires a check_suite webhook per individual suite completion.
-		// When multiple suites exist, the first webhook arrives before other suites finish.
-		// The worker will poll until all checks pass before starting the agent.
-		logger.info('Check-suite success trigger matched — deferring check polling to worker', {
+		// Aggregate state is already verified all-passing at this point — no
+		// worker-side polling needed. Dispatch immediately.
+		logger.info('Check-suite success trigger matched — dispatching review', {
 			prNumber,
 			workItemId,
 			headSha,
@@ -280,7 +271,6 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
 			workItemId,
 			workItemUrl,
 			workItemTitle,
-			waitForChecks: true,
 			onBlocked: () => releaseReviewDispatch(dedupKey),
 		};
 	}

diff --git a/src/triggers/github/review-dispatch-dedup.ts b/src/triggers/github/review-dispatch-dedup.ts
@@ -1,6 +1,11 @@
 import { logger } from '../../utils/logging.js';
 
-const DEDUP_TTL_MS = 30 * 60 * 1000; // 30 minutes
+// 5-minute TTL — short enough to never wedge the dispatch path for a clean
+// PR. Originally 30 min; shortened in PR #1245 follow-up after the
+// success-handler refactor eliminated the worker-bail-out path. With the new
+// defer-on-incomplete behavior, dispatches correlate with actually-running
+// workers, so the longer TTL had no defensive value and amplified incidents.
+const DEDUP_TTL_MS = 5 * 60 * 1000;
 
 export const recentlyDispatched = new Map<string, number>();
 

diff --git a/src/triggers/github/webhook-handler.ts b/src/triggers/github/webhook-handler.ts
@@ -12,21 +12,18 @@
  */
 
 import { isPMFocusedAgent } from '../../agents/definitions/loader.js';
-import { githubClient, withGitHubToken } from '../../github/client.js';
+import { withGitHubToken } from '../../github/client.js';
 import { getPersonaToken, resolvePersonaIdentities } from '../../github/personas.js';
 import { extractGitHubContext, generateAckMessage } from '../../router/ackMessageGenerator.js';
 import type { CascadeConfig, ProjectConfig, TriggerContext } from '../../types/index.js';
 import { logger, startWatchdog } from '../../utils/index.js';
-import { parseRepoFullName } from '../../utils/repo.js';
-import { safeOperation } from '../../utils/safeOperation.js';
 import type { TriggerRegistry } from '../registry.js';
 import { withAgentTypeConcurrency } from '../shared/concurrency.js';
 import { withPMScope } from '../shared/credential-scope.js';
 import { postPMAckComment } from '../shared/pm-ack.js';
 import { runAgentWithCredentials } from '../shared/webhook-execution.js';
 import type { TriggerResult } from '../types.js';
 import { postAcknowledgmentComment, updateInitialCommentWithError } from './ack-comments.js';
-import { pollWaitForChecks } from './check-polling.js';
 import { GitHubWebhookIntegration } from './integration.js';
 
 const integration = new GitHubWebhookIntegration();
@@ -195,7 +192,6 @@ async function runGitHubAgent(
 	}
 }
 
-// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: webhook orchestration with ack cleanup
 export async function processGitHubWebhook(
 	payload: unknown,
 	eventType: string,
@@ -244,35 +240,10 @@ export async function processGitHubWebhook(
 	if (ackCommentId) result.agentInput.ackCommentId = ackCommentId;
 	if (ackMessage) result.agentInput.ackMessage = ackMessage;
 
-	// Poll until all CI checks pass before starting agent (deferred from trigger)
-	if (result.waitForChecks) {
-		const githubToken = await getPersonaToken(project.id, 'implementation');
-		try {
-			const checksOk = await pollWaitForChecks(result, event.projectIdentifier, githubToken);
-			if (!checksOk) {
-				result.onBlocked?.();
-				// Clean up orphaned ack comment so the PR doesn't show a misleading "Reviewing" message
-				const ackId = result.agentInput.ackCommentId as number | undefined;
-				if (ackId && event.projectIdentifier) {
-					const { owner, repo } = parseRepoFullName(event.projectIdentifier);
-					const deleteToken = await getPersonaToken(
-						project.id,
-						result.agentType ?? 'implementation',
-					);
-					await withGitHubToken(deleteToken, () =>
-						safeOperation(() => githubClient.deletePRComment(owner, repo, ackId), {
-							action: 'delete ack comment after check polling timeout',
-							prNumber: result.prNumber,
-						}),
-					);
-				}
-				return;
-			}
-		} catch (err) {
-			result.onBlocked?.();
-			throw err;
-		}
-	}
+	// Worker-side `waitForChecks` polling was removed in PR #1245 follow-up:
+	// the success handler now defers (skips) on incomplete aggregate state, so
+	// no TriggerResult ever reaches this point with the flag set, and no
+	// worker bail-out can leave the dedup wedged.
 
 	logger.info('GitHub trigger matched', {
 		agentType: result.agentType || '(no agent)',

diff --git a/src/types/index.ts b/src/types/index.ts
@@ -107,8 +107,6 @@ export interface TriggerResult {
 	prUrl?: string;
 	/** Display title of the pull request. */
 	prTitle?: string;
-	/** When true, the worker must poll for all CI checks to pass before starting the agent. */
-	waitForChecks?: boolean;
 	/** Called when the router cannot enqueue the job (work-item lock, concurrency limit).
 	 *  Allows the trigger handler to undo side-effects like dedup marking. */
 	onBlocked?: () => void;

diff --git a/tests/unit/agents/definitions/loader.test.ts b/tests/unit/agents/definitions/loader.test.ts
@@ -162,10 +162,10 @@ describe('YAML agent definitions loader', () => {
 			]);
 		});
 
-		it('review agent triggers use PR context pipeline', () => {
+		it('review agent triggers use PR context pipeline with work item', () => {
 			const def = loadBuiltinDefinition('review');
 			const ciPassedTrigger = def.triggers.find((t) => t.event === 'scm:check-suite-success');
-			expect(ciPassedTrigger?.contextPipeline).toEqual(['prContext', 'contextFiles']);
+			expect(ciPassedTrigger?.contextPipeline).toEqual(['prContext', 'contextFiles', 'workItem']);
 		});
 
 		it('planning agent does not have pm:comment-mention trigger (routed to respond-to-planning-comment)', () => {