From 259fd08e9201afa6004b7d65313274970c09a827 Mon Sep 17 00:00:00 2001 From: aaight Date: Fri, 1 May 2026 14:11:00 +0200 Subject: [PATCH 1/2] feat(review): add PM work item context to review agent context pipeline (#1245) Co-authored-by: Cascade Bot --- src/agents/definitions/review.yaml | 13 +++- src/agents/prompts/templates/review.eta | 4 +- tests/unit/agents/definitions/loader.test.ts | 4 +- .../agents/definitions/review.yaml.test.ts | 75 ++++++++++++++++++- 4 files changed, 89 insertions(+), 7 deletions(-) diff --git a/src/agents/definitions/review.yaml b/src/agents/definitions/review.yaml index 9839ebdb..4cab4220 100644 --- a/src/agents/definitions/review.yaml +++ b/src/agents/definitions/review.yaml @@ -36,13 +36,13 @@ triggers: description: Filter PRs by author type options: [own, external, all] defaultValue: own - contextPipeline: [prContext, contextFiles] + contextPipeline: [prContext, contextFiles, workItem] - event: scm:review-requested label: On Review Requested description: Trigger review when a CASCADE persona is explicitly requested as reviewer defaultEnabled: false providers: [github] - contextPipeline: [prContext, contextFiles] + contextPipeline: [prContext, contextFiles, workItem] - event: scm:pr-opened label: PR Opened description: Trigger review when a new PR is opened (without waiting for CI) @@ -55,7 +55,7 @@ triggers: description: Filter PRs by author type options: [own, external, all] defaultValue: own - contextPipeline: [prContext, contextFiles] + contextPipeline: [prContext, contextFiles, workItem] strategies: {} prompts: @@ -81,6 +81,13 @@ prompts: You are not expected to review every skipped file — only fetch when the PR description or another file points you at it. + ## Work item context + + If a **ReadWorkItem** pre-fetch is present, it contains the linked PM work item + (card/issue) with its description, acceptance criteria, and checklists. Use it + to verify that the PR fully addresses the work item's requirements — not just + what the PR description claims. + ## Your task Examine the code changes carefully and submit your review using CreatePRReview. diff --git a/src/agents/prompts/templates/review.eta b/src/agents/prompts/templates/review.eta index aca1a65e..06325c7a 100644 --- a/src/agents/prompts/templates/review.eta +++ b/src/agents/prompts/templates/review.eta @@ -19,7 +19,7 @@ CRITICAL: ### Phase 1: Strategic Analysis -1. **Understand the change**: Read the PR description and all modified files. Understand WHAT changed and WHY. An initial comment has already been posted on the PR acknowledging the review is in progress. +1. **Understand the change**: Read the PR description and all modified files. Understand WHAT changed and WHY. An initial comment has already been posted on the PR acknowledging the review is in progress. If a **ReadWorkItem** injection is present in the pre-fetched context, read the work item's title, description, acceptance criteria, and checklists to understand the original requirements behind this PR. 2. **Explore the codebase** using `ListDirectory`, `ReadFile`, `RipGrep`, and `Tmux` to understand the context of the changes. Read CLAUDE.md and README.md for project conventions. @@ -47,6 +47,8 @@ CRITICAL: 5. **Verify against PR description**: Compare what the PR description claims against what the code actually does. Flag any mismatches between described behavior and actual implementation (e.g., PR says "bottom right" but CSS centers the element). +5a. **Verify against work item** (if ReadWorkItem context is present): Cross-reference the PR changes against the work item's acceptance criteria and checklists. Flag if the PR addresses only part of the work item requirements, or if the implementation deviates from the planned approach described in the work item. A PR description can claim anything — the work item is the ground truth for what was agreed upon. + 6. **Trace execution paths**: For each significant code change, mentally trace what happens at runtime. Don't assume bugs exist - verify them. 7. **Validate your claims**: Before stating "X is a problem": diff --git a/tests/unit/agents/definitions/loader.test.ts b/tests/unit/agents/definitions/loader.test.ts index 83b739f5..3fc3622a 100644 --- a/tests/unit/agents/definitions/loader.test.ts +++ b/tests/unit/agents/definitions/loader.test.ts @@ -162,10 +162,10 @@ describe('YAML agent definitions loader', () => { ]); }); - it('review agent triggers use PR context pipeline', () => { + it('review agent triggers use PR context pipeline with work item', () => { const def = loadBuiltinDefinition('review'); const ciPassedTrigger = def.triggers.find((t) => t.event === 'scm:check-suite-success'); - expect(ciPassedTrigger?.contextPipeline).toEqual(['prContext', 'contextFiles']); + expect(ciPassedTrigger?.contextPipeline).toEqual(['prContext', 'contextFiles', 'workItem']); }); it('planning agent does not have pm:comment-mention trigger (routed to respond-to-planning-comment)', () => { diff --git a/tests/unit/agents/definitions/review.yaml.test.ts b/tests/unit/agents/definitions/review.yaml.test.ts index 2acaea56..345654f1 100644 --- a/tests/unit/agents/definitions/review.yaml.test.ts +++ b/tests/unit/agents/definitions/review.yaml.test.ts @@ -1,6 +1,10 @@ import { readFileSync } from 'node:fs'; import { join } from 'node:path'; -import { describe, expect, it } from 'vitest'; +import { afterEach, describe, expect, it } from 'vitest'; +import { + clearDefinitionCache, + loadBuiltinDefinition, +} from '../../../../src/agents/definitions/loader.js'; /** * The review agent's task prompt lives in `src/agents/definitions/review.yaml`. @@ -29,3 +33,72 @@ describe('review.yaml prompt contract', () => { expect(yamlText.toLowerCase()).not.toContain('full file contents'); }); }); + +/** + * Work item context pipeline contract — guards that the review agent's three + * triggers all include `workItem` in their contextPipeline so that the PM work + * item is always pre-fetched and available to the agent. + */ +describe('review.yaml work item context pipeline contract', () => { + afterEach(() => { + clearDefinitionCache(); + }); + + it('includes workItem in the scm:check-suite-success context pipeline', () => { + const def = loadBuiltinDefinition('review'); + const trigger = def.triggers?.find((t) => t.event === 'scm:check-suite-success'); + expect(trigger).toBeDefined(); + expect(trigger?.contextPipeline).toContain('workItem'); + }); + + it('includes workItem in the scm:review-requested context pipeline', () => { + const def = loadBuiltinDefinition('review'); + const trigger = def.triggers?.find((t) => t.event === 'scm:review-requested'); + expect(trigger).toBeDefined(); + expect(trigger?.contextPipeline).toContain('workItem'); + }); + + it('includes workItem in the scm:pr-opened context pipeline', () => { + const def = loadBuiltinDefinition('review'); + const trigger = def.triggers?.find((t) => t.event === 'scm:pr-opened'); + expect(trigger).toBeDefined(); + expect(trigger?.contextPipeline).toContain('workItem'); + }); + + it('task prompt mentions ReadWorkItem for work item context', () => { + const def = loadBuiltinDefinition('review'); + const taskPrompt = def.prompts?.taskPrompt ?? ''; + expect(taskPrompt).toMatch(/ReadWorkItem/); + }); + + it('task prompt instructs agent to use work item to verify requirements', () => { + const def = loadBuiltinDefinition('review'); + const taskPrompt = def.prompts?.taskPrompt ?? ''; + // Should mention acceptance criteria or requirements + expect(taskPrompt.toLowerCase()).toMatch(/acceptance criteria|requirements/); + }); +}); + +/** + * Work item context in the review.eta system prompt — guards that the prompt + * template instructs the agent to verify code changes against PM work item + * acceptance criteria and requirements. + */ +describe('review.eta work item context contract', () => { + const etaPath = join(__dirname, '../../../../src/agents/prompts/templates/review.eta'); + const etaText = readFileSync(etaPath, 'utf-8'); + + it('mentions ReadWorkItem injection in the review prompt template', () => { + expect(etaText).toMatch(/ReadWorkItem/); + }); + + it('instructs agent to verify code against work item acceptance criteria', () => { + expect(etaText.toLowerCase()).toMatch(/acceptance criteria/); + }); + + it('instructs agent to cross-reference work item requirements', () => { + // Should mention cross-referencing or verifying against work item requirements + expect(etaText.toLowerCase()).toMatch(/work item/); + expect(etaText.toLowerCase()).toMatch(/requirements|acceptance criteria|checklists/); + }); +}); From c75877e95b175dc45d6ab72de440b7d3a8378a01 Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Fri, 1 May 2026 14:20:09 +0200 Subject: [PATCH 2/2] fix(triggers): defer check-suite-success until aggregate state is complete (#1246) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1245 (CI fully green) silently never had a review run. Root cause: the success handler dispatched on the FIRST check_suite event (CodeQL completing at 11:37:43) while CI's slower lint-and-test was still in-progress. The worker spawned, polled 12×10s via `pollWaitForChecks` (`MAX_RETRIES=12 × RETRY_DELAY_MS=10s` = 2 min), bailed at 11:39:17 with `statusCode=0` and `Not all checks passing after polling, skipping agent`. lint-and-test completed cleanly 41 seconds later. Subsequent legitimate `check_suite- completed` events for the SHA all hit the dedup at `check-suite-success.ts` and were silently skipped — the worker's `onBlocked` cleanup ran in the worker's own (empty) in-memory dedup map, never reaching the router process that holds the actual claim. Net: zero review for a clean, mergeable PR; 30-minute window of stuck dedup before the entry aged out. This is not a one-off — it bites every PR where multiple workflows emit separate check_suites and the slower one takes >2 min after the faster one completes. CodeQL completing first is the canonical case. Architectural fix — eliminate worker-side polling. Move the dispatch decision entirely to the router, gated on aggregate `allComplete`. The LAST `check_suite.completed` event for a SHA — whichever polarity — makes the dispatch decision based on full aggregate state. Mirrors the existing `check-suite-failure.handle` defer-on-incomplete shape. Three outcomes now in `check-suite-success.handle`: - !allComplete → skip with "Not all checks complete yet (X/Y still running): ". The next check_suite event re-evaluates. No worker spawned; no dedup claimed. - allComplete + anyFailed → dispatch respond-to-ci (existing #1243 fork). - allComplete + !anyFailed → dispatch review (no waitForChecks flag). Eliminates both root causes simultaneously: nothing dispatches prematurely, nothing has to "release on bail" because nothing bails. The asymmetry between the success handler (polled) and failure handler (deferred) is gone. Deleted: - `src/triggers/github/check-polling.ts` and its test (no caller). - `waitForChecks` exported function from check-suite-success (worker-side polling layer no longer needed; `MAX_RETRIES`/`RETRY_DELAY_MS` constants gone with it). - `waitForChecks?: boolean` field from `TriggerResult` type — every caller is updated atomically by the type deletion. - `if (result.waitForChecks)` block in webhook-handler.ts (the worker-side polling call site + orphan-ack-comment cleanup). - 3 tests in `github-webhook-handler.test.ts` covering the deleted polling code path. Tightened: review-dispatch-dedup TTL 30 min → 5 min. With the new defer behavior, dispatches correlate with actually-running workers, so the longer TTL had no defensive value and amplified the wedged-state incident. Tests: existing `check-suite-success.test.ts` updated — 5+ assertions of `waitForChecks: true` flipped to `.toBeUndefined()`; obsolete `describe('waitForChecks')` block (4 tests) deleted; 2 NEW defer-on- incomplete cases added (the regression pin for the PR #1245 incident + a release-no-dedup-claim guard). Verification: - npx vitest run --project unit-triggers → 955/955 - npx vitest run --project unit-api → 1540/1540 - npx vitest run --project unit-core → 5174/5174 - npm run typecheck + npm run lint clean Co-authored-by: Claude Opus 4.7 (1M context) --- src/triggers/github/check-polling.ts | 46 ----- src/triggers/github/check-suite-success.ts | 124 ++++++----- src/triggers/github/review-dispatch-dedup.ts | 7 +- src/triggers/github/webhook-handler.ts | 39 +--- src/types/index.ts | 2 - .../unit/triggers/check-suite-success.test.ts | 161 ++++++--------- .../triggers/github-webhook-handler.test.ts | 75 +------ .../triggers/github/check-polling.test.ts | 192 ------------------ .../github/review-dispatch-dedup.test.ts | 4 +- 9 files changed, 141 insertions(+), 509 deletions(-) delete mode 100644 src/triggers/github/check-polling.ts delete mode 100644 tests/unit/triggers/github/check-polling.test.ts diff --git a/src/triggers/github/check-polling.ts b/src/triggers/github/check-polling.ts deleted file mode 100644 index f2395ce0..00000000 --- a/src/triggers/github/check-polling.ts +++ /dev/null @@ -1,46 +0,0 @@ -/** - * GitHub CI check polling. - * - * Polls until all CI checks pass before allowing an agent to start. - * Used when a trigger sets `waitForChecks: true`. - */ - -import { withGitHubToken } from '../../github/client.js'; -import { logger } from '../../utils/index.js'; -import { parseRepoFullName } from '../../utils/repo.js'; -import type { TriggerResult } from '../types.js'; - -/** - * Poll until all CI checks pass before starting the agent. - * Returns false if checks don't pass after polling (agent should be skipped). - */ -export async function pollWaitForChecks( - result: TriggerResult, - repoFullName: string, - githubToken: string, -): Promise { - const { waitForChecks } = await import('./check-suite-success.js'); - const { owner, repo } = parseRepoFullName(repoFullName); - const headSha = result.agentInput.headSha as string; - const prNumber = result.prNumber ?? 0; - - logger.info('Waiting for all checks to pass before starting agent', { prNumber, headSha }); - - const checkStatus = await withGitHubToken(githubToken, () => - waitForChecks(owner, repo, headSha, prNumber), - ); - - if (!checkStatus.allPassing) { - logger.info('Not all checks passing after polling, skipping agent', { - prNumber, - headSha, - failedChecks: checkStatus.checkRuns - .filter((c) => c.conclusion !== 'success') - .map((c) => c.name), - }); - return false; - } - - logger.info('All checks passing, proceeding with agent', { prNumber }); - return true; -} diff --git a/src/triggers/github/check-suite-success.ts b/src/triggers/github/check-suite-success.ts index cef66fc1..91fad8b0 100644 --- a/src/triggers/github/check-suite-success.ts +++ b/src/triggers/github/check-suite-success.ts @@ -1,4 +1,4 @@ -import { type CheckSuiteStatus, githubClient } from '../../github/client.js'; +import { githubClient } from '../../github/client.js'; import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/index.js'; import { logger } from '../../utils/logging.js'; import { parseRepoFullName } from '../../utils/repo.js'; @@ -19,61 +19,34 @@ import { resolveWorkItemId, } from './utils.js'; -const MAX_RETRIES = 12; -const RETRY_DELAY_MS = 10_000; - export { recentlyDispatched } from './review-dispatch-dedup.js'; -/** - * Wait for all check suites to complete, retrying when some are still in-progress. - * Returns immediately if all checks have completed (whether passing or failing). - * - * Called by the worker before starting the review agent (not in the trigger handler). - */ -export async function waitForChecks( - owner: string, - repo: string, - headSha: string, - prNumber: number, -): Promise { - let checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha); - if (checkStatus.allPassing) return checkStatus; - - const hasInProgress = checkStatus.checkRuns.some((c) => c.status !== 'completed'); - if (!hasInProgress) return checkStatus; - - for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) { - logger.info('Some checks still in progress, retrying', { - prNumber, - attempt, - maxRetries: MAX_RETRIES, - pending: checkStatus.checkRuns.filter((c) => c.status !== 'completed').map((c) => c.name), - }); - await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS)); - checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha); - if (checkStatus.allPassing) break; - - // If all completed but some failed, no point retrying - const stillRunning = checkStatus.checkRuns.some((c) => c.status !== 'completed'); - if (!stillRunning) break; - } - - return checkStatus; -} - /** * Dispatches an outcome agent when a check_suite completes with success * conclusion on a PR authored by the implementer persona. * - * Two outcomes — chosen from aggregate state across ALL check_runs on the - * head SHA, not just this suite's: - * - `review` — every completed check passes, OR some are still - * in-progress (defer to worker via `waitForChecks`). - * - `respond-to-ci` — every check is complete AND at least one failed. - * Closes the gap where GitHub fires the success - * event last after a fast-failing sibling suite, - * and no later `conclusion=failure` event will fire - * to wake `check-suite-failure`. + * Three outcomes, chosen from aggregate state across ALL check_runs on the + * head SHA — never just from this individual suite. The LAST check_suite + * event for the SHA (regardless of polarity) is the one that makes the + * dispatch decision. Mirrors `check-suite-failure.handle`'s defer-on- + * incomplete shape. + * + * - `respond-to-ci` — every check is complete AND at least one failed. + * Closes the gap where GitHub fires the success + * event last after a fast-failing sibling suite, and no + * later `conclusion=failure` event will fire to wake + * `check-suite-failure`. + * - `review` — every completed check passes. + * - skip — some checks still in-progress. The next check_suite + * event for the SHA will re-evaluate aggregate state. + * No worker spawned; no dedup claimed; no orphan ack + * comment posted. + * + * Why no worker-side polling: PR #1245 (2026-05-01) shipped a doomed worker + * because the success handler dispatched on the FIRST event (CodeQL completing) + * while CI was still running. The worker polled 12×10s and bailed; the dedup + * then blocked the legitimate later success event from re-dispatching. + * Deferring at the handler layer makes that whole class of bug impossible. * * The trigger fires when: * 1. A check_suite completes with success conclusion @@ -175,14 +148,17 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { const workItemId = await resolveWorkItemId(ctx.project.id, prNumber); const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId); - // Mixed-state SHA fork: GitHub fires check_suite.completed once per - // workflow. When workflow A's suite succeeds but workflow B's suite on - // the same SHA failed earlier (and workflow B's failure handler skipped - // with "not all complete yet"), this success event is the one that - // closes the picture. If aggregate state has any failure, dispatch - // respond-to-ci instead of review — otherwise respond-to-ci is lost - // because no later check_suite event with conclusion=failure will fire. - // See PR #176 / 2026-04-30 for the live incident. + // Aggregate-state fork. GitHub fires check_suite.completed once per + // workflow; the LAST event to arrive (regardless of polarity) is the + // one that makes the dispatch decision. Three outcomes: + // - !allComplete → defer (skip). Next event re-evaluates. + // - allComplete + any failed → respond-to-ci (closes the gap where + // a fast-failing sibling suite would lose + // dispatch — see PR #176 incident). + // - allComplete + all passing → review. + // No `waitForChecks: true` worker-side polling — that path was deleted + // after PR #1245 (2026-05-01) where the worker polled 2 min, bailed, + // and the cross-process dedup blocked all retries. const checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha); const allComplete = checkStatus.checkRuns.every((cr) => cr.status === 'completed'); const anyFailed = checkStatus.checkRuns.some( @@ -191,7 +167,24 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { cr.conclusion === 'timed_out' || cr.conclusion === 'action_required', ); - if (allComplete && anyFailed) { + + if (!allComplete) { + const incomplete = checkStatus.checkRuns + .filter((cr) => cr.status !== 'completed') + .map((cr) => cr.name); + logger.info('Not all checks complete yet, waiting for next check_suite event', { + handler: this.name, + prNumber, + totalChecks: checkStatus.totalCount, + incompleteChecks: incomplete, + }); + return skip( + this.name, + `Not all checks complete yet (${incomplete.length}/${checkStatus.totalCount} still running): ${incomplete.join(', ')}`, + ); + } + + if (anyFailed) { return dispatchRespondToCi({ ctx, prNumber, @@ -205,7 +198,8 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { }); } - // Skip if the reviewer persona's latest review already covers the current HEAD SHA + // allComplete && !anyFailed → review path. Skip if the reviewer + // persona's latest review already covers the current HEAD SHA. const reviews = await githubClient.getPRReviews(owner, repo, prNumber); // Use persona identities to identify reviewer bot's reviews @@ -250,12 +244,9 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { ); } - // The trigger decision is made — the review agent should run. - // Actual check polling (waitForChecks) is deferred to the worker via the flag. - // GitHub fires a check_suite webhook per individual suite completion. - // When multiple suites exist, the first webhook arrives before other suites finish. - // The worker will poll until all checks pass before starting the agent. - logger.info('Check-suite success trigger matched — deferring check polling to worker', { + // Aggregate state is already verified all-passing at this point — no + // worker-side polling needed. Dispatch immediately. + logger.info('Check-suite success trigger matched — dispatching review', { prNumber, workItemId, headSha, @@ -280,7 +271,6 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { workItemId, workItemUrl, workItemTitle, - waitForChecks: true, onBlocked: () => releaseReviewDispatch(dedupKey), }; } diff --git a/src/triggers/github/review-dispatch-dedup.ts b/src/triggers/github/review-dispatch-dedup.ts index d33dbe59..9abee506 100644 --- a/src/triggers/github/review-dispatch-dedup.ts +++ b/src/triggers/github/review-dispatch-dedup.ts @@ -1,6 +1,11 @@ import { logger } from '../../utils/logging.js'; -const DEDUP_TTL_MS = 30 * 60 * 1000; // 30 minutes +// 5-minute TTL — short enough to never wedge the dispatch path for a clean +// PR. Originally 30 min; shortened in PR #1245 follow-up after the +// success-handler refactor eliminated the worker-bail-out path. With the new +// defer-on-incomplete behavior, dispatches correlate with actually-running +// workers, so the longer TTL had no defensive value and amplified incidents. +const DEDUP_TTL_MS = 5 * 60 * 1000; export const recentlyDispatched = new Map(); diff --git a/src/triggers/github/webhook-handler.ts b/src/triggers/github/webhook-handler.ts index 762ab7db..2492ce5d 100644 --- a/src/triggers/github/webhook-handler.ts +++ b/src/triggers/github/webhook-handler.ts @@ -12,13 +12,11 @@ */ import { isPMFocusedAgent } from '../../agents/definitions/loader.js'; -import { githubClient, withGitHubToken } from '../../github/client.js'; +import { withGitHubToken } from '../../github/client.js'; import { getPersonaToken, resolvePersonaIdentities } from '../../github/personas.js'; import { extractGitHubContext, generateAckMessage } from '../../router/ackMessageGenerator.js'; import type { CascadeConfig, ProjectConfig, TriggerContext } from '../../types/index.js'; import { logger, startWatchdog } from '../../utils/index.js'; -import { parseRepoFullName } from '../../utils/repo.js'; -import { safeOperation } from '../../utils/safeOperation.js'; import type { TriggerRegistry } from '../registry.js'; import { withAgentTypeConcurrency } from '../shared/concurrency.js'; import { withPMScope } from '../shared/credential-scope.js'; @@ -26,7 +24,6 @@ import { postPMAckComment } from '../shared/pm-ack.js'; import { runAgentWithCredentials } from '../shared/webhook-execution.js'; import type { TriggerResult } from '../types.js'; import { postAcknowledgmentComment, updateInitialCommentWithError } from './ack-comments.js'; -import { pollWaitForChecks } from './check-polling.js'; import { GitHubWebhookIntegration } from './integration.js'; const integration = new GitHubWebhookIntegration(); @@ -195,7 +192,6 @@ async function runGitHubAgent( } } -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: webhook orchestration with ack cleanup export async function processGitHubWebhook( payload: unknown, eventType: string, @@ -244,35 +240,10 @@ export async function processGitHubWebhook( if (ackCommentId) result.agentInput.ackCommentId = ackCommentId; if (ackMessage) result.agentInput.ackMessage = ackMessage; - // Poll until all CI checks pass before starting agent (deferred from trigger) - if (result.waitForChecks) { - const githubToken = await getPersonaToken(project.id, 'implementation'); - try { - const checksOk = await pollWaitForChecks(result, event.projectIdentifier, githubToken); - if (!checksOk) { - result.onBlocked?.(); - // Clean up orphaned ack comment so the PR doesn't show a misleading "Reviewing" message - const ackId = result.agentInput.ackCommentId as number | undefined; - if (ackId && event.projectIdentifier) { - const { owner, repo } = parseRepoFullName(event.projectIdentifier); - const deleteToken = await getPersonaToken( - project.id, - result.agentType ?? 'implementation', - ); - await withGitHubToken(deleteToken, () => - safeOperation(() => githubClient.deletePRComment(owner, repo, ackId), { - action: 'delete ack comment after check polling timeout', - prNumber: result.prNumber, - }), - ); - } - return; - } - } catch (err) { - result.onBlocked?.(); - throw err; - } - } + // Worker-side `waitForChecks` polling was removed in PR #1245 follow-up: + // the success handler now defers (skips) on incomplete aggregate state, so + // no TriggerResult ever reaches this point with the flag set, and no + // worker bail-out can leave the dedup wedged. logger.info('GitHub trigger matched', { agentType: result.agentType || '(no agent)', diff --git a/src/types/index.ts b/src/types/index.ts index bdc05af1..f5e76d86 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -107,8 +107,6 @@ export interface TriggerResult { prUrl?: string; /** Display title of the pull request. */ prTitle?: string; - /** When true, the worker must poll for all CI checks to pass before starting the agent. */ - waitForChecks?: boolean; /** Called when the router cannot enqueue the job (work-item lock, concurrency limit). * Allows the trigger handler to undo side-effects like dedup marking. */ onBlocked?: () => void; diff --git a/tests/unit/triggers/check-suite-success.test.ts b/tests/unit/triggers/check-suite-success.test.ts index cd2e9cae..e0109a09 100644 --- a/tests/unit/triggers/check-suite-success.test.ts +++ b/tests/unit/triggers/check-suite-success.test.ts @@ -1,4 +1,4 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { mockConfigResolverModule, mockGitHubClientModule, @@ -19,7 +19,6 @@ import { resetFixAttempts } from '../../../src/triggers/github/check-suite-failu import { CheckSuiteSuccessTrigger, recentlyDispatched, - waitForChecks, } from '../../../src/triggers/github/check-suite-success.js'; import { ReviewRequestedTrigger } from '../../../src/triggers/github/review-requested.js'; import type { TriggerContext } from '../../../src/triggers/types.js'; @@ -176,7 +175,7 @@ describe('CheckSuiteSuccessTrigger', () => { }); describe('handle', () => { - it('returns review result with waitForChecks flag when PR matches', async () => { + it('returns review result without waitForChecks flag when PR matches and aggregate is all-passing', async () => { vi.mocked(githubClient.getPR).mockResolvedValue({ number: 42, title: 'Test PR', @@ -218,9 +217,11 @@ describe('CheckSuiteSuccessTrigger', () => { }, prNumber: 42, workItemId: 'abc123', - waitForChecks: true, }), ); + // waitForChecks is gone — handler defers on incomplete state instead + // of dispatching with a worker-side polling flag (PR #1245 incident). + expect(result?.waitForChecks).toBeUndefined(); expect(result?.onBlocked).toBeTypeOf('function'); }); @@ -432,7 +433,7 @@ describe('CheckSuiteSuccessTrigger', () => { expect(result).not.toBeNull(); expect(result?.agentType).toBe('review'); - expect(result?.waitForChecks).toBe(true); + expect(result?.waitForChecks).toBeUndefined(); }); it('skips when latest of multiple reviews covers current HEAD', async () => { @@ -530,7 +531,7 @@ describe('CheckSuiteSuccessTrigger', () => { expect(result).not.toBeNull(); expect(result?.agentType).toBe('review'); - expect(result?.waitForChecks).toBe(true); + expect(result?.waitForChecks).toBeUndefined(); }); it('proceeds when PR has reviews from other users only', async () => { @@ -568,7 +569,7 @@ describe('CheckSuiteSuccessTrigger', () => { expect(result).not.toBeNull(); expect(result?.agentType).toBe('review'); - expect(result?.waitForChecks).toBe(true); + expect(result?.waitForChecks).toBeUndefined(); }); it('fires without work item when DB has no link', async () => { @@ -599,7 +600,7 @@ describe('CheckSuiteSuccessTrigger', () => { expect(result).not.toBeNull(); expect(result?.workItemId).toBeUndefined(); expect(result?.agentInput.workItemId).toBeUndefined(); - expect(result?.waitForChecks).toBe(true); + expect(result?.waitForChecks).toBeUndefined(); }); it('skips duplicate check_suite events for the same PR+SHA', async () => { @@ -757,7 +758,7 @@ describe('CheckSuiteSuccessTrigger', () => { expect(result).not.toBeNull(); expect(result?.workItemId).toBe('db-work-item'); - expect(result?.waitForChecks).toBe(true); + expect(result?.waitForChecks).toBeUndefined(); }); it('fires correctly when pull_requests is empty but head_branch has PR ref', async () => { @@ -803,7 +804,7 @@ describe('CheckSuiteSuccessTrigger', () => { prBranch: 'feature/test', headSha: 'sha123', }); - expect(result?.waitForChecks).toBe(true); + expect(result?.waitForChecks).toBeUndefined(); }); }); @@ -883,7 +884,14 @@ describe('CheckSuiteSuccessTrigger', () => { expect(result?.agentType).toBe('review'); }); - it('dispatches review with waitForChecks when not all checks complete yet', async () => { + // Defer-on-incomplete: PR #1245 (2026-05-01) shipped a doomed worker + // because the success handler dispatched on the FIRST check_suite event + // (CodeQL completing) while CI's slower lint-and-test was still running. + // Worker polled 12×10s and bailed; the dedup then blocked the legitimate + // later success event. Mirrors the existing check-suite-failure deferral + // shape — the LAST check_suite event makes the dispatch decision based + // on full aggregate state. + it('skips with "Not all checks complete yet" when an in-progress check exists, naming the pending check', async () => { vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); vi.mocked(githubClient.getPRReviews).mockResolvedValue([]); vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ @@ -891,7 +899,7 @@ describe('CheckSuiteSuccessTrigger', () => { totalCount: 2, checkRuns: [ { name: 'CI', status: 'completed', conclusion: 'success' }, - { name: 'E2B Template Rebuild', status: 'in_progress', conclusion: null }, + { name: 'lint-and-test', status: 'in_progress', conclusion: null }, ], }); @@ -904,8 +912,49 @@ describe('CheckSuiteSuccessTrigger', () => { const result = await trigger.handle(ctx); - expect(result?.agentType).toBe('review'); - expect(result?.waitForChecks).toBe(true); + expectSkip(result, /Not all checks complete yet.*lint-and-test/); + // Crucially, no agent dispatched — the worker bail-out path is gone. + expect(result?.agentType).toBeNull(); + // Dedup must NOT be claimed for a deferred event; the next + // check_suite event for this SHA must be free to make the call. + expect(result?.onBlocked).toBeUndefined(); + }); + + it('does NOT claim the review-dispatch dedup when deferring', async () => { + vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); + vi.mocked(githubClient.getPRReviews).mockResolvedValue([]); + vi.mocked(githubClient.getCheckSuiteStatus) + .mockResolvedValueOnce({ + // First event: CI still in progress → defer. + allPassing: false, + totalCount: 2, + checkRuns: [ + { name: 'CodeQL', status: 'completed', conclusion: 'success' }, + { name: 'lint-and-test', status: 'in_progress', conclusion: null }, + ], + }) + .mockResolvedValueOnce({ + // Later event: everything complete → must dispatch. + allPassing: true, + totalCount: 2, + checkRuns: [ + { name: 'CodeQL', status: 'completed', conclusion: 'success' }, + { name: 'lint-and-test', status: 'completed', conclusion: 'success' }, + ], + }); + + const ctx: TriggerContext = { + project: mockProject, + source: 'github', + payload: makeCheckSuitePayload(), + personaIdentities: mockPersonaIdentities, + }; + + const firstResult = await trigger.handle(ctx); + expectSkip(firstResult, /Not all checks complete yet/); + + const secondResult = await trigger.handle(ctx); + expect(secondResult?.agentType).toBe('review'); }); it('dispatches respond-to-ci on timed_out conclusion', async () => { @@ -1194,83 +1243,7 @@ describe('CheckSuiteSuccessTrigger', () => { }); }); -// ========================================================================== -// waitForChecks() — exported standalone function -// ========================================================================== - -describe('waitForChecks', () => { - beforeEach(() => { - vi.useFakeTimers(); - }); - - afterEach(() => { - vi.useRealTimers(); - }); - - it('returns immediately when all checks are passing', async () => { - vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ - allPassing: true, - checkRuns: [], - }); - - const result = await waitForChecks('owner', 'repo', 'sha123', 42); - - expect(result.allPassing).toBe(true); - expect(githubClient.getCheckSuiteStatus).toHaveBeenCalledTimes(1); - }); - - it('returns immediately when all checks completed (some failed) — no point retrying', async () => { - vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ - allPassing: false, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'failure' }], - }); - - const resultPromise = waitForChecks('owner', 'repo', 'sha123', 42); - // No timer needed since all completed - const result = await resultPromise; - - expect(result.allPassing).toBe(false); - // Only called once (no in-progress checks → no retry) - expect(githubClient.getCheckSuiteStatus).toHaveBeenCalledTimes(1); - }); - - it('retries when some checks are still in-progress, returns when all pass', async () => { - vi.mocked(githubClient.getCheckSuiteStatus) - .mockResolvedValueOnce({ - allPassing: false, - checkRuns: [{ name: 'ci', status: 'in_progress', conclusion: null }], - }) - .mockResolvedValue({ - allPassing: true, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'success' }], - }); - - const resultPromise = waitForChecks('owner', 'repo', 'sha123', 42); - // Advance past the RETRY_DELAY_MS (10000ms) - await vi.runAllTimersAsync(); - const result = await resultPromise; - - expect(result.allPassing).toBe(true); - expect(githubClient.getCheckSuiteStatus).toHaveBeenCalledTimes(2); - }); - - it('stops retrying when all checks complete (even if failed)', async () => { - vi.mocked(githubClient.getCheckSuiteStatus) - .mockResolvedValueOnce({ - allPassing: false, - checkRuns: [{ name: 'ci', status: 'in_progress', conclusion: null }], - }) - .mockResolvedValue({ - allPassing: false, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'failure' }], - }); - - const resultPromise = waitForChecks('owner', 'repo', 'sha123', 42); - await vi.runAllTimersAsync(); - const result = await resultPromise; - - expect(result.allPassing).toBe(false); - // Called once initially + once after retry (then stops since all completed) - expect(githubClient.getCheckSuiteStatus).toHaveBeenCalledTimes(2); - }); -}); +// waitForChecks() and the worker-side polling layer were deleted: the success +// handler now defers (skips) when aggregate state is incomplete, and the LAST +// check_suite event makes the dispatch decision. See the +// `mixed-state SHA — aggregate-status fork` describe block above. diff --git a/tests/unit/triggers/github-webhook-handler.test.ts b/tests/unit/triggers/github-webhook-handler.test.ts index 32651de4..519c570e 100644 --- a/tests/unit/triggers/github-webhook-handler.test.ts +++ b/tests/unit/triggers/github-webhook-handler.test.ts @@ -85,10 +85,6 @@ vi.mock('../../../src/triggers/github/ack-comments.js', () => ({ updateInitialCommentWithError: vi.fn().mockResolvedValue(undefined), })); -vi.mock('../../../src/triggers/github/check-polling.js', () => ({ - pollWaitForChecks: vi.fn().mockResolvedValue(true), -})); - vi.mock('../../../src/utils/index.js', () => ({ logger: { debug: vi.fn(), @@ -100,12 +96,10 @@ vi.mock('../../../src/utils/index.js', () => ({ })); import { isPMFocusedAgent } from '../../../src/agents/definitions/loader.js'; -import { githubClient } from '../../../src/github/client.js'; import { postAcknowledgmentComment, updateInitialCommentWithError, } from '../../../src/triggers/github/ack-comments.js'; -import { pollWaitForChecks } from '../../../src/triggers/github/check-polling.js'; import { processGitHubWebhook } from '../../../src/triggers/github/webhook-handler.js'; import { withAgentTypeConcurrency } from '../../../src/triggers/shared/concurrency.js'; import { postPMAckComment } from '../../../src/triggers/shared/pm-ack.js'; @@ -245,71 +239,10 @@ describe('processGitHubWebhook', () => { expect(mockRunAgentWithCredentials).not.toHaveBeenCalled(); }); - it('deletes ack comment when pollWaitForChecks returns false', async () => { - vi.mocked(pollWaitForChecks).mockResolvedValueOnce(false); - const onBlocked = vi.fn(); - const registry = { - dispatch: vi.fn().mockResolvedValue({ - agentType: 'review', - agentInput: { repoFullName: 'owner/repo', headSha: 'abc123' }, - prNumber: 42, - waitForChecks: true, - onBlocked, - }), - }; - - await processGitHubWebhook( - validPayload, - 'check_suite', - registry as never, - 999, // ackCommentId from router - '👀 Reviewing', - ); - - expect(vi.mocked(githubClient.deletePRComment)).toHaveBeenCalledWith('owner', 'repo', 999); - expect(mockRunAgentWithCredentials).not.toHaveBeenCalled(); - expect(onBlocked).toHaveBeenCalledOnce(); - }); - - it('does not attempt ack deletion when no ackCommentId on check timeout', async () => { - vi.mocked(pollWaitForChecks).mockResolvedValueOnce(false); - const onBlocked = vi.fn(); - const registry = { - dispatch: vi.fn().mockResolvedValue({ - agentType: 'review', - agentInput: { repoFullName: 'owner/repo', headSha: 'abc123' }, - prNumber: 42, - waitForChecks: true, - onBlocked, - }), - }; - - await processGitHubWebhook(validPayload, 'check_suite', registry as never); - - expect(vi.mocked(githubClient.deletePRComment)).not.toHaveBeenCalled(); - expect(mockRunAgentWithCredentials).not.toHaveBeenCalled(); - expect(onBlocked).toHaveBeenCalledOnce(); - }); - - it('releases the claim when pollWaitForChecks throws before review starts', async () => { - vi.mocked(pollWaitForChecks).mockRejectedValueOnce(new Error('GitHub API timeout')); - const onBlocked = vi.fn(); - const registry = { - dispatch: vi.fn().mockResolvedValue({ - agentType: 'review', - agentInput: { repoFullName: 'owner/repo', headSha: 'abc123' }, - prNumber: 42, - waitForChecks: true, - onBlocked, - }), - }; - - await expect( - processGitHubWebhook(validPayload, 'check_suite', registry as never), - ).rejects.toThrow('GitHub API timeout'); - expect(onBlocked).toHaveBeenCalledOnce(); - expect(mockRunAgentWithCredentials).not.toHaveBeenCalled(); - }); + // `pollWaitForChecks` polling tests deleted: the worker-side polling layer + // was removed in PR #1245 follow-up. Aggregate-state defer-on-incomplete + // at the handler layer (see check-suite-success.test.ts) makes the worker + // bail-out path unreachable. it('posts PM ack to PM tool when PM-focused agent triggered from GitHub', async () => { vi.mocked(isPMFocusedAgent).mockResolvedValue(true); diff --git a/tests/unit/triggers/github/check-polling.test.ts b/tests/unit/triggers/github/check-polling.test.ts deleted file mode 100644 index 739d8dbe..00000000 --- a/tests/unit/triggers/github/check-polling.test.ts +++ /dev/null @@ -1,192 +0,0 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -vi.mock('../../../../src/github/client.js', () => ({ - withGitHubToken: vi.fn().mockImplementation((_token: string, fn: () => Promise) => fn()), -})); - -vi.mock('../../../../src/utils/repo.js', async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - parseRepoFullName: vi.fn().mockReturnValue({ owner: 'acme', repo: 'myapp' }), - }; -}); - -vi.mock('../../../../src/utils/index.js', () => ({ - logger: { - info: vi.fn(), - warn: vi.fn(), - error: vi.fn(), - }, -})); - -// Mock the dynamic import of check-suite-success.js -vi.mock('../../../../src/triggers/github/check-suite-success.js', () => ({ - waitForChecks: vi.fn(), -})); - -import { withGitHubToken } from '../../../../src/github/client.js'; -import { pollWaitForChecks } from '../../../../src/triggers/github/check-polling.js'; -import type { TriggerResult } from '../../../../src/triggers/types.js'; -import { parseRepoFullName } from '../../../../src/utils/repo.js'; - -const mockWithGitHubToken = vi.mocked(withGitHubToken); -const mockParseRepoFullName = vi.mocked(parseRepoFullName); - -function makeResult( - overrides: Partial }> = {}, -): TriggerResult { - return { - agentType: 'review', - prNumber: 42, - agentInput: { - repoFullName: 'acme/myapp', - headSha: 'abc123', - }, - ...overrides, - } as TriggerResult; -} - -describe('pollWaitForChecks', () => { - beforeEach(async () => { - mockParseRepoFullName.mockReturnValue({ owner: 'acme', repo: 'myapp' }); - mockWithGitHubToken.mockImplementation( - (_token: string, fn: () => Promise) => fn() as Promise, - ); - }); - - it('returns true when all checks are passing', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - vi.mocked(waitForChecks).mockResolvedValue({ - totalCount: 2, - allPassing: true, - checkRuns: [ - { name: 'lint', status: 'completed', conclusion: 'success' }, - { name: 'test', status: 'completed', conclusion: 'success' }, - ], - }); - - const result = makeResult(); - const passing = await pollWaitForChecks(result, 'acme/myapp', 'ghp_token'); - - expect(passing).toBe(true); - }); - - it('returns false when some checks are failing', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - vi.mocked(waitForChecks).mockResolvedValue({ - totalCount: 2, - allPassing: false, - checkRuns: [ - { name: 'lint', status: 'completed', conclusion: 'success' }, - { name: 'test', status: 'completed', conclusion: 'failure' }, - ], - }); - - const result = makeResult(); - const passing = await pollWaitForChecks(result, 'acme/myapp', 'ghp_token'); - - expect(passing).toBe(false); - }); - - it('calls parseRepoFullName with the provided repoFullName', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - vi.mocked(waitForChecks).mockResolvedValue({ - totalCount: 1, - allPassing: true, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'success' }], - }); - - const result = makeResult(); - await pollWaitForChecks(result, 'acme/myapp', 'ghp_token'); - - expect(mockParseRepoFullName).toHaveBeenCalledWith('acme/myapp'); - }); - - it('calls withGitHubToken with the provided token', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - vi.mocked(waitForChecks).mockResolvedValue({ - totalCount: 1, - allPassing: true, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'success' }], - }); - - const result = makeResult(); - await pollWaitForChecks(result, 'acme/myapp', 'ghp_my_token'); - - expect(mockWithGitHubToken).toHaveBeenCalledWith('ghp_my_token', expect.any(Function)); - }); - - it('passes owner, repo, headSha, and prNumber to waitForChecks', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - const mockWaitForChecks = vi.mocked(waitForChecks); - mockWaitForChecks.mockResolvedValue({ - totalCount: 1, - allPassing: true, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'success' }], - }); - - const result = makeResult({ - prNumber: 99, - agentInput: { repoFullName: 'acme/myapp', headSha: 'deadbeef' }, - } as Partial); - - await pollWaitForChecks(result, 'acme/myapp', 'ghp_token'); - - expect(mockWaitForChecks).toHaveBeenCalledWith('acme', 'myapp', 'deadbeef', 99); - }); - - it('returns false and logs failed check names when some checks fail', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - vi.mocked(waitForChecks).mockResolvedValue({ - totalCount: 3, - allPassing: false, - checkRuns: [ - { name: 'lint', status: 'completed', conclusion: 'success' }, - { name: 'test', status: 'completed', conclusion: 'failure' }, - { name: 'build', status: 'completed', conclusion: 'failure' }, - ], - }); - - const { logger } = await import('../../../../src/utils/index.js'); - const result = makeResult(); - const passing = await pollWaitForChecks(result, 'acme/myapp', 'ghp_token'); - - expect(passing).toBe(false); - expect(vi.mocked(logger.info)).toHaveBeenCalledWith( - expect.stringContaining('Not all checks passing'), - expect.objectContaining({ - failedChecks: expect.arrayContaining(['test', 'build']), - }), - ); - }); - - it('uses prNumber 0 when result.prNumber is undefined', async () => { - const { waitForChecks } = await import( - '../../../../src/triggers/github/check-suite-success.js' - ); - const mockWaitForChecks = vi.mocked(waitForChecks); - mockWaitForChecks.mockResolvedValue({ - totalCount: 1, - allPassing: true, - checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'success' }], - }); - - const result = makeResult({ prNumber: undefined }); - await pollWaitForChecks(result, 'acme/myapp', 'ghp_token'); - - expect(mockWaitForChecks).toHaveBeenCalledWith('acme', 'myapp', 'abc123', 0); - }); -}); diff --git a/tests/unit/triggers/github/review-dispatch-dedup.test.ts b/tests/unit/triggers/github/review-dispatch-dedup.test.ts index 9d6200f4..b60cbb0f 100644 --- a/tests/unit/triggers/github/review-dispatch-dedup.test.ts +++ b/tests/unit/triggers/github/review-dispatch-dedup.test.ts @@ -18,7 +18,7 @@ import { logger } from '../../../../src/utils/logging.js'; const mockLogger = vi.mocked(logger); -const DEDUP_TTL_MS = 30 * 60 * 1000; // 30 minutes +const DEDUP_TTL_MS = 5 * 60 * 1000; // 5 minutes (was 30 min before PR #1245 follow-up) describe('buildReviewDispatchKey', () => { it('returns correct format owner/repo:prNumber:headSha', () => { @@ -112,7 +112,7 @@ describe('claimReviewDispatch', () => { ); }); - it('TTL expiration: a previously claimed key can be reclaimed after 30+ minutes', () => { + it('TTL expiration: a previously claimed key can be reclaimed after 5+ minutes', () => { const key = buildReviewDispatchKey('acme', 'repo', 10, 'sha10'); claimReviewDispatch(key, 'check-suite-success', { prNumber: 10, headSha: 'sha10' });