diff --git a/src/triggers/github/check-suite-failure.ts b/src/triggers/github/check-suite-failure.ts index 68c3407d..28d7ad2d 100644 --- a/src/triggers/github/check-suite-failure.ts +++ b/src/triggers/github/check-suite-failure.ts @@ -3,16 +3,18 @@ import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/ import { logger } from '../../utils/logging.js'; import { parseRepoFullName } from '../../utils/repo.js'; import { - gateAttemptLimit, gateBaseBranch, gateCascadePersona, gateTriggerEnabled, requirePersonaIdentities, } from '../shared/gates.js'; import { skip } from '../shared/skip.js'; +import { dispatchRespondToCi, resetFixAttempts } from './respond-to-ci-dispatch.js'; import { type GitHubCheckSuitePayload, isGitHubCheckSuitePayload } from './types.js'; import { parsePrNumberFromRef, resolveWorkItemDisplayData, resolveWorkItemId } from './utils.js'; +export { resetFixAttempts }; + /** * Resolve a PR number from a check_suite payload. * Tries pull_requests[], then refs/pull/{N}/head parsing, then a GitHub API lookup by branch. @@ -45,15 +47,6 @@ async function resolvePrNumber( return pr.number; } -// Track fix attempts per PR to prevent infinite loops -const fixAttempts = new Map(); -const MAX_ATTEMPTS = 3; - -// Export for cleanup by PRReadyToMergeTrigger -export function resetFixAttempts(prNumber: number): void { - fixAttempts.delete(prNumber); -} - export class CheckSuiteFailureTrigger implements TriggerHandler { name = 'check-suite-failure'; description = @@ -73,6 +66,10 @@ export class CheckSuiteFailureTrigger implements TriggerHandler { } async handle(ctx: TriggerContext): Promise { + // Early-exit on disabled trigger to avoid GitHub API calls when not needed. + // `dispatchRespondToCi` re-checks the same gate (it's the single source of + // truth for the success-handler's mixed-state fork too); the redundant call + // here is one DB lookup, which the trigger-enabled cache absorbs. const enabled = await gateTriggerEnabled( ctx.project.id, 'respond-to-ci', @@ -154,58 +151,16 @@ export class CheckSuiteFailureTrigger implements TriggerHandler { ); } - // Check attempt limit to prevent infinite loops. Side effect (PR - // comment) is handler-local because the warning text is contractual, - // not part of the gate's pure logic. - const attempts = fixAttempts.get(prNumber) || 0; - const limitSkip = gateAttemptLimit(attempts, MAX_ATTEMPTS, prNumber, this.name); - if (limitSkip) { - await githubClient.createPRComment( - owner, - repo, - prNumber, - '⚠️ Unable to automatically fix failing checks after 3 attempts. Manual intervention required.', - ); - return limitSkip; - } - - // Increment attempt counter - fixAttempts.set(prNumber, attempts + 1); - - logger.info('Check suite failure on implementer PR - all checks complete', { - prNumber, - workItemId, - attempt: attempts + 1, - totalChecks: checkStatus.totalCount, - failedChecks: checkStatus.checkRuns - .filter( - (cr) => - cr.conclusion === 'failure' || - cr.conclusion === 'timed_out' || - cr.conclusion === 'action_required', - ) - .map((cr) => cr.name), - }); - - const prBranch = prDetails.headRef; - - return { - agentType: 'respond-to-ci', - agentInput: { - prNumber, - prBranch, - repoFullName: payload.repository.full_name, - headSha, - triggerType: 'check-failure', - triggerEvent: 'scm:check-suite-failure', - workItemId: workItemId, - }, + return dispatchRespondToCi({ + ctx, prNumber, - prUrl: prDetails.htmlUrl, - prTitle: prDetails.title, + prDetails, + payload, workItemId, workItemUrl, workItemTitle, - }; + checkStatus, + handlerName: this.name, + }); } } diff --git a/src/triggers/github/check-suite-success.ts b/src/triggers/github/check-suite-success.ts index b35cf731..cef66fc1 100644 --- a/src/triggers/github/check-suite-success.ts +++ b/src/triggers/github/check-suite-success.ts @@ -5,6 +5,7 @@ import { parseRepoFullName } from '../../utils/repo.js'; import { gateBaseBranch } from '../shared/gates.js'; import { skip } from '../shared/skip.js'; import { checkTriggerEnabledWithParams } from '../shared/trigger-check.js'; +import { dispatchRespondToCi } from './respond-to-ci-dispatch.js'; import { buildReviewDispatchKey, claimReviewDispatch, @@ -61,12 +62,22 @@ export async function waitForChecks( } /** - * Triggers review agent when all CI checks pass on a PR authored by the implementer persona. + * Dispatches an outcome agent when a check_suite completes with success + * conclusion on a PR authored by the implementer persona. * - * This trigger fires when: + * Two outcomes — chosen from aggregate state across ALL check_runs on the + * head SHA, not just this suite's: + * - `review` — every completed check passes, OR some are still + * in-progress (defer to worker via `waitForChecks`). + * - `respond-to-ci` — every check is complete AND at least one failed. + * Closes the gap where GitHub fires the success + * event last after a fast-failing sibling suite, + * and no later `conclusion=failure` event will fire + * to wake `check-suite-failure`. + * + * The trigger fires when: * 1. A check_suite completes with success conclusion * 2. The PR author matches the configured author mode (own/external/all) - * 3. All checks are actually passing (verified via API) * * Work item resolution uses the pr_work_items DB table only. * The trigger fires even without a linked work item — agents run, PM updates are simply skipped. @@ -164,6 +175,36 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { const workItemId = await resolveWorkItemId(ctx.project.id, prNumber); const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId); + // Mixed-state SHA fork: GitHub fires check_suite.completed once per + // workflow. When workflow A's suite succeeds but workflow B's suite on + // the same SHA failed earlier (and workflow B's failure handler skipped + // with "not all complete yet"), this success event is the one that + // closes the picture. If aggregate state has any failure, dispatch + // respond-to-ci instead of review — otherwise respond-to-ci is lost + // because no later check_suite event with conclusion=failure will fire. + // See PR #176 / 2026-04-30 for the live incident. + const checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha); + const allComplete = checkStatus.checkRuns.every((cr) => cr.status === 'completed'); + const anyFailed = checkStatus.checkRuns.some( + (cr) => + cr.conclusion === 'failure' || + cr.conclusion === 'timed_out' || + cr.conclusion === 'action_required', + ); + if (allComplete && anyFailed) { + return dispatchRespondToCi({ + ctx, + prNumber, + prDetails, + payload, + workItemId, + workItemUrl, + workItemTitle, + checkStatus, + handlerName: this.name, + }); + } + // Skip if the reviewer persona's latest review already covers the current HEAD SHA const reviews = await githubClient.getPRReviews(owner, repo, prNumber); diff --git a/src/triggers/github/respond-to-ci-dispatch.ts b/src/triggers/github/respond-to-ci-dispatch.ts new file mode 100644 index 00000000..43fd6283 --- /dev/null +++ b/src/triggers/github/respond-to-ci-dispatch.ts @@ -0,0 +1,103 @@ +import { type CheckSuiteStatus, githubClient } from '../../github/client.js'; +import type { TriggerContext, TriggerResult } from '../../types/index.js'; +import { logger } from '../../utils/logging.js'; +import { parseRepoFullName } from '../../utils/repo.js'; +import { gateAttemptLimit, gateTriggerEnabled } from '../shared/gates.js'; +import type { GitHubCheckSuitePayload } from './types.js'; + +// Per-PR fix-attempt counter shared across the failure handler and the +// success handler's mixed-state fork. The lifecycle is: increment on dispatch, +// reset via `resetFixAttempts` (called by tests only — pr-ready-to-merge does +// not auto-reset; the counter ages out with the in-process Map). +const fixAttempts = new Map(); +const MAX_ATTEMPTS = 3; + +export function resetFixAttempts(prNumber: number): void { + fixAttempts.delete(prNumber); +} + +export interface PRDetails { + headRef: string; + htmlUrl: string; + title: string; +} + +/** + * Build a respond-to-ci dispatch result, applying the trigger-enabled gate + + * per-PR attempt limit. Both `check-suite-failure` (when the failure event + * itself arrives last) and `check-suite-success` (when a success event closes + * a mixed-state SHA — see #1241) call this so the dispatch contract is + * single-sourced. + * + * Returns either a respond-to-ci `TriggerResult` ready to enqueue, or a + * structured skip when the trigger is disabled or the attempt limit is hit. + */ +export async function dispatchRespondToCi(opts: { + ctx: TriggerContext; + prNumber: number; + prDetails: PRDetails; + payload: GitHubCheckSuitePayload; + workItemId: string | undefined; + workItemUrl: string | undefined; + workItemTitle: string | undefined; + checkStatus: CheckSuiteStatus; + handlerName: string; +}): Promise { + const enabled = await gateTriggerEnabled( + opts.ctx.project.id, + 'respond-to-ci', + 'scm:check-suite-failure', + opts.handlerName, + ); + if (enabled) return enabled; + + const attempts = fixAttempts.get(opts.prNumber) ?? 0; + const limitSkip = gateAttemptLimit(attempts, MAX_ATTEMPTS, opts.prNumber, opts.handlerName); + if (limitSkip) { + const { owner, repo } = parseRepoFullName(opts.payload.repository.full_name); + await githubClient.createPRComment( + owner, + repo, + opts.prNumber, + '⚠️ Unable to automatically fix failing checks after 3 attempts. Manual intervention required.', + ); + return limitSkip; + } + + fixAttempts.set(opts.prNumber, attempts + 1); + + logger.info('Check suite failure on implementer PR — dispatching respond-to-ci', { + handler: opts.handlerName, + prNumber: opts.prNumber, + workItemId: opts.workItemId, + attempt: attempts + 1, + totalChecks: opts.checkStatus.totalCount, + failedChecks: opts.checkStatus.checkRuns + .filter( + (cr) => + cr.conclusion === 'failure' || + cr.conclusion === 'timed_out' || + cr.conclusion === 'action_required', + ) + .map((cr) => cr.name), + }); + + return { + agentType: 'respond-to-ci', + agentInput: { + prNumber: opts.prNumber, + prBranch: opts.prDetails.headRef, + repoFullName: opts.payload.repository.full_name, + headSha: opts.payload.check_suite.head_sha, + triggerType: 'check-failure', + triggerEvent: 'scm:check-suite-failure', + workItemId: opts.workItemId, + }, + prNumber: opts.prNumber, + prUrl: opts.prDetails.htmlUrl, + prTitle: opts.prDetails.title, + workItemId: opts.workItemId, + workItemUrl: opts.workItemUrl, + workItemTitle: opts.workItemTitle, + }; +} diff --git a/tests/unit/triggers/check-suite-success.test.ts b/tests/unit/triggers/check-suite-success.test.ts index ff6ceb7e..cd2e9cae 100644 --- a/tests/unit/triggers/check-suite-success.test.ts +++ b/tests/unit/triggers/check-suite-success.test.ts @@ -15,6 +15,7 @@ vi.mock('../../../src/triggers/shared/trigger-check.js', () => mockTriggerCheckM vi.mock('../../../src/github/client.js', () => mockGitHubClientModule); import { githubClient } from '../../../src/github/client.js'; +import { resetFixAttempts } from '../../../src/triggers/github/check-suite-failure.js'; import { CheckSuiteSuccessTrigger, recentlyDispatched, @@ -44,6 +45,14 @@ describe('CheckSuiteSuccessTrigger', () => { beforeEach(() => { vi.mocked(lookupWorkItemForPR).mockResolvedValue('abc123'); recentlyDispatched.clear(); + resetFixAttempts(42); + // Default: aggregate status reflects all checks passing. Tests that need + // a mixed-state SHA override this per-case. + vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ + allPassing: true, + totalCount: 1, + checkRuns: [{ name: 'ci', status: 'completed', conclusion: 'success' }], + }); }); describe('matches', () => { @@ -192,8 +201,9 @@ describe('CheckSuiteSuccessTrigger', () => { const result = await trigger.handle(ctx); expect(githubClient.getPR).toHaveBeenCalledWith('owner', 'repo', 42); - // handle() no longer polls checks — it defers to worker via waitForChecks flag - expect(githubClient.getCheckSuiteStatus).not.toHaveBeenCalled(); + // handle() queries aggregate status to fork between review (allPassing) + // and respond-to-ci (any check failed on the SHA). + expect(githubClient.getCheckSuiteStatus).toHaveBeenCalledWith('owner', 'repo', 'sha123'); expect(result).toEqual( expect.objectContaining({ agentType: 'review', @@ -320,6 +330,7 @@ describe('CheckSuiteSuccessTrigger', () => { const result = await trigger.handle(ctx); expectSkip(result); + // Author gate fails BEFORE the aggregate-status fork — no API call. expect(githubClient.getCheckSuiteStatus).not.toHaveBeenCalled(); }); @@ -346,6 +357,7 @@ describe('CheckSuiteSuccessTrigger', () => { const result = await trigger.handle(ctx); expectSkip(result); + // Persona gate fails BEFORE the aggregate-status fork — no API call. expect(githubClient.getCheckSuiteStatus).not.toHaveBeenCalled(); }); @@ -383,7 +395,6 @@ describe('CheckSuiteSuccessTrigger', () => { const result = await trigger.handle(ctx); expectSkip(result); - expect(githubClient.getCheckSuiteStatus).not.toHaveBeenCalled(); }); it('re-triggers when review commitId differs from headSha', async () => { @@ -466,7 +477,6 @@ describe('CheckSuiteSuccessTrigger', () => { const result = await trigger.handle(ctx); expectSkip(result); - expect(githubClient.getCheckSuiteStatus).not.toHaveBeenCalled(); }); it('ignores COMMENTED reviews from implementer bot when checking for prior review', async () => { @@ -797,6 +807,165 @@ describe('CheckSuiteSuccessTrigger', () => { }); }); + // Mixed-state SHA: GitHub fires check_suite.completed once per workflow. + // When workflow A's suite succeeds but workflow B's suite (same SHA) failed + // fast and earlier — the failure handler at the time saw "not all complete + // yet" and deferred. The success event arrives last; without this fork it + // would dispatch review (which then silently skips at the worker because + // allPassing=false). Closes the gap so respond-to-ci is dispatched on the + // success event when aggregate state shows any failure. + describe('mixed-state SHA — aggregate-status fork', () => { + const baseImplementerPR = { + number: 42, + title: 'Test PR', + body: null, + state: 'open' as const, + headRef: 'feature/test', + headSha: 'sha123', + baseRef: 'main', + merged: false, + htmlUrl: 'https://github.com/owner/repo/pull/42', + user: { login: 'cascade-impl' }, + }; + + it('dispatches respond-to-ci when aggregate has any failure on the SHA', async () => { + vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); + vi.mocked(githubClient.getPRReviews).mockResolvedValue([]); + vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ + allPassing: false, + totalCount: 2, + checkRuns: [ + { name: 'CI', status: 'completed', conclusion: 'success' }, + { name: 'E2B Template Rebuild', status: 'completed', conclusion: 'failure' }, + ], + }); + + const ctx: TriggerContext = { + project: mockProject, + source: 'github', + payload: makeCheckSuitePayload(), + personaIdentities: mockPersonaIdentities, + }; + + const result = await trigger.handle(ctx); + + expect(result?.agentType).toBe('respond-to-ci'); + expect(result?.agentInput.triggerEvent).toBe('scm:check-suite-failure'); + expect(result?.agentInput.triggerType).toBe('check-failure'); + expect(result?.agentInput.headSha).toBe('sha123'); + expect(result?.prNumber).toBe(42); + // Should NOT carry waitForChecks — that's a review-path flag and the + // aggregate is already complete. + expect(result?.waitForChecks).toBeFalsy(); + }); + + it('dispatches review when aggregate is all complete and all passing', async () => { + vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); + vi.mocked(githubClient.getPRReviews).mockResolvedValue([]); + vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ + allPassing: true, + totalCount: 2, + checkRuns: [ + { name: 'CI', status: 'completed', conclusion: 'success' }, + { name: 'E2B Template Rebuild', status: 'completed', conclusion: 'success' }, + ], + }); + + const ctx: TriggerContext = { + project: mockProject, + source: 'github', + payload: makeCheckSuitePayload(), + personaIdentities: mockPersonaIdentities, + }; + + const result = await trigger.handle(ctx); + + expect(result?.agentType).toBe('review'); + }); + + it('dispatches review with waitForChecks when not all checks complete yet', async () => { + vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); + vi.mocked(githubClient.getPRReviews).mockResolvedValue([]); + vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ + allPassing: false, + totalCount: 2, + checkRuns: [ + { name: 'CI', status: 'completed', conclusion: 'success' }, + { name: 'E2B Template Rebuild', status: 'in_progress', conclusion: null }, + ], + }); + + const ctx: TriggerContext = { + project: mockProject, + source: 'github', + payload: makeCheckSuitePayload(), + personaIdentities: mockPersonaIdentities, + }; + + const result = await trigger.handle(ctx); + + expect(result?.agentType).toBe('review'); + expect(result?.waitForChecks).toBe(true); + }); + + it('dispatches respond-to-ci on timed_out conclusion', async () => { + vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); + vi.mocked(githubClient.getPRReviews).mockResolvedValue([]); + vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ + allPassing: false, + totalCount: 2, + checkRuns: [ + { name: 'CI', status: 'completed', conclusion: 'success' }, + { name: 'E2B Template Rebuild', status: 'completed', conclusion: 'timed_out' }, + ], + }); + + const ctx: TriggerContext = { + project: mockProject, + source: 'github', + payload: makeCheckSuitePayload(), + personaIdentities: mockPersonaIdentities, + }; + + const result = await trigger.handle(ctx); + + expect(result?.agentType).toBe('respond-to-ci'); + }); + + it('dispatches respond-to-ci even when SHA was already reviewed (CI failure still needs fixing)', async () => { + vi.mocked(githubClient.getPR).mockResolvedValue(baseImplementerPR); + vi.mocked(githubClient.getPRReviews).mockResolvedValue([ + { + id: 1, + user: { login: 'cascade-reviewer' }, + state: 'approved', + body: 'LGTM', + submittedAt: '', + commitId: 'sha123', + }, + ]); + vi.mocked(githubClient.getCheckSuiteStatus).mockResolvedValue({ + allPassing: false, + totalCount: 2, + checkRuns: [ + { name: 'CI', status: 'completed', conclusion: 'success' }, + { name: 'E2B Template Rebuild', status: 'completed', conclusion: 'failure' }, + ], + }); + + const ctx: TriggerContext = { + project: mockProject, + source: 'github', + payload: makeCheckSuitePayload(), + personaIdentities: mockPersonaIdentities, + }; + + const result = await trigger.handle(ctx); + + expect(result?.agentType).toBe('respond-to-ci'); + }); + }); + describe('authorMode-aware behavior via trigger parameters', () => { it('handle returns null when trigger is disabled via checkTriggerEnabledWithParams', async () => { vi.mocked(checkTriggerEnabledWithParams).mockResolvedValueOnce({