Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 14 additions & 59 deletions src/triggers/github/check-suite-failure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/
import { logger } from '../../utils/logging.js';
import { parseRepoFullName } from '../../utils/repo.js';
import {
gateAttemptLimit,
gateBaseBranch,
gateCascadePersona,
gateTriggerEnabled,
requirePersonaIdentities,
} from '../shared/gates.js';
import { skip } from '../shared/skip.js';
import { dispatchRespondToCi, resetFixAttempts } from './respond-to-ci-dispatch.js';
import { type GitHubCheckSuitePayload, isGitHubCheckSuitePayload } from './types.js';
import { parsePrNumberFromRef, resolveWorkItemDisplayData, resolveWorkItemId } from './utils.js';

export { resetFixAttempts };

/**
* Resolve a PR number from a check_suite payload.
* Tries pull_requests[], then refs/pull/{N}/head parsing, then a GitHub API lookup by branch.
Expand Down Expand Up @@ -45,15 +47,6 @@ async function resolvePrNumber(
return pr.number;
}

// Track fix attempts per PR to prevent infinite loops
const fixAttempts = new Map<number, number>();
const MAX_ATTEMPTS = 3;

// Export for cleanup by PRReadyToMergeTrigger
export function resetFixAttempts(prNumber: number): void {
fixAttempts.delete(prNumber);
}

export class CheckSuiteFailureTrigger implements TriggerHandler {
name = 'check-suite-failure';
description =
Expand All @@ -73,6 +66,10 @@ export class CheckSuiteFailureTrigger implements TriggerHandler {
}

async handle(ctx: TriggerContext): Promise<TriggerResult | null> {
// Early-exit on disabled trigger to avoid GitHub API calls when not needed.
// `dispatchRespondToCi` re-checks the same gate (it's the single source of
// truth for the success-handler's mixed-state fork too); the redundant call
// here is one DB lookup, which the trigger-enabled cache absorbs.
const enabled = await gateTriggerEnabled(
ctx.project.id,
'respond-to-ci',
Expand Down Expand Up @@ -154,58 +151,16 @@ export class CheckSuiteFailureTrigger implements TriggerHandler {
);
}

// Check attempt limit to prevent infinite loops. Side effect (PR
// comment) is handler-local because the warning text is contractual,
// not part of the gate's pure logic.
const attempts = fixAttempts.get(prNumber) || 0;
const limitSkip = gateAttemptLimit(attempts, MAX_ATTEMPTS, prNumber, this.name);
if (limitSkip) {
await githubClient.createPRComment(
owner,
repo,
prNumber,
'⚠️ Unable to automatically fix failing checks after 3 attempts. Manual intervention required.',
);
return limitSkip;
}

// Increment attempt counter
fixAttempts.set(prNumber, attempts + 1);

logger.info('Check suite failure on implementer PR - all checks complete', {
prNumber,
workItemId,
attempt: attempts + 1,
totalChecks: checkStatus.totalCount,
failedChecks: checkStatus.checkRuns
.filter(
(cr) =>
cr.conclusion === 'failure' ||
cr.conclusion === 'timed_out' ||
cr.conclusion === 'action_required',
)
.map((cr) => cr.name),
});

const prBranch = prDetails.headRef;

return {
agentType: 'respond-to-ci',
agentInput: {
prNumber,
prBranch,
repoFullName: payload.repository.full_name,
headSha,
triggerType: 'check-failure',
triggerEvent: 'scm:check-suite-failure',
workItemId: workItemId,
},
return dispatchRespondToCi({
ctx,
prNumber,
prUrl: prDetails.htmlUrl,
prTitle: prDetails.title,
prDetails,
payload,
workItemId,
workItemUrl,
workItemTitle,
};
checkStatus,
handlerName: this.name,
});
}
}
47 changes: 44 additions & 3 deletions src/triggers/github/check-suite-success.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { parseRepoFullName } from '../../utils/repo.js';
import { gateBaseBranch } from '../shared/gates.js';
import { skip } from '../shared/skip.js';
import { checkTriggerEnabledWithParams } from '../shared/trigger-check.js';
import { dispatchRespondToCi } from './respond-to-ci-dispatch.js';
import {
buildReviewDispatchKey,
claimReviewDispatch,
Expand Down Expand Up @@ -61,12 +62,22 @@ export async function waitForChecks(
}

/**
* Triggers review agent when all CI checks pass on a PR authored by the implementer persona.
* Dispatches an outcome agent when a check_suite completes with success
* conclusion on a PR authored by the implementer persona.
*
* This trigger fires when:
* Two outcomes — chosen from aggregate state across ALL check_runs on the
* head SHA, not just this suite's:
* - `review` — every completed check passes, OR some are still
* in-progress (defer to worker via `waitForChecks`).
* - `respond-to-ci` — every check is complete AND at least one failed.
* Closes the gap where GitHub fires the success
* event last after a fast-failing sibling suite,
* and no later `conclusion=failure` event will fire
* to wake `check-suite-failure`.
*
* The trigger fires when:
* 1. A check_suite completes with success conclusion
* 2. The PR author matches the configured author mode (own/external/all)
* 3. All checks are actually passing (verified via API)
*
* Work item resolution uses the pr_work_items DB table only.
* The trigger fires even without a linked work item — agents run, PM updates are simply skipped.
Expand Down Expand Up @@ -164,6 +175,36 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
const workItemId = await resolveWorkItemId(ctx.project.id, prNumber);
const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId);

// Mixed-state SHA fork: GitHub fires check_suite.completed once per
// workflow. When workflow A's suite succeeds but workflow B's suite on
// the same SHA failed earlier (and workflow B's failure handler skipped
// with "not all complete yet"), this success event is the one that
// closes the picture. If aggregate state has any failure, dispatch
// respond-to-ci instead of review — otherwise respond-to-ci is lost
// because no later check_suite event with conclusion=failure will fire.
// See PR #176 / 2026-04-30 for the live incident.
const checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha);
const allComplete = checkStatus.checkRuns.every((cr) => cr.status === 'completed');
const anyFailed = checkStatus.checkRuns.some(
(cr) =>
cr.conclusion === 'failure' ||
cr.conclusion === 'timed_out' ||
cr.conclusion === 'action_required',
);
if (allComplete && anyFailed) {
return dispatchRespondToCi({
ctx,
prNumber,
prDetails,
payload,
workItemId,
workItemUrl,
workItemTitle,
checkStatus,
handlerName: this.name,
});
}

// Skip if the reviewer persona's latest review already covers the current HEAD SHA
const reviews = await githubClient.getPRReviews(owner, repo, prNumber);

Expand Down
103 changes: 103 additions & 0 deletions src/triggers/github/respond-to-ci-dispatch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import { type CheckSuiteStatus, githubClient } from '../../github/client.js';
import type { TriggerContext, TriggerResult } from '../../types/index.js';
import { logger } from '../../utils/logging.js';
import { parseRepoFullName } from '../../utils/repo.js';
import { gateAttemptLimit, gateTriggerEnabled } from '../shared/gates.js';
import type { GitHubCheckSuitePayload } from './types.js';

// Per-PR fix-attempt counter shared across the failure handler and the
// success handler's mixed-state fork. The lifecycle is: increment on dispatch,
// reset via `resetFixAttempts` (called by tests only — pr-ready-to-merge does
// not auto-reset; the counter ages out with the in-process Map).
const fixAttempts = new Map<number, number>();
const MAX_ATTEMPTS = 3;

export function resetFixAttempts(prNumber: number): void {
fixAttempts.delete(prNumber);
}

export interface PRDetails {
headRef: string;
htmlUrl: string;
title: string;
}

/**
* Build a respond-to-ci dispatch result, applying the trigger-enabled gate +
* per-PR attempt limit. Both `check-suite-failure` (when the failure event
* itself arrives last) and `check-suite-success` (when a success event closes
* a mixed-state SHA — see #1241) call this so the dispatch contract is
* single-sourced.
*
* Returns either a respond-to-ci `TriggerResult` ready to enqueue, or a
* structured skip when the trigger is disabled or the attempt limit is hit.
*/
export async function dispatchRespondToCi(opts: {
ctx: TriggerContext;
prNumber: number;
prDetails: PRDetails;
payload: GitHubCheckSuitePayload;
workItemId: string | undefined;
workItemUrl: string | undefined;
workItemTitle: string | undefined;
checkStatus: CheckSuiteStatus;
handlerName: string;
}): Promise<TriggerResult> {
const enabled = await gateTriggerEnabled(
opts.ctx.project.id,
'respond-to-ci',
'scm:check-suite-failure',
opts.handlerName,
);
if (enabled) return enabled;

const attempts = fixAttempts.get(opts.prNumber) ?? 0;
const limitSkip = gateAttemptLimit(attempts, MAX_ATTEMPTS, opts.prNumber, opts.handlerName);
if (limitSkip) {
const { owner, repo } = parseRepoFullName(opts.payload.repository.full_name);
await githubClient.createPRComment(
owner,
repo,
opts.prNumber,
'⚠️ Unable to automatically fix failing checks after 3 attempts. Manual intervention required.',
);
return limitSkip;
}

fixAttempts.set(opts.prNumber, attempts + 1);

logger.info('Check suite failure on implementer PR — dispatching respond-to-ci', {
handler: opts.handlerName,
prNumber: opts.prNumber,
workItemId: opts.workItemId,
attempt: attempts + 1,
totalChecks: opts.checkStatus.totalCount,
failedChecks: opts.checkStatus.checkRuns
.filter(
(cr) =>
cr.conclusion === 'failure' ||
cr.conclusion === 'timed_out' ||
cr.conclusion === 'action_required',
)
.map((cr) => cr.name),
});

return {
agentType: 'respond-to-ci',
agentInput: {
prNumber: opts.prNumber,
prBranch: opts.prDetails.headRef,
repoFullName: opts.payload.repository.full_name,
headSha: opts.payload.check_suite.head_sha,
triggerType: 'check-failure',
triggerEvent: 'scm:check-suite-failure',
workItemId: opts.workItemId,
},
prNumber: opts.prNumber,
prUrl: opts.prDetails.htmlUrl,
prTitle: opts.prDetails.title,
workItemId: opts.workItemId,
workItemUrl: opts.workItemUrl,
workItemTitle: opts.workItemTitle,
};
}
Loading
Loading