From f69428472cf8bc4939ba9f2270a65d6d241db4ba Mon Sep 17 00:00:00 2001
From: aaight
Date: Thu, 23 Apr 2026 22:20:54 +0200
Subject: [PATCH 1/5] fix(wizard): gate optional PM wizard steps on required
steps being complete (#1172)
Co-authored-by: Cascade Bot
---
tests/unit/web/jira-wizard-isComplete.test.ts | 140 ++++++++++++++++++
.../unit/web/linear-wizard-isComplete.test.ts | 122 +++++++++++++++
.../unit/web/trello-wizard-isComplete.test.ts | 122 +++++++++++++++
.../projects/pm-providers/jira/wizard.ts | 27 +++-
.../projects/pm-providers/linear/wizard.ts | 25 +++-
.../projects/pm-providers/trello/wizard.ts | 25 +++-
6 files changed, 451 insertions(+), 10 deletions(-)
create mode 100644 tests/unit/web/jira-wizard-isComplete.test.ts
create mode 100644 tests/unit/web/linear-wizard-isComplete.test.ts
create mode 100644 tests/unit/web/trello-wizard-isComplete.test.ts
diff --git a/tests/unit/web/jira-wizard-isComplete.test.ts b/tests/unit/web/jira-wizard-isComplete.test.ts
new file mode 100644
index 00000000..01aecb06
--- /dev/null
+++ b/tests/unit/web/jira-wizard-isComplete.test.ts
@@ -0,0 +1,140 @@
+/**
+ * JIRA wizard — isComplete predicates for optional steps.
+ *
+ * Guards that optional steps (labels, custom-fields, issue-types, webhook)
+ * only show green check marks after the required steps (credentials +
+ * project + status mapping) are all complete. Prevents the UI bug where a
+ * brand-new unconfigured integration showed every step as green.
+ */
+
+import { describe, expect, it } from 'vitest';
+import { jiraProviderWizard } from '../../../web/src/components/projects/pm-providers/jira/wizard.js';
+import { createInitialState } from '../../../web/src/components/projects/pm-wizard-state.js';
+
+const getStep = (id: string) => {
+ const step = jiraProviderWizard.steps.find((s) => s.id === id);
+ if (!step) throw new Error(`Step ${id} not found`);
+ return step;
+};
+
+describe('JIRA optional steps — isComplete gating', () => {
+ describe('fresh state (createInitialState)', () => {
+ const state = createInitialState();
+
+ it('jira-labels is NOT complete on fresh state', () => {
+ expect(getStep('jira-labels').isComplete(state)).toBe(false);
+ });
+
+ it('jira-custom-fields is NOT complete on fresh state', () => {
+ expect(getStep('jira-custom-fields').isComplete(state)).toBe(false);
+ });
+
+ it('jira-issue-types is NOT complete on fresh state', () => {
+ expect(getStep('jira-issue-types').isComplete(state)).toBe(false);
+ });
+
+ it('jira-webhook is NOT complete on fresh state', () => {
+ expect(getStep('jira-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('partially configured (credentials only, no project)', () => {
+ const state = {
+ ...createInitialState(),
+ jiraEmail: 'user@example.com',
+ jiraApiToken: 'token123',
+ jiraBaseUrl: 'https://example.atlassian.net',
+ verificationResult: { provider: 'jira' as const, display: 'user@example.com' },
+ };
+
+ it('jira-labels is NOT complete when project not selected', () => {
+ expect(getStep('jira-labels').isComplete(state)).toBe(false);
+ });
+
+ it('jira-custom-fields is NOT complete when project not selected', () => {
+ expect(getStep('jira-custom-fields').isComplete(state)).toBe(false);
+ });
+
+ it('jira-issue-types is NOT complete when project not selected', () => {
+ expect(getStep('jira-issue-types').isComplete(state)).toBe(false);
+ });
+
+ it('jira-webhook is NOT complete when project not selected', () => {
+ expect(getStep('jira-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('credentials + project, but no status mapping', () => {
+ const state = {
+ ...createInitialState(),
+ jiraEmail: 'user@example.com',
+ jiraApiToken: 'token123',
+ jiraBaseUrl: 'https://example.atlassian.net',
+ verificationResult: { provider: 'jira' as const, display: 'user@example.com' },
+ jiraProjectKey: 'PROJ',
+ jiraStatusMappings: {},
+ };
+
+ it('jira-labels is NOT complete without status mapping', () => {
+ expect(getStep('jira-labels').isComplete(state)).toBe(false);
+ });
+
+ it('jira-webhook is NOT complete without status mapping', () => {
+ expect(getStep('jira-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('fully configured (credentials + project + status mapping)', () => {
+ const state = {
+ ...createInitialState(),
+ jiraEmail: 'user@example.com',
+ jiraApiToken: 'token123',
+ jiraBaseUrl: 'https://example.atlassian.net',
+ verificationResult: { provider: 'jira' as const, display: 'user@example.com' },
+ jiraProjectKey: 'PROJ',
+ jiraStatusMappings: { todo: 'To Do', inProgress: 'In Progress' },
+ };
+
+ it('jira-labels is complete when all required steps done', () => {
+ expect(getStep('jira-labels').isComplete(state)).toBe(true);
+ });
+
+ it('jira-custom-fields is complete when all required steps done', () => {
+ expect(getStep('jira-custom-fields').isComplete(state)).toBe(true);
+ });
+
+ it('jira-issue-types is complete when all required steps done', () => {
+ expect(getStep('jira-issue-types').isComplete(state)).toBe(true);
+ });
+
+ it('jira-webhook is complete when all required steps done', () => {
+ expect(getStep('jira-webhook').isComplete(state)).toBe(true);
+ });
+ });
+
+ describe('edit mode with stored credentials (isEditing + hasStoredCredentials)', () => {
+ const state = {
+ ...createInitialState(),
+ isEditing: true,
+ hasStoredCredentials: true,
+ jiraProjectKey: 'PROJ',
+ jiraStatusMappings: { todo: 'To Do' },
+ };
+
+ it('jira-labels is complete in edit mode with stored credentials', () => {
+ expect(getStep('jira-labels').isComplete(state)).toBe(true);
+ });
+
+ it('jira-custom-fields is complete in edit mode with stored credentials', () => {
+ expect(getStep('jira-custom-fields').isComplete(state)).toBe(true);
+ });
+
+ it('jira-issue-types is complete in edit mode with stored credentials', () => {
+ expect(getStep('jira-issue-types').isComplete(state)).toBe(true);
+ });
+
+ it('jira-webhook is complete in edit mode with stored credentials', () => {
+ expect(getStep('jira-webhook').isComplete(state)).toBe(true);
+ });
+ });
+});
diff --git a/tests/unit/web/linear-wizard-isComplete.test.ts b/tests/unit/web/linear-wizard-isComplete.test.ts
new file mode 100644
index 00000000..f480a9be
--- /dev/null
+++ b/tests/unit/web/linear-wizard-isComplete.test.ts
@@ -0,0 +1,122 @@
+/**
+ * Linear wizard — isComplete predicates for optional steps.
+ *
+ * Guards that optional steps (labels, project-scope, webhook) only show
+ * green check marks after the required steps (credentials + team + status
+ * mapping) are all complete. Prevents the UI bug where a brand-new
+ * unconfigured integration showed every step as green.
+ */
+
+import { describe, expect, it } from 'vitest';
+import { linearProviderWizard } from '../../../web/src/components/projects/pm-providers/linear/wizard.js';
+import { createInitialState } from '../../../web/src/components/projects/pm-wizard-state.js';
+
+const getStep = (id: string) => {
+ const step = linearProviderWizard.steps.find((s) => s.id === id);
+ if (!step) throw new Error(`Step ${id} not found`);
+ return step;
+};
+
+describe('Linear optional steps — isComplete gating', () => {
+ describe('fresh state (createInitialState)', () => {
+ const state = createInitialState();
+
+ it('linear-labels is NOT complete on fresh state', () => {
+ expect(getStep('linear-labels').isComplete(state)).toBe(false);
+ });
+
+ it('linear-project-scope is NOT complete on fresh state', () => {
+ expect(getStep('linear-project-scope').isComplete(state)).toBe(false);
+ });
+
+ it('linear-webhook is NOT complete on fresh state', () => {
+ expect(getStep('linear-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('partially configured (credentials only, no team)', () => {
+ const state = {
+ ...createInitialState(),
+ linearApiKey: 'lin_api_123',
+ verificationResult: { provider: 'linear' as const, display: 'user@example.com' },
+ };
+
+ it('linear-labels is NOT complete when team not selected', () => {
+ expect(getStep('linear-labels').isComplete(state)).toBe(false);
+ });
+
+ it('linear-project-scope is NOT complete when team not selected', () => {
+ expect(getStep('linear-project-scope').isComplete(state)).toBe(false);
+ });
+
+ it('linear-webhook is NOT complete when team not selected', () => {
+ expect(getStep('linear-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('credentials + team, but no status mapping', () => {
+ const state = {
+ ...createInitialState(),
+ linearApiKey: 'lin_api_123',
+ verificationResult: { provider: 'linear' as const, display: 'user@example.com' },
+ linearTeamId: 'team-1',
+ linearStatusMappings: {},
+ };
+
+ it('linear-labels is NOT complete without status mapping', () => {
+ expect(getStep('linear-labels').isComplete(state)).toBe(false);
+ });
+
+ it('linear-project-scope is NOT complete without status mapping', () => {
+ expect(getStep('linear-project-scope').isComplete(state)).toBe(false);
+ });
+
+ it('linear-webhook is NOT complete without status mapping', () => {
+ expect(getStep('linear-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('fully configured (credentials + team + status mapping)', () => {
+ const state = {
+ ...createInitialState(),
+ linearApiKey: 'lin_api_123',
+ verificationResult: { provider: 'linear' as const, display: 'user@example.com' },
+ linearTeamId: 'team-1',
+ linearStatusMappings: { todo: 'state-uuid-1', inProgress: 'state-uuid-2' },
+ };
+
+ it('linear-labels is complete when all required steps done', () => {
+ expect(getStep('linear-labels').isComplete(state)).toBe(true);
+ });
+
+ it('linear-project-scope is complete when all required steps done', () => {
+ expect(getStep('linear-project-scope').isComplete(state)).toBe(true);
+ });
+
+ it('linear-webhook is complete when all required steps done', () => {
+ expect(getStep('linear-webhook').isComplete(state)).toBe(true);
+ });
+ });
+
+ describe('edit mode with stored credentials (isEditing + hasStoredCredentials)', () => {
+ const state = {
+ ...createInitialState(),
+ isEditing: true,
+ hasStoredCredentials: true,
+ linearTeamId: 'team-1',
+ linearStatusMappings: { todo: 'state-uuid-1' },
+ };
+
+ it('linear-labels is complete in edit mode with stored credentials', () => {
+ expect(getStep('linear-labels').isComplete(state)).toBe(true);
+ });
+
+ it('linear-project-scope is complete in edit mode with stored credentials', () => {
+ expect(getStep('linear-project-scope').isComplete(state)).toBe(true);
+ });
+
+ it('linear-webhook is complete in edit mode with stored credentials', () => {
+ expect(getStep('linear-webhook').isComplete(state)).toBe(true);
+ });
+ });
+});
diff --git a/tests/unit/web/trello-wizard-isComplete.test.ts b/tests/unit/web/trello-wizard-isComplete.test.ts
new file mode 100644
index 00000000..bc97cbd9
--- /dev/null
+++ b/tests/unit/web/trello-wizard-isComplete.test.ts
@@ -0,0 +1,122 @@
+/**
+ * Trello wizard — isComplete predicates for optional steps.
+ *
+ * Guards that optional steps (labels, custom-fields, webhook) only show
+ * green check marks after the required steps (credentials + board + status
+ * mapping) are all complete. Prevents the UI bug where a brand-new
+ * unconfigured integration showed every step as green.
+ */
+
+import { describe, expect, it } from 'vitest';
+import { trelloProviderWizard } from '../../../web/src/components/projects/pm-providers/trello/wizard.js';
+import { createInitialState } from '../../../web/src/components/projects/pm-wizard-state.js';
+
+// Grab the optional steps by id
+const getStep = (id: string) => {
+ const step = trelloProviderWizard.steps.find((s) => s.id === id);
+ if (!step) throw new Error(`Step ${id} not found`);
+ return step;
+};
+
+describe('Trello optional steps — isComplete gating', () => {
+ describe('fresh state (createInitialState)', () => {
+ const state = createInitialState();
+
+ it('trello-labels is NOT complete on fresh state', () => {
+ expect(getStep('trello-labels').isComplete(state)).toBe(false);
+ });
+
+ it('trello-custom-fields is NOT complete on fresh state', () => {
+ expect(getStep('trello-custom-fields').isComplete(state)).toBe(false);
+ });
+
+ it('trello-webhook is NOT complete on fresh state', () => {
+ expect(getStep('trello-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('partially configured (credentials only, no board)', () => {
+ const state = {
+ ...createInitialState(),
+ trelloApiKey: 'key123',
+ trelloToken: 'token123',
+ verificationResult: { provider: 'trello' as const, display: 'user@example.com' },
+ };
+
+ it('trello-labels is NOT complete when board not selected', () => {
+ expect(getStep('trello-labels').isComplete(state)).toBe(false);
+ });
+
+ it('trello-custom-fields is NOT complete when board not selected', () => {
+ expect(getStep('trello-custom-fields').isComplete(state)).toBe(false);
+ });
+
+ it('trello-webhook is NOT complete when board not selected', () => {
+ expect(getStep('trello-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('credentials + board, but no status mapping', () => {
+ const state = {
+ ...createInitialState(),
+ trelloApiKey: 'key123',
+ trelloToken: 'token123',
+ verificationResult: { provider: 'trello' as const, display: 'user@example.com' },
+ trelloBoardId: 'board-1',
+ trelloListMappings: {},
+ };
+
+ it('trello-labels is NOT complete without status mapping', () => {
+ expect(getStep('trello-labels').isComplete(state)).toBe(false);
+ });
+
+ it('trello-webhook is NOT complete without status mapping', () => {
+ expect(getStep('trello-webhook').isComplete(state)).toBe(false);
+ });
+ });
+
+ describe('fully configured (credentials + board + status mapping)', () => {
+ const state = {
+ ...createInitialState(),
+ trelloApiKey: 'key123',
+ trelloToken: 'token123',
+ verificationResult: { provider: 'trello' as const, display: 'user@example.com' },
+ trelloBoardId: 'board-1',
+ trelloListMappings: { todo: 'list-1', inProgress: 'list-2' },
+ };
+
+ it('trello-labels is complete when all required steps done', () => {
+ expect(getStep('trello-labels').isComplete(state)).toBe(true);
+ });
+
+ it('trello-custom-fields is complete when all required steps done', () => {
+ expect(getStep('trello-custom-fields').isComplete(state)).toBe(true);
+ });
+
+ it('trello-webhook is complete when all required steps done', () => {
+ expect(getStep('trello-webhook').isComplete(state)).toBe(true);
+ });
+ });
+
+ describe('edit mode with stored credentials (isEditing + hasStoredCredentials)', () => {
+ const state = {
+ ...createInitialState(),
+ isEditing: true,
+ hasStoredCredentials: true,
+ trelloBoardId: 'board-1',
+ trelloListMappings: { todo: 'list-1' },
+ };
+
+ it('trello-labels is complete in edit mode with stored credentials', () => {
+ expect(getStep('trello-labels').isComplete(state)).toBe(true);
+ });
+
+ it('trello-custom-fields is complete in edit mode with stored credentials', () => {
+ expect(getStep('trello-custom-fields').isComplete(state)).toBe(true);
+ });
+
+ it('trello-webhook is complete in edit mode with stored credentials', () => {
+ expect(getStep('trello-webhook').isComplete(state)).toBe(true);
+ });
+ });
+});
diff --git a/web/src/components/projects/pm-providers/jira/wizard.ts b/web/src/components/projects/pm-providers/jira/wizard.ts
index 62693db8..ae40e874 100644
--- a/web/src/components/projects/pm-providers/jira/wizard.ts
+++ b/web/src/components/projects/pm-providers/jira/wizard.ts
@@ -80,6 +80,25 @@ function isCredentialsComplete(state: {
);
}
+/**
+ * Returns true when all required JIRA steps are done:
+ * credentials + project selected + at least one status mapping.
+ * Used to gate optional step `isComplete` predicates so they only show
+ * green after the integration is actually configured.
+ */
+function areJiraRequiredStepsDone(
+  state: Parameters<typeof isCredentialsComplete>[0] & {
+ jiraProjectKey: string;
+    jiraStatusMappings: Record<string, string>;
+ },
+): boolean {
+ return (
+ isCredentialsComplete(state) &&
+ Boolean(state.jiraProjectKey) &&
+ Object.keys(state.jiraStatusMappings).length > 0
+ );
+}
+
interface JiraProviderHooks {
readonly projectOptions: ReadonlyArray<{ readonly id: string; readonly name: string }>;
readonly projectsLoading: boolean;
@@ -250,25 +269,25 @@ export const jiraProviderWizard: ProviderWizardDefinition = {
id: 'jira-labels',
title: 'Labels',
Component: JiraLabelMappingAdapter,
- isComplete: () => true, // labels are optional
+ isComplete: (state) => areJiraRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'jira-custom-fields',
title: 'Custom fields',
Component: JiraCustomFieldMappingAdapter,
- isComplete: () => true, // cost field optional
+ isComplete: (state) => areJiraRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'jira-issue-types',
title: 'Issue types',
Component: JiraIssueTypeAdapter,
- isComplete: () => true, // issue-type mapping optional
+ isComplete: (state) => areJiraRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'jira-webhook',
title: 'Webhook',
Component: JiraWebhookAdapter,
- isComplete: () => true,
+ isComplete: (state) => areJiraRequiredStepsDone(state),
},
],
diff --git a/web/src/components/projects/pm-providers/linear/wizard.ts b/web/src/components/projects/pm-providers/linear/wizard.ts
index 2d42755c..fc0369fe 100644
--- a/web/src/components/projects/pm-providers/linear/wizard.ts
+++ b/web/src/components/projects/pm-providers/linear/wizard.ts
@@ -83,6 +83,25 @@ function isCredentialsComplete(state: {
return Boolean(state.linearApiKey && state.verificationResult);
}
+/**
+ * Returns true when all required Linear steps are done:
+ * credentials + team selected + at least one status mapping.
+ * Used to gate optional step `isComplete` predicates so they only show
+ * green after the integration is actually configured.
+ */
+function areLinearRequiredStepsDone(
+  state: Parameters<typeof isCredentialsComplete>[0] & {
+ linearTeamId: string;
+    linearStatusMappings: Record<string, string>;
+ },
+): boolean {
+ return (
+ isCredentialsComplete(state) &&
+ Boolean(state.linearTeamId) &&
+ Object.keys(state.linearStatusMappings).length > 0
+ );
+}
+
interface LinearProviderHooks {
readonly teamOptions: ReadonlyArray<{
readonly id: string;
@@ -232,19 +251,19 @@ export const linearProviderWizard: ProviderWizardDefinition = {
id: 'linear-labels',
title: 'Labels',
Component: LinearLabelMappingAdapter,
- isComplete: () => true, // labels optional
+ isComplete: (state) => areLinearRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'linear-project-scope',
title: 'Project scope',
Component: LinearProjectScopeAdapter,
- isComplete: () => true, // optional narrowing
+ isComplete: (state) => areLinearRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'linear-webhook',
title: 'Webhook',
Component: LinearWebhookAdapter,
- isComplete: () => true,
+ isComplete: (state) => areLinearRequiredStepsDone(state),
},
],
diff --git a/web/src/components/projects/pm-providers/trello/wizard.ts b/web/src/components/projects/pm-providers/trello/wizard.ts
index 7c34f795..487aec7d 100644
--- a/web/src/components/projects/pm-providers/trello/wizard.ts
+++ b/web/src/components/projects/pm-providers/trello/wizard.ts
@@ -84,6 +84,25 @@ function isCredentialsComplete(state: {
return Boolean(state.trelloApiKey && state.trelloToken && state.verificationResult);
}
+/**
+ * Returns true when all required Trello steps are done:
+ * credentials + board selected + at least one list mapping.
+ * Used to gate optional step `isComplete` predicates so they only show
+ * green after the integration is actually configured.
+ */
+function areTrelloRequiredStepsDone(
+  state: Parameters<typeof isCredentialsComplete>[0] & {
+ trelloBoardId: string;
+    trelloListMappings: Record<string, string>;
+ },
+): boolean {
+ return (
+ isCredentialsComplete(state) &&
+ Boolean(state.trelloBoardId) &&
+ Object.keys(state.trelloListMappings).length > 0
+ );
+}
+
/**
* The shape returned by `useProviderHooks`. Each step adapter pulls the
* slice it needs from this record. Ports all the mutations + memoized
@@ -249,19 +268,19 @@ export const trelloProviderWizard: ProviderWizardDefinition = {
id: 'trello-labels',
title: 'Label mapping',
Component: TrelloLabelMappingAdapter,
- isComplete: () => true, // labels are optional
+ isComplete: (state) => areTrelloRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'trello-custom-fields',
title: 'Custom fields',
Component: TrelloCustomFieldMappingAdapter,
- isComplete: () => true, // cost field is optional
+ isComplete: (state) => areTrelloRequiredStepsDone(state), // optional, but only green after required steps
},
{
id: 'trello-webhook',
title: 'Webhook',
Component: TrelloWebhookAdapter,
- isComplete: () => true,
+ isComplete: (state) => areTrelloRequiredStepsDone(state),
},
],
From 0e4a08ecb7d02f7d859bd4ce36249d1137d5f18d Mon Sep 17 00:00:00 2001
From: aaight
Date: Thu, 23 Apr 2026 22:22:26 +0200
Subject: [PATCH 2/5] fix(triggers): widen evaluateAuthorMode to recognize
reviewer persona as 'own' (#1173)
Co-authored-by: Cascade Bot
---
src/triggers/github/check-suite-success.ts | 2 +-
src/triggers/github/pr-opened.ts | 2 +-
src/triggers/github/utils.ts | 17 ++-
.../unit/triggers/check-suite-success.test.ts | 62 ++++++++++
tests/unit/triggers/github-utils.test.ts | 109 ++++++++++++++++++
tests/unit/triggers/pr-opened.test.ts | 35 +++++-
6 files changed, 219 insertions(+), 8 deletions(-)
diff --git a/src/triggers/github/check-suite-success.ts b/src/triggers/github/check-suite-success.ts
index 2ca5e375..a77e8ffb 100644
--- a/src/triggers/github/check-suite-success.ts
+++ b/src/triggers/github/check-suite-success.ts
@@ -138,7 +138,7 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler {
handler: this.name,
prNumber,
prAuthor: prDetails.user.login,
- isImplementerPR: authorResult.isImplementerPR,
+ isCascadePR: authorResult.isCascadePR,
authorMode: authorResult.authorMode,
});
return null;
diff --git a/src/triggers/github/pr-opened.ts b/src/triggers/github/pr-opened.ts
index ad674a01..edb90908 100644
--- a/src/triggers/github/pr-opened.ts
+++ b/src/triggers/github/pr-opened.ts
@@ -67,7 +67,7 @@ export class PROpenedTrigger implements TriggerHandler {
handler: this.name,
prNumber,
prAuthor,
- isImplementerPR: authorResult.isImplementerPR,
+ isCascadePR: authorResult.isCascadePR,
authorMode: authorResult.authorMode,
});
return null;
diff --git a/src/triggers/github/utils.ts b/src/triggers/github/utils.ts
index 9bcda4bb..f5492bef 100644
--- a/src/triggers/github/utils.ts
+++ b/src/triggers/github/utils.ts
@@ -1,11 +1,12 @@
import { lookupWorkItemForPR } from '../../db/repositories/prWorkItemsRepository.js';
+import type { PersonaIdentities } from '../../github/personas.js';
import type { ProjectConfig } from '../../types/index.js';
import { logger } from '../../utils/logging.js';
export interface AuthorModeResult {
shouldTrigger: boolean;
authorMode: string;
- isImplementerPR: boolean;
+ isCascadePR: boolean;
}
/**
@@ -14,10 +15,13 @@ export interface AuthorModeResult {
*
* Returns `null` when personaIdentities is missing (caller should return null).
* Validates authorMode against known values and falls back to 'own'.
+ *
+ * "own" means the PR was authored by any CASCADE persona (implementer OR reviewer).
+ * This aligns with `isCascadeBot()` which already checks both personas.
*/
export function evaluateAuthorMode(
prAuthorLogin: string,
- personaIdentities: { implementer: string } | undefined,
+ personaIdentities: PersonaIdentities | undefined,
  parameters: Record<string, unknown>,
handlerName: string,
): AuthorModeResult | null {
@@ -26,7 +30,10 @@ export function evaluateAuthorMode(
return null;
}
const implLogin = personaIdentities.implementer;
+ const reviewerLogin = personaIdentities.reviewer;
const isImplementerPR = prAuthorLogin === implLogin || prAuthorLogin === `${implLogin}[bot]`;
+ const isReviewerPR = prAuthorLogin === reviewerLogin || prAuthorLogin === `${reviewerLogin}[bot]`;
+ const isCascadePR = isImplementerPR || isReviewerPR;
const rawMode = parameters.authorMode;
const authorMode =
@@ -41,10 +48,10 @@ export function evaluateAuthorMode(
const shouldTrigger =
authorMode === 'all' ||
- (authorMode === 'own' && isImplementerPR) ||
- (authorMode === 'external' && !isImplementerPR);
+ (authorMode === 'own' && isCascadePR) ||
+ (authorMode === 'external' && !isCascadePR);
- return { shouldTrigger, authorMode, isImplementerPR };
+ return { shouldTrigger, authorMode, isCascadePR };
}
/**
diff --git a/tests/unit/triggers/check-suite-success.test.ts b/tests/unit/triggers/check-suite-success.test.ts
index b5390c5f..54e6cfdf 100644
--- a/tests/unit/triggers/check-suite-success.test.ts
+++ b/tests/unit/triggers/check-suite-success.test.ts
@@ -880,6 +880,68 @@ describe('CheckSuiteSuccessTrigger', () => {
expect(result).toBeNull();
});
+ it('triggers when PR authored by reviewer persona and authorMode=own', async () => {
+ vi.mocked(checkTriggerEnabledWithParams).mockResolvedValueOnce({
+ enabled: true,
+ parameters: { authorMode: 'own' },
+ });
+ vi.mocked(githubClient.getPR).mockResolvedValue({
+ number: 42,
+ title: 'Reviewer persona PR',
+ body: 'https://trello.com/c/abc123',
+ state: 'open',
+ headRef: 'feature/reviewer-authored',
+ headSha: 'sha123',
+ baseRef: 'main',
+ merged: false,
+ htmlUrl: 'https://github.com/owner/repo/pull/42',
+ user: { login: 'cascade-reviewer' },
+ });
+ vi.mocked(githubClient.getPRReviews).mockResolvedValue([]);
+
+ const ctx: TriggerContext = {
+ project: mockProject,
+ source: 'github',
+ payload: makeCheckSuitePayload(),
+ personaIdentities: mockPersonaIdentities,
+ };
+
+ const result = await trigger.handle(ctx);
+
+ expect(result).not.toBeNull();
+ expect(result?.agentType).toBe('review');
+ });
+
+ it('skips reviewer persona PR when authorMode=external', async () => {
+ vi.mocked(checkTriggerEnabledWithParams).mockResolvedValueOnce({
+ enabled: true,
+ parameters: { authorMode: 'external' },
+ });
+ vi.mocked(githubClient.getPR).mockResolvedValue({
+ number: 42,
+ title: 'Reviewer persona PR',
+ body: 'https://trello.com/c/abc123',
+ state: 'open',
+ headRef: 'feature/reviewer-authored',
+ headSha: 'sha123',
+ baseRef: 'main',
+ merged: false,
+ htmlUrl: 'https://github.com/owner/repo/pull/42',
+ user: { login: 'cascade-reviewer' },
+ });
+
+ const ctx: TriggerContext = {
+ project: mockProject,
+ source: 'github',
+ payload: makeCheckSuitePayload(),
+ personaIdentities: mockPersonaIdentities,
+ };
+
+ const result = await trigger.handle(ctx);
+
+ expect(result).toBeNull();
+ });
+
it('triggers for both authors when authorMode=all', async () => {
vi.mocked(checkTriggerEnabledWithParams).mockResolvedValue({
enabled: true,
diff --git a/tests/unit/triggers/github-utils.test.ts b/tests/unit/triggers/github-utils.test.ts
index 9dfca8ca..e9568378 100644
--- a/tests/unit/triggers/github-utils.test.ts
+++ b/tests/unit/triggers/github-utils.test.ts
@@ -5,7 +5,9 @@ vi.mock('../../../src/db/repositories/prWorkItemsRepository.js', () => ({
}));
import { lookupWorkItemForPR } from '../../../src/db/repositories/prWorkItemsRepository.js';
+import type { PersonaIdentities } from '../../../src/github/personas.js';
import {
+ evaluateAuthorMode,
extractJiraIssueKey,
extractTrelloCardId,
extractWorkItemId,
@@ -200,3 +202,110 @@ describe('parsePrNumberFromRef', () => {
expect(parsePrNumberFromRef('pull/42/head')).toBeNull();
});
});
+
+describe('evaluateAuthorMode', () => {
+ const personas: PersonaIdentities = {
+ implementer: 'cascade-impl',
+ reviewer: 'cascade-reviewer',
+ };
+
+ it('returns null when personaIdentities is undefined', () => {
+ const result = evaluateAuthorMode('some-user', undefined, {}, 'test-handler');
+ expect(result).toBeNull();
+ });
+
+ it('returns shouldTrigger:true + isCascadePR:true for implementer login when authorMode=own', () => {
+ const result = evaluateAuthorMode('cascade-impl', personas, { authorMode: 'own' }, 'handler');
+ expect(result).toEqual({ shouldTrigger: true, authorMode: 'own', isCascadePR: true });
+ });
+
+ it('returns shouldTrigger:true + isCascadePR:true for reviewer login when authorMode=own (core bug regression)', () => {
+ const result = evaluateAuthorMode(
+ 'cascade-reviewer',
+ personas,
+ { authorMode: 'own' },
+ 'handler',
+ );
+ expect(result).toEqual({ shouldTrigger: true, authorMode: 'own', isCascadePR: true });
+ });
+
+ it('returns shouldTrigger:true + isCascadePR:true for implementer[bot] variant when authorMode=own', () => {
+ const result = evaluateAuthorMode(
+ 'cascade-impl[bot]',
+ personas,
+ { authorMode: 'own' },
+ 'handler',
+ );
+ expect(result).toEqual({ shouldTrigger: true, authorMode: 'own', isCascadePR: true });
+ });
+
+ it('returns shouldTrigger:true + isCascadePR:true for reviewer[bot] variant when authorMode=own', () => {
+ const result = evaluateAuthorMode(
+ 'cascade-reviewer[bot]',
+ personas,
+ { authorMode: 'own' },
+ 'handler',
+ );
+ expect(result).toEqual({ shouldTrigger: true, authorMode: 'own', isCascadePR: true });
+ });
+
+ it('returns shouldTrigger:false for external author when authorMode=own', () => {
+ const result = evaluateAuthorMode('external-dev', personas, { authorMode: 'own' }, 'handler');
+ expect(result).toEqual({ shouldTrigger: false, authorMode: 'own', isCascadePR: false });
+ });
+
+ it('returns shouldTrigger:true for external author when authorMode=external', () => {
+ const result = evaluateAuthorMode(
+ 'external-dev',
+ personas,
+ { authorMode: 'external' },
+ 'handler',
+ );
+ expect(result).toEqual({ shouldTrigger: true, authorMode: 'external', isCascadePR: false });
+ });
+
+ it('returns shouldTrigger:false for implementer when authorMode=external', () => {
+ const result = evaluateAuthorMode(
+ 'cascade-impl',
+ personas,
+ { authorMode: 'external' },
+ 'handler',
+ );
+ expect(result).toEqual({ shouldTrigger: false, authorMode: 'external', isCascadePR: true });
+ });
+
+ it('returns shouldTrigger:false for reviewer when authorMode=external (second regression test)', () => {
+ const result = evaluateAuthorMode(
+ 'cascade-reviewer',
+ personas,
+ { authorMode: 'external' },
+ 'handler',
+ );
+ expect(result).toEqual({ shouldTrigger: false, authorMode: 'external', isCascadePR: true });
+ });
+
+ it('returns shouldTrigger:true for any author when authorMode=all', () => {
+ for (const login of ['cascade-impl', 'cascade-reviewer', 'external-dev']) {
+ const result = evaluateAuthorMode(login, personas, { authorMode: 'all' }, 'handler');
+ expect(result?.shouldTrigger).toBe(true);
+ expect(result?.authorMode).toBe('all');
+ }
+ });
+
+ it('falls back to "own" when authorMode is an invalid string', () => {
+ const result = evaluateAuthorMode(
+ 'cascade-impl',
+ personas,
+ { authorMode: 'invalid' },
+ 'handler',
+ );
+ expect(result?.authorMode).toBe('own');
+ expect(result?.shouldTrigger).toBe(true);
+ });
+
+ it('falls back to "own" when authorMode is missing from parameters', () => {
+ const result = evaluateAuthorMode('cascade-impl', personas, {}, 'handler');
+ expect(result?.authorMode).toBe('own');
+ expect(result?.shouldTrigger).toBe(true);
+ });
+});
diff --git a/tests/unit/triggers/pr-opened.test.ts b/tests/unit/triggers/pr-opened.test.ts
index 7e9dcfe0..35f67b7d 100644
--- a/tests/unit/triggers/pr-opened.test.ts
+++ b/tests/unit/triggers/pr-opened.test.ts
@@ -417,7 +417,7 @@ describe('PROpenedTrigger', () => {
expect(result?.agentType).toBe('review');
});
- it('fires for reviewer persona PR when authorMode=external (reviewer is not implementer)', async () => {
+ it('skips reviewer persona PR when authorMode=external (reviewer is own)', async () => {
vi.mocked(checkTriggerEnabledWithParams).mockResolvedValueOnce({
enabled: true,
parameters: { authorMode: 'external' },
@@ -446,6 +446,39 @@ describe('PROpenedTrigger', () => {
},
};
+ const result = await trigger.handle(ctx);
+ expect(result).toBeNull();
+ });
+
+ it('fires for reviewer persona PR when authorMode=own', async () => {
+ vi.mocked(checkTriggerEnabledWithParams).mockResolvedValueOnce({
+ enabled: true,
+ parameters: { authorMode: 'own' },
+ });
+
+ const ctx: TriggerContext = {
+ project: mockProject,
+ source: 'github',
+ personaIdentities: { implementer: 'cascade-impl', reviewer: 'cascade-review' },
+ payload: {
+ action: 'opened',
+ number: 42,
+ pull_request: {
+ number: 42,
+ title: 'feat: add login',
+ body: 'Implements feature',
+ html_url: 'https://github.com/owner/repo/pull/42',
+ state: 'open',
+ draft: false,
+ head: { ref: 'feature/login', sha: 'abc' },
+ base: { ref: 'main' },
+ user: { login: 'cascade-review' },
+ },
+ repository: { full_name: 'owner/repo', html_url: 'https://github.com/owner/repo' },
+ sender: { login: 'cascade-review' },
+ },
+ };
+
const result = await trigger.handle(ctx);
expect(result).not.toBeNull();
expect(result?.agentType).toBe('review');
From 3e555f95397d188ed73380a89ad1b376d4709937 Mon Sep 17 00:00:00 2001
From: aaight
Date: Thu, 23 Apr 2026 23:25:36 +0200
Subject: [PATCH 3/5] feat(engine-ux): redesign Engine page with harness-first
layout and integrated model (#1175)
* feat(engine-ux): redesign Engine page with harness-first layout and integrated model
* fix(engine-ux): hide model and maxIterations on non-default engine tabs
Model and maxIterations are project-level settings that apply only to the
default engine. Rendering their inputs on every tab created a data corruption
risk: a user editing these fields while viewing a non-default tab would save
incompatible engine/model combinations (e.g. agentEngine=claude-code but
model=gpt-4o). Both sections are now wrapped in {isDefault && (...)} so they
only appear on the default engine's tab.
Co-Authored-By: Claude Sonnet 4.6
* fix(engine-ux): fix activeTab state out-of-sync with async defaultsQuery
Initialize activeTab as null so it reactively follows effectiveEngineId
until the user manually switches tabs. This prevents the mismatch where
defaultsQuery loads after initial render and reveals a different system
default engine than the one useState initialized with.
Co-Authored-By: Claude Sonnet 4.6
---------
Co-authored-by: Cascade Bot
Co-authored-by: Claude Sonnet 4.6
---
.../projects/project-harness-form.tsx | 386 +++++++++---------
1 file changed, 193 insertions(+), 193 deletions(-)
diff --git a/web/src/components/projects/project-harness-form.tsx b/web/src/components/projects/project-harness-form.tsx
index b895cc35..abf65152 100644
--- a/web/src/components/projects/project-harness-form.tsx
+++ b/web/src/components/projects/project-harness-form.tsx
@@ -17,6 +17,13 @@ import {
} from '@/components/ui/card.js';
import { Input } from '@/components/ui/input.js';
import { Label } from '@/components/ui/label.js';
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from '@/components/ui/select.js';
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs.js';
import {
Tooltip,
@@ -74,8 +81,9 @@ export function ProjectHarnessForm({ project }: { project: Project }) {
// The effective project-level engine: either explicitly set or the system default
const effectiveEngineId = agentEngine || systemDefaultEngineId;
- // Default tab to show: project's selected engine, or system default
- const defaultTab = effectiveEngineId;
+ // Controlled active tab — null means "follow effectiveEngineId reactively" (handles async defaultsQuery)
+ const [activeTab, setActiveTab] = useState(null);
+ const currentTab = activeTab ?? effectiveEngineId;
// Resolved engine defaults for EngineSettingsFields
function getEngineDefaults(engineId: string): Record | undefined {
@@ -84,6 +92,14 @@ export function ProjectHarnessForm({ project }: { project: Project }) {
: undefined;
}
+ function handleEngineSelectChange(value: string) {
+ const newEngine = value === '_system' ? '' : value;
+ setAgentEngine(newEngine);
+ // Switch active tab to the newly selected default engine
+ const newEffective = newEngine || systemDefaultEngineId;
+ setActiveTab(newEffective);
+ }
+
function handleSubmit(e: React.FormEvent) {
e.preventDefault();
const activeEngine = agentEngine || null;
@@ -107,224 +123,208 @@ export function ProjectHarnessForm({ project }: { project: Project }) {
- {/* Model & Iterations Card — engine-agnostic, always visible */}
- Model & Runtime
+ Engine
- Global model and iteration settings applied to all agents unless overridden per-agent.
+ Choose the default engine, then configure its model, settings, and credentials.
-
-
-
- {/* Per-engine tabs: credentials + settings + default toggle */}
-
-
- Engine Settings & Credentials
-
- Configure each engine's credentials and settings. The default engine tab is
- highlighted. New engines are added automatically as the catalog expands.
-
-
-
- {engines.length === 0 ? (
- Loading engines…
- ) : (
-
-
+ {/* Per-engine configuration tabs */}
+ {engines.length > 0 && (
+
+
+ {engines.map((engine) => {
+ const isDefault = engine.id === effectiveEngineId;
+ const isUsedByAgents = agentEnginesInUse.includes(engine.id);
+ return (
+
+ {engine.label}
+ {isDefault && (
+
+ Default
+
+ )}
+ {!isDefault && isUsedByAgents && (
+
+ In use
+
+ )}
+
+ );
+ })}
+
+
{engines.map((engine) => {
const isDefault = engine.id === effectiveEngineId;
- const isUsedByAgents = agentEnginesInUse.includes(engine.id);
- return (
-
- {engine.label}
- {isDefault && (
-
- Default
-
- )}
- {!isDefault && isUsedByAgents && (
-
- In use
-
- )}
-
+ const engineSecrets = ENGINE_SECRETS.filter((s) =>
+ s.engines?.includes(engine.id),
);
- })}
-
-
- {engines.map((engine) => {
- const isDefault = engine.id === effectiveEngineId;
- const isUsedByAgents = agentEnginesInUse.includes(engine.id);
- const engineSecrets = ENGINE_SECRETS.filter((s) =>
- s.engines?.includes(engine.id),
- );
- // Secrets shared with other engines: show a note
- const sharedSecretEngines = (envVarKey: string): string[] => {
- const secret = ENGINE_SECRETS.find((s) => s.envVarKey === envVarKey);
- if (!secret?.engines) return [];
- return secret.engines.filter((e) => e !== engine.id);
- };
+ const sharedSecretEngines = (envVarKey: string): string[] => {
+ const secret = ENGINE_SECRETS.find((s) => s.envVarKey === envVarKey);
+ if (!secret?.engines) return [];
+ return secret.engines.filter((e) => e !== engine.id);
+ };
+ const engineDefaults = getEngineDefaults(engine.id);
- const engineDefaults = getEngineDefaults(engine.id);
-
- return (
-
- {/* Engine description */}
- {engine.description && (
- {engine.description}
- )}
+ return (
+
+ {/* Engine description */}
+ {engine.description && (
+ {engine.description}
+ )}
- {/* Default engine indicator / Set as Default button */}
-
- {isDefault ? (
-
-
- ✓ Default engine for this project
- {agentEngine === '' &&
- ` (inheriting system default: ${capitalize(systemDefaultEngineId)})`}
-
- {agentEngine !== '' && (
-
setAgentEngine('')}
- className="ml-2 text-xs text-muted-foreground underline hover:text-foreground transition-colors"
- >
- Reset to system default
-
- )}
+ {/* Model — only shown for the default engine (project-level setting) */}
+ {isDefault && (
+
+
+ Model
+
+
+
+
+
+ Individual agents can override this in the Agents tab.
+
+
+
+
+
+ Project default. Per-agent overrides in the Agents tab.
+
- ) : (
-
setAgentEngine(engine.id)}
- className="inline-flex h-9 items-center rounded-md border border-input bg-background px-4 text-sm font-medium hover:bg-accent hover:text-accent-foreground transition-colors"
- >
- Set as Default Engine
-
- )}
- {!isDefault && isUsedByAgents && (
-
- Used by agent config overrides
-
)}
-
- {/* Engine settings */}
-
setEngineSettings(next ?? {})}
- engineDefaults={engineDefaults}
- />
+ {/* Engine Settings */}
+ setEngineSettings(next ?? {})}
+ engineDefaults={engineDefaults}
+ />
- {/* Engine credentials */}
- {engineSecrets.length > 0 ? (
-
-
-
Credentials
-
- API keys and tokens for {engine.label}. Values are stored encrypted
- and never returned to the browser.
+ {/* Max Iterations — only shown for the default engine (project-level setting) */}
+ {isDefault && (
+
+
+ Max Iterations
+
+
+
+
+
+ Individual agents can override this in the Agents tab.
+
+
+
+
setMaxIterations(e.target.value)}
+ placeholder={
+ defaults ? `${defaults.maxIterations} (default)` : 'e.g. 50'
+ }
+ />
+
+ Safety limit on tool-call iterations per run.
- {engineSecrets.map((secret) => {
- const sharedWith = sharedSecretEngines(secret.envVarKey);
- const sharedNote =
- sharedWith.length > 0
- ? `Also used by: ${sharedWith.map((id) => engines.find((e) => e.id === id)?.label ?? id).join(', ')}`
- : undefined;
- const description =
- secret.description + (sharedNote ? ` · ${sharedNote}` : '');
- return (
-
c.envVarKey === secret.envVarKey,
- )}
- />
- );
- })}
-
- ) : (
-
- No credentials required for {engine.label}.
-
- )}
-
- );
- })}
-
- )}
+ )}
+
+ {/* Credentials */}
+ {engineSecrets.length > 0 ? (
+
+
+
Credentials
+
+ API keys and tokens for {engine.label}. Values are stored encrypted
+ and never returned to the browser.
+
+
+ {engineSecrets.map((secret) => {
+ const sharedWith = sharedSecretEngines(secret.envVarKey);
+ const sharedNote =
+ sharedWith.length > 0
+ ? `Also used by: ${sharedWith.map((id) => engines.find((e) => e.id === id)?.label ?? id).join(', ')}`
+ : undefined;
+ const description =
+ secret.description + (sharedNote ? ` · ${sharedNote}` : '');
+ return (
+
c.envVarKey === secret.envVarKey,
+ )}
+ />
+ );
+ })}
+
+ ) : (
+
+ No credentials required for {engine.label}.
+
+ )}
+
+ );
+ })}
+
+ )}
+
From e9f85e2b9696f66cd349450ab4d485b0fe4af916 Mon Sep 17 00:00:00 2001
From: aaight
Date: Fri, 24 Apr 2026 00:07:38 +0200
Subject: [PATCH 4/5] feat(backlog-manager): maximize throughput by filling all
available capacity slots (#1176)
* feat(backlog-manager): maximize throughput by filling all available capacity slots
* test(worker-entry): clear JOB_* env vars in beforeEach to prevent CASCADE process inheritance
When running inside a CASCADE worker container, JOB_ID, JOB_TYPE, and JOB_DATA
are set in the process environment. Tests in `main() - environment variable
validation` must clear these before each test so the "all env vars absent"
case actually sees an empty environment.
Co-Authored-By: Claude Sonnet 4.6
---------
Co-authored-by: Cascade Bot
Co-authored-by: Claude Sonnet 4.6
---
src/agents/definitions/backlog-manager.yaml | 4 +--
.../prompts/templates/backlog-manager.eta | 10 ++++---
src/triggers/github/pr-merged.ts | 1 +
src/triggers/shared/agent-execution.ts | 1 +
src/triggers/shared/backlog-check.ts | 27 ++++++++++++++++---
tests/unit/agents/prompts.test.ts | 21 +++++++++++++--
.../triggers/shared/backlog-check.test.ts | 23 ++++++++++++++++
tests/unit/worker-entry.test.ts | 6 +++++
8 files changed, 81 insertions(+), 12 deletions(-)
diff --git a/src/agents/definitions/backlog-manager.yaml b/src/agents/definitions/backlog-manager.yaml
index 3664b010..e8646c5f 100644
--- a/src/agents/definitions/backlog-manager.yaml
+++ b/src/agents/definitions/backlog-manager.yaml
@@ -63,7 +63,7 @@ prompts:
A Pipeline Snapshot has been pre-loaded into your context with the current state of all pipeline lists (BACKLOG, TODO, IN_PROGRESS, IN_REVIEW, DONE, MERGED).
1. Review the pre-loaded Pipeline Snapshot and count items currently in the active pipeline (TODO + IN PROGRESS + IN REVIEW).
- 2. If the count is below the capacity limit (see system prompt): use the pre-loaded BACKLOG data from the snapshot to select the best unblocked item(s) and move them to TODO (up to the remaining capacity).
+ 2. If the count is below the capacity limit (see system prompt): use the pre-loaded BACKLOG data from the snapshot to select ALL eligible unblocked items to fill remaining capacity completely — always move the maximum number possible.
3. If already at or above capacity: exit immediately without taking action.
-hint: Only act if pipeline has capacity (items in TODO + IN PROGRESS + IN REVIEW < maxInFlightItems).
+hint: Only act if pipeline has capacity (items in TODO + IN PROGRESS + IN REVIEW < maxInFlightItems). When acting, ALWAYS fill ALL remaining capacity.
diff --git a/src/agents/prompts/templates/backlog-manager.eta b/src/agents/prompts/templates/backlog-manager.eta
index d31c80e8..70ef4baa 100644
--- a/src/agents/prompts/templates/backlog-manager.eta
+++ b/src/agents/prompts/templates/backlog-manager.eta
@@ -12,7 +12,7 @@ Use these EXACT IDs when calling `ListWorkItems` and `MoveWorkItem`:
CRITICAL:
1. **CHECK PIPELINE FIRST** - Count items in the active pipeline (TODO + IN PROGRESS + IN REVIEW) and compare to the capacity limit (<%= it.maxInFlightItems ?? 1 %>).
-2. **CAPACITY LIMIT** - <%= it.maxInFlightItems == null || it.maxInFlightItems === 1 ? 'Move exactly one ' + (it.workItemNoun || 'card') + ' per run. Never move multiple.' : 'Move up to ' + it.maxInFlightItems + ' ' + (it.workItemNounPlural || 'cards') + ' per run (only enough to fill remaining capacity).' %>
+2. **CAPACITY LIMIT** - <%= it.maxInFlightItems == null || it.maxInFlightItems === 1 ? 'Move exactly one ' + (it.workItemNoun || 'card') + ' per run. Never move multiple.' : 'You MUST fill ALL remaining capacity. Move up to ' + it.maxInFlightItems + ' ' + (it.workItemNounPlural || 'cards') + ' per run — always move as many eligible items as there are open slots.' %>
3. **READ BEFORE SELECTING** - Read <%= it.workItemNoun || 'card' %> contents, descriptions, and checklists to make an informed decision.
4. DO NOT MANAGE LABELS - Labels are handled automatically by the system.
@@ -34,7 +34,7 @@ A **Pipeline Snapshot** has been pre-loaded into your context containing the cur
- Do NOT post any comments, do NOT scan the backlog
- Simply end the session
-3. **Only if the active pipeline count is below the capacity limit**, proceed to backlog selection. The number of <%= it.workItemNounPlural || 'cards' %> you may move = capacity limit (<%= it.maxInFlightItems ?? 1 %>) minus current active count.
+3. **Only if the active pipeline count is below the capacity limit**, proceed to backlog selection. You MUST move exactly min(remaining_capacity, eligible_unblocked_items) <%= it.workItemNounPlural || 'cards' %>. Remaining capacity = capacity limit (<%= it.maxInFlightItems ?? 1 %>) minus current active count. If 2 open slots and 2 eligible items exist, move BOTH.
Note: DONE and MERGED <%= it.workItemNounPlural || 'cards' %> are completed work and do not block new work from being selected. The snapshot shows their titles and URLs for dependency checking.
@@ -53,7 +53,7 @@ When the active pipeline has capacity:
- Comments indicating external dependencies
- **Stale annotations**: Text like "(not yet merged)" in a description was written when the <%= it.workItemNoun || 'card' %> was created and is **always stale**. Do NOT use it as evidence of blocked status — only the MERGED list itself is authoritative.
- **IMPORTANT — MERGED check**: Before declaring a <%= it.workItemNoun || 'card' %> blocked, scan the MERGED section of the Pipeline Snapshot. Use **substring matching**: if the dependency name (e.g., "SCMIntegration", "OpenCodeEngine", "integrationRoles") appears anywhere within a MERGED title, that dependency is **resolved** and does NOT block. Each MERGED entry also shows its URL in parentheses — if the description references a <%= it.pmName || 'PM' %> link, match it against the URL too. A module or class name found anywhere in a title counts as a match.
-4. **Select the best unblocked <%= it.workItemNoun || 'card' %>(s)** considering:
+4. **Select ALL eligible unblocked <%= it.workItemNounPlural || 'cards' %> up to remaining capacity** considering:
- Smaller, self-contained <%= it.workItemNounPlural || 'cards' %> are preferred
- <%= it.workItemNounPluralCap || 'Cards' %> with clear acceptance criteria
- <%= it.workItemNounPluralCap || 'Cards' %> that don't reference incomplete work
@@ -100,7 +100,9 @@ Manual intervention may be needed to unblock the backlog.
- NEVER move <%= it.workItemNounPlural || 'cards' %> if the active pipeline is at capacity (<%= it.maxInFlightItems ?? 1 %> item(s))
- EXIT SILENTLY if pipeline is at capacity - do not post comments
- ALWAYS read <%= it.workItemNoun || 'card' %> contents before making a selection decision
-- <%= it.maxInFlightItems == null || it.maxInFlightItems === 1 ? 'ALWAYS move exactly ONE ' + (it.workItemNoun || 'card') + ' per run' : 'Move only as many ' + (it.workItemNounPlural || 'cards') + ' as needed to reach capacity (limit: ' + it.maxInFlightItems + ')' %>
+- <%= it.maxInFlightItems == null || it.maxInFlightItems === 1 ? 'ALWAYS move exactly ONE ' + (it.workItemNoun || 'card') + ' per run' : 'ALWAYS maximize throughput — fill ALL capacity slots with eligible items (limit: ' + it.maxInFlightItems + '). Never move fewer when eligible items exist.' %>
+<% if ((it.maxInFlightItems ?? 1) > 1) { %>- MAXIMIZE THROUGHPUT — if remaining capacity is <%= it.maxInFlightItems %> and <%= it.maxInFlightItems %>+ unblocked items exist, you MUST move <%= it.maxInFlightItems %> items, not fewer.
+<% } %>
- ALWAYS post a comment BEFORE moving the <%= it.workItemNoun || 'card' %> — comment first, then move to TODO
- CONSERVATIVE on detecting dependencies — when unsure if text implies a dependency, treat it as one. But GENEROUS on MERGED resolution — use substring matching and prefer resolved over blocked for ambiguous matches.
- LOOK FOR dependency keywords: "blocked by", "depends on", "waiting for", "after", "requires"
diff --git a/src/triggers/github/pr-merged.ts b/src/triggers/github/pr-merged.ts
index 0f90b90a..0d145051 100644
--- a/src/triggers/github/pr-merged.ts
+++ b/src/triggers/github/pr-merged.ts
@@ -94,6 +94,7 @@ export class PRMergedTrigger implements TriggerHandler {
reason: capacityResult.reason,
inFlightCount: capacityResult.inFlightCount,
limit: capacityResult.limit,
+ availableSlots: capacityResult.availableSlots,
});
} else {
logger.info('Chaining to backlog-manager after PR merge', { workItemId, prNumber });
diff --git a/src/triggers/shared/agent-execution.ts b/src/triggers/shared/agent-execution.ts
index c72796d1..c3ffe7db 100644
--- a/src/triggers/shared/agent-execution.ts
+++ b/src/triggers/shared/agent-execution.ts
@@ -734,6 +734,7 @@ async function propagateAutoLabelAfterSplitting(
reason: capacityResult.reason,
inFlightCount: capacityResult.inFlightCount,
limit: capacityResult.limit,
+ availableSlots: capacityResult.availableSlots,
},
);
return null;
diff --git a/src/triggers/shared/backlog-check.ts b/src/triggers/shared/backlog-check.ts
index 074c3c7b..cb20459f 100644
--- a/src/triggers/shared/backlog-check.ts
+++ b/src/triggers/shared/backlog-check.ts
@@ -39,6 +39,14 @@ export interface PipelineCapacityResult {
inFlightCount?: number;
/** The effective capacity limit used for the comparison. */
limit?: number;
+ /**
+ * Number of open slots available (limit - inFlightCount).
+ * - `'backlog-empty'`: equals limit (pipeline has capacity but nothing to fill it)
+ * - `'at-capacity'`: 0 (no open slots)
+ * - `'below-capacity'`: limit - inFlightCount (slots waiting to be filled)
+ * - `'error'` / `'misconfigured'`: undefined (cannot be computed)
+ */
+ availableSlots?: number;
}
/**
@@ -113,8 +121,17 @@ export async function isPipelineAtCapacity(
// key, mapped to the provider's native identifier internally.
const backlogItems = await provider.listWorkItems(undefined, { status: 'backlog' });
if (backlogItems.length === 0) {
- logger.info('isPipelineAtCapacity: backlog is empty', { projectId: project.id });
- return { atCapacity: true, reason: 'backlog-empty', inFlightCount: 0, limit };
+ logger.info('isPipelineAtCapacity: backlog is empty', {
+ projectId: project.id,
+ availableSlots: limit,
+ });
+ return {
+ atCapacity: true,
+ reason: 'backlog-empty',
+ inFlightCount: 0,
+ limit,
+ availableSlots: limit,
+ };
}
const inFlightLists = await Promise.all(
@@ -129,11 +146,13 @@ export async function isPipelineAtCapacity(
projectId: project.id,
inFlightCount,
limit,
+ availableSlots: 0,
});
- return { atCapacity: true, reason: 'at-capacity', inFlightCount, limit };
+ return { atCapacity: true, reason: 'at-capacity', inFlightCount, limit, availableSlots: 0 };
}
- return { atCapacity: false, reason: 'below-capacity', inFlightCount, limit };
+ const availableSlots = limit - inFlightCount;
+ return { atCapacity: false, reason: 'below-capacity', inFlightCount, limit, availableSlots };
} catch (err) {
logger.warn('isPipelineAtCapacity: failed to check capacity, assuming not at capacity', {
projectId: project.id,
diff --git a/tests/unit/agents/prompts.test.ts b/tests/unit/agents/prompts.test.ts
index 34f58fc6..76f851ae 100644
--- a/tests/unit/agents/prompts.test.ts
+++ b/tests/unit/agents/prompts.test.ts
@@ -235,9 +235,26 @@ describe('system prompts content', () => {
it('backlog-manager prompt renders multi-item wording when limit>1', () => {
const prompt = getSystemPrompt('backlog-manager', { maxInFlightItems: 3 });
expect(prompt).toContain(
- 'Move up to 3 cards per run (only enough to fill remaining capacity).',
+ 'You MUST fill ALL remaining capacity. Move up to 3 cards per run — always move as many eligible items as there are open slots.',
);
- expect(prompt).toContain('Move only as many cards as needed to reach capacity (limit: 3)');
+ expect(prompt).toContain(
+ 'ALWAYS maximize throughput — fill ALL capacity slots with eligible items (limit: 3). Never move fewer when eligible items exist.',
+ );
+ });
+
+ it('backlog-manager prompt includes maximize-throughput rule when limit>1', () => {
+ const prompt = getSystemPrompt('backlog-manager', { maxInFlightItems: 2 });
+ expect(prompt).toContain('MAXIMIZE THROUGHPUT');
+ expect(prompt).toContain('you MUST move 2 items, not fewer');
+ // Should NOT render the maximize-throughput rule for single-item mode
+ const promptSingle = getSystemPrompt('backlog-manager', { maxInFlightItems: 1 });
+ expect(promptSingle).not.toContain('MAXIMIZE THROUGHPUT');
+ });
+
+ it('backlog-manager prompt instructs exact count for multi-slot scenarios', () => {
+ const prompt = getSystemPrompt('backlog-manager', { maxInFlightItems: 2 });
+ expect(prompt).toContain('min(remaining_capacity, eligible_unblocked_items)');
+ expect(prompt).toContain('If 2 open slots and 2 eligible items exist, move BOTH');
});
it('backlog-manager prompt includes conflict awareness section when limit>1', () => {
diff --git a/tests/unit/triggers/shared/backlog-check.test.ts b/tests/unit/triggers/shared/backlog-check.test.ts
index 95943ac4..cd119647 100644
--- a/tests/unit/triggers/shared/backlog-check.test.ts
+++ b/tests/unit/triggers/shared/backlog-check.test.ts
@@ -107,6 +107,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('backlog-empty');
expect(result.inFlightCount).toBe(0);
expect(result.limit).toBe(1);
+ expect(result.availableSlots).toBe(1);
});
it('returns at-capacity when in-flight count equals limit (default 1)', async () => {
@@ -129,6 +130,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('at-capacity');
expect(result.inFlightCount).toBe(1);
expect(result.limit).toBe(1);
+ expect(result.availableSlots).toBe(0);
});
it('returns at-capacity when in-flight count exceeds limit', async () => {
@@ -166,6 +168,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('at-capacity');
expect(result.inFlightCount).toBe(3);
expect(result.limit).toBe(2);
+ expect(result.availableSlots).toBe(0);
});
it('returns below-capacity when in-flight count is below limit=3', async () => {
@@ -203,6 +206,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('below-capacity');
expect(result.inFlightCount).toBe(2);
expect(result.limit).toBe(3);
+ expect(result.availableSlots).toBe(1);
});
it('uses default limit=1 when maxInFlightItems is not set', async () => {
@@ -257,6 +261,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('below-capacity');
expect(result.inFlightCount).toBe(0);
expect(result.limit).toBe(5);
+ expect(result.availableSlots).toBe(5);
});
it('returns not-at-capacity (error fallback) when Trello API throws', async () => {
@@ -269,6 +274,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('error');
+ expect(result.availableSlots).toBeUndefined();
expect(mockLogger.warn).toHaveBeenCalledWith(
'isPipelineAtCapacity: failed to check capacity, assuming not at capacity',
expect.objectContaining({ projectId: trelloProject.id, error: expect.any(String) }),
@@ -283,6 +289,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
});
it('returns misconfigured when Trello config is missing entirely', async () => {
@@ -293,6 +300,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
});
it('counts items across todo, inProgress, and inReview lists', async () => {
@@ -331,6 +339,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('below-capacity');
expect(result.inFlightCount).toBe(6); // 2 + 1 + 3
expect(result.limit).toBe(10);
+ expect(result.availableSlots).toBe(4); // 10 - 6
});
});
@@ -371,6 +380,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('backlog-empty');
expect(result.inFlightCount).toBe(0);
expect(result.limit).toBe(1);
+ expect(result.availableSlots).toBe(1);
});
it('returns at-capacity when in-flight count equals limit=1', async () => {
@@ -394,6 +404,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('at-capacity');
expect(result.inFlightCount).toBe(1);
expect(result.limit).toBe(1);
+ expect(result.availableSlots).toBe(0);
});
it('returns below-capacity when in-flight count is less than limit=3', async () => {
@@ -432,6 +443,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('below-capacity');
expect(result.inFlightCount).toBe(2);
expect(result.limit).toBe(3);
+ expect(result.availableSlots).toBe(1);
});
it('returns at-capacity when in-flight count exceeds limit=2', async () => {
@@ -471,6 +483,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('at-capacity');
expect(result.inFlightCount).toBe(3);
expect(result.limit).toBe(2);
+ expect(result.availableSlots).toBe(0);
});
it('uses default limit=1 when maxInFlightItems is not set', async () => {
@@ -527,6 +540,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('below-capacity');
expect(result.inFlightCount).toBe(0);
expect(result.limit).toBe(5);
+ expect(result.availableSlots).toBe(5);
});
it('returns not-at-capacity (error fallback) when JIRA API throws', async () => {
@@ -540,6 +554,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('error');
+ expect(result.availableSlots).toBeUndefined();
expect(mockLogger.warn).toHaveBeenCalledWith(
'isPipelineAtCapacity: failed to check capacity, assuming not at capacity',
expect.objectContaining({ projectId: jiraProject.id, error: expect.any(String) }),
@@ -557,6 +572,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
});
it('returns misconfigured when JIRA config has no projectKey', async () => {
@@ -570,6 +586,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
});
it('returns misconfigured when JIRA config is missing entirely', async () => {
@@ -580,6 +597,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
});
});
@@ -613,6 +631,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(true);
expect(result.reason).toBe('backlog-empty');
+ expect(result.availableSlots).toBe(1);
expect(provider.listWorkItems).toHaveBeenCalledWith(undefined, { status: 'backlog' });
});
@@ -634,6 +653,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.reason).toBe('below-capacity');
expect(result.inFlightCount).toBe(0);
expect(result.limit).toBe(1);
+ expect(result.availableSlots).toBe(1);
});
it('returns at-capacity when Linear in-flight count meets the limit', async () => {
@@ -651,6 +671,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(true);
expect(result.reason).toBe('at-capacity');
expect(result.inFlightCount).toBe(1);
+ expect(result.availableSlots).toBe(0);
});
it('returns misconfigured when Linear has no statuses.backlog configured', async () => {
@@ -664,6 +685,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
expect(provider.listWorkItems).not.toHaveBeenCalled();
});
@@ -678,6 +700,7 @@ describe('isPipelineAtCapacity', () => {
expect(result.atCapacity).toBe(false);
expect(result.reason).toBe('misconfigured');
+ expect(result.availableSlots).toBeUndefined();
});
});
diff --git a/tests/unit/worker-entry.test.ts b/tests/unit/worker-entry.test.ts
index 74b0853b..4b3686cd 100644
--- a/tests/unit/worker-entry.test.ts
+++ b/tests/unit/worker-entry.test.ts
@@ -516,6 +516,12 @@ describe('main() - environment variable validation', () => {
let exitSpy: ReturnType;
beforeEach(() => {
+ // Clear JOB_* env vars before each test — they may be inherited from the outer
+ // process (e.g. when running inside a CASCADE worker container). Tests that need
+ // specific values set them explicitly inside the test body; afterEach cleans up.
+ delete process.env.JOB_ID;
+ delete process.env.JOB_TYPE;
+ delete process.env.JOB_DATA;
exitSpy = vi.spyOn(process, 'exit').mockImplementation((code?) => {
throw new Error(`process.exit(${code ?? 0})`);
});
From 045d9dbbbb44f3d4b300a67ab90383f60d9d4d48 Mon Sep 17 00:00:00 2001
From: Zbigniew Sobiecki
Date: Fri, 24 Apr 2026 12:04:31 +0200
Subject: [PATCH 5/5] =?UTF-8?q?feat(subprocess):=20observable=20subprocess?=
=?UTF-8?q?=20helper=20=E2=80=94=20streaming=20+=20heartbeat=20+=20dual=20?=
=?UTF-8?q?timeouts=20(#1177)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Spec 013. Replaces buffered spawn() runCommand() with execa + tree-kill: streams child output to parent stderr live, emits 30s silence heartbeats, enforces idle + wall-clock timeouts with SIGTERM→SIGKILL process-group kill, preserves captured hook output on success. createPR push/commit pass tighter explicit timeouts. oclif bootstrap warning silenced. 8338/8338 tests pass.
---
CHANGELOG.md | 2 +
README.md | 2 +
bin/cascade-tools.js | 2 +-
docs/cascade-directory.md | 6 +
.../1-observable-subprocess-helper.md.done | 302 +++++++++++++++
.../_coverage.md | 32 ++
.../013-subprocess-output-streaming.md.done | 135 +++++++
package-lock.json | 191 ++++++++-
package.json | 2 +
src/gadgets/github/core/createPR.ts | 64 ++-
src/utils/repo.ts | 221 +++++++++--
tests/unit/gadgets/github.test.ts | 14 +-
.../unit/gadgets/github/core/createPR.test.ts | 163 ++++++++
tests/unit/utils/repo.test.ts | 364 ++++++++++++++++--
14 files changed, 1421 insertions(+), 79 deletions(-)
create mode 100644 docs/plans/013-subprocess-output-streaming/1-observable-subprocess-helper.md.done
create mode 100644 docs/plans/013-subprocess-output-streaming/_coverage.md
create mode 100644 docs/specs/013-subprocess-output-streaming.md.done
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a60cb0f..b24fe938 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@ All notable user-visible changes to CASCADE are documented here. The format is l
### Changed
+- **`cascade-tools` now streams subprocess output live** (spec 013). The shared subprocess helper (on top of `execa` + `tree-kill`) forwards child stdout/stderr to the parent's stderr line-by-line as it arrives, emits a heartbeat line on stderr every 30 seconds of child silence (configurable), enforces both an idle-silence timeout (default 120s) and a wall-clock timeout (default 600s) with SIGTERM→SIGKILL escalation, and kills the full process tree on timeout. `git push` and `git commit` invoked by `scm create-pr` pass tighter per-caller timeouts and now return captured hook output in the result on success (previously discarded). Result shape is backward-compatible — `{ stdout, stderr, exitCode }` preserved; new optional `reason: 'idle-timeout' | 'wall-timeout'` surfaces when the helper killed the child. Motivation: LLM-driven CASCADE agents watching an output file could not distinguish a slow pre-push hook (~60s of silence) from a hung process, leading to retry loops that burned 5–10+ minutes of run budget. See [spec 013](docs/specs/013-subprocess-output-streaming.md).
+- **`cascade-tools` `command bootstrap not found` warning silenced** (spec 013). The oclif command-loader glob now excludes `bootstrap.js`, which is a side-effect import from `bin/cascade-tools.js`, not a command.
- **Linear and JIRA checklists are now inline markdown, not sub-issues / subtasks.** Acceptance criteria, implementation steps, and other checklist items added by CASCADE agents (via `AddChecklist` / `AddChecklistItem`) now live as `- [ ]` / `- [x]` markdown checkboxes inside the parent issue's description, under a `### {Checklist Name}` heading. Previously these created full sub-issues (Linear) or subtasks (JIRA) — one per item — which cluttered boards and inflated backlog counts (a single split could create 30+ orphan items). The PMProvider interface is unchanged; only the Linear and JIRA adapter internals changed. Trello continues to use its native checklist API. Forward-only — existing sub-issues / subtasks created before this change are not migrated. See [spec 008](docs/specs/008-inline-checklists.md.done) and the new "Checklist implementation by provider" section in [src/integrations/README.md](src/integrations/README.md).
### Internal
diff --git a/README.md b/README.md
index ab1eb33a..7369b6c5 100644
--- a/README.md
+++ b/README.md
@@ -159,6 +159,8 @@ All project-level credentials (GitHub tokens, PM keys, LLM API keys) are stored
**`.cascade/` directory** — Each target repository can include a `.cascade/` directory with hooks that control how the agent sets up the project, lints after edits, and runs tests. See **[`.cascade/` Directory Guide](./docs/cascade-directory.md)**.
+**Observable subprocesses** — `cascade-tools` streams child stdout/stderr live to the parent's stderr so LLM-driven agents can see progress as it happens, emits 30-second heartbeats during silent stretches, and enforces both idle-silence and wall-clock timeouts with SIGTERM→SIGKILL escalation across the full process tree. See [spec 013](./docs/specs/013-subprocess-output-streaming.md).
+
For deeper documentation on all of these topics, see [CLAUDE.md](./CLAUDE.md).
---
diff --git a/bin/cascade-tools.js b/bin/cascade-tools.js
index 360389b9..261257a9 100755
--- a/bin/cascade-tools.js
+++ b/bin/cascade-tools.js
@@ -21,7 +21,7 @@ pjson.oclif = {
commands: {
strategy: 'pattern',
target: './dist/cli',
- globPatterns: ['**/*.js', '!**/dashboard/**', '!**/_shared/**', '!base.js'],
+ globPatterns: ['**/*.js', '!**/dashboard/**', '!**/_shared/**', '!base.js', '!bootstrap.js'],
},
topicSeparator: ' ',
};
diff --git a/docs/cascade-directory.md b/docs/cascade-directory.md
index cdabbb4f..6e31c4d5 100644
--- a/docs/cascade-directory.md
+++ b/docs/cascade-directory.md
@@ -214,3 +214,9 @@ The `env` file is committed to your repository. Keep secrets in CASCADE's creden
### Add `.cascade/context/` to `.gitignore`
The `context/` subdirectory is managed entirely by CASCADE. There is nothing useful to commit there, and its contents can be large. Add it to `.gitignore` to keep your repository clean.
+
+### Observable subprocesses (spec 013)
+
+When CASCADE agents shell out via `cascade-tools` (most visibly `scm create-pr`, which invokes `git push` and runs your repo's pre-push hooks), output now streams to the agent's log as it arrives — not only at process exit. If your hook has a silent stretch (e.g. `tsc` running 30+ seconds without output), cascade-tools emits a heartbeat line every ~30 seconds so the agent can distinguish progress from hang.
+
+Two independent timeouts protect against genuine hangs: a per-call wall-clock (default 600s, tighter for `git push` and `git commit` to sit under the gadget's 240s budget) and an idle-silence timeout (default 120s — child emitting nothing at all). On timeout, cascade-tools sends SIGTERM to the full process tree (not just the direct child) and escalates to SIGKILL after a short grace window. This means `.cascade/setup.sh` scripts that legitimately take a while are safe as long as they emit occasional output; silent multi-minute stretches will be terminated.
diff --git a/docs/plans/013-subprocess-output-streaming/1-observable-subprocess-helper.md.done b/docs/plans/013-subprocess-output-streaming/1-observable-subprocess-helper.md.done
new file mode 100644
index 00000000..53fab259
--- /dev/null
+++ b/docs/plans/013-subprocess-output-streaming/1-observable-subprocess-helper.md.done
@@ -0,0 +1,302 @@
+---
+id: 013
+slug: subprocess-output-streaming
+plan: 1
+plan_slug: observable-subprocess-helper
+level: plan
+parent_spec: docs/specs/013-subprocess-output-streaming.md
+depends_on: []
+status: done
+---
+
+# 013/1: Observable subprocess helper
+
+> Part 1 of 1 in the 013-subprocess-output-streaming plan. See [parent spec](../../specs/013-subprocess-output-streaming.md).
+
+## Summary
+
+This plan is the full execution of spec 013. One cohesive change: replace the hand-rolled `spawn`-based `runCommand()` in `src/utils/repo.ts` with an `execa`-backed implementation that streams child output to the parent's stderr as it arrives, emits a heartbeat line every 30s during child silence, kills the child (and its descendants) on either an idle-silence timeout or a wall-clock timeout via a SIGTERM→SIGKILL ladder, and preserves captured stdout/stderr in the returned result on both success and failure. A second, trivially-adjacent change updates `bin/cascade-tools.js` to exclude `bootstrap.js` from the oclif command-loader glob, silencing the `command bootstrap not found` warning.
+
+**Value ships:** agents watching cascade-tools output stop perceiving long `git push`/hook runs as hangs; cascade-tools emits live progress and enforces clean termination for actually-stuck children; operator log output is cleaner. All nine spec ACs land here (AC 9 is `[manual]`).
+
+**What this plan does NOT change:** the `runCommand()` exported signature's existing fields (`{ stdout, stderr, exitCode }`) — they continue to appear in the result object (we only add an optional `reason` field for termination cause). No caller is required to pass the new `options` arg; defaults cover every existing callsite. Gadget-level 240s timeout and the dashboard/router/worker paths are untouched.
+
+**Components delivered:**
+- `package.json` — add `execa` and `tree-kill` to `dependencies`
+- `src/utils/repo.ts` — full rewrite of `runCommand()` on top of execa; new exported `RunCommandOptions` type; new optional `reason` field on result
+- `src/gadgets/github/core/createPR.ts` — `pushBranch()` and `stageAndCommit()` pass explicit tighter timeouts (push waits up to the gadget's 240s ceiling); success path returns captured hook output instead of dropping it
+- `bin/cascade-tools.js` — append `!bootstrap.js` to oclif `globPatterns`
+- `tests/unit/utils/repo.test.ts` — retool existing mocks (spawn → execa), add tests for streaming, heartbeat, idle timeout, wall timeout, tree-kill, kept-output-on-success
+- `tests/unit/gadgets/github/core/createPR.test.ts` — add assertions that pushBranch/stageAndCommit pass explicit timeout options and that success-path result exposes hook output
+
+**Deferred to later plans in this spec:**
+- None — 1-plan spec.
+
+---
+
+## Spec ACs satisfied by this plan
+
+- Spec AC #1 (live stderr streaming during hook runs) — **full**
+- Spec AC #2 (30s heartbeat on silence) — **full**
+- Spec AC #3 (idle-timeout kill with SIGTERM→SIGKILL) — **full**
+- Spec AC #4 (wall-clock kill with SIGTERM→SIGKILL) — **full**
+- Spec AC #5 (descendant / process-group kill) — **full**
+- Spec AC #6 (captured output preserved on success + failure) — **full**
+- Spec AC #7 (oclif `command bootstrap not found` warning gone) — **full**
+- Spec AC #8 (backward-compat result shape for all existing callers) — **full**
+- Spec AC #9 (agent end-to-end sees hook progress ≤30s, no retry loop) — **full `[manual]`** — see Manual Verification section
+
+---
+
+## Depends On
+
+None. Plan depends only on cascade's existing test + build toolchain and a working network for `npm install`.
+
+---
+
+## Detailed Task List (TDD)
+
+### 1. Add `execa` and `tree-kill` dependencies
+
+**No tests for this step directly** — dependency presence is verified indirectly by the helper tests below.
+
+**Implementation** (`package.json`):
+- Add `"execa": "^9.6.1"` to `dependencies`
+- Add `"tree-kill": "^1.2.2"` to `dependencies`
+- Add `"@types/tree-kill": "^1.2.2"` to `devDependencies` (tree-kill ships CJS; types live under `@types`)
+- Run `npm install` and commit the updated `package-lock.json`
+
+---
+
+### 2. Rewrite `runCommand()` on execa
+
+**Tests first** (`tests/unit/utils/repo.test.ts`):
+
+Retool the existing `vi.mock('node:child_process', ...)` block: keep it for residual callsites but remove the `spawn` mock and switch the `runCommand` tests to mock `execa` and `tree-kill` directly. Use `vi.mock('execa', ...)` returning a fake subprocess whose `stdout` / `stderr` are `Readable` streams the test can push to, with a `pid` number and a `then(...)` resolver representing the await of the subprocess. Use `vi.mock('tree-kill', ...)` returning a mock function.
+
+For each test specify name, type, setup, expected outcome, AND expected red.
+
+- `streams child stdout to parent stderr line-by-line as it arrives` — unit — mock execa subprocess pushes `"line1\n"`, then `"line2\n"` to `stdout`; spy on `process.stderr.write`; await helper call → both lines appear on the spy in order, BEFORE the subprocess resolves. Expected red: `AssertionError: expected process.stderr.write to have been called with "line1\n" but it was called [] (0 times)`.
+- `streams child stderr to parent stderr line-by-line` — unit — same shape as above but push to child `stderr` → parent stderr receives. Expected red: `AssertionError: expected process.stderr.write to have been called with "err1\n" but it was called [] (0 times)`.
+- `emits a heartbeat to parent stderr after N ms of child silence, citing elapsed time and command label` — unit — `vi.useFakeTimers()`; call helper with `heartbeatMs: 1000` and a label; subprocess emits no data; advance timers by 1000ms → a line matching `/\[git-push\] still running \(1s\)/` appears on parent stderr. Expected red: `AssertionError: expected process.stderr.write to have been called with match(/\[git-push\] still running/) but it was not`.
+- `resets the heartbeat timer when child emits output` — unit — fake timers; `heartbeatMs: 1000`; advance 900ms (no heartbeat yet); subprocess pushes `"tick\n"`; advance 1000ms more → only one heartbeat fires total (at 1900ms cumulative), not two. Expected red: `AssertionError: expected 1 heartbeat, got 2 (heartbeat at 1000ms was not cancelled by child output at 900ms)`.
+- `does not emit heartbeat when child exits before heartbeatMs elapses` — unit — `heartbeatMs: 10_000`; subprocess resolves after 100ms → zero heartbeat lines. Expected red: `AssertionError: expected 0 heartbeats, got 1`.
+- `kills the child via tree-kill with SIGTERM when idleTimeoutMs elapses with no output` — unit — fake timers; `idleTimeoutMs: 5000`; no child output; advance timers by 5000ms → tree-kill mock called with `(pid, 'SIGTERM')`; result `reason` is `'idle-timeout'`; `exitCode` is non-zero. Expected red: `AssertionError: expected tree-kill to have been called with [<pid>, "SIGTERM"] but was not called`.
+- `escalates to SIGKILL after forceKillAfterMs if the child did not exit on SIGTERM` — unit — fake timers; idle-timeout fires SIGTERM; advance another 5000ms without the child exiting → tree-kill called a second time with `(pid, 'SIGKILL')`. Expected red: `AssertionError: expected tree-kill to be called 2 times (SIGTERM then SIGKILL), was called 1 time`.
+- `kills the child via tree-kill with SIGTERM when wallTimeoutMs elapses even with ongoing output` — unit — fake timers; `wallTimeoutMs: 5000`, `idleTimeoutMs: 100_000`; subprocess pushes data every 500ms (resets idle timer); advance 5000ms → tree-kill called with SIGTERM; result `reason` is `'wall-timeout'`. Expected red: `AssertionError: expected reason: "wall-timeout", got undefined (wall-clock timer not armed or not firing under fake-timer advance)`.
+- `returns captured stdout and stderr in the result on success` — unit — subprocess pushes `"ok\n"` on stdout then resolves with exit 0 → result is `{ stdout: "ok\n", stderr: "", exitCode: 0 }`. Expected red: `AssertionError: expected result.stdout to equal "ok\n", got "" (capture discarded)`.
+- `returns captured stdout and stderr in the result on non-zero exit` — unit — subprocess pushes `"failed\n"` on stderr then resolves with exit 1 → result exposes stderr contents AND exitCode 1. Expected red: `AssertionError: expected result.exitCode to equal 1, got 0` (or capture-discard variant).
+- `does not stream when options.silent is true` — unit — subprocess pushes data; spy on `process.stderr.write` → zero forwarded lines (heartbeat-suppression is a separate option but silent also suppresses streaming). Expected red: `AssertionError: expected process.stderr.write 0 calls, got 1`.
+- `backward-compatible signature: runCommand(cmd, args, cwd) returns { stdout, stderr, exitCode }` — unit — no options arg; mock subprocess; result has exactly the three-field shape plus optional `reason` undefined. Expected red: `TypeError: options is not defined` or shape-mismatch assertion.
+
+**Implementation** (`src/utils/repo.ts`):
+
+Replace the current `runCommand()` body (lines 70–103) with an execa-based implementation. Keep the existing exported signature; extend with a fifth optional `options` arg.
+
+- New exported type:
+
+```ts
+export type RunCommandOptions = {
+ /** Emit a heartbeat on parent stderr every N ms of child silence. Default 30_000. Set to 0 to disable. */
+ heartbeatMs?: number;
+ /** Kill child if no output for N ms. Default 120_000. Set to 0 to disable. */
+ idleTimeoutMs?: number;
+ /** Kill child after N ms of total runtime. Default 600_000. Set to 0 to disable. */
+ wallTimeoutMs?: number;
+ /** After SIGTERM, wait N ms before SIGKILL. Default 5_000. */
+ forceKillAfterMs?: number;
+ /** Short label emitted in heartbeat lines. Defaults to `command`. */
+ label?: string;
+ /** Suppress streaming and heartbeats. Capture-only. Default false. */
+ silent?: boolean;
+};
+
+export type RunCommandResult = {
+ stdout: string;
+ stderr: string;
+ exitCode: number;
+ /** Set when the child was killed by the helper's timeouts. Undefined on natural exit. */
+ reason?: 'idle-timeout' | 'wall-timeout';
+};
+```
+
+- Signature:
+
+```ts
+export async function runCommand(
+ command: string,
+ args: string[],
+ cwd: string,
+ env?: Record<string, string>,
+ options?: RunCommandOptions,
+): Promise<RunCommandResult>
+```
+
+- Internals:
+ - Spawn via `execa(command, args, { cwd, env: { ...process.env, ...env }, all: false, reject: false })`. `reject: false` makes execa return a `SubprocessResult` even on non-zero exits, matching the current contract.
+ - If `!silent`, attach listeners on `subprocess.stdout` and `subprocess.stderr` that (a) append chunks to capture buffers AND (b) write them to `process.stderr` as they arrive, and (c) reset the idle-timer.
+ - If `silent`, capture only; don't stream; don't heartbeat.
+ - Heartbeat: `setInterval` with `heartbeatMs` (default 30_000). Each tick checks "was there child output since the last tick?"; if not, write `[<label>] still running (<elapsed>s)\n` to `process.stderr`. Elapsed is rounded whole seconds since helper start.
+ - Idle timer: `setTimeout` with `idleTimeoutMs` (default 120_000), re-armed on every child-output chunk. On fire: call `tree-kill(subprocess.pid, 'SIGTERM')`, schedule `tree-kill(subprocess.pid, 'SIGKILL')` after `forceKillAfterMs` (default 5_000), set `reason = 'idle-timeout'`.
+ - Wall timer: `setTimeout` with `wallTimeoutMs` (default 600_000), NOT reset on child output. On fire: same termination sequence, set `reason = 'wall-timeout'`. If idle fires first, cancel wall; and vice versa.
+ - On natural `subprocess` resolution: clear all timers and intervals.
+ - Return `{ stdout, stderr, exitCode, reason }` where `exitCode` is the actual child exit code (or `-1` if killed by SIGKILL and execa surfaces a signal instead of a code — map to non-zero).
+
+- Do not change `cleanupTempDir`, `createTempDir`, or any other function in this file.
+
+**Leave room for refactor on green** (per TDD anti-patterns): the `Implementation` specifies the CONTRACT and observable behavior, not every line. If `/implement` finds a cleaner arrangement (e.g., lifting the timer bundle into a small helper class) during refactor-on-green, it should do so.
+
+---
+
+### 3. Tighten timeouts on slow callsites in `createPR`
+
+**Tests first** (`tests/unit/gadgets/github/core/createPR.test.ts`):
+
+Add to the existing test file (do not create a new one). Mock the `runCommand` import at the top of the file.
+
+- `pushBranch passes an explicit wallTimeoutMs below the gadget's 240s ceiling` — unit — call `createPR()` with the push path reaching `pushBranch`; assert `runCommand` mock was called for `['git', 'push', ...]` with a 5th-arg options object whose `wallTimeoutMs` is ≤ 230_000. Expected red: `AssertionError: expected runCommand to be called with options.wallTimeoutMs ≤ 230000, got undefined`.
+- `pushBranch passes an explicit idleTimeoutMs` — unit — same shape; assert options `idleTimeoutMs` is a finite number (e.g., 90_000). Expected red: `AssertionError: expected options.idleTimeoutMs to be a number, got undefined`.
+- `pushBranch result carries the captured hook output even on success` — unit — mock `runCommand` to resolve `{ stdout: "hook stdout", stderr: "hook stderr", exitCode: 0 }`; drive `createPR`; assert the result object exposes (via an added field on `CreatePRResult`, e.g., `pushOutput`) the captured stdout+stderr. Expected red: `AssertionError: expected result.pushOutput to be defined, got undefined`.
+- `stageAndCommit result carries the captured hook output even on success` — unit — same shape for the commit path via `commitOutput`. Expected red: as above.
+
+**Implementation** (`src/gadgets/github/core/createPR.ts`):
+- `pushBranch()`: pass `{ label: 'git-push', wallTimeoutMs: 230_000, idleTimeoutMs: 90_000 }` to `runCommand`. On success, return the captured `{ stdout, stderr }` to the caller rather than discarding.
+- `stageAndCommit()`: pass `{ label: 'git-commit', wallTimeoutMs: 120_000, idleTimeoutMs: 60_000 }`. On success, return captured output.
+- Extend the `CreatePRResult` type with optional `pushOutput?: string` and `commitOutput?: string`. Populate them in `createPR()` from the two helpers' captured output. The sidecar writer preserves whatever fields are present — no schema change to the sidecar needed.
+- Leave the fast callsites (`git remote get-url`, `git add`, `git status`, `git ls-remote`, `git ls-files`) with NO options arg. Defaults cover them.
+
+---
+
+### 4. Exclude bootstrap.js from oclif command glob
+
+**Tests first** — there is no unit test for an oclif config one-liner; rely on the build + a smoke check in Manual Verification.
+
+**Implementation** (`bin/cascade-tools.js`):
+- Current: `globPatterns: ['**/*.js', '!**/dashboard/**', '!**/_shared/**', '!base.js']`
+- After: `globPatterns: ['**/*.js', '!**/dashboard/**', '!**/_shared/**', '!base.js', '!bootstrap.js']`
+
+Single-line diff (one appended glob entry). Do not move or rename `src/cli/bootstrap.ts`; its side-effect import from `bin/cascade-tools.js` line 11 is load-bearing and must continue to fire.
+
+---
+
+### 5. Adjust existing tests that still mock `spawn` via the shared helper
+
+If any other existing test in `tests/unit/` breaks because it was asserting the old buffered-capture behavior (e.g., expecting zero writes to parent stderr), update those tests to accommodate the new default of streaming-on. Do not change the tests' underlying intent; just align expectations.
+
+- `tests/unit/agents/utils/setup.test.ts` — uses `runCommand` for bash setup-script; if it asserted on `process.stderr`, add the expected forwarded content.
+- `tests/unit/agents/shared/repository.test.ts` — uses `runCommand` for fetch/checkout/rev-parse/reset; same adjustment.
+- `tests/unit/gadgets/github.test.ts` — if it asserts createPR flow.
+
+Only change what's necessary; no structural refactors.
+
+---
+
+### 6. Documentation updates
+
+**Implementation**:
+
+- `README.md` — in the relevant "what cascade-tools provides" or dependency mention, add a short note: "As of spec 013, cascade-tools streams all subprocess output live to parent stderr, emits heartbeats on silence, and enforces idle + wall-clock timeouts with process-group kill."
+- `docs/cascade-directory.md` — under the `setup.sh` / `ensure-services.sh` discussion (or add a short new subsection), clarify that cascade-tools streams hook output live and has a per-subprocess wall-clock timeout independent of the gadget 240s; if a hook genuinely hangs, it will be killed after the idle-silence timeout.
+- `CHANGELOG.md` — add an entry under the next unreleased section: "Spec 013: `cascade-tools` subprocess helper now streams child output live, emits 30s-silence heartbeats, enforces idle + wall-clock timeouts with process-group kill, and preserves captured hook output on success; oclif `command bootstrap not found` warning silenced."
+
+---
+
+## Test Plan
+
+### Unit tests
+- [ ] `tests/unit/utils/repo.test.ts`: 12 retooled/new tests covering streaming, heartbeat, idle timeout, wall timeout, tree-kill, force-kill escalation, capture preservation, silent mode, backward-compat signature
+- [ ] `tests/unit/gadgets/github/core/createPR.test.ts`: 4 new assertions for pushBranch/stageAndCommit options + captured-output-on-success
+- [ ] Existing test alignment in `tests/unit/agents/utils/setup.test.ts`, `tests/unit/agents/shared/repository.test.ts`, `tests/unit/gadgets/github.test.ts` as needed
+
+### Integration tests
+- None directly added by this plan. Cascade's existing integration suite exercises `runCommand` through real subprocess invocations on the integration-test DB; if any integration test asserts on stderr absence during a `runCommand` call, update as needed in step 5.
+
+### Acceptance tests
+- AC #1–#8 covered by the unit test battery above.
+- AC #9 is `[manual]` — see Manual Verification below.
+
+---
+
+## Manual Verification (for `[manual]`-tagged ACs only)
+
+- **AC**: spec AC #9 (end-to-end agent behavior: agent sees hook progress within ~30s and does not enter a retry loop)
+- **Why manual**: requires triggering a real CASCADE agent run against a target repo with a multi-second pre-push hook and observing the agent's `Monitor` tool output file over time. Cascade's unit and integration suites do not exercise LLM-agent behavior.
+- **Verification protocol**:
+ 1. Ensure cascade's dev router + dashboard are running locally, and that the `ucho` project is registered with its GitHub tokens and Linear credentials.
+ 2. Create or identify a Linear issue on the `ucho` team tagged to trigger the `implementation` agent (any simple change — e.g., "update a README typo"). Move the issue to the trigger status.
+ 3. The CASCADE router enqueues the run; a worker picks it up.
+ 4. In a separate shell, monitor the run: `~/Code/cascade/bin/cascade.js runs logs <runId>` — follow the `cascadeLog` output.
+ 5. When the agent reaches the `cascade-tools scm create-pr` step, observe in the log:
+ - Within the first ~10s after the tool call, the child's stdout/stderr from `git push` and the pre-push hook start appearing on stderr in the run log (e.g., `Starting pre-push hook`, `typecheck…`, `test:run…`).
+ - If the hook has a silent stretch, a heartbeat line in the form `[git-push] still running (30s)` (or similar) appears every ~30s.
+ - The agent does NOT retry `cascade-tools scm create-pr`; the tool call completes in a single invocation.
+ - The PR is successfully created; its URL appears on stdout in the final `{ success: true, data: { prUrl: ... } }` line.
+ 6. Mark AC #9 verified if all four observable outcomes in step 5 are met.
+
+If any fail, do not mark verified; investigate root cause and file a follow-up.
+
+---
+
+## Acceptance Criteria (per-plan, testable)
+
+1. `runCommand()` streams child stdout and stderr to parent stderr as they arrive (tested).
+2. `runCommand()` emits a heartbeat line to parent stderr after `heartbeatMs` ms of child silence, including the command label and elapsed seconds; resets on child output (tested).
+3. `runCommand()` kills the child (and its process tree) via `tree-kill` SIGTERM when `idleTimeoutMs` elapses without output, escalates to SIGKILL after `forceKillAfterMs`, and returns `reason: 'idle-timeout'` (tested).
+4. `runCommand()` kills the child (and its process tree) on `wallTimeoutMs` elapsed and returns `reason: 'wall-timeout'`, regardless of ongoing output (tested).
+5. `runCommand()` preserves captured stdout and stderr in its return on both success and non-zero exit; `silent: true` suppresses streaming/heartbeats but not capture (tested).
+6. `runCommand()` is backward-compatible: callers that pass no `options` receive `{ stdout, stderr, exitCode }` (tested).
+7. `pushBranch()` passes `wallTimeoutMs ≤ 230_000` and a finite `idleTimeoutMs`; returns captured hook output on success path (tested).
+8. `stageAndCommit()` passes explicit timeouts and returns captured output (tested).
+9. Running `cascade-tools --help` or any other subcommand produces no `@oclif … command bootstrap not found` warning on stderr (verified via a simple post-build smoke script or manual invocation — documented in Manual Verification).
+10. All new/modified code has corresponding tests.
+11. `npm run build` passes.
+12. `npm test` passes (all 4 unit projects).
+13. `npm run typecheck` passes.
+14. `npm run lint` passes.
+15. All documentation listed in Documentation Impact has been updated.
+
+---
+
+## Documentation Impact (this plan only)
+
+| File | Change |
+|---|---|
+| `README.md` | Add short note about cascade-tools' live-streaming + heartbeat + timeout behavior in the relevant architecture/cascade-tools section |
+| `docs/cascade-directory.md` | Clarify that cascade-tools streams hook output live; note the idle- and wall-clock termination guarantees from cascade's side |
+| `CHANGELOG.md` | Unreleased entry: "spec 013: observable subprocess helper" with a one-paragraph summary of new default behavior |
+
+---
+
+## Out of Scope (this plan)
+
+- Auditing or replacing direct `execSync` / `spawnSync` callsites outside the shared subprocess helper (spec Out of Scope).
+- Lefthook / target-repo hook configuration (spec Out of Scope).
+- Dashboard, router, worker, or gadget-level timeout changes (spec Non-goals).
+- Windows platform support (spec Out of Scope).
+- TUI progress UIs (spec Non-goals).
+- Changing the gadget success/error JSON shape emitted on stdout (spec Non-goals).
+
+---
+
+## Progress
+
+
+- [x] AC #1 — streaming (2 tests in `tests/unit/utils/repo.test.ts`)
+- [x] AC #2 — heartbeat (3 tests — emit on silence, reset on output, no-op on fast commands)
+- [x] AC #3 — idle-timeout SIGTERM→SIGKILL (2 tests)
+- [x] AC #4 — wall-timeout (1 test)
+- [x] AC #5 — descendant kill via tree-kill (covered by the kill tests asserting tree-kill is called)
+- [x] AC #6 — captured output preserved on success + non-zero exit + silent mode (3 tests)
+- [x] AC #7 — pushBranch timeouts + pushOutput in result (3 tests in `tests/unit/gadgets/github/core/createPR.test.ts`)
+- [x] AC #8 — stageAndCommit timeouts + commitOutput (2 tests)
+- [x] AC #9 — oclif `command bootstrap not found` warning silenced (verified live via `node bin/cascade-tools.js --help` — zero warning lines)
+- [x] AC #10 — all new code has tests
+- [x] AC #11 — `npm run build` passes
+- [x] AC #12 — `npm test` passes (8338/8338, 23 skipped pre-existing)
+- [x] AC #13 — `npm run typecheck` passes (clean)
+- [x] AC #14 — `npm run lint` passes (clean)
+- [x] AC #15 — docs updated: `README.md`, `docs/cascade-directory.md`, `CHANGELOG.md`
+
+### Spec AC #9 (agent end-to-end) — `[manual]` — DEFERRED
+
+The Manual Verification protocol in this plan requires triggering a live CASCADE agent run against a target repo with a multi-second pre-push hook and observing the Monitor output file over ~60s. That cannot be executed inside the `/implement` run: it requires the cascade dashboard/router/worker running in the user's environment, an inbound Linear/Trello/JIRA webhook, and an agent-driven `cascade-tools scm create-pr` call. **Recommended next step**: after this plan lands on `dev` and a worker picks up the updated image, monitor the next real implementation-agent run against ucho (or any project with a slow pre-push hook) and confirm the four observable outcomes listed in the Manual Verification protocol.
diff --git a/docs/plans/013-subprocess-output-streaming/_coverage.md b/docs/plans/013-subprocess-output-streaming/_coverage.md
new file mode 100644
index 00000000..d6790448
--- /dev/null
+++ b/docs/plans/013-subprocess-output-streaming/_coverage.md
@@ -0,0 +1,32 @@
+# Coverage map for spec 013-subprocess-output-streaming
+
+Auto-generated by /plan. Tracks which plans satisfy which spec ACs.
+
+## Spec ACs
+
+| # | Spec AC (short) | Satisfied by | Status |
+|---|---|---|---|
+| 1 | Live stderr streaming during long subprocess runs | plan 1 (observable-subprocess-helper) | full |
+| 2 | 30s heartbeat on silence | plan 1 | full |
+| 3 | Idle-timeout SIGTERM→SIGKILL with process-group kill | plan 1 | full |
+| 4 | Wall-clock timeout SIGTERM→SIGKILL with process-group kill | plan 1 | full |
+| 5 | Descendant / process-tree kill via tree-kill | plan 1 | full |
+| 6 | Captured output preserved on success + failure | plan 1 | full |
+| 7 | No `@oclif command bootstrap not found` warning | plan 1 | full |
+| 8 | Backward-compat result shape for all existing callers | plan 1 | full |
+| 9 | Agent sees hook progress ≤30s, no retry loop | plan 1 | full `[manual]` |
+
+## Coverage summary
+
+- **9 spec ACs** mapped to **1 plan**
+- **1 plan** with full-coverage ACs (testable in isolation)
+- **0 plans** with partial-coverage ACs
+- **1 `[manual]`-tagged AC** (AC #9) — protocol lives in plan 1's Manual Verification section
+
+## Plan dependency graph
+
+```
+1-observable-subprocess-helper
+```
+
+Single-plan spec. No dependencies between plans (there are no other plans).
diff --git a/docs/specs/013-subprocess-output-streaming.md.done b/docs/specs/013-subprocess-output-streaming.md.done
new file mode 100644
index 00000000..47a8b8e6
--- /dev/null
+++ b/docs/specs/013-subprocess-output-streaming.md.done
@@ -0,0 +1,135 @@
+---
+id: 013
+slug: subprocess-output-streaming
+level: spec
+title: Live subprocess output + heartbeats + idle-timeout in cascade-tools
+created: 2026-04-24
+status: done
+---
+
+# 013: Live subprocess output + heartbeats + idle-timeout in cascade-tools
+
+## Problem & Motivation
+
+An LLM-driven CASCADE agent inside a worker container shells out to `cascade-tools` for git operations. When the agent runs something that triggers user-defined git hooks — most commonly `cascade-tools scm create-pr`, which invokes `git push` and thereby the target repo's `pre-push` hook — the child process may take tens of seconds (typecheck + unit tests are common). Throughout that time, the agent sees **nothing**: no stdout, no stderr, no log lines, an empty output file.
+
+The agent is watching the output file via its `Monitor` tool. An empty file for 60 seconds is indistinguishable from "hung subprocess". The agent reaches that conclusion, kills its own wrapper, retries, sometimes retries again, and burns 5–10 minutes of its 30-minute budget before giving up or completing the task late. This reliably takes runs over the wire.
+
+Two recent failed CASCADE runs on the `ucho` project isolated the cause:
+
+- **MNG-287** (`f60a8ae6`): `implementation` agent timed out at 29m59s. Core loss: push silent during lefthook's `pnpm typecheck` + `pnpm test:run` (~60s), then the push eventually failed on an unrelated `git diff HEAD main` lefthook config issue. The silence multiplied the cost of the failure.
+- **MNG-290** (`32f81472`): `implementation` agent editing README.md completed all code changes, ran checks clean, then hung for 7+ minutes on `cascade-tools scm create-pr`. The agent spent dozens of tool calls reading an empty output file, trying alternate invocations, checking git state, never realizing the push was actually progressing.
+
+Tracing the cascade-tools code path: the shared subprocess helper fully buffers child stdout and stderr into in-memory strings and emits them to the caller **only when the subprocess exits**. On the success branch, captured output is **discarded entirely** — agents never see what the hook actually ran. There is also no per-subprocess wall-clock timeout and no kill-on-silence; the only timeout is a gadget-level 240s that doesn't actually kill the child.
+
+This spec turns the subprocess path from a silent black box into an observable pipe with sane liveness and termination semantics.
+
+---
+
+## Goals
+
+- A CASCADE agent watching `cascade-tools`'s output file sees live, line-buffered subprocess output for any command cascade-tools shells out to — git, installers, test runners invoked via hooks, etc.
+- During stretches where the subprocess emits nothing, cascade-tools surfaces a human-readable heartbeat (elapsed time + command identifier) to stderr at a regular cadence so the agent can distinguish "still working" from "genuinely hung".
+- Subprocesses that produce no output for too long are terminated cleanly (SIGTERM with an escalation to SIGKILL) rather than waiting for an outer wall-clock or hanging the agent's entire run.
+- Subprocesses that exceed a wall-clock budget are also terminated cleanly, with the same escalation path.
+- Process-group termination: killing a subprocess also kills its descendants (relevant because hooks spawn subshells that spawn test runners that spawn more processes).
+- After a long subprocess finishes — success or failure — the captured output is preserved in the gadget's result, not silently dropped.
+- Cosmetic: stop emitting the oclif `command bootstrap not found` warning on every invocation. It's unrelated noise that adds confusion to failure-triage log excerpts.
+
+---
+
+## Non-goals
+
+- Rewriting gadget-level timeout semantics or the 240s gadget budget.
+- Adding a TTY-style progress UI (spinners, bars). Output is a plain file read by an LLM; human-style TUI is out of scope.
+- Persisting full subprocess transcripts to long-term storage. Preservation is per-call, in the returned result.
+- Changing the shape of gadget success/error JSON on stdout. The final `{ success, data }` line stays on stdout; streaming goes to stderr.
+- Auditing every shell-out in cascade to see whether it should go through the shared subprocess wrapper. The shared wrapper is the target of this spec; direct `execSync` / `spawnSync` callsites found along the way are out of scope unless trivially adjacent.
+- Solving LLM-agent timeouts in general. This spec solves the "cascade-tools appears hung" failure mode; other agent flakiness is separate.
+
+---
+
+## Constraints
+
+- Must work in the CASCADE worker container today: Node 22, Debian 12 bookworm, non-root `node` user with passwordless sudo.
+- Must not break the existing gadget result contract. Every current caller reading `{ stdout, stderr, exitCode }` continues to work; streaming is additive.
+- Must not corrupt the final-line stdout JSON that the gadget factory emits. Streaming goes to stderr so stdout-parsers aren't affected.
+- Process-group kill must be cross-platform enough to work on Linux workers and MacOS dev machines (Windows is not a cascade deployment target).
+- Dependency budget: acceptable to add two small, well-maintained npm packages. Not acceptable to add a heavy framework.
+- Backward compatibility: existing tests that assert on `runCommand` result shape must continue to pass without refactoring, other than adjustments to account for new default behavior where it legitimately changes observable output (e.g. stderr now includes forwarded child output during the call).
+
+---
+
+## User stories / Requirements
+
+1. **As the CASCADE agent running `cascade-tools scm create-pr` on a repo with slow pre-push hooks**, I see hook output line-by-line as it is produced so I can distinguish progress from hang.
+2. **As the CASCADE agent**, during silent stretches I see a heartbeat entry on stderr every ~30 seconds including elapsed time and a short command identifier, so my `Monitor` tool does not misread silence as failure.
+3. **As the CASCADE agent waiting on a genuinely stuck subprocess**, the subprocess is killed after a configurable idle period (no output for N seconds), and the kill propagates to descendants so no zombies remain.
+4. **As the CASCADE agent waiting on a subprocess that is actively emitting output but has exceeded its overall budget**, the subprocess is killed at the wall-clock boundary with the same descendant-kill guarantee.
+5. **As an operator reading cascade-tools logs after a successful push**, I can see what the hook actually ran — the captured output is available in the result, not discarded.
+6. **As an operator invoking `cascade-tools` for any subcommand**, I do not see an `@oclif command bootstrap not found` warning preceding my output. The warning is gone.
+
+---
+
+## Research Notes
+
+- **Idle-timeout is the canonical CI-runner solution to the silent-subprocess problem.** Travis CI kills builds that produce no output for 10 min and provides `travis_wait` to emit keepalive dots when a user knows a command will be quiet (Travis CI [common build problems](https://docs.travis-ci.com/user/common-build-problems/#build-times-out-because-no-output-was-received)). Buildkite and CircleCI use the same no-output-kill pattern. GitHub Actions doesn't need it because its runner agent streams logs live. An LLM agent watching a file is functionally the same problem Travis solves: needs line-buffered forwarding plus optional heartbeat on quiet.
+- **`execa` (Sindre Sorhus / ehmicky)** is the de-facto Node subprocess library: ~140M downloads/month, MIT, streams by default, tee-to-terminal pattern via `stdout: ['pipe', 'inherit']`, built-in SIGTERM→SIGKILL ladder via `forceKillAfterDelay`. Termination docs: [execa/docs/termination.md](https://github.com/sindresorhus/execa/blob/main/docs/termination.md). Streaming: [execa/docs/streams.md](https://github.com/sindresorhus/execa/blob/main/docs/streams.md). Solves ~90% of what this spec needs.
+- **`tree-kill`** is the canonical cross-platform process-tree killer for Node; necessary because Node's built-in single-process kill only signals the direct child, and lefthook / test-runner chains create deep process trees. Pair with execa.
+- **Heartbeat-on-silence is the custom piece.** execa doesn't emit one; it must be implemented on top as a `setInterval` that fires only when no child output has flushed since the last tick. Straightforward but needs care in test doubles.
+
+---
+
+## Open Source Decisions
+
+| Tool | Solves | Decision | Reason |
+|------|--------|----------|--------|
+| [`execa`](https://github.com/sindresorhus/execa) | Subprocess spawning with streaming + kill-ladder + encoding + cross-platform quoting | **Use** | Industry default; owners maintain most of Node's subprocess tooling. Streaming and `forceKillAfterDelay` cover the primary goals out of the box. |
+| [`tree-kill`](https://github.com/pkrumins/node-tree-kill) | Cross-platform process-tree termination | **Use** | Node's built-in single-process kill doesn't reach grandchildren; hooks spawn grandchildren; without this, SIGTERM of the git process leaves orphaned test runners. |
+| `nano-spawn`, `tinyspawn`, `zx`, `listr2`, `ora` | Alternative subprocess / task-runner tools | **Skip** | `nano-spawn` / `tinyspawn` are minimalist replacements — not worth trading execa's battle-testing for a few KB. `zx` is a shell DSL, wrong abstraction. `listr2` / `ora` need a TTY; output sink here is a file read by an LLM. |
+| Custom heartbeat ticker | Periodic "still alive" line when child is silent | **Build** | Not available in any library at the granularity this spec needs (per-subprocess, cadence-configurable, idle-aware). Thin wrapper on `setInterval`. |
+
+---
+
+## Strategic decisions
+
+1. **Adopt `execa` + `tree-kill` rather than extend the hand-rolled subprocess wrapper.** Rebuild the shared subprocess helper on top of execa. Reason: streaming, kill-ladder, and encoding handling are all bug-prone when home-rolled, and execa has solved them in the wild for years. Dependency cost is small (two packages, tiny total footprint) and matches the kind of infra dep cascade already uses.
+2. **Enforce BOTH wall-clock and idle-silence timeouts.** Wall-clock (per-caller, default near the gadget 240s ceiling) is the outer safety net. Idle-silence (per-caller, default in the tens of seconds) catches genuinely wedged children early instead of burning the full wall-clock. A subprocess that emits anything on any tick resets the idle timer. Both terminations go through the SIGTERM→SIGKILL ladder with process-group kill.
+3. **Heartbeat cadence: 30 seconds of silence.** Frequent enough that the agent sees activity inside a typical test-suite run, infrequent enough to avoid spamming short commands (which complete before the first heartbeat ever fires). Configurable per-call; 30s is the default.
+4. **Apply live streaming + heartbeat + idle-timeout to every caller of the shared subprocess helper.** Not scoped to just `git push`. Clone, fetch, checkout, setup-script invocation, and any future callsite all benefit; the cost on fast callsites (ms-scale operations) is zero because the heartbeat never fires and streaming is a no-op on empty output. Per-caller timeouts and heartbeat interval are configurable with safe defaults; callers who want different numbers set them.
+5. **Stream to stderr; keep stdout pristine for the final JSON result line.** The gadget factory emits `{ success, data }` as a single JSON line on stdout at the end. Streaming subprocess output to stdout would corrupt parsers. Stderr is the conventional place for progress, and agents reading the output file can see both streams.
+6. **Preserve captured output in the returned result on both success and failure.** Today, a successful `git push` throws away the hook output. Keeping it gives agents and operators post-hoc insight without re-running anything.
+7. **Silence the `oclif command bootstrap not found` warning via the command-loader glob**, not by renaming or relocating the bootstrap module. The module's side-effect import at the entry point is load-bearing (it registers all integrations before oclif starts); excluding it from the command glob is the single-line change. Renaming touches more code and risks breaking the side-effect order.
+
+---
+
+## Acceptance Criteria (outcome-level)
+
+1. Running any `cascade-tools` subcommand that invokes git against a repo with a slow pre-push hook produces incremental stderr output visible in the process's output file while the hook runs, not only at exit.
+2. During any stretch where the subprocess emits no output for ≥30 seconds, cascade-tools writes a heartbeat line to stderr identifying the command and cumulative elapsed time, and continues writing one heartbeat every further 30 seconds of silence until output resumes or the command terminates.
+3. A subprocess that emits nothing for longer than its configured idle-silence timeout is terminated with SIGTERM and escalated to SIGKILL if it does not exit within a short grace window. The cascade-tools invocation returns a non-zero exit code with both the captured output and a clear error indicating idle timeout.
+4. A subprocess that exceeds its configured wall-clock timeout is terminated with the same SIGTERM→SIGKILL escalation and returns a non-zero exit code with captured output and a clear wall-clock error.
+5. Termination of a subprocess kills its descendants. No processes spawned by the subprocess (test runners, subshells, etc.) survive after cascade-tools returns.
+6. `cascade-tools scm create-pr` — or any gadget that pushes — returns captured stdout/stderr in its result on both success and failure paths. A successful push that ran a hook preserves the hook's output.
+7. `cascade-tools --help`, `cascade-tools scm create-pr`, and every other cascade-tools invocation emit **no** `@oclif … command bootstrap not found` warning at startup.
+8. All existing integration points that call the shared subprocess helper (clone, fetch, checkout, setup-script, git push, git commit, git status, ls-remote, and any other current callsites) continue to receive an equivalent `{ stdout, stderr, exitCode }` result shape, with behavior unchanged aside from the new default of live streaming to stderr and heartbeat emission on silence.
+9. A CASCADE agent triggered on a ucho-style project that runs a full `cascade-tools scm create-pr` end-to-end sees pre-push hook progress within the first ~30 seconds and does not enter a retry loop on Monitor tool output. `[manual]` — verification requires running an actual agent run against a repo with a multi-second pre-push hook; end-to-end agent behavior isn't exercised by cascade's unit or integration suite.
+
+---
+
+## Documentation Impact (high-level)
+
+- `README.md` — note the new dependency line (execa + tree-kill) under the library section if one exists, or add one; reference the new observable-subprocess behavior in any "debugging cascade-tools" section.
+- `docs/cascade-directory.md` — the existing `.cascade/` hook reference may mention hook timeout guarantees from cascade's side; update or add a short paragraph clarifying that cascade-tools now streams hook output live and enforces both wall-clock and idle timeouts.
+- `CHANGELOG.md` — entry for the observable-subprocess change and the oclif warning removal. Operator-visible; worth calling out.
+
+---
+
+## Out of Scope
+
+- Lefthook configuration inside target repositories. The `main`-reference issue observed in MNG-287 is a separate concern for the target repo's hook config, not cascade-tools.
+- Auditing direct `execSync` / `spawnSync` uses elsewhere in the cascade codebase. The shared subprocess helper is the target here; ad-hoc callsites are a different workstream.
+- Agent-side retry policy. Whether a CASCADE agent retries a seemingly-hung cascade-tools invocation is a property of the agent; this spec reduces the false-positive rate by making subprocesses actually observable, but agent retry logic is not changed here.
+- TUI progress UIs for interactive human users. The output sink is a file; optimize for that reader.
+- Platform support for Windows workers. Cascade workers run on Linux; MacOS dev machines are the only non-Linux target and `tree-kill` covers it.
+- Dashboard or router behavior. This spec is entirely inside cascade-tools.
diff --git a/package-lock.json b/package-lock.json
index 5bbe5da4..27c20d70 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -28,6 +28,7 @@
"dockerode": "^4.0.9",
"drizzle-orm": "^0.45.1",
"eta": "^4.5.0",
+ "execa": "^9.6.1",
"hono": "^4.12.14",
"jira.js": "^5.3.0",
"js-yaml": "^4.1.1",
@@ -36,6 +37,7 @@
"open": "^11.0.0",
"ora": "^9.3.0",
"pg": "^8.18.0",
+ "tree-kill": "^1.2.2",
"trello.js": "^1.2.8",
"zangief": "^1.0.5",
"zod": "^3.25.0"
@@ -3867,6 +3869,12 @@
"win32"
]
},
+ "node_modules/@sec-ant/readable-stream": {
+ "version": "0.4.1",
+ "resolved": "https://registry.npmjs.org/@sec-ant/readable-stream/-/readable-stream-0.4.1.tgz",
+ "integrity": "sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg==",
+ "license": "MIT"
+ },
"node_modules/@sentry/core": {
"version": "10.47.0",
"resolved": "https://registry.npmjs.org/@sentry/core/-/core-10.47.0.tgz",
@@ -4030,6 +4038,18 @@
"url": "https://github.com/sindresorhus/is?sponsor=1"
}
},
+ "node_modules/@sindresorhus/merge-streams": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-4.0.0.tgz",
+ "integrity": "sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/@testing-library/dom": {
"version": "10.4.1",
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
@@ -6973,6 +6993,44 @@
"node": ">=18.0.0"
}
},
+ "node_modules/execa": {
+ "version": "9.6.1",
+ "resolved": "https://registry.npmjs.org/execa/-/execa-9.6.1.tgz",
+ "integrity": "sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA==",
+ "license": "MIT",
+ "dependencies": {
+ "@sindresorhus/merge-streams": "^4.0.0",
+ "cross-spawn": "^7.0.6",
+ "figures": "^6.1.0",
+ "get-stream": "^9.0.0",
+ "human-signals": "^8.0.1",
+ "is-plain-obj": "^4.1.0",
+ "is-stream": "^4.0.1",
+ "npm-run-path": "^6.0.0",
+ "pretty-ms": "^9.2.0",
+ "signal-exit": "^4.1.0",
+ "strip-final-newline": "^4.0.0",
+ "yoctocolors": "^2.1.1"
+ },
+ "engines": {
+ "node": "^18.19.0 || >=20.5.0"
+ },
+ "funding": {
+ "url": "https://github.com/sindresorhus/execa?sponsor=1"
+ }
+ },
+ "node_modules/execa/node_modules/is-stream": {
+ "version": "4.0.1",
+ "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-4.0.1.tgz",
+ "integrity": "sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/expect-type": {
"version": "1.3.0",
"dev": true,
@@ -7145,6 +7203,21 @@
"node": "^12.20 || >= 14.13"
}
},
+ "node_modules/figures": {
+ "version": "6.1.0",
+ "resolved": "https://registry.npmjs.org/figures/-/figures-6.1.0.tgz",
+ "integrity": "sha512-d+l3qxjSesT4V7v2fh+QnmFnUWv9lSpjarhShNTgBOfA0ttejbQUAlHLitbjkoRiDulW0OPoQPYIGhIC8ohejg==",
+ "license": "MIT",
+ "dependencies": {
+ "is-unicode-supported": "^2.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/filelist": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz",
@@ -7378,6 +7451,34 @@
"node": ">= 0.4"
}
},
+ "node_modules/get-stream": {
+ "version": "9.0.1",
+ "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-9.0.1.tgz",
+ "integrity": "sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA==",
+ "license": "MIT",
+ "dependencies": {
+ "@sec-ant/readable-stream": "^0.4.1",
+ "is-stream": "^4.0.1"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
+ "node_modules/get-stream/node_modules/is-stream": {
+ "version": "4.0.1",
+ "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-4.0.1.tgz",
+ "integrity": "sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/get-tsconfig": {
"version": "4.13.0",
"dev": true,
@@ -7591,6 +7692,15 @@
"node": ">= 14"
}
},
+ "node_modules/human-signals": {
+ "version": "8.0.1",
+ "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-8.0.1.tgz",
+ "integrity": "sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ==",
+ "license": "Apache-2.0",
+ "engines": {
+ "node": ">=18.18.0"
+ }
+ },
"node_modules/iconv-lite": {
"version": "0.7.2",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz",
@@ -7855,7 +7965,6 @@
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz",
"integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==",
- "dev": true,
"license": "MIT",
"engines": {
"node": ">=12"
@@ -8909,6 +9018,34 @@
"node": ">=0.10.0"
}
},
+ "node_modules/npm-run-path": {
+ "version": "6.0.0",
+ "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-6.0.0.tgz",
+ "integrity": "sha512-9qny7Z9DsQU8Ou39ERsPU4OZQlSTP47ShQzuKZ6PRXpYLtIFgl/DEBYEXKlvcEa+9tHVcK8CF81Y2V72qaZhWA==",
+ "license": "MIT",
+ "dependencies": {
+ "path-key": "^4.0.0",
+ "unicorn-magic": "^0.3.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
+ "node_modules/npm-run-path/node_modules/path-key": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/path-key/-/path-key-4.0.0.tgz",
+ "integrity": "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=12"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -9121,6 +9258,18 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+ "node_modules/parse-ms": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/parse-ms/-/parse-ms-4.0.0.tgz",
+ "integrity": "sha512-TXfryirbmq34y8QBwgqCVLi+8oA3oWx2eAnSn62ITyEhEYaWRlVZ2DvMM9eZbMs/RfxPu/PK/aBLyGj4IrqMHw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/parse5": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz",
@@ -9417,6 +9566,21 @@
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
+ "node_modules/pretty-ms": {
+ "version": "9.3.0",
+ "resolved": "https://registry.npmjs.org/pretty-ms/-/pretty-ms-9.3.0.tgz",
+ "integrity": "sha512-gjVS5hOP+M3wMm5nmNOucbIrqudzs9v/57bWRHQWLYklXqoXKrVfYW2W9+glfGsqtPgpiz5WwyEEB+ksXIx3gQ==",
+ "license": "MIT",
+ "dependencies": {
+ "parse-ms": "^4.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/process": {
"version": "0.11.10",
"license": "MIT",
@@ -10263,6 +10427,18 @@
"node": ">=8"
}
},
+ "node_modules/strip-final-newline": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-4.0.0.tgz",
+ "integrity": "sha512-aulFJcD6YK8V1G7iRB5tigAP4TsHBZZrOV8pjV++zdUwmeV8uzbY7yn6h9MswN62adStNZFuCIx4haBnRuMDaw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/strip-literal": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz",
@@ -10555,7 +10731,6 @@
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz",
"integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==",
- "dev": true,
"license": "MIT",
"bin": {
"tree-kill": "cli.js"
@@ -10702,6 +10877,18 @@
"node": ">=4"
}
},
+ "node_modules/unicorn-magic": {
+ "version": "0.3.0",
+ "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.3.0.tgz",
+ "integrity": "sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/universal-user-agent": {
"version": "7.0.3",
"license": "ISC"
diff --git a/package.json b/package.json
index 238652a7..c28f354e 100644
--- a/package.json
+++ b/package.json
@@ -73,6 +73,7 @@
"dockerode": "^4.0.9",
"drizzle-orm": "^0.45.1",
"eta": "^4.5.0",
+ "execa": "^9.6.1",
"hono": "^4.12.14",
"jira.js": "^5.3.0",
"js-yaml": "^4.1.1",
@@ -81,6 +82,7 @@
"open": "^11.0.0",
"ora": "^9.3.0",
"pg": "^8.18.0",
+ "tree-kill": "^1.2.2",
"trello.js": "^1.2.8",
"zangief": "^1.0.5",
"zod": "^3.25.0"
diff --git a/src/gadgets/github/core/createPR.ts b/src/gadgets/github/core/createPR.ts
index 1edbaa96..b32e80e8 100644
--- a/src/gadgets/github/core/createPR.ts
+++ b/src/gadgets/github/core/createPR.ts
@@ -18,8 +18,20 @@ export interface CreatePRResult {
prUrl: string;
repoFullName: string;
alreadyExisted: boolean;
+ /** Captured stdout+stderr from `git push` (including pre-push hook output). Spec 013. */
+ pushOutput?: string;
+ /** Captured stdout+stderr from `git commit` (including pre-commit hook output). Spec 013. */
+ commitOutput?: string;
}
+// Spec 013: per-caller timeouts for the two commands that trigger user-defined
+// hooks. Values are sized to sit just under the gadget's 240s ceiling and to
+// give test suites enough headroom for their slowest inter-event gaps.
+const PUSH_WALL_TIMEOUT_MS = 230_000;
+const PUSH_IDLE_TIMEOUT_MS = 90_000;
+const COMMIT_WALL_TIMEOUT_MS = 120_000;
+const COMMIT_IDLE_TIMEOUT_MS = 60_000;
+
async function detectOwnerRepo(): Promise<{ owner: string; repo: string }> {
const result = await runCommand('git', ['remote', 'get-url', 'origin'], process.cwd());
if (result.exitCode !== 0) {
@@ -32,7 +44,12 @@ async function detectOwnerRepo(): Promise<{ owner: string; repo: string }> {
return { owner: match[1], repo: match[2] };
}
-async function stageAndCommit(commitMessage: string): Promise<void> {
+/**
+ * Stage changes and commit. Returns the captured stdout+stderr of `git commit`
+ * so callers can surface pre-commit hook output to operators. Empty string if
+ * there was nothing to commit (early return after `git status`).
+ */
+async function stageAndCommit(commitMessage: string): Promise<string> {
const addResult = await runCommand('git', ['add', '-u'], process.cwd());
if (addResult.exitCode !== 0) {
throw new Error(`Failed to stage changes: ${addResult.stderr || addResult.stdout}`.trim());
@@ -55,26 +72,52 @@ async function stageAndCommit(commitMessage: string): Promise {
const statusResult = await runCommand('git', ['status', '--porcelain'], process.cwd());
if (statusResult.stdout.trim() === '') {
- return;
+ return '';
}
- const commitResult = await runCommand('git', ['commit', '-m', commitMessage], process.cwd());
+ const commitResult = await runCommand(
+ 'git',
+ ['commit', '-m', commitMessage],
+ process.cwd(),
+ undefined,
+ {
+ label: 'git-commit',
+ wallTimeoutMs: COMMIT_WALL_TIMEOUT_MS,
+ idleTimeoutMs: COMMIT_IDLE_TIMEOUT_MS,
+ },
+ );
if (commitResult.exitCode !== 0) {
const output = [commitResult.stdout, commitResult.stderr].filter(Boolean).join('\n').trim();
throw new Error(
`COMMIT FAILED (pre-commit hooks may have failed)\n\n--- OUTPUT ---\n${output}`,
);
}
+ return [commitResult.stdout, commitResult.stderr].filter(Boolean).join('\n').trim();
}
-async function pushBranch(branch: string): Promise<void> {
- const pushResult = await runCommand('git', ['push', '-u', 'origin', branch], process.cwd());
+/**
+ * Push the branch. Returns the captured stdout+stderr of `git push` so callers
+ * can surface pre-push hook output (typecheck, tests, etc.) to operators.
+ */
+async function pushBranch(branch: string): Promise<string> {
+ const pushResult = await runCommand(
+ 'git',
+ ['push', '-u', 'origin', branch],
+ process.cwd(),
+ undefined,
+ {
+ label: 'git-push',
+ wallTimeoutMs: PUSH_WALL_TIMEOUT_MS,
+ idleTimeoutMs: PUSH_IDLE_TIMEOUT_MS,
+ },
+ );
if (pushResult.exitCode !== 0) {
const output = [pushResult.stdout, pushResult.stderr].filter(Boolean).join('\n').trim();
throw new Error(
`PUSH FAILED for branch '${branch}' (pre-push hooks may have failed)\n\n--- OUTPUT ---\n${output}`,
);
}
+ return [pushResult.stdout, pushResult.stderr].filter(Boolean).join('\n').trim();
}
 async function verifyBranchOnRemote(branch: string): Promise<boolean> {
@@ -86,12 +129,15 @@ export async function createPR(params: CreatePRParams): Promise<CreatePRResult>
const { owner, repo } = await detectOwnerRepo();
const commitMessage = params.commitMessage || params.title;
+ let commitOutput: string | undefined;
+ let pushOutput: string | undefined;
+
if (params.commit !== false) {
- await stageAndCommit(commitMessage);
+ commitOutput = await stageAndCommit(commitMessage);
}
if (params.push !== false) {
- await pushBranch(params.head);
+ pushOutput = await pushBranch(params.head);
}
const branchExists = await verifyBranchOnRemote(params.head);
@@ -118,6 +164,8 @@ export async function createPR(params: CreatePRParams): Promise<CreatePRResult>
prUrl: pr.htmlUrl,
repoFullName: `${owner}/${repo}`,
alreadyExisted: false,
+ pushOutput,
+ commitOutput,
};
} catch (error) {
if (
@@ -133,6 +181,8 @@ export async function createPR(params: CreatePRParams): Promise
prUrl: existingPR.htmlUrl,
repoFullName: `${owner}/${repo}`,
alreadyExisted: true,
+ pushOutput,
+ commitOutput,
};
}
}
diff --git a/src/utils/repo.ts b/src/utils/repo.ts
index 3e68195e..e661d7e0 100644
--- a/src/utils/repo.ts
+++ b/src/utils/repo.ts
@@ -1,5 +1,7 @@
-import { execSync, spawn } from 'node:child_process';
+import { execSync } from 'node:child_process';
import { existsSync, mkdirSync, rmSync } from 'node:fs';
+import { execa } from 'execa';
+import treeKill from 'tree-kill';
import { getProjectGitHubToken } from '../config/projects.js';
import type { ProjectConfig } from '../types/index.js';
import { logger } from './logging.js';
@@ -67,37 +69,200 @@ export function cleanupTempDir(dir: string): void {
}
}
+/**
+ * Options for {@link runCommand}.
+ *
+ * All timing fields are in milliseconds. Setting a timing field to `0` disables it.
+ * Spec 013: the defaults balance noise against agent-observability — fast calls
+ * (ms-scale) never hit the heartbeat or idle timer, slow calls (git push with hooks)
+ * emit progress and are killed cleanly if wedged.
+ */
+export type RunCommandOptions = {
+ /** Emit a heartbeat on parent stderr every N ms of child silence. Default 30_000. Set to 0 to disable. */
+ heartbeatMs?: number;
+ /** Kill child if no output for N ms. Default 120_000. Set to 0 to disable. */
+ idleTimeoutMs?: number;
+ /** Kill child after N ms of total runtime. Default 600_000. Set to 0 to disable. */
+ wallTimeoutMs?: number;
+ /** After SIGTERM, wait N ms before SIGKILL. Default 5_000. */
+ forceKillAfterMs?: number;
+ /** Short label emitted in heartbeat lines. Defaults to `command`. */
+ label?: string;
+ /** Suppress streaming and heartbeats. Capture-only. Default false. */
+ silent?: boolean;
+};
+
+export type RunCommandResult = {
+ stdout: string;
+ stderr: string;
+ exitCode: number;
+ /** Set when the helper's timeouts fired. Undefined on natural exit. */
+ reason?: 'idle-timeout' | 'wall-timeout';
+};
+
+const DEFAULT_HEARTBEAT_MS = 30_000;
+const DEFAULT_IDLE_TIMEOUT_MS = 120_000;
+const DEFAULT_WALL_TIMEOUT_MS = 600_000;
+const DEFAULT_FORCE_KILL_AFTER_MS = 5_000;
+
+/**
+ * Internal. Manages the three timers (heartbeat, idle, wall) + SIGTERM→SIGKILL
+ * ladder for a subprocess. Returned handle's `noteOutput` resets heartbeat + idle.
+ */
+type WatcherHandle = {
+ noteOutput: () => void;
+ dispose: () => void;
+ getReason: () => 'idle-timeout' | 'wall-timeout' | undefined;
+};
+
+function createSubprocessWatcher(
+ pid: number | undefined,
+ config: {
+ heartbeatMs: number;
+ idleTimeoutMs: number;
+ wallTimeoutMs: number;
+ forceKillAfterMs: number;
+ label: string;
+ silent: boolean;
+ },
+ startMs: number,
+): WatcherHandle {
+ const { heartbeatMs, idleTimeoutMs, wallTimeoutMs, forceKillAfterMs, label, silent } = config;
+ let reason: 'idle-timeout' | 'wall-timeout' | undefined;
+ let heartbeatTimer: NodeJS.Timeout | null = null;
+ let idleTimer: NodeJS.Timeout | null = null;
+ let wallTimer: NodeJS.Timeout | null = null;
+ let forceKillTimer: NodeJS.Timeout | null = null;
+
+ const killTree = (signal: 'SIGTERM' | 'SIGKILL') => {
+ if (pid) treeKill(pid, signal, () => {});
+ };
+
+ const scheduleForceKill = () => {
+ if (forceKillTimer || forceKillAfterMs <= 0) return;
+ forceKillTimer = setTimeout(() => killTree('SIGKILL'), forceKillAfterMs);
+ };
+
+ const onTimeout = (kind: 'idle-timeout' | 'wall-timeout') => {
+ reason = kind;
+ killTree('SIGTERM');
+ scheduleForceKill();
+ };
+
+ const armHeartbeat = () => {
+ if (silent || heartbeatMs <= 0) return;
+ if (heartbeatTimer) clearTimeout(heartbeatTimer);
+ const tick = () => {
+ const elapsedS = Math.round((Date.now() - startMs) / 1000);
+ process.stderr.write(`[${label}] still running (${elapsedS}s)\n`);
+ heartbeatTimer = setTimeout(tick, heartbeatMs);
+ };
+ heartbeatTimer = setTimeout(tick, heartbeatMs);
+ };
+
+ const armIdle = () => {
+ if (idleTimeoutMs <= 0) return;
+ if (idleTimer) clearTimeout(idleTimer);
+ idleTimer = setTimeout(() => onTimeout('idle-timeout'), idleTimeoutMs);
+ };
+
+ if (wallTimeoutMs > 0) {
+ wallTimer = setTimeout(() => onTimeout('wall-timeout'), wallTimeoutMs);
+ }
+ armIdle();
+ armHeartbeat();
+
+ return {
+ noteOutput: () => {
+ armIdle();
+ armHeartbeat();
+ },
+ dispose: () => {
+ if (heartbeatTimer) clearTimeout(heartbeatTimer);
+ if (idleTimer) clearTimeout(idleTimer);
+ if (wallTimer) clearTimeout(wallTimer);
+ if (forceKillTimer) clearTimeout(forceKillTimer);
+ },
+ getReason: () => reason,
+ };
+}
+
+function resolveOptions(
+ command: string,
+ options: RunCommandOptions | undefined,
+): {
+ heartbeatMs: number;
+ idleTimeoutMs: number;
+ wallTimeoutMs: number;
+ forceKillAfterMs: number;
+ label: string;
+ silent: boolean;
+} {
+ return {
+ heartbeatMs: options?.heartbeatMs ?? DEFAULT_HEARTBEAT_MS,
+ idleTimeoutMs: options?.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS,
+ wallTimeoutMs: options?.wallTimeoutMs ?? DEFAULT_WALL_TIMEOUT_MS,
+ forceKillAfterMs: options?.forceKillAfterMs ?? DEFAULT_FORCE_KILL_AFTER_MS,
+ label: options?.label ?? command,
+ silent: options?.silent ?? false,
+ };
+}
+
+/**
+ * Spawn a subprocess and return captured output + exit code.
+ *
+ * Spec 013 behavior: streams child stdout/stderr to the parent's stderr as they
+ * arrive (line-buffered), emits a periodic heartbeat line during child silence,
+ * enforces both an idle-silence timeout and a wall-clock timeout, kills the
+ * child AND its descendants via tree-kill on timeout (SIGTERM → SIGKILL after a
+ * grace window), and preserves captured output on success and failure alike.
+ *
+ * All options are configurable per call with safe defaults; callers who want
+ * different numbers (e.g. a push that should not exceed the gadget's 240s budget)
+ * pass explicit values.
+ */
export async function runCommand(
command: string,
args: string[],
cwd: string,
env?: Record<string, string>,
-): Promise<{ stdout: string; stderr: string; exitCode: number }> {
- return new Promise((resolve) => {
- const child = spawn(command, args, {
- cwd,
- env: { ...process.env, ...env },
- stdio: ['pipe', 'pipe', 'pipe'],
- });
-
- let stdout = '';
- let stderr = '';
-
- child.stdout.on('data', (data) => {
- stdout += data.toString();
- });
-
- child.stderr.on('data', (data) => {
- stderr += data.toString();
- });
-
- child.on('close', (code) => {
- resolve({ stdout, stderr, exitCode: code ?? 1 });
- });
-
- child.on('error', (err) => {
- stderr += err.message;
- resolve({ stdout, stderr, exitCode: 1 });
- });
+ options?: RunCommandOptions,
+): Promise<RunCommandResult> {
+ const config = resolveOptions(command, options);
+ const child = execa(command, args, {
+ cwd,
+ env: { ...process.env, ...env },
+ reject: false,
+ encoding: 'utf8',
});
+
+ const watcher = createSubprocessWatcher(child.pid, config, Date.now());
+ const capture = { stdout: '', stderr: '' };
+ const onChunk = (chunk: unknown, kind: 'stdout' | 'stderr') => {
+ const text = typeof chunk === 'string' ? chunk : String(chunk);
+ capture[kind] += text;
+ if (!config.silent) process.stderr.write(text);
+ watcher.noteOutput();
+ };
+ child.stdout?.on('data', (c) => onChunk(c, 'stdout'));
+ child.stderr?.on('data', (c) => onChunk(c, 'stderr'));
+
+ try {
+ const result = await child;
+ const stdout = capture.stdout || (typeof result.stdout === 'string' ? result.stdout : '');
+ const stderr = capture.stderr || (typeof result.stderr === 'string' ? result.stderr : '');
+ const reason = watcher.getReason();
+ const exitCode = typeof result.exitCode === 'number' ? result.exitCode : reason ? 143 : 1;
+ return { stdout, stderr, exitCode, reason };
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ return {
+ stdout: capture.stdout,
+ stderr: capture.stderr + msg,
+ exitCode: 1,
+ reason: watcher.getReason(),
+ };
+ } finally {
+ watcher.dispose();
+ }
}
diff --git a/tests/unit/gadgets/github.test.ts b/tests/unit/gadgets/github.test.ts
index 66c530f6..613f2417 100644
--- a/tests/unit/gadgets/github.test.ts
+++ b/tests/unit/gadgets/github.test.ts
@@ -175,11 +175,13 @@ describe('GitHub Gadgets', () => {
);
// Should check for changes
expect(runCommand).toHaveBeenCalledWith('git', ['status', '--porcelain'], expect.any(String));
- // Should push
+ // Should push (spec 013: push now passes timeout options as 4th/5th args)
expect(runCommand).toHaveBeenCalledWith(
'git',
['push', '-u', 'origin', 'feature/test'],
expect.any(String),
+ undefined,
+ expect.objectContaining({ label: 'git-push' }),
);
expect(result).toContain('PR #44 created successfully');
});
@@ -249,11 +251,13 @@ describe('GitHub Gadgets', () => {
head: 'feature/test',
});
- // Should commit with PR title as message
+ // Should commit with PR title as message (spec 013: commit now passes timeout options)
expect(runCommand).toHaveBeenCalledWith(
'git',
['commit', '-m', 'Test PR'],
expect.any(String),
+ undefined,
+ expect.objectContaining({ label: 'git-commit' }),
);
});
@@ -288,6 +292,8 @@ describe('GitHub Gadgets', () => {
'git',
['commit', '-m', 'feat(test): custom commit message'],
expect.any(String),
+ undefined,
+ expect.objectContaining({ label: 'git-commit' }),
);
});
@@ -319,11 +325,13 @@ describe('GitHub Gadgets', () => {
['status', '--porcelain'],
expect.any(String),
);
- // Should still push
+ // Should still push (spec 013: passes timeout options)
expect(runCommand).toHaveBeenCalledWith(
'git',
['push', '-u', 'origin', 'feature/test'],
expect.any(String),
+ undefined,
+ expect.objectContaining({ label: 'git-push' }),
);
});
diff --git a/tests/unit/gadgets/github/core/createPR.test.ts b/tests/unit/gadgets/github/core/createPR.test.ts
index ca4f490b..74b3c996 100644
--- a/tests/unit/gadgets/github/core/createPR.test.ts
+++ b/tests/unit/gadgets/github/core/createPR.test.ts
@@ -527,3 +527,166 @@ describe('createPR', () => {
expect(commitCall).toContain('Custom commit message');
});
});
+
+// ────────────────────────────────────────────────────────────────────────────
+// Spec 013: per-caller timeouts + captured hook output preservation
+// ────────────────────────────────────────────────────────────────────────────
+
+describe('pushBranch and stageAndCommit timeout options (spec 013)', () => {
+ it('pushBranch passes an explicit wallTimeoutMs below the gadget 240s ceiling', async () => {
+ mockRunCommand.mockImplementation(async (_cmd, args) => {
+ if (args?.[0] === 'remote') return { stdout: HTTPS_URL, stderr: '', exitCode: 0 };
+ if (args?.[0] === 'ls-remote')
+ return { stdout: 'abc\trefs/heads/feat', stderr: '', exitCode: 0 };
+ return { stdout: '', stderr: '', exitCode: 0 };
+ });
+ mockGithub.createPR.mockResolvedValue({
+ number: 1,
+ htmlUrl: 'https://github.com/test-owner/test-repo/pull/1',
+ } as Awaited>);
+
+ await createPR({
+ title: 'Test',
+ body: 'Body',
+ head: 'feat',
+ base: 'main',
+ commit: false,
+ push: true,
+ });
+
+ const pushCall = mockRunCommand.mock.calls.find(
+ (c) => Array.isArray(c[1]) && c[1][0] === 'push',
+ );
+ expect(pushCall).toBeDefined();
+ // 5th arg is RunCommandOptions
+ const options = pushCall?.[4] as { wallTimeoutMs?: number } | undefined;
+ expect(options).toBeDefined();
+ expect(typeof options?.wallTimeoutMs).toBe('number');
+ expect(options?.wallTimeoutMs).toBeLessThanOrEqual(230_000);
+ });
+
+ it('pushBranch passes an explicit finite idleTimeoutMs', async () => {
+ mockRunCommand.mockImplementation(async (_cmd, args) => {
+ if (args?.[0] === 'remote') return { stdout: HTTPS_URL, stderr: '', exitCode: 0 };
+ if (args?.[0] === 'ls-remote')
+ return { stdout: 'abc\trefs/heads/feat', stderr: '', exitCode: 0 };
+ return { stdout: '', stderr: '', exitCode: 0 };
+ });
+ mockGithub.createPR.mockResolvedValue({
+ number: 1,
+ htmlUrl: 'https://github.com/test-owner/test-repo/pull/1',
+ } as Awaited>);
+
+ await createPR({
+ title: 'Test',
+ body: 'Body',
+ head: 'feat',
+ base: 'main',
+ commit: false,
+ push: true,
+ });
+
+ const pushCall = mockRunCommand.mock.calls.find(
+ (c) => Array.isArray(c[1]) && c[1][0] === 'push',
+ );
+ const options = pushCall?.[4] as { idleTimeoutMs?: number } | undefined;
+ expect(typeof options?.idleTimeoutMs).toBe('number');
+ expect(options?.idleTimeoutMs).toBeGreaterThan(0);
+ });
+
+ it('stageAndCommit passes explicit wallTimeoutMs and idleTimeoutMs', async () => {
+ mockRunCommand.mockImplementation(async (_cmd, args) => {
+ if (args?.[0] === 'remote') return { stdout: HTTPS_URL, stderr: '', exitCode: 0 };
+ if (args?.[0] === 'status') return { stdout: 'M foo.ts\n', stderr: '', exitCode: 0 };
+ if (args?.[0] === 'ls-remote')
+ return { stdout: 'abc\trefs/heads/feat', stderr: '', exitCode: 0 };
+ return { stdout: '', stderr: '', exitCode: 0 };
+ });
+ mockGithub.createPR.mockResolvedValue({
+ number: 1,
+ htmlUrl: 'https://github.com/test-owner/test-repo/pull/1',
+ } as Awaited>);
+
+ await createPR({
+ title: 'Test',
+ body: 'Body',
+ head: 'feat',
+ base: 'main',
+ commit: true,
+ push: false,
+ });
+
+ const commitCall = mockRunCommand.mock.calls.find(
+ (c) => Array.isArray(c[1]) && c[1][0] === 'commit',
+ );
+ expect(commitCall).toBeDefined();
+ const options = commitCall?.[4] as
+ | { wallTimeoutMs?: number; idleTimeoutMs?: number }
+ | undefined;
+ expect(typeof options?.wallTimeoutMs).toBe('number');
+ expect(typeof options?.idleTimeoutMs).toBe('number');
+ });
+
+ it('createPR result carries captured push output on success', async () => {
+ mockRunCommand.mockImplementation(async (_cmd, args) => {
+ if (args?.[0] === 'remote') return { stdout: HTTPS_URL, stderr: '', exitCode: 0 };
+ if (args?.[0] === 'push')
+ return {
+ stdout: 'Pre-push hook ran: typecheck OK\n',
+ stderr: 'To github.com...\n',
+ exitCode: 0,
+ };
+ if (args?.[0] === 'ls-remote')
+ return { stdout: 'abc\trefs/heads/feat', stderr: '', exitCode: 0 };
+ return { stdout: '', stderr: '', exitCode: 0 };
+ });
+ mockGithub.createPR.mockResolvedValue({
+ number: 1,
+ htmlUrl: 'https://github.com/test-owner/test-repo/pull/1',
+ } as Awaited>);
+
+ const result = await createPR({
+ title: 'Test',
+ body: 'Body',
+ head: 'feat',
+ base: 'main',
+ commit: false,
+ push: true,
+ });
+
+ expect(result.pushOutput).toBeDefined();
+ expect(result.pushOutput).toContain('Pre-push hook ran: typecheck OK');
+ });
+
+ it('createPR result carries captured commit output on success', async () => {
+ mockRunCommand.mockImplementation(async (_cmd, args) => {
+ if (args?.[0] === 'remote') return { stdout: HTTPS_URL, stderr: '', exitCode: 0 };
+ if (args?.[0] === 'status') return { stdout: 'M foo.ts\n', stderr: '', exitCode: 0 };
+ if (args?.[0] === 'commit')
+ return {
+ stdout: 'Pre-commit hook ran: biome OK\n[feat abc123] msg\n',
+ stderr: '',
+ exitCode: 0,
+ };
+ if (args?.[0] === 'ls-remote')
+ return { stdout: 'abc\trefs/heads/feat', stderr: '', exitCode: 0 };
+ return { stdout: '', stderr: '', exitCode: 0 };
+ });
+ mockGithub.createPR.mockResolvedValue({
+ number: 1,
+ htmlUrl: 'https://github.com/test-owner/test-repo/pull/1',
+ } as Awaited>);
+
+ const result = await createPR({
+ title: 'Test',
+ body: 'Body',
+ head: 'feat',
+ base: 'main',
+ commit: true,
+ push: false,
+ });
+
+ expect(result.commitOutput).toBeDefined();
+ expect(result.commitOutput).toContain('Pre-commit hook ran: biome OK');
+ });
+});
diff --git a/tests/unit/utils/repo.test.ts b/tests/unit/utils/repo.test.ts
index 9085c9a6..c6949094 100644
--- a/tests/unit/utils/repo.test.ts
+++ b/tests/unit/utils/repo.test.ts
@@ -5,7 +5,6 @@ vi.mock('node:child_process', async () => {
return {
...actual,
execSync: vi.fn(),
- spawn: vi.fn(),
};
});
@@ -15,6 +14,16 @@ vi.mock('node:fs', () => ({
rmSync: vi.fn(),
}));
+vi.mock('execa', () => ({
+ execa: vi.fn(),
+}));
+
+vi.mock('tree-kill', () => ({
+ default: vi.fn((_pid: number, _signal: string, cb?: (err?: Error) => void) => {
+ if (cb) cb();
+ }),
+}));
+
vi.mock('../../../src/config/projects.js', () => ({
getProjectGitHubToken: vi.fn(() => Promise.resolve('test-token')),
}));
@@ -26,10 +35,11 @@ vi.mock('../../../src/utils/logging.js', () => ({
},
}));
-import { execSync, spawn } from 'node:child_process';
-import { EventEmitter } from 'node:events';
+import { execSync } from 'node:child_process';
import { existsSync, mkdirSync, rmSync } from 'node:fs';
import { Readable } from 'node:stream';
+import { execa } from 'execa';
+import treeKill from 'tree-kill';
import {
cleanupTempDir,
cloneRepo,
@@ -176,74 +186,352 @@ describe('repo utils', () => {
});
describe('runCommand', () => {
- function createMockChild() {
+ /**
+ * Build a fake execa Subprocess: awaitable + has readable stdout/stderr + pid.
+ * `resolveExec` / `rejectExec` are test hooks to settle the subprocess when done.
+ */
+ function createMockSubprocess() {
const stdout = new Readable({ read() {} });
const stderr = new Readable({ read() {} });
- const child = new EventEmitter() as EventEmitter & {
+ let resolveExec: (r: { stdout: string; stderr: string; exitCode: number }) => void;
+ let rejectExec: (e: Error) => void;
+ const promise = new Promise<{ stdout: string; stderr: string; exitCode: number }>(
+ (res, rej) => {
+ resolveExec = res;
+ rejectExec = rej;
+ },
+ );
+ const subprocess = promise as Promise<{
+ stdout: string;
+ stderr: string;
+ exitCode: number;
+ }> & {
stdout: Readable;
stderr: Readable;
- stdin: { write: vi.Mock; end: vi.Mock };
+ pid: number;
+ resolveExec: typeof resolveExec;
+ rejectExec: typeof rejectExec;
};
- child.stdout = stdout;
- child.stderr = stderr;
- child.stdin = { write: vi.fn(), end: vi.fn() };
- return child;
+ subprocess.stdout = stdout;
+ subprocess.stderr = stderr;
+ subprocess.pid = 12345;
+ subprocess.resolveExec = (r) => resolveExec(r);
+ subprocess.rejectExec = (e) => rejectExec(e);
+ return subprocess;
}
- it('runs command and returns stdout/stderr/exitCode', async () => {
- const mockChild = createMockChild();
- vi.mocked(spawn).mockReturnValue(mockChild as unknown as ReturnType<typeof spawn>);
+ let stderrSpy: ReturnType<typeof vi.spyOn>;
+
+ beforeEach(() => {
+ stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+ vi.mocked(execa).mockReset();
+ vi.mocked(treeKill).mockClear();
+ });
+
+ afterEach(() => {
+ stderrSpy.mockRestore();
+ vi.useRealTimers();
+ });
+
+ it('streams child stdout to parent stderr line-by-line as it arrives', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('echo', ['a'], '/tmp');
+ await new Promise((r) => setTimeout(r, 0));
+
+ child.stdout.push('line1\n');
+ child.stdout.push('line2\n');
+ await new Promise((r) => setTimeout(r, 0));
+
+ expect(stderrSpy).toHaveBeenCalledWith('line1\n');
+ expect(stderrSpy).toHaveBeenCalledWith('line2\n');
+
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: 'line1\nline2\n', stderr: '', exitCode: 0 });
+ await promise;
+ });
+
+ it('streams child stderr to parent stderr line-by-line', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('cmd', [], '/tmp');
+ await new Promise((r) => setTimeout(r, 0));
+
+ child.stderr.push('err1\n');
+ await new Promise((r) => setTimeout(r, 0));
+
+ expect(stderrSpy).toHaveBeenCalledWith('err1\n');
+
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: 'err1\n', exitCode: 0 });
+ await promise;
+ });
+
+ it('emits a heartbeat to parent stderr after heartbeatMs of child silence, citing elapsed time and command label', async () => {
+ vi.useFakeTimers();
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('git', ['push'], '/tmp', undefined, {
+ heartbeatMs: 1000,
+ label: 'git-push',
+ });
+ await Promise.resolve();
+
+ vi.advanceTimersByTime(1000);
+
+ const heartbeatCall = stderrSpy.mock.calls.find((c) =>
+ /\[git-push\] still running \(1s\)/.test(String(c[0])),
+ );
+ expect(heartbeatCall).toBeTruthy();
+
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: '', exitCode: 0 });
+ vi.useRealTimers();
+ await promise;
+ });
+
+ it('resets the heartbeat timer when child emits output', async () => {
+ vi.useFakeTimers();
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('cmd', [], '/tmp', undefined, {
+ heartbeatMs: 1000,
+ label: 'cmd',
+ });
+ await Promise.resolve();
+
+ // 900ms of silence — no heartbeat yet
+ vi.advanceTimersByTime(900);
+ let heartbeats = stderrSpy.mock.calls.filter((c) =>
+ /still running/.test(String(c[0])),
+ ).length;
+ expect(heartbeats).toBe(0);
+
+ // Child output at 900ms → resets idle + heartbeat timers.
+ // Use emit('data', ...) rather than push() because push queues the
+ // 'data' event on process.nextTick, which vi.advanceTimersByTime
+ // does not flush — the heartbeat timer would fire before onChunk runs.
+ child.stdout.emit('data', 'tick\n');
+
+ // Advance 900ms more (total silence since last child output: 900ms) — still no heartbeat
+ vi.advanceTimersByTime(900);
+ heartbeats = stderrSpy.mock.calls.filter((c) => /still running/.test(String(c[0]))).length;
+ expect(heartbeats).toBe(0);
+
+ // Advance to 1000ms since last child output — one heartbeat fires
+ vi.advanceTimersByTime(100);
+ heartbeats = stderrSpy.mock.calls.filter((c) => /still running/.test(String(c[0]))).length;
+ expect(heartbeats).toBe(1);
+
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: 'tick\n', stderr: '', exitCode: 0 });
+ vi.useRealTimers();
+ await promise;
+ });
- const promise = runCommand('echo', ['hello'], '/tmp');
+ it('does not emit heartbeat when child exits before heartbeatMs elapses', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
- // Need to yield to allow event handlers to be attached
+ const promise = runCommand('cmd', [], '/tmp', undefined, {
+ heartbeatMs: 10_000,
+ label: 'cmd',
+ });
await new Promise((r) => setTimeout(r, 0));
- mockChild.stdout.push('hello\n');
- mockChild.stdout.push(null);
- mockChild.stderr.push(null);
- mockChild.emit('close', 0);
+ child.stdout.push('done\n');
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: 'done\n', stderr: '', exitCode: 0 });
+ await promise;
+
+ const heartbeats = stderrSpy.mock.calls.filter((c) =>
+ /still running/.test(String(c[0])),
+ ).length;
+ expect(heartbeats).toBe(0);
+ });
+
+ it('kills the child via tree-kill with SIGTERM when idleTimeoutMs elapses with no output', async () => {
+ vi.useFakeTimers();
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('cmd', [], '/tmp', undefined, {
+ idleTimeoutMs: 5000,
+ heartbeatMs: 0,
+ forceKillAfterMs: 5000,
+ });
+ await Promise.resolve();
+
+ vi.advanceTimersByTime(5000);
+ await Promise.resolve();
+
+ // After idle fires, helper kills with SIGTERM
+ expect(vi.mocked(treeKill)).toHaveBeenCalledWith(12345, 'SIGTERM', expect.any(Function));
+ // Settle the subprocess so the awaiting runCommand resolves
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: '', exitCode: 143 });
+ vi.useRealTimers();
const result = await promise;
+ expect(result.reason).toBe('idle-timeout');
+ expect(result.exitCode).not.toBe(0);
+ });
+
+ it('escalates to SIGKILL after forceKillAfterMs if the child did not exit on SIGTERM', async () => {
+ vi.useFakeTimers();
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('cmd', [], '/tmp', undefined, {
+ idleTimeoutMs: 1000,
+ heartbeatMs: 0,
+ forceKillAfterMs: 2000,
+ });
+ await Promise.resolve();
+
+ vi.advanceTimersByTime(1000);
+ await Promise.resolve();
+ expect(vi.mocked(treeKill)).toHaveBeenCalledWith(12345, 'SIGTERM', expect.any(Function));
+ expect(vi.mocked(treeKill)).toHaveBeenCalledTimes(1);
+
+ // Child does NOT exit; advance the force-kill window
+ vi.advanceTimersByTime(2000);
+ await Promise.resolve();
+ expect(vi.mocked(treeKill)).toHaveBeenCalledWith(12345, 'SIGKILL', expect.any(Function));
+ expect(vi.mocked(treeKill)).toHaveBeenCalledTimes(2);
+
+ // Settle
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: '', exitCode: 137 });
+ vi.useRealTimers();
+ await promise;
+ });
+
+ it('kills the child via tree-kill with SIGTERM when wallTimeoutMs elapses even with ongoing output', async () => {
+ vi.useFakeTimers();
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('cmd', [], '/tmp', undefined, {
+ wallTimeoutMs: 5000,
+ idleTimeoutMs: 100_000,
+ heartbeatMs: 0,
+ forceKillAfterMs: 5000,
+ });
+ await Promise.resolve();
+
+ // Tick every 500ms with output — idle timer keeps resetting but wall ticks down
+ for (let t = 0; t < 5000; t += 500) {
+ child.stdout.push(`tick ${t}\n`);
+ await Promise.resolve();
+ vi.advanceTimersByTime(500);
+ await Promise.resolve();
+ }
+
+ expect(vi.mocked(treeKill)).toHaveBeenCalledWith(12345, 'SIGTERM', expect.any(Function));
+
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: '', exitCode: 143 });
+ vi.useRealTimers();
+ const result = await promise;
+ expect(result.reason).toBe('wall-timeout');
+ });
+
+ it('returns captured stdout and stderr in the result on success', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
- expect(result.stdout).toBe('hello\n');
+ const promise = runCommand('cmd', [], '/tmp');
+ await new Promise((r) => setTimeout(r, 0));
+
+ child.stdout.push('ok\n');
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: 'ok\n', stderr: '', exitCode: 0 });
+
+ const result = await promise;
+ expect(result.stdout).toBe('ok\n');
expect(result.stderr).toBe('');
expect(result.exitCode).toBe(0);
});
- it('handles command error', async () => {
- const mockChild = createMockChild();
- vi.mocked(spawn).mockReturnValue(mockChild as unknown as ReturnType<typeof spawn>);
+ it('returns captured stdout and stderr in the result on non-zero exit', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
- const promise = runCommand('bad-command', [], '/tmp');
+ const promise = runCommand('cmd', [], '/tmp');
+ await new Promise((r) => setTimeout(r, 0));
+
+ child.stderr.push('failed\n');
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: 'failed\n', exitCode: 1 });
+ const result = await promise;
+ expect(result.stderr).toBe('failed\n');
+ expect(result.exitCode).toBe(1);
+ });
+
+ it('does not stream when options.silent is true', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
+
+ const promise = runCommand('cmd', [], '/tmp', undefined, { silent: true });
await new Promise((r) => setTimeout(r, 0));
- mockChild.stdout.push(null);
- mockChild.stderr.push(null);
- mockChild.emit('error', new Error('spawn ENOENT'));
+ child.stdout.push('silent-stdout\n');
+ child.stderr.push('silent-stderr\n');
+ await new Promise((r) => setTimeout(r, 0));
+ const forwardedChild = stderrSpy.mock.calls.filter(
+ (c) => String(c[0]) === 'silent-stdout\n' || String(c[0]) === 'silent-stderr\n',
+ ).length;
+ expect(forwardedChild).toBe(0);
+
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({
+ stdout: 'silent-stdout\n',
+ stderr: 'silent-stderr\n',
+ exitCode: 0,
+ });
const result = await promise;
- expect(result.exitCode).toBe(1);
- expect(result.stderr).toContain('spawn ENOENT');
+ // Capture still works despite silent mode
+ expect(result.stdout).toBe('silent-stdout\n');
+ expect(result.stderr).toBe('silent-stderr\n');
});
- it('handles null exit code', async () => {
- const mockChild = createMockChild();
- vi.mocked(spawn).mockReturnValue(mockChild as unknown as ReturnType<typeof spawn>);
+ it('backward-compatible signature: runCommand(cmd, args, cwd) returns { stdout, stderr, exitCode }', async () => {
+ const child = createMockSubprocess();
+ vi.mocked(execa).mockReturnValue(child as unknown as ReturnType<typeof execa>);
const promise = runCommand('cmd', [], '/tmp');
-
await new Promise((r) => setTimeout(r, 0));
- mockChild.stdout.push(null);
- mockChild.stderr.push(null);
- mockChild.emit('close', null);
+ child.stdout.push(null);
+ child.stderr.push(null);
+ child.resolveExec({ stdout: '', stderr: '', exitCode: 0 });
const result = await promise;
-
- expect(result.exitCode).toBe(1);
+ expect(result).toMatchObject({ stdout: '', stderr: '', exitCode: 0 });
+ expect(typeof result.stdout).toBe('string');
+ expect(typeof result.stderr).toBe('string');
+ expect(typeof result.exitCode).toBe('number');
+ // reason is optional; undefined on natural exit
+ expect(result.reason).toBeUndefined();
});
});
});