diff --git a/docs/tier2.md b/docs/tier2.md new file mode 100644 index 00000000..4b2ad1dc --- /dev/null +++ b/docs/tier2.md @@ -0,0 +1,143 @@ +# Tier 2: Local Services + +squads-cli operates in one of two tiers: + +- **Tier 1** (default) — file-based only. All state lives in JSONL, markdown, and git. Zero external dependencies. Works everywhere. +- **Tier 2** — local Docker services. Adds Postgres, Redis, a REST API, and a Bridge service for webhook-driven workflows and richer observability. + +Tier 2 is optional and fully local. Most users never need it. + +## What Tier 2 Adds + +| Service | Port | Purpose | +|-----------|-------|------------------------------------------| +| API | 8090 | REST API for agent executions and jobs | +| Bridge | 8088 | GitHub webhook receiver | +| Postgres | 5432 | Persistent job queue (Procrastinate) | +| Redis | 6379 | Pub/sub and caching | + +## Prerequisites + +- [Docker Desktop](https://www.docker.com/products/docker-desktop) installed and running +- The `agents-squads/engineering` repo cloned as a sibling to this repo: + +``` +~/agents-squads/ + squads-cli/ ← this repo + engineering/ + docker/ + docker-compose.yml ← Tier 2 services definition +``` + +squads-cli looks for the compose file at: + +1. `~/agents-squads/engineering/docker/docker-compose.yml` +2. `~/agents-squads/engineering/docker/docker-compose.yaml` +3. `../engineering/docker/docker-compose.yml` (relative to cwd) + +## Usage + +### Start services + +```bash +squads services up +``` + +Expected output: + +``` + Starting Tier 2 services... + + docker compose up -d + [Docker output] + + Services started. Waiting for health checks... + Tier 2 active. All services healthy. 
+ + API: http://localhost:8090 + Bridge: http://localhost:8088 + Postgres: localhost:5432 + Redis: localhost:6379 +``` + +Optional profiles: + +```bash +squads services up --webhooks # also start ngrok tunnel for GitHub webhooks +squads services up --telemetry # also start OpenTelemetry collector +``` + +### Check status + +```bash +squads services status +``` + +Expected output (when running): + +``` + Services (Tier 2) + + up squads-postgres 0.0.0.0:5432->5432/tcp + up squads-redis 0.0.0.0:6379->6379/tcp + up squads-api 0.0.0.0:8090->8090/tcp + up squads-bridge 0.0.0.0:8088->8088/tcp + + Database + Procrastinate jobs: 12 + Agent executions: 47 +``` + +Expected output (when not running): + +``` + Services (Tier 1) + + No Docker containers running. +``` + +### Stop services + +```bash +squads services down +``` + +Expected output: + +``` + Stopping Tier 2 services... + + [Docker output] + + Services stopped. Falling back to Tier 1 (file-based). +``` + +If no compose file is found: + +``` + No docker-compose.yml found. Nothing to stop. +``` + +## Fallback Behavior + +squads-cli always degrades gracefully to Tier 1 when Tier 2 services are unavailable: + +- Commands that read from Postgres fall back to JSONL files. +- Commands that post to the API are silently skipped or use local state. +- `squads services status` reports `Tier 1` and shows no containers. + +You can always run `squads services status` to confirm which tier is active. + +## Tier Detection + +At startup, squads-cli probes `http://localhost:8090/health` and `http://localhost:8088/health` with a 1.5 s timeout. If the API responds with HTTP 2xx, Tier 2 is active. The result is cached for the lifetime of the process. 
+ +To check programmatically: + +```typescript +import { detectTier } from 'squads-cli/lib/tier-detect'; + +const info = await detectTier(); +console.log(info.tier); // 1 or 2 +console.log(info.services.api); // true | false +``` diff --git a/package.json b/package.json index 71bfdc66..2ffa5101 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "squads-cli", - "version": "0.2.2", + "version": "0.3.0", "description": "Your AI workforce. Every user gets an AI manager that runs their team — finance, marketing, engineering, operations — for the cost of API calls.", "type": "module", "bin": { diff --git a/src/cli.ts b/src/cli.ts index 713327e5..036c1964 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -63,6 +63,9 @@ import { registerReleaseCommands } from './commands/release-check.js'; import { registerObservabilityCommands } from './commands/observability.js'; import { registerTierCommand } from './commands/tier.js'; import { registerServicesCommands } from './commands/services.js'; +import { registerGoalsCommand } from './commands/goals.js'; +import { registerCredentialsCommand } from './commands/credentials.js'; +import { registerReviewCommand } from './commands/review.js'; // All other command handlers are lazy-loaded via dynamic import() inside // action handlers. 
Only the invoked command's dependencies are loaded, @@ -309,6 +312,9 @@ program .option('--phased', 'Autopilot: use dependency-based phase ordering (from SQUAD.md depends_on)') .option('--no-eval', 'Skip post-run COO evaluation') .option('--org', 'Run all squads as a coordinated org cycle (scan → plan → execute → report)') + .option('--force', 'Force re-run squads that already completed today') + .option('--resume', 'Resume org cycle from where quota stopped it') + .option('--focus ', 'Cycle focus: create, resolve, review, ship, research, cost (default: create)') .addHelpText('after', ` Examples: $ squads run engineering Run squad conversation (lead → scan → work → review) @@ -408,6 +414,28 @@ exec.action(async (options) => { return execListCommand(options); }); +// Log command - run history from observability JSONL +program + .command('log') + .description('Show run history with timestamps, duration, and status') + .option('-s, --squad ', 'Filter by squad') + .option('-a, --agent ', 'Filter by agent') + .option('-n, --limit ', 'Number of runs to show (default: 20)', '20') + .option('--since ', 'Show runs since date (e.g. 
7d, 2026-04-01)') + .option('-j, --json', 'Output as JSON') + .addHelpText('after', ` +Examples: + $ squads log Show last 20 runs + $ squads log --squad product Filter by squad + $ squads log --limit 50 Show last 50 runs + $ squads log --since 7d Runs in last 7 days + $ squads log --json Machine-readable output +`) + .action(async (options) => { + const { logCommand } = await import('./commands/log.js'); + return logCommand({ ...options, limit: parseInt(options.limit, 10) }); + }); + // ─── Understand (situational awareness) ────────────────────────────────────── // Dashboard command @@ -1058,6 +1086,9 @@ registerReleaseCommands(program); registerObservabilityCommands(program); registerTierCommand(program); registerServicesCommands(program); +registerGoalsCommand(program); +registerCredentialsCommand(program); +registerReviewCommand(program); // Providers command - show LLM CLI availability for multi-LLM support program diff --git a/src/commands/catalog.ts b/src/commands/catalog.ts index bc28d49e..d1c955a2 100644 --- a/src/commands/catalog.ts +++ b/src/commands/catalog.ts @@ -25,7 +25,8 @@ function noIdp(): boolean { export function registerCatalogCommands(program: Command): void { const catalog = program .command('catalog') - .description('Service catalog — browse, inspect, and validate services'); + .description('Service catalog — browse, inspect, and validate services') + .action(() => { catalog.outputHelp(); }); // ── catalog list ── catalog diff --git a/src/commands/context.ts b/src/commands/context.ts index f59d4db3..7db64801 100644 --- a/src/commands/context.ts +++ b/src/commands/context.ts @@ -386,20 +386,9 @@ export async function contextPromptCommand( const agentPath = `.agents/squads/${squadName}/${options.agent}.md`; - // Build the prompt for Claude - const prompt = `Execute the ${options.agent} agent from squad ${squadName}. - -Read the agent definition at ${agentPath} and follow its instructions exactly. 
- -CRITICAL INSTRUCTIONS: -- Work autonomously - do NOT ask clarifying questions -- Use Task tool to spawn sub-agents when needed -- Output findings to GitHub issues (gh issue create) -- Output code changes as PRs (gh pr create) -- Update memory files in .agents/memory/${squadName}/${options.agent}/ -- Type /exit when done - -Begin now.`; + // Prompt: identity + agent path only. All instructions in SYSTEM.md and agent.md. + const prompt = `You are ${options.agent} from squad ${squadName}. +Read your agent definition at ${agentPath} and your context layers. Execute your goals.`; if (options.json) { console.log(JSON.stringify({ diff --git a/src/commands/credentials.ts b/src/commands/credentials.ts new file mode 100644 index 00000000..757a299b --- /dev/null +++ b/src/commands/credentials.ts @@ -0,0 +1,373 @@ +/** + * squads credentials — manage per-squad GCP service accounts and credentials. + * + * Creates, rotates, lists, and revokes service accounts so agents + * can access the APIs they need without founder intervention. + */ + +import { Command } from 'commander'; +import { execSync } from 'child_process'; +import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, unlinkSync } from 'fs'; +import { join, basename } from 'path'; +import { findSquadsDir } from '../lib/squad-parser.js'; +import { colors, bold, RESET, writeLine, icons } from '../lib/terminal.js'; +import { homedir } from 'os'; + +// ── Permission mapping per squad ──────────────────────────────────────── +// Each squad gets ONLY the GCP roles it needs. Principle of least privilege. 
/** GCP access provisioned for one squad's service account. */
interface SquadPermissions {
  /** IAM role IDs bound to the service account at project level. */
  roles: string[];
  /** APIs to enable on the project before the roles are granted. */
  apis: string[];
  /** One-line summary printed while the credential is being created. */
  description: string;
}

/**
 * Permission mapping per squad. Each squad gets ONLY the GCP roles it needs
 * (principle of least privilege). Squads missing from this map cannot have a
 * credential created for them.
 */
const SQUAD_PERMISSIONS: Record<string, SquadPermissions> = {
  analytics: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser'],
    apis: ['bigquery.googleapis.com'],
    description: 'BQ telemetry read access',
  },
  data: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser', 'roles/cloudsql.client'],
    apis: ['bigquery.googleapis.com', 'sqladmin.googleapis.com'],
    description: 'BQ read + Cloud SQL client',
  },
  finance: {
    // NOTE(review): 'roles/drive.file' and 'roles/sheets.editor' look like OAuth-scope
    // names rather than IAM predefined roles — confirm against the IAM role list.
    roles: ['roles/drive.file', 'roles/sheets.editor'],
    apis: ['sheets.googleapis.com', 'drive.googleapis.com'],
    description: 'Google Sheets + Drive for financial models',
  },
  marketing: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser'],
    apis: ['bigquery.googleapis.com', 'searchconsole.googleapis.com'],
    description: 'BQ read + Search Console',
  },
  engineering: {
    roles: ['roles/cloudsql.admin', 'roles/run.developer', 'roles/secretmanager.secretAccessor'],
    apis: ['sqladmin.googleapis.com', 'run.googleapis.com', 'secretmanager.googleapis.com'],
    description: 'Cloud SQL admin + Cloud Run deploy + secrets',
  },
  customer: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser'],
    apis: ['bigquery.googleapis.com'],
    description: 'BQ telemetry for user analysis',
  },
  intelligence: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser'],
    apis: ['bigquery.googleapis.com'],
    description: 'BQ read for intelligence queries',
  },
  product: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser'],
    apis: ['bigquery.googleapis.com'],
    description: 'BQ telemetry for product analytics',
  },
  growth: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser'],
    apis: ['bigquery.googleapis.com'],
    description: 'BQ telemetry for growth metrics',
  },
  operations: {
    roles: ['roles/bigquery.dataViewer', 'roles/bigquery.jobUser', 'roles/monitoring.viewer'],
    apis: ['bigquery.googleapis.com', 'monitoring.googleapis.com'],
    description: 'BQ read + monitoring for ops health',
  },
};

/** Directory that holds the downloaded service-account key files. */
const SECRETS_DIR = join(homedir(), '.squads', 'secrets');
/** Suffix appended to the squad name to form the service-account ID. */
const SA_SUFFIX = '-agent';

/**
 * Return the active gcloud project ID.
 *
 * @throws Error when gcloud fails or no project is configured. gcloud exits
 *   successfully with empty stdout when the project is unset, so an empty
 *   result is treated as "not configured" instead of being returned and later
 *   producing malformed service-account emails.
 */
function getProject(): string {
  const notConfigured = 'No GCP project configured. Run: gcloud config set project <PROJECT_ID>';
  let project: string;
  try {
    project = execSync('gcloud config get-value project 2>/dev/null', { encoding: 'utf-8' }).trim();
  } catch {
    throw new Error(notConfigured);
  }
  if (project === '' || project === '(unset)') {
    throw new Error(notConfigured);
  }
  return project;
}

/** Build the service-account email for a squad in the given project. */
function saEmail(squad: string, project: string): string {
  return `${squad}${SA_SUFFIX}@${project}.iam.gserviceaccount.com`;
}

/** Path of the local key file for a squad (the file may not exist). */
function keyPath(squad: string): string {
  return join(SECRETS_DIR, `${squad}-sa-key.json`);
}

/**
 * Create the secrets directory if it is missing.
 * The directory stores private keys, so it is created owner-only (0700).
 */
function ensureSecretsDir(): void {
  if (!existsSync(SECRETS_DIR)) {
    mkdirSync(SECRETS_DIR, { recursive: true, mode: 0o700 });
  }
}

/**
 * Run a shell command synchronously and return its trimmed stdout.
 *
 * @param cmd    Full command line to execute.
 * @param silent When true, all stdio is piped (nothing is echoed). When false,
 *               stdout/stderr are inherited by the terminal, so the returned
 *               string is empty.
 * @throws Error 'gcloud auth expired…' when the failure message mentions
 *   reauthentication; otherwise the original execSync error is re-thrown.
 */
function gcloudExec(cmd: string, silent = false): string {
  try {
    const result = execSync(cmd, { encoding: 'utf-8', stdio: silent ? 'pipe' : ['pipe', 'inherit', 'inherit'] });
    // With inherited stdout execSync yields null at runtime, hence the fallback.
    return (result || '').trim();
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    if (msg.includes('Reauthentication')) {
      throw new Error('gcloud auth expired. Run: gcloud auth login');
    }
    throw e;
  }
}
/**
 * Read `private_key_id` from a service-account key file.
 * Returns undefined when the file is unreadable, is not a JSON object, or the
 * ID is not a plain hex string (the ID is later placed in a shell command).
 */
function readPrivateKeyId(path: string): string | undefined {
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null) return undefined;
    const id = (parsed as Record<string, unknown>)['private_key_id'];
    return typeof id === 'string' && /^[0-9a-f]+$/i.test(id) ? id : undefined;
  } catch {
    return undefined;
  }
}

/**
 * Rotate a squad's service-account key.
 *
 * Order of operations: create a new key next to the current one, move it over
 * the current key file, then delete the previous key from GCP. The move is a
 * single rename (which replaces the destination), so the key path is never
 * left empty and a failed rename cannot destroy the existing key.
 *
 * Prints an error and returns without changes when the squad has no local key.
 * Failure to delete the old remote key does not fail the rotation, but is
 * reported so the stale key can be removed by hand.
 */
async function rotateCredential(squad: string): Promise<void> {
  const project = getProject();
  const email = saEmail(squad, project);
  const key = keyPath(squad);

  if (!existsSync(key)) {
    writeLine(` ${icons.error} ${colors.red}No credential found for ${squad}. Run: squads credentials create ${squad}${RESET}`);
    return;
  }

  // Remember which remote key the current file corresponds to, for deletion later.
  const oldKeyId = readPrivateKeyId(key);

  writeLine(` ${bold}Rotating ${squad} credential${RESET}`);

  // Create new key first, in a temp file beside the live one.
  const tmpKey = key + '.new';
  if (existsSync(tmpKey)) {
    // Left over from an interrupted rotation.
    unlinkSync(tmpKey);
  }
  gcloudExec(`gcloud iam service-accounts keys create "${tmpKey}" --iam-account=${email} --project ${project}`);

  // Replace the old key file in one step.
  const { renameSync } = await import('fs');
  renameSync(tmpKey, key);

  // Delete old key from GCP (best effort).
  if (oldKeyId) {
    try {
      gcloudExec(
        `gcloud iam service-accounts keys delete ${oldKeyId} --iam-account=${email} --project ${project} --quiet`,
        true,
      );
    } catch {
      writeLine(` ${icons.warning} ${colors.yellow}Could not delete old key ${oldKeyId} from GCP. Delete it manually if it is still listed.${RESET}`);
    }
  } else {
    writeLine(` ${icons.warning} ${colors.yellow}Old key ID could not be read from the previous key file. The old key was not deleted from GCP.${RESET}`);
  }

  writeLine(` ${icons.success} ${colors.green}${squad}${RESET} credential rotated`);
  writeLine(` ${colors.dim}New key: ${key}${RESET}`);
  writeLine();
}
/**
 * Revoke a squad's credential: delete the local key file, delete every
 * user-managed key on the service account, then delete the service account.
 *
 * Remote steps are best effort (the account may already be gone), with two
 * exceptions to the silent handling:
 *  - the "gcloud auth expired" error raised by gcloudExec is re-thrown, since
 *    nothing can be revoked until the user logs in again;
 *  - a key that fails to delete no longer stops the remaining keys from being
 *    deleted, and the failure count is reported.
 */
async function revokeCredential(squad: string): Promise<void> {
  const project = getProject();
  const email = saEmail(squad, project);
  const key = keyPath(squad);

  const rethrowIfAuthExpired = (e: unknown): void => {
    if (e instanceof Error && e.message.startsWith('gcloud auth expired')) throw e;
  };

  writeLine(` ${bold}Revoking ${squad} credential${RESET}`);

  // Delete local key
  if (existsSync(key)) {
    unlinkSync(key);
    writeLine(` ${colors.dim}Deleted local key${RESET}`);
  }

  // Delete all user-managed keys from GCP
  try {
    const keysJson = gcloudExec(
      `gcloud iam service-accounts keys list --iam-account=${email} --project ${project} --format=json 2>/dev/null`,
      true,
    );
    const parsed: unknown = JSON.parse(keysJson);
    const remoteKeys: unknown[] = Array.isArray(parsed) ? parsed : [];
    let failed = 0;

    for (const entry of remoteKeys) {
      if (typeof entry !== 'object' || entry === null) continue;
      const { keyType, name } = entry as { keyType?: unknown; name?: unknown };
      if (keyType !== 'USER_MANAGED' || typeof name !== 'string') continue;

      // The key ID is the last path segment of the resource name.
      const keyId = name.split('/').pop();
      if (!keyId) continue;

      try {
        gcloudExec(
          `gcloud iam service-accounts keys delete ${keyId} --iam-account=${email} --project ${project} --quiet`,
          true,
        );
      } catch (e) {
        rethrowIfAuthExpired(e);
        failed += 1;
      }
    }

    if (failed === 0) {
      writeLine(` ${colors.dim}Deleted remote keys${RESET}`);
    } else {
      writeLine(` ${icons.warning} ${colors.yellow}${failed} remote key(s) could not be deleted${RESET}`);
    }
  } catch (e) {
    rethrowIfAuthExpired(e);
    /* SA may not exist */
  }

  // Delete service account
  try {
    gcloudExec(`gcloud iam service-accounts delete ${email} --project ${project} --quiet`);
    writeLine(` ${colors.dim}Deleted service account${RESET}`);
  } catch (e) {
    rethrowIfAuthExpired(e);
    /* already deleted */
  }

  writeLine(` ${icons.success} ${colors.green}${squad}${RESET} credential revoked`);
  writeLine();
}
/**
 * Register `squads credentials` and its subcommands
 * (create, create-all, rotate, list, revoke) on the CLI program.
 *
 * `create` accepts either a squad name or `--all`. `--all` is declared as a
 * real option: an undeclared `--all` would normally be rejected by option
 * parsing, so the positional comparison alone would not trigger. The
 * positional comparison is kept for backward compatibility.
 */
export function registerCredentialsCommand(program: Command): void {
  const creds = program
    .command('credentials')
    .description('Manage per-squad GCP service accounts and credentials')
    // Bare `squads credentials` prints help, same as the catalog command.
    .action(() => { creds.outputHelp(); });

  creds
    .command('create [squad]')
    .description('Create a service account and key for a squad')
    .option('--force', 'Recreate even if credential exists')
    .option('--all', 'Create credentials for all squads with permission mappings')
    .action(async (squad: string | undefined, opts: { force?: boolean; all?: boolean }) => {
      if (opts.all || squad === '--all') {
        await createAll(opts);
        return;
      }
      if (!squad) {
        writeLine(` ${icons.error} ${colors.red}Missing squad name. Usage: squads credentials create <squad> | --all${RESET}`);
        process.exitCode = 1;
        return;
      }
      await createCredential(squad, opts);
    });

  creds
    .command('create-all')
    .description('Create credentials for all squads with permission mappings')
    .option('--force', 'Recreate even if credentials exist')
    .action(async (opts: { force?: boolean }) => {
      await createAll(opts);
    });

  creds
    .command('rotate <squad>')
    .description('Rotate a squad credential (create new key, delete old)')
    .action(async (squad: string) => {
      await rotateCredential(squad);
    });

  creds
    .command('list')
    .description('List all squad credentials and their status')
    .action(async () => {
      await listCredentials();
    });

  creds
    .command('revoke <squad>')
    .description('Delete a squad service account and all keys')
    .action(async (squad: string) => {
      await revokeCredential(squad);
    });
}
/** One goal line parsed from a squad's goals.md. */
interface GoalInfo {
  name: string;
  status: string;
  section: 'active' | 'achieved' | 'abandoned' | 'proposed';
}

/** Per-squad aggregate shown on the dashboard and emitted by --json. */
interface SquadGoalSummary {
  goals: GoalInfo[];
  active: number;
  achieved: number;
  blocked: number;
}

// Hoisted so the patterns are compiled once, not once per line.
const GOAL_WITH_STATUS = /\*\*([^*]+)\*\*.*status:\s*(\S+)/;
const GOAL_ACHIEVED = /\*\*([^*]+)\*\*.*achieved:/;

/**
 * Parse a goals.md file into a flat list of goals.
 *
 * A goal is a line containing a bold `**name**` followed by `status: <word>`.
 * Lines under `## Achieved` may instead carry `achieved:`; those get the
 * status 'achieved'. The current section is tracked from `## Active`,
 * `## Achieved`, `## Abandoned` and `## Proposed` headings and defaults to
 * 'active' before the first heading.
 *
 * Each line yields at most one goal: a line that has both `status:` and
 * `achieved:` is recorded once, with its explicit status.
 *
 * @returns the parsed goals, or an empty list when the file does not exist.
 */
function parseGoals(filePath: string): GoalInfo[] {
  if (!existsSync(filePath)) return [];
  const content = readFileSync(filePath, 'utf-8');
  const goals: GoalInfo[] = [];

  let currentSection: GoalInfo['section'] = 'active';

  for (const line of content.split('\n')) {
    if (line.startsWith('## Active')) currentSection = 'active';
    else if (line.startsWith('## Achieved')) currentSection = 'achieved';
    else if (line.startsWith('## Abandoned')) currentSection = 'abandoned';
    else if (line.startsWith('## Proposed')) currentSection = 'proposed';

    const withStatus = line.match(GOAL_WITH_STATUS);
    if (withStatus) {
      const [, name = '', status = ''] = withStatus;
      goals.push({ name: name.trim(), status: status.trim(), section: currentSection });
      continue; // already recorded — do not count it again as "achieved"
    }

    // Achieved goals may have no status field — detect them by section.
    if (currentSection === 'achieved') {
      const achieved = line.match(GOAL_ACHIEVED);
      if (achieved) {
        const [, name = ''] = achieved;
        goals.push({ name: name.trim(), status: 'achieved', section: 'achieved' });
      }
    }
  }

  return goals;
}

/**
 * Register `squads goals`: a dashboard of every squad's goals.
 *
 * Squads are the sub-directories of the squads dir that contain SQUAD.md;
 * their goals are read from `<memoryDir>/<squad>/goals.md`. With --json the
 * per-squad summaries are printed as JSON; otherwise a table is printed,
 * followed by a per-section detail listing when --squad is given.
 */
export function registerGoalsCommand(program: Command): void {
  program
    .command('goals')
    .description('Dashboard of all squad goals — status at a glance')
    .option('-s, --squad <squad>', 'Show goals for a specific squad')
    .option('--json', 'Output as JSON')
    .action((opts: { squad?: string; json?: boolean }) => {
      const squadsDir = findSquadsDir();
      const memoryDir = findMemoryDir();
      if (!squadsDir || !memoryDir) {
        writeLine(`\n ${colors.dim}No squads found. Run squads init.${RESET}\n`);
        return;
      }

      const squadDirs = readdirSync(squadsDir)
        .filter(d => existsSync(join(squadsDir, d, 'SQUAD.md')))
        .sort();

      const allData: Record<string, SquadGoalSummary> = {};

      for (const squad of squadDirs) {
        if (opts.squad && squad !== opts.squad) continue;
        const goals = parseGoals(join(memoryDir, squad, 'goals.md'));
        allData[squad] = {
          goals,
          active: goals.filter(g => g.section === 'active').length,
          achieved: goals.filter(g => g.section === 'achieved').length,
          blocked: goals.filter(g => g.status === 'blocked' || g.status === 'AT-RISK').length,
        };
      }

      if (opts.json) {
        console.log(JSON.stringify(allData, null, 2));
        return;
      }

      const selected = opts.squad !== undefined && Object.prototype.hasOwnProperty.call(allData, opts.squad)
        ? allData[opts.squad]
        : undefined;
      if (opts.squad && !selected) {
        // An unknown squad would otherwise render an empty, all-zero dashboard.
        writeLine(`\n ${colors.dim}No squad named "${opts.squad}" found.${RESET}\n`);
        return;
      }

      // Summary view
      const summaries = Object.values(allData);
      const totalActive = summaries.reduce((s, d) => s + d.active, 0);
      const totalAchieved = summaries.reduce((s, d) => s + d.achieved, 0);
      const totalBlocked = summaries.reduce((s, d) => s + d.blocked, 0);

      writeLine();
      writeLine(` ${bold}Goals Dashboard${RESET} ${totalActive} active | ${colors.green}${totalAchieved} achieved${RESET} | ${totalBlocked > 0 ? colors.red : colors.dim}${totalBlocked} blocked${RESET}`);
      writeLine();
      // Header cells use the same widths as the row cells below so columns line up.
      writeLine(` ${'Squad'.padEnd(15)} ${'Active'.padStart(6)} ${'Done'.padStart(6)} ${'Block'.padStart(6)} Top Goal`);
      writeLine(` ${'-'.repeat(78)}`);

      for (const [squad, data] of Object.entries(allData)) {
        const frozen = data.active === 0 && data.achieved === 0;
        if (frozen) continue; // Skip frozen squads in summary

        const topGoal = data.goals.find(g => g.section === 'active');
        const topStr = topGoal
          ? `${topGoal.name.slice(0, 30).padEnd(30)} ${statusIcon(topGoal.status)}`
          : `${colors.dim}(no active goals)${RESET}`;

        writeLine(` ${squad.padEnd(15)} ${String(data.active).padStart(6)} ${String(data.achieved).padStart(6)} ${String(data.blocked).padStart(6)} ${topStr}`);
      }

      // Detail view for specific squad
      if (opts.squad && selected) {
        writeLine();
        writeLine(` ${bold}${opts.squad} — Detail${RESET}`);

        for (const section of ['active', 'achieved', 'abandoned', 'proposed'] as const) {
          const sectionGoals = selected.goals.filter(g => g.section === section);
          if (sectionGoals.length === 0) continue;
          writeLine(`\n ${colors.cyan}${section.toUpperCase()}${RESET}`);
          for (const g of sectionGoals) {
            writeLine(` ${statusIcon(g.status)} ${g.name}`);
          }
        }
      }

      writeLine();
    });
}

/**
 * Short coloured label for a goal status. Unknown statuses are shown dimmed,
 * truncated to five characters.
 */
function statusIcon(status: string): string {
  switch (status) {
    case 'achieved':
    case 'complete': return `${colors.green}done${RESET}`;
    case 'in-progress':
    case 'improving': return `${colors.cyan}prog${RESET}`;
    case 'not-started': return `${colors.dim}todo${RESET}`;
    case 'blocked':
    case 'AT-RISK': return `${colors.red}block${RESET}`;
    default: return `${colors.dim}${status.slice(0, 5)}${RESET}`;
  }
}
} from '../lib/env-config.js'; import { existsSync, readFileSync } from 'fs'; import { loadTemplate, @@ -570,6 +572,7 @@ export async function initCommand(options: InitOptions): Promise { // Core directories (always created) const dirs = [ + '.agents/squads/demo', '.agents/squads/company', '.agents/squads/research', '.agents/squads/intelligence', @@ -578,6 +581,7 @@ export async function initCommand(options: InitOptions): Promise { '.agents/memory/company/event-dispatcher', '.agents/memory/company/goal-tracker', '.agents/memory/company/company-eval', + '.agents/memory/demo/hello-world', '.agents/memory/company/company-critic', '.agents/memory/research/lead', '.agents/memory/research/analyst', @@ -606,6 +610,12 @@ export async function initCommand(options: InitOptions): Promise { spinner.text = 'Creating squad definitions...'; + // Demo squad (always created — starter agent so `squads run demo hello-world` works) + const demoFiles: [string, string][] = [ + ['.agents/squads/demo/SQUAD.md', 'squads/demo/SQUAD.md'], + ['.agents/squads/demo/hello-world.md', 'squads/demo/hello-world.md'], + ]; + // Core squad files (always created) const companyFiles: [string, string][] = [ ['.agents/squads/company/SQUAD.md', 'squads/company/SQUAD.md'], @@ -639,7 +649,7 @@ export async function initCommand(options: InitOptions): Promise { } // Write all squad files - for (const [dest, template] of [...companyFiles, ...researchFiles, ...intelligenceFiles, ...productFiles, ...useCaseFiles]) { + for (const [dest, template] of [...demoFiles, ...companyFiles, ...researchFiles, ...intelligenceFiles, ...productFiles, ...useCaseFiles]) { const content = loadSeedTemplate(template, variables); await writeFile(path.join(cwd, dest), content); } @@ -828,8 +838,9 @@ export async function initCommand(options: InitOptions): Promise { writeLine(chalk.dim(' Created:')); // Core squads (always present) - writeLine(chalk.dim(' • research/ 3 agents — Researches your market, competitors, and opportunities')); 
- writeLine(chalk.dim(' • company/ 5 agents — Manages goals, events, and strategy')); + writeLine(chalk.dim(' • demo/ 1 agent — Starter agent to verify your setup')); + writeLine(chalk.dim(' • research/ 3 agents — Researches your market, competitors, and opportunities')); + writeLine(chalk.dim(' • company/ 5 agents — Manages goals, events, and strategy')); writeLine(chalk.dim(' • intelligence/ 3 agents — Monitors trends and competitive signals')); writeLine(chalk.dim(' • product/ 3 agents — Roadmap, specs, user feedback synthesis')); @@ -848,53 +859,56 @@ export async function initCommand(options: InitOptions): Promise { writeLine(chalk.dim(' • .claude/settings.json Session hooks')); } writeLine(); - writeLine(chalk.bold(' Getting started:')); + writeLine(chalk.bold(' What\'s next:')); writeLine(); - writeLine(` ${chalk.cyan('1.')} ${chalk.yellow('$EDITOR .agents/BUSINESS_BRIEF.md')}`); - writeLine(chalk.dim(' Set your business context — agents use this for every run')); + writeLine(` ${chalk.green('→')} Verify your setup works:`); + writeLine(` ${chalk.yellow('squads run demo hello-world')}`); writeLine(); - // Dynamic "first run" suggestion based on use case - const firstRunCommand = getFirstRunCommand(selectedUseCase); - const squadCommand = firstRunCommand.command.replace(/\/[^/]+$/, ''); - writeLine(` ${chalk.cyan('2.')} ${chalk.yellow(firstRunCommand.command)}`); - writeLine(chalk.dim(` ${firstRunCommand.description}`)); - writeLine(chalk.dim(` Full squad (4+ agents, longer): ${squadCommand}`)); + // Dynamic first-run suggestion based on use case + const firstRun = getFirstRunCommand(selectedUseCase); + const firstRunCmd = `squads run ${firstRun.squad} -a ${firstRun.agent}`; + writeLine(` ${chalk.green('→')} Run your first real agent:`); + writeLine(` ${chalk.yellow(firstRunCmd)}`); writeLine(); - writeLine(` ${chalk.cyan('3.')} ${chalk.yellow(`squads run`)}`); - writeLine(chalk.dim(' Autopilot — runs all squads on schedule, learns between cycles')); - 
writeLine(chalk.dim(` Options: squads run --once (single cycle), squads run -i 15 --budget 50`)); - writeLine(); - writeLine(chalk.dim(' Docs: https://agents-squads.com/docs/getting-started')); + writeLine(` ${chalk.dim('See all squads:')} ${chalk.yellow('squads status')}`); + writeLine(` ${chalk.dim('Docs:')} ${chalk.dim('https://agents-squads.com/docs/getting-started')}`); writeLine(); + + // 7. Opt-in email capture for founder outreach + // Gracefully wrapped — never blocks init if prompt fails + try { + if (isInteractive()) { + const emailInput = await prompt('Email (optional, for updates):', ''); + if (emailInput && emailInput.includes('@')) { + saveEmail(emailInput); + const emailHash = createHash('sha256').update(emailInput.toLowerCase().trim()).digest('hex'); + await track(Events.CLI_EMAIL_CAPTURED, { emailHash }); + writeLine(chalk.dim(' Email saved. We will reach out with updates.')); + writeLine(); + } + } + } catch { + // Non-fatal — email capture failure must never break init + } } /** - * Get the suggested first command based on installed packs + * Get the suggested first agent to run based on installed packs. 
+ * Returns squad and agent names separately so the caller can format + * the command as: squads run {squad} -a {agent} */ -function getFirstRunCommand(useCase: UseCase): { command: string; description: string } { +function getFirstRunCommand(useCase: UseCase): { squad: string; agent: string } { switch (useCase) { case 'engineering': - return { - command: 'squads run engineering/issue-solver', - description: 'Run a single agent — finds and solves GitHub issues (~2 min)', - }; + return { squad: 'engineering', agent: 'issue-solver' }; case 'marketing': - return { - command: 'squads run marketing/content-drafter', - description: 'Run a single agent — drafts content for your business (~2 min)', - }; + return { squad: 'marketing', agent: 'content-drafter' }; case 'operations': - return { - command: 'squads run operations/ops-lead', - description: 'Run a single agent — coordinates daily operations (~2 min)', - }; + return { squad: 'operations', agent: 'ops-lead' }; case 'full-company': case 'custom': default: - return { - command: 'squads run research/lead', - description: 'Run a single agent — researches the topic you set (~2 min)', - }; + return { squad: 'research', agent: 'lead' }; } } diff --git a/src/commands/log.ts b/src/commands/log.ts new file mode 100644 index 00000000..03ed2b4f --- /dev/null +++ b/src/commands/log.ts @@ -0,0 +1,150 @@ +/** + * squads log — run history with timestamps, duration, status, and cost + * + * Reads from .agents/observability/executions.jsonl (local, no server required). + * Gives returning users immediate visibility into what ran and whether it worked. 
+ */ + +import { track, Events } from '../lib/telemetry.js'; +import { queryExecutions } from '../lib/observability.js'; +import { formatDuration, formatRelativeTime } from '../lib/executions.js'; +import { + colors, + bold, + RESET, + gradient, + box, + padEnd, + writeLine, + icons, +} from '../lib/terminal.js'; + +interface LogOptions { + squad?: string; + agent?: string; + limit?: number; + since?: string; + json?: boolean; +} + +function formatTimestamp(iso: string): string { + const d = new Date(iso); + if (isNaN(d.getTime())) return iso; + const pad = (n: number) => String(n).padStart(2, '0'); + return `${d.getFullYear()}-${pad(d.getMonth() + 1)}-${pad(d.getDate())} ${pad(d.getHours())}:${pad(d.getMinutes())}`; +} + +function formatCost(usd: number): string { + if (!usd || usd === 0) return '—'; + if (usd < 0.01) return `$${(usd * 100).toFixed(2)}¢`; + return `$${usd.toFixed(3)}`; +} + +export async function logCommand(options: LogOptions = {}): Promise { + await track(Events.CLI_LOG, { squad: options.squad, limit: options.limit }); + + const records = queryExecutions({ + squad: options.squad, + agent: options.agent, + since: options.since, + limit: options.limit || 20, + }); + + if (options.json) { + console.log(JSON.stringify(records, null, 2)); + return; + } + + writeLine(); + writeLine(` ${gradient('squads')} ${colors.dim}log${RESET}${options.squad ? 
` ${colors.cyan}${options.squad}${RESET}` : ''}`); + writeLine(); + + if (records.length === 0) { + writeLine(` ${colors.dim}No runs found${RESET}`); + writeLine(); + writeLine(` ${colors.dim}Runs are logged after executing agents:${RESET}`); + writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${RESET}`); + writeLine(); + return; + } + + // Column widths + const w = { ts: 17, agent: 28, duration: 10, status: 12, cost: 8 }; + const tableWidth = w.ts + w.agent + w.duration + w.status + w.cost + 6; + + writeLine(` ${colors.purple}${box.topLeft}${colors.dim}${box.horizontal.repeat(tableWidth)}${colors.purple}${box.topRight}${RESET}`); + + const header = ` ${colors.purple}${box.vertical}${RESET} ` + + `${bold}${padEnd('TIMESTAMP', w.ts)}${RESET}` + + `${bold}${padEnd('SQUAD/AGENT', w.agent)}${RESET}` + + `${bold}${padEnd('DURATION', w.duration)}${RESET}` + + `${bold}${padEnd('STATUS', w.status)}${RESET}` + + `${bold}COST${RESET}` + + ` ${colors.purple}${box.vertical}${RESET}`; + writeLine(header); + + writeLine(` ${colors.purple}${box.teeRight}${colors.dim}${box.horizontal.repeat(tableWidth)}${colors.purple}${box.teeLeft}${RESET}`); + + for (const r of records) { + const agentLabel = `${r.squad}/${r.agent}`; + const truncatedAgent = agentLabel.length > w.agent - 1 + ? agentLabel.slice(0, w.agent - 4) + '...' 
+ : agentLabel; + + let statusIcon: string; + let statusColor: string; + + if (r.status === 'completed') { + statusIcon = icons.success; + statusColor = colors.green; + } else if (r.status === 'failed') { + statusIcon = icons.error; + statusColor = colors.red; + } else { + statusIcon = icons.warning; + statusColor = colors.yellow; + } + + const statusStr = `${statusColor}${statusIcon} ${r.status}${RESET}`; + const durationStr = formatDuration(r.duration_ms); + const tsStr = formatTimestamp(r.ts); + const costStr = formatCost(r.cost_usd); + + const row = ` ${colors.purple}${box.vertical}${RESET} ` + + `${colors.dim}${padEnd(tsStr, w.ts)}${RESET}` + + `${colors.cyan}${padEnd(truncatedAgent, w.agent)}${RESET}` + + `${padEnd(durationStr, w.duration)}` + + `${padEnd(statusStr, w.status + 10)}` + // +10 for ANSI escape codes + `${colors.dim}${costStr}${RESET}` + + ` ${colors.purple}${box.vertical}${RESET}`; + + writeLine(row); + } + + writeLine(` ${colors.purple}${box.bottomLeft}${colors.dim}${box.horizontal.repeat(tableWidth)}${colors.purple}${box.bottomRight}${RESET}`); + writeLine(); + + // Summary line + const completed = records.filter(r => r.status === 'completed').length; + const failed = records.filter(r => r.status === 'failed').length; + const totalCost = records.reduce((sum, r) => sum + (r.cost_usd || 0), 0); + const parts: string[] = []; + if (completed > 0) parts.push(`${colors.green}${completed} completed${RESET}`); + if (failed > 0) parts.push(`${colors.red}${failed} failed${RESET}`); + if (totalCost > 0) parts.push(`${colors.dim}${formatCost(totalCost)} total${RESET}`); + + if (parts.length > 0) { + writeLine(` ${parts.join(` ${colors.dim}|${RESET} `)}`); + writeLine(); + } + + if (records.length >= (options.limit || 20)) { + writeLine(` ${colors.dim}Showing ${records.length} most recent. 
Use --limit to see more.${RESET}`); + writeLine(); + } + + writeLine(` ${colors.dim}$${RESET} squads log --squad ${colors.cyan}${RESET} ${colors.dim}Filter by squad${RESET}`); + writeLine(` ${colors.dim}$${RESET} squads log --since ${colors.cyan}7d${RESET} ${colors.dim}Filter by date${RESET}`); + writeLine(` ${colors.dim}$${RESET} squads log --json ${colors.dim}JSON output${RESET}`); + writeLine(); +} diff --git a/src/commands/observability.ts b/src/commands/observability.ts index ca70a1c2..4c1c3c91 100644 --- a/src/commands/observability.ts +++ b/src/commands/observability.ts @@ -12,7 +12,8 @@ import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; export function registerObservabilityCommands(program: Command): void { const obs = program .command('obs') - .description('Observability — execution history, token costs, and trends'); + .description('Observability — execution history, token costs, and trends') + .action(() => { obs.outputHelp(); }); obs .command('history') diff --git a/src/commands/release-check.ts b/src/commands/release-check.ts index 5ecc8019..4f7c41d9 100644 --- a/src/commands/release-check.ts +++ b/src/commands/release-check.ts @@ -21,7 +21,8 @@ async function checkHealth(url: string, expect: number): Promise<{ ok: boolean; export function registerReleaseCommands(program: Command): void { const release = program .command('release') - .description('Release management — pre-deploy checks and status'); + .description('Release management — pre-deploy checks and status') + .action(() => { release.outputHelp(); }); release .command('pre-check ') diff --git a/src/commands/review.ts b/src/commands/review.ts new file mode 100644 index 00000000..4f1c6281 --- /dev/null +++ b/src/commands/review.ts @@ -0,0 +1,458 @@ +/** + * squads review — post-cycle evaluation dashboard. 
+ * + * Optimized for founder + COO: + * - Overview: scan all squads in 10 seconds + * - Founder actions: what needs human input (separated from agent blockers) + * - Goal progress: only meaningful changes (achieved, blocked, new — not churn) + * - Cost efficiency: cost per goal change + * - Detail: drill into any squad + */ + +import { Command } from 'commander'; +import { existsSync, readFileSync, readdirSync } from 'fs'; +import { join } from 'path'; +import { findSquadsDir } from '../lib/squad-parser.js'; +import { findMemoryDir } from '../lib/memory.js'; +import { queryExecutions, calculateCostSummary, type ObservabilityRecord } from '../lib/observability.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; + +// ── Types ─────────────────────────────────────────────────────────────── + +interface GoalInfo { + name: string; + status: string; + section: 'active' | 'achieved' | 'abandoned' | 'proposed'; + deadline?: string; + blocker?: string; +} + +interface GoalChange { + name: string; + before: string; + after: string; +} + +interface SquadRow { + squad: string; + exec: ObservabilityRecord | null; + goals: GoalInfo[]; + status: string; + topAction: string; + founderBlockers: string[]; + agentBlockers: string[]; +} + +// ── Parsers ───────────────────────────────────────────────────────────── + +function parseGoalsDetailed(filePath: string): GoalInfo[] { + if (!existsSync(filePath)) return []; + const content = readFileSync(filePath, 'utf-8'); + const goals: GoalInfo[] = []; + let currentSection: GoalInfo['section'] = 'active'; + + for (const line of content.split('\n')) { + if (line.startsWith('## Active')) currentSection = 'active'; + else if (line.startsWith('## Achieved')) currentSection = 'achieved'; + else if (line.startsWith('## Abandoned')) currentSection = 'abandoned'; + else if (line.startsWith('## Proposed')) currentSection = 'proposed'; + + const match = line.match(/\*\*([^*]+)\*\*/); + if (!match) continue; + + const name = 
match[1].trim(); + const statusMatch = line.match(/status:\s*(\S+)/); + const deadlineMatch = line.match(/deadline:\s*(\S+)/); + const blockerMatch = line.match(/blocker:\s*([^|]+)/); + + if (statusMatch || (currentSection === 'achieved' && line.includes('achieved:'))) { + goals.push({ + name, + status: statusMatch ? statusMatch[1].trim() : 'achieved', + section: currentSection, + deadline: deadlineMatch ? deadlineMatch[1].trim() : undefined, + blocker: blockerMatch ? blockerMatch[1].trim() : undefined, + }); + } + } + return goals; +} + +function readLeadState(memoryDir: string, squad: string): { + status: string; + topAction: string; + founderBlockers: string[]; + agentBlockers: string[]; +} { + const squadMemDir = join(memoryDir, squad); + if (!existsSync(squadMemDir)) return { status: 'unknown', topAction: '', founderBlockers: [], agentBlockers: [] }; + + let stateFile: string | null = null; + try { + const dirs = readdirSync(squadMemDir).filter(d => + d.endsWith('-lead') && existsSync(join(squadMemDir, d, 'state.md')) + ); + if (dirs.length > 0) stateFile = join(squadMemDir, dirs[0], 'state.md'); + } catch { /* */ } + + if (!stateFile) return { status: 'unknown', topAction: '', founderBlockers: [], agentBlockers: [] }; + + const content = readFileSync(stateFile, 'utf-8'); + const lines = content.split('\n'); + + // Status from frontmatter + const statusMatch = content.match(/status:\s*"?(\w+)"?/); + const status = statusMatch ? 
statusMatch[1] : 'unknown'; + + // Top action: first completed item (✅, [x], Done, DONE) or first bullet under ## Current / ## Actions + let topAction = ''; + let inActions = false; + for (const line of lines) { + if (/^## (Current|Actions|This Run|What was done|Done|Completed)/i.test(line)) { + inActions = true; + continue; + } + if (inActions && line.startsWith('## ')) break; + if (inActions && line.trim()) { + // Look for completed items first + const cleaned = line.replace(/\*\*/g, '').replace(/[\u{1F534}\u{1F7E1}\u{1F7E2}\u{2705}\u{274C}\u2713]/gu, '').replace(/^[-*]\s*/, '').replace(/^\[x\]\s*/i, '').trim(); + if (cleaned.length > 10 && !cleaned.startsWith('---')) { + topAction = cleaned.slice(0, 60); + break; + } + } + } + + // Blockers: split by founder-needing vs agent-resolvable + const founderBlockers: string[] = []; + const agentBlockers: string[] = []; + let inBlockers = false; + + for (const line of lines) { + if (/^## Blocker/i.test(line)) { inBlockers = true; continue; } + if (inBlockers && line.startsWith('## ')) break; + if (inBlockers && line.trim().startsWith('-')) { + const text = line.replace(/^-\s*/, '').replace(/\*\*/g, '').trim(); + if (!text || text.toLowerCase() === 'none' || text === '(none)') continue; + + const link = extractLink(text); + const entry = link ? 
`${text.slice(0, 65)}\n ${colors.dim}${link}${RESET}` : text.slice(0, 80); + + // Founder blockers: mention founder, kokevidaurre, needs:human, "enable", "login", "auth" + const isFounder = /founder|kokevidaurre|needs:human|needs founder|assigned to founder|enable at|auth login|bank cartola|CPA/i.test(text); + if (isFounder) { + founderBlockers.push(entry); + } else { + agentBlockers.push(entry); + } + } + } + + return { status, topAction, founderBlockers, agentBlockers }; +} + +function statusIcon(status: string): string { + switch (status) { + case 'achieved': case 'complete': return `${colors.green}done${RESET}`; + case 'in-progress': case 'improving': return `${colors.cyan}prog${RESET}`; + case 'not-started': return `${colors.dim}todo${RESET}`; + case 'blocked': case 'AT-RISK': return `${colors.red}risk${RESET}`; + default: return `${colors.dim}${status.slice(0, 4)}${RESET}`; + } +} + +function daysUntil(dateStr: string): number | null { + if (!dateStr || dateStr === 'ongoing') return null; + const d = new Date(dateStr); + if (isNaN(d.getTime())) return null; + return Math.ceil((d.getTime() - Date.now()) / (1000 * 60 * 60 * 24)); +} + +/** Extract a URL or GitHub issue link from text */ +function extractLink(text: string): string { + // Direct URL + const urlMatch = text.match(/(https?:\/\/[^\s)]+)/); + if (urlMatch) return urlMatch[1]; + + // Issue reference: "repo#N" or "#N" with repo context + const repoIssue = text.match(/([a-z][\w-]*)#(\d+)/i); + if (repoIssue) { + const repo = repoIssue[1]; + const num = repoIssue[2]; + return `https://github.com/agents-squads/${repo}/issues/${num}`; + } + + // Bare #N — can't resolve without repo context + return ''; +} + +function parseSinceToISO(since: string): string { + const match = since.match(/^(\d+)(h|d|w)$/); + if (!match) return since; + const val = parseInt(match[1]); + const unit = match[2]; + const ms = unit === 'h' ? val * 3600000 : unit === 'd' ? 
val * 86400000 : val * 604800000; + return new Date(Date.now() - ms).toISOString(); +} + +// ── Overview ──────────────────────────────────────────────────────────── + +function showOverview(squadsDir: string, memoryDir: string, since: string): void { + const squadDirs = readdirSync(squadsDir).filter(d => + existsSync(join(squadsDir, d, 'SQUAD.md')) + ).sort(); + + const sinceISO = parseSinceToISO(since); + const execs = queryExecutions({ since: sinceISO, limit: 500 }); + const costSummary = calculateCostSummary('7d'); + + // Last execution per squad + const lastExec = new Map(); + for (const e of execs) { + if (!lastExec.has(e.squad) || e.ts > lastExec.get(e.squad)!.ts) { + lastExec.set(e.squad, e); + } + } + + // Build rows + const rows: SquadRow[] = []; + for (const squad of squadDirs) { + const goals = parseGoalsDetailed(join(memoryDir, squad, 'goals.md')); + const exec = lastExec.get(squad) || null; + if (goals.length === 0 && !exec) continue; // frozen + + const state = readLeadState(memoryDir, squad); + rows.push({ squad, exec, goals, ...state }); + } + + // ── Metrics ── + const totalActive = rows.reduce((s, r) => s + r.goals.filter(g => g.section === 'active').length, 0); + const totalAchieved = rows.reduce((s, r) => s + r.goals.filter(g => g.section === 'achieved').length, 0); + const totalBlocked = rows.reduce((s, r) => s + r.goals.filter(g => g.status === 'blocked' || g.status === 'AT-RISK').length, 0); + const meaningfulChanges = execs.reduce((s, e) => s + (e.goals_changed?.filter(c => + c.after === 'achieved' || c.after === 'blocked' || c.before === 'not-started' + ).length || 0), 0); + const costPerChange = meaningfulChanges > 0 ? costSummary.total_cost / meaningfulChanges : 0; + + writeLine(); + writeLine(` ${bold}Cycle Review${RESET}`); + writeLine(` ${costSummary.total_runs} runs $${costSummary.total_cost.toFixed(0)} (7d) ${costPerChange > 0 ? 
`$${costPerChange.toFixed(1)}/goal-change` : ''} ${totalActive} active ${colors.green}${totalAchieved} achieved${RESET} ${totalBlocked > 0 ? `${colors.red}${totalBlocked} blocked${RESET}` : ''}`); + writeLine(); + + // ── Squad table ── + writeLine(` ${'Squad'.padEnd(15)} ${'Run'.padEnd(8)} ${'$'.padStart(5)} ${'G'.padStart(4)} Top Action`); + writeLine(` ${'-'.repeat(80)}`); + + for (const r of rows) { + const active = r.goals.filter(g => g.section === 'active').length; + const achieved = r.goals.filter(g => g.section === 'achieved').length; + const goalStr = achieved > 0 ? `${colors.green}${achieved}${RESET}/${active + achieved}` : `${active}`; + + let runStr: string; + let costStr: string; + if (r.exec) { + const d = new Date(r.exec.ts); + const ago = Math.round((Date.now() - d.getTime()) / 3600000); + runStr = ago < 24 ? `${ago}h ago` : `${Math.round(ago / 24)}d ago`; + costStr = `$${r.exec.cost_usd.toFixed(1)}`; + if (r.exec.status !== 'completed') { + runStr = `${colors.red}${runStr}${RESET}`; + } + } else { + runStr = `${colors.dim}—${RESET} `; + costStr = `${colors.dim}—${RESET} `; + } + + const action = r.topAction || `${colors.dim}(no state)${RESET}`; + writeLine(` ${r.squad.padEnd(15)} ${runStr.padEnd(8)} ${costStr.padStart(5)} ${goalStr.padStart(4)} ${action}`); + } + + // ── Founder Action Required ── + const founderItems = rows.flatMap(r => + r.founderBlockers.map(b => ({ squad: r.squad, text: b })) + ); + + // Add deadline-driven items + const urgentDeadlines = rows.flatMap(r => + r.goals.filter(g => g.deadline && g.section === 'active') + .map(g => ({ squad: r.squad, days: daysUntil(g.deadline!), name: g.name })) + .filter(g => g.days !== null && g.days <= 14) + ).sort((a, b) => (a.days || 99) - (b.days || 99)); + + if (founderItems.length > 0 || urgentDeadlines.length > 0) { + writeLine(); + writeLine(` ${bold}Founder Action${RESET}`); + + for (const d of urgentDeadlines) { + const color = (d.days || 0) <= 3 ? 
colors.red : colors.yellow; + writeLine(` ${color}${String(d.days).padStart(2)}d${RESET} ${d.squad}: ${d.name}`); + } + + for (const f of founderItems) { + writeLine(` ${colors.yellow}>>>${RESET} ${f.squad}: ${f.text.slice(0, 70)}`); + } + } + + // ── Blocked goals (agent-resolvable) ── + const blockedGoals = rows.flatMap(r => + r.goals.filter(g => g.status === 'blocked' || g.status === 'AT-RISK') + .map(g => ({ squad: r.squad, name: g.name, blocker: g.blocker })) + ); + + if (blockedGoals.length > 0) { + writeLine(); + writeLine(` ${bold}Blocked Goals${RESET}`); + for (const b of blockedGoals) { + const link = b.blocker ? extractLink(b.blocker) : ''; + writeLine(` ${colors.red}block${RESET} ${b.squad}: ${b.name}${b.blocker ? ` ${colors.dim}← ${b.blocker.slice(0, 45)}${RESET}` : ''}`); + if (link) writeLine(` ${colors.dim}${link}${RESET}`); + } + } + + // ── Goal changes: only meaningful (achieved, blocked, new starts) ── + const allChanges: Array<{ squad: string; change: GoalChange }> = []; + for (const e of execs) { + if (!e.goals_changed) continue; + for (const c of e.goals_changed) { + // Skip noise: in-progress→in-progress, status churn + if (c.before === c.after) continue; + // Only show: achieved, blocked, removed, or first start + const isMeaningful = + c.after === 'achieved' || c.after === 'blocked' || c.after === 'removed' || + c.before === 'not-started' || c.before === 'new' || + c.after === 'AT-RISK'; + if (isMeaningful) { + // Deduplicate: keep only latest change per goal per squad + const existing = allChanges.findIndex(x => x.squad === e.squad && x.change.name === c.name); + if (existing >= 0) { + allChanges[existing] = { squad: e.squad, change: c }; + } else { + allChanges.push({ squad: e.squad, change: c }); + } + } + } + } + + // Group by type for readability + const achieved = allChanges.filter(c => c.change.after === 'achieved'); + const blocked = allChanges.filter(c => c.change.after === 'blocked' || c.change.after === 'AT-RISK'); + const 
started = allChanges.filter(c => c.change.before === 'not-started' || c.change.before === 'new'); + const removed = allChanges.filter(c => c.change.after === 'removed'); + + if (allChanges.length > 0) { + writeLine(); + writeLine(` ${bold}Goal Changes${RESET} ${achieved.length} achieved ${started.length} started ${blocked.length} blocked ${removed.length} removed`); + + if (achieved.length > 0) { + for (const c of achieved) { + writeLine(` ${colors.green}achieved${RESET} ${c.squad}: ${c.change.name}`); + } + } + if (blocked.length > 0) { + for (const c of blocked) { + writeLine(` ${colors.red}blocked${RESET} ${c.squad}: ${c.change.name}`); + } + } + if (started.length > 0 && started.length <= 8) { + for (const c of started) { + writeLine(` ${colors.cyan}started${RESET} ${c.squad}: ${c.change.name}`); + } + } else if (started.length > 8) { + writeLine(` ${colors.cyan}started${RESET} ${started.length} goals across ${new Set(started.map(s => s.squad)).size} squads`); + } + } + + writeLine(); + writeLine(` ${colors.dim}squads review --squad drill into squad${RESET}`); + writeLine(); +} + +// ── Squad Detail ──────────────────────────────────────────────────────── + +function showSquadDetail(squad: string, memoryDir: string): void { + writeLine(); + writeLine(` ${bold}${squad}${RESET}`); + + // Goals + const goals = parseGoalsDetailed(join(memoryDir, squad, 'goals.md')); + if (goals.length > 0) { + writeLine(); + for (const section of ['active', 'achieved', 'abandoned', 'proposed'] as const) { + const sg = goals.filter(g => g.section === section); + if (sg.length === 0) continue; + writeLine(` ${colors.cyan}${section.toUpperCase()}${RESET}`); + for (const g of sg) { + const days = g.deadline ? daysUntil(g.deadline) : null; + const dl = days !== null ? ` ${days <= 7 ? colors.red : colors.dim}(${days}d)${RESET}` : ''; + const bl = g.blocker ? 
` ${colors.red}← ${g.blocker.slice(0, 45)}${RESET}` : ''; + writeLine(` ${statusIcon(g.status)} ${g.name}${dl}${bl}`); + } + } + } + + // Runs + const execs = queryExecutions({ squad, limit: 5 }); + if (execs.length > 0) { + writeLine(); + writeLine(` ${bold}Runs${RESET}`); + for (const e of execs) { + const d = new Date(e.ts); + const date = `${d.getMonth() + 1}/${d.getDate()} ${d.getHours()}:${String(d.getMinutes()).padStart(2, '0')}`; + const icon = e.status === 'completed' ? `${colors.green}pass${RESET}` : `${colors.red}fail${RESET}`; + const dur = Math.round(e.duration_ms / 60000); + const gc = e.goals_changed?.length || 0; + writeLine(` ${icon} ${date} ${dur}m $${e.cost_usd.toFixed(2)} ${e.agent}${gc > 0 ? ` ${colors.green}+${gc} goals${RESET}` : ''}`); + } + + // Cost trend + const totalCost = execs.reduce((s, e) => s + e.cost_usd, 0); + const totalGoalChanges = execs.reduce((s, e) => s + (e.goals_changed?.length || 0), 0); + writeLine(` ${colors.dim}total: $${totalCost.toFixed(2)} / ${totalGoalChanges} goal changes${RESET}`); + } + + // State + blockers + const state = readLeadState(memoryDir, squad); + if (state.topAction) { + writeLine(); + writeLine(` ${bold}Last Action${RESET} ${state.topAction}`); + } + + if (state.founderBlockers.length > 0) { + writeLine(); + writeLine(` ${bold}${colors.yellow}Founder Action${RESET}`); + for (const b of state.founderBlockers) writeLine(` ${colors.yellow}>>>${RESET} ${b}`); + } + + if (state.agentBlockers.length > 0) { + writeLine(); + writeLine(` ${bold}Agent Blockers${RESET}`); + for (const b of state.agentBlockers) writeLine(` ${colors.dim}-${RESET} ${b}`); + } + + writeLine(); +} + +// ── Register ──────────────────────────────────────────────────────────── + +export function registerReviewCommand(program: Command): void { + program + .command('review') + .description('Post-cycle evaluation — goals, costs, blockers, founder actions') + .option('-s, --squad ', 'Detail view for a specific squad') + .option('--since 
', 'Look back period (e.g. 24h, 7d, 30d)', '7d') + .option('--json', 'Output as JSON') + .action((opts) => { + const squadsDir = findSquadsDir(); + const memoryDir = findMemoryDir(); + if (!squadsDir || !memoryDir) { + writeLine(`\n ${colors.dim}No squads found. Run squads init.${RESET}\n`); + return; + } + + if (opts.squad) { + showSquadDetail(opts.squad, memoryDir); + } else { + showOverview(squadsDir, memoryDir, opts.since); + } + }); +} diff --git a/src/commands/run.ts b/src/commands/run.ts index 4e6e9f1f..9acf12fb 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -1,5 +1,6 @@ import { join } from 'path'; -import { existsSync } from 'fs'; +import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from 'fs'; +import { execSync } from 'child_process'; import { findSquadsDir, loadSquad, @@ -27,6 +28,8 @@ import { runCloudDispatch } from '../lib/cloud-dispatch.js'; import { runConversation, saveTranscript, type ConversationOptions } from '../lib/workflow.js'; import { reportExecutionStart, reportConversationResult, pushCognitionSignal } from '../lib/api-client.js'; import { runAgent } from '../lib/agent-runner.js'; +import { findMemoryDir } from '../lib/memory.js'; +import { statSync } from 'fs'; import { runPostEvaluation, runAutopilot, runLeadMode, runSequentialMode } from '../lib/run-modes.js'; export async function runCommand( @@ -52,7 +55,8 @@ export async function runCommand( const { scanOrg, planOrgCycle, displayOrgScan, displayPlan } = await import('../lib/org-cycle.js'); writeLine(); - writeLine(` ${gradient('squads')} ${colors.dim}org cycle${RESET}`); + const focusLabel = options.focus ? 
` ${bold}[${options.focus}]${RESET}` : ''; + writeLine(` ${gradient('squads')} ${colors.dim}org cycle${RESET}${focusLabel}`); writeLine(); // Step 1: SCAN @@ -72,52 +76,186 @@ export async function runCommand( return; } - // Step 3: EXECUTE — run each lead sequentially + // Step 3: EXECUTE — wave-based parallel execution + // + // Wave 1 (producers): research, intelligence, data — create raw material + // Wave 2 (builders): cli, website, finance, engineering — build on wave 1 + // Wave 3 (amplifiers): marketing, growth, product, analytics, customer, economics — use wave 2 + // Wave 4 (reviewers): operations, company — evaluate everything + // + // Within each wave, squads run in parallel (different target repos). + // Between waves, we wait — so each wave sees the previous wave's output. + + const waves: string[][] = [ + ['research', 'intelligence', 'data'], + ['cli', 'website', 'finance', 'engineering'], + ['marketing', 'growth', 'product', 'analytics', 'customer', 'economics'], + ['operations', 'company'], + ]; + + // Resume support: load quota-skipped squads from previous run + const resumeFile = join(process.cwd(), '.agents', 'observability', 'resume.json'); + let resumeSquads: Set | null = null; + if (options.resume && existsSync(resumeFile)) { + try { + const data = JSON.parse(readFileSync(resumeFile, 'utf-8')); + resumeSquads = new Set(data.squads as string[]); + writeLine(` ${bold}Resuming${RESET} ${resumeSquads.size} squads from quota stop: ${[...resumeSquads].join(', ')}`); + } catch { /* invalid file, run full cycle */ } + } + const cycleStart = Date.now(); - const results: Array<{ squad: string; agent: string; status: string; durationMs: number }> = []; + const results: Array<{ squad: string; agent: string; status: string; durationMs: number; turnCount?: number; totalCost?: number; converged?: boolean }> = []; // Snapshot all goals before execution - const { snapshotGoals, diffGoals } = await import('../lib/observability.js'); + const { snapshotGoals, 
diffGoals, queryExecutions } = await import('../lib/observability.js'); const allGoalsBefore: Record> = {}; for (const s of plan) { allGoalsBefore[s.squad] = snapshotGoals(s.squad); } - for (const s of plan) { - if (!s.lead) continue; - const leadPath = join(squadsDir, s.squad, `${s.lead}.md`); - if (!existsSync(leadPath)) continue; + // Build set of planned squads for filtering + const plannedSquads = new Set(plan.map(s => s.squad)); + + // Check skip logic + const today = new Date().toISOString().slice(0, 10); + const todayExecs = queryExecutions({ since: `${today}T00:00:00Z`, limit: 100 }); + const completedTodayMap = new Map(); + for (const e of todayExecs) { + if (e.status === 'completed' && e.agent?.includes('lead')) { + if (!completedTodayMap.has(e.squad) || e.ts > completedTodayMap.get(e.squad)!) { + completedTodayMap.set(e.squad, e.ts); + } + } + } + + function shouldSkip(squadName: string): boolean { + if (options.force) return false; + const lastRun = completedTodayMap.get(squadName); + if (!lastRun) return false; + const memoryDir = findMemoryDir(); + if (memoryDir) { + const goalsPath = join(memoryDir, squadName, 'goals.md'); + const priPath = join(memoryDir, squadName, 'priorities.md'); + try { + const lastRunMs = new Date(lastRun).getTime(); + const goalsMtime = existsSync(goalsPath) ? statSync(goalsPath).mtimeMs : 0; + const priMtime = existsSync(priPath) ? 
statSync(priPath).mtimeMs : 0; + if (goalsMtime > lastRunMs || priMtime > lastRunMs) return false; + } catch { return false; } + } + return true; + } + + async function runSquadConversation(squadName: string): Promise { + const s = plan.find(p => p.squad === squadName); + if (!s || !s.lead) return { squad: squadName, agent: 'unknown', status: 'skipped', durationMs: 0 }; + + if (shouldSkip(squadName)) { + writeLine(` ${colors.dim}skip ${squadName} (completed today, goals unchanged)${RESET}`); + return { squad: squadName, agent: s.lead, status: 'skipped', durationMs: 0 }; + } + + const squad = loadSquad(squadName); + if (!squad) { + writeLine(` ${colors.red}${squadName}: squad not found${RESET}`); + return { squad: squadName, agent: s.lead, status: 'failed', durationMs: 0 }; + } - writeLine(` ${colors.cyan}Running ${s.squad}/${s.lead}...${RESET}`); const runStart = Date.now(); try { - await runAgent(s.lead, leadPath, s.squad, { ...options, execute: true }); - results.push({ squad: s.squad, agent: s.lead, status: 'completed', durationMs: Date.now() - runStart }); + const convOptions: ConversationOptions = { + task: options.task, + maxTurns: options.maxTurns, + costCeiling: options.costCeiling, + verbose: options.verbose, + model: options.model, + focus: (options.focus as ConversationOptions['focus']) || undefined, + }; + + const result = await runConversation(squad, convOptions); + saveTranscript(result.transcript); + + const status = result.converged ? 'converged' : 'completed'; + writeLine(` ${result.converged ? icons.success : icons.warning} ${squadName}: ${result.reason} ${colors.dim}(${result.turnCount}t, ~$${result.totalCost.toFixed(2)})${RESET}`); + return { + squad: squadName, agent: s.lead, status, + durationMs: Date.now() - runStart, + turnCount: result.turnCount, totalCost: result.totalCost, converged: result.converged, + }; } catch (e) { const errMsg = e instanceof Error ? 
e.message : String(e); - results.push({ squad: s.squad, agent: s.lead, status: 'failed', durationMs: Date.now() - runStart }); - - // Detect quota limit — if agent fails in <10s, likely quota/rate limit - const failDuration = Date.now() - runStart; - const isQuotaLikely = failDuration < 10000 && errMsg.includes('code 1'); - const isExplicitQuota = errMsg.includes('hit your limit') || errMsg.includes('rate limit') || errMsg.includes('quota'); - - if (isExplicitQuota || isQuotaLikely) { - // Check if previous squad also failed fast — confirms it's quota, not a bug - const prevFailed = results.length >= 2 && - results[results.length - 2]?.status === 'failed' && - (results[results.length - 2]?.durationMs || 0) < 10000; - - if (isExplicitQuota || prevFailed) { - writeLine(` ${colors.red}Quota limit reached — stopping org cycle.${RESET}`); - writeLine(` ${colors.dim}Completed ${results.filter(r => r.status === 'completed').length} squads before hitting limit.${RESET}`); - writeLine(` ${colors.dim}Resume with 'squads run --org' when quota resets.${RESET}`); - break; + writeLine(` ${colors.red}${squadName} failed: ${errMsg.slice(0, 80)}${RESET}`); + return { squad: squadName, agent: s.lead, status: 'failed', durationMs: Date.now() - runStart }; + } + } + + for (let waveIdx = 0; waveIdx < waves.length; waveIdx++) { + const wave = waves[waveIdx]; + // If resuming, only run squads that were skipped last time + const waveSquads = wave.filter(s => plannedSquads.has(s) && (!resumeSquads || resumeSquads.has(s))); + if (waveSquads.length === 0) continue; + + const waveNum = waveIdx + 1; + const waveNames = ['Producers', 'Builders', 'Amplifiers', 'Reviewers']; + writeLine(); + writeLine(` ${bold}Wave ${waveNum}: ${waveNames[waveIdx]}${RESET} ${colors.dim}(${waveSquads.join(', ')})${RESET}`); + + // Run all squads in this wave in parallel + const waveResults = await Promise.all( + waveSquads.map(s => runSquadConversation(s)) + ); + results.push(...waveResults); + + // Quota 
detection: if any squad in this wave got "hit your limit" responses, + // stop the cycle — remaining waves will produce empty results. + const quotaHit = waveResults.some(r => { + // Check transcripts for quota messages + const convDir = join(process.cwd(), '.agents', 'conversations', r.squad); + try { + const files = readdirSync(convDir).sort().reverse(); + if (files.length > 0) { + const latest = readFileSync(join(convDir, files[0]), 'utf-8'); + return latest.includes('hit your limit') || latest.includes('rate limit') || latest.includes('[QUOTA]') || latest.includes('Quota limit reached'); } + } catch { /* no transcript */ } + return false; + }); + + if (quotaHit) { + const remainingWaves = waves.slice(waveIdx + 1).flat().filter(s => plannedSquads.has(s) && (!resumeSquads || resumeSquads.has(s))); + if (remainingWaves.length > 0) { + writeLine(`\n ${colors.red}Quota limit reached.${RESET} Skipping ${remainingWaves.length} remaining squads.`); + writeLine(` ${colors.dim}Resume later: squads run --org --resume${RESET}`); + for (const s of remainingWaves) { + results.push({ squad: s, agent: 'unknown', status: 'quota-skipped', durationMs: 0 }); + } + // Save skipped squads for resume + try { + const obsDir = join(process.cwd(), '.agents', 'observability'); + if (!existsSync(obsDir)) mkdirSync(obsDir, { recursive: true }); + writeFileSync(resumeFile, JSON.stringify({ + squads: remainingWaves, + stoppedAt: new Date().toISOString(), + waveIdx: waveIdx + 1, + })); + } catch { /* best effort */ } + break; // Exit wave loop } - - writeLine(` ${colors.red}${s.squad}/${s.lead} failed: ${errMsg}${RESET}`); } + + // Commit hq memory changes between waves so next wave sees fresh state + try { + execSync('git add -A .agents/memory/ && git diff --cached --quiet || git commit -m "memory: wave ' + waveNum + ' state updates"', { + cwd: process.cwd(), stdio: 'pipe', encoding: 'utf-8', + }); + } catch { /* no changes to commit */ } + } + + // Clear resume file if cycle completed 
without quota hit + const quotaSkipped = results.filter(r => r.status === 'quota-skipped').length; + if (quotaSkipped === 0 && existsSync(resumeFile)) { + try { unlinkSync(resumeFile); } catch { /* best effort */ } } // Step 4: REPORT — compare goals before and after @@ -125,14 +263,21 @@ export async function runCommand( const completed = results.filter(r => r.status === 'completed').length; const failed = results.filter(r => r.status === 'failed').length; + const totalCostAll = results.reduce((s, r) => s + (r.totalCost || 0), 0); + const totalTurns = results.reduce((s, r) => s + (r.turnCount || 0), 0); + writeLine(); writeLine(` ${bold}Org Cycle Complete${RESET}`); - writeLine(` Duration: ${Math.round(totalMs / 60000)}m | Squads: ${completed} completed, ${failed} failed | Frozen: ${scan.filter(s => s.status === 'frozen').length} skipped`); + writeLine(` Duration: ${Math.round(totalMs / 60000)}m | Squads: ${completed} done, ${failed} failed | ~$${totalCostAll.toFixed(0)} | ${totalTurns} turns`); writeLine(); for (const r of results) { - const icon = r.status === 'completed' ? `${colors.green}pass${RESET}` : `${colors.red}fail${RESET}`; - writeLine(` ${icon} ${r.squad}/${r.agent} ${colors.dim}${Math.round(r.durationMs / 1000)}s${RESET}`); + const icon = r.status === 'converged' ? `${colors.green}conv${RESET}` + : r.status === 'completed' ? `${colors.green}done${RESET}` + : r.status === 'skipped' ? `${colors.dim}skip${RESET}` + : `${colors.red}fail${RESET}`; + const meta = r.turnCount ? 
`${r.turnCount}t ~$${(r.totalCost || 0).toFixed(2)}` : ''; + writeLine(` ${icon} ${r.squad.padEnd(18)} ${colors.dim}${Math.round(r.durationMs / 1000)}s ${meta}${RESET}`); } // Goal changes summary @@ -213,9 +358,23 @@ export async function runCommand( if (squad) { await track(Events.CLI_RUN, { type: 'squad', target: squad.name }); await flushEvents(); // Ensure telemetry is sent before potential exit - await runSquad(squad, squadsDir, options); - // Post-run COO evaluation (default on, --no-eval to skip) - await runPostEvaluation([squad.name], options); + const runStartMs = Date.now(); + let hadError = false; + try { + await runSquad(squad, squadsDir, options); + // Post-run COO evaluation (default on, --no-eval to skip) + await runPostEvaluation([squad.name], options); + } catch (err) { + hadError = true; + throw err; + } finally { + await track(Events.CLI_RUN_COMPLETE, { + exit_code: hadError ? 1 : 0, + duration_ms: Date.now() - runStartMs, + agent_count: squad.agents?.length ?? 1, + had_error: hadError, + }); + } } else { // Try to find as an agent const agents = listAgents(squadsDir); @@ -226,9 +385,23 @@ export async function runCommand( const pathParts = agent.filePath.split('/'); const squadIdx = pathParts.indexOf('squads'); const resolvedSquadName = squadIdx >= 0 ? pathParts[squadIdx + 1] : 'unknown'; - await runAgent(agent.name, agent.filePath, resolvedSquadName, options); - // Post-run COO evaluation for the squad this agent belongs to - await runPostEvaluation([resolvedSquadName], options); + const runStartMs = Date.now(); + let hadError = false; + try { + await runAgent(agent.name, agent.filePath, resolvedSquadName, options); + // Post-run COO evaluation for the squad this agent belongs to + await runPostEvaluation([resolvedSquadName], options); + } catch (err) { + hadError = true; + throw err; + } finally { + await track(Events.CLI_RUN_COMPLETE, { + exit_code: hadError ? 
1 : 0, + duration_ms: Date.now() - runStartMs, + agent_count: 1, + had_error: hadError, + }); + } } else { writeLine(` ${colors.red}Squad or agent "${target}" not found${RESET}`); const similar = findSimilarSquads(target, listSquads(squadsDir)); diff --git a/src/commands/services.ts b/src/commands/services.ts index 9674e372..acf87583 100644 --- a/src/commands/services.ts +++ b/src/commands/services.ts @@ -47,7 +47,8 @@ function dockerComposeAvailable(): boolean { export function registerServicesCommands(program: Command): void { const services = program .command('services') - .description('Manage Tier 2 local services (Postgres, Redis, API, Bridge)'); + .description('Manage Tier 2 local services (Postgres, Redis, API, Bridge)') + .action(() => { services.outputHelp(); }); // ── services up ── services diff --git a/src/lib/agent-runner.ts b/src/lib/agent-runner.ts index df2f62de..9b12ca95 100644 --- a/src/lib/agent-runner.ts +++ b/src/lib/agent-runner.ts @@ -4,7 +4,7 @@ */ import ora from 'ora'; -import { join } from 'path'; +import { join, basename, extname } from 'path'; import { existsSync, readFileSync } from 'fs'; import { findSquadsDir, @@ -72,6 +72,13 @@ import { findMemoryDir } from './memory.js'; // ── Operational constants (no magic numbers) ────────────────────────── export const DRYRUN_DEF_MAX_CHARS = 500; + +function formatRunDuration(ms: number): string { + if (ms < 60_000) return `${(ms / 1000).toFixed(1)}s`; + const m = Math.floor(ms / 60_000); + const s = Math.round((ms % 60_000) / 1000); + return s > 0 ? 
`${m}m ${s}s` : `${m}m`; +} export const DRYRUN_CONTEXT_MAX_CHARS = parseInt(process.env.SQUADS_DRYRUN_MAX_CHARS || '800', 10); export async function runAgent( @@ -80,6 +87,10 @@ export async function runAgent( squadName: string, options: RunOptions & { execute?: boolean } ): Promise { + // Normalize: strip path prefix and extension if a full file path was passed + if (agentName.includes('/') || agentName.includes('\\')) { + agentName = basename(agentName, extname(agentName)); + } const spinner = ora(`Running agent: ${agentName}`).start(); const startMs = Date.now(); const startTime = new Date(startMs).toISOString(); @@ -278,15 +289,8 @@ export async function runAgent( : ''; const prompt = `You are ${agentName} from squad ${squadName}. ${taskDirective} -Your full context follows — read it top-to-bottom. Each layer builds on the previous: -- SYSTEM.md: how the system works (already loaded) -- Company: who we are and why -- Priorities: where to focus now -- Goals: what to achieve (measurable targets) -- Agent: your specific role and instructions -- State: where you left off -${systemContext}${squadContext}${cognitionContext}${learningContext} -TIME LIMIT: ${timeoutMins} minutes. Focus on priorities first. If blocked, note it in state.md and move on.`; +Your full context follows — read it top-to-bottom: +${systemContext}${squadContext}${cognitionContext}${learningContext}`; // Resolve provider with full chain: // 1. Agent config (from agent file frontmatter/header) @@ -385,9 +389,11 @@ TIME LIMIT: ${timeoutMins} minutes. Focus on priorities first. 
If blocked, note if (isForeground || isWatch) { spinner.succeed(`Agent ${agentName} completed (${cliName})`); + writeLine(` ${colors.green}Run completed${RESET} — ${squadName}/${agentName} (${formatRunDuration(Date.now() - startMs)})`); } else { spinner.succeed(`Agent ${agentName} launched in background (${cliName})`); - writeLine(` ${colors.dim}${result}${RESET}`); + writeLine(` ${colors.green}Run started${RESET} — ${squadName}/${agentName} (background)`); + if (result) writeLine(` ${colors.dim}${result}${RESET}`); writeLine(); writeLine(` ${colors.dim}Monitor:${RESET} squads workers`); writeLine(` ${colors.dim}Memory:${RESET} squads memory show ${squadName}`); @@ -399,16 +405,21 @@ TIME LIMIT: ${timeoutMins} minutes. Focus on priorities first. If blocked, note squad: squadName, agent: agentName, executionId, error: String(error), }).catch(() => {}); - spinner.fail(`Agent ${agentName} failed to launch`); + spinner.fail(`Agent ${agentName} failed`); + writeLine(` ${colors.red}Run failed${RESET} — ${squadName}/${agentName} (${formatRunDuration(Date.now() - startMs)})`); updateExecutionStatus(squadName, agentName, executionId, 'failed', { error: String(error), durationMs: Date.now() - startMs, }); const msg = error instanceof Error ? error.message : String(error); - const isLikelyBug = error instanceof ReferenceError || error instanceof TypeError || error instanceof SyntaxError; + const isApiKeyError = /api.?key|authentication|unauthorized|401/i.test(msg); + const isLikelyBug = !isApiKeyError && (error instanceof ReferenceError || error instanceof TypeError || error instanceof SyntaxError); writeLine(` ${colors.red}${msg}${RESET}`); writeLine(); - if (isLikelyBug) { + if (isApiKeyError) { + writeLine(` ${colors.yellow}API key not set or invalid. Set ANTHROPIC_API_KEY and retry:${RESET}`); + writeLine(` ${colors.dim}$ export ANTHROPIC_API_KEY=sk-ant-...${RESET}`); + } else if (isLikelyBug) { writeLine(` ${colors.yellow}This looks like a bug. 
Please try:${RESET}`); writeLine(` ${colors.dim}$${RESET} squads doctor ${colors.dim}— check your setup${RESET}`); writeLine(` ${colors.dim}$${RESET} squads update ${colors.dim}— get the latest fixes${RESET}`); @@ -418,7 +429,8 @@ TIME LIMIT: ${timeoutMins} minutes. Focus on priorities first. If blocked, note } else { writeLine(` ${colors.dim}Run \`squads doctor\` to check your setup, or \`squads run ${agentName} --verbose\` for details.${RESET}`); } - break; // Error — exit retry loop + // Re-throw so callers (org cycle) can detect the failure + throw error; } } } else { diff --git a/src/lib/cognition.ts b/src/lib/cognition.ts index 6e76fc6d..7cfaa0d1 100644 --- a/src/lib/cognition.ts +++ b/src/lib/cognition.ts @@ -328,14 +328,19 @@ export async function synthesizeSignals( .map((s, i) => `${i + 1}. [${s.source}] ${s.signal_type}${s.value !== null ? ' = ' + s.value : ''}${s.unit ? ' ' + s.unit : ''}: ${(s.data.content as string || '').slice(0, 100)}`) .join('\n'); + // Load classification prompt from markdown + const { findProjectRoot } = require('./squad-parser.js'); + const classifyPath = join(findProjectRoot() || '', '.agents', 'config', 'cognition-prompts.md'); + const classifyContent = existsSync(classifyPath) ? readFileSync(classifyPath, 'utf-8') : ''; + const classifySection = classifyContent.match(/## Belief Classification\n([\s\S]*?)(?=\n## |$)/); + const classifyInstructions = classifySection ? classifySection[1].trim() : 'Classify each signal. Respond with JSON: {"supporting": [], "contradicting": [], "neutral": []}'; + const prompt = `Given this belief: "${belief.statement}" -Classify each signal as SUPPORTING or CONTRADICTING or NEUTRAL. +${classifyInstructions} Signals: -${signalList} - -Respond with JSON only: {"supporting": [indexes], "contradicting": [indexes], "neutral": [indexes]}`; +${signalList}`; try { // Call Haiku via claude CLI (uses subscription, no API key needed) @@ -475,10 +480,14 @@ export async function reflect( ? 
state.reflections[state.reflections.length - 1] : null; - const prompt = `You are the cognition engine for an AI-native company called Agents Squads. -Your job is to reflect on the current state of the business and produce actionable insights. + // Load reflection prompt from markdown + const { findProjectRoot: findRoot } = require('./squad-parser.js'); + const reflectPath = join(findRoot() || '', '.agents', 'config', 'cognition-prompts.md'); + const reflectContent = existsSync(reflectPath) ? readFileSync(reflectPath, 'utf-8') : ''; + const reflectSection = reflectContent.match(/## Business Reflection\n([\s\S]*?)$/); + const reflectInstructions = reflectSection ? reflectSection[1].trim() : 'Produce a business reflection as JSON.'; -## Current Beliefs (world model) + const prompt = `## Current Beliefs (world model) ${beliefsText || '(none)'} ## Recent Signals (since last reflection) @@ -489,15 +498,7 @@ ${decisionsText || '(none)'} ${lastReflection ? `Previous reflection (${lastReflection.created_at}):\n${lastReflection.assessment}\n` : ''} -## Your Task -Produce a business reflection. Respond as JSON only: -{ - "assessment": "2-3 sentence summary of business state", - "insights": [{"type": "highlight|warning|recommendation", "message": "..."}], - "belief_updates": [{"belief_key": "...", "suggested_confidence": 0.X, "reason": "..."}], - "priority_adjustments": [{"description": "...", "urgency": "high|medium|low"}], - "founder_escalations": [{"issue": "...", "why_human_needed": "...", "suggested_action": "...", "urgency": "immediate|today|this_week"}] -}`; +${reflectInstructions}`; try { const result = callClaude(prompt, 'sonnet', 60000); diff --git a/src/lib/conversation.ts b/src/lib/conversation.ts index 9928db24..79b16453 100644 --- a/src/lib/conversation.ts +++ b/src/lib/conversation.ts @@ -18,23 +18,23 @@ export type AgentRole = 'lead' | 'scanner' | 'worker' | 'verifier'; * Fallback: matches against agent name (for squads without role descriptions). 
*/ export function classifyAgent(agentName: string, roleDescription?: string): AgentRole | null { - // Primary: parse the role description from SQUAD.md - if (roleDescription) { - const lower = roleDescription.toLowerCase(); - if (lower.includes('orchestrat') || lower.includes('triage') || lower.includes('coordinat')) return 'lead'; - if (lower.includes('scan') || lower.includes('monitor') || lower.includes('detect')) return 'scanner'; - if (lower.includes('verif') || lower.includes('review') || lower.includes('check') || lower.includes('critic')) return 'verifier'; - // Any role description that doesn't match above = worker (the default doer) - return 'worker'; - } - - // Fallback: match against agent name (lead checked first to avoid substring collisions) + // Name-based classification FIRST — more reliable than parsing ambiguous + // role descriptions (e.g. "review PRs" in eng-lead ≠ verifier). const name = agentName.toLowerCase(); if (name.includes('lead') || name.includes('orchestrator')) return 'lead'; if (name.includes('scanner') || name.includes('scout') || name.includes('monitor')) return 'scanner'; if (name.includes('verifier') || name.includes('critic') || name.includes('reviewer')) return 'verifier'; if (name.includes('worker') || name.includes('solver') || name.includes('builder')) return 'worker'; + // Fallback: parse role description from SQUAD.md + if (roleDescription) { + const lower = roleDescription.toLowerCase(); + if (lower.includes('orchestrat') || lower.includes('triage') || lower.includes('coordinat') || lower.includes('lead')) return 'lead'; + if (lower.includes('scan') || lower.includes('monitor') || lower.includes('detect')) return 'scanner'; + if (lower.includes('verif') || lower.includes('critic') || lower.includes('review') || lower.includes('check')) return 'verifier'; + return 'worker'; + } + return null; // Unclassified — excluded from conversation } @@ -82,39 +82,144 @@ export function createTranscript(squad: string): Transcript { 
}; } -/** Serialize transcript for prompt injection. - * Compacts after 5 turns: keeps first brief + last lead review (natural summary) - * + turns since that review. The lead review already summarizes prior work, - * so it acts as a compaction point — no information lost, just compressed. +/** Max total chars for serialized transcript. Triggers aggressive compaction. */ +const MAX_TRANSCRIPT_CHARS = 20000; + +/** + * Serialize transcript for prompt injection with auto-compaction. + * + * Strategy (inspired by Claude Code's auto-compact): + * - Recent turns (current cycle): kept in full + * - Older cycles: compressed into a structured digest + * - Digest format: what was done, what was decided, what's pending + * + * This lets conversations go 20+ turns without blowing context. */ export function serializeTranscript(transcript: Transcript): string { if (transcript.turns.length === 0) return ''; - let turns = transcript.turns; - if (turns.length > 5) { - const firstBrief = turns[0]; + const turns = transcript.turns; + + // Find cycle boundaries (each lead turn after the first starts a new cycle) + const cycleBoundaries: number[] = [0]; + for (let i = 1; i < turns.length; i++) { + if (turns[i].role === 'lead' && i > 0) { + // A lead turn that follows a verifier or is the first lead after workers = new cycle + const prevRole = turns[i - 1]?.role; + if (prevRole === 'verifier' || prevRole === 'worker') { + cycleBoundaries.push(i); + } + } + } + + // If short conversation (≤5 turns or single cycle), return everything + if (turns.length <= 5 || cycleBoundaries.length <= 1) { + return formatTurns(turns, transcript.turns.length); + } + + // Split into: old cycles (digest) + current cycle (full) + const lastCycleStart = cycleBoundaries[cycleBoundaries.length - 1]; + const currentCycleTurns = turns.slice(lastCycleStart); + const oldTurns = turns.slice(0, lastCycleStart); + + // Build digest of old cycles + const digest = buildDigest(oldTurns, cycleBoundaries.slice(0, -1)); + 
+ // Assemble + const lines = ['## Conversation So Far\n']; + + // Always preserve the initial brief (first turn) + const firstTurn = turns[0]; + lines.push(`**${firstTurn.agent} (${firstTurn.role}):**`); + lines.push(firstTurn.content); + lines.push(''); + + if (oldTurns.length > 1) { + lines.push(`*(${oldTurns.length - 1} earlier turns compacted)*\n`); + } + + if (digest) { + lines.push('### Prior Cycles (digest)'); + lines.push(digest); + lines.push(''); + } + + lines.push(`### Current Cycle (${currentCycleTurns.length} turns)\n`); + for (const turn of currentCycleTurns) { + lines.push(`**${turn.agent} (${turn.role}):**`); + lines.push(turn.content); + lines.push(''); + } + + const result = lines.join('\n'); - // Find last lead review (any lead turn after the first brief) - let lastReviewIdx = -1; - for (let i = turns.length - 1; i > 0; i--) { - if (turns[i].role === 'lead') { - lastReviewIdx = i; - break; + // Safety: if still too large, truncate from the beginning of the digest + if (result.length > MAX_TRANSCRIPT_CHARS) { + const overflow = result.length - MAX_TRANSCRIPT_CHARS; + return '*(transcript truncated — ' + overflow + ' chars removed from older cycles)*\n\n' + + result.slice(overflow); + } + + return result; +} + +/** Build a structured digest from completed cycles. */ +function buildDigest(turns: Turn[], cycleBoundaries: number[]): string { + const sections: string[] = []; + + for (let c = 0; c < cycleBoundaries.length; c++) { + const start = cycleBoundaries[c]; + const end = c + 1 < cycleBoundaries.length ? 
cycleBoundaries[c + 1] : turns.length; + const cycleTurns = turns.slice(start, end); + + // Extract key signals from each role + const done: string[] = []; + const pending: string[] = []; + const blocked: string[] = []; + + for (const t of cycleTurns) { + const lines = t.content.split('\n'); + for (const line of lines) { + const l = line.trim(); + // Extract PR numbers, issue numbers, key actions + if (/PR\s*#\d+|merged|MERGED/.test(l) && l.length < 200) { + done.push(l.replace(/^[-*]\s*/, '').slice(0, 100)); + } + if (/BLOCKED|blocked|needs:human/i.test(l) && l.length < 200) { + blocked.push(l.replace(/^[-*]\s*/, '').slice(0, 100)); + } + if (/## STATUS:\s*CONTINUE|Remaining:|todo|not-started/i.test(l)) { + pending.push(l.replace(/^[-*]\s*/, '').slice(0, 100)); + } } } - if (lastReviewIdx > 0) { - // First brief + last lead review + everything after it - turns = [firstBrief, ...turns.slice(lastReviewIdx)]; - } else { - // No lead review yet — keep first brief + last 3 - turns = [firstBrief, ...turns.slice(-3)]; + // Verifier verdict + const verifierTurn = cycleTurns.find(t => t.role === 'verifier'); + const verdict = verifierTurn + ? (/APPROVED|approved|lgtm/i.test(verifierTurn.content) ? 'APPROVED' : 'REJECTED') + : 'no verifier'; + + const cycleLines: string[] = [`**Cycle ${c + 1}** (${verdict}):`]; + if (done.length > 0) cycleLines.push(` Done: ${done.slice(0, 3).join('; ')}`); + if (blocked.length > 0) cycleLines.push(` Blocked: ${blocked.slice(0, 2).join('; ')}`); + if (pending.length > 0) cycleLines.push(` Pending: ${pending.slice(0, 2).join('; ')}`); + + if (done.length === 0 && blocked.length === 0 && pending.length === 0) { + cycleLines.push(` (${cycleTurns.length} turns, no key signals extracted)`); } + + sections.push(cycleLines.join('\n')); } + return sections.join('\n'); +} + +/** Format turns as markdown for transcript injection. 
*/ +function formatTurns(turns: Turn[], totalTurns: number): string { const lines = ['## Conversation So Far\n']; - if (turns.length < transcript.turns.length) { - lines.push(`*(${transcript.turns.length - turns.length} earlier turns compacted — lead review below summarizes prior work)*\n`); + if (turns.length < totalTurns) { + lines.push(`*(${totalTurns - turns.length} earlier turns compacted)*\n`); } for (const turn of turns) { lines.push(`### ${turn.agent} (${turn.role}) — ${turn.timestamp}`); @@ -124,6 +229,9 @@ export function serializeTranscript(transcript: Transcript): string { return lines.join('\n'); } +/** Max chars per turn in transcript. Larger outputs are truncated with a note. */ +const MAX_TURN_CHARS = 8000; + export function addTurn( transcript: Transcript, agent: string, @@ -131,10 +239,15 @@ export function addTurn( content: string, estimatedCost: number, ): void { + // Budget: cap turn content to prevent context bloat + const trimmedContent = content.length > MAX_TURN_CHARS + ? content.slice(0, MAX_TURN_CHARS) + `\n\n...(truncated — ${content.length} chars total. Key outputs: check git log and gh pr list for deliverables.)` + : content; + transcript.turns.push({ agent, role, - content, + content: trimmedContent, timestamp: new Date().toISOString(), estimatedCost, }); @@ -199,7 +312,13 @@ export interface ConvergenceResult { /** * Detect if the conversation has converged. - * Continuation signals beat convergence signals (bias toward more work). + * + * Uses explicit STATUS/VERDICT markers from conversation-roles.md: + * - Lead: `## STATUS: DONE` or `## STATUS: CONTINUE` + * - Verifier: `## VERDICT: APPROVED` or `## VERDICT: REJECTED` + * - Any role: `BLOCKED: [reason]` + * + * Falls back to keyword detection for agents that don't follow the format. 
*/ export function detectConvergence( transcript: Transcript, @@ -214,47 +333,61 @@ export function detectConvergence( return { converged: true, reason: `Cost ceiling reached ($${transcript.totalCost.toFixed(2)}/$${costCeiling})` }; } - // Check last turn content if (transcript.turns.length === 0) { return { converged: false, reason: 'No turns yet' }; } const lastTurn = transcript.turns[transcript.turns.length - 1]; const content = lastTurn.content; + + // ── Explicit markers (preferred) ────────────────────────────────── + + // Verifier verdict — strongest signal + if (/## VERDICT:\s*APPROVED/i.test(content)) { + return { converged: true, reason: 'Verifier approved' }; + } + if (/## VERDICT:\s*REJECTED/i.test(content)) { + return { converged: false, reason: 'Verifier rejected — continuing cycle' }; + } + + // Lead status — only from lead turns + if (lastTurn.role === 'lead') { + if (/## STATUS:\s*DONE/i.test(content)) { + return { converged: true, reason: 'Lead signaled completion' }; + } + if (/## STATUS:\s*CONTINUE/i.test(content)) { + return { converged: false, reason: 'Lead assigned more work' }; + } + } + + // Blocked — any role + if (/BLOCKED:/i.test(content)) { + return { converged: true, reason: 'Blocked — needs human action' }; + } + + // ── Fallback: keyword detection ─────────────────────────────────── + // For agents that don't follow the STATUS/VERDICT format + const lower = content.toLowerCase(); - // Verifier turns: check approval/rejection before generic signals if (lastTurn.role === 'verifier') { const rejected = VERIFIER_REJECTION_PHRASES.some(phrase => lower.includes(phrase)); - if (rejected) { - return { converged: false, reason: 'Verifier rejected — continuing cycle' }; - } + if (rejected) return { converged: false, reason: 'Verifier rejected (keyword)' }; const approved = VERIFIER_APPROVAL_PHRASES.some(phrase => lower.includes(phrase)); - if (approved) { - return { converged: true, reason: 'Verifier approved' }; - } + if (approved) return { 
converged: true, reason: 'Verifier approved (keyword)' }; } - // Lead completion: hard stop when lead signals the session is done. - // Checked before continuation phrases — lead saying "done" overrides stale - // continuation signals (e.g. "will proceed to close" shouldn't keep running). if (lastTurn.role === 'lead') { const leadDone = LEAD_COMPLETION_PHRASES.some(phrase => lower.includes(phrase)); - if (leadDone) { - return { converged: true, reason: 'Lead signaled completion' }; - } + if (leadDone) return { converged: true, reason: 'Lead signaled completion (keyword)' }; } - // Continuation signals beat generic convergence (bias toward completing work) + // Continuation beats convergence const hasContinuation = CONTINUATION_PHRASES.some(phrase => lower.includes(phrase)); - if (hasContinuation) { - return { converged: false, reason: 'Continuation signal detected' }; - } + if (hasContinuation) return { converged: false, reason: 'Continuation signal detected' }; const hasConvergence = CONVERGENCE_PHRASES.some(phrase => lower.includes(phrase)); - if (hasConvergence) { - return { converged: true, reason: 'Convergence signal detected' }; - } + if (hasConvergence) return { converged: true, reason: 'Convergence signal detected' }; return { converged: false, reason: 'No signals detected, continuing' }; } diff --git a/src/lib/env-config.ts b/src/lib/env-config.ts index 1eb63ea8..515c06e4 100644 --- a/src/lib/env-config.ts +++ b/src/lib/env-config.ts @@ -32,6 +32,8 @@ export interface EnvironmentConfig { export interface SquadsConfig { current: string; environments: Record; + /** User email — captured opt-in during `squads init` for founder outreach */ + email?: string; } // --------------------------------------------------------------------------- @@ -142,3 +144,20 @@ export function getBridgeUrl(): string { export function getConsoleUrl(): string { return getEnv().console_url; } + +/** + * Persist the user's email address in ~/.squads/config.json. 
+ * Used for opt-in founder outreach captured during `squads init`. + */ +export function saveEmail(email: string): void { + const config = loadConfig(); + config.email = email; + saveConfig(config); +} + +/** + * Retrieve the stored user email, if any. + */ +export function getEmail(): string | undefined { + return loadConfig().email; +} diff --git a/src/lib/execution-engine.ts b/src/lib/execution-engine.ts index 547b19e8..415a91bc 100644 --- a/src/lib/execution-engine.ts +++ b/src/lib/execution-engine.ts @@ -4,10 +4,17 @@ */ import { spawn, execSync } from 'child_process'; -import { join } from 'path'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; import { existsSync, readFileSync, writeFileSync, mkdirSync, cpSync, unlinkSync } from 'fs'; +import { homedir } from 'os'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); import { loadSquad, + findSquadsDir, + findProjectRoot, type EffortLevel, type Squad, } from './squad-parser.js'; @@ -50,6 +57,34 @@ export const VERIFICATION_EXEC_TIMEOUT_MS = 30000; export const LOG_FILE_INIT_DELAY_MS = 500; export const VERBOSE_COMMAND_MAX_CHARS = 50; +// ── Guardrail settings ──────────────────────────────────────────────── + +/** + * Resolve the path to a guardrail settings JSON file for --settings injection. + * + * Resolution order: + * 1. `.claude/guardrail.json` in the project root (user-provided override) + * 2. Bundled default: `templates/guardrail.json` alongside the squads-cli package + * + * Returns undefined when neither exists (no guardrail applied). + */ +export function resolveGuardrailSettings(projectRoot: string): string | undefined { + // 1. Project-level override + const projectGuardrail = join(projectRoot, '.claude', 'guardrail.json'); + if (existsSync(projectGuardrail)) return projectGuardrail; + + // 2. 
Bundled default (dist/lib/ → dist/templates/ in compiled output; + // src/lib/ → templates/ in source tree) + const bundledGuardrail = join(__dirname, '..', '..', 'templates', 'guardrail.json'); + if (existsSync(bundledGuardrail)) return bundledGuardrail; + + // Also check one level up (when running from dist/lib/) + const bundledGuardrailAlt = join(__dirname, '..', 'templates', 'guardrail.json'); + if (existsSync(bundledGuardrailAlt)) return bundledGuardrailAlt; + + return undefined; +} + // ── Interfaces ──────────────────────────────────────────────────────── export interface ExecuteWithClaudeOptions { @@ -181,9 +216,11 @@ export async function verifyExecution( recentCommits = '(no commits found)'; } - const verifyPrompt = `You are verifying whether an agent completed its task successfully. + // Load verification protocol from markdown + const verifyProtocolPath = join(findProjectRoot() || '', '.agents', 'config', 'verification.md'); + const verifyProtocol = existsSync(verifyProtocolPath) ? readFileSync(verifyProtocolPath, 'utf-8') : 'Respond: PASS: reason or FAIL: reason'; -Agent: ${squadName}/${agentName} + const verifyPrompt = `Agent: ${squadName}/${agentName} ## Acceptance Criteria ${criteria} @@ -196,12 +233,7 @@ ${stateContent || '(empty or not found)'} ### Recent Git Commits ${recentCommits} -## Instructions -Evaluate whether the acceptance criteria are met based on the evidence. -Respond with EXACTLY one line: -PASS: -or -FAIL: `; +${verifyProtocol}`; try { const escapedPrompt = verifyPrompt.replace(/'/g, "'\\''"); @@ -302,6 +334,11 @@ export function buildAgentEnv( // not the user's personal gh auth. This enables founder to review/approve. 
if (options?.ghToken) env.GH_TOKEN = options.ghToken; + // Inject per-squad GCP credential if available + // Agents get GOOGLE_APPLICATION_CREDENTIALS pointing to their squad's service account key + const credPath = join(homedir(), '.squads', 'secrets', `${execContext.squad}-sa-key.json`); + if (existsSync(credPath)) env.GOOGLE_APPLICATION_CREDENTIALS = credPath; + if (options?.includeOtel) { env.OTEL_RESOURCE_ATTRIBUTES = `squads.squad=${execContext.squad},squads.agent=${execContext.agent},squads.task_type=${execContext.taskType},squads.trigger=${execContext.trigger},squads.execution_id=${execContext.executionId}`; } @@ -614,10 +651,32 @@ export async function executeWithClaude( ensureProjectTrusted(projectRoot); // Resolve model and provider + // Priority: 1) CLI --model flag 2) agent frontmatter model: 3) SQUAD.md model routing const squad = squadName !== 'unknown' ? loadSquad(squadName) : null; const mcpConfigPath = selectMcpConfig(squadName, squad); + + // Merge CLI --skills flag with SQUAD.md context.skills + const squadSkills = squad?.context?.skills || []; + const mergedSkills = [...new Set([...(skills || []), ...squadSkills])]; const taskType = detectTaskType(agentName); - const resolvedModel = resolveModel(model, squad, taskType); + + // Read agent frontmatter model if no explicit CLI flag + let effectiveModel = model; + if (!effectiveModel) { + const squadsDir = findSquadsDir(); + if (squadsDir) { + const agentPath = join(squadsDir, squadName, `${agentName}.md`); + if (existsSync(agentPath)) { + const content = readFileSync(agentPath, 'utf-8'); + const modelMatch = content.match(/^model:\s*["']?([^"'\n]+)["']?/m); + if (modelMatch) { + effectiveModel = modelMatch[1].trim(); + } + } + } + } + + const resolvedModel = resolveModel(effectiveModel, squad, taskType); const provider = resolvedModel ? 
detectProviderFromModel(resolvedModel) : 'anthropic'; // Resolve target repo for worktree creation (squad.repo → sibling dir) @@ -664,7 +723,7 @@ export async function executeWithClaude( if (verbose) { logVerboseExecution({ projectRoot, mode: 'foreground', useApi, execContext, - effort, skills, resolvedModel, claudeModelAlias, explicitModel: model, + effort, skills: mergedSkills, resolvedModel, claudeModelAlias, explicitModel: model, }); } @@ -684,6 +743,8 @@ export async function executeWithClaude( 'Bash(git:*)', 'Bash(gh:*)', 'Bash(npm:*)', 'Bash(npx:*)', 'Bash(node:*)', 'Bash(python3:*)', 'Bash(curl:*)', 'Bash(docker:*)', 'Bash(duckdb:*)', + 'Bash(bq:*)', 'Bash(gcloud:*)', + 'Bash(gws:*)', 'Bash(stripe:*)', 'Bash(ls:*)', 'Bash(mkdir:*)', 'Bash(cp:*)', 'Bash(mv:*)', 'Bash(cat:*)', 'Bash(head:*)', 'Bash(tail:*)', 'Bash(wc:*)', 'Bash(echo:*)', 'Bash(chmod:*)', 'Bash(date:*)', @@ -693,11 +754,14 @@ export async function executeWithClaude( } claudeArgs.push('--disable-slash-commands'); if (mcpConfigPath) claudeArgs.push('--mcp-config', mcpConfigPath); + // Inject guardrail PreToolUse hooks so spawned sessions inherit destructive-command guards + const guardrailPath = resolveGuardrailSettings(targetRepoRoot); + if (guardrailPath) claudeArgs.push('--settings', guardrailPath); if (claudeModelAlias) claudeArgs.push('--model', claudeModelAlias); claudeArgs.push('--', prompt); const agentEnv = buildAgentEnv(spawnEnv as Record, execContext, { - effort, skills, includeOtel: true, ghToken: botGhToken, + effort, skills: mergedSkills, includeOtel: true, ghToken: botGhToken, }); return executeForeground({ @@ -710,7 +774,7 @@ export async function executeWithClaude( const timestamp = Date.now(); const { logFile, pidFile } = prepareLogFiles(projectRoot, squadName, agentName, timestamp); const agentEnv = buildAgentEnv(spawnEnv as Record, execContext, { - effort, skills, includeOtel: !runInWatch, ghToken: botGhToken, + effort, skills: mergedSkills, includeOtel: !runInWatch, ghToken: 
botGhToken, }); const wrapperScript = buildDetachedShellScript({ @@ -733,7 +797,7 @@ export async function executeWithClaude( if (verbose) { logVerboseExecution({ projectRoot, mode: 'background', useApi, execContext, - effort, skills, resolvedModel, claudeModelAlias, + effort, skills: mergedSkills, resolvedModel, claudeModelAlias, explicitModel: model, logFile, mcpConfigPath, }); } diff --git a/src/lib/run-context.ts b/src/lib/run-context.ts index 9248aaf0..24a1d518 100644 --- a/src/lib/run-context.ts +++ b/src/lib/run-context.ts @@ -18,7 +18,7 @@ */ import { join, dirname } from 'path'; -import { existsSync, readFileSync, readdirSync } from 'fs'; +import { existsSync, readFileSync, readdirSync, statSync } from 'fs'; import { execSync } from 'child_process'; import { findSquadsDir } from './squad-parser.js'; import { findMemoryDir } from './memory.js'; @@ -464,22 +464,25 @@ export function gatherSquadContext( return true; } - // ── L1: company.md — Why (company identity, alignment) ── - const companyContext = loadCompanyContext(); - if (companyContext) { - addLayer(1, 'Company', stripYamlFrontmatter(companyContext)); - } + // ═══════════════════════════════════════════════════════════════════ + // Context injection order: ACTION-FIRST, REFERENCE-LAST + // + // LLMs pay most attention to the beginning and end of context. + // Put what the agent should ACT ON first (feedback, goals, state). + // Put reference material last (company, agent definition). + // ═══════════════════════════════════════════════════════════════════ - // ── L2: priorities.md — Where (current focus, urgency) ── + // ── L6: feedback.md — ACT ON THIS (corrections from last cycle) ── + // Injected FIRST so agents address feedback before anything else. 
if (memoryDir) { - const prioritiesFile = join(memoryDir, squadName, 'priorities.md'); - const content = safeRead(prioritiesFile); + const feedbackFile = join(memoryDir, squadName, 'feedback.md'); + const content = safeRead(feedbackFile); if (content) { - addLayer(2, 'Priorities', stripYamlFrontmatter(content)); + addLayer(6, 'Feedback (act on this first)', content); } } - // ── L3: goals.md — What (measurable targets) ── + // ── L3: goals.md — What to achieve this cycle ── if (memoryDir) { const goalsFile = join(memoryDir, squadName, 'goals.md'); const content = safeRead(goalsFile); @@ -488,38 +491,49 @@ export function gatherSquadContext( } } - // ── L4: agent.md — You (agent role, instructions) ── - if (options.agentPath) { - const agentContent = safeRead(options.agentPath); - if (agentContent) { - // Strip YAML frontmatter — inject the markdown body only - const body = stripYamlFrontmatter(agentContent); - addLayer(4, `Agent: ${agentName}`, body); - } - } - - // ── L5: state.md — Memory (continuity from last run) ── + // ── L5: state.md — Where we left off ── if (memoryDir) { const stateFile = join(memoryDir, squadName, agentName, 'state.md'); const content = safeRead(stateFile); if (content) { - // Strip frontmatter — LLM gets the body (Current/Blockers/Carry Forward) const body = stripYamlFrontmatter(content); const stateCap = (role === 'scanner' || role === 'verifier') ? 2000 : undefined; - addLayer(5, 'Previous State', body, stateCap); + // Add staleness caveat (#721) so agents know if their memory is outdated + let staleNote = ''; + try { + const mtime = statSync(stateFile).mtimeMs; + const daysAgo = Math.round((Date.now() - mtime) / 86400000); + if (daysAgo > 0) staleNote = `*(Last updated ${daysAgo} day${daysAgo > 1 ? 
's' : ''} ago — verify before relying on this)*\n\n`; + } catch { /* */ } + addLayer(5, 'Previous State', staleNote + body, stateCap); } } - // ── L6: feedback.md — Supporting (squad-level feedback) ── + // ── L2: priorities.md — Where to focus ── if (memoryDir) { - const feedbackFile = join(memoryDir, squadName, 'feedback.md'); - const content = safeRead(feedbackFile); + const prioritiesFile = join(memoryDir, squadName, 'priorities.md'); + const content = safeRead(prioritiesFile); if (content) { - addLayer(6, 'Feedback', content); + addLayer(2, 'Priorities', stripYamlFrontmatter(content)); } } - // ── L7: Daily briefing — Supporting (org pulse, leads+coo only) ── + // ── L4: agent.md — Your role and instructions ── + if (options.agentPath) { + const agentContent = safeRead(options.agentPath); + if (agentContent) { + const body = stripYamlFrontmatter(agentContent); + addLayer(4, `Agent: ${agentName}`, body); + } + } + + // ── L1: company.md — Who we are (reference) ── + const companyContext = loadCompanyContext(); + if (companyContext) { + addLayer(1, 'Company', stripYamlFrontmatter(companyContext)); + } + + // ── L7: Daily briefing — Org pulse (leads+coo only, reference) ── if (memoryDir) { const dailyFile = join(memoryDir, 'daily-briefing.md'); const content = safeRead(dailyFile); @@ -528,7 +542,7 @@ export function gatherSquadContext( } } - // ── L8: Cross-squad learnings — Supporting (from context_from agents) ── + // ── L8: Cross-squad learnings (leads+coo only, reference) ── if (memoryDir) { const frontmatter = options.agentPath ? 
parseAgentFrontmatter(options.agentPath) : {}; const contextSquads = frontmatter.context_from || []; diff --git a/src/lib/run-modes.ts b/src/lib/run-modes.ts index 5a063ff1..fa68475d 100644 --- a/src/lib/run-modes.ts +++ b/src/lib/run-modes.ts @@ -26,6 +26,7 @@ import { import { runAgent } from './agent-runner.js'; import { findSquadsDir, + findProjectRoot, loadSquad, } from './squad-parser.js'; import { @@ -114,70 +115,10 @@ export async function runPostEvaluation( writeLine(); writeLine(` ${gradient('eval')} ${colors.dim}COO evaluating: ${squadList}${RESET}`); - const evalTask = `Post-run evaluation for: ${squadList}. - -## Evaluation Process - -For each squad (${squadList}): - -### 1. Read previous feedback FIRST -Read \`.agents/memory/{squad}/feedback.md\` if it exists. Note the previous grade, identified patterns, and priorities. This is your baseline — you are measuring CHANGE, not just current state. - -### 2. Gather current evidence -- PRs (last 7 days): \`gh pr list --state all --limit 20 --json number,title,state,mergedAt,createdAt\` -- Recent commits (last 7 days): \`gh api repos/{owner}/{repo}/commits?since=YYYY-MM-DDT00:00:00Z&per_page=20 --jq '.[].commit.message'\` -- Open issues: \`gh issue list --state open --limit 15 --json number,title,labels\` -- Read \`.agents/memory/{squad}/priorities.md\` and \`.agents/memory/company/directives.md\` -- Read \`.agents/memory/{squad}/active-work.md\` (previous cycle's work tracking) - -### 3. Write feedback.md (APPEND history, don't overwrite) -\`\`\`markdown -# Feedback — {squad} - -## Current Assessment (YYYY-MM-DD): [A-F] -Merge rate: X% | Noise ratio: Y% | Priority alignment: Z% - -## Trajectory: [improving | stable | declining | new] -Previous grade: [grade] → Current: [grade]. [1-line explanation of why] - -## Valuable (continue) -- [specific PR/issue that advanced priorities] - -## Noise (stop) -- [specific anti-pattern observed] - -## Next Cycle Priorities -1. 
[specific actionable item] - -## History -| Date | Grade | Key Signal | -|------|-------|------------| -| YYYY-MM-DD | X | [what drove this grade] | -[keep last 10 entries, append new row] -\`\`\` - -### 4. Write active-work.md -\`\`\`markdown -# Active Work — {squad} (YYYY-MM-DD) -## Continue (open PRs) -- #{number}: {title} — {status/next action} -## Backlog (assigned issues) -- #{number}: {title} — {priority} -## Do NOT Create -- {description of known duplicate patterns from feedback history} -\`\`\` - -### 5. Commit to hq main -${squadsRun.length > 1 ? ` -### 6. Cross-squad assessment -Evaluate how outputs from ${squadList} connect: -- Duplicated efforts across squads? -- Missing handoffs (one squad's output should feed another)? -- Coordination gaps (conflicting PRs, redundant issues)? -- Combined trajectory: is the org getting more effective or more noisy? -Write cross-squad findings to \`.agents/memory/company/cross-squad-review.md\`. -` : ''} -CRITICAL: You are measuring DIRECTION not just position. A C-grade squad improving from F is better than a B-grade squad declining from A. The history table IS the feedback loop — agents read it next cycle.`; + // Load evaluation protocol from markdown (single source of truth) + const evalProtocolPath = join(findProjectRoot() || '', '.agents', 'config', 'coo-evaluation.md'); + const evalProtocol = existsSync(evalProtocolPath) ? readFileSync(evalProtocolPath, 'utf-8') : ''; + const evalTask = `Post-run evaluation for: ${squadList}.\n\n${evalProtocol}`; await runAgent('company-lead', cooPath, 'company', { ...options, @@ -641,6 +582,10 @@ export async function runLeadMode( const agentList = agentFiles.map(a => `- ${a.name}: ${a.role}`).join('\n'); const agentPaths = agentFiles.map(a => `- ${a.name}: ${a.path}`).join('\n'); + // Load lead mode protocol from markdown + const leadProtocolPath = join(findProjectRoot() || '', '.agents', 'config', 'lead-mode.md'); + const leadProtocol = existsSync(leadProtocolPath) ? 
readFileSync(leadProtocolPath, 'utf-8') : ''; + const prompt = `You are the Lead of the ${squad.name} squad. ## Mission @@ -652,45 +597,7 @@ ${agentList} ## Agent Definition Files ${agentPaths} -## Your Role as Lead - -1. **Assess the situation**: Check for pending work: - - Run \`gh issue list --repo {org}/hq --label squad:${squad.name}\` for assigned issues - - Check .agents/memory/${squad.dir}/ for squad state and pending tasks - - Review recent activity with \`git log --oneline -10\` - -2. **Delegate work using Task tool**: For each piece of work: - - Use the Task tool with subagent_type="general-purpose" - - Include the agent definition file path in the prompt - - Spawn multiple Task agents IN PARALLEL when work is independent - - Example: "Read ${agentFiles[0]?.path || 'agent.md'} and execute its instructions for [specific task]" - -3. **Coordinate parallel execution**: - - Independent tasks → spawn Task agents in parallel (single message, multiple tool calls) - - Dependent tasks → run sequentially - - Monitor progress and handle failures - -4. **Report and update memory**: - - Update .agents/memory/${squad.dir}/state.md with completed work - - Log learnings to learnings.md - - Create issues for follow-up work if needed - -## Time Budget -You have ${timeoutMins} minutes. Prioritize high-impact work. 
- -## Critical Instructions -- Use Task tool for delegation, NOT direct execution of agent work -- Spawn parallel Task agents when work is independent -- When done, type /exit to end the session -- Do NOT wait for user input - work autonomously - -## Async Mode (CRITICAL) -This is ASYNC execution - Task agents must be fully autonomous: -- **Findings** → Create GitHub issues (gh issue create) -- **Code changes** → Create PRs (gh pr create) -- **Analysis results** → Write to .agents/outputs/ or memory files -- **NEVER wait for human review** - complete the work and move on -- **NEVER ask clarifying questions** - make reasonable decisions +${leadProtocol} Instruct each Task agent: "Work autonomously. Output findings to GitHub issues. Output code changes as PRs. Do not wait for review." diff --git a/src/lib/run-types.ts b/src/lib/run-types.ts index 06f49c12..77dc6a27 100644 --- a/src/lib/run-types.ts +++ b/src/lib/run-types.ts @@ -43,6 +43,9 @@ export interface RunOptions { phased?: boolean; // Autopilot: use dependency-based phase ordering eval?: boolean; // Post-run COO evaluation (default: true, --no-eval to skip) org?: boolean; // Org cycle: scan → plan → execute all leads → report + force?: boolean; // Force re-run squads that already completed today + resume?: boolean; // Resume org cycle from quota-skipped squads + focus?: string; // Cycle focus: create, resolve, review, ship, research, cost } /** diff --git a/src/lib/squad-parser.ts b/src/lib/squad-parser.ts index 2d9112b7..55de0718 100644 --- a/src/lib/squad-parser.ts +++ b/src/lib/squad-parser.ts @@ -1,5 +1,6 @@ import { readFileSync, existsSync, readdirSync, writeFileSync } from 'fs'; import { join, basename, dirname } from 'path'; +import { spawnSync } from 'child_process'; import matter from 'gray-matter'; import { resolveMcpConfig, type McpResolution } from './mcp-config.js'; @@ -64,6 +65,8 @@ export interface SquadFrontmatter { providers?: SquadProviders; /** Squad names this squad must wait for before 
/**
 * Run `git rev-parse <flag>` in `cwd` and return its trimmed stdout.
 *
 * @param flag - A single `git rev-parse` option, e.g. `--show-toplevel`.
 * @param cwd - Directory in which git is executed.
 * @returns The trimmed output, or null when git cannot be spawned, exits
 *          non-zero, or prints nothing.
 */
function gitRevParse(flag: string, cwd: string): string | null {
  const result = spawnSync('git', ['rev-parse', flag], {
    cwd,
    encoding: 'utf-8',
    // stderr is discarded so "not a git repository" noise never reaches the user.
    stdio: ['ignore', 'pipe', 'ignore'],
  });
  // `status` is null when the process could not be spawned at all.
  if (result.status !== 0 || !result.stdout) return null;
  const out = result.stdout.trim();
  return out === '' ? null : out;
}

/**
 * True for POSIX-absolute, UNC, and drive-letter paths.
 * Hand-rolled because this module only imports `join`, `basename` and
 * `dirname` from `path`.
 */
function isAbsoluteGitPath(p: string): boolean {
  return p.startsWith('/') || p.startsWith('\\\\') || /^[A-Za-z]:[\\/]/.test(p);
}

/**
 * Walk up from `startDir` (checking at most `maxLevels` directories, the
 * start directory included) looking for `.agents/squads`.
 *
 * @returns The squads path on the first hit, or null.
 */
function walkUpForSquadsDir(startDir: string, maxLevels: number): string | null {
  let dir = startDir;
  for (let level = 0; level < maxLevels; level++) {
    const squadsPath = join(dir, '.agents', 'squads');
    if (existsSync(squadsPath)) return squadsPath;
    const parent = dirname(dir);
    if (parent === dir) break; // reached the filesystem root
    dir = parent;
  }
  return null;
}

/**
 * Look for `<parentDir>/<child>/.agents/squads` in the direct children of
 * `parentDir`. Children are visited in sorted order so the result is
 * deterministic when more than one sibling qualifies.
 *
 * @returns The first matching squads path, or null (also when `parentDir`
 *          cannot be listed).
 */
function findSquadsDirInChildren(parentDir: string): string | null {
  let entries: string[];
  try {
    entries = readdirSync(parentDir).sort();
  } catch {
    return null; // unreadable or missing directory: nothing to find here
  }
  for (const entry of entries) {
    const candidate = join(parentDir, entry, '.agents', 'squads');
    if (existsSync(candidate)) return candidate;
  }
  return null;
}

/**
 * Find the .agents/squads directory.
 *
 * Search order:
 *   1. Walk up to 5 levels from process.cwd() (normal project layouts).
 *   2. If that fails, walk up to 5 levels from the git toplevel of the
 *      current checkout (`git rev-parse --show-toplevel`).
 *   3. Walk up to 5 levels from the main repository root, derived from
 *      `git rev-parse --git-common-dir` (for a linked worktree this is the
 *      repo the worktree belongs to).
 *   4. Finally, check the sibling directories of the main repository root,
 *      so a layout like `agents-squads/hq/.agents/squads` is found when the
 *      current repo is `agents-squads/squads-cli/`.
 *
 * The git-derived path is resolved against the current working directory
 * when git prints it in relative form, so results from steps 3 and 4 are
 * absolute.
 *
 * @returns Path to the squads directory, or null if not found.
 */
export function findSquadsDir(): string | null {
  const cwd = process.cwd();

  // 1. Standard ancestor walk from CWD.
  const fromCwd = walkUpForSquadsDir(cwd, 5);
  if (fromCwd) return fromCwd;

  // 2. Git-aware fallback: start from the checkout's toplevel directory.
  //    Skipped when it equals CWD because step 1 already covered it.
  const gitToplevel = gitRevParse('--show-toplevel', cwd);
  if (gitToplevel && gitToplevel !== cwd) {
    const fromGitRoot = walkUpForSquadsDir(gitToplevel, 5);
    if (fromGitRoot) return fromGitRoot;
  }

  // 3. The common .git dir lives in the main repo even for linked worktrees.
  const gitCommonDir = gitRevParse('--git-common-dir', cwd);
  if (gitCommonDir) {
    // git may print this relative to cwd (e.g. ".git"); make it absolute so
    // callers never receive a path that depends on the process cwd.
    const absoluteCommonDir = isAbsoluteGitPath(gitCommonDir)
      ? gitCommonDir
      : join(cwd, gitCommonDir);
    const mainRepoRoot = dirname(absoluteCommonDir);
    const fromMainRoot = walkUpForSquadsDir(mainRepoRoot, 5);
    if (fromMainRoot) return fromMainRoot;

    // 4. Siblings of the main repo root (e.g. hq/ next to squads-cli/).
    //    The ancestors themselves were already checked by the walk above.
    const fromSibling = findSquadsDirInChildren(dirname(mainRepoRoot));
    if (fromSibling) return fromSibling;
  }

  return null;
}
PLAN: Lead sees goals + feedback + budget → produces task assignments + * 2. EXECUTE: Workers run independently in parallel, each with their task + * 3. REVIEW: Lead evaluates worker output, merges PRs, updates goals + * 4. VERIFY: Verifier checks deliverables against quality gate * - * CLI manages turns (deterministic), lead manages content (creative). + * Workers don't share a conversation — they get their task + squad context. + * Token budget replaces turn limits. Lead plans within the budget. */ import { join } from 'path'; -import { existsSync, writeFileSync, mkdirSync } from 'fs';; -import { execSync, exec } from 'child_process';; +import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; +import { dirname } from 'path'; +import { spawn } from 'child_process'; +import { homedir } from 'os'; import { type AgentRole, @@ -30,200 +36,178 @@ import { type ContextRole, gatherSquadContext, } from './run-context.js'; +import { + buildAgentEnv, + resolveGuardrailSettings, +} from './execution-engine.js'; +import { type ExecutionContext } from './run-types.js'; +import { getBotGhEnv } from './github.js'; +import { generateExecutionId, getClaudeModelAlias } from './run-utils.js'; +import { colors, RESET, writeLine, bold } from './terminal.js'; +import { + logObservability, + snapshotGoals, + diffGoals, + type ObservabilityRecord, +} from './observability.js'; // ============================================================================= // Configuration // ============================================================================= +export type CycleFocus = 'create' | 'resolve' | 'review' | 'ship' | 'research' | 'cost'; + export interface ConversationOptions { - /** Override lead's briefing with a founder directive */ task?: string; - /** Maximum turns before stopping (default: 20) */ maxTurns?: number; - /** Cost ceiling in USD (default: 25) */ costCeiling?: number; - /** Verbose logging */ verbose?: boolean; - /** Model override for all agents */ 
/**
 * Load the instructions for one cycle focus from `.agents/config/cycle-focus.md`.
 *
 * The file is read as a list of level-2 markdown sections; the section whose
 * heading line is exactly `## <focus>` (trailing whitespace ignored) is
 * returned without its heading, trimmed. The section ends at the next line
 * starting with `## ` or at end of file.
 *
 * Parsing is line-based rather than regex-based so that:
 *   - `focus` is never interpreted as a regular expression,
 *   - deeper headings such as `### create` do not match `## create`,
 *   - CRLF files work,
 *   - an empty section yields '' instead of the next section's body.
 *
 * @param focus - The cycle focus whose section should be returned.
 * @returns The section body, or '' when the squads directory, the file, or
 *          the section is missing (or the file cannot be read).
 */
function loadFocusPrompt(focus: CycleFocus): string {
  const squadsDir = findSquadsDir();
  if (!squadsDir) return '';

  const focusPath = join(squadsDir, '..', 'config', 'cycle-focus.md');
  let content: string;
  try {
    content = readFileSync(focusPath, 'utf-8');
  } catch {
    // Missing or unreadable file: focus prompts are optional.
    return '';
  }
  if (!content) return '';

  const lines = content.split(/\r?\n/);
  const heading = `## ${focus}`;
  const start = lines.findIndex((line) => line.trimEnd() === heading);
  if (start === -1) return '';

  const next = lines.findIndex((line, index) => index > start && line.startsWith('## '));
  const end = next === -1 ? lines.length : next;
  return lines.slice(start + 1, end).join('\n').trim();
}
/**
 * Run a single agent independently via `claude --print`.
 *
 * The agent receives its task plus the squad context on stdin; there is no
 * shared transcript. Tool access is restricted per role via `--allowedTools`
 * unless `SQUADS_SKIP_PERMISSIONS=1`, in which case
 * `--dangerously-skip-permissions` is passed instead.
 *
 * The returned promise never rejects. It resolves with:
 *   - the agent's stdout (trimmed) on success,
 *   - `[QUOTA] …` when the output looks like a rate-limit message,
 *   - `[ERROR] …` on non-zero exit without output, spawn failure, or timeout,
 *   - a "completed with no output" marker otherwise.
 *
 * @param config - Agent identity, task, squad context and working directory.
 * @returns The agent's output or a bracketed status marker as described above.
 */
async function runIndependentAgent(config: AgentRunConfig): Promise<string> {
  const { agentName, agentPath, role, squadName, task, squadContext } = config;

  const prompt = `You are ${agentName} (${role}) in squad ${squadName}.

Read your full agent definition at ${agentPath} and follow its instructions.

## Your Task

${task}

${squadContext}

## Output Requirements

- Commit your work (git add, commit, push)
- Open PRs targeting develop (product repos) or push to main (domain repos)
- Run the build before pushing — fix if it fails
- Report: branch name, PR number, build status, what you changed
- End with: ## STATUS: DONE or ## STATUS: BLOCKED [reason]`;

  // Model: explicit override wins, otherwise the role default.
  const resolvedModel = config.model || modelForRole(role);
  const claudeModel = getClaudeModelAlias(resolvedModel) || resolvedModel;

  // CLAUDECODE and ANTHROPIC_API_KEY are deliberately not forwarded to the child.
  const { CLAUDECODE: _cc, ANTHROPIC_API_KEY: _ak, ...cleanEnv } = process.env;

  let botGhToken: string | undefined;
  try {
    const ghEnv = await getBotGhEnv();
    botGhToken = ghEnv.GH_TOKEN;
  } catch { /* falls back to user auth */ }

  const execContext: ExecutionContext = {
    squad: squadName, agent: agentName,
    taskType: role === 'lead' ? 'lead' : role === 'scanner' ? 'research' : role === 'verifier' ? 'evaluation' : 'execution',
    trigger: 'scheduled', executionId: generateExecutionId(),
  };

  // Effort level per role (#702): scanners low, workers high, verifiers medium
  const effortByRole: Record<string, string> = { lead: 'high', scanner: 'low', worker: 'high', verifier: 'medium' };
  const agentEnv = buildAgentEnv(cleanEnv as Record<string, string>, execContext, { ghToken: botGhToken, effort: effortByRole[role] as 'high' | 'medium' | 'low' });

  // Role-based tool sets (#701): scanners get read-only, workers get full, verifiers get read+build
  const readTools = ['Read', 'Glob', 'Grep', 'Bash(git:*)', 'Bash(gh:*)', 'Bash(ls:*)', 'Bash(cat:*)', 'Bash(head:*)', 'Bash(tail:*)', 'Bash(wc:*)', 'Bash(date:*)', 'Bash(curl:*)', 'WebFetch', 'WebSearch'];
  const writeTools = ['Write', 'Edit', 'Bash(npm:*)', 'Bash(npx:*)', 'Bash(node:*)', 'Bash(python3:*)', 'Bash(docker:*)', 'Bash(duckdb:*)', 'Bash(bq:*)', 'Bash(gcloud:*)', 'Bash(gws:*)', 'Bash(stripe:*)', 'Bash(mkdir:*)', 'Bash(cp:*)', 'Bash(mv:*)', 'Bash(echo:*)', 'Bash(chmod:*)', 'Bash(squads:*)', 'Agent'];
  const buildTools = ['Bash(npm:*)', 'Bash(npx:*)', 'Bash(node:*)'];

  const toolsByRole: Record<string, string[]> = {
    lead: [...readTools, ...writeTools],
    scanner: readTools,
    worker: [...readTools, ...writeTools],
    verifier: [...readTools, ...buildTools],
  };

  const claudeArgs: string[] = ['--print'];
  if (process.env.SQUADS_SKIP_PERMISSIONS === '1') {
    claudeArgs.push('--dangerously-skip-permissions');
  } else {
    // Unknown roles fall back to the full tool set.
    const tools = toolsByRole[role] || [...readTools, ...writeTools];
    claudeArgs.push('--allowedTools', ...tools);
  }
  claudeArgs.push('--disable-slash-commands');
  const guardrailPath = resolveGuardrailSettings(config.cwd);
  if (guardrailPath) claudeArgs.push('--settings', guardrailPath);
  if (claudeModel) claudeArgs.push('--model', claudeModel);

  const timeoutMs = 8 * 60 * 1000;

  return new Promise<string>((resolve) => {
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];

    const child = spawn('claude', claudeArgs, {
      cwd: config.cwd, env: agentEnv,
      stdio: ['pipe', 'pipe', 'pipe'],
    });

    const timer = setTimeout(() => {
      child.kill('SIGTERM');
      settle(`[ERROR] ${agentName} timed out after 8 minutes`);
    }, timeoutMs);

    // Single exit path: always cancel the timer so it neither keeps the
    // event loop alive nor signals a child that has already finished.
    const settle = (result: string): void => {
      clearTimeout(timer);
      resolve(result);
    };

    // Without a listener, EPIPE (child exits before reading the prompt) or a
    // failed spawn would surface as an uncaught exception on the stdin stream.
    child.stdin.on('error', () => { /* reported via 'close' / 'error' below */ });
    child.stdin.write(prompt);
    child.stdin.end();

    child.stdout.on('data', (chunk: Buffer) => stdoutChunks.push(chunk));
    child.stderr.on('data', (chunk: Buffer) => stderrChunks.push(chunk));

    child.on('close', (code) => {
      const output = Buffer.concat(stdoutChunks).toString('utf-8').trim();
      const stderr = Buffer.concat(stderrChunks).toString('utf-8').trim();
      // Detect quota hit — Claude returns this when rate limited.
      // NOTE(review): this is a substring heuristic; legitimate output that
      // merely mentions "rate limit" is also classified as a quota hit.
      if (output.includes('hit your limit') || output.includes('rate limit')) {
        settle(`[QUOTA] ${agentName}: API limit reached`);
      } else if (output.length > 0) {
        settle(output);
      } else if (code !== 0) {
        settle(`[ERROR] ${agentName} exited with code ${code}${stderr ? ': ' + stderr.slice(0, 200) : ''}`);
      } else {
        settle(`[${agentName} completed with no output]`);
      }
    });

    child.on('error', (err) => settle(`[ERROR] ${agentName} failed to spawn: ${err.message}`));
  });
}
Scanners discover (parallel-safe but run sequentially for simplicity) - * 3. Workers execute - * 4. Lead reviews - * 5. Verifiers check (if workers produced output) - * 6. Check convergence → loop or exit + * Lead plans within token budget, workers execute independently in parallel, + * lead reviews, verifier checks quality. */ export async function runConversation( squad: Squad, @@ -277,252 +253,366 @@ export async function runConversation( ): Promise { const squadsDir = findSquadsDir(); if (!squadsDir) { - return { - transcript: createTranscript(squad.name), - turnCount: 0, - totalCost: 0, - converged: true, - reason: 'No squads directory found', - }; + return { transcript: createTranscript(squad.name), turnCount: 0, totalCost: 0, converged: true, reason: 'No squads directory found' }; } - const maxTurns = options.maxTurns || DEFAULT_MAX_TURNS; + const tokenBudget = options.tokenBudget || DEFAULT_TOKEN_BUDGET; const costCeiling = options.costCeiling || DEFAULT_COST_CEILING; + const maxTurns = options.maxTurns || 100; const transcript = createTranscript(squad.name); - // Resolve squad's working directory from repo field (e.g. 
"org/squads-cli" → sibling repo dir) - // squadsDir = /path/to/hq/.agents/squads → go up 3 levels to get parent of project root + // Resolve squad's working directory let squadCwd = process.cwd(); if (squad.repo) { const repoName = squad.repo.split('/').pop(); if (repoName) { const reposRoot = join(squadsDir, '..', '..', '..'); const candidatePath = join(reposRoot, repoName); - if (existsSync(candidatePath)) { - squadCwd = candidatePath; - } + if (existsSync(candidatePath)) squadCwd = candidatePath; } } - // Classify all agents - const allAgents = buildTurnPlan(squad, squadsDir); + const allAgents = buildAgentRoster(squad, squadsDir); const leads = allAgents.filter(a => a.role === 'lead'); const scanners = allAgents.filter(a => a.role === 'scanner'); const workers = allAgents.filter(a => a.role === 'worker'); const verifiers = allAgents.filter(a => a.role === 'verifier'); if (leads.length === 0) { - return { - transcript, - turnCount: 0, - totalCost: 0, - converged: true, - reason: 'No lead agent found — cannot orchestrate conversation', - }; + return { transcript, turnCount: 0, totalCost: 0, converged: true, reason: 'No lead agent found' }; } - const lead = leads[0]; // Primary lead - const log = (msg: string) => { - if (options.verbose) { - const ts = new Date().toISOString().slice(11, 19); - process.stderr.write(` [${ts}] ${msg}\n`); - } - }; + const lead = leads[0]; + const log = (msg: string) => writeLine(` ${colors.dim}${msg}${RESET}`); + + // Track timing and goals before cycle begins + const cycleStartMs = Date.now(); + const executionId = generateExecutionId(); + const goalsBefore = snapshotGoals(squad.name); + + log(`${squad.name}: ${allAgents.length} agents (${leads.length}L ${scanners.length}S ${workers.length}W ${verifiers.length}V) budget: ${Math.round(tokenBudget / 1000)}K tokens`); + + // Build squad context once (shared by all agents) + const contextRole: ContextRole = lead.name.includes('company-lead') ? 
'coo' : 'lead'; + const squadContext = gatherSquadContext(squad.name, lead.name, { + agentPath: lead.path, role: contextRole, + }); + + // ═══════════════════════════════════════════════════════════════════ + // PHASE 1: PLAN — Lead scopes work within budget + // ═══════════════════════════════════════════════════════════════════ + + log(` plan: ${lead.name}...`); + + const workerNames = workers.map(w => w.name).join(', ') || '(no workers — do the work yourself)'; + const scannerNames = scanners.map(s => s.name).join(', '); + + // Load focus-specific instructions from .agents/config/cycle-focus.md + const focus = options.focus || 'create'; + const focusInstructions = loadFocusPrompt(focus); + + // Load plan prompt template from .agents/config/conversation-roles.md (Lead first turn) + // Focus instructions override the default planning behavior + const planPrompt = `You are ${lead.name} (lead) in squad ${squad.name}. + +Read your full agent definition at ${lead.path} and follow its instructions. + +## Cycle Focus: ${focus.toUpperCase()} + +${focusInstructions} + +## Budget + +${Math.round(tokenBudget / 1000)}K output tokens for the whole squad. +Each worker task uses ~5-10K tokens. Max ${Math.floor(tokenBudget / 10000)} tasks. 
+ +Available workers: ${workerNames} +Available scanners: ${scannerNames || '(none)'} - log(`Conversation: ${squad.name} | ${allAgents.length} agents | max ${maxTurns} turns | $${costCeiling} ceiling`); - log(` Lead: ${lead.name} | Scanners: ${scanners.map(s => s.name).join(', ') || 'none'} | Workers: ${workers.map(w => w.name).join(', ') || 'none'} | Verifiers: ${verifiers.map(v => v.name).join(', ') || 'none'}`); +## Output Format - // === CYCLE LOOP === - let cycleCount = 0; - const MAX_CYCLES = 5; // Safety: max 5 full cycles (lead→scan→work→review→verify) +\`\`\`plan +GOAL: [which goal this cycle advances] +TASKS: +- worker: [worker-name] | task: [specific instruction with issue number or PR number] +- worker: [worker-name] | task: [specific instruction] +\`\`\` - while (cycleCount < MAX_CYCLES) { - cycleCount++; - log(`\n--- Cycle ${cycleCount} ---`); +Then end with: +## STATUS: CONTINUE - // Step 1: Lead briefs - log(`Turn ${transcript.turns.length + 1}: ${lead.name} (lead)`); - const leadOutput = executeAgentTurn({ - agentName: lead.name, - agentPath: lead.path, - role: 'lead', - squadName: squad.name, +${squadContext}`; + + const planOutput = await runIndependentAgent({ + agentName: lead.name, agentPath: lead.path, role: 'lead', + squadName: squad.name, model: options.model || modelForRole('lead'), + task: options.task || planPrompt, squadContext: '', cwd: squadCwd, + }); + addTurn(transcript, lead.name, 'lead', planOutput, estimateTurnCost(options.model || 'sonnet')); + + // Quota detection — if plan hit the API limit, stop immediately + if (planOutput.includes('[QUOTA]') || planOutput.includes('hit your limit')) { + logObservability({ + ts: new Date().toISOString(), + id: executionId, + squad: squad.name, + agent: lead.name, + provider: 'anthropic', model: options.model || modelForRole('lead'), - transcript, - task: cycleCount === 1 ? 
options.task : undefined, - cwd: squadCwd, + trigger: 'scheduled', + status: 'failed', + duration_ms: Date.now() - cycleStartMs, + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + cost_usd: transcript.totalCost, + context_tokens: 0, + error: 'Quota limit reached', + task: options.task, }); - addTurn(transcript, lead.name, 'lead', leadOutput, estimateTurnCost(options.model || 'sonnet')); + return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: false, reason: 'Quota limit reached' }; + } - // Check convergence after lead - let conv = detectConvergence(transcript, maxTurns, costCeiling); - if (conv.converged) { - log(`Converged after lead: ${conv.reason}`); - return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: true, reason: conv.reason }; - } + // Check if lead declared done immediately (nothing to do) + const conv = detectConvergence(transcript, maxTurns, costCeiling); + if (conv.converged) { + const goalsAfterEarly = snapshotGoals(squad.name); + const goalsChangedEarly = diffGoals(goalsBefore, goalsAfterEarly); + logObservability({ + ts: new Date().toISOString(), + id: executionId, + squad: squad.name, + agent: lead.name, + provider: 'anthropic', + model: options.model || modelForRole('lead'), + trigger: 'scheduled', + status: 'completed', + duration_ms: Date.now() - cycleStartMs, + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + cost_usd: transcript.totalCost, + context_tokens: 0, + task: options.task, + goals_before: Object.keys(goalsBefore).length > 0 ? goalsBefore : undefined, + goals_after: Object.keys(goalsAfterEarly).length > 0 ? goalsAfterEarly : undefined, + goals_changed: goalsChangedEarly.length > 0 ? 
goalsChangedEarly : undefined, + }); + return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: true, reason: conv.reason }; + } - // Step 2: Scanners (only on first cycle) — run in parallel - if (cycleCount === 1 && scanners.length > 0) { - if (scanners.length === 1) { - log(`Turn ${transcript.turns.length + 1}: ${scanners[0].name} (scanner)`); - const output = executeAgentTurn({ - agentName: scanners[0].name, - agentPath: scanners[0].path, - role: 'scanner', - squadName: squad.name, - model: options.model || modelForRole('scanner'), - transcript, - cwd: squadCwd, - }); - addTurn(transcript, scanners[0].name, 'scanner', output, estimateTurnCost(options.model || 'haiku')); - } else { - log(`Turns ${transcript.turns.length + 1}-${transcript.turns.length + scanners.length}: ${scanners.map(s => s.name).join(', ')} (scanners, parallel)`); - const scannerPromises = scanners.map(scanner => - executeAgentTurnAsync({ - agentName: scanner.name, - agentPath: scanner.path, - role: 'scanner', - squadName: squad.name, - model: options.model || modelForRole('scanner'), - transcript, // snapshot — all scanners see same context - cwd: squadCwd, - }).then(output => ({ agent: scanner, output })) - ); - const scannerResults = await Promise.all(scannerPromises); - for (const { agent, output } of scannerResults) { - addTurn(transcript, agent.name, 'scanner', output, estimateTurnCost(options.model || 'haiku')); - } - } + // ═══════════════════════════════════════════════════════════════════ + // PHASE 2: EXECUTE — Workers run independently in parallel + // ═══════════════════════════════════════════════════════════════════ + + // Parse task assignments from lead's plan + const taskAssignments = parseTaskAssignments(planOutput, [...workers, ...scanners]); + + if (taskAssignments.length === 0) { + // No tasks parsed — lead does the work directly + log(` execute: no task assignments found, lead works directly`); + addTurn(transcript, lead.name, 
'lead', '[Lead produced plan but no parseable task assignments. Lead should do the work directly in the review phase.]', estimateTurnCost('sonnet')); + } else { + log(` execute: ${taskAssignments.length} tasks in parallel...`); + + // Run all assigned workers in parallel + const workerPromises = taskAssignments.map(({ agent, task }) => { + log(` ${agent.name}: ${task.slice(0, 60)}...`); + return runIndependentAgent({ + agentName: agent.name, agentPath: agent.path, role: agent.role, + squadName: squad.name, model: options.model || modelForRole(agent.role), + task, squadContext, cwd: squadCwd, + }).then(output => ({ agent, output })); + }); - conv = detectConvergence(transcript, maxTurns, costCeiling); - if (conv.converged) { - return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: true, reason: conv.reason }; - } - } + const workerResults = await Promise.all(workerPromises); - // Step 3: Workers execute — run in parallel if multiple - if (workers.length === 1) { - log(`Turn ${transcript.turns.length + 1}: ${workers[0].name} (worker)`); - const output = executeAgentTurn({ - agentName: workers[0].name, - agentPath: workers[0].path, - role: 'worker', - squadName: squad.name, - model: options.model || modelForRole('worker'), - transcript, - cwd: squadCwd, - }); + for (const { agent, output } of workerResults) { if (output.startsWith('[ERROR]')) { - process.stderr.write(` [WARN] Worker ${workers[0].name} errored: ${output}\n`); - } - addTurn(transcript, workers[0].name, 'worker', output, estimateTurnCost(options.model || 'sonnet')); - } else if (workers.length > 1) { - log(`Turns ${transcript.turns.length + 1}-${transcript.turns.length + workers.length}: ${workers.map(w => w.name).join(', ')} (workers, parallel)`); - const workerPromises = workers.map(worker => - executeAgentTurnAsync({ - agentName: worker.name, - agentPath: worker.path, - role: 'worker', - squadName: squad.name, - model: options.model || 
modelForRole('worker'), - transcript, // snapshot — all workers see same context - cwd: squadCwd, - }).then(output => ({ agent: worker, output })) - ); - const workerResults = await Promise.all(workerPromises); - for (const { agent, output } of workerResults) { - if (output.startsWith('[ERROR]')) { - process.stderr.write(` [WARN] Worker ${agent.name} errored: ${output}\n`); - } - addTurn(transcript, agent.name, 'worker', output, estimateTurnCost(options.model || 'sonnet')); + writeLine(` ${colors.yellow}[WARN] ${agent.name}: ${output.slice(0, 80)}${RESET}`); } + addTurn(transcript, agent.name, agent.role, output, estimateTurnCost(options.model || 'sonnet')); } + } - conv = detectConvergence(transcript, maxTurns, costCeiling); - if (conv.converged) { - return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: true, reason: conv.reason }; - } + // ═══════════════════════════════════════════════════════════════════ + // PHASE 3: REVIEW — Lead evaluates worker output + // ═══════════════════════════════════════════════════════════════════ - // Step 4: Lead reviews worker output - log(`Turn ${transcript.turns.length + 1}: ${lead.name} (lead review)`); - const reviewOutput = executeAgentTurn({ - agentName: lead.name, - agentPath: lead.path, - role: 'lead', - squadName: squad.name, - model: options.model || modelForRole('lead'), - transcript, + log(` review: ${lead.name}...`); + + const reviewPrompt = `Review the work done by your team. The conversation transcript shows what each worker produced. + +1. Check if workers actually committed code (PR numbers, commit SHAs) +2. Merge PRs that are ready: \`gh pr merge --squash --delete-branch --auto\` +3. Update goals.md if a goal was achieved +4. 
Update state.md with what was accomplished + +End with: +## STATUS: DONE +Summary: [what was achieved]`; + + const reviewOutput = await runIndependentAgent({ + agentName: lead.name, agentPath: lead.path, role: 'lead', + squadName: squad.name, model: options.model || modelForRole('lead'), + task: reviewPrompt, squadContext: `${squadContext}\n\n${serializeTranscript(transcript)}`, + cwd: squadCwd, + }); + addTurn(transcript, lead.name, 'lead', reviewOutput, estimateTurnCost(options.model || 'sonnet')); + + // Goals.md staleness check — warn if goals were not updated during review + const goalsAfterReview = snapshotGoals(squad.name); + const goalsChangedInReview = diffGoals(goalsBefore, goalsAfterReview); + if (goalsChangedInReview.length === 0 && Object.keys(goalsBefore).length > 0) { + writeLine(` ${colors.yellow}[WARN] ${squad.name}: goals.md not updated after review — lead should update goals when work is completed${RESET}`); + } + + // ═══════════════════════════════════════════════════════════════════ + // PHASE 4: VERIFY — Quality gate + // ═══════════════════════════════════════════════════════════════════ + + if (verifiers.length > 0) { + const verifier = verifiers[0]; + log(` verify: ${verifier.name}...`); + + const verifyPrompt = `Verify the work from this cycle. The transcript shows the plan and worker outputs. + +Check every PR and deliverable: +1. Build: does it pass? +2. Conflicts: is the PR mergeable? +3. Review comments: are ALL automated review comments addressed? +4. Correctness: does it match what the lead asked for? 
+ +End with: +## VERDICT: APPROVED (all checks pass) +or +## VERDICT: REJECTED (which check failed and why)`; + + const verifyOutput = await runIndependentAgent({ + agentName: verifier.name, agentPath: verifier.path, role: 'verifier', + squadName: squad.name, model: options.model || modelForRole('verifier'), + task: verifyPrompt, squadContext: `${squadContext}\n\n${serializeTranscript(transcript)}`, cwd: squadCwd, }); - addTurn(transcript, lead.name, 'lead', reviewOutput, estimateTurnCost(options.model || 'sonnet')); + addTurn(transcript, verifier.name, 'verifier', verifyOutput, estimateTurnCost(options.model || 'haiku')); + } - conv = detectConvergence(transcript, maxTurns, costCeiling); - if (conv.converged) { - return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: true, reason: conv.reason }; - } + // Determine final convergence + const finalConv = detectConvergence(transcript, maxTurns, costCeiling); + + // ═══════════════════════════════════════════════════════════════════ + // Observability — log conversation cycle as a single record + // ═══════════════════════════════════════════════════════════════════ + + const cycleDurationMs = Date.now() - cycleStartMs; + const goalsAfterFinal = snapshotGoals(squad.name); + const goalsChanged = diffGoals(goalsBefore, goalsAfterFinal); + + const obsRecord: ObservabilityRecord = { + ts: new Date().toISOString(), + id: executionId, + squad: squad.name, + agent: lead.name, + provider: 'anthropic', + model: options.model || modelForRole('lead'), + trigger: 'scheduled', + status: 'completed', + duration_ms: cycleDurationMs, + input_tokens: 0, // token-level data not available from spawned agents + output_tokens: transcript.turns.length > 0 ? 
transcript.turns.reduce((acc, t) => acc + t.content.length, 0) : 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + cost_usd: transcript.totalCost, + context_tokens: 0, + task: options.task, + goals_before: Object.keys(goalsBefore).length > 0 ? goalsBefore : undefined, + goals_after: Object.keys(goalsAfterFinal).length > 0 ? goalsAfterFinal : undefined, + goals_changed: goalsChanged.length > 0 ? goalsChanged : undefined, + }; + logObservability(obsRecord); - // Step 5: Verifiers — run in parallel if multiple - if (verifiers.length === 1) { - log(`Turn ${transcript.turns.length + 1}: ${verifiers[0].name} (verifier)`); - const output = executeAgentTurn({ - agentName: verifiers[0].name, - agentPath: verifiers[0].path, - role: 'verifier', - squadName: squad.name, - model: options.model || modelForRole('verifier'), - transcript, - cwd: squadCwd, - }); - addTurn(transcript, verifiers[0].name, 'verifier', output, estimateTurnCost(options.model || 'haiku')); - } else if (verifiers.length > 1) { - log(`Turns ${transcript.turns.length + 1}-${transcript.turns.length + verifiers.length}: ${verifiers.map(v => v.name).join(', ')} (verifiers, parallel)`); - const verifierPromises = verifiers.map(verifier => - executeAgentTurnAsync({ - agentName: verifier.name, - agentPath: verifier.path, - role: 'verifier', - squadName: squad.name, - model: options.model || modelForRole('verifier'), - transcript, - cwd: squadCwd, - }).then(output => ({ agent: verifier, output })) - ); - const verifierResults = await Promise.all(verifierPromises); - for (const { agent, output } of verifierResults) { - addTurn(transcript, agent.name, 'verifier', output, estimateTurnCost(options.model || 'haiku')); + return { + transcript, + turnCount: transcript.turns.length, + totalCost: transcript.totalCost, + converged: finalConv.converged || true, // plan→execute→review→verify is always one pass + reason: finalConv.reason || 'Cycle complete (plan → execute → review → verify)', + }; +} + +// 
============================================================================= +// Task Assignment Parser +// ============================================================================= + +interface TaskAssignment { + agent: ClassifiedAgent; + task: string; +} + +/** + * Parse task assignments from lead's plan output. + * Looks for patterns like: + * - worker: worker-name | task: do something + * - scanner: scanner-name | task: scan something + * - Assigned: worker-name → do something + */ +function parseTaskAssignments(planOutput: string, availableAgents: ClassifiedAgent[]): TaskAssignment[] { + const assignments: TaskAssignment[] = []; + const lines = planOutput.split('\n'); + + for (const line of lines) { + // Pattern: "- worker: name | task: description" + const pipeMatch = line.match(/(?:worker|scanner|agent):\s*(\S+)\s*\|\s*task:\s*(.+)/i); + if (pipeMatch) { + const agentName = pipeMatch[1].trim(); + const task = pipeMatch[2].trim(); + const agent = availableAgents.find(a => a.name === agentName || a.name.includes(agentName) || agentName.includes(a.name)); + if (agent && task) { + assignments.push({ agent, task }); + continue; } } - if (verifiers.length > 0) { - conv = detectConvergence(transcript, maxTurns, costCeiling); - if (conv.converged) { - return { transcript, turnCount: transcript.turns.length, totalCost: transcript.totalCost, converged: true, reason: conv.reason }; + // Pattern: "Assigned: name → description" or "- name: description" + const arrowMatch = line.match(/(?:assigned|assign):\s*(\S+)\s*[→→-]\s*(.+)/i); + if (arrowMatch) { + const agentName = arrowMatch[1].trim(); + const task = arrowMatch[2].trim(); + const agent = availableAgents.find(a => a.name === agentName || a.name.includes(agentName) || agentName.includes(a.name)); + if (agent && task) { + assignments.push({ agent, task }); + continue; } } } - return { - transcript, - turnCount: transcript.turns.length, - totalCost: transcript.totalCost, - converged: false, - reason: `Max cycles 
reached (${MAX_CYCLES})`, - }; + // If no assignments parsed but workers exist, assign all workers the lead's full plan + if (assignments.length === 0 && availableAgents.length > 0) { + // Give the first worker the whole plan as context + const firstWorker = availableAgents.find(a => a.role === 'worker'); + if (firstWorker) { + assignments.push({ + agent: firstWorker, + task: `The lead produced this plan. Execute the most important task:\n\n${planOutput.slice(0, 3000)}`, + }); + } + } + + return assignments; } // ============================================================================= // Transcript Persistence // ============================================================================= -/** Save conversation transcript to .agents/conversations/{squad}/ */ export function saveTranscript(transcript: Transcript): string | null { const squadsDir = findSquadsDir(); if (!squadsDir) return null; const convDir = join(squadsDir, '..', 'conversations', transcript.squad); - if (!existsSync(convDir)) { - mkdirSync(convDir, { recursive: true }); - } + if (!existsSync(convDir)) mkdirSync(convDir, { recursive: true }); const id = Date.now().toString(36); const filePath = join(convDir, `${id}.md`); @@ -532,9 +622,7 @@ export function saveTranscript(transcript: Transcript): string | null { `Started: ${transcript.startedAt}`, `Turns: ${transcript.turns.length}`, `Estimated cost: $${transcript.totalCost.toFixed(2)}`, - '', - '---', - '', + '', '---', '', ]; for (const turn of transcript.turns) { diff --git a/templates/guardrail.json b/templates/guardrail.json new file mode 100644 index 00000000..60f5e16b --- /dev/null +++ b/templates/guardrail.json @@ -0,0 +1,16 @@ +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "bash -c 'cmd=$(echo \"$CLAUDE_TOOL_INPUT\" | python3 -c \"import sys,json; d=json.load(sys.stdin); print(d.get(\\\"command\\\",\\\"\\\"))\" 2>/dev/null || true); case \"$cmd\" in *\"rm -rf /\"*|*\"rm -rf 
~\"*|*\"rm -rf $HOME\"*) echo \"BLOCKED: rm -rf on root/home is not allowed\" >&2; exit 2;; *\"git push --force\"*|*\"git push -f \"*) echo \"BLOCKED: force push is not allowed\" >&2; exit 2;; *\"git reset --hard\"*) echo \"BLOCKED: git reset --hard is not allowed\" >&2; exit 2;; *\"git clean -f\"*|*\"git clean -fd\"*) echo \"BLOCKED: destructive git clean is not allowed\" >&2; exit 2;; esac'", + "timeout": 5 + } + ] + } + ] + } +} diff --git a/templates/seed/squads/demo/SQUAD.md b/templates/seed/squads/demo/SQUAD.md new file mode 100644 index 00000000..e8847c75 --- /dev/null +++ b/templates/seed/squads/demo/SQUAD.md @@ -0,0 +1,22 @@ +--- +name: Demo +lead: hello-world +model: sonnet +effort: low +--- + +# Demo + +Starter squad — proves your setup works in under 30 seconds. + +## Agents + +| Agent | Role | Purpose | +|-------|------|---------| +| hello-world | lead | Confirms your AI workforce is online and ready | + +## Usage + +```bash +squads run demo hello-world +``` diff --git a/templates/seed/squads/demo/hello-world.md b/templates/seed/squads/demo/hello-world.md new file mode 100644 index 00000000..40188ac8 --- /dev/null +++ b/templates/seed/squads/demo/hello-world.md @@ -0,0 +1,43 @@ +--- +name: Hello World +role: lead +squad: "demo" +provider: "{{PROVIDER}}" +model: sonnet +effort: low +timeout: 120 +max_retries: 1 +--- + +# Hello World + +## Role + +Confirm that your AI workforce is installed and ready to run. + +## Task + +1. Print a greeting that includes today's date and the project name: **{{BUSINESS_NAME}}** +2. Write a short summary (3-5 sentences) of what squads-cli does and why it matters +3. Save the result to `.agents/memory/demo/hello-world/state.md` in this format: + +``` +# Hello World — Run Log + +## Last Run +Date: +Status: success + +## What is squads-cli? 
+ +``` + +## Constraints + +- Keep output concise — this is a smoke test, not a research task +- Do not make any API calls or external requests +- Do not modify any files other than `.agents/memory/demo/hello-world/state.md` + +## Output + +A confirmation message and the updated state file. If you reach this step, setup is working. diff --git a/test/commands/catalog.test.ts b/test/commands/catalog.test.ts new file mode 100644 index 00000000..817bb0dc --- /dev/null +++ b/test/commands/catalog.test.ts @@ -0,0 +1,894 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { CatalogEntry, ScorecardDefinition, ScorecardResult } from '../../src/lib/idp/types.js'; + +// --- Mocks (before imports) --- + +const mockFindIdpDir = vi.fn<() => string | null>(); +vi.mock('../../src/lib/idp/resolver.js', () => ({ + findIdpDir: (...args: unknown[]) => mockFindIdpDir(...(args as [])), +})); + +const mockLoadCatalog = vi.fn<() => CatalogEntry[]>(); +const mockLoadService = vi.fn<(name: string) => CatalogEntry | null>(); +const mockLoadScorecard = vi.fn<(name: string) => ScorecardDefinition | null>(); +vi.mock('../../src/lib/idp/catalog-loader.js', () => ({ + loadCatalog: (...args: unknown[]) => mockLoadCatalog(...(args as [])), + loadService: (...args: unknown[]) => mockLoadService(...(args as [string])), + loadScorecard: (...args: unknown[]) => mockLoadScorecard(...(args as [string])), +})); + +const mockEvaluateService = vi.fn<(entry: CatalogEntry, scorecard: ScorecardDefinition) => ScorecardResult>(); +vi.mock('../../src/lib/idp/scorecard-engine.js', () => ({ + evaluateService: (...args: unknown[]) => mockEvaluateService(...(args as [CatalogEntry, ScorecardDefinition])), +})); + +const mockWriteLine = vi.fn(); +vi.mock('../../src/lib/terminal.js', () => ({ + writeLine: (...args: unknown[]) => mockWriteLine(...args), + colors: { dim: '', red: '', green: '', yellow: '', cyan: '', white: '', purple: '' }, + bold: '', + RESET: '', +})); + +// --- Import under 
test --- + +import { registerCatalogCommands } from '../../src/commands/catalog.js'; +import { Command } from 'commander'; + +// --- Helpers --- + +function makeCatalogEntry(overrides: Partial<{ + name: string; + type: 'product' | 'domain'; + stack: string; + owner: string; + repo: string; + description: string; + framework: string; + scorecard: string; + ciTemplate: string | null; + deploy: CatalogEntry['spec']['deploy']; + health: CatalogEntry['spec']['health']; + dependencies: CatalogEntry['spec']['dependencies']; + branches: CatalogEntry['spec']['branches']; + tags: string[]; +}> = {}): CatalogEntry { + return { + apiVersion: 'squads/v1', + kind: 'Service', + metadata: { + name: overrides.name ?? 'test-service', + description: overrides.description ?? 'A test service', + owner: overrides.owner ?? 'engineering', + repo: overrides.repo ?? 'org/test-service', + tags: overrides.tags ?? ['test'], + }, + spec: { + type: overrides.type ?? 'product', + stack: overrides.stack ?? 'node', + framework: overrides.framework, + scorecard: overrides.scorecard ?? 'product', + branches: overrides.branches ?? { + default: 'main', + development: 'develop', + workflow: 'pr-to-develop', + }, + ci: { + template: overrides.ciTemplate !== undefined ? overrides.ciTemplate : 'node', + required_checks: ['build', 'test'], + build_command: 'npm run build', + test_command: 'npm test', + }, + deploy: overrides.deploy !== undefined ? overrides.deploy : { + target: 'vercel', + trigger: 'push-to-main', + environments: [{ name: 'production', url: 'https://example.com' }], + }, + health: overrides.health ?? [ + { name: 'api', url: 'https://example.com/health', type: 'http', expect: 200 }, + ], + dependencies: overrides.dependencies ?? 
{ + runtime: [ + { service: 'postgres', version: '15', required: true, description: 'Primary database' }, + ], + }, + }, + }; +} + +function makeScorecard(): ScorecardDefinition { + return { + apiVersion: 'squads/v1', + kind: 'Scorecard', + metadata: { + name: 'product', + description: 'Product service scorecard', + }, + checks: [ + { name: 'ci-passing', description: 'CI is green', weight: 20, source: 'github', severity: 'critical' }, + { name: 'test-coverage', description: 'Tests exist', weight: 15, source: 'local', severity: 'high' }, + { name: 'readme-exists', description: 'README present', weight: 10, source: 'local', severity: 'medium' }, + ], + grades: { A: { min: 90 }, B: { min: 70 }, C: { min: 50 }, D: { min: 30 } }, + }; +} + +function makeScorecardResult(overrides: Partial = {}): ScorecardResult { + return { + service: overrides.service ?? 'test-service', + scorecard: overrides.scorecard ?? 'product', + score: overrides.score ?? 85, + grade: overrides.grade ?? 'B', + checks: overrides.checks ?? [ + { name: 'ci-passing', passed: true, weight: 20, detail: 'latest run: success' }, + { name: 'test-coverage', passed: true, weight: 15, detail: 'test command defined: npm test' }, + { name: 'readme-exists', passed: false, weight: 10, detail: 'README.md not found' }, + ], + timestamp: overrides.timestamp ?? 
new Date().toISOString(), + }; +} + +/** Create a program, register catalog commands, and parse argv */ +async function runCatalog(args: string[]): Promise { + const program = new Command(); + program.exitOverride(); // throw instead of process.exit + registerCatalogCommands(program); + await program.parseAsync(['node', 'squads', ...args]); +} + +// --- Tests --- + +describe('catalog command', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockFindIdpDir.mockReturnValue('/tmp/fake-idp'); + }); + + // ── catalog list ── + + describe('catalog list', () => { + it('shows error when IDP is not found', async () => { + mockFindIdpDir.mockReturnValue(null); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('IDP not found'), + ); + }); + + it('shows message when catalog is empty', async () => { + mockLoadCatalog.mockReturnValue([]); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('No catalog entries found'), + ); + }); + + it('lists product services', async () => { + const entry = makeCatalogEntry({ name: 'my-app', type: 'product', stack: 'react' }); + mockLoadCatalog.mockReturnValue([entry]); + + await runCatalog(['catalog', 'list']); + + // Should output the service name + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('my-app'), + ); + // Should show "Product Services" heading + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('Product Services'), + ); + }); + + it('lists domain repos', async () => { + const entry = makeCatalogEntry({ name: 'docs-repo', type: 'domain' }); + mockLoadCatalog.mockReturnValue([entry]); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('docs-repo'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('Domain Repos'), + ); + }); + + it('displays both product and domain 
services', async () => { + const product = makeCatalogEntry({ name: 'web-app', type: 'product' }); + const domain = makeCatalogEntry({ name: 'knowledge-base', type: 'domain' }); + mockLoadCatalog.mockReturnValue([product, domain]); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('Product Services'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('Domain Repos'), + ); + }); + + it('filters by --type product', async () => { + const product = makeCatalogEntry({ name: 'web-app', type: 'product' }); + const domain = makeCatalogEntry({ name: 'knowledge', type: 'domain' }); + mockLoadCatalog.mockReturnValue([product, domain]); + + await runCatalog(['catalog', 'list', '--type', 'product']); + + // Should show 1 service (only product) + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('1 services'), + ); + }); + + it('filters by --type domain', async () => { + const product = makeCatalogEntry({ name: 'web-app', type: 'product' }); + const domain = makeCatalogEntry({ name: 'knowledge', type: 'domain' }); + mockLoadCatalog.mockReturnValue([product, domain]); + + await runCatalog(['catalog', 'list', '--type', 'domain']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('1 services'), + ); + }); + + it('outputs JSON with --json', async () => { + const entry = makeCatalogEntry({ name: 'my-svc', type: 'product', stack: 'node', owner: 'eng' }); + mockLoadCatalog.mockReturnValue([entry]); + + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + await runCatalog(['catalog', 'list', '--json']); + + expect(consoleSpy).toHaveBeenCalledTimes(1); + const output = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(output).toHaveLength(1); + expect(output[0]).toEqual(expect.objectContaining({ + name: 'my-svc', + type: 'product', + stack: 'node', + owner: 'eng', + repo: 'org/test-service', + })); + + 
consoleSpy.mockRestore(); + }); + + it('shows CI template and deploy target for product services', async () => { + const entry = makeCatalogEntry({ + name: 'api-svc', + type: 'product', + ciTemplate: 'node', + deploy: { target: 'aws', trigger: 'push-to-main' }, + }); + mockLoadCatalog.mockReturnValue([entry]); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('ci:node'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('deploy:aws'), + ); + }); + + it('shows no-ci when CI template is null', async () => { + const entry = makeCatalogEntry({ name: 'simple', type: 'product', ciTemplate: null }); + mockLoadCatalog.mockReturnValue([entry]); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('no-ci'), + ); + }); + + it('shows manual deploy when no deploy target', async () => { + const entry = makeCatalogEntry({ name: 'lib', type: 'product', deploy: null }); + mockLoadCatalog.mockReturnValue([entry]); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('deploy:manual'), + ); + }); + }); + + // ── catalog show ── + + describe('catalog show', () => { + it('shows error when IDP not found', async () => { + mockFindIdpDir.mockReturnValue(null); + + await runCatalog(['catalog', 'show', 'my-svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('IDP not found'), + ); + }); + + it('shows error when service not found', async () => { + mockLoadService.mockReturnValue(null); + + await runCatalog(['catalog', 'show', 'nonexistent']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('Service not found: nonexistent'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining("squads catalog list"), + ); + }); + + it('displays service details', async () => { + const entry = makeCatalogEntry({ + 
name: 'web-app', + type: 'product', + stack: 'react', + framework: 'next', + owner: 'frontend', + repo: 'org/web-app', + description: 'Main web application', + tags: ['frontend', 'react'], + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'web-app']); + + // Service header + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('web-app'), + ); + // General section + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('frontend'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('org/web-app'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('react (next)'), + ); + }); + + it('displays branch info', async () => { + const entry = makeCatalogEntry({ + name: 'svc', + branches: { default: 'main', development: 'develop', workflow: 'pr-to-develop' }, + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('main'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('pr-to-develop'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('develop'), + ); + }); + + it('displays CI information when template exists', async () => { + const entry = makeCatalogEntry({ name: 'svc', ciTemplate: 'node' }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('node'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('build, test'), + ); + }); + + it('skips CI section when template is null', async () => { + const entry = makeCatalogEntry({ name: 'svc', ciTemplate: null }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + // The "Template:" line should not appear for CI + const ciTemplateCalls = 
mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('Template:'), + ); + expect(ciTemplateCalls).toHaveLength(0); + }); + + it('displays deploy info', async () => { + const entry = makeCatalogEntry({ + name: 'svc', + deploy: { + target: 'vercel', + trigger: 'push-to-main', + environments: [ + { name: 'staging', url: 'https://staging.example.com' }, + { name: 'production', url: 'https://example.com' }, + ], + }, + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('vercel'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('staging.example.com'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('example.com'), + ); + }); + + it('skips deploy section when deploy is null', async () => { + const entry = makeCatalogEntry({ name: 'svc', deploy: null }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + const deployCalls = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('Deploy'), + ); + expect(deployCalls).toHaveLength(0); + }); + + it('displays dependencies', async () => { + const entry = makeCatalogEntry({ + name: 'svc', + dependencies: { + runtime: [ + { service: 'redis', version: '7', required: true, description: 'Cache layer' }, + { service: 'stripe-api', required: false, description: 'Payment provider' }, + ], + }, + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('redis'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('(required)'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('stripe-api'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + 
expect.stringContaining('(optional)'), + ); + }); + + it('skips dependencies section when empty', async () => { + const entry = makeCatalogEntry({ + name: 'svc', + dependencies: { runtime: [] }, + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + const depCalls = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('Dependencies'), + ); + expect(depCalls).toHaveLength(0); + }); + + it('displays health endpoints', async () => { + const entry = makeCatalogEntry({ + name: 'svc', + health: [ + { name: 'api', url: 'https://api.example.com/health', type: 'http', expect: 200 }, + { name: 'metrics', url: 'https://api.example.com/metrics', type: 'json', expect: 200 }, + ], + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('api.example.com/health'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('api.example.com/metrics'), + ); + }); + + it('skips health section when empty', async () => { + const entry = makeCatalogEntry({ name: 'svc', health: [] }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + const healthCalls = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('Health'), + ); + expect(healthCalls).toHaveLength(0); + }); + + it('displays tags', async () => { + const entry = makeCatalogEntry({ name: 'svc', tags: ['backend', 'api', 'graphql'] }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('backend, api, graphql'), + ); + }); + + it('shows none when tags is empty', async () => { + const entry = makeCatalogEntry({ name: 'svc', tags: [] }); + mockLoadService.mockReturnValue(entry); + + await 
runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('none'), + ); + }); + + it('shows framework in stack when present', async () => { + const entry = makeCatalogEntry({ name: 'svc', stack: 'node', framework: 'express' }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('node (express)'), + ); + }); + + it('shows stack without parentheses when no framework', async () => { + const entry = makeCatalogEntry({ name: 'svc', stack: 'go' }); + // Explicitly no framework + delete (entry.spec as Record)['framework']; + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + // Should have 'go' but not '(undefined)' or '()' + const stackCalls = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('Stack:'), + ); + expect(stackCalls).toHaveLength(1); + expect(stackCalls[0][0]).toContain('go'); + expect(stackCalls[0][0]).not.toContain('('); + }); + + it('does not show dev branch when not set', async () => { + const entry = makeCatalogEntry({ + name: 'svc', + branches: { default: 'main', workflow: 'direct-to-main' }, + }); + mockLoadService.mockReturnValue(entry); + + await runCatalog(['catalog', 'show', 'svc']); + + const devBranchCalls = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('Dev branch:'), + ); + expect(devBranchCalls).toHaveLength(0); + }); + + it('outputs JSON with --json', async () => { + const entry = makeCatalogEntry({ name: 'json-svc' }); + mockLoadService.mockReturnValue(entry); + + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + await runCatalog(['catalog', 'show', 'json-svc', '--json']); + + expect(consoleSpy).toHaveBeenCalledTimes(1); + const output = JSON.parse(consoleSpy.mock.calls[0][0] as string); 
+ expect(output.metadata.name).toBe('json-svc'); + expect(output.spec.type).toBe('product'); + + consoleSpy.mockRestore(); + }); + }); + + // ── catalog check ── + + describe('catalog check', () => { + it('shows error when IDP not found', async () => { + mockFindIdpDir.mockReturnValue(null); + + await runCatalog(['catalog', 'check']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('IDP not found'), + ); + }); + + it('shows error when no services found', async () => { + mockLoadCatalog.mockReturnValue([]); + + await runCatalog(['catalog', 'check']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('No services found'), + ); + }); + + it('checks a specific service by name', async () => { + const entry = makeCatalogEntry({ name: 'my-svc', scorecard: 'product' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ service: 'my-svc', grade: 'A', score: 95 }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + await runCatalog(['catalog', 'check', 'my-svc']); + + expect(mockLoadService).toHaveBeenCalledWith('my-svc'); + expect(mockLoadScorecard).toHaveBeenCalledWith('product'); + expect(mockEvaluateService).toHaveBeenCalledWith(entry, scorecard); + + // Should display grade + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('my-svc'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('A'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('95'), + ); + }); + + it('checks all services when no name given', async () => { + const entry1 = makeCatalogEntry({ name: 'svc-1', scorecard: 'product' }); + const entry2 = makeCatalogEntry({ name: 'svc-2', scorecard: 'product' }); + const scorecard = makeScorecard(); + const result1 = makeScorecardResult({ service: 'svc-1', grade: 'A', score: 92 }); + const result2 = makeScorecardResult({ 
service: 'svc-2', grade: 'C', score: 55 }); + + mockLoadCatalog.mockReturnValue([entry1, entry2]); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService + .mockReturnValueOnce(result1) + .mockReturnValueOnce(result2); + + await runCatalog(['catalog', 'check']); + + expect(mockEvaluateService).toHaveBeenCalledTimes(2); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('svc-1'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('svc-2'), + ); + }); + + it('shows grade A in green', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ grade: 'A', score: 95 }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + await runCatalog(['catalog', 'check', 'svc']); + + // Grade A is rendered (colors are empty strings in mock) + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('A'), + ); + }); + + it('shows grade B in cyan', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ grade: 'B', score: 80 }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + await runCatalog(['catalog', 'check', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('B'), + ); + }); + + it('shows grade C in yellow', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ grade: 'C', score: 55 }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + await runCatalog(['catalog', 'check', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + 
expect.stringContaining('C'), + ); + }); + + it('shows grade D/F in red', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ grade: 'D', score: 30 }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + await runCatalog(['catalog', 'check', 'svc']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('D'), + ); + }); + + it('shows pass/fail for individual checks', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ + checks: [ + { name: 'ci-passing', passed: true, weight: 20, detail: 'latest run: success' }, + { name: 'readme-exists', passed: false, weight: 10, detail: 'README.md not found' }, + ], + }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + await runCatalog(['catalog', 'check', 'svc']); + + // pass indicator for ci-passing + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('pass'), + ); + // fail indicator for readme-exists + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('fail'), + ); + // Check detail + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('latest run: success'), + ); + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('README.md not found'), + ); + }); + + it('skips service when scorecard not found', async () => { + const entry = makeCatalogEntry({ name: 'svc', scorecard: 'nonexistent' }); + mockLoadCatalog.mockReturnValue([entry]); + mockLoadScorecard.mockReturnValue(null); + + await runCatalog(['catalog', 'check']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining("No scorecard 'nonexistent'"), + ); + 
expect(mockEvaluateService).not.toHaveBeenCalled(); + }); + + it('handles specific service not found (empty array after filter)', async () => { + mockLoadService.mockReturnValue(null); + + await runCatalog(['catalog', 'check', 'ghost']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('No services found'), + ); + }); + + it('outputs JSON with --json', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult({ service: 'svc', grade: 'B', score: 80 }); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + await runCatalog(['catalog', 'check', 'svc', '--json']); + + expect(consoleSpy).toHaveBeenCalledTimes(1); + const output = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(output).toHaveLength(1); + expect(output[0].service).toBe('svc'); + expect(output[0].grade).toBe('B'); + expect(output[0].score).toBe(80); + + consoleSpy.mockRestore(); + }); + + it('JSON output includes all services when checking all', async () => { + const entry1 = makeCatalogEntry({ name: 'svc-1', scorecard: 'product' }); + const entry2 = makeCatalogEntry({ name: 'svc-2', scorecard: 'product' }); + const scorecard = makeScorecard(); + + mockLoadCatalog.mockReturnValue([entry1, entry2]); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService + .mockReturnValueOnce(makeScorecardResult({ service: 'svc-1' })) + .mockReturnValueOnce(makeScorecardResult({ service: 'svc-2' })); + + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + await runCatalog(['catalog', 'check', '--json']); + + const output = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(output).toHaveLength(2); + expect(output.map((r: ScorecardResult) => r.service)).toEqual(['svc-1', 'svc-2']); + + 
consoleSpy.mockRestore(); + }); + + it('does not call writeLine for individual checks in --json mode', async () => { + const entry = makeCatalogEntry({ name: 'svc' }); + const scorecard = makeScorecard(); + const result = makeScorecardResult(); + + mockLoadService.mockReturnValue(entry); + mockLoadScorecard.mockReturnValue(scorecard); + mockEvaluateService.mockReturnValue(result); + + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + mockWriteLine.mockClear(); + await runCatalog(['catalog', 'check', 'svc', '--json']); + + // In --json mode, service-level writeLine calls should not include grade/check lines + const serviceLines = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('pass'), + ); + expect(serviceLines).toHaveLength(0); + + consoleSpy.mockRestore(); + }); + }); + + // ── noIdp helper ── + + describe('noIdp guard', () => { + it('prints instructions when IDP not found', async () => { + mockFindIdpDir.mockReturnValue(null); + + await runCatalog(['catalog', 'list']); + + expect(mockWriteLine).toHaveBeenCalledWith( + expect.stringContaining('SQUADS_IDP_PATH'), + ); + }); + + it('proceeds normally when IDP is found', async () => { + mockFindIdpDir.mockReturnValue('/some/idp/path'); + mockLoadCatalog.mockReturnValue([]); + + await runCatalog(['catalog', 'list']); + + // Should not show the IDP not found message + const idpNotFoundCalls = mockWriteLine.mock.calls.filter( + (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).includes('IDP not found'), + ); + expect(idpNotFoundCalls).toHaveLength(0); + }); + }); +}); diff --git a/test/commands/dashboard.test.ts b/test/commands/dashboard.test.ts new file mode 100644 index 00000000..3f468807 --- /dev/null +++ b/test/commands/dashboard.test.ts @@ -0,0 +1,934 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +// ── Mocks (must be declared before imports) ── + +vi.mock('fs', async (importOriginal) => { + 
const actual = await importOriginal(); + return { + ...actual, + existsSync: vi.fn(() => true), + readdirSync: vi.fn(() => []), + statSync: vi.fn(() => ({ mtimeMs: Date.now() - 1000 })), + }; +}); + +vi.mock('../../src/lib/squad-parser.js', () => ({ + findSquadsDir: vi.fn(), + listSquads: vi.fn(), + loadSquad: vi.fn(), + hasLocalInfraConfig: vi.fn(() => false), +})); + +vi.mock('../../src/lib/memory.js', () => ({ + findMemoryDir: vi.fn(() => null), +})); + +vi.mock('../../src/lib/costs.js', () => ({ + fetchCostSummary: vi.fn().mockResolvedValue(null), + fetchInsights: vi.fn().mockResolvedValue(null), + fetchBridgeStats: vi.fn().mockResolvedValue(null), + isMaxPlan: vi.fn(() => false), + getPlanType: vi.fn(() => 'unknown'), + fetchNpmStats: vi.fn().mockResolvedValue(null), + fetchQuotaInfo: vi.fn().mockResolvedValue(null), + fetchClaudeCodeCapacity: vi.fn().mockResolvedValue(null), + calculateROIMetrics: vi.fn(() => ({ + totalCostUsd: 0, + costPerGoal: 0, + costPerPR: 0, + costPerCommit: 0, + roiMultiplier: 0, + estimatedValueUsd: 0, + dailyProjectedCost: 0, + weeklyProjectedCost: 0, + monthlyProjectedCost: 0, + })), + calculateSquadCostProjections: vi.fn(() => []), +})); + +vi.mock('../../src/lib/git.js', () => ({ + getMultiRepoGitStats: vi.fn().mockResolvedValue(null), + getActivitySparkline: vi.fn().mockResolvedValue([]), + getGitHubStatsOptimized: vi.fn(() => null), +})); + +vi.mock('../../src/lib/db.js', () => ({ + saveDashboardSnapshot: vi.fn().mockResolvedValue(null), + isDatabaseAvailable: vi.fn().mockResolvedValue(false), + getDashboardHistory: vi.fn().mockResolvedValue([]), + closeDatabase: vi.fn().mockResolvedValue(undefined), + getLatestBaseline: vi.fn().mockResolvedValue(null), +})); + +vi.mock('../../src/lib/sessions.js', () => ({ + getLiveSessionSummaryAsync: vi.fn().mockResolvedValue({ + totalSessions: 0, + bySquad: {}, + squadCount: 0, + byTool: {}, + }), + cleanupStaleSessions: vi.fn(), +})); + +vi.mock('../../src/lib/update.js', () => ({ + 
checkForUpdate: vi.fn(() => ({ updateAvailable: false })), +})); + +vi.mock('../../src/lib/telemetry.js', () => ({ + track: vi.fn().mockResolvedValue(undefined), + Events: { CLI_DASHBOARD: 'cli_dashboard' }, +})); + +vi.mock('../../src/lib/terminal.js', () => ({ + writeLine: vi.fn(), + colors: { dim: '', red: '', green: '', yellow: '', purple: '', cyan: '', white: '' }, + bold: '', + RESET: '', + gradient: vi.fn((s: string) => s), + progressBar: vi.fn((_pct: number, _w: number) => '[====]'), + box: { + topLeft: '+', topRight: '+', bottomLeft: '+', bottomRight: '+', + vertical: '|', horizontal: '-', teeLeft: '+', teeRight: '+', + }, + padEnd: vi.fn((s: string, n: number) => s.padEnd(n)), + truncate: vi.fn((s: string, n: number) => s.length > n ? s.slice(0, n - 1) + '~' : s), + icons: { active: '*', idle: 'o', error: 'x', pending: 'o', progress: '>', empty: '-', warning: '!' }, + sparkline: vi.fn(() => '|____|'), + barChart: vi.fn(() => '[===]'), +})); + +// ── Imports (after mocks) ── + +import { dashboardCommand } from '../../src/commands/dashboard.js'; +import { findSquadsDir, listSquads, loadSquad, hasLocalInfraConfig } from '../../src/lib/squad-parser.js'; +import { findMemoryDir } from '../../src/lib/memory.js'; +import { getLiveSessionSummaryAsync } from '../../src/lib/sessions.js'; +import { getMultiRepoGitStats, getActivitySparkline, getGitHubStatsOptimized } from '../../src/lib/git.js'; +import { + fetchCostSummary, fetchBridgeStats, fetchInsights, + getPlanType, isMaxPlan, + fetchNpmStats, fetchQuotaInfo, fetchClaudeCodeCapacity, + calculateROIMetrics, calculateSquadCostProjections, +} from '../../src/lib/costs.js'; +import { isDatabaseAvailable, getDashboardHistory, closeDatabase, getLatestBaseline } from '../../src/lib/db.js'; +import { checkForUpdate } from '../../src/lib/update.js'; +import { writeLine } from '../../src/lib/terminal.js'; +import { existsSync, readdirSync } from 'fs'; + +// ── Typed mocks ── + +const mockFindSquadsDir = 
vi.mocked(findSquadsDir); +const mockListSquads = vi.mocked(listSquads); +const mockLoadSquad = vi.mocked(loadSquad); +const mockHasLocalInfraConfig = vi.mocked(hasLocalInfraConfig); +const mockFindMemoryDir = vi.mocked(findMemoryDir); +const mockGetLiveSessionSummaryAsync = vi.mocked(getLiveSessionSummaryAsync); +const mockGetMultiRepoGitStats = vi.mocked(getMultiRepoGitStats); +const mockGetActivitySparkline = vi.mocked(getActivitySparkline); +const mockGetGitHubStatsOptimized = vi.mocked(getGitHubStatsOptimized); +const mockFetchCostSummary = vi.mocked(fetchCostSummary); +const mockFetchBridgeStats = vi.mocked(fetchBridgeStats); +const mockFetchInsights = vi.mocked(fetchInsights); +const mockGetPlanType = vi.mocked(getPlanType); +const mockIsMaxPlan = vi.mocked(isMaxPlan); +const mockFetchNpmStats = vi.mocked(fetchNpmStats); +const mockFetchQuotaInfo = vi.mocked(fetchQuotaInfo); +const mockFetchClaudeCodeCapacity = vi.mocked(fetchClaudeCodeCapacity); +const mockCalculateROIMetrics = vi.mocked(calculateROIMetrics); +const mockCalculateSquadCostProjections = vi.mocked(calculateSquadCostProjections); +const mockIsDatabaseAvailable = vi.mocked(isDatabaseAvailable); +const mockGetDashboardHistory = vi.mocked(getDashboardHistory); +const mockCloseDatabase = vi.mocked(closeDatabase); +const mockGetLatestBaseline = vi.mocked(getLatestBaseline); +const mockCheckForUpdate = vi.mocked(checkForUpdate); +const mockWriteLine = vi.mocked(writeLine); +const mockExistsSync = vi.mocked(existsSync); +const mockReaddirSync = vi.mocked(readdirSync); + +// ── Test helpers ── + +function makeSquad(overrides: Record = {}) { + return { + name: 'engineering', + mission: 'Build great software', + goals: [ + { description: 'Ship v1.0', metrics: [], completed: false }, + { description: 'Fix critical bug', metrics: [], completed: true }, + ], + context: {}, + agents: [], + pipelines: [], + routines: [], + ...overrides, + } as ReturnType; +} + +function allWriteLineOutput(): string { + return 
mockWriteLine.mock.calls.map(c => String(c[0] ?? '')).join('\n'); +} + +// ── Tests ── + +describe('dashboardCommand', () => { + beforeEach(() => { + vi.clearAllMocks(); + + // Default: squads dir exists with one squad + mockFindSquadsDir.mockReturnValue('/test/.agents/squads'); + mockListSquads.mockReturnValue(['engineering']); + mockLoadSquad.mockReturnValue(makeSquad()); + mockFindMemoryDir.mockReturnValue(null); + mockHasLocalInfraConfig.mockReturnValue(false); + + // Defaults for async data fetchers (all return "no data") + mockGetLiveSessionSummaryAsync.mockResolvedValue({ + totalSessions: 0, + bySquad: {}, + squadCount: 0, + byTool: {}, + }); + mockGetMultiRepoGitStats.mockResolvedValue(null); + mockGetActivitySparkline.mockResolvedValue([]); + mockGetGitHubStatsOptimized.mockReturnValue(null); + mockFetchCostSummary.mockResolvedValue(null); + mockFetchBridgeStats.mockResolvedValue(null); + mockFetchInsights.mockResolvedValue(null); + mockFetchNpmStats.mockResolvedValue(null); + mockFetchQuotaInfo.mockResolvedValue(null); + mockFetchClaudeCodeCapacity.mockResolvedValue(null); + mockCalculateROIMetrics.mockReturnValue({ + totalCostUsd: 0, costPerGoal: 0, costPerPR: 0, costPerCommit: 0, + roiMultiplier: 0, estimatedValueUsd: 0, + dailyProjectedCost: 0, weeklyProjectedCost: 0, monthlyProjectedCost: 0, + } as ReturnType); + mockCalculateSquadCostProjections.mockReturnValue([]); + mockIsDatabaseAvailable.mockResolvedValue(false); + mockGetDashboardHistory.mockResolvedValue([]); + mockCloseDatabase.mockResolvedValue(undefined); + mockGetLatestBaseline.mockResolvedValue(null); + mockCheckForUpdate.mockReturnValue({ updateAvailable: false } as ReturnType); + mockGetPlanType.mockReturnValue('unknown'); + mockIsMaxPlan.mockReturnValue(false); + + // fs defaults: findAgentsSquadsDir returns null (no hq dir, no .git) + mockExistsSync.mockReturnValue(false); + mockReaddirSync.mockReturnValue([]); + }); + + // ── Basic rendering ── + + it('renders dashboard without errors 
for a single squad', async () => { + await expect(dashboardCommand()).resolves.toBeUndefined(); + expect(mockCloseDatabase).toHaveBeenCalled(); + }); + + it('returns early when no squads dir found', async () => { + mockFindSquadsDir.mockReturnValue(null); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('No .agents/squads directory found'); + // Should not call closeDatabase since we return early + expect(mockCloseDatabase).not.toHaveBeenCalled(); + }); + + it('renders header with squad gradient title', async () => { + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('squads'); + expect(output).toContain('dashboard'); + }); + + it('renders squads table with squad name', async () => { + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('SQUAD'); + expect(output).toContain('engineering'); + }); + + it('renders footer with command hints', async () => { + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('squads run'); + expect(output).toContain('squads goal set'); + }); + + it('tracks telemetry event', async () => { + const { track } = await import('../../src/lib/telemetry.js'); + await dashboardCommand({ verbose: true }); + expect(track).toHaveBeenCalledWith('cli_dashboard', expect.objectContaining({ verbose: true })); + }); + + // ── Multiple squads ── + + it('renders multiple squads in the table', async () => { + mockListSquads.mockReturnValue(['engineering', 'marketing', 'product']); + mockLoadSquad.mockImplementation((name: string) => + makeSquad({ name, mission: `${name} mission` }), + ); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('engineering'); + expect(output).toContain('marketing'); + expect(output).toContain('product'); + }); + + // ── Empty / edge cases ── + + it('handles zero squads', async () => { + 
mockListSquads.mockReturnValue([]); + await expect(dashboardCommand()).resolves.toBeUndefined(); + }); + + it('handles squad with no goals', async () => { + mockLoadSquad.mockReturnValue(makeSquad({ goals: [] })); + await expect(dashboardCommand()).resolves.toBeUndefined(); + }); + + it('handles squad with all goals completed', async () => { + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [ + { description: 'Done A', metrics: [], completed: true }, + { description: 'Done B', metrics: [], completed: true }, + ], + })); + await expect(dashboardCommand()).resolves.toBeUndefined(); + }); + + it('skips a squad when loadSquad returns null', async () => { + mockListSquads.mockReturnValue(['engineering', 'missing']); + mockLoadSquad.mockImplementation((name: string) => { + if (name === 'missing') return null; + return makeSquad({ name }); + }); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('engineering'); + expect(output).not.toContain('missing'); + }); + + // ── JSON output ── + + it('outputs valid JSON with --json flag', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + await dashboardCommand({ json: true }); + + expect(consoleSpy).toHaveBeenCalledTimes(1); + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.ok).toBe(true); + expect(parsed.command).toBe('dash'); + expect(parsed.data).toBeDefined(); + expect(parsed.data.squads).toBeInstanceOf(Array); + expect(parsed.data.stats).toBeDefined(); + expect(parsed.data.goals).toBeDefined(); + consoleSpy.mockRestore(); + }); + + it('JSON output includes squad data', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + await dashboardCommand({ json: true }); + + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + const squad = parsed.data.squads[0]; + expect(squad.name).toBe('engineering'); + expect(squad.mission).toBe('Build great software'); + 
expect(squad.goals).toBeInstanceOf(Array); + consoleSpy.mockRestore(); + }); + + it('JSON output includes goal counts', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + await dashboardCommand({ json: true }); + + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.data.goals.active).toBe(1); + expect(parsed.data.goals.completed).toBe(1); + consoleSpy.mockRestore(); + }); + + it('JSON output includes stats aggregation', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + mockListSquads.mockReturnValue(['engineering', 'marketing']); + mockLoadSquad.mockImplementation((name: string) => + makeSquad({ name, goals: [{ description: 'A goal', metrics: [], completed: false }] }), + ); + await dashboardCommand({ json: true }); + + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.data.stats.totalSquads).toBe(2); + consoleSpy.mockRestore(); + }); + + // ── Session display ── + + it('shows active sessions when present', async () => { + mockGetLiveSessionSummaryAsync.mockResolvedValue({ + totalSessions: 3, + bySquad: { engineering: 2, marketing: 1 }, + squadCount: 2, + byTool: { claude: 2, cursor: 1 }, + }); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('3'); + expect(output).toContain('session'); + }); + + it('does not show session line when no active sessions', async () => { + await dashboardCommand(); + const output = allWriteLineOutput(); + // writeLine calls should not include "active session" text + expect(output).not.toContain('active session'); + }); + + // ── Update available ── + + it('shows update notice when update available', async () => { + mockCheckForUpdate.mockReturnValue({ + updateAvailable: true, + currentVersion: '0.7.0', + latestVersion: '0.8.0', + } as ReturnType); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Update 
available'); + expect(output).toContain('0.8.0'); + }); + + // ── CEO mode ── + + it('renders CEO report with --ceo flag', async () => { + await dashboardCommand({ ceo: true }); + const output = allWriteLineOutput(); + expect(output).toContain('CEO Report'); + expect(output).toContain('METRIC'); + expect(output).toContain('Active Squads'); + }); + + it('CEO report shows blockers for squads with no active goals', async () => { + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [{ description: 'All done', metrics: [], completed: true }], + })); + await dashboardCommand({ ceo: true }); + const output = allWriteLineOutput(); + expect(output).toContain('Blockers'); + expect(output).toContain('No active goals'); + }); + + it('CEO report shows P0 and P1 goals', async () => { + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [ + { description: 'Build revenue pipeline', metrics: [], completed: false }, + { description: 'Fix deploy script', metrics: [], completed: false }, + ], + })); + await dashboardCommand({ ceo: true }); + const output = allWriteLineOutput(); + expect(output).toContain('P0'); + expect(output).toContain('P1'); + }); + + it('CEO report shows Next Steps section', async () => { + await dashboardCommand({ ceo: true }); + const output = allWriteLineOutput(); + expect(output).toContain('Next Steps'); + }); + + // ── Goals section ── + + it('renders goals section with tactical/strategic sorting', async () => { + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [ + { description: 'Build revenue pipeline', metrics: [], completed: false }, + { description: 'Fix login bug', metrics: [], completed: false }, + { description: 'Improve performance', metrics: [], completed: false }, + ], + })); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Goals'); + expect(output).toContain('tactical'); + }); + + it('renders goal progress when available', async () => { + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [ + { 
description: 'Fix login bug', metrics: [], completed: false, progress: '70% done' }, + ], + })); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('70% done'); + }); + + it('shows +N more when goals exceed limit', async () => { + const manyGoals = Array.from({ length: 8 }, (_, i) => ({ + description: `Fix bug ${i + 1}`, + metrics: [], + completed: false, + })); + mockLoadSquad.mockReturnValue(makeSquad({ goals: manyGoals })); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('+'); + expect(output).toContain('more'); + }); + + // ── Git activity section ── + + it('renders git performance when stats available', async () => { + mockExistsSync.mockReturnValue(true); // findAgentsSquadsDir will find .git + mockGetMultiRepoGitStats.mockResolvedValue({ + totalCommits: 42, + avgCommitsPerDay: 3, + activeDays: 14, + peakDay: { date: '2026-03-15', count: 8 }, + commitsByRepo: new Map([['squads-cli', 30], ['hq', 12]]), + commitsByAuthor: new Map([['alice', 25], ['bob', 17]]), + repos: [ + { name: 'squads-cli', commits: 30, branch: 'main', latestCommit: 'abc' }, + { name: 'hq', commits: 12, branch: 'main', latestCommit: 'def' }, + ], + recentCommits: [ + { hash: 'abc1234', message: 'feat: add dashboard tests', repo: 'squads-cli', date: '2026-03-27', author: 'alice' }, + ], + } as Awaited>); + mockGetActivitySparkline.mockResolvedValue([1, 3, 5, 2, 0, 4, 7, 3, 2, 1, 5, 6, 4, 3]); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Git Activity'); + expect(output).toContain('42'); + expect(output).toContain('commits'); + }); + + it('shows "no commits" when git stats empty', async () => { + mockExistsSync.mockReturnValue(true); + mockGetMultiRepoGitStats.mockResolvedValue({ + totalCommits: 0, + avgCommitsPerDay: 0, + activeDays: 0, + peakDay: null, + commitsByRepo: new Map(), + commitsByAuthor: new Map(), + repos: [], + recentCommits: [], + } as 
unknown as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('no commits'); + }); + + // ── Working On section ── + + it('renders working on section with recent commits', async () => { + mockExistsSync.mockReturnValue(true); + mockGetMultiRepoGitStats.mockResolvedValue({ + totalCommits: 5, + avgCommitsPerDay: 1, + activeDays: 5, + peakDay: null, + commitsByRepo: new Map([['squads-cli', 5]]), + commitsByAuthor: new Map([['alice', 5]]), + repos: [{ name: 'squads-cli', commits: 5, branch: 'main', latestCommit: 'abc' }], + recentCommits: [ + { hash: 'abc1234567', message: 'feat: something cool', repo: 'squads-cli', date: '2026-03-27', author: 'alice' }, + { hash: 'def5678901', message: 'fix: another thing', repo: 'hq', date: '2026-03-26', author: 'bob' }, + ], + } as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Working On'); + expect(output).toContain('abc1234'); // short hash + }); + + // ── Token economics / costs ── + + it('renders token economics with cost data', async () => { + mockHasLocalInfraConfig.mockReturnValue(true); + mockFetchBridgeStats.mockResolvedValue({ + today: { costUsd: 1.5, generations: 25, inputTokens: 100000, outputTokens: 20000 }, + week: { costUsd: 8.0, generations: 100, inputTokens: 500000, outputTokens: 100000, byModel: [] }, + budget: { used: 1.5, daily: 10, usedPct: 15 }, + bySquad: [], + byModel: [], + health: { postgres: 'connected', redis: 'disabled' }, + source: 'bridge', + } as unknown as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Token Economics'); + }); + + it('renders plan info when getPlanType returns max', async () => { + mockGetPlanType.mockReturnValue('max'); + mockHasLocalInfraConfig.mockReturnValue(true); + mockFetchBridgeStats.mockResolvedValue({ + today: { costUsd: 0, generations: 0, inputTokens: 0, outputTokens: 0 }, + week: null, + budget: 
{ used: 0, daily: 10, usedPct: 0 }, + bySquad: [], + byModel: [], + health: { postgres: 'disconnected', redis: 'disabled' }, + source: 'bridge', + } as unknown as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Claude Max'); + }); + + it('renders setup hint when no infra configured', async () => { + mockHasLocalInfraConfig.mockReturnValue(false); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Token Economics'); + expect(output).toContain('Track costs'); + }); + + // ── Infrastructure section ── + + it('renders infrastructure as local-only when no infra', async () => { + mockHasLocalInfraConfig.mockReturnValue(false); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Infrastructure'); + expect(output).toContain('local only'); + }); + + it('renders infrastructure health when bridge stats available', async () => { + mockHasLocalInfraConfig.mockReturnValue(true); + mockFetchBridgeStats.mockResolvedValue({ + today: { costUsd: 0, generations: 0, inputTokens: 0, outputTokens: 0 }, + week: null, + budget: { used: 0, daily: 10, usedPct: 0 }, + bySquad: [], + byModel: [], + health: { postgres: 'connected', redis: 'connected' }, + source: 'bridge', + } as unknown as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('postgres'); + expect(output).toContain('redis'); + }); + + // ── Quota / subscription ROI ── + + it('renders subscription ROI section when quota data available', async () => { + mockFetchQuotaInfo.mockResolvedValue({ + monthlyQuota: 200, + monthlyUsed: 450, + autonomyScore: 80, + confidenceLevel: 'high', + learningCount: 5, + } as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Subscription ROI'); + expect(output).toContain('Excellent value'); + }); + + it('skips quota section when no quota data', 
async () => { + mockFetchQuotaInfo.mockResolvedValue(null); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).not.toContain('Subscription ROI'); + }); + + // ── Capacity section ── + + it('renders capacity section when Claude Code capacity available', async () => { + mockFetchClaudeCodeCapacity.mockResolvedValue({ + weeklyCapacityPct: 45, + weeklyResetDate: 'Mon Apr 1', + weeklyTokensUsed: 500000, + weeklyTokensLimit: 1100000, + opusTokensUsed: 300000, + sonnetTokensUsed: 200000, + sessionCapacityPct: 5, + sessionResetTime: '3:00 PM', + } as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Subscription Capacity'); + expect(output).toContain('headroom'); + }); + + it('skips capacity section when no capacity data', async () => { + mockFetchClaudeCodeCapacity.mockResolvedValue(null); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).not.toContain('Subscription Capacity'); + }); + + // ── Historical trends ── + + it('renders historical trends when db has history', async () => { + mockIsDatabaseAvailable.mockResolvedValue(true); + mockGetDashboardHistory.mockResolvedValue([ + { costUsd: 1.5, inputTokens: 50000, outputTokens: 10000, goalProgressPct: 30, totalSquads: 3, totalCommits: 10, totalPrsMerged: 2, totalIssuesClosed: 5, totalIssuesOpen: 3, dailyBudgetUsd: 10, commits30d: 10, avgCommitsPerDay: 2, activeDays: 5, peakCommits: 4, peakDate: null, squadsData: [], authorsData: [], reposData: [] }, + { costUsd: 2.0, inputTokens: 60000, outputTokens: 12000, goalProgressPct: 35, totalSquads: 3, totalCommits: 15, totalPrsMerged: 3, totalIssuesClosed: 7, totalIssuesOpen: 2, dailyBudgetUsd: 10, commits30d: 15, avgCommitsPerDay: 3, activeDays: 6, peakCommits: 5, peakDate: null, squadsData: [], authorsData: [], reposData: [] }, + { costUsd: 1.8, inputTokens: 55000, outputTokens: 11000, goalProgressPct: 40, totalSquads: 3, totalCommits: 12, 
totalPrsMerged: 2, totalIssuesClosed: 6, totalIssuesOpen: 2, dailyBudgetUsd: 10, commits30d: 12, avgCommitsPerDay: 2, activeDays: 5, peakCommits: 4, peakDate: null, squadsData: [], authorsData: [], reposData: [] }, + ] as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Usage Trends'); + }); + + it('skips trends when db not available', async () => { + mockIsDatabaseAvailable.mockResolvedValue(false); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).not.toContain('Usage Trends'); + }); + + // ── Insights section ── + + it('renders agent insights when insights available', async () => { + mockFetchInsights.mockResolvedValue({ + source: 'bridge', + days: 7, + taskMetrics: [ + { agent: 'solver', tasksTotal: 10, tasksCompleted: 8, tasksFailed: 2, totalRetries: 3, tasksWithRetries: 2, avgDurationMs: 5000 }, + ], + toolMetrics: [], + qualityMetrics: { avgFirstTrySuccess: 0.8, avgRetryRate: 0.2 }, + } as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Agent Insights'); + expect(output).toContain('completed'); + }); + + it('skips insights when source is none', async () => { + mockFetchInsights.mockResolvedValue({ + source: 'none', + days: 7, + taskMetrics: [], + toolMetrics: [], + qualityMetrics: { avgFirstTrySuccess: 0, avgRetryRate: 0 }, + } as Awaited>); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).not.toContain('Agent Insights'); + }); + + // ── ROI section ── + + it('renders ROI section when costs available', async () => { + mockFetchCostSummary.mockResolvedValue({ + totalCost: 5.0, + dailyBudget: 10, + totalCalls: 50, + bySquad: [], + } as unknown as Awaited>); + mockCalculateROIMetrics.mockReturnValue({ + totalCostUsd: 5.0, + costPerGoal: 2.5, + costPerPR: 1.0, + costPerCommit: 0.5, + roiMultiplier: 3.0, + estimatedValueUsd: 15.0, + dailyProjectedCost: 5.0, + 
weeklyProjectedCost: 35.0, + monthlyProjectedCost: 150.0, + } as ReturnType); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('ROI & Projections'); + }); + + it('skips ROI when no cost data', async () => { + mockFetchCostSummary.mockResolvedValue(null); + mockFetchBridgeStats.mockResolvedValue(null); + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).not.toContain('ROI & Projections'); + }); + + // ── Baseline comparison ── + + it('renders baseline comparison when baseline exists', async () => { + mockFetchCostSummary.mockResolvedValue({ + totalCost: 5.0, + dailyBudget: 10, + totalCalls: 50, + bySquad: [], + } as unknown as Awaited>); + mockGetLatestBaseline.mockResolvedValue({ + name: 'v0.7', + costUsd: 3.0, + goalsCompleted: 2, + commits: 10, + prsMerged: 5, + snapshotDate: '2026-03-20', + } as Awaited>); + mockCalculateROIMetrics.mockReturnValue({ + totalCostUsd: 5.0, costPerGoal: 2.5, costPerPR: 1.0, costPerCommit: 0.5, + roiMultiplier: 3.0, estimatedValueUsd: 15.0, + dailyProjectedCost: 5.0, weeklyProjectedCost: 35.0, monthlyProjectedCost: 150.0, + } as ReturnType); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('vs Baseline'); + }); + + it('shows baseline capture hint when no baseline', async () => { + mockFetchCostSummary.mockResolvedValue({ + totalCost: 1.0, + dailyBudget: 10, + totalCalls: 5, + bySquad: [], + } as unknown as Awaited>); + mockCalculateROIMetrics.mockReturnValue({ + totalCostUsd: 1.0, costPerGoal: 0, costPerPR: 0, costPerCommit: 0, + roiMultiplier: 0, estimatedValueUsd: 0, + dailyProjectedCost: 1.0, weeklyProjectedCost: 7.0, monthlyProjectedCost: 30.0, + } as ReturnType); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('No baseline set'); + expect(output).toContain('squads baseline'); + }); + + // ── Squad status classification ── + + it('classifies squad as 
needs-goal when no active goals', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [{ description: 'Done', metrics: [], completed: true }], + })); + await dashboardCommand({ json: true }); + + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.data.squads[0].status).toBe('needs-goal'); + consoleSpy.mockRestore(); + }); + + it('classifies squad as stale when memory dir unavailable (lastActivity contains "w")', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + // findMemoryDir returns null -> getLastActivityDate returns 'unknown' + // 'unknown' contains 'w', so the squad is classified as 'stale' + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [{ description: 'Ship it', metrics: [], completed: false }], + })); + await dashboardCommand({ json: true }); + + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.data.squads[0].status).toBe('stale'); + consoleSpy.mockRestore(); + }); + + it('classifies squad with dash lastActivity as stale', async () => { + // When memory dir exists but squad memory subdir does not, lastActivity = '—' + // '—' === '—' triggers stale status + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + mockFindMemoryDir.mockReturnValue('/test/.agents/memory'); + // existsSync returns false for the squad memory subdir + mockExistsSync.mockReturnValue(false); + mockLoadSquad.mockReturnValue(makeSquad({ + goals: [{ description: 'Ship it', metrics: [], completed: false }], + })); + await dashboardCommand({ json: true }); + + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.data.squads[0].status).toBe('stale'); + consoleSpy.mockRestore(); + }); + + // ── fast mode ── + + it('defaults to fast mode (skips GitHub API)', async () => { + await dashboardCommand(); + // getGitHubStatsOptimized should not be 
called in fast mode (default) + expect(mockGetGitHubStatsOptimized).not.toHaveBeenCalled(); + }); + + // ── Database snapshot saving ── + + it('saves snapshot when database is available', async () => { + const { saveDashboardSnapshot } = await import('../../src/lib/db.js'); + mockIsDatabaseAvailable.mockResolvedValue(true); + await dashboardCommand(); + expect(saveDashboardSnapshot).toHaveBeenCalled(); + }); + + it('does not save snapshot when database unavailable', async () => { + const { saveDashboardSnapshot } = await import('../../src/lib/db.js'); + mockIsDatabaseAvailable.mockResolvedValue(false); + await dashboardCommand(); + expect(saveDashboardSnapshot).not.toHaveBeenCalled(); + }); + + // ── closeDatabase always called ── + + it('calls closeDatabase after rendering', async () => { + await dashboardCommand(); + expect(mockCloseDatabase).toHaveBeenCalledTimes(1); + }); + + it('calls closeDatabase after JSON output', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + await dashboardCommand({ json: true }); + expect(mockCloseDatabase).toHaveBeenCalledTimes(1); + consoleSpy.mockRestore(); + }); + + // ── Acquisition section ── + + it('skips acquisition when SQUADS_NPM_PACKAGE not set', async () => { + delete process.env.SQUADS_NPM_PACKAGE; + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).not.toContain('Acquisition'); + }); + + it('renders acquisition when npm stats available and env set', async () => { + process.env.SQUADS_NPM_PACKAGE = 'squads-cli'; + mockFetchNpmStats.mockResolvedValue({ + downloads: { lastDay: 50, lastWeek: 300, lastMonth: 1200 }, + weekOverWeek: 15, + } as Awaited>); + + await dashboardCommand(); + const output = allWriteLineOutput(); + expect(output).toContain('Acquisition'); + expect(output).toContain('installs/week'); + delete process.env.SQUADS_NPM_PACKAGE; + }); + + // ── Dashboard stats aggregation ── + + it('calculates overall progress as average of squad 
progress', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + mockListSquads.mockReturnValue(['a', 'b']); + mockLoadSquad.mockImplementation((name: string) => { + if (name === 'a') return makeSquad({ + name: 'a', + goals: [{ description: 'G1', metrics: [], completed: true }, { description: 'G2', metrics: [], completed: false }], + }); + return makeSquad({ + name: 'b', + goals: [{ description: 'G3', metrics: [], completed: true }], + }); + }); + + await dashboardCommand({ json: true }); + const parsed = JSON.parse(consoleSpy.mock.calls[0][0] as string); + expect(parsed.data.stats.overallProgress).toBeGreaterThanOrEqual(0); + expect(parsed.data.stats.overallProgress).toBeLessThanOrEqual(100); + consoleSpy.mockRestore(); + }); +}); diff --git a/test/commands/services.test.ts b/test/commands/services.test.ts new file mode 100644 index 00000000..4d8ef02d --- /dev/null +++ b/test/commands/services.test.ts @@ -0,0 +1,309 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { Command } from 'commander'; + +// All mocks must be hoisted before any imports from the modules under test. 
+vi.mock('child_process', () => ({ + execSync: vi.fn(), +})); + +vi.mock('fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + existsSync: vi.fn(() => false), + }; +}); + +vi.mock('../../src/lib/tier-detect.js', () => ({ + detectTier: vi.fn(), +})); + +vi.mock('../../src/lib/terminal.js', () => ({ + writeLine: vi.fn(), + colors: { + dim: '', + red: '', + green: '', + yellow: '', + cyan: '', + white: '', + purple: '', + }, + bold: '', + RESET: '', + gradient: vi.fn((s: string) => s), + padEnd: vi.fn((s: string, n: number) => s.padEnd(n)), + icons: { + success: '✓', + error: '✗', + warning: '!', + progress: '›', + empty: '○', + }, +})); + +import { execSync } from 'child_process'; +import { existsSync } from 'fs'; +import { detectTier } from '../../src/lib/tier-detect.js'; +import { writeLine } from '../../src/lib/terminal.js'; +import { registerServicesCommands } from '../../src/commands/services.js'; + +const mockExecSync = vi.mocked(execSync); +const mockExistsSync = vi.mocked(existsSync); +const mockDetectTier = vi.mocked(detectTier); +const mockWriteLine = vi.mocked(writeLine); + +const tier1Info = { + tier: 1 as const, + services: { api: false, bridge: false, postgres: false, redis: false }, + urls: { api: null, bridge: null }, +}; + +const tier2Info = { + tier: 2 as const, + services: { api: true, bridge: true, postgres: true, redis: true }, + urls: { api: 'http://localhost:8090', bridge: 'http://localhost:8088' }, +}; + +function buildProgram() { + const program = new Command(); + program.exitOverride(); + registerServicesCommands(program); + return program; +} + +describe('services up', () => { + beforeEach(() => { + vi.clearAllMocks(); + // Default: Docker unavailable, no compose file + mockExecSync.mockImplementation(() => { + throw new Error('command not found'); + }); + mockExistsSync.mockReturnValue(false); + mockDetectTier.mockResolvedValue(tier1Info); + }); + + afterEach(() => { + vi.restoreAllMocks(); + 
}); + + it('shows error and exits gracefully when Docker is unavailable', async () => { + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'up']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? '')); + const combined = calls.join('\n'); + expect(combined).toMatch(/Docker not found/i); + // Should NOT throw — graceful exit + }); + + it('shows Docker Desktop install hint when Docker is unavailable', async () => { + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'up']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? '')); + const combined = calls.join('\n'); + expect(combined).toMatch(/docker\.com/i); + }); + + it('shows error when Docker Compose is unavailable (Docker present)', async () => { + mockExecSync.mockImplementation((cmd: unknown) => { + const c = String(cmd); + if (c.includes('docker --version')) return 'Docker version 24.0.0'; + throw new Error('command not found'); + }); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'up']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? '')); + const combined = calls.join('\n'); + expect(combined).toMatch(/Docker Compose not found/i); + }); + + it('shows error with expected path when compose file is missing', async () => { + // Docker and Compose available, but no compose file on disk + mockExecSync.mockImplementation((cmd: unknown) => { + const c = String(cmd); + if (c.includes('docker --version') || c.includes('docker compose version')) { + return 'ok'; + } + throw new Error('command not found'); + }); + mockExistsSync.mockReturnValue(false); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'up']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? 
'')); + const combined = calls.join('\n'); + expect(combined).toMatch(/docker-compose\.yml not found/i); + expect(combined).toMatch(/engineering\/docker/i); + }); + + it('registers the services up subcommand', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + expect(serviceCmd).toBeDefined(); + const upCmd = serviceCmd?.commands.find(c => c.name() === 'up'); + expect(upCmd).toBeDefined(); + expect(upCmd?.description()).toMatch(/start/i); + }); +}); + +describe('services down', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockExecSync.mockImplementation(() => { + throw new Error('command not found'); + }); + mockExistsSync.mockReturnValue(false); + mockDetectTier.mockResolvedValue(tier1Info); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('shows "nothing to stop" message when compose file is missing', async () => { + mockExistsSync.mockReturnValue(false); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'down']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? 
'')); + const combined = calls.join('\n'); + expect(combined).toMatch(/nothing to stop/i); + }); + + it('resolves gracefully when compose file is absent', async () => { + const program = buildProgram(); + await expect( + program.parseAsync(['node', 'squads', 'services', 'down']) + ).resolves.toBeDefined(); + }); + + it('registers the services down subcommand', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + const downCmd = serviceCmd?.commands.find(c => c.name() === 'down'); + expect(downCmd).toBeDefined(); + expect(downCmd?.description()).toMatch(/stop/i); + }); +}); + +describe('services status', () => { + beforeEach(() => { + vi.clearAllMocks(); + // docker ps returns null (no running containers) + mockExecSync.mockReturnValue(null as unknown as ReturnType); + mockDetectTier.mockResolvedValue(tier1Info); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('shows "no containers running" when docker ps returns nothing', async () => { + mockExecSync.mockImplementation(() => { + throw new Error('command not found'); + }); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'status']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? 
'')); + const combined = calls.join('\n'); + expect(combined).toMatch(/no docker containers running/i); + }); + + it('shows tier info from detectTier even when no containers', async () => { + mockExecSync.mockImplementation(() => { + throw new Error('command not found'); + }); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'status']); + + expect(mockDetectTier).toHaveBeenCalledOnce(); + }); + + it('displays container names when docker ps returns results', async () => { + mockDetectTier.mockResolvedValue(tier2Info); + mockExecSync.mockImplementation((cmd: unknown) => { + const c = String(cmd); + if (c.includes('docker ps')) { + return 'squads-postgres\tUp 5 minutes (healthy)\t0.0.0.0:5432->5432/tcp'; + } + // psql queries return null (throw) + throw new Error('not found'); + }); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'status']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? '')); + const combined = calls.join('\n'); + expect(combined).toContain('squads-postgres'); + }); + + it('shows Tier 2 when api is healthy', async () => { + mockDetectTier.mockResolvedValue(tier2Info); + mockExecSync.mockImplementation((cmd: unknown) => { + const c = String(cmd); + if (c.includes('docker ps')) { + return 'squads-api\tUp 2 minutes (healthy)\t0.0.0.0:8090->8090/tcp'; + } + throw new Error('not found'); + }); + + const program = buildProgram(); + await program.parseAsync(['node', 'squads', 'services', 'status']); + + const calls = mockWriteLine.mock.calls.map(c => String(c[0] ?? 
'')); + const combined = calls.join('\n'); + expect(combined).toMatch(/tier 2/i); + }); + + it('registers the services status subcommand', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + const statusCmd = serviceCmd?.commands.find(c => c.name() === 'status'); + expect(statusCmd).toBeDefined(); + expect(statusCmd?.description()).toMatch(/health|show|running/i); + }); +}); + +describe('services command structure', () => { + it('registers services command with correct description', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + expect(serviceCmd).toBeDefined(); + expect(serviceCmd?.description()).toMatch(/tier 2|docker|services/i); + }); + + it('registers all three subcommands: up, down, status', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + const names = serviceCmd?.commands.map(c => c.name()) ?? []; + expect(names).toContain('up'); + expect(names).toContain('down'); + expect(names).toContain('status'); + }); + + it('services up supports --webhooks flag', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + const upCmd = serviceCmd?.commands.find(c => c.name() === 'up'); + const options = upCmd?.options.map(o => o.long) ?? []; + expect(options).toContain('--webhooks'); + }); + + it('services up supports --telemetry flag', () => { + const program = buildProgram(); + const serviceCmd = program.commands.find(c => c.name() === 'services'); + const upCmd = serviceCmd?.commands.find(c => c.name() === 'up'); + const options = upCmd?.options.map(o => o.long) ?? 
[]; + expect(options).toContain('--telemetry'); + }); +}); diff --git a/test/e2e/first-run.e2e.test.ts b/test/e2e/first-run.e2e.test.ts index 18fe2b21..43b53b19 100644 --- a/test/e2e/first-run.e2e.test.ts +++ b/test/e2e/first-run.e2e.test.ts @@ -188,16 +188,16 @@ describe('E2E: First-Run User Journey (#488)', () => { * Step 3b: Verify init scaffolding content * The 4 core squads, cascade files, sentinel, and agent count. */ - it('Step 3b — init content: 4 squads, 14 agents, cascade files, placeholder sentinel', () => { + it('Step 3b — init content: 5 squads (4 core + demo), 15 agents, cascade files, placeholder sentinel', () => { const squadsDir = join(testDir, '.agents', 'squads'); const squads = readdirSync(squadsDir).filter( (f) => existsSync(join(squadsDir, f, 'SQUAD.md')) ); - // Must create exactly 4 core squads - expect(squads.sort()).toEqual(['company', 'intelligence', 'product', 'research']); + // Must create exactly 5 squads: 4 core + demo starter squad + expect(squads.sort()).toEqual(['company', 'demo', 'intelligence', 'product', 'research']); - // Must create 14 agent files total (excluding SQUAD.md) + // Must create 15 agent files total: 5 company + 3 research + 3 intelligence + 3 product + 1 demo (excluding SQUAD.md) let agentCount = 0; for (const squad of squads) { const files = readdirSync(join(squadsDir, squad)).filter( @@ -205,7 +205,7 @@ describe('E2E: First-Run User Journey (#488)', () => { ); agentCount += files.length; } - expect(agentCount).toBe(14); + expect(agentCount).toBe(15); // Context cascade files must exist expect(existsSync(join(testDir, '.agents', 'config', 'SYSTEM.md'))).toBe(true); diff --git a/test/guardrail.test.ts b/test/guardrail.test.ts new file mode 100644 index 00000000..d8ab5ea0 --- /dev/null +++ b/test/guardrail.test.ts @@ -0,0 +1,94 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdirSync, writeFileSync, rmSync, existsSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } 
from 'os'; +import { resolveGuardrailSettings } from '../src/lib/execution-engine.js'; + +const TEST_DIR = join(tmpdir(), `squads-guardrail-test-${Date.now()}`); + +describe('resolveGuardrailSettings', () => { + beforeEach(() => { + mkdirSync(TEST_DIR, { recursive: true }); + }); + + afterEach(() => { + if (existsSync(TEST_DIR)) { + rmSync(TEST_DIR, { recursive: true, force: true }); + } + }); + + it('returns undefined or the bundled default when no project guardrail file exists', () => { + const result = resolveGuardrailSettings(TEST_DIR); + // Result is either undefined (no bundled default found) or the bundled default path + // In CI with the built package, the bundled default is present + if (result !== undefined) { + expect(result).toContain('guardrail.json'); + } + }); + + it('returns project-level .claude/guardrail.json when present', () => { + const claudeDir = join(TEST_DIR, '.claude'); + mkdirSync(claudeDir, { recursive: true }); + const guardrailPath = join(claudeDir, 'guardrail.json'); + writeFileSync(guardrailPath, JSON.stringify({ hooks: {} })); + + const result = resolveGuardrailSettings(TEST_DIR); + + expect(result).toBe(guardrailPath); + }); + + it('project-level guardrail takes precedence over bundled default', () => { + // Create project-level override + const claudeDir = join(TEST_DIR, '.claude'); + mkdirSync(claudeDir, { recursive: true }); + const projectGuardrail = join(claudeDir, 'guardrail.json'); + writeFileSync(projectGuardrail, JSON.stringify({ hooks: { PreToolUse: [] } })); + + const result = resolveGuardrailSettings(TEST_DIR); + + // Should return the project-level path, not the bundled one + expect(result).toBe(projectGuardrail); + }); + + it('returns a path ending with guardrail.json', () => { + const claudeDir = join(TEST_DIR, '.claude'); + mkdirSync(claudeDir, { recursive: true }); + writeFileSync(join(claudeDir, 'guardrail.json'), '{}'); + + const result = resolveGuardrailSettings(TEST_DIR); + + expect(result).toBeDefined(); +
expect(result!.endsWith('guardrail.json')).toBe(true); + }); +}); + +describe('guardrail.json template', () => { + it('bundled template is valid JSON with hooks structure', async () => { + // Find and parse the bundled guardrail.json from templates/ + const { existsSync, readFileSync } = await import('fs'); + const { join: joinPath, dirname } = await import('path'); + const { fileURLToPath } = await import('url'); + + const __filename = fileURLToPath(import.meta.url); + const __dirname = dirname(__filename); + + // Look for it relative to test/ directory + const candidates = [ + joinPath(__dirname, '..', 'templates', 'guardrail.json'), + joinPath(__dirname, '..', 'dist', 'templates', 'guardrail.json'), + ]; + + const found = candidates.find(p => existsSync(p)); + if (!found) { + // Nothing to validate if the template is not found; the test then passes vacuously (e.g., in some CI environments) + return; + } + + const content = readFileSync(found, 'utf-8'); + const parsed = JSON.parse(content); + + expect(parsed).toHaveProperty('hooks'); + expect(parsed.hooks).toHaveProperty('PreToolUse'); + expect(Array.isArray(parsed.hooks.PreToolUse)).toBe(true); + }); +}); diff --git a/test/init.test.ts b/test/init.test.ts index d1922684..25b18bef 100644 --- a/test/init.test.ts +++ b/test/init.test.ts @@ -558,6 +558,168 @@ describe('initCommand', () => { expect(vars['REPO_NAME']).toBe('test-org/test-repo'); expect(vars['SERVICE_NAME']).toBe('test-repo'); }); + + it('detects Python stack from pyproject.toml', async () => { + writeFileSync(join(testDir, 'pyproject.toml'), '[project]\nname = "my-app"\n'); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_STACK']).toBe('python'); + expect(vars['SERVICE_TYPE']).toBe('product'); + expect(vars['TEST_COMMAND']).toBe('pytest'); + }); + + it('detects Python stack from setup.py', async () => { +
writeFileSync(join(testDir, 'setup.py'), 'from setuptools import setup\nsetup(name="app")\n'); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_STACK']).toBe('python'); + }); + + it('detects Next.js framework from dependencies', async () => { + writeFileSync(join(testDir, 'package.json'), JSON.stringify({ + name: 'next-app', + dependencies: { next: '^14.0.0', react: '^18.0.0' }, + })); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_STACK']).toBe('next'); + }); + + it('detects Nuxt framework from dependencies', async () => { + writeFileSync(join(testDir, 'package.json'), JSON.stringify({ + name: 'nuxt-app', + dependencies: { nuxt: '^3.0.0' }, + })); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_STACK']).toBe('nuxt'); + }); + + it('detects Vue framework from dependencies', async () => { + writeFileSync(join(testDir, 'package.json'), JSON.stringify({ + name: 'vue-app', + dependencies: { vue: '^3.0.0' }, + })); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_STACK']).toBe('vue'); + }); + + it('detects Astro framework from dependencies', async () => { + writeFileSync(join(testDir, 'package.json'), JSON.stringify({ + name: 'astro-app', + dependencies: { astro: '^4.0.0' }, + })); + + await initCommand({ yes:
true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_STACK']).toBe('astro'); + }); + + it('sets product-type IDP variables for product services', async () => { + writeFileSync(join(testDir, 'package.json'), JSON.stringify({ name: 'my-app' })); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_TYPE']).toBe('product'); + expect(vars['SERVICE_SCORECARD']).toBe('product'); + expect(vars['BRANCHES_WORKFLOW']).toBe('pr-to-develop'); + expect(vars['BRANCHES_DEVELOPMENT']).toBe('develop'); + expect(vars['CI_TEMPLATE']).toBe('node'); + }); + + it('sets domain-type IDP variables when no project files detected', async () => { + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['SERVICE_TYPE']).toBe('domain'); + expect(vars['SERVICE_SCORECARD']).toBe('domain'); + expect(vars['BRANCHES_WORKFLOW']).toBe('direct-to-main'); + expect(vars['BRANCHES_DEVELOPMENT']).toBe(''); + expect(vars['CI_TEMPLATE']).toBe('null'); + }); + + it('sets BUILD_COMMAND to null when no build command detected', async () => { + writeFileSync(join(testDir, 'requirements.txt'), 'flask==2.0\n'); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + // Python has no build command + expect(vars['BUILD_COMMAND']).toBe('null'); + }); + + it('writes IDP catalog file with project name as filename', async () => { + await
initCommand({ yes: true, force: true }); + + const catalogFile = join(testDir, '.agents/idp/catalog/test-repo.yaml'); + expect(existsSync(catalogFile)).toBe(true); + }); + + it('sets OWNER_SQUAD from first squad in use case config', async () => { + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + expect(vars['OWNER_SQUAD']).toBeDefined(); + expect(typeof vars['OWNER_SQUAD']).toBe('string'); + expect(vars['OWNER_SQUAD'].length).toBeGreaterThan(0); + }); + + it('handles malformed package.json gracefully', async () => { + writeFileSync(join(testDir, 'package.json'), '{ invalid json }}}'); + + await initCommand({ yes: true, force: true }); + + // Should still complete and fall back to node stack + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record<string, string>; + // When JSON.parse fails, catch block ignores error, stack stays 'node' (from package.json existing) + expect(vars['SERVICE_STACK']).toBe('node'); + }); }); // ---------- Template variables ---------- diff --git a/test/lib/workflow.test.ts b/test/lib/workflow.test.ts index 501bc78d..05ed0b0d 100644 --- a/test/lib/workflow.test.ts +++ b/test/lib/workflow.test.ts @@ -19,11 +19,13 @@ vi.mock('fs', () => ({ vi.mock('child_process', () => ({ execSync: vi.fn(), exec: vi.fn(), + spawn: vi.fn(), })); // Mock squad-parser vi.mock('../../src/lib/squad-parser.js', () => ({ findSquadsDir: vi.fn(), + findProjectRoot: vi.fn().mockReturnValue(null), })); // Mock run-context to avoid file system reads in unit tests @@ -41,7 +43,7 @@ vi.mock('../../src/lib/conversation.js', async () => { }); import { existsSync, writeFileSync, mkdirSync } from 'fs'; -import { execSync } from 'child_process'; +import { execSync, spawn } from 'child_process'; import { findSquadsDir }
from '../../src/lib/squad-parser.js'; import { runConversation, saveTranscript } from '../../src/lib/workflow.js'; import { createTranscript, addTurn } from '../../src/lib/conversation.js'; @@ -51,8 +53,36 @@ const mockExistsSync = vi.mocked(existsSync); const mockWriteFileSync = vi.mocked(writeFileSync); const mockMkdirSync = vi.mocked(mkdirSync); const mockExecSync = vi.mocked(execSync); +const mockSpawn = vi.mocked(spawn); const mockFindSquadsDir = vi.mocked(findSquadsDir); +/** Create a mock child process: once stdin.end() is called it emits `output` on stdout as a single Buffer chunk, then fires 'close' with code 0 (both deferred via process.nextTick). */ +function makeMockChild(output: string) { + const stdoutHandlers: Array<(chunk: Buffer) => void> = []; + const closeHandlers: Array<(code: number) => void> = []; + return { + stdin: { + write: vi.fn(), + end: vi.fn(() => { + process.nextTick(() => { + stdoutHandlers.forEach(l => l(Buffer.from(output))); + closeHandlers.forEach(l => l(0)); + }); + }), + }, + stdout: { + on: vi.fn((event: string, handler: (chunk: Buffer) => void) => { + if (event === 'data') stdoutHandlers.push(handler); + }), + }, + stderr: { on: vi.fn() }, + on: vi.fn((event: string, handler: (code: number) => void) => { + if (event === 'close') closeHandlers.push(handler); + }), + kill: vi.fn(), + } as unknown as ReturnType<typeof spawn>; +} + // Minimal squad fixture function makeSquad(overrides: Partial<Squad> = {}): Squad { return { @@ -105,7 +135,7 @@ describe('runConversation', () => { mockExistsSync.mockReturnValue(true); // agent file exists // Lead outputs a convergence phrase immediately - mockExecSync.mockReturnValue('Session complete. All PRs merged.' as never); + mockSpawn.mockImplementation(() => makeMockChild('Session complete.
All PRs merged.')); const squad = makeSquad({ agents: [{ name: 'squad-lead', role: 'orchestrates the team', model: undefined }], @@ -121,7 +151,7 @@ describe('runConversation', () => { mockExistsSync.mockReturnValue(true); // Each lead turn produces non-convergent output but we set very low cost ceiling - mockExecSync.mockReturnValue('Still working on it.' as never); + mockSpawn.mockImplementation(() => makeMockChild('Still working on it.')); const squad = makeSquad({ agents: [{ name: 'squad-lead', role: 'orchestrates the team', model: undefined }], @@ -142,7 +172,7 @@ describe('runConversation', () => { mockExistsSync.mockReturnValue(true); // Each turn produces non-convergent output with no cost (free) - mockExecSync.mockImplementation(() => 'Still working on it.' as never); + mockSpawn.mockImplementation(() => makeMockChild('Still working on it.')); const squad = makeSquad({ agents: [{ name: 'squad-lead', role: 'orchestrates the team', model: undefined }], @@ -163,9 +193,24 @@ describe('runConversation', () => { mockExistsSync.mockReturnValue(true); const capturedPrompts: string[] = []; - mockExecSync.mockImplementation((cmd: string) => { - capturedPrompts.push(cmd); - return 'Session complete.' 
as never; + mockSpawn.mockImplementation(() => { + const stdoutHandlers: Array<(chunk: Buffer) => void> = []; + const closeHandlers: Array<(code: number) => void> = []; + return { + stdin: { + write: vi.fn((prompt: string) => { capturedPrompts.push(prompt); }), + end: vi.fn(() => { + process.nextTick(() => { + stdoutHandlers.forEach(l => l(Buffer.from('Session complete.'))); + closeHandlers.forEach(l => l(0)); + }); + }), + }, + stdout: { on: vi.fn((e: string, h: (c: Buffer) => void) => { if (e === 'data') stdoutHandlers.push(h); }) }, + stderr: { on: vi.fn() }, + on: vi.fn((e: string, h: (c: number) => void) => { if (e === 'close') closeHandlers.push(h); }), + kill: vi.fn(), + } as unknown as ReturnType<typeof spawn>; }); const squad = makeSquad({ @@ -190,7 +235,7 @@ describe('runConversation', () => { return false; }); - mockExecSync.mockReturnValue('Session complete.' as never); + mockSpawn.mockImplementation(() => makeMockChild('Session complete.')); const squad = makeSquad({ repo: 'agents-squads/squads-cli', @@ -206,7 +251,7 @@ describe('runConversation', () => { mockFindSquadsDir.mockReturnValue('/fake/.agents/squads'); mockExistsSync.mockReturnValue(true); - mockExecSync.mockReturnValue('Session complete.' as never); + mockSpawn.mockImplementation(() => makeMockChild('Session complete.')); const squad = makeSquad({ agents: [ diff --git a/test/telemetry.test.ts b/test/telemetry.test.ts index cea5cc1f..dcfa201b 100644 --- a/test/telemetry.test.ts +++ b/test/telemetry.test.ts @@ -47,6 +47,7 @@ describe('telemetry', () => { it('has command events', () => { expect(Events.CLI_RUN).toBe('cli.run'); + expect(Events.CLI_RUN_COMPLETE).toBe('cli.run.complete'); expect(Events.CLI_STATUS).toBe('cli.status'); expect(Events.CLI_DASHBOARD).toBe('cli.dashboard'); });