From 595cd05dde9566dc45fdd8028a128c5f6aee7670 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Thu, 19 Mar 2026 09:49:17 +0200 Subject: [PATCH 1/3] feat(ambient): add agent orchestration to ambient mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace passive skill-only ambient mode with full agent orchestration. Two depth tiers: QUICK (zero overhead) and ORCHESTRATED (skills + agent pipelines). Three new orchestration skills drive intent-specific pipelines: - implementation-orchestration: pre-flight → Coder → quality gates - debug-orchestration: competing hypotheses → parallel Explores → root cause - plan-orchestration: Skimmer → Explores → Plan agent → gap validation Key changes: - Remove ELEVATE tier, replace GUIDED with ORCHESTRATED - Rename BUILD intent to IMPLEMENT for clarity - Skills loaded via Skill tool instead of Read (fixes broken loading) - Add TDD skill to Coder agent frontmatter permanently - Ambient plugin now includes 7 agents + 4 skills - Update ambient-prompt hook preamble for new tiers - Classification conservatism: default to QUICK Closes #84 (superseded by Skill tool approach). 
--- CLAUDE.md | 6 +- README.md | 4 +- .../.claude-plugin/plugin.json | 21 +++-- plugins/devflow-ambient/commands/ambient.md | 71 +++++++-------- scripts/hooks/ambient-prompt | 13 ++- shared/agents/coder.md | 2 +- shared/skills/ambient-router/SKILL.md | 68 ++++++++------ .../references/skill-catalog.md | 16 ++-- shared/skills/debug-orchestration/SKILL.md | 67 ++++++++++++++ .../implementation-orchestration/SKILL.md | 90 +++++++++++++++++++ shared/skills/plan-orchestration/SKILL.md | 69 ++++++++++++++ .../skills/test-driven-development/SKILL.md | 9 +- src/cli/commands/ambient.ts | 2 +- src/cli/plugins.ts | 6 +- 14 files changed, 348 insertions(+), 96 deletions(-) create mode 100644 shared/skills/debug-orchestration/SKILL.md create mode 100644 shared/skills/implementation-orchestration/SKILL.md create mode 100644 shared/skills/plan-orchestration/SKILL.md diff --git a/CLAUDE.md b/CLAUDE.md index d2c1f49..d2af6b4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -22,7 +22,7 @@ Plugin marketplace with 17 plugins (8 core + 9 optional language/ecosystem), eac | `devflow-resolve` | Review issue resolution | Optional | | `devflow-debug` | Competing hypothesis debugging | Optional | | `devflow-self-review` | Self-review (Simplifier + Scrutinizer) | No | -| `devflow-ambient` | Ambient mode — auto-loads relevant skills based on each prompt | No | +| `devflow-ambient` | Ambient mode — intent classification with agent orchestration | No | | `devflow-core-skills` | Auto-activating quality enforcement | No | | `devflow-audit-claude` | Audit CLAUDE.md files (optional) | No | | `devflow-typescript` | TypeScript language patterns (optional) | No | @@ -44,7 +44,7 @@ Commands with Teams Variant ship as `{name}.md` (parallel subagents) and `{name} ``` devflow/ -├── shared/skills/ # 32 skills (single source of truth) +├── shared/skills/ # 35 skills (single source of truth) ├── shared/agents/ # 10 shared agents (single source of truth) ├── plugins/devflow-*/ # 17 plugins (8 core + 9 optional 
language/ecosystem) ├── docs/reference/ # Detailed reference documentation @@ -113,7 +113,7 @@ Working memory files live in a dedicated `.memory/` directory: - `/resolve` — N Resolver agents + Git - `/debug` — Agent Teams competing hypotheses - `/self-review` — Simplifier then Scrutinizer (sequential) -- `/ambient` — Intent classification + proportional skill loading (no agents, main session only) +- `/ambient` — Intent classification + agent orchestration (IMPLEMENT/DEBUG/PLAN/REVIEW pipelines) - `/audit-claude` — CLAUDE.md audit (optional plugin) **Shared agents** (10): git, synthesizer, skimmer, simplifier, coder, reviewer, resolver, shepherd, scrutinizer, validator diff --git a/README.md b/README.md index 338a724..225d1f6 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ DevFlow adds structured commands that handle the full lifecycle: specify feature - **Full-lifecycle implementation** — spec, explore, plan, code, validate, refine in one command - **Automatic session memory** — survives restarts, `/clear`, and context compaction - **Parallel debugging** — competing hypotheses investigated simultaneously -- **32 quality skills** — 9 auto-activating core, 8 optional language/ecosystem, plus specialized review and agent skills +- **35 quality skills** — 9 auto-activating core, 8 optional language/ecosystem, plus specialized review, agent, and orchestration skills ## Quick Start @@ -48,7 +48,7 @@ Then in Claude Code: | `devflow-resolve` | `/resolve` | Process review issues — fix or defer to tech debt | | `devflow-debug` | `/debug` | Parallel hypothesis debugging | | `devflow-self-review` | `/self-review` | Self-review workflow (Simplifier + Scrutinizer) | -| `devflow-ambient` | `/ambient` | Ambient mode — auto-loads relevant skills based on each prompt | +| `devflow-ambient` | `/ambient` | Ambient mode — intent classification with agent orchestration | | `devflow-core-skills` | (auto) | Auto-activating quality enforcement skills | ## Command Details diff 
--git a/plugins/devflow-ambient/.claude-plugin/plugin.json b/plugins/devflow-ambient/.claude-plugin/plugin.json index 3aac1a3..b9a772b 100644 --- a/plugins/devflow-ambient/.claude-plugin/plugin.json +++ b/plugins/devflow-ambient/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "devflow-ambient", - "description": "Ambient mode — auto-loads relevant skills for every prompt", + "description": "Ambient mode — intent classification with proportional agent orchestration", "author": { "name": "Dean0x" }, @@ -12,11 +12,22 @@ "ambient", "intent", "classification", - "skills", - "auto-load" + "orchestration", + "agents" + ], + "agents": [ + "coder", + "validator", + "simplifier", + "scrutinizer", + "shepherd", + "skimmer", + "reviewer" ], - "agents": [], "skills": [ - "ambient-router" + "ambient-router", + "implementation-orchestration", + "debug-orchestration", + "plan-orchestration" ] } diff --git a/plugins/devflow-ambient/commands/ambient.md b/plugins/devflow-ambient/commands/ambient.md index f599cbe..9e24332 100644 --- a/plugins/devflow-ambient/commands/ambient.md +++ b/plugins/devflow-ambient/commands/ambient.md @@ -4,12 +4,12 @@ description: Ambient mode — classify intent and auto-load relevant skills for # Ambient Command -Classify user intent and auto-load relevant skills. No agents spawned — enhances the main session only. +Classify user intent and respond with proportional effort — zero overhead for simple requests, skill loading + agent orchestration for substantive work. ## Usage ``` -/ambient Classify and respond with skill enforcement +/ambient Classify, load skills, orchestrate agents if needed /ambient Show usage ``` @@ -24,23 +24,24 @@ Read the `ambient-router` skill: Apply the ambient-router classification to `$ARGUMENTS`: -1. **Intent:** BUILD | DEBUG | REVIEW | PLAN | EXPLORE | CHAT -2. **Depth:** QUICK | GUIDED | ELEVATE +1. **Intent:** IMPLEMENT | DEBUG | REVIEW | PLAN | EXPLORE | CHAT +2. 
**Depth:** QUICK | ORCHESTRATED If no arguments provided, output: ``` ## Ambient Mode -Classify intent and auto-load relevant skills. +Classify intent and auto-load relevant skills with agent orchestration. Usage: /ambient <prompt> Examples: - /ambient add a login form → BUILD/GUIDED (loads TDD + implementation-patterns) - /ambient fix the auth error → DEBUG/GUIDED (loads test-patterns + core-patterns) + /ambient add a login form → IMPLEMENT/ORCHESTRATED (Coder + quality gates) + /ambient fix the auth error → DEBUG/ORCHESTRATED (parallel hypothesis investigation) + /ambient how should we cache? → PLAN/ORCHESTRATED (Skimmer + Explore + Plan agents) /ambient where is the config? → EXPLORE/QUICK (responds normally) - /ambient refactor the auth system → BUILD/ELEVATE (suggests /implement) + /ambient commit this → QUICK (no overhead) Always-on: devflow ambient --enable ``` @@ -50,45 +51,40 @@ Then stop. ### Phase 3: State Classification - **QUICK:** Skip this phase entirely. Respond directly in Phase 4. -- **GUIDED:** Output one line: `Ambient: {INTENT}/{DEPTH}. Loading: {skill1}, {skill2}.` -- **ELEVATE:** Skip — recommendation happens in Phase 4. +- **ORCHESTRATED:** Output one line: `Ambient: {INTENT}/ORCHESTRATED. Loading: {skill1}, {skill2}.` ### Phase 4: Apply **QUICK:** Respond to the user's prompt normally. Zero skill loading. Zero overhead. 
-**GUIDED:** -Read the selected skills based on the ambient-router's skill selection matrix: +**ORCHESTRATED:** +Invoke each selected skill using the Skill tool based on the ambient-router's skill selection matrix: -| Intent | Primary Skills | Secondary (conditional) | -|--------|---------------|------------------------| -| BUILD | test-driven-development, implementation-patterns | typescript (.ts), react (.tsx), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | -| DEBUG | test-patterns, core-patterns | git-safety (if git ops) | -| REVIEW | self-review, core-patterns | test-patterns | -| PLAN | implementation-patterns | core-patterns | +| Intent | Skills Loaded | Agent Pipeline | +|--------|--------------|----------------| +| IMPLEMENT | implementation-orchestration, implementation-patterns | Pre-flight → Coder → Validator → Simplifier → Scrutinizer → Shepherd | +| DEBUG | debug-orchestration, core-patterns | Hypotheses → parallel Explores → convergence → report → offer fix | +| PLAN | plan-orchestration, implementation-patterns, core-patterns | Skimmer → Explores → Plan agent → gap validation | +| REVIEW | self-review, core-patterns | Single Reviewer agent with focus from prompt | -Read up to 3 skills from `~/.claude/skills/{name}/SKILL.md`. Apply their patterns and constraints when responding to the user's prompt. - -For BUILD intent: enforce RED-GREEN-REFACTOR from test-driven-development. Write failing tests before production code. - -**ELEVATE:** -Respond to the user's prompt with your best effort, then append: - -> This task spans multiple files/systems. Consider `/implement` for full lifecycle management (exploration → planning → implementation → review). +After loading skills, follow the orchestration skill's pipeline (Step 5 of ambient-router). 
## Architecture ``` -/ambient (main session, no agents) +/ambient │ ├─ Phase 1: Load ambient-router skill ├─ Phase 2: Classify intent + depth -├─ Phase 3: State classification (GUIDED only) +├─ Phase 3: State classification (ORCHESTRATED only) └─ Phase 4: Apply - ├─ QUICK → respond directly - ├─ GUIDED → load 2-3 skills, apply patterns, respond - └─ ELEVATE → respond + workflow nudge + ├─ QUICK → respond directly (no agents) + └─ ORCHESTRATED → load skills via Skill tool → orchestrate agents + ├─ IMPLEMENT → implementation-orchestration pipeline + ├─ DEBUG → debug-orchestration pipeline + ├─ PLAN → plan-orchestration pipeline + └─ REVIEW → single Reviewer agent ``` ## Edge Cases @@ -98,13 +94,14 @@ Respond to the user's prompt with your best effort, then append: | No arguments | Show usage and stop | | Single word ("help") | Classify — likely CHAT/QUICK | | Prompt references `/implement` etc. | Classify as normal — user chose /ambient intentionally | -| Mixed intent ("fix and add test") | Use higher-overhead intent (BUILD > DEBUG) | +| Mixed intent ("fix and add test") | Use higher-overhead intent (IMPLEMENT > DEBUG) | | User says "no enforcement" | Respect immediately — treat as QUICK | +| Multiple ORCHESTRATED triggers per session | Each runs independently; context compaction handles accumulation | ## Principles -1. **No agents** — Ambient enhances the main session, never spawns subagents -2. **Proportional** — QUICK gets zero overhead, GUIDED gets 2-3 skills, ELEVATE gets a nudge -3. **Transparent** — State classification for GUIDED/ELEVATE, silent for QUICK -4. **Respectful** — Never over-classify; when in doubt, go one tier lower -5. **TDD for BUILD** — GUIDED depth BUILD tasks enforce test-first workflow +1. **Agents for ORCHESTRATED, main session for QUICK** — proportional response +2. **Skill tool for loading** — invoke skills via Skill tool, not Read +3. **Conservative classification** — default to QUICK; ORCHESTRATED has real agent cost +4. 
**Transparent** — state classification for ORCHESTRATED, silent for QUICK +5. **Respectful** — never over-classify; when in doubt, QUICK diff --git a/scripts/hooks/ambient-prompt b/scripts/hooks/ambient-prompt index 57cfc7a..3d61a95 100755 --- a/scripts/hooks/ambient-prompt +++ b/scripts/hooks/ambient-prompt @@ -32,13 +32,18 @@ fi # Inject classification preamble PREAMBLE="AMBIENT MODE ACTIVE: Before responding, silently classify this prompt: -Intent: BUILD | DEBUG | REVIEW | PLAN | EXPLORE | CHAT -Depth: QUICK (no overhead) | GUIDED (load skills) | ELEVATE (suggest /command) +Intent: IMPLEMENT | DEBUG | REVIEW | PLAN | EXPLORE | CHAT +Depth: QUICK (no overhead) | ORCHESTRATED (skills + agent orchestration) -If GUIDED+: Use the ambient-router skill already in your session context for classification details and skill selection matrix. Then Read the selected skills from ~/.claude/skills//SKILL.md and apply their patterns. For BUILD tasks, also load test-driven-development skill and enforce RED-GREEN-REFACTOR. +Classification conservatism: default to QUICK. Only classify ORCHESTRATED when the prompt +has clear task scope. Small edits, config changes, git ops, exploratory questions → QUICK. + +If ORCHESTRATED: Use the ambient-router skill already in your session context. +BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. +Then orchestrate agents per the ambient-router Step 5. If QUICK: Respond normally without stating classification. -Only state classification aloud for GUIDED/ELEVATE." +Only state classification aloud for ORCHESTRATED." jq -n --arg ctx "$PREAMBLE" '{ "hookSpecificOutput": { diff --git a/shared/agents/coder.md b/shared/agents/coder.md index 962dc7e..b9c8d0f 100644 --- a/shared/agents/coder.md +++ b/shared/agents/coder.md @@ -2,7 +2,7 @@ name: Coder description: Autonomous task implementation on feature branch. Implements, tests, and commits. 
model: inherit -skills: core-patterns, git-safety, implementation-patterns, git-workflow, test-patterns, input-validation +skills: core-patterns, git-safety, implementation-patterns, git-workflow, test-patterns, test-driven-development, input-validation --- # Coder Agent diff --git a/shared/skills/ambient-router/SKILL.md b/shared/skills/ambient-router/SKILL.md index e040528..b89cc2b 100644 --- a/shared/skills/ambient-router/SKILL.md +++ b/shared/skills/ambient-router/SKILL.md @@ -7,16 +7,16 @@ allowed-tools: Read, Grep, Glob # Ambient Router -Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill injection for substantive work, workflow nudges for complex tasks. +Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill loading + agent orchestration for substantive work. ## Iron Law -> **PROPORTIONAL RESPONSE** +> **ORCHESTRATED GETS SKILLS + AGENT ORCHESTRATION MATCHED TO INTENT** > -> Match effort to intent. Never apply heavyweight processes to lightweight requests. -> A chat question gets zero overhead. A 3-file feature gets 2-3 skills. A system -> refactor gets a nudge toward `/implement`. Misclassification in either direction -> is a failure. +> QUICK gets zero overhead. ORCHESTRATED gets full skill loading via the Skill tool plus +> agent pipeline execution. Misclassification in either direction is a failure — +> false-positive ORCHESTRATED is expensive (5-6 agent spawns), false-negative +> ORCHESTRATED leaves quality on the table. --- @@ -26,14 +26,14 @@ Determine what the user is trying to do from their prompt. 
| Intent | Signal Words / Patterns | Examples | |--------|------------------------|---------| -| **BUILD** | "add", "create", "implement", "build", "write", "make" | "add a login form", "create an API endpoint" | +| **IMPLEMENT** | "add", "create", "implement", "build", "write", "make" | "add a login form", "create an API endpoint" | | **DEBUG** | "fix", "bug", "broken", "failing", "error", "why does" | "fix the auth error", "why is this test failing" | | **REVIEW** | "check", "look at", "review", "is this ok", "any issues" | "check this function", "any issues with this?" | | **PLAN** | "how should", "design", "architecture", "approach", "strategy" | "how should I structure auth?", "what's the approach for caching?" | | **EXPLORE** | "what is", "where is", "find", "show me", "explain", "how does" | "where is the config?", "explain this function" | | **CHAT** | greetings, meta-questions, confirmations, short responses | "thanks", "yes", "what can you do?" | -**Ambiguous prompts:** Default to the lowest-overhead classification. "Update the README" → BUILD/GUIDED. Git operations like "commit this" → QUICK. +**Ambiguous prompts:** Default to the lowest-overhead classification. "Update the README" → QUICK. Git operations like "commit this" → QUICK. ## Step 2: Classify Depth @@ -41,53 +41,67 @@ Determine how much enforcement the prompt warrants. | Depth | Criteria | Action | |-------|----------|--------| -| **QUICK** | CHAT intent. EXPLORE with no analytical depth ("where is X?"). Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. | Respond normally. Zero overhead. Do not state classification. | -| **GUIDED** | BUILD/DEBUG/REVIEW/PLAN intent (any word count). EXPLORE with analytical depth ("analyze our X", "discuss how Y works"). | Read and apply 2-3 relevant skills from the selection matrix below. State classification briefly. | -| **ELEVATE** | Multi-file architectural change, system-wide scope, > 5 files. 
Detailed implementation plan (100+ words with plan structure). | Respond at best effort + recommend: "This looks like it would benefit from `/implement` for full lifecycle management." | +| **QUICK** | CHAT intent. EXPLORE intent. PLAN discussions without clear deliverable. Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. Small edits, config changes. | Respond normally. Zero overhead. Do not state classification. | +| **ORCHESTRATED** | IMPLEMENT with clear, scoped task. DEBUG with specific bug/error. PLAN with clear deliverable ("design the caching layer"). REVIEW with code to review. Multi-file changes with defined scope. | Load skills via Skill tool, then orchestrate agents per Step 5. State classification. | -## Step 3: Select Skills (GUIDED depth only) +**Classification conservatism:** Default to QUICK. Only classify ORCHESTRATED when the prompt has clear task scope. ORCHESTRATED triggers agent spawning which has real cost. When in doubt, QUICK. -Based on classified intent, read the following skills to inform your response. +## Step 3: Select Skills (ORCHESTRATED depth only) + +Based on classified intent, invoke each selected skill using the Skill tool. 
| Intent | Primary Skills | Secondary (if file type matches) | |--------|---------------|----------------------------------| -| **BUILD** | test-driven-development, implementation-patterns, search-first | typescript (.ts), react (.tsx/.jsx), go (.go), java (.java), python (.py), rust (.rs), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | -| **DEBUG** | test-patterns, core-patterns | git-safety (if git operations involved) | +| **IMPLEMENT** | implementation-orchestration, implementation-patterns | typescript (.ts), react (.tsx/.jsx), go (.go), java (.java), python (.py), rust (.rs), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | +| **DEBUG** | debug-orchestration, core-patterns | git-safety (if git operations involved) | +| **PLAN** | plan-orchestration, implementation-patterns, core-patterns | — | | **REVIEW** | self-review, core-patterns | test-patterns | -| **PLAN** | implementation-patterns | core-patterns | -**Excluded from ambient** (review-command-only): review-methodology, complexity-patterns, consistency-patterns, database-patterns, dependencies-patterns, documentation-patterns, regression-patterns, architecture-patterns, accessibility. +**Excluded from ambient** (review-command-only): review-methodology, complexity-patterns, consistency-patterns, database-patterns, dependencies-patterns, documentation-patterns, regression-patterns, architecture-patterns, accessibility, performance-patterns. See `references/skill-catalog.md` for the full skill-to-intent mapping with file pattern triggers. ## Step 4: Apply -When classification is GUIDED or ELEVATE, skill application is NON-NEGOTIABLE. +When classification is ORCHESTRATED, skill loading is NON-NEGOTIABLE. Do not rationalize skipping skills. Do not respond without loading them first. -If test-driven-development is selected, you MUST write the failing test before ANY production code. 
+BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. - **QUICK:** Respond directly. No preamble, no classification statement. -- **GUIDED:** State classification briefly: `Ambient: BUILD/GUIDED. Loading: test-driven-development, implementation-patterns.` Then read the selected skills and apply their patterns. No exceptions. -- **ELEVATE:** Respond with your best effort, then append: `> This task spans multiple files/systems. Consider \`/implement\` for full lifecycle.` +- **ORCHESTRATED:** State classification briefly: `Ambient: IMPLEMENT/ORCHESTRATED. Loading: implementation-orchestration, implementation-patterns.` Then invoke each skill using the Skill tool and follow Step 5 for agent orchestration. + +## Step 5: Orchestrate Agents (ORCHESTRATED depth only) + +After loading skills via Step 3-4, execute the agent pipeline for the classified intent: + +| Intent | Pipeline | +|--------|----------| +| **IMPLEMENT** | Follow implementation-orchestration skill pipeline: pre-flight → plan synthesis → Coder → quality gates | +| **DEBUG** | Follow debug-orchestration skill pipeline: hypotheses → parallel Explores → convergence → report → offer fix | +| **PLAN** | Follow plan-orchestration skill pipeline: Skimmer → Explores → Plan agent → gap validation | +| **REVIEW** | Spawn single Reviewer agent with focus derived from prompt (e.g., "check security" → security-patterns focus; vague prompt → general review across all pillars) | +| **EXPLORE** | No agents — respond in main session | +| **CHAT** | No agents — respond in main session | --- ## Transparency Rules 1. **QUICK → silent.** No classification output. -2. **GUIDED → brief statement + full skill enforcement.** One line: intent, depth, skills loaded. Then follow every skill requirement without shortcuts. -3. **ELEVATE → recommendation.** Best-effort response + workflow nudge. -4. **Never lie about classification.** If uncertain, say so. -5. 
**Never over-classify.** When in doubt, go one tier lower. -6. **Never under-apply.** Rationalization is the enemy of quality. If a skill requires a step, do the step. +2. **ORCHESTRATED → brief statement + full skill enforcement + agent orchestration.** One line: intent, depth, skills loaded. Then follow every skill requirement and orchestrate agents per Step 5. +3. **Never lie about classification.** If uncertain, say so. +4. **Never over-classify.** When in doubt, QUICK. +5. **Never under-apply.** Rationalization is the enemy of quality. If a skill requires a step, do the step. ## Edge Cases | Case | Handling | |------|----------| -| Mixed intent ("fix this bug and add a test") | Use the higher-overhead intent (BUILD > DEBUG) | +| Mixed intent ("fix this bug and add a test") | Use the higher-overhead intent (IMPLEMENT > DEBUG) | | Continuation of previous conversation | Inherit previous classification unless prompt clearly shifts | | User explicitly requests no enforcement | Respect immediately — classify as QUICK | | Prompt references specific DevFlow command | Skip ambient — the command has its own orchestration | +| Multi-file change with clear scope | ORCHESTRATED | +| Multiple ORCHESTRATED triggers per session | Each runs independently; context compaction handles accumulation | diff --git a/shared/skills/ambient-router/references/skill-catalog.md b/shared/skills/ambient-router/references/skill-catalog.md index 652d1bd..a69521e 100644 --- a/shared/skills/ambient-router/references/skill-catalog.md +++ b/shared/skills/ambient-router/references/skill-catalog.md @@ -4,15 +4,14 @@ Full mapping of DevFlow skills to ambient intents and file-type triggers. The am ## Skills Available for Ambient Loading -These skills may be loaded during GUIDED-depth ambient routing. +These skills may be loaded during ORCHESTRATED-depth ambient routing. 
-### BUILD Intent +### IMPLEMENT Intent | Skill | When to Load | File Patterns | |-------|-------------|---------------| -| test-driven-development | Always for BUILD | `*.ts`, `*.tsx`, `*.js`, `*.jsx`, `*.py` | -| implementation-patterns | Always for BUILD | Any code file | -| search-first | Always for BUILD | Any code file | +| implementation-orchestration | Always for IMPLEMENT | Any — orchestrates agent pipeline | +| implementation-patterns | Always for IMPLEMENT | Any code file | | typescript | TypeScript files in scope | `*.ts`, `*.tsx` | | react | React components in scope | `*.tsx`, `*.jsx` | | frontend-design | UI/styling work | `*.css`, `*.scss`, `*.tsx` with styling keywords | @@ -27,7 +26,7 @@ These skills may be loaded during GUIDED-depth ambient routing. | Skill | When to Load | File Patterns | |-------|-------------|---------------| -| test-patterns | Always for DEBUG | Any test-related context | +| debug-orchestration | Always for DEBUG | Any — orchestrates investigation pipeline | | core-patterns | Always for DEBUG | Any code file | | git-safety | Git operations involved | User mentions git, rebase, merge, etc. | @@ -43,6 +42,7 @@ These skills may be loaded during GUIDED-depth ambient routing. 
| Skill | When to Load | File Patterns | |-------|-------------|---------------| +| plan-orchestration | Always for PLAN | Any — orchestrates design pipeline | | implementation-patterns | Always for PLAN | Any planning context | | core-patterns | Architectural planning | System design discussions | @@ -63,7 +63,7 @@ These skills are loaded only by explicit DevFlow commands (primarily `/code-revi ## Selection Limits -- **Maximum 3 skills** per ambient response (primary + up to 2 secondary) +- **Maximum 3 knowledge skills** per ambient response (primary + up to 2 secondary) +- **Orchestration skills** (implementation-orchestration, debug-orchestration, plan-orchestration) are loaded in addition to knowledge skills — they don't count toward the limit - **Primary skills** are always loaded for the classified intent - **Secondary skills** are loaded only when file patterns match conversation context -- If more than 3 skills seem relevant, this is an ELEVATE signal diff --git a/shared/skills/debug-orchestration/SKILL.md b/shared/skills/debug-orchestration/SKILL.md new file mode 100644 index 0000000..e29fe8f --- /dev/null +++ b/shared/skills/debug-orchestration/SKILL.md @@ -0,0 +1,67 @@ +--- +name: debug-orchestration +description: Agent orchestration for DEBUG intent — hypothesis investigation, root cause analysis, optional fix +user-invocable: false +allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion +--- + +# Debug Orchestration + +Agent pipeline for DEBUG intent in ambient ORCHESTRATED mode. Competing hypothesis investigation, parallel evidence gathering, convergence validation, and optional fix. + +## Iron Law + +> **COMPETING HYPOTHESES BEFORE CONCLUSIONS** +> +> Never investigate a single theory. Generate 3-5 distinct hypotheses, investigate them +> in parallel, and let evidence determine the root cause. Confirmation bias is the enemy +> of debugging — multiple hypotheses are the antidote. 
+ +--- + +## Phase 1: Hypothesize + +Analyze the bug description, error messages, and conversation context. Generate 3-5 hypotheses that are: + +- **Specific**: Points to a concrete mechanism (not "something is wrong with auth") +- **Testable**: Can be confirmed or disproved by examining specific files/logs +- **Distinct**: Each hypothesis proposes a different root cause + +If fewer than 3 hypotheses are possible, proceed with 2. + +## Phase 2: Investigate (Parallel) + +Spawn one Explore agent per hypothesis **in a single message** (parallel execution): + +- Each investigator searches for evidence FOR and AGAINST its hypothesis +- Must provide file:line references for all evidence +- Returns verdict: **CONFIRMED** | **DISPROVED** | **PARTIAL** (some evidence supports, some contradicts) + +## Phase 3: Converge + +Evaluate investigation results: + +- **One CONFIRMED**: Spawn 1-2 additional Explore agents to validate from different angles (prevent confirmation bias) +- **Multiple PARTIAL**: Look for a unifying root cause that explains all partial evidence +- **All DISPROVED**: Report honestly — "No root cause identified from initial hypotheses." Generate 2-3 second-round hypotheses if conversation context suggests avenues not yet explored. + +## Phase 4: Report + +Present root cause analysis: + +- **Confidence level**: HIGH (confirmed + validated) | MEDIUM (partial convergence) | LOW (best guess from incomplete evidence) +- **Evidence table**: Hypothesis → verdict → key evidence (file:line) +- **Root cause**: Clear statement of what's wrong and why +- **Recommended fix**: Specific changes with file references + +## Phase 5: Offer Fix + +Ask user via AskUserQuestion: "Want me to implement this fix?" + +- **YES** → Run the implementation-orchestration pipeline (load it via Skill tool): pre-flight → Coder → quality gates. The fix description becomes the EXECUTION_PLAN. +- **NO** → Done. Report stands as documentation. 
+ +## Error Handling + +- **All hypotheses disproved, no second-round ideas**: Report "No root cause identified" with summary of what was investigated and ruled out +- **Explore agents return insufficient evidence**: Report LOW confidence with available evidence, suggest manual investigation areas diff --git a/shared/skills/implementation-orchestration/SKILL.md b/shared/skills/implementation-orchestration/SKILL.md new file mode 100644 index 0000000..cb1e1ef --- /dev/null +++ b/shared/skills/implementation-orchestration/SKILL.md @@ -0,0 +1,90 @@ +--- +name: implementation-orchestration +description: Agent orchestration for IMPLEMENT intent — pre-flight, Coder, quality gates +user-invocable: false +allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion +--- + +# Implementation Orchestration + +Agent pipeline for IMPLEMENT intent in ambient ORCHESTRATED mode. Pre-flight checks, plan synthesis, Coder execution, and quality gates. + +## Iron Law + +> **QUALITY GATES ARE NON-NEGOTIABLE** +> +> Every Coder output passes through Validator → Simplifier → Scrutinizer → re-Validate → Shepherd. +> Skipping a gate because "it looks fine" is never acceptable. The pipeline runs to completion +> or halts on failure — there is no shortcut. + +--- + +## Phase 1: Pre-flight — Branch Safety + +Detect branch type before spawning Coder: + +- **Work branches** (`feat/`, `fix/`, `chore/`, `refactor/`, `docs/` prefix): proceed on current branch. +- **Protected branches** (`main`, `master`, `develop`, `release/*`, `staging`, `production`): ask user via AskUserQuestion with 2-3 suggested branch names following `{type}/{ticket}-{slug}` convention. Include ticket number if available from conversation context. +- **If user declines branch creation**: proceed on the protected branch. Respect the user's choice. 
+ +## Phase 2: Plan Synthesis + +Synthesize conversation context into a structured EXECUTION_PLAN for Coder: + +- **If a plan exists** in conversation context (from plan mode — accepted in-session or injected after "accept and clear") → use the plan as-is. +- **Otherwise** → synthesize from conversation: what to build, files/modules affected, constraints, decisions made during discussion. + +Format as structured markdown with: Goal, Steps, Files, Constraints, Decisions. + +## Phase 3: Coder Execution + +Record the starting git SHA (`{starting_sha}`, used in Phase 4) before spawning the first Coder: `git rev-parse HEAD` + +Spawn Coder agent with input variables: +- **TASK_ID**: Generated from timestamp (e.g., `task-2026-03-19_1430`) +- **TASK_DESCRIPTION**: From conversation context +- **BASE_BRANCH**: Current branch (or newly created branch from Phase 1) +- **EXECUTION_PLAN**: From Phase 2 +- **PATTERNS**: Codebase patterns from conversation context +- **CREATE_PR**: `false` (commit only, no push) +- **DOMAIN**: Inferred from files in scope (`backend`, `frontend`, `tests`, `fullstack`) + +**Execution strategy**: Single sequential Coder by default. Parallel Coders only when tasks are self-contained — zero shared contracts, no integration points, different files/modules with no imports between them. + +If Coder returns **BLOCKED**, halt the pipeline and report to user. + +## Phase 4: FILES_CHANGED Detection + +After Coder completes, detect changed files: + +```bash +git diff --name-only {starting_sha}...HEAD +``` + +Pass FILES_CHANGED to all quality gate agents. + +## Phase 5: Quality Gates + +Run sequentially — each gate must pass before the next: + +1. **Validator** (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries) +2. **Simplifier** — code clarity and maintainability pass on FILES_CHANGED +3. **Scrutinizer** — 9-pillar quality evaluation on FILES_CHANGED +4. **Validator** (re-validate after Simplifier/Scrutinizer changes) +5. 
**Shepherd** — verify implementation matches original request — retry up to 2× if misalignment found + +If any gate exhausts retries, halt pipeline and report what passed and what failed. + +## Phase 6: Completion + +Report results: +- Commits created (from Coder) +- Files changed +- Quality gate results (pass/fail per gate) +- No push — user decides when to push + +## Error Handling + +- **Coder BLOCKED**: Halt immediately, report blocker to user +- **Validator fails after retries**: Report specific failures, halt pipeline +- **Shepherd misalignment after retries**: Report misalignment details, let user decide next steps diff --git a/shared/skills/plan-orchestration/SKILL.md b/shared/skills/plan-orchestration/SKILL.md new file mode 100644 index 0000000..4003a56 --- /dev/null +++ b/shared/skills/plan-orchestration/SKILL.md @@ -0,0 +1,69 @@ +--- +name: plan-orchestration +description: Agent orchestration for PLAN intent — codebase orientation, design exploration, gap validation +user-invocable: false +allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion +--- + +# Plan Orchestration + +Agent pipeline for PLAN intent in ambient ORCHESTRATED mode. Codebase orientation, targeted exploration, architecture design, and gap validation. + +## Iron Law + +> **PLANS WITHOUT CODEBASE GROUNDING ARE FANTASIES** +> +> Orient before architecting. Every design decision must reference existing patterns, +> real file structures, and actual integration points. A plan that ignores the codebase +> will fail on contact with implementation. 
+ +--- + +## Phase 1: Orient + +Spawn Skimmer agent to get codebase overview relevant to the planning question: + +- Existing patterns and conventions in the affected area +- File structure and module boundaries +- Test patterns and coverage approach +- Related prior implementations (similar features, analogous patterns) + +## Phase 2: Explore + +Based on Skimmer findings, spawn 2-3 Explore agents **in a single message** (parallel execution): + +- **Integration explorer**: Examine integration points — APIs, shared types, module boundaries the plan must respect +- **Pattern explorer**: Find existing implementations of similar features to follow as templates +- **Constraint explorer**: Identify constraints — test infrastructure, build system, CI requirements, deployment concerns + +Adjust explorer focus based on the specific planning question. + +## Phase 3: Design + +Spawn Plan agent with combined Skimmer + Explore findings: + +- Design implementation approach with file-level specificity +- Reference existing patterns discovered in Phases 1-2 +- Include: architecture decisions, file changes, new files needed, test strategy +- Flag any areas where existing patterns conflict with the proposed approach + +## Phase 4: Validate + +Main session reviews the plan for: + +- **Gaps**: Missing files, unhandled edge cases, integration points not addressed +- **Risks**: Areas where the plan deviates from existing patterns, potential regressions +- **Ambiguities**: Design choices that need user input + +Present plan to user with identified risks. Use AskUserQuestion for any ambiguous design choices. 
+ +## Output + +Structured plan ready to feed into IMPLEMENT/ORCHESTRATED if user proceeds: + +- Goal and scope +- Architecture decisions with rationale +- File-level change list (create/modify/delete) +- Test strategy +- Risks and mitigations +- Open questions (if any) diff --git a/shared/skills/test-driven-development/SKILL.md b/shared/skills/test-driven-development/SKILL.md index c60180c..4524457 100644 --- a/shared/skills/test-driven-development/SKILL.md +++ b/shared/skills/test-driven-development/SKILL.md @@ -91,7 +91,7 @@ See `references/rationalization-prevention.md` for extended examples with code. ## Process Enforcement -When implementing any feature under ambient BUILD/GUIDED: +When implementing any feature under ambient IMPLEMENT/ORCHESTRATED: 1. **Identify the first behavior** — What is the simplest thing this feature must do? 2. **Write the test** — Describe that behavior as a failing test @@ -130,7 +130,6 @@ When skipping TDD, never rationalize. State clearly: "Skipping TDD because: [spe ## Integration with Ambient Mode -- **BUILD/GUIDED** → TDD enforced. Every new function/method gets test-first treatment. -- **BUILD/QUICK** → TDD skipped (trivial single-file edit). -- **BUILD/ELEVATE** → TDD mentioned in nudge toward `/implement`. -- **DEBUG/GUIDED** → TDD applies to the fix: write a test that reproduces the bug first, then fix. +- **IMPLEMENT/ORCHESTRATED** → TDD enforced via Coder agent (skill in Coder frontmatter). Every implementation gets test-first treatment. +- **IMPLEMENT/QUICK** → TDD skipped (trivial single-file edit). +- **DEBUG/ORCHESTRATED** → TDD applies to the fix: write a test that reproduces the bug first, then fix. 
diff --git a/src/cli/commands/ambient.ts b/src/cli/commands/ambient.ts index c9a7510..4df1f31 100644 --- a/src/cli/commands/ambient.ts +++ b/src/cli/commands/ambient.ts @@ -168,7 +168,7 @@ export const ambientCommand = new Command('ambient') } await fs.writeFile(settingsPath, updated, 'utf-8'); p.log.success('Ambient mode enabled — UserPromptSubmit hook registered'); - p.log.info(color.dim('Relevant skills will now auto-load based on each prompt')); + p.log.info(color.dim('Skills auto-load and agents orchestrate based on each prompt')); } if (options.disable) { diff --git a/src/cli/plugins.ts b/src/cli/plugins.ts index 34f2531..9e15c23 100644 --- a/src/cli/plugins.ts +++ b/src/cli/plugins.ts @@ -70,10 +70,10 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ }, { name: 'devflow-ambient', - description: 'Ambient mode — auto-loads relevant skills for every prompt', + description: 'Ambient mode — intent classification with proportional agent orchestration', commands: ['/ambient'], - agents: [], - skills: ['ambient-router'], + agents: ['coder', 'validator', 'simplifier', 'scrutinizer', 'shepherd', 'skimmer', 'reviewer'], + skills: ['ambient-router', 'implementation-orchestration', 'debug-orchestration', 'plan-orchestration'], }, { name: 'devflow-audit-claude', From 15849ce7c300452af2f7f4bb6c1e23fef32d64f8 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Thu, 19 Mar 2026 18:38:22 +0200 Subject: [PATCH 2/3] fix(ambient): three-tier model, search-first on Coder, debug agent budget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code review fixes for PR #149: - Reinstate GUIDED as middle tier: QUICK / GUIDED / ORCHESTRATED - GUIDED: small scope (≤2 files), main session + skills + Simplifier - ORCHESTRATED: large scope (>2 files), full agent pipeline - Scope-based split per intent (IMPLEMENT, DEBUG, PLAN, REVIEW) - Add search-first to Coder agent permanent skills - Add hard cap of 8 Explore agents for DEBUG pipeline - Fix stale 
integration tests: BUILD→IMPLEMENT, ELEVATE→ORCHESTRATED - Update plugin README, ambient command, hook preamble for three tiers - Add CHANGELOG entry under [Unreleased] --- CHANGELOG.md | 8 +++ CLAUDE.md | 2 +- plugins/devflow-ambient/README.md | 51 ++++++++++---- plugins/devflow-ambient/commands/ambient.md | 49 +++++++++---- scripts/hooks/ambient-prompt | 13 ++-- shared/agents/coder.md | 2 +- shared/skills/ambient-router/SKILL.md | 69 ++++++++++++++----- .../references/skill-catalog.md | 66 +++++++++--------- shared/skills/debug-orchestration/SKILL.md | 14 +++- .../skills/test-driven-development/SKILL.md | 4 +- tests/integration/ambient-activation.test.ts | 12 ++-- tests/integration/helpers.ts | 8 +-- 12 files changed, 201 insertions(+), 97 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 067b993..9e6cbf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- **Ambient mode**: Three depth tiers (QUICK/GUIDED/ORCHESTRATED) replacing old QUICK/GUIDED/ELEVATE +- **Ambient mode**: ORCHESTRATED tier spawns agent pipelines for IMPLEMENT, DEBUG, PLAN intents +- **Ambient mode**: GUIDED tier for small-scope IMPLEMENT (≤2 files), simple DEBUG, focused PLAN, and REVIEW — main session with skills + Simplifier +- **Ambient mode**: BUILD intent renamed to IMPLEMENT for clarity +- **Coder agent**: Added `test-driven-development` and `search-first` to permanent skills +- **Debug pipeline**: Agent budget capped at 8 Explore agents total + --- ## [1.5.0] - 2026-03-13 diff --git a/CLAUDE.md b/CLAUDE.md index d2af6b4..b86f091 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -22,7 +22,7 @@ Plugin marketplace with 17 plugins (8 core + 9 optional language/ecosystem), eac | `devflow-resolve` | Review issue resolution | Optional | | `devflow-debug` | Competing hypothesis debugging | Optional | | `devflow-self-review` | Self-review (Simplifier + Scrutinizer) | No | 
-| `devflow-ambient` | Ambient mode — intent classification with agent orchestration | No | +| `devflow-ambient` | Ambient mode — three-tier intent classification (QUICK/GUIDED/ORCHESTRATED) | No | | `devflow-core-skills` | Auto-activating quality enforcement | No | | `devflow-audit-claude` | Audit CLAUDE.md files (optional) | No | | `devflow-typescript` | TypeScript language patterns (optional) | No | diff --git a/plugins/devflow-ambient/README.md b/plugins/devflow-ambient/README.md index 6fcc701..fdcb9d8 100644 --- a/plugins/devflow-ambient/README.md +++ b/plugins/devflow-ambient/README.md @@ -1,18 +1,21 @@ # devflow-ambient -Ambient mode — auto-loads relevant skills based on each prompt, no explicit commands needed. +Ambient mode — auto-classifies intent and applies proportional skill enforcement with optional agent orchestration. ## Command ### `/ambient` -Classify user intent and apply proportional skill enforcement to any prompt. +Classify user intent and apply proportional enforcement to any prompt. ```bash -/ambient add a login form # BUILD/GUIDED — loads TDD + implementation-patterns -/ambient fix the auth error # DEBUG/GUIDED — loads test-patterns + core-patterns +/ambient add a login form # IMPLEMENT/GUIDED — skills + main session + Simplifier +/ambient refactor the auth system # IMPLEMENT/ORCHESTRATED — Coder + quality gates +/ambient fix the auth error # DEBUG/GUIDED — main session diagnoses + fixes +/ambient debug flaky test failures # DEBUG/ORCHESTRATED — parallel hypothesis investigation +/ambient how should we cache? # PLAN/ORCHESTRATED — Skimmer + Explore + Plan agents /ambient where is the config? # EXPLORE/QUICK — responds normally, zero overhead -/ambient refactor the auth system # BUILD/ELEVATE — suggests /implement +/ambient commit this # QUICK — no overhead ``` ## Always-On Mode @@ -29,21 +32,39 @@ When enabled, a `UserPromptSubmit` hook injects a classification preamble before ## How It Works -1. 
**Classify intent** — BUILD, DEBUG, REVIEW, PLAN, EXPLORE, or CHAT -2. **Classify depth** — QUICK (zero overhead), GUIDED (2-3 skills), or ELEVATE (workflow nudge) +1. **Classify intent** — IMPLEMENT, DEBUG, REVIEW, PLAN, EXPLORE, or CHAT +2. **Classify depth** — QUICK, GUIDED, or ORCHESTRATED (scope-based) 3. **Apply proportionally**: - - QUICK: respond normally - - GUIDED: load relevant skills, enforce TDD for BUILD - - ELEVATE: respond + recommend full workflow command + - QUICK: respond normally (zero overhead) + - GUIDED: load skills, implement in main session, spawn Simplifier after code changes + - ORCHESTRATED: load skills, orchestrate full agent pipeline ## Depth Tiers -| Depth | When | Overhead | -|-------|------|----------| -| QUICK | Chat, simple exploration, git/devops ops, single-word confirmations | ~0 tokens | -| GUIDED | BUILD/DEBUG/REVIEW/PLAN, 1-5 file scope | ~500-1000 tokens (skill reads) | -| ELEVATE | Multi-file, architectural, system-wide scope | ~0 extra tokens (nudge only) | +| Depth | When | What Happens | +|-------|------|-------------| +| QUICK | Chat, exploration, git ops, config, trivial edits | Zero overhead — respond normally | +| GUIDED | Small-scope IMPLEMENT (≤2 files), clear DEBUG, focused PLAN, REVIEW | Load skills → main session works → Simplifier cleanup | +| ORCHESTRATED | Large-scope IMPLEMENT (>2 files), vague DEBUG, system-level PLAN | Load skills → spawn agent pipeline | + +### Scope-Based Split + +| Intent | GUIDED | ORCHESTRATED | +|--------|--------|-------------| +| IMPLEMENT | ≤2 files, single module | >2 files, multi-module | +| DEBUG | Clear error with stack trace/location | Vague/cross-cutting bug | +| PLAN | Focused design question | System-level architecture | +| REVIEW | Always GUIDED | — | + +## Agent Orchestration (ORCHESTRATED only) + +| Intent | Pipeline | +|--------|----------| +| IMPLEMENT | Pre-flight → Coder → Validator → Simplifier → Scrutinizer → Shepherd | +| DEBUG | Hypotheses → parallel Explores 
(max 8) → convergence → report → offer fix | +| PLAN | Skimmer → Explores → Plan agent → gap validation | ## Skills - `ambient-router` — Intent + depth classification, skill selection matrix +- `test-driven-development` — TDD enforcement for IMPLEMENT (GUIDED + ORCHESTRATED) diff --git a/plugins/devflow-ambient/commands/ambient.md b/plugins/devflow-ambient/commands/ambient.md index 9e24332..442907a 100644 --- a/plugins/devflow-ambient/commands/ambient.md +++ b/plugins/devflow-ambient/commands/ambient.md @@ -4,7 +4,7 @@ description: Ambient mode — classify intent and auto-load relevant skills for # Ambient Command -Classify user intent and respond with proportional effort — zero overhead for simple requests, skill loading + agent orchestration for substantive work. +Classify user intent and respond with proportional effort — zero overhead for simple requests, skill loading for guided work, skill loading + agent orchestration for complex work. ## Usage @@ -25,20 +25,22 @@ Read the `ambient-router` skill: Apply the ambient-router classification to `$ARGUMENTS`: 1. **Intent:** IMPLEMENT | DEBUG | REVIEW | PLAN | EXPLORE | CHAT -2. **Depth:** QUICK | ORCHESTRATED +2. **Depth:** QUICK | GUIDED | ORCHESTRATED If no arguments provided, output: ``` ## Ambient Mode -Classify intent and auto-load relevant skills with agent orchestration. +Classify intent and auto-load relevant skills with optional agent orchestration. Usage: /ambient Examples: - /ambient add a login form → IMPLEMENT/ORCHESTRATED (Coder + quality gates) - /ambient fix the auth error → DEBUG/ORCHESTRATED (parallel hypothesis investigation) + /ambient add a login form → IMPLEMENT/GUIDED (main session + skills + Simplifier) + /ambient refactor the auth system → IMPLEMENT/ORCHESTRATED (Coder + quality gates) + /ambient fix the auth error → DEBUG/GUIDED (main session diagnoses + fixes) + /ambient debug flaky test failures → DEBUG/ORCHESTRATED (parallel hypothesis investigation) /ambient how should we cache? 
→ PLAN/ORCHESTRATED (Skimmer + Explore + Plan agents) /ambient where is the config? → EXPLORE/QUICK (responds normally) /ambient commit this → QUICK (no overhead) @@ -51,6 +53,7 @@ Then stop. ### Phase 3: State Classification - **QUICK:** Skip this phase entirely. Respond directly in Phase 4. +- **GUIDED:** Output one line: `Ambient: {INTENT}/GUIDED. Loading: {skill1}, {skill2}.` - **ORCHESTRATED:** Output one line: `Ambient: {INTENT}/ORCHESTRATED. Loading: {skill1}, {skill2}.` ### Phase 4: Apply @@ -58,6 +61,18 @@ Then stop. **QUICK:** Respond to the user's prompt normally. Zero skill loading. Zero overhead. +**GUIDED:** +Invoke each selected skill using the Skill tool based on the ambient-router's skill selection matrix: + +| Intent | Skills Loaded | Main Session Work | Post-Work | +|--------|--------------|-------------------|-----------| +| IMPLEMENT | implementation-patterns, search-first | Implement directly with TDD | Spawn Simplifier on changed files | +| DEBUG | core-patterns, test-patterns | Investigate, diagnose, fix | Spawn Simplifier on changed files | +| PLAN | implementation-patterns, core-patterns | Explore and design directly | No Simplifier | +| REVIEW | self-review, core-patterns | Review directly | No Simplifier | + +After loading skills, work directly in main session following loaded skill patterns. 
+ **ORCHESTRATED:** Invoke each selected skill using the Skill tool based on the ambient-router's skill selection matrix: @@ -66,7 +81,6 @@ Invoke each selected skill using the Skill tool based on the ambient-router's sk | IMPLEMENT | implementation-orchestration, implementation-patterns | Pre-flight → Coder → Validator → Simplifier → Scrutinizer → Shepherd | | DEBUG | debug-orchestration, core-patterns | Hypotheses → parallel Explores → convergence → report → offer fix | | PLAN | plan-orchestration, implementation-patterns, core-patterns | Skimmer → Explores → Plan agent → gap validation | -| REVIEW | self-review, core-patterns | Single Reviewer agent with focus from prompt | After loading skills, follow the orchestration skill's pipeline (Step 5 of ambient-router). @@ -77,14 +91,18 @@ After loading skills, follow the orchestration skill's pipeline (Step 5 of ambie │ ├─ Phase 1: Load ambient-router skill ├─ Phase 2: Classify intent + depth -├─ Phase 3: State classification (ORCHESTRATED only) +├─ Phase 3: State classification (GUIDED/ORCHESTRATED only) └─ Phase 4: Apply - ├─ QUICK → respond directly (no agents) + ├─ QUICK → respond directly (no agents, no skills) + ├─ GUIDED → load skills via Skill tool → main session implements → Simplifier + │ ├─ IMPLEMENT → skills + TDD + Simplifier + │ ├─ DEBUG → skills + diagnose/fix + Simplifier + │ ├─ PLAN → skills + explore/design + │ └─ REVIEW → skills + review directly └─ ORCHESTRATED → load skills via Skill tool → orchestrate agents ├─ IMPLEMENT → implementation-orchestration pipeline ├─ DEBUG → debug-orchestration pipeline - ├─ PLAN → plan-orchestration pipeline - └─ REVIEW → single Reviewer agent + └─ PLAN → plan-orchestration pipeline ``` ## Edge Cases @@ -96,12 +114,13 @@ After loading skills, follow the orchestration skill's pipeline (Step 5 of ambie | Prompt references `/implement` etc. 
| Classify as normal — user chose /ambient intentionally | | Mixed intent ("fix and add test") | Use higher-overhead intent (IMPLEMENT > DEBUG) | | User says "no enforcement" | Respect immediately — treat as QUICK | -| Multiple ORCHESTRATED triggers per session | Each runs independently; context compaction handles accumulation | +| Scope ambiguous (GUIDED vs ORCHESTRATED) | Default to GUIDED; escalate if complexity emerges | +| Multiple triggers per session | Each runs independently; context compaction handles accumulation | ## Principles -1. **Agents for ORCHESTRATED, main session for QUICK** — proportional response +1. **Three tiers** — QUICK (zero overhead), GUIDED (skills + main session), ORCHESTRATED (skills + agents) 2. **Skill tool for loading** — invoke skills via Skill tool, not Read -3. **Conservative classification** — default to QUICK; ORCHESTRATED has real agent cost -4. **Transparent** — state classification for ORCHESTRATED, silent for QUICK -5. **Respectful** — never over-classify; when in doubt, QUICK +3. **Conservative classification** — default to QUICK; prefer GUIDED over ORCHESTRATED +4. **Transparent** — state classification for GUIDED/ORCHESTRATED, silent for QUICK +5. **Respectful** — never over-classify; when in doubt, one tier lower diff --git a/scripts/hooks/ambient-prompt b/scripts/hooks/ambient-prompt index 3d61a95..1db66fb 100755 --- a/scripts/hooks/ambient-prompt +++ b/scripts/hooks/ambient-prompt @@ -33,17 +33,22 @@ fi # Inject classification preamble PREAMBLE="AMBIENT MODE ACTIVE: Before responding, silently classify this prompt: Intent: IMPLEMENT | DEBUG | REVIEW | PLAN | EXPLORE | CHAT -Depth: QUICK (no overhead) | ORCHESTRATED (skills + agent orchestration) +Depth: QUICK (no overhead) | GUIDED (skills + main session + Simplifier) | ORCHESTRATED (skills + agent orchestration) -Classification conservatism: default to QUICK. Only classify ORCHESTRATED when the prompt -has clear task scope. 
Small edits, config changes, git ops, exploratory questions → QUICK. +Classification conservatism: default to QUICK. Only classify GUIDED when the prompt has a clear, +small task scope (≤2 files). Only classify ORCHESTRATED when scope is large (>2 files, +multi-module). Small edits, config changes, git ops, exploratory questions → QUICK. + +If GUIDED: Use the ambient-router skill already in your session context. +BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. +Then work directly in main session. After code changes, spawn Simplifier on changed files. If ORCHESTRATED: Use the ambient-router skill already in your session context. BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. Then orchestrate agents per the ambient-router Step 5. If QUICK: Respond normally without stating classification. -Only state classification aloud for ORCHESTRATED." +Only state classification aloud for GUIDED and ORCHESTRATED." jq -n --arg ctx "$PREAMBLE" '{ "hookSpecificOutput": { diff --git a/shared/agents/coder.md b/shared/agents/coder.md index b9c8d0f..f548598 100644 --- a/shared/agents/coder.md +++ b/shared/agents/coder.md @@ -2,7 +2,7 @@ name: Coder description: Autonomous task implementation on feature branch. Implements, tests, and commits. model: inherit -skills: core-patterns, git-safety, implementation-patterns, git-workflow, test-patterns, test-driven-development, input-validation +skills: core-patterns, git-safety, implementation-patterns, git-workflow, test-patterns, test-driven-development, search-first, input-validation --- # Coder Agent diff --git a/shared/skills/ambient-router/SKILL.md b/shared/skills/ambient-router/SKILL.md index b89cc2b..31abb49 100644 --- a/shared/skills/ambient-router/SKILL.md +++ b/shared/skills/ambient-router/SKILL.md @@ -7,16 +7,17 @@ allowed-tools: Read, Grep, Glob # Ambient Router -Classify user intent and auto-load relevant skills. 
Zero overhead for simple requests, skill loading + agent orchestration for substantive work. +Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill loading + optional agent orchestration for substantive work. ## Iron Law -> **ORCHESTRATED GETS SKILLS + AGENT ORCHESTRATION MATCHED TO INTENT** +> **PROPORTIONAL RESPONSE MATCHED TO SCOPE** > -> QUICK gets zero overhead. ORCHESTRATED gets full skill loading via the Skill tool plus +> QUICK gets zero overhead. GUIDED gets skill loading + main session implementation +> with Simplifier cleanup. ORCHESTRATED gets full skill loading via the Skill tool plus > agent pipeline execution. Misclassification in either direction is a failure — > false-positive ORCHESTRATED is expensive (5-6 agent spawns), false-negative -> ORCHESTRATED leaves quality on the table. +> GUIDED leaves quality on the table. --- @@ -41,21 +42,41 @@ Determine how much enforcement the prompt warrants. | Depth | Criteria | Action | |-------|----------|--------| -| **QUICK** | CHAT intent. EXPLORE intent. PLAN discussions without clear deliverable. Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. Small edits, config changes. | Respond normally. Zero overhead. Do not state classification. | -| **ORCHESTRATED** | IMPLEMENT with clear, scoped task. DEBUG with specific bug/error. PLAN with clear deliverable ("design the caching layer"). REVIEW with code to review. Multi-file changes with defined scope. | Load skills via Skill tool, then orchestrate agents per Step 5. State classification. | +| **QUICK** | CHAT intent. EXPLORE intent. Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. Small edits, config changes, trivial single-file tweaks. | Respond normally. Zero overhead. Do not state classification. | +| **GUIDED** | IMPLEMENT with small scope (≤2 files, single module). 
DEBUG with clear error location (stack trace, specific file, known function). PLAN for focused design questions (specific area/pattern). REVIEW (always GUIDED). | Load skills via Skill tool. Main session implements directly. Spawn Simplifier after code changes. State classification. | +| **ORCHESTRATED** | IMPLEMENT with larger scope (>2 files, multi-module, complex). DEBUG with vague/cross-cutting bug (no clear location, multiple possible causes). PLAN for system-level architecture (caching layer, auth system, multi-module design). | Load skills via Skill tool, then orchestrate agents per Step 5. State classification. | -**Classification conservatism:** Default to QUICK. Only classify ORCHESTRATED when the prompt has clear task scope. ORCHESTRATED triggers agent spawning which has real cost. When in doubt, QUICK. +**Scope-based decision criteria:** -## Step 3: Select Skills (ORCHESTRATED depth only) +| Intent | GUIDED (small scope) | ORCHESTRATED (large scope) | +|--------|---------------------|---------------------------| +| **IMPLEMENT** | ≤2 files, single module, clear task | >2 files, multi-module, complex | +| **DEBUG** | Clear error with known location (stack trace, specific file) | Vague/cross-cutting bug, multiple possible causes | +| **PLAN** | Focused question about specific area/pattern | System-level architecture, multi-module design | +| **REVIEW** | Always GUIDED | — | -Based on classified intent, invoke each selected skill using the Skill tool. +**Classification conservatism:** Default to QUICK. Only classify GUIDED/ORCHESTRATED when the prompt has clear task scope. When choosing between GUIDED and ORCHESTRATED, prefer GUIDED — escalate only when scope clearly exceeds main-session capacity. + +## Step 3: Select Skills + +Based on classified intent and depth, invoke each selected skill using the Skill tool. 
+ +### GUIDED-depth skills + +| Intent | Primary Skills | Secondary (if file type matches) | +|--------|---------------|----------------------------------| +| **IMPLEMENT** | implementation-patterns, search-first | typescript (.ts), react (.tsx/.jsx), go (.go), java (.java), python (.py), rust (.rs), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | +| **DEBUG** | core-patterns, test-patterns | git-safety (if git operations involved) | +| **PLAN** | implementation-patterns, core-patterns | — | +| **REVIEW** | self-review, core-patterns | test-patterns | + +### ORCHESTRATED-depth skills | Intent | Primary Skills | Secondary (if file type matches) | |--------|---------------|----------------------------------| | **IMPLEMENT** | implementation-orchestration, implementation-patterns | typescript (.ts), react (.tsx/.jsx), go (.go), java (.java), python (.py), rust (.rs), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | | **DEBUG** | debug-orchestration, core-patterns | git-safety (if git operations involved) | | **PLAN** | plan-orchestration, implementation-patterns, core-patterns | — | -| **REVIEW** | self-review, core-patterns | test-patterns | **Excluded from ambient** (review-command-only): review-methodology, complexity-patterns, consistency-patterns, database-patterns, dependencies-patterns, documentation-patterns, regression-patterns, architecture-patterns, accessibility, performance-patterns. @@ -64,14 +85,25 @@ See `references/skill-catalog.md` for the full skill-to-intent mapping with file ## Step 4: Apply -When classification is ORCHESTRATED, skill loading is NON-NEGOTIABLE. +When classification is GUIDED or ORCHESTRATED, skill loading is NON-NEGOTIABLE. Do not rationalize skipping skills. Do not respond without loading them first. BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. 
+If test-driven-development is selected (IMPLEMENT intent), you MUST write the failing test before ANY production code. - **QUICK:** Respond directly. No preamble, no classification statement. +- **GUIDED:** State classification briefly: `Ambient: IMPLEMENT/GUIDED. Loading: implementation-patterns, search-first.` Then invoke each skill using the Skill tool and work directly in main session. After code changes, spawn Simplifier on changed files. - **ORCHESTRATED:** State classification briefly: `Ambient: IMPLEMENT/ORCHESTRATED. Loading: implementation-orchestration, implementation-patterns.` Then invoke each skill using the Skill tool and follow Step 5 for agent orchestration. +### GUIDED Behavior by Intent + +| Intent | Main Session Work | Post-Work | +|--------|------------------|-----------| +| **IMPLEMENT** | Implement directly with loaded skills. Follow TDD cycle. | Spawn Simplifier on changed files. | +| **DEBUG** | Investigate directly — reproduce bug, diagnose from stack trace/error, fix. | Spawn Simplifier on changed files. | +| **PLAN** | Explore relevant code and design directly. The area is focused enough for main session. | No Simplifier (no code changes). | +| **REVIEW** | Review directly with loaded skills. | No Simplifier. 
| + ## Step 5: Orchestrate Agents (ORCHESTRATED depth only) After loading skills via Step 3-4, execute the agent pipeline for the classified intent: @@ -81,7 +113,6 @@ After loading skills via Step 3-4, execute the agent pipeline for the classified | **IMPLEMENT** | Follow implementation-orchestration skill pipeline: pre-flight → plan synthesis → Coder → quality gates | | **DEBUG** | Follow debug-orchestration skill pipeline: hypotheses → parallel Explores → convergence → report → offer fix | | **PLAN** | Follow plan-orchestration skill pipeline: Skimmer → Explores → Plan agent → gap validation | -| **REVIEW** | Spawn single Reviewer agent with focus derived from prompt (e.g., "check security" → security-patterns focus; vague prompt → general review across all pillars) | | **EXPLORE** | No agents — respond in main session | | **CHAT** | No agents — respond in main session | @@ -90,10 +121,11 @@ After loading skills via Step 3-4, execute the agent pipeline for the classified ## Transparency Rules 1. **QUICK → silent.** No classification output. -2. **ORCHESTRATED → brief statement + full skill enforcement + agent orchestration.** One line: intent, depth, skills loaded. Then follow every skill requirement and orchestrate agents per Step 5. -3. **Never lie about classification.** If uncertain, say so. -4. **Never over-classify.** When in doubt, QUICK. -5. **Never under-apply.** Rationalization is the enemy of quality. If a skill requires a step, do the step. +2. **GUIDED → brief statement + full skill enforcement.** One line: intent, depth, skills loaded. Then implement in main session with skill patterns applied. +3. **ORCHESTRATED → brief statement + full skill enforcement + agent orchestration.** One line: intent, depth, skills loaded. Then follow every skill requirement and orchestrate agents per Step 5. +4. **Never lie about classification.** If uncertain, say so. +5. **Never over-classify.** When in doubt, go one tier lower. +6. 
**Never under-apply.** Rationalization is the enemy of quality. If a skill requires a step, do the step. ## Edge Cases @@ -103,5 +135,6 @@ After loading skills via Step 3-4, execute the agent pipeline for the classified | Continuation of previous conversation | Inherit previous classification unless prompt clearly shifts | | User explicitly requests no enforcement | Respect immediately — classify as QUICK | | Prompt references specific DevFlow command | Skip ambient — the command has its own orchestration | -| Multi-file change with clear scope | ORCHESTRATED | -| Multiple ORCHESTRATED triggers per session | Each runs independently; context compaction handles accumulation | +| Scope ambiguous between GUIDED and ORCHESTRATED | Default to GUIDED; escalate if complexity emerges during work | +| REVIEW intent | Always GUIDED — single Reviewer focus, no orchestration pipeline | +| Multiple triggers per session | Each runs independently; context compaction handles accumulation | diff --git a/shared/skills/ambient-router/references/skill-catalog.md b/shared/skills/ambient-router/references/skill-catalog.md index a69521e..4f460d5 100644 --- a/shared/skills/ambient-router/references/skill-catalog.md +++ b/shared/skills/ambient-router/references/skill-catalog.md @@ -4,47 +4,49 @@ Full mapping of DevFlow skills to ambient intents and file-type triggers. The am ## Skills Available for Ambient Loading -These skills may be loaded during ORCHESTRATED-depth ambient routing. +These skills may be loaded during GUIDED and ORCHESTRATED-depth ambient routing. 
### IMPLEMENT Intent -| Skill | When to Load | File Patterns | -|-------|-------------|---------------| -| implementation-orchestration | Always for IMPLEMENT | Any — orchestrates agent pipeline | -| implementation-patterns | Always for IMPLEMENT | Any code file | -| typescript | TypeScript files in scope | `*.ts`, `*.tsx` | -| react | React components in scope | `*.tsx`, `*.jsx` | -| frontend-design | UI/styling work | `*.css`, `*.scss`, `*.tsx` with styling keywords | -| input-validation | Forms, APIs, user input | Files with form/input/validation keywords | -| go | Go files in scope | `*.go` | -| java | Java files in scope | `*.java` | -| python | Python files in scope | `*.py` | -| rust | Rust files in scope | `*.rs` | -| security-patterns | Auth, crypto, secrets | Files with auth/token/crypto/password keywords | +| Skill | When to Load | Depth | File Patterns | +|-------|-------------|-------|---------------| +| implementation-orchestration | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates agent pipeline | +| implementation-patterns | Always for IMPLEMENT | GUIDED + ORCHESTRATED | Any code file | +| search-first | Always for IMPLEMENT | GUIDED + ORCHESTRATED | Any — enforces research before building | +| typescript | TypeScript files in scope | GUIDED + ORCHESTRATED | `*.ts`, `*.tsx` | +| react | React components in scope | GUIDED + ORCHESTRATED | `*.tsx`, `*.jsx` | +| frontend-design | UI/styling work | GUIDED + ORCHESTRATED | `*.css`, `*.scss`, `*.tsx` with styling keywords | +| input-validation | Forms, APIs, user input | GUIDED + ORCHESTRATED | Files with form/input/validation keywords | +| go | Go files in scope | GUIDED + ORCHESTRATED | `*.go` | +| java | Java files in scope | GUIDED + ORCHESTRATED | `*.java` | +| python | Python files in scope | GUIDED + ORCHESTRATED | `*.py` | +| rust | Rust files in scope | GUIDED + ORCHESTRATED | `*.rs` | +| security-patterns | Auth, crypto, secrets | GUIDED + ORCHESTRATED | Files with 
auth/token/crypto/password keywords | ### DEBUG Intent -| Skill | When to Load | File Patterns | -|-------|-------------|---------------| -| debug-orchestration | Always for DEBUG | Any — orchestrates investigation pipeline | -| core-patterns | Always for DEBUG | Any code file | -| git-safety | Git operations involved | User mentions git, rebase, merge, etc. | +| Skill | When to Load | Depth | File Patterns | +|-------|-------------|-------|---------------| +| debug-orchestration | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates investigation pipeline | +| core-patterns | Always for DEBUG | GUIDED + ORCHESTRATED | Any code file | +| test-patterns | Always for DEBUG (GUIDED) | GUIDED | Any code file | +| git-safety | Git operations involved | GUIDED + ORCHESTRATED | User mentions git, rebase, merge, etc. | ### REVIEW Intent -| Skill | When to Load | File Patterns | -|-------|-------------|---------------| -| self-review | Always for REVIEW | Any code file | -| core-patterns | Always for REVIEW | Any code file | -| test-patterns | Test files in scope | `*.test.*`, `*.spec.*` | +| Skill | When to Load | Depth | File Patterns | +|-------|-------------|-------|---------------| +| self-review | Always for REVIEW | GUIDED | Any code file | +| core-patterns | Always for REVIEW | GUIDED | Any code file | +| test-patterns | Test files in scope | GUIDED | `*.test.*`, `*.spec.*` | ### PLAN Intent -| Skill | When to Load | File Patterns | -|-------|-------------|---------------| -| plan-orchestration | Always for PLAN | Any — orchestrates design pipeline | -| implementation-patterns | Always for PLAN | Any planning context | -| core-patterns | Architectural planning | System design discussions | +| Skill | When to Load | Depth | File Patterns | +|-------|-------------|-------|---------------| +| plan-orchestration | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates design pipeline | +| implementation-patterns | Always for PLAN | GUIDED + ORCHESTRATED | Any planning 
context | +| core-patterns | Always for PLAN | GUIDED + ORCHESTRATED | System design discussions | ## Skills Excluded from Ambient @@ -64,6 +66,8 @@ These skills are loaded only by explicit DevFlow commands (primarily `/code-revi ## Selection Limits - **Maximum 3 knowledge skills** per ambient response (primary + up to 2 secondary) -- **Orchestration skills** (implementation-orchestration, debug-orchestration, plan-orchestration) are loaded in addition to knowledge skills — they don't count toward the limit -- **Primary skills** are always loaded for the classified intent +- **Orchestration skills** (implementation-orchestration, debug-orchestration, plan-orchestration) are loaded only at ORCHESTRATED depth — they don't count toward the knowledge skill limit +- **Primary skills** are always loaded for the classified intent at both GUIDED and ORCHESTRATED depth - **Secondary skills** are loaded only when file patterns match conversation context +- **GUIDED depth** loads knowledge skills only (no orchestration skills) — main session works directly +- **ORCHESTRATED depth** loads orchestration skill + knowledge skills — agents execute the pipeline diff --git a/shared/skills/debug-orchestration/SKILL.md b/shared/skills/debug-orchestration/SKILL.md index e29fe8f..ecdf5a9 100644 --- a/shared/skills/debug-orchestration/SKILL.md +++ b/shared/skills/debug-orchestration/SKILL.md @@ -29,9 +29,21 @@ Analyze the bug description, error messages, and conversation context. Generate If fewer than 3 hypotheses are possible, proceed with 2. +## Agent Budget + +Hard cap: **8 total Explore agents** across all phases. + +| Phase | Allocation | +|-------|-----------| +| Phase 2 (Investigate) | Up to 5 (one per hypothesis, 3-5 hypotheses) | +| Phase 3 (Converge — validation) | Up to 2 | +| Phase 3 (Converge — second round) | Remaining budget (typically 1) | + +If budget is exhausted before convergence, ask user to narrow scope via AskUserQuestion rather than spawning more agents. 
+ ## Phase 2: Investigate (Parallel) -Spawn one Explore agent per hypothesis **in a single message** (parallel execution): +Spawn one Explore agent per hypothesis **in a single message** (parallel execution, max 5): - Each investigator searches for evidence FOR and AGAINST its hypothesis - Must provide file:line references for all evidence diff --git a/shared/skills/test-driven-development/SKILL.md b/shared/skills/test-driven-development/SKILL.md index 4524457..f21b9e8 100644 --- a/shared/skills/test-driven-development/SKILL.md +++ b/shared/skills/test-driven-development/SKILL.md @@ -91,7 +91,7 @@ See `references/rationalization-prevention.md` for extended examples with code. ## Process Enforcement -When implementing any feature under ambient IMPLEMENT/ORCHESTRATED: +When implementing any feature under ambient IMPLEMENT/GUIDED or IMPLEMENT/ORCHESTRATED: 1. **Identify the first behavior** — What is the simplest thing this feature must do? 2. **Write the test** — Describe that behavior as a failing test @@ -130,6 +130,8 @@ When skipping TDD, never rationalize. State clearly: "Skipping TDD because: [spe ## Integration with Ambient Mode +- **IMPLEMENT/GUIDED** → TDD enforced in main session. Write the failing test before production code. Skill loaded directly. - **IMPLEMENT/ORCHESTRATED** → TDD enforced via Coder agent (skill in Coder frontmatter). Every implementation gets test-first treatment. - **IMPLEMENT/QUICK** → TDD skipped (trivial single-file edit). +- **DEBUG/GUIDED** → TDD applies to the fix in main session: write a test that reproduces the bug first, then fix. - **DEBUG/ORCHESTRATED** → TDD applies to the fix: write a test that reproduces the bug first, then fix. 
diff --git a/tests/integration/ambient-activation.test.ts b/tests/integration/ambient-activation.test.ts index 334e467..b3f7daa 100644 --- a/tests/integration/ambient-activation.test.ts +++ b/tests/integration/ambient-activation.test.ts @@ -32,14 +32,14 @@ describe.skipIf(!isClaudeAvailable())('ambient classification', () => { expect(isQuietResponse(output) || extractDepth(output) === 'QUICK').toBe(true); }); - // GUIDED tier — skills referenced in output - it('classifies "add a login form" as BUILD/GUIDED', () => { + // GUIDED tier — skills loaded, main session implements + it('classifies "add a login form" as IMPLEMENT/GUIDED', () => { const output = runClaude('add a login form with email and password fields'); if (hasClassification(output)) { - expect(extractIntent(output)).toBe('BUILD'); - expect(extractDepth(output)).toBe('GUIDED'); + expect(extractIntent(output)).toBe('IMPLEMENT'); + expect(['GUIDED', 'ORCHESTRATED']).toContain(extractDepth(output)); } - // Even without explicit classification, BUILD prompts should reference TDD + // Even without explicit classification, IMPLEMENT prompts should reference TDD expect( output.toLowerCase().includes('test') || output.toLowerCase().includes('tdd') || @@ -51,7 +51,7 @@ describe.skipIf(!isClaudeAvailable())('ambient classification', () => { const output = runClaude('fix the authentication error in the login handler'); if (hasClassification(output)) { expect(extractIntent(output)).toBe('DEBUG'); - expect(['GUIDED', 'ELEVATE']).toContain(extractDepth(output)); + expect(['GUIDED', 'ORCHESTRATED']).toContain(extractDepth(output)); } }); }); diff --git a/tests/integration/helpers.ts b/tests/integration/helpers.ts index beff342..3f7f092 100644 --- a/tests/integration/helpers.ts +++ b/tests/integration/helpers.ts @@ -34,10 +34,10 @@ export function runClaude(prompt: string, options?: { timeout?: number }): strin /** * Assert that output contains a classification marker (case-insensitive). 
- * Classification markers look like: "Ambient: BUILD/GUIDED" + * Classification markers look like: "Ambient: IMPLEMENT/GUIDED" */ export function hasClassification(output: string): boolean { - return /ambient:\s*(BUILD|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)\s*\/\s*(QUICK|GUIDED|ELEVATE)/i.test(output); + return /ambient:\s*(IMPLEMENT|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i.test(output); } /** @@ -52,7 +52,7 @@ export function isQuietResponse(output: string): boolean { * Extract the intent from a classification marker. */ export function extractIntent(output: string): string | null { - const match = output.match(/ambient:\s*(BUILD|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)/i); + const match = output.match(/ambient:\s*(IMPLEMENT|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)/i); return match ? match[1].toUpperCase() : null; } @@ -60,6 +60,6 @@ export function extractIntent(output: string): string | null { * Extract the depth from a classification marker. */ export function extractDepth(output: string): string | null { - const match = output.match(/ambient:\s*\w+\s*\/\s*(QUICK|GUIDED|ELEVATE)/i); + const match = output.match(/ambient:\s*\w+\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i); return match ? 
match[1].toUpperCase() : null; } From e635ebfb97ac1b4e50d9028776997fabadec7dea Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Thu, 19 Mar 2026 23:48:06 +0200 Subject: [PATCH 3/3] =?UTF-8?q?fix(ambient):=20address=20code=20review=20f?= =?UTF-8?q?indings=20=E2=80=94=203=20blocking,=2010=20should-fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B1: Restore TDD to GUIDED/IMPLEMENT skill selection (was missing) B2: Trim hook preamble to ~30 words + add git keyword fast-path B3: Remove hard-capped 8-agent budget from debug-orchestration S1: Document pipeline delta (vs /implement, /debug) in orchestration skills S3: Add ORCHESTRATED classification integration test S4: Update stale marketplace.json + init.ts descriptions S5: Fix zero-assertion tests (remove if-guard, add expect) S6: Change debug Phase 5 from implementation-orchestration to GUIDED fix S7: Standardize agent references to Task(subagent_type="X") format S8: Extract CLASSIFICATION_PATTERN constant in test helpers S9: Document behavioral changes in CHANGELOG S10: Full rewrite of ambient plugin README (hook-based, no /ambient) Also removes redundant /ambient slash command — ambient mode is hook-only. 
--- .claude-plugin/marketplace.json | 2 +- CHANGELOG.md | 14 +- CLAUDE.md | 1 - README.md | 2 +- docs/reference/skills-architecture.md | 2 +- plugins/devflow-ambient/README.md | 48 ++++--- plugins/devflow-ambient/commands/ambient.md | 126 ------------------ scripts/hooks/ambient-prompt | 25 ++-- shared/skills/ambient-router/SKILL.md | 6 +- .../references/skill-catalog.md | 1 + shared/skills/debug-orchestration/SKILL.md | 20 +-- .../implementation-orchestration/SKILL.md | 14 +- shared/skills/plan-orchestration/SKILL.md | 8 +- src/cli/commands/init.ts | 4 +- tests/integration/ambient-activation.test.ts | 28 ++-- tests/integration/helpers.ts | 10 +- 16 files changed, 90 insertions(+), 221 deletions(-) delete mode 100644 plugins/devflow-ambient/commands/ambient.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index aa9369b..a57feaf 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -91,7 +91,7 @@ { "name": "devflow-ambient", "source": "./plugins/devflow-ambient", - "description": "Ambient mode — auto-loads relevant skills for every prompt", + "description": "Ambient mode — classifies intent and applies proportional effort (skills for guided work, agent orchestration for complex tasks)", "version": "1.5.0", "keywords": [ "ambient", diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e6cbf0..93d60a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- **Ambient agent orchestration**: ORCHESTRATED tier spawns agent pipelines for IMPLEMENT, DEBUG, PLAN intents +- **Orchestration skills**: `implementation-orchestration`, `debug-orchestration`, `plan-orchestration` for ambient agent pipelines + ### Changed - **Ambient mode**: Three depth tiers (QUICK/GUIDED/ORCHESTRATED) replacing old QUICK/GUIDED/ELEVATE -- **Ambient mode**: ORCHESTRATED tier spawns agent pipelines for IMPLEMENT, DEBUG, PLAN 
intents - **Ambient mode**: GUIDED tier for small-scope IMPLEMENT (≤2 files), simple DEBUG, focused PLAN, and REVIEW — main session with skills + Simplifier - **Ambient mode**: BUILD intent renamed to IMPLEMENT for clarity - **Coder agent**: Added `test-driven-development` and `search-first` to permanent skills -- **Debug pipeline**: Agent budget capped at 8 Explore agents total + +### Removed +- **`/ambient` command**: Ambient mode is now hook-only. Use `devflow ambient --enable` to activate. + +### Behavioral Changes +- EXPLORE intent now always classifies as QUICK (was split QUICK/GUIDED) +- Simple text edits ("Update the README") classify as QUICK (was BUILD/GUIDED) +- Debug agent budget cap removed — agents scale to investigation needs --- diff --git a/CLAUDE.md b/CLAUDE.md index b86f091..db28b05 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -113,7 +113,6 @@ Working memory files live in a dedicated `.memory/` directory: - `/resolve` — N Resolver agents + Git - `/debug` — Agent Teams competing hypotheses - `/self-review` — Simplifier then Scrutinizer (sequential) -- `/ambient` — Intent classification + agent orchestration (IMPLEMENT/DEBUG/PLAN/REVIEW pipelines) - `/audit-claude` — CLAUDE.md audit (optional plugin) **Shared agents** (10): git, synthesizer, skimmer, simplifier, coder, reviewer, resolver, shepherd, scrutinizer, validator diff --git a/README.md b/README.md index 225d1f6..dd48fc7 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Then in Claude Code: | `devflow-resolve` | `/resolve` | Process review issues — fix or defer to tech debt | | `devflow-debug` | `/debug` | Parallel hypothesis debugging | | `devflow-self-review` | `/self-review` | Self-review workflow (Simplifier + Scrutinizer) | -| `devflow-ambient` | `/ambient` | Ambient mode — intent classification with agent orchestration | +| `devflow-ambient` | (hook) | Ambient mode — intent classification with agent orchestration | | `devflow-core-skills` | (auto) | Auto-activating quality 
enforcement skills | ## Command Details diff --git a/docs/reference/skills-architecture.md b/docs/reference/skills-architecture.md index 3d9a212..dd809ca 100644 --- a/docs/reference/skills-architecture.md +++ b/docs/reference/skills-architecture.md @@ -21,7 +21,7 @@ Shared patterns used by multiple agents. | `github-patterns` | GitHub API patterns (rate limiting, PR comments, issues, releases) | Git | | `implementation-patterns` | CRUD, API endpoints, events, config, logging | Coder, Resolver | | `agent-teams` | Agent Teams patterns for peer-to-peer collaboration, debate, consensus | /code-review, /implement, /debug | -| `ambient-router` | Intent classification and proportional skill loading for ambient mode | `/ambient` command | +| `ambient-router` | Intent classification and proportional skill loading for ambient mode | Ambient UserPromptSubmit hook | | `knowledge-persistence` | Record/load architectural decisions and pitfalls to `.memory/knowledge/` | /implement, /code-review, /resolve, /debug, /specify, /self-review | ### Tier 1b: Pattern Skills diff --git a/plugins/devflow-ambient/README.md b/plugins/devflow-ambient/README.md index fdcb9d8..5aed4a4 100644 --- a/plugins/devflow-ambient/README.md +++ b/plugins/devflow-ambient/README.md @@ -1,26 +1,8 @@ # devflow-ambient -Ambient mode — auto-classifies intent and applies proportional skill enforcement with optional agent orchestration. +Ambient mode — classifies intent and applies proportional effort via a `UserPromptSubmit` hook. No slash command — ambient mode activates automatically on every prompt when enabled. -## Command - -### `/ambient` - -Classify user intent and apply proportional enforcement to any prompt. 
- -```bash -/ambient add a login form # IMPLEMENT/GUIDED — skills + main session + Simplifier -/ambient refactor the auth system # IMPLEMENT/ORCHESTRATED — Coder + quality gates -/ambient fix the auth error # DEBUG/GUIDED — main session diagnoses + fixes -/ambient debug flaky test failures # DEBUG/ORCHESTRATED — parallel hypothesis investigation -/ambient how should we cache? # PLAN/ORCHESTRATED — Skimmer + Explore + Plan agents -/ambient where is the config? # EXPLORE/QUICK — responds normally, zero overhead -/ambient commit this # QUICK — no overhead -``` - -## Always-On Mode - -Enable ambient classification on every prompt without typing `/ambient`: +## Activation ```bash devflow ambient --enable # Register UserPromptSubmit hook @@ -28,7 +10,7 @@ devflow ambient --disable # Remove hook devflow ambient --status # Check if enabled ``` -When enabled, a `UserPromptSubmit` hook injects a classification preamble before every prompt. Slash commands (`/implement`, `/code-review`, etc.) and short confirmations ("yes", "ok") are skipped automatically. +When enabled, the hook injects a classification preamble before every prompt. Slash commands (`/implement`, `/code-review`, etc.) and short confirmations ("yes", "ok") are skipped automatically. Git operations (`commit`, `push`, `merge`, etc.) are fast-pathed to zero overhead. 
## How It Works @@ -39,7 +21,7 @@ When enabled, a `UserPromptSubmit` hook injects a classification preamble before - GUIDED: load skills, implement in main session, spawn Simplifier after code changes - ORCHESTRATED: load skills, orchestrate full agent pipeline -## Depth Tiers +## Three-Tier Classification | Depth | When | What Happens | |-------|------|-------------| @@ -47,7 +29,7 @@ When enabled, a `UserPromptSubmit` hook injects a classification preamble before | GUIDED | Small-scope IMPLEMENT (≤2 files), clear DEBUG, focused PLAN, REVIEW | Load skills → main session works → Simplifier cleanup | | ORCHESTRATED | Large-scope IMPLEMENT (>2 files), vague DEBUG, system-level PLAN | Load skills → spawn agent pipeline | -### Scope-Based Split +### Intent × Depth Matrix | Intent | GUIDED | ORCHESTRATED | |--------|--------|-------------| @@ -56,15 +38,31 @@ When enabled, a `UserPromptSubmit` hook injects a classification preamble before | PLAN | Focused design question | System-level architecture | | REVIEW | Always GUIDED | — | -## Agent Orchestration (ORCHESTRATED only) +## GUIDED Behavior + +Skills are loaded via the Skill tool and work happens in the main session: + +| Intent | Skills | Main Session Work | Post-Work | +|--------|--------|-------------------|-----------| +| IMPLEMENT | test-driven-development, implementation-patterns, search-first | Implement with TDD | `Task(subagent_type="Simplifier")` | +| DEBUG | core-patterns, test-patterns | Investigate, diagnose, fix | `Task(subagent_type="Simplifier")` | +| PLAN | implementation-patterns, core-patterns | Explore and design | — | +| REVIEW | self-review, core-patterns | Review directly | — | + +## ORCHESTRATED Pipelines | Intent | Pipeline | |--------|----------| | IMPLEMENT | Pre-flight → Coder → Validator → Simplifier → Scrutinizer → Shepherd | -| DEBUG | Hypotheses → parallel Explores (max 8) → convergence → report → offer fix | +| DEBUG | Hypotheses → parallel Explores → convergence → report → offer fix 
| | PLAN | Skimmer → Explores → Plan agent → gap validation | +These are lightweight variants of `/implement`, `/debug`, and the Plan phase of `/implement` — focused on the immediate task without full lifecycle features (PR creation, knowledge persistence, retry loops). + ## Skills - `ambient-router` — Intent + depth classification, skill selection matrix - `test-driven-development` — TDD enforcement for IMPLEMENT (GUIDED + ORCHESTRATED) +- `implementation-orchestration` — Agent pipeline for IMPLEMENT/ORCHESTRATED +- `debug-orchestration` — Agent pipeline for DEBUG/ORCHESTRATED +- `plan-orchestration` — Agent pipeline for PLAN/ORCHESTRATED diff --git a/plugins/devflow-ambient/commands/ambient.md b/plugins/devflow-ambient/commands/ambient.md deleted file mode 100644 index 442907a..0000000 --- a/plugins/devflow-ambient/commands/ambient.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -description: Ambient mode — classify intent and auto-load relevant skills for any prompt ---- - -# Ambient Command - -Classify user intent and respond with proportional effort — zero overhead for simple requests, skill loading for guided work, skill loading + agent orchestration for complex work. - -## Usage - -``` -/ambient Classify, load skills, orchestrate agents if needed -/ambient Show usage -``` - -## Phases - -### Phase 1: Load Router - -Read the `ambient-router` skill: -- `~/.claude/skills/ambient-router/SKILL.md` - -### Phase 2: Classify - -Apply the ambient-router classification to `$ARGUMENTS`: - -1. **Intent:** IMPLEMENT | DEBUG | REVIEW | PLAN | EXPLORE | CHAT -2. **Depth:** QUICK | GUIDED | ORCHESTRATED - -If no arguments provided, output: - -``` -## Ambient Mode - -Classify intent and auto-load relevant skills with optional agent orchestration. 
- -Usage: /ambient - -Examples: - /ambient add a login form → IMPLEMENT/GUIDED (main session + skills + Simplifier) - /ambient refactor the auth system → IMPLEMENT/ORCHESTRATED (Coder + quality gates) - /ambient fix the auth error → DEBUG/GUIDED (main session diagnoses + fixes) - /ambient debug flaky test failures → DEBUG/ORCHESTRATED (parallel hypothesis investigation) - /ambient how should we cache? → PLAN/ORCHESTRATED (Skimmer + Explore + Plan agents) - /ambient where is the config? → EXPLORE/QUICK (responds normally) - /ambient commit this → QUICK (no overhead) - -Always-on: devflow ambient --enable -``` - -Then stop. - -### Phase 3: State Classification - -- **QUICK:** Skip this phase entirely. Respond directly in Phase 4. -- **GUIDED:** Output one line: `Ambient: {INTENT}/GUIDED. Loading: {skill1}, {skill2}.` -- **ORCHESTRATED:** Output one line: `Ambient: {INTENT}/ORCHESTRATED. Loading: {skill1}, {skill2}.` - -### Phase 4: Apply - -**QUICK:** -Respond to the user's prompt normally. Zero skill loading. Zero overhead. - -**GUIDED:** -Invoke each selected skill using the Skill tool based on the ambient-router's skill selection matrix: - -| Intent | Skills Loaded | Main Session Work | Post-Work | -|--------|--------------|-------------------|-----------| -| IMPLEMENT | implementation-patterns, search-first | Implement directly with TDD | Spawn Simplifier on changed files | -| DEBUG | core-patterns, test-patterns | Investigate, diagnose, fix | Spawn Simplifier on changed files | -| PLAN | implementation-patterns, core-patterns | Explore and design directly | No Simplifier | -| REVIEW | self-review, core-patterns | Review directly | No Simplifier | - -After loading skills, work directly in main session following loaded skill patterns. 
- -**ORCHESTRATED:** -Invoke each selected skill using the Skill tool based on the ambient-router's skill selection matrix: - -| Intent | Skills Loaded | Agent Pipeline | -|--------|--------------|----------------| -| IMPLEMENT | implementation-orchestration, implementation-patterns | Pre-flight → Coder → Validator → Simplifier → Scrutinizer → Shepherd | -| DEBUG | debug-orchestration, core-patterns | Hypotheses → parallel Explores → convergence → report → offer fix | -| PLAN | plan-orchestration, implementation-patterns, core-patterns | Skimmer → Explores → Plan agent → gap validation | - -After loading skills, follow the orchestration skill's pipeline (Step 5 of ambient-router). - -## Architecture - -``` -/ambient -│ -├─ Phase 1: Load ambient-router skill -├─ Phase 2: Classify intent + depth -├─ Phase 3: State classification (GUIDED/ORCHESTRATED only) -└─ Phase 4: Apply - ├─ QUICK → respond directly (no agents, no skills) - ├─ GUIDED → load skills via Skill tool → main session implements → Simplifier - │ ├─ IMPLEMENT → skills + TDD + Simplifier - │ ├─ DEBUG → skills + diagnose/fix + Simplifier - │ ├─ PLAN → skills + explore/design - │ └─ REVIEW → skills + review directly - └─ ORCHESTRATED → load skills via Skill tool → orchestrate agents - ├─ IMPLEMENT → implementation-orchestration pipeline - ├─ DEBUG → debug-orchestration pipeline - └─ PLAN → plan-orchestration pipeline -``` - -## Edge Cases - -| Case | Handling | -|------|----------| -| No arguments | Show usage and stop | -| Single word ("help") | Classify — likely CHAT/QUICK | -| Prompt references `/implement` etc. 
| Classify as normal — user chose /ambient intentionally | -| Mixed intent ("fix and add test") | Use higher-overhead intent (IMPLEMENT > DEBUG) | -| User says "no enforcement" | Respect immediately — treat as QUICK | -| Scope ambiguous (GUIDED vs ORCHESTRATED) | Default to GUIDED; escalate if complexity emerges | -| Multiple triggers per session | Each runs independently; context compaction handles accumulation | - -## Principles - -1. **Three tiers** — QUICK (zero overhead), GUIDED (skills + main session), ORCHESTRATED (skills + agents) -2. **Skill tool for loading** — invoke skills via Skill tool, not Read -3. **Conservative classification** — default to QUICK; prefer GUIDED over ORCHESTRATED -4. **Transparent** — state classification for GUIDED/ORCHESTRATED, silent for QUICK -5. **Respectful** — never over-classify; when in doubt, one tier lower diff --git a/scripts/hooks/ambient-prompt b/scripts/hooks/ambient-prompt index 1db66fb..ae6c50e 100755 --- a/scripts/hooks/ambient-prompt +++ b/scripts/hooks/ambient-prompt @@ -30,25 +30,16 @@ if [ "$WORD_COUNT" -lt 2 ]; then exit 0 fi -# Inject classification preamble -PREAMBLE="AMBIENT MODE ACTIVE: Before responding, silently classify this prompt: -Intent: IMPLEMENT | DEBUG | REVIEW | PLAN | EXPLORE | CHAT -Depth: QUICK (no overhead) | GUIDED (skills + main session + Simplifier) | ORCHESTRATED (skills + agent orchestration) - -Classification conservatism: default to QUICK. Only classify GUIDED when the prompt has clear -task scope with small scope (≤2 files). Only classify ORCHESTRATED when scope is large (>2 files, -multi-module). Small edits, config changes, git ops, exploratory questions → QUICK. - -If GUIDED: Use the ambient-router skill already in your session context. -BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. -Then implement directly in main session. After code changes, spawn Simplifier on changed files. 
+# Normalize to lowercase for matching +PROMPT_LOWER=$(echo "$PROMPT" | tr '[:upper:]' '[:lower:]') -If ORCHESTRATED: Use the ambient-router skill already in your session context. -BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. -Then orchestrate agents per the ambient-router Step 5. +# Fast-path: prompts that start with a whole git verb, or ask to create/open a PR, are always QUICK — skip preamble (verbs are token-bounded so "tagging ..." or "create a profile ..." are still classified) +if echo "$PROMPT_LOWER" | grep -qE '^(commit|push|pull|merge|rebase|cherry-pick|squash|tag|stash)([^[:alnum:]_-]|$)|(create|open) (a )?prs?([^[:alnum:]_-]|$)'; then + exit 0 +fi -If QUICK: Respond normally without stating classification. -Only state classification aloud for GUIDED and ORCHESTRATED." +# Inject classification preamble +PREAMBLE="AMBIENT MODE ACTIVE: Before responding, silently classify this prompt using the ambient-router skill already in your session context. If QUICK, respond normally without stating classification." jq -n --arg ctx "$PREAMBLE" '{ "hookSpecificOutput": { diff --git a/shared/skills/ambient-router/SKILL.md b/shared/skills/ambient-router/SKILL.md index 31abb49..e66035e 100644 --- a/shared/skills/ambient-router/SKILL.md +++ b/shared/skills/ambient-router/SKILL.md @@ -1,6 +1,6 @@ --- name: ambient-router -description: This skill should be used when classifying user intent for ambient mode, auto-loading relevant skills without explicit command invocation. Used by /ambient command and always-on UserPromptSubmit hook. +description: This skill should be used when classifying user intent for ambient mode, auto-loading relevant skills without explicit command invocation. Used by the always-on UserPromptSubmit hook. 
user-invocable: false allowed-tools: Read, Grep, Glob --- @@ -65,7 +65,7 @@ Based on classified intent and depth, invoke each selected skill using the Skill | Intent | Primary Skills | Secondary (if file type matches) | |--------|---------------|----------------------------------| -| **IMPLEMENT** | implementation-patterns, search-first | typescript (.ts), react (.tsx/.jsx), go (.go), java (.java), python (.py), rust (.rs), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | +| **IMPLEMENT** | test-driven-development, implementation-patterns, search-first | typescript (.ts), react (.tsx/.jsx), go (.go), java (.java), python (.py), rust (.rs), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) | | **DEBUG** | core-patterns, test-patterns | git-safety (if git operations involved) | | **PLAN** | implementation-patterns, core-patterns | — | | **REVIEW** | self-review, core-patterns | test-patterns | @@ -88,7 +88,7 @@ See `references/skill-catalog.md` for the full skill-to-intent mapping with file When classification is GUIDED or ORCHESTRATED, skill loading is NON-NEGOTIABLE. Do not rationalize skipping skills. Do not respond without loading them first. BLOCKING REQUIREMENT: Invoke each selected skill using the Skill tool before proceeding. -If test-driven-development is selected (IMPLEMENT intent), you MUST write the failing test before ANY production code. +For IMPLEMENT intent, enforce TDD: write the failing test before ANY production code. - **QUICK:** Respond directly. No preamble, no classification statement. diff --git a/shared/skills/ambient-router/references/skill-catalog.md b/shared/skills/ambient-router/references/skill-catalog.md index 4f460d5..f822c69 100644 --- a/shared/skills/ambient-router/references/skill-catalog.md +++ b/shared/skills/ambient-router/references/skill-catalog.md @@ -11,6 +11,7 @@ These skills may be loaded during GUIDED and ORCHESTRATED-depth ambient routing. 
| Skill | When to Load | Depth | File Patterns | |-------|-------------|-------|---------------| | implementation-orchestration | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates agent pipeline | +| test-driven-development | Always for IMPLEMENT | GUIDED + ORCHESTRATED | Any code file — enforces RED-GREEN-REFACTOR | | implementation-patterns | Always for IMPLEMENT | GUIDED + ORCHESTRATED | Any code file | | search-first | Always for IMPLEMENT | GUIDED + ORCHESTRATED | Any — enforces research before building | | typescript | TypeScript files in scope | GUIDED + ORCHESTRATED | `*.ts`, `*.tsx` | diff --git a/shared/skills/debug-orchestration/SKILL.md b/shared/skills/debug-orchestration/SKILL.md index ecdf5a9..6c52a98 100644 --- a/shared/skills/debug-orchestration/SKILL.md +++ b/shared/skills/debug-orchestration/SKILL.md @@ -9,6 +9,8 @@ allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion Agent pipeline for DEBUG intent in ambient ORCHESTRATED mode. Competing hypothesis investigation, parallel evidence gathering, convergence validation, and optional fix. +This is a lightweight variant of `/debug` for ambient ORCHESTRATED mode. Excluded: knowledge persistence loading, GitHub issue fetching, pitfall recording. + ## Iron Law > **COMPETING HYPOTHESES BEFORE CONCLUSIONS** @@ -29,21 +31,9 @@ Analyze the bug description, error messages, and conversation context. Generate If fewer than 3 hypotheses are possible, proceed with 2. -## Agent Budget - -Hard cap: **8 total Explore agents** across all phases. - -| Phase | Allocation | -|-------|-----------| -| Phase 2 (Investigate) | Up to 5 (one per hypothesis, 3-5 hypotheses) | -| Phase 3 (Converge — validation) | Up to 2 | -| Phase 3 (Converge — second round) | Remaining budget (typically 1) | - -If budget is exhausted before convergence, ask user to narrow scope via AskUserQuestion rather than spawning more agents. 
- ## Phase 2: Investigate (Parallel) -Spawn one Explore agent per hypothesis **in a single message** (parallel execution, max 5): +Spawn one `Task(subagent_type="Explore")` per hypothesis **in a single message** (parallel execution): - Each investigator searches for evidence FOR and AGAINST its hypothesis - Must provide file:line references for all evidence @@ -53,7 +43,7 @@ Spawn one Explore agent per hypothesis **in a single message** (parallel executi Evaluate investigation results: -- **One CONFIRMED**: Spawn 1-2 additional Explore agents to validate from different angles (prevent confirmation bias) +- **One CONFIRMED**: Spawn 1-2 additional `Task(subagent_type="Explore")` agents to validate from different angles (prevent confirmation bias) - **Multiple PARTIAL**: Look for a unifying root cause that explains all partial evidence - **All DISPROVED**: Report honestly — "No root cause identified from initial hypotheses." Generate 2-3 second-round hypotheses if conversation context suggests avenues not yet explored. @@ -70,7 +60,7 @@ Present root cause analysis: Ask user via AskUserQuestion: "Want me to implement this fix?" -- **YES** → Run the implementation-orchestration pipeline (load it via Skill tool): pre-flight → Coder → quality gates. The fix description becomes the EXECUTION_PLAN. +- **YES** → Implement the fix directly in main session using GUIDED approach: load implementation-patterns, search-first, and test-driven-development skills, then code the fix. Spawn `Task(subagent_type="Simplifier")` on changed files after. - **NO** → Done. Report stands as documentation. 
## Error Handling diff --git a/shared/skills/implementation-orchestration/SKILL.md b/shared/skills/implementation-orchestration/SKILL.md index cb1e1ef..b46ba3e 100644 --- a/shared/skills/implementation-orchestration/SKILL.md +++ b/shared/skills/implementation-orchestration/SKILL.md @@ -9,6 +9,8 @@ allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion Agent pipeline for IMPLEMENT intent in ambient ORCHESTRATED mode. Pre-flight checks, plan synthesis, Coder execution, and quality gates. +This is a lightweight variant of `/implement` for ambient ORCHESTRATED mode. Excluded: strategy selection (single/sequential/parallel Coders), retry loops, PR creation, knowledge loading. + ## Iron Law > **QUALITY GATES ARE NON-NEGOTIABLE** @@ -40,7 +42,7 @@ Format as structured markdown with: Goal, Steps, Files, Constraints, Decisions. Record git SHA before first Coder: `git rev-parse HEAD` -Spawn Coder agent with input variables: +Spawn `Task(subagent_type="Coder")` with input variables: - **TASK_ID**: Generated from timestamp (e.g., `task-2026-03-19_1430`) - **TASK_DESCRIPTION**: From conversation context - **BASE_BRANCH**: Current branch (or newly created branch from Phase 1) @@ -67,11 +69,11 @@ Pass FILES_CHANGED to all quality gate agents. Run sequentially — each gate must pass before the next: -1. **Validator** (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries) -2. **Simplifier** — code clarity and maintainability pass on FILES_CHANGED -3. **Scrutinizer** — 9-pillar quality evaluation on FILES_CHANGED -4. **Validator** (re-validate after Simplifier/Scrutinizer changes) -5. **Shepherd** — verify implementation matches original request — retry up to 2× if misalignment found +1. `Task(subagent_type="Validator")` (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries) +2. `Task(subagent_type="Simplifier")` — code clarity and maintainability pass on FILES_CHANGED +3. 
`Task(subagent_type="Scrutinizer")` — 9-pillar quality evaluation on FILES_CHANGED +4. `Task(subagent_type="Validator")` (re-validate after Simplifier/Scrutinizer changes) +5. `Task(subagent_type="Shepherd")` — verify implementation matches original request — retry up to 2× if misalignment found If any gate exhausts retries, halt pipeline and report what passed and what failed. diff --git a/shared/skills/plan-orchestration/SKILL.md b/shared/skills/plan-orchestration/SKILL.md index 4003a56..c9e9184 100644 --- a/shared/skills/plan-orchestration/SKILL.md +++ b/shared/skills/plan-orchestration/SKILL.md @@ -9,6 +9,8 @@ allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion Agent pipeline for PLAN intent in ambient ORCHESTRATED mode. Codebase orientation, targeted exploration, architecture design, and gap validation. +This is a lightweight variant of the Plan phase in `/implement` for ambient ORCHESTRATED mode. + ## Iron Law > **PLANS WITHOUT CODEBASE GROUNDING ARE FANTASIES** @@ -21,7 +23,7 @@ Agent pipeline for PLAN intent in ambient ORCHESTRATED mode. 
Codebase orientatio ## Phase 1: Orient -Spawn Skimmer agent to get codebase overview relevant to the planning question: +Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the planning question: - Existing patterns and conventions in the affected area - File structure and module boundaries @@ -30,7 +32,7 @@ Spawn Skimmer agent to get codebase overview relevant to the planning question: ## Phase 2: Explore -Based on Skimmer findings, spawn 2-3 Explore agents **in a single message** (parallel execution): +Based on Skimmer findings, spawn 2-3 `Task(subagent_type="Explore")` agents **in a single message** (parallel execution): - **Integration explorer**: Examine integration points — APIs, shared types, module boundaries the plan must respect - **Pattern explorer**: Find existing implementations of similar features to follow as templates @@ -40,7 +42,7 @@ Adjust explorer focus based on the specific planning question. ## Phase 3: Design -Spawn Plan agent with combined Skimmer + Explore findings: +Spawn `Task(subagent_type="Plan")` with combined Skimmer + Explore findings: - Design implementation approach with file-level specificity - Reference existing patterns discovered in Phase 1-2 diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts index 54eecf2..6130e17 100644 --- a/src/cli/commands/init.ts +++ b/src/cli/commands/init.ts @@ -105,7 +105,7 @@ export const initCommand = new Command('init') .option('--plugin ', 'Install specific plugin(s), comma-separated (e.g., implement,code-review)') .option('--teams', 'Enable Agent Teams (peer debate, adversarial review)') .option('--no-teams', 'Disable Agent Teams (use parallel subagents instead)') - .option('--ambient', 'Enable ambient mode (auto-loads relevant skills for every prompt)') + .option('--ambient', 'Enable ambient mode (classifies intent, loads skills, orchestrates agents)') .option('--no-ambient', 'Disable ambient mode') .option('--memory', 'Enable working memory (session context 
preservation)') .option('--no-memory', 'Disable working memory hooks') @@ -225,7 +225,7 @@ export const initCommand = new Command('init') const ambientChoice = await p.select({ message: 'Enable ambient mode?', options: [ - { value: true, label: 'Yes (Recommended)', hint: 'Auto-loads relevant skills for each prompt' }, + { value: true, label: 'Yes (Recommended)', hint: 'Classifies intent, loads skills, orchestrates agents' }, { value: false, label: 'No', hint: 'Full control — load skills manually' }, ], }); diff --git a/tests/integration/ambient-activation.test.ts b/tests/integration/ambient-activation.test.ts index b3f7daa..1205630 100644 --- a/tests/integration/ambient-activation.test.ts +++ b/tests/integration/ambient-activation.test.ts @@ -35,23 +35,23 @@ describe.skipIf(!isClaudeAvailable())('ambient classification', () => { // GUIDED tier — skills loaded, main session implements it('classifies "add a login form" as IMPLEMENT/GUIDED', () => { const output = runClaude('add a login form with email and password fields'); - if (hasClassification(output)) { - expect(extractIntent(output)).toBe('IMPLEMENT'); - expect(['GUIDED', 'ORCHESTRATED']).toContain(extractDepth(output)); - } - // Even without explicit classification, IMPLEMENT prompts should reference TDD - expect( - output.toLowerCase().includes('test') || - output.toLowerCase().includes('tdd') || - hasClassification(output) - ).toBe(true); + expect(hasClassification(output)).toBe(true); + expect(extractIntent(output)).toBe('IMPLEMENT'); + expect(['GUIDED', 'ORCHESTRATED']).toContain(extractDepth(output)); }); it('classifies "fix the auth error" as DEBUG/GUIDED', () => { const output = runClaude('fix the authentication error in the login handler'); - if (hasClassification(output)) { - expect(extractIntent(output)).toBe('DEBUG'); - expect(['GUIDED', 'ORCHESTRATED']).toContain(extractDepth(output)); - } + expect(hasClassification(output)).toBe(true); + expect(extractIntent(output)).toBe('DEBUG'); + 
expect(['GUIDED', 'ORCHESTRATED']).toContain(extractDepth(output)); + }); + + // ORCHESTRATED tier — agents spawned for complex multi-file work + it('classifies complex multi-file refactor as ORCHESTRATED', () => { + const output = runClaude('Refactor the authentication system across the API layer, database models, and frontend components'); + expect(hasClassification(output)).toBe(true); + expect(extractIntent(output)).toBe('IMPLEMENT'); + expect(extractDepth(output)).toBe('ORCHESTRATED'); }); }); diff --git a/tests/integration/helpers.ts b/tests/integration/helpers.ts index 3f7f092..270b2fa 100644 --- a/tests/integration/helpers.ts +++ b/tests/integration/helpers.ts @@ -1,5 +1,7 @@ import { execSync, execFileSync } from 'child_process'; +const CLASSIFICATION_PATTERN = /ambient:\s*(IMPLEMENT|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i; + /** * Check if the `claude` CLI is available on this machine. */ @@ -37,7 +39,7 @@ export function runClaude(prompt: string, options?: { timeout?: number }): strin * Classification markers look like: "Ambient: IMPLEMENT/GUIDED" */ export function hasClassification(output: string): boolean { - return /ambient:\s*(IMPLEMENT|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i.test(output); + return CLASSIFICATION_PATTERN.test(output); } /** @@ -52,7 +54,7 @@ export function isQuietResponse(output: string): boolean { * Extract the intent from a classification marker. */ export function extractIntent(output: string): string | null { - const match = output.match(/ambient:\s*(IMPLEMENT|DEBUG|REVIEW|PLAN|EXPLORE|CHAT)/i); + const match = output.match(CLASSIFICATION_PATTERN); return match ? match[1].toUpperCase() : null; } @@ -60,6 +62,6 @@ export function extractIntent(output: string): string | null { * Extract the depth from a classification marker. 
*/ export function extractDepth(output: string): string | null { - const match = output.match(/ambient:\s*\w+\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i); - return match ? match[1].toUpperCase() : null; + const match = output.match(CLASSIFICATION_PATTERN); + return match ? match[2].toUpperCase() : null; }