diff --git a/.cursor/skills/plan-with-critique/SKILL.md b/.cursor/skills/plan-with-critique/SKILL.md deleted file mode 100644 index 9051b9c39f..0000000000 --- a/.cursor/skills/plan-with-critique/SKILL.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -name: plan-with-critique -description: Create implementation plan from user input, then run plan-critique for feedback and improvement. Use when the user wants a plan that has been stress-tested and refined before implementation. ---- - -# Plan with Critique - -Create a plan, then run the plan-critique skill to critique and improve it. - -## When to Use - -- User asks for a plan and wants it vetted before implementation -- User wants "plan with critique" or "plan then critique" -- User wants a stress-tested plan with feedback incorporated - -## Workflow - -### Phase 1: Plan - -1. **Use superpowers:writing-plans** to create the implementation plan from user input. -2. Follow the writing-plans skill (bite-sized tasks, exact paths, TDD, DRY, YAGNI). -3. Save plan to `docs/plans/YYYY-MM-DD-.md` or `.cursor/plans/.plan.md`. - -### Phase 2: Critique and Improve - -4. 
**Use plan-critique skill** – it will: - - Spawn parallel subagents, each critiquing from one perspective - - Synthesize critic feedback into Blockers, Important, Minor, Recommendations - - Apply feedback selectively (adopt/reject/adapt) - - Add a "Critique Decisions" section to the plan - -## Integration - -- **superpowers:writing-plans** – Phase 1 -- **plan-critique** – Phase 2 (handles critique + replan) -- **superpowers:executing-plans** / **superpowers:subagent-driven-development** – after plan is final - -## Red Flags - -- **Don't** duplicate the improvement logic—plan-critique owns it -- **Don't** skip running plan-critique after creating the plan diff --git a/.cursor/skills/plan-critique/SKILL.md b/.cursor/skills/review-plan/SKILL.md similarity index 64% rename from .cursor/skills/plan-critique/SKILL.md rename to .cursor/skills/review-plan/SKILL.md index 17c16cb78e..739c00b98b 100644 --- a/.cursor/skills/plan-critique/SKILL.md +++ b/.cursor/skills/review-plan/SKILL.md @@ -1,9 +1,9 @@ --- -name: plan-critique +name: review-plan description: Spawn parallel subagents to criticize implementation plans from multiple perspectives (duplication, correctness, security, performance, testing, architecture, scope), then improve the plan based on feedback. Use when reviewing a plan before implementation or when stress-testing a plan for gaps. --- -# Plan Critique +# Review Plan Spawn parallel subagents, each critiquing a plan from a single perspective. Synthesize results into actionable feedback, then improve the plan and document decisions. @@ -13,6 +13,7 @@ Spawn parallel subagents, each critiquing a plan from a single perspective. Synt - After drafting a plan in superpowers:writing-plans - When a plan feels risky or complex - When you want diverse criticism without one reviewer dominating +- When a plan references specific frameworks/APIs and you want to verify against official docs ## Critique Perspectives @@ -27,6 +28,7 @@ Dispatch one subagent per perspective. 
Each critic is independent. | **Testing** | Coverage strategy, edge cases, integration needs | | **Architecture** | Fit with existing patterns, dependencies, layering | | **Scope** | YAGNI, scope creep, unnecessary features | +| **Documentation** | *(Conditional)* API docs, guides, best practices alignment (when plan mentions docs, frameworks, or external APIs) | ## Workflow @@ -38,6 +40,15 @@ Ensure the plan is in context (read the file or paste it). Note the plan path fo For each perspective, spawn a subagent with a focused prompt. Use the Task tool with the critic template below. +**Standard critics (always dispatch these 7):** +- Duplication, Correctness, Security, Performance, Testing, Architecture, Scope + +**Documentation critic (dispatch if):** +- Plan mentions specific frameworks, libraries, or external APIs +- Plan references documentation or docs lookup +- Plan involves integrating with third-party services +- You need to verify best practices from official sources + **Template per critic:** ``` @@ -48,7 +59,7 @@ Critique the implementation plan from the [{PERSPECTIVE}] perspective. **Your role:** You are a critical reviewer focused ONLY on [{PERSPECTIVE}]. Be skeptical. Find gaps, risks, and oversights. **Check:** -[PERSPECTIVE-SPECIFIC CHECKS - see references/critic-prompts.md] +[PERSPECTIVE-SPECIFIC CHECKS - see references/critic-prompts.md] **Output format:** - **Issues found:** (Blocker / Important / Minor) @@ -57,7 +68,34 @@ Critique the implementation plan from the [{PERSPECTIVE}] perspective. - **Verdict:** Ready / Needs changes / High risk ``` -For full prompts per perspective, see [references/critic-prompts.md](references/critic-prompts.md). +#### Documentation Critic Template (when applicable) + +``` +Review the implementation plan against official documentation and best practices. + +**Plan:** [path or paste plan content] + +**Your role:** You are a documentation expert. Look up official docs, guides, and best practices for the technologies mentioned in the plan. 
+ +**Steps:** +1. Identify frameworks, libraries, APIs, or services mentioned in the plan +2. Search for and read relevant official documentation +3. Check if the plan's approach aligns with recommended patterns +4. Identify any deprecated methods, better alternatives, or missing configurations + +**Check:** +- Does the plan use recommended/current APIs and patterns? +- Are there official best practices the plan should follow? +- Are there important configuration options or setup steps missing? +- Are there newer or better approaches documented? +- Are there known gotchas or warnings in the docs? + +**Output format:** +- **Issues found:** (Blocker / Important / Minor) - cite specific docs +- **Missing considerations:** What the docs recommend but plan doesn't address +- **Recommendations:** Specific improvements with doc references +- **Verdict:** Ready / Needs changes / High risk +``` ### 3. Synthesize Results @@ -71,7 +109,7 @@ When all critics return: ### 4. Output Format ```markdown -# Plan Critique Summary +# Review Plan Summary ## Blockers [Must address before implementation] @@ -137,7 +175,9 @@ Add to the plan after the improvement pass: **Adapted:** [Item] – [How you addressed it differently] ``` -## Example +## Examples + +### Example 1: Standard Critique (7 critics) ``` Plan: .cursor/plans/tasks_feature_implementation_a2a999e8.plan.md @@ -158,10 +198,33 @@ Dispatch 7 subagents in parallel: [Add Critique Decisions section] ``` +### Example 2: With Documentation Lookup (8 critics) + +``` +Plan: .cursor/plans/react_query_integration_b3f441a2.plan.md +(Plan mentions "React Query", "TanStack Query", and "data fetching patterns") + +Dispatch 8 subagents in parallel: +- Task("Critique from Duplication perspective...") +- Task("Critique from Correctness perspective...") +- Task("Critique from Security perspective...") +- Task("Critique from Performance perspective...") +- Task("Critique from Testing perspective...") +- Task("Critique from Architecture 
perspective...") +- Task("Critique from Scope perspective...") +- Task("Review against React Query official docs...") ← Documentation critic + +[Synthesize when all return] → Critique Summary (includes doc-based recommendations) + +[Improve plan] → Adopt/Reject/Adapt each item + +[Add Critique Decisions section] +``` + ## Integration - **superpowers:writing-plans** – Use after drafting to critique before implementation -- **plan-with-critique** – Runs plan first, then invokes this skill +- **superpowers:executing-plans** / **superpowers:subagent-driven-development** – Use after the reviewed plan is final - **superpowers:dispatching-parallel-agents** – Same pattern (independent domains) ## Red Flags @@ -173,3 +236,5 @@ Dispatch 7 subagents in parallel: - **Don't** skip the Critique Decisions section (it clarifies your reasoning) - **Don't** let critics override project conventions (e.g., CLAUDE.md patterns) - **Don't** make the plan worse by over-incorporating feedback (e.g., scope creep from "add more") +- **Don't** skip the documentation critic when plan references specific frameworks/APIs/libraries (docs often reveal better approaches or missing steps) +- **Don't** dispatch documentation critic for generic plans without specific tech mentioned (wastes time) diff --git a/.cursor/skills/plan-critique/references/critic-prompts.md b/.cursor/skills/review-plan/references/critic-prompts.md similarity index 100% rename from .cursor/skills/plan-critique/references/critic-prompts.md rename to .cursor/skills/review-plan/references/critic-prompts.md diff --git a/.cursor/skills/review-critique/SKILL.md b/.cursor/skills/review-pr/SKILL.md similarity index 97% rename from .cursor/skills/review-critique/SKILL.md rename to .cursor/skills/review-pr/SKILL.md index d9f3e8c0b7..6bff12801d 100644 --- a/.cursor/skills/review-critique/SKILL.md +++ b/.cursor/skills/review-pr/SKILL.md @@ -1,9 +1,9 @@ --- -name: review-critique +name: review-pr description: Analyze the git diff between the current branch and main from 
multiple perspectives (duplication, correctness, security, performance, testing, architecture, scope) using parallel subagents, then produce a remediation plan for issues found. Use when reviewing branch changes before merge, after implementation, or when the user asks to critique or review current code changes. --- -# Review Critique +# Review PR Spawn parallel subagents to critique the git diff of the current branch (vs main) from independent perspectives. Synthesize findings into a prioritized remediation plan. @@ -92,7 +92,7 @@ When all critics return: ### 4. Critique Summary ```markdown -# Review Critique Summary +# Review PR Summary **Branch:** {branch_name} **Files changed:** {count} @@ -169,6 +169,6 @@ Transform findings into an actionable plan. Group by file or concern, not by cri ## Integration -- **plan-critique** — Similar pattern but for plans; use review-critique for code +- **review-plan** — Similar pattern but for plans; use review-pr for code - **superpowers:verification-before-completion** — Run after remediation to confirm fixes -- **superpowers:finishing-a-development-branch** — Use review-critique before finishing +- **superpowers:finishing-a-development-branch** — Use review-pr before finishing diff --git a/.cursor/skills/review-critique/references/critic-prompts.md b/.cursor/skills/review-pr/references/critic-prompts.md similarity index 100% rename from .cursor/skills/review-critique/references/critic-prompts.md rename to .cursor/skills/review-pr/references/critic-prompts.md diff --git a/apps/mesh/src/api/routes/decopilot/built-in-tools/agent-search.ts b/apps/mesh/src/api/routes/decopilot/built-in-tools/agent-search.ts index 33d5e43a3a..c496f86674 100644 --- a/apps/mesh/src/api/routes/decopilot/built-in-tools/agent-search.ts +++ b/apps/mesh/src/api/routes/decopilot/built-in-tools/agent-search.ts @@ -49,6 +49,7 @@ const description = export interface AgentSearchParams { organization: OrganizationScope; + needsApproval?: boolean; } const 
AGENT_SEARCH_ANNOTATIONS = { @@ -69,12 +70,13 @@ export function createAgentSearchTool( params: AgentSearchParams, ctx: MeshContext, ) { - const { organization } = params; + const { organization, needsApproval } = params; return tool({ description, inputSchema: zodSchema(AgentSearchInputSchema), outputSchema: zodSchema(AgentSearchOutputSchema), + needsApproval, execute: async ({ search_term }, options) => { const startTime = performance.now(); try { diff --git a/apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts b/apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts index f53afbf783..dd0d8075cd 100644 --- a/apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts +++ b/apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts @@ -7,6 +7,7 @@ import type { MeshContext, OrganizationScope } from "@/core/mesh-context"; import type { UIMessageStreamWriter } from "ai"; +import { toolNeedsApproval, type ToolApprovalLevel } from "../helpers"; import { createAgentSearchTool } from "./agent-search"; import { createSubtaskTool } from "./subtask"; import { userAskTool } from "./user-ask"; @@ -16,6 +17,7 @@ export interface BuiltinToolParams { modelProvider: ModelProvider; organization: OrganizationScope; models: ModelsConfig; + toolApprovalLevel?: ToolApprovalLevel; } /** @@ -28,14 +30,31 @@ export function getBuiltInTools( params: BuiltinToolParams, ctx: MeshContext, ) { - const { modelProvider, organization, models } = params; + const { + modelProvider, + organization, + models, + toolApprovalLevel = "none", + } = params; return { user_ask: userAskTool, subtask: createSubtaskTool( writer, - { modelProvider, organization, models }, + { + modelProvider, + organization, + models, + needsApproval: toolNeedsApproval(toolApprovalLevel, false), + }, + ctx, + ), + agent_search: createAgentSearchTool( + writer, + { + organization, + needsApproval: toolNeedsApproval(toolApprovalLevel, true), + }, ctx, ), - agent_search: createAgentSearchTool(writer, { organization }, 
ctx), } as const; } diff --git a/apps/mesh/src/api/routes/decopilot/built-in-tools/subtask.ts b/apps/mesh/src/api/routes/decopilot/built-in-tools/subtask.ts index 029078bf6e..9b2bdf8de3 100644 --- a/apps/mesh/src/api/routes/decopilot/built-in-tools/subtask.ts +++ b/apps/mesh/src/api/routes/decopilot/built-in-tools/subtask.ts @@ -63,6 +63,7 @@ export interface SubtaskParams { modelProvider: ModelProvider; organization: OrganizationScope; models: ModelsConfig; + needsApproval?: boolean; } const SUBTASK_ANNOTATIONS = { @@ -104,11 +105,12 @@ export function createSubtaskTool( params: SubtaskParams, ctx: MeshContext, ) { - const { modelProvider, organization, models } = params; + const { modelProvider, organization, models, needsApproval } = params; return tool({ description: SUBTASK_DESCRIPTION, inputSchema: zodSchema(SubtaskInputSchema), + needsApproval, execute: async function* ( { prompt, agent_id }, { abortSignal, toolCallId }, @@ -137,7 +139,7 @@ export function createSubtaskTool( ); // ── 3. 
Load tools, excluding ones that shouldn't nest ────────── - const mcpTools = await toolsFromMCP(mcpClient); + const mcpTools = await toolsFromMCP(mcpClient, writer); const subagentTools = Object.fromEntries( Object.entries(mcpTools).filter( ([name]) => !SUBAGENT_EXCLUDED_TOOLS.includes(name), diff --git a/apps/mesh/src/api/routes/decopilot/helpers.test.ts b/apps/mesh/src/api/routes/decopilot/helpers.test.ts new file mode 100644 index 0000000000..d2b74980ef --- /dev/null +++ b/apps/mesh/src/api/routes/decopilot/helpers.test.ts @@ -0,0 +1,56 @@ +/** + * Tests for Decopilot Helper Functions + */ + +import { describe, expect, test } from "bun:test"; +import { toolNeedsApproval, type ToolApprovalLevel } from "./helpers"; + +describe("toolNeedsApproval", () => { + describe('approval level: "yolo"', () => { + const level: ToolApprovalLevel = "yolo"; + + test("returns false when readOnlyHint is true", () => { + expect(toolNeedsApproval(level, true)).toBe(false); + }); + + test("returns false when readOnlyHint is false", () => { + expect(toolNeedsApproval(level, false)).toBe(false); + }); + + test("returns false when readOnlyHint is undefined", () => { + expect(toolNeedsApproval(level, undefined)).toBe(false); + }); + }); + + describe('approval level: "none"', () => { + const level: ToolApprovalLevel = "none"; + + test("returns true when readOnlyHint is true", () => { + expect(toolNeedsApproval(level, true)).toBe(true); + }); + + test("returns true when readOnlyHint is false", () => { + expect(toolNeedsApproval(level, false)).toBe(true); + }); + + test("returns true when readOnlyHint is undefined", () => { + expect(toolNeedsApproval(level, undefined)).toBe(true); + }); + }); + + describe('approval level: "readonly"', () => { + const level: ToolApprovalLevel = "readonly"; + + test("returns false when readOnlyHint is true (auto-approve)", () => { + expect(toolNeedsApproval(level, true)).toBe(false); + }); + + test("returns true when readOnlyHint is false (requires approval)", 
() => { + expect(toolNeedsApproval(level, false)).toBe(true); + }); + + test("returns true when readOnlyHint is undefined (requires approval)", () => { + expect(toolNeedsApproval(level, undefined)).toBe(true); + }); + }); +}); diff --git a/apps/mesh/src/api/routes/decopilot/helpers.ts b/apps/mesh/src/api/routes/decopilot/helpers.ts index 0489aadb3e..cc3e8de98a 100644 --- a/apps/mesh/src/api/routes/decopilot/helpers.ts +++ b/apps/mesh/src/api/routes/decopilot/helpers.ts @@ -20,6 +20,28 @@ import type { Context } from "hono"; import type { MeshContext, OrganizationScope } from "@/core/mesh-context"; import { MCP_TOOL_CALL_TIMEOUT_MS } from "../proxy"; +/** + * Tool approval level: "none" auto-approves nothing (every tool call requires user approval), "readonly" auto-approves only tools explicitly marked read-only, and "yolo" auto-approves every tool call + */ +export type ToolApprovalLevel = "none" | "readonly" | "yolo"; + +/** + * Determine if a tool needs approval based on approval level and readOnlyHint + * + * @param level - The approval level setting + * @param readOnlyHint - Optional hint from MCP tool annotations + * @returns true if the tool requires approval, false if auto-approved + */ +export function toolNeedsApproval( + level: ToolApprovalLevel, + readOnlyHint?: boolean, +): boolean { + if (level === "yolo") return false; + if (level === "none") return true; + // "readonly": auto-approve only if explicitly marked readOnly + return readOnlyHint !== true; +} + /** * Ensure organization context exists and matches route param */ @@ -42,11 +64,13 @@ export function ensureOrganization( export async function toolsFromMCP( client: Client, writer?: UIMessageStreamWriter, + toolApprovalLevel: ToolApprovalLevel = "none", ): Promise { const list = await client.listTools(); const toolEntries = list.tools.map((t) => { - const { name, title, description, inputSchema, outputSchema } = t; + const { name, title, description, inputSchema, outputSchema, annotations } = + t; return [ name, @@ -57,6 +81,10 @@ export async function toolsFromMCP( outputSchema: outputSchema ? 
jsonSchema(outputSchema as JSONSchema7) : undefined, + needsApproval: toolNeedsApproval( + toolApprovalLevel, + annotations?.readOnlyHint, + ), execute: async (input, options) => { const startTime = performance.now(); try { diff --git a/apps/mesh/src/api/routes/decopilot/routes.ts b/apps/mesh/src/api/routes/decopilot/routes.ts index 120d5cb5a2..96fd63f44d 100644 --- a/apps/mesh/src/api/routes/decopilot/routes.ts +++ b/apps/mesh/src/api/routes/decopilot/routes.ts @@ -120,6 +120,7 @@ app.post("/:org/decopilot/stream", async (c) => { temperature, memory: memoryConfig, thread_id, + toolApprovalLevel, } = await validateRequest(c); const userId = ctx.auth?.user?.id; @@ -223,14 +224,24 @@ app.post("/:org/decopilot/stream", async (c) => { let streamFinished = false; // 4. Create stream with writer access for data parts + // IMPORTANT: Do NOT pass onFinish/onStepFinish to createUIMessageStream when + // using writer.merge with toUIMessageStream that has originalMessages. + // createUIMessageStream wraps its stream in handleUIMessageStreamFinish which + // runs processUIMessageStream on every chunk. Without originalMessages, the outer + // state starts with an empty assistant message, causing "No tool invocation found" + // errors when tool-output-available chunks arrive (e.g. after tool approval flow). 
const uiStream = createUIMessageStream({ execute: async ({ writer }) => { // Create tools inside execute so they have access to writer - const mcpTools = await toolsFromMCP(mcpClient, writer); + const mcpTools = await toolsFromMCP( + mcpClient, + writer, + toolApprovalLevel, + ); const builtInTools = getBuiltInTools( writer, - { modelProvider, organization, models }, + { modelProvider, organization, models, toolApprovalLevel }, ctx, ); @@ -361,6 +372,8 @@ app.post("/:org/decopilot/stream", async (c) => { return; }, onFinish: async ({ responseMessage }) => { + streamFinished = true; + const now = Date.now(); const messagesToSave = [ ...new Map( @@ -370,9 +383,6 @@ app.post("/:org/decopilot/stream", async (c) => { ).values(), ].map((message, i) => ({ ...message, - metadata: { - ...message.metadata, - }, thread_id: mem.thread.id, created_at: new Date(now + i).toISOString(), updated_at: new Date(now + i).toISOString(), @@ -409,11 +419,20 @@ app.post("/:org/decopilot/stream", async (c) => { onError: (error) => { streamFinished = true; console.error("[decopilot] stream error:", error); + + if (mem.thread.id) { + ctx.storage.threads + .update(mem.thread.id, { status: "failed" }) + .catch((statusErr) => { + console.error( + "[decopilot:stream] Error updating thread status on stream error", + statusErr, + ); + }); + } + return error instanceof Error ? 
error.message : String(error); }, - onFinish: () => { - streamFinished = true; - }, }); return createUIMessageStreamResponse({ diff --git a/apps/mesh/src/api/routes/decopilot/schemas.ts b/apps/mesh/src/api/routes/decopilot/schemas.ts index 27dab2051e..5290dd68c2 100644 --- a/apps/mesh/src/api/routes/decopilot/schemas.ts +++ b/apps/mesh/src/api/routes/decopilot/schemas.ts @@ -77,6 +77,7 @@ export const StreamRequestSchema = z.object({ stream: z.boolean().optional(), temperature: z.number().default(0.5), thread_id: z.string().optional(), + toolApprovalLevel: z.enum(["none", "readonly", "yolo"]).default("none"), }); export type StreamRequest = z.infer; diff --git a/apps/mesh/src/api/routes/decopilot/status.test.ts b/apps/mesh/src/api/routes/decopilot/status.test.ts index edbe70f5d3..6f019f3462 100644 --- a/apps/mesh/src/api/routes/decopilot/status.test.ts +++ b/apps/mesh/src/api/routes/decopilot/status.test.ts @@ -57,6 +57,60 @@ describe("resolveThreadStatus", () => { expect(resolveThreadStatus("tool-calls", parts)).toBe("completed"); }); + test("tool-calls with approval-requested -> requires_action", () => { + const parts = [ + { + type: "tool-invocation", + toolName: "some_tool", + state: "approval-requested", + }, + ]; + expect(resolveThreadStatus("tool-calls", parts)).toBe("requires_action"); + }); + + test("tool-calls with multiple tools, one approval-requested -> requires_action", () => { + const parts = [ + { + type: "tool-invocation", + toolName: "tool_a", + state: "output-available", + }, + { + type: "tool-invocation", + toolName: "tool_b", + state: "approval-requested", + }, + ]; + expect(resolveThreadStatus("tool-calls", parts)).toBe("requires_action"); + }); + + test("tool-calls with approval-requested and user_ask pending -> requires_action", () => { + const parts = [ + { + type: "tool-invocation", + toolName: "some_tool", + state: "approval-requested", + }, + { + type: "tool-user_ask", + toolName: "user_ask", + state: "input-available", + }, + ]; + 
expect(resolveThreadStatus("tool-calls", parts)).toBe("requires_action"); + }); + + test("tool-calls with denied approval -> completed", () => { + const parts = [ + { + type: "tool-invocation", + toolName: "some_tool", + state: "output-denied", + }, + ]; + expect(resolveThreadStatus("tool-calls", parts)).toBe("completed"); + }); + test("length -> failed", () => { expect(resolveThreadStatus("length", [])).toBe("failed"); }); diff --git a/apps/mesh/src/api/routes/decopilot/status.ts b/apps/mesh/src/api/routes/decopilot/status.ts index 127034f412..5f97b8db3a 100644 --- a/apps/mesh/src/api/routes/decopilot/status.ts +++ b/apps/mesh/src/api/routes/decopilot/status.ts @@ -43,7 +43,15 @@ export function resolveThreadStatus( (part) => part.type === "tool-user_ask" && part.state === "input-available", ); - return hasUserAskPending ? "requires_action" : "completed"; + + // Check if any tools are awaiting approval + const hasApprovalPending = responseParts.some( + (part) => part.state === "approval-requested", + ); + + return hasUserAskPending || hasApprovalPending + ? 
"requires_action" + : "completed"; } // "length", "content-filter", "error", "other", "unknown", undefined diff --git a/apps/mesh/src/web/components/chat/context.tsx b/apps/mesh/src/web/components/chat/context.tsx index 4712712a0f..66b0225730 100644 --- a/apps/mesh/src/web/components/chat/context.tsx +++ b/apps/mesh/src/web/components/chat/context.tsx @@ -21,6 +21,7 @@ import type { import { DefaultChatTransport, lastAssistantMessageIsCompleteWithToolCalls, + lastAssistantMessageIsCompleteWithApprovalResponses, type UIMessage, } from "ai"; import { @@ -35,6 +36,7 @@ import { useAllowedModels } from "../../hooks/use-allowed-models"; import { useContext as useContextHook } from "../../hooks/use-context"; import { useInvalidateCollectionsOnToolCall } from "../../hooks/use-invalidate-collections-on-tool-call"; import { useLocalStorage } from "../../hooks/use-local-storage"; +import { usePreferences } from "../../hooks/use-preferences"; import { authClient } from "../../lib/auth-client"; import { LOCALSTORAGE_KEYS } from "../../lib/localstorage-keys"; import { type ModelChangePayload, useModels } from "./select-model"; @@ -87,6 +89,7 @@ type ChatFromUseChat = Pick< | "clearError" | "stop" | "addToolOutput" + | "addToolApprovalResponse" >; /** @@ -147,8 +150,9 @@ const createModelsTransport = ( const { system, tiptapDoc: _tiptapDoc, + toolApprovalLevel, ...metadata - } = requestMetadata as Metadata; + } = requestMetadata as Metadata & { toolApprovalLevel?: string }; const systemMessage: UIMessage | null = system ? { id: crypto.randomUUID(), @@ -174,6 +178,7 @@ const createModelsTransport = ( body: { messages: allMessages, ...mergedMetadata, + ...(toolApprovalLevel && { toolApprovalLevel }), }, }; }, @@ -558,6 +563,8 @@ export function ChatProvider({ children }: PropsWithChildren) { // User session const { data: session } = authClient.useSession(); const user = session?.user ?? 
null; + // Preferences + const [preferences] = usePreferences(); // Chat state (reducer-based) const [chatState, chatDispatch] = useReducer( chatStateReducer, @@ -654,7 +661,9 @@ export function ChatProvider({ children }: PropsWithChildren) { id: threadManager.activeThreadId, messages: initialMessages, transport, - sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls, + sendAutomaticallyWhen: ({ messages }) => + lastAssistantMessageIsCompleteWithToolCalls({ messages }) || + lastAssistantMessageIsCompleteWithApprovalResponses({ messages }), onFinish, onToolCall, onError, @@ -746,6 +755,7 @@ export function ChatProvider({ children }: PropsWithChildren) { ...messageMetadata, system: contextPrompt, models: selectedModel, + toolApprovalLevel: preferences.toolApprovalLevel, }; const userMessage: ChatMessage = { @@ -807,6 +817,7 @@ export function ChatProvider({ children }: PropsWithChildren) { clearError: chat.clearError, stop, addToolOutput: chat.addToolOutput, + addToolApprovalResponse: chat.addToolApprovalResponse, sendMessage, isStreaming, isChatEmpty, diff --git a/apps/mesh/src/web/components/chat/highlight/index.tsx b/apps/mesh/src/web/components/chat/highlight/index.tsx index d0051fd03c..70c6efd99c 100644 --- a/apps/mesh/src/web/components/chat/highlight/index.tsx +++ b/apps/mesh/src/web/components/chat/highlight/index.tsx @@ -139,6 +139,14 @@ export function ChatHighlight() { (p) => p.state !== "output-available", )?.length; + // Check if any tools are awaiting approval + const isWaitingForApprovals = + lastMessage?.role === "assistant" + ? lastMessage.parts.some( + (part) => "state" in part && part.state === "approval-requested", + ) + : false; + const handleFixInChat = () => { if (error) { const text = `I encountered this error: ${error.message}. 
Can you help me fix it?`; @@ -194,7 +202,12 @@ export function ChatHighlight() { ); } - if (!isStreaming && finishReason && finishReason !== "stop") { + if ( + !isStreaming && + finishReason && + finishReason !== "stop" && + !isWaitingForApprovals + ) { return ( + + + + ); +} diff --git a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/common.tsx b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/common.tsx index 5674b3e63e..33293b81a7 100644 --- a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/common.tsx +++ b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/common.tsx @@ -33,6 +33,8 @@ export interface ToolCallShellProps { state: "loading" | "error" | "idle"; /** Detail shown in expanded view. Rendered as plain text (copiable). */ detail?: string | null; + /** Optional actions rendered below the title/summary (e.g., approve/deny buttons) */ + actions?: ReactNode; } export function ToolCallShell({ @@ -44,6 +46,7 @@ export function ToolCallShell({ summary, state, detail, + actions, }: ToolCallShellProps) { const [isExpanded, setIsExpanded] = useState(false); const { handleCopy, copied } = useCopy(); @@ -118,6 +121,13 @@ export function ToolCallShell({ )} + {/* Actions (e.g., approve/deny buttons) - outside shimmer */} + {actions && ( +
+ {actions} +
+ )} + {isExpandable && (
diff --git a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/generic.tsx b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/generic.tsx index a375e0da2c..a2e4784d2c 100644 --- a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/generic.tsx +++ b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/generic.tsx @@ -4,8 +4,13 @@ import type { ToolUIPart, DynamicToolUIPart } from "ai"; import type { ToolDefinition } from "@decocms/mesh-sdk"; import { Atom02 } from "@untitledui/icons"; import { ToolCallShell } from "./common.tsx"; -import { getFriendlyToolName } from "./utils.tsx"; +import { + getFriendlyToolName, + getApprovalId, + getEffectiveState, +} from "./utils.tsx"; import { getToolPartErrorText } from "../utils.ts"; +import { ApprovalActions } from "./approval-actions.tsx"; interface GenericToolCallPartProps { part: ToolUIPart | DynamicToolUIPart; @@ -31,6 +36,10 @@ function getTitle(state: string, friendlyName: string): string { case "input-streaming": case "input-available": return `Calling ${friendlyName}...`; + case "approval-requested": + return `Approve ${friendlyName}`; + case "output-denied": + return `Denied ${friendlyName}`; case "output-available": return `Called ${friendlyName}`; case "output-error": @@ -45,6 +54,10 @@ function getSummary(state: string): string { case "input-streaming": case "input-available": return "Generating input"; + case "approval-requested": + return "Waiting for approval"; + case "output-denied": + return "Execution denied"; case "output-available": return "Tool answered"; case "output-error": @@ -73,30 +86,29 @@ export function GenericToolCallPart({ const summary = getSummary(part.state); // Derive UI state for ToolCallShell - const effectiveState: "loading" | "error" | "idle" = - part.state === "output-error" - ? "error" - : part.state === "input-streaming" || - part.state === "input-available" || - part.state === "approval-requested" - ? 
"loading" - : "idle"; + const effectiveState = getEffectiveState(part.state); // Build expanded content let detail = ""; if (part.input !== undefined) { - detail += "Input\n" + safeStringify(part.input); + detail += "# Input\n" + safeStringify(part.input); } if (part.state === "output-error") { const errorText = getToolPartErrorText(part); if (detail) detail += "\n\n"; - detail += "Error\n" + errorText; + detail += "# Error\n" + errorText; } else if (part.output !== undefined) { if (detail) detail += "\n\n"; - detail += "Output\n" + safeStringify(part.output); + detail += "# Output\n" + safeStringify(part.output); } + // Build approval actions for approval-requested state + const approvalId = getApprovalId(part); + const actions = approvalId ? ( + + ) : undefined; + return (
); diff --git a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/subtask.tsx b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/subtask.tsx index 90955ab86b..96d972aadd 100644 --- a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/subtask.tsx +++ b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/subtask.tsx @@ -8,6 +8,8 @@ import { useChat } from "../../../context.tsx"; import type { SubtaskToolPart } from "../../../types.ts"; import { extractTextFromOutput, getToolPartErrorText } from "../utils.ts"; import { ToolCallShell } from "./common.tsx"; +import { ApprovalActions } from "./approval-actions.tsx"; +import { getApprovalId, getEffectiveState } from "./utils.tsx"; interface SubtaskPartProps { part: SubtaskToolPart; @@ -36,11 +38,10 @@ export function SubtaskPart({ const isError = part.state === "output-error"; // Derive UI state for ToolCallShell - const effectiveState: "loading" | "error" | "idle" = isError - ? "error" - : isInputStreaming || isOutputStreaming - ? "loading" - : "idle"; + const effectiveState = getEffectiveState( + part.state, + "preliminary" in part ? part.preliminary : false, + ); // Agent lookup const agentId = part.input?.agent_id; @@ -81,6 +82,12 @@ export function SubtaskPart({ /> ); + // Build approval actions for approval-requested state + const approvalId = getApprovalId(part); + const actions = approvalId ? ( + + ) : undefined; + return (
); diff --git a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/utils.tsx b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/utils.tsx index fdc7558d62..37809e9643 100644 --- a/apps/mesh/src/web/components/chat/message/parts/tool-call-part/utils.tsx +++ b/apps/mesh/src/web/components/chat/message/parts/tool-call-part/utils.tsx @@ -46,3 +46,52 @@ export function getFriendlyToolName(toolName: string): string { .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) .join(" "); } + +/** + * Check if a tool part is awaiting approval and has valid approval data. + * Returns the approval ID if all conditions are met, otherwise returns null. + */ +export function getApprovalId(part: { + state: string; + approval?: { id: string }; +}): string | null { + if ( + part.state === "approval-requested" && + "approval" in part && + part.approval + ) { + return part.approval.id; + } + return null; +} + +/** + * Derive the effective UI state for a tool call part. + * Returns "error", "loading", or "idle" based on the tool state. 
+ * + * @param state - The current state of the tool part + * @param preliminary - Optional flag indicating streaming output (for subtasks) + * @returns The effective UI state for display + */ +export function getEffectiveState( + state: string, + preliminary?: boolean, +): "loading" | "error" | "idle" { + // Error state takes precedence + if (state === "output-error") { + return "error"; + } + + // Loading states: input generation, approval waiting, or streaming output + if ( + state === "input-streaming" || + state === "input-available" || + state === "approval-requested" || + (state === "output-available" && preliminary === true) + ) { + return "loading"; + } + + // Default to idle + return "idle"; +} diff --git a/apps/mesh/src/web/components/chat/types.ts b/apps/mesh/src/web/components/chat/types.ts index 1b0d52aa73..4902d37a4e 100644 --- a/apps/mesh/src/web/components/chat/types.ts +++ b/apps/mesh/src/web/components/chat/types.ts @@ -65,6 +65,8 @@ export interface Metadata { system?: string; /** Tiptap document for rich user input (includes prompt tags with resources) */ tiptapDoc?: TiptapDoc; + /** Tool approval level preference */ + toolApprovalLevel?: "none" | "readonly" | "yolo"; usage?: { inputTokens?: number; outputTokens?: number; diff --git a/apps/mesh/src/web/components/user-settings-dialog.tsx b/apps/mesh/src/web/components/user-settings-dialog.tsx index 280d6c805e..854dc6fbe5 100644 --- a/apps/mesh/src/web/components/user-settings-dialog.tsx +++ b/apps/mesh/src/web/components/user-settings-dialog.tsx @@ -7,6 +7,13 @@ import { DialogTitle, } from "@deco/ui/components/dialog.tsx"; import { Label } from "@deco/ui/components/label.tsx"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@deco/ui/components/select.tsx"; import { Switch } from "@deco/ui/components/switch.tsx"; import { Tooltip, @@ -117,6 +124,55 @@ export function UserSettingsDialog({
+ {/* Tool Approval */} +
+ +

+ Choose when to require approval before tools execute +

+ +
+ {/* Experimental */}

diff --git a/apps/mesh/src/web/hooks/use-preferences.ts b/apps/mesh/src/web/hooks/use-preferences.ts index a804421b7a..c47e9b9fd7 100644 --- a/apps/mesh/src/web/hooks/use-preferences.ts +++ b/apps/mesh/src/web/hooks/use-preferences.ts @@ -1,16 +1,20 @@ import { useLocalStorage } from "./use-local-storage.ts"; import { LOCALSTORAGE_KEYS } from "@/web/lib/localstorage-keys.ts"; +export type ToolApprovalLevel = "none" | "readonly" | "yolo"; + interface Preferences { devMode: boolean; experimental_projects: boolean; experimental_tasks: boolean; + toolApprovalLevel: ToolApprovalLevel; } const DEFAULT_PREFERENCES: Preferences = { devMode: false, experimental_projects: false, experimental_tasks: false, + toolApprovalLevel: "none", }; export function usePreferences() { diff --git a/packages/bindings/src/well-known/language-model.ts b/packages/bindings/src/well-known/language-model.ts index b801e3f9bf..5161a9977c 100644 --- a/packages/bindings/src/well-known/language-model.ts +++ b/packages/bindings/src/well-known/language-model.ts @@ -166,7 +166,10 @@ const ToolCallOutputPartSchema = z.object({ /** * Tool Result Output Schema - * The output of a tool result + * The output of a tool result. + * Accepts typed objects (text, json, error-text, error-json, content), + * execution-denied (from AI SDK tool approval flow), and raw strings + * (JSON-serialized outputs from mapToolResultOutput). */ const ToolResultOutputSchema = z.union([ z.object({ @@ -201,6 +204,15 @@ const ToolResultOutputSchema = z.union([ ]), ), }), + z.object({ + type: z.literal("execution-denied"), + reason: z.string().optional(), + }), + z + .string() + .describe( + "Raw or JSON-serialized output (e.g. from AI SDK mapToolResultOutput)", + ), ]); /**