diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..8c56e529 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,46 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Development commands + +### Python / CLI +- Install dev dependencies: `uv sync --extra dev` +- Run the CLI: `uv run oh` +- Run the CLI with an active environment: `oh` +- Lint: `uv run ruff check src tests scripts` +- Run tests: `uv run pytest -q` +- Run a single test: `uv run pytest tests/path/to/test_file.py::test_name -q` +- Optional type check: `uv run mypy src/openharness` + +### Frontend terminal UI +Run these from `frontend/terminal/`: +- Install dependencies: `npm ci` +- Start the Ink terminal UI: `npm start` +- Typecheck: `npx tsc --noEmit` + +## Repository architecture +- `src/openharness/cli.py` is the Typer entrypoint and command surface for the Python CLI, including session flags and subcommands such as MCP, plugin, and auth. +- `src/openharness/engine/query.py` and `src/openharness/engine/query_engine.py` are the core agent loop. They stream model output, execute tool calls, apply permission checks, run hooks, and append tool results back into the conversation. +- `src/openharness/tools/` is the action surface exposed to the model. +- `src/openharness/permissions/` and `src/openharness/hooks/` are the governance rails around tool execution. +- `src/openharness/plugins/`, `src/openharness/skills/`, `src/openharness/mcp/`, `src/openharness/memory/`, and `src/openharness/services/` provide extensibility and runtime support. +- `frontend/terminal/` is a separate React + Ink terminal client; treat it as a separate app from the Python runtime. 
+ +## Where to look before changing behavior +- CLI and command wiring: `src/openharness/cli.py` +- Core runtime flow: `src/openharness/engine/query.py`, `src/openharness/engine/query_engine.py` +- Tool registration and execution: `src/openharness/tools/` +- Permission and approval behavior: `src/openharness/permissions/` +- Hook lifecycle: `src/openharness/hooks/` +- Plugin / skill / MCP integration: `src/openharness/plugins/`, `src/openharness/skills/`, `src/openharness/mcp/` +- Frontend terminal behavior: `frontend/terminal/` + +## Repo-specific guidance +- Use `uv` for Python environment and dependency management. The repo requires Python 3.10+. +- Node.js 18+ is only needed when working on the frontend terminal UI. +- Before opening a PR, run the same core checks as CI: `uv run ruff check src tests scripts`, `uv run pytest -q`, and `cd frontend/terminal && npx tsc --noEmit` if frontend code changed. +- Keep PRs scoped and reviewable. When behavior changes, add or update tests. +- Update docs when CLI flags, workflows, or compatibility claims change. +- Add a short entry under `Unreleased` in `CHANGELOG.md` for user-visible changes. +- The PR template expects a concise summary of the problem and change, plus a validation section with the commands you ran. 
diff --git a/src/openharness/engine/query.py b/src/openharness/engine/query.py index 08c2f301..9d6a0cb5 100644 --- a/src/openharness/engine/query.py +++ b/src/openharness/engine/query.py @@ -31,6 +31,7 @@ ) from openharness.hooks import HookEvent, HookExecutor from openharness.permissions.checker import PermissionChecker +from openharness.skills.runtime import ActiveSkillContext, build_effective_system_prompt, filter_tool_registry from openharness.tools.base import ToolExecutionContext from openharness.tools.base import ToolRegistry @@ -95,6 +96,7 @@ class QueryContext: max_turns: int | None = 200 hook_executor: HookExecutor | None = None tool_metadata: dict[str, object] | None = None + active_skill: ActiveSkillContext | None = None def _append_capped_unique(bucket: list[Any], value: Any, *, limit: int) -> None: @@ -525,14 +527,23 @@ async def _progress(event: CompactProgressEvent) -> None: final_message: ConversationMessage | None = None usage = UsageSnapshot() + # Skill overrides for this turn + active_skill = context.active_skill + model = active_skill.model_override if active_skill is not None and active_skill.model_override else context.model + system_prompt = build_effective_system_prompt(context.system_prompt, active_skill) + tool_registry = filter_tool_registry( + context.tool_registry, + active_skill.allowed_tools if active_skill is not None else None, + ) + try: async for event in context.api_client.stream_message( ApiMessageRequest( - model=context.model, + model=model, messages=messages, - system_prompt=context.system_prompt, + system_prompt=system_prompt, max_tokens=context.max_tokens, - tools=context.tool_registry.to_api_schema(), + tools=tool_registry.to_api_schema(), ) ): if isinstance(event, ApiTextDeltaEvent): @@ -606,18 +617,19 @@ async def _progress(event: CompactProgressEvent) -> None: if len(tool_calls) == 1: # Single tool: sequential (stream events immediately) tc = tool_calls[0] - yield ToolExecutionStarted(tool_name=tc.name, 
tool_input=tc.input), None + yield ToolExecutionStarted(tool_name=tc.name, tool_input=tc.input, tool_call_id=tc.id), None result = await _execute_tool_call(context, tc.name, tc.id, tc.input) yield ToolExecutionCompleted( tool_name=tc.name, output=result.content, + tool_call_id=tc.id, is_error=result.is_error, ), None tool_results = [result] else: # Multiple tools: execute concurrently, emit events after for tc in tool_calls: - yield ToolExecutionStarted(tool_name=tc.name, tool_input=tc.input), None + yield ToolExecutionStarted(tool_name=tc.name, tool_input=tc.input, tool_call_id=tc.id), None async def _run(tc): return await _execute_tool_call(context, tc.name, tc.id, tc.input) @@ -649,6 +661,7 @@ async def _run(tc): yield ToolExecutionCompleted( tool_name=tc.name, output=result.content, + tool_call_id=tc.id, is_error=result.is_error, ), None diff --git a/src/openharness/engine/query_engine.py b/src/openharness/engine/query_engine.py index 14e62b4f..a922afea 100644 --- a/src/openharness/engine/query_engine.py +++ b/src/openharness/engine/query_engine.py @@ -13,6 +13,7 @@ from openharness.engine.stream_events import AssistantTurnComplete, StreamEvent from openharness.hooks import HookEvent, HookExecutor from openharness.permissions.checker import PermissionChecker +from openharness.skills.runtime import ActiveSkillContext from openharness.tools.base import ToolRegistry @@ -53,6 +54,7 @@ def __init__( self._tool_metadata = tool_metadata or {} self._messages: list[ConversationMessage] = [] self._cost_tracker = CostTracker() + self._active_skill: ActiveSkillContext | None = None @property def messages(self) -> list[ConversationMessage]: @@ -114,6 +116,15 @@ def set_permission_checker(self, checker: PermissionChecker) -> None: """Update the active permission checker for future turns.""" self._permission_checker = checker + def set_active_skill(self, active_skill: ActiveSkillContext | None) -> None: + """Update the active skill scope for future turns.""" + 
self._active_skill = active_skill + + @property + def active_skill(self) -> ActiveSkillContext | None: + """Return the currently active skill context.""" + return self._active_skill + def _build_coordinator_context_message(self) -> ConversationMessage | None: """Build a synthetic user message carrying coordinator runtime context.""" context = get_coordinator_user_context() @@ -177,6 +188,7 @@ async def submit_message(self, prompt: str | ConversationMessage) -> AsyncIterat ask_user_prompt=self._ask_user_prompt, hook_executor=self._hook_executor, tool_metadata=self._tool_metadata, + active_skill=self._active_skill, ) query_messages = list(self._messages) coordinator_context = self._build_coordinator_context_message() @@ -206,6 +218,7 @@ async def continue_pending(self, *, max_turns: int | None = None) -> AsyncIterat ask_user_prompt=self._ask_user_prompt, hook_executor=self._hook_executor, tool_metadata=self._tool_metadata, + active_skill=self._active_skill, ) async for event, usage in run_query(context, self._messages): if usage is not None: diff --git a/src/openharness/engine/stream_events.py b/src/openharness/engine/stream_events.py index 27aef809..ca1d43e4 100644 --- a/src/openharness/engine/stream_events.py +++ b/src/openharness/engine/stream_events.py @@ -30,6 +30,7 @@ class ToolExecutionStarted: tool_name: str tool_input: dict[str, Any] + tool_call_id: str @dataclass(frozen=True) @@ -38,6 +39,7 @@ class ToolExecutionCompleted: tool_name: str output: str + tool_call_id: str is_error: bool = False diff --git a/src/openharness/skills/markdown.py b/src/openharness/skills/markdown.py new file mode 100644 index 00000000..6ea8b677 --- /dev/null +++ b/src/openharness/skills/markdown.py @@ -0,0 +1,123 @@ +"""Shared markdown skill parsing helpers.""" + +from __future__ import annotations + +from dataclasses import asdict +from pathlib import Path +from typing import Any + +import yaml + +from openharness.skills.types import SkillDefinition + + +_SKILL_FRONTMATTER_FIELDS 
# Frontmatter keys that are copied verbatim into ``SkillDefinition.metadata``.
_SKILL_FRONTMATTER_FIELDS = {
    "name",
    "description",
    "user_invocable",
    "model_invocable",
    "aliases",
    "allowed_tools",
    "model_override",
    "effort_override",
    "execution_mode",
}

# Quoted spellings that must read as "off"; plain ``bool("false")`` is True.
_FALSE_STRINGS = frozenset({"false", "no", "off", "0"})


def _split_frontmatter(content: str) -> tuple[dict[str, Any], str]:
    """Split *content* into ``(frontmatter, body)``.

    Frontmatter is recognised only when the first line is ``---`` and a later
    line closes it with ``---``. Without an opening or closing fence the whole
    text is returned as the body with empty frontmatter. A header that is not
    a mapping, or that is not valid YAML, yields empty frontmatter while the
    header itself is still removed from the body.
    """
    lines = content.splitlines()
    if not lines or lines[0].strip() != "---":
        return {}, content
    for index, line in enumerate(lines[1:], 1):
        if line.strip() != "---":
            continue
        raw = "\n".join(lines[1:index])
        body = "\n".join(lines[index + 1 :])
        try:
            parsed = yaml.safe_load(raw) or {}
        except yaml.YAMLError:
            # One malformed header must not abort loading every other skill
            # in the directory; degrade to "no metadata" for this file.
            return {}, body
        return (parsed if isinstance(parsed, dict) else {}), body
    # Opening fence without a closing one: treat everything as body.
    return {}, content


def _coerce_flag(value: Any) -> bool:
    """Interpret a frontmatter flag, honouring quoted negatives like "false"."""
    if isinstance(value, str) and value.strip().lower() in _FALSE_STRINGS:
        return False
    return bool(value)


def _coerce_optional_str(value: Any) -> str | None:
    """Return *value* as a stripped string, or ``None`` when falsy or blank."""
    if not value:
        return None
    return str(value).strip() or None


def _coerce_aliases(value: Any) -> tuple[str, ...]:
    """Normalise a scalar or sequence frontmatter value into stripped strings.

    ``None``, blank entries and unsupported types collapse to ``()``.
    """
    if isinstance(value, str):
        cleaned = value.strip()
        return (cleaned,) if cleaned else ()
    if isinstance(value, (list, tuple)):
        stripped = (str(item).strip() for item in value)
        return tuple(item for item in stripped if item)
    return ()


def _coerce_allowed_tools(value: Any) -> tuple[str, ...] | None:
    """Like ``_coerce_aliases`` but returns ``None`` when no tool is named.

    ``None`` signals "no restriction" to callers, as opposed to an empty
    allow-list.
    """
    return _coerce_aliases(value) or None


def _fallback_name_and_description(default_name: str, body: str) -> tuple[str, str]:
    """Derive a name and description from the markdown body.

    A ``# `` heading seen before the first paragraph replaces *default_name*;
    the first non-heading, non-blank line (capped at 200 characters) becomes
    the description. Without such a line the description is
    ``"Skill: <name>"``.
    """
    name = default_name
    description = ""
    for line in body.splitlines():
        stripped = line.strip()
        if stripped.startswith("# "):
            name = stripped[2:].strip() or default_name
            continue
        if stripped and not stripped.startswith("#"):
            description = stripped[:200]
            break
    return name, description or f"Skill: {name}"


def parse_skill_markdown(
    default_name: str,
    content: str,
    *,
    source: str,
    path: str | None = None,
) -> SkillDefinition:
    """Parse one markdown skill file into a normalized definition.

    Args:
        default_name: Name used when neither frontmatter nor a heading gives one.
        content: Full text of the skill file, including any frontmatter.
        source: Label stored on the definition as its origin.
        path: Optional filesystem path stored on the definition.

    Returns:
        A ``SkillDefinition`` whose ``instructions`` is the body without the
        frontmatter (or the whole content when the body is empty) and whose
        ``metadata`` holds the recognised frontmatter keys as written.
    """
    frontmatter, body = _split_frontmatter(content)
    fallback_name, fallback_description = _fallback_name_and_description(default_name, body)
    metadata = {key: frontmatter[key] for key in _SKILL_FRONTMATTER_FIELDS if key in frontmatter}
    return SkillDefinition(
        name=str(frontmatter.get("name") or fallback_name),
        description=str(frontmatter.get("description") or fallback_description),
        content=content,
        instructions=body.strip() or content.strip(),
        source=source,
        path=path,
        user_invocable=_coerce_flag(frontmatter.get("user_invocable", True)),
        model_invocable=_coerce_flag(frontmatter.get("model_invocable", True)),
        aliases=_coerce_aliases(frontmatter.get("aliases")),
        allowed_tools=_coerce_allowed_tools(frontmatter.get("allowed_tools")),
        model_override=_coerce_optional_str(frontmatter.get("model_override")),
        effort_override=_coerce_optional_str(frontmatter.get("effort_override")),
        execution_mode=str(frontmatter.get("execution_mode") or "inline"),
        metadata=metadata,
    )


def load_skills_from_directory(path: Path, *, source: str) -> list[SkillDefinition]:
    """Load every ``*.md`` skill directly inside *path*, sorted by filename.

    Returns an empty list when *path* is missing or is not a directory. Each
    file's stem is used as the default skill name.
    """
    if not path.is_dir():
        return []
    return [
        parse_skill_markdown(
            skill_path.stem,
            skill_path.read_text(encoding="utf-8"),
            source=source,
            path=str(skill_path),
        )
        for skill_path in sorted(path.glob("*.md"))
    ]
class SkillRegistry:
    """Store loaded skills by name and alias.

    Names and aliases are matched case-insensitively with surrounding
    whitespace ignored. A skill's own name always takes precedence over an
    alias declared by a different skill, so registration order cannot make a
    skill unreachable by its name.
    """

    def __init__(self) -> None:
        # Normalized canonical name -> skill definition.
        self._skills: dict[str, SkillDefinition] = {}
        # Normalized alias -> normalized canonical name of the owning skill.
        self._aliases: dict[str, str] = {}

    @staticmethod
    def _normalize(name: str) -> str:
        """Return the lookup key for *name* (trimmed, lower-cased)."""
        return name.strip().lower()

    def register(self, skill: SkillDefinition) -> None:
        """Register one skill, replacing any earlier skill of the same name.

        Aliases left over from a previous registration of this name are
        dropped first, so a reloaded skill cannot be reached through aliases
        it no longer declares. When two skills declare the same alias, the
        most recently registered one owns it.
        """
        canonical = self._normalize(skill.name)
        self._aliases = {
            alias: target for alias, target in self._aliases.items() if target != canonical
        }
        self._skills[canonical] = skill
        for alias in skill.aliases:
            normalized = self._normalize(alias)
            # An alias equal to the skill's own name adds nothing.
            if normalized and normalized != canonical:
                self._aliases[normalized] = canonical

    def get(self, name: str) -> SkillDefinition | None:
        """Return a skill by name or alias, or ``None`` when unknown."""
        normalized = self._normalize(name)
        # Canonical names win over aliases: an alias on one skill must never
        # shadow another skill's real name.
        skill = self._skills.get(normalized)
        if skill is not None:
            return skill
        canonical = self._aliases.get(normalized)
        return self._skills.get(canonical) if canonical is not None else None

    def list_skills(self) -> list[SkillDefinition]:
        """Return all skills sorted by name."""
        return sorted(self._skills.values(), key=lambda skill: skill.name)

    def list_user_invocable(self) -> list[SkillDefinition]:
        """Return skills exposed as slash commands."""
        return [skill for skill in self.list_skills() if skill.user_invocable]

    def list_model_invocable(self) -> list[SkillDefinition]:
        """Return skills exposed to the model."""
        return [skill for skill in self.list_skills() if skill.model_invocable]
@dataclass(frozen=True)
class ActiveSkillContext:
    """Read-only view of the overrides carried by an activated skill."""

    definition: SkillDefinition

    @property
    def allowed_tools(self) -> tuple[str, ...] | None:
        """Tool names the skill permits, or ``None`` when it sets no limit."""
        return self.definition.allowed_tools

    @property
    def model_override(self) -> str | None:
        """Model name requested by the skill, if any."""
        return self.definition.model_override

    @property
    def effort_override(self) -> str | None:
        """Effort level requested by the skill, if any."""
        return self.definition.effort_override


def resolve_skill(name: str, cwd: str | Path) -> SkillDefinition | None:
    """Look *name* up in the skill registry loaded for *cwd*."""
    registry = load_skill_registry(cwd)
    return registry.get(name)


def activate_skill(name: str, cwd: str | Path) -> ActiveSkillContext | None:
    """Resolve *name* and wrap it for activation; ``None`` when unknown."""
    definition = resolve_skill(name, cwd)
    return ActiveSkillContext(definition=definition) if definition is not None else None


def build_skill_instruction_message(skill: SkillDefinition) -> str:
    """Return the skill's instructions, or its raw content when those are empty."""
    if skill.instructions:
        return skill.instructions
    return skill.content


def build_active_skill_section(active_skill: ActiveSkillContext) -> str:
    """Render the system-prompt section that describes the active skill."""
    skill = active_skill.definition
    permitted = active_skill.allowed_tools
    if permitted:
        allowed_tools = ", ".join(permitted)
    else:
        allowed_tools = "inherit runtime defaults"
    header = (
        "# Active Skill\n"
        f"- Name: {skill.name}\n"
        f"- Description: {skill.description}\n"
        f"- Execution mode: {skill.execution_mode}\n"
        f"- Allowed tools: {allowed_tools}\n\n"
    )
    preamble = "Use the following skill instructions as the active scoped workflow for this turn:\n\n"
    return header + preamble + f"{skill.instructions}"


def build_effective_system_prompt(base_prompt: str, active_skill: ActiveSkillContext | None) -> str:
    """Append the active-skill section to *base_prompt* when a skill is active."""
    if active_skill is not None:
        section = build_active_skill_section(active_skill)
        return f"{base_prompt}\n\n{section}"
    return base_prompt


def filter_tool_registry(tool_registry: "ToolRegistry", allowed_tools: tuple[str, ...] | None) -> "ToolRegistry":
    """Return a registry restricted to *allowed_tools*.

    ``None`` means no restriction and hands back *tool_registry* itself.
    Otherwise a new registry is built holding only tools whose name matches
    one of the (whitespace-stripped) allowed names.
    """
    if allowed_tools is None:
        return tool_registry
    # Imported here rather than at module level; the module only imports
    # ToolRegistry under TYPE_CHECKING.
    from openharness.tools.base import ToolRegistry

    permitted = set()
    for raw_name in allowed_tools:
        cleaned = raw_name.strip()
        if cleaned:
            permitted.add(cleaned)
    narrowed = ToolRegistry()
    for candidate in tool_registry.list_tools():
        if candidate.name not in permitted:
            continue
        narrowed.register(candidate)
    return narrowed


def apply_skill_overrides(settings: Settings, active_skill: ActiveSkillContext | None) -> Settings:
    """Return *settings* with the skill's model/effort overrides applied.

    The original object is returned untouched when no skill is active or the
    skill overrides nothing; otherwise a copy from ``model_copy`` is returned.
    """
    if active_skill is None:
        return settings
    candidates = (
        ("model", active_skill.model_override),
        ("effort", active_skill.effort_override),
    )
    updates: dict[str, object] = {field_name: value for field_name, value in candidates if value}
    if updates:
        return settings.model_copy(update=updates)
    return settings
| None = None + model_override: str | None = None + effort_override: str | None = None + execution_mode: str = "inline" + metadata: dict[str, Any] = field(default_factory=dict) diff --git a/src/openharness/ui/app.py b/src/openharness/ui/app.py index 21d25c2e..43b29bae 100644 --- a/src/openharness/ui/app.py +++ b/src/openharness/ui/app.py @@ -420,12 +420,12 @@ async def _render_event(event: StreamEvent) -> None: events_list.append(obj) elif isinstance(event, ToolExecutionStarted): if output_format == "stream-json": - obj = {"type": "tool_started", "tool_name": event.tool_name, "tool_input": event.tool_input} + obj = {"type": "tool_started", "tool_name": event.tool_name, "tool_input": event.tool_input, "tool_call_id": event.tool_call_id} print(json.dumps(obj), flush=True) events_list.append(obj) elif isinstance(event, ToolExecutionCompleted): if output_format == "stream-json": - obj = {"type": "tool_completed", "tool_name": event.tool_name, "output": event.output, "is_error": event.is_error} + obj = {"type": "tool_completed", "tool_name": event.tool_name, "output": event.output, "is_error": event.is_error, "tool_call_id": event.tool_call_id} print(json.dumps(obj), flush=True) events_list.append(obj) elif isinstance(event, ErrorEvent): diff --git a/src/openharness/ui/backend_host.py b/src/openharness/ui/backend_host.py index abc50ee5..f134c2c4 100644 --- a/src/openharness/ui/backend_host.py +++ b/src/openharness/ui/backend_host.py @@ -236,11 +236,13 @@ async def _render_event(event: StreamEvent) -> None: type="tool_started", tool_name=event.tool_name, tool_input=event.tool_input, + tool_call_id=event.tool_call_id, item=TranscriptItem( role="tool", text=f"{event.tool_name} {json.dumps(event.tool_input, ensure_ascii=True)}", tool_name=event.tool_name, tool_input=event.tool_input, + tool_call_id=event.tool_call_id, ), ) ) @@ -252,11 +254,13 @@ async def _render_event(event: StreamEvent) -> None: tool_name=event.tool_name, output=event.output, is_error=event.is_error, + 
tool_call_id=event.tool_call_id, item=TranscriptItem( role="tool_result", text=event.output, tool_name=event.tool_name, is_error=event.is_error, + tool_call_id=event.tool_call_id, ), ) ) diff --git a/src/openharness/ui/output.py b/src/openharness/ui/output.py index e79192c4..7aab6a0d 100644 --- a/src/openharness/ui/output.py +++ b/src/openharness/ui/output.py @@ -112,13 +112,14 @@ def render_event(self, event: StreamEvent) -> None: self.console.print() self._assistant_line_open = False tool_name = event.tool_name + tool_call_id = event.tool_call_id summary = _summarize_tool_input(tool_name, event.tool_input) self._last_tool_input = event.tool_input if self._style_name == "minimal": - self.console.print(f" > {tool_name} {summary}") + self.console.print(f" > {tool_name} [{tool_call_id}] {summary}") else: self.console.print( - f" [bold cyan]\u23f5 {tool_name}[/bold cyan] [dim]{summary}[/dim]" + f" [bold cyan]\u23f5 {tool_name}[/bold cyan] [dim]{summary} [{tool_call_id}][/dim]" ) self._start_spinner(tool_name) return @@ -126,13 +127,14 @@ def render_event(self, event: StreamEvent) -> None: if isinstance(event, ToolExecutionCompleted): self._stop_spinner() tool_name = event.tool_name + tool_call_id = event.tool_call_id output = event.output is_error = event.is_error if self._style_name == "minimal": - self.console.print(f" {output}") + self.console.print(f" [{tool_call_id}] {output}") return if is_error: - self.console.print(Panel(output, title=f"{tool_name} error", border_style="red", padding=(0, 1))) + self.console.print(Panel(output, title=f"{tool_name} error [{tool_call_id}]", border_style="red", padding=(0, 1))) return # Render tool output based on tool type tool_input = getattr(event, "tool_input", None) or self._last_tool_input diff --git a/src/openharness/ui/protocol.py b/src/openharness/ui/protocol.py index 3f46aadb..d8a282ee 100644 --- a/src/openharness/ui/protocol.py +++ b/src/openharness/ui/protocol.py @@ -40,6 +40,7 @@ class TranscriptItem(BaseModel): 
tool_name: str | None = None tool_input: dict[str, Any] | None = None is_error: bool | None = None + tool_call_id: str | None = None class TaskSnapshot(BaseModel): @@ -96,6 +97,7 @@ class BackendEvent(BaseModel): modal: dict[str, Any] | None = None tool_name: str | None = None tool_input: dict[str, Any] | None = None + tool_call_id: str | None = None output: str | None = None is_error: bool | None = None compact_phase: str | None = None diff --git a/src/openharness/ui/textual_app.py b/src/openharness/ui/textual_app.py index c3ef02d6..98aa5561 100644 --- a/src/openharness/ui/textual_app.py +++ b/src/openharness/ui/textual_app.py @@ -359,12 +359,12 @@ async def _render_event(self, event: StreamEvent) -> None: if isinstance(event, ToolExecutionStarted): payload = json.dumps(event.tool_input, ensure_ascii=False) - self._append_line(f"tool> {event.tool_name} {payload}") + self._append_line(f"tool> {event.tool_name} [{event.tool_call_id}] {payload}") return if isinstance(event, ToolExecutionCompleted): prefix = "tool-error>" if event.is_error else "tool-result>" - self._append_line(f"{prefix} {event.tool_name}: {event.output}") + self._append_line(f"{prefix} {event.tool_name} [{event.tool_call_id}]: {event.output}") return if isinstance(event, ErrorEvent): diff --git a/tests/test_tools/test_core_tools.py b/tests/test_tools/test_core_tools.py index eaa33404..e21259b7 100644 --- a/tests/test_tools/test_core_tools.py +++ b/tests/test_tools/test_core_tools.py @@ -115,11 +115,47 @@ async def test_skill_todo_and_config_tools(tmp_path: Path, monkeypatch): pytest_dir.mkdir() (pytest_dir / "SKILL.md").write_text("# Pytest\nHelpful pytest notes.\n", encoding="utf-8") + class StubEngine: + def __init__(self): + self.active_skill = None + + def set_active_skill(self, active_skill): + self.active_skill = active_skill + + engine = StubEngine() skill_result = await SkillTool().execute( SkillToolInput(name="Pytest"), - ToolExecutionContext(cwd=tmp_path), + 
ToolExecutionContext(cwd=tmp_path, metadata={"query_engine": engine}), ) assert "Helpful pytest notes." in skill_result.output + assert engine.active_skill is not None + assert engine.active_skill.definition.name == "Pytest" + + skill_read_result = await SkillTool().execute( + SkillToolInput(name="Pytest", mode="read"), + ToolExecutionContext(cwd=tmp_path), + ) + assert "Helpful pytest notes." in skill_read_result.output + + (skills_dir / "private.md").write_text( + "---\nname: private\nmodel_invocable: false\n---\n\n# Private\n\nHidden flow.\n", + encoding="utf-8", + ) + private_result = await SkillTool().execute( + SkillToolInput(name="private"), + ToolExecutionContext(cwd=tmp_path, metadata={"query_engine": engine}), + ) + assert private_result.is_error is True + assert "not model-invocable" in private_result.output + assert engine.active_skill.definition.name == "Pytest" + + private_read_result = await SkillTool().execute( + SkillToolInput(name="private", mode="read"), + ToolExecutionContext(cwd=tmp_path, metadata={"query_engine": engine}), + ) + assert private_read_result.is_error is True + assert "not model-invocable" in private_read_result.output + assert engine.active_skill.definition.name == "Pytest" todo_result = await TodoWriteTool().execute( TodoWriteToolInput(item="wire commands"), diff --git a/tests/test_ui/test_textual_app.py b/tests/test_ui/test_textual_app.py index 92d9107f..01ef76ef 100644 --- a/tests/test_ui/test_textual_app.py +++ b/tests/test_ui/test_textual_app.py @@ -112,7 +112,7 @@ async def _answer(question: str) -> str: await pilot.press("enter") await pilot.pause() - assert any("tool-result> ask_user_question: green" in line for line in app.transcript_lines) + assert any("tool-result> ask_user_question" in line and ": green" in line for line in app.transcript_lines) assert any("assistant> chosen green" in line for line in app.transcript_lines)