From 7b79382655dc1bbe0c10292d7ba08515cb5ab37e Mon Sep 17 00:00:00 2001 From: Slava Trofimov <26082149+pmbstyle@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:39:09 -0400 Subject: [PATCH 1/4] fix: keep connector tools in octo budget --- src/octopal/runtime/octo/router.py | 16 +++++++++++++++- tests/test_connector_tools.py | 26 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/octopal/runtime/octo/router.py b/src/octopal/runtime/octo/router.py index d411ea8..ee37708 100644 --- a/src/octopal/runtime/octo/router.py +++ b/src/octopal/runtime/octo/router.py @@ -825,7 +825,21 @@ def _is_transient_provider_error(exc: Exception) -> bool: def _tool_priority(spec: ToolSpec) -> tuple[int, str]: name = str(getattr(spec, "name", "") or "") - return (0 if name in _PRIORITY_TOOL_NAMES else 1, name) + if name in _PRIORITY_TOOL_NAMES: + return (0, name) + if _is_connector_tool(spec): + return (1, name) + return (2, name) + + +def _is_connector_tool(spec: ToolSpec) -> bool: + metadata = getattr(spec, "metadata", None) + category = str(getattr(metadata, "category", "") or "").strip().lower() + if category == "connectors": + return True + + name = str(getattr(spec, "name", "") or "").strip().lower() + return name.startswith(("gmail_", "calendar_", "drive_", "connector_")) def _budget_tool_specs(tool_specs: list[ToolSpec], *, max_count: int) -> list[ToolSpec]: diff --git a/tests/test_connector_tools.py b/tests/test_connector_tools.py index 149fde8..139ac93 100644 --- a/tests/test_connector_tools.py +++ b/tests/test_connector_tools.py @@ -5,11 +5,13 @@ from octopal.infrastructure.config.models import ConnectorInstanceConfig, OctopalConfig from octopal.infrastructure.connectors.manager import ConnectorManager +from octopal.runtime.octo.router import _budget_tool_specs from octopal.tools.catalog import get_tools from octopal.tools.connectors.calendar import get_calendar_connector_tools from octopal.tools.connectors.drive import get_drive_connector_tools from octopal.tools.connectors.gmail import get_gmail_connector_tools from octopal.tools.connectors.status import connector_status_read +from octopal.tools.registry import ToolSpec def test_catalog_includes_read_only_connector_status_tool() -> None: @@ -66,6 +68,30 @@ def get_all_tools(self): assert "gmail_get_message" in names +def test_octo_budget_keeps_connector_alias_tools() -> None: + class _Manager: + def get_all_tools(self): + return [ + ToolSpec( + name=f"mcp_demo_tool_{index}", + description="demo", + parameters={"type": "object"}, + permission="mcp_exec", + handler=lambda _args, _ctx: "ok", + is_async=True, + ) + for index in range(40) + ] + + tools = get_tools(mcp_manager=_Manager()) + active = _budget_tool_specs(tools, max_count=64) + names = {tool.name for tool in active} + + assert "gmail_list_messages" in names + assert "gmail_search_messages" in names + assert "gmail_get_unread_count" in names + + def test_catalog_includes_first_class_calendar_tools_when_mcp_manager_is_present() -> None: class _Manager: def get_all_tools(self): From 86778213a8ae5e045f1afcd37f25c6c7fa57ab75 Mon Sep 17 00:00:00 2001 From: Slava Trofimov <26082149+pmbstyle@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:52:26 -0400 Subject: [PATCH 2/4] add: global tool catalog search for octo --- src/octopal/runtime/octo/router.py | 3 + src/octopal/tools/catalog.py | 174 +++++++++++++++++++++++++++++ tests/test_octo_tool_loop.py | 66 +++++++++++ tests/test_router_tool_budget.py | 1 + 4 files changed, 244 insertions(+) diff --git a/src/octopal/runtime/octo/router.py b/src/octopal/runtime/octo/router.py index ee37708..ca78ab1 100644 --- a/src/octopal/runtime/octo/router.py +++ b/src/octopal/runtime/octo/router.py @@ -53,6 +53,7 @@ _PRIORITY_TOOL_NAMES = { "octo_context_reset", "octo_context_health", + "tool_catalog_search", "octo_experiment_log", "check_schedule", "start_worker", @@ -69,6 +70,7 @@ "octo_context_health", "check_schedule", "scheduler_status", + "tool_catalog_search", # Scheduler control loop "list_schedule", "schedule_task", @@ -746,6 +748,7 @@ def _get_octo_tools(octo: Any, chat_id: int) -> tuple[list[ToolSpec], dict[str, ) max_tools = _env_int("OCTOPAL_OCTO_MAX_TOOL_COUNT", _DEFAULT_MAX_TOOL_COUNT, minimum=8) tool_specs = _budget_tool_specs(tool_specs, max_count=max_tools) + ctx["active_tool_specs"] = tool_specs ctx["tool_resolution_report"] = resolution_report ctx["all_tool_specs"] = all_tools return tool_specs, ctx diff --git a/src/octopal/tools/catalog.py b/src/octopal/tools/catalog.py index 18d73ef..c12a1ef 100644 --- a/src/octopal/tools/catalog.py +++ b/src/octopal/tools/catalog.py @@ -53,6 +53,147 @@ logger = structlog.get_logger(__name__) +def _tool_catalog_search(args, ctx) -> str: + query = str((args or {}).get("query", "") or "").strip().lower() + category_filter = str((args or {}).get("category", "") or "").strip().lower() + capability_filter = str((args or {}).get("capability", "") or "").strip().lower() + limit = max(1, min(int((args or {}).get("limit", 12) or 12), 50)) + + report = ctx.get("tool_resolution_report") + if report is not None and hasattr(report, "available_tools"): + candidates = list(report.available_tools) + else: + candidates = list(ctx.get("all_tool_specs") or []) + + active_names = { + str(getattr(spec, "name", "") or "").strip().lower() + for spec in (ctx.get("active_tool_specs") or []) + } + + scored: list[tuple[int, ToolSpec]] = [] + for spec in candidates: + if str(spec.name).strip().lower() == "tool_catalog_search": + continue + + metadata = getattr(spec, "metadata", None) + category = str(getattr(metadata, "category", "") or "").strip().lower() + capabilities = tuple(getattr(metadata, "capabilities", ()) or ()) + profile_tags = tuple(getattr(metadata, "profile_tags", ()) or ()) + if category_filter and category != category_filter: + continue + if capability_filter and capability_filter not in capabilities: + continue + + score = _tool_catalog_search_score( + spec, + query=query, + category=category, + capabilities=capabilities, + profile_tags=profile_tags, + ) + if query and score <= 0: + continue + scored.append((score, spec)) + + scored.sort( + key=lambda item: ( + -item[0], + 0 if str(getattr(item[1], "name", "") or "").strip().lower() not in active_names else 1, + str(getattr(item[1], "name", "") or ""), + ) + ) + + items = [] + for score, spec in scored[:limit]: + metadata = getattr(spec, "metadata", None) + params = spec.parameters if isinstance(spec.parameters, dict) else {} + properties = params.get("properties") if isinstance(params, dict) else {} + required = params.get("required") if isinstance(params, dict) else [] + if not isinstance(properties, dict): + properties = {} + if not isinstance(required, list): + required = [] + + items.append( + { + "name": spec.name, + "description": spec.description, + "category": str(getattr(metadata, "category", "") or "misc"), + "risk": str(getattr(metadata, "risk", "") or "safe"), + "capabilities": list(getattr(metadata, "capabilities", ()) or ()), + "profile_tags": list(getattr(metadata, "profile_tags", ()) or ()), + "required_arguments": [str(item) for item in required if str(item).strip()], + "argument_names": sorted(str(key) for key in properties.keys()), + "active_now": str(spec.name).strip().lower() in active_names, + "score": score, + } + ) + + return json.dumps( + { + "status": "ok", + "query": query or None, + "category": category_filter or None, + "capability": capability_filter or None, + "count": len(items), + "results": items, + "hint": ( + "If the needed tool is not active right now, use this catalog result to decide what tool family you need next." + ), + }, + ensure_ascii=False, + ) + + +def _tool_catalog_search_score( + spec: ToolSpec, + *, + query: str, + category: str, + capabilities: tuple[str, ...], + profile_tags: tuple[str, ...], +) -> int: + if not query: + return 1 + + name = str(getattr(spec, "name", "") or "").strip().lower() + description = str(getattr(spec, "description", "") or "").strip().lower() + query_terms = [term for term in query.replace("-", "_").split() if term] + if not query_terms: + query_terms = [query] + + score = 0 + for term in query_terms: + if name == term: + score += 120 + elif name.startswith(term): + score += 80 + elif term in name: + score += 55 + + if category == term: + score += 35 + elif term and term in category: + score += 20 + + if term in description: + score += 12 + + for capability in capabilities: + if capability == term: + score += 30 + elif term in capability: + score += 16 + + for tag in profile_tags: + if tag == term: + score += 18 + elif term in tag: + score += 10 + + return score + + def get_tools(mcp_manager=None) -> list[ToolSpec]: tools = [ ToolSpec( @@ -146,6 +287,39 @@ def get_tools(mcp_manager=None) -> list[ToolSpec]: handler=_tool_octo_context_health, is_async=True, ), + ToolSpec( + name="tool_catalog_search", + description=( + "Search the full catalog of available Octo tools, including tools that may not be active in the current " + "tool budget. Use this when the visible toolset seems insufficient for the task." + ), + parameters={ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search text such as gmail, calendar, worker, drive, file, or schedule.", + }, + "category": { + "type": "string", + "description": "Optional category filter such as connectors, browser, workers, or filesystem.", + }, + "capability": { + "type": "string", + "description": "Optional capability filter such as gmail_read or connector_use.", + }, + "limit": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Maximum number of matches to return.", + }, + }, + "additionalProperties": False, + }, + permission="self_control", + handler=_tool_catalog_search, + ), ToolSpec( name="octo_opportunity_scan", description="Generate proactive opportunity cards (impact/effort/confidence/next_action) for the active chat.", diff --git a/tests/test_octo_tool_loop.py b/tests/test_octo_tool_loop.py index 295a7eb..7befbed 100644 --- a/tests/test_octo_tool_loop.py +++ b/tests/test_octo_tool_loop.py @@ -1,13 +1,17 @@ from __future__ import annotations +import json + import pytest from octopal.runtime.octo.router import ( _build_octo_tool_policy_summary, _handle_octo_tool_call, + _budget_tool_specs, _record_octo_tool_call, ) from octopal.tools.diagnostics import resolve_tool_diagnostics +from octopal.tools.catalog import get_tools from octopal.tools.metadata import ToolMetadata from octopal.tools.registry import ToolSpec @@ -137,3 +141,65 @@ def test_build_octo_tool_policy_summary_counts_risk_classes() -> None: assert "Tool policy contract:" in summary assert "active_safe=1" in summary assert "blocked_dangerous=1" in summary + + +@pytest.mark.asyncio +async def test_tool_catalog_search_can_find_available_tool_outside_active_budget() -> None: + class _Manager: + def get_all_tools(self): + return [ + ToolSpec( + name=f"mcp_demo_tool_{index}", + description="demo", + parameters={"type": "object"}, + permission="mcp_exec", + handler=lambda _args, _ctx: "ok", + is_async=True, + ) + for index in range(40) + ] + + all_tools = get_tools(mcp_manager=_Manager()) + report = resolve_tool_diagnostics( + all_tools, + permissions={ + "filesystem_read": True, + "filesystem_write": True, + "worker_manage": True, + "llm_subtask": True, + "canon_manage": True, + "network": True, + "exec": True, + "service_read": True, + "service_control": True, + "deploy_control": True, + "db_admin": True, + "security_audit": True, + "self_control": True, + "mcp_exec": True, + "skill_use": True, + "skill_exec": True, + "skill_manage": True, + }, + ) + active_tools = _budget_tool_specs(list(report.available_tools), max_count=64) + active_names = {spec.name for spec in active_tools} + assert "mcp_demo_tool_39" not in active_names + + catalog_tool = next(spec for spec in active_tools if spec.name == "tool_catalog_search") + result = catalog_tool.handler( + {"query": "mcp_demo_tool_39", "limit": 5}, + { + "tool_resolution_report": report, + "all_tool_specs": all_tools, + "active_tool_specs": active_tools, + }, + ) + + payload = json.loads(result) + names = {item["name"] for item in payload["results"]} + + assert payload["status"] == "ok" + assert "mcp_demo_tool_39" in names + match = next(item for item in payload["results"] if item["name"] == "mcp_demo_tool_39") + assert match["active_now"] is False diff --git a/tests/test_router_tool_budget.py b/tests/test_router_tool_budget.py index b4ca7cd..114316b 100644 --- a/tests/test_router_tool_budget.py +++ b/tests/test_router_tool_budget.py @@ -27,6 +27,7 @@ def test_budget_keeps_internal_worker_and_scheduler_tools() -> None: "list_workers", "list_active_workers", "schedule_task", + "tool_catalog_search", } assert must_keep.issubset(names) From 4d2d095fd522a80737d0293183865bb66720a34a Mon Sep 17 00:00:00 2001 From: Slava Trofimov <26082149+pmbstyle@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:54:05 -0400 Subject: [PATCH 3/4] add: activate catalog-selected tools in octo loop --- src/octopal/runtime/octo/router.py | 66 ++++++++++++ tests/test_router_tool_budget.py | 159 +++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) diff --git a/src/octopal/runtime/octo/router.py b/src/octopal/runtime/octo/router.py index ca78ab1..34c7287 100644 --- a/src/octopal/runtime/octo/router.py +++ b/src/octopal/runtime/octo/router.py @@ -43,6 +43,7 @@ _MAX_VERIFY_CONTEXT_CHARS = 20000 _DEFAULT_MAX_TOOL_COUNT = 64 _MIN_TOOL_COUNT_ON_OVERFLOW = 12 +_CATALOG_TOOL_EXPANSION_LIMIT = 12 _MANDATORY_OCTO_TOOL_NAMES = { "fs_list", "fs_read", @@ -394,6 +395,25 @@ async def route_or_reply( for call in tool_calls: tool_result, tool_meta = await _handle_octo_tool_call(call, active_tool_specs, ctx) + expanded_names: list[str] = [] + if str(call.get("function", {}).get("name") or "") == "tool_catalog_search": + active_tool_specs, expanded_names = _expand_active_tool_specs_from_catalog_result( + tool_result, + active_tool_specs=active_tool_specs, + ctx=ctx, + ) + tools = [spec.to_openai_tool() for spec in active_tool_specs] + if expanded_names: + messages.append( + Message( + role="system", + content=( + "Tool catalog expansion complete. The following tools are now active for this turn:\n" + + "\n".join(f"- {name}" for name in expanded_names) + + "\nUse them directly if they fit the task." + ), + ) + ) tool_result_text = render_tool_result_for_llm(tool_result).text loop_state = _record_octo_tool_call( tool_call_history, @@ -875,6 +895,52 @@ def _shrink_tool_specs_for_retry(tool_specs: list[ToolSpec]) -> list[ToolSpec]: return _budget_tool_specs(tool_specs, max_count=reduced) +def _expand_active_tool_specs_from_catalog_result( + tool_result: Any, + *, + active_tool_specs: list[ToolSpec], + ctx: dict[str, object], +) -> tuple[list[ToolSpec], list[str]]: + payload = tool_result if isinstance(tool_result, dict) else {} + if isinstance(tool_result, str): + try: + parsed = json.loads(tool_result) + except Exception: + parsed = None + if isinstance(parsed, dict): + payload = parsed + results = payload.get("results") if isinstance(payload, dict) else None + if not isinstance(results, list) or not results: + return active_tool_specs, [] + + all_specs = list(ctx.get("all_tool_specs") or []) + by_name = {str(getattr(spec, "name", "") or ""): spec for spec in all_specs} + selected = list(active_tool_specs) + selected_names = {str(getattr(spec, "name", "") or "") for spec in selected} + + expanded_names: list[str] = [] + for item in results: + if len(expanded_names) >= _CATALOG_TOOL_EXPANSION_LIMIT: + break + if not isinstance(item, dict): + continue + if bool(item.get("active_now")): + continue + name = str(item.get("name", "") or "").strip() + if not name or name in selected_names: + continue + spec = by_name.get(name) + if spec is None: + continue + selected.append(spec) + selected_names.add(name) + expanded_names.append(name) + + if expanded_names: + ctx["active_tool_specs"] = selected + return selected, expanded_names + + async def _handle_octo_tool_call( call: dict, tools: list[ToolSpec], diff --git a/tests/test_router_tool_budget.py b/tests/test_router_tool_budget.py index 114316b..3df196b 100644 --- a/tests/test_router_tool_budget.py +++ b/tests/test_router_tool_budget.py @@ -5,8 +5,10 @@ from octopal.infrastructure.providers.base import Message from octopal.runtime.octo.router import ( _budget_tool_specs, + _expand_active_tool_specs_from_catalog_result, _finalize_response, _recover_textual_tool_call, + route_or_reply, _sanitize_messages_for_complete, _shrink_tool_specs_for_retry, ) @@ -39,6 +41,39 @@ def test_shrink_retry_keeps_start_worker() -> None: assert "start_worker" in names +def test_catalog_result_expands_active_tool_specs() -> None: + active = [ + ToolSpec( + name="tool_catalog_search", + description="catalog", + parameters={"type": "object", "properties": {}}, + permission="self_control", + handler=lambda args, ctx: "{}", + ) + ] + hidden = ToolSpec( + name="hidden_tool", + description="hidden", + parameters={"type": "object", "properties": {}}, + permission="self_control", + handler=lambda args, ctx: {"ok": True}, + ) + + updated, expanded = _expand_active_tool_specs_from_catalog_result( + { + "results": [ + {"name": "hidden_tool", "active_now": False}, + {"name": "tool_catalog_search", "active_now": True}, + ] + }, + active_tool_specs=active, + ctx={"all_tool_specs": active + [hidden]}, + ) + + assert expanded == ["hidden_tool"] + assert {spec.name for spec in updated} == {"tool_catalog_search", "hidden_tool"} + + def test_route_falls_back_when_tool_run_ends_with_empty_response(monkeypatch) -> None: class DummyProvider: def __init__(self) -> None: @@ -575,6 +610,130 @@ async def scenario() -> None: asyncio.run(scenario()) +def test_route_can_expand_toolset_after_catalog_search(monkeypatch) -> None: + hidden_tool = ToolSpec( + name="hidden_tool", + description="A hidden tool revealed by catalog search.", + parameters={"type": "object", "properties": {}, "additionalProperties": False}, + permission="self_control", + handler=lambda args, ctx: {"ok": True, "used": "hidden_tool"}, + ) + + catalog_tool = ToolSpec( + name="tool_catalog_search", + description="catalog", + parameters={ + "type": "object", + "properties": {"query": {"type": "string"}}, + "additionalProperties": False, + }, + permission="self_control", + handler=lambda args, ctx: '{"status":"ok","results":[{"name":"hidden_tool","active_now":false}]}', + ) + + class DummyProvider: + def __init__(self) -> None: + self.tool_snapshots: list[list[str]] = [] + self.calls = 0 + + async def complete(self, messages, **kwargs): + raise AssertionError("plain completion should not be used in this scenario") + + async def complete_stream(self, messages, *, on_partial, **kwargs): + raise AssertionError("streaming should not be used in this scenario") + + async def complete_with_tools(self, messages, *, tools, tool_choice="auto", **kwargs): + names = [tool["function"]["name"] for tool in tools] + self.tool_snapshots.append(names) + self.calls += 1 + if self.calls == 1: + assert "tool_catalog_search" in names + assert "hidden_tool" not in names + return { + "content": "", + "tool_calls": [ + { + "id": "call-1", + "type": "function", + "function": { + "name": "tool_catalog_search", + "arguments": '{"query":"hidden tool"}', + }, + } + ], + } + if self.calls == 2: + assert "hidden_tool" in names + return { + "content": "", + "tool_calls": [ + { + "id": "call-2", + "type": "function", + "function": {"name": "hidden_tool", "arguments": "{}"}, + } + ], + } + return {"content": "Expanded tool worked.", "tool_calls": []} + + class DummyMemory: + async def add_message(self, role, content, metadata=None): + return None + + class DummyOcto: + store = object() + canon = object() + internal_progress_send = None + is_ws_active = False + + async def set_typing(self, chat_id: int, active: bool) -> None: + return None + + async def set_thinking(self, active: bool) -> None: + return None + + def peek_context_wakeup(self, chat_id: int) -> str: + return "" + + async def fake_build_octo_prompt(**kwargs): + return [Message(role="user", content=str(kwargs["user_text"]))] + + async def fake_build_plan(provider, messages, has_tools): + return None + + def fake_get_octo_tools(octo, chat_id): + active_tools = [catalog_tool] + all_tools = [catalog_tool, hidden_tool] + return active_tools, { + "octo": octo, + "chat_id": chat_id, + "active_tool_specs": active_tools, + "all_tool_specs": all_tools, + } + + import octopal.runtime.octo.router as router + + monkeypatch.setattr(router, "build_octo_prompt", fake_build_octo_prompt) + monkeypatch.setattr(router, "_build_plan", fake_build_plan) + monkeypatch.setattr(router, "_get_octo_tools", fake_get_octo_tools) + + async def scenario() -> None: + provider = DummyProvider() + response = await route_or_reply( + DummyOcto(), + provider, + DummyMemory(), + "use the hidden tool", + 123, + "", + ) + assert response == "Expanded tool worked." + assert len(provider.tool_snapshots) == 3 + assert "hidden_tool" in provider.tool_snapshots[1] + + asyncio.run(scenario()) + + def test_finalize_response_returns_no_user_response_when_non_followup_rewrite_still_bad() -> None: class DummyProvider: async def complete(self, messages, **kwargs): From 3fc49b5da76064621823b8c6b7603fa9725a7c5f Mon Sep 17 00:00:00 2001 From: Slava Trofimov <26082149+pmbstyle@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:54:11 -0400 Subject: [PATCH 4/4] docs: teach octo how to use tool catalog --- src/octopal/runtime/octo/prompts/octo_system.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/octopal/runtime/octo/prompts/octo_system.md b/src/octopal/runtime/octo/prompts/octo_system.md index 5cc27dd..6cbc564 100644 --- a/src/octopal/runtime/octo/prompts/octo_system.md +++ b/src/octopal/runtime/octo/prompts/octo_system.md @@ -92,6 +92,16 @@ Octopal skills are internal tools, not MCP servers. - Do not prefer exec_run for skill bundle scripts when run_skill_script is available. - A skill can be available even if there is no MCP entry for it. +## Tool Catalog + +The visible tool list may be a curated active subset of the full catalog. + +- If the tools you can currently see do not cover the task, call tool_catalog_search before saying the capability is unavailable. +- Use tool_catalog_search to search the full tool catalog by task intent, tool family, category, or capability. +- Treat tool_catalog_search as a discovery step, not the final action. +- After tool_catalog_search, the system may activate matching tools for the current turn. If that happens, call the newly activated tool directly on the next step. +- Do not tell the user a tool is missing or inaccessible until you have checked the catalog when appropriate. + ## Canonical Memory Management You are responsible for maintaining the long-term knowledge base in `memory/canon/`.