From 88afa344118b1b9d3af25834b6be011d2e598f6e Mon Sep 17 00:00:00 2001 From: Andy Deng Date: Fri, 17 Apr 2026 23:52:20 +0800 Subject: [PATCH 1/4] fix(agent-config): add end-to-end per-tool result_mode controls --- proto/agent.proto | 14 + studio/api/app/agent_builder/edit_ops.py | 32 +- studio/api/app/agent_builder/service.py | 15 +- studio/api/app/api/agents.py | 59 +- studio/api/app/schemas/agent_pb2.py | 50 +- studio/api/app/schemas/agent_pb2.pyi | 506 ++++++++++++++---- .../schemas/agent-config-v1.schema.json | 101 +++- .../components/agent/agent-config-editor.tsx | 81 ++- studio/web/src/lib/api/agent.ts | 10 + studio/web/src/lib/api/client.ts | 1 + .../agent-kit/src/agent_backends/default.rs | 161 ++++-- .../crates/agent-kit/src/quickjs_engine.rs | 1 + voice/engine/crates/agent-kit/src/swarm.rs | 6 +- .../crates/agent-kit/src/tool_executor.rs | 113 +++- 14 files changed, 956 insertions(+), 194 deletions(-) diff --git a/proto/agent.proto b/proto/agent.proto index b36fe24..9b43303 100644 --- a/proto/agent.proto +++ b/proto/agent.proto @@ -38,6 +38,17 @@ message ParamDef { repeated string options = 5; // Valid values for enum types } +// Per-tool output handling mode after execution. +enum ToolResultMode { + TOOL_RESULT_MODE_UNSPECIFIED = 0; + // LLM summarize long tool output (uses global tool_summarizer gate). + TOOL_RESULT_MODE_SUMMARIZE = 1; + // Deterministically truncate tool output to runtime hard cap. + TOOL_RESULT_MODE_TRUNCATE = 2; + // Keep raw tool output as-is (no summarize, no truncate). + TOOL_RESULT_MODE_NONE = 3; +} + // A tool definition (always a JS script) message ToolDef { string description = 1; @@ -47,6 +58,9 @@ message ToolDef { bool cancel_on_barge_in = 4; // If true, this tool has side effects bool side_effect = 5; + // Optional post-tool output handling mode. + // If UNSPECIFIED, runtime falls back to global config behavior. + ToolResultMode result_mode = 6; } // A single node in the graph diff --git a/studio/api/app/agent_builder/edit_ops.py b/studio/api/app/agent_builder/edit_ops.py index 642048a..219bdd8 100644 --- a/studio/api/app/agent_builder/edit_ops.py +++ b/studio/api/app/agent_builder/edit_ops.py @@ -30,7 +30,13 @@ # ── Tool field whitelist ───────────────────────────────────────── -_TOOL_FIELD_ALLOWED = {"description", "params", "script", "side_effect"} +_TOOL_FIELD_ALLOWED = { + "description", + "params", + "script", + "side_effect", + "result_mode", +} # ── Canonical field ordering ───────────────────────────────────── @@ -46,7 +52,14 @@ _NODE_FIELD_ORDER = ["system_prompt", "greeting", "tools", "edges"] -_TOOL_FIELD_ORDER = ["description", "params", "script", "side_effect"] +_TOOL_FIELD_ORDER = ["description", "params", "script", "side_effect", "result_mode"] + +_TOOL_RESULT_MODES = { + "TOOL_RESULT_MODE_UNSPECIFIED", + "TOOL_RESULT_MODE_SUMMARIZE", + "TOOL_RESULT_MODE_TRUNCATE", + "TOOL_RESULT_MODE_NONE", +} def _validate_string_list(value: Any, field_name: str) -> None: @@ -158,6 +171,21 @@ def _validate_fields(self) -> UpsertTool: self.fields["side_effect"], bool ): raise ValueError("'side_effect' must be a boolean") + if "result_mode" in self.fields: + value = self.fields["result_mode"] + if isinstance(value, int): + if value < 0 or value > 3: + raise ValueError( + "'result_mode' integer must be in [0, 3] " + "(UNSPECIFIED, SUMMARIZE, TRUNCATE, NONE)" + ) + elif isinstance(value, str): + if value not in _TOOL_RESULT_MODES: + raise ValueError( + f"'result_mode' must be one of {sorted(_TOOL_RESULT_MODES)}" + ) + else: + raise ValueError("'result_mode' must be a string enum name or integer") return self diff --git a/studio/api/app/agent_builder/service.py b/studio/api/app/agent_builder/service.py index 6ec686b..adf5fbd 100644 --- a/studio/api/app/agent_builder/service.py +++ b/studio/api/app/agent_builder/service.py @@ -105,7 +105,8 @@ "description": "", "params": [{{"name": "", "type": "string", "required": true}}], "script": "", - "side_effect": false + "side_effect": false, + "result_mode": "TOOL_RESULT_MODE_UNSPECIFIED" }} }} }} @@ -114,7 +115,17 @@ - **entry**: The starting node of the conversation - **nodes**: Each node has a `system_prompt`, optional `greeting`, `tools` and `edges` - **greeting**: Optional. The first message spoken when the conversation starts (entry node only). -- **tools**: Each tool has a `description`, `params`, a QuickJS `script`, and a `side_effect` flag +- **tools**: Each tool has a `description`, `params`, a QuickJS `script`, a `side_effect` flag, and optional `result_mode` + +## Tool Result Mode + +Set `result_mode` per tool to control what gets fed back to the LLM after execution: +- `TOOL_RESULT_MODE_SUMMARIZE`: summarize long output (best for verbose tools) +- `TOOL_RESULT_MODE_TRUNCATE`: deterministic hard-cap truncate (preserves exact text prefixes/URLs) +- `TOOL_RESULT_MODE_NONE`: no post-processing (full raw output; use sparingly) +- `TOOL_RESULT_MODE_UNSPECIFIED`: runtime default + +For tools that produce identifiers/URLs consumed by follow-up tools, prefer `TRUNCATE` or `NONE` over `SUMMARIZE`. ## QuickJS Tool Rules diff --git a/studio/api/app/api/agents.py b/studio/api/app/api/agents.py index b68ffdc..6deb487 100644 --- a/studio/api/app/api/agents.py +++ b/studio/api/app/api/agents.py @@ -1,6 +1,7 @@ """Agent CRUD API routes.""" import asyncio +import copy import uuid from typing import Any @@ -583,6 +584,10 @@ class AgentConfigPatch(BaseModel): tts_provider: str | None = None tts_model: str | None = None gemini_live_model: str | None = None + # tool_id -> mode + # null means "auto" (unset result_mode) + # 1 = summarize, 2 = truncate, 3 = full/none + tool_result_modes: dict[str, int | None] | None = None regenerate_greeting: bool = False @@ -610,10 +615,15 @@ async def patch_agent_config( if not version: raise HTTPException(status_code=404, detail="Active version not found") - config = dict(version.config_json) - patch = body.model_dump(exclude_unset=True, exclude={"regenerate_greeting"}) + # Deep copy is required: shallow copy can mutate nested JSON objects in-place, + # which may prevent SQLAlchemy from detecting a JSONB change. + config = copy.deepcopy(version.config_json) + patch = body.model_dump( + exclude_unset=True, exclude={"regenerate_greeting", "tool_result_modes"} + ) + tool_result_modes = body.tool_result_modes force_regen = body.regenerate_greeting - if not patch and not force_regen: + if not patch and not force_regen and not tool_result_modes: raise HTTPException(status_code=400, detail="No fields to update") # ── Language validation ────────────────────────────────────────────── @@ -670,6 +680,40 @@ async def patch_agent_config( for key, value in patch.items(): config[key] = value + # ── Per-tool result_mode patch ─────────────────────────────────────── + if tool_result_modes is not None: + tools_obj = config.get("tools") + if not isinstance(tools_obj, dict): + raise HTTPException( + status_code=422, + detail="Invalid config: top-level 'tools' must be an object", + ) + + for tool_id, mode in tool_result_modes.items(): + tool_def = tools_obj.get(tool_id) + if not isinstance(tool_def, dict): + raise HTTPException( + status_code=422, + detail=f"Unknown tool '{tool_id}' in tool_result_modes", + ) + + # "auto" => remove field and let runtime default behavior apply + if mode is None: + tool_def.pop("result_mode", None) + continue + + # Valid explicit runtime enum values: + # 1 summarize, 2 truncate, 3 none/full. + if mode not in {1, 2, 3}: + raise HTTPException( + status_code=422, + detail=( + f"Invalid result_mode {mode} for tool '{tool_id}'. " + "Expected null (auto) or 1/2/3." + ), + ) + tool_def["result_mode"] = mode + # ── Greeting regeneration ────────────────────────────────────────── greeting_updated = False new_greeting: str | None = None @@ -704,6 +748,7 @@ async def patch_agent_config( db, agent_id=agent.id, patch=patch, + tool_result_modes=tool_result_modes, greeting_updated=greeting_updated, new_greeting=new_greeting, ) @@ -733,7 +778,8 @@ async def patch_agent_config( async def _inject_config_change_event( db: AsyncSession, agent_id: uuid.UUID, - patch: dict[str, str], + patch: dict[str, Any], + tool_result_modes: dict[str, int | None] | None = None, greeting_updated: bool = False, new_greeting: str | None = None, ) -> None: @@ -765,6 +811,11 @@ async def _inject_config_change_event( else "Standard Pipeline" ) changes.append(f"conversation mode set to {mode}") + if tool_result_modes: + mode_label = {1: "summary", 2: "truncate", 3: "full"} + for tool_id, mode in tool_result_modes.items(): + label = "auto" if mode is None else mode_label.get(mode, str(mode)) + changes.append(f"{tool_id} result_mode set to {label}") if not changes: return diff --git a/studio/api/app/schemas/agent_pb2.py b/studio/api/app/schemas/agent_pb2.py index 3dad1f1..5138a46 100644 --- a/studio/api/app/schemas/agent_pb2.py +++ b/studio/api/app/schemas/agent_pb2.py @@ -1,12 +1,22 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! +# NO CHECKED-IN PROTOBUF GENCODE # source: agent.proto -# Protobuf Python Version: 4.25.0 +# Protobuf Python Version: 6.33.4 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 33, + 4, + '', + 'agent.proto' +) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -14,34 +24,36 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61gent.proto\x12\x05\x61gent\"\x8f\x02\n\x0fRecordingConfig\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x12\n\noutput_uri\x18\x02 \x01(\t\x12(\n\x0c\x61udio_layout\x18\x03 \x01(\x0e\x32\x12.agent.AudioLayout\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12(\n\x0c\x61udio_format\x18\x05 \x01(\x0e\x32\x12.agent.AudioFormat\x12\x19\n\x11max_duration_secs\x18\x06 \x01(\r\x12\x17\n\x0fsave_transcript\x18\x07 \x01(\x08\x12\x1c\n\x14include_tool_details\x18\x08 \x01(\x08\x12\x1c\n\x14include_llm_metadata\x18\t \x01(\x08\"^\n\x08ParamDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12\x10\n\x08required\x18\x04 \x01(\x08\x12\x0f\n\x07options\x18\x05 \x03(\t\"\x80\x01\n\x07ToolDef\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x0e\n\x06script\x18\x02 \x01(\t\x12\x1f\n\x06params\x18\x03 \x03(\x0b\x32\x0f.agent.ParamDef\x12\x1a\n\x12\x63\x61ncel_on_barge_in\x18\x04 \x01(\x08\x12\x13\n\x0bside_effect\x18\x05 \x01(\x08\"\xf6\x01\n\x07NodeDef\x12\x15\n\rsystem_prompt\x18\x01 \x01(\t\x12\r\n\x05tools\x18\x02 \x03(\t\x12\r\n\x05\x65\x64ges\x18\x03 \x03(\t\x12\x12\n\x05model\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0btemperature\x18\x05 \x01(\x01H\x01\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x06 \x01(\rH\x02\x88\x01\x01\x12\x15\n\x08voice_id\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08greeting\x18\x08 \x01(\tH\x04\x88\x01\x01\x42\x08\n\x06_modelB\x0e\n\x0c_temperatureB\r\n\x0b_max_tokensB\x0b\n\t_voice_idB\x0b\n\t_greeting\"\xb4\x04\n\rAgentGraphDef\x12\r\n\x05\x65ntry\x18\x01 \x01(\t\x12.\n\x05nodes\x18\x02 \x03(\x0b\x32\x1f.agent.AgentGraphDef.NodesEntry\x12.\n\x05tools\x18\x03 \x03(\x0b\x32\x1f.agent.AgentGraphDef.ToolsEntry\x12\x15\n\x08language\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08timezone\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08voice_id\x18\x06 \x01(\tH\x02\x88\x01\x01\x12\x19\n\x0ctts_provider\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x16\n\ttts_model\x18\x08 \x01(\tH\x04\x88\x01\x01\x12.\n\trecording\x18\t \x01(\x0b\x32\x16.agent.RecordingConfigH\x05\x88\x01\x01\x12\"\n\x15\x63onfig_schema_version\x18\n \x01(\tH\x06\x88\x01\x01\x1a<\n\nNodesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.NodeDef:\x02\x38\x01\x1a<\n\nToolsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.ToolDef:\x02\x38\x01\x42\x0b\n\t_languageB\x0b\n\t_timezoneB\x0b\n\t_voice_idB\x0f\n\r_tts_providerB\x0c\n\n_tts_modelB\x0c\n\n_recordingB\x18\n\x16_config_schema_version*[\n\x0b\x41udioLayout\x12\x1c\n\x18\x41UDIO_LAYOUT_UNSPECIFIED\x10\x00\x12\x17\n\x13\x41UDIO_LAYOUT_STEREO\x10\x01\x12\x15\n\x11\x41UDIO_LAYOUT_MONO\x10\x02*X\n\x0b\x41udioFormat\x12\x1c\n\x18\x41UDIO_FORMAT_UNSPECIFIED\x10\x00\x12\x15\n\x11\x41UDIO_FORMAT_OPUS\x10\x01\x12\x14\n\x10\x41UDIO_FORMAT_WAV\x10\x02\x42\x30Z.github.com/prime8ai/voice-agent-os/proto/agentb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61gent.proto\x12\x05\x61gent\"\x8f\x02\n\x0fRecordingConfig\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x12\n\noutput_uri\x18\x02 \x01(\t\x12(\n\x0c\x61udio_layout\x18\x03 \x01(\x0e\x32\x12.agent.AudioLayout\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12(\n\x0c\x61udio_format\x18\x05 \x01(\x0e\x32\x12.agent.AudioFormat\x12\x19\n\x11max_duration_secs\x18\x06 \x01(\r\x12\x17\n\x0fsave_transcript\x18\x07 \x01(\x08\x12\x1c\n\x14include_tool_details\x18\x08 \x01(\x08\x12\x1c\n\x14include_llm_metadata\x18\t \x01(\x08\"^\n\x08ParamDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12\x10\n\x08required\x18\x04 \x01(\x08\x12\x0f\n\x07options\x18\x05 \x03(\t\"\xac\x01\n\x07ToolDef\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x0e\n\x06script\x18\x02 \x01(\t\x12\x1f\n\x06params\x18\x03 \x03(\x0b\x32\x0f.agent.ParamDef\x12\x1a\n\x12\x63\x61ncel_on_barge_in\x18\x04 \x01(\x08\x12\x13\n\x0bside_effect\x18\x05 \x01(\x08\x12*\n\x0bresult_mode\x18\x06 \x01(\x0e\x32\x15.agent.ToolResultMode\"\xf6\x01\n\x07NodeDef\x12\x15\n\rsystem_prompt\x18\x01 \x01(\t\x12\r\n\x05tools\x18\x02 \x03(\t\x12\r\n\x05\x65\x64ges\x18\x03 \x03(\t\x12\x12\n\x05model\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0btemperature\x18\x05 \x01(\x01H\x01\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x06 \x01(\rH\x02\x88\x01\x01\x12\x15\n\x08voice_id\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08greeting\x18\x08 \x01(\tH\x04\x88\x01\x01\x42\x08\n\x06_modelB\x0e\n\x0c_temperatureB\r\n\x0b_max_tokensB\x0b\n\t_voice_idB\x0b\n\t_greeting\"\xea\x04\n\rAgentGraphDef\x12\r\n\x05\x65ntry\x18\x01 \x01(\t\x12.\n\x05nodes\x18\x02 \x03(\x0b\x32\x1f.agent.AgentGraphDef.NodesEntry\x12.\n\x05tools\x18\x03 \x03(\x0b\x32\x1f.agent.AgentGraphDef.ToolsEntry\x12\x15\n\x08language\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08timezone\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08voice_id\x18\x06 \x01(\tH\x02\x88\x01\x01\x12\x19\n\x0ctts_provider\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x16\n\ttts_model\x18\x08 \x01(\tH\x04\x88\x01\x01\x12.\n\trecording\x18\t \x01(\x0b\x32\x16.agent.RecordingConfigH\x05\x88\x01\x01\x12\"\n\x15\x63onfig_schema_version\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11gemini_live_model\x18\x0c \x01(\tH\x07\x88\x01\x01\x1a<\n\nNodesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.NodeDef:\x02\x38\x01\x1a<\n\nToolsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.ToolDef:\x02\x38\x01\x42\x0b\n\t_languageB\x0b\n\t_timezoneB\x0b\n\t_voice_idB\x0f\n\r_tts_providerB\x0c\n\n_tts_modelB\x0c\n\n_recordingB\x18\n\x16_config_schema_versionB\x14\n\x12_gemini_live_model*[\n\x0b\x41udioLayout\x12\x1c\n\x18\x41UDIO_LAYOUT_UNSPECIFIED\x10\x00\x12\x17\n\x13\x41UDIO_LAYOUT_STEREO\x10\x01\x12\x15\n\x11\x41UDIO_LAYOUT_MONO\x10\x02*X\n\x0b\x41udioFormat\x12\x1c\n\x18\x41UDIO_FORMAT_UNSPECIFIED\x10\x00\x12\x15\n\x11\x41UDIO_FORMAT_OPUS\x10\x01\x12\x14\n\x10\x41UDIO_FORMAT_WAV\x10\x02*\x8c\x01\n\x0eToolResultMode\x12 \n\x1cTOOL_RESULT_MODE_UNSPECIFIED\x10\x00\x12\x1e\n\x1aTOOL_RESULT_MODE_SUMMARIZE\x10\x01\x12\x1d\n\x19TOOL_RESULT_MODE_TRUNCATE\x10\x02\x12\x19\n\x15TOOL_RESULT_MODE_NONE\x10\x03\x42\x30Z.github.com/prime8ai/voice-agent-os/proto/agentb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'agent_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None _globals['DESCRIPTOR']._serialized_options = b'Z.github.com/prime8ai/voice-agent-os/proto/agent' - _globals['_AGENTGRAPHDEF_NODESENTRY']._options = None + _globals['_AGENTGRAPHDEF_NODESENTRY']._loaded_options = None _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_options = b'8\001' - _globals['_AGENTGRAPHDEF_TOOLSENTRY']._options = None + _globals['_AGENTGRAPHDEF_TOOLSENTRY']._loaded_options = None _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_options = b'8\001' - _globals['_AUDIOLAYOUT']._serialized_start=1339 - _globals['_AUDIOLAYOUT']._serialized_end=1430 - _globals['_AUDIOFORMAT']._serialized_start=1432 - _globals['_AUDIOFORMAT']._serialized_end=1520 + _globals['_AUDIOLAYOUT']._serialized_start=1437 + _globals['_AUDIOLAYOUT']._serialized_end=1528 + _globals['_AUDIOFORMAT']._serialized_start=1530 + _globals['_AUDIOFORMAT']._serialized_end=1618 + _globals['_TOOLRESULTMODE']._serialized_start=1621 + _globals['_TOOLRESULTMODE']._serialized_end=1761 _globals['_RECORDINGCONFIG']._serialized_start=23 _globals['_RECORDINGCONFIG']._serialized_end=294 _globals['_PARAMDEF']._serialized_start=296 _globals['_PARAMDEF']._serialized_end=390 _globals['_TOOLDEF']._serialized_start=393 - _globals['_TOOLDEF']._serialized_end=521 - _globals['_NODEDEF']._serialized_start=524 - _globals['_NODEDEF']._serialized_end=770 - _globals['_AGENTGRAPHDEF']._serialized_start=773 - _globals['_AGENTGRAPHDEF']._serialized_end=1337 - _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_start=1105 - _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_end=1165 - _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_start=1167 - _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_end=1227 + _globals['_TOOLDEF']._serialized_end=565 + _globals['_NODEDEF']._serialized_start=568 + _globals['_NODEDEF']._serialized_end=814 + _globals['_AGENTGRAPHDEF']._serialized_start=817 + _globals['_AGENTGRAPHDEF']._serialized_end=1435 + _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_start=1181 + _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_end=1241 + _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_start=1243 + _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_end=1303 # @@protoc_insertion_point(module_scope) diff --git a/studio/api/app/schemas/agent_pb2.pyi b/studio/api/app/schemas/agent_pb2.pyi index 89cd077..e475913 100644 --- a/studio/api/app/schemas/agent_pb2.pyi +++ b/studio/api/app/schemas/agent_pb2.pyi @@ -1,133 +1,407 @@ -from google.protobuf.internal import containers as _containers -from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file +""" + +from collections import abc as _abc from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +import builtins as _builtins +import sys +import typing as _typing + +if sys.version_info >= (3, 10): + from typing import TypeAlias as _TypeAlias +else: + from typing_extensions import TypeAlias as _TypeAlias DESCRIPTOR: _descriptor.FileDescriptor -class AudioLayout(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - AUDIO_LAYOUT_UNSPECIFIED: _ClassVar[AudioLayout] - AUDIO_LAYOUT_STEREO: _ClassVar[AudioLayout] - AUDIO_LAYOUT_MONO: _ClassVar[AudioLayout] - -class AudioFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - AUDIO_FORMAT_UNSPECIFIED: _ClassVar[AudioFormat] - AUDIO_FORMAT_OPUS: _ClassVar[AudioFormat] - AUDIO_FORMAT_WAV: _ClassVar[AudioFormat] -AUDIO_LAYOUT_UNSPECIFIED: AudioLayout -AUDIO_LAYOUT_STEREO: AudioLayout -AUDIO_LAYOUT_MONO: AudioLayout -AUDIO_FORMAT_UNSPECIFIED: AudioFormat -AUDIO_FORMAT_OPUS: AudioFormat -AUDIO_FORMAT_WAV: AudioFormat +class _AudioLayout: + ValueType = _typing.NewType("ValueType", _builtins.int) + V: _TypeAlias = ValueType # noqa: Y015 + +class _AudioLayoutEnumTypeWrapper(_enum_type_wrapper._EnumTypeWrapper[_AudioLayout.ValueType], _builtins.type): + DESCRIPTOR: _descriptor.EnumDescriptor + AUDIO_LAYOUT_UNSPECIFIED: _AudioLayout.ValueType # 0 + AUDIO_LAYOUT_STEREO: _AudioLayout.ValueType # 1 + AUDIO_LAYOUT_MONO: _AudioLayout.ValueType # 2 + +class AudioLayout(_AudioLayout, metaclass=_AudioLayoutEnumTypeWrapper): ... + +AUDIO_LAYOUT_UNSPECIFIED: AudioLayout.ValueType # 0 +AUDIO_LAYOUT_STEREO: AudioLayout.ValueType # 1 +AUDIO_LAYOUT_MONO: AudioLayout.ValueType # 2 +Global___AudioLayout: _TypeAlias = AudioLayout # noqa: Y015 + +class _AudioFormat: + ValueType = _typing.NewType("ValueType", _builtins.int) + V: _TypeAlias = ValueType # noqa: Y015 + +class _AudioFormatEnumTypeWrapper(_enum_type_wrapper._EnumTypeWrapper[_AudioFormat.ValueType], _builtins.type): + DESCRIPTOR: _descriptor.EnumDescriptor + AUDIO_FORMAT_UNSPECIFIED: _AudioFormat.ValueType # 0 + AUDIO_FORMAT_OPUS: _AudioFormat.ValueType # 1 + AUDIO_FORMAT_WAV: _AudioFormat.ValueType # 2 +class AudioFormat(_AudioFormat, metaclass=_AudioFormatEnumTypeWrapper): ... + +AUDIO_FORMAT_UNSPECIFIED: AudioFormat.ValueType # 0 +AUDIO_FORMAT_OPUS: AudioFormat.ValueType # 1 +AUDIO_FORMAT_WAV: AudioFormat.ValueType # 2 +Global___AudioFormat: _TypeAlias = AudioFormat # noqa: Y015 + +class _ToolResultMode: + ValueType = _typing.NewType("ValueType", _builtins.int) + V: _TypeAlias = ValueType # noqa: Y015 + +class _ToolResultModeEnumTypeWrapper(_enum_type_wrapper._EnumTypeWrapper[_ToolResultMode.ValueType], _builtins.type): + DESCRIPTOR: _descriptor.EnumDescriptor + TOOL_RESULT_MODE_UNSPECIFIED: _ToolResultMode.ValueType # 0 + TOOL_RESULT_MODE_SUMMARIZE: _ToolResultMode.ValueType # 1 + """LLM summarize long tool output (uses global tool_summarizer gate).""" + TOOL_RESULT_MODE_TRUNCATE: _ToolResultMode.ValueType # 2 + """Deterministically truncate tool output to runtime hard cap.""" + TOOL_RESULT_MODE_NONE: _ToolResultMode.ValueType # 3 + """Keep raw tool output as-is (no summarize, no truncate).""" + +class ToolResultMode(_ToolResultMode, metaclass=_ToolResultModeEnumTypeWrapper): + """Per-tool output handling mode after execution.""" + +TOOL_RESULT_MODE_UNSPECIFIED: ToolResultMode.ValueType # 0 +TOOL_RESULT_MODE_SUMMARIZE: ToolResultMode.ValueType # 1 +"""LLM summarize long tool output (uses global tool_summarizer gate).""" +TOOL_RESULT_MODE_TRUNCATE: ToolResultMode.ValueType # 2 +"""Deterministically truncate tool output to runtime hard cap.""" +TOOL_RESULT_MODE_NONE: ToolResultMode.ValueType # 3 +"""Keep raw tool output as-is (no summarize, no truncate).""" +Global___ToolResultMode: _TypeAlias = ToolResultMode # noqa: Y015 + +@_typing.final class RecordingConfig(_message.Message): - __slots__ = ("enabled", "output_uri", "audio_layout", "sample_rate", "audio_format", "max_duration_secs", "save_transcript", "include_tool_details", "include_llm_metadata") - ENABLED_FIELD_NUMBER: _ClassVar[int] - OUTPUT_URI_FIELD_NUMBER: _ClassVar[int] - AUDIO_LAYOUT_FIELD_NUMBER: _ClassVar[int] - SAMPLE_RATE_FIELD_NUMBER: _ClassVar[int] - AUDIO_FORMAT_FIELD_NUMBER: _ClassVar[int] - MAX_DURATION_SECS_FIELD_NUMBER: _ClassVar[int] - SAVE_TRANSCRIPT_FIELD_NUMBER: _ClassVar[int] - INCLUDE_TOOL_DETAILS_FIELD_NUMBER: _ClassVar[int] - INCLUDE_LLM_METADATA_FIELD_NUMBER: _ClassVar[int] - enabled: bool - output_uri: str - audio_layout: AudioLayout - sample_rate: int - audio_format: AudioFormat - max_duration_secs: int - save_transcript: bool - include_tool_details: bool - include_llm_metadata: bool - def __init__(self, enabled: bool = ..., output_uri: _Optional[str] = ..., audio_layout: _Optional[_Union[AudioLayout, str]] = ..., sample_rate: _Optional[int] = ..., audio_format: _Optional[_Union[AudioFormat, str]] = ..., max_duration_secs: _Optional[int] = ..., save_transcript: bool = ..., include_tool_details: bool = ..., include_llm_metadata: bool = ...) -> None: ... + """Session recording configuration""" + + DESCRIPTOR: _descriptor.Descriptor + + ENABLED_FIELD_NUMBER: _builtins.int + OUTPUT_URI_FIELD_NUMBER: _builtins.int + AUDIO_LAYOUT_FIELD_NUMBER: _builtins.int + SAMPLE_RATE_FIELD_NUMBER: _builtins.int + AUDIO_FORMAT_FIELD_NUMBER: _builtins.int + MAX_DURATION_SECS_FIELD_NUMBER: _builtins.int + SAVE_TRANSCRIPT_FIELD_NUMBER: _builtins.int + INCLUDE_TOOL_DETAILS_FIELD_NUMBER: _builtins.int + INCLUDE_LLM_METADATA_FIELD_NUMBER: _builtins.int + enabled: _builtins.bool + output_uri: _builtins.str + audio_layout: Global___AudioLayout.ValueType + sample_rate: _builtins.int + audio_format: Global___AudioFormat.ValueType + max_duration_secs: _builtins.int + save_transcript: _builtins.bool + include_tool_details: _builtins.bool + include_llm_metadata: _builtins.bool + def __init__( + self, + *, + enabled: _builtins.bool = ..., + output_uri: _builtins.str = ..., + audio_layout: Global___AudioLayout.ValueType = ..., + sample_rate: _builtins.int = ..., + audio_format: Global___AudioFormat.ValueType = ..., + max_duration_secs: _builtins.int = ..., + save_transcript: _builtins.bool = ..., + include_tool_details: _builtins.bool = ..., + include_llm_metadata: _builtins.bool = ..., + ) -> None: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["audio_format", b"audio_format", "audio_layout", b"audio_layout", "enabled", b"enabled", "include_llm_metadata", b"include_llm_metadata", "include_tool_details", b"include_tool_details", "max_duration_secs", b"max_duration_secs", "output_uri", b"output_uri", "sample_rate", b"sample_rate", "save_transcript", b"save_transcript"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... +Global___RecordingConfig: _TypeAlias = RecordingConfig # noqa: Y015 + +@_typing.final class ParamDef(_message.Message): - __slots__ = ("name", "type", "description", "required", "options") - NAME_FIELD_NUMBER: _ClassVar[int] - TYPE_FIELD_NUMBER: _ClassVar[int] - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - REQUIRED_FIELD_NUMBER: _ClassVar[int] - OPTIONS_FIELD_NUMBER: _ClassVar[int] - name: str - type: str - description: str - required: bool - options: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, name: _Optional[str] = ..., type: _Optional[str] = ..., description: _Optional[str] = ..., required: bool = ..., options: _Optional[_Iterable[str]] = ...) -> None: ... + """Tool parameter definition""" + + DESCRIPTOR: _descriptor.Descriptor + NAME_FIELD_NUMBER: _builtins.int + TYPE_FIELD_NUMBER: _builtins.int + DESCRIPTION_FIELD_NUMBER: _builtins.int + REQUIRED_FIELD_NUMBER: _builtins.int + OPTIONS_FIELD_NUMBER: _builtins.int + name: _builtins.str + type: _builtins.str + """e.g., "string", "integer", "boolean" """ + description: _builtins.str + required: _builtins.bool + @_builtins.property + def options(self) -> _containers.RepeatedScalarFieldContainer[_builtins.str]: + """Valid values for enum types""" + + def __init__( + self, + *, + name: _builtins.str = ..., + type: _builtins.str = ..., + description: _builtins.str = ..., + required: _builtins.bool = ..., + options: _abc.Iterable[_builtins.str] | None = ..., + ) -> None: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["description", b"description", "name", b"name", "options", b"options", "required", b"required", "type", b"type"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + +Global___ParamDef: _TypeAlias = ParamDef # noqa: Y015 + +@_typing.final class ToolDef(_message.Message): - __slots__ = ("description", "script", "params", "cancel_on_barge_in", "side_effect") - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - SCRIPT_FIELD_NUMBER: _ClassVar[int] - PARAMS_FIELD_NUMBER: _ClassVar[int] - CANCEL_ON_BARGE_IN_FIELD_NUMBER: _ClassVar[int] - SIDE_EFFECT_FIELD_NUMBER: _ClassVar[int] - description: str - script: str - params: _containers.RepeatedCompositeFieldContainer[ParamDef] - cancel_on_barge_in: bool - side_effect: bool - def __init__(self, description: _Optional[str] = ..., script: _Optional[str] = ..., params: _Optional[_Iterable[_Union[ParamDef, _Mapping]]] = ..., cancel_on_barge_in: bool = ..., side_effect: bool = ...) -> None: ... + """A tool definition (always a JS script)""" + + DESCRIPTOR: _descriptor.Descriptor + + DESCRIPTION_FIELD_NUMBER: _builtins.int + SCRIPT_FIELD_NUMBER: _builtins.int + PARAMS_FIELD_NUMBER: _builtins.int + CANCEL_ON_BARGE_IN_FIELD_NUMBER: _builtins.int + SIDE_EFFECT_FIELD_NUMBER: _builtins.int + RESULT_MODE_FIELD_NUMBER: _builtins.int + description: _builtins.str + script: _builtins.str + cancel_on_barge_in: _builtins.bool + """If true (default), barge-in drops the tool result""" + side_effect: _builtins.bool + """If true, this tool has side effects""" + result_mode: Global___ToolResultMode.ValueType + """Optional post-tool output handling mode. + If UNSPECIFIED, runtime falls back to global config behavior. + """ + @_builtins.property + def params(self) -> _containers.RepeatedCompositeFieldContainer[Global___ParamDef]: ... + def __init__( + self, + *, + description: _builtins.str = ..., + script: _builtins.str = ..., + params: _abc.Iterable[Global___ParamDef] | None = ..., + cancel_on_barge_in: _builtins.bool = ..., + side_effect: _builtins.bool = ..., + result_mode: Global___ToolResultMode.ValueType = ..., + ) -> None: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["cancel_on_barge_in", b"cancel_on_barge_in", "description", b"description", "params", b"params", "result_mode", b"result_mode", "script", b"script", "side_effect", b"side_effect"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + +Global___ToolDef: _TypeAlias = ToolDef # noqa: Y015 +@_typing.final class NodeDef(_message.Message): - __slots__ = ("system_prompt", "tools", "edges", "model", "temperature", "max_tokens", "voice_id", "greeting") - SYSTEM_PROMPT_FIELD_NUMBER: _ClassVar[int] - TOOLS_FIELD_NUMBER: _ClassVar[int] - EDGES_FIELD_NUMBER: _ClassVar[int] - MODEL_FIELD_NUMBER: _ClassVar[int] - TEMPERATURE_FIELD_NUMBER: _ClassVar[int] - MAX_TOKENS_FIELD_NUMBER: _ClassVar[int] - VOICE_ID_FIELD_NUMBER: _ClassVar[int] - GREETING_FIELD_NUMBER: _ClassVar[int] - system_prompt: str - tools: _containers.RepeatedScalarFieldContainer[str] - edges: _containers.RepeatedScalarFieldContainer[str] - model: str - temperature: float - max_tokens: int - voice_id: str - greeting: str - def __init__(self, system_prompt: _Optional[str] = ..., tools: _Optional[_Iterable[str]] = ..., edges: _Optional[_Iterable[str]] = ..., model: _Optional[str] = ..., temperature: _Optional[float] = ..., max_tokens: _Optional[int] = ..., voice_id: _Optional[str] = ..., greeting: _Optional[str] = ...) -> None: ... + """A single node in the graph""" + + DESCRIPTOR: _descriptor.Descriptor + + SYSTEM_PROMPT_FIELD_NUMBER: _builtins.int + TOOLS_FIELD_NUMBER: _builtins.int + EDGES_FIELD_NUMBER: _builtins.int + MODEL_FIELD_NUMBER: _builtins.int + TEMPERATURE_FIELD_NUMBER: _builtins.int + MAX_TOKENS_FIELD_NUMBER: _builtins.int + VOICE_ID_FIELD_NUMBER: _builtins.int + GREETING_FIELD_NUMBER: _builtins.int + system_prompt: _builtins.str + model: _builtins.str + temperature: _builtins.float + max_tokens: _builtins.int + voice_id: _builtins.str + """TTS voice override""" + greeting: _builtins.str + @_builtins.property + def tools(self) -> _containers.RepeatedScalarFieldContainer[_builtins.str]: + """keys referencing AgentGraphDef.tools""" + + @_builtins.property + def edges(self) -> _containers.RepeatedScalarFieldContainer[_builtins.str]: + """node IDs this node can transfer to""" + + def __init__( + self, + *, + system_prompt: _builtins.str = ..., + tools: _abc.Iterable[_builtins.str] | None = ..., + edges: _abc.Iterable[_builtins.str] | None = ..., + model: _builtins.str | None = ..., + temperature: _builtins.float | None = ..., + max_tokens: _builtins.int | None = ..., + voice_id: _builtins.str | None = ..., + greeting: _builtins.str | None = ..., + ) -> None: ... + _HasFieldArgType: _TypeAlias = _typing.Literal["_greeting", b"_greeting", "_max_tokens", b"_max_tokens", "_model", b"_model", "_temperature", b"_temperature", "_voice_id", b"_voice_id", "greeting", b"greeting", "max_tokens", b"max_tokens", "model", b"model", "temperature", b"temperature", "voice_id", b"voice_id"] # noqa: Y015 + def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["_greeting", b"_greeting", "_max_tokens", b"_max_tokens", "_model", b"_model", "_temperature", b"_temperature", "_voice_id", b"_voice_id", "edges", b"edges", "greeting", b"greeting", "max_tokens", b"max_tokens", "model", b"model", "system_prompt", b"system_prompt", "temperature", b"temperature", "tools", b"tools", "voice_id", b"voice_id"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + _WhichOneofReturnType__greeting: _TypeAlias = _typing.Literal["greeting"] # noqa: Y015 + _WhichOneofArgType__greeting: _TypeAlias = _typing.Literal["_greeting", b"_greeting"] # noqa: Y015 + _WhichOneofReturnType__max_tokens: _TypeAlias = _typing.Literal["max_tokens"] # noqa: Y015 + _WhichOneofArgType__max_tokens: _TypeAlias = _typing.Literal["_max_tokens", b"_max_tokens"] # noqa: Y015 + _WhichOneofReturnType__model: _TypeAlias = _typing.Literal["model"] # noqa: Y015 + _WhichOneofArgType__model: _TypeAlias = _typing.Literal["_model", b"_model"] # noqa: Y015 + _WhichOneofReturnType__temperature: _TypeAlias = _typing.Literal["temperature"] # noqa: Y015 + _WhichOneofArgType__temperature: _TypeAlias = _typing.Literal["_temperature", b"_temperature"] # noqa: Y015 + _WhichOneofReturnType__voice_id: _TypeAlias = _typing.Literal["voice_id"] # noqa: Y015 + _WhichOneofArgType__voice_id: _TypeAlias = _typing.Literal["_voice_id", b"_voice_id"] # noqa: Y015 + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__greeting) -> _WhichOneofReturnType__greeting | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__max_tokens) -> _WhichOneofReturnType__max_tokens | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__model) -> _WhichOneofReturnType__model | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__temperature) -> _WhichOneofReturnType__temperature | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__voice_id) -> _WhichOneofReturnType__voice_id | None: ... + +Global___NodeDef: _TypeAlias = NodeDef # noqa: Y015 +@_typing.final class AgentGraphDef(_message.Message): - __slots__ = ("entry", "nodes", "tools", "language", "timezone", "voice_id", "tts_provider", "tts_model", "recording", "config_schema_version") + """A complete agent graph""" + + DESCRIPTOR: _descriptor.Descriptor + + @_typing.final class NodesEntry(_message.Message): - __slots__ = ("key", "value") - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: NodeDef - def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[NodeDef, _Mapping]] = ...) -> None: ... + DESCRIPTOR: _descriptor.Descriptor + + KEY_FIELD_NUMBER: _builtins.int + VALUE_FIELD_NUMBER: _builtins.int + key: _builtins.str + @_builtins.property + def value(self) -> Global___NodeDef: ... + def __init__( + self, + *, + key: _builtins.str = ..., + value: Global___NodeDef | None = ..., + ) -> None: ... + _HasFieldArgType: _TypeAlias = _typing.Literal["value", b"value"] # noqa: Y015 + def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["key", b"key", "value", b"value"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + + @_typing.final class ToolsEntry(_message.Message): - __slots__ = ("key", "value") - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: ToolDef - def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[ToolDef, _Mapping]] = ...) -> None: ... - ENTRY_FIELD_NUMBER: _ClassVar[int] - NODES_FIELD_NUMBER: _ClassVar[int] - TOOLS_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - TIMEZONE_FIELD_NUMBER: _ClassVar[int] - VOICE_ID_FIELD_NUMBER: _ClassVar[int] - TTS_PROVIDER_FIELD_NUMBER: _ClassVar[int] - TTS_MODEL_FIELD_NUMBER: _ClassVar[int] - RECORDING_FIELD_NUMBER: _ClassVar[int] - CONFIG_SCHEMA_VERSION_FIELD_NUMBER: _ClassVar[int] - entry: str - nodes: _containers.MessageMap[str, NodeDef] - tools: _containers.MessageMap[str, ToolDef] - language: str - timezone: str - voice_id: str - tts_provider: str - tts_model: str - recording: RecordingConfig - config_schema_version: str - def __init__(self, entry: _Optional[str] = ..., nodes: _Optional[_Mapping[str, NodeDef]] = ..., tools: _Optional[_Mapping[str, ToolDef]] = ..., language: _Optional[str] = ..., timezone: _Optional[str] = ..., voice_id: _Optional[str] = ..., tts_provider: _Optional[str] = ..., tts_model: _Optional[str] = ..., recording: _Optional[_Union[RecordingConfig, _Mapping]] = ..., config_schema_version: _Optional[str] = ...) -> None: ... + DESCRIPTOR: _descriptor.Descriptor + + KEY_FIELD_NUMBER: _builtins.int + VALUE_FIELD_NUMBER: _builtins.int + key: _builtins.str + @_builtins.property + def value(self) -> Global___ToolDef: ... + def __init__( + self, + *, + key: _builtins.str = ..., + value: Global___ToolDef | None = ..., + ) -> None: ... + _HasFieldArgType: _TypeAlias = _typing.Literal["value", b"value"] # noqa: Y015 + def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["key", b"key", "value", b"value"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + + ENTRY_FIELD_NUMBER: _builtins.int + NODES_FIELD_NUMBER: _builtins.int + TOOLS_FIELD_NUMBER: _builtins.int + LANGUAGE_FIELD_NUMBER: _builtins.int + TIMEZONE_FIELD_NUMBER: _builtins.int + VOICE_ID_FIELD_NUMBER: _builtins.int + TTS_PROVIDER_FIELD_NUMBER: _builtins.int + TTS_MODEL_FIELD_NUMBER: _builtins.int + RECORDING_FIELD_NUMBER: _builtins.int + CONFIG_SCHEMA_VERSION_FIELD_NUMBER: _builtins.int + GEMINI_LIVE_MODEL_FIELD_NUMBER: _builtins.int + entry: _builtins.str + """The ID of the node to start with""" + language: _builtins.str + """-- Agent-wide settings -- + ISO 639-1 + """ + timezone: _builtins.str + """IANA timezone""" + voice_id: _builtins.str + """Default TTS voice ID""" + tts_provider: _builtins.str + """e.g. "elevenlabs" """ + tts_model: _builtins.str + """e.g. "eleven_turbo_v2" """ + config_schema_version: _builtins.str + """Envelope field for versions (e.g. "v3_graph")""" + gemini_live_model: _builtins.str + """-- Native multimodal (Gemini Live) -- + When set, the session bypasses STT/LLM/TTS and uses Gemini Live's native + bidirectional audio-to-audio WebSocket for the entire conversation. + """ + @_builtins.property + def nodes(self) -> _containers.MessageMap[_builtins.str, Global___NodeDef]: + """All nodes keyed by ID""" + + @_builtins.property + def tools(self) -> _containers.MessageMap[_builtins.str, Global___ToolDef]: + """All tool definitions keyed by tool name""" + + @_builtins.property + def recording(self) -> Global___RecordingConfig: + """Session recording configuration""" + + def __init__( + self, + *, + entry: _builtins.str = ..., + nodes: _abc.Mapping[_builtins.str, Global___NodeDef] | None = ..., + tools: _abc.Mapping[_builtins.str, Global___ToolDef] | None = ..., + language: _builtins.str | None = ..., + timezone: _builtins.str | None = ..., + voice_id: _builtins.str | None = ..., + tts_provider: _builtins.str | None = ..., + tts_model: _builtins.str | None = ..., + recording: Global___RecordingConfig | None = ..., + config_schema_version: _builtins.str | None = ..., + gemini_live_model: _builtins.str | None = ..., + ) -> None: ... + _HasFieldArgType: _TypeAlias = _typing.Literal["_config_schema_version", b"_config_schema_version", "_gemini_live_model", b"_gemini_live_model", "_language", b"_language", "_recording", b"_recording", "_timezone", b"_timezone", "_tts_model", b"_tts_model", "_tts_provider", b"_tts_provider", "_voice_id", b"_voice_id", "config_schema_version", b"config_schema_version", "gemini_live_model", b"gemini_live_model", "language", b"language", "recording", b"recording", "timezone", b"timezone", "tts_model", b"tts_model", "tts_provider", b"tts_provider", "voice_id", b"voice_id"] # noqa: Y015 + def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... + _ClearFieldArgType: _TypeAlias = _typing.Literal["_config_schema_version", b"_config_schema_version", "_gemini_live_model", b"_gemini_live_model", "_language", b"_language", "_recording", b"_recording", "_timezone", b"_timezone", "_tts_model", b"_tts_model", "_tts_provider", b"_tts_provider", "_voice_id", b"_voice_id", "config_schema_version", b"config_schema_version", "entry", b"entry", "gemini_live_model", b"gemini_live_model", "language", b"language", "nodes", b"nodes", "recording", b"recording", "timezone", b"timezone", "tools", b"tools", "tts_model", b"tts_model", "tts_provider", b"tts_provider", "voice_id", b"voice_id"] # noqa: Y015 + def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + _WhichOneofReturnType__config_schema_version: _TypeAlias = _typing.Literal["config_schema_version"] # noqa: Y015 + _WhichOneofArgType__config_schema_version: _TypeAlias = _typing.Literal["_config_schema_version", b"_config_schema_version"] # noqa: Y015 + _WhichOneofReturnType__gemini_live_model: _TypeAlias = _typing.Literal["gemini_live_model"] # noqa: Y015 + _WhichOneofArgType__gemini_live_model: _TypeAlias = _typing.Literal["_gemini_live_model", b"_gemini_live_model"] # noqa: Y015 + _WhichOneofReturnType__language: _TypeAlias = _typing.Literal["language"] # noqa: Y015 + _WhichOneofArgType__language: _TypeAlias = _typing.Literal["_language", b"_language"] # noqa: Y015 + _WhichOneofReturnType__recording: _TypeAlias = _typing.Literal["recording"] # noqa: Y015 + _WhichOneofArgType__recording: _TypeAlias = _typing.Literal["_recording", b"_recording"] # noqa: Y015 + _WhichOneofReturnType__timezone: _TypeAlias = _typing.Literal["timezone"] # noqa: Y015 + _WhichOneofArgType__timezone: _TypeAlias = _typing.Literal["_timezone", b"_timezone"] # noqa: Y015 + _WhichOneofReturnType__tts_model: _TypeAlias = _typing.Literal["tts_model"] # noqa: Y015 + _WhichOneofArgType__tts_model: _TypeAlias = _typing.Literal["_tts_model", b"_tts_model"] # noqa: Y015 + _WhichOneofReturnType__tts_provider: _TypeAlias = _typing.Literal["tts_provider"] # noqa: Y015 + _WhichOneofArgType__tts_provider: _TypeAlias = _typing.Literal["_tts_provider", b"_tts_provider"] # noqa: Y015 + _WhichOneofReturnType__voice_id: _TypeAlias = _typing.Literal["voice_id"] # noqa: Y015 + _WhichOneofArgType__voice_id: _TypeAlias = _typing.Literal["_voice_id", b"_voice_id"] # noqa: Y015 + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__config_schema_version) -> _WhichOneofReturnType__config_schema_version | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__gemini_live_model) -> _WhichOneofReturnType__gemini_live_model | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__language) -> _WhichOneofReturnType__language | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__recording) -> _WhichOneofReturnType__recording | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__timezone) -> _WhichOneofReturnType__timezone | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__tts_model) -> _WhichOneofReturnType__tts_model | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__tts_provider) -> _WhichOneofReturnType__tts_provider | None: ... + @_typing.overload + def WhichOneof(self, oneof_group: _WhichOneofArgType__voice_id) -> _WhichOneofReturnType__voice_id | None: ... + +Global___AgentGraphDef: _TypeAlias = AgentGraphDef # noqa: Y015 diff --git a/studio/web/public/schemas/agent-config-v1.schema.json b/studio/web/public/schemas/agent-config-v1.schema.json index abfc4c0..1699657 100644 --- a/studio/web/public/schemas/agent-config-v1.schema.json +++ b/studio/web/public/schemas/agent-config-v1.schema.json @@ -25,7 +25,106 @@ "config": { "type": "object", "description": "Agent runtime config (v3_graph).", - "additionalProperties": true + "additionalProperties": true, + "properties": { + "entry": { + "type": "string", + "minLength": 1 + }, + "nodes": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": true, + "properties": { + "system_prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "tools": { + "type": "array", + "items": { + "type": "string" + } + }, + "edges": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "tools": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": true, + "properties": { + "description": { + "type": "string" + }, + "script": { + "type": "string" + }, + "params": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true, + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + }, + "description": { + "type": "string" + }, + "required": { + "type": "boolean" + }, + "options": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "cancel_on_barge_in": { + "type": "boolean" + }, + "side_effect": { + "type": "boolean" + }, + "result_mode": { + "description": "Optional per-tool post-processing mode for tool results.", + "oneOf": [ + { + "type": "integer", + "enum": [0, 1, 2, 3] + }, + { + "type": "string", + "enum": [ + "TOOL_RESULT_MODE_UNSPECIFIED", + "TOOL_RESULT_MODE_SUMMARIZE", + "TOOL_RESULT_MODE_TRUNCATE", + "TOOL_RESULT_MODE_NONE" + ] + } + ] + } + } + } + } + } }, "mermaid_diagram": { "type": ["string", "null"] diff --git a/studio/web/src/components/agent/agent-config-editor.tsx b/studio/web/src/components/agent/agent-config-editor.tsx index 6793c95..9bcd89e 100644 --- a/studio/web/src/components/agent/agent-config-editor.tsx +++ b/studio/web/src/components/agent/agent-config-editor.tsx @@ -40,6 +40,12 @@ import { cn } from "@/lib/utils"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; import { Dialog, DialogContent, DialogTrigger } from "@/components/ui/dialog"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; import ConfigViewer from "@/components/agent/config-viewer"; import ConfigDiff from "@/components/agent/config-diff"; import ShikiCodeBlock from "@/components/ui/shiki-code-block"; @@ -180,6 +186,7 @@ interface ConfigTool { description?: string; side_effect?: boolean; script?: string; + result_mode?: number | string; } interface FullConfig extends Record { @@ -188,6 +195,22 @@ interface FullConfig extends Record { tools?: Record; } +type ToolResultModeUi = "auto" | "summary" | "truncate" | "full"; + +function modeToUi(mode: unknown): ToolResultModeUi { + if (mode === 1 || mode === "TOOL_RESULT_MODE_SUMMARIZE") return "summary"; + if (mode === 2 || mode === "TOOL_RESULT_MODE_TRUNCATE") return "truncate"; + if (mode === 3 || mode === "TOOL_RESULT_MODE_NONE") return "full"; + return "auto"; +} + +function uiToMode(ui: ToolResultModeUi): number | null { + if (ui === "summary") return 1; + if (ui === "truncate") return 2; + if (ui === "full") return 3; + return null; +} + // ── Component ──────────────────────────────────────────────────── interface AgentConfigEditorProps { @@ -288,9 +311,10 @@ export default function AgentConfigEditor({ }, [fields?.language, fields?.timezone, fields?.voice_id, fields?.gemini_live_model]); const patchField = useCallback( - async (payload: Record) => { + async (payload: Record) => { const primaryField = Object.keys(payload).find((k) => k !== "regenerate_greeting"); - if (!primaryField && !payload.regenerate_greeting) return; + const shouldRegen = Boolean(payload["regenerate_greeting"]); + if (!primaryField && !shouldRegen) return; const trackField = primaryField ?? "language"; const showGlobalSyncToast = !["voice_id", "language", "timezone"].includes(trackField); @@ -912,15 +936,60 @@ export default function AgentConfigEditor({ -
- - Audit -

{tool.description || "—"}

+ +
+ + + + + e.stopPropagation()} + > + {(["auto", "summary", "truncate", "full"] as const).map( + (mode) => ( + + patchField({ + tool_result_modes: { [id]: uiToMode(mode) }, + }) + } + > + {mode.replace(/^./, (c) => c.toUpperCase())} + + ) + )} + + + +
+ + View Code +
+
diff --git a/studio/web/src/lib/api/agent.ts b/studio/web/src/lib/api/agent.ts index f569a2b..9258e4f 100644 --- a/studio/web/src/lib/api/agent.ts +++ b/studio/web/src/lib/api/agent.ts @@ -20,6 +20,14 @@ export enum AudioFormat { UNRECOGNIZED = -1, } +export enum ToolResultMode { + TOOL_RESULT_MODE_UNSPECIFIED = 0, + TOOL_RESULT_MODE_SUMMARIZE = 1, + TOOL_RESULT_MODE_TRUNCATE = 2, + TOOL_RESULT_MODE_NONE = 3, + UNRECOGNIZED = -1, +} + /** Session recording configuration */ export interface RecordingConfig { enabled: boolean; @@ -53,6 +61,8 @@ export interface ToolDef { cancel_on_barge_in: boolean; /** If true, this tool has side effects */ side_effect: boolean; + /** Optional post-tool output handling mode */ + result_mode: ToolResultMode; } /** A single node in the graph */ diff --git a/studio/web/src/lib/api/client.ts b/studio/web/src/lib/api/client.ts index 1031ea2..2574d32 100644 --- a/studio/web/src/lib/api/client.ts +++ b/studio/web/src/lib/api/client.ts @@ -593,6 +593,7 @@ export const api = { voice_id?: string; tts_provider?: string; tts_model?: string; + tool_result_modes?: Record; regenerate_greeting?: boolean; } ) => diff --git a/voice/engine/crates/agent-kit/src/agent_backends/default.rs b/voice/engine/crates/agent-kit/src/agent_backends/default.rs index 7316cab..bace4c6 100644 --- a/voice/engine/crates/agent-kit/src/agent_backends/default.rs +++ b/voice/engine/crates/agent-kit/src/agent_backends/default.rs @@ -13,17 +13,19 @@ use tokio::sync::mpsc; use tracing::{info, warn}; use uuid::Uuid; +use crate::agent_backends::ChatMessage; use crate::agent_backends::{AgentBackend, AgentBackendConfig, AgentEvent, ToolInterceptor}; use crate::context_summarizer::{trim_history, ContextSummarizationConfig, ContextSummarizer}; use crate::micro_tasks; use crate::providers::{LlmCallConfig, LlmProvider, LlmProviderError}; +use crate::providers::{LlmEvent as InnerLlmEvent, ToolCallEvent}; use crate::swarm::{ build_node_tool_schemas, make_artifact_tool_schemas, make_hang_up_tool_schema, make_on_hold_tool_schema, AgentGraphDef, SwarmState, HANG_UP_TOOL_NAME, ON_HOLD_TOOL_NAME, }; -use crate::tool_executor::{spawn_tool_task, ToolTaskResult}; -use crate::agent_backends::ChatMessage; -use crate::providers::{LlmEvent as InnerLlmEvent, ToolCallEvent}; +use crate::tool_executor::{ + resolve_tool_post_process_mode, spawn_tool_task, ToolPostProcessMode, ToolTaskResult, +}; use crate::ScriptEngine; // ── Runtime system prompt suffix ──────────────────────────────── @@ -219,9 +221,6 @@ pub struct DefaultAgentBackend { /// Optional interceptor for intercepting tool calls (testing, observability). interceptor: Option>, - /// Optional async summarizer for tool results before feeding to LLM. - tool_result_transformer: Option, - // ── Context summarization ── /// Optional context summarizer for background conversation compression. context_summarizer: Option, @@ -305,16 +304,6 @@ impl DefaultAgentBackend { // Flags are set by the calling binary (voice-engine reads env vars via // envy and populates AgentBackendConfig directly). - let tool_transformer: Option = if config.tool_summarizer - { - Some(micro_tasks::ToolResultSummarizer::new( - Arc::clone(&provider), - 500, // min chars before summarization kicks in - )) - } else { - None - }; - let ctx_summarizer: Option = if config.context_summarizer { Some(ContextSummarizer::new(Arc::clone(&provider))) } else { @@ -325,7 +314,6 @@ impl DefaultAgentBackend { provider, script_engine, interceptor: None, - tool_result_transformer: tool_transformer, context_summarizer: ctx_summarizer, context_summarization_config: ContextSummarizationConfig::default(), filler_task: None, @@ -566,7 +554,25 @@ impl DefaultAgentBackend { /// If a `ToolInterceptor` is set, it is consulted before and after execution: /// - `before_tool_call` can return `Stub(result)` to skip execution entirely. /// - `after_tool_call` can return `Override(result)` to replace the real result. - fn spawn_tool(&mut self, call_id: String, name: String, args: String, side_effect: bool) { + fn spawn_tool( + &mut self, + call_id: String, + name: String, + args: String, + side_effect: bool, + post_process_mode: ToolPostProcessMode, + ) { + // Some streaming providers can emit duplicate tool-call events for the same + // call_id (e.g. retry/delta edge cases). Guard against double-counting, which + // would leave `tools_remaining` stuck > 0 forever. + if self.pending_tool_info.contains_key(&call_id) { + warn!( + "[agent_backend] duplicate tool call id ignored: {} ({})", + call_id, name + ); + return; + } + // Spawn filler generator for side-effecting tools only when enabled. // We ensure only one filler task runs per wait-batch by checking `.is_none()`. if side_effect && self.config.tool_filler && self.filler_task.is_none() { @@ -589,16 +595,27 @@ impl DefaultAgentBackend { })); } + let before = self.tools_remaining; self.pending_tool_info.insert(call_id.clone(), name.clone()); self.tools_remaining += 1; + info!( + "[agent_backend] spawn_tool: id={} name={} tools_remaining {}->{} pending_info={}", + call_id, + name, + before, + self.tools_remaining, + self.pending_tool_info.len() + ); spawn_tool_task( call_id, name, args, side_effect, + post_process_mode, self.script_engine.clone(), self.interceptor.clone(), + Some(Arc::clone(&self.provider)), self.tool_result_tx.clone(), ); } @@ -621,8 +638,14 @@ impl DefaultAgentBackend { .and_then(|v| v.get("reason").and_then(|r| r.as_str()).map(String::from)) .unwrap_or_else(|| "agent_initiated".to_string()); - info!("[agent_backend] hang_up deferred (tools_remaining={}): {}", self.tools_remaining, reason); - self.pending_hang_up = Some(PendingHangUp { reason, content: None }); + info!( + "[agent_backend] hang_up deferred (tools_remaining={}): {}", + self.tools_remaining, reason + ); + self.pending_hang_up = Some(PendingHangUp { + reason, + content: None, + }); // Do NOT touch llm_event_rx, pending_tokens, or phase here. // The stream continues; hang_up is resolved at stream-end. } @@ -715,12 +738,26 @@ impl DefaultAgentBackend { ..tc.clone() }); - let side_effect = self + let (side_effect, post_process_mode) = self .swarm .as_ref() .and_then(|s| s.graph.tools.get(&tc.name)) - .map(|t| t.side_effect) - .unwrap_or(false); + .map(|t| { + ( + t.side_effect, + resolve_tool_post_process_mode( + self.config.tool_summarizer, + t.result_mode, + ), + ) + }) + .unwrap_or(( + false, + resolve_tool_post_process_mode( + self.config.tool_summarizer, + crate::swarm::ToolResultMode::Unspecified as i32, + ), + )); if side_effect { tracing::debug!("[agent_backend] Tool '{}' marked as side-effect", tc.name); @@ -733,6 +770,7 @@ impl DefaultAgentBackend { tc.name.clone(), tc.arguments.clone(), side_effect, + post_process_mode, ); return Some(AgentEvent::ToolCallStarted { @@ -763,8 +801,13 @@ impl DefaultAgentBackend { self.phase = Phase::Idle; // Pending hang_up takes priority over Finished. if let Some(ph) = self.pending_hang_up.take() { - info!("[agent_backend] hang_up resolved at stream-end (no pending tools)"); - return Some(AgentEvent::HangUp { reason: ph.reason, content: ph.content }); + info!( + "[agent_backend] hang_up resolved at stream-end (no pending tools)" + ); + return Some(AgentEvent::HangUp { + reason: ph.reason, + content: ph.content, + }); } // Normal turn completion. if let Some(ctx) = self.context_summarizer.as_mut() { @@ -784,6 +827,10 @@ impl DefaultAgentBackend { } async fn handle_waiting_for_tools_phase(&mut self) -> Option { + tracing::debug!( + "[agent_backend] WaitingForTools: pending={} waiting for next tool result", + self.tools_remaining + ); let rx = &mut self.tool_result_rx; let result = if let Some(mut filler_task) = self.filler_task.take() { tokio::select! { @@ -806,15 +853,26 @@ impl DefaultAgentBackend { rx.recv().await? }; - // Remove from pending info - self.pending_tool_info.remove(&result.call_id); + info!( + "[agent_backend] WaitingForTools: received tool result id={} name={} success={}", + result.call_id, result.name, result.success + ); - // Apply tool-result summarization if enabled - let content = if let Some(ref transformer) = self.tool_result_transformer { - transformer.transform(&result.name, &result.result).await - } else { - result.result - }; + // Remove from pending info (best-effort). + let removed = self.pending_tool_info.remove(&result.call_id).is_some(); + if !removed { + warn!( + "[agent_backend] received tool result for unknown call_id={} (name={})", + result.call_id, result.name + ); + } + + // IMPORTANT: + // Do not await additional async work (e.g. tool summarizer) here. + // This phase runs inside a recv loop that can be cancelled/repolled by + // the reactor select loop; keeping this path await-free ensures + // `tools_remaining` accounting is atomic once a result is received. + let content = result.result.clone(); let error_msg = (!result.success).then(|| content.clone()); @@ -826,7 +884,23 @@ impl DefaultAgentBackend { tool_call_id: Some(result.call_id.clone()), }); - self.tools_remaining -= 1; + let before = self.tools_remaining; + if self.tools_remaining == 0 { + warn!( + "[agent_backend] tools_remaining already zero when result arrived: id={} name={}", + result.call_id, result.name + ); + } else { + self.tools_remaining -= 1; + } + info!( + "[agent_backend] tool_result_accounting: id={} removed={} tools_remaining {}->{} pending_info={}", + result.call_id, + removed, + before, + self.tools_remaining, + self.pending_tool_info.len() + ); let event = AgentEvent::ToolCallCompleted { id: result.call_id, @@ -888,7 +962,10 @@ impl AgentBackend for DefaultAgentBackend { let tz = self.timezone(); self.conversation = vec![ChatMessage { role: "system".to_string(), - content: Some(serde_json::Value::String(with_suffix(&prompt, tz.as_deref()))), + content: Some(serde_json::Value::String(with_suffix( + &prompt, + tz.as_deref(), + ))), tool_calls: None, tool_call_id: None, }]; @@ -914,6 +991,11 @@ impl AgentBackend for DefaultAgentBackend { } async fn start_turn(&mut self) -> Result<(), LlmProviderError> { + info!( + "[agent_backend] start_turn: reset counters (prev tools_remaining={} pending_info={})", + self.tools_remaining, + self.pending_tool_info.len() + ); self.tool_rounds = 0; self.tools_remaining = 0; self.pending_tool_info.clear(); @@ -971,7 +1053,9 @@ impl AgentBackend for DefaultAgentBackend { for (call_id, _name) in std::mem::take(&mut self.pending_tool_info) { self.conversation.push(ChatMessage { role: "tool".to_string(), - content: Some(serde_json::Value::String("Tool execution was interrupted by the user.".to_string())), + content: Some(serde_json::Value::String( + "Tool execution was interrupted by the user.".to_string(), + )), tool_calls: None, tool_call_id: Some(call_id), }); @@ -1004,7 +1088,10 @@ impl AgentBackend for DefaultAgentBackend { if let Some(first_msg) = self.conversation.first_mut() { if first_msg.role == "system" { let tz = swarm.graph.timezone.as_deref(); - first_msg.content = Some(serde_json::Value::String(with_suffix(&node.system_prompt, tz))); + first_msg.content = Some(serde_json::Value::String(with_suffix( + &node.system_prompt, + tz, + ))); } } } diff --git a/voice/engine/crates/agent-kit/src/quickjs_engine.rs b/voice/engine/crates/agent-kit/src/quickjs_engine.rs index 00cac64..2931435 100644 --- a/voice/engine/crates/agent-kit/src/quickjs_engine.rs +++ b/voice/engine/crates/agent-kit/src/quickjs_engine.rs @@ -685,6 +685,7 @@ mod tests { params, cancel_on_barge_in: true, side_effect: false, + result_mode: crate::swarm::ToolResultMode::Unspecified as i32, } } diff --git a/voice/engine/crates/agent-kit/src/swarm.rs b/voice/engine/crates/agent-kit/src/swarm.rs index b03bab6..097ea79 100644 --- a/voice/engine/crates/agent-kit/src/swarm.rs +++ b/voice/engine/crates/agent-kit/src/swarm.rs @@ -17,7 +17,6 @@ use std::collections::HashMap; - use serde_json::json; // ── Graph Definition ──────────────────────────────────────────── @@ -48,7 +47,7 @@ use serde_json::json; // Re-export canonical recording type definitions from common. pub use common::{AudioFormat, AudioLayout, RecordingConfig}; -pub use proto::agent::{AgentGraphDef, NodeDef, ToolDef, ParamDef}; +pub use proto::agent::{AgentGraphDef, NodeDef, ParamDef, ToolDef, ToolResultMode}; // ── Runtime State ─────────────────────────────────────────────── @@ -460,8 +459,7 @@ mod tests { #[test] fn node_tool_schemas_include_transfer() { let graph = sample_graph(); - let schemas = - build_node_tool_schemas(&graph.nodes["receptionist"], &graph.tools); + let schemas = build_node_tool_schemas(&graph.nodes["receptionist"], &graph.tools); // Base tools + transfer_to + hang_up + on_hold + artifacts assert!( diff --git a/voice/engine/crates/agent-kit/src/tool_executor.rs b/voice/engine/crates/agent-kit/src/tool_executor.rs index 01e4de1..07bdd52 100644 --- a/voice/engine/crates/agent-kit/src/tool_executor.rs +++ b/voice/engine/crates/agent-kit/src/tool_executor.rs @@ -3,6 +3,9 @@ use tokio::sync::mpsc; use tracing::{info, warn}; use crate::agent_backends::{AfterToolCallAction, BeforeToolCallAction, ToolInterceptor}; +use crate::micro_tasks; +use crate::providers::LlmProvider; +use crate::swarm::ToolResultMode; use crate::ScriptEngine; // ── Types ─────────────────────────────────────────────────────── @@ -82,6 +85,47 @@ pub(super) struct ToolTaskResult { pub result: String, } +const TOOL_SUMMARY_MIN_LENGTH: usize = 500; +const TOOL_RESULT_HARD_CAP_CHARS: usize = 8000; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum ToolPostProcessMode { + Summarize, + Truncate, + None, +} + +pub(super) fn resolve_tool_post_process_mode( + global_summarizer_enabled: bool, + tool_result_mode: i32, +) -> ToolPostProcessMode { + match ToolResultMode::try_from(tool_result_mode).unwrap_or(ToolResultMode::Unspecified) { + ToolResultMode::Summarize => ToolPostProcessMode::Summarize, + ToolResultMode::Truncate => ToolPostProcessMode::Truncate, + ToolResultMode::None => ToolPostProcessMode::None, + ToolResultMode::Unspecified => { + if global_summarizer_enabled { + ToolPostProcessMode::Summarize + } else { + ToolPostProcessMode::Truncate + } + } + } +} + +fn cap_tool_result(result: &str, max_chars: usize) -> String { + let char_count = result.chars().count(); + if char_count <= max_chars { + return result.to_string(); + } + let truncated: String = result.chars().take(max_chars).collect(); + format!( + "{}\n\n[tool result truncated: {} chars omitted]", + truncated, + char_count.saturating_sub(max_chars) + ) +} + // ── Pipeline ──────────────────────────────────────────────────── /// Spawns a background task to execute a tool call, routing through hooks and engines. @@ -92,8 +136,10 @@ pub(super) fn spawn_tool_task( name: String, args: String, side_effect: bool, + post_process_mode: ToolPostProcessMode, script_engine_opt: Option>, interceptor_opt: Option>, + summary_provider_opt: Option>, tx: mpsc::UnboundedSender, ) { tokio::spawn(async move { @@ -182,7 +228,7 @@ pub(super) fn spawn_tool_task( }; // Execution Timeout limit (25 seconds) - let result = + let mut result = match tokio::time::timeout(std::time::Duration::from_secs(25), result_fut).await { Ok(r) => r, Err(_) => { @@ -192,6 +238,44 @@ pub(super) fn spawn_tool_task( } }; + if result.success { + if post_process_mode == ToolPostProcessMode::Summarize { + if let Some(provider) = summary_provider_opt.as_deref() { + result.result = micro_tasks::summarize_tool_result( + provider, + &task_name, + &result.result, + TOOL_SUMMARY_MIN_LENGTH, + ) + .await; + } + } + + if post_process_mode != ToolPostProcessMode::None { + let capped = cap_tool_result(&result.result, TOOL_RESULT_HARD_CAP_CHARS); + if capped.len() != result.result.len() { + info!( + tool.name = %task_name, + tool.call_id = %call_id, + tool.before_chars = result.result.len(), + tool.after_chars = capped.len(), + tool.post_process_mode = ?post_process_mode, + "[agent_backend] Tool result capped before enqueue" + ); + } + result.result = capped; + } + } + + info!( + tool.name = %task_name, + tool.call_id = %call_id, + tool.side_effect = side_effect, + tool.success = result.success, + tool.result_chars = result.result.len(), + "[agent_backend] Tool task finished; sending result to backend channel" + ); + if tx .send(ToolTaskResult { call_id: call_id.clone(), @@ -209,6 +293,12 @@ pub(super) fn spawn_tool_task( tool.result_chars = result.result.len(), "[agent_backend] Tool completed after session ended (result orphaned)" ); + } else { + info!( + tool.name = %task_name, + tool.call_id = %call_id, + "[agent_backend] Tool result sent to backend channel" + ); } }); } @@ -293,9 +383,26 @@ mod tests { #[test] fn classify_whitespace_trimmed_before_parsing() { - let out = - ToolOutcome::classify_script_result(" {\"result\": \"trimmed\"} ".to_string()); + let out = ToolOutcome::classify_script_result(" {\"result\": \"trimmed\"} ".to_string()); assert!(out.success); assert_eq!(out.result, "trimmed"); } + + #[test] + fn resolve_mode_defaults_to_summarize_when_global_enabled() { + let mode = resolve_tool_post_process_mode(true, ToolResultMode::Unspecified as i32); + assert_eq!(mode, ToolPostProcessMode::Summarize); + } + + #[test] + fn resolve_mode_defaults_to_truncate_when_global_disabled() { + let mode = resolve_tool_post_process_mode(false, ToolResultMode::Unspecified as i32); + assert_eq!(mode, ToolPostProcessMode::Truncate); + } + + #[test] + fn resolve_mode_explicit_none_wins_over_global() { + let mode = resolve_tool_post_process_mode(true, ToolResultMode::None as i32); + assert_eq!(mode, ToolPostProcessMode::None); + } } From 98e3b6cd864402863a3d39bdc105f2c5005a5b9a Mon Sep 17 00:00:00 2001 From: Andy Deng Date: Sat, 18 Apr 2026 00:00:10 +0800 Subject: [PATCH 2/4] fix(voice-engine): prevent voice tool-call stalls in reactor and micro tasks --- studio/api/app/api/agents.py | 9 +- .../crates/agent-kit/src/micro_tasks.rs | 147 ++++++++++-------- .../crates/agent-kit/src/tool_executor.rs | 6 +- voice/engine/src/reactor/mod.rs | 37 +++-- 4 files changed, 116 insertions(+), 83 deletions(-) diff --git a/studio/api/app/api/agents.py b/studio/api/app/api/agents.py index 6deb487..7648eca 100644 --- a/studio/api/app/api/agents.py +++ b/studio/api/app/api/agents.py @@ -812,9 +812,12 @@ async def _inject_config_change_event( ) changes.append(f"conversation mode set to {mode}") if tool_result_modes: - mode_label = {1: "summary", 2: "truncate", 3: "full"} - for tool_id, mode in tool_result_modes.items(): - label = "auto" if mode is None else mode_label.get(mode, str(mode)) + mode_label: dict[int, str] = {1: "summary", 2: "truncate", 3: "full"} + for tool_id, result_mode in tool_result_modes.items(): + if result_mode is None: + label = "auto" + else: + label = mode_label.get(result_mode, str(result_mode)) changes.append(f"{tool_id} result_mode set to {label}") if not changes: diff --git a/voice/engine/crates/agent-kit/src/micro_tasks.rs b/voice/engine/crates/agent-kit/src/micro_tasks.rs index 87358f6..288b263 100644 --- a/voice/engine/crates/agent-kit/src/micro_tasks.rs +++ b/voice/engine/crates/agent-kit/src/micro_tasks.rs @@ -1,16 +1,14 @@ //! Internal LLM micro-call tasks managed directly by `DefaultAgentBackend`. //! //! These tasks run in the background to augment agentic logic: -//! bridging silence with filler words, and compressing tool logs. +//! bridging silence with filler words and summarizing tool output. //! They are fully internal — no public traits, no external customization points. -use std::sync::Arc; +use std::time::Duration; use tracing::{info, warn}; -use crate::providers::{collect_text, LlmCallConfig, LlmProvider}; use crate::agent_backends::ChatMessage; - -// ── Tool Summarizer ───────────────────────────────────────────────── +use crate::providers::{collect_text, LlmCallConfig, LlmProvider}; const TOOL_SUMMARY_PROMPT: &str = "\ You are a tool result summarizer for a voice assistant. Condense \ @@ -19,69 +17,76 @@ captures the key information the voice assistant needs to respond \ to the user. Keep only the facts that matter for the conversation.\n\n\ Output ONLY the summary. No explanation, no formatting."; -#[derive(Clone)] -pub(super) struct ToolResultSummarizer { - provider: Arc, - summary_min_length: usize, -} +const TOOL_SUMMARY_TIMEOUT: Duration = Duration::from_secs(8); +const TOOL_FILLER_TIMEOUT: Duration = Duration::from_secs(4); -impl ToolResultSummarizer { - pub(super) fn new(provider: Arc, summary_min_length: usize) -> Self { - Self { - provider, - summary_min_length, - } +pub(super) async fn summarize_tool_result( + provider: &dyn LlmProvider, + tool_name: &str, + raw_result: &str, + summary_min_length: usize, +) -> String { + if raw_result.len() < summary_min_length { + return raw_result.to_string(); } - pub(super) async fn transform(&self, tool_name: &str, raw_result: &str) -> String { - if raw_result.len() < self.summary_min_length { - return raw_result.to_string(); - } - let messages = vec![ - ChatMessage { - role: "system".to_string(), - content: Some(serde_json::Value::String(TOOL_SUMMARY_PROMPT.to_string())), - tool_calls: None, - tool_call_id: None, - }, - ChatMessage { - role: "user".to_string(), - content: Some(serde_json::Value::String(format!( - "Tool: {}\n\nRaw output:\n{}", - tool_name, raw_result - ))), - tool_calls: None, - tool_call_id: None, - }, - ]; - let config = LlmCallConfig { - temperature: 0.0, - max_tokens: 200, - model: None, - }; - match collect_text(&*self.provider, &messages, &config).await { - Ok(text) => { - let trimmed = text.trim().to_string(); - if trimmed.is_empty() { - raw_result.to_string() - } else { - info!( - "[agent_backend::helpers] Tool result summarized ({}): {} → {} chars", - tool_name, - raw_result.len(), - trimmed.len() - ); - trimmed - } - } - Err(e) => { - warn!( - "[agent_backend::helpers] Tool {} summarization failed: {} — using raw result", - tool_name, e - ); + let messages = vec![ + ChatMessage { + role: "system".to_string(), + content: Some(serde_json::Value::String(TOOL_SUMMARY_PROMPT.to_string())), + tool_calls: None, + tool_call_id: None, + }, + ChatMessage { + role: "user".to_string(), + content: Some(serde_json::Value::String(format!( + "Tool: {}\n\nRaw output:\n{}", + tool_name, raw_result + ))), + tool_calls: None, + tool_call_id: None, + }, + ]; + let config = LlmCallConfig { + temperature: 0.0, + max_tokens: 200, + model: None, + }; + + match tokio::time::timeout( + TOOL_SUMMARY_TIMEOUT, + collect_text(provider, &messages, &config), + ) + .await + { + Ok(Ok(text)) => { + let trimmed = text.trim().to_string(); + if trimmed.is_empty() { raw_result.to_string() + } else { + info!( + "[agent_backend::helpers] Tool result summarized ({}): {} -> {} chars", + tool_name, + raw_result.len(), + trimmed.len() + ); + trimmed } } + Ok(Err(e)) => { + warn!( + "[agent_backend::helpers] Tool {} summarization failed: {} - using raw result", + tool_name, e + ); + raw_result.to_string() + } + Err(_) => { + warn!( + "[agent_backend::helpers] Tool {} summarization timed out after {:?} - using raw result", + tool_name, TOOL_SUMMARY_TIMEOUT + ); + raw_result.to_string() + } } } @@ -122,8 +127,13 @@ pub(super) async fn generate_tool_filler( max_tokens: 30, model: None, }; - match collect_text(provider, &messages, &config).await { - Ok(text) => { + match tokio::time::timeout( + TOOL_FILLER_TIMEOUT, + collect_text(provider, &messages, &config), + ) + .await + { + Ok(Ok(text)) => { let trimmed = text.trim().to_string(); if trimmed.is_empty() { None @@ -135,9 +145,16 @@ pub(super) async fn generate_tool_filler( Some(trimmed) } } - Err(e) => { + Ok(Err(e)) => { warn!("[agent_backend::helpers] Tool filler failed: {}", e); None } + Err(_) => { + warn!( + "[agent_backend::helpers] Tool filler timed out after {:?}", + TOOL_FILLER_TIMEOUT + ); + None + } } } diff --git a/voice/engine/crates/agent-kit/src/tool_executor.rs b/voice/engine/crates/agent-kit/src/tool_executor.rs index 07bdd52..8b5648f 100644 --- a/voice/engine/crates/agent-kit/src/tool_executor.rs +++ b/voice/engine/crates/agent-kit/src/tool_executor.rs @@ -227,7 +227,11 @@ pub(super) fn spawn_tool_task( } }; - // Execution Timeout limit (25 seconds) + // Execution timeout: 25 seconds. + // If post-processing (summarization) is enabled, add up to + // TOOL_SUMMARY_TIMEOUT (8 s) for a worst-case total of ~33 s. + // Both limits are enforced inside this detached tokio::spawn, + // so neither blocks the reactor's select! loop directly. let mut result = match tokio::time::timeout(std::time::Duration::from_secs(25), result_fut).await { Ok(r) => r, diff --git a/voice/engine/src/reactor/mod.rs b/voice/engine/src/reactor/mod.rs index 4fb1214..81513ae 100644 --- a/voice/engine/src/reactor/mod.rs +++ b/voice/engine/src/reactor/mod.rs @@ -611,17 +611,32 @@ impl Reactor { break; } + // LLM events are polled first (biased select arm #1) to prevent + // rapid RTP ingress from repeatedly cancelling `llm.recv()` before + // a ready tool-result event is consumed. + // + // Trade-off: if the LLM channel is continuously ready (fast token + // stream), audio_rx could be transiently starved. This is acceptable + // because: (a) LLM streams are bounded in duration, (b) VAD and the + // denoiser run on the audio thread and do not block on this recv(), + // and (c) a brief delay in on_audio() during token streaming has no + // perceptible impact on voice quality or latency. + // + // replay_log.record(ReactorInput::*) arms below capture a typed + // input snapshot (opt-in, zero-cost when disabled) used by the + // replay/sim harness for deterministic testing. Each arm decides + // independently whether to record. See reactor/replay.rs. tokio::select! { biased; - // ── Highest priority: incoming audio (keep VAD responsive) ── - // - // replay_log.record(ReactorInput::*) - // → typed input snapshot (opt-in, zero-cost when disabled). - // Used by the replay/sim harness for deterministic testing. - // - // Each arm decides independently whether to call one or both. - // See reactor/replay.rs for the ReactorInput type documentation. + // ── LLM tokens / tool calls / finished ── + // Prioritized over audio — see rationale above. + Some(ev) = self.llm.recv(), if self.llm.is_active() => { + self.replay_log.record(replay::ReactorInput::LlmEvent(ev.clone())); + self.on_llm_event(ev).await; + } + + // ── Audio (keep VAD responsive) ── msg = self.audio_rx.recv() => { match msg { Some(raw) => { @@ -647,12 +662,6 @@ impl Reactor { self.on_stt_event(ev).await; } - // ── LLM tokens / tool calls / finished ── - Some(ev) = self.llm.recv(), if self.llm.is_active() => { - self.replay_log.record(replay::ReactorInput::LlmEvent(ev.clone())); - self.on_llm_event(ev).await; - } - // ── TTS audio chunks ── Some(ev) = self.tts.recv(), if self.tts.is_active() => { // Record non-audio events only — audio chunks are large and From bc2409f883101c4822dff018298a598fc5707c8f Mon Sep 17 00:00:00 2001 From: Jijun Leng <962285+jjleng@users.noreply.github.com> Date: Fri, 17 Apr 2026 11:43:05 -0700 Subject: [PATCH 3/4] chore: pin a protoc version --- Makefile | 12 +- inference/stt/stt_pb2.py | 16 +- inference/stt/stt_pb2.pyi | 3 +- studio/api/app/schemas/agent_pb2.py | 6 +- studio/api/app/schemas/agent_pb2.pyi | 522 +++++++-------------------- studio/web/src/lib/api/agent.ts | 51 ++- 6 files changed, 199 insertions(+), 411 deletions(-) diff --git a/Makefile b/Makefile index 939d825..a788364 100644 --- a/Makefile +++ b/Makefile @@ -7,24 +7,30 @@ RUST_DIR := voice API_DIR := studio/api WEB_DIR := studio/web +ifeq (, $(shell command -v uvx 2> /dev/null)) +$(error "uvx could not be found. Please install uv (https://docs.astral.sh/uv/) before proceeding") +endif + +PROTOC := uvx --python 3.12 --from grpcio-tools==1.80.0 python -m grpc_tools.protoc + # ── Protobuf ───────────────────────────────────────────────────── .PHONY: proto proto: ## Generate Python, TS, and ML stubs from proto definitions # ML layer mkdir -p $(ML_DIR)/stt - protoc \ + $(PROTOC) \ --python_out=$(ML_DIR)/stt \ --pyi_out=$(ML_DIR)/stt \ --proto_path=$(PROTO_DIR) \ $(PROTO_DIR)/stt.proto # Studio API layer - protoc \ + $(PROTOC) \ --python_out=$(API_DIR)/app/schemas \ --pyi_out=$(API_DIR)/app/schemas \ --proto_path=$(PROTO_DIR) \ $(PROTO_DIR)/agent.proto # Studio Web layer (TS interfaces) - protoc \ + $(PROTOC) \ --plugin=protoc-gen-ts_proto=$(WEB_DIR)/node_modules/.bin/protoc-gen-ts_proto \ --ts_proto_out=$(WEB_DIR)/src/lib/api \ --ts_proto_opt=esModuleInterop=true,forceLong=string,outputServices=false,outputJsonMethods=false,outputClientImpl=false,outputEncodeMethods=false,outputPartialMethods=false,outputTypeRegistry=false,onlyTypes=true,snakeToCamel=false \ diff --git a/inference/stt/stt_pb2.py b/inference/stt/stt_pb2.py index 3e36eb5..74f0dfb 100644 --- a/inference/stt/stt_pb2.py +++ b/inference/stt/stt_pb2.py @@ -1,12 +1,22 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! +# NO CHECKED-IN PROTOBUF GENCODE # source: stt.proto -# Protobuf Python Version: 4.25.0 +# Protobuf Python Version: 6.31.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 31, + 1, + '', + 'stt.proto' +) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -19,8 +29,8 @@ _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'stt_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None _globals['_STTREQUEST']._serialized_start=18 _globals['_STTREQUEST']._serialized_end=144 _globals['_AUDIODATA']._serialized_start=146 diff --git a/inference/stt/stt_pb2.pyi b/inference/stt/stt_pb2.pyi index 52440ee..a02e8fd 100644 --- a/inference/stt/stt_pb2.pyi +++ b/inference/stt/stt_pb2.pyi @@ -1,6 +1,7 @@ from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Optional, Union as _Union +from collections.abc import Mapping as _Mapping +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor diff --git a/studio/api/app/schemas/agent_pb2.py b/studio/api/app/schemas/agent_pb2.py index 5138a46..bc6982a 100644 --- a/studio/api/app/schemas/agent_pb2.py +++ b/studio/api/app/schemas/agent_pb2.py @@ -2,7 +2,7 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # NO CHECKED-IN PROTOBUF GENCODE # source: agent.proto -# Protobuf Python Version: 6.33.4 +# Protobuf Python Version: 6.31.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -12,8 +12,8 @@ _runtime_version.ValidateProtobufRuntimeVersion( _runtime_version.Domain.PUBLIC, 6, - 33, - 4, + 31, + 1, '', 'agent.proto' ) diff --git a/studio/api/app/schemas/agent_pb2.pyi b/studio/api/app/schemas/agent_pb2.pyi index e475913..574bdcd 100644 --- a/studio/api/app/schemas/agent_pb2.pyi +++ b/studio/api/app/schemas/agent_pb2.pyi @@ -1,407 +1,149 @@ -""" -@generated by mypy-protobuf. Do not edit manually! -isort:skip_file -""" - -from collections import abc as _abc -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper -import builtins as _builtins -import sys -import typing as _typing - -if sys.version_info >= (3, 10): - from typing import TypeAlias as _TypeAlias -else: - from typing_extensions import TypeAlias as _TypeAlias +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from collections.abc import Iterable as _Iterable, Mapping as _Mapping +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor -class _AudioLayout: - ValueType = _typing.NewType("ValueType", _builtins.int) - V: _TypeAlias = ValueType # noqa: Y015 - -class _AudioLayoutEnumTypeWrapper(_enum_type_wrapper._EnumTypeWrapper[_AudioLayout.ValueType], _builtins.type): - DESCRIPTOR: _descriptor.EnumDescriptor - AUDIO_LAYOUT_UNSPECIFIED: _AudioLayout.ValueType # 0 - AUDIO_LAYOUT_STEREO: _AudioLayout.ValueType # 1 - AUDIO_LAYOUT_MONO: _AudioLayout.ValueType # 2 - -class AudioLayout(_AudioLayout, metaclass=_AudioLayoutEnumTypeWrapper): ... - -AUDIO_LAYOUT_UNSPECIFIED: AudioLayout.ValueType # 0 -AUDIO_LAYOUT_STEREO: AudioLayout.ValueType # 1 -AUDIO_LAYOUT_MONO: AudioLayout.ValueType # 2 -Global___AudioLayout: _TypeAlias = AudioLayout # noqa: Y015 - -class _AudioFormat: - ValueType = _typing.NewType("ValueType", _builtins.int) - V: _TypeAlias = ValueType # noqa: Y015 - -class _AudioFormatEnumTypeWrapper(_enum_type_wrapper._EnumTypeWrapper[_AudioFormat.ValueType], _builtins.type): - DESCRIPTOR: _descriptor.EnumDescriptor - AUDIO_FORMAT_UNSPECIFIED: _AudioFormat.ValueType # 0 - AUDIO_FORMAT_OPUS: _AudioFormat.ValueType # 1 - AUDIO_FORMAT_WAV: _AudioFormat.ValueType # 2 +class AudioLayout(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + AUDIO_LAYOUT_UNSPECIFIED: _ClassVar[AudioLayout] + AUDIO_LAYOUT_STEREO: _ClassVar[AudioLayout] + AUDIO_LAYOUT_MONO: _ClassVar[AudioLayout] + +class AudioFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + AUDIO_FORMAT_UNSPECIFIED: _ClassVar[AudioFormat] + AUDIO_FORMAT_OPUS: _ClassVar[AudioFormat] + AUDIO_FORMAT_WAV: _ClassVar[AudioFormat] + +class ToolResultMode(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + TOOL_RESULT_MODE_UNSPECIFIED: _ClassVar[ToolResultMode] + TOOL_RESULT_MODE_SUMMARIZE: _ClassVar[ToolResultMode] + TOOL_RESULT_MODE_TRUNCATE: _ClassVar[ToolResultMode] + TOOL_RESULT_MODE_NONE: _ClassVar[ToolResultMode] +AUDIO_LAYOUT_UNSPECIFIED: AudioLayout +AUDIO_LAYOUT_STEREO: AudioLayout +AUDIO_LAYOUT_MONO: AudioLayout +AUDIO_FORMAT_UNSPECIFIED: AudioFormat +AUDIO_FORMAT_OPUS: AudioFormat +AUDIO_FORMAT_WAV: AudioFormat +TOOL_RESULT_MODE_UNSPECIFIED: ToolResultMode +TOOL_RESULT_MODE_SUMMARIZE: ToolResultMode +TOOL_RESULT_MODE_TRUNCATE: ToolResultMode +TOOL_RESULT_MODE_NONE: ToolResultMode -class AudioFormat(_AudioFormat, metaclass=_AudioFormatEnumTypeWrapper): ... - -AUDIO_FORMAT_UNSPECIFIED: AudioFormat.ValueType # 0 -AUDIO_FORMAT_OPUS: AudioFormat.ValueType # 1 -AUDIO_FORMAT_WAV: AudioFormat.ValueType # 2 -Global___AudioFormat: _TypeAlias = AudioFormat # noqa: Y015 - -class _ToolResultMode: - ValueType = _typing.NewType("ValueType", _builtins.int) - V: _TypeAlias = ValueType # noqa: Y015 - -class _ToolResultModeEnumTypeWrapper(_enum_type_wrapper._EnumTypeWrapper[_ToolResultMode.ValueType], _builtins.type): - DESCRIPTOR: _descriptor.EnumDescriptor - TOOL_RESULT_MODE_UNSPECIFIED: _ToolResultMode.ValueType # 0 - TOOL_RESULT_MODE_SUMMARIZE: _ToolResultMode.ValueType # 1 - """LLM summarize long tool output (uses global tool_summarizer gate).""" - TOOL_RESULT_MODE_TRUNCATE: _ToolResultMode.ValueType # 2 - """Deterministically truncate tool output to runtime hard cap.""" - TOOL_RESULT_MODE_NONE: _ToolResultMode.ValueType # 3 - """Keep raw tool output as-is (no summarize, no truncate).""" - -class ToolResultMode(_ToolResultMode, metaclass=_ToolResultModeEnumTypeWrapper): - """Per-tool output handling mode after execution.""" - -TOOL_RESULT_MODE_UNSPECIFIED: ToolResultMode.ValueType # 0 -TOOL_RESULT_MODE_SUMMARIZE: ToolResultMode.ValueType # 1 -"""LLM summarize long tool output (uses global tool_summarizer gate).""" -TOOL_RESULT_MODE_TRUNCATE: ToolResultMode.ValueType # 2 -"""Deterministically truncate tool output to runtime hard cap.""" -TOOL_RESULT_MODE_NONE: ToolResultMode.ValueType # 3 -"""Keep raw tool output as-is (no summarize, no truncate).""" -Global___ToolResultMode: _TypeAlias = ToolResultMode # noqa: Y015 - -@_typing.final class RecordingConfig(_message.Message): - """Session recording configuration""" - - DESCRIPTOR: _descriptor.Descriptor - - ENABLED_FIELD_NUMBER: _builtins.int - OUTPUT_URI_FIELD_NUMBER: _builtins.int - AUDIO_LAYOUT_FIELD_NUMBER: _builtins.int - SAMPLE_RATE_FIELD_NUMBER: _builtins.int - AUDIO_FORMAT_FIELD_NUMBER: _builtins.int - MAX_DURATION_SECS_FIELD_NUMBER: _builtins.int - SAVE_TRANSCRIPT_FIELD_NUMBER: _builtins.int - INCLUDE_TOOL_DETAILS_FIELD_NUMBER: _builtins.int - INCLUDE_LLM_METADATA_FIELD_NUMBER: _builtins.int - enabled: _builtins.bool - output_uri: _builtins.str - audio_layout: Global___AudioLayout.ValueType - sample_rate: _builtins.int - audio_format: Global___AudioFormat.ValueType - max_duration_secs: _builtins.int - save_transcript: _builtins.bool - include_tool_details: _builtins.bool - include_llm_metadata: _builtins.bool - def __init__( - self, - *, - enabled: _builtins.bool = ..., - output_uri: _builtins.str = ..., - audio_layout: Global___AudioLayout.ValueType = ..., - sample_rate: _builtins.int = ..., - audio_format: Global___AudioFormat.ValueType = ..., - max_duration_secs: _builtins.int = ..., - save_transcript: _builtins.bool = ..., - include_tool_details: _builtins.bool = ..., - include_llm_metadata: _builtins.bool = ..., - ) -> None: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["audio_format", b"audio_format", "audio_layout", b"audio_layout", "enabled", b"enabled", "include_llm_metadata", b"include_llm_metadata", "include_tool_details", b"include_tool_details", "max_duration_secs", b"max_duration_secs", "output_uri", b"output_uri", "sample_rate", b"sample_rate", "save_transcript", b"save_transcript"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... + __slots__ = ("enabled", "output_uri", "audio_layout", "sample_rate", "audio_format", "max_duration_secs", "save_transcript", "include_tool_details", "include_llm_metadata") + ENABLED_FIELD_NUMBER: _ClassVar[int] + OUTPUT_URI_FIELD_NUMBER: _ClassVar[int] + AUDIO_LAYOUT_FIELD_NUMBER: _ClassVar[int] + SAMPLE_RATE_FIELD_NUMBER: _ClassVar[int] + AUDIO_FORMAT_FIELD_NUMBER: _ClassVar[int] + MAX_DURATION_SECS_FIELD_NUMBER: _ClassVar[int] + SAVE_TRANSCRIPT_FIELD_NUMBER: _ClassVar[int] + INCLUDE_TOOL_DETAILS_FIELD_NUMBER: _ClassVar[int] + INCLUDE_LLM_METADATA_FIELD_NUMBER: _ClassVar[int] + enabled: bool + output_uri: str + audio_layout: AudioLayout + sample_rate: int + audio_format: AudioFormat + max_duration_secs: int + save_transcript: bool + include_tool_details: bool + include_llm_metadata: bool + def __init__(self, enabled: bool = ..., output_uri: _Optional[str] = ..., audio_layout: _Optional[_Union[AudioLayout, str]] = ..., sample_rate: _Optional[int] = ..., audio_format: _Optional[_Union[AudioFormat, str]] = ..., max_duration_secs: _Optional[int] = ..., save_transcript: bool = ..., include_tool_details: bool = ..., include_llm_metadata: bool = ...) -> None: ... -Global___RecordingConfig: _TypeAlias = RecordingConfig # noqa: Y015 - -@_typing.final class ParamDef(_message.Message): - """Tool parameter definition""" - - DESCRIPTOR: _descriptor.Descriptor + __slots__ = ("name", "type", "description", "required", "options") + NAME_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] + REQUIRED_FIELD_NUMBER: _ClassVar[int] + OPTIONS_FIELD_NUMBER: _ClassVar[int] + name: str + type: str + description: str + required: bool + options: _containers.RepeatedScalarFieldContainer[str] + def __init__(self, name: _Optional[str] = ..., type: _Optional[str] = ..., description: _Optional[str] = ..., required: bool = ..., options: _Optional[_Iterable[str]] = ...) -> None: ... - NAME_FIELD_NUMBER: _builtins.int - TYPE_FIELD_NUMBER: _builtins.int - DESCRIPTION_FIELD_NUMBER: _builtins.int - REQUIRED_FIELD_NUMBER: _builtins.int - OPTIONS_FIELD_NUMBER: _builtins.int - name: _builtins.str - type: _builtins.str - """e.g., "string", "integer", "boolean" """ - description: _builtins.str - required: _builtins.bool - @_builtins.property - def options(self) -> _containers.RepeatedScalarFieldContainer[_builtins.str]: - """Valid values for enum types""" - - def __init__( - self, - *, - name: _builtins.str = ..., - type: _builtins.str = ..., - description: _builtins.str = ..., - required: _builtins.bool = ..., - options: _abc.Iterable[_builtins.str] | None = ..., - ) -> None: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["description", b"description", "name", b"name", "options", b"options", "required", b"required", "type", b"type"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... - -Global___ParamDef: _TypeAlias = ParamDef # noqa: Y015 - -@_typing.final class ToolDef(_message.Message): - """A tool definition (always a JS script)""" - - DESCRIPTOR: _descriptor.Descriptor - - DESCRIPTION_FIELD_NUMBER: _builtins.int - SCRIPT_FIELD_NUMBER: _builtins.int - PARAMS_FIELD_NUMBER: _builtins.int - CANCEL_ON_BARGE_IN_FIELD_NUMBER: _builtins.int - SIDE_EFFECT_FIELD_NUMBER: _builtins.int - RESULT_MODE_FIELD_NUMBER: _builtins.int - description: _builtins.str - script: _builtins.str - cancel_on_barge_in: _builtins.bool - """If true (default), barge-in drops the tool result""" - side_effect: _builtins.bool - """If true, this tool has side effects""" - result_mode: Global___ToolResultMode.ValueType - """Optional post-tool output handling mode. - If UNSPECIFIED, runtime falls back to global config behavior. - """ - @_builtins.property - def params(self) -> _containers.RepeatedCompositeFieldContainer[Global___ParamDef]: ... - def __init__( - self, - *, - description: _builtins.str = ..., - script: _builtins.str = ..., - params: _abc.Iterable[Global___ParamDef] | None = ..., - cancel_on_barge_in: _builtins.bool = ..., - side_effect: _builtins.bool = ..., - result_mode: Global___ToolResultMode.ValueType = ..., - ) -> None: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["cancel_on_barge_in", b"cancel_on_barge_in", "description", b"description", "params", b"params", "result_mode", b"result_mode", "script", b"script", "side_effect", b"side_effect"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... - -Global___ToolDef: _TypeAlias = ToolDef # noqa: Y015 + __slots__ = ("description", "script", "params", "cancel_on_barge_in", "side_effect", "result_mode") + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] + SCRIPT_FIELD_NUMBER: _ClassVar[int] + PARAMS_FIELD_NUMBER: _ClassVar[int] + CANCEL_ON_BARGE_IN_FIELD_NUMBER: _ClassVar[int] + SIDE_EFFECT_FIELD_NUMBER: _ClassVar[int] + RESULT_MODE_FIELD_NUMBER: _ClassVar[int] + description: str + script: str + params: _containers.RepeatedCompositeFieldContainer[ParamDef] + cancel_on_barge_in: bool + side_effect: bool + result_mode: ToolResultMode + def __init__(self, description: _Optional[str] = ..., script: _Optional[str] = ..., params: _Optional[_Iterable[_Union[ParamDef, _Mapping]]] = ..., cancel_on_barge_in: bool = ..., side_effect: bool = ..., result_mode: _Optional[_Union[ToolResultMode, str]] = ...) -> None: ... -@_typing.final class NodeDef(_message.Message): - """A single node in the graph""" - - DESCRIPTOR: _descriptor.Descriptor - - SYSTEM_PROMPT_FIELD_NUMBER: _builtins.int - TOOLS_FIELD_NUMBER: _builtins.int - EDGES_FIELD_NUMBER: _builtins.int - MODEL_FIELD_NUMBER: _builtins.int - TEMPERATURE_FIELD_NUMBER: _builtins.int - MAX_TOKENS_FIELD_NUMBER: _builtins.int - VOICE_ID_FIELD_NUMBER: _builtins.int - GREETING_FIELD_NUMBER: _builtins.int - system_prompt: _builtins.str - model: _builtins.str - temperature: _builtins.float - max_tokens: _builtins.int - voice_id: _builtins.str - """TTS voice override""" - greeting: _builtins.str - @_builtins.property - def tools(self) -> _containers.RepeatedScalarFieldContainer[_builtins.str]: - """keys referencing AgentGraphDef.tools""" - - @_builtins.property - def edges(self) -> _containers.RepeatedScalarFieldContainer[_builtins.str]: - """node IDs this node can transfer to""" - - def __init__( - self, - *, - system_prompt: _builtins.str = ..., - tools: _abc.Iterable[_builtins.str] | None = ..., - edges: _abc.Iterable[_builtins.str] | None = ..., - model: _builtins.str | None = ..., - temperature: _builtins.float | None = ..., - max_tokens: _builtins.int | None = ..., - voice_id: _builtins.str | None = ..., - greeting: _builtins.str | None = ..., - ) -> None: ... - _HasFieldArgType: _TypeAlias = _typing.Literal["_greeting", b"_greeting", "_max_tokens", b"_max_tokens", "_model", b"_model", "_temperature", b"_temperature", "_voice_id", b"_voice_id", "greeting", b"greeting", "max_tokens", b"max_tokens", "model", b"model", "temperature", b"temperature", "voice_id", b"voice_id"] # noqa: Y015 - def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["_greeting", b"_greeting", "_max_tokens", b"_max_tokens", "_model", b"_model", "_temperature", b"_temperature", "_voice_id", b"_voice_id", "edges", b"edges", "greeting", b"greeting", "max_tokens", b"max_tokens", "model", b"model", "system_prompt", b"system_prompt", "temperature", b"temperature", "tools", b"tools", "voice_id", b"voice_id"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... - _WhichOneofReturnType__greeting: _TypeAlias = _typing.Literal["greeting"] # noqa: Y015 - _WhichOneofArgType__greeting: _TypeAlias = _typing.Literal["_greeting", b"_greeting"] # noqa: Y015 - _WhichOneofReturnType__max_tokens: _TypeAlias = _typing.Literal["max_tokens"] # noqa: Y015 - _WhichOneofArgType__max_tokens: _TypeAlias = _typing.Literal["_max_tokens", b"_max_tokens"] # noqa: Y015 - _WhichOneofReturnType__model: _TypeAlias = _typing.Literal["model"] # noqa: Y015 - _WhichOneofArgType__model: _TypeAlias = _typing.Literal["_model", b"_model"] # noqa: Y015 - _WhichOneofReturnType__temperature: _TypeAlias = _typing.Literal["temperature"] # noqa: Y015 - _WhichOneofArgType__temperature: _TypeAlias = _typing.Literal["_temperature", b"_temperature"] # noqa: Y015 - _WhichOneofReturnType__voice_id: _TypeAlias = _typing.Literal["voice_id"] # noqa: Y015 - _WhichOneofArgType__voice_id: _TypeAlias = _typing.Literal["_voice_id", b"_voice_id"] # noqa: Y015 - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__greeting) -> _WhichOneofReturnType__greeting | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__max_tokens) -> _WhichOneofReturnType__max_tokens | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__model) -> _WhichOneofReturnType__model | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__temperature) -> _WhichOneofReturnType__temperature | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__voice_id) -> _WhichOneofReturnType__voice_id | None: ... - -Global___NodeDef: _TypeAlias = NodeDef # noqa: Y015 + __slots__ = ("system_prompt", "tools", "edges", "model", "temperature", "max_tokens", "voice_id", "greeting") + SYSTEM_PROMPT_FIELD_NUMBER: _ClassVar[int] + TOOLS_FIELD_NUMBER: _ClassVar[int] + EDGES_FIELD_NUMBER: _ClassVar[int] + MODEL_FIELD_NUMBER: _ClassVar[int] + TEMPERATURE_FIELD_NUMBER: _ClassVar[int] + MAX_TOKENS_FIELD_NUMBER: _ClassVar[int] + VOICE_ID_FIELD_NUMBER: _ClassVar[int] + GREETING_FIELD_NUMBER: _ClassVar[int] + system_prompt: str + tools: _containers.RepeatedScalarFieldContainer[str] + edges: _containers.RepeatedScalarFieldContainer[str] + model: str + temperature: float + max_tokens: int + voice_id: str + greeting: str + def __init__(self, system_prompt: _Optional[str] = ..., tools: _Optional[_Iterable[str]] = ..., edges: _Optional[_Iterable[str]] = ..., model: _Optional[str] = ..., temperature: _Optional[float] = ..., max_tokens: _Optional[int] = ..., voice_id: _Optional[str] = ..., greeting: _Optional[str] = ...) -> None: ... -@_typing.final class AgentGraphDef(_message.Message): - """A complete agent graph""" - - DESCRIPTOR: _descriptor.Descriptor - - @_typing.final + __slots__ = ("entry", "nodes", "tools", "language", "timezone", "voice_id", "tts_provider", "tts_model", "recording", "config_schema_version", "gemini_live_model") class NodesEntry(_message.Message): - DESCRIPTOR: _descriptor.Descriptor - - KEY_FIELD_NUMBER: _builtins.int - VALUE_FIELD_NUMBER: _builtins.int - key: _builtins.str - @_builtins.property - def value(self) -> Global___NodeDef: ... - def __init__( - self, - *, - key: _builtins.str = ..., - value: Global___NodeDef | None = ..., - ) -> None: ... - _HasFieldArgType: _TypeAlias = _typing.Literal["value", b"value"] # noqa: Y015 - def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["key", b"key", "value", b"value"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... - - @_typing.final + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: NodeDef + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[NodeDef, _Mapping]] = ...) -> None: ... class ToolsEntry(_message.Message): - DESCRIPTOR: _descriptor.Descriptor - - KEY_FIELD_NUMBER: _builtins.int - VALUE_FIELD_NUMBER: _builtins.int - key: _builtins.str - @_builtins.property - def value(self) -> Global___ToolDef: ... - def __init__( - self, - *, - key: _builtins.str = ..., - value: Global___ToolDef | None = ..., - ) -> None: ... - _HasFieldArgType: _TypeAlias = _typing.Literal["value", b"value"] # noqa: Y015 - def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["key", b"key", "value", b"value"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... - - ENTRY_FIELD_NUMBER: _builtins.int - NODES_FIELD_NUMBER: _builtins.int - TOOLS_FIELD_NUMBER: _builtins.int - LANGUAGE_FIELD_NUMBER: _builtins.int - TIMEZONE_FIELD_NUMBER: _builtins.int - VOICE_ID_FIELD_NUMBER: _builtins.int - TTS_PROVIDER_FIELD_NUMBER: _builtins.int - TTS_MODEL_FIELD_NUMBER: _builtins.int - RECORDING_FIELD_NUMBER: _builtins.int - CONFIG_SCHEMA_VERSION_FIELD_NUMBER: _builtins.int - GEMINI_LIVE_MODEL_FIELD_NUMBER: _builtins.int - entry: _builtins.str - """The ID of the node to start with""" - language: _builtins.str - """-- Agent-wide settings -- - ISO 639-1 - """ - timezone: _builtins.str - """IANA timezone""" - voice_id: _builtins.str - """Default TTS voice ID""" - tts_provider: _builtins.str - """e.g. "elevenlabs" """ - tts_model: _builtins.str - """e.g. "eleven_turbo_v2" """ - config_schema_version: _builtins.str - """Envelope field for versions (e.g. "v3_graph")""" - gemini_live_model: _builtins.str - """-- Native multimodal (Gemini Live) -- - When set, the session bypasses STT/LLM/TTS and uses Gemini Live's native - bidirectional audio-to-audio WebSocket for the entire conversation. - """ - @_builtins.property - def nodes(self) -> _containers.MessageMap[_builtins.str, Global___NodeDef]: - """All nodes keyed by ID""" - - @_builtins.property - def tools(self) -> _containers.MessageMap[_builtins.str, Global___ToolDef]: - """All tool definitions keyed by tool name""" - - @_builtins.property - def recording(self) -> Global___RecordingConfig: - """Session recording configuration""" - - def __init__( - self, - *, - entry: _builtins.str = ..., - nodes: _abc.Mapping[_builtins.str, Global___NodeDef] | None = ..., - tools: _abc.Mapping[_builtins.str, Global___ToolDef] | None = ..., - language: _builtins.str | None = ..., - timezone: _builtins.str | None = ..., - voice_id: _builtins.str | None = ..., - tts_provider: _builtins.str | None = ..., - tts_model: _builtins.str | None = ..., - recording: Global___RecordingConfig | None = ..., - config_schema_version: _builtins.str | None = ..., - gemini_live_model: _builtins.str | None = ..., - ) -> None: ... - _HasFieldArgType: _TypeAlias = _typing.Literal["_config_schema_version", b"_config_schema_version", "_gemini_live_model", b"_gemini_live_model", "_language", b"_language", "_recording", b"_recording", "_timezone", b"_timezone", "_tts_model", b"_tts_model", "_tts_provider", b"_tts_provider", "_voice_id", b"_voice_id", "config_schema_version", b"config_schema_version", "gemini_live_model", b"gemini_live_model", "language", b"language", "recording", b"recording", "timezone", b"timezone", "tts_model", b"tts_model", "tts_provider", b"tts_provider", "voice_id", b"voice_id"] # noqa: Y015 - def HasField(self, field_name: _HasFieldArgType) -> _builtins.bool: ... - _ClearFieldArgType: _TypeAlias = _typing.Literal["_config_schema_version", b"_config_schema_version", "_gemini_live_model", b"_gemini_live_model", "_language", b"_language", "_recording", b"_recording", "_timezone", b"_timezone", "_tts_model", b"_tts_model", "_tts_provider", b"_tts_provider", "_voice_id", b"_voice_id", "config_schema_version", b"config_schema_version", "entry", b"entry", "gemini_live_model", b"gemini_live_model", "language", b"language", "nodes", b"nodes", "recording", b"recording", "timezone", b"timezone", "tools", b"tools", "tts_model", b"tts_model", "tts_provider", b"tts_provider", "voice_id", b"voice_id"] # noqa: Y015 - def ClearField(self, field_name: _ClearFieldArgType) -> None: ... - _WhichOneofReturnType__config_schema_version: _TypeAlias = _typing.Literal["config_schema_version"] # noqa: Y015 - _WhichOneofArgType__config_schema_version: _TypeAlias = _typing.Literal["_config_schema_version", b"_config_schema_version"] # noqa: Y015 - _WhichOneofReturnType__gemini_live_model: _TypeAlias = _typing.Literal["gemini_live_model"] # noqa: Y015 - _WhichOneofArgType__gemini_live_model: _TypeAlias = _typing.Literal["_gemini_live_model", b"_gemini_live_model"] # noqa: Y015 - _WhichOneofReturnType__language: _TypeAlias = _typing.Literal["language"] # noqa: Y015 - _WhichOneofArgType__language: _TypeAlias = _typing.Literal["_language", b"_language"] # noqa: Y015 - _WhichOneofReturnType__recording: _TypeAlias = _typing.Literal["recording"] # noqa: Y015 - _WhichOneofArgType__recording: _TypeAlias = _typing.Literal["_recording", b"_recording"] # noqa: Y015 - _WhichOneofReturnType__timezone: _TypeAlias = _typing.Literal["timezone"] # noqa: Y015 - _WhichOneofArgType__timezone: _TypeAlias = _typing.Literal["_timezone", b"_timezone"] # noqa: Y015 - _WhichOneofReturnType__tts_model: _TypeAlias = _typing.Literal["tts_model"] # noqa: Y015 - _WhichOneofArgType__tts_model: _TypeAlias = _typing.Literal["_tts_model", b"_tts_model"] # noqa: Y015 - _WhichOneofReturnType__tts_provider: _TypeAlias = _typing.Literal["tts_provider"] # noqa: Y015 - _WhichOneofArgType__tts_provider: _TypeAlias = _typing.Literal["_tts_provider", b"_tts_provider"] # noqa: Y015 - _WhichOneofReturnType__voice_id: _TypeAlias = _typing.Literal["voice_id"] # noqa: Y015 - _WhichOneofArgType__voice_id: _TypeAlias = _typing.Literal["_voice_id", b"_voice_id"] # noqa: Y015 - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__config_schema_version) -> _WhichOneofReturnType__config_schema_version | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__gemini_live_model) -> _WhichOneofReturnType__gemini_live_model | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__language) -> _WhichOneofReturnType__language | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__recording) -> _WhichOneofReturnType__recording | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__timezone) -> _WhichOneofReturnType__timezone | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__tts_model) -> _WhichOneofReturnType__tts_model | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__tts_provider) -> _WhichOneofReturnType__tts_provider | None: ... - @_typing.overload - def WhichOneof(self, oneof_group: _WhichOneofArgType__voice_id) -> _WhichOneofReturnType__voice_id | None: ... - -Global___AgentGraphDef: _TypeAlias = AgentGraphDef # noqa: Y015 + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: ToolDef + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[ToolDef, _Mapping]] = ...) -> None: ... + ENTRY_FIELD_NUMBER: _ClassVar[int] + NODES_FIELD_NUMBER: _ClassVar[int] + TOOLS_FIELD_NUMBER: _ClassVar[int] + LANGUAGE_FIELD_NUMBER: _ClassVar[int] + TIMEZONE_FIELD_NUMBER: _ClassVar[int] + VOICE_ID_FIELD_NUMBER: _ClassVar[int] + TTS_PROVIDER_FIELD_NUMBER: _ClassVar[int] + TTS_MODEL_FIELD_NUMBER: _ClassVar[int] + RECORDING_FIELD_NUMBER: _ClassVar[int] + CONFIG_SCHEMA_VERSION_FIELD_NUMBER: _ClassVar[int] + GEMINI_LIVE_MODEL_FIELD_NUMBER: _ClassVar[int] + entry: str + nodes: _containers.MessageMap[str, NodeDef] + tools: _containers.MessageMap[str, ToolDef] + language: str + timezone: str + voice_id: str + tts_provider: str + tts_model: str + recording: RecordingConfig + config_schema_version: str + gemini_live_model: str + def __init__(self, entry: _Optional[str] = ..., nodes: _Optional[_Mapping[str, NodeDef]] = ..., tools: _Optional[_Mapping[str, ToolDef]] = ..., language: _Optional[str] = ..., timezone: _Optional[str] = ..., voice_id: _Optional[str] = ..., tts_provider: _Optional[str] = ..., tts_model: _Optional[str] = ..., recording: _Optional[_Union[RecordingConfig, _Mapping]] = ..., config_schema_version: _Optional[str] = ..., gemini_live_model: _Optional[str] = ...) -> None: ... diff --git a/studio/web/src/lib/api/agent.ts b/studio/web/src/lib/api/agent.ts index 9258e4f..f8de8f0 100644 --- a/studio/web/src/lib/api/agent.ts +++ b/studio/web/src/lib/api/agent.ts @@ -1,9 +1,11 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: // protoc-gen-ts_proto v2.11.6 -// protoc v4.25.0 +// protoc v6.31.1 // source: agent.proto +/* eslint-disable */ + export const protobufPackage = "agent"; export enum AudioLayout { @@ -20,10 +22,14 @@ export enum AudioFormat { UNRECOGNIZED = -1, } +/** Per-tool output handling mode after execution. */ export enum ToolResultMode { TOOL_RESULT_MODE_UNSPECIFIED = 0, + /** TOOL_RESULT_MODE_SUMMARIZE - LLM summarize long tool output (uses global tool_summarizer gate). */ TOOL_RESULT_MODE_SUMMARIZE = 1, + /** TOOL_RESULT_MODE_TRUNCATE - Deterministically truncate tool output to runtime hard cap. */ TOOL_RESULT_MODE_TRUNCATE = 2, + /** TOOL_RESULT_MODE_NONE - Keep raw tool output as-is (no summarize, no truncate). */ TOOL_RESULT_MODE_NONE = 3, UNRECOGNIZED = -1, } @@ -61,7 +67,10 @@ export interface ToolDef { cancel_on_barge_in: boolean; /** If true, this tool has side effects */ side_effect: boolean; - /** Optional post-tool output handling mode */ + /** + * Optional post-tool output handling mode. + * If UNSPECIFIED, runtime falls back to global config behavior. + */ result_mode: ToolResultMode; } @@ -74,7 +83,9 @@ export interface NodeDef { edges: string[]; model?: string | undefined; temperature?: number | undefined; - max_tokens?: number | undefined; + max_tokens?: + | number + | undefined; /** TTS voice override */ voice_id?: string | undefined; greeting?: string | undefined; @@ -89,20 +100,38 @@ export interface AgentGraphDef { /** All tool definitions keyed by tool name */ tools: { [key: string]: ToolDef }; /** -- Agent-wide settings -- */ - language?: string | undefined; + language?: + | string + | undefined; /** IANA timezone */ - timezone?: string | undefined; + timezone?: + | string + | undefined; /** Default TTS voice ID */ - voice_id?: string | undefined; + voice_id?: + | string + | undefined; /** e.g. "elevenlabs" */ - tts_provider?: string | undefined; + tts_provider?: + | string + | undefined; /** e.g. "eleven_turbo_v2" */ - tts_model?: string | undefined; + tts_model?: + | string + | undefined; /** Session recording configuration */ - recording?: RecordingConfig | undefined; + recording?: + | RecordingConfig + | undefined; /** Envelope field for versions (e.g. "v3_graph") */ - config_schema_version?: string | undefined; - gemini_live_api_key?: string | undefined; + config_schema_version?: + | string + | undefined; + /** + * -- Native multimodal (Gemini Live) -- + * When set, the session bypasses STT/LLM/TTS and uses Gemini Live's native + * bidirectional audio-to-audio WebSocket for the entire conversation. + */ gemini_live_model?: string | undefined; } From f276800b35e86d4f444da21457aa997bc90bf3e6 Mon Sep 17 00:00:00 2001 From: Jijun Leng <962285+jjleng@users.noreply.github.com> Date: Fri, 17 Apr 2026 14:46:44 -0700 Subject: [PATCH 4/4] refactor(agent): simplify tool result summarization configuring --- proto/agent.proto | 29 +--- studio/api/app/agent_builder/edit_ops.py | 30 +--- studio/api/app/agent_builder/service.py | 15 +- studio/api/app/api/agents.py | 56 +++---- studio/api/app/schemas/agent_pb2.py | 30 ++-- studio/api/app/schemas/agent_pb2.pyi | 19 +-- .../schemas/agent-config-v1.schema.json | 20 +-- .../components/agent/agent-config-editor.tsx | 158 +++++++----------- studio/web/src/lib/api/agent.ts | 18 +- studio/web/src/lib/api/client.ts | 2 +- .../agent-kit/src/agent_backends/default.rs | 28 +--- .../agent-kit/src/agent_backends/mod.rs | 4 - .../crates/agent-kit/src/quickjs_engine.rs | 2 +- voice/engine/crates/agent-kit/src/swarm.rs | 2 +- .../crates/agent-kit/src/tool_executor.rs | 75 +++------ voice/engine/src/session.rs | 2 - voice/engine/src/settings.rs | 5 - 17 files changed, 159 insertions(+), 336 deletions(-) diff --git a/proto/agent.proto b/proto/agent.proto index 9b43303..6c31f5b 100644 --- a/proto/agent.proto +++ b/proto/agent.proto @@ -38,17 +38,6 @@ message ParamDef { repeated string options = 5; // Valid values for enum types } -// Per-tool output handling mode after execution. -enum ToolResultMode { - TOOL_RESULT_MODE_UNSPECIFIED = 0; - // LLM summarize long tool output (uses global tool_summarizer gate). - TOOL_RESULT_MODE_SUMMARIZE = 1; - // Deterministically truncate tool output to runtime hard cap. - TOOL_RESULT_MODE_TRUNCATE = 2; - // Keep raw tool output as-is (no summarize, no truncate). - TOOL_RESULT_MODE_NONE = 3; -} - // A tool definition (always a JS script) message ToolDef { string description = 1; @@ -58,9 +47,9 @@ message ToolDef { bool cancel_on_barge_in = 4; // If true, this tool has side effects bool side_effect = 5; - // Optional post-tool output handling mode. - // If UNSPECIFIED, runtime falls back to global config behavior. - ToolResultMode result_mode = 6; + // When true, the runtime passes the result through an LLM summarizer + // before feeding it back. Adds ~5-10s latency. Default false = truncate. + bool summarize_result = 6; } // A single node in the graph @@ -68,7 +57,7 @@ message NodeDef { string system_prompt = 1; repeated string tools = 2; // keys referencing AgentGraphDef.tools repeated string edges = 3; // node IDs this node can transfer to - + optional string model = 4; optional double temperature = 5; optional uint32 max_tokens = 6; @@ -80,23 +69,23 @@ message NodeDef { message AgentGraphDef { // The ID of the node to start with string entry = 1; - + // All nodes keyed by ID map nodes = 2; - + // All tool definitions keyed by tool name map tools = 3; - + // -- Agent-wide settings -- optional string language = 4; // ISO 639-1 optional string timezone = 5; // IANA timezone optional string voice_id = 6; // Default TTS voice ID optional string tts_provider = 7; // e.g. "elevenlabs" optional string tts_model = 8; // e.g. "eleven_turbo_v2" - + // Session recording configuration optional RecordingConfig recording = 9; - + // Envelope field for versions (e.g. "v3_graph") optional string config_schema_version = 10; diff --git a/studio/api/app/agent_builder/edit_ops.py b/studio/api/app/agent_builder/edit_ops.py index 219bdd8..f4ac007 100644 --- a/studio/api/app/agent_builder/edit_ops.py +++ b/studio/api/app/agent_builder/edit_ops.py @@ -35,7 +35,7 @@ "params", "script", "side_effect", - "result_mode", + "summarize_result", } # ── Canonical field ordering ───────────────────────────────────── @@ -52,14 +52,7 @@ _NODE_FIELD_ORDER = ["system_prompt", "greeting", "tools", "edges"] -_TOOL_FIELD_ORDER = ["description", "params", "script", "side_effect", "result_mode"] - -_TOOL_RESULT_MODES = { - "TOOL_RESULT_MODE_UNSPECIFIED", - "TOOL_RESULT_MODE_SUMMARIZE", - "TOOL_RESULT_MODE_TRUNCATE", - "TOOL_RESULT_MODE_NONE", -} +_TOOL_FIELD_ORDER = ["description", "params", "script", "side_effect", "summarize_result"] def _validate_string_list(value: Any, field_name: str) -> None: @@ -171,21 +164,10 @@ def _validate_fields(self) -> UpsertTool: self.fields["side_effect"], bool ): raise ValueError("'side_effect' must be a boolean") - if "result_mode" in self.fields: - value = self.fields["result_mode"] - if isinstance(value, int): - if value < 0 or value > 3: - raise ValueError( - "'result_mode' integer must be in [0, 3] " - "(UNSPECIFIED, SUMMARIZE, TRUNCATE, NONE)" - ) - elif isinstance(value, str): - if value not in _TOOL_RESULT_MODES: - raise ValueError( - f"'result_mode' must be one of {sorted(_TOOL_RESULT_MODES)}" - ) - else: - raise ValueError("'result_mode' must be a string enum name or integer") + if "summarize_result" in self.fields and not isinstance( + self.fields["summarize_result"], bool + ): + raise ValueError("'summarize_result' must be a boolean") return self diff --git a/studio/api/app/agent_builder/service.py b/studio/api/app/agent_builder/service.py index adf5fbd..6ec686b 100644 --- a/studio/api/app/agent_builder/service.py +++ b/studio/api/app/agent_builder/service.py @@ -105,8 +105,7 @@ "description": "", "params": [{{"name": "", "type": "string", "required": true}}], "script": "", - "side_effect": false, - "result_mode": "TOOL_RESULT_MODE_UNSPECIFIED" + "side_effect": false }} }} }} @@ -115,17 +114,7 @@ - **entry**: The starting node of the conversation - **nodes**: Each node has a `system_prompt`, optional `greeting`, `tools` and `edges` - **greeting**: Optional. The first message spoken when the conversation starts (entry node only). -- **tools**: Each tool has a `description`, `params`, a QuickJS `script`, a `side_effect` flag, and optional `result_mode` - -## Tool Result Mode - -Set `result_mode` per tool to control what gets fed back to the LLM after execution: -- `TOOL_RESULT_MODE_SUMMARIZE`: summarize long output (best for verbose tools) -- `TOOL_RESULT_MODE_TRUNCATE`: deterministic hard-cap truncate (preserves exact text prefixes/URLs) -- `TOOL_RESULT_MODE_NONE`: no post-processing (full raw output; use sparingly) -- `TOOL_RESULT_MODE_UNSPECIFIED`: runtime default - -For tools that produce identifiers/URLs consumed by follow-up tools, prefer `TRUNCATE` or `NONE` over `SUMMARIZE`. +- **tools**: Each tool has a `description`, `params`, a QuickJS `script`, and a `side_effect` flag ## QuickJS Tool Rules diff --git a/studio/api/app/api/agents.py b/studio/api/app/api/agents.py index 7648eca..ef957af 100644 --- a/studio/api/app/api/agents.py +++ b/studio/api/app/api/agents.py @@ -584,10 +584,9 @@ class AgentConfigPatch(BaseModel): tts_provider: str | None = None tts_model: str | None = None gemini_live_model: str | None = None - # tool_id -> mode - # null means "auto" (unset result_mode) - # 1 = summarize, 2 = truncate, 3 = full/none - tool_result_modes: dict[str, int | None] | None = None + # tool_id -> true/false + # null means "unset" (falls back to default truncate behavior) + tool_summarize_overrides: dict[str, bool | None] | None = None regenerate_greeting: bool = False @@ -619,11 +618,11 @@ async def patch_agent_config( # which may prevent SQLAlchemy from detecting a JSONB change. config = copy.deepcopy(version.config_json) patch = body.model_dump( - exclude_unset=True, exclude={"regenerate_greeting", "tool_result_modes"} + exclude_unset=True, exclude={"regenerate_greeting", "tool_summarize_overrides"} ) - tool_result_modes = body.tool_result_modes + tool_summarize_overrides = body.tool_summarize_overrides force_regen = body.regenerate_greeting - if not patch and not force_regen and not tool_result_modes: + if not patch and not force_regen and tool_summarize_overrides is None: raise HTTPException(status_code=400, detail="No fields to update") # ── Language validation ────────────────────────────────────────────── @@ -680,8 +679,8 @@ async def patch_agent_config( for key, value in patch.items(): config[key] = value - # ── Per-tool result_mode patch ─────────────────────────────────────── - if tool_result_modes is not None: + # ── Per-tool summarize_result patch ─────────────────────────────────────── + if tool_summarize_overrides is not None: tools_obj = config.get("tools") if not isinstance(tools_obj, dict): raise HTTPException( @@ -689,30 +688,18 @@ async def patch_agent_config( detail="Invalid config: top-level 'tools' must be an object", ) - for tool_id, mode in tool_result_modes.items(): + for tool_id, summarize in tool_summarize_overrides.items(): tool_def = tools_obj.get(tool_id) if not isinstance(tool_def, dict): raise HTTPException( status_code=422, - detail=f"Unknown tool '{tool_id}' in tool_result_modes", + detail=f"Unknown tool '{tool_id}' in tool_summarize_overrides", ) - # "auto" => remove field and let runtime default behavior apply - if mode is None: - tool_def.pop("result_mode", None) - continue - - # Valid explicit runtime enum values: - # 1 summarize, 2 truncate, 3 none/full. - if mode not in {1, 2, 3}: - raise HTTPException( - status_code=422, - detail=( - f"Invalid result_mode {mode} for tool '{tool_id}'. " - "Expected null (auto) or 1/2/3." - ), - ) - tool_def["result_mode"] = mode + if summarize is None: + tool_def.pop("summarize_result", None) + else: + tool_def["summarize_result"] = bool(summarize) # ── Greeting regeneration ────────────────────────────────────────── greeting_updated = False @@ -748,7 +735,7 @@ async def patch_agent_config( db, agent_id=agent.id, patch=patch, - tool_result_modes=tool_result_modes, + tool_summarize_overrides=tool_summarize_overrides, greeting_updated=greeting_updated, new_greeting=new_greeting, ) @@ -779,7 +766,7 @@ async def _inject_config_change_event( db: AsyncSession, agent_id: uuid.UUID, patch: dict[str, Any], - tool_result_modes: dict[str, int | None] | None = None, + tool_summarize_overrides: dict[str, bool | None] | None = None, greeting_updated: bool = False, new_greeting: str | None = None, ) -> None: @@ -811,14 +798,13 @@ async def _inject_config_change_event( else "Standard Pipeline" ) changes.append(f"conversation mode set to {mode}") - if tool_result_modes: - mode_label: dict[int, str] = {1: "summary", 2: "truncate", 3: "full"} - for tool_id, result_mode in tool_result_modes.items(): - if result_mode is None: + if tool_summarize_overrides: + for tool_id, summarize in tool_summarize_overrides.items(): + if summarize is None: label = "auto" else: - label = mode_label.get(result_mode, str(result_mode)) - changes.append(f"{tool_id} result_mode set to {label}") + label = "enabled" if summarize else "disabled" + changes.append(f"{tool_id} AI summarization set to {label}") if not changes: return diff --git a/studio/api/app/schemas/agent_pb2.py b/studio/api/app/schemas/agent_pb2.py index bc6982a..5cfd654 100644 --- a/studio/api/app/schemas/agent_pb2.py +++ b/studio/api/app/schemas/agent_pb2.py @@ -24,7 +24,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61gent.proto\x12\x05\x61gent\"\x8f\x02\n\x0fRecordingConfig\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x12\n\noutput_uri\x18\x02 \x01(\t\x12(\n\x0c\x61udio_layout\x18\x03 \x01(\x0e\x32\x12.agent.AudioLayout\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12(\n\x0c\x61udio_format\x18\x05 \x01(\x0e\x32\x12.agent.AudioFormat\x12\x19\n\x11max_duration_secs\x18\x06 \x01(\r\x12\x17\n\x0fsave_transcript\x18\x07 \x01(\x08\x12\x1c\n\x14include_tool_details\x18\x08 \x01(\x08\x12\x1c\n\x14include_llm_metadata\x18\t \x01(\x08\"^\n\x08ParamDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12\x10\n\x08required\x18\x04 \x01(\x08\x12\x0f\n\x07options\x18\x05 \x03(\t\"\xac\x01\n\x07ToolDef\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x0e\n\x06script\x18\x02 \x01(\t\x12\x1f\n\x06params\x18\x03 \x03(\x0b\x32\x0f.agent.ParamDef\x12\x1a\n\x12\x63\x61ncel_on_barge_in\x18\x04 \x01(\x08\x12\x13\n\x0bside_effect\x18\x05 \x01(\x08\x12*\n\x0bresult_mode\x18\x06 \x01(\x0e\x32\x15.agent.ToolResultMode\"\xf6\x01\n\x07NodeDef\x12\x15\n\rsystem_prompt\x18\x01 \x01(\t\x12\r\n\x05tools\x18\x02 \x03(\t\x12\r\n\x05\x65\x64ges\x18\x03 \x03(\t\x12\x12\n\x05model\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0btemperature\x18\x05 \x01(\x01H\x01\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x06 \x01(\rH\x02\x88\x01\x01\x12\x15\n\x08voice_id\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08greeting\x18\x08 \x01(\tH\x04\x88\x01\x01\x42\x08\n\x06_modelB\x0e\n\x0c_temperatureB\r\n\x0b_max_tokensB\x0b\n\t_voice_idB\x0b\n\t_greeting\"\xea\x04\n\rAgentGraphDef\x12\r\n\x05\x65ntry\x18\x01 \x01(\t\x12.\n\x05nodes\x18\x02 \x03(\x0b\x32\x1f.agent.AgentGraphDef.NodesEntry\x12.\n\x05tools\x18\x03 \x03(\x0b\x32\x1f.agent.AgentGraphDef.ToolsEntry\x12\x15\n\x08language\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08timezone\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08voice_id\x18\x06 \x01(\tH\x02\x88\x01\x01\x12\x19\n\x0ctts_provider\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x16\n\ttts_model\x18\x08 \x01(\tH\x04\x88\x01\x01\x12.\n\trecording\x18\t \x01(\x0b\x32\x16.agent.RecordingConfigH\x05\x88\x01\x01\x12\"\n\x15\x63onfig_schema_version\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11gemini_live_model\x18\x0c \x01(\tH\x07\x88\x01\x01\x1a<\n\nNodesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.NodeDef:\x02\x38\x01\x1a<\n\nToolsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.ToolDef:\x02\x38\x01\x42\x0b\n\t_languageB\x0b\n\t_timezoneB\x0b\n\t_voice_idB\x0f\n\r_tts_providerB\x0c\n\n_tts_modelB\x0c\n\n_recordingB\x18\n\x16_config_schema_versionB\x14\n\x12_gemini_live_model*[\n\x0b\x41udioLayout\x12\x1c\n\x18\x41UDIO_LAYOUT_UNSPECIFIED\x10\x00\x12\x17\n\x13\x41UDIO_LAYOUT_STEREO\x10\x01\x12\x15\n\x11\x41UDIO_LAYOUT_MONO\x10\x02*X\n\x0b\x41udioFormat\x12\x1c\n\x18\x41UDIO_FORMAT_UNSPECIFIED\x10\x00\x12\x15\n\x11\x41UDIO_FORMAT_OPUS\x10\x01\x12\x14\n\x10\x41UDIO_FORMAT_WAV\x10\x02*\x8c\x01\n\x0eToolResultMode\x12 \n\x1cTOOL_RESULT_MODE_UNSPECIFIED\x10\x00\x12\x1e\n\x1aTOOL_RESULT_MODE_SUMMARIZE\x10\x01\x12\x1d\n\x19TOOL_RESULT_MODE_TRUNCATE\x10\x02\x12\x19\n\x15TOOL_RESULT_MODE_NONE\x10\x03\x42\x30Z.github.com/prime8ai/voice-agent-os/proto/agentb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61gent.proto\x12\x05\x61gent\"\x8f\x02\n\x0fRecordingConfig\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x12\n\noutput_uri\x18\x02 \x01(\t\x12(\n\x0c\x61udio_layout\x18\x03 \x01(\x0e\x32\x12.agent.AudioLayout\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12(\n\x0c\x61udio_format\x18\x05 \x01(\x0e\x32\x12.agent.AudioFormat\x12\x19\n\x11max_duration_secs\x18\x06 \x01(\r\x12\x17\n\x0fsave_transcript\x18\x07 \x01(\x08\x12\x1c\n\x14include_tool_details\x18\x08 \x01(\x08\x12\x1c\n\x14include_llm_metadata\x18\t \x01(\x08\"^\n\x08ParamDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12\x10\n\x08required\x18\x04 \x01(\x08\x12\x0f\n\x07options\x18\x05 \x03(\t\"\x9a\x01\n\x07ToolDef\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x0e\n\x06script\x18\x02 \x01(\t\x12\x1f\n\x06params\x18\x03 \x03(\x0b\x32\x0f.agent.ParamDef\x12\x1a\n\x12\x63\x61ncel_on_barge_in\x18\x04 \x01(\x08\x12\x13\n\x0bside_effect\x18\x05 \x01(\x08\x12\x18\n\x10summarize_result\x18\x06 \x01(\x08\"\xf6\x01\n\x07NodeDef\x12\x15\n\rsystem_prompt\x18\x01 \x01(\t\x12\r\n\x05tools\x18\x02 \x03(\t\x12\r\n\x05\x65\x64ges\x18\x03 \x03(\t\x12\x12\n\x05model\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0btemperature\x18\x05 \x01(\x01H\x01\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x06 \x01(\rH\x02\x88\x01\x01\x12\x15\n\x08voice_id\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08greeting\x18\x08 \x01(\tH\x04\x88\x01\x01\x42\x08\n\x06_modelB\x0e\n\x0c_temperatureB\r\n\x0b_max_tokensB\x0b\n\t_voice_idB\x0b\n\t_greeting\"\xea\x04\n\rAgentGraphDef\x12\r\n\x05\x65ntry\x18\x01 \x01(\t\x12.\n\x05nodes\x18\x02 \x03(\x0b\x32\x1f.agent.AgentGraphDef.NodesEntry\x12.\n\x05tools\x18\x03 \x03(\x0b\x32\x1f.agent.AgentGraphDef.ToolsEntry\x12\x15\n\x08language\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08timezone\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08voice_id\x18\x06 \x01(\tH\x02\x88\x01\x01\x12\x19\n\x0ctts_provider\x18\x07 \x01(\tH\x03\x88\x01\x01\x12\x16\n\ttts_model\x18\x08 \x01(\tH\x04\x88\x01\x01\x12.\n\trecording\x18\t \x01(\x0b\x32\x16.agent.RecordingConfigH\x05\x88\x01\x01\x12\"\n\x15\x63onfig_schema_version\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11gemini_live_model\x18\x0c \x01(\tH\x07\x88\x01\x01\x1a<\n\nNodesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.NodeDef:\x02\x38\x01\x1a<\n\nToolsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x0e.agent.ToolDef:\x02\x38\x01\x42\x0b\n\t_languageB\x0b\n\t_timezoneB\x0b\n\t_voice_idB\x0f\n\r_tts_providerB\x0c\n\n_tts_modelB\x0c\n\n_recordingB\x18\n\x16_config_schema_versionB\x14\n\x12_gemini_live_model*[\n\x0b\x41udioLayout\x12\x1c\n\x18\x41UDIO_LAYOUT_UNSPECIFIED\x10\x00\x12\x17\n\x13\x41UDIO_LAYOUT_STEREO\x10\x01\x12\x15\n\x11\x41UDIO_LAYOUT_MONO\x10\x02*X\n\x0b\x41udioFormat\x12\x1c\n\x18\x41UDIO_FORMAT_UNSPECIFIED\x10\x00\x12\x15\n\x11\x41UDIO_FORMAT_OPUS\x10\x01\x12\x14\n\x10\x41UDIO_FORMAT_WAV\x10\x02\x42\x30Z.github.com/prime8ai/voice-agent-os/proto/agentb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -36,24 +36,22 @@ _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_options = b'8\001' _globals['_AGENTGRAPHDEF_TOOLSENTRY']._loaded_options = None _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_options = b'8\001' - _globals['_AUDIOLAYOUT']._serialized_start=1437 - _globals['_AUDIOLAYOUT']._serialized_end=1528 - _globals['_AUDIOFORMAT']._serialized_start=1530 - _globals['_AUDIOFORMAT']._serialized_end=1618 - _globals['_TOOLRESULTMODE']._serialized_start=1621 - _globals['_TOOLRESULTMODE']._serialized_end=1761 + _globals['_AUDIOLAYOUT']._serialized_start=1419 + _globals['_AUDIOLAYOUT']._serialized_end=1510 + _globals['_AUDIOFORMAT']._serialized_start=1512 + _globals['_AUDIOFORMAT']._serialized_end=1600 _globals['_RECORDINGCONFIG']._serialized_start=23 _globals['_RECORDINGCONFIG']._serialized_end=294 _globals['_PARAMDEF']._serialized_start=296 _globals['_PARAMDEF']._serialized_end=390 _globals['_TOOLDEF']._serialized_start=393 - _globals['_TOOLDEF']._serialized_end=565 - _globals['_NODEDEF']._serialized_start=568 - _globals['_NODEDEF']._serialized_end=814 - _globals['_AGENTGRAPHDEF']._serialized_start=817 - _globals['_AGENTGRAPHDEF']._serialized_end=1435 - _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_start=1181 - _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_end=1241 - _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_start=1243 - _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_end=1303 + _globals['_TOOLDEF']._serialized_end=547 + _globals['_NODEDEF']._serialized_start=550 + _globals['_NODEDEF']._serialized_end=796 + _globals['_AGENTGRAPHDEF']._serialized_start=799 + _globals['_AGENTGRAPHDEF']._serialized_end=1417 + _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_start=1163 + _globals['_AGENTGRAPHDEF_NODESENTRY']._serialized_end=1223 + _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_start=1225 + _globals['_AGENTGRAPHDEF_TOOLSENTRY']._serialized_end=1285 # @@protoc_insertion_point(module_scope) diff --git a/studio/api/app/schemas/agent_pb2.pyi b/studio/api/app/schemas/agent_pb2.pyi index 574bdcd..ce5ff37 100644 --- a/studio/api/app/schemas/agent_pb2.pyi +++ b/studio/api/app/schemas/agent_pb2.pyi @@ -18,23 +18,12 @@ class AudioFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): AUDIO_FORMAT_UNSPECIFIED: _ClassVar[AudioFormat] AUDIO_FORMAT_OPUS: _ClassVar[AudioFormat] AUDIO_FORMAT_WAV: _ClassVar[AudioFormat] - -class ToolResultMode(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - TOOL_RESULT_MODE_UNSPECIFIED: _ClassVar[ToolResultMode] - TOOL_RESULT_MODE_SUMMARIZE: _ClassVar[ToolResultMode] - TOOL_RESULT_MODE_TRUNCATE: _ClassVar[ToolResultMode] - TOOL_RESULT_MODE_NONE: _ClassVar[ToolResultMode] AUDIO_LAYOUT_UNSPECIFIED: AudioLayout AUDIO_LAYOUT_STEREO: AudioLayout AUDIO_LAYOUT_MONO: AudioLayout AUDIO_FORMAT_UNSPECIFIED: AudioFormat AUDIO_FORMAT_OPUS: AudioFormat AUDIO_FORMAT_WAV: AudioFormat -TOOL_RESULT_MODE_UNSPECIFIED: ToolResultMode -TOOL_RESULT_MODE_SUMMARIZE: ToolResultMode -TOOL_RESULT_MODE_TRUNCATE: ToolResultMode -TOOL_RESULT_MODE_NONE: ToolResultMode class RecordingConfig(_message.Message): __slots__ = ("enabled", "output_uri", "audio_layout", "sample_rate", "audio_format", "max_duration_secs", "save_transcript", "include_tool_details", "include_llm_metadata") @@ -73,20 +62,20 @@ class ParamDef(_message.Message): def __init__(self, name: _Optional[str] = ..., type: _Optional[str] = ..., description: _Optional[str] = ..., required: bool = ..., options: _Optional[_Iterable[str]] = ...) -> None: ... class ToolDef(_message.Message): - __slots__ = ("description", "script", "params", "cancel_on_barge_in", "side_effect", "result_mode") + __slots__ = ("description", "script", "params", "cancel_on_barge_in", "side_effect", "summarize_result") DESCRIPTION_FIELD_NUMBER: _ClassVar[int] SCRIPT_FIELD_NUMBER: _ClassVar[int] PARAMS_FIELD_NUMBER: _ClassVar[int] CANCEL_ON_BARGE_IN_FIELD_NUMBER: _ClassVar[int] SIDE_EFFECT_FIELD_NUMBER: _ClassVar[int] - RESULT_MODE_FIELD_NUMBER: _ClassVar[int] + SUMMARIZE_RESULT_FIELD_NUMBER: _ClassVar[int] description: str script: str params: _containers.RepeatedCompositeFieldContainer[ParamDef] cancel_on_barge_in: bool side_effect: bool - result_mode: ToolResultMode - def __init__(self, description: _Optional[str] = ..., script: _Optional[str] = ..., params: _Optional[_Iterable[_Union[ParamDef, _Mapping]]] = ..., cancel_on_barge_in: bool = ..., side_effect: bool = ..., result_mode: _Optional[_Union[ToolResultMode, str]] = ...) -> None: ... + summarize_result: bool + def __init__(self, description: _Optional[str] = ..., script: _Optional[str] = ..., params: _Optional[_Iterable[_Union[ParamDef, _Mapping]]] = ..., cancel_on_barge_in: bool = ..., side_effect: bool = ..., summarize_result: bool = ...) -> None: ... class NodeDef(_message.Message): __slots__ = ("system_prompt", "tools", "edges", "model", "temperature", "max_tokens", "voice_id", "greeting") diff --git a/studio/web/public/schemas/agent-config-v1.schema.json b/studio/web/public/schemas/agent-config-v1.schema.json index 1699657..a6b50ce 100644 --- a/studio/web/public/schemas/agent-config-v1.schema.json +++ b/studio/web/public/schemas/agent-config-v1.schema.json @@ -103,23 +103,9 @@ "side_effect": { "type": "boolean" }, - "result_mode": { - "description": "Optional per-tool post-processing mode for tool results.", - "oneOf": [ - { - "type": "integer", - "enum": [0, 1, 2, 3] - }, - { - "type": "string", - "enum": [ - "TOOL_RESULT_MODE_UNSPECIFIED", - "TOOL_RESULT_MODE_SUMMARIZE", - "TOOL_RESULT_MODE_TRUNCATE", - "TOOL_RESULT_MODE_NONE" - ] - } - ] + "summarize_result": { + "type": "boolean", + "description": "When true, long tool output is summarized by AI instead of being truncated." } } } diff --git a/studio/web/src/components/agent/agent-config-editor.tsx b/studio/web/src/components/agent/agent-config-editor.tsx index 9bcd89e..e7c7b26 100644 --- a/studio/web/src/components/agent/agent-config-editor.tsx +++ b/studio/web/src/components/agent/agent-config-editor.tsx @@ -36,19 +36,15 @@ import { SelectValue, } from "@/components/ui/select"; import { Spinner } from "@/components/ui/spinner"; +import { Switch } from "@/components/ui/switch"; import { cn } from "@/lib/utils"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; import { Dialog, DialogContent, DialogTrigger } from "@/components/ui/dialog"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; import ConfigViewer from "@/components/agent/config-viewer"; import ConfigDiff from "@/components/agent/config-diff"; import ShikiCodeBlock from "@/components/ui/shiki-code-block"; +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; import { toast } from "sonner"; // ── Language & timezone options ────────────────────────────────── @@ -186,7 +182,7 @@ interface ConfigTool { description?: string; side_effect?: boolean; script?: string; - result_mode?: number | string; + summarize_result?: boolean; } interface FullConfig extends Record { @@ -195,22 +191,6 @@ interface FullConfig extends Record { tools?: Record; } -type ToolResultModeUi = "auto" | "summary" | "truncate" | "full"; - -function modeToUi(mode: unknown): ToolResultModeUi { - if (mode === 1 || mode === "TOOL_RESULT_MODE_SUMMARIZE") return "summary"; - if (mode === 2 || mode === "TOOL_RESULT_MODE_TRUNCATE") return "truncate"; - if (mode === 3 || mode === "TOOL_RESULT_MODE_NONE") return "full"; - return "auto"; -} - -function uiToMode(ui: ToolResultModeUi): number | null { - if (ui === "summary") return 1; - if (ui === "truncate") return 2; - if (ui === "full") return 3; - return null; -} - // ── Component ──────────────────────────────────────────────────── interface AgentConfigEditorProps { @@ -904,92 +884,84 @@ export default function AgentConfigEditor({ {tools.length > 0 ? ( -
+
{tools.map(([id, tool]) => (
-
-
-
- -
-
- - {id} +
+
+ +
+
+ + {id} + +
+ + {tool.side_effect ? "Write" : "Read"} -
- - {tool.side_effect ? "Write" : "Read"} - -
-
+
-
-

- {tool.description || "—"} -

- -
- - - - - e.stopPropagation()} - > - {(["auto", "summary", "truncate", "full"] as const).map( - (mode) => ( - +
+ + + +
e.stopPropagation()} + onPointerDown={(e) => e.stopPropagation()} + > + + Summarize Result + + patchField({ - tool_result_modes: { [id]: uiToMode(mode) }, + tool_summarize_overrides: { + [id]: checked || null, + }, }) } - > - {mode.replace(/^./, (c) => c.toUpperCase())} - - ) - )} - - - -
- - View Code -
+ /> +
+
+ + Use LLM to summarize long tool outputs + +
+ + + +
+ +
+
+ + View Tool Implementation + +
+
+ + {/* Row 3: description */} +

+ {tool.description || "No description provided."} +

diff --git a/studio/web/src/lib/api/agent.ts b/studio/web/src/lib/api/agent.ts index f8de8f0..c51443c 100644 --- a/studio/web/src/lib/api/agent.ts +++ b/studio/web/src/lib/api/agent.ts @@ -22,18 +22,6 @@ export enum AudioFormat { UNRECOGNIZED = -1, } -/** Per-tool output handling mode after execution. */ -export enum ToolResultMode { - TOOL_RESULT_MODE_UNSPECIFIED = 0, - /** TOOL_RESULT_MODE_SUMMARIZE - LLM summarize long tool output (uses global tool_summarizer gate). */ - TOOL_RESULT_MODE_SUMMARIZE = 1, - /** TOOL_RESULT_MODE_TRUNCATE - Deterministically truncate tool output to runtime hard cap. */ - TOOL_RESULT_MODE_TRUNCATE = 2, - /** TOOL_RESULT_MODE_NONE - Keep raw tool output as-is (no summarize, no truncate). */ - TOOL_RESULT_MODE_NONE = 3, - UNRECOGNIZED = -1, -} - /** Session recording configuration */ export interface RecordingConfig { enabled: boolean; @@ -68,10 +56,10 @@ export interface ToolDef { /** If true, this tool has side effects */ side_effect: boolean; /** - * Optional post-tool output handling mode. - * If UNSPECIFIED, runtime falls back to global config behavior. + * When true, the runtime passes the result through an LLM summarizer + * before feeding it back. Adds ~5-10s latency. Default false = truncate. */ - result_mode: ToolResultMode; + summarize_result: boolean; } /** A single node in the graph */ diff --git a/studio/web/src/lib/api/client.ts b/studio/web/src/lib/api/client.ts index 2574d32..411cbe6 100644 --- a/studio/web/src/lib/api/client.ts +++ b/studio/web/src/lib/api/client.ts @@ -593,7 +593,7 @@ export const api = { voice_id?: string; tts_provider?: string; tts_model?: string; - tool_result_modes?: Record; + tool_summarize_overrides?: Record; regenerate_greeting?: boolean; } ) => diff --git a/voice/engine/crates/agent-kit/src/agent_backends/default.rs b/voice/engine/crates/agent-kit/src/agent_backends/default.rs index bace4c6..7cea969 100644 --- a/voice/engine/crates/agent-kit/src/agent_backends/default.rs +++ b/voice/engine/crates/agent-kit/src/agent_backends/default.rs @@ -24,7 +24,7 @@ use crate::swarm::{ make_on_hold_tool_schema, AgentGraphDef, SwarmState, HANG_UP_TOOL_NAME, ON_HOLD_TOOL_NAME, }; use crate::tool_executor::{ - resolve_tool_post_process_mode, spawn_tool_task, ToolPostProcessMode, ToolTaskResult, + spawn_tool_task, ToolTaskResult, }; use crate::ScriptEngine; @@ -560,7 +560,7 @@ impl DefaultAgentBackend { name: String, args: String, side_effect: bool, - post_process_mode: ToolPostProcessMode, + summarize: bool, ) { // Some streaming providers can emit duplicate tool-call events for the same // call_id (e.g. retry/delta edge cases). Guard against double-counting, which @@ -612,7 +612,7 @@ impl DefaultAgentBackend { name, args, side_effect, - post_process_mode, + summarize, self.script_engine.clone(), self.interceptor.clone(), Some(Arc::clone(&self.provider)), @@ -738,26 +738,12 @@ impl DefaultAgentBackend { ..tc.clone() }); - let (side_effect, post_process_mode) = self + let (side_effect, summarize) = self .swarm .as_ref() .and_then(|s| s.graph.tools.get(&tc.name)) - .map(|t| { - ( - t.side_effect, - resolve_tool_post_process_mode( - self.config.tool_summarizer, - t.result_mode, - ), - ) - }) - .unwrap_or(( - false, - resolve_tool_post_process_mode( - self.config.tool_summarizer, - crate::swarm::ToolResultMode::Unspecified as i32, - ), - )); + .map(|t| (t.side_effect, t.summarize_result)) + .unwrap_or((false, false)); if side_effect { tracing::debug!("[agent_backend] Tool '{}' marked as side-effect", tc.name); @@ -770,7 +756,7 @@ impl DefaultAgentBackend { tc.name.clone(), tc.arguments.clone(), side_effect, - post_process_mode, + summarize, ); return Some(AgentEvent::ToolCallStarted { diff --git a/voice/engine/crates/agent-kit/src/agent_backends/mod.rs b/voice/engine/crates/agent-kit/src/agent_backends/mod.rs index bf2855a..5331d80 100644 --- a/voice/engine/crates/agent-kit/src/agent_backends/mod.rs +++ b/voice/engine/crates/agent-kit/src/agent_backends/mod.rs @@ -204,8 +204,6 @@ pub struct AgentBackendConfig { /// swap in fresh credentials mid-session, while QuickJS `secret()` reads /// always see the latest value via a non-blocking read lock. pub secrets: SharedSecretMap, - /// Summarize long tool results before feeding them to the main LLM. - pub tool_summarizer: bool, /// Compress conversation history when it grows too long. pub context_summarizer: bool, /// Speak a brief filler phrase while side-effecting tools run. @@ -226,7 +224,6 @@ impl std::fmt::Debug for AgentBackendConfig { self.secrets.read().map(|s| s.len()).unwrap_or(0) ), ) - .field("tool_summarizer", &self.tool_summarizer) .field("context_summarizer", &self.context_summarizer) .field("tool_filler", &self.tool_filler) .finish() @@ -240,7 +237,6 @@ impl Default for AgentBackendConfig { max_tokens: 32768, max_tool_rounds: 5, secrets: std::sync::Arc::new(std::sync::RwLock::new(SecretMap::new())), - tool_summarizer: false, context_summarizer: false, tool_filler: false, } diff --git a/voice/engine/crates/agent-kit/src/quickjs_engine.rs b/voice/engine/crates/agent-kit/src/quickjs_engine.rs index 2931435..8013f7a 100644 --- a/voice/engine/crates/agent-kit/src/quickjs_engine.rs +++ b/voice/engine/crates/agent-kit/src/quickjs_engine.rs @@ -685,7 +685,7 @@ mod tests { params, cancel_on_barge_in: true, side_effect: false, - result_mode: crate::swarm::ToolResultMode::Unspecified as i32, + summarize_result: false, } } diff --git a/voice/engine/crates/agent-kit/src/swarm.rs b/voice/engine/crates/agent-kit/src/swarm.rs index 097ea79..50f7cbc 100644 --- a/voice/engine/crates/agent-kit/src/swarm.rs +++ b/voice/engine/crates/agent-kit/src/swarm.rs @@ -47,7 +47,7 @@ use serde_json::json; // Re-export canonical recording type definitions from common. pub use common::{AudioFormat, AudioLayout, RecordingConfig}; -pub use proto::agent::{AgentGraphDef, NodeDef, ParamDef, ToolDef, ToolResultMode}; +pub use proto::agent::{AgentGraphDef, NodeDef, ParamDef, ToolDef}; // ── Runtime State ─────────────────────────────────────────────── diff --git a/voice/engine/crates/agent-kit/src/tool_executor.rs b/voice/engine/crates/agent-kit/src/tool_executor.rs index 8b5648f..c842797 100644 --- a/voice/engine/crates/agent-kit/src/tool_executor.rs +++ b/voice/engine/crates/agent-kit/src/tool_executor.rs @@ -5,7 +5,6 @@ use tracing::{info, warn}; use crate::agent_backends::{AfterToolCallAction, BeforeToolCallAction, ToolInterceptor}; use crate::micro_tasks; use crate::providers::LlmProvider; -use crate::swarm::ToolResultMode; use crate::ScriptEngine; // ── Types ─────────────────────────────────────────────────────── @@ -88,31 +87,6 @@ pub(super) struct ToolTaskResult { const TOOL_SUMMARY_MIN_LENGTH: usize = 500; const TOOL_RESULT_HARD_CAP_CHARS: usize = 8000; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(super) enum ToolPostProcessMode { - Summarize, - Truncate, - None, -} - -pub(super) fn resolve_tool_post_process_mode( - global_summarizer_enabled: bool, - tool_result_mode: i32, -) -> ToolPostProcessMode { - match ToolResultMode::try_from(tool_result_mode).unwrap_or(ToolResultMode::Unspecified) { - ToolResultMode::Summarize => ToolPostProcessMode::Summarize, - ToolResultMode::Truncate => ToolPostProcessMode::Truncate, - ToolResultMode::None => ToolPostProcessMode::None, - ToolResultMode::Unspecified => { - if global_summarizer_enabled { - ToolPostProcessMode::Summarize - } else { - ToolPostProcessMode::Truncate - } - } - } -} - fn cap_tool_result(result: &str, max_chars: usize) -> String { let char_count = result.chars().count(); if char_count <= max_chars { @@ -136,7 +110,7 @@ pub(super) fn spawn_tool_task( name: String, args: String, side_effect: bool, - post_process_mode: ToolPostProcessMode, + summarize: bool, script_engine_opt: Option>, interceptor_opt: Option>, summary_provider_opt: Option>, @@ -243,7 +217,7 @@ pub(super) fn spawn_tool_task( }; if result.success { - if post_process_mode == ToolPostProcessMode::Summarize { + if summarize { if let Some(provider) = summary_provider_opt.as_deref() { result.result = micro_tasks::summarize_tool_result( provider, @@ -255,20 +229,18 @@ pub(super) fn spawn_tool_task( } } - if post_process_mode != ToolPostProcessMode::None { - let capped = cap_tool_result(&result.result, TOOL_RESULT_HARD_CAP_CHARS); - if capped.len() != result.result.len() { - info!( - tool.name = %task_name, - tool.call_id = %call_id, - tool.before_chars = result.result.len(), - tool.after_chars = capped.len(), - tool.post_process_mode = ?post_process_mode, - "[agent_backend] Tool result capped before enqueue" - ); - } - result.result = capped; + let capped = cap_tool_result(&result.result, TOOL_RESULT_HARD_CAP_CHARS); + if capped.len() != result.result.len() { + info!( + tool.name = %task_name, + tool.call_id = %call_id, + tool.before_chars = result.result.len(), + tool.after_chars = capped.len(), + tool.summarize = summarize, + "[agent_backend] Tool result capped before enqueue" + ); } + result.result = capped; } info!( @@ -393,20 +365,17 @@ mod tests { } #[test] - fn resolve_mode_defaults_to_summarize_when_global_enabled() { - let mode = resolve_tool_post_process_mode(true, ToolResultMode::Unspecified as i32); - assert_eq!(mode, ToolPostProcessMode::Summarize); - } - - #[test] - fn resolve_mode_defaults_to_truncate_when_global_disabled() { - let mode = resolve_tool_post_process_mode(false, ToolResultMode::Unspecified as i32); - assert_eq!(mode, ToolPostProcessMode::Truncate); + fn cap_tool_result_exact_boundary() { + let input = "a".repeat(8000); + let output = cap_tool_result(&input, 8000); + assert_eq!(output, input); // no truncation notice } #[test] - fn resolve_mode_explicit_none_wins_over_global() { - let mode = resolve_tool_post_process_mode(true, ToolResultMode::None as i32); - assert_eq!(mode, ToolPostProcessMode::None); + fn cap_tool_result_exceeds_boundary() { + let input = "a".repeat(8001); + let output = cap_tool_result(&input, 8000); + assert!(output.starts_with(&"a".repeat(8000))); + assert!(output.ends_with("[tool result truncated: 1 chars omitted]")); } } diff --git a/voice/engine/src/session.rs b/voice/engine/src/session.rs index 842acff..3143119 100644 --- a/voice/engine/src/session.rs +++ b/voice/engine/src/session.rs @@ -227,7 +227,6 @@ impl VoiceSession { max_tokens: config.max_tokens, max_tool_rounds: 5, secrets, - tool_summarizer: task.agent_tool_summarizer, context_summarizer: task.agent_context_summarizer, tool_filler: task.agent_tool_filler, }; @@ -277,7 +276,6 @@ impl VoiceSession { max_tokens: config.max_tokens, max_tool_rounds: 5, secrets, - tool_summarizer: task.agent_tool_summarizer, context_summarizer: task.agent_context_summarizer, tool_filler: task.agent_tool_filler, }; diff --git a/voice/engine/src/settings.rs b/voice/engine/src/settings.rs index a0958a5..fbaa84a 100644 --- a/voice/engine/src/settings.rs +++ b/voice/engine/src/settings.rs @@ -48,14 +48,10 @@ use serde::Deserialize; /// /// | Variable | Default | Description | /// |---|---|---| -/// | `AGENT__TOOL_SUMMARIZER` | `true` | Summarize long tool results before feeding to LLM | /// | `AGENT__CONTEXT_SUMMARIZER` | `true` | Compress conversation history when it grows long | /// | `AGENT__TOOL_FILLER` | `false` | Speak a filler phrase while side-effecting tools run | #[derive(Debug, Clone, Deserialize)] pub struct AgentTaskSettings { - #[serde(rename = "agent__tool_summarizer", default = "default_true")] - pub agent_tool_summarizer: bool, - #[serde(rename = "agent__context_summarizer", default = "default_true")] pub agent_context_summarizer: bool, @@ -98,7 +94,6 @@ impl AgentTaskSettings { impl Default for AgentTaskSettings { fn default() -> Self { Self { - agent_tool_summarizer: true, agent_context_summarizer: true, agent_tool_filler: false, }