Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion studio/api/app/api/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from fastapi import APIRouter, Depends, HTTPException
from loguru import logger
from pydantic import BaseModel
from pydantic import BaseModel, Field
from pydantic_ai import Agent as PydanticAiAgent
from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart
from pydantic_core import to_jsonable_python
Expand Down Expand Up @@ -304,6 +304,11 @@ async def delete_agent(
await db.delete(agent)


class EscalationDestination(BaseModel):
    """One human-handoff target for a telephony agent.

    Entries appear in ``AgentConfigPatch.escalation_destinations``; ``name``
    is the label used to select the destination and ``phone_number`` is the
    dial target resolved at escalation time.
    """

    # Short human-readable label (e.g. "Sales"); required, 1-50 chars.
    name: str = Field(..., min_length=1, max_length=50)
    # Dial string: either a phone-number-like value (optional leading "+",
    # digits, whitespace, dashes, parentheses) or a "sip:"/"sips:" URI.
    # NOTE(review): the first alternative admits digit-free values such as
    # "----"; min_length=4 only bounds length — confirm if that is intended.
    phone_number: str = Field(..., min_length=4, max_length=100, pattern=r'^(?:\+?[0-9\s\-\(\)]+|sips?:.+)$')


class AgentConfigPatch(BaseModel):
"""Fields that can be patched directly on the agent's active config."""

Expand All @@ -313,6 +318,7 @@ class AgentConfigPatch(BaseModel):
tts_provider: str | None = None
tts_model: str | None = None
gemini_live_model: str | None = None
escalation_destinations: list[EscalationDestination] | None = None
regenerate_greeting: bool = False


Expand Down
4 changes: 4 additions & 0 deletions studio/api/app/api/voice_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ async def text_test_socket(
)
greeting = "Hello! How can I help you today?"
graph_json: str | None = None
escalation_destinations_json: str | None = None

try:
agent_uuid = uuid.UUID(agent_id)
Expand All @@ -183,6 +184,8 @@ async def text_test_socket(
else:
system_prompt = cfg.get("system_prompt", system_prompt)
greeting = cfg.get("greeting", greeting)
if destinations := cfg.get("escalation_destinations"):
escalation_destinations_json = json.dumps(destinations)
except Exception:
logger.warning("Text test: failed to resolve agent config for {}", agent_id)

Expand Down Expand Up @@ -214,6 +217,7 @@ def _build_runner() -> AgentRunner:
temperature=float(getattr(llm_cfg, "temperature", 0.7)),
max_tokens=int(getattr(llm_cfg, "max_tokens", 512)),
secrets=secrets or None,
escalation_destinations_json=escalation_destinations_json,
)

runner = _build_runner()
Expand Down
1 change: 1 addition & 0 deletions studio/api/voice_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class AgentRunner:
max_tokens: int = 32768,
greeting: str | None = None,
secrets: dict[str, str] | None = None,
escalation_destinations_json: str | None = None,
) -> None: ...
def send(self, text: str) -> list[Any]: ...
def start_turn(self, text: str) -> None: ...
Expand Down
71 changes: 69 additions & 2 deletions studio/web/src/components/agent/agent-config-editor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ function getConfigFields(config: Record<string, unknown>): {
tts_provider: string;
tts_model: string;
gemini_live_model: string;
escalation_destinations: { name: string; phone_number: string }[];
} {
return {
language: (config?.language as string) || "en",
Expand All @@ -144,6 +145,7 @@ function getConfigFields(config: Record<string, unknown>): {
tts_model: (config?.tts_model as string) || "",
gemini_live_model:
(config?.geminiLiveModel as string) || (config?.gemini_live_model as string) || "",
escalation_destinations: (config?.escalation_destinations as { name: string; phone_number: string }[]) || [],
};
}

Expand Down Expand Up @@ -209,6 +211,9 @@ export default function AgentConfigEditor({
const [language, setLanguage] = useState(fields?.language ?? "en");
const [timezone, setTimezone] = useState(fields?.timezone ?? "");
const [voiceIdDraft, setVoiceIdDraft] = useState(fields?.voice_id ?? "");
const [escalationDestinations, setEscalationDestinations] = useState<{id: string, name: string, phone_number: string}[]>(
(fields?.escalation_destinations ?? []).map((d: any) => ({ ...d, id: Math.random().toString(36).substring(7) }))
);
const [saving, setSaving] = useState<string | null>(null);
const [saved, setSaved] = useState<string | null>(null);
const [tts, setTts] = useState<VoiceProviderSettings | null>(null);
Expand Down Expand Up @@ -284,11 +289,12 @@ export default function AgentConfigEditor({
setLanguage(fields.language);
setTimezone(fields.timezone);
setVoiceIdDraft(fields.voice_id);
setEscalationDestinations((fields.escalation_destinations ?? []).map((d: any) => ({ ...d, id: Math.random().toString(36).substring(7) })));
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [fields?.language, fields?.timezone, fields?.voice_id, fields?.gemini_live_model]);
}, [fields?.language, fields?.timezone, fields?.voice_id, fields?.gemini_live_model, fields?.escalation_destinations]);

const patchField = useCallback(
async (payload: Record<string, string | boolean | null>) => {
async (payload: Record<string, unknown>) => {
const primaryField = Object.keys(payload).find((k) => k !== "regenerate_greeting");
if (!primaryField && !payload.regenerate_greeting) return;
const trackField = primaryField ?? "language";
Expand Down Expand Up @@ -783,6 +789,67 @@ export default function AgentConfigEditor({
</SelectContent>
</Select>
</SettingRow>

<SettingRow
icon={<HugeiconsIcon icon={Alert02Icon} className="size-4" />}
label="Human Handoff / Escalation"
description="Configure escalation destinations for telephony agents"
className="items-start py-6 flex-col gap-4"
childrenClassName="w-full ml-0 pl-12"
>
<div className="space-y-3">
{escalationDestinations.map((dest, idx) => (
<div key={dest.id} className="flex items-center gap-2">
<input
type="text"
value={dest.name}
onChange={(e) => {
const newDests = [...escalationDestinations];
newDests[idx].name = e.target.value;
setEscalationDestinations(newDests);
}}
onBlur={() => patchField({ escalation_destinations: escalationDestinations.map(({ id, ...rest }) => rest) })}
placeholder="Name (e.g. Sales)"
disabled={saving === "escalation_destinations"}
className="h-9 w-40 rounded-lg border border-border/60 bg-accent/30 px-3 text-xs font-bold text-foreground placeholder:text-muted-foreground/40 focus:outline-none focus:ring-1 focus:ring-primary/40 disabled:opacity-50 transition-all"
/>
<input
type="text"
value={dest.phone_number}
onChange={(e) => {
const newDests = [...escalationDestinations];
newDests[idx].phone_number = e.target.value;
setEscalationDestinations(newDests);
}}
onBlur={() => patchField({ escalation_destinations: escalationDestinations.map(({ id, ...rest }) => rest) })}
placeholder="Phone (e.g. +1234567890)"
disabled={saving === "escalation_destinations"}
className="h-9 w-48 rounded-lg border border-border/60 bg-accent/30 px-3 text-xs font-bold text-foreground placeholder:text-muted-foreground/40 focus:outline-none focus:ring-1 focus:ring-primary/40 disabled:opacity-50 transition-all font-mono"
/>
<button
onClick={() => {
const newDests = escalationDestinations.filter((_, i) => i !== idx);
setEscalationDestinations(newDests);
patchField({ escalation_destinations: newDests.map(({ id, ...rest }) => rest) });
}}
className="p-2 text-muted-foreground hover:text-red-500 transition-colors rounded-lg hover:bg-red-500/10"
>
<HugeiconsIcon icon={Cancel01Icon} className="size-4" />
</button>
</div>
))}
<button
onClick={() => {
const newDests = [...escalationDestinations, { id: Math.random().toString(36).substring(7), name: "", phone_number: "" }];
setEscalationDestinations(newDests);
patchField({ escalation_destinations: newDests.map(({ id, ...rest }) => rest) });
}}
className="flex items-center gap-2 px-3 py-1.5 rounded-lg text-xs font-bold text-muted-foreground hover:text-foreground bg-accent/30 hover:bg-accent transition-all border border-transparent hover:border-border"
>
+ Add Destination
</button>
</div>
</SettingRow>
</div>
</div>
</div>
Expand Down
1 change: 1 addition & 0 deletions voice/engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ hex = "0.4.3"
secrecy = "0.10.3"
async-trait = "0.1.89"
base64 = "0.22.1"
sha1 = "0.11.0"

[features]
default = []
Expand Down
126 changes: 116 additions & 10 deletions voice/engine/crates/agent-kit/src/agent_backends/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use crate::micro_tasks;
use crate::providers::{LlmCallConfig, LlmProvider, LlmProviderError};
use crate::swarm::{
build_node_tool_schemas, make_artifact_tool_schemas, make_hang_up_tool_schema,
make_on_hold_tool_schema, AgentGraphDef, SwarmState, HANG_UP_TOOL_NAME, ON_HOLD_TOOL_NAME,
make_on_hold_tool_schema, AgentGraphDef, EscalationDestination, SwarmState,
ESCALATE_CALL_TOOL_NAME, HANG_UP_TOOL_NAME, ON_HOLD_TOOL_NAME,
};
use crate::tool_executor::{spawn_tool_task, ToolTaskResult};
use crate::agent_backends::ChatMessage;
Expand Down Expand Up @@ -182,6 +183,14 @@ struct PendingHangUp {
content: Option<String>,
}

/// An `escalate_call` tool call that was deferred during LLM streaming.
/// Emitted as `AgentEvent::EscalateCall` once all sibling tools complete.
struct PendingEscalateCall {
    /// Resolved phone number (looked up from destination_name).
    destination: String,
    /// Free-text reason supplied by the LLM; defaults to "user_request"
    /// when the tool-call arguments omit it.
    reason: String,
}

// ── Observability ────────────────────────────────────────────────

/// Per-LLM-call observability context.
Expand Down Expand Up @@ -265,6 +274,18 @@ pub struct DefaultAgentBackend {
/// other tools are still in-flight. Cleared at turn start / cancel.
pending_hang_up: Option<PendingHangUp>,

// ── Deferred escalate_call ──
/// Set when an `escalate_call` tool call is seen during LLM streaming.
/// Emitted as `AgentEvent::EscalateCall` after TTS drains.
pending_escalate_call: Option<PendingEscalateCall>,

// ── Telephony/Escalation context ──
/// True when this session is over a telephony transport (Twilio/Telnyx).
/// Controls whether `escalate_call` tool is injected.
is_telephony: bool,
/// Pre-configured escalation targets for this agent.
escalation_destinations: Vec<EscalationDestination>,

// ── Observability (per LLM call) ──
/// Present while an LLM stream is active; consumed by `emit_llm_complete`.
obs: Option<LlmCallObs>,
Expand Down Expand Up @@ -343,6 +364,9 @@ impl DefaultAgentBackend {
pending_tokens: Vec::new(),
pending_tool_calls: Vec::new(),
pending_hang_up: None,
pending_escalate_call: None,
is_telephony: false,
escalation_destinations: Vec::new(),
obs: None,
}
}
Expand All @@ -352,6 +376,20 @@ impl DefaultAgentBackend {
self.swarm.as_ref().and_then(|s| s.graph.timezone.clone())
}

/// Configure telephony context for this session.
///
/// Builder-style: consumes and returns `self` so it can be chained
/// onto construction.
///
/// When `is_telephony` is `true` and `destinations` is non-empty,
/// the `escalate_call` tool is injected into the LLM tool schemas.
pub fn with_telephony_escalation(
    mut self,
    is_telephony: bool,
    destinations: Vec<EscalationDestination>,
) -> Self {
    self.is_telephony = is_telephony;
    self.escalation_destinations = destinations;
    self
}

/// Set a tool interceptor for intercepting tool execution.
///
/// Used by [`ArtifactInterceptor`] in production and optionally by callers for
Expand All @@ -371,7 +409,12 @@ impl DefaultAgentBackend {
) -> Result<(Vec<serde_json::Value>, f64, u32, Option<String>), String> {
if let Some(swarm) = &self.swarm {
if let Some(node) = swarm.active_def() {
let schemas = build_node_tool_schemas(node, &swarm.graph.tools);
let schemas = build_node_tool_schemas(
node,
&swarm.graph.tools,
self.is_telephony,
&self.escalation_destinations,
);
let temp = node.temperature.unwrap_or(self.config.temperature);
let mt = node.max_tokens.unwrap_or(self.config.max_tokens);
let model = node.model.clone();
Expand Down Expand Up @@ -627,6 +670,49 @@ impl DefaultAgentBackend {
// The stream continues; hang_up is resolved at stream-end.
}

/// Record an `escalate_call` tool call as a deferred marker.
///
/// Resolves the `destination_name` argument to its actual phone number
/// using the pre-configured `escalation_destinations` list.
/// Emitted as `AgentEvent::EscalateCall` once the stream ends and all
/// sibling tool tasks complete.
fn handle_escalate_call(&mut self, tc: &ToolCallEvent) {
    // Only the first escalation per LLM response is honored.
    if self.pending_escalate_call.is_some() {
        warn!("[agent_backend] duplicate escalate_call in same response — ignoring");
        return;
    }

    // Malformed argument JSON degrades to an empty object instead of
    // aborting the turn; missing fields then fall back to defaults below.
    let parsed: serde_json::Value = serde_json::from_str(&tc.arguments).unwrap_or_default();
    let requested_name = parsed
        .get("destination_name")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");
    let reason = parsed
        .get("reason")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("user_request")
        .to_owned();

    // Map the symbolic destination name onto its configured phone number;
    // an unknown name falls through as a raw dial string.
    let resolved = match self
        .escalation_destinations
        .iter()
        .find(|dest| dest.name == requested_name)
    {
        Some(dest) => dest.phone_number.clone(),
        None => {
            warn!(
                "[agent_backend] escalate_call: unknown destination '{}' — using name as raw number",
                requested_name
            );
            requested_name.to_owned()
        }
    };

    info!(
        "[agent_backend] escalate_call deferred: {} → {} (reason={})",
        requested_name, resolved, reason
    );
    self.pending_escalate_call = Some(PendingEscalateCall {
        destination: resolved,
        reason,
    });
}

fn handle_on_hold(&mut self, tc: &ToolCallEvent) {
let duration_mins = serde_json::from_str::<serde_json::Value>(&tc.arguments)
.ok()
Expand Down Expand Up @@ -689,10 +775,6 @@ impl DefaultAgentBackend {
}
Some(InnerLlmEvent::ToolCall(tc)) => {
// ── Intercept hang_up (synthetic runtime tool) ──
// Defer: record the intent and continue reading the stream
// so that any sibling tool calls / tokens in the same
// response are not lost. The HangUp event is emitted
// after the stream closes and all tool tasks complete.
if tc.name == HANG_UP_TOOL_NAME {
self.handle_hang_up(&tc);
continue;
Expand All @@ -701,8 +783,13 @@ impl DefaultAgentBackend {
// ── Intercept on_hold (synthetic runtime tool) ──
if tc.name == ON_HOLD_TOOL_NAME {
self.handle_on_hold(&tc);
// Don't interrupt the stream — let the LLM continue
// generating its acknowledgement text.
continue;
}

// ── Intercept escalate_call (synthetic telephony tool) ──
// Defer: let the LLM finish its farewell text, then emit EscalateCall.
if tc.name == ESCALATE_CALL_TOOL_NAME {
self.handle_escalate_call(&tc);
continue;
}

Expand Down Expand Up @@ -761,6 +848,16 @@ impl DefaultAgentBackend {

if self.tools_remaining == 0 {
self.phase = Phase::Idle;
// EscalateCall takes highest priority (overrides HangUp)
if let Some(pe) = self.pending_escalate_call.take() {
info!("[agent_backend] escalate_call resolved at stream-end");
// Clear any concurrent hang_up so the reactor only gets one shutdown signal.
self.pending_hang_up = None;
return Some(AgentEvent::EscalateCall {
destination: pe.destination,
reason: pe.reason,
});
}
// Pending hang_up takes priority over Finished.
if let Some(ph) = self.pending_hang_up.take() {
info!("[agent_backend] hang_up resolved at stream-end (no pending tools)");
Expand Down Expand Up @@ -836,10 +933,19 @@ impl DefaultAgentBackend {
};

if self.tools_remaining == 0 {
// All tools done — check for deferred hang_up first.
// All tools done — check for deferred escalate/hang-up first.
self.tool_rounds += 1;

if let Some(ph) = self.pending_hang_up.take() {
if let Some(pe) = self.pending_escalate_call.take() {
// escalate_call takes priority over hang_up.
info!("[agent_backend] escalate_call resolved after all tools completed");
self.pending_hang_up = None;
self.phase = Phase::Idle;
self.event_buffer.push_back(AgentEvent::EscalateCall {
destination: pe.destination,
reason: pe.reason,
});
} else if let Some(ph) = self.pending_hang_up.take() {
// hang_up was deferred while tools were in-flight.
// Now that every tool has completed we can end the session.
// Buffer HangUp so it is emitted *after* this ToolCallCompleted.
Expand Down
Loading
Loading