From f7f3e5fd981d1197e79f27488219b4d7b7170545 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Tue, 28 Oct 2025 15:08:50 +0900 Subject: [PATCH 1/2] feat: #439 add SIP support for realtime agent runner --- .changeset/seven-llamas-buy.md | 5 + examples/realtime-twilio-sip/README.md | 56 +++++ examples/realtime-twilio-sip/agents.ts | 89 +++++++ examples/realtime-twilio-sip/package.json | 17 ++ examples/realtime-twilio-sip/server.ts | 233 ++++++++++++++++++ examples/realtime-twilio-sip/tsconfig.json | 12 + examples/realtime-twilio-sip/types.d.ts | 1 + packages/agents-realtime/src/index.ts | 2 + .../agents-realtime/src/openaiRealtimeSip.ts | 25 ++ .../src/openaiRealtimeWebsocket.ts | 18 +- .../agents-realtime/src/realtimeSession.ts | 6 + .../agents-realtime/src/transportLayer.ts | 5 + .../test/openaiRealtimeWebsocket.test.ts | 46 ++++ pnpm-lock.yaml | 34 +++ 14 files changed, 545 insertions(+), 4 deletions(-) create mode 100644 .changeset/seven-llamas-buy.md create mode 100644 examples/realtime-twilio-sip/README.md create mode 100644 examples/realtime-twilio-sip/agents.ts create mode 100644 examples/realtime-twilio-sip/package.json create mode 100644 examples/realtime-twilio-sip/server.ts create mode 100644 examples/realtime-twilio-sip/tsconfig.json create mode 100644 examples/realtime-twilio-sip/types.d.ts create mode 100644 packages/agents-realtime/src/openaiRealtimeSip.ts diff --git a/.changeset/seven-llamas-buy.md b/.changeset/seven-llamas-buy.md new file mode 100644 index 00000000..29d22cf4 --- /dev/null +++ b/.changeset/seven-llamas-buy.md @@ -0,0 +1,5 @@ +--- +'@openai/agents-realtime': patch +--- + +feat: #439 add SIP support for realtime agent runner diff --git a/examples/realtime-twilio-sip/README.md b/examples/realtime-twilio-sip/README.md new file mode 100644 index 00000000..6da14127 --- /dev/null +++ b/examples/realtime-twilio-sip/README.md @@ -0,0 +1,56 @@ +# Twilio SIP Realtime Example + +This example shows how to handle OpenAI Realtime SIP calls with 
the Agents JS SDK. Incoming calls are accepted through the Realtime Calls API, a triage agent answers with a fixed greeting, and handoffs route the caller to specialist agents (FAQ lookup and record updates) similar to the realtime UI demo. + +## Prerequisites + +- Node.js 22+ +- pnpm 10+ +- An OpenAI API key with Realtime API access +- A configured webhook secret for your OpenAI project +- A Twilio account with a phone number and Elastic SIP Trunking enabled +- A public HTTPS endpoint for local development (for example, [ngrok](https://ngrok.com/)) + +## Configure OpenAI + +1. In [platform settings](https://platform.openai.com/settings) select your project. +2. Create a webhook pointing to `https://<your-public-host>/openai/webhook` with the **realtime.call.incoming** event type and note the signing secret. The server verifies each webhook using `OPENAI_WEBHOOK_SECRET`. + +## Configure Twilio Elastic SIP Trunking + +1. Create (or edit) an Elastic SIP trunk. +2. On the **Origination** tab, add an origination SIP URI of `sip:proj_<your_project_id>@sip.api.openai.com;transport=tls` so Twilio sends inbound calls to OpenAI. (The Termination tab always ends with `.pstn.twilio.com`, so leave it unchanged.) +3. Attach at least one phone number to the trunk so inbound calls are forwarded to OpenAI. + +## Setup + +1. Install dependencies from the monorepo root (if you have not already): + ```bash + pnpm install + ``` +2. Export the required environment variables: + ```bash + export OPENAI_API_KEY="sk-..." + export OPENAI_WEBHOOK_SECRET="whsec_..." + export PORT=8000 # optional, defaults to 8000 + ``` +3. (Optional) Adjust the multi-agent logic in `examples/realtime-twilio-sip/agents.ts` if you want to change the specialist agents or tools. +4. Start the Fastify server: + ```bash + pnpm -F realtime-twilio-sip start + ``` +5. Expose the server publicly (example with ngrok): + ```bash + ngrok http 8000 + ``` + +## Test a Call + +1. Place a call to the Twilio number attached to the SIP trunk. +2. 
Twilio sends the call to `sip.api.openai.com`; OpenAI emits a `realtime.call.incoming` event, which this server accepts via the Realtime Calls API. +3. The triage agent greets the caller, then either keeps the conversation or hands off to: + - **FAQ Agent** – answers common questions via `faq_lookup_tool`. + - **Records Agent** – writes short notes using `update_customer_record`. +4. The background task attaches to the call and logs transcripts plus basic events in the console. + +Tweak `server.ts` to customize instructions, add tools, or integrate with internal systems after the SIP session is active. diff --git a/examples/realtime-twilio-sip/agents.ts b/examples/realtime-twilio-sip/agents.ts new file mode 100644 index 00000000..0131f474 --- /dev/null +++ b/examples/realtime-twilio-sip/agents.ts @@ -0,0 +1,89 @@ +import { RECOMMENDED_PROMPT_PREFIX } from '@openai/agents-core/extensions'; +import { RealtimeAgent, tool } from '@openai/agents/realtime'; +import { z } from 'zod'; + +export const WELCOME_MESSAGE = + 'Hello, this is ABC customer service. How can I help you today?'; + +const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + +const faqLookupSchema = z.object({ + question: z.string().describe('The caller question to search for.'), +}); + +const faqLookupTool = tool({ + name: 'faq_lookup_tool', + description: 'Lookup frequently asked questions for the caller.', + parameters: faqLookupSchema, + execute: async ({ question }: z.infer) => { + await wait(1000); + + const normalized = question.toLowerCase(); + if (normalized.includes('wi-fi') || normalized.includes('wifi')) { + return 'We provide complimentary Wi-Fi. 
Join the ABC-Customer network.'; + } + if (normalized.includes('billing') || normalized.includes('invoice')) { + return 'Your latest invoice is available in the ABC portal under Billing > History.'; + } + if (normalized.includes('hours') || normalized.includes('support')) { + return 'Human support agents are available 24/7; transfer to the specialist if needed.'; + } + return "I'm not sure about that. Let me transfer you back to the triage agent."; + }, +}); + +const updateCustomerRecordSchema = z.object({ + customerId: z + .string() + .describe('Unique identifier for the customer you are updating.'), + note: z + .string() + .describe('Brief summary of the customer request to store in records.'), +}); + +const updateCustomerRecord = tool({ + name: 'update_customer_record', + description: 'Record a short note about the caller.', + parameters: updateCustomerRecordSchema, + execute: async ({ + customerId, + note, + }: z.infer) => { + await wait(1000); + return `Recorded note for ${customerId}: ${note}`; + }, +}); + +const faqAgent = new RealtimeAgent({ + name: 'FAQ Agent', + handoffDescription: + 'Handles frequently asked questions and general account inquiries.', + instructions: `${RECOMMENDED_PROMPT_PREFIX} +You are an FAQ specialist. Always rely on the faq_lookup_tool for answers and keep replies concise. If the caller needs hands-on help, transfer back to the triage agent.`, + tools: [faqLookupTool], +}); + +const recordsAgent = new RealtimeAgent({ + name: 'Records Agent', + handoffDescription: + 'Updates customer records with brief notes and confirmation numbers.', + instructions: `${RECOMMENDED_PROMPT_PREFIX} +You handle structured updates. Confirm the customer's ID, capture their request in a short note, and use the update_customer_record tool. 
For anything outside data updates, return to the triage agent.`, + tools: [updateCustomerRecord], +}); + +const triageAgent = new RealtimeAgent({ + name: 'Triage Agent', + handoffDescription: + 'Greets callers and routes them to the most appropriate specialist.', + instructions: `${RECOMMENDED_PROMPT_PREFIX} +Always begin the call by saying exactly '${WELCOME_MESSAGE}' before collecting details. Once the greeting is complete, gather context and hand off to the FAQ or Records agents when appropriate.`, + handoffs: [faqAgent, recordsAgent], +}); + +faqAgent.handoffs = [triageAgent, recordsAgent]; +recordsAgent.handoffs = [triageAgent, faqAgent]; + +export function getStartingAgent(): RealtimeAgent { + return triageAgent; +} diff --git a/examples/realtime-twilio-sip/package.json b/examples/realtime-twilio-sip/package.json new file mode 100644 index 00000000..64593e8a --- /dev/null +++ b/examples/realtime-twilio-sip/package.json @@ -0,0 +1,17 @@ +{ + "private": true, + "name": "realtime-twilio-sip", + "dependencies": { + "@openai/agents-core": "workspace:*", + "@openai/agents-realtime": "workspace:*", + "@openai/agents": "workspace:*", + "dotenv": "^16.5.0", + "fastify": "^5.3.3", + "fastify-raw-body": "^5.0.0", + "openai": "^6.7.0" + }, + "scripts": { + "build-check": "tsc --noEmit", + "start": "tsx server.ts" + } +} diff --git a/examples/realtime-twilio-sip/server.ts b/examples/realtime-twilio-sip/server.ts new file mode 100644 index 00000000..93e7bef6 --- /dev/null +++ b/examples/realtime-twilio-sip/server.ts @@ -0,0 +1,233 @@ +import fastifyFactory from 'fastify'; +import fastifyRawBody from 'fastify-raw-body'; +import dotenv from 'dotenv'; +import OpenAI from 'openai'; +import { APIError, InvalidWebhookSignatureError } from 'openai/error'; +import { + OpenAIRealtimeSIP, + RealtimeItem, + RealtimeSession, +} from '@openai/agents/realtime'; +import { getStartingAgent, WELCOME_MESSAGE } from './agents'; + +dotenv.config(); + +const OPENAI_API_KEY = 
process.env.OPENAI_API_KEY; +const OPENAI_WEBHOOK_SECRET = process.env.OPENAI_WEBHOOK_SECRET; +const PORT = Number(process.env.PORT ?? 8000); + +if (!OPENAI_API_KEY || !OPENAI_WEBHOOK_SECRET) { + console.error( + 'Missing OPENAI_API_KEY or OPENAI_WEBHOOK_SECRET environment variables.', + ); + process.exit(1); +} + +const apiKey = OPENAI_API_KEY!; +const webhookSecret = OPENAI_WEBHOOK_SECRET!; + +const openai = new OpenAI({ + apiKey, + webhookSecret, +}); + +async function main() { + const fastify = fastifyFactory(); + await fastify.register(fastifyRawBody, { + field: 'rawBody', + global: false, + encoding: 'utf8', + runFirst: true, + routes: ['/openai/webhook'], + }); + + const activeCallTasks = new Map>(); + const startingAgent = getStartingAgent(); + + function getDefaultInstructions(): string { + if (typeof startingAgent.instructions === 'string') { + return startingAgent.instructions; + } + return 'You are a helpful triage agent for ABC customer service.'; + } + + async function acceptCall(callId: string): Promise { + try { + await openai.realtime.calls.accept(callId, { + type: 'realtime', + model: 'gpt-realtime', + instructions: getDefaultInstructions(), + }); + console.info(`Accepted call ${callId}`); + } catch (error) { + if (error instanceof APIError && error.status === 404) { + console.warn( + `Call ${callId} no longer exists when attempting accept. 
Skipping.`, + ); + return; + } + throw error; + } + } + + function logHistoryItem(item: RealtimeItem): void { + if (item.type !== 'message') { + return; + } + + if (item.role === 'user') { + for (const content of item.content) { + if (content.type === 'input_text' && content.text) { + console.info(`Caller: ${content.text}`); + } else if (content.type === 'input_audio' && content.transcript) { + console.info(`Caller (audio transcript): ${content.transcript}`); + } + } + } else if (item.role === 'assistant') { + for (const content of item.content) { + if (content.type === 'output_text' && content.text) { + console.info(`Assistant (text): ${content.text}`); + } else if (content.type === 'output_audio' && content.transcript) { + console.info(`Assistant (audio transcript): ${content.transcript}`); + } + } + } + } + + async function observeCall(callId: string): Promise { + const session = new RealtimeSession(startingAgent, { + transport: new OpenAIRealtimeSIP(), + model: 'gpt-realtime', + config: { + audio: { + input: { + turnDetection: { + type: 'semantic_vad', + interruptResponse: true, + }, + }, + }, + }, + }); + + session.on('history_added', (item: RealtimeItem) => logHistoryItem(item)); + session.on('agent_handoff', (_context, fromAgent, toAgent) => { + console.info(`Handing off from ${fromAgent.name} to ${toAgent.name}.`); + }); + session.on('error', (event) => { + console.error('Realtime session error:', event.error); + }); + + try { + await session.connect({ apiKey, callId }); + console.info(`Attached to realtime call ${callId}`); + + session.transport.sendEvent({ + type: 'response.create', + response: { + instructions: `Say exactly '${WELCOME_MESSAGE}' now before continuing the conversation.`, + }, + }); + + await new Promise((resolve) => { + const handleDisconnect = () => { + session.transport.off('disconnected', handleDisconnect); + resolve(); + }; + session.transport.on('disconnected', handleDisconnect); + }); + } catch (error) { + console.error(`Error while 
observing call ${callId}:`, error); + } finally { + session.close(); + console.info(`Call ${callId} ended`); + } + } + + fastify.post('/openai/webhook', async (request, reply) => { + const rawBody = (request as unknown as { rawBody?: string | Buffer }) + .rawBody; + const payload = + typeof rawBody === 'string' ? rawBody : rawBody?.toString('utf8'); + + if (!payload) { + reply + .status(400) + .send({ error: 'Missing raw body for webhook verification.' }); + return; + } + + let event: Awaited>; + try { + event = await openai.webhooks.unwrap(payload, request.headers); + } catch (error) { + if (error instanceof InvalidWebhookSignatureError) { + console.warn('Invalid webhook signature.'); + reply.status(400).send({ error: 'Invalid webhook signature.' }); + return; + } + console.error('Failed to parse webhook payload.', error); + reply.status(500).send({ error: 'Failed to parse webhook payload.' }); + return; + } + + if (event.type === 'realtime.call.incoming') { + const callId = event.data.call_id; + try { + await acceptCall(callId); + } catch (error) { + console.error(`Failed to accept call ${callId}:`, error); + reply.status(500).send({ error: 'Failed to accept call.' 
}); + return; + } + + if (!activeCallTasks.has(callId)) { + const task = observeCall(callId) + .catch((error) => { + console.error( + `Unhandled error while observing call ${callId}:`, + error, + ); + }) + .finally(() => { + activeCallTasks.delete(callId); + }); + activeCallTasks.set(callId, task); + } else { + console.info( + `Call ${callId} already being observed; skipping duplicate webhook.`, + ); + } + } + + reply.status(200).send({ ok: true }); + }); + + fastify.get('/', async () => ({ status: 'ok' })); + + const shutdown = async () => { + try { + await fastify.close(); + } catch (error) { + console.error('Error during shutdown.', error); + } finally { + process.exit(0); + } + }; + + process.on('SIGINT', shutdown); + process.on('SIGTERM', shutdown); + + try { + await fastify.listen({ host: '0.0.0.0', port: PORT }); + console.log(`Server listening on port ${PORT}`); + } catch (error) { + console.error('Failed to start server.', error); + process.exit(1); + } +} + +main().catch((error) => { + console.error('Failed to start server.', error); + process.exit(1); +}); diff --git a/examples/realtime-twilio-sip/tsconfig.json b/examples/realtime-twilio-sip/tsconfig.json new file mode 100644 index 00000000..0538d3f5 --- /dev/null +++ b/examples/realtime-twilio-sip/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig.examples.json", + "compilerOptions": { + "baseUrl": ".", + "paths": { + "@openai/agents-core/extensions": [ + "../../packages/agents-core/dist/extensions/index.d.ts" + ] + } + }, + "include": ["./**/*.ts", "./types.d.ts"] +} diff --git a/examples/realtime-twilio-sip/types.d.ts b/examples/realtime-twilio-sip/types.d.ts new file mode 100644 index 00000000..de7c0836 --- /dev/null +++ b/examples/realtime-twilio-sip/types.d.ts @@ -0,0 +1 @@ +declare module 'fastify-raw-body'; diff --git a/packages/agents-realtime/src/index.ts b/packages/agents-realtime/src/index.ts index 91f4cbbe..72647a29 100644 --- a/packages/agents-realtime/src/index.ts +++ 
b/packages/agents-realtime/src/index.ts @@ -40,6 +40,8 @@ export { WebSocketState, } from './openaiRealtimeWebsocket'; +export { OpenAIRealtimeSIP } from './openaiRealtimeSip'; + export { OpenAIRealtimeModels, OpenAIRealtimeBase, diff --git a/packages/agents-realtime/src/openaiRealtimeSip.ts b/packages/agents-realtime/src/openaiRealtimeSip.ts new file mode 100644 index 00000000..b4105b8d --- /dev/null +++ b/packages/agents-realtime/src/openaiRealtimeSip.ts @@ -0,0 +1,25 @@ +import { UserError } from '@openai/agents-core'; +import type { RealtimeTransportLayerConnectOptions } from './transportLayer'; +import { + OpenAIRealtimeWebSocket, + OpenAIRealtimeWebSocketOptions, +} from './openaiRealtimeWebsocket'; + +/** + * Transport layer that connects to an existing SIP-initiated Realtime call via call ID. + */ +export class OpenAIRealtimeSIP extends OpenAIRealtimeWebSocket { + constructor(options: OpenAIRealtimeWebSocketOptions = {}) { + super(options); + } + + async connect(options: RealtimeTransportLayerConnectOptions): Promise { + if (!options.callId) { + throw new UserError( + 'OpenAIRealtimeSIP requires `callId` in the connect options.', + ); + } + + await super.connect(options); + } +} diff --git a/packages/agents-realtime/src/openaiRealtimeWebsocket.ts b/packages/agents-realtime/src/openaiRealtimeWebsocket.ts index 3c4e7935..d8715a41 100644 --- a/packages/agents-realtime/src/openaiRealtimeWebsocket.ts +++ b/packages/agents-realtime/src/openaiRealtimeWebsocket.ts @@ -85,6 +85,7 @@ export class OpenAIRealtimeWebSocket { #apiKey: string | undefined; #url: string | undefined; + #defaultUrl: string | undefined; #state: WebSocketState = { status: 'disconnected', websocket: undefined, @@ -108,6 +109,7 @@ export class OpenAIRealtimeWebSocket constructor(options: OpenAIRealtimeWebSocketOptions = {}) { super(options); this.#url = options.url; + this.#defaultUrl = options.url; this.#useInsecureApiKey = options.useInsecureApiKey ?? 
false; this.#createWebSocket = options.createWebSocket; this.#skipOpenEventListeners = options.skipOpenEventListeners ?? false; @@ -324,10 +326,18 @@ export class OpenAIRealtimeWebSocket const model = options.model ?? this.currentModel; this.currentModel = model; this.#apiKey = await this._getApiKey(options); - const url = - options.url ?? - this.#url ?? - `wss://api.openai.com/v1/realtime?model=${this.currentModel}`; + const callId = options.callId; + let url: string; + if (options.url) { + url = options.url; + this.#defaultUrl = options.url; + } else if (callId) { + url = `wss://api.openai.com/v1/realtime?call_id=${callId}`; + } else if (this.#defaultUrl) { + url = this.#defaultUrl; + } else { + url = `wss://api.openai.com/v1/realtime?model=${this.currentModel}`; + } this.#url = url; const sessionConfig: Partial = { diff --git a/packages/agents-realtime/src/realtimeSession.ts b/packages/agents-realtime/src/realtimeSession.ts index e4ca173f..34d9cc7f 100644 --- a/packages/agents-realtime/src/realtimeSession.ts +++ b/packages/agents-realtime/src/realtimeSession.ts @@ -148,6 +148,11 @@ export type RealtimeSessionConnectOptions = { * The URL to use for the connection. */ url?: string; + + /** + * The call ID to attach to when connecting to a SIP-initiated session. + */ + callId?: string; }; function cloneDefaultSessionConfig(): Partial { @@ -852,6 +857,7 @@ export class RealtimeSession< apiKey: options.apiKey ?? 
this.options.apiKey, model: this.options.model, url: options.url, + callId: options.callId, initialSessionConfig: await this.#getSessionConfig(this.options.config), }); // Ensure the cached lastSessionConfig includes everything passed as the initial session config diff --git a/packages/agents-realtime/src/transportLayer.ts b/packages/agents-realtime/src/transportLayer.ts index 33317fcd..9a667bda 100644 --- a/packages/agents-realtime/src/transportLayer.ts +++ b/packages/agents-realtime/src/transportLayer.ts @@ -35,6 +35,11 @@ export type RealtimeTransportLayerConnectOptions = { */ url?: string; + /** + * The call ID to attach to instead of starting a new session. + */ + callId?: string; + /** * The initial session config to use for the session. */ diff --git a/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts b/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts index db135648..3312603d 100644 --- a/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts +++ b/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { OpenAIRealtimeBase } from '../src/openaiRealtimeBase'; import { OpenAIRealtimeWebSocket } from '../src/openaiRealtimeWebsocket'; +import { OpenAIRealtimeSIP } from '../src/openaiRealtimeSip'; let lastFakeSocket: any; vi.mock('ws', () => { @@ -360,4 +361,49 @@ describe('OpenAIRealtimeWebSocket', () => { ws.interrupt(); expect(sendSpy).not.toHaveBeenCalled(); }); + + it('connects using callId when provided', async () => { + const ws = new OpenAIRealtimeWebSocket(); + const p = ws.connect({ apiKey: 'ek_test', callId: 'call_abc' }); + await vi.runAllTimersAsync(); + await p; + expect(lastFakeSocket!.url).toBe( + 'wss://api.openai.com/v1/realtime?call_id=call_abc', + ); + ws.close(); + }); + + it('updates cached URL when callId changes', async () => { + const ws = new OpenAIRealtimeWebSocket(); + const first = ws.connect({ 
apiKey: 'ek_test', callId: 'call_one' }); + await vi.runAllTimersAsync(); + await first; + expect(lastFakeSocket!.url).toBe( + 'wss://api.openai.com/v1/realtime?call_id=call_one', + ); + ws.close(); + + const second = ws.connect({ apiKey: 'ek_test', callId: 'call_two' }); + await vi.runAllTimersAsync(); + await second; + expect(lastFakeSocket!.url).toBe( + 'wss://api.openai.com/v1/realtime?call_id=call_two', + ); + ws.close(); + }); + + it('OpenAIRealtimeSIP requires callId', async () => { + const sip = new OpenAIRealtimeSIP(); + await expect(sip.connect({ apiKey: 'ek_test' } as any)).rejects.toThrow( + 'callId', + ); + + const p = sip.connect({ apiKey: 'ek_test', callId: 'call_xyz' }); + await vi.runAllTimersAsync(); + await p; + expect(lastFakeSocket!.url).toBe( + 'wss://api.openai.com/v1/realtime?call_id=call_xyz', + ); + sip.close(); + }); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index be909751..81c58231 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -399,6 +399,30 @@ importers: specifier: ^8.18.1 version: 8.18.2 + examples/realtime-twilio-sip: + dependencies: + '@openai/agents': + specifier: workspace:* + version: link:../../packages/agents + '@openai/agents-core': + specifier: workspace:* + version: link:../../packages/agents-core + '@openai/agents-realtime': + specifier: workspace:* + version: link:../../packages/agents-realtime + dotenv: + specifier: ^16.5.0 + version: 16.5.0 + fastify: + specifier: ^5.3.3 + version: 5.3.3 + fastify-raw-body: + specifier: ^5.0.0 + version: 5.0.0 + openai: + specifier: ^6.7.0 + version: 6.7.0(ws@8.18.3)(zod@3.25.76) + examples/research-bot: dependencies: '@openai/agents': @@ -3051,6 +3075,10 @@ packages: fastify-plugin@5.0.1: resolution: {integrity: sha512-HCxs+YnRaWzCl+cWRYFnHmeRFyR5GVnJTAaCJQiYzQSDwK9MgJdyAsuL3nh0EWRCYMgQ5MeziymvmAhUHYHDUQ==} + fastify-raw-body@5.0.0: + resolution: {integrity: sha512-2qfoaQ3BQDhZ1gtbkKZd6n0kKxJISJGM6u/skD9ljdWItAscjXrtZ1lnjr7PavmXX9j4EyCPmBDiIsLn07d5vA==} + engines: {node: '>= 
10'} + fastify@5.3.3: resolution: {integrity: sha512-nCBiBCw9q6jPx+JJNVgO8JVnTXeUyrGcyTKPQikRkA/PanrFcOIo4R+ZnLeOLPZPGgzjomqfVarzE0kYx7qWiQ==} @@ -8637,6 +8665,12 @@ snapshots: fastify-plugin@5.0.1: {} + fastify-raw-body@5.0.0: + dependencies: + fastify-plugin: 5.0.1 + raw-body: 3.0.1 + secure-json-parse: 2.7.0 + fastify@5.3.3: dependencies: '@fastify/ajv-compiler': 4.0.2 From 0e2aefcddf96737fd569b32b605f7311260a441b Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Fri, 31 Oct 2025 15:15:59 +0900 Subject: [PATCH 2/2] fix review comments --- examples/realtime-twilio-sip/server.ts | 42 ++++++++-------- packages/agents-realtime/src/index.ts | 1 + .../agents-realtime/src/openaiRealtimeBase.ts | 32 +++++++++++-- .../agents-realtime/src/openaiRealtimeSip.ts | 48 +++++++++++++++++++ .../agents-realtime/src/realtimeSession.ts | 47 ++++++++++++++++++ .../test/openaiRealtimeWebsocket.test.ts | 28 +++++++++++ .../test/realtimeSession.test.ts | 8 ++++ 7 files changed, 181 insertions(+), 25 deletions(-) diff --git a/examples/realtime-twilio-sip/server.ts b/examples/realtime-twilio-sip/server.ts index 93e7bef6..74346c6a 100644 --- a/examples/realtime-twilio-sip/server.ts +++ b/examples/realtime-twilio-sip/server.ts @@ -7,6 +7,7 @@ import { OpenAIRealtimeSIP, RealtimeItem, RealtimeSession, + type RealtimeSessionOptions, } from '@openai/agents/realtime'; import { getStartingAgent, WELCOME_MESSAGE } from './agents'; @@ -44,20 +45,27 @@ async function main() { const activeCallTasks = new Map>(); const startingAgent = getStartingAgent(); - function getDefaultInstructions(): string { - if (typeof startingAgent.instructions === 'string') { - return startingAgent.instructions; - } - return 'You are a helpful triage agent for ABC customer service.'; - } + // Reuse the same session options when accepting the call and when instantiating the session so + // the SIP payload remains in sync with the live websocket session. 
+ const sessionOptions: Partial = { + model: 'gpt-realtime', + config: { + audio: { + input: { + turnDetection: { type: 'semantic_vad', interruptResponse: true }, + }, + }, + }, + }; async function acceptCall(callId: string): Promise { try { - await openai.realtime.calls.accept(callId, { - type: 'realtime', - model: 'gpt-realtime', - instructions: getDefaultInstructions(), - }); + // Build the initial session config using the agent data and session options + const initialConfig = await OpenAIRealtimeSIP.buildInitialConfig( + startingAgent, + sessionOptions, + ); + await openai.realtime.calls.accept(callId, initialConfig); console.info(`Accepted call ${callId}`); } catch (error) { if (error instanceof APIError && error.status === 404) { @@ -97,17 +105,7 @@ async function main() { async function observeCall(callId: string): Promise { const session = new RealtimeSession(startingAgent, { transport: new OpenAIRealtimeSIP(), - model: 'gpt-realtime', - config: { - audio: { - input: { - turnDetection: { - type: 'semantic_vad', - interruptResponse: true, - }, - }, - }, - }, + ...sessionOptions, }); session.on('history_added', (item: RealtimeItem) => logHistoryItem(item)); diff --git a/packages/agents-realtime/src/index.ts b/packages/agents-realtime/src/index.ts index 72647a29..078a4bfa 100644 --- a/packages/agents-realtime/src/index.ts +++ b/packages/agents-realtime/src/index.ts @@ -49,6 +49,7 @@ export { OpenAIRealtimeEventTypes, DEFAULT_OPENAI_REALTIME_MODEL, DEFAULT_OPENAI_REALTIME_SESSION_CONFIG, + RealtimeSessionPayload, } from './openaiRealtimeBase'; export { RealtimeOutputGuardrail } from './guardrail'; diff --git a/packages/agents-realtime/src/openaiRealtimeBase.ts b/packages/agents-realtime/src/openaiRealtimeBase.ts index 86014411..27e093ab 100644 --- a/packages/agents-realtime/src/openaiRealtimeBase.ts +++ b/packages/agents-realtime/src/openaiRealtimeBase.ts @@ -48,6 +48,8 @@ export type OpenAIRealtimeModels = | 'gpt-4o-mini-realtime-preview-2024-12-17' | 
'gpt-realtime' | 'gpt-realtime-2025-08-28' + | 'gpt-realtime-mini' + | 'gpt-realtime-mini-2025-10-06' | (string & {}); // ensures autocomplete works /** @@ -105,6 +107,13 @@ export type OpenAIRealtimeEventTypes = { disconnected: []; } & RealtimeTransportEventTypes; +/** + * Shape of the payload that the Realtime API expects for session.create/update operations. + * This closely mirrors the REST `CallAcceptParams` type so that callers can feed the payload + * directly into the `openai.realtime.calls.accept` helper without casts. + */ +export type RealtimeSessionPayload = { type: 'realtime' } & Record; + export abstract class OpenAIRealtimeBase extends EventEmitterDelegate implements RealtimeTransportLayer @@ -523,10 +532,12 @@ export abstract class OpenAIRealtimeBase ); } - protected _getMergedSessionConfig(config: Partial) { + protected _getMergedSessionConfig( + config: Partial, + ): RealtimeSessionPayload { const newConfig = toNewSessionConfig(config); - const sessionData: Record = { + const sessionData: RealtimeSessionPayload = { type: 'realtime', instructions: newConfig.instructions, model: newConfig.model ?? this.#model, @@ -588,6 +599,21 @@ export abstract class OpenAIRealtimeBase return sessionData; } + /** + * Build the payload object expected by the Realtime API when creating or updating a session. + * + * The helper centralises the conversion from camelCase runtime config to the snake_case payload + * required by the Realtime API so transports that need a one-off payload (for example SIP call + * acceptance) can reuse the same logic without duplicating private state. + * + * @param config - The session config to merge with defaults. 
+ */ + buildSessionPayload( + config: Partial, + ): RealtimeSessionPayload { + return this._getMergedSessionConfig(config); + } + private static buildTurnDetectionConfig( c: RealtimeTurnDetectionConfig | undefined, ): RealtimeTurnDetectionConfigAsIs | undefined { @@ -735,7 +761,7 @@ export abstract class OpenAIRealtimeBase * @param config - The session config to update. */ updateSessionConfig(config: Partial): void { - const sessionData = this._getMergedSessionConfig(config); + const sessionData = this.buildSessionPayload(config); this.sendEvent({ type: 'session.update', diff --git a/packages/agents-realtime/src/openaiRealtimeSip.ts b/packages/agents-realtime/src/openaiRealtimeSip.ts index b4105b8d..c0f79024 100644 --- a/packages/agents-realtime/src/openaiRealtimeSip.ts +++ b/packages/agents-realtime/src/openaiRealtimeSip.ts @@ -4,6 +4,14 @@ import { OpenAIRealtimeWebSocket, OpenAIRealtimeWebSocketOptions, } from './openaiRealtimeWebsocket'; +import type { RealtimeSessionPayload } from './openaiRealtimeBase'; +import type { RealtimeSessionConfig } from './clientMessages'; +import { + RealtimeSession, + type RealtimeSessionOptions, + type RealtimeContextData, +} from './realtimeSession'; +import { RealtimeAgent } from './realtimeAgent'; /** * Transport layer that connects to an existing SIP-initiated Realtime call via call ID. @@ -13,6 +21,46 @@ export class OpenAIRealtimeSIP extends OpenAIRealtimeWebSocket { super(options); } + /** + * Build the initial session payload for a SIP-attached session, matching the config that a RealtimeSession would send on connect. + * + * This enables SIP deployments to accept an incoming call with a payload that already reflects + * the active agent's instructions, tools, prompt, and tracing metadata without duplicating the + * session logic outside of the SDK. The returned object structurally matches the REST + * `CallAcceptParams` interface, so it can be forwarded directly to + * `openai.realtime.calls.accept(...)`. 
+ * + * @param agent - The starting agent used to seed the session instructions, tools, and prompt. + * @param options - Optional session options that mirror the ones passed to the RealtimeSession constructor. + * @param overrides - Additional config overrides applied on top of the session options. + */ + static async buildInitialConfig( + agent: + | RealtimeAgent + | RealtimeAgent>, + options: Partial> = {}, + overrides: Partial = {}, + ): Promise { + const sessionConfig = await RealtimeSession.computeInitialSessionConfig( + agent, + options, + overrides, + ); + const transport = new OpenAIRealtimeSIP(); + return transport.buildSessionPayload(sessionConfig); + } + + override sendAudio( + _audio: ArrayBuffer, + _options: { commit?: boolean } = {}, + ): never { + // SIP integrations stream audio to OpenAI directly through the telephony provider, so the + // transport deliberately prevents userland code from sending duplicate buffers. + throw new Error( + 'OpenAIRealtimeSIP does not support sending audio buffers; audio is handled by the SIP call.', + ); + } + async connect(options: RealtimeTransportLayerConnectOptions): Promise { if (!options.callId) { throw new UserError( diff --git a/packages/agents-realtime/src/realtimeSession.ts b/packages/agents-realtime/src/realtimeSession.ts index 34d9cc7f..f697e952 100644 --- a/packages/agents-realtime/src/realtimeSession.ts +++ b/packages/agents-realtime/src/realtimeSession.ts @@ -406,6 +406,53 @@ export class RealtimeSession< return fullConfig; } + /** + * Compute the initial session config that the current session will use when connecting. + * + * This mirrors the configuration payload we send during `connect`, including dynamic values + * such as the upstream agent instructions, tool definitions, and prompt content generated at + * runtime. Keeping this helper exposed allows transports or orchestration layers to precompute + * a CallAccept-compatible payload without opening a socket. 
+ * + * @param overrides - Additional config overrides shallow-merged over `options.config`: a top-level key present in `overrides` replaces the same key from the session options as a whole. + */ + async getInitialSessionConfig( + overrides: Partial = {}, + ): Promise> { + await this.#setCurrentAgent(this.initialAgent); + return this.#getSessionConfig({ + ...(this.options.config ?? {}), + ...(overrides ?? {}), + }); + } + + /** + * Convenience helper to compute the initial session config without manually instantiating and connecting a session. + * + * This is primarily useful for integrations that must provide the session configuration to a + * third party (for example the SIP `calls.accept` endpoint) before the actual realtime session + * is attached. The helper instantiates a throwaway session so all agent-driven dynamic fields + * resolve in exactly the same way as the live session path; the throwaway session is always closed afterwards, even if the computation throws. + * + * @param agent - The starting agent for the session. + * @param options - Session options used to seed the config calculation. + * @param overrides - Additional config overrides applied on top of the provided options.
+ */ + static async computeInitialSessionConfig( + agent: + | RealtimeAgent + | RealtimeAgent>, + options: Partial> = {}, + overrides: Partial = {}, + ): Promise> { + const session = new RealtimeSession(agent, options); + try { + return await session.getInitialSessionConfig(overrides); + } finally { + session.close(); /* release the throwaway session whether or not the config computation succeeded */ + } + } + async updateAgent(newAgent: RealtimeAgent) { this.#currentAgent.emit('agent_handoff', this.#context, newAgent); this.emit('agent_handoff', this.#context, this.#currentAgent, newAgent); diff --git a/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts b/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts index 3312603d..fa5216ec 100644 --- a/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts +++ b/packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { OpenAIRealtimeBase } from '../src/openaiRealtimeBase'; import { OpenAIRealtimeWebSocket } from '../src/openaiRealtimeWebsocket'; import { OpenAIRealtimeSIP } from '../src/openaiRealtimeSip'; +import { RealtimeAgent } from '../src/realtimeAgent'; let lastFakeSocket: any; vi.mock('ws', () => { @@ -406,4 +407,31 @@ describe('OpenAIRealtimeWebSocket', () => { ); sip.close(); }); + + it('OpenAIRealtimeSIP buildInitialConfig returns realtime payload seeded from agent', async () => { + const agent = new RealtimeAgent({ + name: 'sip-agent', + handoffs: [], + instructions: 'Respond politely.', + }); + const payload = await OpenAIRealtimeSIP.buildInitialConfig( + agent, + { + model: 'gpt-realtime', + config: { audio: { output: { speed: 1.5 } } }, + }, + { audio: { output: { speed: 2 } } }, /* overrides argument: expected to win over options.config (speed 1.5) */ + ); + expect(payload.type).toBe('realtime'); + expect(payload.model).toBe('gpt-realtime'); + expect(payload.instructions).toBe('Respond politely.'); + expect(payload.audio?.output?.speed).toBe(2); + }); + + it('OpenAIRealtimeSIP sendAudio throws', () => { + const sip = new 
OpenAIRealtimeSIP(); + expect(() => sip.sendAudio(new ArrayBuffer(1))).toThrow( + 'OpenAIRealtimeSIP does not support sending audio buffers', + ); + }); }); diff --git a/packages/agents-realtime/test/realtimeSession.test.ts b/packages/agents-realtime/test/realtimeSession.test.ts index 1631b1cd..7ef89068 100644 --- a/packages/agents-realtime/test/realtimeSession.test.ts +++ b/packages/agents-realtime/test/realtimeSession.test.ts @@ -126,6 +126,14 @@ describe('RealtimeSession', () => { expect(t.connectCalls[0]?.url).toBe('ws://example'); }); + it('forwards callId in connect options to transport', async () => { + const t = new FakeTransport(); + const agent = new RealtimeAgent({ name: 'A', handoffs: [] }); + const s = new RealtimeSession(agent, { transport: t }); + await s.connect({ apiKey: 'test', callId: 'call_123' }); + expect(t.connectCalls[0]?.callId).toBe('call_123'); + }); + it('includes default transcription config when connecting', async () => { const t = new FakeTransport(); const agent = new RealtimeAgent({ name: 'A', handoffs: [] });