Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
403 changes: 101 additions & 302 deletions package-lock.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
"lint": "next lint"
},
"dependencies": {
"@openai/agents": "^0.0.5",
"@openai/agents": "^0.1.9",
"@radix-ui/react-icons": "^1.3.2",
"dotenv": "^16.4.7",
"next": "^15.3.1",
"openai": "^4.77.3",
"openai": "^6.2.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-markdown": "^9.0.3",
Expand Down
40 changes: 19 additions & 21 deletions src/app/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,12 @@ function App() {
sendEvent({
type: 'session.update',
session: {
turn_detection: turnDetection,
type: 'realtime',
audio: {
input: {
turn_detection: turnDetection,
},
},
},
});

Expand Down Expand Up @@ -302,6 +307,7 @@ function App() {
interrupt();

setIsPTTUserSpeaking(true);
mute(false);
sendClientEvent({ type: 'input_audio_buffer.clear' }, 'clear PTT buffer');

// No placeholder; we'll rely on server transcript once ready.
Expand All @@ -314,6 +320,8 @@ function App() {
setIsPTTUserSpeaking(false);
sendClientEvent({ type: 'input_audio_buffer.commit' }, 'commit PTT');
sendClientEvent({ type: 'response.create' }, 'trigger response PTT');
sendClientEvent({ type: 'input_audio_buffer.clear' }, 'reset buffer after PTT');
mute(true);
};

const onToggleConnection = () => {
Expand Down Expand Up @@ -382,6 +390,16 @@ function App() {
);
}, [isAudioPlaybackEnabled]);

useEffect(() => {
if (sessionStatus !== 'CONNECTED') return;

const shouldMuteMic = isPTTActive
? !isPTTUserSpeaking
: !isAudioPlaybackEnabled;

mute(shouldMuteMic);
}, [sessionStatus, isPTTActive, isPTTUserSpeaking, isAudioPlaybackEnabled, mute]);

useEffect(() => {
if (audioElementRef.current) {
if (isAudioPlaybackEnabled) {
Expand All @@ -395,28 +413,8 @@ function App() {
audioElementRef.current.pause();
}
}

// Toggle server-side audio stream mute so bandwidth is saved when the
// user disables playback.
try {
mute(!isAudioPlaybackEnabled);
} catch (err) {
console.warn('Failed to toggle SDK mute', err);
}
}, [isAudioPlaybackEnabled]);

// Ensure mute state is propagated to transport right after we connect or
// whenever the SDK client reference becomes available.
useEffect(() => {
if (sessionStatus === 'CONNECTED') {
try {
mute(!isAudioPlaybackEnabled);
} catch (err) {
console.warn('mute sync after connect failed', err);
}
}
}, [sessionStatus, isAudioPlaybackEnabled]);

useEffect(() => {
if (sessionStatus === "CONNECTED" && audioElementRef.current?.srcObject) {
// The remote audio stream from the audio element.
Expand Down
2 changes: 1 addition & 1 deletion src/app/agentConfigs/chatSupervisor/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { getNextResponseFromSupervisor } from './supervisorAgent';

export const chatAgent = new RealtimeAgent({
name: 'chatAgent',
voice: 'sage',
voice: 'marin',
instructions: `
You are a helpful junior customer service agent. Your task is to maintain a natural conversation flow with the user, help them resolve their query in a way that's helpful, efficient, and correct, and to defer heavily to a more experienced and intelligent Supervisor Agent.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { RealtimeAgent, tool } from '@openai/agents/realtime';

export const authenticationAgent = new RealtimeAgent({
name: 'authentication',
voice: 'sage',
voice: 'marin',
handoffDescription:
'The initial agent that greets the user, does authentication and routes them to the correct downstream agent.',

Expand Down
2 changes: 1 addition & 1 deletion src/app/agentConfigs/customerServiceRetail/returns.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { RealtimeAgent, tool, RealtimeItem } from '@openai/agents/realtime';

export const returnsAgent = new RealtimeAgent({
name: 'returns',
voice: 'sage',
voice: 'marin',
handoffDescription:
'Customer Service Agent specialized in order lookups, policy checks, and return initiations.',

Expand Down
2 changes: 1 addition & 1 deletion src/app/agentConfigs/customerServiceRetail/sales.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { RealtimeAgent, tool } from '@openai/agents/realtime';

export const salesAgent = new RealtimeAgent({
name: 'salesAgent',
voice: 'sage',
voice: 'marin',
handoffDescription:
"Handles sales-related inquiries, including new product details, recommendations, promotions, and purchase flows. Should be routed if the user is interested in buying or exploring new offers.",

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { RealtimeAgent } from '@openai/agents/realtime';

export const simulatedHumanAgent = new RealtimeAgent({
name: 'simulatedHuman',
voice: 'sage',
voice: 'marin',
handoffDescription:
'Placeholder, simulated human agent that can provide more advanced help to the user. Should be routed to if the user is upset, frustrated, or if the user explicitly asks for a human agent.',
instructions:
Expand Down
4 changes: 2 additions & 2 deletions src/app/agentConfigs/simpleHandoff.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import {

export const haikuWriterAgent = new RealtimeAgent({
name: 'haikuWriter',
voice: 'sage',
voice: 'marin',
instructions:
'Ask the user for a topic, then reply with a haiku about that topic.',
handoffs: [],
Expand All @@ -14,7 +14,7 @@ export const haikuWriterAgent = new RealtimeAgent({

export const greeterAgent = new RealtimeAgent({
name: 'greeter',
voice: 'sage',
voice: 'marin',
instructions:
"Please greet the user and ask them if they'd like a Haiku. If yes, hand off to the 'haiku' agent.",
handoffs: [haikuWriterAgent],
Expand Down
43 changes: 29 additions & 14 deletions src/app/api/session/route.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,37 @@
import { NextResponse } from "next/server";
import OpenAI from "openai";

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

export async function GET() {
try {
const response = await fetch(
"https://api.openai.com/v1/realtime/sessions",
{
method: "POST",
headers: {
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
"Content-Type": "application/json",
const data = await openai.realtime.clientSecrets.create({
session: {
type: "realtime",
model: "gpt-realtime",
output_modalities: ["audio"],
audio: {
input: {
format: { type: "audio/pcm", rate: 24000 },
transcription: {
model: "gpt-4o-mini-transcribe",
},
},
output: {
format: { type: "audio/pcm", rate: 24000 },
voice: "marin",
},
},
body: JSON.stringify({
model: "gpt-4o-realtime-preview-2025-06-03",
}),
}
);
const data = await response.json();
return NextResponse.json(data);
},
});

return NextResponse.json({
client_secret: {
value: data.value,
expires_at: data.expires_at,
},
session: data.session,
});
} catch (error) {
console.error("Error in /session:", error);
return NextResponse.json(
Expand Down
19 changes: 14 additions & 5 deletions src/app/hooks/useRealtimeSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@ export function useRealtimeSession(callbacks: RealtimeSessionCallbacks = {}) {
historyHandlers.handleTranscriptionCompleted(event);
break;
}
case "response.output_audio_transcript.done":
case "response.audio_transcript.done": {
historyHandlers.handleTranscriptionCompleted(event);
break;
}
case "response.output_audio_transcript.delta":
case "response.audio_transcript.delta": {
historyHandlers.handleTranscriptionDelta(event);
break;
Expand Down Expand Up @@ -137,12 +139,19 @@ export function useRealtimeSession(callbacks: RealtimeSessionCallbacks = {}) {
return pc;
},
}),
model: 'gpt-4o-realtime-preview-2025-06-03',
model: 'gpt-realtime',
config: {
inputAudioFormat: audioFormat,
outputAudioFormat: audioFormat,
inputAudioTranscription: {
model: 'gpt-4o-mini-transcribe',
outputModalities: ['audio'],
audio: {
input: {
format: audioFormat,
transcription: {
model: 'gpt-4o-mini-transcribe',
},
},
output: {
format: audioFormat,
},
},
},
outputGuardrails: outputGuardrails ?? [],
Expand Down
21 changes: 14 additions & 7 deletions src/app/lib/codecUtils.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
export function audioFormatForCodec(codec: string): 'pcm16' | 'g711_ulaw' | 'g711_alaw' {
let audioFormat: 'pcm16' | 'g711_ulaw' | 'g711_alaw' = 'pcm16';
if (typeof window !== 'undefined') {
const c = codec.toLowerCase();
if (c === 'pcmu') audioFormat = 'g711_ulaw';
else if (c === 'pcma') audioFormat = 'g711_alaw';
import type { RealtimeAudioFormat } from '@openai/agents/realtime';

export function audioFormatForCodec(codec: string): RealtimeAudioFormat {
const normalized = typeof codec === 'string' ? codec.toLowerCase() : 'opus';

if (normalized === 'pcmu') {
return { type: 'audio/pcmu' };
}
return audioFormat;

if (normalized === 'pcma') {
return { type: 'audio/pcma' };
}

// Default to wideband PCM for Opus or any other codec
return { type: 'audio/pcm', rate: 24000 };
}

// Apply preferred codec on a peer connection's audio transceivers. Safe to call multiple times.
Expand Down