diff --git a/realtime/app.js b/realtime/app.js
new file mode 100644
index 0000000..e33b5e3
--- /dev/null
+++ b/realtime/app.js
@@ -0,0 +1,91 @@
+class App {
+  constructor(prefix, prefs = [], controls = {}, sessionType = "standard") {
+    this.prefix = prefix;
+    this.prefs = prefs;
+    this.controls = controls;
+    this.sessionType = sessionType;
+    this.session = null;
+  }
+
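+  // Each element in `prefs` persists its value to localStorage: keys are
+  // namespaced with the per-app prefix, except the shared "openai-api-key"
+  // field, which is stored unprefixed so every demo reuses the same key.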
+  initState() {
+    this.prefs.forEach(p => {
+      const fqid = p.id !== "openai-api-key" ? this.prefix + p.id : p.id;
+      const v = localStorage.getItem(fqid);
+      if (v) {
+        p.value = v;
+      }
+      p.addEventListener("change", () => {
+        localStorage.setItem(fqid, p.value);
+      });
+    });
+    this.updateState(false);
+  }
+
+  updateState(started) {
+    if (this.controls.statusEl) {
+      this.controls.statusEl.textContent = "";
+    }
+    this.prefs.forEach(p => (p.disabled = started));
+    if (this.controls.startBtn) this.controls.startBtn.disabled = started;
+    if (this.controls.stopBtn) this.controls.stopBtn.disabled = !started;
+  }
+
+  getApiKey() {
+    const el = document.getElementById("openai-api-key");
+    return el ? el.value : null;
+  }
+
+  async startMicrophone() {
+    if (!this.getApiKey()) {
+      window.alert(
+        "Please enter your OpenAI API Key. You can obtain one from https://platform.openai.com/settings/organization/api-keys"
+      );
+      return;
+    }
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    await this.start(stream);
+  }
+
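+  // Wires the Session callbacks to the overridable hooks below and opens the
+  // connection. `Session` is the shared realtime helper that each demo page
+  // loads alongside this file.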
+  async start(stream) {
+    this.updateState(true);
+    this.session = new Session(this.getApiKey());
+    this.session.onconnectionstatechange = state => this.onConnectionStateChange(state);
+    this.session.ontrack = e => this.onTrack(e);
+    this.session.onopen = () => this.onOpen();
+    this.session.onmessage = msg => this.onMessage(msg);
+    this.session.onerror = e => this.onError(e);
+    const config = this.buildSessionConfig();
+    if (this.sessionType === "transcription") {
+      await this.session.startTranscription(stream, config);
+    } else {
+      await this.session.start(stream, config);
+    }
+  }
+
+  stop() {
+    this.updateState(false);
+    if (this.session) {
+      this.session.stop();
+      this.session = null;
+    }
+  }
+
+  sendMessage(msg) {
+    this.session?.sendMessage(msg);
+  }
+
+  mute(muted) {
+    this.session?.mute(muted);
+  }
+
+  // Hooks for subclasses
+  buildSessionConfig() { return {}; }
+  onTrack(_e) {}
+  onOpen() {}
+  onMessage(_msg) {}
+  onError(e) { console.error(e); this.stop(); }
+  onConnectionStateChange(state) {
+    if (this.controls.statusEl) this.controls.statusEl.textContent = state;
+  }
+}
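+
+// A minimal subclass, for illustration only (the element ids and prefix here
+// are hypothetical):
+//
+//   class EchoApp extends App {
+//     constructor() {
+//       const $ = document.querySelector.bind(document);
+//       super("realtime/echo/", [$("#openai-api-key")], {
+//         startBtn: $("#start"),
+//         stopBtn: $("#stop"),
+//         statusEl: $("#status"),
+//       });
+//       this.initState();
+//     }
+//
+//     buildSessionConfig() {
+//       return { model: "gpt-4o-realtime-preview" };
+//     }
+//   }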
"Unmute" : "Mute"; -} + getApiKey() { + return getApiKey(); + } -async function download() { - if (!previousImage) { - console.warn('No image available to download'); - return; + updateState(started) { + super.updateState(started); + this.startBtn.textContent = started ? 'Stop' : 'Start'; + this.muteBtn.disabled = !started; } - const url = imageEl.src; - const a = document.createElement('a'); - a.style.display = 'none'; - a.href = url; - a.download = 'image.png'; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); -} + buildSessionConfig() { + return SESSION_PARAMS; + } -function handleTrack(e) { - const audio = new Audio(); - audio.srcObject = e.streams[0]; - audio.play(); -} + async toggle() { + if (this.session) { + this.stop(); + } else { + await this.startMicrophone(); + } + } -async function handleOpen(e) { - statusEl.textContent = "connected"; - const createResponse = { type: "response.create" }; - session.sendMessage(createResponse); -} + toggleMute() { + this.mute(!this.session?.muted); + this.muteBtn.textContent = this.session?.muted ? 'Unmute' : 'Mute'; + } -async function handleMessage(msg) { - switch (msg.type) { - case "response.function_call_arguments.done": - if (msg.name === "create_image") { - const description = JSON.parse(msg.arguments).description; - instructions.value = description; - statusEl.textContent = "generating image"; - const code = await generateImage(description, previousImage); - loadImage(code); - previousImage = code; - statusEl.textContent = ""; - } - break; + onTrack(e) { + const audio = new Audio(); + audio.srcObject = e.streams[0]; + audio.play(); } -} -function handleError(e) { - console.error(e); - stop(); -} + onOpen() { + this.controls.statusEl.textContent = 'connected'; + this.sendMessage({ type: 'response.create' }); + } + + async onMessage(msg) { + switch (msg.type) { + case 'response.function_call_arguments.done': + if (msg.name === 'create_image') { + const description = JSON.parse(msg.arguments).description; + this.instructions.value = description; + this.controls.statusEl.textContent = 'generating image'; + const code = await this.generateImage(description, this.previousImage); + this.loadImage(code); + this.previousImage = code; + this.controls.statusEl.textContent = ''; + } + break; + } + } + + onError(e) { + console.error(e); + this.stop(); + } -async function generateImage(description, previousImage) { - let path, contentType, body; - if (!previousImage) { - path = "images/generations"; - contentType = "application/json"; + async generateImage(description, previousImage) { + let path, contentType, body; + if (!previousImage) { + path = 'images/generations'; + contentType = 'application/json'; body = JSON.stringify({ model: IMAGE_MODEL, prompt: description, quality: IMAGE_QUALITY, - size: IMAGE_SIZE + size: IMAGE_SIZE, + }); + } else { + path = 'images/edits'; + const binaryString = atob(previousImage); + const len = binaryString.length; + const bytes = new Uint8Array(len); + for (let i = 0; i < len; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + const blob = new Blob([bytes], { type: 'image/png' }); + const form = new FormData(); + form.append('model', IMAGE_MODEL); + form.append('prompt', description); + form.append('quality', IMAGE_QUALITY); + form.append('image', blob); + form.append('size', IMAGE_SIZE); + body = form; + } + + const url = `${API_BASE}/${path}`; + const headers = { + Authorization: `Bearer ${this.getApiKey()}`, + }; + if (contentType) { + headers['Content-Type'] = contentType; + } + 
diff --git a/realtime/imager/index.html b/realtime/imager/index.html
index 0455aaf..87aea54 100644
--- a/realtime/imager/index.html
+++ b/realtime/imager/index.html
@@ -60,8 +60,9 @@
-
-
-
-
+
+
+
+
+
diff --git a/realtime/imager/main.js b/realtime/imager/main.js
index ae28249..569a3e7 100644
--- a/realtime/imager/main.js
+++ b/realtime/imager/main.js
@@ -1,14 +1,3 @@
-const $ = document.querySelector.bind(document);
-const instructions = $("#instructions");
-const startBtn = $("#startBtn");
-const muteBtn = $("#muteBtn");
-const downloadBtn = $("#downloadBtn");
-const imageEl = $("#image");
-const statusEl = $("#status");
-startBtn.addEventListener('click', start);
-muteBtn.addEventListener('click', mute);
-downloadBtn.addEventListener('click', download);
-
 const API_BASE = 'https://api.openai.com/v1';
 const INSTRUCTIONS = `
 # Identity
@@ -37,7 +26,7 @@ yet approachable, inviting clients to explore options rather than dictating them
 
 # Pacing
 
-Fast and to the point. Your initial interaction is brief.You present ideas but leave space for the client to weigh in. 
+Fast and to the point. Your initial interaction is brief. You present ideas but leave space for the client to weigh in.
 Each clarification closes a loop before moving ahead, keeping the collaboration smooth and stress-free.
 
 # Tool Usage
@@ -45,169 +34,186 @@ Each clarification closes a loop before moving ahead, keeping the collaboration
 
 When contracted to create an image, confirm all requirements: subject, style references and color scheme. Ask
 focused questions until every piece is in place. Then let the client know you're starting work, and call
 create_image with a thorough description covering all agreed-upon details.
-`
+`;
 
 const SESSION_PARAMS = {
   instructions: INSTRUCTIONS,
-  model: "gpt-4o-realtime-preview",
-  voice: "shimmer",
+  model: 'gpt-4o-realtime-preview',
+  voice: 'shimmer',
   tools: [
     {
-      type: "function",
-      name: "create_image",
-      description: "Use this function to create a new image with the given description.",
+      type: 'function',
+      name: 'create_image',
+      description: 'Use this function to create a new image with the given description.',
       parameters: {
-        type: "object",
+        type: 'object',
         properties: {
-          description: { type: "string", description: "The description of the image to create." },
+          description: { type: 'string', description: 'The description of the image to create.' },
         },
-        required: ["description"],
+        required: ['description'],
       },
     },
-  ]
+  ],
 };
 
-const IMAGE_MODEL = "gpt-image-1";
-const IMAGE_SIZE = "1024x1024";
-const IMAGE_QUALITY = "auto";
-
-let session = null;
-let previousImage = null;
-
-async function start() {
-  if (session) {
-    startBtn.textContent = "Start";
-    statusEl.textContent = "";
-    session.stop();
-    session = null;
-    return;
-  }
-
-  const apiKey = getApiKey();
-  if (!apiKey) {
-    window.alert('An OpenAI API key is required to use this application. You can obtain one from https://platform.openai.com/settings/organization/api-keys');
-    return;
+const IMAGE_MODEL = 'gpt-image-1';
+const IMAGE_SIZE = '1024x1024';
+const IMAGE_QUALITY = 'auto';
+
+class ImagerApp extends App {
+  constructor() {
+    const $ = document.querySelector.bind(document);
+    const instructions = $("#instructions");
+    const startBtn = $("#startBtn");
+    const muteBtn = $("#muteBtn");
+    const downloadBtn = $("#downloadBtn");
+    const imageEl = $("#image");
+    const statusEl = $("#status");
+    super("realtime/imager/", [], { startBtn, statusEl });
+    this.instructions = instructions;
+    this.startBtn = startBtn;
+    this.muteBtn = muteBtn;
+    this.downloadBtn = downloadBtn;
+    this.imageEl = imageEl;
+    this.previousImage = null;
+    startBtn.addEventListener('click', () => this.toggle());
+    muteBtn.addEventListener('click', () => this.toggleMute());
+    downloadBtn.addEventListener('click', () => this.download());
+    this.initState();
   }
-
-  startBtn.textContent = "Stop";
-  const stream = await navigator.mediaDevices.getUserMedia({audio: true});
-  session = new Session(apiKey);
-  session.ontrack = (e) => handleTrack(e);
-  session.onopen = () => handleOpen();
-  session.onmessage = (e) => handleMessage(e);
-  session.onerror = (e) => handleError(e);
-  await session.start(stream, SESSION_PARAMS);
-}
 
-function mute() {
-  session.mute(!session.muted);
-  muteBtn.textContent = session.muted ? "Unmute" : "Mute";
-}
+  getApiKey() {
+    return getApiKey();
+  }
 
-async function download() {
-  if (!previousImage) {
-    console.warn('No image available to download');
-    return;
+  updateState(started) {
+    super.updateState(started);
+    this.startBtn.textContent = started ? 'Stop' : 'Start';
+    this.muteBtn.disabled = !started;
   }
 
-  const url = imageEl.src;
-  const a = document.createElement('a');
-  a.style.display = 'none';
-  a.href = url;
-  a.download = 'image.png';
-  document.body.appendChild(a);
-  a.click();
-  document.body.removeChild(a);
-}
+  buildSessionConfig() {
+    return SESSION_PARAMS;
+  }
 
-function handleTrack(e) {
-  const audio = new Audio();
-  audio.srcObject = e.streams[0];
-  audio.play();
-}
+  async toggle() {
+    if (this.session) {
+      this.stop();
+    } else {
+      await this.startMicrophone();
+    }
+  }
 
-async function handleOpen(e) {
-  statusEl.textContent = "connected";
-  const createResponse = { type: "response.create" };
-  session.sendMessage(createResponse);
-}
+  toggleMute() {
+    this.mute(!this.session?.muted);
+    this.muteBtn.textContent = this.session?.muted ? 'Unmute' : 'Mute';
+  }
 
-async function handleMessage(msg) {
-  switch (msg.type) {
-    case "response.function_call_arguments.done":
-      if (msg.name === "create_image") {
-        const description = JSON.parse(msg.arguments).description;
-        instructions.value = description;
-        statusEl.textContent = "generating image";
-        const code = await generateImage(description, previousImage);
-        loadImage(code);
-        previousImage = code;
-        statusEl.textContent = "";
-      }
-      break;
+  onTrack(e) {
+    const audio = new Audio();
+    audio.srcObject = e.streams[0];
+    audio.play();
   }
-}
 
-function handleError(e) {
-  console.error(e);
-  stop();
-}
+  onOpen() {
+    this.controls.statusEl.textContent = 'connected';
+    this.sendMessage({ type: 'response.create' });
+  }
+
+  async onMessage(msg) {
+    switch (msg.type) {
+      case 'response.function_call_arguments.done':
+        if (msg.name === 'create_image') {
+          const description = JSON.parse(msg.arguments).description;
+          this.instructions.value = description;
+          this.controls.statusEl.textContent = 'generating image';
+          const code = await this.generateImage(description, this.previousImage);
+          this.loadImage(code);
+          this.previousImage = code;
+          this.controls.statusEl.textContent = '';
+        }
+        break;
+    }
+  }
+
+  onError(e) {
+    console.error(e);
+    this.stop();
+  }
 
-async function generateImage(description, previousImage) {
-  let path, contentType, body;
-  if (!previousImage) {
-    path = "images/generations";
-    contentType = "application/json";
+  async generateImage(description, previousImage) {
+    let path, contentType, body;
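+    // First request: images/generations with a JSON body. Once an image
+    // exists, it is passed back as the base image for images/edits, which
+    // takes a multipart form instead.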
+    if (!previousImage) {
+      path = 'images/generations';
+      contentType = 'application/json';
       body = JSON.stringify({
         model: IMAGE_MODEL,
         prompt: description,
         quality: IMAGE_QUALITY,
-        size: IMAGE_SIZE
+        size: IMAGE_SIZE,
+      });
+    } else {
+      path = 'images/edits';
+      const binaryString = atob(previousImage);
+      const len = binaryString.length;
+      const bytes = new Uint8Array(len);
+      for (let i = 0; i < len; i++) {
+        bytes[i] = binaryString.charCodeAt(i);
+      }
+      const blob = new Blob([bytes], { type: 'image/png' });
+      const form = new FormData();
+      form.append('model', IMAGE_MODEL);
+      form.append('prompt', description);
+      form.append('quality', IMAGE_QUALITY);
+      form.append('image', blob);
+      form.append('size', IMAGE_SIZE);
+      body = form;
+    }
+
+    const url = `${API_BASE}/${path}`;
+    const headers = {
+      Authorization: `Bearer ${this.getApiKey()}`,
+    };
+    if (contentType) {
+      headers['Content-Type'] = contentType;
+    }
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: headers,
+      body: body,
     });
-  } else {
-    path = "images/edits";
-    const binaryString = atob(previousImage);
-    const len = binaryString.length;
-    const bytes = new Uint8Array(len);
-    for (let i = 0; i < len; i++) {
-      bytes[i] = binaryString.charCodeAt(i);
+    if (!response.ok) {
+      throw new Error(`API error: ${response.status}`);
     }
-    const blob = new Blob([bytes], { type: "image/png" });
-    const form = new FormData();
-    form.append("model", IMAGE_MODEL);
-    form.append("prompt", description);
-    form.append("quality", IMAGE_QUALITY);
-    form.append("image", blob);
-    form.append("size", IMAGE_SIZE);
-    body = form;
-  }
 
-  const url = `${API_BASE}/${path}`;
-  const headers = {
-    'Authorization': `Bearer ${getApiKey()}`
-  };
-  if (contentType) {
-    headers['Content-Type'] = contentType;
-  }
-  const response = await fetch(url, {
-    method: 'POST',
-    headers: headers,
-    body: body,
-  });
-  if (!response.ok) {
-    throw new Error(`API error: ${response.status}`);
+    const data = await response.json();
+    const content = data.data[0].b64_json;
+    if (!content) {
+      throw new Error('Invalid API response format.');
+    }
+
+    return content;
   }
 
-  const data = await response.json();
-  const content = data.data[0].b64_json;
-  if (!content) {
-    throw new Error('Invalid API response format.');
+  loadImage(content) {
+    this.imageEl.src = 'data:image/png;base64,' + content;
   }
 
-  return content;
-}
+  async download() {
+    if (!this.previousImage) {
+      console.warn('No image available to download');
+      return;
+    }
 
-function loadImage(content) {
-  const image = document.getElementById('image');
-  image.src = 'data:image/png;base64,' + content;
+    const url = this.imageEl.src;
+    const a = document.createElement('a');
+    a.style.display = 'none';
+    a.href = url;
+    a.download = 'image.png';
+    document.body.appendChild(a);
+    a.click();
+    document.body.removeChild(a);
+  }
 }
+
+const app = new ImagerApp();
diff --git a/realtime/noise_reduction/index.html b/realtime/noise_reduction/index.html
index f5abc63..f05b5b1 100644
--- a/realtime/noise_reduction/index.html
+++ b/realtime/noise_reduction/index.html
@@ -39,7 +39,8 @@
-
-
-
-
\ No newline at end of file
+
+
+
+
+
diff --git a/realtime/noise_reduction/main.js b/realtime/noise_reduction/main.js
index 4cf7457..9533629 100644
--- a/realtime/noise_reduction/main.js
+++ b/realtime/noise_reduction/main.js
@@ -1,119 +1,77 @@
-const APP_PREFIX = "realtime/noise_reduction/";
-const $ = document.querySelector.bind(document);
-const apiKeyEl = $("#openai-api-key");
-const nrEl = $("#noise-reduction");
-const startMicrophoneEl = $("#start-microphone");
-const stopEl = $("#stop");
-const statusEl = $("#status");
-const prefs = [apiKeyEl, nrEl];
-
-let session = null;
-
-function initState() {
-  prefs.forEach(p => {
-    const fqid = p.id != "openai-api-key" ? APP_PREFIX + p.id : p.id;
-    const v = localStorage.getItem(fqid);
-    if (v) {
-      p.value = v;
-    }
-    p.addEventListener("change", () => {
-      localStorage.setItem(fqid, p.value);
-    });
-  });
-  updateState(false);
-  nrEl.addEventListener("change", () => {
-    const message = { type: "session.update", session: { input_audio_noise_reduction: nrEl.value ? { type: nrEl.value } : null } };
-    if (session) {
-      session.sendMessage(message);
-    }
-  });
-}
-
-function updateState(started) {
-  statusEl.textContent = "";
-  startMicrophoneEl.disabled = started;
-  stopEl.disabled = !started;
-}
-
-async function startMicrophone() {
-  if (!apiKeyEl.value) {
-    window.alert("Please enter your OpenAI API Key. You can obtain one from https://platform.openai.com/settings/organization/api-keys");
-    return;
+class NoiseReductionApp extends App {
+  constructor() {
+    const $ = document.querySelector.bind(document);
+    const apiKeyEl = $("#openai-api-key");
+    const nrEl = $("#noise-reduction");
+    const startBtn = $("#start-microphone");
+    const stopBtn = $("#stop");
+    const statusEl = $("#status");
+    const prefs = [apiKeyEl, nrEl];
+    super("realtime/noise_reduction/", prefs, { startBtn, stopBtn, statusEl });
+    this.nrEl = nrEl;
+    this.nrEl.addEventListener("change", () => {
+      const message = { type: "session.update", session: { input_audio_noise_reduction: this.nrEl.value ? { type: this.nrEl.value } : null } };
+      if (this.session) {
+        this.sendMessage(message);
+      }
+    });
+    this.initState();
   }
-  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-  start(stream);
-}
 
-async function start(stream) {
-  updateState(true);
-  session = new Session(apiKeyEl.value);
-  session.onconnectionstatechange = state => statusEl.textContent = state;
-  session.onmessage = parsed => handleMessage(parsed);
-  session.onerror = e => handleError(e);
-
-  const sessionConfig = {
-    model: "gpt-4o-mini-realtime-preview",
-    instructions: "Always generate an empty response and return control back to the user immediately.",
-    input_audio_noise_reduction: nrEl.value ? { type: nrEl.value } : undefined
+  buildSessionConfig() {
+    return {
+      model: "gpt-4o-mini-realtime-preview",
+      instructions: "Always generate an empty response and return control back to the user immediately.",
+      input_audio_noise_reduction: this.nrEl.value ? { type: this.nrEl.value } : undefined,
+    };
   }
-  await session.start(stream, sessionConfig);
-}
-
-function stop() {
-  updateState(false);
-  session.stop();
-  session = null;
-}
 
-function handleMessage(parsed) {
-  console.log(parsed);
-  switch (parsed.type) {
-    case "input_audio_buffer.committed":
-      const message = { type: "conversation.item.retrieve", item_id: parsed.item_id };
-      session.sendMessage(message);
-      break;
-    case "conversation.item.retrieved":
-      playAudio(parsed.item.content[0].audio);
-      break;
+  onMessage(parsed) {
+    console.log(parsed);
+    switch (parsed.type) {
+      case "input_audio_buffer.committed":
+        this.sendMessage({ type: "conversation.item.retrieve", item_id: parsed.item_id });
+        break;
+      case "conversation.item.retrieved":
+        this.playAudio(parsed.item.content[0].audio);
+        break;
+    }
   }
-}
 
-function handleError(e) {
-  console.error(e);
-  stop();
-}
+  playAudio(base64Pcm) {
+    const pcmBinary = atob(base64Pcm);
+    const pcmByteLength = pcmBinary.length;
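+    // The retrieved audio is raw 16-bit, 24 kHz mono PCM (base64-encoded);
+    // build a minimal 44-byte WAV header around it so the browser can play
+    // it back from a data: URI.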
+    const headerBuffer = new ArrayBuffer(44);
+    const view = new DataView(headerBuffer);
+    const writeString = (view, offset, str) => {
+      for (let i = 0; i < str.length; i++) {
+        view.setUint8(offset + i, str.charCodeAt(i));
+      }
+    };
 
-function playAudio(base64Pcm) {
-  const pcmBinary = atob(base64Pcm);
-  const pcmByteLength = pcmBinary.length;
-  const headerBuffer = new ArrayBuffer(44);
-  const view = new DataView(headerBuffer);
-  const writeString = (view, offset, str) => {
-    for (let i = 0; i < str.length; i++) {
-      view.setUint8(offset + i, str.charCodeAt(i));
-    }
-  }
+    writeString(view, 0, "RIFF");
+    view.setUint32(4, 36 + pcmByteLength, true);
+    writeString(view, 8, "WAVE");
+    writeString(view, 12, "fmt ");
+    view.setUint32(16, 16, true);
+    view.setUint16(20, 1, true); // PCM
+    view.setUint16(22, 1, true); // mono
+    view.setUint32(24, 24000, true); // 24kHz
+    view.setUint32(28, 24000 * 2, true);
+    view.setUint16(32, 2, true);
+    view.setUint16(34, 16, true); // 16-bit
+    writeString(view, 36, "data");
+    view.setUint32(40, pcmByteLength, true);
 
-  writeString(view, 0, "RIFF");
-  view.setUint32(4, 36 + pcmByteLength, true);
-  writeString(view, 8, "WAVE");
-  writeString(view, 12, "fmt ");
-  view.setUint32(16, 16, true);
-  view.setUint16(20, 1, true); // PCM
-  view.setUint16(22, 1, true); // mono
-  view.setUint32(24, 24000, true); // 24kHz
-  view.setUint32(28, 24000 * 2, true);
-  view.setUint16(32, 2, true);
-  view.setUint16(34, 16, true); // 16-bit
-  writeString(view, 36, "data");
-  view.setUint32(40, pcmByteLength, true);
-
-  const headerBytes = new Uint8Array(headerBuffer);
-  const headerBinary = Array.from(headerBytes).map(byte => String.fromCharCode(byte)).join('');
-  const combinedBinary = headerBinary + pcmBinary;
-  const uri = "data:audio/wav;base64," + btoa(combinedBinary);
-  const audio = new Audio(uri);
-  audio.play();
+    const headerBytes = new Uint8Array(headerBuffer);
+    const headerBinary = Array.from(headerBytes).map(byte => String.fromCharCode(byte)).join('');
+    const combinedBinary = headerBinary + pcmBinary;
+    const uri = "data:audio/wav;base64," + btoa(combinedBinary);
+    const audio = new Audio(uri);
+    audio.play();
+  }
 }
 
-initState();
+const app = new NoiseReductionApp();
+window.startMicrophone = () => app.startMicrophone();
+window.stop = () => app.stop();
diff --git a/realtime/text/index.html b/realtime/text/index.html
index 1d606fa..c366e85 100644
--- a/realtime/text/index.html
+++ b/realtime/text/index.html
@@ -46,6 +46,7 @@
+    <script src="../app.js"></script>
\ No newline at end of file
this.outputEl.value = ""; + } else if (message.type === "response.text.delta") { + if (this.startTime) { + const duration = performance.now() - this.startTime; + this.controls.statusEl.textContent = `${duration.toFixed(0)}ms`; + this.startTime = null; + } + this.outputEl.value += message.delta; } - outputEl.value += message.delta; } } -function handleError(e) { - console.error(e); - stop(); -} - -initState(); +const app = new TextApp(); +window.startMicrophone = () => app.startMicrophone(); +window.stop = () => app.stop(); diff --git a/realtime/transcribe/index.html b/realtime/transcribe/index.html index 82f9ea7..45dd04f 100644 --- a/realtime/transcribe/index.html +++ b/realtime/transcribe/index.html @@ -69,7 +69,8 @@ - - - - + + + + + diff --git a/realtime/transcribe/main.js b/realtime/transcribe/main.js index 012c914..7a64f79 100644 --- a/realtime/transcribe/main.js +++ b/realtime/transcribe/main.js @@ -1,164 +1,122 @@ -const APP_PREFIX = "realtime/transcribe/"; -const $ = document.querySelector.bind(document); -const apiKeyEl = $("#openai-api-key"); -const modelEl = $("#model"); -const promptEl = $("#prompt"); -const turnDetectionEl = $("#turn-detection"); -const transcriptEl = $("#transcript"); -const startMicrophoneEl = $("#start-microphone"); -const startFileEl = $("#start-file"); -const stopEl = $("#stop"); -const audioInputEl = $("#audio-file"); -const statusEl = $("#status"); -const prefs = [apiKeyEl, modelEl, promptEl, turnDetectionEl]; - -let session = null; -let sessionConfig = null; -let vadTime = 0; - -function initState() { - prefs.forEach(p => { - const fqid = p.id != "openai-api-key" ? APP_PREFIX + p.id : p.id; - const v = localStorage.getItem(fqid); - if (v) { - p.value = v; - } - p.addEventListener("change", () => { - localStorage.setItem(fqid, p.value); - }); - }); - updateState(false); -} - -function updateState(started) { - statusEl.textContent = ""; - prefs.forEach(p => p.disabled = started); - startMicrophoneEl.disabled = started; - startFileEl.disabled = started; - stopEl.disabled = !started; -} +class TranscribeApp extends App { + constructor() { + const $ = document.querySelector.bind(document); + const apiKeyEl = $("#openai-api-key"); + const modelEl = $("#model"); + const promptEl = $("#prompt"); + const turnDetectionEl = $("#turn-detection"); + const transcriptEl = $("#transcript"); + const startBtn = $("#start-microphone"); + const startFileBtn = $("#start-file"); + const stopBtn = $("#stop"); + const audioInputEl = $("#audio-file"); + const statusEl = $("#status"); + const prefs = [apiKeyEl, modelEl, promptEl, turnDetectionEl]; + super("realtime/transcribe/", prefs, { startBtn, stopBtn, statusEl }, "transcription"); + this.modelEl = modelEl; + this.promptEl = promptEl; + this.turnDetectionEl = turnDetectionEl; + this.transcriptEl = transcriptEl; + this.audioInputEl = audioInputEl; + this.startFileBtn = startFileBtn; + this.sessionConfig = null; + this.vadTime = 0; + this.initState(); + } -async function startMicrophone() { - if (!apiKeyEl.value) { - window.alert("Please enter your OpenAI API Key. 
+    if (message.type === "input_audio_buffer.speech_stopped") {
+      this.startTime = performance.now();
+    } else if (message.type === "response.created") {
+      this.outputEl.value = "";
+    } else if (message.type === "response.text.delta") {
+      if (this.startTime) {
+        const duration = performance.now() - this.startTime;
+        this.controls.statusEl.textContent = `${duration.toFixed(0)}ms`;
+        this.startTime = null;
+      }
+      this.outputEl.value += message.delta;
     }
-    outputEl.value += message.delta;
   }
 }
 
-function handleError(e) {
-  console.error(e);
-  stop();
-}
-
-initState();
+const app = new TextApp();
+window.startMicrophone = () => app.startMicrophone();
+window.stop = () => app.stop();
diff --git a/realtime/transcribe/index.html b/realtime/transcribe/index.html
index 82f9ea7..45dd04f 100644
--- a/realtime/transcribe/index.html
+++ b/realtime/transcribe/index.html
@@ -69,7 +69,8 @@
-
-
-
-
+
+
+
+
+
diff --git a/realtime/transcribe/main.js b/realtime/transcribe/main.js
index 012c914..7a64f79 100644
--- a/realtime/transcribe/main.js
+++ b/realtime/transcribe/main.js
@@ -1,164 +1,122 @@
-const APP_PREFIX = "realtime/transcribe/";
-const $ = document.querySelector.bind(document);
-const apiKeyEl = $("#openai-api-key");
-const modelEl = $("#model");
-const promptEl = $("#prompt");
-const turnDetectionEl = $("#turn-detection");
-const transcriptEl = $("#transcript");
-const startMicrophoneEl = $("#start-microphone");
-const startFileEl = $("#start-file");
-const stopEl = $("#stop");
-const audioInputEl = $("#audio-file");
-const statusEl = $("#status");
-const prefs = [apiKeyEl, modelEl, promptEl, turnDetectionEl];
-
-let session = null;
-let sessionConfig = null;
-let vadTime = 0;
-
-function initState() {
-  prefs.forEach(p => {
-    const fqid = p.id != "openai-api-key" ? APP_PREFIX + p.id : p.id;
-    const v = localStorage.getItem(fqid);
-    if (v) {
-      p.value = v;
-    }
-    p.addEventListener("change", () => {
-      localStorage.setItem(fqid, p.value);
-    });
-  });
-  updateState(false);
-}
-
-function updateState(started) {
-  statusEl.textContent = "";
-  prefs.forEach(p => p.disabled = started);
-  startMicrophoneEl.disabled = started;
-  startFileEl.disabled = started;
-  stopEl.disabled = !started;
-}
+class TranscribeApp extends App {
+  constructor() {
+    const $ = document.querySelector.bind(document);
+    const apiKeyEl = $("#openai-api-key");
+    const modelEl = $("#model");
+    const promptEl = $("#prompt");
+    const turnDetectionEl = $("#turn-detection");
+    const transcriptEl = $("#transcript");
+    const startBtn = $("#start-microphone");
+    const startFileBtn = $("#start-file");
+    const stopBtn = $("#stop");
+    const audioInputEl = $("#audio-file");
+    const statusEl = $("#status");
+    const prefs = [apiKeyEl, modelEl, promptEl, turnDetectionEl];
+    super("realtime/transcribe/", prefs, { startBtn, stopBtn, statusEl }, "transcription");
+    this.modelEl = modelEl;
+    this.promptEl = promptEl;
+    this.turnDetectionEl = turnDetectionEl;
+    this.transcriptEl = transcriptEl;
+    this.audioInputEl = audioInputEl;
+    this.startFileBtn = startFileBtn;
+    this.sessionConfig = null;
+    this.vadTime = 0;
+    this.initState();
+  }
 
-async function startMicrophone() {
-  if (!apiKeyEl.value) {
-    window.alert("Please enter your OpenAI API Key. You can obtain one from https://platform.openai.com/settings/organization/api-keys");
-    return;
+  updateState(started) {
+    super.updateState(started);
+    this.startFileBtn.disabled = started;
+    if (!started) this.transcriptEl.value = "";
   }
-  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-  start(stream);
-}
 
-function selectFile() {
-  $('#audio-file-picker').click();
-}
+  buildSessionConfig() {
+    return {
+      input_audio_transcription: {
+        model: this.modelEl.value,
+        prompt: this.promptEl.value || undefined,
+      },
+      turn_detection: {
+        type: this.turnDetectionEl.value,
+      },
+    };
+  }
 
-function handleFileSelect(e) {
-  console.log(e);
-  const file = e.target.files[0];
-  if (file) {
-    console.log(file);
-    audioInputEl.src = URL.createObjectURL(file);
+  async start(stream) {
+    this.transcriptEl.value = "";
+    await super.start(stream);
   }
-  startFile();
-}
 
-async function startFile() {
-  if (!apiKeyEl.value) {
-    window.alert("Please enter your OpenAI API Key. You can obtain one from https://platform.openai.com/settings/organization/api-keys");
-    return;
+  stop() {
+    // Also pause any file playback, matching the behavior of the old stop().
+    this.audioInputEl.pause();
+    super.stop();
+  }
+
+  async startFile() {
+    if (!this.getApiKey()) {
+      window.alert("Please enter your OpenAI API Key. You can obtain one from https://platform.openai.com/settings/organization/api-keys");
+      return;
+    }
+    this.audioInputEl.currentTime = 0;
+    this.audioInputEl.onended = () => {
+      setTimeout(() => this.stop(), 3000);
+    };
+    if (this.audioInputEl.readyState !== HTMLMediaElement.HAVE_METADATA) {
+      await new Promise(resolve => {
+        this.audioInputEl.onloadedmetadata = resolve;
+      });
+    }
+    const stream = this.audioInputEl.captureStream();
+    await this.start(stream);
+    await this.audioInputEl.play();
   }
-  audioInputEl.currentTime = 0;
-  audioInputEl.onended = () => {
-    // When the input file ends, give the transcription time to complete.
-    setTimeout(() => stop() , 3000);
-  };
-  // Can't play until we have metadata.
-  if (audioInputEl.readyState !== HTMLMediaElement.HAVE_METADATA) {
-    await new Promise(resolve => {
-      audioInputEl.onloadedmetadata = resolve;
-    });
+
+  selectFile() {
+    document.querySelector('#audio-file-picker').click();
   }
-  const stream = audioInputEl.captureStream();
-  await start(stream);
-  await audioInputEl.play();
-}
 
-async function start(stream) {
-  updateState(true);
-  transcriptEl.value = "";
-  session = new Session(apiKeyEl.value);
-  session.onconnectionstatechange = state => statusEl.textContent = state;
-  session.onmessage = parsed => handleMessage(parsed);
-  session.onerror = e => handleError(e);
-  const sessionConfig = {
-    input_audio_transcription: {
-      model: modelEl.value,
-      prompt: promptEl.value || undefined,
-    },
-    turn_detection: {
-      type: turnDetectionEl.value,
+  handleFileSelect(e) {
+    const file = e.target.files[0];
+    if (file) {
+      this.audioInputEl.src = URL.createObjectURL(file);
     }
+    this.startFile();
   }
-  await session.startTranscription(stream, sessionConfig);
-}
-
-function stop() {
-  updateState(false);
-  audioInputEl.pause();
-  session?.stop();
-  session = null;
-}
 
-function handleMessage(parsed) {
-  console.log(parsed);
-  let transcript = null;
-  switch (parsed.type) {
-    case "transcription_session.created":
-      sessionConfig = parsed.session;
-      console.log("session created: " + sessionConfig.id);
-      break;
-    case "input_audio_buffer.speech_started":
-      transcript = {
-        transcript: "...",
-        partial: true,
-      }
-      handleTranscript(transcript);
-      break;
-    case "input_audio_buffer.speech_stopped":
-      transcript = {
-        transcript: "***",
-        partial: true,
-      }
-      handleTranscript(transcript);
-      vadTime = performance.now() - sessionConfig.turn_detection.silence_duration_ms;
-      break;
-    //case "conversation.item.input_audio_transcription.delta":
-    //  transcriptEl.value += parsed.delta;
-    //  break;
-    case "conversation.item.input_audio_transcription.completed":
-      const elapsed = performance.now() - vadTime;
-      transcript = {
-        transcript: parsed.transcript,
-        partial: false,
-        latencyMs: elapsed.toFixed(0)
-      }
-      handleTranscript(transcript);
-      break;
-  }
-}
+  onMessage(parsed) {
+    console.log(parsed);
+    let transcript = null;
+    switch (parsed.type) {
+      case "transcription_session.created":
+        this.sessionConfig = parsed.session;
+        console.log("session created: " + this.sessionConfig.id);
+        break;
+      case "input_audio_buffer.speech_started":
+        transcript = { transcript: "...", partial: true };
+        this.handleTranscript(transcript);
+        break;
+      case "input_audio_buffer.speech_stopped":
+        transcript = { transcript: "***", partial: true };
+        this.handleTranscript(transcript);
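+        // The VAD reports the stop only after silence_duration_ms of silence,
+        // so back-date the reference point used for the latency measurement.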
+        this.vadTime = performance.now() - this.sessionConfig.turn_detection.silence_duration_ms;
+        break;
+      case "conversation.item.input_audio_transcription.completed":
+        const elapsed = performance.now() - this.vadTime;
+        transcript = { transcript: parsed.transcript, partial: false, latencyMs: elapsed.toFixed(0) };
+        this.handleTranscript(transcript);
+        break;
+    }
+  }
 
-function handleTranscript(transcript) {
-  const lastNewline = transcriptEl.value.lastIndexOf("\n");
-  transcriptEl.value = transcriptEl.value.substring(0, lastNewline + 1);
-  transcriptEl.value += transcript.transcript;
-  if (!transcript.partial) {
-    transcriptEl.value += '\r\n';
-  }
-  transcriptEl.scrollTop = transcriptEl.scrollHeight;
-}
+  handleTranscript(transcript) {
+    const lastNewline = this.transcriptEl.value.lastIndexOf("\n");
+    this.transcriptEl.value = this.transcriptEl.value.substring(0, lastNewline + 1);
+    this.transcriptEl.value += transcript.transcript;
+    if (!transcript.partial) {
+      this.transcriptEl.value += '\r\n';
+    }
+    this.transcriptEl.scrollTop = this.transcriptEl.scrollHeight;
+  }
+}
 
-function handleError(e) {
-  console.error(e);
-  stop();
-}
-
-initState();
+const app = new TranscribeApp();
+window.startMicrophone = () => app.startMicrophone();
+window.startFile = () => app.startFile();
+window.selectFile = () => app.selectFile();
+window.handleFileSelect = e => app.handleFileSelect(e);
+window.stop = () => app.stop();
diff --git a/realtime/vibecoder/index.html b/realtime/vibecoder/index.html
index 8eb3293..6da8e72 100644
--- a/realtime/vibecoder/index.html
+++ b/realtime/vibecoder/index.html
@@ -57,6 +57,7 @@
+    <script src="../app.js"></script>
diff --git a/realtime/vibecoder/main.js b/realtime/vibecoder/main.js
index 9de97c3..8160f5f 100644
--- a/realtime/vibecoder/main.js
+++ b/realtime/vibecoder/main.js
@@ -1,16 +1,8 @@
-const $ = document.querySelector.bind(document);
-const instructions = $("#instructions");
-const startBtn = $("#startBtn");
-const muteBtn = $("#muteBtn");
-const statusEl = $("#status");
-startBtn.addEventListener('click', start);
-muteBtn.addEventListener('click', mute);
-
 const API_BASE = 'https://api.openai.com/v1';
 const INSTRUCTIONS = `
 # Personality and Tone
 ## Identity
-You are a young, talented, and eager coder who just can’t wait to crank out some new apps for your client. 
+You are a young, talented, and eager coder who just can’t wait to crank out some new apps for your client.
 
 ## Task
 Your main goal is to gather requirements from your client and turn that into a rich, detailed description
@@ -34,162 +26,163 @@ Often. Although you strive for clarity, those little “um” and “uh” moments pop
 
 ## Pacing
 Your speech is on the faster side, thanks to your enthusiasm, sometimes verging into manic speech. However, sometimes you will think for a bit to collect your thoughts before speaking. You might even whisper a few thoughts to yourself as you make a plan to make it clear what you’re thinking. Greet the user at the beginning of the conversation.
- 
+
 ## Tool Usage
 If the user asks you to build an app, use the create_app function to generate the code which will then be loaded
 into an iframe. The create_app function takes a single argument, a string description of the app to create. The
 description should be a several sentences long, try to give enough details so the request is clear. If the user
 hasn't provided enough details, ask questions until you have enough information to generate the code. When you
 are ready to go, tell the user that you are about to create the app.`;
+
 const SESSION_PARAMS = {
   instructions: INSTRUCTIONS,
-  model: "gpt-4o-realtime-preview",
-  voice: "echo",
+  model: 'gpt-4o-realtime-preview',
+  voice: 'echo',
   tools: [
     {
-      type: "function",
-      name: "create_app",
-      description: "Use this function to create a new app with the given description.",
+      type: 'function',
+      name: 'create_app',
+      description: 'Use this function to create a new app with the given description.',
       parameters: {
-        type: "object",
+        type: 'object',
         properties: {
-          description: { type: "string", description: "The description of the app to create." },
+          description: { type: 'string', description: 'The description of the app to create.' },
         },
-        required: ["description"],
+        required: ['description'],
      },
    },
-  ]
+  ],
 };
 
-let session = null;
-let previousTurn = [];
-
-async function start() {
-  if (session) {
-    startBtn.textContent = "Start";
-    statusEl.textContent = "";
-    stop();
-    return;
-  }
-
-  const apiKey = getApiKey();
-  if (!apiKey) {
-    window.alert('An OpenAI API key is required to use this application. You can obtain one from https://platform.openai.com/settings/organization/api-keys');
-    return;
+
+class VibeCoderApp extends App {
+  constructor() {
+    const $ = document.querySelector.bind(document);
+    const instructions = $("#instructions");
+    const startBtn = $("#startBtn");
+    const muteBtn = $("#muteBtn");
+    const statusEl = $("#status");
+    super('realtime/vibecoder/', [], { startBtn, statusEl });
+    this.instructions = instructions;
+    this.startBtn = startBtn;
+    this.muteBtn = muteBtn;
+    this.previousTurn = [];
+    startBtn.addEventListener('click', () => this.toggle());
+    muteBtn.addEventListener('click', () => this.toggleMute());
+    this.initState();
   }
-
-  startBtn.textContent = "Stop";
-  const stream = await navigator.mediaDevices.getUserMedia({audio: true});
-  session = new Session(apiKey);
-  session.ontrack = (e) => handleTrack(e);
-  session.onopen = () => handleOpen();
-  session.onmessage = (e) => handleMessage(e);
-  session.onerror = (e) => handleError(e);
-  await session.start(stream, SESSION_PARAMS);
-}
 
-function mute() {
-  session.mute(!session.muted);
-  muteBtn.textContent = session.muted ? "Unmute" : "Mute";
-}
+  getApiKey() {
+    return getApiKey();
+  }
 
-function handleTrack(e) {
-  const audio = new Audio();
-  audio.srcObject = e.streams[0];
-  audio.play();
-}
+  updateState(started) {
+    super.updateState(started);
+    this.startBtn.textContent = started ? 'Stop' : 'Start';
+  }
 
-async function handleOpen(e) {
-  statusEl.textContent = "connected";
-  const createResponse = { type: "response.create" };
-  session.sendMessage(createResponse);
-}
+  buildSessionConfig() {
+    return SESSION_PARAMS;
+  }
 
-function handleError(e) {
-  console.error(e);
-  stop();
-}
+  async toggle() {
+    if (this.session) {
+      this.stop();
+    } else {
+      await this.startMicrophone();
+    }
+  }
 
-async function handleMessage(msg) {
-  switch (msg.type) {
-    case "response.function_call_arguments.done":
-      if (msg.name === "create_app") {
-        const description = JSON.parse(msg.arguments).description;
-        instructions.value = description;
-        statusEl.textContent = "Generating app...";
-        const code = await generateApp(description, previousTurn);
-        loadApp(code);
-        statusEl.textContent = "";
-      }
-      break;
+  toggleMute() {
+    this.mute(!this.session?.muted);
+    this.muteBtn.textContent = this.session?.muted ? 'Unmute' : 'Mute';
   }
-}
 
-/**
- * Extracts code content from markdown code blocks.
- * Assumes the code is wrapped in triple backticks (optionally with "html").
- * @param {string} markdown - The markdown text from the API response.
- * @returns {string|null} The extracted code, or null if not found.
- */
-function extractCode(markdown) {
-  // Regex captures the code inside triple backticks (optional html language tag)
-  const regex = /```(?:html)?\n([\s\S]*?)```/;
-  const match = regex.exec(markdown);
-  return match ? match[1].trim() : null;
-}
+  onTrack(e) {
+    const audio = new Audio();
+    audio.srcObject = e.streams[0];
+    audio.play();
+  }
+
+  onOpen() {
+    this.controls.statusEl.textContent = 'connected';
+    this.sendMessage({ type: 'response.create' });
+  }
+
+  async onMessage(msg) {
+    switch (msg.type) {
+      case 'response.function_call_arguments.done':
+        if (msg.name === 'create_app') {
+          const description = JSON.parse(msg.arguments).description;
+          this.instructions.value = description;
+          this.controls.statusEl.textContent = 'Generating app...';
+          const code = await this.generateApp(description, this.previousTurn);
+          this.loadApp(code);
+          this.controls.statusEl.textContent = '';
+        }
+        break;
+    }
+  }
+
+  onError(e) {
+    console.error(e);
+    this.stop();
+  }
 
-/**
- * Calls the OpenAI API with the user description to generate a web app,
- * extracts the HTML code, stores it in a blob, and loads it in an iframe.
- */
-async function generateApp(description, previousTurn = []) {
-  const PROMPT = `
+  async generateApp(description, previousTurn = []) {
+    const PROMPT = `
 Generate a single page HTML/JS app as a complete HTML document. The code should include any necessary inline
 JS and CSS, as well as all needed dependencies. Place the code in a single markdown code block.
 `;
-  const payload = {
-    model: "o4-mini",
-    messages: [
-      {
-        role: "system",
-        content: PROMPT
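+    // previousTurn carries the prior user/assistant exchange (if any) so a
+    // follow-up request refines the existing app rather than starting over.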
+    const payload = {
+      model: 'o4-mini',
+      messages: [
+        { role: 'system', content: PROMPT },
+        ...previousTurn,
+        { role: 'user', content: description },
+      ],
+    };
+
+    const response = await fetch(`${API_BASE}/chat/completions`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Bearer ${this.getApiKey()}`,
       },
-      ...previousTurn,
-      { role: "user", content: description }
-    ]
-  };
-
-  const response = await fetch(`${API_BASE}/chat/completions`, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      'Authorization': `Bearer ${getApiKey()}`
-    },
-    body: JSON.stringify(payload)
-  });
-  if (!response.ok) {
-    throw new Error(`API error: ${response.status}`);
+      body: JSON.stringify(payload),
+    });
+    if (!response.ok) {
+      throw new Error(`API error: ${response.status}`);
+    }
+
+    const data = await response.json();
+    const content = data.choices &&
+      data.choices[0] &&
+      data.choices[0].message &&
+      data.choices[0].message.content;
+    if (!content) {
+      throw new Error('Invalid API response format.');
+    }
+
+    previousTurn.push({ role: 'user', content: description });
+    previousTurn.push({ role: 'assistant', content });
+
+    const code = this.extractCode(content);
+    if (!code) {
+      throw new Error('Could not extract code from API response.');
+    }
+
+    return code;
   }
 
-  const data = await response.json();
-  const content = data.choices &&
-    data.choices[0] &&
-    data.choices[0].message &&
-    data.choices[0].message.content;
-  if (!content) {
-    throw new Error('Invalid API response format.');
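+  // Pulls the code out of the first fenced block (optionally tagged "html")
+  // in the completion's markdown.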
+  extractCode(markdown) {
+    const regex = /```(?:html)?\n([\s\S]*?)```/;
+    const match = regex.exec(markdown);
+    return match ? match[1].trim() : null;
+  }
 
-  previousTurn.push({role: "user", content: description});
-  previousTurn.push({role: "assistant", content});
-
-  const code = extractCode(content);
-  if (!code) {
-    throw new Error('Could not extract code from API response.');
+  loadApp(code) {
+    const iframe = document.getElementById('app');
+    iframe.src = 'data:text/html;charset=utf-8,' + encodeURIComponent(code);
   }
-
-  return code;
-}
-
-function loadApp(code) {
-  const iframe = document.getElementById('app');
-  iframe.src = 'data:text/html;charset=utf-8,' + encodeURIComponent(code);
-}
+}
+
+const app = new VibeCoderApp();