From f3c22903aeb3f2c1c7c5b9972f840b60240b4e72 Mon Sep 17 00:00:00 2001 From: toanpro Date: Tue, 3 Feb 2026 11:22:09 +0700 Subject: [PATCH 1/2] feat: support custom OpenAI-compatible whisper endpoints Add support for OPENAI_WHISPER_BASE_URL and OPENAI_BASE_URL environment variables to allow using custom OpenAI-compatible whisper endpoints for audio transcription. This enables users to use self-hosted whisper servers or alternative providers that implement the OpenAI whisper API specification. Priority: OPENAI_WHISPER_BASE_URL > OPENAI_BASE_URL > default (api.openai.com) --- packages/core/src/transcription/whisper/openai.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/core/src/transcription/whisper/openai.ts b/packages/core/src/transcription/whisper/openai.ts index 6ef1d72b..2acc8536 100644 --- a/packages/core/src/transcription/whisper/openai.ts +++ b/packages/core/src/transcription/whisper/openai.ts @@ -5,7 +5,8 @@ export async function transcribeWithOpenAi( bytes: Uint8Array, mediaType: string, filename: string | null, - apiKey: string + apiKey: string, + baseUrl?: string | null ): Promise { const form = new FormData() const providedName = filename?.trim() ? filename.trim() : 'media' @@ -14,7 +15,14 @@ export async function transcribeWithOpenAi( form.append('file', new Blob([toArrayBuffer(bytes)], { type: mediaType }), safeName) form.append('model', 'whisper-1') - const response = await globalThis.fetch('https://api.openai.com/v1/audio/transcriptions', { + // Support custom OpenAI-compatible whisper endpoints via OPENAI_WHISPER_BASE_URL or OPENAI_BASE_URL + const effectiveBaseUrl = baseUrl + ?? process.env.OPENAI_WHISPER_BASE_URL + ?? process.env.OPENAI_BASE_URL + ?? 
'https://api.openai.com/v1' + const transcriptionUrl = `${effectiveBaseUrl.replace(/\/+$/, '')}/audio/transcriptions` + + const response = await globalThis.fetch(transcriptionUrl, { method: 'POST', headers: { Authorization: `Bearer ${apiKey}` }, body: form, From bef693a91e6515811507235c8d1fb71cdc0cb2e7 Mon Sep 17 00:00:00 2001 From: toanpro Date: Tue, 3 Feb 2026 11:36:18 +0700 Subject: [PATCH 2/2] fix: add missing media extensions to isDirectMediaUrl Add ogg, opus, aiff, wma, mpeg, mpg, avi, wmv, flv to the media URL detection regex. This fixes OGG audio files (common in Telegram voice messages) being incorrectly routed through the HTML fetcher instead of the media transcription handler. --- packages/core/src/content/url.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/content/url.ts b/packages/core/src/content/url.ts index b775df91..7c55ffda 100644 --- a/packages/core/src/content/url.ts +++ b/packages/core/src/content/url.ts @@ -73,7 +73,7 @@ export function extractYouTubeVideoId(rawUrl: string): string | null { } export function isDirectMediaUrl(url: string): boolean { - return /\.(mp4|mov|m4v|mkv|webm|mp3|m4a|wav|flac|aac)(\?|#|$)/i.test(url) + return /\.(mp4|mov|m4v|mkv|webm|mpeg|mpg|avi|wmv|flv|mp3|m4a|wav|flac|aac|ogg|opus|aiff|wma)(\?|#|$)/i.test(url) } export function shouldPreferUrlMode(url: string): boolean {