diff --git a/.env.example b/.env.example index 79671e72c..984a9dc85 100644 --- a/.env.example +++ b/.env.example @@ -54,11 +54,16 @@ GROK_API_KEY= GROK_BASE_URL= GROK_MODELS= -# --- TTS (Text-to-Speech) ---------------------------------------------------- +# --- TTS (Text-to-Speech) --- TTS_OPENAI_API_KEY= TTS_OPENAI_BASE_URL= +TTS_OPENAI_COMPATIBLE_API_KEY= +TTS_OPENAI_COMPATIBLE_BASE_URL= +# Optional custom models (comma-separated): gpt-4o-mini-tts,tts-1,custom-model +TTS_OPENAI_COMPATIBLE_MODELS= + TTS_AZURE_API_KEY= TTS_AZURE_BASE_URL= @@ -74,11 +79,16 @@ TTS_MINIMAX_BASE_URL=https://api.minimaxi.com TTS_ELEVENLABS_API_KEY= TTS_ELEVENLABS_BASE_URL= -# --- ASR (Automatic Speech Recognition) -------------------------------------- +# --- ASR (Automatic Speech Recognition) --- ASR_OPENAI_API_KEY= ASR_OPENAI_BASE_URL= +ASR_OPENAI_COMPATIBLE_API_KEY= +ASR_OPENAI_COMPATIBLE_BASE_URL= +# Optional custom models (comma-separated): whisper-1,custom-model +ASR_OPENAI_COMPATIBLE_MODELS= + ASR_QWEN_API_KEY= ASR_QWEN_BASE_URL= @@ -90,7 +100,7 @@ PDF_UNPDF_BASE_URL= PDF_MINERU_API_KEY= PDF_MINERU_BASE_URL= -# --- Image Generation --------------------------------------------------------- +# --- Image Generation --- IMAGE_SEEDREAM_API_KEY= IMAGE_SEEDREAM_BASE_URL= @@ -108,7 +118,12 @@ IMAGE_MINIMAX_BASE_URL=https://api.minimaxi.com IMAGE_GROK_API_KEY= IMAGE_GROK_BASE_URL= -# --- Video Generation --------------------------------------------------------- +IMAGE_OPENAI_COMPATIBLE_API_KEY= +IMAGE_OPENAI_COMPATIBLE_BASE_URL= +# Optional custom models (comma-separated): dall-e-3,custom-model +IMAGE_OPENAI_COMPATIBLE_MODELS= + +# --- Video Generation --- VIDEO_SEEDANCE_API_KEY= VIDEO_SEEDANCE_BASE_URL= @@ -129,6 +144,11 @@ VIDEO_MINIMAX_BASE_URL=https://api.minimaxi.com VIDEO_GROK_API_KEY= VIDEO_GROK_BASE_URL= +VIDEO_OPENAI_COMPATIBLE_API_KEY= +VIDEO_OPENAI_COMPATIBLE_BASE_URL= +# Optional custom models (comma-separated): grok-imagine-video,custom-model +VIDEO_OPENAI_COMPATIBLE_MODELS= 
+ # --- Web Search --------------------------------------------------------------- # Note: Grok (xAI) web search is available via chat completions + search tools, # not as a standalone search API. Use Grok LLM provider with search_parameters diff --git a/app/api/verify-asr-provider/route.ts b/app/api/verify-asr-provider/route.ts new file mode 100644 index 000000000..fa3d66d4d --- /dev/null +++ b/app/api/verify-asr-provider/route.ts @@ -0,0 +1,123 @@ +/** + * Verify ASR Provider API + * + * Lightweight endpoint that validates ASR provider credentials. + * + * POST /api/verify-asr-provider + * + * Body: + * providerId: ASRProviderId + * apiKey: string (optional, server fallback) + * baseUrl: string (optional) + * modelId: string (optional) + * language: string (optional) + * + * Response: { success: boolean, message: string } + */ + +import { NextRequest } from 'next/server'; +import { transcribeAudio } from '@/lib/audio/asr-providers'; +import { ASR_PROVIDERS } from '@/lib/audio/constants'; +import type { ASRProviderId, ASRModelConfig } from '@/lib/audio/types'; +import { apiError, apiSuccess } from '@/lib/server/api-response'; +import { createLogger } from '@/lib/logger'; + +const log = createLogger('VerifyASRProvider'); + +/** + * Create a minimal test audio buffer (100ms of silence at 16kHz) + */ +function createTestAudioBuffer(): Buffer { + const sampleRate = 16000; + const duration = 0.1; // 100ms + const samples = Math.floor(sampleRate * duration); + + // Create WAV buffer with minimal valid WAV header + const buffer = Buffer.alloc(44 + samples * 2); + + // WAV header + buffer.write('RIFF'); + buffer.writeUInt32LE(36 + samples * 2, 4); + buffer.write('WAVE', 8); + buffer.write('fmt ', 12); + buffer.writeUInt32LE(16, 16); // fmt chunk size + buffer.writeUInt16LE(1, 20); // PCM format + buffer.writeUInt16LE(1, 22); // Mono + buffer.writeUInt32LE(sampleRate, 24); // Sample rate + buffer.writeUInt32LE(sampleRate * 2, 28); // Byte rate + buffer.writeUInt16LE(2, 
32); // Block align + buffer.writeUInt16LE(16, 34); // Bits per sample + buffer.write('data', 36); + buffer.writeUInt32LE(samples * 2, 40); + + // Audio data (silence) + for (let i = 0; i < samples; i++) { + buffer.writeInt16LE(0, 44 + i * 2); + } + + return buffer; +} + +export async function POST(request: NextRequest) { + try { + const body = await request.json() as { + providerId: ASRProviderId; + apiKey?: string; + baseUrl?: string; + modelId?: string; + language?: string; + }; + + const { providerId, apiKey, baseUrl, modelId, language } = body; + + if (!providerId) { + return apiError('MISSING_REQUIRED_FIELD', 400, 'Provider ID is required'); + } + + const provider = ASR_PROVIDERS[providerId]; + if (!provider) { + return apiError('INVALID_REQUEST', 400, `Unknown ASR provider: ${providerId}`); + } + + if (!apiKey) { + return apiError('MISSING_API_KEY', 400, 'API key is required'); + } + + try { + const config: ASRModelConfig = { + providerId, + apiKey, + baseUrl: baseUrl || provider.defaultBaseUrl, + modelId: modelId || provider.defaultModelId, + language: language || 'auto', + }; + + // Create test audio buffer + const testAudio = createTestAudioBuffer(); + + // Try to transcribe test audio + const result = await transcribeAudio(config, testAudio); + + // For silence, we expect either an error or empty result, which is fine + // The important thing is that the API accepted our request with valid auth + return apiSuccess({ + success: true, + message: 'ASR provider connection successful', + }); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + log.warn('ASR verification failed:', { providerId, error: message }); + + // Check if it's an auth error + if (message.includes('401') || message.includes('Unauthorized') || message.includes('API key')) { + return apiError('INVALID_REQUEST', 401, 'Authentication failed: Check your API key'); + } + + return apiError('UPSTREAM_ERROR', 400, `ASR verification failed: ${message}`); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log.error('Verify ASR provider error:', message); + return apiError('INTERNAL_ERROR', 500, `Internal error: ${message}`); + } +} diff --git a/app/api/verify-tts-provider/route.ts b/app/api/verify-tts-provider/route.ts new file mode 100644 index 000000000..f7c06d4b1 --- /dev/null +++ b/app/api/verify-tts-provider/route.ts @@ -0,0 +1,87 @@ +/** + * Verify TTS Provider API + * + * Lightweight endpoint that validates TTS provider credentials. + * + * POST /api/verify-tts-provider + * + * Body: + * providerId: TTSProviderId + * apiKey: string (optional, server fallback) + * baseUrl: string (optional) + * modelId: string (optional) + * + * Response: { success: boolean, message: string } + */ + +import { NextRequest } from 'next/server'; +import { generateTTS } from '@/lib/audio/tts-providers'; +import { TTS_PROVIDERS } from '@/lib/audio/constants'; +import type { TTSProviderId, TTSModelConfig } from '@/lib/audio/types'; +import { apiError, apiSuccess } from '@/lib/server/api-response'; +import { createLogger } from '@/lib/logger'; + +const log = createLogger('VerifyTTSProvider'); + +export async function POST(request: NextRequest) { + try { + const body = await request.json() as { + providerId: TTSProviderId; + apiKey?: string; + baseUrl?: string; + modelId?: string; + }; + + const { providerId, apiKey, baseUrl, modelId } = body; + + if (!providerId) { + return apiError('MISSING_REQUIRED_FIELD', 400, 'Provider ID is required'); + } + + const provider = TTS_PROVIDERS[providerId]; 
+ if (!provider) { + return apiError('INVALID_REQUEST', 400, `Unknown TTS provider: ${providerId}`); + } + + if (!apiKey) { + return apiError('MISSING_API_KEY', 400, 'API key is required'); + } + + try { + const config: TTSModelConfig = { + providerId, + apiKey, + baseUrl: baseUrl || provider.defaultBaseUrl, + modelId: modelId || provider.defaultModelId, + voice: provider.voices[0]?.id || 'default', + speed: 1.0, + }; + + // Try to generate test TTS + const result = await generateTTS(config, 'test'); + + if (result.audio && result.audio.length > 0) { + return apiSuccess({ + success: true, + message: 'TTS provider connection successful', + }); + } else { + return apiError('UPSTREAM_ERROR', 500, 'TTS generation returned empty audio'); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + log.warn('TTS verification failed:', { providerId, error: message }); + + // Check if it's an auth error + if (message.includes('401') || message.includes('Unauthorized') || message.includes('API key')) { + return apiError('INVALID_REQUEST', 401, 'Authentication failed: Check your API key'); + } + + return apiError('UPSTREAM_ERROR', 400, `TTS verification failed: ${message}`); + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + log.error('Verify TTS provider error:', message); + return apiError('INTERNAL_ERROR', 500, `Internal error: ${message}`); + } +} diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index b63b4eb69..d7c935719 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -722,6 +722,25 @@ function GenerationPreviewContent() { const audioId = `tts_${action.id}`; action.audioId = audioId; try { + // For OpenAI Compatible TTS, prioritize custom models + let ttsModelId = ttsProviderConfig?.modelId; + if ( + settings.ttsProviderId === 'openai-compatible-tts' && + ttsProviderConfig?.customModels?.length + ) { + // Use first custom model if available + ttsModelId = ttsProviderConfig.customModels[0].id; + } + + // For OpenAI Compatible TTS, use custom voice if configured + let ttsVoice = settings.ttsVoice; + if (settings.ttsProviderId === 'openai-compatible-tts') { + const customVoice = ttsProviderConfig?.providerOptions?.customVoice as string | undefined; + if (customVoice?.trim()) { + ttsVoice = customVoice.trim(); + } + } + const resp = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -729,8 +748,8 @@ function GenerationPreviewContent() { text: action.text, audioId, ttsProviderId: settings.ttsProviderId, - ttsModelId: ttsProviderConfig?.modelId, - ttsVoice: settings.ttsVoice, + ttsModelId, + ttsVoice, ttsSpeed: settings.ttsSpeed, ttsApiKey: ttsProviderConfig?.apiKey || undefined, ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined, diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 9379d9353..903aac867 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -102,6 +102,16 @@ function AgentVoicePill({ const controller = new AbortController(); previewAbortRef.current = controller; const providerConfig = ttsProvidersConfig[providerId]; + + // For OpenAI Compatible TTS, use custom 
voice if configured + let ttsVoice = voiceId; + if (providerId === 'openai-compatible-tts') { + const customVoice = providerConfig?.providerOptions?.customVoice as string | undefined; + if (customVoice?.trim()) { + ttsVoice = customVoice.trim(); + } + } + const res = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -110,7 +120,7 @@ function AgentVoicePill({ audioId: 'voice-preview', ttsProviderId: providerId, ttsModelId: modelId || providerConfig?.modelId, - ttsVoice: voiceId, + ttsVoice, ttsSpeed: 1, ttsApiKey: providerConfig?.apiKey, ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index a09a32432..81032ec8e 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -85,6 +85,7 @@ const TABS: Array<{ id: TabId; icon: LucideIcon; label: string }> = [ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string { const names: Record = { 'openai-tts': t('settings.providerOpenAITTS'), + 'openai-compatible-tts': t('settings.providerOpenAICompatibleTTS'), 'azure-tts': t('settings.providerAzureTTS'), 'glm-tts': t('settings.providerGLMTTS'), 'qwen-tts': t('settings.providerQwenTTS'), diff --git a/components/settings/asr-settings.tsx b/components/settings/asr-settings.tsx index 4a0cfb021..45667edc4 100644 --- a/components/settings/asr-settings.tsx +++ b/components/settings/asr-settings.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef } from 'react'; +import { useState, useRef, useCallback, useMemo } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; @@ -15,9 +15,10 @@ import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { ASR_PROVIDERS } from 
'@/lib/audio/constants'; import type { ASRProviderId } from '@/lib/audio/types'; -import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; +import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff, Plus, Settings2, Trash2, ChevronUp, ChevronDown, Star } from 'lucide-react'; import { cn } from '@/lib/utils'; import { createLogger } from '@/lib/logger'; +import { MediaModelEditDialog } from './media-model-edit-dialog'; const log = createLogger('ASRSettings'); @@ -32,15 +33,24 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { const asrProvidersConfig = useSettingsStore((state) => state.asrProvidersConfig); const setASRProviderConfig = useSettingsStore((state) => state.setASRProviderConfig); - const asrProvider = ASR_PROVIDERS[selectedProviderId] ?? ASR_PROVIDERS['openai-whisper']; - const isServerConfigured = !!asrProvidersConfig[selectedProviderId]?.isServerConfigured; - const [showApiKey, setShowApiKey] = useState(false); const [isRecording, setIsRecording] = useState(false); const [asrResult, setASRResult] = useState(''); const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); const mediaRecorderRef = useRef(null); + + // Model dialog state + const [showModelDialog, setShowModelDialog] = useState(false); + const [editingModelIndex, setEditingModelIndex] = useState(null); + const [modelDialogData, setModelDialogData] = useState<{ id: string; name: string }>({ id: '', name: '' }); + + const asrProvider = ASR_PROVIDERS[selectedProviderId] ?? 
ASR_PROVIDERS['openai-whisper']; + const isServerConfigured = !!asrProvidersConfig[selectedProviderId]?.isServerConfigured; + const customModels = useMemo( + () => asrProvidersConfig[selectedProviderId]?.customModels || [], + [asrProvidersConfig[selectedProviderId]?.customModels], + ); // Reset state when provider changes (derived state pattern) const [prevProviderId, setPrevProviderId] = useState(selectedProviderId); @@ -50,6 +60,8 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { setTestStatus('idle'); setTestMessage(''); setASRResult(''); + setShowModelDialog(false); + setEditingModelIndex(null); } const handleToggleASRRecording = async () => { @@ -155,6 +167,60 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { } }; + // Model CRUD + const handleOpenAddModel = () => { + setEditingModelIndex(null); + setModelDialogData({ id: '', name: '' }); + setShowModelDialog(true); + }; + + const handleOpenEditModel = (index: number) => { + setEditingModelIndex(index); + setModelDialogData({ ...customModels[index] }); + setShowModelDialog(true); + }; + + const handleSaveModel = useCallback(() => { + if (!modelDialogData.id.trim()) return; + const newCustomModels = [...customModels]; + if (editingModelIndex !== null) { + newCustomModels[editingModelIndex] = { + id: modelDialogData.id.trim(), + name: modelDialogData.name.trim() || modelDialogData.id.trim(), + }; + } else { + newCustomModels.push({ + id: modelDialogData.id.trim(), + name: modelDialogData.name.trim() || modelDialogData.id.trim(), + }); + } + setASRProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + setShowModelDialog(false); + }, [modelDialogData, editingModelIndex, customModels, selectedProviderId, setASRProviderConfig]); + + const handleDeleteModel = (index: number) => { + const newCustomModels = customModels.filter((_, i) => i !== index); + setASRProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + }; + + const 
handleMoveModel = (fromIndex: number, direction: 'up' | 'down') => { + const toIndex = direction === 'up' ? fromIndex - 1 : fromIndex + 1; + if (toIndex < 0 || toIndex >= customModels.length) return; + + const newCustomModels = [...customModels]; + [newCustomModels[fromIndex], newCustomModels[toIndex]] = [ + newCustomModels[toIndex], + newCustomModels[fromIndex], + ]; + setASRProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + }; + return (
{/* Server-configured notice */} @@ -293,27 +359,106 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
)} - {/* Model Selection */} - {asrProvider.models.length > 0 && ( -
- - + {/* Custom Models Section (OpenAI Compatible) */} + {selectedProviderId === 'openai-compatible-asr' && ( +
+
+ + +
+ +
+ {customModels.map((model, index) => ( +
+
+ {index === 0 && ( + + )} +
+
+ {model.name} + {index === 0 && ( + + {t('settings.defaultModel')} + + )} +
+
{model.id}
+
+
+
+ + + + +
+
+ ))} +
)} + + {/* Model Edit Dialog */} + setModelDialogData((prev) => ({ ...prev, id }))} + modelName={modelDialogData.name} + onModelNameChange={(name) => setModelDialogData((prev) => ({ ...prev, name }))} + apiKey={asrProvidersConfig[selectedProviderId]?.apiKey || ''} + baseUrl={asrProvidersConfig[selectedProviderId]?.baseUrl || ''} + providerId={selectedProviderId} + language={asrLanguage} + onSave={handleSaveModel} + isEditing={editingModelIndex !== null} + />
); } diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index d88590ac0..d195c20f6 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -34,6 +34,7 @@ const log = createLogger('AudioSettings'); function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string { const names: Record = { 'openai-tts': t('settings.providerOpenAITTS'), + 'openai-compatible-tts': t('settings.providerOpenAICompatibleTTS'), 'azure-tts': t('settings.providerAzureTTS'), 'glm-tts': t('settings.providerGLMTTS'), 'qwen-tts': t('settings.providerQwenTTS'), @@ -48,6 +49,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin function getASRProviderName(providerId: ASRProviderId, t: (key: string) => string): string { const names: Record = { 'openai-whisper': t('settings.providerOpenAIWhisper'), + 'openai-compatible-asr': t('settings.providerOpenAICompatibleASR'), 'browser-native': t('settings.providerBrowserNative'), 'qwen-asr': t('settings.providerQwenASR'), }; diff --git a/components/settings/image-settings.tsx b/components/settings/image-settings.tsx index 0931cb2b1..79419af40 100644 --- a/components/settings/image-settings.tsx +++ b/components/settings/image-settings.tsx @@ -4,7 +4,6 @@ import { useState, useCallback, useMemo } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; -import { Dialog, DialogContent, DialogTitle, DialogDescription } from '@/components/ui/dialog'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { IMAGE_PROVIDERS } from '@/lib/media/image-providers'; @@ -18,9 +17,13 @@ import { Plus, Settings2, Trash2, + ChevronUp, + ChevronDown, + Star, } from 'lucide-react'; import { cn } from '@/lib/utils'; import type { ImageProviderId } from '@/lib/media/types'; +import { 
MediaModelEditDialog } from './media-model-edit-dialog'; interface ImageSettingsProps { selectedProviderId: ImageProviderId; @@ -74,11 +77,17 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) { setTestStatus('idle'); setTestMessage(''); try { + // For OpenAI Compatible, use first custom model if available + let model = imageModelId || ''; + if (selectedProviderId === 'openai-compatible-image' && customModels.length > 0) { + model = customModels[0].id; + } + const response = await fetch('/api/verify-image-provider', { method: 'POST', headers: { 'x-image-provider': selectedProviderId, - 'x-image-model': imageModelId || '', + 'x-image-model': model, 'x-api-key': currentConfig?.apiKey || '', 'x-base-url': currentConfig?.baseUrl || '', }, @@ -139,6 +148,20 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) { }); }; + const handleMoveModel = (fromIndex: number, direction: 'up' | 'down') => { + const toIndex = direction === 'up' ? fromIndex - 1 : fromIndex + 1; + if (toIndex < 0 || toIndex >= customModels.length) return; + + const newCustomModels = [...customModels]; + [newCustomModels[fromIndex], newCustomModels[toIndex]] = [ + newCustomModels[toIndex], + newCustomModels[fromIndex], + ]; + setImageProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + }; + return (
{/* Server-configured notice */} @@ -273,13 +296,50 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) { {customModels.map((model, index) => (
-
-
{model.name}
-
{model.id}
+
+ {index === 0 && ( + + )} +
+
+ {model.name} + {index === 0 && ( + + {t('settings.defaultModel')} + + )} +
+
{model.id}
+
-
+
+ + - -
-
- - + setModelForm((prev) => ({ ...prev, id }))} + modelName={modelForm.name} + onModelNameChange={(name) => setModelForm((prev) => ({ ...prev, name }))} + apiKey={currentConfig?.apiKey || ''} + baseUrl={currentConfig?.baseUrl || ''} + providerId={selectedProviderId} + onSave={handleSaveModel} + isEditing={editingModelIndex !== null} + />
); } diff --git a/components/settings/index.tsx b/components/settings/index.tsx index 5f122e2e5..995b6ab36 100644 --- a/components/settings/index.tsx +++ b/components/settings/index.tsx @@ -118,6 +118,7 @@ function ProviderListColumn({ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string { const names: Record = { 'openai-tts': t('settings.providerOpenAITTS'), + 'openai-compatible-tts': t('settings.providerOpenAICompatibleTTS'), 'azure-tts': t('settings.providerAzureTTS'), 'glm-tts': t('settings.providerGLMTTS'), 'qwen-tts': t('settings.providerQwenTTS'), @@ -132,6 +133,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin function getASRProviderName(providerId: ASRProviderId, t: (key: string) => string): string { const names: Record = { 'openai-whisper': t('settings.providerOpenAIWhisper'), + 'openai-compatible-asr': t('settings.providerOpenAICompatibleASR'), 'browser-native': t('settings.providerBrowserNative'), 'qwen-asr': t('settings.providerQwenASR'), }; @@ -145,6 +147,7 @@ const IMAGE_PROVIDER_NAMES: Record = { 'nano-banana': 'providerNanoBanana', 'minimax-image': 'providerMiniMaxImage', 'grok-image': 'providerGrokImage', + 'openai-compatible-image': 'providerOpenAICompatibleImage', }; const IMAGE_PROVIDER_ICONS: Record = { @@ -153,6 +156,7 @@ const IMAGE_PROVIDER_ICONS: Record = { 'nano-banana': '/logos/gemini.svg', 'minimax-image': '/logos/minimax.svg', 'grok-image': '/logos/grok.svg', + 'openai-compatible-image': '/logos/openai.svg', }; const VIDEO_PROVIDER_NAMES: Record = { @@ -162,6 +166,7 @@ const VIDEO_PROVIDER_NAMES: Record = { sora: 'providerSora', 'minimax-video': 'providerMiniMaxVideo', 'grok-video': 'providerGrokVideo', + 'openai-compatible-video': 'providerOpenAICompatibleVideo', }; const VIDEO_PROVIDER_ICONS: Record = { @@ -171,6 +176,7 @@ const VIDEO_PROVIDER_ICONS: Record = { sora: '/logos/openai.svg', 'minimax-video': '/logos/minimax.svg', 'grok-video': '/logos/grok.svg', + 
'openai-compatible-video': '/logos/openai.svg', }; interface SettingsDialogProps { diff --git a/components/settings/media-model-edit-dialog.tsx b/components/settings/media-model-edit-dialog.tsx new file mode 100644 index 000000000..cb7d48e7a --- /dev/null +++ b/components/settings/media-model-edit-dialog.tsx @@ -0,0 +1,276 @@ +'use client'; + +import { useState, useCallback, useEffect } from 'react'; +import { Dialog, DialogContent, DialogTitle, DialogDescription } from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; +import { Label } from '@/components/ui/label'; +import { Loader2, CheckCircle, XCircle, Zap } from 'lucide-react'; +import { useI18n } from '@/lib/hooks/use-i18n'; +import { cn } from '@/lib/utils'; +import { createLogger } from '@/lib/logger'; + +const log = createLogger('MediaModelEditDialog'); + +type MediaType = 'tts' | 'asr' | 'image' | 'video'; + +interface MediaModelEditDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + mediaType: MediaType; + providerId: string; + modelId: string; + onModelIdChange: (id: string) => void; + modelName: string; + onModelNameChange: (name: string) => void; + apiKey: string; + baseUrl?: string; + onSave: () => void; + isEditing?: boolean; + language?: string; +} + +export function MediaModelEditDialog({ + open, + onOpenChange, + mediaType, + providerId, + modelId, + onModelIdChange, + modelName, + onModelNameChange, + apiKey, + baseUrl, + onSave, + isEditing = false, + language, +}: MediaModelEditDialogProps) { + const { t } = useI18n(); + const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); + const [testMessage, setTestMessage] = useState(''); + + // Reset test state when dialog opens + useEffect(() => { + if (open) { + setTestStatus('idle'); + setTestMessage(''); + } + }, [open]); + + const handleClose = () => { + onOpenChange(false); + }; + + const handleSave = () => { + 
if (!modelId.trim()) return; + onSave(); + handleClose(); + }; + + const handleTestModel = useCallback(async () => { + if (!modelId.trim()) return; + + setTestStatus('testing'); + setTestMessage(''); + + const testEndpoint = () => { + switch (mediaType) { + case 'image': + return '/api/verify-image-provider'; + case 'video': + return '/api/verify-video-provider'; + case 'tts': + return '/api/verify-tts-provider'; + case 'asr': + return '/api/verify-asr-provider'; + default: + return ''; + } + }; + + const buildBody = () => { + const baseBody: Record = { + providerId, + apiKey, + baseUrl, + }; + + switch (mediaType) { + case 'image': + return { + ...baseBody, + model: modelId, + }; + case 'video': + return { + ...baseBody, + model: modelId, + }; + case 'tts': + return { + ...baseBody, + modelId, + }; + case 'asr': + return { + ...baseBody, + modelId, + language: language || 'en', + }; + default: + return baseBody; + } + }; + + const buildHeaders = () => { + const headers: Record = { + 'Content-Type': 'application/json', + }; + + switch (mediaType) { + case 'image': + return { + 'x-image-provider': providerId, + 'x-image-model': modelId, + 'x-api-key': apiKey, + 'x-base-url': baseUrl || '', + }; + case 'video': + return { + 'x-video-provider': providerId, + 'x-video-model': modelId, + 'x-api-key': apiKey, + 'x-base-url': baseUrl || '', + }; + default: + return headers; + } + }; + + try { + const endpoint = testEndpoint(); + const headers = buildHeaders(); + const isImageOrVideo = mediaType === 'image' || mediaType === 'video'; + + const response = await fetch(endpoint, { + method: 'POST', + headers: isImageOrVideo + ? (headers as Record) + : { 'Content-Type': 'application/json' }, + body: isImageOrVideo ? 
undefined : JSON.stringify(buildBody()), + }); + + const data = await response.json(); + + if (data.success) { + setTestStatus('success'); + setTestMessage(t('settings.connectionSuccess') || 'Connection successful'); + } else { + setTestStatus('error'); + setTestMessage(data.error || data.message || t('settings.connectionFailed')); + } + } catch (err) { + log.error('Model test failed:', err); + setTestStatus('error'); + setTestMessage(t('settings.connectionFailed') || 'Connection failed'); + } + }, [modelId, mediaType, providerId, apiKey, baseUrl, language, t]); + + const canTest = modelId.trim() && apiKey && baseUrl; + + return ( + + + + {isEditing ? t('settings.editModel') : t('settings.addNewModel')} + + + {t('settings.customizeModel')} + + +
+ {/* Model ID */} +
+ + onModelIdChange(e.target.value)} + className="font-mono text-sm" + /> +
+ + {/* Model Display Name */} +
+ + onModelNameChange(e.target.value)} + className="text-sm" + /> +
+ + {/* Test Connection */} +
+
+ + +
+ + {testMessage && ( +
+
+ {testStatus === 'success' && } + {testStatus === 'error' && } +

{testMessage}

+
+
+ )} + + {!canTest && baseUrl === undefined && ( +
+ {t('settings.testRequiresConfig')} +
+ )} +
+
+ + {/* Dialog Actions */} +
+ + +
+
+
+ ); +} diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx index 4a35a4f36..b50d1a2d9 100644 --- a/components/settings/tts-settings.tsx +++ b/components/settings/tts-settings.tsx @@ -1,17 +1,19 @@ 'use client'; -import { useState, useEffect } from 'react'; +import { useState, useEffect, useCallback, useMemo } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { TTS_PROVIDERS, DEFAULT_TTS_VOICES } from '@/lib/audio/constants'; import type { TTSProviderId } from '@/lib/audio/types'; -import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; +import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff, Plus, Settings2, Trash2, ChevronUp, ChevronDown, Star } from 'lucide-react'; import { cn } from '@/lib/utils'; import { createLogger } from '@/lib/logger'; import { useTTSPreview } from '@/lib/audio/use-tts-preview'; +import { MediaModelEditDialog } from './media-model-edit-dialog'; const log = createLogger('TTSSettings'); @@ -37,12 +39,21 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { const ttsProvider = TTS_PROVIDERS[selectedProviderId] ?? 
TTS_PROVIDERS['openai-tts']; const isServerConfigured = !!ttsProvidersConfig[selectedProviderId]?.isServerConfigured; + const customModels = useMemo( + () => ttsProvidersConfig[selectedProviderId]?.customModels || [], + [ttsProvidersConfig[selectedProviderId]?.customModels], + ); const [showApiKey, setShowApiKey] = useState(false); const [testText, setTestText] = useState(t('settings.ttsTestTextDefault')); const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); const { previewing: testingTTS, startPreview, stopPreview } = useTTSPreview(); + + // Model dialog state + const [showModelDialog, setShowModelDialog] = useState(false); + const [editingModelIndex, setEditingModelIndex] = useState(null); + const [modelDialogData, setModelDialogData] = useState<{ id: string; name: string }>({ id: '', name: '' }); // Doubao TTS uses compound "appId:accessKey" — split for separate UI fields const isDoubao = selectedProviderId === 'doubao-tts'; @@ -72,6 +83,8 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { setShowApiKey(false); setTestStatus('idle'); setTestMessage(''); + setShowModelDialog(false); + setEditingModelIndex(null); }, [selectedProviderId, stopPreview]); const handleTestTTS = async () => { @@ -81,11 +94,26 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { setTestMessage(''); try { + // For OpenAI Compatible, use first custom model if available + let modelId = ttsProvidersConfig[selectedProviderId]?.modelId || ttsProvider.defaultModelId; + if (selectedProviderId === 'openai-compatible-tts' && customModels.length > 0) { + modelId = customModels[0].id; + } + + // For OpenAI Compatible TTS, use custom voice if configured + let voiceToTest = effectiveVoice; + if (selectedProviderId === 'openai-compatible-tts') { + const customVoice = ttsProvidersConfig[selectedProviderId]?.providerOptions?.customVoice as string | undefined; + if 
(customVoice?.trim()) { + voiceToTest = customVoice.trim(); + } + } + await startPreview({ text: testText, providerId: selectedProviderId, - modelId: ttsProvidersConfig[selectedProviderId]?.modelId || ttsProvider.defaultModelId, - voice: effectiveVoice, + modelId, + voice: voiceToTest, speed: ttsSpeed, apiKey: ttsProvidersConfig[selectedProviderId]?.apiKey, baseUrl: ttsProvidersConfig[selectedProviderId]?.baseUrl, @@ -103,6 +131,60 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { } }; + // Model CRUD + const handleOpenAddModel = () => { + setEditingModelIndex(null); + setModelDialogData({ id: '', name: '' }); + setShowModelDialog(true); + }; + + const handleOpenEditModel = (index: number) => { + setEditingModelIndex(index); + setModelDialogData({ ...customModels[index] }); + setShowModelDialog(true); + }; + + const handleSaveModel = useCallback(() => { + if (!modelDialogData.id.trim()) return; + const newCustomModels = [...customModels]; + if (editingModelIndex !== null) { + newCustomModels[editingModelIndex] = { + id: modelDialogData.id.trim(), + name: modelDialogData.name.trim() || modelDialogData.id.trim(), + }; + } else { + newCustomModels.push({ + id: modelDialogData.id.trim(), + name: modelDialogData.name.trim() || modelDialogData.id.trim(), + }); + } + setTTSProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + setShowModelDialog(false); + }, [modelDialogData, editingModelIndex, customModels, selectedProviderId, setTTSProviderConfig]); + + const handleDeleteModel = (index: number) => { + const newCustomModels = customModels.filter((_, i) => i !== index); + setTTSProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + }; + + const handleMoveModel = (fromIndex: number, direction: 'up' | 'down') => { + const toIndex = direction === 'up' ? 
fromIndex - 1 : fromIndex + 1; + if (toIndex < 0 || toIndex >= customModels.length) return; + + const newCustomModels = [...customModels]; + [newCustomModels[fromIndex], newCustomModels[toIndex]] = [ + newCustomModels[toIndex], + newCustomModels[fromIndex], + ]; + setTTSProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + }; + return (
{/* Server-configured notice */} @@ -332,6 +414,178 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {

)} + + {/* Custom Models Section (OpenAI Compatible) */} + {selectedProviderId === 'openai-compatible-tts' && ( +
+
+ + +
+ +
+ {customModels.map((model, index) => ( +
+
+ {index === 0 && ( + + )} +
+
+ {model.name} + {index === 0 && ( + + {t('settings.defaultModel')} + + )} +
+
{model.id}
+
+
+
+ + + + +
+
+ ))} +
+
+ )} + + {/* Supported Voices & Custom Voice Selection (OpenAI Compatible) */} + {selectedProviderId === 'openai-compatible-tts' && ( + <> + {/* Supported Voices List */} +
+ +

+ {t('settings.supportedVoicesDescription')} +

{ + const voices = e.target.value.split(',').map((v) => v.trim()).filter(Boolean); + setTTSProviderConfig(selectedProviderId, { + providerOptions: { + ...ttsProvidersConfig[selectedProviderId]?.providerOptions, + supportedVoices: voices.length > 0 ? voices.join(',') : undefined, + // Reset customVoice to the first voice of the new list (even if the + // previously selected voice is still present in it) + customVoice: voices.length > 0 ? voices[0] : undefined, + }, + }); + }} + className="text-sm" + /> +

+ {t('settings.supportedVoicesHint')} +

+
+ + {/* Custom Voice Selection */} + {(() => { + const supportedVoicesStr = (ttsProvidersConfig[selectedProviderId]?.providerOptions?.supportedVoices as string) || ''; + const voiceList = supportedVoicesStr.split(',').map((v) => v.trim()).filter(Boolean); + if (voiceList.length === 0) return null; + + const selectedVoice = (ttsProvidersConfig[selectedProviderId]?.providerOptions?.customVoice as string) || voiceList[0]; + + return ( +
+ + +

+ {t('settings.defaultVoiceHint')} +

+
+ ); + })()} + + )} + + {/* Model Edit Dialog */} + setModelDialogData((prev) => ({ ...prev, id }))} + modelName={modelDialogData.name} + onModelNameChange={(name) => setModelDialogData((prev) => ({ ...prev, name }))} + apiKey={ttsProvidersConfig[selectedProviderId]?.apiKey || ''} + baseUrl={ttsProvidersConfig[selectedProviderId]?.baseUrl || ''} + providerId={selectedProviderId} + onSave={handleSaveModel} + isEditing={editingModelIndex !== null} + />
); } diff --git a/components/settings/video-settings.tsx b/components/settings/video-settings.tsx index 0260c165a..8a4899b4e 100644 --- a/components/settings/video-settings.tsx +++ b/components/settings/video-settings.tsx @@ -4,7 +4,6 @@ import { useState, useCallback, useMemo } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; -import { Dialog, DialogContent, DialogTitle, DialogDescription } from '@/components/ui/dialog'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; @@ -18,9 +17,13 @@ import { Plus, Settings2, Trash2, + ChevronUp, + ChevronDown, + Star, } from 'lucide-react'; import { cn } from '@/lib/utils'; import type { VideoProviderId } from '@/lib/media/types'; +import { MediaModelEditDialog } from './media-model-edit-dialog'; interface VideoSettingsProps { selectedProviderId: VideoProviderId; @@ -73,11 +76,17 @@ export function VideoSettings({ selectedProviderId }: VideoSettingsProps) { setTestStatus('idle'); setTestMessage(''); try { + // For OpenAI Compatible, use first custom model if available + let model = videoModelId || ''; + if (selectedProviderId === 'openai-compatible-video' && customModels.length > 0) { + model = customModels[0].id; + } + const response = await fetch('/api/verify-video-provider', { method: 'POST', headers: { 'x-video-provider': selectedProviderId, - 'x-video-model': videoModelId || '', + 'x-video-model': model, 'x-api-key': currentConfig?.apiKey || '', 'x-base-url': currentConfig?.baseUrl || '', }, @@ -138,6 +147,20 @@ export function VideoSettings({ selectedProviderId }: VideoSettingsProps) { }); }; + const handleMoveModel = (fromIndex: number, direction: 'up' | 'down') => { + const toIndex = direction === 'up' ? 
fromIndex - 1 : fromIndex + 1; + if (toIndex < 0 || toIndex >= customModels.length) return; + + const newCustomModels = [...customModels]; + [newCustomModels[fromIndex], newCustomModels[toIndex]] = [ + newCustomModels[toIndex], + newCustomModels[fromIndex], + ]; + setVideoProviderConfig(selectedProviderId, { + customModels: newCustomModels, + }); + }; + return (
{/* Server-configured notice */} @@ -276,13 +299,50 @@ export function VideoSettings({ selectedProviderId }: VideoSettingsProps) { {customModels.map((model, index) => (
-
-
{model.name}
-
{model.id}
+
+ {index === 0 && ( + + )} +
+
+ {model.name} + {index === 0 && ( + + {t('settings.defaultModel')} + + )} +
+
{model.id}
+
-
+
+ + - -
-
- - + setModelForm((prev) => ({ ...prev, id }))} + modelName={modelForm.name} + onModelNameChange={(name) => setModelForm((prev) => ({ ...prev, name }))} + apiKey={currentConfig?.apiKey || ''} + baseUrl={currentConfig?.baseUrl || ''} + providerId={selectedProviderId} + onSave={handleSaveModel} + isEditing={editingModelIndex !== null} + />
); } diff --git a/lib/audio/asr-providers.ts b/lib/audio/asr-providers.ts index 93365899a..5d5c185ee 100644 --- a/lib/audio/asr-providers.ts +++ b/lib/audio/asr-providers.ts @@ -178,6 +178,9 @@ export async function transcribeAudio( case 'openai-whisper': return await transcribeOpenAIWhisper(config, audioBuffer); + case 'openai-compatible-asr': + return await transcribeOpenAIWhisper(config, audioBuffer); + case 'browser-native': throw new Error('Browser Native ASR must be handled client-side using useBrowserASR hook'); diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts index 423f5b82c..e2abba4c9 100644 --- a/lib/audio/constants.ts +++ b/lib/audio/constants.ts @@ -164,6 +164,62 @@ export const TTS_PROVIDERS: Record = { speedRange: { min: 0.25, max: 4.0, default: 1.0 }, }, + 'openai-compatible-tts': { + id: 'openai-compatible-tts', + name: 'OpenAI Compatible TTS', + requiresApiKey: true, + defaultBaseUrl: 'https://api.openai.com/v1', + icon: '/logos/openai.svg', + models: [], // User can add custom models + defaultModelId: '', + voices: [ + { + id: 'alloy', + name: 'Alloy', + language: 'en', + gender: 'neutral', + description: 'voiceAlloy', + }, + { + id: 'echo', + name: 'Echo', + language: 'en', + gender: 'male', + description: 'voiceEcho', + }, + { + id: 'fable', + name: 'Fable', + language: 'en', + gender: 'neutral', + description: 'voiceFable', + }, + { + id: 'nova', + name: 'Nova', + language: 'en', + gender: 'female', + description: 'voiceNova', + }, + { + id: 'onyx', + name: 'Onyx', + language: 'en', + gender: 'male', + description: 'voiceOnyx', + }, + { + id: 'shimmer', + name: 'Shimmer', + language: 'en', + gender: 'female', + description: 'voiceShimmer', + }, + ], + supportedFormats: ['mp3', 'opus', 'aac', 'flac'], + speedRange: { min: 0.25, max: 4.0, default: 1.0 }, + }, + 'azure-tts': { id: 'azure-tts', name: 'Azure TTS', @@ -1031,6 +1087,30 @@ export const ASR_PROVIDERS: Record = { supportedFormats: ['mp3', 'wav', 'webm', 'm4a', 'flac'], }, + 
'openai-compatible-asr': { + id: 'openai-compatible-asr', + name: 'OpenAI Compatible ASR', + requiresApiKey: true, + defaultBaseUrl: 'https://api.openai.com/v1', + icon: '/logos/openai.svg', + models: [ + { id: 'whisper-1', name: 'Whisper-1' }, + ], + defaultModelId: 'whisper-1', + supportedLanguages: [ + 'auto', // Auto-detect + 'zh', 'en', 'ja', 'ko', 'es', 'fr', 'de', 'ru', 'ar', 'pt', 'it', 'hi', + 'af', 'hy', 'az', 'be', 'bs', 'bg', 'ca', 'hr', 'cs', 'da', 'nl', 'et', + 'fi', 'gl', 'ka', 'el', 'gu', 'ht', 'ha', 'haw', 'he', 'hu', 'is', 'ig', + 'id', 'ga', 'jw', 'kn', 'kk', 'km', 'rw', 'ku', 'ky', 'lo', 'la', 'lv', + 'lt', 'lb', 'mk', 'mg', 'ms', 'mt', 'mi', 'mn', 'my', 'ne', 'no', 'or', + 'ps', 'fa', 'pl', 'ro', 'sm', 'gd', 'sr', 'st', 'sn', 'sd', 'si', 'sk', + 'sl', 'so', 'su', 'sw', 'sv', 'tl', 'tg', 'ta', 'te', 'th', 'tr', 'tk', + 'uk', 'ur', 'ug', 'uz', 'vi', 'cy', 'xh', 'yi', 'yo', 'zu', + ], + supportedFormats: ['mp3', 'wav', 'webm', 'm4a', 'flac'], + }, + 'browser-native': { id: 'browser-native', name: '浏览器原生 ASR (Web Speech API)', @@ -1119,6 +1199,7 @@ export function getTTSProvider(providerId: TTSProviderId): TTSProviderConfig | u */ export const DEFAULT_TTS_VOICES: Record = { 'openai-tts': 'alloy', + 'openai-compatible-tts': 'alloy', 'azure-tts': 'zh-CN-XiaoxiaoNeural', 'glm-tts': 'tongtong', 'qwen-tts': 'Cherry', @@ -1130,6 +1211,7 @@ export const DEFAULT_TTS_VOICES: Record = { export const DEFAULT_TTS_MODELS: Record = { 'openai-tts': 'gpt-4o-mini-tts', + 'openai-compatible-tts': '', 'azure-tts': '', 'glm-tts': 'glm-tts', 'qwen-tts': 'qwen3-tts-flash', diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts index 67f0e7cc0..3489ae9c7 100644 --- a/lib/audio/tts-providers.ts +++ b/lib/audio/tts-providers.ts @@ -141,6 +141,9 @@ export async function generateTTS( case 'openai-tts': return await generateOpenAITTS(config, text); + case 'openai-compatible-tts': + return await generateOpenAITTS(config, text); + case 'azure-tts': return await 
generateAzureTTS(config, text); @@ -169,12 +172,21 @@ export async function generateTTS( /** * OpenAI TTS implementation (direct API call with explicit UTF-8 encoding) + * Supports both official OpenAI TTS and OpenAI-compatible TTS APIs */ async function generateOpenAITTS( config: TTSModelConfig, text: string, ): Promise { - const baseUrl = config.baseUrl || TTS_PROVIDERS['openai-tts'].defaultBaseUrl; + // Determine baseUrl based on provider type + let baseUrl: string; + + if (config.baseUrl) { + baseUrl = config.baseUrl; + } else { + // For official OpenAI TTS, use default baseUrl + baseUrl = TTS_PROVIDERS['openai-tts'].defaultBaseUrl || 'https://api.openai.com/v1'; + } // Use gpt-4o-mini-tts for best quality and intelligent realtime applications const response = await fetch(`${baseUrl}/audio/speech`, { @@ -197,9 +209,30 @@ async function generateOpenAITTS( } const arrayBuffer = await response.arrayBuffer(); + + // Extract audio format from Content-Type header + const contentType = response.headers.get('content-type') || 'audio/mpeg'; + let format = 'mp3'; // default + + if (contentType.includes('audio/wav')) { + format = 'wav'; + } else if (contentType.includes('audio/mpeg') || contentType.includes('audio/mp3')) { + format = 'mp3'; + } else if (contentType.includes('audio/ogg')) { + format = 'ogg'; + } else if (contentType.includes('audio/aac')) { + format = 'aac'; + } else if (contentType.includes('audio/flac')) { + format = 'flac'; + } else if (contentType.includes('audio/webm')) { + format = 'webm'; + } else if (contentType.includes('audio/opus')) { + format = 'opus'; + } + return { audio: new Uint8Array(arrayBuffer), - format: 'mp3', + format, }; } diff --git a/lib/audio/types.ts b/lib/audio/types.ts index 0c3c91792..23254f0f1 100644 --- a/lib/audio/types.ts +++ b/lib/audio/types.ts @@ -6,6 +6,7 @@ * * Currently Supported TTS Providers: * - OpenAI TTS (https://platform.openai.com/docs/guides/text-to-speech) + * - OpenAI-Compatible TTS (any OpenAI-compatible 
API) * - Azure TTS (https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech) * - GLM TTS (https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts) * - Qwen TTS (https://bailian.console.aliyun.com/) @@ -14,6 +15,7 @@ * * Currently Supported ASR Providers: * - OpenAI Whisper (https://platform.openai.com/docs/guides/speech-to-text) + * - OpenAI-Compatible ASR (any OpenAI-compatible API) * - Browser Native (Web Speech API, client-side only) * - Qwen ASR (DashScope API) * @@ -80,6 +82,7 @@ */ export type TTSProviderId = | 'openai-tts' + | 'openai-compatible-tts' | 'azure-tts' | 'glm-tts' | 'qwen-tts' @@ -152,7 +155,11 @@ export interface TTSModelConfig { * Add new ASR providers here as union members. * Keep in sync with ASR_PROVIDERS registry in constants.ts */ -export type ASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr'; +export type ASRProviderId = + | 'openai-whisper' + | 'openai-compatible-asr' + | 'browser-native' + | 'qwen-asr'; // Add new ASR providers below (uncomment and modify): // | 'elevenlabs-asr' // | 'assemblyai-asr' diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts index b4e54eee8..f08e4f246 100644 --- a/lib/audio/voice-resolver.ts +++ b/lib/audio/voice-resolver.ts @@ -78,11 +78,18 @@ export interface ProviderWithVoices { * Get all available providers and their voices for the voice picker UI. * A provider is available if it has an API key or is server-configured. * Browser-native-tts is excluded (no static voice list). + * Includes both built-in models and custom models for OpenAI-compatible providers. 
*/ export function getAvailableProvidersWithVoices( ttsProvidersConfig: Record< string, - { apiKey?: string; enabled?: boolean; isServerConfigured?: boolean } + { + apiKey?: string; + enabled?: boolean; + isServerConfigured?: boolean; + customModels?: Array<{ id: string; name: string }>; + providerOptions?: Record; + } >, ): ProviderWithVoices[] { const result: ProviderWithVoices[] = []; @@ -101,6 +108,8 @@ export function getAvailableProvidersWithVoices( // Build model groups const modelGroups: ModelVoiceGroup[] = []; + + // Add built-in models if (config.models.length > 0) { for (const model of config.models) { const compatibleVoices = config.voices @@ -112,8 +121,32 @@ export function getAvailableProvidersWithVoices( voices: compatibleVoices, }); } - } else { - // Provider has no model concept (Azure, Browser Native, Doubao) + } + + // Add custom models for OpenAI-compatible TTS + const customModels = providerConfig?.customModels || []; + if (providerId === 'openai-compatible-tts' && customModels.length > 0) { + // Get supported voices from provider options + const supportedVoicesStr = (providerConfig?.providerOptions?.supportedVoices as string) || ''; + const supportedVoices = supportedVoicesStr + .split(',') + .map((v) => v.trim()) + .filter(Boolean) + .map((voice) => ({ id: voice, name: voice })); + + for (const model of customModels) { + // Use configured supported voices, or fall back to all predefined voices + const modelVoices = supportedVoices.length > 0 ? 
supportedVoices : allVoices; + modelGroups.push({ + modelId: model.id, + modelName: model.name, + voices: modelVoices, + }); + } + } + + // If no models at all, add default group (for providers without model concept) + if (modelGroups.length === 0) { modelGroups.push({ modelId: '', modelName: config.name, diff --git a/lib/hooks/use-scene-generator.ts b/lib/hooks/use-scene-generator.ts index c48012085..d6a9b31c4 100644 --- a/lib/hooks/use-scene-generator.ts +++ b/lib/hooks/use-scene-generator.ts @@ -130,6 +130,26 @@ export async function generateAndStoreTTS( if (settings.ttsProviderId === 'browser-native-tts') return; const ttsProviderConfig = settings.ttsProvidersConfig?.[settings.ttsProviderId]; + + // For OpenAI Compatible TTS, prioritize custom models + let ttsModelId = ttsProviderConfig?.modelId; + if ( + settings.ttsProviderId === 'openai-compatible-tts' && + ttsProviderConfig?.customModels?.length + ) { + // Use first custom model if available + ttsModelId = ttsProviderConfig.customModels[0].id; + } + + // For OpenAI Compatible TTS, use custom voice if configured + let ttsVoice = settings.ttsVoice; + if (settings.ttsProviderId === 'openai-compatible-tts') { + const customVoice = ttsProviderConfig?.providerOptions?.customVoice as string | undefined; + if (customVoice?.trim()) { + ttsVoice = customVoice.trim(); + } + } + const response = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -137,8 +157,8 @@ export async function generateAndStoreTTS( text, audioId, ttsProviderId: settings.ttsProviderId, - ttsModelId: ttsProviderConfig?.modelId, - ttsVoice: settings.ttsVoice, + ttsModelId, + ttsVoice, ttsSpeed: settings.ttsSpeed, ttsApiKey: ttsProviderConfig?.apiKey || undefined, ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined, diff --git a/lib/i18n/common.ts b/lib/i18n/common.ts index 1bceb5d61..6f05d4d36 100644 --- a/lib/i18n/common.ts +++ b/lib/i18n/common.ts @@ -3,6 +3,7 @@ export const commonZhCN = { you: 
'你', confirm: '确定', cancel: '取消', + save: '保存', loading: '加载中...', }, home: { @@ -44,6 +45,7 @@ export const commonEnUS = { you: 'You', confirm: 'Confirm', cancel: 'Cancel', + save: 'Save', loading: 'Loading...', }, home: { diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts index 356fea554..bae58f54f 100644 --- a/lib/i18n/settings.ts +++ b/lib/i18n/settings.ts @@ -223,6 +223,7 @@ export const settingsZhCN = { browserNativeNote: '浏览器原生 ASR 无需配置,完全免费', // Audio provider names providerOpenAITTS: 'OpenAI TTS (gpt-4o-mini-tts)', + providerOpenAICompatibleTTS: 'OpenAI Compatible TTS', providerAzureTTS: 'Azure TTS', providerGLMTTS: 'GLM TTS', providerQwenTTS: 'Qwen TTS(阿里云百炼)', @@ -231,6 +232,7 @@ export const settingsZhCN = { providerMiniMaxTTS: 'MiniMax TTS', providerBrowserNativeTTS: '浏览器原生 TTS', providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)', + providerOpenAICompatibleASR: 'OpenAI Compatible ASR', providerBrowserNative: '浏览器原生 ASR', providerQwenASR: 'Qwen ASR(阿里云百炼)', providerUnpdf: 'unpdf(内置)', @@ -490,6 +492,7 @@ export const settingsZhCN = { providerNanoBanana: 'Nano Banana(Gemini)', providerMiniMaxImage: 'MiniMax 图像', providerGrokImage: 'Grok Image(xAI)', + providerOpenAICompatibleImage: 'OpenAI Compatible 图像生成', testImageGeneration: '测试图像生成', testImageConnectivity: '测试连接', imageConnectivitySuccess: '图像服务连接成功', @@ -513,6 +516,7 @@ export const settingsZhCN = { providerSora: 'Sora(OpenAI)', providerMiniMaxVideo: 'MiniMax 视频', providerGrokVideo: 'Grok Video(xAI)', + providerOpenAICompatibleVideo: 'OpenAI Compatible 视频生成', testVideoGeneration: '测试视频生成', testVideoConnectivity: '测试连接', videoConnectivitySuccess: '视频服务连接成功', @@ -561,7 +565,17 @@ export const settingsZhCN = { webSearchApiKeyHint: '从 tavily.com 获取 API Key,用于网络搜索', webSearchBaseUrl: 'Base URL', webSearchServerConfigured: '服务端已配置 Tavily API Key', - optional: '可选', + // Edit model dialog + modelEdit: '编辑模型', + customizeModel: '自定义模型设置', + modelDisplayName: '模型显示名称', + customModels: 
'自定义模型', + supportedVoices: '支持的语音列表', + supportedVoicesDescription: '配置此 API 支持的所有语音/讲话人', + supportedVoicesHint: '用逗号分隔,例如:aiden, dylan, eric, ryan。列表为空时使用默认语音', + defaultVoice: '默认语音', + defaultVoiceHint: '这个语音将作为默认选项,在角色配置中可以选择其他语音', + testRequiresConfig: '请先配置API密钥和BaseURL才能测试', }, profile: { title: '个人资料', @@ -821,6 +835,7 @@ export const settingsEnUS = { browserNativeNote: 'Browser Native ASR requires no configuration and is completely free', // Audio provider names providerOpenAITTS: 'OpenAI TTS (gpt-4o-mini-tts)', + providerOpenAICompatibleTTS: 'OpenAI Compatible TTS', providerAzureTTS: 'Azure TTS', providerGLMTTS: 'GLM TTS', providerQwenTTS: 'Qwen TTS (Alibaba Cloud Bailian)', @@ -829,6 +844,7 @@ export const settingsEnUS = { providerMiniMaxTTS: 'MiniMax TTS', providerBrowserNativeTTS: 'Browser Native TTS', providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)', + providerOpenAICompatibleASR: 'OpenAI Compatible ASR', providerBrowserNative: 'Browser Native ASR', providerQwenASR: 'Qwen ASR (Alibaba Cloud Bailian)', providerUnpdf: 'unpdf (Built-in)', @@ -1091,6 +1107,7 @@ export const settingsEnUS = { providerNanoBanana: 'Nano Banana (Gemini)', providerMiniMaxImage: 'MiniMax Image', providerGrokImage: 'Grok Image (xAI)', + providerOpenAICompatibleImage: 'OpenAI Compatible Image', testImageGeneration: 'Test Image Generation', testImageConnectivity: 'Test Connection', imageConnectivitySuccess: 'Image service connected successfully', @@ -1115,6 +1132,7 @@ export const settingsEnUS = { providerSora: 'Sora (OpenAI)', providerMiniMaxVideo: 'MiniMax Video', providerGrokVideo: 'Grok Video (xAI)', + providerOpenAICompatibleVideo: 'OpenAI Compatible Video', testVideoGeneration: 'Test Video Generation', testVideoConnectivity: 'Test Connection', videoConnectivitySuccess: 'Video service connected successfully', @@ -1166,6 +1184,17 @@ export const settingsEnUS = { webSearchBaseUrl: 'Base URL', webSearchServerConfigured: 'Server-side Tavily API key is configured', 
optional: 'Optional', + // Edit model dialog + modelEdit: 'Edit Model', + customizeModel: 'Customize model settings', + modelDisplayName: 'Model Display Name', + customModels: 'Custom Models', + supportedVoices: 'Supported Voices', + supportedVoicesDescription: 'Configure all voices/speakers supported by this API', + supportedVoicesHint: 'Comma-separated list, e.g.: aiden, dylan, eric, ryan. Empty list uses default voice', + defaultVoice: 'Default Voice', + defaultVoiceHint: 'This voice will be used as the default option. You can select other voices in role configuration', + testRequiresConfig: 'Please configure API key and Base URL to test', }, profile: { title: 'Profile', diff --git a/lib/media/adapters/openai-compatible-image-adapter.ts b/lib/media/adapters/openai-compatible-image-adapter.ts new file mode 100644 index 000000000..7c8f57963 --- /dev/null +++ b/lib/media/adapters/openai-compatible-image-adapter.ts @@ -0,0 +1,176 @@ +/** + * OpenAI-Compatible Image Generation Adapter + * + * Supports any API that implements the OpenAI images/generations endpoint format: + * - https://platform.openai.com/docs/api-reference/images/create + * + * Can be used with: + * - OpenAI (default: https://api.openai.com/v1) + * - LocalAI, LM Studio, Ollama with OpenAI-compatible endpoints + * - Custom endpoints that implement the OpenAI API format + * + * Authentication: Bearer token via Authorization header (or API-Key header) + */ + +import type { + ImageGenerationConfig, + ImageGenerationOptions, + ImageGenerationResult, +} from '../types'; + +const DEFAULT_BASE_URL = 'https://api.openai.com/v1'; +const DEFAULT_MODEL = 'dall-e-3'; + +/** + * Lightweight connectivity test — validates API key by making a minimal request + */ +export async function testOpenAICompatibleImageConnectivity( + config: ImageGenerationConfig, +): Promise<{ success: boolean; message: string }> { + const baseUrl = config.baseUrl || DEFAULT_BASE_URL; + + try { + const response = await 
fetch(`${baseUrl}/images/generations`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.apiKey}`, + }, + body: JSON.stringify({ + model: config.model || DEFAULT_MODEL, + prompt: 'test', + n: 1, + // 1024x1024 is accepted by all OpenAI image models; 256x256 is dall-e-2 only + // and would make this probe fail against the default dall-e-3 model + size: '1024x1024', + }), + }); + + if (response.status === 401 || response.status === 403) { + const text = await response.text(); + return { + success: false, + message: `OpenAI Compatible Image auth failed (${response.status}): ${text}`, + }; + } + + if (!response.ok) { + const text = await response.text(); + return { + success: false, + message: `OpenAI Compatible Image API error (${response.status}): ${text}`, + }; + } + + return { success: true, message: 'Connected to OpenAI-Compatible Image endpoint' }; + } catch (err) { + return { + success: false, + message: `OpenAI Compatible Image connectivity error: ${err}`, + }; + } +} + +/** + * Generate image using OpenAI-compatible endpoint + */ +export async function generateWithOpenAICompatibleImage( + config: ImageGenerationConfig, + options: ImageGenerationOptions, +): Promise { + const baseUrl = config.baseUrl || DEFAULT_BASE_URL; + const model = config.model || DEFAULT_MODEL; + + // Calculate size from aspect ratio or use provided dimensions + const size = calculateImageSize(options); + + try { + const response = await fetch(`${baseUrl}/images/generations`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.apiKey}`, + }, + body: JSON.stringify({ + model, + prompt: options.prompt, + n: 1, + size: size, + ...(options.style && { style: options.style }), + quality: 'hd', + }), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error( + `OpenAI Compatible Image API error (${response.status}): ${error}`, + ); + } + + const data = (await response.json()) as { + data: Array<{ url?: string; b64_json?: string }>; + }; + + if (!data.data || data.data.length === 0) { + throw new Error('No 
image data returned from OpenAI Compatible API'); + } + + const imageData = data.data[0]; + const [width, height] = size + .split('x') + .map((v) => parseInt(v, 10)); + + if (imageData.url) { + return { + url: imageData.url, + width, + height, + }; + } + + if (imageData.b64_json) { + return { + base64: imageData.b64_json, + width, + height, + }; + } + + throw new Error('No image URL or base64 data in OpenAI Compatible response'); + } catch (err) { + if (err instanceof Error) { + throw err; + } + throw new Error(`OpenAI Compatible image generation failed: ${err}`); + } +} + +/** + * Calculate OpenAI-compatible size format from options + */ +function calculateImageSize(options: ImageGenerationOptions): string { + // OpenAI supports: 256x256, 512x512, 1024x1024, 1792x1024, 1024x1792 + const supportedSizes = [ + '256x256', + '512x512', + '1024x1024', + '1792x1024', + '1024x1792', + ]; + + if (options.width && options.height) { + const requestedSize = `${options.width}x${options.height}`; + if (supportedSizes.includes(requestedSize)) { + return requestedSize; + } + } + + // Default based on aspect ratio + switch (options.aspectRatio) { + case '16:9': + return '1792x1024'; + case '9:16': + return '1024x1792'; + default: + return '1024x1024'; + } +} diff --git a/lib/media/adapters/openai-compatible-video-adapter.ts b/lib/media/adapters/openai-compatible-video-adapter.ts new file mode 100644 index 000000000..d89665fe1 --- /dev/null +++ b/lib/media/adapters/openai-compatible-video-adapter.ts @@ -0,0 +1,158 @@ +/** + * OpenAI-Compatible Video Generation Adapter + * + * Supports any API that implements the OpenAI video/generations endpoint format: + * - https://platform.openai.com/docs/api-reference/videos (future OpenAI API) + * + * Can be used with: + * - Custom endpoints that implement the OpenAI video API format + * - Grok Video or similar OpenAI-compatible video APIs + * + * Note: This is a template for OpenAI API-compatible video providers + * The actual OpenAI video 
API is not yet released, so this uses a similar pattern. + * + * Authentication: Bearer token via Authorization header + */ + +import type { + VideoGenerationConfig, + VideoGenerationOptions, + VideoGenerationResult, +} from '../types'; + +const DEFAULT_BASE_URL = 'https://api.openai.com/v1'; +const DEFAULT_MODEL = 'gpt-4o-vision'; + +/** + * Lightweight connectivity test — validates API key by making a minimal request + */ +export async function testOpenAICompatibleVideoConnectivity( + config: VideoGenerationConfig, +): Promise<{ success: boolean; message: string }> { + const baseUrl = config.baseUrl || DEFAULT_BASE_URL; + + try { + // Test using a simple text-to-video request format + const response = await fetch(`${baseUrl}/video/generations`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.apiKey}`, + }, + body: JSON.stringify({ + model: config.model || DEFAULT_MODEL, + prompt: 'test', + }), + }); + + if (response.status === 401 || response.status === 403) { + const text = await response.text(); + return { + success: false, + message: `OpenAI Compatible Video auth failed (${response.status}): ${text}`, + }; + } + + if (!response.ok) { + // 404 is acceptable if endpoint doesn't exist yet, but auth passed + if (response.status === 404) { + return { + success: true, + message: 'Endpoint not found, but authentication succeeded', + }; + } + const text = await response.text(); + return { + success: false, + message: `OpenAI Compatible Video API error (${response.status}): ${text}`, + }; + } + + return { + success: true, + message: 'Connected to OpenAI-Compatible Video endpoint', + }; + } catch (err) { + return { + success: false, + message: `OpenAI Compatible Video connectivity error: ${err}`, + }; + } +} + +/** + * Generate video using OpenAI-compatible endpoint + */ +export async function generateWithOpenAICompatibleVideo( + config: VideoGenerationConfig, + options: VideoGenerationOptions, +): Promise { + 
const baseUrl = config.baseUrl || DEFAULT_BASE_URL; + const model = config.model || DEFAULT_MODEL; + + const size = calculateVideoSize(options); + const [width, height] = size.split('x').map((v) => parseInt(v, 10)); + + try { + const response = await fetch(`${baseUrl}/video/generations`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.apiKey}`, + }, + body: JSON.stringify({ + model, + prompt: options.prompt, + duration: options.duration || 6, + size: size, + }), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error( + `OpenAI Compatible Video API error (${response.status}): ${error}`, + ); + } + + const data = (await response.json()) as { + data: Array<{ url?: string }>; + }; + + if (!data.data || data.data.length === 0) { + throw new Error('No video data returned from OpenAI Compatible API'); + } + + const videoData = data.data[0]; + + if (videoData.url) { + return { + url: videoData.url, + duration: options.duration || 6, + width, + height, + }; + } + + throw new Error('No video URL in OpenAI Compatible response'); + } catch (err) { + if (err instanceof Error) { + throw err; + } + throw new Error(`OpenAI Compatible video generation failed: ${err}`); + } +} + +/** + * Calculate OpenAI-compatible video size format from options + */ +function calculateVideoSize(options: VideoGenerationOptions): string { + // Common video sizes + const supportedSizes: { [key: string]: string } = { + '16:9': '1280x720', + '9:16': '720x1280', + '1:1': '720x720', + }; + + return supportedSizes[options.aspectRatio || '16:9'] || '1280x720'; +} diff --git a/lib/media/image-providers.ts b/lib/media/image-providers.ts index d40fa770a..d97abdaf3 100644 --- a/lib/media/image-providers.ts +++ b/lib/media/image-providers.ts @@ -17,6 +17,10 @@ import { testMiniMaxImageConnectivity, } from './adapters/minimax-image-adapter'; import { generateWithGrokImage, testGrokImageConnectivity } from 
'./adapters/grok-image-adapter'; +import { + generateWithOpenAICompatibleImage, + testOpenAICompatibleImageConnectivity, +} from './adapters/openai-compatible-image-adapter'; export const IMAGE_PROVIDERS: Record = { seedream: { @@ -93,6 +97,15 @@ export const IMAGE_PROVIDERS: Record = { ], supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'], }, + + 'openai-compatible-image': { + id: 'openai-compatible-image', + name: 'OpenAI Compatible Image', + requiresApiKey: true, + defaultBaseUrl: 'https://api.openai.com/v1', + models: [], + supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'], + }, }; export async function testImageConnectivity( @@ -109,6 +122,8 @@ export async function testImageConnectivity( return testMiniMaxImageConnectivity(config); case 'grok-image': return testGrokImageConnectivity(config); + case 'openai-compatible-image': + return testOpenAICompatibleImageConnectivity(config); default: return { success: false, @@ -132,6 +147,8 @@ export async function generateImage( return generateWithMiniMaxImage(config, options); case 'grok-image': return generateWithGrokImage(config, options); + case 'openai-compatible-image': + return generateWithOpenAICompatibleImage(config, options); default: throw new Error(`Unsupported image provider: ${config.providerId}`); } diff --git a/lib/media/types.ts b/lib/media/types.ts index 13fbdb696..525e0edac 100644 --- a/lib/media/types.ts +++ b/lib/media/types.ts @@ -8,12 +8,14 @@ * - Seedream (ByteDance SDXL-based image generation) * - Qwen Image (Alibaba Cloud Wanx image generation) * - Nano Banana (Lightweight image generation via Banana.dev) + * - OpenAI-Compatible Image (any OpenAI-compatible API) * * Currently Supported Video Providers (Phase 2): * - Seedance (ByteDance video generation) * - Kling (Kuaishou video generation) * - Veo (Google DeepMind video generation) * - Sora (OpenAI video generation) + * - OpenAI-Compatible Video (any OpenAI-compatible API) * * HOW TO ADD A NEW PROVIDER: * @@ -74,7 +76,8 @@ export type 
ImageProviderId = | 'qwen-image' | 'nano-banana' | 'minimax-image' - | 'grok-image'; + | 'grok-image' + | 'openai-compatible-image'; // Add new image providers below (uncomment and modify): // | 'dall-e' // | 'midjourney' @@ -189,7 +192,8 @@ export type VideoProviderId = | 'veo' | 'sora' | 'minimax-video' - | 'grok-video'; + | 'grok-video' + | 'openai-compatible-video'; // Add new video providers below (uncomment and modify): // | 'runway' // | 'pika' diff --git a/lib/media/video-providers.ts b/lib/media/video-providers.ts index 6c2b5d0e8..ce012e2a0 100644 --- a/lib/media/video-providers.ts +++ b/lib/media/video-providers.ts @@ -17,6 +17,10 @@ import { testMiniMaxVideoConnectivity, } from './adapters/minimax-video-adapter'; import { generateWithGrokVideo, testGrokVideoConnectivity } from './adapters/grok-video-adapter'; +import { + generateWithOpenAICompatibleVideo, + testOpenAICompatibleVideoConnectivity, +} from './adapters/openai-compatible-video-adapter'; export const VIDEO_PROVIDERS: Record = { seedance: { @@ -106,6 +110,17 @@ export const VIDEO_PROVIDERS: Record = { supportedDurations: [6], maxDuration: 6, }, + + 'openai-compatible-video': { + id: 'openai-compatible-video', + name: 'OpenAI Compatible Video', + requiresApiKey: true, + defaultBaseUrl: 'https://api.openai.com/v1', + models: [], + supportedAspectRatios: ['16:9', '1:1', '9:16'], + supportedDurations: [6], + maxDuration: 60, + }, }; export async function testVideoConnectivity( @@ -122,6 +137,8 @@ export async function testVideoConnectivity( return testMiniMaxVideoConnectivity(config); case 'grok-video': return testGrokVideoConnectivity(config); + case 'openai-compatible-video': + return testOpenAICompatibleVideoConnectivity(config); default: return { success: false, @@ -189,6 +206,8 @@ export async function generateVideo( return generateWithMiniMaxVideo(config, options); case 'grok-video': return generateWithGrokVideo(config, options); + case 'openai-compatible-video': + return 
generateWithOpenAICompatibleVideo(config, options); default: throw new Error(`Unsupported video provider: ${config.providerId}`); } diff --git a/lib/store/settings.ts b/lib/store/settings.ts index 4b088bbc6..6715151a1 100644 --- a/lib/store/settings.ts +++ b/lib/store/settings.ts @@ -284,6 +284,7 @@ const getDefaultAudioConfig = () => ({ asrLanguage: 'zh', ttsProvidersConfig: { 'openai-tts': { apiKey: '', baseUrl: '', enabled: true }, + 'openai-compatible-tts': { apiKey: '', baseUrl: '', enabled: false }, 'azure-tts': { apiKey: '', baseUrl: '', enabled: false }, 'glm-tts': { apiKey: '', baseUrl: '', enabled: false }, 'qwen-tts': { apiKey: '', baseUrl: '', enabled: false }, @@ -297,6 +298,7 @@ const getDefaultAudioConfig = () => ({ >, asrProvidersConfig: { 'openai-whisper': { apiKey: '', baseUrl: '', enabled: true }, + 'openai-compatible-asr': { apiKey: '', baseUrl: '', enabled: false }, 'browser-native': { apiKey: '', baseUrl: '', enabled: true }, 'qwen-asr': { apiKey: '', baseUrl: '', enabled: false }, } as Record, @@ -321,6 +323,7 @@ const getDefaultImageConfig = () => ({ 'nano-banana': { apiKey: '', baseUrl: '', enabled: false }, 'minimax-image': { apiKey: '', baseUrl: '', enabled: false }, 'grok-image': { apiKey: '', baseUrl: '', enabled: false }, + 'openai-compatible-image': { apiKey: '', baseUrl: '', enabled: false }, } as Record, }); @@ -335,6 +338,7 @@ const getDefaultVideoConfig = () => ({ sora: { apiKey: '', baseUrl: '', enabled: false }, 'minimax-video': { apiKey: '', baseUrl: '', enabled: false }, 'grok-video': { apiKey: '', baseUrl: '', enabled: false }, + 'openai-compatible-video': { apiKey: '', baseUrl: '', enabled: false }, } as Record, });