Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,16 @@ GROK_API_KEY=
GROK_BASE_URL=
GROK_MODELS=

# --- TTS (Text-to-Speech) ----------------------------------------------------
# --- TTS (Text-to-Speech) ---

TTS_OPENAI_API_KEY=
TTS_OPENAI_BASE_URL=

TTS_OPENAI_COMPATIBLE_API_KEY=
TTS_OPENAI_COMPATIBLE_BASE_URL=
# Optional custom models (comma-separated): gpt-4o-mini-tts,tts-1,custom-model
TTS_OPENAI_COMPATIBLE_MODELS=

TTS_AZURE_API_KEY=
TTS_AZURE_BASE_URL=

Expand All @@ -74,11 +79,16 @@ TTS_MINIMAX_BASE_URL=https://api.minimaxi.com
TTS_ELEVENLABS_API_KEY=
TTS_ELEVENLABS_BASE_URL=

# --- ASR (Automatic Speech Recognition) --------------------------------------
# --- ASR (Automatic Speech Recognition) ---

ASR_OPENAI_API_KEY=
ASR_OPENAI_BASE_URL=

ASR_OPENAI_COMPATIBLE_API_KEY=
ASR_OPENAI_COMPATIBLE_BASE_URL=
# Optional custom models (comma-separated): whisper-1,custom-model
ASR_OPENAI_COMPATIBLE_MODELS=

ASR_QWEN_API_KEY=
ASR_QWEN_BASE_URL=

Expand All @@ -90,7 +100,7 @@ PDF_UNPDF_BASE_URL=
PDF_MINERU_API_KEY=
PDF_MINERU_BASE_URL=

# --- Image Generation ---------------------------------------------------------
# --- Image Generation ---

IMAGE_SEEDREAM_API_KEY=
IMAGE_SEEDREAM_BASE_URL=
Expand All @@ -108,7 +118,12 @@ IMAGE_MINIMAX_BASE_URL=https://api.minimaxi.com
IMAGE_GROK_API_KEY=
IMAGE_GROK_BASE_URL=

# --- Video Generation ---------------------------------------------------------
IMAGE_OPENAI_COMPATIBLE_API_KEY=
IMAGE_OPENAI_COMPATIBLE_BASE_URL=
# Optional custom models (comma-separated): dall-e-3,custom-model
IMAGE_OPENAI_COMPATIBLE_MODELS=

# --- Video Generation ---

VIDEO_SEEDANCE_API_KEY=
VIDEO_SEEDANCE_BASE_URL=
Expand All @@ -129,6 +144,11 @@ VIDEO_MINIMAX_BASE_URL=https://api.minimaxi.com
VIDEO_GROK_API_KEY=
VIDEO_GROK_BASE_URL=

VIDEO_OPENAI_COMPATIBLE_API_KEY=
VIDEO_OPENAI_COMPATIBLE_BASE_URL=
# Optional custom models (comma-separated): grok-imagine-video,custom-model
VIDEO_OPENAI_COMPATIBLE_MODELS=

# --- Web Search ---------------------------------------------------------------
# Note: Grok (xAI) web search is available via chat completions + search tools,
# not as a standalone search API. Use Grok LLM provider with search_parameters
Expand Down
123 changes: 123 additions & 0 deletions app/api/verify-asr-provider/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/**
* Verify ASR Provider API
*
* Lightweight endpoint that validates ASR provider credentials.
*
* POST /api/verify-asr-provider
*
* Body:
* providerId: ASRProviderId
* apiKey: string (required; the handler responds 400 MISSING_API_KEY when it is absent)
* baseUrl: string (optional)
* modelId: string (optional)
* language: string (optional)
*
* Response: { success: boolean, message: string }
*/

import { NextRequest } from 'next/server';
import { transcribeAudio } from '@/lib/audio/asr-providers';
import { ASR_PROVIDERS } from '@/lib/audio/constants';
import type { ASRProviderId, ASRModelConfig } from '@/lib/audio/types';
import { apiError, apiSuccess } from '@/lib/server/api-response';
import { createLogger } from '@/lib/logger';

const log = createLogger('VerifyASRProvider');

/**
 * Build the tiny in-memory WAV clip used as the verification payload:
 * 100 ms of silence, 16 kHz sample rate, mono, 16-bit PCM.
 *
 * @returns A Buffer holding a 44-byte RIFF/WAVE header followed by zeroed samples.
 */
function createTestAudioBuffer(): Buffer {
  const sampleRate = 16000;
  const duration = 0.1; // 100ms
  const frameCount = Math.floor(sampleRate * duration);

  const headerBytes = 44;
  const payloadBytes = frameCount * 2; // one 16-bit sample per mono frame
  const wav = Buffer.alloc(headerBytes + payloadBytes);

  // Four-character chunk tags and the byte offsets they live at.
  const tags: Array<[string, number]> = [
    ['RIFF', 0],
    ['WAVE', 8],
    ['fmt ', 12],
    ['data', 36],
  ];
  for (const [tag, offset] of tags) {
    wav.write(tag, offset);
  }

  // 32-bit little-endian header fields.
  const dwords: Array<[number, number]> = [
    [36 + payloadBytes, 4], // RIFF chunk size (file size minus the first 8 bytes)
    [16, 16], // fmt chunk size
    [sampleRate, 24], // Sample rate
    [sampleRate * 2, 28], // Byte rate
    [payloadBytes, 40], // data chunk size
  ];
  for (const [value, offset] of dwords) {
    wav.writeUInt32LE(value, offset);
  }

  // 16-bit little-endian header fields.
  const words: Array<[number, number]> = [
    [1, 20], // PCM format
    [1, 22], // Mono
    [2, 32], // Block align
    [16, 34], // Bits per sample
  ];
  for (const [value, offset] of words) {
    wav.writeUInt16LE(value, offset);
  }

  // Audio data (silence): every sample after the header is zero.
  wav.fill(0, headerBytes);

  return wav;
}

/**
 * POST /api/verify-asr-provider
 *
 * Checks ASR credentials by sending a short silent WAV clip through
 * `transcribeAudio` using the provider settings supplied in the JSON body.
 *
 * Outcomes:
 * - success payload when the transcription call completes without throwing
 * - 400 when the body is not a JSON object, `providerId` is missing or unknown,
 *   `apiKey` is missing, or the provider call fails for a non-auth reason
 * - 401 when the provider error message looks like an authentication failure
 * - 500 for any other unexpected error
 */
export async function POST(request: NextRequest) {
  try {
    // Parse defensively: a malformed body is a client error, not a server fault.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return apiError('INVALID_REQUEST', 400, 'Request body must be valid JSON');
    }

    if (typeof body !== 'object' || body === null) {
      return apiError('INVALID_REQUEST', 400, 'Request body must be a JSON object');
    }

    const { providerId, apiKey, baseUrl, modelId, language } = body as {
      providerId?: ASRProviderId;
      apiKey?: string;
      baseUrl?: string;
      modelId?: string;
      language?: string;
    };

    if (!providerId) {
      return apiError('MISSING_REQUIRED_FIELD', 400, 'Provider ID is required');
    }

    // Own-property check: a plain index lookup would also resolve inherited keys
    // such as "constructor" or "toString" and let them through as a "provider".
    if (
      typeof providerId !== 'string' ||
      !Object.prototype.hasOwnProperty.call(ASR_PROVIDERS, providerId)
    ) {
      return apiError('INVALID_REQUEST', 400, `Unknown ASR provider: ${providerId}`);
    }

    const provider = ASR_PROVIDERS[providerId];
    if (!provider) {
      return apiError('INVALID_REQUEST', 400, `Unknown ASR provider: ${providerId}`);
    }

    if (!apiKey) {
      return apiError('MISSING_API_KEY', 400, 'API key is required');
    }

    try {
      // NOTE(review): baseUrl is caller-supplied and is handed to transcribeAudio,
      // which presumably issues a server-side request to it — confirm that an
      // SSRF guard / allow-list is applied somewhere on this path.
      const config: ASRModelConfig = {
        providerId,
        apiKey,
        // `||` is deliberate: an empty string from the client falls back to the default.
        baseUrl: baseUrl || provider.defaultBaseUrl,
        modelId: modelId || provider.defaultModelId,
        language: language || 'auto',
      };

      // Create test audio buffer
      const testAudio = createTestAudioBuffer();

      // The transcript of silence is irrelevant; reaching the next line means the
      // provider accepted the request, which is all this endpoint verifies.
      await transcribeAudio(config, testAudio);

      return apiSuccess({
        success: true,
        message: 'ASR provider connection successful',
      });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log.warn('ASR verification failed:', { providerId, error: message });

      // Heuristic: classify as an auth failure based on the error text.
      if (
        message.includes('401') ||
        message.includes('Unauthorized') ||
        message.includes('API key')
      ) {
        return apiError('INVALID_REQUEST', 401, 'Authentication failed: Check your API key');
      }

      return apiError('UPSTREAM_ERROR', 400, `ASR verification failed: ${message}`);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.error('Verify ASR provider error:', message);
    return apiError('INTERNAL_ERROR', 500, `Internal error: ${message}`);
  }
}
87 changes: 87 additions & 0 deletions app/api/verify-tts-provider/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
* Verify TTS Provider API
*
* Lightweight endpoint that validates TTS provider credentials.
*
* POST /api/verify-tts-provider
*
* Body:
* providerId: TTSProviderId
* apiKey: string (required; the handler responds 400 MISSING_API_KEY when it is absent)
* baseUrl: string (optional)
* modelId: string (optional)
*
* Response: { success: boolean, message: string }
*/

import { NextRequest } from 'next/server';
import { generateTTS } from '@/lib/audio/tts-providers';
import { TTS_PROVIDERS } from '@/lib/audio/constants';
import type { TTSProviderId, TTSModelConfig } from '@/lib/audio/types';
import { apiError, apiSuccess } from '@/lib/server/api-response';
import { createLogger } from '@/lib/logger';

const log = createLogger('VerifyTTSProvider');

/**
 * POST /api/verify-tts-provider
 *
 * Checks TTS credentials by synthesizing the word "test" through `generateTTS`
 * using the provider settings supplied in the JSON body.
 *
 * Outcomes:
 * - success payload when the provider returns non-empty audio
 * - 400 when the body is not a JSON object, `providerId` is missing or unknown,
 *   `apiKey` is missing, or the provider call fails for a non-auth reason
 * - 401 when the provider error message looks like an authentication failure
 * - 500 when the provider returns empty audio or an unexpected error occurs
 */
export async function POST(request: NextRequest) {
  try {
    // Parse defensively: a malformed body is a client error, not a server fault.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return apiError('INVALID_REQUEST', 400, 'Request body must be valid JSON');
    }

    if (typeof body !== 'object' || body === null) {
      return apiError('INVALID_REQUEST', 400, 'Request body must be a JSON object');
    }

    const { providerId, apiKey, baseUrl, modelId } = body as {
      providerId?: TTSProviderId;
      apiKey?: string;
      baseUrl?: string;
      modelId?: string;
    };

    if (!providerId) {
      return apiError('MISSING_REQUIRED_FIELD', 400, 'Provider ID is required');
    }

    // Own-property check: a plain index lookup would also resolve inherited keys
    // such as "constructor" or "toString" and let them through as a "provider".
    if (
      typeof providerId !== 'string' ||
      !Object.prototype.hasOwnProperty.call(TTS_PROVIDERS, providerId)
    ) {
      return apiError('INVALID_REQUEST', 400, `Unknown TTS provider: ${providerId}`);
    }

    const provider = TTS_PROVIDERS[providerId];
    if (!provider) {
      return apiError('INVALID_REQUEST', 400, `Unknown TTS provider: ${providerId}`);
    }

    if (!apiKey) {
      return apiError('MISSING_API_KEY', 400, 'API key is required');
    }

    try {
      // NOTE(review): baseUrl is caller-supplied and is handed to generateTTS,
      // which presumably issues a server-side request to it — confirm that an
      // SSRF guard / allow-list is applied somewhere on this path.
      const config: TTSModelConfig = {
        providerId,
        apiKey,
        // `||` is deliberate: an empty string from the client falls back to the default.
        baseUrl: baseUrl || provider.defaultBaseUrl,
        modelId: modelId || provider.defaultModelId,
        // Use the provider's first listed voice; 'default' when it lists none.
        voice: provider.voices[0]?.id || 'default',
        speed: 1.0,
      };

      // Try to generate test TTS
      const result = await generateTTS(config, 'test');

      if (result.audio && result.audio.length > 0) {
        return apiSuccess({
          success: true,
          message: 'TTS provider connection successful',
        });
      }

      return apiError('UPSTREAM_ERROR', 500, 'TTS generation returned empty audio');
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log.warn('TTS verification failed:', { providerId, error: message });

      // Heuristic: classify as an auth failure based on the error text.
      if (
        message.includes('401') ||
        message.includes('Unauthorized') ||
        message.includes('API key')
      ) {
        return apiError('INVALID_REQUEST', 401, 'Authentication failed: Check your API key');
      }

      return apiError('UPSTREAM_ERROR', 400, `TTS verification failed: ${message}`);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.error('Verify TTS provider error:', message);
    return apiError('INTERNAL_ERROR', 500, `Internal error: ${message}`);
  }
}
23 changes: 21 additions & 2 deletions app/generation-preview/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -722,15 +722,34 @@ function GenerationPreviewContent() {
const audioId = `tts_${action.id}`;
action.audioId = audioId;
try {
// For OpenAI Compatible TTS, prioritize custom models
let ttsModelId = ttsProviderConfig?.modelId;
if (
settings.ttsProviderId === 'openai-compatible-tts' &&
ttsProviderConfig?.customModels?.length
) {
// Use first custom model if available
ttsModelId = ttsProviderConfig.customModels[0].id;
}

// For OpenAI Compatible TTS, use custom voice if configured
let ttsVoice = settings.ttsVoice;
if (settings.ttsProviderId === 'openai-compatible-tts') {
const customVoice = ttsProviderConfig?.providerOptions?.customVoice as string | undefined;
if (customVoice?.trim()) {
ttsVoice = customVoice.trim();
}
}

const resp = await fetch('/api/generate/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
text: action.text,
audioId,
ttsProviderId: settings.ttsProviderId,
ttsModelId: ttsProviderConfig?.modelId,
ttsVoice: settings.ttsVoice,
ttsModelId,
ttsVoice,
ttsSpeed: settings.ttsSpeed,
ttsApiKey: ttsProviderConfig?.apiKey || undefined,
ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined,
Expand Down
12 changes: 11 additions & 1 deletion components/agent/agent-bar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ function AgentVoicePill({
const controller = new AbortController();
previewAbortRef.current = controller;
const providerConfig = ttsProvidersConfig[providerId];

// For OpenAI Compatible TTS, use custom voice if configured
let ttsVoice = voiceId;
if (providerId === 'openai-compatible-tts') {
const customVoice = providerConfig?.providerOptions?.customVoice as string | undefined;
if (customVoice?.trim()) {
ttsVoice = customVoice.trim();
}
}

const res = await fetch('/api/generate/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
Expand All @@ -110,7 +120,7 @@ function AgentVoicePill({
audioId: 'voice-preview',
ttsProviderId: providerId,
ttsModelId: modelId || providerConfig?.modelId,
ttsVoice: voiceId,
ttsVoice,
ttsSpeed: 1,
ttsApiKey: providerConfig?.apiKey,
ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl,
Expand Down
1 change: 1 addition & 0 deletions components/generation/media-popover.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ const TABS: Array<{ id: TabId; icon: LucideIcon; label: string }> = [
function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string {
const names: Record<TTSProviderId, string> = {
'openai-tts': t('settings.providerOpenAITTS'),
'openai-compatible-tts': t('settings.providerOpenAICompatibleTTS'),
'azure-tts': t('settings.providerAzureTTS'),
'glm-tts': t('settings.providerGLMTTS'),
'qwen-tts': t('settings.providerQwenTTS'),
Expand Down
Loading