diff --git a/app/src/App.tsx b/app/src/App.tsx index b6964db1..e77552aa 100644 --- a/app/src/App.tsx +++ b/app/src/App.tsx @@ -4,6 +4,8 @@ import voiceboxLogo from '@/assets/voicebox-logo.png'; import ShinyText from '@/components/ShinyText'; import { TitleBarDragRegion } from '@/components/TitleBarDragRegion'; import { useAutoUpdater } from '@/hooks/useAutoUpdater'; +import { apiClient } from '@/lib/api/client'; +import type { HealthResponse } from '@/lib/api/types'; import { TOP_SAFE_AREA_PADDING } from '@/lib/constants/ui'; import { cn } from '@/lib/utils/cn'; import { usePlatform } from '@/platform/PlatformContext'; @@ -11,6 +13,33 @@ import { router } from '@/router'; import { useLogStore } from '@/stores/logStore'; import { useServerStore } from '@/stores/serverStore'; +/** + * Validate that a health response has the expected Voicebox-specific shape. + * Prevents misidentifying an unrelated service on the same port. + */ +function isVoiceboxHealthResponse(health: HealthResponse): boolean { + return ( + health?.status === 'healthy' && + typeof health.model_loaded === 'boolean' && + typeof health.gpu_available === 'boolean' + ); +} + +/** + * Check whether a startup error indicates the port is occupied by an external + * server (which we should try to reuse via health-check polling) vs. a real + * failure (missing sidecar, signing issue, etc.) that should surface immediately. + */ +function isPortInUseError(error: unknown): boolean { + const msg = error instanceof Error ? error.message : String(error); + return ( + msg.includes('already in use') || + msg.includes('port') || + msg.includes('EADDRINUSE') || + msg.includes('address already in use') + ); +} + const LOADING_MESSAGES = [ 'Warming up tensors...', 'Calibrating synthesizer engine...', @@ -37,6 +66,7 @@ const LOADING_MESSAGES = [ function App() { const platform = usePlatform(); const [serverReady, setServerReady] = useState(false); + const [startupError, setStartupError] = useState(null); const [loadingMessageIndex, setLoadingMessageIndex] = useState(0); const serverStartingRef = useRef(false); @@ -122,6 +152,46 @@ function App() { serverStartingRef.current = false; // @ts-expect-error - adding property to window window.__voiceboxServerStartedByApp = false; + + // Only fall back to health-check polling when the error indicates the + // port is occupied (likely an external server). For real failures + // (missing sidecar, signing issues, etc.) surface the error immediately. + if (!isPortInUseError(error)) { + const msg = error instanceof Error ? error.message : String(error); + console.error('Real startup failure — not polling:', msg); + setStartupError(msg); + return; + } + + // Fall back to polling: the server may already be running externally + // (e.g. started via python/uvicorn/Docker). Poll the health endpoint + // until it responds with a valid Voicebox payload, then transition to + // the main UI. + console.log('Falling back to health-check polling...'); + const pollInterval = setInterval(async () => { + try { + const health = await apiClient.getHealth(); + if (!isVoiceboxHealthResponse(health)) { + console.log('Health response is not from a Voicebox server, keep polling...'); + return; + } + console.log('External Voicebox server detected via health check'); + clearInterval(pollInterval); + setServerReady(true); + } catch { + // Server not ready yet, keep polling + } + }, 2000); + + // Stop polling after 2 minutes and surface the failure + setTimeout(() => { + clearInterval(pollInterval); + serverStartingRef.current = false; + setStartupError( + 'Could not connect to a Voicebox server within 2 minutes. ' + + 'Please check that the server is running and try again.', + ); + }, 120_000); }); // Cleanup: stop server on actual unmount (not StrictMode remount) @@ -168,15 +238,34 @@ function App() { className="w-48 h-48 object-contain animate-fade-in-scale relative z-10" /> -
- -
+ {startupError ? ( +
+

Server startup failed

+

{startupError}

+ +
+ ) : ( +
+ +
+ )} ); diff --git a/app/src/components/Generation/EngineModelSelector.tsx b/app/src/components/Generation/EngineModelSelector.tsx index 4382d3f7..7f4f600b 100644 --- a/app/src/components/Generation/EngineModelSelector.tsx +++ b/app/src/components/Generation/EngineModelSelector.tsx @@ -1,3 +1,4 @@ +import { useEffect } from 'react'; import type { UseFormReturn } from 'react-hook-form'; import { FormControl } from '@/components/ui/form'; import { @@ -7,6 +8,7 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select'; +import type { VoiceProfileResponse } from '@/lib/api/types'; import { getLanguageOptionsForEngine } from '@/lib/constants/languages'; import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm'; @@ -15,34 +17,57 @@ import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm'; * Adding a new engine means adding one entry here. */ const ENGINE_OPTIONS = [ - { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B' }, - { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B' }, - { value: 'luxtts', label: 'LuxTTS' }, - { value: 'chatterbox', label: 'Chatterbox' }, - { value: 'chatterbox_turbo', label: 'Chatterbox Turbo' }, - { value: 'tada:1B', label: 'TADA 1B' }, - { value: 'tada:3B', label: 'TADA 3B Multilingual' }, + { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B', engine: 'qwen' }, + { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B', engine: 'qwen' }, + { value: 'qwen_custom_voice:1.7B', label: 'Qwen CustomVoice 1.7B', engine: 'qwen_custom_voice' }, + { value: 'qwen_custom_voice:0.6B', label: 'Qwen CustomVoice 0.6B', engine: 'qwen_custom_voice' }, + { value: 'luxtts', label: 'LuxTTS', engine: 'luxtts' }, + { value: 'chatterbox', label: 'Chatterbox', engine: 'chatterbox' }, + { value: 'chatterbox_turbo', label: 'Chatterbox Turbo', engine: 'chatterbox_turbo' }, + { value: 'tada:1B', label: 'TADA 1B', engine: 'tada' }, + { value: 'tada:3B', label: 'TADA 3B Multilingual', engine: 'tada' }, + { value: 'kokoro', label: 'Kokoro 82M', engine: 'kokoro' }, ] as const; const ENGINE_DESCRIPTIONS: Record = { qwen: 'Multi-language, two sizes', + qwen_custom_voice: '9 preset voices, instruct control', luxtts: 'Fast, English-focused', chatterbox: '23 languages, incl. Hebrew', chatterbox_turbo: 'English, [laugh] [cough] tags', tada: 'HumeAI, 700s+ coherent audio', + kokoro: '82M params, CPU realtime, 8 langs', }; /** Engines that only support English and should force language to 'en' on select. */ const ENGLISH_ONLY_ENGINES = new Set(['luxtts', 'chatterbox_turbo']); +/** Engines that support cloned (reference audio) profiles. */ +const CLONING_ENGINES = new Set(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']); + +function getAvailableOptions(selectedProfile?: VoiceProfileResponse | null) { + if (!selectedProfile) return ENGINE_OPTIONS; + return ENGINE_OPTIONS.filter((opt) => isProfileCompatibleWithEngine(selectedProfile, opt.engine)); +} + function getSelectValue(engine: string, modelSize?: string): string { if (engine === 'qwen') return `qwen:${modelSize || '1.7B'}`; + if (engine === 'qwen_custom_voice') return `qwen_custom_voice:${modelSize || '1.7B'}`; if (engine === 'tada') return `tada:${modelSize || '1B'}`; return engine; } -function handleEngineChange(form: UseFormReturn, value: string) { - if (value.startsWith('qwen:')) { +export function applyEngineSelection(form: UseFormReturn, value: string) { + if (value.startsWith('qwen_custom_voice:')) { + const [, modelSize] = value.split(':'); + form.setValue('engine', 'qwen_custom_voice'); + form.setValue('modelSize', modelSize as '1.7B' | '0.6B'); + const currentLang = form.getValues('language'); + const available = getLanguageOptionsForEngine('qwen_custom_voice'); + if (!available.some((l) => l.value === currentLang)) { + form.setValue('language', available[0]?.value ?? 'en'); + } + } else if (value.startsWith('qwen:')) { const [, modelSize] = value.split(':'); form.setValue('engine', 'qwen'); form.setValue('modelSize', modelSize as '1.7B' | '0.6B'); @@ -85,12 +110,22 @@ function handleEngineChange(form: UseFormReturn, value: st interface EngineModelSelectorProps { form: UseFormReturn; compact?: boolean; + selectedProfile?: VoiceProfileResponse | null; } -export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) { +export function EngineModelSelector({ form, compact, selectedProfile }: EngineModelSelectorProps) { const engine = form.watch('engine') || 'qwen'; const modelSize = form.watch('modelSize'); const selectValue = getSelectValue(engine, modelSize); + const availableOptions = getAvailableOptions(selectedProfile); + + const currentEngineAvailable = availableOptions.some((opt) => opt.value === selectValue); + + useEffect(() => { + if (!currentEngineAvailable && availableOptions.length > 0) { + applyEngineSelection(form, availableOptions[0].value); + } + }, [availableOptions, currentEngineAvailable, form]); const itemClass = compact ? 'text-xs text-muted-foreground' : undefined; const triggerClass = compact @@ -98,14 +133,14 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) : undefined; return ( - applyEngineSelection(form, v)}> - {ENGINE_OPTIONS.map((opt) => ( + {availableOptions.map((opt) => ( {opt.label} @@ -119,3 +154,17 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) export function getEngineDescription(engine: string): string { return ENGINE_DESCRIPTIONS[engine] ?? ''; } + +/** + * Check if a profile is compatible with the currently selected engine. + * Useful for UI hints. + */ +export function isProfileCompatibleWithEngine( + profile: VoiceProfileResponse, + engine: string, +): boolean { + const voiceType = profile.voice_type || 'cloned'; + if (voiceType === 'preset') return profile.preset_engine === engine; + if (voiceType === 'cloned') return CLONING_ENGINES.has(engine); + return true; // designed — future +} diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx index 96e8f553..f1cd571d 100644 --- a/app/src/components/Generation/FloatingGenerateBox.tsx +++ b/app/src/components/Generation/FloatingGenerateBox.tsx @@ -36,6 +36,7 @@ export function FloatingGenerateBox({ }: FloatingGenerateBoxProps) { const selectedProfileId = useUIStore((state) => state.selectedProfileId); const setSelectedProfileId = useUIStore((state) => state.setSelectedProfileId); + const setSelectedEngine = useUIStore((state) => state.setSelectedEngine); const { data: selectedProfile } = useProfile(selectedProfileId || ''); const { data: profiles } = useProfiles(); const [isExpanded, setIsExpanded] = useState(false); @@ -67,7 +68,12 @@ export function FloatingGenerateBox({ } }, getEffectsChain: () => { - if (!selectedPresetId || !effectPresets) return undefined; + if (!selectedPresetId) return undefined; + // Profile's own effects chain (no matching preset) + if (selectedPresetId === '_profile') { + return selectedProfile?.effects_chain ?? undefined; + } + if (!effectPresets) return undefined; const preset = effectPresets.find((p) => p.id === selectedPresetId); return preset?.effects_chain; }, @@ -110,12 +116,56 @@ export function FloatingGenerateBox({ } }, [selectedProfileId, profiles, setSelectedProfileId]); - // Sync generation form language with selected profile's language + // Sync engine selection to global store so ProfileList can filter + const watchedEngine = form.watch('engine'); + useEffect(() => { + if (watchedEngine) { + setSelectedEngine(watchedEngine); + } + }, [watchedEngine, setSelectedEngine]); + + // Sync generation form language, engine, and effects with selected profile useEffect(() => { if (selectedProfile?.language) { form.setValue('language', selectedProfile.language as LanguageCode); } - }, [selectedProfile, form]); + // Auto-switch engine if profile has a default + if (selectedProfile?.default_engine) { + form.setValue( + 'engine', + selectedProfile.default_engine as + | 'qwen' + | 'luxtts' + | 'chatterbox' + | 'chatterbox_turbo' + | 'tada' + | 'kokoro', + ); + } + // Pre-fill effects from profile defaults + if ( + selectedProfile?.effects_chain && + selectedProfile.effects_chain.length > 0 && + effectPresets + ) { + // Try to match against a known preset + const profileChainJson = JSON.stringify(selectedProfile.effects_chain); + const matchingPreset = effectPresets.find( + (p) => JSON.stringify(p.effects_chain) === profileChainJson, + ); + if (matchingPreset) { + setSelectedPresetId(matchingPreset.id); + } else { + // No matching preset — use special value to pass profile chain directly + setSelectedPresetId('_profile'); + } + } else if ( + selectedProfile && + (!selectedProfile.effects_chain || selectedProfile.effects_chain.length === 0) + ) { + setSelectedPresetId(null); + } + }, [selectedProfile, effectPresets, form]); // Auto-resize textarea based on content (only when expanded) useEffect(() => { @@ -375,6 +425,12 @@ export function FloatingGenerateBox({ No effects + {selectedProfile?.effects_chain && + selectedProfile.effects_chain.length > 0 && ( + + Profile default + + )} {effectPresets?.map((preset) => ( {preset.name} diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx index 225e8dfa..ef3ff2c0 100644 --- a/app/src/components/Generation/GenerationForm.tsx +++ b/app/src/components/Generation/GenerationForm.tsx @@ -1,3 +1,4 @@ +import { useEffect } from 'react'; import { Loader2, Mic } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; @@ -19,19 +20,41 @@ import { SelectValue, } from '@/components/ui/select'; import { Textarea } from '@/components/ui/textarea'; -import { getLanguageOptionsForEngine } from '@/lib/constants/languages'; +import { getLanguageOptionsForEngine, type LanguageCode } from '@/lib/constants/languages'; import { useGenerationForm } from '@/lib/hooks/useGenerationForm'; import { useProfile } from '@/lib/hooks/useProfiles'; import { useUIStore } from '@/stores/uiStore'; -import { EngineModelSelector, getEngineDescription } from './EngineModelSelector'; +import { EngineModelSelector, applyEngineSelection, getEngineDescription } from './EngineModelSelector'; import { ParalinguisticInput } from './ParalinguisticInput'; +function getEngineSelectValue(engine: string): string { + if (engine === 'qwen') return 'qwen:1.7B'; + if (engine === 'qwen_custom_voice') return 'qwen_custom_voice:1.7B'; + if (engine === 'tada') return 'tada:1B'; + return engine; +} + export function GenerationForm() { const selectedProfileId = useUIStore((state) => state.selectedProfileId); const { data: selectedProfile } = useProfile(selectedProfileId || ''); const { form, handleSubmit, isPending } = useGenerationForm(); + useEffect(() => { + if (!selectedProfile) { + return; + } + + if (selectedProfile.language) { + form.setValue('language', selectedProfile.language as LanguageCode); + } + + const preferredEngine = selectedProfile.default_engine || selectedProfile.preset_engine; + if (preferredEngine) { + applyEngineSelection(form, getEngineSelectValue(preferredEngine)); + } + }, [form, selectedProfile]); + async function onSubmit(data: Parameters[0]) { await handleSubmit(data, selectedProfileId); } @@ -91,7 +114,7 @@ export function GenerationForm() { )} /> - {form.watch('engine') === 'qwen' && ( + {(form.watch('engine') === 'qwen' || form.watch('engine') === 'qwen_custom_voice') && ( Model - + {getEngineDescription(form.watch('engine') || 'qwen')} diff --git a/app/src/components/History/HistoryTable.tsx b/app/src/components/History/HistoryTable.tsx index e88c7701..914c7fcb 100644 --- a/app/src/components/History/HistoryTable.tsx +++ b/app/src/components/History/HistoryTable.tsx @@ -569,15 +569,27 @@ export function HistoryTable() { )} {isFailed ? ( - + <> + + + ) : ( <> diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx index c415306d..d3fe24fa 100644 --- a/app/src/components/ServerSettings/ModelManagement.tsx +++ b/app/src/components/ServerSettings/ModelManagement.tsx @@ -66,6 +66,12 @@ const MODEL_DESCRIPTIONS: Record = { 'HumeAI TADA 1B — English speech-language model built on Llama 3.2 1B. Generates 700s+ of coherent audio with synchronized text-acoustic alignment.', 'tada-3b-ml': 'HumeAI TADA 3B Multilingual — built on Llama 3.2 3B. Supports 10 languages with high-fidelity voice cloning via text-acoustic dual alignment.', + kokoro: + 'Kokoro 82M by hexgrad. Tiny 82M-parameter TTS that runs at CPU realtime. Supports 8 languages with pre-built voice styles. Apache 2.0 licensed.', + 'qwen-custom-voice-1.7B': + 'Qwen3-TTS CustomVoice 1.7B by Alibaba. 9 premium preset voices with instruct-based style control for tone, emotion, and prosody. Supports 10 languages.', + 'qwen-custom-voice-0.6B': + 'Qwen3-TTS CustomVoice 0.6B by Alibaba. Lightweight version with the same 9 preset voices and instruct control. Faster inference for lower-end hardware.', 'whisper-base': 'Smallest Whisper model (74M parameters). Fast transcription with moderate accuracy.', 'whisper-small': @@ -394,9 +400,11 @@ export function ModelManagement() { modelStatus?.models.filter( (m) => m.model_name.startsWith('qwen-tts') || + m.model_name.startsWith('qwen-custom-voice') || m.model_name.startsWith('luxtts') || m.model_name.startsWith('chatterbox') || - m.model_name.startsWith('tada'), + m.model_name.startsWith('tada') || + m.model_name.startsWith('kokoro'), ) ?? []; const whisperModels = modelStatus?.models.filter((m) => m.model_name.startsWith('whisper')) ?? []; diff --git a/app/src/components/VoiceProfiles/ProfileCard.tsx b/app/src/components/VoiceProfiles/ProfileCard.tsx index 3675b765..e634c38c 100644 --- a/app/src/components/VoiceProfiles/ProfileCard.tsx +++ b/app/src/components/VoiceProfiles/ProfileCard.tsx @@ -17,6 +17,12 @@ import { useDeleteProfile, useExportProfile } from '@/lib/hooks/useProfiles'; import { cn } from '@/lib/utils/cn'; import { useUIStore } from '@/stores/uiStore'; +/** Human-readable display names for preset engine badges. */ +const ENGINE_DISPLAY_NAMES: Record = { + kokoro: 'Kokoro', + qwen_custom_voice: 'CustomVoice', +}; + interface ProfileCardProps { profile: VoiceProfileResponse; } @@ -97,6 +103,16 @@ export function ProfileCard({ profile }: ProfileCardProps) { {profile.language} + {profile.voice_type === 'preset' && ( + + {ENGINE_DISPLAY_NAMES[profile.preset_engine ?? ''] ?? profile.preset_engine} + + )} + {profile.voice_type === 'designed' && ( + + designed + + )} {profile.effects_chain && profile.effects_chain.length > 0 && ( )} diff --git a/app/src/components/VoiceProfiles/ProfileForm.tsx b/app/src/components/VoiceProfiles/ProfileForm.tsx index 13edf6f4..50b8cb57 100644 --- a/app/src/components/VoiceProfiles/ProfileForm.tsx +++ b/app/src/components/VoiceProfiles/ProfileForm.tsx @@ -1,9 +1,11 @@ import { zodResolver } from '@hookform/resolvers/zod'; -import { Edit2, Mic, Monitor, Upload, X } from 'lucide-react'; +import { useQuery } from '@tanstack/react-query'; +import { Edit2, Mic, Monitor, Music, Upload, X } from 'lucide-react'; import { useEffect, useRef, useState } from 'react'; import { useForm } from 'react-hook-form'; import * as z from 'zod'; import { EffectsChainEditor } from '@/components/Effects/EffectsChainEditor'; +import { Badge } from '@/components/ui/badge'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -15,6 +17,7 @@ import { import { Form, FormControl, + FormDescription, FormField, FormItem, FormLabel, @@ -32,7 +35,7 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; import { Textarea } from '@/components/ui/textarea'; import { useToast } from '@/components/ui/use-toast'; import { apiClient } from '@/lib/api/client'; -import type { EffectConfig } from '@/lib/api/types'; +import type { EffectConfig, PresetVoice, VoiceType } from '@/lib/api/types'; import { LANGUAGE_CODES, LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages'; import { useAudioPlayer } from '@/lib/hooks/useAudioPlayer'; import { useAudioRecording } from '@/lib/hooks/useAudioRecording'; @@ -40,6 +43,7 @@ import { useAddSample, useCreateProfile, useDeleteAvatar, + useDeleteProfile, useProfile, useUpdateProfile, useUploadAvatar, @@ -56,6 +60,16 @@ import { AudioSampleUpload } from './AudioSampleUpload'; import { SampleList } from './SampleList'; const MAX_AUDIO_DURATION_SECONDS = 30; +const PRESET_ONLY_ENGINES = new Set(['kokoro', 'qwen_custom_voice']); +const DEFAULT_ENGINE_OPTIONS = [ + { value: 'qwen', label: 'Qwen3-TTS' }, + { value: 'qwen_custom_voice', label: 'Qwen CustomVoice' }, + { value: 'luxtts', label: 'LuxTTS' }, + { value: 'chatterbox', label: 'Chatterbox' }, + { value: 'chatterbox_turbo', label: 'Chatterbox Turbo' }, + { value: 'tada', label: 'TADA' }, + { value: 'kokoro', label: 'Kokoro 82M' }, +] as const; const baseProfileSchema = z.object({ name: z.string().min(1, 'Name is required').max(100), @@ -116,20 +130,25 @@ export function ProfileForm() { const createProfile = useCreateProfile(); const updateProfile = useUpdateProfile(); const addSample = useAddSample(); + const deleteProfile = useDeleteProfile(); const uploadAvatar = useUploadAvatar(); const deleteAvatar = useDeleteAvatar(); const transcribe = useTranscription(); const { toast } = useToast(); + const [voiceSource, setVoiceSource] = useState<'clone' | 'builtin'>('clone'); const [sampleMode, setSampleMode] = useState<'upload' | 'record' | 'system'>('record'); const [audioDuration, setAudioDuration] = useState(null); const [isValidatingAudio, setIsValidatingAudio] = useState(false); const [avatarPreview, setAvatarPreview] = useState(null); + const [selectedPresetEngine, setSelectedPresetEngine] = useState('kokoro'); + const [selectedPresetVoiceId, setSelectedPresetVoiceId] = useState(''); const avatarInputRef = useRef(null); const { isPlaying, playPause, cleanup: cleanupAudio } = useAudioPlayer(); const isCreating = !editingProfileId; const serverUrl = useServerStore((state) => state.serverUrl); const [profileEffectsChain, setProfileEffectsChain] = useState([]); const [effectsDirty, setEffectsDirty] = useState(false); + const [defaultEngine, setDefaultEngine] = useState(''); const form = useForm({ resolver: zodResolver(profileSchema), @@ -239,6 +258,26 @@ export function ProfileForm() { }, }); + // Fetch available preset voices for the selected engine + const presetEngineToQuery = isCreating + ? selectedPresetEngine + : (editingProfile?.preset_engine ?? ''); + const { data: presetVoicesData } = useQuery({ + queryKey: ['presetVoices', presetEngineToQuery], + queryFn: () => apiClient.listPresetVoices(presetEngineToQuery), + enabled: + !!presetEngineToQuery && + ((voiceSource === 'builtin' && isCreating) || + (!isCreating && editingProfile?.voice_type === 'preset')), + }); + const presetVoices = presetVoicesData?.voices ?? []; + const isSampleBasedProfile = isCreating + ? voiceSource === 'clone' + : editingProfile?.voice_type !== 'preset'; + const availableDefaultEngines = DEFAULT_ENGINE_OPTIONS.filter( + (option) => !isSampleBasedProfile || !PRESET_ONLY_ENGINES.has(option.value), + ); + // Show recording errors useEffect(() => { if (recordingError) { @@ -287,6 +326,7 @@ export function ProfileForm() { }); setProfileEffectsChain(editingProfile.effects_chain ?? []); setEffectsDirty(false); + setDefaultEngine(editingProfile.default_engine ?? ''); } else if (profileFormDraft && open) { // Restore from draft when opening in create mode form.reset({ @@ -326,6 +366,24 @@ export function ProfileForm() { } }, [editingProfile, profileFormDraft, open, form]); + useEffect(() => { + if ( + defaultEngine && + !availableDefaultEngines.some((option) => option.value === defaultEngine) + ) { + setDefaultEngine(''); + } + }, [availableDefaultEngines, defaultEngine]); + + useEffect(() => { + if (!selectedPresetVoiceId) { + return; + } + + if (!presetVoices.some((voice: PresetVoice) => voice.voice_id === selectedPresetVoiceId)) { + setSelectedPresetVoiceId(''); + } + }, [presetVoices, selectedPresetVoiceId]); async function handleTranscribe() { const file = form.getValues('sampleFile'); if (!file) { @@ -415,13 +473,14 @@ export function ProfileForm() { async function onSubmit(data: ProfileFormValues) { try { if (editingProfileId) { - // Editing: just update profile + // Editing: update profile await updateProfile.mutateAsync({ profileId: editingProfileId, data: { name: data.name, description: data.description, language: data.language, + default_engine: defaultEngine || undefined, }, }); @@ -464,8 +523,50 @@ export function ProfileForm() { title: 'Voice updated', description: `"${data.name}" has been updated successfully.`, }); + } else if (voiceSource === 'builtin') { + // Creating preset profile from built-in voice + if (!selectedPresetVoiceId) { + toast({ + title: 'No voice selected', + description: 'Please select a built-in voice.', + variant: 'destructive', + }); + return; + } + + const profile = await createProfile.mutateAsync({ + name: data.name, + description: data.description, + language: data.language, + voice_type: 'preset' as VoiceType, + preset_engine: selectedPresetEngine, + preset_voice_id: selectedPresetVoiceId, + default_engine: selectedPresetEngine, + }); + + // Handle avatar upload if provided + if (data.avatarFile) { + try { + await uploadAvatar.mutateAsync({ + profileId: profile.id, + file: data.avatarFile, + }); + } catch (avatarError) { + toast({ + title: 'Avatar upload failed', + description: + avatarError instanceof Error ? avatarError.message : 'Failed to upload avatar', + variant: 'destructive', + }); + } + } + + toast({ + title: 'Profile created', + description: `"${data.name}" has been created with a built-in voice.`, + }); } else { - // Creating: require sample file and reference text + // Creating cloned profile: require sample file and reference text const sampleFile = form.getValues('sampleFile'); const referenceText = form.getValues('referenceText'); @@ -528,6 +629,7 @@ export function ProfileForm() { name: data.name, description: data.description, language: data.language, + default_engine: defaultEngine || undefined, }); // Convert non-WAV uploads to WAV so the backend can always use soundfile. @@ -572,12 +674,32 @@ export function ProfileForm() { description: `"${data.name}" has been created with a sample.`, }); } catch (sampleError) { - // Profile was created but sample failed - still show error + let rollbackSucceeded = false; + try { + await deleteProfile.mutateAsync(profile.id); + rollbackSucceeded = true; + } catch (rollbackError) { + toast({ + title: 'Rollback failed', + description: + rollbackError instanceof Error + ? rollbackError.message + : 'Created profile could not be removed after sample upload failure.', + variant: 'destructive', + }); + } + toast({ title: 'Failed to add sample', - description: `Profile "${data.name}" was created, but failed to add sample: ${sampleError instanceof Error ? sampleError.message : 'Unknown error'}`, + description: + sampleError instanceof Error + ? `${sampleError.message}${rollbackSucceeded ? ' The profile was rolled back.' : ''}` + : rollbackSucceeded + ? 'Failed to add sample. The profile was rolled back.' + : 'Failed to add sample.', variant: 'destructive', }); + return; } } @@ -642,16 +764,16 @@ export function ProfileForm() { return ( - -
+ +
- {editingProfileId ? 'Edit Voice' : 'Clone voice'} + {editingProfileId ? 'Edit Voice' : 'Create Voice'} {editingProfileId ? 'Update your voice profile details and manage samples.' - : 'Create a new voice profile with an audio sample to clone the voice.'} + : 'Create a new voice profile from an audio sample or a built-in voice.'} {isCreating && profileFormDraft && (
@@ -682,143 +804,276 @@ export function ProfileForm() {
-
+
{/* Left column: Sample management */} -
+
{isCreating ? ( <> - { - const newMode = v as 'upload' | 'record' | 'system'; - // Cancel any active recordings when switching modes - if (isRecording && newMode !== 'record') { - cancelRecording(); - } - if (isSystemRecording && newMode !== 'system') { - cancelSystemRecording(); - } - setSampleMode(newMode); - }} - > - - - - Upload - - - - Record - - {platform.metadata.isTauri && isSystemAudioSupported && ( - - - System Audio - - )} - - - - ( - MAX_AUDIO_DURATION_SECONDS - } - fieldName={name} + {/* Voice source selector */} +
+
+ + +
+
+ + {voiceSource === 'builtin' ? ( +
+ + Choose a pre-built voice. These don't require an audio sample. + + + {/* Engine selector */} + + Engine + + + + {/* Voice picker */} + + Voice +
+ {presetVoices.map((voice: PresetVoice) => ( + + ))} +
+
+
+ ) : ( + <> + { + const newMode = v as 'upload' | 'record' | 'system'; + // Cancel any active recordings when switching modes + if (isRecording && newMode !== 'record') { + cancelRecording(); + } + if (isSystemRecording && newMode !== 'system') { + cancelSystemRecording(); + } + setSampleMode(newMode); + }} + > + + + + Upload + + + + Record + + {platform.metadata.isTauri && isSystemAudioSupported && ( + + + System Audio + + )} + + + + ( + MAX_AUDIO_DURATION_SECONDS + } + fieldName={name} + /> + )} /> + + + + ( + + )} + /> + + + {platform.metadata.isTauri && isSystemAudioSupported && ( + + ( + + )} + /> + )} - /> -
+
- ( - + name="referenceText" + render={({ field }) => ( + + Reference Text + +