diff --git a/app/src/App.tsx b/app/src/App.tsx
index b6964db1..e77552aa 100644
--- a/app/src/App.tsx
+++ b/app/src/App.tsx
@@ -4,6 +4,8 @@ import voiceboxLogo from '@/assets/voicebox-logo.png';
 import ShinyText from '@/components/ShinyText';
 import { TitleBarDragRegion } from '@/components/TitleBarDragRegion';
 import { useAutoUpdater } from '@/hooks/useAutoUpdater';
+import { apiClient } from '@/lib/api/client';
+import type { HealthResponse } from '@/lib/api/types';
 import { TOP_SAFE_AREA_PADDING } from '@/lib/constants/ui';
 import { cn } from '@/lib/utils/cn';
 import { usePlatform } from '@/platform/PlatformContext';
@@ -11,6 +13,33 @@ import { router } from '@/router';
 import { useLogStore } from '@/stores/logStore';
 import { useServerStore } from '@/stores/serverStore';
 
+/**
+ * Decide whether a health payload carries the Voicebox-specific fields.
+ * Keeps an unrelated service bound to the same port from being mistaken for ours.
+ */
+function isVoiceboxHealthResponse(health: HealthResponse): boolean {
+  // Optional chaining: a null/undefined payload is simply rejected.
+  if (health?.status !== 'healthy') return false;
+  const hasModelFlag = typeof health.model_loaded === 'boolean';
+  const hasGpuFlag = typeof health.gpu_available === 'boolean';
+  return hasModelFlag && hasGpuFlag;
+}
+
+/**
+ * Check whether a startup error indicates the port is occupied by an external
+ * server (which we should try to reuse via health-check polling) vs. a real
+ * failure (missing sidecar, signing issue, etc.) that should surface immediately.
+ */
+function isPortInUseError(error: unknown): boolean {
+  const msg = (error instanceof Error ? error.message : String(error)).toLowerCase();
+  // A bare "port" match is too broad ("import", "support"); require in-use wording.
+  return (
+    msg.includes('already in use') ||
+    msg.includes('eaddrinuse') ||
+    /\bport\b.*\bin use\b/.test(msg)
+  );
+}
+
 const LOADING_MESSAGES = [
   'Warming up tensors...',
   'Calibrating synthesizer engine...',
@@ -37,6 +66,7 @@ const LOADING_MESSAGES = [
 function App() {
   const platform = usePlatform();
   const [serverReady, setServerReady] = useState(false);
+  const [startupError, setStartupError] = useState<string | null>(null);
   const [loadingMessageIndex, setLoadingMessageIndex] = useState(0);
   const serverStartingRef = useRef(false);
 
@@ -122,6 +152,46 @@ function App() {
         serverStartingRef.current = false;
         // @ts-expect-error - adding property to window
         window.__voiceboxServerStartedByApp = false;
+
+        // Only fall back to health-check polling when the error indicates the
+        // port is occupied (likely an external server). For real failures
+        // (missing sidecar, signing issues, etc.) surface the error immediately.
+        if (!isPortInUseError(error)) {
+          const msg = error instanceof Error ? error.message : String(error);
+          console.error('Real startup failure — not polling:', msg);
+          setStartupError(msg);
+          return;
+        }
+
+        // Fall back to polling: the server may already be running externally (e.g.
+        // started via python/uvicorn/Docker). Poll the health endpoint until it
+        // answers with a valid Voicebox payload, then transition to the main UI.
+        console.log('Falling back to health-check polling...');
+        const pollInterval = setInterval(async () => {
+          try {
+            const health = await apiClient.getHealth();
+            if (!isVoiceboxHealthResponse(health)) {
+              console.log('Health response is not from a Voicebox server, keep polling...');
+              return;
+            }
+            console.log('External Voicebox server detected via health check');
+            clearInterval(pollInterval);
+            clearTimeout(pollTimeout);
+            setServerReady(true);
+          } catch {
+            // Server not ready yet, keep polling
+          }
+        }, 2000);
+
+        // Give up after 2 minutes and surface the failure (cancelled on success above)
+        const pollTimeout = setTimeout(() => {
+          clearInterval(pollInterval);
+          serverStartingRef.current = false;
+          setStartupError(
+            'Could not connect to a Voicebox server within 2 minutes. ' +
+              'Please check that the server is running and try again.',
+          );
+        }, 120_000);
       });
 
     // Cleanup: stop server on actual unmount (not StrictMode remount)
@@ -168,15 +238,34 @@ function App() {
               className="w-48 h-48 object-contain animate-fade-in-scale relative z-10"
             />
           </div>
-          <div className="animate-fade-in-delayed">
-            <ShinyText
-              text={LOADING_MESSAGES[loadingMessageIndex]}
-              className="text-lg font-medium text-muted-foreground"
-              speed={2}
-              color="hsl(var(--muted-foreground))"
-              shineColor="hsl(var(--foreground))"
-            />
-          </div>
+          {startupError ? (
+            <div className="animate-fade-in-delayed max-w-md mx-auto space-y-3">
+              <p className="text-lg font-medium text-destructive">Server startup failed</p>
+              <p className="text-sm text-muted-foreground">{startupError}</p>
+              <button
+                type="button"
+                className="mt-2 px-4 py-2 text-sm rounded-md bg-primary text-primary-foreground hover:bg-primary/90 transition-colors"
+                onClick={() => {
+                  setStartupError(null);
+                  serverStartingRef.current = false;
+                  // Reload the whole window so the startup effect runs again from scratch
+                  window.location.reload();
+                }}
+              >
+                Retry
+              </button>
+            </div>
+          ) : (
+            <div className="animate-fade-in-delayed">
+              <ShinyText
+                text={LOADING_MESSAGES[loadingMessageIndex]}
+                className="text-lg font-medium text-muted-foreground"
+                speed={2}
+                color="hsl(var(--muted-foreground))"
+                shineColor="hsl(var(--foreground))"
+              />
+            </div>
+          )}
         </div>
       </div>
     );
diff --git a/app/src/components/Generation/EngineModelSelector.tsx b/app/src/components/Generation/EngineModelSelector.tsx
index 4382d3f7..7f4f600b 100644
--- a/app/src/components/Generation/EngineModelSelector.tsx
+++ b/app/src/components/Generation/EngineModelSelector.tsx
@@ -1,3 +1,4 @@
+import { useEffect } from 'react';
 import type { UseFormReturn } from 'react-hook-form';
 import { FormControl } from '@/components/ui/form';
 import {
@@ -7,6 +8,7 @@ import {
   SelectTrigger,
   SelectValue,
 } from '@/components/ui/select';
+import type { VoiceProfileResponse } from '@/lib/api/types';
 import { getLanguageOptionsForEngine } from '@/lib/constants/languages';
 import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm';
 
@@ -15,34 +17,57 @@ import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm';
  * Adding a new engine means adding one entry here.
  */
 const ENGINE_OPTIONS = [
-  { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B' },
-  { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B' },
-  { value: 'luxtts', label: 'LuxTTS' },
-  { value: 'chatterbox', label: 'Chatterbox' },
-  { value: 'chatterbox_turbo', label: 'Chatterbox Turbo' },
-  { value: 'tada:1B', label: 'TADA 1B' },
-  { value: 'tada:3B', label: 'TADA 3B Multilingual' },
+  { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B', engine: 'qwen' },
+  { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B', engine: 'qwen' },
+  { value: 'qwen_custom_voice:1.7B', label: 'Qwen CustomVoice 1.7B', engine: 'qwen_custom_voice' },
+  { value: 'qwen_custom_voice:0.6B', label: 'Qwen CustomVoice 0.6B', engine: 'qwen_custom_voice' },
+  { value: 'luxtts', label: 'LuxTTS', engine: 'luxtts' },
+  { value: 'chatterbox', label: 'Chatterbox', engine: 'chatterbox' },
+  { value: 'chatterbox_turbo', label: 'Chatterbox Turbo', engine: 'chatterbox_turbo' },
+  { value: 'tada:1B', label: 'TADA 1B', engine: 'tada' },
+  { value: 'tada:3B', label: 'TADA 3B Multilingual', engine: 'tada' },
+  { value: 'kokoro', label: 'Kokoro 82M', engine: 'kokoro' },
 ] as const;
 
 const ENGINE_DESCRIPTIONS: Record<string, string> = {
   qwen: 'Multi-language, two sizes',
+  qwen_custom_voice: '9 preset voices, instruct control',
   luxtts: 'Fast, English-focused',
   chatterbox: '23 languages, incl. Hebrew',
   chatterbox_turbo: 'English, [laugh] [cough] tags',
   tada: 'HumeAI, 700s+ coherent audio',
+  kokoro: '82M params, CPU realtime, 8 langs',
 };
 
 /** Engines that only support English and should force language to 'en' on select. */
 const ENGLISH_ONLY_ENGINES = new Set(['luxtts', 'chatterbox_turbo']);
 
+/** Engines that support cloned (reference audio) profiles. */
+const CLONING_ENGINES = new Set(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']);
+
+function getAvailableOptions(selectedProfile?: VoiceProfileResponse | null) {
+  if (!selectedProfile) return ENGINE_OPTIONS;
+  return ENGINE_OPTIONS.filter((opt) => isProfileCompatibleWithEngine(selectedProfile, opt.engine));
+}
+
 function getSelectValue(engine: string, modelSize?: string): string {
   if (engine === 'qwen') return `qwen:${modelSize || '1.7B'}`;
+  if (engine === 'qwen_custom_voice') return `qwen_custom_voice:${modelSize || '1.7B'}`;
   if (engine === 'tada') return `tada:${modelSize || '1B'}`;
   return engine;
 }
 
-function handleEngineChange(form: UseFormReturn<GenerationFormValues>, value: string) {
-  if (value.startsWith('qwen:')) {
+export function applyEngineSelection(form: UseFormReturn<GenerationFormValues>, value: string) {
+  if (value.startsWith('qwen_custom_voice:')) {
+    const [, modelSize] = value.split(':');
+    form.setValue('engine', 'qwen_custom_voice');
+    form.setValue('modelSize', modelSize as '1.7B' | '0.6B');
+    const currentLang = form.getValues('language');
+    const available = getLanguageOptionsForEngine('qwen_custom_voice');
+    if (!available.some((l) => l.value === currentLang)) {
+      form.setValue('language', available[0]?.value ?? 'en');
+    }
+  } else if (value.startsWith('qwen:')) {
     const [, modelSize] = value.split(':');
     form.setValue('engine', 'qwen');
     form.setValue('modelSize', modelSize as '1.7B' | '0.6B');
@@ -85,12 +110,22 @@ function handleEngineChange(form: UseFormReturn<GenerationFormValues>, value: st
 interface EngineModelSelectorProps {
   form: UseFormReturn<GenerationFormValues>;
   compact?: boolean;
+  selectedProfile?: VoiceProfileResponse | null;
 }
 
-export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) {
+export function EngineModelSelector({ form, compact, selectedProfile }: EngineModelSelectorProps) {
   const engine = form.watch('engine') || 'qwen';
   const modelSize = form.watch('modelSize');
   const selectValue = getSelectValue(engine, modelSize);
+  const availableOptions = getAvailableOptions(selectedProfile);
+
+  const currentEngineAvailable = availableOptions.some((opt) => opt.value === selectValue);
+
+  useEffect(() => {
+    if (!currentEngineAvailable && availableOptions.length > 0) {
+      applyEngineSelection(form, availableOptions[0].value);
+    }
+  }, [availableOptions, currentEngineAvailable, form]);
 
   const itemClass = compact ? 'text-xs text-muted-foreground' : undefined;
   const triggerClass = compact
@@ -98,14 +133,14 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps)
     : undefined;
 
   return (
-    <Select value={selectValue} onValueChange={(v) => handleEngineChange(form, v)}>
+    <Select value={selectValue} onValueChange={(v) => applyEngineSelection(form, v)}>
       <FormControl>
         <SelectTrigger className={triggerClass}>
           <SelectValue />
         </SelectTrigger>
       </FormControl>
       <SelectContent>
-        {ENGINE_OPTIONS.map((opt) => (
+        {availableOptions.map((opt) => (
           <SelectItem key={opt.value} value={opt.value} className={itemClass}>
             {opt.label}
           </SelectItem>
@@ -119,3 +154,17 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps)
 export function getEngineDescription(engine: string): string {
   return ENGINE_DESCRIPTIONS[engine] ?? '';
 }
+
+/**
+ * Report whether the given voice profile can be used with `engine`.
+ * Preset profiles match only their own engine; cloned ones need a cloning engine.
+ */
+export function isProfileCompatibleWithEngine(
+  profile: VoiceProfileResponse,
+  engine: string,
+): boolean {
+  const kind = profile.voice_type || 'cloned';
+  if (kind === 'cloned') return CLONING_ENGINES.has(engine);
+  if (kind === 'preset') return profile.preset_engine === engine;
+  return true; // designed — future
+}
diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx
index 96e8f553..f1cd571d 100644
--- a/app/src/components/Generation/FloatingGenerateBox.tsx
+++ b/app/src/components/Generation/FloatingGenerateBox.tsx
@@ -36,6 +36,7 @@ export function FloatingGenerateBox({
 }: FloatingGenerateBoxProps) {
   const selectedProfileId = useUIStore((state) => state.selectedProfileId);
   const setSelectedProfileId = useUIStore((state) => state.setSelectedProfileId);
+  const setSelectedEngine = useUIStore((state) => state.setSelectedEngine);
   const { data: selectedProfile } = useProfile(selectedProfileId || '');
   const { data: profiles } = useProfiles();
   const [isExpanded, setIsExpanded] = useState(false);
@@ -67,7 +68,12 @@ export function FloatingGenerateBox({
       }
     },
     getEffectsChain: () => {
-      if (!selectedPresetId || !effectPresets) return undefined;
+      if (!selectedPresetId) return undefined;
+      // Profile's own effects chain (no matching preset)
+      if (selectedPresetId === '_profile') {
+        return selectedProfile?.effects_chain ?? undefined;
+      }
+      if (!effectPresets) return undefined;
       const preset = effectPresets.find((p) => p.id === selectedPresetId);
       return preset?.effects_chain;
     },
@@ -110,12 +116,56 @@ export function FloatingGenerateBox({
     }
   }, [selectedProfileId, profiles, setSelectedProfileId]);
 
-  // Sync generation form language with selected profile's language
+  // Sync engine selection to global store so ProfileList can filter
+  const watchedEngine = form.watch('engine');
+  useEffect(() => {
+    if (watchedEngine) {
+      setSelectedEngine(watchedEngine);
+    }
+  }, [watchedEngine, setSelectedEngine]);
+
+  // Sync generation form language, engine, and effects with selected profile
   useEffect(() => {
     if (selectedProfile?.language) {
       form.setValue('language', selectedProfile.language as LanguageCode);
     }
-  }, [selectedProfile, form]);
+    // Auto-switch engine if profile has a default
+    if (selectedProfile?.default_engine) {
+      form.setValue(
+        'engine',
+        selectedProfile.default_engine as
+          | 'qwen'
+          | 'luxtts'
+          | 'chatterbox'
+          | 'chatterbox_turbo'
+          | 'tada'
+          | 'kokoro',
+      );
+    }
+    // Pre-fill effects from profile defaults
+    if (
+      selectedProfile?.effects_chain &&
+      selectedProfile.effects_chain.length > 0 &&
+      effectPresets
+    ) {
+      // Try to match against a known preset
+      const profileChainJson = JSON.stringify(selectedProfile.effects_chain);
+      const matchingPreset = effectPresets.find(
+        (p) => JSON.stringify(p.effects_chain) === profileChainJson,
+      );
+      if (matchingPreset) {
+        setSelectedPresetId(matchingPreset.id);
+      } else {
+        // No matching preset — use special value to pass profile chain directly
+        setSelectedPresetId('_profile');
+      }
+    } else if (
+      selectedProfile &&
+      (!selectedProfile.effects_chain || selectedProfile.effects_chain.length === 0)
+    ) {
+      setSelectedPresetId(null);
+    }
+  }, [selectedProfile, effectPresets, form]);
 
   // Auto-resize textarea based on content (only when expanded)
   useEffect(() => {
@@ -375,6 +425,12 @@ export function FloatingGenerateBox({
                         <SelectItem value="none" className="text-xs">
                           No effects
                         </SelectItem>
+                        {selectedProfile?.effects_chain &&
+                          selectedProfile.effects_chain.length > 0 && (
+                            <SelectItem value="_profile" className="text-xs">
+                              Profile default
+                            </SelectItem>
+                          )}
                         {effectPresets?.map((preset) => (
                           <SelectItem key={preset.id} value={preset.id} className="text-xs">
                             {preset.name}
diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx
index 225e8dfa..ef3ff2c0 100644
--- a/app/src/components/Generation/GenerationForm.tsx
+++ b/app/src/components/Generation/GenerationForm.tsx
@@ -1,3 +1,4 @@
+import { useEffect } from 'react';
 import { Loader2, Mic } from 'lucide-react';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
@@ -19,19 +20,41 @@ import {
   SelectValue,
 } from '@/components/ui/select';
 import { Textarea } from '@/components/ui/textarea';
-import { getLanguageOptionsForEngine } from '@/lib/constants/languages';
+import { getLanguageOptionsForEngine, type LanguageCode } from '@/lib/constants/languages';
 import { useGenerationForm } from '@/lib/hooks/useGenerationForm';
 import { useProfile } from '@/lib/hooks/useProfiles';
 import { useUIStore } from '@/stores/uiStore';
-import { EngineModelSelector, getEngineDescription } from './EngineModelSelector';
+import { EngineModelSelector, applyEngineSelection, getEngineDescription } from './EngineModelSelector';
 import { ParalinguisticInput } from './ParalinguisticInput';
 
+function getEngineSelectValue(engine: string): string {
+  // Multi-size engines get their default size appended; others pass through.
+  if (engine === 'qwen' || engine === 'qwen_custom_voice') return `${engine}:1.7B`;
+  if (engine === 'tada') return `${engine}:1B`;
+  return engine;
+}
+
 export function GenerationForm() {
   const selectedProfileId = useUIStore((state) => state.selectedProfileId);
   const { data: selectedProfile } = useProfile(selectedProfileId || '');
 
   const { form, handleSubmit, isPending } = useGenerationForm();
 
+  useEffect(() => {
+    if (!selectedProfile) {
+      return;
+    }
+
+    if (selectedProfile.language) {
+      form.setValue('language', selectedProfile.language as LanguageCode);
+    }
+
+    const preferredEngine = selectedProfile.default_engine || selectedProfile.preset_engine;
+    if (preferredEngine) {
+      applyEngineSelection(form, getEngineSelectValue(preferredEngine));
+    }
+  }, [form, selectedProfile]);
+
   async function onSubmit(data: Parameters<typeof handleSubmit>[0]) {
     await handleSubmit(data, selectedProfileId);
   }
@@ -91,7 +114,7 @@ export function GenerationForm() {
               )}
             />
 
-            {form.watch('engine') === 'qwen' && (
+            {(form.watch('engine') === 'qwen' || form.watch('engine') === 'qwen_custom_voice') && (
               <FormField
                 control={form.control}
                 name="instruct"
@@ -118,7 +141,7 @@ export function GenerationForm() {
             <div className="grid gap-4 md:grid-cols-3">
               <FormItem>
                 <FormLabel>Model</FormLabel>
-                <EngineModelSelector form={form} />
+                <EngineModelSelector form={form} selectedProfile={selectedProfile} />
                 <FormDescription>
                   {getEngineDescription(form.watch('engine') || 'qwen')}
                 </FormDescription>
diff --git a/app/src/components/History/HistoryTable.tsx b/app/src/components/History/HistoryTable.tsx
index e88c7701..914c7fcb 100644
--- a/app/src/components/History/HistoryTable.tsx
+++ b/app/src/components/History/HistoryTable.tsx
@@ -569,15 +569,27 @@ export function HistoryTable() {
                       )}
 
                       {isFailed ? (
-                        <Button
-                          variant="ghost"
-                          size="icon"
-                          className="h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground"
-                          aria-label="Retry generation"
-                          onClick={() => handleRetry(gen.id)}
-                        >
-                          <RotateCcw className="h-2 w-2" />
-                        </Button>
+                        <>
+                          <Button
+                            variant="ghost"
+                            size="icon"
+                            className="h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground"
+                            aria-label="Retry generation"
+                            onClick={() => handleRetry(gen.id)}
+                          >
+                            <RotateCcw className="h-2 w-2" />
+                          </Button>
+                          <Button
+                            variant="ghost"
+                            size="icon"
+                            className="h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground"
+                            aria-label="Delete generation"
+                            disabled={deleteGeneration.isPending}
+                            onClick={() => handleDeleteClick(gen.id, gen.profile_name)}
+                          >
+                            <Trash2 className="h-2 w-2" />
+                          </Button>
+                        </>
                       ) : (
                         <>
                           <DropdownMenu>
diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx
index c415306d..d3fe24fa 100644
--- a/app/src/components/ServerSettings/ModelManagement.tsx
+++ b/app/src/components/ServerSettings/ModelManagement.tsx
@@ -66,6 +66,12 @@ const MODEL_DESCRIPTIONS: Record<string, string> = {
     'HumeAI TADA 1B — English speech-language model built on Llama 3.2 1B. Generates 700s+ of coherent audio with synchronized text-acoustic alignment.',
   'tada-3b-ml':
     'HumeAI TADA 3B Multilingual — built on Llama 3.2 3B. Supports 10 languages with high-fidelity voice cloning via text-acoustic dual alignment.',
+  kokoro:
+    'Kokoro 82M by hexgrad. Tiny 82M-parameter TTS that runs at CPU realtime. Supports 8 languages with pre-built voice styles. Apache 2.0 licensed.',
+  'qwen-custom-voice-1.7B':
+    'Qwen3-TTS CustomVoice 1.7B by Alibaba. 9 premium preset voices with instruct-based style control for tone, emotion, and prosody. Supports 10 languages.',
+  'qwen-custom-voice-0.6B':
+    'Qwen3-TTS CustomVoice 0.6B by Alibaba. Lightweight version with the same 9 preset voices and instruct control. Faster inference for lower-end hardware.',
   'whisper-base':
     'Smallest Whisper model (74M parameters). Fast transcription with moderate accuracy.',
   'whisper-small':
@@ -394,9 +400,11 @@ export function ModelManagement() {
     modelStatus?.models.filter(
       (m) =>
         m.model_name.startsWith('qwen-tts') ||
+        m.model_name.startsWith('qwen-custom-voice') ||
         m.model_name.startsWith('luxtts') ||
         m.model_name.startsWith('chatterbox') ||
-        m.model_name.startsWith('tada'),
+        m.model_name.startsWith('tada') ||
+        m.model_name.startsWith('kokoro'),
     ) ?? [];
   const whisperModels = modelStatus?.models.filter((m) => m.model_name.startsWith('whisper')) ?? [];
 
diff --git a/app/src/components/VoiceProfiles/ProfileCard.tsx b/app/src/components/VoiceProfiles/ProfileCard.tsx
index 3675b765..e634c38c 100644
--- a/app/src/components/VoiceProfiles/ProfileCard.tsx
+++ b/app/src/components/VoiceProfiles/ProfileCard.tsx
@@ -17,6 +17,12 @@ import { useDeleteProfile, useExportProfile } from '@/lib/hooks/useProfiles';
 import { cn } from '@/lib/utils/cn';
 import { useUIStore } from '@/stores/uiStore';
 
+/** Human-readable display names for preset engine badges. */
+const ENGINE_DISPLAY_NAMES: Record<string, string> = {
+  kokoro: 'Kokoro',
+  qwen_custom_voice: 'CustomVoice',
+};
+
 interface ProfileCardProps {
   profile: VoiceProfileResponse;
 }
@@ -97,6 +103,16 @@ export function ProfileCard({ profile }: ProfileCardProps) {
             <Badge variant="outline" className="text-xs h-5 px-1.5 text-muted-foreground">
               {profile.language}
             </Badge>
+            {profile.voice_type === 'preset' && (
+              <Badge variant="secondary" className="text-xs h-5 px-1.5">
+                {ENGINE_DISPLAY_NAMES[profile.preset_engine ?? ''] ?? profile.preset_engine}
+              </Badge>
+            )}
+            {profile.voice_type === 'designed' && (
+              <Badge variant="secondary" className="text-xs h-5 px-1.5">
+                designed
+              </Badge>
+            )}
             {profile.effects_chain && profile.effects_chain.length > 0 && (
               <Sparkles className="h-3.5 w-3.5 text-accent fill-accent" />
             )}
diff --git a/app/src/components/VoiceProfiles/ProfileForm.tsx b/app/src/components/VoiceProfiles/ProfileForm.tsx
index 13edf6f4..50b8cb57 100644
--- a/app/src/components/VoiceProfiles/ProfileForm.tsx
+++ b/app/src/components/VoiceProfiles/ProfileForm.tsx
@@ -1,9 +1,11 @@
 import { zodResolver } from '@hookform/resolvers/zod';
-import { Edit2, Mic, Monitor, Upload, X } from 'lucide-react';
+import { useQuery } from '@tanstack/react-query';
+import { Edit2, Mic, Monitor, Music, Upload, X } from 'lucide-react';
 import { useEffect, useRef, useState } from 'react';
 import { useForm } from 'react-hook-form';
 import * as z from 'zod';
 import { EffectsChainEditor } from '@/components/Effects/EffectsChainEditor';
+import { Badge } from '@/components/ui/badge';
 import { Button } from '@/components/ui/button';
 import {
   Dialog,
@@ -15,6 +17,7 @@ import {
 import {
   Form,
   FormControl,
+  FormDescription,
   FormField,
   FormItem,
   FormLabel,
@@ -32,7 +35,7 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
 import { Textarea } from '@/components/ui/textarea';
 import { useToast } from '@/components/ui/use-toast';
 import { apiClient } from '@/lib/api/client';
-import type { EffectConfig } from '@/lib/api/types';
+import type { EffectConfig, PresetVoice, VoiceType } from '@/lib/api/types';
 import { LANGUAGE_CODES, LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages';
 import { useAudioPlayer } from '@/lib/hooks/useAudioPlayer';
 import { useAudioRecording } from '@/lib/hooks/useAudioRecording';
@@ -40,6 +43,7 @@ import {
   useAddSample,
   useCreateProfile,
   useDeleteAvatar,
+  useDeleteProfile,
   useProfile,
   useUpdateProfile,
   useUploadAvatar,
@@ -56,6 +60,16 @@ import { AudioSampleUpload } from './AudioSampleUpload';
 import { SampleList } from './SampleList';
 
 const MAX_AUDIO_DURATION_SECONDS = 30;
+const PRESET_ONLY_ENGINES = new Set(['kokoro', 'qwen_custom_voice']);
+const DEFAULT_ENGINE_OPTIONS = [
+  { value: 'qwen', label: 'Qwen3-TTS' },
+  { value: 'qwen_custom_voice', label: 'Qwen CustomVoice' },
+  { value: 'luxtts', label: 'LuxTTS' },
+  { value: 'chatterbox', label: 'Chatterbox' },
+  { value: 'chatterbox_turbo', label: 'Chatterbox Turbo' },
+  { value: 'tada', label: 'TADA' },
+  { value: 'kokoro', label: 'Kokoro 82M' },
+] as const;
 
 const baseProfileSchema = z.object({
   name: z.string().min(1, 'Name is required').max(100),
@@ -116,20 +130,25 @@ export function ProfileForm() {
   const createProfile = useCreateProfile();
   const updateProfile = useUpdateProfile();
   const addSample = useAddSample();
+  const deleteProfile = useDeleteProfile();
   const uploadAvatar = useUploadAvatar();
   const deleteAvatar = useDeleteAvatar();
   const transcribe = useTranscription();
   const { toast } = useToast();
+  const [voiceSource, setVoiceSource] = useState<'clone' | 'builtin'>('clone');
   const [sampleMode, setSampleMode] = useState<'upload' | 'record' | 'system'>('record');
   const [audioDuration, setAudioDuration] = useState<number | null>(null);
   const [isValidatingAudio, setIsValidatingAudio] = useState(false);
   const [avatarPreview, setAvatarPreview] = useState<string | null>(null);
+  const [selectedPresetEngine, setSelectedPresetEngine] = useState<string>('kokoro');
+  const [selectedPresetVoiceId, setSelectedPresetVoiceId] = useState<string>('');
   const avatarInputRef = useRef<HTMLInputElement>(null);
   const { isPlaying, playPause, cleanup: cleanupAudio } = useAudioPlayer();
   const isCreating = !editingProfileId;
   const serverUrl = useServerStore((state) => state.serverUrl);
   const [profileEffectsChain, setProfileEffectsChain] = useState<EffectConfig[]>([]);
   const [effectsDirty, setEffectsDirty] = useState(false);
+  const [defaultEngine, setDefaultEngine] = useState<string>('');
 
   const form = useForm<ProfileFormValues>({
     resolver: zodResolver(profileSchema),
@@ -239,6 +258,26 @@ export function ProfileForm() {
     },
   });
 
+  // Fetch available preset voices for the selected engine
+  const presetEngineToQuery = isCreating
+    ? selectedPresetEngine
+    : (editingProfile?.preset_engine ?? '');
+  const { data: presetVoicesData } = useQuery({
+    queryKey: ['presetVoices', presetEngineToQuery],
+    queryFn: () => apiClient.listPresetVoices(presetEngineToQuery),
+    enabled:
+      !!presetEngineToQuery &&
+      ((voiceSource === 'builtin' && isCreating) ||
+        (!isCreating && editingProfile?.voice_type === 'preset')),
+  });
+  const presetVoices = presetVoicesData?.voices ?? [];
+  const isSampleBasedProfile = isCreating
+    ? voiceSource === 'clone'
+    : editingProfile?.voice_type !== 'preset';
+  const availableDefaultEngines = DEFAULT_ENGINE_OPTIONS.filter(
+    (option) => !isSampleBasedProfile || !PRESET_ONLY_ENGINES.has(option.value),
+  );
+
   // Show recording errors
   useEffect(() => {
     if (recordingError) {
@@ -287,6 +326,7 @@ export function ProfileForm() {
       });
       setProfileEffectsChain(editingProfile.effects_chain ?? []);
       setEffectsDirty(false);
+      setDefaultEngine(editingProfile.default_engine ?? '');
     } else if (profileFormDraft && open) {
       // Restore from draft when opening in create mode
       form.reset({
@@ -326,6 +366,24 @@ export function ProfileForm() {
     }
   }, [editingProfile, profileFormDraft, open, form]);
 
+  useEffect(() => {
+    if (
+      defaultEngine &&
+      !availableDefaultEngines.some((option) => option.value === defaultEngine)
+    ) {
+      setDefaultEngine('');
+    }
+  }, [availableDefaultEngines, defaultEngine]);
+
+  useEffect(() => {
+    if (!selectedPresetVoiceId) {
+      return;
+    }
+
+    if (!presetVoices.some((voice: PresetVoice) => voice.voice_id === selectedPresetVoiceId)) {
+      setSelectedPresetVoiceId('');
+    }
+  }, [presetVoices, selectedPresetVoiceId]);
   async function handleTranscribe() {
     const file = form.getValues('sampleFile');
     if (!file) {
@@ -415,13 +473,14 @@ export function ProfileForm() {
   async function onSubmit(data: ProfileFormValues) {
     try {
       if (editingProfileId) {
-        // Editing: just update profile
+        // Editing: update profile
         await updateProfile.mutateAsync({
           profileId: editingProfileId,
           data: {
             name: data.name,
             description: data.description,
             language: data.language,
+            default_engine: defaultEngine || undefined,
           },
         });
 
@@ -464,8 +523,50 @@ export function ProfileForm() {
           title: 'Voice updated',
           description: `"${data.name}" has been updated successfully.`,
         });
+      } else if (voiceSource === 'builtin') {
+        // Creating preset profile from built-in voice
+        if (!selectedPresetVoiceId) {
+          toast({
+            title: 'No voice selected',
+            description: 'Please select a built-in voice.',
+            variant: 'destructive',
+          });
+          return;
+        }
+
+        const profile = await createProfile.mutateAsync({
+          name: data.name,
+          description: data.description,
+          language: data.language,
+          voice_type: 'preset' as VoiceType,
+          preset_engine: selectedPresetEngine,
+          preset_voice_id: selectedPresetVoiceId,
+          default_engine: selectedPresetEngine,
+        });
+
+        // Handle avatar upload if provided
+        if (data.avatarFile) {
+          try {
+            await uploadAvatar.mutateAsync({
+              profileId: profile.id,
+              file: data.avatarFile,
+            });
+          } catch (avatarError) {
+            toast({
+              title: 'Avatar upload failed',
+              description:
+                avatarError instanceof Error ? avatarError.message : 'Failed to upload avatar',
+              variant: 'destructive',
+            });
+          }
+        }
+
+        toast({
+          title: 'Profile created',
+          description: `"${data.name}" has been created with a built-in voice.`,
+        });
       } else {
-        // Creating: require sample file and reference text
+        // Creating cloned profile: require sample file and reference text
         const sampleFile = form.getValues('sampleFile');
         const referenceText = form.getValues('referenceText');
 
@@ -528,6 +629,7 @@ export function ProfileForm() {
           name: data.name,
           description: data.description,
           language: data.language,
+          default_engine: defaultEngine || undefined,
         });
 
         // Convert non-WAV uploads to WAV so the backend can always use soundfile.
@@ -572,12 +674,32 @@ export function ProfileForm() {
             description: `"${data.name}" has been created with a sample.`,
           });
         } catch (sampleError) {
-          // Profile was created but sample failed - still show error
+          let rollbackSucceeded = false;
+          try {
+            await deleteProfile.mutateAsync(profile.id);
+            rollbackSucceeded = true;
+          } catch (rollbackError) {
+            toast({
+              title: 'Rollback failed',
+              description:
+                rollbackError instanceof Error
+                  ? rollbackError.message
+                  : 'Created profile could not be removed after sample upload failure.',
+              variant: 'destructive',
+            });
+          }
+
           toast({
             title: 'Failed to add sample',
-            description: `Profile "${data.name}" was created, but failed to add sample: ${sampleError instanceof Error ? sampleError.message : 'Unknown error'}`,
+            description:
+              sampleError instanceof Error
+                ? `${sampleError.message}${rollbackSucceeded ? ' The profile was rolled back.' : ''}`
+                : rollbackSucceeded
+                  ? 'Failed to add sample. The profile was rolled back.'
+                  : 'Failed to add sample.',
             variant: 'destructive',
           });
+          return;
         }
       }
 
@@ -642,16 +764,16 @@ export function ProfileForm() {
 
   return (
     <Dialog open={open} onOpenChange={handleOpenChange}>
-      <DialogContent className="max-w-none w-screen h-screen left-0 top-0 translate-x-0 translate-y-0 rounded-none p-6 overflow-y-auto">
-        <div className="max-w-5xl max-h-[85vh] mx-auto my-auto w-full flex flex-col">
+      <DialogContent className="max-w-none w-screen h-screen left-0 top-0 translate-x-0 translate-y-0 rounded-none p-6 overflow-hidden">
+        <div className="max-w-5xl h-[85vh] mx-auto my-auto w-full flex flex-col overflow-hidden">
           <DialogHeader>
             <DialogTitle className="text-2xl">
-              {editingProfileId ? 'Edit Voice' : 'Clone voice'}
+              {editingProfileId ? 'Edit Voice' : 'Create Voice'}
             </DialogTitle>
             <DialogDescription>
               {editingProfileId
                 ? 'Update your voice profile details and manage samples.'
-                : 'Create a new voice profile with an audio sample to clone the voice.'}
+                : 'Create a new voice profile from an audio sample or a built-in voice.'}
             </DialogDescription>
             {isCreating && profileFormDraft && (
               <div className="flex items-center gap-2 pt-2">
@@ -682,143 +804,276 @@ export function ProfileForm() {
 
           <Form {...form}>
             <form onSubmit={form.handleSubmit(onSubmit)} className="flex-1 min-h-0 flex flex-col">
-              <div className="grid gap-6 grid-cols-2 flex-1 overflow-y-auto min-h-0">
+              <div className="grid gap-6 grid-cols-2 flex-1 min-h-0 overflow-hidden">
                 {/* Left column: Sample management */}
-                <div className="space-y-4 border-r pr-6">
+                <div className="space-y-4 border-r pr-6 overflow-y-auto min-h-0">
                   {isCreating ? (
                     <>
-                      <Tabs
-                        className="pt-4"
-                        value={sampleMode}
-                        onValueChange={(v) => {
-                          const newMode = v as 'upload' | 'record' | 'system';
-                          // Cancel any active recordings when switching modes
-                          if (isRecording && newMode !== 'record') {
-                            cancelRecording();
-                          }
-                          if (isSystemRecording && newMode !== 'system') {
-                            cancelSystemRecording();
-                          }
-                          setSampleMode(newMode);
-                        }}
-                      >
-                        <TabsList
-                          className={`grid w-full ${platform.metadata.isTauri && isSystemAudioSupported ? 'grid-cols-3' : 'grid-cols-2'}`}
-                        >
-                          <TabsTrigger value="upload" className="flex items-center gap-2">
-                            <Upload className="h-4 w-4 shrink-0" />
-                            Upload
-                          </TabsTrigger>
-                          <TabsTrigger value="record" className="flex items-center gap-2">
-                            <Mic className="h-4 w-4 shrink-0" />
-                            Record
-                          </TabsTrigger>
-                          {platform.metadata.isTauri && isSystemAudioSupported && (
-                            <TabsTrigger value="system" className="flex items-center gap-2">
-                              <Monitor className="h-4 w-4 shrink-0" />
-                              System Audio
-                            </TabsTrigger>
-                          )}
-                        </TabsList>
-
-                        <TabsContent value="upload" className="space-y-4">
-                          <FormField
-                            control={form.control}
-                            name="sampleFile"
-                            render={({ field: { onChange, name } }) => (
-                              <AudioSampleUpload
-                                file={selectedFile}
-                                onFileChange={onChange}
-                                onTranscribe={handleTranscribe}
-                                onPlayPause={handlePlayPause}
-                                isPlaying={isPlaying}
-                                isValidating={isValidatingAudio}
-                                isTranscribing={transcribe.isPending}
-                                isDisabled={
-                                  audioDuration !== null &&
-                                  audioDuration > MAX_AUDIO_DURATION_SECONDS
-                                }
-                                fieldName={name}
+                      {/* Voice source selector */}
+                      <div className="flex pt-4 pb-2">
+                        <div className="inline-flex rounded-lg border border-border p-0.5 bg-muted/50">
+                          <button
+                            type="button"
+                            onClick={() => setVoiceSource('clone')}
+                            className={`inline-flex items-center gap-2 px-3 py-1.5 text-sm rounded-md transition-colors ${
+                              voiceSource === 'clone'
+                                ? 'bg-accent text-accent-foreground shadow-sm'
+                                : 'text-muted-foreground hover:text-foreground'
+                            }`}
+                          >
+                            <Mic className="h-3.5 w-3.5" />
+                            Clone from audio
+                          </button>
+                          <button
+                            type="button"
+                            onClick={() => setVoiceSource('builtin')}
+                            className={`inline-flex items-center gap-2 px-3 py-1.5 text-sm rounded-md transition-colors ${
+                              voiceSource === 'builtin'
+                                ? 'bg-accent text-accent-foreground shadow-sm'
+                                : 'text-muted-foreground hover:text-foreground'
+                            }`}
+                          >
+                            <Music className="h-3.5 w-3.5" />
+                            Built-in voice
+                          </button>
+                        </div>
+                      </div>
+
+                      {voiceSource === 'builtin' ? (
+                        <div className="space-y-4">
+                          <FormDescription>
+                            Choose a pre-built voice. These don't require an audio sample.
+                          </FormDescription>
+
+                          {/* Engine selector */}
+                          <FormItem>
+                            <FormLabel>Engine</FormLabel>
+                            <Select
+                              value={selectedPresetEngine}
+                              onValueChange={setSelectedPresetEngine}
+                            >
+                              <FormControl>
+                                <SelectTrigger>
+                                  <SelectValue />
+                                </SelectTrigger>
+                              </FormControl>
+                              <SelectContent>
+                                <SelectItem value="kokoro">Kokoro 82M</SelectItem>
+                                <SelectItem value="qwen_custom_voice">Qwen CustomVoice</SelectItem>
+                              </SelectContent>
+                            </Select>
+                          </FormItem>
+
+                          {/* Voice picker */}
+                          <FormItem>
+                            <FormLabel>Voice</FormLabel>
+                            <div className="grid grid-cols-2 gap-1.5 max-h-[340px] overflow-y-auto pr-1">
+                              {presetVoices.map((voice: PresetVoice) => (
+                                <button
+                                  key={voice.voice_id}
+                                  type="button"
+                                  onClick={() => {
+                                    setSelectedPresetVoiceId(voice.voice_id);
+                                    // Auto-set language from voice
+                                    if (voice.language) {
+                                      form.setValue('language', voice.language as LanguageCode);
+                                    }
+                                  }}
+                                  className={`text-left px-3 py-2 rounded-md border text-sm transition-colors ${
+                                    selectedPresetVoiceId === voice.voice_id
+                                      ? 'border-accent bg-accent/10 text-accent-foreground'
+                                      : 'border-border hover:bg-muted'
+                                  }`}
+                                >
+                                  <div className="font-medium">{voice.name}</div>
+                                  <div className="flex gap-1.5 mt-0.5">
+                                    <Badge variant="outline" className="text-[10px] h-4 px-1">
+                                      {voice.gender}
+                                    </Badge>
+                                    <Badge variant="outline" className="text-[10px] h-4 px-1">
+                                      {voice.language}
+                                    </Badge>
+                                  </div>
+                                </button>
+                              ))}
+                            </div>
+                          </FormItem>
+                        </div>
+                      ) : (
+                        <>
+                          <Tabs
+                            className="pt-0"
+                            value={sampleMode}
+                            onValueChange={(v) => {
+                              const newMode = v as 'upload' | 'record' | 'system';
+                              // Cancel any active recordings when switching modes
+                              if (isRecording && newMode !== 'record') {
+                                cancelRecording();
+                              }
+                              if (isSystemRecording && newMode !== 'system') {
+                                cancelSystemRecording();
+                              }
+                              setSampleMode(newMode);
+                            }}
+                          >
+                            <TabsList
+                              className={`grid w-full ${platform.metadata.isTauri && isSystemAudioSupported ? 'grid-cols-3' : 'grid-cols-2'}`}
+                            >
+                              <TabsTrigger value="upload" className="flex items-center gap-2">
+                                <Upload className="h-4 w-4 shrink-0" />
+                                Upload
+                              </TabsTrigger>
+                              <TabsTrigger value="record" className="flex items-center gap-2">
+                                <Mic className="h-4 w-4 shrink-0" />
+                                Record
+                              </TabsTrigger>
+                              {platform.metadata.isTauri && isSystemAudioSupported && (
+                                <TabsTrigger value="system" className="flex items-center gap-2">
+                                  <Monitor className="h-4 w-4 shrink-0" />
+                                  System Audio
+                                </TabsTrigger>
+                              )}
+                            </TabsList>
+
+                            <TabsContent value="upload" className="space-y-4">
+                              <FormField
+                                control={form.control}
+                                name="sampleFile"
+                                render={({ field: { onChange, name } }) => (
+                                  <AudioSampleUpload
+                                    file={selectedFile}
+                                    onFileChange={onChange}
+                                    onTranscribe={handleTranscribe}
+                                    onPlayPause={handlePlayPause}
+                                    isPlaying={isPlaying}
+                                    isValidating={isValidatingAudio}
+                                    isTranscribing={transcribe.isPending}
+                                    isDisabled={
+                                      audioDuration !== null &&
+                                      audioDuration > MAX_AUDIO_DURATION_SECONDS
+                                    }
+                                    fieldName={name}
+                                  />
+                                )}
                               />
+                            </TabsContent>
+
+                            <TabsContent value="record" className="space-y-4">
+                              <FormField
+                                control={form.control}
+                                name="sampleFile"
+                                render={() => (
+                                  <AudioSampleRecording
+                                    file={selectedFile}
+                                    isRecording={isRecording}
+                                    duration={duration}
+                                    onStart={startRecording}
+                                    onStop={stopRecording}
+                                    onCancel={handleCancelRecording}
+                                    onTranscribe={handleTranscribe}
+                                    onPlayPause={handlePlayPause}
+                                    isPlaying={isPlaying}
+                                    isTranscribing={transcribe.isPending}
+                                  />
+                                )}
+                              />
+                            </TabsContent>
+
+                            {platform.metadata.isTauri && isSystemAudioSupported && (
+                              <TabsContent value="system" className="space-y-4">
+                                <FormField
+                                  control={form.control}
+                                  name="sampleFile"
+                                  render={() => (
+                                    <AudioSampleSystem
+                                      file={selectedFile}
+                                      isRecording={isSystemRecording}
+                                      duration={systemDuration}
+                                      onStart={startSystemRecording}
+                                      onStop={stopSystemRecording}
+                                      onCancel={handleCancelRecording}
+                                      onTranscribe={handleTranscribe}
+                                      onPlayPause={handlePlayPause}
+                                      isPlaying={isPlaying}
+                                      isTranscribing={transcribe.isPending}
+                                    />
+                                  )}
+                                />
+                              </TabsContent>
                             )}
-                          />
-                        </TabsContent>
+                          </Tabs>
 
-                        <TabsContent value="record" className="space-y-4">
                           <FormField
                             control={form.control}
-                            name="sampleFile"
-                            render={() => (
-                              <AudioSampleRecording
-                                file={selectedFile}
-                                isRecording={isRecording}
-                                duration={duration}
-                                onStart={startRecording}
-                                onStop={stopRecording}
-                                onCancel={handleCancelRecording}
-                                onTranscribe={handleTranscribe}
-                                onPlayPause={handlePlayPause}
-                                isPlaying={isPlaying}
-                                isTranscribing={transcribe.isPending}
-                              />
+                            name="referenceText"
+                            render={({ field }) => (
+                              <FormItem>
+                                <FormLabel>Reference Text</FormLabel>
+                                <FormControl>
+                                  <Textarea
+                                    placeholder="Enter the exact text spoken in the audio..."
+                                    className="min-h-[100px]"
+                                    {...field}
+                                  />
+                                </FormControl>
+                                <FormMessage />
+                              </FormItem>
                             )}
                           />
-                        </TabsContent>
-
-                        {platform.metadata.isTauri && isSystemAudioSupported && (
-                          <TabsContent value="system" className="space-y-4">
-                            <FormField
-                              control={form.control}
-                              name="sampleFile"
-                              render={() => (
-                                <AudioSampleSystem
-                                  file={selectedFile}
-                                  isRecording={isSystemRecording}
-                                  duration={systemDuration}
-                                  onStart={startSystemRecording}
-                                  onStop={stopSystemRecording}
-                                  onCancel={handleCancelRecording}
-                                  onTranscribe={handleTranscribe}
-                                  onPlayPause={handlePlayPause}
-                                  isPlaying={isPlaying}
-                                  isTranscribing={transcribe.isPending}
-                                />
-                              )}
-                            />
-                          </TabsContent>
-                        )}
-                      </Tabs>
-
-                      <FormField
-                        control={form.control}
-                        name="referenceText"
-                        render={({ field }) => (
-                          <FormItem>
-                            <FormLabel>Reference Text</FormLabel>
-                            <FormControl>
-                              <Textarea
-                                placeholder="Enter the exact text spoken in the audio..."
-                                className="min-h-[100px]"
-                                {...field}
-                              />
-                            </FormControl>
-                            <FormMessage />
-                          </FormItem>
-                        )}
-                      />
+                        </>
+                      )}
                     </>
                   ) : (
-                    // Show sample list when editing
-                    editingProfileId && (
+                    // Editing mode
+                    editingProfileId &&
+                    editingProfile &&
+                    (editingProfile.voice_type === 'preset' ? (
+                      <div className="space-y-4 pt-4">
+                        <div className="rounded-lg border border-border p-4 space-y-3">
+                          <div className="text-sm font-medium text-muted-foreground">
+                            Built-in Voice
+                          </div>
+                          <div className="flex items-center gap-3">
+                            <div className="text-lg font-semibold">
+                              {presetVoices.find(
+                                (v: PresetVoice) => v.voice_id === editingProfile.preset_voice_id,
+                              )?.name ?? editingProfile.preset_voice_id}
+                            </div>
+                            <Badge variant="secondary" className="text-xs">
+                              {editingProfile.preset_engine}
+                            </Badge>
+                          </div>
+                          {(() => {
+                            const voice = presetVoices.find(
+                              (v: PresetVoice) => v.voice_id === editingProfile.preset_voice_id,
+                            );
+                            return voice ? (
+                              <div className="flex gap-1.5">
+                                <Badge variant="outline" className="text-xs">
+                                  {voice.gender}
+                                </Badge>
+                                <Badge variant="outline" className="text-xs">
+                                  {voice.language}
+                                </Badge>
+                              </div>
+                            ) : null;
+                          })()}
+                        </div>
+                        <p className="text-xs text-muted-foreground">
+                          This profile uses a built-in voice. The voice cannot be changed after
+                          creation.
+                        </p>
+                      </div>
+                    ) : (
                       <div>
                         <SampleList profileId={editingProfileId} />
                       </div>
-                    )
+                    ))
                   )}
                 </div>
 
                 {/* Right column: Profile info */}
-                <div className="space-y-4">
+                <div className="space-y-4 overflow-y-auto min-h-0">
                   {/* Avatar Upload */}
                   <FormField
                     control={form.control}
@@ -924,6 +1179,36 @@ export function ProfileForm() {
                     )}
                   />
 
+                  <FormItem>
+                    <FormLabel>Default Engine</FormLabel>
+                    <Select
+                      value={defaultEngine || '_none'}
+                      onValueChange={(v) => {
+                        setDefaultEngine(v === '_none' ? '' : v);
+                      }}
+                      disabled={
+                        voiceSource === 'builtin' || editingProfile?.voice_type === 'preset'
+                      }
+                    >
+                      <FormControl>
+                        <SelectTrigger>
+                          <SelectValue placeholder="No preference" />
+                        </SelectTrigger>
+                      </FormControl>
+                      <SelectContent>
+                        <SelectItem value="_none">No preference</SelectItem>
+                        {availableDefaultEngines.map((option) => (
+                          <SelectItem key={option.value} value={option.value}>
+                            {option.label}
+                          </SelectItem>
+                        ))}
+                      </SelectContent>
+                    </Select>
+                    <p className="text-xs text-muted-foreground">
+                      Auto-selects this engine when the profile is chosen.
+                    </p>
+                  </FormItem>
+
                   {editingProfileId && (
                     <div className="space-y-2">
                       <FormLabel>Default Effects</FormLabel>
diff --git a/app/src/components/VoiceProfiles/ProfileList.tsx b/app/src/components/VoiceProfiles/ProfileList.tsx
index 89252433..be7332a0 100644
--- a/app/src/components/VoiceProfiles/ProfileList.tsx
+++ b/app/src/components/VoiceProfiles/ProfileList.tsx
@@ -1,4 +1,4 @@
-import { Mic, Sparkles } from 'lucide-react';
+import { Mic, Music, Sparkles } from 'lucide-react';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent } from '@/components/ui/card';
 import { useProfiles } from '@/lib/hooks/useProfiles';
@@ -6,9 +6,19 @@ import { useUIStore } from '@/stores/uiStore';
 import { ProfileCard } from './ProfileCard';
 import { ProfileForm } from './ProfileForm';
 
+/** Engines backed by preset (built-in) voices; selecting one lists only its preset profiles. */
+const PRESET_ENGINES = new Set(['kokoro', 'qwen_custom_voice']);
+
+/** Display names used in the empty state; unknown engines fall back to their raw id. */
+const ENGINE_NAMES: Record<string, string> = {
+  kokoro: 'Kokoro',
+  qwen_custom_voice: 'Qwen CustomVoice',
+};
+
 export function ProfileList() {
   const { data: profiles, isLoading, error } = useProfiles();
   const setDialogOpen = useUIStore((state) => state.setProfileDialogOpen);
+  const selectedEngine = useUIStore((state) => state.selectedEngine);
 
   if (isLoading) {
     return null;
@@ -23,6 +33,12 @@ export function ProfileList() {
   }
 
   const allProfiles = profiles || [];
+  const isPresetEngine = PRESET_ENGINES.has(selectedEngine);
+
+  // Filter profiles based on selected engine
+  const filteredProfiles = isPresetEngine
+    ? allProfiles.filter((p) => p.voice_type === 'preset' && p.preset_engine === selectedEngine)
+    : allProfiles.filter((p) => p.voice_type !== 'preset');
 
   return (
     <div className="flex flex-col">
@@ -40,9 +56,25 @@ export function ProfileList() {
               </Button>
             </CardContent>
           </Card>
+        ) : filteredProfiles.length === 0 && isPresetEngine ? (
+          <Card>
+            <CardContent className="flex flex-col items-center justify-center py-12">
+              <Music className="h-12 w-12 text-muted-foreground mb-4" />
+              <p className="text-muted-foreground mb-2">
+                No {ENGINE_NAMES[selectedEngine] ?? selectedEngine} voices created yet.
+              </p>
+              <p className="text-sm text-muted-foreground mb-4">
+                Create a profile to choose a specific voice before generating.
+              </p>
+              <Button onClick={() => setDialogOpen(true)}>
+                <Sparkles className="mr-2 h-4 w-4" />
+                Create {ENGINE_NAMES[selectedEngine] ?? selectedEngine} Voice
+              </Button>
+            </CardContent>
+          </Card>
         ) : (
           <div className="flex gap-4 overflow-x-auto p-1 pb-1 lg:grid lg:grid-cols-3 lg:auto-rows-auto lg:overflow-x-visible lg:pb-[150px]">
-            {allProfiles.map((profile) => (
+            {filteredProfiles.map((profile) => (
               <div key={profile.id} className="shrink-0 w-[200px] lg:w-auto lg:shrink">
                 <ProfileCard profile={profile} />
               </div>
diff --git a/app/src/lib/api/client.ts b/app/src/lib/api/client.ts
index 98f98182..98a375e3 100644
--- a/app/src/lib/api/client.ts
+++ b/app/src/lib/api/client.ts
@@ -17,6 +17,7 @@ import type {
   HistoryResponse,
   ModelDownloadRequest,
   ModelStatusListResponse,
+  PresetVoice,
   ProfileSampleResponse,
   StoryCreate,
   StoryDetailResponse,
@@ -97,6 +98,10 @@ class ApiClient {
     return this.request<VoiceProfileResponse>(`/profiles/${profileId}`);
   }
 
+  async listPresetVoices(engine: string): Promise<{ engine: string; voices: PresetVoice[] }> {
+    return this.request<{ engine: string; voices: PresetVoice[] }>(`/profiles/presets/${engine}`);
+  }
+
   async updateProfile(profileId: string, data: VoiceProfileCreate): Promise<VoiceProfileResponse> {
     return this.request<VoiceProfileResponse>(`/profiles/${profileId}`, {
       method: 'PUT',
diff --git a/app/src/lib/api/types.ts b/app/src/lib/api/types.ts
index aa85d001..86e3012f 100644
--- a/app/src/lib/api/types.ts
+++ b/app/src/lib/api/types.ts
@@ -1,10 +1,17 @@
 // API Types matching backend Pydantic models
 import type { LanguageCode } from '@/lib/constants/languages';
 
+export type VoiceType = 'cloned' | 'preset' | 'designed';
+
 export interface VoiceProfileCreate {
   name: string;
   description?: string;
   language: LanguageCode;
+  voice_type?: VoiceType; // 'preset' for profiles created from a built-in voice
+  preset_engine?: string; // engine providing the built-in voice (sent with 'preset')
+  preset_voice_id?: string; // chosen PresetVoice.voice_id (sent with 'preset')
+  design_prompt?: string; // NOTE(review): presumably for 'designed' voices — confirm
+  default_engine?: string; // engine auto-selected when the profile is chosen
 }
 
 export interface VoiceProfileResponse {
@@ -14,12 +21,24 @@ export interface VoiceProfileResponse {
   language: string;
   avatar_path?: string;
   effects_chain?: EffectConfig[];
+  voice_type: VoiceType;
+  preset_engine?: string;
+  preset_voice_id?: string;
+  design_prompt?: string;
+  default_engine?: string;
   generation_count: number;
   sample_count: number;
   created_at: string;
   updated_at: string;
 }
 
+export interface PresetVoice {
+  voice_id: string;
+  name: string;
+  gender: 'male' | 'female';
+  language: string;
+}
+
 export interface ProfileSampleCreate {
   reference_text: string;
 }
@@ -43,7 +62,14 @@ export interface GenerationRequest {
   language: LanguageCode;
   seed?: number;
   model_size?: '1.7B' | '0.6B' | '1B' | '3B';
-  engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo' | 'tada';
+  engine?:
+    | 'qwen'
+    | 'qwen_custom_voice'
+    | 'luxtts'
+    | 'chatterbox'
+    | 'chatterbox_turbo'
+    | 'tada'
+    | 'kokoro';
   instruct?: string;
   max_chunk_chars?: number;
   crossfade_ms?: number;
diff --git a/app/src/lib/constants/languages.ts b/app/src/lib/constants/languages.ts
index a0d233a5..e28c519b 100644
--- a/app/src/lib/constants/languages.ts
+++ b/app/src/lib/constants/languages.ts
@@ -5,6 +5,7 @@
  * LuxTTS is English-only.
  * Chatterbox Multilingual supports 23 languages.
  * Chatterbox Turbo is English-only.
+ * Kokoro supports 8 languages.
  */
 
 /** All languages that any engine supports. */
@@ -67,6 +68,8 @@ export const ENGINE_LANGUAGES: Record<string, readonly LanguageCode[]> = {
   ],
   chatterbox_turbo: ['en'],
   tada: ['en', 'ar', 'zh', 'de', 'es', 'fr', 'it', 'ja', 'pl', 'pt'],
+  kokoro: ['en', 'es', 'fr', 'hi', 'it', 'pt', 'ja', 'zh'],
+  qwen_custom_voice: ['zh', 'en', 'ja', 'ko', 'de', 'fr', 'ru', 'pt', 'es', 'it'],
 } as const;
 
 /** Helper: get language options for a given engine. */
diff --git a/app/src/lib/hooks/useGenerationForm.ts b/app/src/lib/hooks/useGenerationForm.ts
index 8e73ce07..0acdabbf 100644
--- a/app/src/lib/hooks/useGenerationForm.ts
+++ b/app/src/lib/hooks/useGenerationForm.ts
@@ -17,7 +17,17 @@ const generationSchema = z.object({
   seed: z.number().int().optional(),
   modelSize: z.enum(['1.7B', '0.6B', '1B', '3B']).optional(),
   instruct: z.string().max(500).optional(),
-  engine: z.enum(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']).optional(),
+  engine: z
+    .enum([
+      'qwen',
+      'qwen_custom_voice',
+      'luxtts',
+      'chatterbox',
+      'chatterbox_turbo',
+      'tada',
+      'kokoro',
+    ])
+    .optional(),
 });
 
 export type GenerationFormValues = z.infer<typeof generationSchema>;
@@ -83,7 +93,11 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
                 ? data.modelSize === '3B'
                   ? 'tada-3b-ml'
                   : 'tada-1b'
-                : `qwen-tts-${data.modelSize}`;
+                : engine === 'kokoro'
+                  ? 'kokoro'
+                  : engine === 'qwen_custom_voice'
+                    ? `qwen-custom-voice-${data.modelSize}`
+                    : `qwen-tts-${data.modelSize}`;
       const displayName =
         engine === 'luxtts'
           ? 'LuxTTS'
@@ -95,9 +109,15 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
                 ? data.modelSize === '3B'
                   ? 'TADA 3B Multilingual'
                   : 'TADA 1B'
-                : data.modelSize === '1.7B'
-                  ? 'Qwen TTS 1.7B'
-                  : 'Qwen TTS 0.6B';
+                : engine === 'kokoro'
+                  ? 'Kokoro 82M'
+                  : engine === 'qwen_custom_voice'
+                    ? data.modelSize === '1.7B'
+                      ? 'Qwen CustomVoice 1.7B'
+                      : 'Qwen CustomVoice 0.6B'
+                    : data.modelSize === '1.7B'
+                      ? 'Qwen TTS 1.7B'
+                      : 'Qwen TTS 0.6B';
 
       // Check if model needs downloading
       try {
@@ -112,7 +132,9 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
         console.error('Failed to check model status:', error);
       }
 
-      const hasModelSizes = engine === 'qwen' || engine === 'tada';
+      const hasModelSizes =
+        engine === 'qwen' || engine === 'qwen_custom_voice' || engine === 'tada';
+      const supportsInstruct = engine === 'qwen' || engine === 'qwen_custom_voice';
       const effectsChain = options.getEffectsChain?.();
       // This now returns immediately with status="generating"
       const result = await generation.mutateAsync({
@@ -122,7 +144,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
         seed: data.seed,
         model_size: hasModelSizes ? data.modelSize : undefined,
         engine,
-        instruct: engine === 'qwen' ? data.instruct || undefined : undefined,
+        instruct: supportsInstruct ? data.instruct || undefined : undefined,
         max_chunk_chars: maxChunkChars,
         crossfade_ms: crossfadeMs,
         normalize: normalizeAudio,
diff --git a/app/src/lib/queryClient.ts b/app/src/lib/queryClient.ts
new file mode 100644
index 00000000..43e8670e
--- /dev/null
+++ b/app/src/lib/queryClient.ts
@@ -0,0 +1,19 @@
+import { QueryClient } from '@tanstack/react-query';
+
+/**
+ * Shared QueryClient instance used across the app.
+ *
+ * Extracted into its own side-effect-free module so it can be imported from
+ * both the React bootstrap (main.tsx) and non-React code (stores, utilities)
+ * without pulling in ReactDOM or other bootstrap side effects.
+ */
+export const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: {
+      staleTime: 1000 * 60 * 5, // 5 minutes
+      gcTime: 1000 * 60 * 10, // 10 minutes (formerly cacheTime)
+      retry: 1,
+      refetchOnWindowFocus: false,
+    },
+  },
+});
diff --git a/app/src/main.tsx b/app/src/main.tsx
index e4a5e482..2607811e 100644
--- a/app/src/main.tsx
+++ b/app/src/main.tsx
@@ -1,20 +1,10 @@
-import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
+import { QueryClientProvider } from '@tanstack/react-query';
 // import { ReactQueryDevtools } from '@tanstack/react-query-devtools';
 import React from 'react';
 import ReactDOM from 'react-dom/client';
 import App from './App';
 import './index.css';
-
-const queryClient = new QueryClient({
-  defaultOptions: {
-    queries: {
-      staleTime: 1000 * 60 * 5, // 5 minutes
-      gcTime: 1000 * 60 * 10, // 10 minutes (formerly cacheTime)
-      retry: 1,
-      refetchOnWindowFocus: false,
-    },
-  },
-});
+import { queryClient } from './lib/queryClient';
 
 ReactDOM.createRoot(document.getElementById('root')!).render(
   <React.StrictMode>
diff --git a/app/src/stores/serverStore.ts b/app/src/stores/serverStore.ts
index 8f983049..c25deba7 100644
--- a/app/src/stores/serverStore.ts
+++ b/app/src/stores/serverStore.ts
@@ -1,5 +1,6 @@
 import { create } from 'zustand';
 import { persist } from 'zustand/middleware';
+import { queryClient } from '@/lib/queryClient';
 
 interface ServerStore {
   serverUrl: string;
@@ -30,11 +31,25 @@ interface ServerStore {
   setCustomModelsDir: (dir: string | null) => void;
 }
 
+/**
+ * Mark every React Query cache entry stale so active queries refetch from
+ * the new server. Called by setServerUrl when the URL actually changes.
+ */
+function invalidateAllServerData() {
+  queryClient.invalidateQueries();
+}
+
 export const useServerStore = create<ServerStore>()(
   persist(
-    (set) => ({
+    (set, get) => ({
       serverUrl: 'http://127.0.0.1:17493',
-      setServerUrl: (url) => set({ serverUrl: url }),
+      setServerUrl: (url) => {
+        const prev = get().serverUrl;
+        set({ serverUrl: url });
+        if (url !== prev) {
+          invalidateAllServerData();
+        }
+      },
 
       isConnected: false,
       setIsConnected: (connected) => set({ isConnected: connected }),
diff --git a/app/src/stores/uiStore.ts b/app/src/stores/uiStore.ts
index f2db88a2..38a089e0 100644
--- a/app/src/stores/uiStore.ts
+++ b/app/src/stores/uiStore.ts
@@ -31,6 +31,10 @@ interface UIStore {
   selectedProfileId: string | null;
   setSelectedProfileId: (id: string | null) => void;
 
+  // Currently selected engine (synced from generation form)
+  selectedEngine: string;
+  setSelectedEngine: (engine: string) => void;
+
   // Selected voice in Voices tab inspector
   selectedVoiceId: string | null;
   setSelectedVoiceId: (id: string | null) => void;
@@ -59,6 +63,9 @@ export const useUIStore = create<UIStore>((set) => ({
   selectedProfileId: null,
   setSelectedProfileId: (id) => set({ selectedProfileId: id }),
 
+  selectedEngine: 'qwen',
+  setSelectedEngine: (engine) => set({ selectedEngine: engine }),
+
   selectedVoiceId: null,
   setSelectedVoiceId: (id) => set({ selectedVoiceId: id }),
 
diff --git a/backend/backends/__init__.py b/backend/backends/__init__.py
index a4f5113a..db19b140 100644
--- a/backend/backends/__init__.py
+++ b/backend/backends/__init__.py
@@ -163,10 +163,12 @@ def is_loaded(self) -> bool:
 # The factory function uses this for the if/elif chain; the model configs live on the backend classes.
 TTS_ENGINES = {
     "qwen": "Qwen TTS",
+    "qwen_custom_voice": "Qwen CustomVoice",
     "luxtts": "LuxTTS",
     "chatterbox": "Chatterbox TTS",
     "chatterbox_turbo": "Chatterbox Turbo",
     "tada": "TADA",
+    "kokoro": "Kokoro",
 }
 
 
@@ -204,6 +206,32 @@ def _get_qwen_model_configs() -> list[ModelConfig]:
     ]
 
 
+def _get_qwen_custom_voice_configs() -> list[ModelConfig]:
+    """Return Qwen CustomVoice model configs."""
+    return [
+        ModelConfig(
+            model_name="qwen-custom-voice-1.7B",
+            display_name="Qwen CustomVoice 1.7B",
+            engine="qwen_custom_voice",
+            hf_repo_id="Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice",
+            model_size="1.7B",
+            size_mb=3500,
+            supports_instruct=True,
+            languages=["zh", "en", "ja", "ko", "de", "fr", "ru", "pt", "es", "it"],
+        ),
+        ModelConfig(
+            model_name="qwen-custom-voice-0.6B",
+            display_name="Qwen CustomVoice 0.6B",
+            engine="qwen_custom_voice",
+            hf_repo_id="Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice",
+            model_size="0.6B",
+            size_mb=1200,
+            supports_instruct=True,
+            languages=["zh", "en", "ja", "ko", "de", "fr", "ru", "pt", "es", "it"],
+        ),
+    ]
+
+
 def _get_non_qwen_tts_configs() -> list[ModelConfig]:
     """Return model configs for non-Qwen TTS engines.
 
@@ -278,6 +306,14 @@ def _get_non_qwen_tts_configs() -> list[ModelConfig]:
             size_mb=8000,
             languages=["en", "ar", "zh", "de", "es", "fr", "it", "ja", "pl", "pt"],
         ),
+        ModelConfig(
+            model_name="kokoro",
+            display_name="Kokoro 82M",
+            engine="kokoro",
+            hf_repo_id="hexgrad/Kokoro-82M",
+            size_mb=350,
+            languages=["en", "es", "fr", "hi", "it", "pt", "ja", "zh"],
+        ),
     ]
 
 
@@ -324,12 +360,12 @@ def _get_whisper_configs() -> list[ModelConfig]:
 
 def get_all_model_configs() -> list[ModelConfig]:
     """Return the full list of model configs (TTS + STT)."""
-    return _get_qwen_model_configs() + _get_non_qwen_tts_configs() + _get_whisper_configs()
+    return _get_qwen_model_configs() + _get_qwen_custom_voice_configs() + _get_non_qwen_tts_configs() + _get_whisper_configs()
 
 
 def get_tts_model_configs() -> list[ModelConfig]:
     """Return only TTS model configs."""
-    return _get_qwen_model_configs() + _get_non_qwen_tts_configs()
+    return _get_qwen_model_configs() + _get_qwen_custom_voice_configs() + _get_non_qwen_tts_configs()
 
 
 # Lookup helpers — these replace the if/elif chains in main.py
@@ -360,7 +396,7 @@ def engine_has_model_sizes(engine: str) -> bool:
 async def load_engine_model(engine: str, model_size: str = "default") -> None:
     """Load a model for the given engine, handling engines with multiple model sizes."""
     backend = get_tts_backend_for_engine(engine)
-    if engine == "qwen":
+    if engine in ("qwen", "qwen_custom_voice"):
         await backend.load_model_async(model_size)
     elif engine == "tada":
         await backend.load_model(model_size)
@@ -379,7 +415,7 @@ async def ensure_model_cached_or_raise(engine: str, model_size: str = "default")
             cfg = c
             break
 
-    if engine in ("qwen", "tada"):
+    if engine in ("qwen", "qwen_custom_voice", "tada"):
         if not backend._is_model_cached(model_size):
             raise HTTPException(
                 status_code=400,
@@ -414,6 +450,14 @@ def unload_model_by_config(config: ModelConfig) -> bool:
             return True
         return False
 
+    if config.engine == "qwen_custom_voice":
+        backend = get_tts_backend_for_engine(config.engine)
+        loaded_size = getattr(backend, "_current_model_size", None) or getattr(backend, "model_size", None)
+        if backend.is_loaded() and loaded_size == config.model_size:
+            backend.unload_model()
+            return True
+        return False
+
     # All other TTS engines
     backend = get_tts_backend_for_engine(config.engine)
     if backend.is_loaded():
@@ -437,6 +481,11 @@ def check_model_loaded(config: ModelConfig) -> bool:
             loaded_size = getattr(tts_model, "_current_model_size", None) or getattr(tts_model, "model_size", None)
             return tts_model.is_loaded() and loaded_size == config.model_size
 
+        if config.engine == "qwen_custom_voice":
+            backend = get_tts_backend_for_engine(config.engine)
+            loaded_size = getattr(backend, "_current_model_size", None) or getattr(backend, "model_size", None)
+            return backend.is_loaded() and loaded_size == config.model_size
+
         backend = get_tts_backend_for_engine(config.engine)
         return backend.is_loaded()
     except Exception:
@@ -454,6 +503,9 @@ def get_model_load_func(config: ModelConfig):
     if config.engine == "qwen":
         return lambda: tts.get_tts_model().load_model(config.model_size)
 
+    if config.engine == "qwen_custom_voice":
+        return lambda: get_tts_backend_for_engine(config.engine).load_model(config.model_size)
+
     return lambda: get_tts_backend_for_engine(config.engine).load_model()
 
 
@@ -515,6 +567,14 @@ def get_tts_backend_for_engine(engine: str) -> TTSBackend:
             from .hume_backend import HumeTadaBackend
 
             backend = HumeTadaBackend()
+        elif engine == "kokoro":
+            from .kokoro_backend import KokoroTTSBackend
+
+            backend = KokoroTTSBackend()
+        elif engine == "qwen_custom_voice":
+            from .qwen_custom_voice_backend import QwenCustomVoiceBackend
+
+            backend = QwenCustomVoiceBackend()
         else:
             raise ValueError(f"Unknown TTS engine: {engine}. Supported: {list(TTS_ENGINES.keys())}")
 
diff --git a/backend/backends/kokoro_backend.py b/backend/backends/kokoro_backend.py
new file mode 100644
index 00000000..efe91dfc
--- /dev/null
+++ b/backend/backends/kokoro_backend.py
@@ -0,0 +1,288 @@
+"""
+Kokoro TTS backend implementation.
+
+Wraps the Kokoro-82M model for fast, lightweight text-to-speech.
+82M parameters, CPU realtime, 24kHz output, Apache 2.0 license.
+
+Kokoro uses pre-built voice style vectors (not traditional zero-shot cloning
+from arbitrary audio). Voice prompts are stored as deferred references to
+HF-hosted voice .pt files.
+
+Languages supported (via misaki G2P):
+  - American English (a), British English (b) — "en" always maps to (a) in LANG_CODE_MAP
+  - Spanish (e), French (f), Hindi (h), Italian (i), Portuguese (p)
+  - Japanese (j) — requires misaki[ja]
+  - Chinese (z) — requires misaki[zh]
+"""
+
+import asyncio
+import logging
+import os
+from typing import Optional
+
+import numpy as np
+
+from . import TTSBackend
+from .base import (
+    get_torch_device,
+    combine_voice_prompts as _combine_voice_prompts,
+    model_load_progress,
+)
+
+logger = logging.getLogger(__name__)
+
+# HuggingFace repo for model + voice detection
+KOKORO_HF_REPO = "hexgrad/Kokoro-82M"
+KOKORO_SAMPLE_RATE = 24000
+
+# Default voice if none specified
+KOKORO_DEFAULT_VOICE = "af_heart"
+
+# All available Kokoro voices: (voice_id, display_name, gender, ISO language code)
+KOKORO_VOICES = [
+    # American English female
+    ("af_alloy", "Alloy", "female", "en"),
+    ("af_aoede", "Aoede", "female", "en"),
+    ("af_bella", "Bella", "female", "en"),
+    ("af_heart", "Heart", "female", "en"),
+    ("af_jessica", "Jessica", "female", "en"),
+    ("af_kore", "Kore", "female", "en"),
+    ("af_nicole", "Nicole", "female", "en"),
+    ("af_nova", "Nova", "female", "en"),
+    ("af_river", "River", "female", "en"),
+    ("af_sarah", "Sarah", "female", "en"),
+    ("af_sky", "Sky", "female", "en"),
+    # American English male
+    ("am_adam", "Adam", "male", "en"),
+    ("am_echo", "Echo", "male", "en"),
+    ("am_eric", "Eric", "male", "en"),
+    ("am_fenrir", "Fenrir", "male", "en"),
+    ("am_liam", "Liam", "male", "en"),
+    ("am_michael", "Michael", "male", "en"),
+    ("am_onyx", "Onyx", "male", "en"),
+    ("am_puck", "Puck", "male", "en"),
+    ("am_santa", "Santa", "male", "en"),
+    # British English female
+    ("bf_alice", "Alice", "female", "en"),
+    ("bf_emma", "Emma", "female", "en"),
+    ("bf_isabella", "Isabella", "female", "en"),
+    ("bf_lily", "Lily", "female", "en"),
+    # British English male
+    ("bm_daniel", "Daniel", "male", "en"),
+    ("bm_fable", "Fable", "male", "en"),
+    ("bm_george", "George", "male", "en"),
+    ("bm_lewis", "Lewis", "male", "en"),
+    # Spanish
+    ("ef_dora", "Dora", "female", "es"),
+    ("em_alex", "Alex", "male", "es"),
+    ("em_santa", "Santa", "male", "es"),
+    # French
+    ("ff_siwis", "Siwis", "female", "fr"),
+    # Hindi
+    ("hf_alpha", "Alpha", "female", "hi"),
+    ("hf_beta", "Beta", "female", "hi"),
+    ("hm_omega", "Omega", "male", "hi"),
+    ("hm_psi", "Psi", "male", "hi"),
+    # Italian
+    ("if_sara", "Sara", "female", "it"),
+    ("im_nicola", "Nicola", "male", "it"),
+    # Japanese
+    ("jf_alpha", "Alpha", "female", "ja"),
+    ("jf_gongitsune", "Gongitsune", "female", "ja"),
+    ("jf_nezumi", "Nezumi", "female", "ja"),
+    ("jf_tebukuro", "Tebukuro", "female", "ja"),
+    ("jm_kumo", "Kumo", "male", "ja"),
+    # Portuguese
+    ("pf_dora", "Dora", "female", "pt"),
+    ("pm_alex", "Alex", "male", "pt"),
+    ("pm_santa", "Santa", "male", "pt"),
+    # Chinese
+    ("zf_xiaobei", "Xiaobei", "female", "zh"),
+    ("zf_xiaoni", "Xiaoni", "female", "zh"),
+    ("zf_xiaoxiao", "Xiaoxiao", "female", "zh"),
+    ("zf_xiaoyi", "Xiaoyi", "female", "zh"),
+]
+
+# Map our ISO language codes to Kokoro lang_code characters
+LANG_CODE_MAP = {
+    "en": "a",  # American English
+    "es": "e",
+    "fr": "f",
+    "hi": "h",
+    "it": "i",
+    "pt": "p",
+    "ja": "j",
+    "zh": "z",
+}
+
+
+class KokoroTTSBackend:
+    """Kokoro-82M TTS backend — tiny, fast, CPU-friendly."""
+
+    def __init__(self):
+        self._model = None
+        self._pipelines: dict = {}  # lang_code -> KPipeline
+        self._device: Optional[str] = None
+        self.model_size = "default"
+
+    def _get_device(self) -> str:
+        """Select device. Kokoro supports CUDA and CPU. MPS needs fallback env var."""
+        device = get_torch_device(allow_mps=False)
+        # Kokoro can use MPS but requires PYTORCH_ENABLE_MPS_FALLBACK=1
+        # For now, skip MPS to avoid user confusion — CPU is already realtime
+        return device
+
+    @property
+    def device(self) -> str:
+        if self._device is None:
+            self._device = self._get_device()
+        return self._device
+
+    def is_loaded(self) -> bool:
+        return self._model is not None
+
+    def _get_model_path(self, model_size: str) -> str:
+        return KOKORO_HF_REPO
+
+    def _is_model_cached(self, model_size: str = "default") -> bool:
+        """Check if Kokoro model files are cached locally."""
+        from .base import is_model_cached
+
+        return is_model_cached(
+            KOKORO_HF_REPO,
+            required_files=["config.json", "kokoro-v1_0.pth"],
+        )
+
+    async def load_model(self, model_size: str = "default") -> None:
+        """Load the Kokoro model."""
+        if self._model is not None:
+            return
+        await asyncio.to_thread(self._load_model_sync)
+
+    def _load_model_sync(self):
+        """Synchronous model loading."""
+        model_name = "kokoro"
+        is_cached = self._is_model_cached()
+
+        with model_load_progress(model_name, is_cached):
+            from kokoro import KModel
+
+            device = self.device
+            logger.info(f"Loading Kokoro-82M on {device}...")
+
+            self._model = KModel(repo_id=KOKORO_HF_REPO).to(device).eval()
+
+        logger.info("Kokoro-82M loaded successfully")
+
+    def _get_pipeline(self, lang_code: str):
+        """Get or create a KPipeline for the given language code."""
+        kokoro_lang = LANG_CODE_MAP.get(lang_code, "a")
+
+        if kokoro_lang not in self._pipelines:
+            from kokoro import KPipeline
+
+            # Create pipeline with our existing model (no redundant model loading)
+            self._pipelines[kokoro_lang] = KPipeline(
+                lang_code=kokoro_lang,
+                repo_id=KOKORO_HF_REPO,
+                model=self._model,
+            )
+
+        return self._pipelines[kokoro_lang]
+
+    def unload_model(self) -> None:
+        """Unload model to free memory."""
+        if self._model is not None:
+            del self._model
+            self._model = None
+            self._pipelines.clear()
+
+            import torch
+
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            logger.info("Kokoro unloaded")
+
+    async def create_voice_prompt(
+        self,
+        audio_path: str,
+        reference_text: str,
+        use_cache: bool = True,
+    ) -> tuple[dict, bool]:
+        """
+        Create voice prompt for Kokoro.
+
+        Kokoro doesn't do traditional voice cloning from arbitrary audio.
+        When called for a cloned profile (fallback), uses the default voice.
+        For preset profiles, the voice_prompt dict is built by the profile
+        service and bypasses this method entirely.
+        """
+        return {
+            "voice_type": "preset",
+            "preset_engine": "kokoro",
+            "preset_voice_id": KOKORO_DEFAULT_VOICE,
+        }, False
+
+    async def combine_voice_prompts(
+        self,
+        audio_paths: list[str],
+        reference_texts: list[str],
+    ) -> tuple[np.ndarray, str]:
+        """Combine voice prompts — uses base implementation for audio concatenation."""
+        return await _combine_voice_prompts(
+            audio_paths, reference_texts, sample_rate=KOKORO_SAMPLE_RATE
+        )
+
+    async def generate(
+        self,
+        text: str,
+        voice_prompt: dict,
+        language: str = "en",
+        seed: Optional[int] = None,
+        instruct: Optional[str] = None,
+    ) -> tuple[np.ndarray, int]:
+        """
+        Generate audio from text using Kokoro.
+
+        Args:
+            text: Text to synthesize
+            voice_prompt: Dict with preset_voice_id (fallback: kokoro_voice) key
+            language: Language code
+            seed: Random seed for reproducibility
+            instruct: Not supported by Kokoro (ignored)
+
+        Returns:
+            Tuple of (audio_array, sample_rate)
+        """
+        await self.load_model()
+
+        voice_name = voice_prompt.get("preset_voice_id") or voice_prompt.get("kokoro_voice") or KOKORO_DEFAULT_VOICE
+
+        def _generate_sync():
+            import torch
+
+            if seed is not None:
+                torch.manual_seed(seed)
+                if torch.cuda.is_available():
+                    torch.cuda.manual_seed(seed)
+
+            pipeline = self._get_pipeline(language)
+
+            # Generate all chunks and concatenate
+            audio_chunks = []
+            for result in pipeline(text, voice=voice_name, speed=1.0):
+                if result.audio is not None:
+                    chunk = result.audio
+                    if isinstance(chunk, torch.Tensor):
+                        chunk = chunk.detach().cpu().numpy()
+                    audio_chunks.append(chunk.squeeze())
+
+            if not audio_chunks:
+                # Return 1 second of silence as fallback
+                return np.zeros(KOKORO_SAMPLE_RATE, dtype=np.float32), KOKORO_SAMPLE_RATE
+
+            audio = np.concatenate(audio_chunks)
+            return audio.astype(np.float32), KOKORO_SAMPLE_RATE
+
+        return await asyncio.to_thread(_generate_sync)
diff --git a/backend/backends/qwen_custom_voice_backend.py b/backend/backends/qwen_custom_voice_backend.py
new file mode 100644
index 00000000..fbbf9f30
--- /dev/null
+++ b/backend/backends/qwen_custom_voice_backend.py
@@ -0,0 +1,210 @@
+"""
+Qwen3-TTS CustomVoice backend implementation.
+
+Wraps the Qwen3-TTS-12Hz CustomVoice model for preset-speaker TTS with
+instruction-based style control. Uses the same qwen_tts library as the
+Base model (pytorch_backend.py) but loads a different checkpoint and
+calls generate_custom_voice() instead of generate_voice_clone().
+
+Key differences from the Base engine:
+  - Uses preset speakers (9 built-in voices) instead of zero-shot cloning
+  - Supports instruct parameter for tone/emotion/prosody control
+  - Two model sizes: 1.7B and 0.6B
+
+Languages supported: zh, en, ja, ko, de, fr, ru, pt, es, it
+"""
+
+import asyncio
+import logging
+from typing import Optional
+
+import numpy as np
+import torch
+
+from . import TTSBackend, LANGUAGE_CODE_TO_NAME
+from .base import (
+    is_model_cached,
+    get_torch_device,
+    combine_voice_prompts as _combine_voice_prompts,
+    model_load_progress,
+)
+
+logger = logging.getLogger(__name__)
+
+# ── Preset speakers ──────────────────────────────────────────────────
+
+# (speaker_id, display_name, gender, native_language_code, description)
+QWEN_CUSTOM_VOICES = [
+    ("Vivian", "Vivian", "female", "zh", "Bright, slightly edgy young female voice"),
+    ("Serena", "Serena", "female", "zh", "Warm, gentle young female voice"),
+    ("Uncle_Fu", "Uncle Fu", "male", "zh", "Seasoned male voice with a low, mellow timbre"),
+    ("Dylan", "Dylan", "male", "zh", "Youthful Beijing male voice with a clear, natural timbre"),
+    ("Eric", "Eric", "male", "zh", "Lively Chengdu male voice with a slightly husky brightness"),
+    ("Ryan", "Ryan", "male", "en", "Dynamic male voice with strong rhythmic drive"),
+    ("Aiden", "Aiden", "male", "en", "Sunny American male voice with a clear midrange"),
+    ("Ono_Anna", "Ono Anna", "female", "ja", "Playful Japanese female voice with a light, nimble timbre"),
+    ("Sohee", "Sohee", "female", "ko", "Warm Korean female voice with rich emotion"),
+]
+
+QWEN_CV_DEFAULT_SPEAKER = "Ryan"
+
+# HuggingFace repo IDs per model size
+QWEN_CV_HF_REPOS = {
+    "1.7B": "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice",
+    "0.6B": "Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice",
+}
+
+
+class QwenCustomVoiceBackend:
+    """Qwen3-TTS CustomVoice backend — preset speakers with instruct control."""
+
+    def __init__(self, model_size: str = "1.7B"):
+        self.model = None
+        self.model_size = model_size
+        self.device = self._get_device()
+        self._current_model_size: Optional[str] = None
+
+    def _get_device(self) -> str:
+        return get_torch_device(allow_xpu=True, allow_directml=True)
+
+    def is_loaded(self) -> bool:
+        return self.model is not None
+
+    def _get_model_path(self, model_size: str) -> str:
+        if model_size not in QWEN_CV_HF_REPOS:
+            raise ValueError(f"Unknown model size: {model_size}")
+        return QWEN_CV_HF_REPOS[model_size]
+
+    def _is_model_cached(self, model_size: Optional[str] = None) -> bool:
+        size = model_size or self.model_size
+        return is_model_cached(self._get_model_path(size))
+
+    async def load_model_async(self, model_size: Optional[str] = None) -> None:
+        if model_size is None:
+            model_size = self.model_size
+
+        if self.model is not None and self._current_model_size == model_size:
+            return
+
+        if self.model is not None and self._current_model_size != model_size:
+            self.unload_model()
+
+        await asyncio.to_thread(self._load_model_sync, model_size)
+
+    # Alias for compatibility with the TTSBackend protocol
+    load_model = load_model_async
+
+    def _load_model_sync(self, model_size: str) -> None:
+        model_name = f"qwen-custom-voice-{model_size}"
+        is_cached = self._is_model_cached(model_size)
+
+        with model_load_progress(model_name, is_cached):
+            from qwen_tts import Qwen3TTSModel
+
+            model_path = self._get_model_path(model_size)
+            logger.info("Loading Qwen CustomVoice %s on %s...", model_size, self.device)
+
+            if self.device == "cpu":
+                self.model = Qwen3TTSModel.from_pretrained(
+                    model_path,
+                    torch_dtype=torch.float32,
+                    low_cpu_mem_usage=False,
+                )
+            else:
+                self.model = Qwen3TTSModel.from_pretrained(
+                    model_path,
+                    device_map=self.device,
+                    torch_dtype=torch.bfloat16,
+                )
+
+        self._current_model_size = model_size
+        self.model_size = model_size
+        logger.info("Qwen CustomVoice %s loaded successfully", model_size)
+
+    def unload_model(self) -> None:
+        if self.model is not None:
+            del self.model
+            self.model = None
+            self._current_model_size = None
+
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            logger.info("Qwen CustomVoice unloaded")
+
+    async def create_voice_prompt(
+        self,
+        audio_path: str,
+        reference_text: str,
+        use_cache: bool = True,
+    ) -> tuple[dict, bool]:
+        """
+        Create voice prompt for CustomVoice.
+
+        CustomVoice doesn't use reference audio — it uses preset speakers.
+        When called for a cloned profile (fallback), uses the default speaker.
+        For preset profiles, the voice_prompt dict is built by the profile
+        service and bypasses this method entirely.
+        """
+        return {
+            "voice_type": "preset",
+            "preset_engine": "qwen_custom_voice",
+            "preset_voice_id": QWEN_CV_DEFAULT_SPEAKER,
+        }, False
+
+    async def combine_voice_prompts(
+        self,
+        audio_paths: list[str],
+        reference_texts: list[str],
+    ) -> tuple[np.ndarray, str]:
+        return await _combine_voice_prompts(audio_paths, reference_texts)
+
+    async def generate(
+        self,
+        text: str,
+        voice_prompt: dict,
+        language: str = "en",
+        seed: Optional[int] = None,
+        instruct: Optional[str] = None,
+    ) -> tuple[np.ndarray, int]:
+        """
+        Generate audio using Qwen CustomVoice.
+
+        Args:
+            text: Text to synthesize
+            voice_prompt: Dict with preset_voice_id (speaker name)
+            language: Language code (zh, en, ja, ko, etc.)
+            seed: Random seed for reproducibility
+            instruct: Natural language instruction for style control
+                      (e.g. "Speak in an angry tone", "Very happy")
+
+        Returns:
+            Tuple of (audio_array, sample_rate)
+        """
+        await self.load_model_async(None)
+
+        speaker = voice_prompt.get("preset_voice_id") or QWEN_CV_DEFAULT_SPEAKER
+
+        def _generate_sync():
+            if seed is not None:
+                torch.manual_seed(seed)
+                if torch.cuda.is_available():
+                    torch.cuda.manual_seed(seed)
+
+            lang_name = LANGUAGE_CODE_TO_NAME.get(language, "auto")
+
+            kwargs = {
+                "text": text,
+                "language": lang_name.capitalize() if lang_name != "auto" else "Auto",
+                "speaker": speaker,
+            }
+
+            # Only pass instruct if non-empty
+            if instruct:
+                kwargs["instruct"] = instruct
+
+            wavs, sample_rate = self.model.generate_custom_voice(**kwargs)
+            return wavs[0], sample_rate
+
+        audio, sample_rate = await asyncio.to_thread(_generate_sync)
+        return audio, sample_rate
diff --git a/backend/build_binary.py b/backend/build_binary.py
index 7655f331..43ad0719 100644
--- a/backend/build_binary.py
+++ b/backend/build_binary.py
@@ -86,6 +86,8 @@ def build_server(cuda=False):
             "--hidden-import",
             "backend.backends.pytorch_backend",
             "--hidden-import",
+            "backend.backends.qwen_custom_voice_backend",
+            "--hidden-import",
             "backend.utils.audio",
             "--hidden-import",
             "backend.utils.cache",
@@ -228,6 +230,44 @@ def build_server(cuda=False):
             "torchaudio",
             "--collect-submodules",
             "tada",
+            # Kokoro 82M — lightweight TTS engine using misaki G2P
+            "--hidden-import",
+            "backend.backends.kokoro_backend",
+            "--hidden-import",
+            "kokoro",
+            "--hidden-import",
+            "kokoro.pipeline",
+            "--hidden-import",
+            "kokoro.model",
+            "--hidden-import",
+            "kokoro.istftnet",
+            "--hidden-import",
+            "kokoro.modules",
+            "--hidden-import",
+            "kokoro.custom_stft",
+            # misaki ships G2P data files (dictionaries, phoneme tables)
+            # that must be bundled for espeak/en/ja/zh G2P to work
+            "--collect-all",
+            "misaki",
+            # language_tags ships JSON data files (index.json etc.) loaded at
+            # runtime via: misaki → phonemizer → segments → csvw → language_tags
+            "--collect-all",
+            "language_tags",
+            # espeakng_loader ships the entire espeak-ng-data directory (369 files)
+            # loaded at import time by misaki.espeak via get_data_path()
+            "--collect-all",
+            "espeakng_loader",
+            # spacy en_core_web_sm model — misaki.en tries to spacy.cli.download()
+            # at runtime if not found, which calls pip as a subprocess and crashes
+            # the frozen binary. Bundle the model so spacy.util.is_package() passes.
+            "--collect-all",
+            "en_core_web_sm",
+            "--copy-metadata",
+            "en_core_web_sm",
+            "--hidden-import",
+            "en_core_web_sm",
+            "--hidden-import",
+            "loguru",
         ]
     )
 
diff --git a/backend/config.py b/backend/config.py
index 0eb3cbf7..959731c6 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -19,7 +19,22 @@
     logger.info("Model download path set to: %s", _custom_models_dir)
 
 # Default data directory (used in development)
-_data_dir = Path("data")
+_data_dir = Path("data").resolve()
+
+
+def _path_relative_to_any_data_dir(path: Path) -> Path | None:
+    """Return the portion of ``path`` that follows its first "data" component.
+
+    Works for absolute and relative paths. A path that ends at the "data"
+    component yields an empty ``Path()``; a path with no "data" component
+    yields ``None``.
+    """
+    components = path.parts
+    if "data" not in components:
+        return None
+
+    remainder = components[components.index("data") + 1 :]
+    return Path(*remainder) if remainder else Path()
 
 
 def set_data_dir(path: str | Path):
@@ -30,9 +45,9 @@ def set_data_dir(path: str | Path):
         path: Path to the data directory
     """
     global _data_dir
-    _data_dir = Path(path)
+    _data_dir = Path(path).resolve()
     _data_dir.mkdir(parents=True, exist_ok=True)
-    logger.info("Data directory set to: %s", _data_dir.absolute())
+    logger.info("Data directory set to: %s", _data_dir)
 
 
 def get_data_dir() -> Path:
@@ -45,6 +60,38 @@ def get_data_dir() -> Path:
     return _data_dir
 
 
+def to_storage_path(path: str | Path) -> str:
+    """Convert a filesystem path to a DB-safe path relative to the data dir.
+
+    The configured data dir is checked before the "data"-component heuristic,
+    so a data dir nested under a directory named "data" round-trips correctly.
+    """
+    resolved_path = Path(path).resolve()
+    try:
+        return str(resolved_path.relative_to(_data_dir))
+    except ValueError:
+        legacy_tail = _path_relative_to_any_data_dir(resolved_path)
+    return str(resolved_path if legacy_tail is None else legacy_tail)
+
+
+def resolve_storage_path(path: str | Path | None) -> Path | None:
+    """Map a DB-stored path onto the configured data dir (``None`` stays ``None``).
+
+    Relative paths are joined to the data dir. Absolute paths containing a
+    "data" component are rebased onto the current data dir when the rebased
+    file exists or the stored one is missing; otherwise they are returned as-is.
+    """
+    if path is None:
+        return None
+    raw = Path(path)
+    if not raw.is_absolute():
+        return (_data_dir / raw).resolve()
+    tail = _path_relative_to_any_data_dir(raw)
+    rebased = None if tail is None else (_data_dir / tail).resolve()
+    keep_rebased = rebased is not None and (rebased.exists() or not raw.exists())
+    return rebased if keep_rebased else raw
+
+
 def get_db_path() -> Path:
     """Get database file path."""
     return _data_dir / "voicebox.db"
diff --git a/backend/database/migrations.py b/backend/database/migrations.py
index 52757a68..2bdd9282 100644
--- a/backend/database/migrations.py
+++ b/backend/database/migrations.py
@@ -34,6 +34,7 @@ def run_migrations(engine) -> None:
     _migrate_generations(engine, inspector, tables)
     _migrate_effect_presets(engine, inspector, tables)
     _migrate_generation_versions(engine, inspector, tables)
+    _normalize_storage_paths(engine, tables)
 
 
 # -- helpers ---------------------------------------------------------------
@@ -134,6 +135,17 @@ def _migrate_profiles(engine, inspector, tables: set[str]) -> None:
         _add_column(engine, "profiles", "avatar_path VARCHAR", "avatar_path")
     if "effects_chain" not in columns:
         _add_column(engine, "profiles", "effects_chain TEXT", "effects_chain")
+    # Voice type system — v0.3.x
+    if "voice_type" not in columns:
+        _add_column(engine, "profiles", "voice_type VARCHAR DEFAULT 'cloned'", "voice_type")
+    if "preset_engine" not in columns:
+        _add_column(engine, "profiles", "preset_engine VARCHAR", "preset_engine")
+    if "preset_voice_id" not in columns:
+        _add_column(engine, "profiles", "preset_voice_id VARCHAR", "preset_voice_id")
+    if "design_prompt" not in columns:
+        _add_column(engine, "profiles", "design_prompt TEXT", "design_prompt")
+    if "default_engine" not in columns:
+        _add_column(engine, "profiles", "default_engine VARCHAR", "default_engine")
 
 
 def _migrate_generations(engine, inspector, tables: set[str]) -> None:
@@ -168,3 +180,47 @@ def _migrate_generation_versions(engine, inspector, tables: set[str]) -> None:
     columns = _get_columns(inspector, "generation_versions")
     if "source_version_id" not in columns:
         _add_column(engine, "generation_versions", "source_version_id VARCHAR", "source_version_id")
+
+
+def _normalize_storage_paths(engine, tables: set[str]) -> None:
+    """Rewrite stored file paths so they are relative to the configured data dir.
+
+    Every non-NULL, non-empty path in the known path columns is resolved with
+    ``resolve_storage_path`` and converted back with ``to_storage_path``; a row
+    is updated only when the result differs from the stored value.
+
+    Args:
+        engine: Engine whose ``connect()`` yields the connection for the queries.
+        tables: Table names to process; path tables not listed are skipped.
+    """
+    from ..config import resolve_storage_path, to_storage_path
+
+    path_columns = [
+        ("generations", "audio_path"),
+        ("generation_versions", "audio_path"),
+        ("profile_samples", "audio_path"),
+        ("profiles", "avatar_path"),
+    ]
+
+    total_fixed = 0
+    with engine.connect() as conn:
+        for table, column in path_columns:
+            if table not in tables:
+                continue
+            rows = conn.execute(
+                text(f"SELECT id, {column} FROM {table} WHERE {column} IS NOT NULL")
+            ).fetchall()
+            for row_id, path_val in rows:
+                if not path_val:
+                    continue
+                normalized = to_storage_path(resolve_storage_path(path_val))
+                if normalized == path_val:
+                    continue
+                conn.execute(
+                    text(f"UPDATE {table} SET {column} = :path WHERE id = :id"),
+                    {"path": normalized, "id": row_id},
+                )
+                total_fixed += 1
+        if total_fixed > 0:
+            conn.commit()
+            logger.info("Normalized %d stored file paths", total_fixed)
diff --git a/backend/database/models.py b/backend/database/models.py
index 19cefff2..ca03d47e 100644
--- a/backend/database/models.py
+++ b/backend/database/models.py
@@ -10,7 +10,13 @@
 
 
 class VoiceProfile(Base):
-    """Voice profile."""
+    """Voice profile.
+
+    voice_type discriminates three flavours:
+      - "cloned"   — traditional reference-audio profiles (all cloning engines)
+      - "preset"   — engine-specific pre-built voice (e.g. Kokoro voices)
+      - "designed"  — text-described voice (e.g. Qwen CustomVoice, future)
+    """
 
     __tablename__ = "profiles"
 
@@ -20,6 +26,14 @@ class VoiceProfile(Base):
     language = Column(String, default="en")
     avatar_path = Column(String, nullable=True)
     effects_chain = Column(Text, nullable=True)
+
+    # Voice type system — added v0.3.x
+    voice_type = Column(String, default="cloned")  # "cloned" | "preset" | "designed"
+    preset_engine = Column(String, nullable=True)   # e.g. "kokoro" — only for preset
+    preset_voice_id = Column(String, nullable=True)  # e.g. "am_adam" — only for preset
+    design_prompt = Column(Text, nullable=True)      # text description — only for designed
+    default_engine = Column(String, nullable=True)   # auto-selected engine, locked for preset
+
     created_at = Column(DateTime, default=datetime.utcnow)
     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
 
diff --git a/backend/database/seed.py b/backend/database/seed.py
index b62edc22..b09e0b8f 100644
--- a/backend/database/seed.py
+++ b/backend/database/seed.py
@@ -3,7 +3,8 @@
 import json
 import logging
 import uuid
-from pathlib import Path
+
+from .. import config
 
 logger = logging.getLogger(__name__)
 
@@ -25,7 +26,8 @@ def backfill_generation_versions(SessionLocal, Generation, GenerationVersion) ->
         for gen in generations:
             if gen.id in existing_version_gen_ids:
                 continue
-            if not Path(gen.audio_path).exists():
+            resolved_audio_path = config.resolve_storage_path(gen.audio_path)
+            if resolved_audio_path is None or not resolved_audio_path.exists():
                 continue
             version = GenerationVersion(
                 id=str(uuid.uuid4()),
diff --git a/backend/models.py b/backend/models.py
index 4dd2b368..f2f43d4b 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -15,6 +15,11 @@ class VoiceProfileCreate(BaseModel):
     language: str = Field(
         default="en", pattern="^(zh|en|ja|ko|de|fr|ru|pt|es|it|he|ar|da|el|fi|hi|ms|nl|no|pl|sv|sw|tr)$"
     )
+    voice_type: Optional[str] = Field(default="cloned", pattern="^(cloned|preset|designed)$")
+    preset_engine: Optional[str] = Field(None, max_length=50)
+    preset_voice_id: Optional[str] = Field(None, max_length=100)
+    design_prompt: Optional[str] = Field(None, max_length=2000)
+    default_engine: Optional[str] = Field(None, max_length=50)
 
 
 class VoiceProfileResponse(BaseModel):
@@ -26,6 +31,11 @@ class VoiceProfileResponse(BaseModel):
     language: str
     avatar_path: Optional[str] = None
     effects_chain: Optional[List["EffectConfig"]] = None
+    voice_type: str = "cloned"
+    preset_engine: Optional[str] = None
+    preset_voice_id: Optional[str] = None
+    design_prompt: Optional[str] = None
+    default_engine: Optional[str] = None
     generation_count: int = 0
     sample_count: int = 0
     created_at: datetime
@@ -68,7 +78,7 @@ class GenerationRequest(BaseModel):
     seed: Optional[int] = Field(None, ge=0)
     model_size: Optional[str] = Field(default="1.7B", pattern="^(1\\.7B|0\\.6B|1B|3B)$")
     instruct: Optional[str] = Field(None, max_length=500)
-    engine: Optional[str] = Field(default="qwen", pattern="^(qwen|luxtts|chatterbox|chatterbox_turbo|tada)$")
+    engine: Optional[str] = Field(default="qwen", pattern="^(qwen|qwen_custom_voice|luxtts|chatterbox|chatterbox_turbo|tada|kokoro)$")
     max_chunk_chars: int = Field(
         default=800, ge=100, le=5000, description="Max characters per chunk for long text splitting"
     )
diff --git a/backend/requirements.txt b/backend/requirements.txt
index ea4a2dc0..e916b1d2 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -40,6 +40,13 @@ pyloudnorm
 # provides the only class TADA uses: Snake1d.)
 torchaudio
 
+# Kokoro TTS (lightweight 82M-param engine)
+kokoro>=0.9.4
+misaki[en,ja,zh]>=0.9.4
+# spacy model for misaki English G2P — must be pre-installed or misaki
+# tries spacy.cli.download() at runtime which crashes frozen builds
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+
 # Audio processing
 librosa>=0.10.0
 soundfile>=0.12.0
diff --git a/backend/routes/audio.py b/backend/routes/audio.py
index 682d7aae..f80a44d5 100644
--- a/backend/routes/audio.py
+++ b/backend/routes/audio.py
@@ -1,12 +1,10 @@
 """Audio file serving endpoints."""
 
-from pathlib import Path
-
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session
 
-from .. import models
+from .. import config, models
 from ..services import history
 from ..database import get_db
 
@@ -22,8 +20,8 @@ async def get_version_audio(version_id: str, db: Session = Depends(get_db)):
     if not version:
         raise HTTPException(status_code=404, detail="Version not found")
 
-    audio_path = Path(version.audio_path)
-    if not audio_path.exists():
+    audio_path = config.resolve_storage_path(version.audio_path)
+    if audio_path is None or not audio_path.exists():
         raise HTTPException(status_code=404, detail="Audio file not found")
 
     return FileResponse(
@@ -40,8 +38,8 @@ async def get_audio(generation_id: str, db: Session = Depends(get_db)):
     if not generation:
         raise HTTPException(status_code=404, detail="Generation not found")
 
-    audio_path = Path(generation.audio_path)
-    if not audio_path.exists():
+    audio_path = config.resolve_storage_path(generation.audio_path)
+    if audio_path is None or not audio_path.exists():
         raise HTTPException(status_code=404, detail="Audio file not found")
 
     return FileResponse(
@@ -60,8 +58,8 @@ async def get_sample_audio(sample_id: str, db: Session = Depends(get_db)):
     if not sample:
         raise HTTPException(status_code=404, detail="Sample not found")
 
-    audio_path = Path(sample.audio_path)
-    if not audio_path.exists():
+    audio_path = config.resolve_storage_path(sample.audio_path)
+    if audio_path is None or not audio_path.exists():
         raise HTTPException(status_code=404, detail="Audio file not found")
 
     return FileResponse(
diff --git a/backend/routes/effects.py b/backend/routes/effects.py
index 8139176d..52bbc8fd 100644
--- a/backend/routes/effects.py
+++ b/backend/routes/effects.py
@@ -3,7 +3,6 @@
 import asyncio
 import io
 import uuid
-from pathlib import Path
 
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import StreamingResponse
@@ -41,10 +40,11 @@ async def preview_effects(
     all_versions = versions_mod.list_versions(generation_id, db)
     clean_version = next((v for v in all_versions if v.effects_chain is None), None)
     source_path = clean_version.audio_path if clean_version else gen.audio_path
-    if not source_path or not Path(source_path).exists():
+    resolved_source_path = config.resolve_storage_path(source_path)
+    if resolved_source_path is None or not resolved_source_path.exists():
         raise HTTPException(status_code=404, detail="Source audio file not found")
 
-    audio, sample_rate = await asyncio.to_thread(load_audio, source_path)
+    audio, sample_rate = await asyncio.to_thread(load_audio, str(resolved_source_path))
     processed = await asyncio.to_thread(apply_effects, audio, sample_rate, chain_dicts)
 
     import soundfile as sf
@@ -193,10 +193,11 @@ async def apply_effects_to_generation(
             source_path = clean_version.audio_path
             source_version_id = clean_version.id
 
-    if not source_path or not Path(source_path).exists():
+    resolved_source_path = config.resolve_storage_path(source_path)
+    if resolved_source_path is None or not resolved_source_path.exists():
         raise HTTPException(status_code=404, detail="Source audio file not found")
 
-    audio, sample_rate = await asyncio.to_thread(load_audio, source_path)
+    audio, sample_rate = await asyncio.to_thread(load_audio, str(resolved_source_path))
     processed_audio = await asyncio.to_thread(apply_effects, audio, sample_rate, chain_dicts)
 
     version_id = str(uuid.uuid4())
@@ -208,7 +209,7 @@ async def apply_effects_to_generation(
     version = versions_mod.create_version(
         generation_id=generation_id,
         label=label,
-        audio_path=str(processed_path),
+        audio_path=config.to_storage_path(processed_path),
         db=db,
         effects_chain=chain_dicts,
         is_default=data.set_as_default,
diff --git a/backend/routes/generations.py b/backend/routes/generations.py
index 8541b659..2af3832e 100644
--- a/backend/routes/generations.py
+++ b/backend/routes/generations.py
@@ -20,6 +20,25 @@
 router = APIRouter()
 
 
+def _resolve_generation_engine(data: models.GenerationRequest, profile) -> str:
+    """Pick the TTS engine for a generation request.
+
+    ``GenerationRequest.engine`` defaults to "qwen", so ``data.engine`` is
+    truthy even when the client sent no engine at all. The request value
+    therefore only takes precedence when the field was explicitly set;
+    otherwise the profile's ``default_engine`` / ``preset_engine`` are
+    consulted before falling back to the request default and finally "qwen".
+    """
+    requested = data.engine if "engine" in data.model_fields_set else None
+    return (
+        requested
+        or getattr(profile, "default_engine", None)
+        or getattr(profile, "preset_engine", None)
+        or data.engine
+        or "qwen"
+    )
+
+
 @router.post("/generate", response_model=models.GenerationResponse)
 async def generate_speech(
     data: models.GenerationRequest,
@@ -35,7 +39,12 @@ async def generate_speech(
 
     from ..backends import engine_has_model_sizes
 
-    engine = data.engine or "qwen"
+    engine = _resolve_generation_engine(data, profile)
+    try:
+        profiles.validate_profile_engine(profile, engine)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
     model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None
 
     generation = await history.create_generation(
@@ -230,7 +239,11 @@ async def stream_speech(
     if not profile:
         raise HTTPException(status_code=404, detail="Profile not found")
 
-    engine = data.engine or "qwen"
+    engine = _resolve_generation_engine(data, profile)
+    try:
+        profiles.validate_profile_engine(profile, engine)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
     tts_model = get_tts_backend_for_engine(engine)
     model_size = data.model_size or "1.7B"
 
@@ -263,6 +276,22 @@ async def stream_speech(
         trim_fn=trim_fn,
     )
 
+    effects_chain_config = None
+    if data.effects_chain is not None:
+        effects_chain_config = [e.model_dump() for e in data.effects_chain]
+    elif profile.effects_chain:
+        import json as _json
+
+        try:
+            effects_chain_config = _json.loads(profile.effects_chain)
+        except Exception:
+            effects_chain_config = None
+
+    if effects_chain_config:
+        from ..utils.effects import apply_effects
+
+        audio = apply_effects(audio, sample_rate, effects_chain_config)
+
     if data.normalize:
         from ..utils.audio import normalize_audio
 
diff --git a/backend/routes/history.py b/backend/routes/history.py
index 5435e299..f8233e3c 100644
--- a/backend/routes/history.py
+++ b/backend/routes/history.py
@@ -1,13 +1,12 @@
 """Generation history endpoints."""
 
 import io
-from pathlib import Path
 
 from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
 from fastapi.responses import FileResponse, StreamingResponse
 from sqlalchemy.orm import Session
 
-from .. import models
+from .. import config, models
 from ..services import export_import, history
 from ..app import safe_content_disposition
 from ..database import Generation as DBGeneration, VoiceProfile as DBVoiceProfile, get_db
@@ -162,8 +161,8 @@ async def export_generation_audio(
     if not generation.audio_path:
         raise HTTPException(status_code=404, detail="Generation has no audio file")
 
-    audio_path = Path(generation.audio_path)
-    if not audio_path.is_file():
+    audio_path = config.resolve_storage_path(generation.audio_path)
+    if audio_path is None or not audio_path.is_file():
         raise HTTPException(status_code=404, detail="Audio file not found")
 
     safe_text = "".join(c for c in generation.text[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
diff --git a/backend/routes/profiles.py b/backend/routes/profiles.py
index 5b2257e0..7bc075c5 100644
--- a/backend/routes/profiles.py
+++ b/backend/routes/profiles.py
@@ -1,8 +1,9 @@
 """Voice profile endpoints."""
 
 import io
+import json as _json
+import logging
 import tempfile
-from datetime import datetime
 from pathlib import Path
 
 from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
@@ -15,6 +16,8 @@
 from ..services import channels, export_import, profiles
 from ..services.profiles import _profile_to_response
 
+logger = logging.getLogger(__name__)
+
 router = APIRouter()
 
 
@@ -62,6 +65,46 @@ async def import_profile(
         raise HTTPException(status_code=500, detail=str(e))
 
 
+# ── Preset Voice Endpoints ───────────────────────────────────────────
+# These MUST be declared before /profiles/{profile_id} to avoid the
+# wildcard swallowing "presets" as a profile_id.
+
+
+@router.get("/profiles/presets/{engine}")
+async def list_preset_voices(engine: str):
+    """Return the preset voice catalogue of ``engine``.
+
+    Declared ahead of ``/profiles/{profile_id}`` so that "presets" is not
+    captured as a profile id.
+
+    The response is ``{"engine": ..., "voices": [...]}`` where each voice has
+    ``voice_id``, ``name``, ``gender`` and ``language``. Engines without a
+    preset catalogue get an empty ``voices`` list.
+    """
+    voices: list[dict] = []
+
+    if engine == "kokoro":
+        from ..backends.kokoro_backend import KOKORO_VOICES
+
+        for voice_id, name, gender, lang in KOKORO_VOICES:
+            voices.append(
+                {"voice_id": voice_id, "name": name, "gender": gender, "language": lang}
+            )
+    elif engine == "qwen_custom_voice":
+        from ..backends.qwen_custom_voice_backend import QWEN_CUSTOM_VOICES
+
+        for speaker_id, display_name, gender, lang, _desc in QWEN_CUSTOM_VOICES:
+            voices.append(
+                {
+                    "voice_id": speaker_id,
+                    "name": display_name,
+                    "gender": gender,
+                    "language": lang,
+                }
+            )
+
+    return {"engine": engine, "voices": voices}
+
 @router.get("/profiles/{profile_id}", response_model=models.VoiceProfileResponse)
 async def get_profile(
     profile_id: str,
@@ -215,8 +258,8 @@ async def get_profile_avatar(
     if not profile.avatar_path:
         raise HTTPException(status_code=404, detail="No avatar found for this profile")
 
-    avatar_path = Path(profile.avatar_path)
-    if not avatar_path.exists():
+    avatar_path = config.resolve_storage_path(profile.avatar_path)
+    if avatar_path is None or not avatar_path.exists():
         raise HTTPException(status_code=404, detail="Avatar file not found")
 
     return FileResponse(avatar_path)
@@ -297,8 +340,6 @@ async def update_profile_effects(
     db: Session = Depends(get_db),
 ):
     """Set or clear the default effects chain for a voice profile."""
-    import json as _json
-
     profile = db.query(DBVoiceProfile).filter_by(id=profile_id).first()
     if not profile:
         raise HTTPException(status_code=404, detail="Profile not found")
diff --git a/backend/services/export_import.py b/backend/services/export_import.py
index 93252f50..514eaacd 100644
--- a/backend/services/export_import.py
+++ b/backend/services/export_import.py
@@ -73,8 +73,8 @@ def export_profile_to_zip(profile_id: str, db: Session) -> bytes:
         # Check if profile has avatar
         has_avatar = False
         if profile.avatar_path:
-            avatar_path = Path(profile.avatar_path)
-            if avatar_path.exists():
+            avatar_path = config.resolve_storage_path(profile.avatar_path)
+            if avatar_path is not None and avatar_path.exists():
                 has_avatar = True
                 # Add avatar to ZIP root with original extension
                 avatar_ext = avatar_path.suffix
@@ -98,7 +98,9 @@ def export_profile_to_zip(profile_id: str, db: Session) -> bytes:
 
         for sample in samples:
             # Get filename from audio_path (should be {sample_id}.wav)
-            audio_path = Path(sample.audio_path)
+            audio_path = config.resolve_storage_path(sample.audio_path)
+            if audio_path is None:
+                raise ValueError(f"Audio file not found: {sample.audio_path}")
             filename = audio_path.name
 
             # Read audio file
@@ -279,7 +281,7 @@ def export_generation_to_zip(generation_id: str, db: Session) -> bytes:
         # Build version manifest entries
         version_entries = []
         for v in versions:
-            v_path = Path(v.audio_path)
+            v_path = config.resolve_storage_path(v.audio_path)
             effects_chain = None
             if v.effects_chain:
                 effects_chain = json.loads(v.effects_chain)
@@ -314,14 +316,14 @@ def export_generation_to_zip(generation_id: str, db: Session) -> bytes:
         
         # Add all version audio files
         for v in versions:
-            v_path = Path(v.audio_path)
-            if v_path.exists():
+            v_path = config.resolve_storage_path(v.audio_path)
+            if v_path is not None and v_path.exists():
                 zip_file.write(v_path, f"audio/{v_path.name}")
 
         # Fallback: if no versions exist, include the generation's main audio
         if not versions:
-            audio_path = Path(generation.audio_path)
-            if audio_path.exists():
+            audio_path = config.resolve_storage_path(generation.audio_path)
+            if audio_path is not None and audio_path.exists():
                 zip_file.write(audio_path, f"audio/{audio_path.name}")
     
     zip_buffer.seek(0)
@@ -426,7 +428,7 @@ async def import_generation_from_zip(file_bytes: bytes, db: Session) -> dict:
                     profile_id=profile_id,
                     text=generation_data["text"],
                     language=generation_data["language"],
-                    audio_path=str(audio_dest),
+                    audio_path=config.to_storage_path(audio_dest),
                     duration=generation_data["duration"],
                     seed=generation_data.get("seed"),
                     instruct=generation_data.get("instruct"),
diff --git a/backend/services/generation.py b/backend/services/generation.py
index d8d5214d..a70e633e 100644
--- a/backend/services/generation.py
+++ b/backend/services/generation.py
@@ -163,7 +163,7 @@ def _save_generate(
     versions_mod.create_version(
         generation_id=generation_id,
         label="original",
-        audio_path=str(clean_audio_path),
+        audio_path=config.to_storage_path(clean_audio_path),
         db=db,
         effects_chain=None,
         is_default=not has_effects,
@@ -174,6 +174,8 @@ def _save_generate(
     if has_effects:
         from ..utils.effects import apply_effects, validate_effects_chain
 
+        assert effects_chain is not None
+
         error_msg = validate_effects_chain(effects_chain)
         if error_msg:
             import logging
@@ -189,13 +191,13 @@ def _save_generate(
             versions_mod.create_version(
                 generation_id=generation_id,
                 label="version-2",
-                audio_path=str(processed_path),
+                audio_path=config.to_storage_path(processed_path),
                 db=db,
                 effects_chain=effects_chain,
                 is_default=True,
             )
 
-    return final_audio_path
+    return config.to_storage_path(final_audio_path)
 
 
 def _save_retry(
@@ -211,7 +213,7 @@ def _save_retry(
     """
     audio_path = config.get_generations_dir() / f"{generation_id}.wav"
     save_audio(audio, str(audio_path), sample_rate)
-    return str(audio_path)
+    return config.to_storage_path(audio_path)
 
 
 def _save_regenerate(
@@ -244,10 +246,10 @@ def _save_regenerate(
     versions_mod.create_version(
         generation_id=generation_id,
         label=label,
-        audio_path=str(audio_path),
+        audio_path=config.to_storage_path(audio_path),
         db=db,
         effects_chain=None,
         is_default=True,
     )
 
-    return str(audio_path)
+    return config.to_storage_path(audio_path)
diff --git a/backend/services/history.py b/backend/services/history.py
index 8f45d48f..473c4b37 100644
--- a/backend/services/history.py
+++ b/backend/services/history.py
@@ -253,8 +253,8 @@ async def delete_generation(
 
     # Delete main audio file (if not already removed by version cleanup)
     if generation.audio_path:
-        audio_path = Path(generation.audio_path)
-        if audio_path.exists():
+        audio_path = config.resolve_storage_path(generation.audio_path)
+        if audio_path is not None and audio_path.exists():
             audio_path.unlink()
 
     # Delete from database
@@ -283,8 +283,8 @@ async def delete_generations_by_profile(
     count = 0
     for generation in generations:
         # Delete audio file
-        audio_path = Path(generation.audio_path)
-        if audio_path.exists():
+        audio_path = config.resolve_storage_path(generation.audio_path)
+        if audio_path is not None and audio_path.exists():
             audio_path.unlink()
         
         # Delete from database
diff --git a/backend/services/profiles.py b/backend/services/profiles.py
index d20c142d..78839504 100644
--- a/backend/services/profiles.py
+++ b/backend/services/profiles.py
@@ -1,33 +1,30 @@
-"""
-Voice profile management module.
-"""
+"""Voice profile management module."""
 
-from typing import List, Optional
-from datetime import datetime
-import uuid
+import json as _json
+import logging
 import shutil
+import uuid
+from datetime import datetime
 from pathlib import Path
+
+from sqlalchemy import func
 from sqlalchemy.orm import Session
-from sqlalchemy import func, select
 
+from .. import config
+from ..database import Generation as DBGeneration, ProfileSample as DBProfileSample, VoiceProfile as DBVoiceProfile
 from ..models import (
+    EffectConfig,
+    ProfileSampleResponse,
     VoiceProfileCreate,
     VoiceProfileResponse,
-    ProfileSampleCreate,
-    ProfileSampleResponse,
 )
-from ..database import (
-    VoiceProfile as DBVoiceProfile,
-    ProfileSample as DBProfileSample,
-    Generation as DBGeneration,
-)
-from ..models import EffectConfig
-from ..utils.audio import validate_reference_audio, validate_and_load_reference_audio, load_audio, save_audio
-from ..utils.images import validate_image, process_avatar
+from ..utils.audio import save_audio, validate_and_load_reference_audio
 from ..utils.cache import _get_cache_dir, clear_profile_cache
-from .tts import get_tts_model
-from .. import config
-import json as _json
+from ..utils.images import process_avatar, validate_image
+
+logger = logging.getLogger(__name__)
+
+CLONING_ENGINES = {"qwen", "luxtts", "chatterbox", "chatterbox_turbo", "tada"}
 
 
 def _profile_to_response(
@@ -52,6 +49,11 @@ def _profile_to_response(
         language=profile.language,
         avatar_path=profile.avatar_path,
         effects_chain=effects_chain,
+        voice_type=getattr(profile, "voice_type", None) or "cloned",
+        preset_engine=getattr(profile, "preset_engine", None),
+        preset_voice_id=getattr(profile, "preset_voice_id", None),
+        design_prompt=getattr(profile, "design_prompt", None),
+        default_engine=getattr(profile, "default_engine", None),
         generation_count=generation_count,
         sample_count=sample_count,
         created_at=profile.created_at,
@@ -59,6 +61,79 @@ def _profile_to_response(
     )
 
 
+def _get_preset_voice_ids(engine: str) -> set[str]:  # IDs of the built-in voices known for a preset engine
+    if engine == "kokoro":
+        from ..backends.kokoro_backend import KOKORO_VOICES  # imported lazily, only when this engine is queried
+
+        return {voice_id for voice_id, _name, _gender, _lang in KOKORO_VOICES}  # entries unpack as 4-tuples; only the ID is kept
+
+    if engine == "qwen_custom_voice":
+        from ..backends.qwen_custom_voice_backend import QWEN_CUSTOM_VOICES  # imported lazily, same as above
+
+        return {voice_id for voice_id, _name, _gender, _lang, _desc in QWEN_CUSTOM_VOICES}  # 5-tuples here (one extra trailing field)
+
+    return set()  # any other engine: empty set, which _validate_profile_fields treats as "skip the voice-ID check"
+
+
+def _validate_profile_fields(  # returns an error message for an invalid field combination, or None when valid
+    *,
+    voice_type: str,
+    preset_engine: str | None,
+    preset_voice_id: str | None,
+    design_prompt: str | None,
+    default_engine: str | None,
+) -> str | None:
+    if voice_type == "preset":
+        if not preset_engine or not preset_voice_id:
+            return "Preset profiles require both preset_engine and preset_voice_id"
+        if default_engine and default_engine != preset_engine:  # an unset/empty default_engine is accepted
+            return "Preset profiles must use their preset_engine as default_engine"
+
+        available_voice_ids = _get_preset_voice_ids(preset_engine)
+        if available_voice_ids and preset_voice_id not in available_voice_ids:  # empty set (engine without a known voice list) skips this check
+            return f"Preset voice '{preset_voice_id}' is not valid for engine '{preset_engine}'"
+        return None
+
+    if voice_type == "designed":
+        if not design_prompt or not design_prompt.strip():  # whitespace-only prompts count as missing
+            return "Designed profiles require a design_prompt"
+        if preset_engine or preset_voice_id:
+            return "Designed profiles cannot set preset_engine or preset_voice_id"
+        return None
+
+    if preset_engine or preset_voice_id:  # fall-through: any voice_type other than "preset"/"designed" is validated as cloned
+        return "Cloned profiles cannot set preset_engine or preset_voice_id"
+    if design_prompt:
+        return "Cloned profiles cannot set design_prompt"
+    if default_engine and default_engine not in CLONING_ENGINES:  # unset/empty default_engine is accepted
+        return f"Cloned profiles cannot use default engine '{default_engine}'"
+    return None
+
+
+def validate_profile_engine(profile, engine: str) -> None:  # raises ValueError when `engine` cannot be used with `profile`
+    voice_type = getattr(profile, "voice_type", None) or "cloned"  # missing or empty voice_type is treated as "cloned"
+
+    if voice_type == "preset":
+        preset_engine = getattr(profile, "preset_engine", None)
+        preset_voice_id = getattr(profile, "preset_voice_id", None)
+        if not preset_engine or not preset_voice_id:
+            raise ValueError(f"Preset profile {profile.id} is missing preset engine metadata")
+        if preset_engine != engine:  # a preset voice is only usable with the engine recorded on the profile
+            raise ValueError(
+                f"Preset profile {profile.id} only supports engine '{preset_engine}', not '{engine}'"
+            )
+        return
+
+    if voice_type == "designed":
+        design_prompt = getattr(profile, "design_prompt", None)
+        if not design_prompt or not design_prompt.strip():  # whitespace-only prompts count as missing
+            raise ValueError(f"Designed profile {profile.id} is missing design_prompt")
+        return  # designed profiles are not restricted to a particular engine here
+
+    if engine not in CLONING_ENGINES:  # remaining case: cloned profile
+        raise ValueError(f"Engine '{engine}' does not support cloned voice profiles")
+
+
 async def create_profile(
     data: VoiceProfileCreate,
     db: Session,
@@ -80,11 +155,32 @@ async def create_profile(
     if existing_profile:
         raise ValueError(f"A profile with the name '{data.name}' already exists. Please choose a different name.")
 
+    # Auto-set default_engine for preset profiles
+    default_engine = data.default_engine
+    voice_type = data.voice_type or "cloned"
+    if voice_type == "preset" and data.preset_engine and not default_engine:
+        default_engine = data.preset_engine
+
+    validation_error = _validate_profile_fields(
+        voice_type=voice_type,
+        preset_engine=data.preset_engine,
+        preset_voice_id=data.preset_voice_id,
+        design_prompt=data.design_prompt,
+        default_engine=default_engine,
+    )
+    if validation_error:
+        raise ValueError(validation_error)
+
     db_profile = DBVoiceProfile(
         id=str(uuid.uuid4()),
         name=data.name,
         description=data.description,
         language=data.language,
+        voice_type=voice_type,
+        preset_engine=data.preset_engine,
+        preset_voice_id=data.preset_voice_id,
+        design_prompt=data.design_prompt,
+        default_engine=default_engine,
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
     )
@@ -140,7 +236,7 @@ async def add_profile_sample(
     db_sample = DBProfileSample(
         id=sample_id,
         profile_id=profile_id,
-        audio_path=str(dest_path),
+        audio_path=config.to_storage_path(dest_path),
         reference_text=reference_text,
     )
 
@@ -161,7 +257,7 @@ async def add_profile_sample(
 async def get_profile(
     profile_id: str,
     db: Session,
-) -> Optional[VoiceProfileResponse]:
+) -> VoiceProfileResponse | None:
     """
     Get a voice profile by ID.
 
@@ -182,7 +278,7 @@ async def get_profile(
 async def get_profile_samples(
     profile_id: str,
     db: Session,
-) -> List[ProfileSampleResponse]:
+) -> list[ProfileSampleResponse]:
     """
     Get all samples for a profile.
 
@@ -197,7 +293,7 @@ async def get_profile_samples(
     return [ProfileSampleResponse.model_validate(s) for s in samples]
 
 
-async def list_profiles(db: Session) -> List[VoiceProfileResponse]:
+async def list_profiles(db: Session) -> list[VoiceProfileResponse]:
     """
     List all voice profiles with generation and sample counts.
 
@@ -238,7 +334,7 @@ async def update_profile(
     profile_id: str,
     data: VoiceProfileCreate,
     db: Session,
-) -> Optional[VoiceProfileResponse]:
+) -> VoiceProfileResponse | None:
     """
     Update a voice profile.
 
@@ -262,9 +358,27 @@ async def update_profile(
         if existing_profile:
             raise ValueError(f"A profile with the name '{data.name}' already exists. Please choose a different name.")
 
+    voice_type = getattr(profile, "voice_type", None) or "cloned"
+    preset_engine = getattr(profile, "preset_engine", None)
+    preset_voice_id = getattr(profile, "preset_voice_id", None)
+    design_prompt = getattr(profile, "design_prompt", None)
+    default_engine = data.default_engine if data.default_engine is not None else getattr(profile, "default_engine", None)
+
+    validation_error = _validate_profile_fields(
+        voice_type=voice_type,
+        preset_engine=preset_engine,
+        preset_voice_id=preset_voice_id,
+        design_prompt=design_prompt,
+        default_engine=default_engine,
+    )
+    if validation_error:
+        raise ValueError(validation_error)
+
     profile.name = data.name
     profile.description = data.description
     profile.language = data.language
+    if data.default_engine is not None:
+        profile.default_engine = data.default_engine or None  # empty string → NULL
     profile.updated_at = datetime.utcnow()
 
     db.commit()
@@ -327,8 +441,8 @@ async def delete_profile_sample(
     # Store profile_id before deleting
     profile_id = sample.profile_id
 
-    audio_path = Path(sample.audio_path)
-    if audio_path.exists():
+    audio_path = config.resolve_storage_path(sample.audio_path)
+    if audio_path is not None and audio_path.exists():
         audio_path.unlink()
 
     db.delete(sample)
@@ -345,7 +459,7 @@ async def update_profile_sample(
     sample_id: str,
     reference_text: str,
     db: Session,
-) -> Optional[ProfileSampleResponse]:
+) -> ProfileSampleResponse | None:
     """
     Update a profile sample's reference text.
 
@@ -382,19 +496,57 @@ async def create_voice_prompt_for_profile(
     engine: str = "qwen",
 ) -> dict:
     """
-    Create a combined voice prompt from all samples in a profile.
+    Create a voice prompt from a profile.
+
+    For cloned profiles: combines all audio samples into a voice prompt.
+    For preset profiles: returns the engine-specific preset voice reference.
+    For designed profiles: returns the text design prompt (future).
 
     Args:
         profile_id: Profile ID
         db: Database session
         use_cache: Whether to use cached prompts
-        engine: TTS engine to create prompt for ("qwen" or "luxtts")
+        engine: TTS engine to create prompt for
 
     Returns:
         Voice prompt dictionary
     """
     from ..backends import get_tts_backend_for_engine
 
+    profile = db.query(DBVoiceProfile).filter_by(id=profile_id).first()
+    if not profile:
+        raise ValueError(f"Profile not found: {profile_id}")
+
+    voice_type = getattr(profile, "voice_type", None) or "cloned"
+    validate_profile_engine(profile, engine)
+
+    # ── Preset profiles: return engine-specific voice reference ──
+    if voice_type == "preset":
+        if not profile.preset_engine or not profile.preset_voice_id:
+            raise ValueError(f"Preset profile {profile_id} is missing preset engine metadata")
+        if profile.preset_engine != engine:
+            raise ValueError(
+                f"Preset profile {profile_id} only supports engine '{profile.preset_engine}', not '{engine}'"
+            )
+        return {
+            "voice_type": "preset",
+            "preset_engine": profile.preset_engine,
+            "preset_voice_id": profile.preset_voice_id,
+        }
+
+    # ── Designed profiles: return text description (future) ──
+    if voice_type == "designed":
+        if not profile.design_prompt or not profile.design_prompt.strip():
+            raise ValueError(f"Designed profile {profile_id} is missing design_prompt")
+        return {
+            "voice_type": "designed",
+            "design_prompt": profile.design_prompt,
+        }
+
+    if engine not in CLONING_ENGINES:
+        raise ValueError(f"Engine '{engine}' does not support cloned voice profiles")
+
+    # ── Cloned profiles: create from audio samples ──
     samples = db.query(DBProfileSample).filter_by(profile_id=profile_id).all()
 
     if not samples:
@@ -404,40 +556,48 @@ async def create_voice_prompt_for_profile(
 
     if len(samples) == 1:
         sample = samples[0]
+        sample_audio_path = config.resolve_storage_path(sample.audio_path)
+        if sample_audio_path is None:
+            raise ValueError(f"Sample audio not found for profile {profile_id}")
         voice_prompt, _ = await tts_model.create_voice_prompt(
-            sample.audio_path,
+            str(sample_audio_path),
             sample.reference_text,
             use_cache=use_cache,
         )
         return voice_prompt
-    else:
-        audio_paths = [s.audio_path for s in samples]
-        reference_texts = [s.reference_text for s in samples]
 
-        combined_audio, combined_text = await tts_model.combine_voice_prompts(
-            audio_paths,
-            reference_texts,
-        )
+    audio_paths = []
+    for sample in samples:
+        sample_audio_path = config.resolve_storage_path(sample.audio_path)
+        if sample_audio_path is None:
+            raise ValueError(f"Sample audio not found for profile {profile_id}")
+        audio_paths.append(str(sample_audio_path))
+    reference_texts = [s.reference_text for s in samples]
+
+    combined_audio, combined_text = await tts_model.combine_voice_prompts(
+        audio_paths,
+        reference_texts,
+    )
 
-        # Save combined audio to cache directory (persistent)
-        # Create a hash of sample IDs to identify this specific combination
-        import hashlib
+    # Save combined audio to cache directory (persistent)
+    # Create a hash of sample IDs to identify this specific combination
+    import hashlib
 
-        sample_ids_str = "-".join(sorted([s.id for s in samples]))
-        combination_hash = hashlib.md5(sample_ids_str.encode()).hexdigest()[:12]
+    sample_ids_str = "-".join(sorted([s.id for s in samples]))
+    combination_hash = hashlib.md5(sample_ids_str.encode()).hexdigest()[:12]
 
-        cache_dir = _get_cache_dir()
-        cache_dir.mkdir(parents=True, exist_ok=True)
-        combined_path = cache_dir / f"combined_{profile_id}_{combination_hash}.wav"
+    cache_dir = _get_cache_dir()
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    combined_path = cache_dir / f"combined_{profile_id}_{combination_hash}.wav"
 
-        save_audio(combined_audio, str(combined_path), 24000)
+    save_audio(combined_audio, str(combined_path), 24000)
 
-        voice_prompt, _ = await tts_model.create_voice_prompt(
-            str(combined_path),
-            combined_text,
-            use_cache=use_cache,
-        )
-        return voice_prompt
+    voice_prompt, _ = await tts_model.create_voice_prompt(
+        str(combined_path),
+        combined_text,
+        use_cache=use_cache,
+    )
+    return voice_prompt
 
 
 async def upload_avatar(
@@ -465,8 +625,8 @@ async def upload_avatar(
         raise ValueError(error_msg)
 
     if profile.avatar_path:
-        old_avatar = Path(profile.avatar_path)
-        if old_avatar.exists():
+        old_avatar = config.resolve_storage_path(profile.avatar_path)
+        if old_avatar is not None and old_avatar.exists():
             old_avatar.unlink()
 
     # Determine file extension from uploaded file
@@ -487,7 +647,7 @@ async def upload_avatar(
 
     process_avatar(image_path, str(output_path))
 
-    profile.avatar_path = str(output_path)
+    profile.avatar_path = config.to_storage_path(output_path)
     profile.updated_at = datetime.utcnow()
 
     db.commit()
@@ -514,8 +674,8 @@ async def delete_avatar(
     if not profile or not profile.avatar_path:
         return False
 
-    avatar_path = Path(profile.avatar_path)
-    if avatar_path.exists():
+    avatar_path = config.resolve_storage_path(profile.avatar_path)
+    if avatar_path is not None and avatar_path.exists():
         avatar_path.unlink()
 
     profile.avatar_path = None
diff --git a/backend/services/stories.py b/backend/services/stories.py
index ac8e22bd..611ab29d 100644
--- a/backend/services/stories.py
+++ b/backend/services/stories.py
@@ -10,6 +10,7 @@
 from sqlalchemy.orm import Session
 from sqlalchemy import func
 
+from .. import config
 from ..models import (
     StoryCreate,
     StoryResponse,
@@ -826,8 +827,8 @@ async def export_story_audio(
             if version:
                 resolved_audio_path = version.audio_path
 
-        audio_path = Path(resolved_audio_path)
-        if not audio_path.exists():
+        audio_path = config.resolve_storage_path(resolved_audio_path)
+        if audio_path is None or not audio_path.exists():
             continue
 
         try:
diff --git a/backend/services/versions.py b/backend/services/versions.py
index 1743a25c..cbeb4b8b 100644
--- a/backend/services/versions.py
+++ b/backend/services/versions.py
@@ -158,8 +158,8 @@ def delete_version(version_id: str, db: Session) -> bool:
     gen_id = version.generation_id
 
     # Delete audio file
-    audio_path = Path(version.audio_path)
-    if audio_path.exists():
+    audio_path = config.resolve_storage_path(version.audio_path)
+    if audio_path is not None and audio_path.exists():
         audio_path.unlink()
 
     db.delete(version)
@@ -193,8 +193,8 @@ def delete_versions_for_generation(generation_id: str, db: Session) -> int:
     )
     count = 0
     for v in versions:
-        audio_path = Path(v.audio_path)
-        if audio_path.exists():
+        audio_path = config.resolve_storage_path(v.audio_path)
+        if audio_path is not None and audio_path.exists():
             audio_path.unlink()
         db.delete(v)
         count += 1
diff --git a/backend/voicebox-server.spec b/backend/voicebox-server.spec
index b5756c66..c1acc541 100644
--- a/backend/voicebox-server.spec
+++ b/backend/voicebox-server.spec
@@ -1,13 +1,11 @@
 # -*- mode: python ; coding: utf-8 -*-
-from PyInstaller.utils.hooks import collect_data_files
 from PyInstaller.utils.hooks import collect_submodules
 from PyInstaller.utils.hooks import collect_all
 from PyInstaller.utils.hooks import copy_metadata
 
 datas = []
 binaries = []
-hiddenimports = ['backend', 'backend.main', 'backend.config', 'backend.database', 'backend.models', 'backend.services.profiles', 'backend.services.history', 'backend.services.tts', 'backend.services.transcribe', 'backend.utils.platform_detect', 'backend.backends', 'backend.backends.pytorch_backend', 'backend.utils.audio', 'backend.utils.cache', 'backend.utils.progress', 'backend.utils.hf_progress', 'backend.services.cuda', 'backend.services.effects', 'backend.utils.effects', 'backend.services.versions', 'pedalboard', 'chatterbox', 'chatterbox.tts_turbo', 'chatterbox.mtl_tts', 'backend.backends.chatterbox_backend', 'backend.backends.chatterbox_turbo_backend', 'backend.backends.luxtts_backend', 'zipvoice', 'zipvoice.luxvoice', 'torch', 'transformers', 'fastapi', 'uvicorn', 'sqlalchemy', 'soundfile', 'qwen_tts', 'qwen_tts.inference', 'qwen_tts.inference.qwen3_tts_model', 'qwen_tts.inference.qwen3_tts_tokenizer', 'qwen_tts.core', 'qwen_tts.cli', 'requests', 'pkg_resources.extern', 'backend.backends.mlx_backend', 'mlx', 'mlx.core', 'mlx.nn', 'mlx_audio', 'mlx_audio.tts', 'mlx_audio.stt']
-datas += collect_data_files('qwen_tts')
+hiddenimports = ['backend', 'backend.main', 'backend.config', 'backend.database', 'backend.models', 'backend.services.profiles', 'backend.services.history', 'backend.services.tts', 'backend.services.transcribe', 'backend.utils.platform_detect', 'backend.backends', 'backend.backends.pytorch_backend', 'backend.backends.qwen_custom_voice_backend', 'backend.utils.audio', 'backend.utils.cache', 'backend.utils.progress', 'backend.utils.hf_progress', 'backend.services.cuda', 'backend.services.effects', 'backend.utils.effects', 'backend.services.versions', 'pedalboard', 'chatterbox', 'chatterbox.tts_turbo', 'chatterbox.mtl_tts', 'backend.backends.chatterbox_backend', 'backend.backends.chatterbox_turbo_backend', 'backend.backends.luxtts_backend', 'zipvoice', 'zipvoice.luxvoice', 'torch', 'transformers', 'fastapi', 'uvicorn', 'sqlalchemy', 'soundfile', 'qwen_tts', 'qwen_tts.inference', 'qwen_tts.inference.qwen3_tts_model', 'qwen_tts.inference.qwen3_tts_tokenizer', 'qwen_tts.core', 'qwen_tts.cli', 'requests', 'pkg_resources.extern', 'backend.backends.hume_backend', 'tada', 'tada.modules', 'tada.modules.tada', 'tada.modules.encoder', 'tada.modules.decoder', 'tada.modules.aligner', 'tada.modules.acoustic_spkr_verf', 'tada.nn', 'tada.nn.vibevoice', 'tada.utils', 'tada.utils.gray_code', 'tada.utils.text', 'backend.utils.dac_shim', 'torchaudio', 'backend.backends.kokoro_backend', 'kokoro', 'kokoro.pipeline', 'kokoro.model', 'kokoro.istftnet', 'kokoro.modules', 'kokoro.custom_stft', 'en_core_web_sm', 'loguru', 'backend.backends.mlx_backend', 'mlx', 'mlx.core', 'mlx.nn', 'mlx_audio', 'mlx_audio.tts', 'mlx_audio.stt']
 datas += copy_metadata('qwen-tts')
 datas += copy_metadata('requests')
 datas += copy_metadata('transformers')
@@ -15,8 +13,9 @@ datas += copy_metadata('huggingface-hub')
 datas += copy_metadata('tokenizers')
 datas += copy_metadata('safetensors')
 datas += copy_metadata('tqdm')
-hiddenimports += collect_submodules('qwen_tts')
+datas += copy_metadata('en_core_web_sm')
 hiddenimports += collect_submodules('jaraco')
+hiddenimports += collect_submodules('tada')
 hiddenimports += collect_submodules('mlx')
 hiddenimports += collect_submodules('mlx_audio')
 tmp_ret = collect_all('zipvoice')
@@ -27,12 +26,22 @@ tmp_ret = collect_all('lazy_loader')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('librosa')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('qwen_tts')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('inflect')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('perth')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('piper_phonemize')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('misaki')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('language_tags')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('espeakng_loader')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('en_core_web_sm')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('mlx')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('mlx_audio')
diff --git a/docs/content/docs/developer/tts-engines.mdx b/docs/content/docs/developer/tts-engines.mdx
index dc749b02..21f07880 100644
--- a/docs/content/docs/developer/tts-engines.mdx
+++ b/docs/content/docs/developer/tts-engines.mdx
@@ -285,6 +285,34 @@ In `app/src/lib/hooks/useGenerationForm.ts`:
 
 In `app/src/components/ServerSettings/ModelManagement.tsx`:
 - Add description to `MODEL_DESCRIPTIONS` record
+- Add model name to `voiceModels` filter condition
+
+### 3.6 Non-Cloning Engines (Preset Voices)
+
+If your engine uses **pre-built voices** instead of zero-shot cloning from reference audio (e.g. Kokoro), additional integration is needed:
+
+**Backend:**
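+- In your engine's backend module, define a voices list of `(voice_id, display_name, gender, language)` tuples (e.g. `KOKORO_VOICES` in `kokoro_backend.py`), and add a branch for the engine to `_get_preset_voice_ids()` in `backend/services/profiles.py` — engines without a branch there skip preset voice-ID validation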
+- `create_voice_prompt()` should return `{"voice_type": "preset", "preset_engine": "<engine>", "preset_voice_id": "<id>"}`
+- `generate()` should read `voice_prompt.get("preset_voice_id")` to select the voice
+- Add a `seed_preset_profiles("<engine>")` call in `backend/routes/models.py` after model download completes
+- The `seed_preset_profiles()` function in `backend/services/profiles.py` creates DB profiles with `voice_type="preset"`
+
+**Frontend:**
+- The `EngineModelSelector` filters options based on `selectedProfile.voice_type`:
+  - `"cloned"` profiles → only cloning engines shown (Kokoro hidden)
+  - `"preset"` profiles → only the preset's engine shown
+- Profile cards show the engine name as a badge for preset profiles
+- When a preset profile is selected, the engine auto-switches
+
+**Profile schema fields for presets:**
+- `voice_type: "preset"` (vs `"cloned"` for traditional profiles)
+- `preset_engine: "<engine>"` — which engine owns this voice
+- `preset_voice_id: "<id>"` — the engine-specific voice identifier
+
+**For future "designed" voices** (text description instead of audio, e.g. Qwen CustomVoice):
+- Use `voice_type: "designed"` with `design_prompt` field
+- `create_voice_prompt_for_profile()` already returns the design prompt for this type
 
 ## Phase 4: Dependencies
 
diff --git a/docs/notes/PROJECT_STATUS.md b/docs/notes/PROJECT_STATUS.md
index 71c06453..dee437f5 100644
--- a/docs/notes/PROJECT_STATUS.md
+++ b/docs/notes/PROJECT_STATUS.md
@@ -1,6 +1,6 @@
 # Voicebox Project Status & Roadmap
 
-> Last updated: 2026-03-13 | Current version: **v0.1.13** | 13.1k stars | ~176 open issues | 25 open PRs
+> Last updated: 2026-03-18 | Current version: **v0.3.0** | 13.4k stars | ~136 open issues | 9 open PRs
 
 ---
 
@@ -100,7 +100,7 @@ POST /generate
 
 ## Current State
 
-### What's Shipped (v0.1.13 + recent merges)
+### What's Shipped (v0.3.0)
 
 **Core TTS:**
 - Qwen3-TTS voice cloning (1.7B and 0.6B models)
@@ -108,30 +108,42 @@ POST /generate
 - Multi-engine TTS architecture with thread-safe backend registry (PR #254)
 - LuxTTS integration — fast, CPU-friendly English TTS (PR #254)
 - Chatterbox Multilingual TTS — 23 languages including Hebrew (PR #257)
-- Instruct parameter UI exists but is non-functional across all backends (see #224, Known Limitations)
+- Chatterbox Turbo — paralinguistic tags, low latency English (PR #258)
 - HumeAI TADA integration — 1B English + 3B Multilingual speech-language model (PR #296)
-- Single flat model dropdown (Qwen 1.7B, Qwen 0.6B, LuxTTS, Chatterbox, Chatterbox Turbo, TADA 1B, TADA 3B)
-- Centralized model config registry (`ModelConfig` dataclass) — no per-engine dispatch maps in `main.py`
+- Chunked TTS generation for long text — engine-agnostic, removes ~500 char limit (PR #266)
+- Async generation queue (PR #269)
+- Post-processing audio effects system (PR #271)
+- Centralized model config registry (`ModelConfig` dataclass) — no per-engine dispatch maps
 - Shared `EngineModelSelector` component — engine/model dropdown defined once, used in both generation forms
 
 **Infrastructure:**
-- CUDA backend swap via binary download and restart (PR #252)
-- GPU acceleration settings UI
+- CUDA backend swap via binary download and restart (PR #252), upgraded to cu128 (PR #316)
+- CUDA backend split into independently versioned server + libs archives (PR #298)
+- Docker + web deployment (PR #161)
+- Backend refactor: modular architecture, style guide, tooling (PR #285)
+- Settings overhaul: routed sub-tabs, server logs, changelog, about page (PR #294)
+- Windows support: CUDA detection, cross-platform justfile, clean server shutdown (PR #272)
 - Voice profiles with multi-sample support
 - Stories editor (multi-track DAW timeline)
 - Whisper transcription (base, small, medium, large variants)
-- Model management UI with inline download progress bars (HFProgressTracker)
+- Model management UI with inline download progress bars + folder migration (PR #268)
 - Download cancel/clear UI with error panel (PR #238)
 - Generation history with caching
 - Streaming generation endpoint (MLX only)
-- Duplicate profile name validation (PR #175)
-- Linux NVIDIA GBM buffer + WebKitGTK microphone fix (PR #210)
+- Audio player freeze fix + UX improvements (PR #293)
+- CORS restriction to known local origins (PR #88)
+
+### Abandoned Integrations
+
+| Model | PR | Reason |
+|-------|----|--------|
+| **CosyVoice2/3** | PR #311 | Output quality too poor. Heavy deps, no PyPI, needed 5+ shims. |
 
 ### What's In-Flight
 
 | Feature | Branch/PR | Status |
 |---------|-----------|--------|
-| Chatterbox Turbo + per-engine language lists | `feat/chatterbox-turbo` / PR #258 | Open, ready for review |
+| Kokoro 82M TTS engine | WIP | In development — 82M CPU-realtime engine, 8 languages |
 
 ### TTS Engine Comparison
 
@@ -144,6 +156,7 @@ POST /generate
 | Chatterbox Turbo | `chatterbox-turbo` | English | ~1.5 GB | Paralinguistic tags ([laugh], [cough]), 350M params, low latency | Partial — inline tags only, no separate instruct param |
 | TADA 1B | `tada-1b` | English | ~4 GB | HumeAI speech-language model, 700s+ coherent audio | None |
 | TADA 3B Multilingual | `tada-3b-ml` | 10 (en, ar, zh, de, es, fr, it, ja, pl, pt) | ~8 GB | Multilingual, text-acoustic dual alignment | None |
+| Kokoro 82M | `kokoro` | 8 (en, es, fr, hi, it, pt, ja, zh) | ~350 MB | 82M params, CPU realtime, Apache 2.0, pre-built voices | None |
 
 ### Multi-Engine Architecture (Shipped)
 
@@ -173,69 +186,41 @@ The singleton TTS backend blocker described in the previous version of this doc
 
 | PR | Title | Merged |
 |----|-------|--------|
-| **#257** | feat: Chatterbox TTS engine with multilingual voice cloning | 2026-03-13 |
-| **#254** | feat: LuxTTS integration — multi-engine TTS support | 2026-03-13 |
-| **#252** | feat: CUDA backend swap via binary download and restart | 2026-03-13 |
-| **#238** | Download cancel/clear UI, fixed model downloading | 2026-03-13 |
-| **#250** | docs: align local API port examples | 2026-03-13 |
-| **#210** | fix: Linux NVIDIA GBM buffer crash | 2026-03-13 |
-| **#175** | Fix #134: duplicate profile name validation | 2026-03-13 |
-
-### In-Flight (Our Work)
+| **#316** | Upgrade CUDA backend from cu126 to cu128, fix GPU settings UI | 2026-03-18 |
+| **#305** | fix: bundle qwen_tts source files in PyInstaller build | 2026-03-17 |
+| **#298** | feat: split CUDA backend into independently versioned server + libs archives | 2026-03-17 |
+| **#296** | Add HumeAI TADA TTS engine (1B English + 3B Multilingual) | 2026-03-17 |
+| **#295** | fix: batch of bug fixes from issue tracker | 2026-03-17 |
+| **#293** | Fix audio player freezing and improve UX | 2026-03-17 |
+| **#294** | Settings overhaul: routed sub-tabs, server logs, changelog, about page | 2026-03-16 |
+| **#288** | Better docs | 2026-03-16 |
+| **#285** | Backend refactor: modular architecture, style guide, tooling | 2026-03-16 |
+| **#274** | Landing page v0.2.0 redesign | 2026-03-15 |
+| **#272** | Windows support: CUDA detection, cross-platform justfile, clean server shutdown | 2026-03-15 |
+| **#271** | Add post-processing audio effects system | 2026-03-14 |
+| **#269** | feat: async generation queue | 2026-03-13 |
+| **#268** | feat: model management improvements and folder migration | 2026-03-13 |
+| **#266** | feat: chunked TTS generation for long text (engine-agnostic) | 2026-03-13 |
+| **#265** | feat: paralinguistic tag autocomplete for Chatterbox Turbo | 2026-03-13 |
+| **#264** | fix: Chatterbox float64 dtype mismatch + model unload button | 2026-03-13 |
+| **#258** | feat: Chatterbox Turbo engine + per-engine language lists | 2026-03-13 |
+| **#230** | docs: fix README grammar | 2026-03-13 |
+| **#161** | feat: Docker + web deployment | 2026-03-13 |
+| **#88** | security: restrict CORS to known local origins | 2026-03-13 |
+
+### Currently Open (9 PRs)
 
 | PR | Title | Status | Notes |
 |----|-------|--------|-------|
-| **#258** | feat: Chatterbox Turbo engine + per-engine language lists | Open | Ready for review. Adds Turbo engine + dynamic language dropdown. |
-
-### Merge-Ready / Near-Ready (Bug Fixes & Small Features)
-
-| PR | Title | Risk | Notes |
-|----|-------|------|-------|
-| **#230** | docs: fix README grammar | None | Docs-only |
-| **#243** | a11y: screen reader and keyboard improvements | Low | Accessibility, no backend changes |
-| **#178** | Fix #168 #140: generation error handling | Low | Error handling improvements |
-| **#152** | Fix: prevent crashes when HuggingFace unreachable | Medium | Monkey-patches HF hub; solves real offline bug (#150, #151) |
-| **#218** | fix: unify qwen tts cache dir on Windows | Low | Windows-specific path fix |
-| **#214** | fix: panic on launch from tokio::spawn | Low | Rust-side Tauri fix |
-| **#88** | security: restrict CORS to known local origins | Low | Security hardening |
-| **#133** | feat: network access toggle | Low | Wires up existing plumbing |
-
-### Significant Feature PRs
-
-| PR | Title | Complexity | Notes |
-|----|-------|-----------|-------|
-| **#253** | Enhance speech tokenizer with 48kHz version | Medium | Qwen tokenizer upgrade |
-| **#97** | fix: pass language parameter to TTS models | Medium | May be partially obsoleted by multi-engine work — needs review |
-| **#99** | feat: chunked TTS with quality selector | Medium | Solves 500-char limit. Addresses #191, #203, #69, #111. |
-| **#154** | feat: Audiobook tab | Medium | Full audiobook workflow. Depends on #99 concepts. |
-| **#91** | fix: CoreAudio device enumeration | Medium | macOS audio device handling |
-
-### Architectural PRs (Need Careful Review)
-
-| PR | Title | Complexity | Notes |
-|----|-------|-----------|-------|
-| **#225** | feat: custom HuggingFace model support | High | Arbitrary HF repo loading. May need rework given multi-engine arch is now shipped. |
-| **#194** | feat: Hebrew + Chatterbox TTS | High | **Superseded** by PR #257 which shipped Chatterbox multilingual (23 langs incl. Hebrew). May be closeable. |
-| **#195** | feat: per-profile LoRA fine-tuning | Very High | Training pipeline, adapter management, 15 new endpoints. Depends on #194 (now superseded). |
-| **#161** | feat: Docker + web deployment | High | 3-stage Dockerfile, SPA serving. Independent of TTS engine work. |
-| **#124** / **#123** | Docker (simpler attempts) | Low-Medium | Overlap with #161 |
-| **#227** | fix: harden input validation & file safety | Medium | Coupled to #225 (custom models) |
-
-### PRs That Need Author Action / Are Stale
-
-| PR | Title | Notes |
-|----|-------|-------|
-| **#237** | fix: bundle qwen_tts source files in PyInstaller | Build system, needs review |
-| **#215** | Update prerequisites with Tauri deps | Branch is `main` — will have conflicts |
-| **#89** | Linux Support | Branch is `main` — will have conflicts. Broad scope. |
-| **#83** | Update download links for v0.1.12 | Outdated (we're on v0.1.13) |
-
-### PRs Likely Superseded
-
-| PR | Superseded By | Notes |
-|----|--------------|-------|
-| **#194** (Hebrew + Chatterbox) | PR #257 (merged) | #257 ships Chatterbox multilingual with 23 languages including Hebrew. #194 took a different approach (route by language). Can likely be closed. |
-| **#33** (External provider binaries) | PR #252 (merged) | #252 shipped CUDA backend swap. #33's broader provider architecture may still have value but needs reassessment. |
+| **#311** | feat: add CosyVoice2/3 TTS engine | **Will close** | Model quality too poor. See Abandoned Integrations. |
+| **#253** | Enhance speech tokenizer with 48kHz version | Community PR | Qwen tokenizer upgrade. Worth reviewing. |
+| **#237** | fix: bundle qwen_tts source files in PyInstaller | Superseded | Our PR #305 shipped this. Can close. |
+| **#227** | fix: harden input validation & file safety | Community PR | Coupled to #225 (custom models). |
+| **#225** | feat: custom HuggingFace model support | Community PR | Needs rework for multi-engine arch. |
+| **#218** | fix: unify qwen tts cache dir on Windows | Community PR | Windows-specific path fix. Still relevant. |
+| **#195** | feat: per-profile LoRA fine-tuning | Draft | Complex. 15 new endpoints. |
+| **#154** | feat: Audiobook tab | Community PR | Chunked generation now shipped (#266). |
+| **#91** | fix: CoreAudio device enumeration | Draft | macOS audio device handling. |
 
 ---
 
@@ -280,7 +265,7 @@ Strong demand for: Hindi (#245), Indonesian (#247), Dutch (#236), Hebrew (#199),
 | #132 | LavaSR (transcription) |
 | #76 | (General model expansion) |
 
-Community also requests: XTTS-v2, Fish Speech, CosyVoice, Kokoro. The multi-engine architecture is now in place, making new model integration significantly easier.
+Community also requests: XTTS-v2, Fish Speech, Kokoro. CosyVoice was tried and abandoned. The multi-engine architecture is in place, making new model integration straightforward.
 
 ### Long-Form / Chunking (5 issues)
 
@@ -288,7 +273,7 @@ Users hitting the ~500 character practical limit.
 
 **Key issues:** #234 (queue system), #203 (500 char limit), #191 (auto-split), #111, #69
 
-**Fix path:** PR #99 (chunked TTS + quality selector) directly addresses this. PR #154 (Audiobook tab) builds on it.
+**Fix path:** **Mostly resolved.** PR #266 (engine-agnostic chunked TTS) and PR #269 (async generation queue) are both merged. PR #154 (Audiobook tab) is still open.
 
 ### Feature Requests (23 issues)
 
@@ -326,7 +311,7 @@ Notable requests:
 | `CUDA_BACKEND_SWAP_FINAL.md` | — | **Shipped** (PR #252) | Final implementation plan |
 | `EXTERNAL_PROVIDERS.md` | v0.2.0 | **Not started** | Remote server support |
 | `MLX_AUDIO.md` | — | **Shipped** | MLX backend is live |
-| `DOCKER_DEPLOYMENT.md` | v0.2.0 | **PR exists** (#161) | Waiting on review |
+| `DOCKER_DEPLOYMENT.md` | v0.2.0 | **Shipped** (PR #161) | Docker + web deployment |
 | `OPENAI_SUPPORT.md` | v0.2.0 | **Not started** | OpenAI-compatible API layer |
 | `PR33_CUDA_PROVIDER_REVIEW.md` | — | **Reference** | Analysis of the original provider approach |
 
@@ -334,31 +319,31 @@ Notable requests:
 
 ## New Model Integration — Landscape
 
-### Models Worth Supporting (2026 SOTA — updated March 13)
+### Models Worth Supporting (2026 SOTA — updated March 18)
 
 | Model | Cloning | Speed | Sample Rate | Languages | VRAM | Instruct Support | Integration Ease | Status |
 |-------|---------|-------|-------------|-----------|------|-----------------|-----------------|--------|
 | **Qwen3-TTS** | 10s zero-shot | Medium | 24 kHz | 10 | Medium | None (Base); Yes (CustomVoice variant, predefined speakers only) | **Shipped** | v0.1.13 |
 | **LuxTTS** | 3s zero-shot | 150x RT, CPU ok | 48 kHz | English | <1 GB | None | **Shipped** | PR #254 |
 | **Chatterbox MTL** | 5s zero-shot | Medium | 24 kHz | 23 | Medium | Partial — `exaggeration` float | **Shipped** | PR #257 |
-| **Chatterbox Turbo** | 5s zero-shot | Fast | 24 kHz | English | Low | Partial — inline tags only | **PR #258** | In review |
-| **CosyVoice2-0.5B** | 3-10s zero-shot | Very fast | 24 kHz | Multilingual | Low | **Yes** — `inference_instruct2()`, works with cloning | Ready | Best instruct candidate |
-| **Fish Speech** | 10-30s few-shot | Real-time | 24-44 kHz | 50+ | Medium | **Yes** — inline text descriptions, word-level control | Ready | Multi-engine arch in place |
-| **MOSS-TTS Family** | Zero-shot | — | — | Multilingual | Medium | **Yes** — text prompts for style + timbre design | Needs vetting | Apache 2.0, multi-speaker dialogue |
-| **HumeAI TADA 1B/3B** | Zero-shot | 5× faster than LLM-TTS | 24 kHz | EN (1B), Multilingual (3B) | Medium | Partial — automatic prosody from text context | **Shipped** | PR #296, MIT, 700s+ coherent |
-| **VoxCPM 1.5** | Zero-shot (seconds) | ~0.15 RTF streaming | — | Bilingual (EN/ZH) | Medium | Partial — automatic context-aware prosody | Needs vetting | Apache 2.0, tokenizer-free continuous diffusion |
-| **Kokoro-82M** | 3s instant | CPU realtime | 24 kHz | English | Tiny (82M) | Partial — automatic style inference | Ready | Apache 2.0, multi-engine arch in place |
-| **XTTS-v2** | 6s zero-shot | Mid-GPU | 24 kHz | 17+ | Medium | Partial — style transfer from ref audio only | Ready | Multi-engine arch in place |
-| **Pocket TTS** | Zero-shot + streaming | >1× RT on CPU | — | English | ~100M params, CPU-first | None | Needs vetting | MIT, Kyutai Labs, no GPU required |
-
-#### Notes on New Candidates (March 2026)
-
-- **CosyVoice2-0.5B** — Best candidate for instruct support. `inference_instruct2()` accepts a text instruct parameter for emotions, speed, volume, dialects — and it works alongside voice cloning. This is the closest match to what users expect from our instruct UI. [HF: FunAudioLLM/CosyVoice2-0.5B](https://huggingface.co/FunAudioLLM/CosyVoice2-0.5B)
-- **HumeAI TADA** — Text-Audio Dual Alignment arch. Near-zero hallucinations/drift, free synced transcript. 700+ seconds coherent audio. Best candidate for Stories long-form reliability. Prosody/emotion is automatic from text context, not user-controllable. [HF: HumeAI/tada-1b](https://huggingface.co/HumeAI/tada-1b) | [GitHub: HumeAI/tada](https://github.com/HumeAI/tada)
-- **MOSS-TTS** — Modular suite: flagship cloning, MOSS-TTSD (multi-speaker dialogue), MOSS-VoiceGenerator (create voices from text descriptions). VoiceGenerator unifies timbre design and style control via text prompts, usable as a layer for downstream TTS including cloning. [HF: OpenMOSS-Team/MOSS-VoiceGenerator](https://huggingface.co/OpenMOSS-Team/MOSS-VoiceGenerator) | [GitHub: OpenMOSS/MOSS-TTS](https://github.com/OpenMOSS/MOSS-TTS)
-- **Fish Speech** — Word-level fine-grained control using plain language descriptions inline in the script. Works with cloning. Note: Fish Audio S2 has a restrictive research license (commercial use requires approval), but the open-source Fish Speech model may differ. Needs license clarification. [fish.audio blog](https://fish.audio/blog/fish-audio-s2-fine-grained-ai-voice-control-at-the-word-level)
-- **VoxCPM 1.5** — Tokenizer-free continuous diffusion + autoregressive. No discrete token artifacts. Prosody/emotion is context-aware but automatic, not explicitly controllable via text prompt. Real-time streaming, LoRA fine-tuning. Trained on 1.8M+ hours. [GitHub: OpenBMB/VoxCPM](https://github.com/OpenBMB/VoxCPM)
-- **Pocket TTS** — 100M param CPU-first model from Kyutai Labs (Moshi team). Runs >1× realtime without GPU. No style control. Broadens hardware support significantly. [GitHub: kyutai-labs/pocket-tts](https://github.com/kyutai-labs/pocket-tts)
+| **Chatterbox Turbo** | 5s zero-shot | Fast | 24 kHz | English | Low | Partial — inline tags only | **Shipped** | PR #258 |
+| **HumeAI TADA 1B/3B** | Zero-shot | 5x faster than LLM-TTS | 24 kHz | EN (1B), Multilingual (3B) | Medium | Partial — automatic prosody | **Shipped** | PR #296 |
+| **Kokoro-82M** | Pre-built voices | CPU realtime | 24 kHz | 8 | Tiny (82M) | None | **In progress** | Apache 2.0, pip install, ~350MB |
+| ~~**CosyVoice2-0.5B**~~ | 3-10s zero-shot | Very fast | 24 kHz | Multilingual | Low | Yes — `inference_instruct2()` | **Abandoned** | PR #311 — poor output quality |
+| **Fish Speech** | 10-30s few-shot | Real-time | 24-44 kHz | 50+ | Medium | **Yes** — inline text descriptions, word-level control | Ready | Needs license clarification |
+| **XTTS-v2** | 6s zero-shot | Mid-GPU | 24 kHz | 17+ | Medium | Partial — style transfer from ref audio only | Ready | Mature pip package |
+| **Pocket TTS** | Zero-shot + streaming | >1x RT on CPU | — | English | ~100M params, CPU-first | None | Ready | MIT, Kyutai Labs |
+| **MOSS-TTS Family** | Zero-shot | — | — | Multilingual | Medium | **Yes** — text prompts for style + timbre design | Needs vetting | Apache 2.0 |
+| **VoxCPM 1.5** | Zero-shot (seconds) | ~0.15 RTF streaming | — | Bilingual (EN/ZH) | Medium | Partial — automatic context-aware prosody | Needs vetting | Apache 2.0 |
+
+#### Notes on Candidates (March 2026)
+
+- **CosyVoice2-0.5B** — **Tried and abandoned** (PR #311). Despite having the best instruct API, output quality was poor. No PyPI package, needed 5+ shims, heavy deps. Not worth it.
+- **HumeAI TADA** — **Shipped** (PR #296). 700+ seconds coherent audio. [GitHub: HumeAI/tada](https://github.com/HumeAI/tada)
+- **Kokoro-82M** — **In progress.** 82M params, CPU realtime, Apache 2.0, clean `pip install kokoro`. Uses pre-built voice styles (not zero-shot cloning from arbitrary audio). [GitHub: hexgrad/kokoro](https://github.com/hexgrad/kokoro)
+- **Fish Speech** — Word-level fine-grained control. License needs clarification. [fish.audio blog](https://fish.audio/blog/fish-audio-s2-fine-grained-ai-voice-control-at-the-word-level)
+- **XTTS-v2** — Coqui's multilingual cloning. 17+ languages, pip-installable. [GitHub: coqui-ai/TTS](https://github.com/coqui-ai/TTS)
+- **Pocket TTS** — 100M param CPU-first model from Kyutai Labs. [GitHub: kyutai-labs/pocket-tts](https://github.com/kyutai-labs/pocket-tts)
 - **Watch list:** MioTTS-2.6B (fast LLM-based EN/JP, vLLM compatible), Oolel-Voices (Soynade Research, expressive modular control)
 
 ### Adding a New Engine (Now Straightforward)
@@ -402,49 +387,44 @@ The generation form now uses a flat model dropdown with engine-based routing. Pe
 
 ## Recommended Priorities
 
-### Tier 1 — Ship Now (Low Risk)
+### Tier 1 — Ship Now
 
 | Priority | PR/Item | Impact | Effort |
 |----------|---------|--------|--------|
-| 1 | **#258** — Chatterbox Turbo + per-engine languages | Paralinguistic tags, proper language filtering | Review only |
-| 2 | **#152** — Offline mode crash fix | Fixes #150, #151 | Low |
-| 3 | **#99** — Chunked TTS + quality selector | Removes 500-char limit, addresses 5 issues | Medium |
-| 4 | **#218** — Windows HF cache dir fix | Windows-specific pain | Low |
-| 5 | **#178** — Generation error handling | Error UX | Low |
-| 6 | **#230** — Docs fixes | Zero risk | None |
-| 7 | **#133** — Network access toggle | Wires up existing code | Low |
-| 8 | **#88** — CORS restriction | Security improvement | Low |
-| 9 | **#214** — Tauri window close panic fix | Stability | Low |
-| 10 | Triage GPU issues | Many may be resolved by CUDA swap (#252) | Low |
-| 11 | Close superseded PRs | #194 (superseded by #257), #83 (outdated) | None |
-
-### Tier 2 — Next Release (v0.2.0)
+| 1 | **Kokoro 82M** — finish integration | New engine, CPU-friendly, 8 langs | Low (nearly done) |
+| 2 | Close PR #311 (CosyVoice) and #237 (superseded by #305) | Housekeeping | None |
+| 3 | **#218** — Windows HF cache dir fix | Windows-specific pain | Low |
+| 4 | **#253** — 48kHz speech tokenizer | Quality improvement for Qwen | Medium |
+
+### Tier 2 — Feature Work
 
 | Priority | Item | Impact | Effort |
 |----------|------|--------|--------|
-| 1 | **#253** — 48kHz speech tokenizer | Quality improvement | Medium |
-| 2 | **#161** — Docker deployment | Server/headless users | Medium |
-| 3 | **#154** — Audiobook tab | Long-form users | Medium |
-| 4 | ~~**Model config registry**~~ | ~~Reduce dispatch duplication in main.py~~ | **Done** |
-| 5 | **#225** — Custom HuggingFace models | User-supplied models | High (needs rework for multi-engine) |
+| 1 | **#154** — Audiobook tab | Long-form users. Chunking + queue now shipped. | Medium |
+| 2 | **#225** — Custom HuggingFace models | User-supplied models. Needs rework. | High |
+| 3 | OpenAI-compatible API (plan doc exists) | Low effort once API is stable | Low |
+| 4 | LoRA fine-tuning (PR #195) | Complex, needs rework for multi-engine | Very High |
+| 5 | Streaming for non-MLX engines | Currently MLX-only | Medium |
 
-### Tier 3 — Future (v0.3.0+)
+### Tier 3 — Future Engines
 
 | Priority | Item | Notes |
 |----------|------|-------|
-| 1 | **HumeAI TADA** | Long-form reliability for Stories, synced transcripts. Addresses #234, #203, #191, #111, #69. Needs API vetting. |
-| 2 | **Pocket TTS** (Kyutai) | CPU-first 100M model, broadens hardware support. Kyutai ships clean code. Needs API vetting. |
-| 3 | **MOSS-TTS** | Text-to-voice design (no ref audio) is unique. Multi-speaker dialogue for Stories. Needs thorough API vetting. |
-| 4 | **Kokoro-82M** | 82M params, CPU realtime, Apache 2.0. Easy win. |
-| 5 | ~~**Model config registry refactor**~~ | **Done** — consolidated in `backend/backends/__init__.py` + `EngineModelSelector.tsx` |
-| 6 | XTTS-v2 / Fish Speech / CosyVoice | Multi-engine arch is ready; just needs backend implementation |
-| 7 | **VoxCPM 1.5** | Tokenizer-free streaming, interesting but uncertain integration surface |
-| 8 | OpenAI-compatible API (plan doc exists) | Low effort once API is stable |
-| 9 | LoRA fine-tuning (PR #195) | Complex, needs rework for multi-engine |
-| 10 | External/remote providers | Depends on use case demand |
-| 11 | GGUF support (#226) | Depends on model ecosystem maturity |
-| 12 | Queue system (#234) | Batch generation |
-| 13 | Streaming for non-MLX engines | Currently MLX-only |
+| 1 | **Fish Speech** | 50+ langs, word-level instruct. License TBD. |
+| 2 | **XTTS-v2** | 17+ langs, mature pip package. Best multilingual cloning. |
+| 3 | **Pocket TTS** (Kyutai) | CPU-first 100M model. MIT. |
+| 4 | **MOSS-TTS** | Text-to-voice design. Multi-speaker dialogue for Stories. |
+| 5 | **VoxCPM 1.5** | Tokenizer-free streaming. Uncertain integration surface. |
+
+### ~~Previously Prioritized — Now Done~~
+
+- ~~#258 — Chatterbox Turbo~~ **Merged**
+- ~~#99 — Chunked TTS~~ **Superseded by #266, merged**
+- ~~#88 — CORS restriction~~ **Merged**
+- ~~#161 — Docker deployment~~ **Merged**
+- ~~#234 — Queue system~~ **Addressed by #269, merged**
+- ~~HumeAI TADA~~ **Shipped** (PR #296)
+- Kokoro-82M — **In progress** (not done yet; promoted to Tier 1 above)
 
 ---
 
@@ -452,13 +432,10 @@ The generation form now uses a flat model dropdown with engine-based routing. Pe
 
 | Branch | PR | Status | Notes |
 |--------|-----|--------|-------|
-| `feat/chatterbox-turbo` | #258 | Open | Chatterbox Turbo + per-engine languages |
+| `feat/cosyvoice-engine` | #311 | Open — closing | CosyVoice2/3 — abandoned, poor quality |
+| `feat/chatterbox-turbo` | #258 | **Merged** | Chatterbox Turbo + per-engine languages |
 | `feat/chatterbox` | #257 | **Merged** | Chatterbox Multilingual |
 | `feat/luxtts` | #254 | **Merged** | LuxTTS + multi-engine arch |
-| `external-provider-binaries` | #33 | Superseded by #252 | Original CUDA provider approach |
-| `feat/dual-server-binaries` | — | No PR | Related to provider split |
-| `fix-multi-sample` | — | No PR | Voice profile multi-sample fix |
-| `fix-dl-notification-...` | — | No PR | Model download UX |
 
 ---
 
diff --git a/docs/plans/API_REFACTOR_PLAN.md b/docs/plans/API_REFACTOR_PLAN.md
new file mode 100644
index 00000000..019f0482
--- /dev/null
+++ b/docs/plans/API_REFACTOR_PLAN.md
@@ -0,0 +1,428 @@
+# Voicebox API Refactor Plan
+
+Date: 2026-03-19
+Status: Proposed
+Scope: Backend HTTP API structure, schemas, docs, and compatibility strategy
+
+## Goals
+
+- Make the API easier to understand and automate against.
+- Improve endpoint consistency without breaking the desktop app or existing local integrations.
+- Align generated docs and checked-in OpenAPI artifacts with the actual backend.
+- Separate app-facing resources from internal or operational actions.
+- Create a migration path toward a cleaner `v2` resource model while preserving `v1` routes during transition.
+
+## Non-Goals
+
+- Rewriting backend business logic or generation internals.
+- Introducing authentication for all deployment modes in the first pass.
+- Changing storage models or database schema unless required for API correctness.
+- Removing current routes immediately.
+
+## Current Pain Points
+
+- Mixed endpoint styles: resource-oriented (`/profiles`) and command-oriented (`/generate`, `/tasks/clear`) coexist.
+- Related generation resources are split across multiple namespaces: `/generate`, `/history`, `/audio`, `/effects`, and `/generations/.../versions`.
+- Response payloads vary widely: typed models, raw dicts with `message`, booleans, and `HTTPException(detail=...)` payloads.
+- Some async flows use exception-shaped `202` responses instead of first-class task contracts.
+- Checked-in OpenAPI output can drift from actual backend models.
+- Operational endpoints such as `/shutdown` are exposed in the same surface as user workflows.
+
+## Guiding Principles
+
+1. Prefer additive changes before destructive changes.
+2. Keep `v1` behavior working until the app and docs fully migrate.
+3. Add compatibility shims close to the routing layer, not deep in services.
+4. Treat OpenAPI as a release artifact that must be kept in sync.
+5. Standardize public contracts before renaming everything.
+
+## Target API Shape
+
+This is the intended end state, not the immediate first milestone.
+
+### Core Resources
+
+- `/profiles`
+- `/profiles/{profile_id}/samples`
+- `/profiles/{profile_id}/avatar`
+- `/profiles/{profile_id}/effects`
+- `/generations`
+- `/generations/{generation_id}`
+- `/generations/{generation_id}/status`
+- `/generations/{generation_id}/audio`
+- `/generations/{generation_id}/versions`
+- `/generations/{generation_id}/versions/{version_id}`
+- `/generations/{generation_id}/versions/{version_id}/audio`
+- `/stories`
+- `/stories/{story_id}/items`
+- `/effects/presets`
+- `/models`
+- `/models/{model_name}`
+- `/tasks`
+
+### Operational or Internal Endpoints
+
+Move under an explicit namespace and disable where appropriate:
+
+- `/admin/shutdown`
+- `/admin/watchdog/disable`
+- `/admin/cache/clear`
+- `/admin/tasks/clear`
+
+### Response Contract Direction
+
+- Resource reads and writes return typed resource models.
+- Delete and action endpoints return small typed action result models.
+- Errors use a consistent structure.
+- Async actions return explicit task metadata instead of overloading `detail`.
+
+## Migration Strategy Overview
+
+The refactor is split into six phases. Phases 1-3 have the highest impact and are the safest to ship first.
+
+| Phase | Focus | Est. Duration | Risk | Backward Compatibility |
+| --- | --- | --- | --- | --- |
+| 1 | Documentation and contract correctness | 2-3 days | Low | Full |
+| 2 | Response and error consistency | 3-5 days | Low-Medium | Full |
+| 3 | Router structure and internal organization | 3-4 days | Low | Full |
+| 4 | Additive `v2` resource endpoints | 1-2 weeks | Medium | Full |
+| 5 | Client migration and deprecation rollout | 1 week | Medium | Full during rollout |
+| 6 | Cleanup and optional removals | 1-2 releases | Medium-High | Partial after notice |
+
+## Phase 1: Fix Contract Drift First
+
+Priority: Highest
+Outcome: The documented API matches the running backend.
+
+### Problems Addressed
+
+- `docs/openapi.json` can become stale.
+- Generated API reference pages may describe outdated request bodies.
+- App metadata still frames the backend too narrowly.
+
+### Implementation Steps
+
+1. Update FastAPI app metadata in `backend/app.py`.
+   - Replace the old Qwen-specific description with a multi-engine Voicebox API description.
+   - Add tags metadata for major domains if desired.
+2. Regenerate OpenAPI from the running app using the existing docs script flow.
+3. Compare `backend/models.py` to the checked-in schema.
+   - Verify `GenerationRequest`, effects endpoints, stories endpoints, and model endpoints.
+4. Regenerate or refresh API reference pages under `docs/content/docs/api-reference/`.
+5. Add a CI check that fails if `docs/openapi.json` is out of date.
+6. Add a short maintainer note describing when schema regeneration is required.
+
+### Backward Compatibility
+
+- No route changes.
+- No payload changes.
+- Safe to release immediately.
+
+### Success Criteria
+
+- `docs/openapi.json` matches the live app.
+- Generated docs include all currently supported generate parameters.
+- No frontend code changes required.
+
+## Phase 2: Standardize Responses and Errors
+
+Priority: High
+Outcome: Clients can handle responses predictably.
+
+### Problems Addressed
+
+- Delete endpoints return ad hoc message dicts.
+- Toggle endpoints return special one-off payloads.
+- `202` async responses are encoded as `HTTPException(detail=...)` in some places.
+
+### Implementation Steps
+
+1. Add shared response models in `backend/models.py`.
+   - `ActionResult`
+   - `DeleteResult`
+   - `ToggleFavoriteResponse`
+   - `AcceptedTaskResponse`
+   - `ApiError`
+2. Convert routes that currently return raw dicts to explicit `response_model`s.
+   - `DELETE /profiles/{profile_id}`
+   - `DELETE /history/{generation_id}`
+   - `DELETE /stories/{story_id}`
+   - `POST /tasks/clear`
+   - `POST /cache/clear`
+   - similar endpoints across routes
+3. Replace exception-shaped `202` responses in `transcription.py` with an explicit accepted response body.
+   - Return `JSONResponse(status_code=202, content=...)` or typed FastAPI response model.
+4. Add a global exception handler for known API errors if helpful.
+   - Normalize `ValueError` to `400` with a consistent error body.
+   - Preserve FastAPI validation errors for now, or wrap them in a consistent top-level shape in a later pass.
+5. Document the stable error contract in the docs.
+
+### Migration Strategy
+
+- Keep field names inside successful payloads compatible where possible.
+- For existing dict responses, preserve the current keys while introducing typed models with the same shape.
+- For `202` flows, support both old and new client handling for one release if needed.
+
+### Timeline Estimate
+
+- 3-5 engineering days including tests and docs refresh.
+
+### Success Criteria
+
+- All mutation endpoints declare response models.
+- Clients can programmatically distinguish success, accepted, and error cases without special casing `detail` payloads.
+
+## Phase 3: Normalize Router Structure Internally
+
+Priority: High
+Outcome: The backend becomes easier to maintain before public path changes begin.
+
+### Problems Addressed
+
+- Route files hardcode full paths and are all mounted at root.
+- There is no consistent use of router prefixes or tags.
+- Route grouping in code does not cleanly express the public API shape.
+
+### Implementation Steps
+
+1. Add prefixes and tags to routers.
+   - `profiles`: `prefix="/profiles"`
+   - `generations`: keep `prefix="/generate"` for now; add the `/generations` aliases separately and carefully (see Phase 4)
+   - `history`: `prefix="/history"`
+   - `effects`: `prefix="/effects"`
+   - and so on
+2. Convert route declarations to relative paths within each router.
+3. Introduce a small route compatibility layer for routes that are likely to move later.
+   - Example: helper functions that can be mounted under both old and new paths.
+4. Add explicit route tags so Swagger/OpenAPI groups are coherent.
+5. Document the intended public ownership of each namespace.
+
+### Backward Compatibility
+
+- No public path changes yet if existing paths are preserved through prefixes and aliases.
+- Mostly internal refactoring.
+
+### Timeline Estimate
+
+- 3-4 engineering days.
+
+### Success Criteria
+
+- All route modules use prefixes and tags.
+- Route registration in `backend/routes/__init__.py` becomes simpler.
+- OpenAPI groups read cleanly by domain.
+
+## Phase 4: Introduce Additive `v2` Resource Endpoints
+
+Priority: High
+Outcome: A cleaner API exists without breaking the current one.
+
+### Problems Addressed
+
+- Generation-related resources are fragmented.
+- Sample and audio endpoints are not consistently modeled as resources.
+- Command-style naming makes the API harder to reason about.
+
+### New Endpoints to Add
+
+These should be introduced alongside current endpoints, not as replacements.
+
+- `POST /generations` -> alias for current `/generate`
+- `GET /generations` -> alias for current `/history`
+- `GET /generations/{id}` -> alias for current `/history/{id}`
+- `POST /generations/{id}/retry` -> alias for current `/generate/{id}/retry`
+- `POST /generations/{id}/regenerate` -> alias for current `/generate/{id}/regenerate`
+- `GET /generations/{id}/status` -> alias for current `/generate/{id}/status`
+- `POST /generations/stream` -> alias for current `/generate/stream`
+- `GET /generations/{id}/audio` -> alias for current `/audio/{generation_id}`
+- `GET /generations/{id}/export` -> alias for current `/history/{generation_id}/export`
+- `GET /generations/{id}/export-audio` -> alias for current `/history/{generation_id}/export-audio`
+- `GET /profiles/{profile_id}/samples/{sample_id}` or `GET /samples/{sample_id}` -> pick one of these two models deliberately and use it consistently
+- `PUT /profiles/{profile_id}/samples/{sample_id}` -> alias for current sample update route
+- `DELETE /profiles/{profile_id}/samples/{sample_id}` -> alias for current sample delete route
+
+### Implementation Steps
+
+1. Create new handler entry points that call the existing service functions.
+2. Keep old handlers in place, but mark them deprecated in OpenAPI.
+3. Add `summary` and `description` text clarifying preferred routes.
+4. Update frontend and docs examples to use new endpoints first.
+5. Add tests proving both old and new paths return equivalent responses.
+
+### Migration Strategy
+
+- Old paths remain functional for at least one stable release cycle.
+- New docs and client examples use `v2`-style resource routes immediately.
+- Include deprecation headers where feasible, for example:
+  - `Deprecation: true`
+  - `Sunset: <date>`
+  - `Link: <new-doc-url>; rel="successor-version"`
+
+### Timeline Estimate
+
+- 1-2 weeks depending on test coverage and frontend updates.
+
+### Success Criteria
+
+- All major generation workflows are accessible through resource-oriented routes.
+- Old routes still work unchanged.
+
+## Phase 5: Migrate First-Party Clients and Publish Deprecations
+
+Priority: Medium
+Outcome: Voicebox itself stops depending on legacy paths.
+
+### Problems Addressed
+
+- The desktop app and docs may continue to reinforce old route shapes.
+- Third-party consumers need a visible migration path.
+
+### Implementation Steps
+
+1. Update `app/src/lib/api/client.ts` to use the new preferred endpoints.
+2. Regenerate or refresh any generated API clients.
+3. Update docs examples, tutorials, and code snippets to use preferred routes only.
+4. Add a changelog entry describing the migration path.
+5. Add runtime deprecation logging for legacy route usage in development mode.
+6. If feasible, expose the API version and deprecation window via a field on `/health` or a small `/meta` endpoint.
+
+### Migration Strategy
+
+- Keep old endpoints available but clearly documented as legacy.
+- Publish a mapping table from old route to new route.
+- Do not change request or response payloads during the same phase unless necessary.
+
+### Timeline Estimate
+
+- About 1 week including docs and app verification.
+
+### Success Criteria
+
+- First-party app no longer depends on legacy route names.
+- Docs do not advertise deprecated paths as the primary interface.
+
+## Phase 6: Cleanup, Namespace Hardening, and Optional Breaking Changes
+
+Priority: Medium
+Outcome: The API surface is cleaner and safer for remote or Docker use.
+
+### Problems Addressed
+
+- Internal/admin endpoints are mixed into the public API.
+- Legacy aliases increase maintenance cost forever if never retired.
+
+### Implementation Steps
+
+1. Move operational endpoints under `/admin` or `/internal`.
+   - `/shutdown`
+   - `/watchdog/disable`
+   - `/tasks/clear`
+   - `/cache/clear`
+2. Gate these endpoints behind configuration for non-local deployments.
+   - Example: `VOICEBOX_ENABLE_ADMIN_API=true`
+3. Decide whether to remove or keep legacy aliases.
+   - If removing, do so only after a published deprecation window.
+4. Remove deprecated docs pages and old examples.
+5. Tighten route-level tests to prevent accidental reintroduction of legacy patterns.
+
+### Migration Strategy
+
+- For desktop-only local use, aliases may remain indefinitely if removal cost outweighs benefit.
+- For published remote API guidance, hide admin endpoints from default docs even if they still exist.
+
+### Timeline Estimate
+
+- 1-2 releases after the additive migration is complete.
+
+### Success Criteria
+
+- Public docs expose a coherent resource API.
+- Operational endpoints are clearly separate or disabled in remote contexts.
+
+## Cross-Cutting Work Items
+
+These should happen throughout the migration, not only in a single phase.
+
+### Testing
+
+- Add route equivalence tests for old and new paths.
+- Add schema snapshot tests for OpenAPI generation.
+- Add response-shape tests for common mutations and async workflows.
+- Add contract tests for `202 Accepted` flows.
+
+### Documentation
+
+- Maintain an old-to-new endpoint mapping table.
+- Add per-endpoint examples for create profile, generate, apply effects, transcribe, and stories operations.
+- Explicitly document which endpoints are app-facing vs admin-facing.
+
+### Observability
+
+- Add warning logs when deprecated endpoints are used.
+- Track usage counts in development builds, or in optional local-only logs (no telemetry).
+
+### Release Management
+
+- Mention API changes in `CHANGELOG.md`.
+- Ensure docs and app updates ship in the same release as new preferred routes.
+
+## Recommended Execution Order
+
+If engineering time is limited, implement in this exact order:
+
+1. Fix OpenAPI and docs drift.
+2. Standardize response models and accepted-task responses.
+3. Add router prefixes and tags internally.
+4. Add `/generations` aliases and sample path aliases.
+5. Migrate the first-party app to preferred routes.
+6. Deprecate or hide legacy/admin routes.
+
+## Old-to-New Route Mapping
+
+| Current Route | Preferred Route |
+| --- | --- |
+| `POST /generate` | `POST /generations` |
+| `POST /generate/stream` | `POST /generations/stream` |
+| `POST /generate/{id}/retry` | `POST /generations/{id}/retry` |
+| `POST /generate/{id}/regenerate` | `POST /generations/{id}/regenerate` |
+| `GET /generate/{id}/status` | `GET /generations/{id}/status` |
+| `GET /history` | `GET /generations` |
+| `GET /history/{id}` | `GET /generations/{id}` |
+| `GET /audio/{id}` | `GET /generations/{id}/audio` |
+| `GET /history/{id}/export` | `GET /generations/{id}/export` |
+| `GET /history/{id}/export-audio` | `GET /generations/{id}/export-audio` |
+| `PUT /profiles/samples/{sample_id}` | `PUT /profiles/{profile_id}/samples/{sample_id}` |
+| `DELETE /profiles/samples/{sample_id}` | `DELETE /profiles/{profile_id}/samples/{sample_id}` |
+| `POST /tasks/clear` | `POST /admin/tasks/clear` |
+| `POST /cache/clear` | `POST /admin/cache/clear` |
+| `POST /shutdown` | `POST /admin/shutdown` |
+| `POST /watchdog/disable` | `POST /admin/watchdog/disable` |
+
+## Risks and Mitigations
+
+### Risk: App regressions during endpoint migration
+
+- Mitigation: Add new routes before changing client usage.
+- Mitigation: Keep payloads identical while paths change.
+
+### Risk: Docs still drift after cleanup
+
+- Mitigation: Add CI enforcement and a release checklist step.
+
+### Risk: Third-party local scripts break on removal
+
+- Mitigation: Prefer indefinite aliases for single-user local workflows unless maintenance becomes painful.
+
+### Risk: Admin endpoints remain dangerous in remote mode
+
+- Mitigation: Hide and gate them before promoting remote deployment more broadly.
+
+## Definition of Done
+
+The refactor can be considered complete when all of the following are true:
+
+- OpenAPI, checked-in docs, and backend models match.
+- The preferred public API is resource-oriented and documented consistently.
+- The Voicebox app uses preferred routes exclusively.
+- Legacy routes are either deprecated with a timeline or intentionally retained as compatibility aliases.
+- Operational endpoints are clearly separated from the public app API.
diff --git a/tauri/src-tauri/Cargo.lock b/tauri/src-tauri/Cargo.lock
index b133dfc8..194314de 100644
--- a/tauri/src-tauri/Cargo.lock
+++ b/tauri/src-tauri/Cargo.lock
@@ -5041,7 +5041,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
 [[package]]
 name = "voicebox"
-version = "0.2.3"
+version = "0.3.1"
 dependencies = [
  "base64 0.22.1",
  "core-foundation-sys",
diff --git a/tauri/src-tauri/src/main.rs b/tauri/src-tauri/src/main.rs
index 415961f2..c65dda0c 100644
--- a/tauri/src-tauri/src/main.rs
+++ b/tauri/src-tauri/src/main.rs
@@ -53,6 +53,41 @@ fn find_voicebox_pid_on_port(port: u16) -> Option<u32> {
     None
 }
 
+/// Check if a Voicebox server is responding on the given port.
+///
+/// Sends an HTTP GET to `/health` and returns `true` only if the response
+/// is valid JSON matching the Voicebox `HealthResponse` schema — specifically
+/// `status` must be `"healthy"`, and both `model_loaded` and `gpu_available`
+/// must be present as booleans. This prevents misidentifying an unrelated
+/// service that happens to expose a `/health` endpoint.
+///
+/// The request runs on a dedicated OS thread because `reqwest::blocking`
+/// must not be driven from inside an async runtime, and the callers are in
+/// the async `start_server` command. Any failure (client build, connection,
+/// timeout, non-2xx status, invalid JSON, thread panic) yields `false`.
+#[allow(dead_code)] // Used in platform-specific cfg blocks
+fn check_health(port: u16) -> bool {
+    let url = format!("http://127.0.0.1:{}/health", port);
+    let probe = move || -> Option<bool> {
+        let client = reqwest::blocking::Client::builder()
+            .timeout(std::time::Duration::from_secs(3))
+            .build()
+            .ok()?;
+        let resp = client.get(&url).send().ok()?;
+        if !resp.status().is_success() {
+            return Some(false);
+        }
+        // Parse as JSON and validate Voicebox-specific fields
+        let body = resp.json::<serde_json::Value>().ok()?;
+        let is_bool = |key: &str| body.get(key).map_or(false, |v| v.is_boolean());
+        let healthy = body.get("status").and_then(|v| v.as_str()) == Some("healthy");
+        Some(healthy && is_bool("model_loaded") && is_bool("gpu_available"))
+    };
+    // A panicked or failed probe is treated as "not a Voicebox server".
+    let outcome = std::thread::spawn(probe).join();
+    outcome.ok().flatten().unwrap_or(false)
+}
+
 struct ServerState {
     child: Mutex<Option<tauri_plugin_shell::process::CommandChild>>,
     server_pid: Mutex<Option<u32>>,
@@ -80,7 +115,8 @@ async fn start_server(
         return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
     }
 
-    // Check if a voicebox server is already running on our port (from previous session with keep_running=true)
+    // Check if a voicebox server is already running on our port (from previous session with keep_running=true,
+    // or an externally started server e.g. via `python`, `uvicorn`, Docker, etc.)
     #[cfg(unix)]
     {
         use std::process::Command;
@@ -101,6 +137,20 @@ async fn start_server(
                             *state.server_pid.lock().unwrap() = Some(pid);
                             return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
                         }
+                    } else {
+                        // Process name doesn't contain "voicebox" — could be an external
+                        // Python/uvicorn/Docker server. Verify via HTTP health check.
+                        println!("Port {} in use by '{}' (PID: {}), checking if it's a Voicebox server...", SERVER_PORT, command, pid_str);
+                        if check_health(SERVER_PORT) {
+                            println!("Health check passed — reusing external server on port {}", SERVER_PORT);
+                            return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
+                        }
+                        println!("Health check failed — port is occupied by a non-Voicebox process");
+                        return Err(format!(
+                            "Port {} is already in use by another application ({}). \
+                             Close it or change the Voicebox server port.",
+                            SERVER_PORT, command
+                        ));
                     }
                 }
             }
@@ -114,18 +164,24 @@ async fn start_server(
             &format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(),
             std::time::Duration::from_secs(1),
         ).is_ok() {
-            // Port is in use — check if it's a voicebox process
+            // Port is in use — check if it's a voicebox process by name first
             if let Some(pid) = find_voicebox_pid_on_port(SERVER_PORT) {
                 println!("Found existing voicebox-server on port {} (PID: {}), reusing it", SERVER_PORT, pid);
                 *state.server_pid.lock().unwrap() = Some(pid);
                 return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
-            } else {
-                return Err(format!(
-                    "Port {} is already in use by another application. \
-                     Close the other application or change the Voicebox port.",
-                    SERVER_PORT
-                ));
             }
+            // Process name doesn't match — could be an external Python/Docker server.
+            // Verify via HTTP health check before giving up.
+            println!("Port {} in use by unknown process, checking if it's a Voicebox server...", SERVER_PORT);
+            if check_health(SERVER_PORT) {
+                println!("Health check passed — reusing external server on port {}", SERVER_PORT);
+                return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
+            }
+            return Err(format!(
+                "Port {} is already in use by another application. \
+                 Close the other application or change the Voicebox port.",
+                SERVER_PORT
+            ));
         }
     }