diff --git a/composables/use-tts-voice.ts b/composables/use-tts-voice.ts index 84e073ae6..599ba5385 100644 --- a/composables/use-tts-voice.ts +++ b/composables/use-tts-voice.ts @@ -28,7 +28,7 @@ export function useTTSVoice(options: TTSVoiceOptions = {}) { // hardcoded voice options for now const ttsLanguageVoiceOptions = [ { label: 'Pazu 薯伯伯 - 粵語', value: 'zh-HK_pazu' }, - { label: 'Phoebe - 粵語', value: 'zh-HK_phoebe' }, + { label: 'Phoebe - 粵語口語', value: 'zh-HK_phoebe' }, { label: 'Aurora - 國語', value: 'zh-TW_aurora' }, { label: '國語男聲', value: 'zh-TW_1' }, { label: 'English female', value: 'en-US_0' }, diff --git a/server/api/reader/tts.get.ts b/server/api/reader/tts.get.ts index c35156363..1b700c009 100644 --- a/server/api/reader/tts.get.ts +++ b/server/api/reader/tts.get.ts @@ -32,8 +32,6 @@ function parseRangeHeader(rangeHeader: string, totalSize: number): { start: numb return { start, end } } -const KNOWN_VOICE_IDS = new Set(['0', '1', 'aurora', 'pazu', 'phoebe']) - function getTTSProvider(voiceId: string): MinimaxTTSProvider { if (!KNOWN_VOICE_IDS.has(voiceId)) { throw createError({ @@ -139,7 +137,7 @@ export default defineEventHandler(async (event) => { }) } - const ttsModel = getMinimaxModel({ customVoiceId: customMiniMaxVoiceId, language }) + const ttsModel = getMinimaxModel({ voiceId, customVoiceId: customMiniMaxVoiceId, language }) const bucket = getTTSCacheBucket() const isCacheEnabled = !!bucket const cacheKey = isCacheEnabled diff --git a/server/utils/tts-minimax.ts b/server/utils/tts-minimax.ts index 16e81ce46..419e4a07f 100644 --- a/server/utils/tts-minimax.ts +++ b/server/utils/tts-minimax.ts @@ -6,16 +6,21 @@ export const LANG_MAPPING = { 'zh-HK': 'Chinese,Yue', } -// Voice mapping with provider information -const VOICE_MAPPING: Record = { - // Minimax voices - 0: 'Chinese (Mandarin)_Warm_Bestie', - 1: 'Chinese (Mandarin)_Southern_Young_Man', - aurora: 'three_book_aurora_v0', - pazu: 'book_pazu_v2', - phoebe: 'phoebe_v1', +interface VoiceConfig { + minimaxVoiceId: string + model?: string } +const VOICE_CONFIG: Record = { + 0: { minimaxVoiceId: 'Chinese (Mandarin)_Warm_Bestie' }, + 1: { minimaxVoiceId: 'Chinese (Mandarin)_Southern_Young_Man' }, + aurora: { minimaxVoiceId: 'three_book_aurora_v0' }, + pazu: { minimaxVoiceId: 'book_pazu_v2' }, + phoebe: { minimaxVoiceId: 'phoebe_v1', model: 'speech-2.6-hd' }, +} + +export const KNOWN_VOICE_IDS = new Set(Object.keys(VOICE_CONFIG)) + export function getTTSPronunciationDictionary(language: string) { switch (language) { case 'zh-TW': @@ -40,10 +45,15 @@ export function getTTSPronunciationDictionary(language: string) { } export function getMinimaxModel(options: { + voiceId?: string customVoiceId?: string language?: string } = {}): string { - const { customVoiceId, language } = options + const { voiceId, customVoiceId, language } = options + const voiceModel = voiceId && VOICE_CONFIG[voiceId]?.model + if (voiceModel) { + return voiceModel + } if (language === 'zh-HK') { return 'speech-2.8-hd' } @@ -57,7 +67,7 @@ export class MinimaxTTSProvider implements BaseTTSProvider { async processRequest(params: TTSRequestParams): Promise { const { text, language, voiceId, customMiniMaxVoiceId } = params - if (!customMiniMaxVoiceId && !VOICE_MAPPING[voiceId]) { + if (!customMiniMaxVoiceId && !VOICE_CONFIG[voiceId]) { throw createError({ status: 400, message: 'INVALID_VOICE_ID', @@ -65,8 +75,8 @@ export class MinimaxTTSProvider implements BaseTTSProvider { } const client = getMiniMaxSpeechClient() - const resolvedVoiceId = (customMiniMaxVoiceId || VOICE_MAPPING[voiceId]) as string - const model = getMinimaxModel({ customVoiceId: customMiniMaxVoiceId, language }) + const resolvedVoiceId = (customMiniMaxVoiceId || VOICE_CONFIG[voiceId]!.minimaxVoiceId) as string + const model = getMinimaxModel({ voiceId, customVoiceId: customMiniMaxVoiceId, language }) const result = await client.synthesize({ text, @@ -87,7 +97,7 @@ export class MinimaxTTSProvider implements BaseTTSProvider { async processRequestStream(params: TTSRequestParams): Promise> { const { text, language, voiceId, customMiniMaxVoiceId } = params - if (!customMiniMaxVoiceId && !VOICE_MAPPING[voiceId]) { + if (!customMiniMaxVoiceId && !VOICE_CONFIG[voiceId]) { throw createError({ status: 400, message: 'INVALID_VOICE_ID', @@ -95,8 +105,8 @@ export class MinimaxTTSProvider implements BaseTTSProvider { } const client = getMiniMaxSpeechClient() - const resolvedVoiceId = (customMiniMaxVoiceId || VOICE_MAPPING[voiceId]) as string - const model = getMinimaxModel({ customVoiceId: customMiniMaxVoiceId, language }) + const resolvedVoiceId = (customMiniMaxVoiceId || VOICE_CONFIG[voiceId]!.minimaxVoiceId) as string + const model = getMinimaxModel({ voiceId, customVoiceId: customMiniMaxVoiceId, language }) return await client.synthesizeStream({ text,