From 90b45152ed674fccf5c7b2c0ef827cf5d909ae10 Mon Sep 17 00:00:00 2001 From: ComputelessComputer Date: Thu, 26 Mar 2026 14:14:31 -0700 Subject: [PATCH 1/2] fix: simplify spoken language selection - normalize locale variants like ko-US and es-419 into one language option per language - keep variant aliases searchable without exposing them in the selector UI - normalize spoken languages before STT support checks and transcription runs --- apps/desktop/src/settings/ai/stt/select.tsx | 13 ++- apps/desktop/src/settings/general/language.ts | 26 ++--- .../src/settings/general/main-language.tsx | 26 ++--- .../src/settings/general/spoken-languages.tsx | 96 +++++++-------- apps/desktop/src/shared/language.test.ts | 47 ++++++++ apps/desktop/src/shared/language.ts | 109 ++++++++++++++++++ apps/desktop/src/stt/useRunBatch.ts | 5 +- apps/desktop/src/stt/useStartListening.ts | 3 +- 8 files changed, 235 insertions(+), 90 deletions(-) create mode 100644 apps/desktop/src/shared/language.test.ts create mode 100644 apps/desktop/src/shared/language.ts diff --git a/apps/desktop/src/settings/ai/stt/select.tsx b/apps/desktop/src/settings/ai/stt/select.tsx index 9b909a40e6..0ccc58b2c9 100644 --- a/apps/desktop/src/settings/ai/stt/select.tsx +++ b/apps/desktop/src/settings/ai/stt/select.tsx @@ -1,7 +1,7 @@ import { useQueries, useQuery } from "@tanstack/react-query"; import { arch } from "@tauri-apps/plugin-os"; import { Check, Loader2 } from "lucide-react"; -import { useRef } from "react"; +import { useMemo, useRef } from "react"; import { commands as listenerCommands } from "@hypr/plugin-listener"; import { @@ -37,6 +37,7 @@ import { requiresEntitlement, } from "~/settings/ai/shared/eligibility"; import { useConfigValues } from "~/shared/config"; +import { normalizeLanguageCodes } from "~/shared/language"; import * as settings from "~/store/tinybase/store/settings"; export function SelectProviderAndModel() { @@ -53,23 +54,27 @@ export function SelectProviderAndModel() { const isConfigured = !!(current_stt_provider && current_stt_model); const hasError = isConfigured && health.status === "error"; + const normalizedSpokenLanguages = useMemo( + () => normalizeLanguageCodes(spoken_languages ?? []), + [spoken_languages], + ); const languageSupport = useQuery({ queryKey: [ "stt-language-support", current_stt_provider, current_stt_model, - spoken_languages, + normalizedSpokenLanguages, ], queryFn: async () => { const result = await listenerCommands.isSupportedLanguagesLive( current_stt_provider!, current_stt_model ?? null, - spoken_languages ?? [], + normalizedSpokenLanguages, ); return result.status === "ok" ? result.data : true; }, - enabled: !!(current_stt_provider && spoken_languages?.length), + enabled: !!(current_stt_provider && normalizedSpokenLanguages.length), }); const hasLanguageWarning = diff --git a/apps/desktop/src/settings/general/language.ts b/apps/desktop/src/settings/general/language.ts index 1d152ac4ea..0a7dbb8632 100644 --- a/apps/desktop/src/settings/general/language.ts +++ b/apps/desktop/src/settings/general/language.ts @@ -1,18 +1,8 @@ -const displayNames = new Intl.DisplayNames(["en"], { type: "language" }); - -export function getLanguageDisplayName(code: string): string { - return displayNames.of(code) ?? code; -} - -export function getBaseLanguageDisplayName(code: string): string { - const { language } = parseLocale(code); - return displayNames.of(language) ?? code; -} - -export function parseLocale(code: string): { - language: string; - region?: string; -} { - const locale = new Intl.Locale(code); - return { language: locale.language, region: locale.region }; -} +export { + getBaseLanguageDisplayName, + getLanguageDisplayName, + getLanguageOptions, + normalizeLanguageCode, + normalizeLanguageCodes, + parseLocale, +} from "~/shared/language"; diff --git a/apps/desktop/src/settings/general/main-language.tsx b/apps/desktop/src/settings/general/main-language.tsx index fe5419f47e..ac1dca14f3 100644 --- a/apps/desktop/src/settings/general/main-language.tsx +++ b/apps/desktop/src/settings/general/main-language.tsx @@ -1,6 +1,6 @@ import { useMemo } from "react"; -import { getBaseLanguageDisplayName, parseLocale } from "./language"; +import { getLanguageOptions, normalizeLanguageCode } from "./language"; import { SearchableSelect, type SearchableSelectOption, @@ -15,29 +15,17 @@ export function MainLanguageView({ onChange: (value: string) => void; supportedLanguages: readonly string[]; }) { - const deduped = useMemo(() => { - const map = new Map(); - for (const code of supportedLanguages) { - const { language } = parseLocale(code); - if (!map.has(language)) { - map.set(language, code); - } - } - return map; - }, [supportedLanguages]); - const normalizedValue = useMemo(() => { - const { language } = parseLocale(value); - return deduped.get(language) ?? value; - }, [value, deduped]); + return normalizeLanguageCode(value); + }, [value]); const options: SearchableSelectOption[] = useMemo( () => - [...deduped.values()].map((code) => ({ - value: code, - label: getBaseLanguageDisplayName(code), + getLanguageOptions(supportedLanguages).map((option) => ({ + value: option.value, + label: option.label, })), - [deduped], + [supportedLanguages], ); return ( diff --git a/apps/desktop/src/settings/general/spoken-languages.tsx b/apps/desktop/src/settings/general/spoken-languages.tsx index b519532533..fd3d46c88a 100644 --- a/apps/desktop/src/settings/general/spoken-languages.tsx +++ b/apps/desktop/src/settings/general/spoken-languages.tsx @@ -5,35 +5,11 @@ import { Badge } from "@hypr/ui/components/ui/badge"; import { Button } from "@hypr/ui/components/ui/button"; import { cn } from "@hypr/utils"; -import { getLanguageDisplayName } from "./language"; - -function hasRegionVariant(langCode: string): boolean { - return langCode.includes("-"); -} - -function getBaseLanguage(langCode: string): string { - return langCode.split("-")[0]; -} - -function isLanguageDisabled( - langCode: string, - selectedLanguages: string[], -): boolean { - const base = getBaseLanguage(langCode); - const isVariant = hasRegionVariant(langCode); - - for (const selected of selectedLanguages) { - const selectedBase = getBaseLanguage(selected); - if (selectedBase !== base) continue; - - if (isVariant) { - return selected === base || hasRegionVariant(selected); - } else { - return hasRegionVariant(selected); - } - } - return false; -} +import { + getLanguageDisplayName, + getLanguageOptions, + normalizeLanguageCodes, +} from "./language"; interface SpokenLanguagesViewProps { value: string[]; @@ -49,24 +25,41 @@ export function SpokenLanguagesView({ const [languageSearchQuery, setLanguageSearchQuery] = useState(""); const [languageInputFocused, setLanguageInputFocused] = useState(false); const [languageSelectedIndex, setLanguageSelectedIndex] = useState(-1); + const selectedLanguages = useMemo( + () => normalizeLanguageCodes(value), + [value], + ); + const languageOptions = useMemo( + () => getLanguageOptions(supportedLanguages), + [supportedLanguages], + ); const filteredLanguages = useMemo(() => { if (!languageSearchQuery.trim()) { return []; } + const query = languageSearchQuery.toLowerCase(); - return supportedLanguages.filter((langCode) => { - if (value.includes(langCode)) return false; - if (isLanguageDisabled(langCode, value)) return false; - const langName = getLanguageDisplayName(langCode); - return langName.toLowerCase().includes(query); + + return languageOptions.filter((option) => { + if (selectedLanguages.includes(option.value)) { + return false; + } + + return option.searchTerms.some((term) => + term.toLowerCase().includes(query), + ); }); - }, [languageSearchQuery, value, supportedLanguages]); + }, [languageOptions, languageSearchQuery, selectedLanguages]); const handleLanguageKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Backspace" && !languageSearchQuery && value.length > 0) { + if ( + e.key === "Backspace" && + !languageSearchQuery && + selectedLanguages.length > 0 + ) { e.preventDefault(); - onChange(value.slice(0, -1)); + onChange(selectedLanguages.slice(0, -1)); return; } @@ -88,8 +81,12 @@ export function SpokenLanguagesView({ languageSelectedIndex >= 0 && languageSelectedIndex < filteredLanguages.length ) { - const selectedCode = filteredLanguages[languageSelectedIndex]; - onChange([...value, selectedCode]); + const selectedCode = filteredLanguages[languageSelectedIndex]?.value; + if (!selectedCode) { + return; + } + + onChange(normalizeLanguageCodes([...selectedLanguages, selectedCode])); setLanguageSearchQuery(""); setLanguageSelectedIndex(-1); } @@ -116,7 +113,7 @@ export function SpokenLanguagesView({ document.getElementById("language-search-input")?.focus() } > - {value.map((code) => ( + {selectedLanguages.map((code) => ( { e.stopPropagation(); - onChange(value.filter((c) => c !== code)); + onChange(selectedLanguages.filter((c) => c !== code)); }} > ))} - {value.length === 0 && ( + {selectedLanguages.length === 0 && ( )} @@ -173,15 +170,20 @@ export function SpokenLanguagesView({ className="absolute top-full right-0 left-0 z-10 mt-1 flex max-h-60 w-full flex-col overflow-hidden overflow-y-auto rounded-xs border border-neutral-200 bg-white shadow-md" > {filteredLanguages.length > 0 ? ( - filteredLanguages.map((langCode, index) => ( + filteredLanguages.map((option, index) => ( )) diff --git a/apps/desktop/src/shared/language.test.ts b/apps/desktop/src/shared/language.test.ts new file mode 100644 index 0000000000..37820dc1bd --- /dev/null +++ b/apps/desktop/src/shared/language.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, test } from "vitest"; + +import { + getLanguageDisplayName, + getLanguageOptions, + normalizeLanguageCodes, +} from "./language"; + +describe("language helpers", () => { + test("shows base language names for locale variants", () => { + expect(getLanguageDisplayName("ko-US")).toBe("Korean"); + expect(getLanguageDisplayName("en-US")).toBe("English"); + }); + + test("normalizes and dedupes language selections by base language", () => { + expect( + normalizeLanguageCodes(["en-US", "en", "es-419", "es", "ko-US"]), + ).toEqual(["en", "es", "ko"]); + }); + + test("builds one option per language while keeping variant aliases searchable", () => { + const options = getLanguageOptions([ + "en", + "en-US", + "es", + "es-419", + "ko-KR", + ]); + + expect( + options.map(({ value, label }) => ({ + value, + label, + })), + ).toEqual([ + { value: "en", label: "English" }, + { value: "es", label: "Spanish" }, + { value: "ko", label: "Korean" }, + ]); + + expect( + options + .find((option) => option.value === "es") + ?.searchTerms.some((term) => term.includes("Latin")), + ).toBe(true); + }); +}); diff --git a/apps/desktop/src/shared/language.ts b/apps/desktop/src/shared/language.ts new file mode 100644 index 0000000000..0d38fbf538 --- /dev/null +++ b/apps/desktop/src/shared/language.ts @@ -0,0 +1,109 @@ +const displayNames = new Intl.DisplayNames(["en"], { type: "language" }); +const regionDisplayNames = new Intl.DisplayNames(["en"], { type: "region" }); + +export type LanguageOption = { + value: string; + label: string; + searchTerms: string[]; +}; + +function getLocaleDisplayName(code: string): string { + return displayNames.of(code) ?? code; +} + +function getRegionDisplayName(code: string): string | undefined { + const { region } = parseLocale(code); + return region ? regionDisplayNames.of(region) : undefined; +} + +export function parseLocale(code: string): { + language: string; + region?: string; +} { + try { + const locale = new Intl.Locale(code); + return { + language: locale.language || code.split(/[-_]/)[0]?.toLowerCase() || code, + region: locale.region, + }; + } catch { + const [language = code, ...rest] = code.split(/[-_]/); + const region = rest.find( + (part) => /^[a-z]{2}$/i.test(part) || /^\d{3}$/.test(part), + ); + + return { + language: language.toLowerCase(), + region: region?.toUpperCase(), + }; + } +} + +export function normalizeLanguageCode(code: string): string { + return parseLocale(code).language; +} + +export function normalizeLanguageCodes(codes: readonly string[]): string[] { + const normalized: string[] = []; + const seen = new Set(); + + for (const code of codes) { + const value = normalizeLanguageCode(code); + if (!value || seen.has(value)) { + continue; + } + + seen.add(value); + normalized.push(value); + } + + return normalized; +} + +export function getBaseLanguageDisplayName(code: string): string { + const { language } = parseLocale(code); + return displayNames.of(language) ?? code; +} + +export function getLanguageDisplayName(code: string): string { + return getBaseLanguageDisplayName(code); +} + +export function getLanguageOptions( + supportedLanguages: readonly string[], +): LanguageOption[] { + const grouped = new Map(); + for (const code of supportedLanguages) { + const value = normalizeLanguageCode(code); + if (!value) { + continue; + } + + const group = grouped.get(value); + if (group) { + group.push(code); + } else { + grouped.set(value, [code]); + } + } + + return [...grouped.entries()].map(([value, codes]) => { + const label = getBaseLanguageDisplayName(value); + const searchTerms = [ + label, + value, + ...codes.flatMap((code) => [ + code, + code.replace(/-/g, " "), + getLocaleDisplayName(code), + getRegionDisplayName(code), + ]), + ].filter((term): term is string => Boolean(term)); + + return { + value, + label, + searchTerms: [...new Set(searchTerms)], + }; + }); +} diff --git a/apps/desktop/src/stt/useRunBatch.ts b/apps/desktop/src/stt/useRunBatch.ts index 33ddeaa738..a76082296d 100644 --- a/apps/desktop/src/stt/useRunBatch.ts +++ b/apps/desktop/src/stt/useRunBatch.ts @@ -8,6 +8,7 @@ import { useKeywords } from "./useKeywords"; import { useSTTConnection } from "./useSTTConnection"; import { useConfigValue } from "~/shared/config"; +import { normalizeLanguageCodes } from "~/shared/language"; import { id } from "~/shared/utils"; import * as main from "~/store/tinybase/store/main"; import type { BatchPersistCallback } from "~/store/zustand/listener/transcript"; @@ -190,7 +191,9 @@ export const useRunBatch = (sessionId: string) => { base_url: options?.baseUrl ?? conn.baseUrl, api_key: options?.apiKey ?? conn.apiKey, keywords: options?.keywords ?? keywords ?? [], - languages: options?.languages ?? languages ?? [], + languages: normalizeLanguageCodes( + options?.languages ?? languages ?? [], + ), }; await runBatch(params, { handlePersist: persist }); diff --git a/apps/desktop/src/stt/useStartListening.ts b/apps/desktop/src/stt/useStartListening.ts index 41f2277636..834b703557 100644 --- a/apps/desktop/src/stt/useStartListening.ts +++ b/apps/desktop/src/stt/useStartListening.ts @@ -12,6 +12,7 @@ import { useSTTConnection } from "./useSTTConnection"; import { getEnhancerService } from "~/services/enhancer"; import { getSessionEventById } from "~/session/utils"; import { useConfigValue } from "~/shared/config"; +import { normalizeLanguageCodes } from "~/shared/language"; import { id } from "~/shared/utils"; import * as main from "~/store/tinybase/store/main"; import type { @@ -141,7 +142,7 @@ export function useStartListening( const started = await start( { session_id: sessionId, - languages, + languages: normalizeLanguageCodes(languages ?? []), onboarding: false, transcription_mode: transcriptionMode, recording_mode: recordingMode, From fb6dc417fb5334ea4e361dc3c2ac17826d13255f Mon Sep 17 00:00:00 2001 From: ComputelessComputer Date: Thu, 26 Mar 2026 14:18:12 -0700 Subject: [PATCH 2/2] fix: preserve meaningful spoken language options - keep meaningful spoken language variants like English (US), English (UK), Mandarin, and Cantonese visible in the picker - normalize legacy invalid locale leftovers without collapsing valid provider codes during STT support checks and runs - show spoken language suggestions on focus and keep keyboard selection working without a typed query --- apps/desktop/src/settings/ai/stt/select.tsx | 4 +- apps/desktop/src/settings/general/language.ts | 11 +- .../src/settings/general/main-language.tsx | 6 +- .../src/settings/general/spoken-languages.tsx | 65 ++--- apps/desktop/src/shared/language.test.ts | 72 ++++- apps/desktop/src/shared/language.ts | 255 ++++++++++++++++-- apps/desktop/src/stt/useRunBatch.ts | 4 +- apps/desktop/src/stt/useStartListening.ts | 4 +- 8 files changed, 341 insertions(+), 80 deletions(-) diff --git a/apps/desktop/src/settings/ai/stt/select.tsx b/apps/desktop/src/settings/ai/stt/select.tsx index 0ccc58b2c9..aba8a6f5b2 100644 --- a/apps/desktop/src/settings/ai/stt/select.tsx +++ b/apps/desktop/src/settings/ai/stt/select.tsx @@ -37,7 +37,7 @@ import { requiresEntitlement, } from "~/settings/ai/shared/eligibility"; import { useConfigValues } from "~/shared/config"; -import { normalizeLanguageCodes } from "~/shared/language"; +import { normalizeSpokenLanguageCodes } from "~/shared/language"; import * as settings from "~/store/tinybase/store/settings"; export function SelectProviderAndModel() { @@ -55,7 +55,7 @@ export function SelectProviderAndModel() { const isConfigured = !!(current_stt_provider && current_stt_model); const hasError = isConfigured && health.status === "error"; const normalizedSpokenLanguages = useMemo( - () => normalizeLanguageCodes(spoken_languages ?? []), + () => normalizeSpokenLanguageCodes(spoken_languages ?? []), [spoken_languages], ); diff --git a/apps/desktop/src/settings/general/language.ts b/apps/desktop/src/settings/general/language.ts index 0a7dbb8632..ae1b831828 100644 --- a/apps/desktop/src/settings/general/language.ts +++ b/apps/desktop/src/settings/general/language.ts @@ -1,8 +1,13 @@ export { + getBaseLanguageOptions, getBaseLanguageDisplayName, getLanguageDisplayName, - getLanguageOptions, - normalizeLanguageCode, - normalizeLanguageCodes, + getSpokenLanguageDisplayName, + getSpokenLanguageOptions, + normalizeBaseLanguageCode, + normalizeBaseLanguageCodes, + normalizeSelectedSpokenLanguages, + normalizeSpokenLanguageCode, + normalizeSpokenLanguageCodes, parseLocale, } from "~/shared/language"; diff --git a/apps/desktop/src/settings/general/main-language.tsx b/apps/desktop/src/settings/general/main-language.tsx index ac1dca14f3..e44d843fa9 100644 --- a/apps/desktop/src/settings/general/main-language.tsx +++ b/apps/desktop/src/settings/general/main-language.tsx @@ -1,6 +1,6 @@ import { useMemo } from "react"; -import { getLanguageOptions, normalizeLanguageCode } from "./language"; +import { getBaseLanguageOptions, normalizeBaseLanguageCode } from "./language"; import { SearchableSelect, type SearchableSelectOption, @@ -16,12 +16,12 @@ export function MainLanguageView({ supportedLanguages: readonly string[]; }) { const normalizedValue = useMemo(() => { - return normalizeLanguageCode(value); + return normalizeBaseLanguageCode(value); }, [value]); const options: SearchableSelectOption[] = useMemo( () => - getLanguageOptions(supportedLanguages).map((option) => ({ + getBaseLanguageOptions(supportedLanguages).map((option) => ({ value: option.value, label: option.label, })), diff --git a/apps/desktop/src/settings/general/spoken-languages.tsx b/apps/desktop/src/settings/general/spoken-languages.tsx index fd3d46c88a..e527e43097 100644 --- a/apps/desktop/src/settings/general/spoken-languages.tsx +++ b/apps/desktop/src/settings/general/spoken-languages.tsx @@ -6,9 +6,9 @@ import { Button } from "@hypr/ui/components/ui/button"; import { cn } from "@hypr/utils"; import { - getLanguageDisplayName, - getLanguageOptions, - normalizeLanguageCodes, + getSpokenLanguageDisplayName, + getSpokenLanguageOptions, + normalizeSelectedSpokenLanguages, } from "./language"; interface SpokenLanguagesViewProps { @@ -26,31 +26,37 @@ export function SpokenLanguagesView({ const [languageInputFocused, setLanguageInputFocused] = useState(false); const [languageSelectedIndex, setLanguageSelectedIndex] = useState(-1); const selectedLanguages = useMemo( - () => normalizeLanguageCodes(value), - [value], + () => normalizeSelectedSpokenLanguages(value, supportedLanguages), + [supportedLanguages, value], ); const languageOptions = useMemo( - () => getLanguageOptions(supportedLanguages), + () => getSpokenLanguageOptions(supportedLanguages), [supportedLanguages], ); + const selectedSelectionKeys = useMemo( + () => + new Set( + languageOptions + .filter((option) => selectedLanguages.includes(option.value)) + .map((option) => option.selectionKey), + ), + [languageOptions, selectedLanguages], + ); const filteredLanguages = useMemo(() => { - if (!languageSearchQuery.trim()) { - return []; - } - - const query = languageSearchQuery.toLowerCase(); + const availableOptions = languageOptions.filter( + (option) => !selectedSelectionKeys.has(option.selectionKey), + ); + const query = languageSearchQuery.trim().toLowerCase(); - return languageOptions.filter((option) => { - if (selectedLanguages.includes(option.value)) { - return false; - } + if (!query) { + return availableOptions; + } - return option.searchTerms.some((term) => - term.toLowerCase().includes(query), - ); - }); - }, [languageOptions, languageSearchQuery, selectedLanguages]); + return availableOptions.filter((option) => + option.searchTerms.some((term) => term.toLowerCase().includes(query)), + ); + }, [languageOptions, languageSearchQuery, selectedSelectionKeys]); const handleLanguageKeyDown = (e: React.KeyboardEvent) => { if ( @@ -63,7 +69,7 @@ export function SpokenLanguagesView({ return; } - if (!languageSearchQuery.trim() || filteredLanguages.length === 0) { + if (filteredLanguages.length === 0) { return; } @@ -86,7 +92,7 @@ export function SpokenLanguagesView({ return; } - onChange(normalizeLanguageCodes([...selectedLanguages, selectedCode])); + onChange([...selectedLanguages, selectedCode]); setLanguageSearchQuery(""); setLanguageSelectedIndex(-1); } @@ -119,7 +125,7 @@ export function SpokenLanguagesView({ variant="secondary" className="bg-muted flex items-center gap-1 px-2 py-0.5 text-xs" > - {getLanguageDisplayName(code)} + {getSpokenLanguageDisplayName(code)} )) diff --git a/apps/desktop/src/shared/language.test.ts b/apps/desktop/src/shared/language.test.ts index 37820dc1bd..e4b353ab15 100644 --- a/apps/desktop/src/shared/language.test.ts +++ b/apps/desktop/src/shared/language.test.ts @@ -1,30 +1,62 @@ import { describe, expect, test } from "vitest"; import { - getLanguageDisplayName, - getLanguageOptions, - normalizeLanguageCodes, + getBaseLanguageOptions, + getSpokenLanguageDisplayName, + getSpokenLanguageOptions, + normalizeBaseLanguageCodes, + normalizeSelectedSpokenLanguages, + normalizeSpokenLanguageCodes, } from "./language"; describe("language helpers", () => { - test("shows base language names for locale variants", () => { - expect(getLanguageDisplayName("ko-US")).toBe("Korean"); - expect(getLanguageDisplayName("en-US")).toBe("English"); + test("shows meaningful spoken-language labels for supported variants", () => { + expect(getSpokenLanguageDisplayName("ko-US")).toBe("Korean"); + expect(getSpokenLanguageDisplayName("en-US")).toBe("English (US)"); + expect(getSpokenLanguageDisplayName("zh-HK")).toBe("Cantonese"); + expect(getSpokenLanguageDisplayName("zh-TW")).toBe("Mandarin"); }); - test("normalizes and dedupes language selections by base language", () => { + test("keeps meaningful spoken variants while collapsing provider-only locale noise", () => { expect( - normalizeLanguageCodes(["en-US", "en", "es-419", "es", "ko-US"]), - ).toEqual(["en", "es", "ko"]); + normalizeSpokenLanguageCodes([ + "en-US", + "en", + "es-419", + "es", + "ko-US", + "zh-HK", + "zh-TW", + ]), + ).toEqual(["en-US", "es-419", "ko", "zh-HK", "zh-TW"]); }); - test("builds one option per language while keeping variant aliases searchable", () => { - const options = getLanguageOptions([ + test("keeps main-language options at the base-language level", () => { + expect( + getBaseLanguageOptions(["en", "en-US", "en-GB", "ko-KR"]).map( + ({ value, label }) => ({ value, label }), + ), + ).toEqual([ + { value: "en", label: "English" }, + { value: "ko", label: "Korean" }, + ]); + + expect(normalizeBaseLanguageCodes(["en-US", "en", "ko-KR"])).toEqual([ + "en", + "ko", + ]); + }); + + test("builds spoken-language options with curated variant labels and aliases", () => { + const options = getSpokenLanguageOptions([ "en", "en-US", + "en-GB", "es", "es-419", "ko-KR", + "zh-HK", + "zh-TW", ]); expect( @@ -34,14 +66,28 @@ describe("language helpers", () => { })), ).toEqual([ { value: "en", label: "English" }, + { value: "en-US", label: "English (US)" }, + { value: "en-GB", label: "English (UK)" }, { value: "es", label: "Spanish" }, - { value: "ko", label: "Korean" }, + { value: "es-419", label: "Spanish (Latin America)" }, + { value: "ko-KR", label: "Korean" }, + { value: "zh-HK", label: "Cantonese" }, + { value: "zh-TW", label: "Mandarin" }, ]); expect( options - .find((option) => option.value === "es") + .find((option) => option.value === "es-419") ?.searchTerms.some((term) => term.includes("Latin")), ).toBe(true); }); + + test("maps legacy stored spoken languages onto current spoken-language options", () => { + expect( + normalizeSelectedSpokenLanguages( + ["ko-US", "en-US", "zh-Hant"], + ["en", "en-US", "ko-KR", "zh-TW", "zh-HK"], + ), + ).toEqual(["ko-KR", "en-US", "zh-TW"]); + }); }); diff --git a/apps/desktop/src/shared/language.ts b/apps/desktop/src/shared/language.ts index 0d38fbf538..dd28856a25 100644 --- a/apps/desktop/src/shared/language.ts +++ b/apps/desktop/src/shared/language.ts @@ -7,6 +7,86 @@ export type LanguageOption = { searchTerms: string[]; }; +export type SpokenLanguageOption = LanguageOption & { + selectionKey: string; +}; + +type SpokenLanguageDescriptor = { + optionKey: string; + selectionKey: string; + label: string; + canonicalValue: string; +}; + +const EXACT_VARIANT_DESCRIPTORS: Record = { + "en-US": { + optionKey: "en-US", + selectionKey: "en", + label: "English (US)", + canonicalValue: "en-US", + }, + "en-GB": { + optionKey: "en-GB", + selectionKey: "en", + label: "English (UK)", + canonicalValue: "en-GB", + }, + "es-419": { + optionKey: "es-419", + selectionKey: "es", + label: "Spanish (Latin America)", + canonicalValue: "es-419", + }, + "pt-BR": { + optionKey: "pt-BR", + selectionKey: "pt", + label: "Portuguese (Brazil)", + canonicalValue: "pt-BR", + }, + "pt-PT": { + optionKey: "pt-PT", + selectionKey: "pt", + label: "Portuguese (Portugal)", + canonicalValue: "pt-PT", + }, + "fr-CA": { + optionKey: "fr-CA", + selectionKey: "fr", + label: "French (Canada)", + canonicalValue: "fr-CA", + }, + "zh-HK": { + optionKey: "zh-HK", + selectionKey: "zh-cantonese", + label: "Cantonese", + canonicalValue: "zh-HK", + }, +}; + +const MANDARIN_CODES = new Set(["zh", "zh-CN", "zh-Hans", "zh-TW", "zh-Hant"]); +const MANDARIN_PREFERRED_VALUES = [ + "zh", + "zh-CN", + "zh-Hans", + "zh-TW", + "zh-Hant", +]; +const RUNTIME_PRESERVED_CODES = new Set([ + ...Object.keys(EXACT_VARIANT_DESCRIPTORS), + ...MANDARIN_CODES, + "da-DK", + "de-CH", + "en-AU", + "en-CA", + "en-IE", + "en-IN", + "en-NZ", + "ko-KR", + "nl-BE", + "sv-SE", + "th-TH", +]); + function getLocaleDisplayName(code: string): string { return displayNames.of(code) ?? code; } @@ -16,6 +96,46 @@ function getRegionDisplayName(code: string): string | undefined { return region ? regionDisplayNames.of(region) : undefined; } +function choosePreferredValue( + aliases: readonly string[], + preferredValues: readonly string[], + fallback: string, +): string { + for (const preferred of preferredValues) { + if (aliases.includes(preferred)) { + return preferred; + } + } + + return fallback; +} + +function getSpokenLanguageDescriptor(code: string): SpokenLanguageDescriptor { + const exactMatch = EXACT_VARIANT_DESCRIPTORS[code]; + if (exactMatch) { + return exactMatch; + } + + if (MANDARIN_CODES.has(code)) { + return { + optionKey: "zh-mandarin", + selectionKey: "zh-mandarin", + label: "Mandarin", + canonicalValue: "zh", + }; + } + + const { language } = parseLocale(code); + const label = displayNames.of(language) ?? code; + + return { + optionKey: language, + selectionKey: language, + label, + canonicalValue: language, + }; +} + export function parseLocale(code: string): { language: string; region?: string; @@ -39,16 +159,16 @@ export function parseLocale(code: string): { } } -export function normalizeLanguageCode(code: string): string { +export function normalizeBaseLanguageCode(code: string): string { return parseLocale(code).language; } -export function normalizeLanguageCodes(codes: readonly string[]): string[] { +export function normalizeBaseLanguageCodes(codes: readonly string[]): string[] { const normalized: string[] = []; const seen = new Set(); for (const code of codes) { - const value = normalizeLanguageCode(code); + const value = normalizeBaseLanguageCode(code); if (!value || seen.has(value)) { continue; } @@ -60,50 +180,139 @@ export function normalizeLanguageCodes(codes: readonly string[]): string[] { return normalized; } +export function normalizeSpokenLanguageCode(code: string): string { + if (RUNTIME_PRESERVED_CODES.has(code)) { + return code; + } + + return getSpokenLanguageDescriptor(code).canonicalValue; +} + +export function normalizeSpokenLanguageCodes( + codes: readonly string[], +): string[] { + const normalized: string[] = []; + const seen = new Set(); + + for (const code of codes) { + const descriptor = getSpokenLanguageDescriptor(code); + if (seen.has(descriptor.selectionKey)) { + continue; + } + + seen.add(descriptor.selectionKey); + normalized.push(normalizeSpokenLanguageCode(code)); + } + + return normalized; +} + export function getBaseLanguageDisplayName(code: string): string { const { language } = parseLocale(code); return displayNames.of(language) ?? code; } +export function getSpokenLanguageDisplayName(code: string): string { + return getSpokenLanguageDescriptor(code).label; +} + export function getLanguageDisplayName(code: string): string { - return getBaseLanguageDisplayName(code); + return getSpokenLanguageDisplayName(code); } -export function getLanguageOptions( +export function getBaseLanguageOptions( supportedLanguages: readonly string[], ): LanguageOption[] { - const grouped = new Map(); - for (const code of supportedLanguages) { - const value = normalizeLanguageCode(code); - if (!value) { - continue; + return normalizeBaseLanguageCodes(supportedLanguages).map((value) => ({ + value, + label: getBaseLanguageDisplayName(value), + searchTerms: [value, getBaseLanguageDisplayName(value)], + })); +} + +export function getSpokenLanguageOptions( + supportedLanguages: readonly string[], +): SpokenLanguageOption[] { + const grouped = new Map< + string, + { + descriptor: SpokenLanguageDescriptor; + aliases: string[]; } + >(); + + for (const code of supportedLanguages) { + const descriptor = getSpokenLanguageDescriptor(code); + const group = grouped.get(descriptor.optionKey); - const group = grouped.get(value); if (group) { - group.push(code); - } else { - grouped.set(value, [code]); + group.aliases.push(code); + continue; } + + grouped.set(descriptor.optionKey, { + descriptor, + aliases: [code], + }); } - return [...grouped.entries()].map(([value, codes]) => { - const label = getBaseLanguageDisplayName(value); + return [...grouped.values()].map(({ descriptor, aliases }) => { + const value = + descriptor.optionKey === "zh-mandarin" + ? choosePreferredValue( + aliases, + MANDARIN_PREFERRED_VALUES, + aliases[0] ?? descriptor.canonicalValue, + ) + : choosePreferredValue( + aliases, + [descriptor.canonicalValue], + aliases[0] ?? descriptor.canonicalValue, + ); + const searchTerms = [ - label, + descriptor.label, value, - ...codes.flatMap((code) => [ - code, - code.replace(/-/g, " "), - getLocaleDisplayName(code), - getRegionDisplayName(code), + ...aliases.flatMap((alias) => [ + alias, + alias.replace(/-/g, " "), + getLocaleDisplayName(alias), + getRegionDisplayName(alias), ]), ].filter((term): term is string => Boolean(term)); return { value, - label, + label: descriptor.label, + selectionKey: descriptor.selectionKey, searchTerms: [...new Set(searchTerms)], }; }); } + +export function normalizeSelectedSpokenLanguages( + codes: readonly string[], + supportedLanguages: readonly string[], +): string[] { + const options = getSpokenLanguageOptions(supportedLanguages); + const optionsBySelectionKey = new Map( + options.map((option) => [option.selectionKey, option.value]), + ); + const normalized: string[] = []; + const seen = new Set(); + + for (const code of codes) { + const descriptor = getSpokenLanguageDescriptor(code); + if (seen.has(descriptor.selectionKey)) { + continue; + } + + seen.add(descriptor.selectionKey); + normalized.push( + optionsBySelectionKey.get(descriptor.selectionKey) ?? + descriptor.canonicalValue, + ); + } + + return normalized; +} diff --git a/apps/desktop/src/stt/useRunBatch.ts b/apps/desktop/src/stt/useRunBatch.ts index a76082296d..ee85cf31ff 100644 --- a/apps/desktop/src/stt/useRunBatch.ts +++ b/apps/desktop/src/stt/useRunBatch.ts @@ -8,7 +8,7 @@ import { useKeywords } from "./useKeywords"; import { useSTTConnection } from "./useSTTConnection"; import { useConfigValue } from "~/shared/config"; -import { normalizeLanguageCodes } from "~/shared/language"; +import { normalizeSpokenLanguageCodes } from "~/shared/language"; import { id } from "~/shared/utils"; import * as main from "~/store/tinybase/store/main"; import type { BatchPersistCallback } from "~/store/zustand/listener/transcript"; @@ -191,7 +191,7 @@ export const useRunBatch = (sessionId: string) => { base_url: options?.baseUrl ?? conn.baseUrl, api_key: options?.apiKey ?? conn.apiKey, keywords: options?.keywords ?? keywords ?? [], - languages: normalizeLanguageCodes( + languages: normalizeSpokenLanguageCodes( options?.languages ?? languages ?? [], ), }; diff --git a/apps/desktop/src/stt/useStartListening.ts b/apps/desktop/src/stt/useStartListening.ts index 834b703557..e7f3ceee26 100644 --- a/apps/desktop/src/stt/useStartListening.ts +++ b/apps/desktop/src/stt/useStartListening.ts @@ -12,7 +12,7 @@ import { useSTTConnection } from "./useSTTConnection"; import { getEnhancerService } from "~/services/enhancer"; import { getSessionEventById } from "~/session/utils"; import { useConfigValue } from "~/shared/config"; -import { normalizeLanguageCodes } from "~/shared/language"; +import { normalizeSpokenLanguageCodes } from "~/shared/language"; import { id } from "~/shared/utils"; import * as main from "~/store/tinybase/store/main"; import type { @@ -142,7 +142,7 @@ export function useStartListening( const started = await start( { session_id: sessionId, - languages: normalizeLanguageCodes(languages ?? []), + languages: normalizeSpokenLanguageCodes(languages ?? []), onboarding: false, transcription_mode: transcriptionMode, recording_mode: recordingMode,