diff --git a/package.json b/package.json index e209028fd4..81bf693ea0 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,7 @@ "@hono/swagger-ui": "0.5.2", "@hono/zod-openapi": "1.1.0", "@hono/zod-validator": "0.7.2", + "@indic-transliteration/sanscript": "^1.3.3", "@jellybrick/dbus-next": "0.10.3", "@jellybrick/electron-better-web-request": "1.0.4", "@jellybrick/mpris-service": "2.1.5", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 60535de66f..c9a03306ea 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -75,6 +75,9 @@ importers: '@hono/zod-validator': specifier: 0.7.2 version: 0.7.2(hono@4.9.6)(zod@4.1.5) + '@indic-transliteration/sanscript': + specifier: ^1.3.3 + version: 1.3.3 '@jellybrick/dbus-next': specifier: 0.10.3 version: 0.10.3 @@ -861,6 +864,12 @@ packages: resolution: {integrity: sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==} engines: {node: '>=18.18'} + '@indic-transliteration/common_maps@1.0.5': + resolution: {integrity: sha512-XbWDA5AXGE+Nh4uGr/yN9ZM8avRBy4F1KQL+DLgQGOdsQ390lcW4fga0NSjg4C/rOpMd0rHZv2YFV3Bq3UbpkQ==} + + '@indic-transliteration/sanscript@1.3.3': + resolution: {integrity: sha512-zNGeARmQTPIlubwgEhl/JumpwTPHrdT/cNsQeCL+G67SQmjJe3qRnMIYghXiVt7+KDso/pU1Ky2ZfD/RBISfJQ==} + '@isaacs/balanced-match@4.0.1': resolution: {integrity: sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==} engines: {node: 20 || >=22} @@ -4472,6 +4481,9 @@ packages: resolution: {integrity: sha512-6udB24Q737UD/SDsKAHI9FCRP7Bqc9D/MQUV02ORQg5iskjtLJlZJNdN4kKtcdtwCeWIwIHDGaUsTsCCAa8sFQ==} engines: {node: '>=10'} + toml@2.3.6: + resolution: {integrity: sha512-gVweAectJU3ebq//Ferr2JUY4WKSDe5N+z0FvjDncLGyHmIDoxgY/2Ie4qfEIDm4IS7OA6Rmdm7pdEEdMcV/xQ==} + totalist@3.0.1: resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==} engines: {node: '>=6'} @@ -5364,6 +5376,13 @@ snapshots: 
/** Matches any code point in the Bengali Unicode block (U+0980–U+09FF). */
const BENGALI_CHARS = /[\u0980-\u09FF]/;

/** Matches any code point in the Devanagari Unicode block (U+0900–U+097F). */
const DEVANAGARI_CHARS = /[\u0900-\u097F]/;

/** Global variant of the Devanagari matcher, used only with `replace`. */
const DEVANAGARI_RESIDUE = /[\u0900-\u097F]/g;

/** Bengali nukta, dependent vowel signs and hasanta that may survive transliteration. */
const BENGALI_SIGN_RESIDUE = /[\u09BC\u09BE-\u09CD]/g;

/** Combining diacritical marks exposed once text is NFD-decomposed. */
const COMBINING_DIACRITICS = /[\u0300-\u036f]/g;

/** Returns true when at least one line contains a Bengali-block character. */
const hasBengali = (lines: string[]) =>
  lines.some((line) => BENGALI_CHARS.test(line));

/**
 * Returns true when at least one line contains a Devanagari-block character.
 * NOTE(review): Devanagari is shared by several languages, so this detects
 * the script rather than Hindi specifically.
 */
const hasHindi = (lines: string[]) =>
  lines.some((line) => DEVANAGARI_CHARS.test(line));

/**
 * Removes combining diacritics (ā → a, ṭ → t) from `text`.
 *
 * The text is decomposed (NFD) so the marks become separate code points,
 * the marks are dropped, and the result is recomposed (NFC) so that any
 * untouched pass-through text (e.g. Hangul syllables) is not left in
 * decomposed form.
 */
const stripDiacritics = (text: string) =>
  text.normalize('NFD').replace(COMBINING_DIACRITICS, '').normalize('NFC');

/**
 * Romanizes a Bengali lyric line.
 *
 * Transliterates with `Sanscript.t(line, 'bengali', 'iast')`, strips the
 * diacritics, removes leftover Bengali signs and lower-cases the result.
 *
 * @param line Lyric line that may contain Bengali script.
 * @returns The romanized line, or `line` unchanged if transliteration throws.
 */
export const romanizeBengali = (line: string) => {
  try {
    return stripDiacritics(Sanscript.t(line, 'bengali', 'iast'))
      .replace(BENGALI_SIGN_RESIDUE, '')
      .toLowerCase();
  } catch {
    // Best effort: showing the original line beats showing nothing.
    return line;
  }
};

/**
 * Romanizes a Devanagari (Hindi) lyric line.
 *
 * Transliterates with `Sanscript.t(line, 'devanagari', 'iast')`, strips the
 * diacritics and removes only the Devanagari code points that were left
 * untransliterated. Digits, punctuation and text in other scripts are kept,
 * so mixed-content lines ("24/7", "don't", other languages) are not mangled.
 *
 * @param line Lyric line that may contain Devanagari script.
 * @returns The romanized line; `line` unchanged when the result would be
 *          empty or when transliteration throws.
 */
export const romanizeHindi = (line: string) => {
  try {
    const latin = stripDiacritics(
      Sanscript.t(line, 'devanagari', 'iast'),
    ).replace(DEVANAGARI_RESIDUE, '');
    // An empty result means nothing could be romanized; keep the original.
    return latin || line;
  } catch {
    // Best effort: showing the original line beats showing nothing.
    return line;
  }
};