From b014bc1ca8fb05d4b9a2912193ccf1cfa879a0aa Mon Sep 17 00:00:00 2001 From: ChuxiJ Date: Fri, 27 Mar 2026 13:55:52 +0800 Subject: [PATCH 1/3] feat: add local BPM detection and chord recognition via ONNX Runtime Web MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements browser-local audio analysis using ONNX models in a Web Worker: - Beat This! (small) for SOTA BPM/beat detection - Consonance-ACE (ISMIR 2025) for 170-class chord recognition New files: - src/types/analysis.ts — BeatEvent, ChordEvent, worker message types - src/utils/melSpectrogram.ts — pure TS FFT + mel filterbank - src/services/modelManager.ts — lazy ONNX model download + IndexedDB cache - src/services/localAnalysisService.ts — orchestrates worker lifecycle - src/store/analysisStore.ts — Zustand store for analysis job tracking - src/workers/analysisWorker.ts — Web Worker with ONNX inference pipeline Modified: - AudioAnalysisPanel.tsx — local/server mode toggle, progress bar, chord display - InferredMetas — extended with beats, chords, analysisSource fields - vite.config.ts — onnxruntime-web WASM config - main.tsx — expose analyzeClipLocally + analysisStore on window Closes #978 Co-Authored-By: Claude Opus 4.6 (1M context) --- package-lock.json | 135 ++++++- package.json | 1 + .../generation/AudioAnalysisPanel.tsx | 258 ++++++++++-- src/main.tsx | 4 + src/services/__tests__/modelManager.test.ts | 106 +++++ src/services/localAnalysisService.ts | 136 +++++++ src/services/modelManager.ts | 107 +++++ src/store/__tests__/analysisStore.test.ts | 121 ++++++ src/store/analysisStore.ts | 139 +++++++ src/types/analysis.ts | 100 +++++ src/types/project.ts | 6 + src/utils/__tests__/melSpectrogram.test.ts | 183 +++++++++ src/utils/melSpectrogram.ts | 259 +++++++++++++ src/workers/analysisWorker.ts | 366 ++++++++++++++++++ vite.config.ts | 6 + 15 files changed, 1890 insertions(+), 37 deletions(-) create mode 100644 src/services/__tests__/modelManager.test.ts create mode 100644 src/services/localAnalysisService.ts create mode 100644 src/services/modelManager.ts create mode 100644 src/store/__tests__/analysisStore.test.ts create mode 100644 src/store/analysisStore.ts create mode 100644 src/types/analysis.ts create mode 100644 src/utils/__tests__/melSpectrogram.test.ts create mode 100644 src/utils/melSpectrogram.ts create mode 100644 src/workers/analysisWorker.ts diff --git a/package-lock.json b/package-lock.json index 34e976df..3c709b52 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "idb-keyval": "^6.2.0", "mp4-muxer": "^5.2.2", "node-pty": "^1.2.0-beta.12", + "onnxruntime-web": "^1.24.3", "react": "^19.0.0", "react-dom": "^19.0.0", "tone": "^15.1.22", @@ -1680,6 +1681,70 @@ "node": ">=18" } }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", + "license": "BSD-3-Clause" + }, "node_modules/@replit/codemirror-emacs": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/@replit/codemirror-emacs/-/codemirror-emacs-6.1.0.tgz", @@ -3350,7 +3415,6 @@ "version": "25.5.0", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz", "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~7.18.0" @@ -4546,6 +4610,12 @@ "babel-plugin-add-module-exports": "^0.2.1" } }, + "node_modules/flatbuffers": { + "version": "25.9.23", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.9.23.tgz", + "integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==", + "license": "Apache-2.0" + }, "node_modules/focus-trap": { "version": "7.8.0", "resolved": "https://registry.npmjs.org/focus-trap/-/focus-trap-7.8.0.tgz", @@ -4610,6 +4680,12 @@ "dev": true, "license": "ISC" }, + "node_modules/guid-typescript": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", + "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==", + "license": "ISC" + }, "node_modules/hasown": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", @@ -5146,6 +5222,12 @@ "url": "https://opencollective.com/parcel" } }, + "node_modules/long": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", + "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", + "license": "Apache-2.0" + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -5472,6 +5554,26 @@ "regex-recursion": "^6.0.2" } }, + "node_modules/onnxruntime-common": { + "version": "1.24.3", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.24.3.tgz", + "integrity": "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA==", + "license": "MIT" + }, + "node_modules/onnxruntime-web": { + "version": "1.24.3", + "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.24.3.tgz", + "integrity": "sha512-41dDq7fxtTm0XzGE7N0d6m8FcOY8EWtUA65GkOixJPB/G7DGzBmiDAnVVXHznRw9bgUZpb+4/1lQK/PNxGpbrQ==", + "license": "MIT", + "dependencies": { + "flatbuffers": "^25.1.24", + "guid-typescript": "^1.0.9", + "long": "^5.2.3", + "onnxruntime-common": "1.24.3", + "platform": "^1.3.6", + "protobufjs": "^7.2.4" + } + }, "node_modules/parse5": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.0.tgz", @@ -5543,6 +5645,12 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/platform": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", + "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", + "license": "MIT" + }, "node_modules/playwright": { "version": "1.58.2", "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", @@ -5657,6 +5765,30 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/protobufjs": { + "version": "7.5.4", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz", + "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -6310,7 +6442,6 @@ "version": "7.18.2", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", - "dev": true, "license": "MIT" }, "node_modules/unist-util-is": { diff --git a/package.json b/package.json index 4c075d85..72347620 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,7 @@ "idb-keyval": "^6.2.0", "mp4-muxer": "^5.2.2", "node-pty": "^1.2.0-beta.12", + "onnxruntime-web": "^1.24.3", "react": "^19.0.0", "react-dom": "^19.0.0", "tone": "^15.1.22", diff --git a/src/components/generation/AudioAnalysisPanel.tsx b/src/components/generation/AudioAnalysisPanel.tsx index 60a89ad5..c0b333c9 100644 --- a/src/components/generation/AudioAnalysisPanel.tsx +++ b/src/components/generation/AudioAnalysisPanel.tsx @@ -2,12 +2,17 @@ import { useState, useEffect, useCallback } from 'react'; import { useProjectStore } from '../../store/projectStore'; import { useUIStore } from '../../store/uiStore'; import { useGenerationStore } from '../../store/generationStore'; +import { useAnalysisStore } from '../../store/analysisStore'; import * as api from '../../services/aceStepApi'; import { loadAudioBlobByKey } from '../../services/audioFileManager'; +import { analyzeClipLocally } from '../../services/localAnalysisService'; import type { TaskResultItem } from '../../types/api'; +import type { LocalAnalysisResult, ChordEvent } from '../../types/analysis'; import { POLL_INTERVAL_MS, MAX_POLL_DURATION_MS } from '../../constants/defaults'; -interface AnalysisResult { +type AnalysisMode = 'local' | 'server'; + +interface ServerAnalysisResult { bpm: number | undefined; keyScale: string | undefined; timeSignature: string | undefined; @@ -25,14 +30,22 @@ export function AudioAnalysisPanel() { const clip = analysisClipId ? getClipById(analysisClipId) : null; const track = project?.tracks.find((t) => t.clips.some((c) => c.id === analysisClipId)) ?? null; + const [mode, setMode] = useState('local'); const [analyzing, setAnalyzing] = useState(false); - const [result, setResult] = useState(null); + const [serverResult, setServerResult] = useState(null); + const [localResult, setLocalResult] = useState(null); const [error, setError] = useState(''); const [applied, setApplied] = useState(false); + // Local analysis progress from store + const analysisJob = useAnalysisStore((s) => + analysisClipId ? s.getJobForClip(analysisClipId) : undefined, + ); + // Reset when clip changes useEffect(() => { - setResult(null); + setServerResult(null); + setLocalResult(null); setError(''); setApplied(false); setAnalyzing(false); @@ -48,14 +61,31 @@ export function AudioAnalysisPanel() { return () => window.removeEventListener('keydown', handleEsc); }, [onClose]); - const handleAnalyze = useCallback(async () => { + // ---------- Local analysis ---------- + const handleLocalAnalyze = useCallback(async () => { + if (!clip || !analysisClipId || analyzing) return; + setAnalyzing(true); + setError(''); + setLocalResult(null); + + try { + const result = await analyzeClipLocally(analysisClipId); + setLocalResult(result); + } catch (e) { + setError(e instanceof Error ? e.message : 'Local analysis failed'); + } finally { + setAnalyzing(false); + } + }, [clip, analysisClipId, analyzing]); + + // ---------- Server analysis ---------- + const handleServerAnalyze = useCallback(async () => { if (!clip || analyzing || isGenerating) return; setAnalyzing(true); setError(''); - setResult(null); + setServerResult(null); try { - // Load clip audio let audioBlob: Blob | null = null; if (clip.isolatedAudioKey) { audioBlob = (await loadAudioBlobByKey(clip.isolatedAudioKey)) ?? null; @@ -68,15 +98,13 @@ export function AudioAnalysisPanel() { return; } - // Send as a cover task with minimal transformation — we just want the metas back. - // The cover task returns inferred BPM, key, etc. in the result metas. const coverParams = { task_type: 'cover' as const, caption: 'analyze audio properties', lyrics: '', - audio_cover_strength: 0.0, // No transformation — just analyze + audio_cover_strength: 0.0, audio_duration: clip.duration, - inference_steps: 10, // Minimal steps for fast analysis + inference_steps: 10, guidance_scale: 1.0, shift: 1.0, batch_size: 1, @@ -99,7 +127,7 @@ export function AudioAnalysisPanel() { const items: TaskResultItem[] = JSON.parse(entry.result); const first = items?.[0]; if (first) { - setResult({ + setServerResult({ bpm: first.metas?.bpm, keyScale: first.metas?.keyscale, timeSignature: first.metas?.timesignature, @@ -123,30 +151,39 @@ export function AudioAnalysisPanel() { } }, [clip, analyzing, isGenerating, project]); + const handleAnalyze = mode === 'local' ? handleLocalAnalyze : handleServerAnalyze; + const handleApplyToProject = useCallback(() => { - if (!result || !project) return; + if (!project) return; const updates: Record = {}; - if (result.bpm) updates.bpm = Math.round(result.bpm); - if (result.keyScale) updates.keyScale = result.keyScale; + if (mode === 'local' && localResult) { + if (localResult.bpm) updates.bpm = Math.round(localResult.bpm); + if (localResult.keyScale) updates.keyScale = localResult.keyScale; + } else if (mode === 'server' && serverResult) { + if (serverResult.bpm) updates.bpm = Math.round(serverResult.bpm); + if (serverResult.keyScale) updates.keyScale = serverResult.keyScale; + } if (Object.keys(updates).length > 0) { useProjectStore.getState().updateProject(updates as { bpm?: number; keyScale?: string }); setApplied(true); } - }, [result, project]); + }, [mode, localResult, serverResult, project]); if (!analysisClipId || !clip || !track) return null; const hasAudio = !!(clip.isolatedAudioKey || clip.cumulativeMixKey); - - // If clip already has inferred metas, show them immediately const existingMetas = clip.inferredMetas; + const hasResult = mode === 'local' ? !!localResult : !!serverResult; + const hasBpmOrKey = mode === 'local' + ? !!(localResult?.bpm || localResult?.keyScale) + : !!(serverResult?.bpm || serverResult?.keyScale); return (
{ if (e.target === e.currentTarget) onClose(); }} > -
+
{/* Header */}
@@ -165,6 +202,30 @@ export function AudioAnalysisPanel() { {/* Body */}
+ {/* Mode selector */} +
+ + +
+ {/* Source clip info */}

Clip

@@ -175,10 +236,33 @@ export function AudioAnalysisPanel() {

{clip.duration.toFixed(1)}s

+ {/* Local analysis progress */} + {mode === 'local' && analyzing && analysisJob && ( +
+

+ Analyzing... +

+
+
+
+

{analysisJob.message}

+
+ )} + {/* Existing inferred metas */} {existingMetas && (
-

Previously Inferred

+

+ Previously Inferred + {existingMetas.analysisSource && ( + + ({existingMetas.analysisSource}) + + )} +

{existingMetas.bpm && (
@@ -205,42 +289,72 @@ export function AudioAnalysisPanel() {
)}
+ + {/* Chord display for local analysis results */} + {existingMetas.chords && existingMetas.chords.length > 0 && ( +
+ Chords +
+ {existingMetas.chords.slice(0, 16).map((chord, i) => ( + + {chord.label} + + ))} + {existingMetas.chords.length > 16 && ( + + +{existingMetas.chords.length - 16} more + + )} +
+
+ )}
)} - {/* Analysis results */} - {result && ( + {/* Local analysis results */} + {mode === 'local' && localResult && ( + + )} + + {/* Server analysis results */} + {mode === 'server' && serverResult && (
-

Analysis Results

+

+ Server Results +

- {result.bpm && ( + {serverResult.bpm && (
BPM -

{Math.round(result.bpm)}

+

{Math.round(serverResult.bpm)}

)} - {result.keyScale && ( + {serverResult.keyScale && (
Key -

{result.keyScale}

+

{serverResult.keyScale}

)} - {result.timeSignature && ( + {serverResult.timeSignature && (
Time Sig -

{result.timeSignature}

+

{serverResult.timeSignature}

)} - {result.genres && ( + {serverResult.genres && (
Genre -

{result.genres}

+

{serverResult.genres}

)} - {result.caption && ( + {serverResult.caption && (
Description -

{result.caption}

+

{serverResult.caption}

)}
@@ -258,6 +372,13 @@ export function AudioAnalysisPanel() { No audio available — generate the clip first before analyzing.

)} + + {mode === 'local' && !analyzing && !localResult && hasAudio && ( +

+ Local analysis uses Beat This! for BPM detection and Consonance-ACE for chord recognition. + Models are downloaded on first use (~23MB total) and cached locally. +

+ )}
{/* Footer */} @@ -269,7 +390,7 @@ export function AudioAnalysisPanel() { Close
- {result && (result.bpm || result.keyScale) && ( + {hasResult && hasBpmOrKey && (