diff --git a/index.html b/index.html
index 7af04b17..92273d5b 100644
--- a/index.html
+++ b/index.html
@@ -12,6 +12,15 @@
ACE-Step DAW
+
diff --git a/src/main.tsx b/src/main.tsx
index 97da5e5b..0f87e681 100644
--- a/src/main.tsx
+++ b/src/main.tsx
@@ -1,5 +1,23 @@
import { StrictMode } from 'react';
import { createRoot } from 'react-dom/client';
+
+// Initialize default provider list if not already set.
+// NOTE: ESM imports are hoisted, so this runs after all module imports evaluate
+// but before render — early enough for llmChatService to read the list.
+const PROVIDERS_KEY = 'ace-step-daw-chat-providers';
+try {
+  const stored = localStorage.getItem(PROVIDERS_KEY);
+  const providers = stored ? JSON.parse(stored) : null;
+  if (!providers) {
+    const defaults = [
+      { id: 'anthropic', name: 'Anthropic', apiKey: '', baseUrl: 'https://api.anthropic.com/v1', enabled: false },
+      { id: 'openai', name: 'OpenAI', apiKey: '', baseUrl: 'https://api.openai.com/v1', enabled: false },
+      { id: 'google', name: 'Google AI', apiKey: '', baseUrl: 'https://generativelanguage.googleapis.com/v1beta', enabled: false },
+      { id: 'openrouter', name: 'OpenRouter', apiKey: '', baseUrl: 'https://openrouter.ai/api/v1', enabled: false },
+      { id: 'deepseek', name: 'DeepSeek', apiKey: '', baseUrl: 'https://api.deepseek.com/v1', enabled: false },
+      // BUGFIX: xAI's API host is api.x.ai — api.xai.io is not xAI's endpoint.
+      { id: 'xai', name: 'xAI', apiKey: '', baseUrl: 'https://api.x.ai/v1', enabled: false },
+    ];
+    localStorage.setItem(PROVIDERS_KEY, JSON.stringify(defaults));
+  }
+} catch { /* ignore — storage may be unavailable (privacy mode/quota); chat falls back */ }
import './index.css';
import App from './App';
import { getAudioEngine } from './hooks/useAudioEngine';
diff --git a/src/services/llmChatService.ts b/src/services/llmChatService.ts
new file mode 100644
index 00000000..64d4045a
--- /dev/null
+++ b/src/services/llmChatService.ts
@@ -0,0 +1,431 @@
+/**
+ * LLM Chat Service
+ * Calls OpenRouter (or any OpenAI-compatible provider) with DAW context.
+ * Parses ACTION blocks from the response and executes them on the DAW.
+ */
+
+import { useProjectStore } from '../store/projectStore';
+import { useGenerationStore } from '../store/generationStore';
+import { TRACK_CATALOG } from '../constants/tracks';
+import type { TrackName } from '../types/project';
+
+// localStorage key — must stay in sync with the bootstrap in main.tsx.
+const PROVIDERS_KEY = 'ace-step-daw-chat-providers';
+// Model id in OpenRouter's vendor/model naming scheme.
+const MODEL = 'google/gemini-2.0-flash-lite-001';
+
+// Hard cap on generated clip length, in seconds (4.5 min).
+const MAX_DURATION_SECONDS = 270;
+
+// Generation order: drums first (conditioned on silence), vocals last (conditioned on all)
+const LEGO_TRACK_ORDER: TrackName[] = [
+ 'drums', 'bass', 'guitar', 'keyboard', 'percussion',
+ 'strings', 'synth', 'brass', 'woodwinds', 'backing_vocals', 'vocals',
+];
+
+// Tracks that receive parsed lyrics; all other tracks generate instrumentals.
+const VOCAL_TRACKS = new Set(['vocals', 'backing_vocals']);
+
+interface Provider {
+  id: string;
+  apiKey: string;
+  baseUrl: string;
+  enabled: boolean;
+}
+
+/**
+ * Picks the provider whose API key is configured.
+ * OpenRouter wins when it has a key; otherwise the first keyed provider.
+ * Returns null when storage is empty, unparsable, or no key is set.
+ */
+function getActiveProvider(): Provider | null {
+  try {
+    const raw = localStorage.getItem(PROVIDERS_KEY);
+    if (!raw) return null;
+    const list: Provider[] = JSON.parse(raw);
+    const preferred = list.find((p) => p.id === 'openrouter' && p.apiKey);
+    return preferred ?? list.find((p) => p.apiKey) ?? null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Builds the system prompt sent with every chat request.
+ * Embeds the live DAW state plus genre reference material and the exact
+ * ```action block formats the model must emit. parseActions() depends on
+ * this documented format — keep the template and the parser in sync.
+ * The template literal below is a runtime string; do not reword casually.
+ */
+function buildSystemPrompt(dawSummary: string): string {
+ return `You are an expert music producer AI embedded inside ACE-Step DAW. You have deep knowledge of genre-specific sounds, BPMs, and production techniques.
+
+## Current DAW State
+${dawSummary}
+
+---
+
+## GENRE KNOWLEDGE — use these exact characteristics when matching a genre
+
+**Trap / Drill**
+- BPM: 130–170 (trap ~140, drill ~140–150)
+- Drums: 808 kick with long decay, hard snare/clap on 2&4, trap hi-hat rolls (16th/32nd triplets), open hi-hat, rimshot, percs
+- Bass: 808 sub bass with portamento pitch slides, heavy low end, distorted
+- Synth: dark minor-key pads, haunting melismatic synths, ominous bells, distorted leads
+- Vocals: male/female, autotune, melodic rap, ad-libs, aggressive delivery, trap flow
+
+**Boom Bap / Classic Hip-Hop**
+- BPM: 85–100
+- Drums: punchy vinyl-sampled kick, crisp snare, swung hi-hats, rim shots, boom bap groove
+- Bass: deep round bass, walking bass lines
+- Synth/keyboard: soul samples, dusty piano, Rhodes, organ stabs
+
+**Lo-fi Hip-Hop / Chill Hop**
+- BPM: 70–90
+- Drums: swung vinyl-textured drums, lazy kick, soft snare, subtle hi-hats, tape saturation
+- Bass: warm mellow bass, simple root notes
+- keyboard: dusty piano, warm Rhodes, jazz chords, vinyl crackle
+
+**Pop**
+- BPM: 100–130
+- Drums: clean punchy kick, tight snare, steady four-on-the-floor or 2&4 snare, crisp hi-hats
+- Bass: clean melodic bass, follows chord roots
+- Synth/guitar: bright synth pads, clean electric guitar, catchy melodic hooks
+- Vocals: female/male, clear polished singing, harmonies, hooky chorus
+
+**House / Dance**
+- BPM: 120–130
+- Drums: four-on-the-floor kick, open hi-hat on offbeats, clap on 2&4, driving groove
+- Bass: deep house bass, filtered bassline, sidechain pump
+- Synth: lush house chords, stabs, atmospheric pads, piano chords
+
+**R&B / Soul**
+- BPM: 60–100
+- Drums: smooth groove, soft kick, snappy snare, brush hi-hats
+- Bass: smooth melodic bass, syncopated, soulful
+- keyboard: warm Rhodes, smooth chords, gospel organ
+- Vocals: female/male, soulful singing, runs, harmonies, sensual delivery
+
+**Jazz**
+- BPM: 120–200 (swing feel)
+- Drums: jazz ride cymbal, brushed snare, walking time feel, swing groove
+- Bass: upright bass, walking bass lines
+- keyboard: jazz piano comping, chord voicings, improvisation
+
+**Rock / Alternative**
+- BPM: 100–160
+- Drums: powerful kick, cracking snare, driving rock hi-hats, crash cymbals, rock groove
+- bass: distorted bass guitar, follows guitar riffs
+- guitar: crunchy electric guitar, power chords, distortion, riffs
+
+**Afrobeats / Afropop**
+- BPM: 95–115
+- Drums: afro percussion, talking drum, shaker, cross-stick snare, bouncy kick pattern
+- bass: warm bass, syncopated groove
+- Vocals: male/female, melodic African-influenced singing, call and response
+
+---
+
+## ACTION: Full Multi-Track Song
+Use when user asks for a full song or beat with 2+ instruments.
+
+\`\`\`action
+GENERATE_SONG
+global_caption: dark trap, male vocals, 808 sub bass, trap hi-hats, minor key, 140 bpm
+bpm: 140
+duration: 180
+drums: 808 kick long decay, hard snare, trap hi-hat rolls 32nd notes, open hi-hat, rim shots
+bass: 808 sub bass portamento slides, heavy distorted low end, pitch bends
+synth: dark minor key pad, ominous bells, haunting atmosphere
+vocals: male, autotune, melodic trap flow, aggressive delivery
+lyrics: [intro-short] ; [verse] On my wrist the ice glow. Moving through the night slow. ; [chorus] Trap life never change. Money never strange. ; [outro-short]
+\`\`\`
+
+### Rules
+- global_caption: comma-separated genre tags describing the full song
+- bpm: match the genre's typical range
+- duration: seconds — **use these defaults based on request:**
+ - "full song" or "song" → 180
+ - "short song" or "quick" → 60
+ - "beat" or "loop" → 30
+ - "long song" or user specifies minutes → convert to seconds (e.g. 3 min = 180)
+ - max 270
+- Track lines: \`trackname: specific comma-separated production tags\` — BE SPECIFIC to the genre, not generic
+- Valid track names: drums, bass, guitar, keyboard, percussion, strings, synth, brass, woodwinds, backing_vocals, vocals
+- lyrics: ONLY when user wants vocals. Omit entirely for instrumentals.
+
+---
+
+## ACTION: Single Track
+Use only for one instrument or a quick test.
+
+\`\`\`action
+GENERATE_MUSIC
+prompt: dark trap, 808 kick long decay, trap hi-hat rolls, hard snare, 140 bpm
+duration: 30
+\`\`\`
+
+---
+
+## Lyrics Format
+- Sections separated by semicolons ;
+- Instrumental: [intro-short] [intro-medium] [inst-short] [inst-medium] [outro-short] [outro-medium]
+- Vocal: [verse] [chorus] [bridge] — sentences end with period
+- Example: [intro-short] ; [verse] Line one. Line two. ; [chorus] Hook line. ; [outro-short]
+- If user provided lyrics, use them — fit them to the structure, don't rewrite them
+
+---
+
+## Critical Rules
+- ALWAYS match genre DNA — trap must sound like trap, not pop
+- Use SPECIFIC production terms: "808 kick with long decay" not just "kick drum"
+- Include BPM that matches the genre (trap=140, pop=120, boom bap=90, house=128)
+- Duration: "full song" = 180s minimum, never use 30s for a full song request
+- Always explain what you're generating BEFORE the action block
+- Keep responses concise`;
+}
+
+// One "trackname: description" line parsed from a GENERATE_SONG block.
+interface ParsedTrackEntry {
+ name: TrackName;
+ description: string;
+}
+
+// Parsed GENERATE_SONG action: a multi-track batch request.
+interface ParsedSongAction {
+ type: 'generate_song';
+ globalCaption: string;
+ bpm: number;
+ duration: number;
+ lyrics?: string; // present only when the model emitted a lyrics: line
+ tracks: ParsedTrackEntry[];
+}
+
+// Parsed GENERATE_MUSIC action: a single-track request.
+interface ParsedMusicAction {
+ type: 'generate_music';
+ prompt: string;
+ duration: number;
+ lyrics?: string;
+}
+
+// Discriminated union on the `type` field.
+type ParsedAction = ParsedSongAction | ParsedMusicAction;
+
+/**
+ * Extracts ```action blocks from the model response text.
+ * Returns the display text with the blocks stripped out, plus the parsed
+ * actions. Malformed blocks (no recognized type, no tracks/prompt) are
+ * silently dropped — they are still removed from the displayed text.
+ */
+function parseActions(text: string): { cleanText: string; actions: ParsedAction[] } {
+  const actions: ParsedAction[] = [];
+  // Typed Set<string> so .has() accepts raw parsed keys without a cast
+  // (Set<TrackName>.has(string) is a strict-TS compile error).
+  const KNOWN_TRACKS = new Set<string>(LEGO_TRACK_ORDER);
+
+  // \r?\n tolerates CRLF line endings from some providers/proxies.
+  const cleanText = text.replace(/```action\r?\n([\s\S]*?)```/g, (_match, body: string) => {
+    const lines = body.trim().split(/\r?\n/);
+    const type = lines[0]?.trim();
+
+    // First "key: value" line wins; undefined when the key is absent.
+    const get = (key: string) => {
+      const line = lines.find((l) => l.startsWith(`${key}:`));
+      return line ? line.slice(key.length + 1).trim() : undefined;
+    };
+
+    if (type === 'GENERATE_SONG') {
+      const globalCaption = get('global_caption') ?? '';
+      const bpm = parseInt(get('bpm') ?? '120', 10) || 120;
+      const duration = parseInt(get('duration') ?? '30', 10) || 30;
+      const lyrics = get('lyrics');
+
+      // Parse per-track lines; non-track keys (bpm, lyrics, …) are skipped here.
+      const tracks: ParsedTrackEntry[] = [];
+      for (const line of lines.slice(1)) {
+        const colonIdx = line.indexOf(':');
+        if (colonIdx < 0) continue;
+        const key = line.slice(0, colonIdx).trim().toLowerCase();
+        const val = line.slice(colonIdx + 1).trim();
+        if (KNOWN_TRACKS.has(key) && val) {
+          tracks.push({ name: key as TrackName, description: val });
+        }
+      }
+
+      // Sort tracks in LEGO generation order
+      tracks.sort(
+        (a, b) => LEGO_TRACK_ORDER.indexOf(a.name) - LEGO_TRACK_ORDER.indexOf(b.name),
+      );
+
+      if (tracks.length > 0) {
+        actions.push({ type: 'generate_song', globalCaption, bpm, duration, lyrics, tracks });
+      }
+    } else if (type === 'GENERATE_MUSIC') {
+      const prompt = get('prompt') ?? '';
+      const duration = parseInt(get('duration') ?? '30', 10) || 30;
+      const lyrics = get('lyrics');
+      if (prompt) {
+        actions.push({ type: 'generate_music', prompt, duration, lyrics });
+      }
+    }
+
+    return ''; // remove block from displayed text
+  }).trim();
+
+  return { cleanText, actions };
+}
+
+/**
+ * Applies parsed actions to the DAW stores.
+ *
+ * generate_song: creates one stems track + clip per instrument, then starts a
+ * parallel "silence-mode" batch generation with a shared seed for coherence.
+ * generate_music: fills the generation form and submits a single request.
+ *
+ * BUGFIX: return type was a bare `Promise` (generic requires a type argument);
+ * restored to Promise<void>.
+ */
+async function executeActions(actions: ParsedAction[]): Promise<void> {
+  for (const action of actions) {
+    if (action.type === 'generate_song') {
+      const projectStore = useProjectStore.getState();
+      const genStore = useGenerationStore.getState();
+
+      if (genStore.isGenerating) {
+        console.warn('[llmChat] Already generating — skipping LEGO batch');
+        continue;
+      }
+
+      // Clamp duration — long clips OOM on RTX 3060 (11.6 GB, model uses ~8 GB)
+      const duration = Math.min(action.duration, MAX_DURATION_SECONDS);
+
+      // Create one stems track + clip per requested instrument
+      const batchTracks: Array<{ clipId: string; localDescription: string; lyrics?: string }> = [];
+
+      for (const trackEntry of action.tracks) {
+        const info = TRACK_CATALOG[trackEntry.name];
+        const track = projectStore.addTrack(trackEntry.name, 'stems', {
+          displayName: info.displayName,
+          order: info.defaultOrder,
+          color: info.color,
+        });
+
+        // Only vocal tracks receive lyrics; instrumentals get an empty string.
+        const isVocal = VOCAL_TRACKS.has(trackEntry.name);
+        const clip = projectStore.addClip(track.id, {
+          startTime: 0,
+          duration,
+          prompt: trackEntry.description,
+          globalCaption: action.globalCaption,
+          lyrics: isVocal ? (action.lyrics ?? '') : '',
+          source: 'generated',
+        });
+
+        batchTracks.push({
+          clipId: clip.id,
+          localDescription: trackEntry.description,
+          ...(isVocal && action.lyrics ? { lyrics: action.lyrics } : {}),
+        });
+      }
+
+      // Set BPM on the generation form so the API receives the right value
+      genStore.setGenerationBpm(action.bpm);
+
+      // Small delay to let state settle
+      await new Promise((r) => setTimeout(r, 150));
+
+      // Dynamic import to avoid circular dependency (uiStore → llmChatService → generationPipeline → uiStore)
+      const { generateBatch } = await import('./generationPipeline');
+
+      // Use silence mode: all tracks generate in parallel from silence.
+      // Each track uses the shared seed + globalCaption for musical coherence.
+      // Context mode uploads large cumulative blobs and risks OOM on 3060.
+      await generateBatch({
+        mode: 'silence',
+        globalCaption: action.globalCaption,
+        tracks: batchTracks,
+        sharedSeed: Math.floor(Math.random() * 2 ** 31),
+      });
+
+    } else if (action.type === 'generate_music') {
+      // Single-track fallback: reuse the first stems track, or create one.
+      const projectStore = useProjectStore.getState();
+      const genStore = useGenerationStore.getState();
+
+      let track = projectStore.project?.tracks.find((t) => t.trackType === 'stems');
+      if (!track) {
+        track = projectStore.addTrack('custom', 'stems', { displayName: 'AI Generated' });
+      }
+
+      genStore.setGenerationPrompt(action.prompt);
+      genStore.setGenerationLengthSeconds(action.duration);
+      genStore.setGenerationTargetTrack(track.id);
+      if (action.lyrics) {
+        genStore.setGenerationLyrics(action.lyrics);
+      }
+
+      // Small delay to let the form state settle before submit.
+      await new Promise((r) => setTimeout(r, 100));
+      genStore.submitGenerationRequest();
+    }
+  }
+}
+
+/**
+ * Streams a chat completion from the active provider as SSE.
+ * Yields text chunks as they arrive (suppressing tokens inside ```action
+ * blocks), then executes any parsed actions and yields a summary chunk.
+ * Yields the sentinel '__FALLBACK__' when no provider is configured.
+ *
+ * BUGFIX: previously actions were only parsed/executed when the provider sent
+ * a literal [DONE] event; a stream that simply closed (some proxies do) lost
+ * the actions, and a final partial line left in the buffer was dropped.
+ * Finalization now always runs after the read loop.
+ */
+export async function* streamLLMResponse(
+  question: string,
+  dawSummary: string,
+  conversationHistory: Array<{ role: 'user' | 'assistant'; content: string }>,
+): AsyncGenerator<{ chunk: string; actions?: ParsedAction[] }> {
+  const provider = getActiveProvider();
+
+  // No configured key — tell the caller to use the built-in fallback assistant.
+  if (!provider) {
+    yield { chunk: '__FALLBACK__' };
+    return;
+  }
+
+  // Keep context bounded: the 8 most recent turns plus the new question.
+  const messages = [
+    ...conversationHistory.slice(-8),
+    { role: 'user' as const, content: question },
+  ];
+
+  const response = await fetch(`${provider.baseUrl}/chat/completions`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${provider.apiKey}`,
+      // OpenRouter attribution headers; harmless on other providers.
+      'HTTP-Referer': 'https://ace-step-daw',
+      'X-Title': 'ACE-Step DAW',
+    },
+    body: JSON.stringify({
+      model: MODEL,
+      messages: [
+        { role: 'system', content: buildSystemPrompt(dawSummary) },
+        ...messages,
+      ],
+      stream: true,
+      max_tokens: 1024,
+      temperature: 0.7,
+    }),
+  });
+
+  if (!response.ok) {
+    const err = await response.text();
+    throw new Error(`LLM API error ${response.status}: ${err}`);
+  }
+
+  const reader = response.body?.getReader();
+  if (!reader) throw new Error('No response body');
+
+  const decoder = new TextDecoder();
+  let buffer = '';
+  let fullText = '';
+  let sawDone = false;
+
+  // Consume one SSE line; returns the content token to surface, if any.
+  const handleLine = (line: string): string => {
+    if (!line.startsWith('data: ')) return '';
+    const data = line.slice(6).trim();
+    if (data === '[DONE]') {
+      sawDone = true;
+      return '';
+    }
+    try {
+      const json = JSON.parse(data);
+      return json.choices?.[0]?.delta?.content ?? '';
+    } catch {
+      return ''; // partial/keep-alive payloads are expected mid-stream
+    }
+  };
+
+  while (!sawDone) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    buffer += decoder.decode(value, { stream: true });
+    const lines = buffer.split('\n');
+    buffer = lines.pop() ?? '';
+
+    for (const line of lines) {
+      const chunk = handleLine(line);
+      if (sawDone) break;
+      if (!chunk) continue;
+      fullText += chunk;
+      // Don't stream tokens that are inside an unclosed ```action block.
+      const opened = (fullText.match(/```action/g) ?? []).length;
+      const closed = (fullText.match(/```action[\s\S]*?```/g) ?? []).length;
+      if (opened <= closed) {
+        yield { chunk };
+      }
+    }
+  }
+
+  // Flush a final partial line if the stream closed without a trailing newline.
+  if (!sawDone && buffer) {
+    const chunk = handleLine(buffer);
+    if (chunk) fullText += chunk;
+  }
+
+  // Finalize whether or not [DONE] arrived.
+  const { actions } = parseActions(fullText);
+  if (actions.length > 0) {
+    await executeActions(actions);
+    const actionMsg = actions
+      .map((a) => {
+        if (a.type === 'generate_song') {
+          const trackList = a.tracks.map((t) => t.name).join(', ');
+          const clampedDur = Math.min(a.duration, MAX_DURATION_SECONDS);
+          const clamped = clampedDur < a.duration ? ` (capped at ${clampedDur}s)` : '';
+          return `\n\n🎵 *Generating ${a.tracks.length} tracks (${trackList}) — ${clampedDur}s each${clamped}*`;
+        }
+        return `\n\n🎵 *Generating: "${a.prompt}" (${a.duration}s)*`;
+      })
+      .join('');
+    yield { chunk: actionMsg, actions };
+  }
+}
diff --git a/src/store/uiStore.ts b/src/store/uiStore.ts
index 9ae77ff1..eac691e9 100644
--- a/src/store/uiStore.ts
+++ b/src/store/uiStore.ts
@@ -9,6 +9,7 @@ import { useTransportStore } from './transportStore';
import type { AIChatContext } from '../utils/aiAssistantContext';
import { buildAssistantContext } from '../utils/aiAssistantContext';
import { getAssistantSuggestions, streamAssistantResponse } from '../services/aiAssistantService';
+import { streamLLMResponse } from '../services/llmChatService';
import type { ShortcutContext } from '../types/shortcuts';
import type { ThemeId } from '../themes/themeTokens';
import type { EnhancementNode, EnhancementSession } from '../types/enhance';
@@ -1135,7 +1136,22 @@ export const useUIStore = create()(
}));
try {
- for await (const chunk of streamAssistantResponse(trimmed, context, options?.delayMs)) {
+ // Build conversation history for LLM context (exclude the empty assistant message just added)
+ const history = get().aiChatMessages
+ .filter((m) => m.id !== assistantMessage.id && m.content)
+ .map((m) => ({ role: m.role as 'user' | 'assistant', content: m.content }));
+
+ const dawSummary = typeof window.__dawSummary === 'function'
+ ? window.__dawSummary()
+ : context.summary;
+
+ let usedFallback = false;
+
+ for await (const { chunk } of streamLLMResponse(trimmed, dawSummary, history)) {
+ if (chunk === '__FALLBACK__') {
+ usedFallback = true;
+ break;
+ }
set((state) => ({
aiChatMessages: state.aiChatMessages.map((message) => (
message.id === assistantMessage.id
@@ -1144,6 +1160,18 @@ export const useUIStore = create()(
)),
}));
}
+
+ if (usedFallback) {
+ for await (const chunk of streamAssistantResponse(trimmed, context, options?.delayMs)) {
+ set((state) => ({
+ aiChatMessages: state.aiChatMessages.map((message) => (
+ message.id === assistantMessage.id
+ ? { ...message, content: `${message.content}${chunk}` }
+ : message
+ )),
+ }));
+ }
+ }
} catch (error) {
const message = error instanceof Error ? error.message : 'Assistant response failed.';
set((state) => ({