Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/__tests__/elevenlabs-v3.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { describe, it, expect } from "@jest/globals";
import { ElevenLabsTTSClient } from "../engines/elevenlabs";

// Exercises the private prepareText() method on a client configured for eleven_v3.
describe("ElevenLabs v3 prepareText", () => {
  /**
   * Builds a fresh eleven_v3 client and runs the given input through its
   * private prepareText() method with empty options.
   */
  const prepareWithV3 = (input: string): Promise<string> => {
    const v3Client = new ElevenLabsTTSClient({ apiKey: "test-key", modelId: "eleven_v3" });
    // prepareText is private, so reach it through an `any` cast.
    return (v3Client as any).prepareText(input, {});
  };

  it("strips SSML for eleven_v3 (no translation)", async () => {
    const result = await prepareWithV3(
      '<speak>Normal <emphasis level="strong">dramatic</emphasis> end</speak>'
    );
    expect(result).toBe("Normal dramatic end");
  });

  it("preserves native [audio tags] in plain text", async () => {
    const tagged = "Hello [excited] world [whispers]";
    const result = await prepareWithV3(tagged);
    // Plain text containing bracketed audio tags must come back unchanged.
    expect(result).toBe(tagged);
  });
});

// Checks how the private buildRequestPayload() method maps per-request options
// onto snake_case payload fields.
describe("ElevenLabs v3 request parameters", () => {
  /**
   * Builds a payload for the text "hello" with the given options, using a
   * fresh client created with only an API key (no explicit model).
   */
  const payloadFor = (options: Record<string, unknown>): Record<string, unknown> => {
    const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
    // buildRequestPayload is private, so reach it through an `any` cast.
    return (client as any).buildRequestPayload("hello", options);
  };

  // One row per option: the payload field it should populate, the options
  // passed in, and the value expected in the payload.
  const forwardedCases: ReadonlyArray<{
    field: string;
    options: Record<string, unknown>;
    expected: unknown;
  }> = [
    { field: "seed", options: { seed: 42 }, expected: 42 },
    { field: "language_code", options: { languageCode: "en" }, expected: "en" },
    { field: "previous_text", options: { previousText: "Before this" }, expected: "Before this" },
    { field: "next_text", options: { nextText: "After this" }, expected: "After this" },
    {
      field: "apply_text_normalization",
      options: { applyTextNormalization: "off" },
      expected: "off",
    },
  ];

  for (const { field, options, expected } of forwardedCases) {
    it(`includes ${field} in payload when set`, () => {
      expect(payloadFor(options)[field]).toBe(expected);
    });
  }

  it("omits v3 params from payload when not set", () => {
    const payload = payloadFor({});
    const absentFields = [
      "seed",
      "language_code",
      "previous_text",
      "next_text",
      "apply_text_normalization",
    ];
    for (const field of absentFields) {
      expect(payload[field]).toBeUndefined();
    }
  });
});
37 changes: 24 additions & 13 deletions src/engines/elevenlabs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
const fetch = getFetch();

/**
* Extended options for ElevenLabs TTS
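* Extended options for ElevenLabs TTS.
* seed, languageCode, previousText, nextText, and applyTextNormalization were
* added for the eleven_v3 model. The client adds them to the request payload
* whenever they are set, regardless of the configured model; whether a given
* model honours them is decided by the ElevenLabs API, not by this client.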
*/
export interface ElevenLabsTTSOptions extends SpeakOptions {
format?: "mp3" | "wav"; // Define formats supported by this client logic (maps to pcm)
Expand All @@ -17,6 +19,11 @@
outputFormat?: string; // Override output_format per request
voiceSettings?: Record<string, unknown>; // Override voice_settings per request
requestOptions?: Record<string, unknown>; // Additional request payload overrides
seed?: number; // Deterministic output — same seed produces the same audio
languageCode?: string; // Force language interpretation (e.g. "en")
previousText?: string; // Context for continuity between sequential requests
nextText?: string; // Context for continuity between sequential requests
applyTextNormalization?: "auto" | "on" | "off"; // Control spelling/number expansion
}

/**
Expand Down Expand Up @@ -65,6 +72,8 @@
* ElevenLabs TTS client
*/
export class ElevenLabsTTSClient extends AbstractTTSClient {
private static readonly MODEL_V3 = "eleven_v3";

Check failure on line 75 in src/engines/elevenlabs.ts

View workflow job for this annotation

GitHub Actions / test

'MODEL_V3' is declared but its value is never read.

Check failure on line 75 in src/engines/elevenlabs.ts

View workflow job for this annotation

GitHub Actions / test (18.x)

'MODEL_V3' is declared but its value is never read.

Check failure on line 75 in src/engines/elevenlabs.ts

View workflow job for this annotation

GitHub Actions / test (20.x)

'MODEL_V3' is declared but its value is never read.

Check failure on line 75 in src/engines/elevenlabs.ts

View workflow job for this annotation

GitHub Actions / bundle-smoke

'MODEL_V3' is declared but its value is never read.
private static readonly DEFAULT_MODEL = "eleven_multilingual_v2";
/**
* ElevenLabs API key
*/
Expand Down Expand Up @@ -98,7 +107,9 @@
super(credentials);
this.apiKey = credentials.apiKey || process.env.ELEVENLABS_API_KEY || "";
this.modelId =
(credentials as any).modelId || (credentials as any).model || "eleven_multilingual_v2";
(credentials as any).modelId ||
(credentials as any).model ||
ElevenLabsTTSClient.DEFAULT_MODEL;

if (typeof (credentials as any).outputFormat === "string") {
this.outputFormat = (credentials as any).outputFormat;
Expand Down Expand Up @@ -259,6 +270,13 @@
merged.output_format = this.resolveOutputFormat(options, merged);
merged.voice_settings = this.resolveVoiceSettings(options, merged);

if (options?.seed !== undefined) merged.seed = options.seed;
if (options?.languageCode) merged.language_code = options.languageCode;
if (options?.previousText) merged.previous_text = options.previousText;
if (options?.nextText) merged.next_text = options.nextText;
if (options?.applyTextNormalization)
merged.apply_text_normalization = options.applyTextNormalization;

return merged;
}

Expand Down Expand Up @@ -434,24 +452,17 @@
}

/**
* Prepare text for synthesis by stripping SSML tags
* @param text Text to prepare
* @param options Synthesis options
* @returns Prepared text
* Prepare text for synthesis by stripping SSML tags.
* ElevenLabs does not support SSML — use native [audio tags] for v3 expressiveness.
*/
private async prepareText(text: string, options?: SpeakOptions): Promise<string> {
private async prepareText(text: string, options?: ElevenLabsTTSOptions): Promise<string> {
let processedText = text;

// Convert from Speech Markdown if requested
if (options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
// Convert to SSML first, then strip SSML tags
// Use "elevenlabs" platform for ElevenLabs-specific Speech Markdown features
const ssml = await SpeechMarkdown.toSSML(processedText, "elevenlabs");
processedText = this._stripSSML(ssml);
processedText = ssml;
}

// If text is SSML, strip the tags as ElevenLabs doesn't support SSML
// and has its own emotion analysis
if (this._isSSML(processedText)) {
processedText = this._stripSSML(processedText);
}
Expand Down
Loading