diff --git a/src/__tests__/elevenlabs-v3.test.ts b/src/__tests__/elevenlabs-v3.test.ts
new file mode 100644
index 0000000..81fe6ae
--- /dev/null
+++ b/src/__tests__/elevenlabs-v3.test.ts
@@ -0,0 +1,60 @@
+import { describe, it, expect } from "@jest/globals";
+import { ElevenLabsTTSClient } from "../engines/elevenlabs";
+
+describe("ElevenLabs v3 prepareText", () => {
+ it("strips SSML for eleven_v3 (no translation)", async () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key", modelId: "eleven_v3" });
+ const ssml = '<speak>Normal <emphasis level="strong">dramatic</emphasis> end</speak>';
+ const prepared = await (client as any).prepareText(ssml, {});
+ expect(prepared).toBe("Normal dramatic end");
+ });
+
+ it("preserves native [audio tags] in plain text", async () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key", modelId: "eleven_v3" });
+ const text = "Hello [excited] world [whispers]";
+ const prepared = await (client as any).prepareText(text, {});
+ expect(prepared).toBe(text);
+ });
+});
+
+describe("ElevenLabs v3 request parameters", () => {
+ it("includes seed in payload when set", () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
+ const payload = (client as any).buildRequestPayload("hello", { seed: 42 });
+ expect(payload.seed).toBe(42);
+ });
+
+ it("includes language_code in payload when set", () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
+ const payload = (client as any).buildRequestPayload("hello", { languageCode: "en" });
+ expect(payload.language_code).toBe("en");
+ });
+
+ it("includes previous_text in payload when set", () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
+ const payload = (client as any).buildRequestPayload("hello", { previousText: "Before this" });
+ expect(payload.previous_text).toBe("Before this");
+ });
+
+ it("includes next_text in payload when set", () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
+ const payload = (client as any).buildRequestPayload("hello", { nextText: "After this" });
+ expect(payload.next_text).toBe("After this");
+ });
+
+ it("includes apply_text_normalization in payload when set", () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
+ const payload = (client as any).buildRequestPayload("hello", { applyTextNormalization: "off" });
+ expect(payload.apply_text_normalization).toBe("off");
+ });
+
+ it("omits v3 params from payload when not set", () => {
+ const client = new ElevenLabsTTSClient({ apiKey: "test-key" });
+ const payload = (client as any).buildRequestPayload("hello", {});
+ expect(payload.seed).toBeUndefined();
+ expect(payload.language_code).toBeUndefined();
+ expect(payload.previous_text).toBeUndefined();
+ expect(payload.next_text).toBeUndefined();
+ expect(payload.apply_text_normalization).toBeUndefined();
+ });
+});
diff --git a/src/engines/elevenlabs.ts b/src/engines/elevenlabs.ts
index fe70528..858de12 100644
--- a/src/engines/elevenlabs.ts
+++ b/src/engines/elevenlabs.ts
@@ -7,7 +7,9 @@ import { getFetch } from "../utils/fetch-utils";
const fetch = getFetch();
/**
- * Extended options for ElevenLabs TTS
+ * Extended options for ElevenLabs TTS.
+ * seed, languageCode, previousText, nextText, and applyTextNormalization are
+ * only honoured by the eleven_v3 model and are silently ignored by others.
*/
export interface ElevenLabsTTSOptions extends SpeakOptions {
format?: "mp3" | "wav"; // Define formats supported by this client logic (maps to pcm)
@@ -17,6 +19,11 @@ export interface ElevenLabsTTSOptions extends SpeakOptions {
outputFormat?: string; // Override output_format per request
voiceSettings?: Record<string, unknown>; // Override voice_settings per request
requestOptions?: Record<string, unknown>; // Additional request payload overrides
+ seed?: number; // Deterministic output — same seed produces the same audio
+ languageCode?: string; // Force language interpretation (e.g. "en")
+ previousText?: string; // Context for continuity between sequential requests
+ nextText?: string; // Context for continuity between sequential requests
+ applyTextNormalization?: "auto" | "on" | "off"; // Control spelling/number expansion
}
/**
@@ -65,6 +72,8 @@ export interface ElevenLabsTimestampResponse {
* ElevenLabs TTS client
*/
export class ElevenLabsTTSClient extends AbstractTTSClient {
+ private static readonly MODEL_V3 = "eleven_v3";
+ private static readonly DEFAULT_MODEL = "eleven_multilingual_v2";
/**
* ElevenLabs API key
*/
@@ -98,7 +107,9 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
super(credentials);
this.apiKey = credentials.apiKey || process.env.ELEVENLABS_API_KEY || "";
this.modelId =
- (credentials as any).modelId || (credentials as any).model || "eleven_multilingual_v2";
+ (credentials as any).modelId ||
+ (credentials as any).model ||
+ ElevenLabsTTSClient.DEFAULT_MODEL;
if (typeof (credentials as any).outputFormat === "string") {
this.outputFormat = (credentials as any).outputFormat;
@@ -259,6 +270,13 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
merged.output_format = this.resolveOutputFormat(options, merged);
merged.voice_settings = this.resolveVoiceSettings(options, merged);
+ if (options?.seed !== undefined) merged.seed = options.seed;
+ if (options?.languageCode) merged.language_code = options.languageCode;
+ if (options?.previousText) merged.previous_text = options.previousText;
+ if (options?.nextText) merged.next_text = options.nextText;
+ if (options?.applyTextNormalization)
+ merged.apply_text_normalization = options.applyTextNormalization;
+
return merged;
}
@@ -434,24 +452,17 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
}
/**
- * Prepare text for synthesis by stripping SSML tags
- * @param text Text to prepare
- * @param options Synthesis options
- * @returns Prepared text
+ * Prepare text for synthesis by stripping SSML tags.
+ * ElevenLabs does not support SSML — use native [audio tags] for v3 expressiveness.
*/
- private async prepareText(text: string, options?: SpeakOptions): Promise<string> {
+ private async prepareText(text: string, options?: ElevenLabsTTSOptions): Promise<string> {
let processedText = text;
- // Convert from Speech Markdown if requested
if (options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
- // Convert to SSML first, then strip SSML tags
- // Use "elevenlabs" platform for ElevenLabs-specific Speech Markdown features
const ssml = await SpeechMarkdown.toSSML(processedText, "elevenlabs");
- processedText = this._stripSSML(ssml);
+ processedText = ssml;
}
- // If text is SSML, strip the tags as ElevenLabs doesn't support SSML
- // and has its own emotion analysis
if (this._isSSML(processedText)) {
processedText = this._stripSSML(processedText);
}