13 changes: 6 additions & 7 deletions README.md
@@ -1,6 +1,6 @@
<img src="https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/assemblyai.png?raw=true" width="500"/>

---
______________________________________________________________________

[![npm](https://img.shields.io/npm/v/assemblyai)](https://www.npmjs.com/package/assemblyai)
[![Test](https://github.com/AssemblyAI/assemblyai-node-sdk/actions/workflows/test.yml/badge.svg)](https://github.com/AssemblyAI/assemblyai-node-sdk/actions/workflows/test.yml)
@@ -101,7 +101,7 @@ let transcript = await client.transcripts.transcribe({
});
```

> [!TIP]
> [!NOTE]
> You can also pass a local file path, a stream, or a buffer as the `audio` property.

`transcribe` queues a transcription job and polls it until the `status` is `completed` or `error`.
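To make that note concrete, here is a minimal sketch (not part of this diff) that passes a local file path as `audio`; the path `./audio.mp3` is a made-up placeholder:

```typescript
import { AssemblyAI } from "assemblyai";

const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" });

// "./audio.mp3" is a hypothetical local path; per the note above,
// a stream or a Buffer can be passed the same way.
const transcript = await client.transcripts.transcribe({
  audio: "./audio.mp3",
});
console.log(transcript.status); // "completed" or "error" once polling finishes
```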
@@ -128,7 +128,7 @@ let transcript = await client.transcripts.transcribe({
});
```

> [!TIP]
> [!NOTE]
> You can also pass a file URL, a stream, or a buffer as the `audio` property.

`transcribe` queues a transcription job and polls it until the `status` is `completed` or `error`.
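Along the same lines, a hedged sketch (not part of this diff) of passing a stream as `audio`, assuming a local `./meeting.m4a` file exists:

```typescript
import fs from "fs";
import { AssemblyAI } from "assemblyai";

const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" });

// fs.createReadStream is one way to supply a stream; a Buffer read from
// disk would work the same way per the note above.
const transcript = await client.transcripts.transcribe({
  audio: fs.createReadStream("./meeting.m4a"),
});
```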
@@ -224,7 +224,7 @@ do {
} while (previousPageUrl !== null);
```

> [!TIP]
> [!NOTE]
> To paginate over all pages, you need to use the `page.page_details.prev_url`
> because the transcripts are returned in descending order by creation date and time.
> The first page contains the most recent transcripts, and each "previous" page contains older transcripts.
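A minimal sketch of the loop this note describes (reconstructed here for clarity; passing a page URL directly to `list` is an assumption based on the surrounding README example):

```typescript
import { AssemblyAI } from "assemblyai";

const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" });

// Walk pages from newest to oldest by following page_details.prev_url.
let previousPageUrl: string | null = null;
do {
  const page = await client.transcripts.list(previousPageUrl ?? undefined);
  for (const transcript of page.transcripts) {
    console.log(transcript.id, transcript.status);
  }
  previousPageUrl = page.page_details.prev_url;
} while (previousPageUrl !== null);
```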
@@ -263,9 +263,7 @@ const rt = client.streaming.transcriber({
> _Server code_:
>
> ```typescript
> const token = await client.streaming.createTemporaryToken({
> expires_in_seconds = 60,
> });
> const token = await client.streaming.createTemporaryToken({ expires_in_seconds: 60 });
> // TODO: return token to client
> ```
>
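To make the `// TODO: return token to client` step concrete, a hedged sketch of a server endpoint, assuming Express (neither the endpoint nor Express is part of this diff):

```typescript
import express from "express";
import { AssemblyAI } from "assemblyai";

const app = express();
const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" });

// Hypothetical endpoint the browser calls to fetch a short-lived streaming token.
app.get("/streaming-token", async (_req, res) => {
  const token = await client.streaming.createTemporaryToken({ expires_in_seconds: 60 });
  res.json({ token });
});

app.listen(3000);
```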
@@ -283,6 +281,7 @@ const rt = client.streaming.transcriber({
You can configure the following events.

<!-- prettier-ignore -->

```typescript
rt.on("open", ({ id, expires_at }) => console.log('Session ID:', id, 'Expires at:', expires_at));
rt.on("close", (code: number, reason: string) => console.log('Closed', code, reason));
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "assemblyai",
"version": "4.13.2",
"version": "4.13.3",
"description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
"engines": {
"node": ">=18"
80 changes: 80 additions & 0 deletions samples/speaker-diarization.ts
@@ -0,0 +1,80 @@
/**
* Example of using speaker diarization with speaker_options
*
* Note: speaker_options and speakers_expected are mutually exclusive.
* Use either speakers_expected for simple guidance OR speaker_options for advanced control.
*/

import { AssemblyAI, SpeakerOptions } from "assemblyai";

// Replace with your API key
const client = new AssemblyAI({
apiKey: "YOUR_API_KEY",
});

async function transcribeWithSpeakerDiarization() {
// Example 1: Basic speaker diarization (uses smart defaults)
// The model automatically detects the optimal number of speakers
let transcript = await client.transcripts.transcribe({
audio: "https://example.com/audio.mp3",
speaker_labels: true,
});

console.log("Basic speaker diarization:", transcript.id);

// Example 2: Provide a hint with speakers_expected (smart default with guidance)
// Still uses smart defaults but gives the model a hint about expected speakers
transcript = await client.transcripts.transcribe({
audio: "https://example.com/audio.mp3",
speaker_labels: true,
speakers_expected: 3,
});

console.log("With expected speakers:", transcript.id);

// Example 3: Set boundaries with speaker_options (controlled smart defaults)
// Constrains the smart defaults to work within specified bounds
const speakerOptions: SpeakerOptions = {
min_speakers_expected: 2, // At least 2 speakers (overrides smart default if < 2)
max_speakers_expected: 4, // At most 4 speakers (overrides smart default if > 4)
};

transcript = await client.transcripts.transcribe({
audio: "https://example.com/audio.mp3",
speaker_labels: true,
speaker_options: speakerOptions,
});

console.log("With speaker options:", transcript.id);

// Note: The following would be INVALID since speakers_expected and speaker_options are mutually exclusive:
// transcript = await client.transcripts.transcribe({
// audio: "https://example.com/audio.mp3",
// speaker_labels: true,
// speakers_expected: 3, // ❌ Cannot use both
// speaker_options: { min_speakers_expected: 2 }, // ❌ Cannot use both
// });

// Example 4: Edge case handling for challenging audio
// Use speaker_options when you need precise control over speaker detection
transcript = await client.transcripts.transcribe({
audio: "https://example.com/audio.mp3",
speaker_labels: true,
speaker_options: {
min_speakers_expected: 1, // Handle solo speakers or presentations
max_speakers_expected: 10, // Handle large meetings or conferences
},
});

console.log("Edge case handling:", transcript.id);

// Access the utterances with speaker labels
if (transcript.status === "completed" && transcript.utterances) {
for (const utterance of transcript.utterances) {
console.log(`Speaker ${utterance.speaker}: ${utterance.text}`);
}
}
}

// Run the example
transcribeWithSpeakerDiarization().catch(console.error);
22 changes: 22 additions & 0 deletions src/types/openapi.generated.ts
@@ -969,7 +969,7 @@
/**
* Only get throttled transcripts, overrides the status filter
* @defaultValue false
* @deprecated

Check warning on line 972 in src/types/openapi.generated.ts (GitHub Actions, Node.js 18 and 20 on ubuntu-latest): tsdoc-missing-deprecation-message: The @deprecated block must include a deprecation message, e.g. describing the recommended alternative
*/
throttled_only?: boolean;
};
@@ -1413,6 +1413,20 @@
medium: number;
};

/**
* Advanced options for controlling speaker diarization parameters
*/
export type SpeakerOptions = {
/**
* Minimum number of speakers expected in the audio
*/
min_speakers_expected?: number | null;
/**
* Maximum number of speakers expected in the audio
*/
max_speakers_expected?: number | null;
};

/**
* The speech model to use for the transcription.
*/
@@ -2517,6 +2531,10 @@
* Tell the speaker label model how many speakers it should attempt to identify, up to 10. See {@link https://www.assemblyai.com/docs/models/speaker-diarization | Speaker diarization } for more details.
*/
speakers_expected?: number | null;
/**
* Advanced options for controlling speaker diarization parameters
*/
speaker_options?: SpeakerOptions | null;
/**
* The speech model used for the transcription. When `null`, the default model is used.
* @defaultValue "null
@@ -3039,6 +3057,10 @@
* @defaultValue "null
*/
speakers_expected?: number | null;
/**
* Advanced options for controlling speaker diarization parameters
*/
speaker_options?: SpeakerOptions | null;
/**
* The speech model to use for the transcription. When `null`, the "best" model is used.
* @defaultValue best
130 changes: 130 additions & 0 deletions tests/unit/speaker-options.test.ts
@@ -0,0 +1,130 @@
import fetchMock from "jest-fetch-mock";
import { SpeakerOptions } from "../../src";
import {
createClient,
requestMatches,
} from "./utils";

fetchMock.enableMocks();

const assembly = createClient();
const transcriptId = "transcript_123";
const remoteAudioURL = "https://assembly.ai/espn.m4a";

beforeEach(() => {
jest.clearAllMocks();
fetchMock.resetMocks();
fetchMock.doMock();
});

describe("speaker options", () => {
it("should create transcript with speaker_options", async () => {
const speakerOptions: SpeakerOptions = {
min_speakers_expected: 2,
max_speakers_expected: 4,
};

fetchMock.doMockOnceIf(
requestMatches({ url: "/v2/transcript", method: "POST" }),
JSON.stringify({ id: transcriptId, status: "queued" }),
);

const transcript = await assembly.transcripts.submit({
audio_url: remoteAudioURL,
speaker_labels: true,
speaker_options: speakerOptions,
});

expect(transcript.id).toBe(transcriptId);
expect(transcript.status).toBe("queued");

// Verify the request body included speaker_options
const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
expect(requestBody.speaker_labels).toBe(true);
expect(requestBody.speaker_options).toEqual(speakerOptions);
});

it("should create transcript with only min_speakers_expected", async () => {
const speakerOptions: SpeakerOptions = {
min_speakers_expected: 3,
};

fetchMock.doMockOnceIf(
requestMatches({ url: "/v2/transcript", method: "POST" }),
JSON.stringify({ id: transcriptId, status: "queued" }),
);

const transcript = await assembly.transcripts.submit({
audio_url: remoteAudioURL,
speaker_labels: true,
speaker_options: speakerOptions,
});

expect(transcript.id).toBe(transcriptId);

const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
expect(requestBody.speaker_options.min_speakers_expected).toBe(3);
expect(requestBody.speaker_options.max_speakers_expected).toBeUndefined();
});

it("should create transcript with only max_speakers_expected", async () => {
const speakerOptions: SpeakerOptions = {
max_speakers_expected: 5,
};

fetchMock.doMockOnceIf(
requestMatches({ url: "/v2/transcript", method: "POST" }),
JSON.stringify({ id: transcriptId, status: "queued" }),
);

const transcript = await assembly.transcripts.submit({
audio_url: remoteAudioURL,
speaker_labels: true,
speaker_options: speakerOptions,
});

expect(transcript.id).toBe(transcriptId);

const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
expect(requestBody.speaker_options.min_speakers_expected).toBeUndefined();
expect(requestBody.speaker_options.max_speakers_expected).toBe(5);
});

it("should create transcript with speakers_expected (without speaker_options)", async () => {
fetchMock.doMockOnceIf(
requestMatches({ url: "/v2/transcript", method: "POST" }),
JSON.stringify({ id: transcriptId, status: "queued" }),
);

const transcript = await assembly.transcripts.submit({
audio_url: remoteAudioURL,
speaker_labels: true,
speakers_expected: 3,
});

expect(transcript.id).toBe(transcriptId);

const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
expect(requestBody.speaker_labels).toBe(true);
expect(requestBody.speakers_expected).toBe(3);
expect(requestBody.speaker_options).toBeUndefined();
});

it("should handle null speaker_options", async () => {
fetchMock.doMockOnceIf(
requestMatches({ url: "/v2/transcript", method: "POST" }),
JSON.stringify({ id: transcriptId, status: "queued" }),
);

const transcript = await assembly.transcripts.submit({
audio_url: remoteAudioURL,
speaker_labels: true,
speaker_options: null,
});

expect(transcript.id).toBe(transcriptId);

const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
expect(requestBody.speaker_options).toBe(null);
});
});