diff --git a/package.json b/package.json
index a8cf71b..8675c97 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "assemblyai",
-  "version": "4.17.0",
+  "version": "4.18.0",
   "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
   "engines": {
     "node": ">=18"
diff --git a/src/types/openapi.generated.ts b/src/types/openapi.generated.ts
index 8f04ca7..a6af27e 100644
--- a/src/types/openapi.generated.ts
+++ b/src/types/openapi.generated.ts
@@ -1481,6 +1481,59 @@ export type SeverityScoreSummary = {
   medium: number;
 };
 
+/**
+ * Speaker identification type for speech understanding
+ */
+export type SpeakerType = "role" | "name";
+
+/**
+ * Speaker identification configuration for speech understanding
+ */
+export type SpeakerIdentificationRequest = {
+  /**
+   * The type of speaker identification to perform
+   */
+  speaker_type: SpeakerType;
+  /**
+   * Known speaker values (required if speaker_type is 'role')
+   */
+  known_values?: string[];
+};
+
+/**
+ * Speech understanding request configuration
+ */
+export type SpeechUnderstandingRequest = {
+  /**
+   * Speaker identification configuration
+   */
+  speaker_identification?: SpeakerIdentificationRequest;
+};
+
+/**
+ * Status of a speech understanding feature
+ */
+export type SpeechUnderstandingFeatureStatus = {
+  /**
+   * Status of the feature (e.g., 'success')
+   */
+  status: string;
+};
+
+/**
+ * Speech understanding response containing feature statuses and the original request
+ */
+export type SpeechUnderstandingResponse = {
+  /**
+   * Status of the speaker identification feature
+   */
+  speaker_identification?: SpeechUnderstandingFeatureStatus;
+  /**
+   * The original speech understanding request
+   */
+  request?: SpeechUnderstandingRequest;
+};
+
 /**
  * Advanced options for controlling speaker diarization parameters
  */
@@ -2622,6 +2675,14 @@ export type Transcript = {
    * @defaultValue null
    */
   speech_model: SpeechModel | null;
+  /**
+   * The list of speech models to use for the transcription in priority order.
+   */
+  speech_models?: string[] | null;
+  /**
+   * The actual speech model that was used for the transcription.
+   */
+  speech_model_used?: string | null;
   /**
    * Defaults to null. Reject audio files that contain less than this fraction of speech.
    * Valid values are in the range [0, 1] inclusive.
@@ -3066,6 +3127,12 @@ export type TranscriptOptionalParams = {
    * The list of key terms used to generate the transcript with the Slam-1 speech model. Can't be used together with `prompt`.
    */
   keyterms_prompt?: string[];
+  /**
+   * Speech understanding configuration (or echoed response) for speaker identification
+   */
+  speech_understanding?:
+    | SpeechUnderstandingRequest
+    | SpeechUnderstandingResponse;
   /**
    * The language of your audio file. Possible values are found in {@link https://www.assemblyai.com/docs/concepts/supported-languages | Supported Languages }.
    * The default value is 'en_us'.
@@ -3152,6 +3219,10 @@
    * @defaultValue best
    */
   speech_model?: SpeechModel | null;
+  /**
+   * The list of speech models to use for the transcription in priority order.
+   */
+  speech_models?: string[] | null;
   /**
    * Reject audio files that contain less than this fraction of speech.
    * Valid values are in the range [0, 1] inclusive.
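For context, the snippet below sketches how the new request options fit together. This is a usage sketch, not SDK documentation: the API key, audio URL, and model names ("slam-1", "universal") are placeholder values, and `transcribe` is the existing SDK method that submits a transcript and polls until completion.

import { AssemblyAI } from "assemblyai";

// Placeholder key and audio URL; the model names are illustrative examples.
const client = new AssemblyAI({ apiKey: "<YOUR_API_KEY>" });

const transcript = await client.transcripts.transcribe({
  audio: "https://example.com/audio.mp3",
  // New: models to try, in priority order.
  speech_models: ["slam-1", "universal"],
  // New: speaker identification via speech understanding.
  speech_understanding: {
    speaker_identification: {
      speaker_type: "role",
      // known_values is required when speaker_type is "role".
      known_values: ["Agent", "Customer"],
    },
  },
});

// New: reports which model actually produced the transcript.
console.log(transcript.speech_model_used);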
diff --git a/tests/unit/transcript.test.ts b/tests/unit/transcript.test.ts
index de6da24..5b077a7 100644
--- a/tests/unit/transcript.test.ts
+++ b/tests/unit/transcript.test.ts
@@ -468,4 +468,39 @@ describe("transcript", () => {
     expect(searchResponse.total_count).toBe(1);
     expect(searchResponse.matches).toBeInstanceOf(Array);
   });
+
+  it("should handle transcript response with speech_model_used field", async () => {
+    const transcriptWithSpeechModelUsed = {
+      id: transcriptId,
+      status: "completed",
+      speech_model_used: "best",
+    };
+    fetchMock.doMockOnceIf(
+      requestMatches({ url: `/v2/transcript/${transcriptId}`, method: "GET" }),
+      JSON.stringify(transcriptWithSpeechModelUsed),
+    );
+    const transcript = await assembly.transcripts.get(transcriptId);
+
+    expect(transcript.id).toBe(transcriptId);
+    expect(transcript.speech_model_used).toBe("best");
+  });
+
+  it("should handle transcript response without speech_model_used field", async () => {
+    // This test verifies that the SDK gracefully handles responses
+    // where speech_model_used is not present, as the field has not yet
+    // been added to the API for all users.
+    const transcriptWithoutSpeechModelUsed = {
+      id: transcriptId,
+      status: "completed",
+      // speech_model_used intentionally omitted
+    };
+    fetchMock.doMockOnceIf(
+      requestMatches({ url: `/v2/transcript/${transcriptId}`, method: "GET" }),
+      JSON.stringify(transcriptWithoutSpeechModelUsed),
+    );
+    const transcript = await assembly.transcripts.get(transcriptId);
+
+    expect(transcript.id).toBe(transcriptId);
+    expect(transcript.speech_model_used).toBeUndefined();
+  });
 });
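Since the second test documents that speech_model_used may be absent until the API rolls the field out to all users, consumer code should treat it as optional. A minimal defensive-read sketch, assuming a configured client and a placeholder transcript ID:

const transcript = await client.transcripts.get("<TRANSCRIPT_ID>");
// Fall back to the existing speech_model field when the new one is absent;
// speech_model itself may be null, hence the final fallback.
const modelUsed = transcript.speech_model_used ?? transcript.speech_model;
console.log(`speech model: ${modelUsed ?? "unknown"}`);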