diff --git a/fern/pages/01-getting-started/universal-3-pro.mdx b/fern/pages/01-getting-started/universal-3-pro.mdx index 8e0c2a87..c3013364 100644 --- a/fern/pages/01-getting-started/universal-3-pro.mdx +++ b/fern/pages/01-getting-started/universal-3-pro.mdx @@ -78,7 +78,27 @@ Not sure where to start? Try our [Prompt Generator](/docs/pre-recorded-audio/pro This example shows how you can transcribe a pre-recorded audio file with our Universal-3 Pro model and print the transcript text to your terminal. - + + +```python +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assembly.ai/sports_injuries.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + ```python import requests @@ -114,6 +134,32 @@ while True: time.sleep(3) ``` + + + +```javascript +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assembly.ai/sports_injuries.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + @@ -187,9 +233,30 @@ Hi, this is Kelly Byrne-Donoghue ``` - + ```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/keyterms_prompting.wav" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + keyterms_prompt=["Kelly Byrne-Donoghue"], +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {11} import requests import time @@ -224,10 +291,37 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/keyterms_prompting.wav"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + keyterms_prompt: ["Kelly Byrne-Donoghue"], +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + -```javascript {11} +```javascript {12} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -272,6 +366,12 @@ while (true) { Universal-3 Pro delivers great accuracy out of the box. To fine-tune transcription results to your use case, provide a prompt with up to 1,500 words of context in plain language. This helps the model consistently recognize domain-specific terminology, apply your preferred formatting conventions, handle code switching between languages, and better interpret ambiguous speech. + + +`prompt` and `keyterms_prompt` cannot be used in the same request. + + + ### Verbatim transcription and disfluencies Capture natural speech patterns exactly as spoken, including um, uh, false starts, repetitions, stutters. Add examples of the verbatim elements you want to transcribe in the prompt parameter to guide the model. 
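For example, a minimal sketch with the Python SDK (mirroring the configuration fields used in the full examples below; the API key and audio URL are placeholders, and the status check assumes the SDK's standard `aai.TranscriptStatus` and `transcript.error` fields):

```python
import assemblyai as aai

aai.settings.api_key = "<YOUR_API_KEY>"  # placeholder

# Mirrors the configuration used in the examples below: keep disfluencies verbatim.
config = aai.TranscriptionConfig(
    speech_models=["universal-3-pro", "universal-2"],
    language_detection=True,
    prompt="Keep every disfluency exactly as spoken: fillers (um, uh), repetitions (I I), restarts (I was- I went), and stutters (th-that).",
)

transcript = aai.Transcriber().transcribe("<YOUR_AUDIO_URL>", config)

# Assumes the SDK's standard status/error fields; surface failures instead of printing empty text.
if transcript.status == aai.TranscriptStatus.error:
    print(transcript.error)
else:
    print(transcript.text)
```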
@@ -291,7 +391,28 @@ Do you and Quentin still socialize, uh, when you come to Los Angeles, or is it l ``` - + + +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/verbatim.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: fillers (um, uh, er, ah, hmm, mhm, like, you know, I mean), repetitions (I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + ```python {11} import requests @@ -328,6 +449,33 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/verbatim.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: fillers (um, uh, er, ah, hmm, mhm, like, you know, I mean), repetitions (I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + @@ -343,8 +491,7 @@ const data = { audio_url: "https://assemblyaiassets.com/audios/verbatim.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: fillers (um, uh, er, ah, hmm, mhm, like, you know, I mean), repetitions (I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", + prompt: "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: fillers (um, uh, er, ah, hmm, mhm, like, you know, I mean), repetitions (I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", }; const url = `${baseUrl}/v2/transcript`; @@ -413,9 +560,30 @@ You got called because you were being loud and screaming. No, I wasn't. That's l ``` - + -```python +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/ouput_formatting.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="Add punctuation based on the speaker's tone and expressiveness. Use exclamation marks (!) when the speaker is yelling, excited, or emphatic. Use question marks (?) for questioning intonation. 
Apply standard punctuation (periods, commas) based on natural speech patterns and pauses.", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {11} import requests import time @@ -450,10 +618,37 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/ouput_formatting.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "Add punctuation based on the speaker's tone and expressiveness. Use exclamation marks (!) when the speaker is yelling, excited, or emphatic. Use question marks (?) for questioning intonation. Apply standard punctuation (periods, commas) based on natural speech patterns and pauses.", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + -```javascript +```javascript {12} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -465,8 +660,7 @@ const data = { audio_url: "https://assemblyaiassets.com/audios/ouput_formatting.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Add punctuation based on the speaker's tone and expressiveness. Use exclamation marks (!) when the speaker is yelling, excited, or emphatic. Use question marks (?) for questioning intonation. Apply standard punctuation (periods, commas) based on natural speech patterns and pauses.", + prompt: "Add punctuation based on the speaker's tone and expressiveness. Use exclamation marks (!) when the speaker is yelling, excited, or emphatic. Use question marks (?) for questioning intonation. Apply standard punctuation (periods, commas) based on natural speech patterns and pauses.", }; const url = `${baseUrl}/v2/transcript`; @@ -528,7 +722,28 @@ I just wanna move you along a bit further. Do you take any prescribed medicines? ``` - + + +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/nlp_prompting.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="Produce a transcript for a clinical history evaluation. It's important to capture medication and dosage accurately. Every disfluency is meaningful data. Include: fillers (um, uh, er, erm, ah, hmm, mhm, like, you know, I mean), repetitions (I I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + ```python {11} import requests @@ -565,6 +780,33 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/nlp_prompting.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "Produce a transcript for a clinical history evaluation. It's important to capture medication and dosage accurately. Every disfluency is meaningful data. 
Include: fillers (um, uh, er, erm, ah, hmm, mhm, like, you know, I mean), repetitions (I I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + @@ -580,8 +822,7 @@ const data = { audio_url: "https://assemblyaiassets.com/audios/nlp_prompting.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Produce a transcript for a clinical history evaluation. It's important to capture medication and dosage accurately. Every disfluency is meaningful data. Include: fillers (um, uh, er, erm, ah, hmm, mhm, like, you know, I mean), repetitions (I I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", + prompt: "Produce a transcript for a clinical history evaluation. It's important to capture medication and dosage accurately. Every disfluency is meaningful data. Include: fillers (um, uh, er, erm, ah, hmm, mhm, like, you know, I mean), repetitions (I I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", }; const url = `${baseUrl}/v2/transcript`; @@ -643,7 +884,28 @@ Watch again closely. This is the potential game changer. The first responder NK ``` - + + +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/entity_accuracy.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="The speaker is discussing the cancer drug Anktiva (spelled A-N-K-T-I-V-A). When you hear what sounds like Entiva or similar pronunciations, transcribe it as Anktiva. This is the correct pharmaceutical name.", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + ```python {11} import requests @@ -680,6 +942,33 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/entity_accuracy.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "The speaker is discussing the cancer drug Anktiva (spelled A-N-K-T-I-V-A). When you hear what sounds like Entiva or similar pronunciations, transcribe it as Anktiva. This is the correct pharmaceutical name.", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + @@ -695,8 +984,7 @@ const data = { audio_url: "https://assemblyaiassets.com/audios/entity_accuracy.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "The speaker is discussing the cancer drug Anktiva (spelled A-N-K-T-I-V-A). When you hear what sounds like Entiva or similar pronunciations, transcribe it as Anktiva. This is the correct pharmaceutical name.", + prompt: "The speaker is discussing the cancer drug Anktiva (spelled A-N-K-T-I-V-A). When you hear what sounds like Entiva or similar pronunciations, transcribe it as Anktiva. 
This is the correct pharmaceutical name.", }; const url = `${baseUrl}/v2/transcript`; @@ -767,7 +1055,30 @@ With prompt: Without prompting, it may appear that speaker B said everything. But with prompting, the model correctly identifies this as 5 separate speaker turns, capturing utterances as short as a single word, like "good". - + + +```python {11} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/speaker_diarization.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + speaker_labels=True, + prompt="Produce a transcript with every disfluency data. Additionally, label speakers with their respective roles. 1. Place [Speaker:role] at the start of each speaker turn. Example format: [Speaker:NURSE] Hello there. How can I help you today? [Speaker:PATIENT] I'm feeling unwell. I have a headache.", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +for utterance in transcript.utterances: + print(f"Speaker {utterance.speaker}: {utterance.text}") +``` + + + ```python {12} import requests @@ -806,10 +1117,41 @@ while True: time.sleep(3) ``` + + + +```javascript {14} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/speaker_diarization.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + speaker_labels: true, + prompt: "Produce a transcript with every disfluency data. Additionally, label speakers with their respective roles. 1. Place [Speaker:role] at the start of each speaker turn. Example format: [Speaker:NURSE] Hello there. How can I help you today? [Speaker:PATIENT] I'm feeling unwell. I have a headache.", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + + for (const utterance of transcript.utterances!) { + console.log(`Speaker ${utterance.speaker}: ${utterance.text}`); + } +}; + +run(); +``` + -```javascript {12} +```javascript {13} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -822,8 +1164,7 @@ const data = { language_detection: true, speech_models: ["universal-3-pro", "universal-2"], speaker_labels: true, - prompt: - "Produce a transcript with every disfluency data. Additionally, label speakers with their respective roles. 1. Place [Speaker:role] at the start of each speaker turn. Example format: [Speaker:NURSE] Hello there. How can I help you today? [Speaker:PATIENT] I'm feeling unwell. I have a headache.", + prompt: "Produce a transcript with every disfluency data. Additionally, label speakers with their respective roles. 1. Place [Speaker:role] at the start of each speaker turn. Example format: [Speaker:NURSE] Hello there. How can I help you today? [Speaker:PATIENT] I'm feeling unwell. I have a headache.", }; const url = `${baseUrl}/v2/transcript`; @@ -875,9 +1216,30 @@ Your call has been forwarded to an automatic voice message system. At the tone, Here are some examples of audio tags you can prompt for: [music], [laugher], [applause], [noise], [pause], [inaudible], [sigh], [gasp], [cheering], [sound], [screaming], [bell], [beep], [sound effect], [buzzer], and more. 
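For example, a minimal Python SDK sketch (mirroring the examples below; the audio URL is a placeholder) that asks for several tags in one prompt. The model only emits tags for sounds that actually occur in the audio:

```python
import assemblyai as aai

aai.settings.api_key = "<YOUR_API_KEY>"  # placeholder

# Ask for several audio tags in one prompt; only sounds present in the file are tagged.
config = aai.TranscriptionConfig(
    speech_models=["universal-3-pro", "universal-2"],
    language_detection=True,
    prompt="Tag non-speech sounds inline with bracketed labels such as [music], [laughter], [applause], [beep], and [pause].",
)

transcript = aai.Transcriber().transcribe("<YOUR_AUDIO_URL>", config)

print(transcript.text)
```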
- + -```python {12} +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/audio_tag.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: Tag sounds: [beep]", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {11} import requests import time @@ -913,9 +1275,36 @@ while True: ``` - + ```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/audio_tag.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: Tag sounds: [beep]", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + + + + +```javascript {12} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -927,8 +1316,7 @@ const data = { audio_url: "https://assemblyaiassets.com/audios/audio_tag.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: Tag sounds: [beep]", + prompt: "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: Tag sounds: [beep]", }; const url = `${baseUrl}/v2/transcript`; @@ -976,9 +1364,30 @@ You literally lost your French? No, no, no. Mon français est là. 
L'italien, j' ``` - + -```python +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/code_switching_multilingual.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="The spoken language may change throughout the audio, transcribe in the original language mix (code-switching), preserving the words in the language they are spoken.", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {11} import requests import time @@ -1013,10 +1422,37 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/code_switching_multilingual.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "The spoken language may change throughout the audio, transcribe in the original language mix (code-switching), preserving the words in the language they are spoken.", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + -```javascript +```javascript {12} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -1028,8 +1464,7 @@ const data = { audio_url: "https://assemblyaiassets.com/audios/code_switching_multilingual.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "The spoken language may change throughout the audio, transcribe in the original language mix (code-switching), preserving the words in the language they are spoken.", + prompt: "The spoken language may change throughout the audio, transcribe in the original language mix (code-switching), preserving the words in the language they are spoken.", }; const url = `${baseUrl}/v2/transcript`; @@ -1091,9 +1526,30 @@ Commission has presented their communication, a hydrogen strategy for climate-ne ``` - + -```python +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/numbers_formatting.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="Transcribe with numbers normalized to standard formats. For example, when you see $1 billion, convert to $1,000,000,000.", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {11} import requests import time @@ -1101,7 +1557,7 @@ base_url = "https://api.assemblyai.com" headers = {"authorization": ""} data = { - "audio_url": "https://assemblyaiassets.com/audios/verbatim.mp3", + "audio_url": "https://assemblyaiassets.com/audios/numbers_formatting.mp3", "language_detection": True, "speech_models": ["universal-3-pro", "universal-2"], "prompt": "Transcribe with numbers normalized to standard formats. For example, when you see $1 billion, convert to $1,000,000,000." 
@@ -1128,10 +1584,37 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/numbers_formatting.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "Transcribe with numbers normalized to standard formats. For example, when you see $1 billion, convert to $1,000,000,000.", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + -```javascript +```javascript {12} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -1140,11 +1623,10 @@ const headers = { }; const data = { - audio_url: "https://assemblyaiassets.com/audios/verbatim.mp3", + audio_url: "https://assemblyaiassets.com/audios/numbers_formatting.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Convert spoken numbers to digits.", + prompt: "Transcribe with numbers normalized to standard formats. For example, when you see $1 billion, convert to $1,000,000,000.", }; const url = `${baseUrl}/v2/transcript`; @@ -1206,9 +1688,30 @@ I hope you got our card. [CROSSTALK] Okay, nobody talk. We'll just wait for her ``` - + -```python +```python {10} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/Difficult_audio.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="When multiple speakers talk simultaneously, mark crosstalk segments.", +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {11} import requests import time @@ -1216,7 +1719,7 @@ base_url = "https://api.assemblyai.com" headers = {"authorization": ""} data = { - "audio_url": "https://assemblyaiassets.com/audios/verbatim.mp3", + "audio_url": "https://assemblyaiassets.com/audios/Difficult_audio.mp3", "language_detection": True, "speech_models": ["universal-3-pro", "universal-2"], "prompt": "When multiple speakers talk simultaneously, mark crosstalk segments." @@ -1243,10 +1746,37 @@ while True: time.sleep(3) ``` + + + +```javascript {13} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/Difficult_audio.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: "When multiple speakers talk simultaneously, mark crosstalk segments.", +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + -```javascript +```javascript {12} import axios from "axios"; const baseUrl = "https://api.assemblyai.com"; @@ -1255,11 +1785,10 @@ const headers = { }; const data = { - audio_url: "https://assemblyaiassets.com/audios/verbatim.mp3", + audio_url: "https://assemblyaiassets.com/audios/Difficult_audio.mp3", language_detection: true, speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Mark inaudible segments. 
Preserve overlapping speech and crosstalk.", + prompt: "When multiple speakers talk simultaneously, mark crosstalk segments.", }; const url = `${baseUrl}/v2/transcript`; @@ -1315,9 +1844,31 @@ Low non-zero temperatures often produce better transcription accuracy (lower WER - + -```python +```python {11} +import assemblyai as aai + +aai.settings.api_key = "" + +audio_file = "https://assemblyaiassets.com/audios/nlp_prompting.mp3" + +config = aai.TranscriptionConfig( + speech_models=["universal-3-pro", "universal-2"], + language_detection=True, + prompt="Produce a transcript for a clinical history evaluation. It's important to capture medication and dosage accurately. Every disfluency is meaningful data. Include: fillers (um, uh, er, erm, ah, hmm, mhm, like, you know, I mean), repetitions (I I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", + temperature=0.1, +) + +transcript = aai.Transcriber().transcribe(audio_file, config) + +print(transcript.text) +``` + + + + +```python {12} import requests import time @@ -1353,10 +1904,39 @@ while True: time.sleep(3) ``` + + + +```javascript {15} +import { AssemblyAI } from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: "", +}); + +const audioFile = "https://assemblyaiassets.com/audios/nlp_prompting.mp3"; + +const params = { + audio: audioFile, + speech_models: ["universal-3-pro", "universal-2"], + language_detection: true, + prompt: + "Produce a transcript for a clinical history evaluation. It's important to capture medication and dosage accurately. Every disfluency is meaningful data. Include: fillers (um, uh, er, erm, ah, hmm, mhm, like, you know, I mean), repetitions (I I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", + temperature: 0.1, +}; + +const run = async () => { + const transcript = await client.transcripts.transcribe(params); + console.log(transcript.text); +}; + +run(); +``` + -```javascript +```javascript {14} import axios from "axios"; const baseUrl = "https://api.assemblyai.com";