From f55aaf74f7192ca0d9d0750cd035310a0972e7de Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 6 Feb 2026 14:14:34 +0000 Subject: [PATCH 1/4] Restore Disfluencies page to Pre-recorded STT navigation after Custom spelling Co-Authored-By: Lee Vaughn --- fern/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fern/docs.yml b/fern/docs.yml index 73f5369e..c511eb27 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -101,6 +101,8 @@ navigation: path: pages/02-speech-to-text/pre-recorded-audio/language-detection.mdx - page: Custom spelling path: pages/02-speech-to-text/pre-recorded-audio/custom-spelling.mdx + - page: Disfluencies + path: pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx - page: Word search path: pages/02-speech-to-text/pre-recorded-audio/word-search.mdx - page: Set the start and end of the transcript From 88032d51f73509688f1049a128d29cdf22bb8df2 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 6 Feb 2026 14:21:21 +0000 Subject: [PATCH 2/4] Use original Filler Words page version instead of Verbatim rewrite Co-Authored-By: Lee Vaughn --- fern/docs.yml | 2 +- .../pre-recorded-audio/filler-words.mdx | 178 ++++-------------- 2 files changed, 38 insertions(+), 142 deletions(-) diff --git a/fern/docs.yml b/fern/docs.yml index c511eb27..bd013209 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -101,7 +101,7 @@ navigation: path: pages/02-speech-to-text/pre-recorded-audio/language-detection.mdx - page: Custom spelling path: pages/02-speech-to-text/pre-recorded-audio/custom-spelling.mdx - - page: Disfluencies + - page: Filler words path: pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx - page: Word search path: pages/02-speech-to-text/pre-recorded-audio/word-search.mdx diff --git a/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx b/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx index 4146ee65..4e8b5c98 100644 --- a/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx +++ b/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx @@ -1,125 +1,37 @@ --- -title: "Verbatim" +title: "Filler Words" --- -import { AudioPlayer } from "../../../assets/components/AudioPlayer"; - -Verbatim transcription preserves natural speech patterns including filler words (um, uh), false starts, repetitions, and stutters. By default, these elements are removed for readability. Use the `prompt` parameter to specify which verbatim elements you want included in your transcript. - -## Overview - -Capture natural speech patterns exactly as spoken. Include examples of the verbatim elements you want to transcribe in the prompt parameter to guide the model. - - - -Without prompting verbatim: - -```txt wordWrap -Do you and Quentin still socialize when you come to Los Angeles, or is it like he's so used to having you here? No, no, no, we're friends. What do you do with him? -``` - -With prompting verbatim: - -```txt wordWrap -Do you and Quentin still socialize, uh, when you come to Los Angeles, or is it like he's so used to having you here? No, no, no, we, we, we're friends. What do you do with him? -``` - -Here are examples of verbatim transcription that you can prompt for: - -- fillers (um, uh, like, you know, I mean) -- repetitions (I I I, the the) -- restarts (I was- I went) -- stutters (th-that, b-but) -- informal speech (gonna, wanna, gotta) - -## Quickstart - - - - -```python {11} -import requests -import time - -base_url = "https://api.assemblyai.com" -headers = {"authorization": ""} - -data = { - "audio_url": "https://assemblyaiassets.com/audios/verbatim.mp3", - "language_detection": True, - "speech_models": ["universal-3-pro", "universal-2"], - "prompt": "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: fillers (um, uh, er, ah, hmm, mhm, like, you know, I mean), repetitions (I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)" -} - -response = requests.post(base_url + "/v2/transcript", headers=headers, json=data) - -if response.status_code != 200: - print(f"Error: {response.status_code}, Response: {response.text}") - response.raise_for_status() - -transcript_response = response.json() -transcript_id = transcript_response["id"] -polling_endpoint = f"{base_url}/v2/transcript/{transcript_id}" - -while True: - transcript = requests.get(polling_endpoint, headers=headers).json() - if transcript["status"] == "completed": - print(transcript["text"]) - break - elif transcript["status"] == "error": - raise RuntimeError(f"Transcription failed: {transcript['error']}") - else: - time.sleep(3) -``` - - - - -```javascript {11-12} -import axios from "axios"; - -const baseUrl = "https://api.assemblyai.com"; -const headers = { - authorization: "", -}; - -const data = { - audio_url: "https://assemblyaiassets.com/audios/verbatim.mp3", - language_detection: true, - speech_models: ["universal-3-pro", "universal-2"], - prompt: - "Produce a transcript suitable for conversational analysis. Every disfluency is meaningful data. Include: fillers (um, uh, er, ah, hmm, mhm, like, you know, I mean), repetitions (I I, the the), restarts (I was- I went), stutters (th-that, b-but, no-not), and informal speech (gonna, wanna, gotta)", -}; - -const url = `${baseUrl}/v2/transcript`; -const response = await axios.post(url, data, { headers: headers }); - -const transcriptId = response.data.id; -const pollingEndpoint = `${baseUrl}/v2/transcript/${transcriptId}`; - -while (true) { - const pollingResponse = await axios.get(pollingEndpoint, { - headers: headers, - }); - const transcriptionResult = pollingResponse.data; - - if (transcriptionResult.status === "completed") { - console.log(transcriptionResult.text); - break; - } else if (transcriptionResult.status === "error") { - throw new Error(`Transcription failed: ${transcriptionResult.error}`); - } else { - await new Promise((resolve) => setTimeout(resolve, 3000)); - } -} -``` - - - - -## Filler words (legacy) - -Universal-2 offers filler word detection through the `disfluencies` parameter. +import { LanguageTable } from "../../../assets/components/LanguagesTable"; + + + + + +
+
+ + + +
+
+ + + US & EU
+
+ +
The following filler words are removed by default: @@ -145,11 +57,7 @@ aai.settings.api_key = "" # audio_file = "./local_file.mp3" audio_file = "https://assembly.ai/wildfires.mp3" -config = aai.TranscriptionConfig( - speech_models=["universal-2"], - language_detection=True, - disfluencies=True -) +config = aai.TranscriptionConfig(disfluencies=True) transcript = aai.Transcriber(config=config).transcribe(audio_file) @@ -178,8 +86,6 @@ upload_url = response.json()["upload_url"] data = { "audio_url": upload_url, # You can also use a URL to an audio or video file on the web - "speech_models": ["universal-2"], - "language_detection": True, "disfluencies": True } @@ -216,8 +122,6 @@ const audioFile = "https://assembly.ai/wildfires.mp3"; const params = { audio: audioFile, - speech_models: ["universal-2"], - language_detection: true, disfluencies: true, }; @@ -249,8 +153,6 @@ const uploadUrl = uploadResponse.data.upload_url; const data = { audio_url: uploadUrl, // You can also use a URL to an audio or video file on the web - speech_models: ["universal-2"], - language_detection: true, disfluencies: true, }; @@ -343,13 +245,11 @@ class Program static async Task CreateTranscriptAsync(string audioUrl, HttpClient httpClient) { - var data = new - { - audio_url = audioUrl, - speech_models = new[] { "universal-2" }, - language_detection = true, - disfluencies = true - }; + var data = new + { + audio_url = audioUrl, + disfluencies = true + }; var content = new StringContent(JsonSerializer.Serialize(data), Encoding.UTF8, "application/json"); @@ -411,8 +311,6 @@ upload_url = JSON.parse(upload_response.body)["upload_url"] data = { "audio_url" => upload_url, # You can also use a URL to an audio or video file on the web - "speech_models" => ["universal-2"], - "language_detection" => true, "disfluencies" => true } @@ -485,8 +383,6 @@ curl_close($ch); $data = array( "audio_url" => $upload_url, // You can also use a URL to an audio or video file on the web - "speech_models" => ["universal-2"], - "language_detection" => true, "disfluencies" => true ); From d131ef83e35c510f466084c96228ae37b7ace9cb Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 6 Feb 2026 14:28:46 +0000 Subject: [PATCH 3/4] Update Filler Words: Universal-2 model, speech_models param, remove C#/Ruby/PHP examples Co-Authored-By: Lee Vaughn --- .../pre-recorded-audio/filler-words.mdx | 258 +----------------- 1 file changed, 12 insertions(+), 246 deletions(-) diff --git a/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx b/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx index 4e8b5c98..1c89ad4f 100644 --- a/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx +++ b/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx @@ -21,7 +21,7 @@ import { LanguageTable } from "../../../assets/components/LanguagesTable";
@@ -57,7 +57,11 @@ aai.settings.api_key = "" # audio_file = "./local_file.mp3" audio_file = "https://assembly.ai/wildfires.mp3" -config = aai.TranscriptionConfig(disfluencies=True) +config = aai.TranscriptionConfig( + speech_models=["universal-2"], + language_detection=True, + disfluencies=True +) transcript = aai.Transcriber(config=config).transcribe(audio_file) @@ -86,6 +90,8 @@ upload_url = response.json()["upload_url"] data = { "audio_url": upload_url, # You can also use a URL to an audio or video file on the web + "speech_models": ["universal-2"], + "language_detection": True, "disfluencies": True } @@ -122,6 +128,8 @@ const audioFile = "https://assembly.ai/wildfires.mp3"; const params = { audio: audioFile, + speech_models: ["universal-2"], + language_detection: true, disfluencies: true, }; @@ -153,6 +161,8 @@ const uploadUrl = uploadResponse.data.upload_url; const data = { audio_url: uploadUrl, // You can also use a URL to an audio or video file on the web + speech_models: ["universal-2"], + language_detection: true, disfluencies: true, }; @@ -179,249 +189,5 @@ while (true) { } ``` -```csharp title="C#" highlight={69} maxLines=15 -using System; -using System.IO; -using System.Net.Http; -using System.Net.Http.Headers; -using System.Net.Http.Json; -using System.Text; -using System.Text.Json; -using System.Text.Json.Serialization; -using System.Threading.Tasks; - -public class Transcript -{ - public string Id { get; set; } - public string Status { get; set; } - public string Text { get; set; } - public string Error { get; set; } -} - -class Program -{ - static void Main(string[] args) - { - MainAsync(args).GetAwaiter().GetResult(); - } - - static async Task MainAsync(string[] args) - { - using (var httpClient = new HttpClient()) - { - httpClient.DefaultRequestHeaders.Add("authorization", ""); - - var localFilePath = "audio.mp3"; - - Console.WriteLine("Uploading file..."); - var uploadUrl = await UploadFileAsync(localFilePath, httpClient); - - Console.WriteLine("Creating transcript with speech_model..."); - var transcript = await CreateTranscriptAsync(uploadUrl, httpClient); - - Console.WriteLine("Waiting for transcript..."); - transcript = await WaitForTranscriptToProcess(transcript, httpClient); - - Console.WriteLine("Transcription completed!"); - Console.WriteLine("----------------------------------"); - Console.WriteLine(transcript.Text); - } - } - - static async Task UploadFileAsync(string filePath, HttpClient httpClient) - { - using (var fileStream = File.OpenRead(filePath)) - using (var content = new StreamContent(fileStream)) - { - content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream"); - - var response = await httpClient.PostAsync("https://api.assemblyai.com/v2/upload", content); - response.EnsureSuccessStatusCode(); - - var jsonDoc = await response.Content.ReadFromJsonAsync(); - return jsonDoc.RootElement.GetProperty("upload_url").GetString(); - } - } - - static async Task CreateTranscriptAsync(string audioUrl, HttpClient httpClient) - { - var data = new - { - audio_url = audioUrl, - disfluencies = true - }; - - var content = new StringContent(JsonSerializer.Serialize(data), Encoding.UTF8, "application/json"); - - using (var response = await httpClient.PostAsync("https://api.assemblyai.com/v2/transcript", content)) - { - response.EnsureSuccessStatusCode(); - return await response.Content.ReadFromJsonAsync(); - } - } - - static async Task WaitForTranscriptToProcess(Transcript transcript, HttpClient httpClient) - { - var pollingEndpoint = $"https://api.assemblyai.com/v2/transcript/{transcript.Id}"; - - while (true) - { - var pollingResponse = await httpClient.GetAsync(pollingEndpoint); - transcript = await pollingResponse.Content.ReadFromJsonAsync(); - - switch (transcript.Status) - { - case "processing": - case "queued": - Console.WriteLine($"Status: {transcript.Status}... waiting..."); - await Task.Delay(TimeSpan.FromSeconds(3)); - break; - case "completed": - return transcript; - case "error": - throw new Exception($"Transcription failed: {transcript.Error}"); - default: - throw new Exception("Unexpected transcript status."); - } - } - } -} -``` - -```ruby title="Ruby" highlight={23} maxLines=15 -require 'net/http' -require 'json' - -base_url = 'https://api.assemblyai.com' - -headers = { - 'authorization' => '', - 'content-type' => 'application/json' -} - -path = "./my-audio.mp3" -uri = URI("#{base_url}/v2/upload") -request = Net::HTTP::Post.new(uri, headers) -request.body = File.read(path) - -http = Net::HTTP.new(uri.host, uri.port) -http.use_ssl = true -upload_response = http.request(request) -upload_url = JSON.parse(upload_response.body)["upload_url"] - -data = { - "audio_url" => upload_url, # You can also use a URL to an audio or video file on the web - "disfluencies" => true -} - -uri = URI.parse("#{base_url}/v2/transcript") -http = Net::HTTP.new(uri.host, uri.port) -http.use_ssl = true - -request = Net::HTTP::Post.new(uri.request_uri, headers) -request.body = data.to_json - -response = http.request(request) -response_body = JSON.parse(response.body) - -unless response.is_a?(Net::HTTPSuccess) - raise "API request failed with status #{response.code}: #{response.body}" -end - -transcript_id = response_body['id'] -puts "Transcript ID: #{transcript_id}" - -polling_endpoint = URI.parse("#{base_url}/v2/transcript/#{transcript_id}") - -while true - polling_http = Net::HTTP.new(polling_endpoint.host, polling_endpoint.port) - polling_http.use_ssl = true - polling_request = Net::HTTP::Get.new(polling_endpoint.request_uri, headers) - polling_response = polling_http.request(polling_request) - - transcription_result = JSON.parse(polling_response.body) - - if transcription_result['status'] == 'completed' - puts "Transcription text: #{transcription_result['text']}" - break - elsif transcription_result['status'] == 'error' - raise "Transcription failed: #{transcription_result['error']}" - else - puts 'Waiting for transcription to complete...' - sleep(3) - end -end -``` - -```php title="PHP" highlight={30} maxLines=15 -", - "content-type: application/json" -); - -$path = "./my-audio.mp3"; - -$ch = curl_init(); - -curl_setopt($ch, CURLOPT_URL, $base_url . "/v2/upload"); -curl_setopt($ch, CURLOPT_POST, 1); -curl_setopt($ch, CURLOPT_POSTFIELDS, file_get_contents($path)); -curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); -curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - -$response = curl_exec($ch); -$response_data = json_decode($response, true); -$upload_url = $response_data["upload_url"]; - -curl_close($ch); - -$data = array( - "audio_url" => $upload_url, // You can also use a URL to an audio or video file on the web - "disfluencies" => true -); - -$url = $base_url . "/v2/transcript"; -$curl = curl_init($url); - -curl_setopt($curl, CURLOPT_POST, true); -curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($data)); -curl_setopt($curl, CURLOPT_HTTPHEADER, $headers); -curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); - -$response = curl_exec($curl); - -$response = json_decode($response, true); - -curl_close($curl); - -$transcript_id = $response['id']; -echo "Transcript ID: $transcript_id\n"; - -$polling_endpoint = $base_url . "/v2/transcript/" . $transcript_id; - -while (true) { - $polling_response = curl_init($polling_endpoint); - - curl_setopt($polling_response, CURLOPT_HTTPHEADER, $headers); - curl_setopt($polling_response, CURLOPT_RETURNTRANSFER, true); - - $transcription_result = json_decode(curl_exec($polling_response), true); - - if ($transcription_result['status'] === "completed") { - echo $transcription_result['text']; - break; - } else if ($transcription_result['status'] === "error") { - throw new Exception("Transcription failed: " . $transcription_result['error']); - } else { - sleep(3); - } -} -``` From d2d7682377e7aeca5192f58d8cf1faffb2ef7a26 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 6 Feb 2026 14:35:08 +0000 Subject: [PATCH 4/4] Add note callout about Universal-2 specificity and link to Universal-3-Pro verbatim docs Co-Authored-By: Lee Vaughn --- .../02-speech-to-text/pre-recorded-audio/filler-words.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx b/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx index 1c89ad4f..9b99f330 100644 --- a/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx +++ b/fern/pages/02-speech-to-text/pre-recorded-audio/filler-words.mdx @@ -4,6 +4,10 @@ title: "Filler Words" import { LanguageTable } from "../../../assets/components/LanguagesTable"; + +This page covers using the `disfluencies` parameter with the Universal-2 model. To learn about verbatim transcription and disfluencies with Universal-3-Pro, see [Verbatim transcription and disfluencies](https://www.assemblyai.com/docs/getting-started/universal-3-pro#verbatim-transcription-and-disfluencies). + +