onyx-dot-app · jessicasingh7 · Mar 25, 2026
diff --git a/admins/actions/voice_mode.mdx b/admins/actions/voice_mode.mdx
@@ -0,0 +1,153 @@
+---
+title: "Voice Mode"
+description: "Set up voice providers to enable Voice Mode for your Onyx instance"
+icon: "microphone"
+---
+
+The Voice Mode action allows your users to have spoken conversations with Onyx using speech-to-text and text-to-speech
+providers.
+
+## Setting up Voice Mode
+
+<Steps>
+  <Step title="Navigate to the Voice Mode Dashboard">
+    Click your user profile icon and select **Admin Panel**, then select the **Voice Mode** tab in the sidebar.
+
+    <img className="rounded-image" src="/assets/admin/actions/voice_mode/voice_mode_dashboard.png" alt="Voice Mode dashboard in Onyx Admin Panel" />
+  </Step>
+
+  <Step title="Set up Speech-to-Text (STT) provider">
+    Configure a provider to transcribe user speech into text.
+
+    <img className="rounded-image" src="/assets/admin/actions/voice_mode/stt.png" alt="Speech-to-Text Providers" />
+
+    <Tabs tabs={["OpenAI Whisper", "Azure STT", "ElevenLabs"]}>
+      <Tab title="OpenAI Whisper">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get an OpenAI API key">
+              Navigate to the [OpenAI dashboard](https://platform.openai.com/api-keys) and create a new API key, or
+              reuse an existing OpenAI API key already configured for your LLM provider.
+            </Step>
+
+            <Step title="Enter the API key">
+              Click **Connect** and enter the API key.
+            </Step>
+          </Steps>
+        </div>
+      </Tab>
+      <Tab title="Azure STT">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get Azure Speech credentials">
+              Navigate to the [Azure Portal](https://portal.azure.com) and create a Speech resource. Copy the API key,
+              region, and endpoint (used as the target URI).
+            </Step>
+
+            <Step title="Enter the credentials">
+              Click **Connect** and enter the API key, region, and target URI.
+            </Step>
+          </Steps>
+        </div>
+      </Tab>
+      <Tab title="ElevenLabs">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get an ElevenLabs API key">
+              Navigate to the [ElevenLabs dashboard](https://elevenlabs.io/app/settings/api-keys) and create a new
+              API key.
+            </Step>
+
+            <Step title="Enter the API key">
+              Click **Connect** and enter the API key.
+            </Step>
+          </Steps>
+        </div>
+      </Tab>
+    </Tabs>
+  </Step>
+
+  <Step title="Set up Text-to-Speech (TTS) provider">
+    Configure a provider to convert AI responses into spoken audio.
+
+    <img className="rounded-image" src="/assets/admin/actions/voice_mode/tts.png" alt="Text-to-Speech Providers" />
+
+    <Tabs tabs={["OpenAI TTS-1", "OpenAI TTS-1 HD", "Azure TTS", "ElevenLabs"]}>
+      <Tab title="OpenAI TTS-1">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get an OpenAI API key">
+              Navigate to the [OpenAI dashboard](https://platform.openai.com/api-keys) and create a new API key
+              (or reuse the key you configured for OpenAI Whisper).
+            </Step>
+
+            <Step title="Enter the API key">
+              Click **Connect** and enter the API key.
+            </Step>
+          </Steps>
+        </div>
+      </Tab>
+      <Tab title="OpenAI TTS-1 HD">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get an OpenAI API key">
+              Navigate to the [OpenAI dashboard](https://platform.openai.com/api-keys) and create a new API key
+              (or reuse the key you configured for OpenAI Whisper).
+            </Step>
+
+            <Step title="Enter the API key">
+              Click **Connect** and enter the API key.
+            </Step>
+          </Steps>
+
+          <Note>
+            TTS-1 HD provides higher quality audio than TTS-1 but may have slightly higher latency and cost.
+          </Note>
+        </div>
+      </Tab>
+      <Tab title="Azure TTS">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get Azure Speech credentials">
+              Navigate to the [Azure Portal](https://portal.azure.com) and create a Speech resource. Copy the API key,
+              region, and endpoint (used as the target URI).
+            </Step>
+
+            <Step title="Enter the credentials">
+              Click **Connect** and enter the API key, region, and target URI.
+            </Step>
+          </Steps>
+        </div>
+      </Tab>
+      <Tab title="ElevenLabs">
+        <div style={{ marginTop: '-2rem' }}>
+          <Steps>
+            <Step title="Get an ElevenLabs API key">
+              Navigate to the [ElevenLabs dashboard](https://elevenlabs.io/app/settings/api-keys) and create a new
+              API key.
+            </Step>
+
+            <Step title="Enter the API key">
+              Click **Connect** and enter the API key.
+            </Step>
+          </Steps>
+        </div>
+      </Tab>
+    </Tabs>
+
+    Once your TTS provider is configured, select a voice from the dropdown or input a voice ID directly.
+
+    <img className="rounded-image" src="/assets/admin/actions/voice_mode/voice_selection.png" alt="Voice Selection" />
+  </Step>
+
+  <Step title="Set Default Provider">
+    If you have multiple providers configured, make sure to click **Set as Default** on both your preferred STT
+    provider and your preferred TTS provider.
+
+    <img className="rounded-image" src="/assets/admin/actions/voice_mode/set_default.png" alt="Set Default Provider" />
+  </Step>
+</Steps>
+
+<Note>
+  Make sure your users have microphone access enabled in their browser to use Voice Mode.
+</Note>
diff --git a/assets/admin/actions/voice_mode/set_default.png b/assets/admin/actions/voice_mode/set_default.png
diff --git a/assets/admin/actions/voice_mode/stt.png b/assets/admin/actions/voice_mode/stt.png
diff --git a/assets/admin/actions/voice_mode/tts.png b/assets/admin/actions/voice_mode/tts.png
diff --git a/assets/admin/actions/voice_mode/voice_mode_dashboard.png b/assets/admin/actions/voice_mode/voice_mode_dashboard.png
diff --git a/assets/admin/actions/voice_mode/voice_selection.png b/assets/admin/actions/voice_mode/voice_selection.png
diff --git a/assets/overview/core_features/user_settings.png b/assets/overview/core_features/user_settings.png
diff --git a/assets/overview/core_features/voice_mode_overview.png b/assets/overview/core_features/voice_mode_overview.png
diff --git a/assets/overview/core_features/voice_onyx_listening.png b/assets/overview/core_features/voice_onyx_listening.png
diff --git a/assets/overview/core_features/voice_onyx_speaking.png b/assets/overview/core_features/voice_onyx_speaking.png
diff --git a/assets/overview/core_features/voice_read_aloud.png b/assets/overview/core_features/voice_read_aloud.png
diff --git a/docs.json b/docs.json
@@ -35,6 +35,7 @@
               "overview/core_features/code_interpreter",
               "overview/core_features/image_generation",
               "overview/core_features/craft",
+              "overview/core_features/voice_mode",
               "overview/core_features/workflows"
             ]
           },
@@ -250,6 +251,7 @@
             "pages": [
               "admins/actions/overview",
               "admins/actions/web_search",
+              "admins/actions/voice_mode",
               "admins/actions/image_generation",
               "admins/actions/mcp",
               "admins/actions/openapi",

diff --git a/overview/core_features/voice_mode.mdx b/overview/core_features/voice_mode.mdx
@@ -0,0 +1,49 @@
+---
+title: "Voice Mode"
+description: "Interact with Onyx using voice"
+icon: "microphone"
+---
+
+## Overview
+
+<img className="rounded-image" src="/assets/overview/core_features/voice_mode_overview.png" alt="Voice Mode Interface"/>
+
+Voice Mode lets you interact with Onyx using speech instead of text. Speak your questions and hear responses read
+back. It is ideal for hands-free use, brainstorming, or quickly getting answers while multitasking.
+
+## Speech-to-Text
+
+<img className="rounded-image" src="/assets/overview/core_features/voice_onyx_listening.png" alt="Voice Mode listening"/>
+
+To use Voice Mode, click the **microphone** icon in the input bar. Onyx will begin listening for your input and
+transcribe your speech in real time.
+
+- **Speak naturally**: Onyx transcribes your speech as you talk.
+- **Pause**: Use the pause button to temporarily stop listening, then resume when you are ready.
+
+<Note>
+  Voice Mode requires microphone access in your browser. You will be prompted to grant permission on first use.
+</Note>
+
+## Text-to-Speech
+
+<img className="rounded-image" src="/assets/overview/core_features/voice_onyx_speaking.png" alt="Voice Mode speaking"/>
+
+Once your message is sent, Onyx generates a response and reads it back as audio. The full text is displayed alongside
+the audio playback.
+
+<img className="rounded-image" src="/assets/overview/core_features/voice_read_aloud.png" alt="Read Aloud button"/>
+
+If you don't have auto-playback enabled, you can press the **Read Aloud** button to have the response read back to you.
+
+## User Settings
+
+You can configure your voice preferences in the user settings panel:
+
+<img className="rounded-image" src="/assets/overview/core_features/user_settings.png" alt="Voice Mode Settings"/>
+
+- **Auto-send**: Automatically sends your message when you stop speaking, so you don't need to click send.
+- **Auto-playback**: When enabled, responses are read aloud automatically, and the microphone starts listening again
+  as soon as the response finishes speaking, enabling a continuous hands-free flow. If auto-playback is off, the
+  response is generated as text only and must be played manually with the **Read Aloud** button.
+- **Speed**: Adjust the playback speed of voice responses with a slider ranging from **0.5x** (slower) to **2x** (faster).