diff --git a/admins/actions/voice_mode.mdx b/admins/actions/voice_mode.mdx new file mode 100644 index 00000000..9d485d7e --- /dev/null +++ b/admins/actions/voice_mode.mdx @@ -0,0 +1,153 @@ +--- +title: "Voice Mode" +description: "Set up voice providers to enable Voice Mode for your Onyx instance" +icon: "microphone" +--- + +The Voice Mode action allows your users to have spoken conversations with Onyx using speech-to-text and text-to-speech +providers. + +## Setting up Voice Mode + + + + Click your user profile icon and select Admin Panel, then select the Voice Mode tab in the sidebar. + + Voice Mode dashboard in Onyx Admin Panel + + + + Configure a provider to transcribe user speech into text. + + Speech-to-Text Providers + + + +
+ + + Navigate to the [OpenAI dashboard](https://platform.openai.com/api-keys) and create a new API key, or + reuse an existing OpenAI API key already configured for your LLM provider. + + + + Click **Connect** and enter the API key. + + +
+
+ +
+ + + Navigate to the [Azure Portal](https://portal.azure.com) and create a Speech resource. Copy the API key, + region, and endpoint (used as the target URI in the next step). + + + + Click **Connect** and enter the API key, region, and target URI. + + +
+
+ +
+ + + Navigate to the [ElevenLabs dashboard](https://elevenlabs.io/app/settings/api-keys) and create a new + API key. + + + + Click **Connect** and enter the API key. + + +
+
+
+
+ + + Configure a provider to convert AI responses into spoken audio. + + Text-to-Speech Providers + + + +
+ + + Navigate to the [OpenAI dashboard](https://platform.openai.com/api-keys) and create a new API key + (or reuse the one from Whisper). + + + + Click **Connect** and enter the API key. + + +
+
+ +
+ + + Navigate to the [OpenAI dashboard](https://platform.openai.com/api-keys) and create a new API key + (or reuse the one from Whisper). + + + + Click **Connect** and enter the API key. + + + + + TTS-1 HD provides higher quality audio than TTS-1 but may have slightly higher latency and cost. + +
+
+ +
+ + + Navigate to the [Azure Portal](https://portal.azure.com) and create a Speech resource. Copy the API key, + region, and endpoint (used as the target URI in the next step). + + + + Click **Connect** and enter the API key, region, and target URI. + + +
+
+ +
+ + + Navigate to the [ElevenLabs dashboard](https://elevenlabs.io/app/settings/api-keys) and create a new + API key. + + + + Click **Connect** and enter the API key. + + +
+
+
+ + Once your TTS provider is configured, select a voice from the dropdown or input a voice ID directly. + + Voice Selection +
+ + + If you have multiple providers configured, make sure to click **Set as Default** on both your preferred STT + and TTS providers. + + Set Default Provider + +
+ + + Make sure your users have microphone access enabled in their browser to use Voice Mode. + diff --git a/assets/admin/actions/voice_mode/set_default.png b/assets/admin/actions/voice_mode/set_default.png new file mode 100644 index 00000000..338623a3 Binary files /dev/null and b/assets/admin/actions/voice_mode/set_default.png differ diff --git a/assets/admin/actions/voice_mode/stt.png b/assets/admin/actions/voice_mode/stt.png new file mode 100644 index 00000000..310deea6 Binary files /dev/null and b/assets/admin/actions/voice_mode/stt.png differ diff --git a/assets/admin/actions/voice_mode/tts.png b/assets/admin/actions/voice_mode/tts.png new file mode 100644 index 00000000..ffe78a1f Binary files /dev/null and b/assets/admin/actions/voice_mode/tts.png differ diff --git a/assets/admin/actions/voice_mode/voice_mode_dashboard.png b/assets/admin/actions/voice_mode/voice_mode_dashboard.png new file mode 100644 index 00000000..8901f65f Binary files /dev/null and b/assets/admin/actions/voice_mode/voice_mode_dashboard.png differ diff --git a/assets/admin/actions/voice_mode/voice_selection.png b/assets/admin/actions/voice_mode/voice_selection.png new file mode 100644 index 00000000..28c0ff71 Binary files /dev/null and b/assets/admin/actions/voice_mode/voice_selection.png differ diff --git a/assets/overview/core_features/user_settings.png b/assets/overview/core_features/user_settings.png new file mode 100644 index 00000000..8b5273dc Binary files /dev/null and b/assets/overview/core_features/user_settings.png differ diff --git a/assets/overview/core_features/voice_mode_overview.png b/assets/overview/core_features/voice_mode_overview.png new file mode 100644 index 00000000..c567c347 Binary files /dev/null and b/assets/overview/core_features/voice_mode_overview.png differ diff --git a/assets/overview/core_features/voice_onyx_listening.png b/assets/overview/core_features/voice_onyx_listening.png new file mode 100644 index 00000000..faf92391 Binary files /dev/null and 
b/assets/overview/core_features/voice_onyx_listening.png differ diff --git a/assets/overview/core_features/voice_onyx_speaking.png b/assets/overview/core_features/voice_onyx_speaking.png new file mode 100644 index 00000000..1259eaf4 Binary files /dev/null and b/assets/overview/core_features/voice_onyx_speaking.png differ diff --git a/assets/overview/core_features/voice_read_aloud.png b/assets/overview/core_features/voice_read_aloud.png new file mode 100644 index 00000000..feb03539 Binary files /dev/null and b/assets/overview/core_features/voice_read_aloud.png differ diff --git a/docs.json b/docs.json index 080af03a..3894a921 100644 --- a/docs.json +++ b/docs.json @@ -35,6 +35,7 @@ "overview/core_features/code_interpreter", "overview/core_features/image_generation", "overview/core_features/craft", + "overview/core_features/voice_mode", "overview/core_features/workflows" ] }, @@ -250,6 +251,7 @@ "pages": [ "admins/actions/overview", "admins/actions/web_search", + "admins/actions/voice_mode", "admins/actions/image_generation", "admins/actions/mcp", "admins/actions/openapi", diff --git a/overview/core_features/voice_mode.mdx b/overview/core_features/voice_mode.mdx new file mode 100644 index 00000000..76161d4b --- /dev/null +++ b/overview/core_features/voice_mode.mdx @@ -0,0 +1,49 @@ +--- +title: "Voice Mode" +description: "Interact with Onyx using voice" +icon: "microphone" +--- + +## Overview + +Voice Mode Interface + +Voice Mode lets you interact with Onyx using speech instead of text. Speak your questions and hear responses read +back. Ideal for hands-free use, brainstorming, or quickly getting answers while multitasking. + +## Speech-to-Text + +Voice Mode listening + +To use Voice Mode, click the **microphone** icon in the input bar. Onyx will begin listening for your input and +transcribe your speech in real time. + +- **Speak naturally**: Onyx transcribes your speech as you talk. +- **Pause**: Use the pause button to temporarily stop and resume when ready. 
+ + + Voice Mode requires microphone access in your browser. You will be prompted to grant permission on first use. + + +## Text-to-Speech + +Voice Mode speaking + +Once your message is sent, Onyx generates a response and, when auto-playback is enabled, reads it back as audio. The +full text is displayed alongside the audio playback. + +Read Aloud button + +If you don't have auto-playback enabled, you can press the **Read Aloud** button to have the response read back to you. + +## User Settings + +You can configure your voice preferences in the user settings panel: + +Voice Mode Settings + +- **Auto-send**: Automatically sends your message when you stop speaking, so you don't need to click send. +- **Auto-playback**: When enabled, responses are read aloud automatically and the microphone starts listening again as + soon as the response finishes speaking, enabling a continuous hands-free flow. If auto-playback is off, the response + is generated as text only and must be played manually with the **Read Aloud** button. +- **Speed**: Adjust the playback speed of voice responses with a slider ranging from **0.5x** (slower) to **2x** (faster).