diff --git a/env.example b/env.example
index fef7f91f2f..927ff0cb85 100644
--- a/env.example
+++ b/env.example
@@ -194,6 +194,9 @@ TWILIO_AUTH_TOKEN=...
 # Ultravox Realtime
 ULTRAVOX_API_KEY=...
 
+# Respeecher
+RESPEECHER_API_KEY=...
+
 # WhatsApp
 WHATSAPP_TOKEN=...
 WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
diff --git a/examples/foundational/07ad-interruptible-respeecher.py b/examples/foundational/07ad-interruptible-respeecher.py
new file mode 100644
index 0000000000..c07183075a
--- /dev/null
+++ b/examples/foundational/07ad-interruptible-respeecher.py
@@ -0,0 +1,126 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.respeecher.tts import RespeecherTTSService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
+from pipecat.transports.services.daily import DailyParams
+
+load_dotenv(override=True)
+
+# We store factory functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated up front. The function is called once the desired transport
+# gets selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info("Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = RespeecherTTSService(
+        api_key=os.getenv("RESPEECHER_API_KEY"),
+        voice_id="samantha",
+        params=RespeecherTTSService.InputParams(
+            sampling_params={
+                # Optional sampling params overrides
+                # See https://space.respeecher.com/docs/api/tts/sampling-params-guide
+                # "temperature": 0.5
+            },
+        ),
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = OpenAILLMContext(messages)
+    context_aggregator = llm.create_context_aggregator(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([context_aggregator.user().get_context_frame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
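Reviewer note: the commented-out `"temperature": 0.5` above hints at how overrides reach the voice settings. A minimal sketch of actually enabling one, assuming `temperature` is a valid key per the linked sampling-params guide (other keys and valid ranges should be verified there):

```python
from pipecat.services.respeecher.tts import RespeecherTTSService

# "temperature" is the only key shown in the example's comment; treat any
# other sampling keys as assumptions to check against the guide.
params = RespeecherTTSService.InputParams(sampling_params={"temperature": 0.5})
```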
diff --git a/pyproject.toml b/pyproject.toml
index b1c03adf97..727ad9e6d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ perplexity = []
 playht = [ "pipecat-ai[websockets-base]" ]
 qwen = []
 remote-smart-turn = []
+respeecher = [ "respeecher>=1.0,<2" ]
 rime = [ "pipecat-ai[websockets-base]" ]
 riva = [ "pipecat-ai[nvidia]" ]
 runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.122.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
diff --git a/src/pipecat/services/respeecher/__init__.py b/src/pipecat/services/respeecher/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
new file mode 100644
index 0000000000..ad834eb422
--- /dev/null
+++ b/src/pipecat/services/respeecher/tts.py
@@ -0,0 +1,318 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Respeecher Space text-to-speech service implementation."""
+
+import base64
+import json
+import uuid
+from typing import AsyncGenerator, Optional
+
+from loguru import logger
+from pydantic import BaseModel, TypeAdapter, ValidationError
+
+from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
+    ErrorFrame,
+    Frame,
+    LLMFullResponseEndFrame,
+    StartFrame,
+    StartInterruptionFrame,
+    TTSAudioRawFrame,
+    TTSStartedFrame,
+    TTSStoppedFrame,
+)
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.tts_service import AudioContextTTSService
+from pipecat.utils.tracing.service_decorators import traced_tts
+
+# See env.example for the Respeecher configuration needed
+try:
+    from respeecher.tts import ContextfulGenerationRequestParams, StreamingOutputFormatParams
+    from respeecher.tts import Response as TTSResponse
+    from respeecher.voices import (
+        SamplingParamsParams as SamplingParams,  # TypedDict instead of a Pydantic model
+    )
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use Respeecher, you need to `pip install pipecat-ai[respeecher]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+class RespeecherTTSService(AudioContextTTSService):
+    """Respeecher Space TTS service with WebSocket streaming and audio contexts.
+
+    Provides text-to-speech using Respeecher's streaming WebSocket API.
+    Supports audio context management and voice customization via sampling parameters.
+    """
+
+    class InputParams(BaseModel):
+        """Input parameters for Respeecher TTS configuration.
+
+        Parameters:
+            sampling_params: Sampling parameters used for speech synthesis.
+        """
+
+        sampling_params: SamplingParams = {}
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        voice_id: str,
+        model: str = "public/tts/en-rt",
+        url: str = "wss://api.respeecher.com/v1",
+        sample_rate: Optional[int] = None,
+        params: Optional[InputParams] = None,
+        aggregate_sentences: bool = False,
+        **kwargs,
+    ):
+        """Initialize the Respeecher TTS service.
+
+        Args:
+            api_key: Respeecher API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            model: Model path for the Respeecher TTS API.
+            url: WebSocket base URL for the Respeecher TTS API.
+            sample_rate: Audio sample rate. If None, uses the default.
+            params: Additional input parameters for voice customization.
+            aggregate_sentences: Whether to aggregate text into sentences client-side.
+            **kwargs: Additional arguments passed to the parent service.
+        """
+        super().__init__(
+            push_text_frames=False,
+            pause_frame_processing=True,
+            sample_rate=sample_rate,
+            aggregate_sentences=aggregate_sentences,
+            **kwargs,
+        )
+
+        params = params or RespeecherTTSService.InputParams()
+
+        self._api_key = api_key
+        self._url = url
+        self._output_format: StreamingOutputFormatParams = {
+            "encoding": "pcm_s16le",
+            "sample_rate": sample_rate or 0,
+        }
+        self._settings = {"sampling_params": params.sampling_params}
+        self.set_model_name(model)
+        self.set_voice(voice_id)
+
+        self._context_id: str | None = None
+        self._receive_task = None
+
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, since this service supports metrics generation.
+        """
+        return True
+
+    async def set_model(self, model: str):
+        """Set the TTS model and reconnect.
+
+        Args:
+            model: The model name to use for synthesis.
+        """
+        self._model_id = model
+        await super().set_model(model)
+        logger.info(f"Switching TTS model to: [{model}]")
+        await self._disconnect()
+        await self._connect()
+
+    def _build_request(self, text: str, continue_transcript: bool = True):
+        assert self._context_id is not None
+
+        request: ContextfulGenerationRequestParams = {
+            "transcript": text,
+            "continue": continue_transcript,
+            "context_id": self._context_id,
+            "voice": {
+                "id": self._voice_id,
+                "sampling_params": self._settings["sampling_params"],
+            },
+            "output_format": self._output_format,
+        }
+
+        return json.dumps(request)
+
+    async def start(self, frame: StartFrame):
+        """Start the Respeecher TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._output_format["sample_rate"] = self.sample_rate
+        await self._connect()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the Respeecher TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        await self._disconnect()
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the Respeecher TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._disconnect()
+
+    async def _connect(self):
+        await self._connect_websocket()
+
+        if self._websocket and not self._receive_task:
+            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
+
+    async def _disconnect(self):
+        if self._receive_task:
+            await self.cancel_task(self._receive_task)
+            self._receive_task = None
+
+        await self._disconnect_websocket()
+
+    async def _connect_websocket(self):
+        try:
+            if self._websocket and self._websocket.state is State.OPEN:
+                return
+            logger.debug("Connecting to Respeecher")
+            self._websocket = await websocket_connect(
+                f"{self._url}/{self._model_name}/tts/websocket?api_key={self._api_key}"
+            )
+        except Exception as e:
+            logger.error(f"{self} initialization error: {e}")
+            self._context_id = None
+            self._websocket = None
+            await self._call_event_handler("on_connection_error", f"{e}")
+
+    async def _disconnect_websocket(self):
+        try:
+            await self.stop_all_metrics()
+
+            if self._websocket:
+                logger.debug("Disconnecting from Respeecher")
+                await self._websocket.close()
+        except Exception as e:
+            logger.error(f"{self} error closing websocket: {e}")
+        finally:
+            self._websocket = None
+
+    def _get_websocket(self):
+        if self._websocket:
+            return self._websocket
+        raise Exception("Websocket not connected")
+
+    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+        await super()._handle_interruption(frame, direction)
+        await self.stop_all_metrics()
+        if self._context_id:
+            cancel_request = json.dumps({"context_id": self._context_id, "cancel": True})
+            await self._get_websocket().send(cancel_request)
+            self._context_id = None
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with context awareness.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
+        await super().process_frame(frame, direction)
+
+        if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
+            await self.flush_audio()
+
+    async def flush_audio(self):
+        """Flush any pending audio and finalize the current context."""
+        if not self._context_id or not self._websocket:
+            return
+        logger.trace(f"{self}: flushing audio")
+        flush_request = self._build_request(text="", continue_transcript=False)
+        await self._websocket.send(flush_request)
+        self._context_id = None
+
+    async def _receive_messages(self):
+        async for message in self._get_websocket():
+            try:
+                response = TypeAdapter(TTSResponse).validate_json(message)
+            except ValidationError as e:
+                logger.error(f"{self} cannot parse message: {e}")
+                continue
+
+            if response.context_id is not None and not self.audio_context_available(
+                response.context_id
+            ):
+                logger.error(
+                    f"{self} error, received {response.type} for unknown context_id: {response.context_id}"
+                )
+                continue
+
+            if response.type == "error":
+                logger.error(f"{self} error: {response}")
+                await self.push_frame(TTSStoppedFrame())
+                await self.stop_all_metrics()
+                await self.push_error(ErrorFrame(f"{self} error: {response.error}"))
+                continue
+
+            if response.type == "done":
+                await self.stop_ttfb_metrics()
+                await self.remove_audio_context(response.context_id)
+            elif response.type == "chunk":
+                await self.stop_ttfb_metrics()
+                frame = TTSAudioRawFrame(
+                    audio=base64.b64decode(response.data),
+                    sample_rate=self.sample_rate,
+                    num_channels=1,
+                )
+                await self.append_to_audio_context(response.context_id, frame)
+
+    @traced_tts
+    async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
+        """Generate speech from text using Respeecher's streaming API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: TTS control frames; audio is delivered via the audio context.
+        """
+        logger.debug(f"{self}: Generating TTS [{text}]")
+
+        try:
+            if not self._websocket or self._websocket.state is State.CLOSED:
+                await self._connect()
+
+            if not self._context_id:
+                await self.start_ttfb_metrics()
+                yield TTSStartedFrame()
+                self._context_id = str(uuid.uuid4())
+                await self.create_audio_context(self._context_id)
+
+            generation_request = self._build_request(text=text)
+
+            try:
+                await self._get_websocket().send(generation_request)
+                await self.start_tts_usage_metrics(text)
+            except Exception as e:
+                logger.error(f"{self} error sending message: {e}")
+                yield TTSStoppedFrame()
+                await self._disconnect()
+                await self._connect()
+                return
+            yield None
+        except Exception as e:
+            logger.error(f"{self} exception: {e}")
diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 39a6078e09..8983dea23f 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -7,13 +7,14 @@
 """Base classes for Text-to-speech services."""
 
 import asyncio
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from typing import (
     Any,
     AsyncGenerator,
     AsyncIterator,
     Awaitable,
     Callable,
+    Coroutine,
     Dict,
     List,
     Mapping,
@@ -51,6 +52,7 @@
 from pipecat.services.ai_service import AIService
 from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
+from pipecat.utils.asyncio.task_manager import BaseTaskManager
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.base_text_filter import BaseTextFilter
 from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator
@@ -463,12 +465,7 @@ async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirect
             direction: The direction to push the frame.
         """
         if self._push_silence_after_stop and isinstance(frame, TTSStoppedFrame):
-            silence_num_bytes = int(self._silence_time_s * self.sample_rate * 2)  # 16-bit
-            silence_frame = TTSAudioRawFrame(
-                audio=b"\x00" * silence_num_bytes,
-                sample_rate=self.sample_rate,
-                num_channels=1,
-            )
+            silence_frame = self.silence_frame(self._silence_time_s)
             silence_frame.transport_destination = self._transport_destination
             await self.push_frame(silence_frame)
 
@@ -485,6 +482,20 @@ async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirect
         ):
             await self._stop_frame_queue.put(frame)
 
+    def silence_frame(self, duration_s: float) -> TTSAudioRawFrame:
+        """Create a frame of silence.
+
+        Args:
+            duration_s: Silence duration in seconds.
+        """
+        silence_num_bytes = int(duration_s * self.sample_rate * 2)  # 16-bit
+
+        return TTSAudioRawFrame(
+            audio=b"\x00" * silence_num_bytes,
+            sample_rate=self.sample_rate,
+            num_channels=1,
+        )
+
     async def _stream_audio_frames_from_iterator(
         self, iterator: AsyncIterator[bytes], *, strip_wav_header: bool
     ) -> AsyncGenerator[Frame, None]:
@@ -901,29 +912,15 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
             self._bot_speaking = False
 
 
-class AudioContextWordTTSService(WebsocketWordTTSService):
-    """Websocket-based TTS service with word timestamps and audio context management.
+class _AudioContextServiceMixin(ABC):
+    """Mixin that adds audio context management to a TTS service.
 
-    This is a base class for websocket-based TTS services that support word
-    timestamps and also allow correlating the generated audio with the requested
-    text.
-
-    Each request could be multiple sentences long which are grouped by
-    context. For this to work, the TTS service needs to support handling
-    multiple requests at once (i.e. multiple simultaneous contexts).
-
-    The audio received from the TTS will be played in context order. That is, if
-    we requested audio for a context "A" and then audio for context "B", the
-    audio from context ID "A" will be played first.
+    This class deliberately inherits from no service base class, so the
+    concrete classes below can mix it in without diamond inheritance.
     """
 
-    def __init__(self, **kwargs):
-        """Initialize the Audio Context Word TTS service.
-
-        Args:
-            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
-        """
-        super().__init__(**kwargs)
+    def __init__(self):
+        """Initialize the mixin's audio context state."""
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
 
@@ -976,43 +973,6 @@ def audio_context_available(self, context_id: str) -> bool:
         """
         return context_id in self._contexts
 
-    async def start(self, frame: StartFrame):
-        """Start the audio context TTS service.
-
-        Args:
-            frame: The start frame containing initialization parameters.
-        """
-        await super().start(frame)
-        self._create_audio_context_task()
-
-    async def stop(self, frame: EndFrame):
-        """Stop the audio context TTS service.
-
-        Args:
-            frame: The end frame.
-        """
-        await super().stop(frame)
-        if self._audio_context_task:
-            # Indicate no more audio contexts are available. this will end the
-            # task cleanly after all contexts have been processed.
-            await self._contexts_queue.put(None)
-            await self._audio_context_task
-            self._audio_context_task = None
-
-    async def cancel(self, frame: CancelFrame):
-        """Cancel the audio context TTS service.
-
-        Args:
-            frame: The cancel frame.
-        """
-        await super().cancel(frame)
-        await self._stop_audio_context_task()
-
-    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
-        await super()._handle_interruption(frame, direction)
-        await self._stop_audio_context_task()
-        self._create_audio_context_task()
-
     def _create_audio_context_task(self):
         if not self._audio_context_task:
             self._contexts_queue = asyncio.Queue()
@@ -1038,12 +998,10 @@ async def _audio_context_task_handler(self):
 
                     # We just finished processing the context, so we can safely remove it.
                     del self._contexts[context_id]
 
-                    # Append some silence between sentences.
-                    silence = b"\x00" * self.sample_rate
-                    frame = TTSAudioRawFrame(
-                        audio=silence, sample_rate=self.sample_rate, num_channels=1
-                    )
-                    await self.push_frame(frame)
+                    # Append one second of silence between contexts.
+                    SILENCE_BETWEEN_CONTEXTS_S = 1
+                    silence_frame = self.silence_frame(SILENCE_BETWEEN_CONTEXTS_S)
+                    await self.push_frame(silence_frame)
                 else:
                     running = False
@@ -1064,3 +1022,149 @@ async def _handle_audio_context(self, context_id: str):
                     # We didn't get audio, so let's consider this context finished.
                     logger.trace(f"{self} time out on audio context {context_id}")
                     break
+
+    @abstractmethod
+    def create_task(self, coroutine: Coroutine) -> asyncio.Task:
+        pass
+
+    @abstractmethod
+    async def cancel_task(self, task: asyncio.Task) -> None:
+        pass
+
+    @abstractmethod
+    async def push_frame(self, frame: Frame) -> None:
+        pass
+
+    @abstractmethod
+    def silence_frame(self, duration_s: float) -> TTSAudioRawFrame:
+        pass
+
+    @property
+    @abstractmethod
+    def task_manager(self) -> BaseTaskManager:
+        pass
+
+
+class AudioContextTTSService(WebsocketTTSService, _AudioContextServiceMixin):
+    """Websocket-based TTS service with audio context management.
+
+    This is a base class for websocket-based TTS services that allow correlating
+    the generated audio with the requested text.
+
+    Each request could be multiple sentences long which are grouped by
+    context. For this to work, the TTS service needs to support handling
+    multiple requests at once (i.e. multiple simultaneous contexts).
+
+    The audio received from the TTS will be played in context order. That is, if
+    we requested audio for a context "A" and then audio for context "B", the
+    audio from context ID "A" will be played first.
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize the Audio Context TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketTTSService.
+        """
+        WebsocketTTSService.__init__(self, **kwargs)
+        _AudioContextServiceMixin.__init__(self)
+
+    async def start(self, frame: StartFrame):
+        """Start the audio context TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._create_audio_context_task()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the audio context TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        if self._audio_context_task:
+            # Indicate no more audio contexts are available. This will end the
+            # task cleanly after all contexts have been processed.
+            await self._contexts_queue.put(None)
+            await self.wait_for_task(self._audio_context_task)
+            self._audio_context_task = None
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the audio context TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._stop_audio_context_task()
+
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
+        await super()._handle_interruption(frame, direction)
+        await self._stop_audio_context_task()
+        self._create_audio_context_task()
+
+
+class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextServiceMixin):
+    """Websocket-based TTS service with word timestamps and audio context management.
+
+    This is a base class for websocket-based TTS services that support word
+    timestamps and also allow correlating the generated audio with the requested
+    text.
+
+    Each request could be multiple sentences long which are grouped by
+    context. For this to work, the TTS service needs to support handling
+    multiple requests at once (i.e. multiple simultaneous contexts).
+
+    The audio received from the TTS will be played in context order. That is, if
+    we requested audio for a context "A" and then audio for context "B", the
+    audio from context ID "A" will be played first.
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize the Audio Context Word TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
+        """
+        WebsocketWordTTSService.__init__(self, **kwargs)
+        _AudioContextServiceMixin.__init__(self)
+
+    async def start(self, frame: StartFrame):
+        """Start the audio context TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._create_audio_context_task()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the audio context TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        if self._audio_context_task:
+            # Indicate no more audio contexts are available. This will end the
+            # task cleanly after all contexts have been processed.
+            await self._contexts_queue.put(None)
+            await self.wait_for_task(self._audio_context_task)
+            self._audio_context_task = None
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the audio context TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._stop_audio_context_task()
+
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
+        await super()._handle_interruption(frame, direction)
+        await self._stop_audio_context_task()
+        self._create_audio_context_task()
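Reviewer note on the `_AudioContextServiceMixin` refactor above: the concrete classes call each base's `__init__` explicitly instead of relying on cooperative `super().__init__(**kwargs)`, so the mixin never has to forward constructor kwargs it doesn't understand. A minimal sketch of the pattern with toy classes (names hypothetical, not the real pipecat hierarchy):

```python
# Toy illustration of the explicit-__init__ multiple-inheritance pattern.
class Transportish:
    def __init__(self, **kwargs):
        self.kwargs = kwargs  # stands in for WebsocketTTSService.__init__


class ContextMixin:
    def __init__(self):
        self.contexts = {}  # stands in for the mixin's audio context state


class Service(Transportish, ContextMixin):
    def __init__(self, **kwargs):
        # Each base is initialized exactly once; the mixin takes no kwargs.
        Transportish.__init__(self, **kwargs)
        ContextMixin.__init__(self)


svc = Service(sample_rate=24000)
assert svc.kwargs == {"sample_rate": 24000} and svc.contexts == {}
```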
"sha256:ad2ed64516b140a122d96704e32bc0f92b23f45419e8b8f478e5a05f83edcebd", size = 3076529, upload-time = "2024-03-05T20:36:02.439Z" }, ] +[[package]] +name = "respeecher" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/a5/9eed1454f3c737733dffbd66c2212ea6a72168442393db6b52c138ba4dc7/respeecher-1.0.1.tar.gz", hash = "sha256:74a1f423d8a5fdd600b1634a1830525a0cb3996252687cc477c76254ce4d79a1", size = 28762, upload-time = "2025-08-25T10:50:53.533Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/bf/adc9d59abfdc6fee22ea8af97088b1713e090d4ab640a53d49e01e38b863/respeecher-1.0.1-py3-none-any.whl", hash = "sha256:7c940791cc310f939b2a9b7dc99f11830402b6b7eb934d7fb0282b38386f1f5c", size = 50565, upload-time = "2025-08-25T10:50:52.537Z" }, +] + [[package]] name = "rich" version = "14.1.0"