diff --git a/examples/avatar_audio_passthrough.py b/examples/avatar_audio_passthrough.py index 663d3c8..0dc96f5 100644 --- a/examples/avatar_audio_passthrough.py +++ b/examples/avatar_audio_passthrough.py @@ -85,6 +85,9 @@ async def interactive_loop(session, display: VideoDisplay) -> None: AgentAudioInputConfig(encoding="pcm_s16le", sample_rate=24000, channels=1) ) await send_audio_file_chunked(agent, wav_path) + # When TTS audio is finished, signal end of sequence to the backend + # to let the avatar go back to listening mode + await agent.end_sequence() else: print(f"❌ File not found: {wav_file}") diff --git a/examples/persona_interactive_video.py b/examples/persona_interactive_video.py index f4fe615..0a641c2 100644 --- a/examples/persona_interactive_video.py +++ b/examples/persona_interactive_video.py @@ -18,7 +18,7 @@ export ANAM_AVATAR_ID="your-avatar-id" export ANAM_VOICE_ID="your-voice-id" export ANAM_LLM_ID="your-llm-id" - export ANAM_AVATAR_MODEL="model-name" # optional, e.g. "cara-3" + export ANAM_AVATAR_MODEL="model-name" # optional, e.g. "cara-3" uv run --extra display python examples/persona_interactive_video.py """ @@ -79,7 +79,9 @@ async def interactive_loop(session, display: VideoDisplay) -> None: print("Available commands:") print(" m - Send text message (user input for the conversation.)") print(" t - Send talk command (bypasses LLM and sends text to TTS) using REST API)") - print(" ts - Send talk stream (bypasses LLM and sends text to TTS) using WebSocket (lower latency)") + print( + " ts - Send talk stream (bypasses LLM and sends text to TTS) using WebSocket (lower latency)" + ) print(" i - Interrupt current audio") print(" c - Toggle live captions. Default: disabled") print(" h - Toggle conversation history at session end. Default: disabled.") @@ -128,7 +130,9 @@ async def interactive_loop(session, display: VideoDisplay) -> None: elif command == "t" or command == "ts": # Get the rest of the input as the talk (stream) command if len(parts) < 2: - print("❌ Please provide talk (stream) command. Usage: t|ts ") + print( + "❌ Please provide talk (stream) command. Usage: t|ts " + ) continue message_text = " ".join(parts[1:]) try: @@ -281,12 +285,14 @@ def main() -> None: avatar_id = os.environ.get("ANAM_AVATAR_ID", "").strip().strip('"') voice_id = os.environ.get("ANAM_VOICE_ID", "").strip().strip('"') avatar_model = os.environ.get("ANAM_AVATAR_MODEL") - llm_id = os.environ.get("ANAM_LLM_ID","").strip().strip('"') + llm_id = os.environ.get("ANAM_LLM_ID", "").strip().strip('"') api_base_url = os.environ.get("ANAM_API_BASE_URL", "https://api.anam.ai").strip().strip('"') - if not api_key or not avatar_id or not voice_id: + if not api_key or not avatar_id or not llm_id or not voice_id: # These are required for an ephemeral persona configuration. - raise ValueError("Set ANAM_API_KEY, ANAM_AVATAR_ID, ANAM_LLM_ID and ANAM_VOICE_ID environment variables") + raise ValueError( + "Set ANAM_API_KEY, ANAM_AVATAR_ID, ANAM_LLM_ID and ANAM_VOICE_ID environment variables" + ) system_prompt = "You are a helpful and creative assistant. Respond in a conversational tone with short sentences and do not use special characters or emojis. Start you first message with 'Hello developer, Welcome to Anam. What can I help you with today?'" diff --git a/examples/utils.py b/examples/utils.py index a3827c3..94179e4 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -117,7 +117,6 @@ async def send_audio_file_chunked( # Small delay between chunks await asyncio.sleep(0.01) - await agent.end_sequence() print(f"✅ Sent {chunk_count} audio chunks from {wav_file_path.name}") diff --git a/uv.lock b/uv.lock index 1915a2e..1200a81 100644 --- a/uv.lock +++ b/uv.lock @@ -188,7 +188,7 @@ wheels = [ [[package]] name = "anam" -version = "0.2.0a2" +version = "0.2.0" source = { editable = "." } dependencies = [ { name = "aiohttp" },