97 changes: 69 additions & 28 deletions README.md
@@ -27,11 +27,9 @@ pip install anam[display]
```python
import asyncio
from anam import AnamClient
from av.video.frame import VideoFrame
from av.audio.frame import AudioFrame

async def main():
# Create client with your API key and persona
# Create client with your API key and persona_id (for pre-defined personas)
client = AnamClient(
api_key="your-api-key",
persona_id="your-persona-id",
@@ -71,7 +69,7 @@ asyncio.run(main())
- 📚 **Message history tracking** - Automatic conversation history with incremental updates
- 🤖 **Audio-passthrough** - Send TTS generated audio input and receive rendered synchronized audio/video avatar
- 🗣️ **Direct text-to-speech** - Send text directly to TTS for immediate speech output (bypasses LLM processing)
- 🎤 **Real-time user audio input** - Send raw audio samples (e.g. from microphone) to Anam for processing (turnkey solution: STT → LLM → TTS → Face)
- 🎤 **Real-time user audio input** - Send raw audio samples (e.g. from microphone) to Anam for processing (turnkey solution: STT → LLM → TTS → Avatar)
- 📡 **Async iterator API** - Clean, Pythonic async/await patterns for continuous stream of audio/video frames
- 🎯 **Event-driven API** - Simple decorator-based event handlers for discrete events
- 📝 **Fully typed** - Complete type hints for IDE support
@@ -86,21 +84,25 @@ The main client class for connecting to Anam AI.
```python
from anam import AnamClient, PersonaConfig, ClientOptions

# Simple initialization
# Simple initialization for pre-defined personas - all other parameters are ignored except enable_audio_passthrough
client = AnamClient(
api_key="your-api-key",
persona_id="your-persona-id",
)

# Advanced initialization with full persona config
# Advanced initialization with full (ephemeral) persona config - ideal for programmatic configuration.
# Use avatar_id instead of persona_id.
client = AnamClient(
api_key="your-api-key",
persona_config=PersonaConfig(
persona_id="your-persona-id",
avatar_id="your-avatar-id",
voice_id="your-voice-id",
llm_id="your-llm-id",
name="My Assistant",
system_prompt="You are a helpful assistant...",
voice_id="emma",
avatar_model="cara-3",
language_code="en",
enable_audio_passthrough=False,
),
)
```
@@ -124,24 +126,25 @@ async with client.connect() as session:
# Both streams run concurrently
await asyncio.gather(process_video(), process_audio())
```

### User Audio Input

User audio input is real time audio such as microphone audio.
User audio is 16 bit PCM samples, mono or stereo, with any sample rate. In order to process the audio correctly, the sample rate needs to be provided.
The audio is forwarded in real-time as a webRTC audio track. In order to reduce latency, any audio provided before the webRTC audio track is created will be dropped.
User audio input is real-time audio such as microphone audio.
User audio must be 16-bit PCM samples, mono or stereo, at any sample rate. To process the audio correctly, the sample rate must be provided.
The audio is forwarded in real-time as a WebRTC audio track. To reduce latency, any audio provided before the WebRTC audio track is created is dropped.
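
If your capture layer delivers floating-point samples, as many audio libraries do, they must be converted to 16-bit PCM before being sent. The helper below is an illustrative sketch and not part of the `anam` SDK:

```python
import struct

def floats_to_pcm16(samples: list[float]) -> bytes:
    """Convert float samples in [-1.0, 1.0] to 16-bit little-endian PCM bytes."""
    # Clip to the valid range, then scale to the signed 16-bit range.
    ints = [int(max(-1.0, min(1.0, s)) * 32767) for s in samples]
    return struct.pack(f"<{len(ints)}h", *ints)

# 17 samples of mono audio become 34 bytes (2 bytes per sample).
pcm = floats_to_pcm16([0.0] * 16 + [1.0])
print(len(pcm))  # 34
```

The resulting bytes can then be handed to the user audio input together with the sample rate you captured at.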

### TTS audio (Audio Passthrough)

TTS audio is generated by a TTS engine, and should be provided in chunks through the `send_audio_chunk` method. The audio can be a byte array or base64 encoded strings (the SDK will convert to base64). The audio is ingested to the backend at max bandwidth. Sample_rate and channels need to be provided through the `AgentAudioInputConfig` object.
TTS audio is generated by a TTS engine and should be provided in chunks through the `send_audio_chunk` method. The audio can be a byte array or a base64-encoded string (the SDK will convert bytes to base64). The audio is sent to the backend at maximum upload speed. Sample rate and channels must be provided through the `AgentAudioInputConfig` object. When TTS audio finishes (e.g. at the end of a turn), call `end_sequence()` to signal completion. Without this, the backend keeps waiting for more chunks and the avatar will freeze.

For best performance, we suggest using 24kHz mono audio. The provided audio is returned in-sync with the avatar without any resampling. Sample rates lower than 24kHz will result in poor Avatar performance. Sample rates higher than 24kHz might impact latency without any noticeable improvement in audio quality.
For best performance, we suggest using 24kHz mono audio. The provided audio is returned in-sync with the avatar without any resampling. Sample rates lower than 24kHz will result in poor avatar performance. Sample rates higher than 24kHz might impact latency without any noticeable improvement in audio quality.
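
In practice, the TTS engine's output is split into small fixed-duration chunks before being sent. The chunking helper below is our own sketch (the 100 ms chunk size is a convention, not an SDK requirement); at 24kHz mono, 16-bit PCM is 48 bytes per millisecond:

```python
def iter_pcm_chunks(audio: bytes, chunk_ms: int = 100,
                    sample_rate: int = 24000, channels: int = 1):
    """Yield fixed-duration chunks of 16-bit PCM audio (2 bytes per sample)."""
    chunk_size = (sample_rate * channels * 2 // 1000) * chunk_ms
    for offset in range(0, len(audio), chunk_size):
        yield audio[offset:offset + chunk_size]

# One second of 24kHz mono silence splits into ten 100 ms chunks.
chunks = list(iter_pcm_chunks(b"\x00" * 48000))
print(len(chunks))  # 10
```

In a session, each chunk would go to `send_audio_chunk` on an agent audio input stream created with `AgentAudioInputConfig(encoding="pcm_s16le", sample_rate=24000, channels=1)`, followed by `end_sequence()` at the end of the turn; the exact placement of these calls on the stream object is an assumption here, so check the bundled examples for the precise call sites.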

### Events

Register callbacks for connection and message events using the `@client.on()` decorator:

```python
from anam import AnamEvent, Message,MessageRole, MessageStreamEvent
from anam import AnamEvent, Message, MessageRole, MessageStreamEvent

@client.on(AnamEvent.CONNECTION_ESTABLISHED)
async def on_connected():
@@ -232,8 +235,6 @@ async with client.connect() as session:
await session.wait_until_closed()
```



## Examples

### Save Video and Audio
@@ -311,7 +312,9 @@ asyncio.run(main())

```bash
export ANAM_API_KEY="your-api-key"
export ANAM_PERSONA_ID="your-persona-id"
export ANAM_AVATAR_ID="your-avatar-id"
export ANAM_VOICE_ID="your-voice-id"
export ANAM_LLM_ID="your-llm-id"
```

### Client Options
@@ -320,29 +323,67 @@ export ANAM_PERSONA_ID="your-persona-id"
from anam import ClientOptions

options = ClientOptions(
api_base_url="https://api.anam.ai", # API base URL
api_base_url="https://api.anam.ai", # API base URL
api_version="v1", # API version
ice_servers=None, # Custom ICE servers
ice_servers=None, # Custom ICE servers for WebRTC delivery
)
```

### Persona types

There are two types of personas:

- Pre-defined personas: use `persona_id` only. Other parameters are ignored except `enable_audio_passthrough`.
- Ephemeral personas: use `avatar_id`, `voice_id`, `llm_id`, `avatar_model`, `system_prompt`, `language_code` and `enable_audio_passthrough`.

#### Pre-defined personas

Pre-defined personas are built in [lab.anam.ai](https://lab.anam.ai) and combine avatar, voice and LLM. They cannot be changed after creation.
They are quick to set up for demos but offer less flexibility for production use.

```python
client = AnamClient(
api_key="your-api-key",
persona_id="your-persona-id",
)
```

### Persona Configuration
#### Ephemeral personas

Ephemeral personas give you full control over the individual components. Browse available avatars, voices, and LLMs at [lab.anam.ai](https://lab.anam.ai).
They are ideal for production environments where you need to configure the components programmatically at startup.

```python
from anam import PersonaConfig

# Ephemeral: specify avatar_id, voice_id, and optionally llm_id, avatar_model
persona = PersonaConfig(
persona_id="your-persona-id", # Required
name="Assistant", # Display name
avatar_id="anna_v2", # Avatar to use
voice_id="emma", # Voice to use
system_prompt="You are...", # Custom system prompt
language_code="en", # Language code
llm_id="gpt-4", # LLM model
max_session_length_seconds=300, # Max session duration
avatar_id="your-avatar-id", # From https://lab.anam.ai/avatars (do not use persona_id)
voice_id="your-voice-id", # From https://lab.anam.ai/voices
llm_id="your-llm-id", # From https://lab.anam.ai/llms (optional)
avatar_model="cara-3", # Video frame model (optional)
system_prompt="You are...", # See https://docs.anam.ai/concepts/prompting-guide
enable_audio_passthrough=False,
)
```

### Orchestration

Orchestration is the process of running a pipeline with different components to transform user audio into a response (STT → LLM → TTS → Avatar).
Anam allows two types of orchestration:

- **Anam's orchestration**: Anam receives user audio (or text messages) and runs the pipeline, with a default or custom LLM.
- **Custom orchestration**: Anam's orchestration is bypassed by directly providing TTS audio. The TTS audio is passed through directly to the avatar, without being added to the context or message history. This can be achieved by setting `enable_audio_passthrough=True`. See [TTS audio (Audio Passthrough)](#tts-audio-audio-passthrough) for more details.


### LLM options

Anam's orchestration layer allows you to choose between default LLMs or running your own custom LLMs:

- **Default LLMs**: Use Anam-provided models when you do not run your own.
- **Custom LLMs**: Anam connects to your LLM server-to-server. Add and test the connection at [lab.anam.ai/llms](https://lab.anam.ai/llms).
- **`CUSTOMER_CLIENT_V1`**: Your LLM is not directly connected to Anam. Use `MESSAGE_STREAM_EVENT_RECEIVED` to forward messages and send responses via talk stream (or `enable_audio_passthrough=True` for TTS). Higher latency; useful for niche use cases but not recommended for general applications.

## Error Handling

13 changes: 8 additions & 5 deletions examples/avatar_audio_passthrough.py
@@ -1,10 +1,12 @@
"""Interactive video session example with CLI controls.

This example shows how use Anam as an avatar provider where
the avatar is rendered based on input TTS audio. The video
and audio output are kept in sync. Video is displayed in
the orchetration is bypassed and the avatar is rendered based on input TTS audio.
The videoand audio output are kept in sync. Video is displayed in
a window using OpenCV, while audio is played through sounddevice.

Interrupts can be used to stop the ongoing avatar animation and TTS audio.

Requirements:
uv sync --extra display
# or: pip install opencv-python sounddevice
@@ -188,19 +190,20 @@ def main() -> None:
if not api_key or not avatar_id:
raise ValueError("Set ANAM_API_KEY and ANAM_AVATAR_ID environment variables")

# Create persona config
# Create persona config with audio passthrough enabled, to bypass Anam's orchestration layer:
# TTS audio is sent directly to the avatar and is not added to the LLM context or message history.
# Warning: Do not use persona_id as this will enable the pre-defined LLM and interfere with your application.

persona_config = PersonaConfig(
avatar_id=avatar_id,
enable_audio_passthrough=True,
)

# Create client
client = AnamClient(
api_key=api_key,
persona_config=persona_config,
options=ClientOptions(api_base_url=api_base_url),
)

# Create display and audio player
display = VideoDisplay()
audio_player = AudioPlayer()
100 changes: 68 additions & 32 deletions examples/persona_interactive_video.py
@@ -4,17 +4,21 @@
in a window using OpenCV while providing CLI controls for
interactive session management, where talk commands will be
spoken directly by the avatar, while text messages mimic
the transcribed audio and wav files can be sent as TTS audio.
the transcribed audio.

The persona config has enable_audio_passthrough=True for the TTS audio.
The persona config creates an ephemeral persona with enable_audio_passthrough=False to disable TTS ingest.
The avatar will respond directly to text messages and talk commands will be spoken verbatim.

Requirements:
uv sync --extra display
# or: pip install opencv-python sounddevice

Usage:
export ANAM_API_KEY="your-api-key"
export ANAM_PERSONA_ID="your-persona-id"
export ANAM_AVATAR_ID="your-avatar-id"
export ANAM_VOICE_ID="your-voice-id"
export ANAM_LLM_ID="your-llm-id"
export ANAM_AVATAR_MODEL="model-name" # optional, e.g. "cara-3"
uv run --extra display python examples/persona_interactive_video.py
"""

@@ -27,15 +31,25 @@
from dotenv import load_dotenv

from anam import AnamClient, AnamEvent, ClientOptions
from anam.types import AgentAudioInputConfig, MessageRole, PersonaConfig
from anam.types import MessageRole, PersonaConfig

# Add parent directory to path to allow importing from examples
sys.path.insert(0, str(Path(__file__).parent.parent))
from examples.utils import AudioPlayer, VideoDisplay, async_input, send_audio_file_chunked
from examples.utils import AudioPlayer, VideoDisplay, async_input

# Load environment variables
_ = load_dotenv()

REQUIRED_ENV_VARS = [
"ANAM_API_KEY",
"ANAM_AVATAR_ID",
"ANAM_LLM_ID",
"ANAM_VOICE_ID",
]
missing = [v for v in REQUIRED_ENV_VARS if not os.getenv(v)]
if missing:
raise EnvironmentError(f"Missing required environment variables: {', '.join(missing)}")

# Configure logging - reduced verbosity
logging.basicConfig(
level=logging.WARNING,
@@ -63,10 +77,9 @@ async def interactive_loop(session, display: VideoDisplay) -> None:
print("Interactive Session Started!")
print("=" * 60)
print("Available commands:")
print(" f [filename] - Send audio file (defaults to input.wav)")
print(" m <message> - Send text message (user input for the conversation.)")
print(" t <text> - Send talk command (bypasses LLM and sends text to TTS) usingREST API)")
print(" ts <text> - Send talk stream (bypasses LLM and sends text to TTS) using WebSocket)")
print(" t <text> - Send talk command (bypasses LLM and sends text to TTS using REST API)")
print(" ts <text> - Send talk stream (bypasses LLM and sends text to TTS using WebSocket, lower latency)")
print(" i - Interrupt current audio")
print(" c - Toggle live captions. Default: disabled")
print(" h - Toggle conversation history at session end. Default: disabled.")
@@ -94,19 +107,6 @@ async def interactive_loop(session, display: VideoDisplay) -> None:
show_captions = not show_captions
print(f"Captions {'enabled' if show_captions else 'disabled'}")

elif command == "f":
# Default to input.wav if no filename provided
wav_file = parts[1] if len(parts) > 1 else "input.wav"
wav_path = Path(wav_file)
if wav_path.exists():
print(f"Sending audio from {wav_file}...")
agent = session.create_agent_audio_input_stream(
AgentAudioInputConfig(encoding="pcm_s16le", sample_rate=24000, channels=1)
)
await send_audio_file_chunked(agent, wav_path)
else:
print(f"❌ File not found: {wav_file}")

elif command == "h":
print_conversation_history = not print_conversation_history
print(
Expand All @@ -126,9 +126,9 @@ async def interactive_loop(session, display: VideoDisplay) -> None:
print(f"❌ Error sending message: {e}")

elif command == "t" or command == "ts":
# Get the rest of the input as the message text
# Get the rest of the input as the talk (stream) command
if len(parts) < 2:
print("❌ Please provide talk command. Usage: t <text to be spoken>")
print("❌ Please provide talk (stream) command. Usage: t|ts <text to be spoken>")
continue
message_text = " ".join(parts[1:])
try:
@@ -141,9 +141,9 @@ async def interactive_loop(session, display: VideoDisplay) -> None:
end_of_speech=True,
correlation_id=None,
)
print(f"✅ Sent talk command: {message_text}")
print(f"✅ Sent talk (stream) command: {message_text}")
except Exception as e:
print(f"❌ Error sending talk command: {e}")
print(f"❌ Error sending talk (stream) command: {e}")

elif command == "i":
try:
@@ -278,18 +278,54 @@ def main() -> None:
"""Main entry point."""
# Get configuration from environment variables (loaded from .env file)
api_key = os.environ.get("ANAM_API_KEY", "").strip().strip('"')
persona_id = os.environ.get("ANAM_PERSONA_ID", "").strip().strip('"')
avatar_id = os.environ.get("ANAM_AVATAR_ID", "").strip().strip('"')
voice_id = os.environ.get("ANAM_VOICE_ID", "").strip().strip('"')
avatar_model = os.environ.get("ANAM_AVATAR_MODEL")
llm_id = os.environ.get("ANAM_LLM_ID", "").strip().strip('"')
api_base_url = os.environ.get("ANAM_API_BASE_URL", "https://api.anam.ai").strip().strip('"')

if not api_key or not persona_id:
raise ValueError("Set ANAM_API_KEY and ANAM_PERSONA_ID environment variables")
if not api_key or not avatar_id or not voice_id or not llm_id:
# These are required for an ephemeral persona configuration.
raise ValueError("Set ANAM_API_KEY, ANAM_AVATAR_ID, ANAM_LLM_ID and ANAM_VOICE_ID environment variables")

system_prompt = "You are a helpful and creative assistant. Respond in a conversational tone with short sentences and do not use special characters or emojis. Start your first message with 'Hello developer, Welcome to Anam. What can I help you with today?'"

# Ephemeral persona configuration (using Anam's orchestration: STT, LLM, TTS).
# Configure components at https://lab.anam.ai (avatars, voices, LLMs).
#
# Persona types:
# - Ephemeral (avatar_id + voice_id + ...): full control over components at startup.
# - Pre-defined (persona_id only): uses a persona from Lab; other params ignored except enable_audio_passthrough.
# Simpler for demos; limited for production (source control, adaptivity).
#
# Component IDs:
# - avatar_id: the "face" (https://lab.anam.ai/avatars). Do not use persona_id as avatar_id.
# - voice_id: voice for TTS (https://lab.anam.ai/voices).
# - llm_id: LLM for reasoning (https://lab.anam.ai/llms).
# - avatar_model: video frame model (e.g. "cara-3").
# - system_prompt: primes the LLM. See https://docs.anam.ai/concepts/prompting-guide
#
# enable_audio_passthrough:
# - False (default): Anam renders TTS; bot audio is added to context and message history.
# - True: TTS audio is passed through directly; not added to context/history.
# Typically leave False when using Anam's orchestration.
#
# LLM options:
# - Default LLMs: Anam-provided models when you do not run your own.
# - Custom LLMs: Anam connects to your LLM server-to-server. Add LLM and test connection at https://lab.anam.ai/llms.
# - CUSTOMER_CLIENT_V1: your LLM is not directly connected. Use MESSAGE_STREAM_EVENT_RECEIVED
# to forward messages and send responses via talk stream (or enable_audio_passthrough=True for TTS).
# Higher latency; not recommended for production.

# Create persona config
persona_config = PersonaConfig(
persona_id=persona_id,
enable_audio_passthrough=True,
avatar_id=avatar_id,
voice_id=voice_id,
llm_id=llm_id,
avatar_model=avatar_model,
system_prompt=system_prompt,
enable_audio_passthrough=False,
)

print(f"Using persona config: {persona_config}")
# Create client
client = AnamClient(
api_key=api_key,