diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64352d4..f58c079 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,24 @@ jobs: restore-keys: | ${{ runner.os }}-go- + - name: Install PortAudio (Ubuntu) + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y portaudio19-dev + + - name: Install PortAudio (macOS) + if: runner.os == 'macOS' + run: brew install portaudio + + - name: Install PortAudio (Windows) + if: runner.os == 'Windows' + run: | + vcpkg install portaudio:x64-windows + echo "PKG_CONFIG_PATH=C:/vcpkg/installed/x64-windows/lib/pkgconfig" >> $env:GITHUB_ENV + echo "CGO_CFLAGS=-IC:/vcpkg/installed/x64-windows/include" >> $env:GITHUB_ENV + echo "CGO_LDFLAGS=-LC:/vcpkg/installed/x64-windows/lib -lportaudio" >> $env:GITHUB_ENV + # Add DLL directory to PATH so tests can find portaudio.dll at runtime + echo "C:/vcpkg/installed/x64-windows/bin" >> $env:GITHUB_PATH + - name: Download dependencies run: go mod download @@ -58,6 +76,9 @@ jobs: with: go-version: "1.24.4" + - name: Install PortAudio + run: sudo apt-get update && sudo apt-get install -y portaudio19-dev + - name: Run golangci-lint uses: golangci/golangci-lint-action@v8 with: @@ -76,6 +97,9 @@ jobs: with: go-version: "1.24.4" + - name: Install PortAudio + run: sudo apt-get update && sudo apt-get install -y portaudio19-dev + - name: Build run: make build diff --git a/.gitignore b/.gitignore index 704d373..7b50a4f 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,7 @@ Thumbs.db npm-wrapper/bin/ npm-wrapper/node_modules/ npm-wrapper/*.tgz -**/node_modules/ \ No newline at end of file +**/node_modules/ +# Ignore stray binaries +vapi-cli + diff --git a/Makefile b/Makefile index f67620c..dd88d53 100644 --- a/Makefile +++ b/Makefile @@ -129,6 +129,15 @@ lint: @echo "Running linters..." golangci-lint run +# Format Go code +fmt: + @echo "Formatting Go code..." + @$(GOCMD) fmt ./... + @echo "āœ… Go code formatted" + +# Alias +format: fmt + # Run all linters (CLI + MCP server) lint-all: lint lint-mcp @@ -195,6 +204,7 @@ help: @echo " man-pages Generate Unix manual pages" @echo " install Install the CLI and manual pages to ~/.local/" @echo " test Run CLI tests" + @echo " fmt Format Go code" @echo " lint Run CLI linters" @echo " clean Clean CLI build artifacts" @echo "" @@ -228,4 +238,4 @@ help: @echo " make version-set VERSION=1.2.3" @echo " make publish-mcp # Publish MCP server to npm" -.PHONY: all build build-mcp build-all test test-mcp test-all test-coverage clean clean-mcp clean-all tidy deps mcp-deps deps-all lint lint-mcp lint-all man-pages install install-mcp install-all run publish-mcp help \ No newline at end of file +.PHONY: all build build-mcp build-all test test-mcp test-all test-coverage clean clean-mcp clean-all tidy deps mcp-deps deps-all lint lint-mcp lint-all fmt format man-pages install install-mcp install-all run publish-mcp help \ No newline at end of file diff --git a/README.md b/README.md index 5439f6c..1d64c10 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,18 @@ iex ((New-Object System.Net.WebClient).DownloadString('https://vapi.ai/install.p Both scripts automatically detect your platform and install the latest version. +### Audio prerequisite (PortAudio) + +For voice features (microphone and speaker I/O), the CLI relies on the PortAudio runtime. 
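+Not sure whether PortAudio is already on your system? A quick check (assuming
+`pkg-config` is installed) is:
+
+```bash
+# Prints the installed PortAudio version (e.g. "19") when the dev files are present
+pkg-config --modversion portaudio-2.0
+```
+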
Install it with your OS package manager: + +- macOS: `brew install portaudio` +- Debian/Ubuntu: `sudo apt-get update && sudo apt-get install -y libportaudio2 portaudio19-dev` +- Fedora/RHEL: `sudo dnf install -y portaudio portaudio-devel` +- Arch Linux: `sudo pacman -S portaudio` +- Windows: Install PortAudio and ensure `portaudio.dll` is on your PATH (e.g., via vcpkg: `vcpkg install portaudio`, or download the official binary and place the DLL alongside `vapi.exe`). + +If PortAudio is not installed, commands that use voice I/O (like `vapi call voice`) will fail at runtime. + ### Docker ```bash diff --git a/cmd/voice.go b/cmd/voice.go new file mode 100644 index 0000000..5b9bb82 --- /dev/null +++ b/cmd/voice.go @@ -0,0 +1,426 @@ +/* +Copyright Ā© 2025 Vapi, Inc. + +Licensed under the MIT License (the "License"); +you may not use this file except in compliance with the License. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Authors: + + Dan Goosewin +*/ +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + vapi "github.com/VapiAI/server-sdk-go" + "github.com/spf13/cobra" + + "github.com/VapiAI/cli/pkg/voice" +) + +var ( + configFile string + audioInputDevice string + audioOutputDevice string + noVideo bool + callTimeout int + audioDebug bool + + // Transient assistant configuration + assistantName string + firstMessage string + voiceID string + model string + systemMessage string +) + +// Voice call management commands +var voiceCmd = &cobra.Command{ + Use: "voice [assistant-id]", + Short: "Start voice call with assistant", + Long: `Start a real-time voice call with a Vapi assistant. + +This command creates a WebSocket connection using Vapi's native transport, +enabling bidirectional audio streaming for natural conversations. + +You can either use an existing assistant ID or create a transient assistant +by specifying configuration flags. + +Voice Call Flow: + 1. Creates a call via Vapi's /call endpoint with WebSocket transport + 2. Establishes WebSocket connection to Vapi's audio transport + 3. Streams microphone audio to the assistant + 4. Plays assistant responses through speakers + +The VAPI_API_KEY will be used from your active CLI account configuration. + +Examples: + # Use existing assistant + vapi call voice asst_12345 + + # Create transient assistant inline + vapi call voice --name "My Assistant" --first-message "Hello! How can I help you?" + + # Advanced transient assistant + vapi call voice --name "Support Bot" --first-message "Hi there!" 
--voice-id "jennifer" --model "gpt-4o" + + # Load from config file + vapi call voice --config ./assistant.json`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + var assistantID string + + // Determine if we're using an existing assistant or creating a transient one + if len(args) > 0 { + // Use existing assistant ID + assistantID = args[0] + } else if configFile != "" { + // Load assistant configuration from JSON file + // Clean the path to prevent directory traversal + cleanPath := filepath.Clean(configFile) + data, err := os.ReadFile(cleanPath) + if err != nil { + return fmt.Errorf("failed to read config file: %w", err) + } + + var config map[string]interface{} + if err := json.Unmarshal(data, &config); err != nil { + return fmt.Errorf("failed to parse config file: %w", err) + } + + // Check if config has existing assistant ID + if id, ok := config["assistant_id"].(string); ok { + assistantID = id + } else if id, ok := config["assistantId"].(string); ok { + assistantID = id + } else { + // No assistant ID found - create transient assistant from config + loadConfigIntoFlags(config) + + createdAssistantID, err := createTransientAssistant() + if err != nil { + return fmt.Errorf("failed to create transient assistant from config: %w", err) + } + assistantID = createdAssistantID + } + } else if assistantName != "" || firstMessage != "" { + // Create transient assistant + createdAssistantID, err := createTransientAssistant() + if err != nil { + return fmt.Errorf("failed to create transient assistant: %w", err) + } + assistantID = createdAssistantID + } else { + return fmt.Errorf("assistant ID is required (provide as argument, via --config, or via transient assistant flags like --name)") + } + + return startVoiceCall(assistantID) + }, +} + +var configureVoiceCmd = &cobra.Command{ + Use: "configure", + Short: "Configure voice call audio devices", + Long: `Configure audio input and output devices for voice calls.`, + RunE: func(cmd *cobra.Command, args []string) error { + fmt.Println("šŸŽ›ļø Voice Call Configuration") + fmt.Println() + + // Create device manager to list devices + deviceManager := voice.NewAudioDeviceManager() + if err := deviceManager.Initialize(); err != nil { + return fmt.Errorf("failed to initialize audio system: %w", err) + } + defer func() { + if err := deviceManager.Terminate(); err != nil { + fmt.Printf("Failed to terminate device manager: %v\n", err) + } + }() + + // List available devices + deviceList, err := deviceManager.ListDevices() + if err != nil { + return fmt.Errorf("failed to list audio devices: %w", err) + } + + fmt.Println("Available audio devices:") + fmt.Print(deviceList) + + fmt.Println("Configuration:") + fmt.Println("- Use device names with --audio-input and --audio-output flags") + fmt.Println("- Use 'default' to use system default devices") + fmt.Println() + fmt.Println("Example:") + fmt.Println(" vapi call voice asst_12345 --audio-input \"Built-in Microphone\"") + + return nil + }, +} + +var testAudioCmd = &cobra.Command{ + Use: "test-audio", + Short: "Test audio devices", + Long: `Test microphone and speaker functionality for voice calls.`, + RunE: func(cmd *cobra.Command, args []string) error { + fmt.Println("šŸŽ¤ Audio Test") + fmt.Println() + + // Create a basic audio stream to test devices + config := voice.DefaultWebRTCConfig() + if audioInputDevice != "" { + config.AudioInputDevice = audioInputDevice + } + if audioOutputDevice != "" { + config.AudioOutputDevice = audioOutputDevice + } + + audioStream, err 
:= voice.NewAudioStream(config)
+		if err != nil {
+			return fmt.Errorf("failed to create audio stream: %w", err)
+		}
+
+		fmt.Println("Testing audio devices...")
+		fmt.Printf("Input device: %s\n", config.AudioInputDevice)
+		fmt.Printf("Output device: %s\n", config.AudioOutputDevice)
+		fmt.Println()
+
+		// Try to start the audio stream briefly
+		if err := audioStream.Start(); err != nil {
+			return fmt.Errorf("failed to start audio stream: %w", err)
+		}
+
+		fmt.Println("āœ… Audio devices initialized successfully!")
+		fmt.Printf("Input device: %s\n", audioStream.GetInputDevice().Name)
+		fmt.Printf("Output device: %s\n", audioStream.GetOutputDevice().Name)
+		fmt.Println()
+
+		// Test for a brief moment
+		fmt.Println("Testing audio for 3 seconds...")
+		time.Sleep(3 * time.Second)
+
+		// Get audio levels
+		inputLevel, outputLevel := audioStream.GetInputLevel(), audioStream.GetOutputLevel()
+		fmt.Printf("Input level: %.1f%%\n", inputLevel*100)
+		fmt.Printf("Output level: %.1f%%\n", outputLevel*100)
+
+		// Stop the audio stream
+		if err := audioStream.Stop(); err != nil {
+			fmt.Printf("Warning: %v\n", err)
+		}
+
+		fmt.Println()
+		fmt.Println("āœ… Audio test completed!")
+		return nil
+	},
+}
+
+var statusVoiceCmd = &cobra.Command{
+	Use:   "status",
+	Short: "Show voice call status",
+	Long:  `Display the status of the current voice call.`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		fmt.Println("šŸ“ž Voice Call Status")
+		fmt.Println()
+		fmt.Println("No active voice call.")
+		fmt.Println()
+		fmt.Println("Start a call with:")
+		fmt.Println("  vapi call voice <assistant-id>")
+		return nil
+	},
+}
+
+var endVoiceCmd = &cobra.Command{
+	Use:   "end",
+	Short: "End current voice call",
+	Long:  `Terminate the current voice call.`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		fmt.Println("šŸ“ž End Voice Call")
+		fmt.Println()
+		fmt.Println("No active call to end.")
+		fmt.Println()
+		fmt.Println("Calls can be ended by pressing Ctrl+C during an active call.")
+		return nil
+	},
+}
+
+// loadConfigIntoFlags loads configuration from a JSON config into the flag variables
+func loadConfigIntoFlags(config map[string]interface{}) {
+	// Load name
+	if name, ok := config["name"].(string); ok {
+		assistantName = name
+	}
+
+	// Load first message
+	if msg, ok := config["first_message"].(string); ok {
+		firstMessage = msg
+	} else if msg, ok := config["firstMessage"].(string); ok {
+		firstMessage = msg
+	}
+
+	// Load voice ID
+	if voiceValue, ok := config["voice_id"].(string); ok {
+		voiceID = voiceValue
+	} else if voiceValue, ok := config["voiceId"].(string); ok {
+		voiceID = voiceValue
+	}
+
+	// Load model
+	if mdl, ok := config["model"].(string); ok {
+		model = mdl
+	}
+
+	// Load system message
+	if sysMsg, ok := config["system_message"].(string); ok {
+		systemMessage = sysMsg
+	} else if sysMsg, ok := config["systemMessage"].(string); ok {
+		systemMessage = sysMsg
+	}
+}
+
+// createTransientAssistant creates a temporary assistant for the voice call
+func createTransientAssistant() (string, error) {
+	fmt.Println("šŸ¤– Creating transient assistant...")
+
+	// Get Vapi client
+	if vapiClient.GetClient() == nil {
+		return "", fmt.Errorf("no active Vapi account found. Please run 'vapi login' first")
+	}
+
+	// Set defaults if not provided
+	name := assistantName
+	if name == "" {
+		name = "Transient Assistant"
+	}
+
+	message := firstMessage
+	if message == "" {
+		message = "Hello! How can I assist you today?"
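+		// A generic fallback greeting; override it with --first-message or the
+		// config file's firstMessage field.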
+ } + + ctx := context.Background() + + // Create the assistant request + createRequest := &vapi.CreateAssistantDto{ + Name: &name, + FirstMessage: &message, + Voice: &vapi.CreateAssistantDtoVoice{ + VapiVoice: &vapi.VapiVoice{ + VoiceId: vapi.VapiVoiceVoiceIdElliot, // Default voice + }, + }, + } + + // Note: For now, we'll keep it simple and just use the default voice and model + // Advanced voice/model configuration can be added later once we understand the full API structure + if voiceID != "" { + fmt.Printf("ā„¹ļø Voice ID '%s' specified but using default voice for now\n", voiceID) + } + if model != "" { + fmt.Printf("ā„¹ļø Model '%s' specified but using default model for now\n", model) + } + if systemMessage != "" { + fmt.Printf("ā„¹ļø System message specified but using default behavior for now\n") + } + + // Create the assistant + assistant, err := vapiClient.GetClient().Assistants.Create(ctx, createRequest) + if err != nil { + return "", fmt.Errorf("failed to create transient assistant: %w", err) + } + + fmt.Printf("āœ… Created transient assistant: %s (ID: %s)\n", name, assistant.Id) + return assistant.Id, nil +} + +// startVoiceCall initiates a voice call with the specified assistant +func startVoiceCall(assistantID string) error { + fmt.Printf("šŸš€ Starting voice call with assistant: %s\n", assistantID) + fmt.Println() + + // Create voice call configuration + config := voice.DefaultWebRTCConfig() + + // Override with command line options + if audioInputDevice != "" { + config.AudioInputDevice = audioInputDevice + } + if audioOutputDevice != "" { + config.AudioOutputDevice = audioOutputDevice + } + config.VideoEnabled = !noVideo + config.AudioDebug = audioDebug + + // Get Vapi API configuration from the CLI client + if vapiClient.GetClient() == nil { + return fmt.Errorf("no active Vapi account found. Please run 'vapi login' first") + } + + // Set Vapi API key from the active account configuration + if apiKey := vapiClient.GetConfig().GetActiveAPIKey(); apiKey != "" { + config.VapiAPIKey = apiKey + } else { + return fmt.Errorf("VAPI_API_KEY not found. 
Please run 'vapi login' to authenticate") + } + + // Set API base URL from configuration + config.VapiBaseURL = vapiClient.GetConfig().GetAPIBaseURL() + + // Set public API key from environment if provided + if pub := os.Getenv("VAPI_PUBLIC_KEY"); pub != "" { + config.VapiPublicAPIKey = pub + } + + // Create voice client + client, err := voice.NewVoiceClient(config, vapiClient.GetClient()) + if err != nil { + return fmt.Errorf("failed to create voice client: %w", err) + } + + // Create terminal UI + ui := voice.NewTerminalUI(client) + + // Start the call + if err := client.StartCall(assistantID); err != nil { + return fmt.Errorf("failed to start voice call: %w", err) + } + + // Run the terminal UI (this blocks until call ends) + return ui.Run() +} + +func init() { + // Add voice as a subcommand of call + callCmd.AddCommand(voiceCmd) + voiceCmd.AddCommand(configureVoiceCmd) + voiceCmd.AddCommand(testAudioCmd) + voiceCmd.AddCommand(statusVoiceCmd) + voiceCmd.AddCommand(endVoiceCmd) + + // Add flags to the main voice command + voiceCmd.Flags().StringVar(&configFile, "config", "", "Path to assistant configuration JSON file") + voiceCmd.Flags().StringVar(&audioInputDevice, "audio-input", "", "Audio input device name") + voiceCmd.Flags().StringVar(&audioOutputDevice, "audio-output", "", "Audio output device name") + voiceCmd.Flags().IntVar(&callTimeout, "timeout", 30, "Call timeout in minutes") + voiceCmd.Flags().BoolVar(&audioDebug, "audio-debug", false, "Enable audio debugging (saves input/output to WAV files)") + + // Transient assistant flags + voiceCmd.Flags().StringVar(&assistantName, "name", "", "Name for transient assistant") + voiceCmd.Flags().StringVar(&firstMessage, "first-message", "", "First message from transient assistant") + voiceCmd.Flags().StringVar(&voiceID, "voice-id", "", "Voice ID for transient assistant (jennifer, derek, elliot)") + voiceCmd.Flags().StringVar(&model, "model", "", "AI model for transient assistant (gpt-4o, gpt-4o-mini, etc.)") + voiceCmd.Flags().StringVar(&systemMessage, "system-message", "", "System message for transient assistant") +} diff --git a/DEVELOPMENT.md b/docs/DEVELOPMENT.md similarity index 100% rename from DEVELOPMENT.md rename to docs/DEVELOPMENT.md diff --git a/RELEASING.md b/docs/RELEASING.md similarity index 100% rename from RELEASING.md rename to docs/RELEASING.md diff --git a/docs/WEBRTC_IMPLEMENTATION_PLAN.md b/docs/WEBRTC_IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..0df60ef --- /dev/null +++ b/docs/WEBRTC_IMPLEMENTATION_PLAN.md @@ -0,0 +1,452 @@ +# WebRTC Call Implementation Plan for Vapi CLI + +## Overview +This document outlines the implementation plan for adding WebRTC calling functionality to the Vapi CLI using Pion WebRTC library and Daily.co as an intermediary service. + +## Architecture + +### High-Level Components +1. **CLI Command Interface** - New `vapi webrtc` command group +2. **WebRTC Client** - Pion-based WebRTC implementation +3. **Daily.co Integration** - Room management and signaling via Daily.co API +4. **Vapi Integration** - Connect with existing Vapi assistant/call infrastructure +5. 
**Audio/Video Pipeline** - Handle media streams for voice/video calls + +### Technology Stack +- **WebRTC Library**: Pion WebRTC v3 (github.com/pion/webrtc/v3) +- **Signaling Service**: Daily.co API +- **HTTP Client**: Standard Go net/http or existing client in codebase +- **Audio Processing**: Pion's built-in audio codecs (Opus, PCM) +- **CLI Framework**: Cobra (already in use) + +## Debug Webhook System (Using Existing Infrastructure) + +### Integration with Existing `vapi listen` Command +The WebRTC implementation will leverage the existing robust webhook infrastructure: + +```go +type WebRTCDebugger struct { + webhookURL string // URL for vapi listen forwarding + events chan WebhookEvent + ui *TerminalUI + callID string // Track specific WebRTC call events +} + +type WebhookEvent struct { + Timestamp time.Time `json:"timestamp"` + Type string `json:"type"` // From existing webhook types + CallID string `json:"call_id"` + Data interface{} `json:"data"` + SessionID string `json:"session_id"` +} +``` + +### Debug Integration Modes +1. **Auto-Start Listen Server** (`--debug`) + - Automatically launches `vapi listen --forward-to localhost:3000/webhook` + - Integrates webhook events into WebRTC terminal UI + - Filters events by call ID for relevant debugging + +2. **External Webhook Integration** (`--debug-webhook `) + - Uses existing webhook forwarding to external URL + - Leverages existing authentication and retry logic + - Maintains compatibility with current webhook tooling + +3. **Existing File Logging** + - Uses existing structured logging from `vapi listen` + - Filters WebRTC-specific events for analysis + +### Command Integration Examples +```bash +# WebRTC call with auto-debug (leverages existing listen command) +vapi call webrtc asst_12345 --debug +# Internally runs: vapi listen --forward-to localhost:3000/debug & + +# WebRTC call with external webhook (uses existing infrastructure) +vapi call webrtc asst_12345 --debug-webhook http://localhost:8080/webhook + +# WebRTC call with JSON config and debug +vapi call webrtc --config ./assistant.json --debug + +# Manual setup using existing commands +vapi listen --forward-to localhost:3000/webhook & +vapi call webrtc asst_12345 +``` + +### Terminal Flow Integration +```go +type TerminalUI struct { + callStatus *CallStatusView + debugPanel *DebugPanelView + audioLevels *AudioLevelsView + controls *ControlsView +} + +// Real-time terminal layout +ā”Œā”€ Call Status ────────────────────────────────────────────────────┐ +│ 🟢 Connected to: Daily Room "test-call-1234" │ +│ šŸ‘¤ Participants: You, Vapi Assistant │ +│ ā±ļø Duration: 00:02:34 │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +ā”Œā”€ Audio Levels ───────────────────────────────────────────────────┐ +│ šŸŽ¤ Input: ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–’ā–’ 80% │ +│ šŸ”Š Output: ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–’ā–’ā–’ā–’ 60% │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +ā”Œā”€ Debug Events ───────────────────────────────────────────────────┐ +│ [14:23:45] POST /v1/calls → 201 Created │ +│ [14:23:46] GET /v1/assistants/asst_123 → 200 OK │ +│ [14:23:47] WebSocket: connection established │ +│ [14:23:48] WebRTC: ICE candidate received │ 
+ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +ā”Œā”€ Controls ───────────────────────────────────────────────────────┐ +│ [m] Mute [h] Hang up [d] Toggle debug [q] Quit │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## Terminal Flow Design + +### Non-blocking Terminal UI +```go +type CallInterface struct { + done chan bool + keyEvents chan rune + uiUpdates chan UIUpdate + callEvents chan CallEvent +} + +// Goroutine structure +func (c *CallInterface) Run() { + go c.handleKeyInput() // Non-blocking keyboard input + go c.handleCallEvents() // WebRTC/Vapi event processing + go c.handleUIUpdates() // Terminal display updates + go c.handleWebhookEvents() // Debug webhook processing + + // Main event loop + for { + select { + case key := <-c.keyEvents: + c.handleKeyPress(key) + case event := <-c.callEvents: + c.updateCallStatus(event) + case update := <-c.uiUpdates: + c.refreshDisplay(update) + case <-c.done: + return + } + } +} +``` + +### Key Controls During Call +- `m`: Toggle mute/unmute +- `h`: Hang up call +- `d`: Toggle debug panel visibility +- `v`: Adjust volume levels +- `r`: Start/stop recording +- `t`: Show call transcript +- `q`: Quit (with confirmation) +- `↑/↓`: Scroll through debug events + +### Terminal State Management +```go +type TerminalState struct { + mode DisplayMode // Normal, Debug, Transcript + callActive bool + muted bool + recording bool + debugVisible bool + scrollPos int +} +``` + +## Implementation Phases + +### Phase 1: Core Infrastructure +1. **Add Dependencies** + ```go + // Add to go.mod + github.com/pion/webrtc/v3 v3.x.x + github.com/pion/interceptor v0.x.x + ``` + +2. **Create WebRTC Package Structure** + ``` + pkg/webrtc/ + ā”œā”€ā”€ client.go // Main WebRTC client + ā”œā”€ā”€ daily.go // Daily.co API integration + ā”œā”€ā”€ signaling.go // WebRTC signaling handling + ā”œā”€ā”€ media.go // Audio/video stream management + ā”œā”€ā”€ config.go // WebRTC configuration + ā”œā”€ā”€ audio.go // PortAudio integration + ā”œā”€ā”€ devices.go // Audio device management + ā”œā”€ā”€ api.go // Vapi API request/response handling + ā”œā”€ā”€ terminal.go // Terminal UI management + └── diagnostics.go // Connection diagnostics + ``` + +3. **Daily.co Integration** + ```go + type DailyClient struct { + apiKey string + domain string + httpClient *http.Client + } + + type Room struct { + Name string `json:"name"` + URL string `json:"url"` + Config *RoomConfig `json:"config,omitempty"` + CreatedAt time.Time `json:"created_at"` + Privacy string `json:"privacy"` // "public" | "private" + } + + type RoomConfig struct { + MaxParticipants int `json:"max_participants"` + EnableChat bool `json:"enable_chat"` + EnableRecording bool `json:"enable_recording"` + AudioOnly bool `json:"audio_only"` + } + + // Room management methods + func (d *DailyClient) CreateRoom(name string, config *RoomConfig) (*Room, error) + func (d *DailyClient) GetRoom(name string) (*Room, error) + func (d *DailyClient) DeleteRoom(name string) error + func (d *DailyClient) GenerateToken(roomName string, props *TokenProperties) (string, error) + ``` + + **Authentication Flow:** + 1. Create room via Daily.co REST API with API key + 2. Generate meeting token for secure room access + 3. 
Connect to Daily.co WebSocket with token + 4. Handle room events and participant management + +### Phase 2: CLI Commands +Add new command group under existing `call` command: + +``` +vapi call webrtc [options] // Start WebRTC call with assistant +vapi call webrtc --config [options] // Start with JSON config +vapi call webrtc configure // Configure audio devices +vapi call webrtc test-audio // Test microphone/speakers +vapi call webrtc status // Show current call status +vapi call webrtc end // End current WebRTC call +vapi call webrtc diagnostics // Connection diagnostics +``` + +**Primary Command Usage:** +```bash +# Start call with assistant ID +vapi call webrtc asst_12345 --debug-webhook http://localhost:3000/webhook + +# Start call with JSON config +vapi call webrtc --config ./my-assistant.json --debug +``` + +**Command Flags:** +- `--room-name`: Custom room name (default: auto-generated) +- `--debug-webhook`: URL to receive debug request/response data +- `--debug`: Enable debug mode with local webhook server +- `--audio-input`: Specific audio input device +- `--audio-output`: Specific audio output device +- `--config`: Assistant configuration JSON file +- `--no-video`: Audio-only mode +- `--record`: Enable call recording + +### Phase 3: WebRTC Implementation +1. **Peer Connection Setup** + - Initialize Pion WebRTC peer connection + - Configure ICE servers and STUN/TURN + - Handle offer/answer exchange via Daily.co + +2. **Media Handling** + - Audio input/output (microphone/speakers) + - Optional video support + - Integration with Vapi's voice processing + +3. **Signaling Protocol** + - WebSocket connection to Daily.co + - Handle ICE candidates exchange + - Room state management + +### Phase 4: Vapi Integration +1. **Assistant Connection** + - Route audio to/from Vapi assistant + - Handle call events and state changes + - Integrate with existing Vapi call infrastructure + +2. 
**Call Management** + - Link WebRTC calls with Vapi call records + - Transcript and recording integration + - Billing and analytics + +## File Structure Changes + +### New Files to Create +``` +cmd/webrtc.go // WebRTC CLI commands +pkg/webrtc/client.go // Main WebRTC client +pkg/webrtc/daily.go // Daily.co API client +pkg/webrtc/signaling.go // WebRTC signaling +pkg/webrtc/media.go // Media stream handling +pkg/webrtc/config.go // Configuration +pkg/webrtc/audio.go // PortAudio integration +pkg/webrtc/devices.go // Audio device management +pkg/webrtc/api.go // Vapi API request/response handling +pkg/webrtc/terminal.go // Terminal UI management +pkg/webrtc/diagnostics.go // Connection diagnostics +``` + +### Modified Files +``` +cmd/call.go // Add WebRTC subcommands +go.mod // Add Pion WebRTC dependencies +``` + +## Dependencies + +### Required Go Modules +```go +// Core WebRTC +github.com/pion/webrtc/v3 v3.2.40 // Core WebRTC implementation +github.com/pion/interceptor v0.1.25 // WebRTC interceptors +github.com/pion/opus v0.4.0 // Opus audio codec +github.com/pion/rtp v1.8.2 // RTP packet handling + +// Audio System +github.com/gordonklaus/portaudio latest // Cross-platform audio I/O +github.com/yourusername/go-audio latest // Audio format conversion + +// Networking +github.com/gorilla/websocket v1.5.1 // WebSocket for signaling + +// Utilities +github.com/google/uuid v1.6.0 // Room ID generation +github.com/fatih/color v1.15.0 // Terminal colors for status +``` + +### Daily.co API Requirements +- Daily.co API key for room management +- WebSocket endpoint for real-time signaling +- REST API for room creation/management + +## Configuration + +### Environment Variables +```bash +DAILY_API_KEY=your_daily_api_key +DAILY_DOMAIN=your_daily_domain.daily.co +WEBRTC_STUN_SERVERS=stun:stun.l.google.com:19302 +WEBRTC_TURN_SERVERS=turn:your-turn-server.com +WEBRTC_AUDIO_INPUT_DEVICE=default +WEBRTC_AUDIO_OUTPUT_DEVICE=default +``` + +### CLI Configuration +Extend existing config.go to include: +```go +type WebRTCConfig struct { + DailyAPIKey string `mapstructure:"daily_api_key"` + DailyDomain string `mapstructure:"daily_domain"` + STUNServers []string `mapstructure:"stun_servers"` + TURNServers []string `mapstructure:"turn_servers"` + AudioCodec string `mapstructure:"audio_codec"` // opus, pcm + VideoEnabled bool `mapstructure:"video_enabled"` +} +``` + +## Implementation Steps + +### Step 1: Setup and Dependencies +1. Add Pion WebRTC to go.mod +2. Create basic pkg/webrtc package structure +3. Implement Daily.co API client for room management + +### Step 2: CLI Commands +1. Create cmd/webrtc.go with basic command structure +2. Implement room creation and joining commands +3. Add configuration handling for Daily.co credentials + +### Step 3: WebRTC Core +1. Implement basic peer connection setup +2. Add signaling via Daily.co WebSocket +3. Handle offer/answer exchange and ICE candidates + +### Step 4: Media Pipeline +1. **Audio Device Setup** + ```go + // Initialize PortAudio + portaudio.Initialize() + defer portaudio.Terminate() + + // Enumerate audio devices + devices, err := portaudio.Devices() + ``` + +2. **Audio Input Pipeline** + ```go + // Microphone -> PCM Buffer -> Opus Encoder -> WebRTC Track + inputStream := setupAudioInput(selectedDevice) + opusEncoder := opus.NewEncoder(48000, 1, opus.AppVoIP) + audioTrack := setupWebRTCAudioTrack() + ``` + +3. 
**Audio Output Pipeline** + ```go + // WebRTC Track -> Opus Decoder -> PCM Buffer -> Speakers + outputStream := setupAudioOutput(selectedDevice) + opusDecoder := opus.NewDecoder(48000, 1) + ``` + +4. **Route audio to/from Vapi assistant** + - Bidirectional audio stream routing + - Real-time audio processing and forwarding + +### Step 5: Integration and Testing +1. Connect WebRTC calls with Vapi call management +2. Add call state tracking and events +3. Test end-to-end call scenarios + +## Security Considerations + +1. **API Key Management**: Secure storage of Daily.co API keys +2. **Media Encryption**: Ensure DTLS/SRTP encryption is enabled +3. **Authentication**: Validate room access and user permissions +4. **Network Security**: Proper STUN/TURN server configuration + +## Testing Strategy + +1. **Unit Tests**: Individual component testing +2. **Integration Tests**: Daily.co API integration +3. **End-to-End Tests**: Full call scenarios +4. **Performance Tests**: Media quality and latency + +## Potential Challenges + +1. **Audio Routing**: Complex audio pipeline between WebRTC and Vapi +2. **NAT Traversal**: STUN/TURN server configuration +3. **Cross-Platform**: Audio device handling across different OS +4. **Error Handling**: Robust connection failure recovery +5. **Synchronization**: Managing call state between WebRTC and Vapi + +## Success Metrics + +1. Successful peer-to-peer connection establishment +2. Clear audio quality with low latency +3. Reliable connection through NAT/firewalls +4. Seamless integration with existing Vapi workflows +5. Proper call state management and recording + +## Future Enhancements + +1. **Video Support**: Add video calling capabilities +2. **Screen Sharing**: Implement screen sharing via WebRTC +3. **Multi-party Calls**: Support for conference calls +4. **Recording**: Direct WebRTC call recording +5. 
**Mobile Support**: Extend to mobile platforms via Go Mobile + +## Resources + +- [Pion WebRTC Documentation](https://pkg.go.dev/github.com/pion/webrtc/v3) +- [Daily.co API Documentation](https://docs.daily.co/) +- [WebRTC Standards](https://webrtc.org/) +- [Pion Examples](https://github.com/pion/example-webrtc-applications) \ No newline at end of file diff --git a/go.mod b/go.mod index 87ba042..599db39 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,8 @@ require ( github.com/AlecAivazis/survey/v2 v2.3.7 github.com/VapiAI/server-sdk-go v0.9.0 github.com/charmbracelet/lipgloss v1.1.0 + github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b + github.com/gorilla/websocket v1.5.1 github.com/posthog/posthog-go v1.5.12 github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.20.1 @@ -33,6 +35,23 @@ require ( github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect + github.com/pion/datachannel v1.5.10 // indirect + github.com/pion/dtls/v3 v3.0.6 // indirect + github.com/pion/ice/v4 v4.0.10 // indirect + github.com/pion/interceptor v0.1.40 // indirect + github.com/pion/logging v0.2.4 // indirect + github.com/pion/mdns/v2 v2.0.7 // indirect + github.com/pion/mediadevices v0.7.1 // indirect + github.com/pion/randutil v0.1.0 // indirect + github.com/pion/rtcp v1.2.15 // indirect + github.com/pion/rtp v1.8.20 // indirect + github.com/pion/sctp v1.8.39 // indirect + github.com/pion/sdp/v3 v3.0.14 // indirect + github.com/pion/srtp/v3 v3.0.6 // indirect + github.com/pion/stun/v3 v3.0.0 // indirect + github.com/pion/transport/v3 v3.0.7 // indirect + github.com/pion/turn/v4 v4.0.0 // indirect + github.com/pion/webrtc/v4 v4.1.3 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect @@ -42,8 +61,12 @@ require ( github.com/spf13/cast v1.9.2 // indirect github.com/spf13/pflag v1.0.6 // indirect github.com/subosito/gotenv v1.6.0 // indirect + github.com/wlynxg/anet v0.0.5 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/crypto v0.33.0 // indirect + golang.org/x/image v0.23.0 // indirect + golang.org/x/net v0.35.0 // indirect golang.org/x/sys v0.33.0 // indirect golang.org/x/term v0.32.0 // indirect golang.org/x/text v0.26.0 // indirect diff --git a/go.sum b/go.sum index 4beae78..0de233f 100644 --- a/go.sum +++ b/go.sum @@ -33,6 +33,10 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b h1:WEuQWBxelOGHA6z9lABqaMLMrfwVyMdN3UgRLT+YUPo= +github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b/go.mod h1:esZFQEUwqC+l76f2R8bIWSwXMaPbp79PppwZ1eJhFco= +github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= +github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hinshun/vt10x 
v0.0.0-20220119200601-820417d04eec h1:qv2VnGeEQHchGaZ/u7lxST/RaJw+cv273q79D81Xbog= @@ -62,6 +66,58 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pion/datachannel v1.5.10 h1:ly0Q26K1i6ZkGf42W7D4hQYR90pZwzFOjTq5AuCKk4o= +github.com/pion/datachannel v1.5.10/go.mod h1:p/jJfC9arb29W7WrxyKbepTU20CFgyx5oLo8Rs4Py/M= +github.com/pion/dtls/v3 v3.0.4 h1:44CZekewMzfrn9pmGrj5BNnTMDCFwr+6sLH+cCuLM7U= +github.com/pion/dtls/v3 v3.0.4/go.mod h1:R373CsjxWqNPf6MEkfdy3aSe9niZvL/JaKlGeFphtMg= +github.com/pion/dtls/v3 v3.0.6 h1:7Hkd8WhAJNbRgq9RgdNh1aaWlZlGpYTzdqjy9x9sK2E= +github.com/pion/dtls/v3 v3.0.6/go.mod h1:iJxNQ3Uhn1NZWOMWlLxEEHAN5yX7GyPvvKw04v9bzYU= +github.com/pion/ice/v4 v4.0.6 h1:jmM9HwI9lfetQV/39uD0nY4y++XZNPhvzIPCb8EwxUM= +github.com/pion/ice/v4 v4.0.6/go.mod h1:y3M18aPhIxLlcO/4dn9X8LzLLSma84cx6emMSu14FGw= +github.com/pion/ice/v4 v4.0.10 h1:P59w1iauC/wPk9PdY8Vjl4fOFL5B+USq1+xbDcN6gT4= +github.com/pion/ice/v4 v4.0.10/go.mod h1:y3M18aPhIxLlcO/4dn9X8LzLLSma84cx6emMSu14FGw= +github.com/pion/interceptor v0.1.37 h1:aRA8Zpab/wE7/c0O3fh1PqY0AJI3fCSEM5lRWJVorwI= +github.com/pion/interceptor v0.1.37/go.mod h1:JzxbJ4umVTlZAf+/utHzNesY8tmRkM2lVmkS82TTj8Y= +github.com/pion/interceptor v0.1.40 h1:e0BjnPcGpr2CFQgKhrQisBU7V3GXK6wrfYrGYaU6Jq4= +github.com/pion/interceptor v0.1.40/go.mod h1:Z6kqH7M/FYirg3frjGJ21VLSRJGBXB/KqaTIrdqnOic= +github.com/pion/logging v0.2.3 h1:gHuf0zpoh1GW67Nr6Gj4cv5Z9ZscU7g/EaoC/Ke/igI= +github.com/pion/logging v0.2.3/go.mod h1:z8YfknkquMe1csOrxK5kc+5/ZPAzMxbKLX5aXpbpC90= +github.com/pion/logging v0.2.4 h1:tTew+7cmQ+Mc1pTBLKH2puKsOvhm32dROumOZ655zB8= +github.com/pion/logging v0.2.4/go.mod h1:DffhXTKYdNZU+KtJ5pyQDjvOAh/GsNSyv1lbkFbe3so= +github.com/pion/mdns/v2 v2.0.7 h1:c9kM8ewCgjslaAmicYMFQIde2H9/lrZpjBkN8VwoVtM= +github.com/pion/mdns/v2 v2.0.7/go.mod h1:vAdSYNAT0Jy3Ru0zl2YiW3Rm/fJCwIeM0nToenfOJKA= +github.com/pion/mediadevices v0.7.1 h1:ayMneLx1ymJr0rVRn01foqu8LO/FQ97MS1IKM/XgpuY= +github.com/pion/mediadevices v0.7.1/go.mod h1:89jObwFJ4IkL2vkaN8Gq9tSjp0jAY4JtTJ84Ix+QODQ= +github.com/pion/randutil v0.1.0 h1:CFG1UdESneORglEsnimhUjf33Rwjubwj6xfiOXBa3mA= +github.com/pion/randutil v0.1.0/go.mod h1:XcJrSMMbbMRhASFVOlj/5hQial/Y8oH/HVo7TBZq+j8= +github.com/pion/rtcp v1.2.15 h1:LZQi2JbdipLOj4eBjK4wlVoQWfrZbh3Q6eHtWtJBZBo= +github.com/pion/rtcp v1.2.15/go.mod h1:jlGuAjHMEXwMUHK78RgX0UmEJFV4zUKOFHR7OP+D3D0= +github.com/pion/rtp v1.8.11 h1:17xjnY5WO5hgO6SD3/NTIUPvSFw/PbLsIJyz1r1yNIk= +github.com/pion/rtp v1.8.11/go.mod h1:8uMBJj32Pa1wwx8Fuv/AsFhn8jsgw+3rUC2PfoBZ8p4= +github.com/pion/rtp v1.8.20 h1:8zcyqohadZE8FCBeGdyEvHiclPIezcwRQH9zfapFyYI= +github.com/pion/rtp v1.8.20/go.mod h1:bAu2UFKScgzyFqvUKmbvzSdPr+NGbZtv6UB2hesqXBk= +github.com/pion/sctp v1.8.35 h1:qwtKvNK1Wc5tHMIYgTDJhfZk7vATGVHhXbUDfHbYwzA= +github.com/pion/sctp v1.8.35/go.mod h1:EcXP8zCYVTRy3W9xtOF7wJm1L1aXfKRQzaM33SjQlzg= +github.com/pion/sctp v1.8.39 h1:PJma40vRHa3UTO3C4MyeJDQ+KIobVYRZQZ0Nt7SjQnE= +github.com/pion/sctp v1.8.39/go.mod h1:cNiLdchXra8fHQwmIoqw0MbLLMs+f7uQ+dGMG2gWebE= +github.com/pion/sdp/v3 v3.0.10 h1:6MChLE/1xYB+CjumMw+gZ9ufp2DPApuVSnDT8t5MIgA= +github.com/pion/sdp/v3 v3.0.10/go.mod h1:88GMahN5xnScv1hIMTqLdu/cOcUkj6a9ytbncwMCq2E= +github.com/pion/sdp/v3 v3.0.14 h1:1h7gBr9FhOWH5GjWWY5lcw/U85MtdcibTyt/o6RxRUI= 
+github.com/pion/sdp/v3 v3.0.14/go.mod h1:88GMahN5xnScv1hIMTqLdu/cOcUkj6a9ytbncwMCq2E= +github.com/pion/srtp/v3 v3.0.4 h1:2Z6vDVxzrX3UHEgrUyIGM4rRouoC7v+NiF1IHtp9B5M= +github.com/pion/srtp/v3 v3.0.4/go.mod h1:1Jx3FwDoxpRaTh1oRV8A/6G1BnFL+QI82eK4ms8EEJQ= +github.com/pion/srtp/v3 v3.0.6 h1:E2gyj1f5X10sB/qILUGIkL4C2CqK269Xq167PbGCc/4= +github.com/pion/srtp/v3 v3.0.6/go.mod h1:BxvziG3v/armJHAaJ87euvkhHqWe9I7iiOy50K2QkhY= +github.com/pion/stun/v3 v3.0.0 h1:4h1gwhWLWuZWOJIJR9s2ferRO+W3zA/b6ijOI6mKzUw= +github.com/pion/stun/v3 v3.0.0/go.mod h1:HvCN8txt8mwi4FBvS3EmDghW6aQJ24T+y+1TKjB5jyU= +github.com/pion/transport/v3 v3.0.7 h1:iRbMH05BzSNwhILHoBoAPxoB9xQgOaJk+591KC9P1o0= +github.com/pion/transport/v3 v3.0.7/go.mod h1:YleKiTZ4vqNxVwh77Z0zytYi7rXHl7j6uPLGhhz9rwo= +github.com/pion/turn/v4 v4.0.0 h1:qxplo3Rxa9Yg1xXDxxH8xaqcyGUtbHYw4QSCvmFWvhM= +github.com/pion/turn/v4 v4.0.0/go.mod h1:MuPDkm15nYSklKpN8vWJ9W2M0PlyQZqYt1McGuxG7mA= +github.com/pion/webrtc/v4 v4.0.9 h1:PyOYMRKJgfy0dzPcYtFD/4oW9zaw3Ze3oZzzbj2LV9E= +github.com/pion/webrtc/v4 v4.0.9/go.mod h1:ViHLVaNpiuvaH8pdiuQxuA9awuE6KVzAXx3vVWilOck= +github.com/pion/webrtc/v4 v4.1.3 h1:YZ67Boj9X/hk190jJZ8+HFGQ6DqSZ/fYP3sLAZv7c3c= +github.com/pion/webrtc/v4 v4.1.3/go.mod h1:rsq+zQ82ryfR9vbb0L1umPJ6Ogq7zm8mcn9fcGnxomM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posthog/posthog-go v1.5.12 h1:nxK/z5QLCFxwzxV8GNvVd4Y1wJ++zJSWMGEtzU+/HLM= @@ -93,6 +149,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= +github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -100,12 +158,24 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= +golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68= +golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod 
h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= +golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/mcp-docs-server/src/resources/documentation.ts b/mcp-docs-server/src/resources/documentation.ts index 4472d44..67e862b 100644 --- a/mcp-docs-server/src/resources/documentation.ts +++ b/mcp-docs-server/src/resources/documentation.ts @@ -7,19 +7,19 @@ export class DocumentationSource { switch (uri) { case "vapi://docs/overview": return this.getDocumentationOverview(); - + case "vapi://docs/quickstart": return this.getQuickStartGuide(); - + case "vapi://examples/collection": return this.getExamplesCollection(); - + case "vapi://api/reference": return this.getApiReference(); - + case "vapi://changelog/latest": return this.getLatestChanges(); - + default: throw new Error(`Unknown resource URI: ${uri}`); } @@ -184,7 +184,7 @@ const assistant = await vapi.assistants.create({ }, model: { provider: "openai", - model: "gpt-3.5-turbo", + model: "gpt-4o", messages: [{ role: "system", content: "You are a helpful assistant. Be concise and friendly." @@ -209,7 +209,7 @@ assistant = vapi.assistants.create( }, model={ "provider": "openai", - "model": "gpt-3.5-turbo", + "model": "gpt-4o", "messages": [{ "role": "system", "content": "You are a helpful assistant. Be concise and friendly." @@ -332,7 +332,7 @@ const assistant = await vapi.assistants.create({ voice: { provider: "openai", voiceId: "alloy" }, model: { provider: "openai", - model: "gpt-3.5-turbo", + model: "gpt-4o", messages: [{ role: "system", content: "You are a helpful assistant." }] } }); @@ -351,7 +351,7 @@ const supportBot = await vapi.assistants.create({ voice: { provider: "openai", voiceId: "echo" }, model: { provider: "openai", - model: "gpt-4", + model: "gpt-4o", messages: [{ role: "system", content: \`You are a customer support representative for Acme Corp. @@ -394,7 +394,7 @@ const assistant = await vapi.assistants.create({ voice: { provider: "elevenlabs", voiceId: "21m00Tcm4TlvDq8ikWAM" }, model: { provider: "openai", - model: "gpt-4", + model: "gpt-4o", messages: [{ role: "system", content: "You are a smart assistant that can help with weather and scheduling." 
@@ -531,7 +531,7 @@ def create_assistant():
         },
         model={
             "provider": "openai",
-            "model": "gpt-3.5-turbo",
+            "model": "gpt-4o",
             "messages": [{
                 "role": "system",
                 "content": data['system_prompt']
diff --git a/pkg/config/config.go b/pkg/config/config.go
index a02c568..0085df7 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -63,8 +63,8 @@ var environments = map[string]Environment{
 	},
 	"staging": {
 		Name:         "staging",
-		APIBaseURL:   "https://api.staging.vapi.ai",
-		DashboardURL: "https://dashboard.staging.vapi.ai",
+		APIBaseURL:   "https://staging-api.vapi.ai",
+		DashboardURL: "https://staging-dashboard.vapi.ai",
 	},
 	"development": {
 		Name:         "development",
diff --git a/pkg/voice/api.go b/pkg/voice/api.go
new file mode 100644
index 0000000..3a606c3
--- /dev/null
+++ b/pkg/voice/api.go
@@ -0,0 +1,77 @@
+package voice
+
+import (
+	"strconv"
+	"time"
+)
+
+// APIHandler manages API request/response logging and handling
+type APIHandler struct {
+	client      *VoiceClient
+	requestLog  chan APIRequest
+	responseLog chan APIResponse
+}
+
+// NewAPIHandler creates a new API handler
+func NewAPIHandler(client *VoiceClient) *APIHandler {
+	return &APIHandler{
+		client:      client,
+		requestLog:  make(chan APIRequest, 100),
+		responseLog: make(chan APIResponse, 100),
+	}
+}
+
+// LogRequest logs an API request
+func (h *APIHandler) LogRequest(method, url string, headers map[string]string, body interface{}) {
+	req := APIRequest{
+		Method:    method,
+		URL:       url,
+		Headers:   headers,
+		Body:      body,
+		Timestamp: time.Now(),
+	}
+
+	select {
+	case h.requestLog <- req:
+	default:
+		// Channel full, skip logging
+	}
+}
+
+// LogResponse logs an API response
+func (h *APIHandler) LogResponse(statusCode int, headers map[string]string, body interface{}, duration time.Duration) {
+	resp := APIResponse{
+		StatusCode: statusCode,
+		Headers:    headers,
+		Body:       body,
+		Duration:   duration,
+		Timestamp:  time.Now(),
+	}
+
+	select {
+	case h.responseLog <- resp:
+	default:
+		// Channel full, skip logging
+	}
+}
+
+// GetRequestLog returns the request log channel
+func (h *APIHandler) GetRequestLog() <-chan APIRequest {
+	return h.requestLog
+}
+
+// GetResponseLog returns the response log channel
+func (h *APIHandler) GetResponseLog() <-chan APIResponse {
+	return h.responseLog
+}
+
+// FormatRequest formats an API request for display
+func FormatRequest(req *APIRequest) string {
+	return req.Timestamp.Format("15:04:05") + " → " + req.Method + " " + req.URL
+}
+
+// FormatResponse formats an API response for display
+func FormatResponse(resp APIResponse) string {
+	// strconv.Itoa renders the numeric status code; a rune conversion would
+	// emit an unrelated Unicode character instead of "200".
+	return resp.Timestamp.Format("15:04:05") + " ← " +
+		strconv.Itoa(resp.StatusCode) + " " +
+		resp.Duration.String()
+}
diff --git a/pkg/voice/audio.go b/pkg/voice/audio.go
new file mode 100644
index 0000000..6a1ee4e
--- /dev/null
+++ b/pkg/voice/audio.go
@@ -0,0 +1,434 @@
+package voice
+
+import (
+	"fmt"
+	"math"
+	"sync"
+	"time"
+
+	"github.com/gordonklaus/portaudio"
+)
+
+const (
+	// Audio configuration constants
+	SampleRate    = 48000
+	FrameSize     = 480 // 10ms at 48kHz
+	Channels      = 1   // Mono
+	BitsPerSample = 16
+)
+
+// AudioBuffer represents a circular buffer for audio data
+type AudioBuffer struct {
+	data  []float32
+	size  int
+	head  int
+	tail  int
+	count int
+	mutex sync.Mutex
+}
+
+// NewAudioBuffer creates a new audio buffer
+func NewAudioBuffer(size int) *AudioBuffer {
+	return &AudioBuffer{
+		data: make([]float32, size),
+		size: size,
+	}
+}
+
+// Write writes audio data to the buffer
+func (b *AudioBuffer) Write(data []float32) int {
+	b.mutex.Lock()
+	defer b.mutex.Unlock()
+
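+	// Overwrite-oldest policy: when the ring is full, advance the tail and
+	// drop the stalest sample so live capture never blocks on a slow reader.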
+ written := 0 + for i, sample := range data { + if b.count >= b.size { + // Buffer full, drop oldest sample + b.tail = (b.tail + 1) % b.size + b.count-- + } + + b.data[b.head] = sample + b.head = (b.head + 1) % b.size + b.count++ + written = i + 1 + } + + return written +} + +// Read reads audio data from the buffer +func (b *AudioBuffer) Read(data []float32) int { + b.mutex.Lock() + defer b.mutex.Unlock() + + read := 0 + for i := range data { + if b.count == 0 { + // Buffer empty, fill with silence + data[i] = 0 + } else { + data[i] = b.data[b.tail] + b.tail = (b.tail + 1) % b.size + b.count-- + } + read = i + 1 + } + + return read +} + +// Available returns the number of samples available for reading +func (b *AudioBuffer) Available() int { + b.mutex.Lock() + defer b.mutex.Unlock() + return b.count +} + +// AudioStream manages audio input and output streams +type AudioStream struct { + deviceManager *AudioDeviceManager + config *WebRTCConfig + + // Input stream + inputStream *portaudio.Stream + inputBuffer *AudioBuffer + inputDevice *AudioDevice + + // Output stream + outputStream *portaudio.Stream + outputBuffer *AudioBuffer + outputDevice *AudioDevice + + // Control + running bool + runMutex sync.RWMutex + stopChan chan struct{} + + // Debugging + debugger *AudioDebugger +} + +// NewAudioStream creates a new audio stream +func NewAudioStream(config *WebRTCConfig) (*AudioStream, error) { + deviceManager := NewAudioDeviceManager() + if err := deviceManager.Initialize(); err != nil { + return nil, fmt.Errorf("failed to initialize device manager: %w", err) + } + + // Create debugger if enabled + debugger := NewAudioDebugger(config.AudioDebug) + + // Create audio buffers (1 second of audio data) + bufferSize := SampleRate * 1 + inputBuffer := NewAudioBuffer(bufferSize) + outputBuffer := NewAudioBuffer(bufferSize) + + return &AudioStream{ + deviceManager: deviceManager, + config: config, + inputBuffer: inputBuffer, + outputBuffer: outputBuffer, + stopChan: make(chan struct{}), + debugger: debugger, + }, nil +} + +// Start starts the audio streams +func (a *AudioStream) Start() error { + a.runMutex.Lock() + defer a.runMutex.Unlock() + + if a.running { + return fmt.Errorf("audio stream already running") + } + + // Setup input device + var err error + if a.config.AudioInputDevice == "default" || a.config.AudioInputDevice == "" { + a.inputDevice, err = a.deviceManager.GetDefaultInputDevice() + } else { + a.inputDevice, err = a.deviceManager.FindInputDeviceByName(a.config.AudioInputDevice) + } + if err != nil { + return fmt.Errorf("failed to get input device: %w", err) + } + + // Setup output device + if a.config.AudioOutputDevice == "default" || a.config.AudioOutputDevice == "" { + a.outputDevice, err = a.deviceManager.GetDefaultOutputDevice() + } else { + a.outputDevice, err = a.deviceManager.FindOutputDeviceByName(a.config.AudioOutputDevice) + } + if err != nil { + return fmt.Errorf("failed to get output device: %w", err) + } + + // Start input stream + if err := a.startInputStream(); err != nil { + return fmt.Errorf("failed to start input stream: %w", err) + } + + // Start output stream + if err := a.startOutputStream(); err != nil { + if closeErr := a.inputStream.Close(); closeErr != nil { + fmt.Printf("Failed to close input stream: %v\n", closeErr) + } + return fmt.Errorf("failed to start output stream: %w", err) + } + + // Start debugger if enabled + if err := a.debugger.Start(); err != nil { + fmt.Printf("Failed to start audio debugger: %v\n", err) + } + + a.running = true + return nil 
+} + +// createStream is a helper function to create audio streams +func (a *AudioStream) createStream(isInput bool, device *AudioDevice, callback interface{}) (*portaudio.Stream, error) { + // Get all devices to find the actual device info + devices, err := portaudio.Devices() + if err != nil { + return nil, fmt.Errorf("failed to get devices: %w", err) + } + + if device.Index >= len(devices) { + return nil, fmt.Errorf("invalid device index: %d", device.Index) + } + + actualDevice := devices[device.Index] + + var params portaudio.StreamParameters + if isInput { + params = portaudio.StreamParameters{ + Input: portaudio.StreamDeviceParameters{ + Device: actualDevice, + Channels: Channels, + Latency: time.Duration(device.DefaultLowInputLatency * float64(time.Second)), + }, + SampleRate: SampleRate, + FramesPerBuffer: FrameSize, + } + } else { + params = portaudio.StreamParameters{ + Output: portaudio.StreamDeviceParameters{ + Device: actualDevice, + Channels: Channels, + Latency: time.Duration(device.DefaultLowOutputLatency * float64(time.Second)), + }, + SampleRate: SampleRate, + FramesPerBuffer: FrameSize, + } + } + + stream, err := portaudio.OpenStream(params, callback) + if err != nil { + return nil, fmt.Errorf("failed to open stream: %w", err) + } + + if err := stream.Start(); err != nil { + if closeErr := stream.Close(); closeErr != nil { + fmt.Printf("Failed to close stream: %v\n", closeErr) + } + return nil, fmt.Errorf("failed to start stream: %w", err) + } + + return stream, nil +} + +// startInputStream starts the audio input stream +func (a *AudioStream) startInputStream() error { + // Create input callback + inputCallback := func(in []float32) { + // Debug input audio + a.debugger.WriteInput(in) + a.debugger.LogAudioStats(in, "Input") + + // Write audio data to input buffer for processing + a.inputBuffer.Write(in) + } + + stream, err := a.createStream(true, a.inputDevice, inputCallback) + if err != nil { + return fmt.Errorf("failed to create input stream: %w", err) + } + + a.inputStream = stream + return nil +} + +// startOutputStream starts the audio output stream +func (a *AudioStream) startOutputStream() error { + // Create output callback + outputCallback := func(out []float32) { + // Read audio data from output buffer + a.outputBuffer.Read(out) + + // Debug output audio + a.debugger.WriteOutput(out) + a.debugger.LogAudioStats(out, "Output") + } + + stream, err := a.createStream(false, a.outputDevice, outputCallback) + if err != nil { + return fmt.Errorf("failed to create output stream: %w", err) + } + + a.outputStream = stream + return nil +} + +// Stop stops the audio streams +func (a *AudioStream) Stop() error { + a.runMutex.Lock() + defer a.runMutex.Unlock() + + if !a.running { + return nil + } + + // Signal stop + close(a.stopChan) + + // Stop and close streams + var inputErr, outputErr error + + if a.inputStream != nil { + inputErr = a.inputStream.Close() + a.inputStream = nil + } + + if a.outputStream != nil { + outputErr = a.outputStream.Close() + a.outputStream = nil + } + + // Stop debugger + if err := a.debugger.Stop(); err != nil { + fmt.Printf("Warning: failed to stop audio debugger: %v\n", err) + } + + // Terminate device manager + if err := a.deviceManager.Terminate(); err != nil { + fmt.Printf("Warning: failed to terminate device manager: %v\n", err) + } + + a.running = false + + // Return first error encountered + if inputErr != nil { + return fmt.Errorf("failed to close input stream: %w", inputErr) + } + if outputErr != nil { + return fmt.Errorf("failed to 
close output stream: %w", outputErr)
+	}
+
+	return nil
+}
+
+// WriteAudio writes audio data to the output buffer (for incoming audio)
+func (a *AudioStream) WriteAudio(data []float32) int {
+	return a.outputBuffer.Write(data)
+}
+
+// GetInputLevel returns the current input audio level (0.0 to 1.0)
+func (a *AudioStream) GetInputLevel() float32 {
+	// Get recent audio data from input buffer
+	samples := make([]float32, FrameSize)
+	read := a.inputBuffer.Read(samples)
+
+	if read == 0 {
+		return 0.0
+	}
+
+	// Sum the squared samples
+	var sum float32
+	for i := 0; i < read; i++ {
+		sum += samples[i] * samples[i]
+	}
+
+	// Mean-square power of the frame: a cheap level estimate that already
+	// lies in [0, 1] for samples in [-1, 1]. (A true RMS would take the
+	// square root, which would pull in the math package.)
+	level := sum / float32(read)
+
+	// Clamp to [0, 1]
+	if level > 1.0 {
+		level = 1.0
+	}
+
+	return level
+}
+
+// GetOutputLevel returns the current output audio level (0.0 to 1.0)
+func (a *AudioStream) GetOutputLevel() float32 {
+	// For output level, we can check the buffer fill level as a proxy
+	available := a.outputBuffer.Available()
+	bufferSize := a.outputBuffer.size
+
+	if bufferSize == 0 {
+		return 0.0
+	}
+
+	level := float32(available) / float32(bufferSize)
+	if level > 1.0 {
+		level = 1.0
+	}
+
+	return level
+}
+
+// IsRunning returns true if the audio stream is running
+func (a *AudioStream) IsRunning() bool {
+	a.runMutex.RLock()
+	defer a.runMutex.RUnlock()
+	return a.running
+}
+
+// GetBufferState returns detailed buffer state for debugging
+func (a *AudioStream) GetBufferState() (inputAvail, outputAvail, inputSize, outputSize int) {
+	if a.inputBuffer != nil {
+		inputAvail = a.inputBuffer.Available()
+		inputSize = a.inputBuffer.size
+	}
+	if a.outputBuffer != nil {
+		outputAvail = a.outputBuffer.Available()
+		outputSize = a.outputBuffer.size
+	}
+	return
+}
+
+// LogBufferState logs current buffer state using the debugger
+func (a *AudioStream) LogBufferState() {
+	if a.debugger == nil {
+		return
+	}
+
+	inputAvail, outputAvail, inputSize, outputSize := a.GetBufferState()
+	a.debugger.LogBufferState(inputAvail, outputAvail, inputSize, outputSize)
+}
+
+// GetInputDevice returns the current input device
+func (a *AudioStream) GetInputDevice() *AudioDevice {
+	return a.inputDevice
+}
+
+// ReadAudio reads audio samples from the input buffer
+func (a *AudioStream) ReadAudio(numSamples int) []float32 {
+	if !a.IsRunning() {
+		return make([]float32, numSamples) // Return silence if not running
+	}
+
+	samples := make([]float32, numSamples)
+	a.inputBuffer.Read(samples)
+	return samples
+}
+
+// GetOutputDevice returns the current output device
+func (a *AudioStream) GetOutputDevice() *AudioDevice {
+	return a.outputDevice
+}
diff --git a/pkg/voice/client.go b/pkg/voice/client.go
new file mode 100644
index 0000000..afc5f44
--- /dev/null
+++ b/pkg/voice/client.go
@@ -0,0 +1,755 @@
+package voice
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	vapiclient "github.com/VapiAI/server-sdk-go/client"
+)
+
+// CallStatus represents the current state of a voice call
+type CallStatus string
+
+const (
+	CallStatusIdle         CallStatus = "idle"
+	CallStatusConnecting   CallStatus = "connecting"
+	CallStatusConnected    CallStatus = "connected"
+	CallStatusDisconnected CallStatus = "disconnected"
+	CallStatusFailed       CallStatus = "failed"
+)
+
+// CallState holds the current state of a voice call
+type CallState struct {
+	CallID       string
+	AssistantID  string
+	Status       CallStatus
+	StartTime    time.Time
+	WebSocketURL string
+}
+
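+// The request and response logs below are bounded channels meant to be
+// drained by a UI; their writers use select with a default case so that
+// logging can never block the audio path (entries are dropped when a
+// channel is full).
+
+// APIRequest 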
represents a request to the Vapi API +type APIRequest struct { + Method string + URL string + Headers map[string]string + Body interface{} + Timestamp time.Time +} + +// APIResponse represents a response from the Vapi API +type APIResponse struct { + StatusCode int + Headers map[string]string + Body interface{} + Duration time.Duration + Timestamp time.Time +} + +// VoiceClient manages voice calls with Vapi WebSocket transport +type VoiceClient struct { + config *WebRTCConfig + vapiClient *vapiclient.Client + callState *CallState + + // Audio pipeline + audioStream *AudioStream + + // WebSocket signaling + signaling *VapiWebSocket + + // Audio processing with jitter buffer + audioProcessor *WebSocketAudioProcessor + jitterBuffer *WebSocketJitterBuffer + + // Echo cancellation state + lastSpeakerSamples []float32 + + // Silence detection + silenceThreshold float32 + consecutiveSilentChunks int + maxSilentChunks int + + // Event channels + requestLog chan APIRequest + responseLog chan APIResponse + callEvents chan CallEvent +} + +// CallEvent represents events during a voice call +type CallEvent struct { + Type string + Data interface{} + Timestamp time.Time +} + +// NewVoiceClient creates a new voice client +func NewVoiceClient(config *WebRTCConfig, vapiClient *vapiclient.Client) (*VoiceClient, error) { + if config == nil { + config = DefaultWebRTCConfig() + } + + // Create audio stream + audioStream, err := NewAudioStream(config) + if err != nil { + return nil, fmt.Errorf("failed to create audio stream: %w", err) + } + + // Create WebSocket signaling client + signaling := NewVapiWebSocket() + + // Create audio processor + audioProcessor, err := NewWebSocketAudioProcessor() + if err != nil { + return nil, fmt.Errorf("failed to create audio processor: %w", err) + } + + // Create WebSocket jitter buffer for incoming audio + jitterBuffer, err := NewWebSocketJitterBuffer(DefaultWebSocketJitterConfig()) + if err != nil { + return nil, fmt.Errorf("failed to create jitter buffer: %w", err) + } + + return &VoiceClient{ + config: config, + vapiClient: vapiClient, + audioStream: audioStream, + signaling: signaling, + audioProcessor: audioProcessor, + jitterBuffer: jitterBuffer, + lastSpeakerSamples: make([]float32, 0), + silenceThreshold: 0.001, // -60dB threshold for silence detection + maxSilentChunks: 3, // Allow max 3 consecutive silent chunks before gating + callState: &CallState{ + Status: CallStatusIdle, + }, + requestLog: make(chan APIRequest, 100), + responseLog: make(chan APIResponse, 100), + callEvents: make(chan CallEvent, 100), + }, nil +} + +// StartCall initiates a voice call with the specified assistant +func (c *VoiceClient) StartCall(assistantID string) error { + c.callState.Status = CallStatusConnecting + c.callState.AssistantID = assistantID + c.callState.StartTime = time.Now() + + // 1. Create WebSocket call via Vapi's /call endpoint with WebSocket transport + call, err := c.createVapiWebSocketCall(assistantID) + if err != nil { + c.callState.Status = CallStatusFailed + return fmt.Errorf("failed to create Vapi WebSocket call: %w", err) + } + + // Update call state from Vapi response + c.callState.CallID = call.Id + c.callState.WebSocketURL = call.RoomURL + + // 2. Connect to Vapi WebSocket transport + if err := c.signaling.Connect(call.RoomURL); err != nil { + c.callState.Status = CallStatusFailed + return fmt.Errorf("failed to connect to WebSocket transport: %w", err) + } + + // Start monitoring signaling events + go c.handleSignalingEvents() + + // 3. 
Start audio stream + if err := c.audioStream.Start(); err != nil { + c.callState.Status = CallStatusFailed + return fmt.Errorf("failed to start audio stream: %w", err) + } + + // 4. Reset and start audio processing + c.audioProcessor.Reset() + c.consecutiveSilentChunks = 0 // Reset silence detection + + // 5. Start jitter buffer for incoming audio + if err := c.jitterBuffer.Start(); err != nil { + c.callState.Status = CallStatusFailed + return fmt.Errorf("failed to start jitter buffer: %w", err) + } + + // 6. Start streaming microphone audio to WebSocket + go c.streamMicrophoneAudio() + + // 7. Start jitter buffer audio processing + go c.processJitterBufferAudio() + + c.callState.Status = CallStatusConnected + + // Emit call started event + c.callEvents <- CallEvent{ + Type: "call_started", + Data: c.callState, + Timestamp: time.Now(), + } + + return nil +} + +// WebSocketCallRequest represents the request structure for /call endpoint with WebSocket transport +type WebSocketCallRequest struct { + AssistantID string `json:"assistantId"` + Transport struct { + Provider string `json:"provider"` + AudioFormat struct { + Format string `json:"format"` + Container string `json:"container"` + SampleRate int `json:"sampleRate"` + } `json:"audioFormat"` + } `json:"transport"` +} + +// WebSocketCallResponse represents the response from /call endpoint with WebSocket transport +type WebSocketCallResponse struct { + ID string `json:"id"` + Status string `json:"status"` + AssistantID string `json:"assistantId"` + Transport struct { + Provider string `json:"provider"` + WebsocketCallURL string `json:"websocketCallUrl"` // The WebSocket URL for audio transport + } `json:"transport"` + CreatedAt time.Time `json:"createdAt"` +} + +// Call represents a Vapi call for WebSocket transport +type Call struct { + Id string + AssistantID string + Status string + RoomURL string + RoomName string + JoinToken string + ListenURL string // Vapi WebSocket for monitoring + ControlURL string // Vapi control URL +} + +// createVapiWebSocketCall creates a WebSocket call via Vapi's /call endpoint with WebSocket transport +func (c *VoiceClient) createVapiWebSocketCall(assistantID string) (*Call, error) { + // Prepare the request payload for WebSocket transport + payload := WebSocketCallRequest{ + AssistantID: assistantID, + Transport: struct { + Provider string `json:"provider"` + AudioFormat struct { + Format string `json:"format"` + Container string `json:"container"` + SampleRate int `json:"sampleRate"` + } `json:"audioFormat"` + }{ + Provider: "vapi.websocket", + AudioFormat: struct { + Format string `json:"format"` + Container string `json:"container"` + SampleRate int `json:"sampleRate"` + }{ + Format: "pcm_s16le", + Container: "raw", + SampleRate: 16000, // Request 16kHz from Vapi (their default) + }, + }, + } + + // Marshal the request payload + jsonPayload, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("failed to marshal WebSocket call request: %w", err) + } + + // Get the API base URL from config + baseURL := c.config.getAPIBaseURL() + url := baseURL + "/call" + + // Use private API key for call creation + privateKey := c.config.getPrivateAPIKey() + + // Create HTTP request + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonPayload)) + if err != nil { + return nil, fmt.Errorf("failed to create WebSocket call request: %w", err) + } + + // Set headers + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer "+privateKey) + + // Log the API 
request, masking the key (guarded so a short key can't panic the slice)
+	maskedKey := privateKey
+	if len(maskedKey) > 10 {
+		maskedKey = maskedKey[:10]
+	}
+	requestLog := APIRequest{
+		Method:    "POST",
+		URL:       url,
+		Headers:   map[string]string{"Authorization": "Bearer " + maskedKey + "...", "Content-Type": "application/json"},
+		Body:      payload,
+		Timestamp: time.Now(),
+	}
+	select {
+	case c.requestLog <- requestLog:
+	default:
+		// Channel full, drop log
+	}
+
+	// Make the HTTP request
+	startTime := time.Now()
+	client := &http.Client{Timeout: 30 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create WebSocket call: %w", err)
+	}
+	defer resp.Body.Close() //nolint:errcheck // Error handling would complicate deferred cleanup
+
+	// Check response status
+	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated {
+		// Try to read error response body for more details
+		var errorBody map[string]interface{}
+		if err := json.NewDecoder(resp.Body).Decode(&errorBody); err == nil {
+			// Log error response
+			responseLog := APIResponse{
+				StatusCode: resp.StatusCode,
+				Headers:    make(map[string]string),
+				Body:       errorBody,
+				Duration:   time.Since(startTime),
+				Timestamp:  time.Now(),
+			}
+			select {
+			case c.responseLog <- responseLog:
+			default:
+				// Channel full, drop log
+			}
+			return nil, fmt.Errorf("WebSocket call creation failed with status %d: %v", resp.StatusCode, errorBody)
+		}
+		return nil, fmt.Errorf("WebSocket call creation failed with status: %d", resp.StatusCode)
+	}
+
+	// Read raw response to see the actual structure
+	// (the deferred Close above releases the body; no second Close is needed)
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	// Log successful response
+	var responseBody map[string]interface{}
+	if err := json.Unmarshal(bodyBytes, &responseBody); err != nil {
+		fmt.Printf("Failed to unmarshal response body: %v\n", err)
+	}
+
+	responseLog := APIResponse{
+		StatusCode: resp.StatusCode,
+		Headers:    make(map[string]string),
+		Body:       responseBody,
+		Duration:   time.Since(startTime),
+		Timestamp:  time.Now(),
+	}
+	select {
+	case c.responseLog <- responseLog:
+	default:
+		// Channel full, drop log
+	}
+
+	// Parse the response
+	var wsCallResp WebSocketCallResponse
+	if err := json.Unmarshal(bodyBytes, &wsCallResp); err != nil {
+		return nil, fmt.Errorf("failed to decode WebSocket call response: %w", err)
+	}
+
+	// Convert to our internal Call structure
+	call := &Call{
+		Id:          wsCallResp.ID,
+		AssistantID: wsCallResp.AssistantID,
+		Status:      wsCallResp.Status,
+		RoomURL:     wsCallResp.Transport.WebsocketCallURL, // Use WebSocket URL as room URL
+		RoomName:    wsCallResp.ID,                         // Use call ID as room name
+		JoinToken:   "",                                    // No token needed for WebSocket transport
+		ListenURL:   wsCallResp.Transport.WebsocketCallURL, // WebSocket URL for transport
+		ControlURL:  "",                                    // No separate control URL for WebSocket transport
+	}
+
+	return call, nil
+}
+
+// endVapiCall sends a DELETE request to Vapi to properly end the call
+func (c *VoiceClient) endVapiCall(callID string) error {
+	// Get the API base URL from config
+	baseURL := c.config.getAPIBaseURL()
+	url := baseURL + "/call/" + callID
+
+	// Use private API key for call termination
+	privateKey := c.config.getPrivateAPIKey()
+
+	// Create DELETE request
+	req, err := http.NewRequest("DELETE", url, http.NoBody)
+	if err != nil {
+		return fmt.Errorf("failed to create end call request: %w", err)
+	}
+
+	// Set headers
+	req.Header.Set("Authorization", "Bearer "+privateKey)
+
+	// Log the API request, masking the key as above
+	maskedKey := privateKey
+	if len(maskedKey) > 10 {
+		maskedKey = maskedKey[:10]
+	}
+	requestLog := APIRequest{
+		Method:    "DELETE",
+		URL:       url,
+		Headers:   map[string]string{"Authorization": "Bearer " + maskedKey + "..."},
+		Body:      nil,
+		Timestamp: time.Now(),
+	}
+	select {
+	case c.requestLog <- requestLog:
+	default:
+		// Channel full, drop log
+	}
+
+	// Make the HTTP request
+	startTime := time.Now()
+	client := &http.Client{Timeout: 10 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send end call request: %w", err)
+	}
+	defer resp.Body.Close() //nolint:errcheck // Error handling would complicate deferred cleanup
+
+	// Log response
+	responseLog := APIResponse{
+		StatusCode: resp.StatusCode,
+		Headers:    make(map[string]string),
+		Body:       nil,
+		Duration:   time.Since(startTime),
+		Timestamp:  time.Now(),
+	}
+	select {
+	case c.responseLog <- responseLog:
+	default:
+		// Channel full, drop log
+	}
+
+	// Check response status
+	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent {
+		return fmt.Errorf("end call request failed with status: %d", resp.StatusCode)
+	}
+
+	return nil
+}
+
+// EndCall terminates the current voice call
+func (c *VoiceClient) EndCall() error {
+	if c.callState.Status == CallStatusIdle {
+		return fmt.Errorf("no active call to end")
+	}
+
+	// Send DELETE request to Vapi to properly end the call
+	if c.callState.CallID != "" {
+		if err := c.endVapiCall(c.callState.CallID); err != nil {
+			fmt.Printf("Warning: failed to end Vapi call: %v\n", err)
+			// Continue with local cleanup even if API call fails
+		}
+	}
+
+	// Stop jitter buffer
+	if c.jitterBuffer != nil {
+		if err := c.jitterBuffer.Stop(); err != nil {
+			fmt.Printf("Warning: failed to stop jitter buffer: %v\n", err)
+		}
+	}
+
+	// Stop audio stream
+	if c.audioStream != nil {
+		if err := c.audioStream.Stop(); err != nil {
+			fmt.Printf("Warning: failed to stop audio stream: %v\n", err)
+		}
+	}
+
+	// Close signaling connection
+	if c.signaling != nil {
+		if err := c.signaling.Close(); err != nil {
+			fmt.Printf("Warning: failed to close signaling: %v\n", err)
+		}
+	}
+
+	// Reset call state
+	c.callState.Status = CallStatusIdle
+	c.callState.WebSocketURL = ""
+
+	// Emit call ended event
+	c.callEvents <- CallEvent{
+		Type:      "call_ended",
+		Data:      c.callState,
+		Timestamp: time.Now(),
+	}
+
+	return nil
+}
+
+// GetCallState returns the current call state
+func (c *VoiceClient) GetCallState() *CallState {
+	return c.callState
+}
+
+// GetRequestLog returns the API request log channel
+func (c *VoiceClient) GetRequestLog() <-chan APIRequest {
+	return c.requestLog
+}
+
+// GetResponseLog returns the API response log channel
+func (c *VoiceClient) GetResponseLog() <-chan APIResponse {
+	return c.responseLog
+}
+
+// GetCallEvents returns the call events channel
+func (c *VoiceClient) GetCallEvents() <-chan CallEvent {
+	return c.callEvents
+}
+
+// GetAudioLevels returns current input and output audio levels
+func (c *VoiceClient) GetAudioLevels() (input, output float32) {
+	if c.audioStream == nil {
+		return 0.0, 0.0
+	}
+
+	return c.audioStream.GetInputLevel(), c.audioStream.GetOutputLevel()
+}
+
+// IsAudioRunning returns true if audio stream is active
+func (c *VoiceClient) IsAudioRunning() bool {
+	if c.audioStream == nil {
+		return false
+	}
+
+	return c.audioStream.IsRunning()
+}
+
+// ResetAudioProcessor resets the audio processor's internal state
+func (c *VoiceClient) ResetAudioProcessor() {
+	if c.audioProcessor != nil {
+		c.audioProcessor.Reset()
+	}
+}
+
+// SetNoiseGateThreshold adjusts the noise gate 
sensitivity +func (c *VoiceClient) SetNoiseGateThreshold(threshold float32) { + if c.audioProcessor != nil { + c.audioProcessor.SetNoiseGateThreshold(threshold) + } +} + +// SetEchoLearningRate adjusts the echo cancellation learning rate +func (c *VoiceClient) SetEchoLearningRate(rate float32) { + if c.audioProcessor != nil { + c.audioProcessor.SetLearningRate(rate) + } +} + +// SetSilenceThreshold adjusts the silence detection threshold +func (c *VoiceClient) SetSilenceThreshold(threshold float32) { + c.silenceThreshold = threshold +} + +// SetMaxSilentChunks adjusts how many consecutive silent chunks to allow before gating +func (c *VoiceClient) SetMaxSilentChunks(maxChunks int) { + c.maxSilentChunks = maxChunks +} + +// GetSilenceStats returns current silence detection statistics +func (c *VoiceClient) GetSilenceStats() (threshold float32, maxChunks, consecutive int) { + return c.silenceThreshold, c.maxSilentChunks, c.consecutiveSilentChunks +} + +// GetJitterBufferStats returns current jitter buffer performance statistics +func (c *VoiceClient) GetJitterBufferStats() map[string]interface{} { + if c.jitterBuffer == nil { + return map[string]interface{}{"error": "jitter buffer not initialized"} + } + return c.jitterBuffer.GetStats() +} + +// handleSignalingEvents processes events from Vapi WebSocket signaling +func (c *VoiceClient) handleSignalingEvents() { + for event := range c.signaling.GetEvents() { + // Skip noisy audio_data events from being logged + if event.Type == "audio_data" { + // Handle audio data directly without forwarding as call event + if samples, ok := event.Data.([]float32); ok { + + // Store speaker samples for echo cancellation + c.lastSpeakerSamples = samples + + // Send samples to jitter buffer for adaptive buffering + if err := c.jitterBuffer.WriteAudio(samples); err != nil { + fmt.Printf("āš ļø Jitter buffer write failed: %v\n", err) + } + } + continue + } + + // Skip excessive logging events + if event.Type == "model-output" || event.Type == "voice-input" { + continue + } + + // Forward other signaling events as call events (for logging) + callEvent := CallEvent{ + Type: "signaling_" + event.Type, + Data: event.Data, + Timestamp: event.Timestamp, + } + + select { + case c.callEvents <- callEvent: + default: + // Channel full, drop event + } + + // Handle specific signaling events + switch event.Type { + case "room_joined": + c.callEvents <- CallEvent{ + Type: "room_connected", + Data: "Successfully connected to Vapi WebSocket transport", + Timestamp: time.Now(), + } + + case "participant_joined": + c.callEvents <- CallEvent{ + Type: "participant_joined", + Data: event.Data, + Timestamp: time.Now(), + } + + case "speech-update": + // Handle speech status updates + c.callEvents <- CallEvent{ + Type: "speech_update", + Data: event.Data, + Timestamp: time.Now(), + } + + case "transcript": + // Handle transcript events + c.callEvents <- CallEvent{ + Type: "transcript", + Data: event.Data, + Timestamp: time.Now(), + } + + case "webrtc_error", "daily_error", "websocket_error": + c.callEvents <- CallEvent{ + Type: "connection_error", + Data: event.Data, + Timestamp: time.Now(), + } + } + } +} + +// streamMicrophoneAudio continuously streams audio from microphone to Vapi WebSocket +func (c *VoiceClient) streamMicrophoneAudio() { + // Buffer for audio samples + // AudioStream uses 48kHz, but Vapi expects 16kHz + const audioStreamSampleRate = 48000 + const vapiSampleRate = 16000 + const chunkDurationMs = 20 + const audioStreamSamplesPerChunk = (audioStreamSampleRate * 
chunkDurationMs) / 1000 // 960 samples at 48kHz + const vapiSamplesPerChunk = (vapiSampleRate * chunkDurationMs) / 1000 // 320 samples at 16kHz + + audioBuffer := make([]float32, vapiSamplesPerChunk) + chunkCount := 0 + + for c.callState.Status == CallStatusConnected || c.callState.Status == CallStatusConnecting { + chunkCount++ + // Read audio from microphone + if c.audioStream.IsRunning() { + // Get audio samples from input stream at 48kHz + inputSamples := c.audioStream.ReadAudio(audioStreamSamplesPerChunk) + if len(inputSamples) > 0 { + // Downsample from 48kHz to 16kHz (take every 3rd sample) + for i := 0; i < vapiSamplesPerChunk && i*3 < len(inputSamples); i++ { + audioBuffer[i] = inputSamples[i*3] + } + + // Apply audio processing (echo cancellation and noise reduction) + processedAudio := c.audioProcessor.ProcessAudio(audioBuffer, c.lastSpeakerSamples) + + // Send processed audio to Vapi WebSocket + if c.signaling != nil && c.signaling.IsConnected() { + if err := c.signaling.SendAudioData(processedAudio); err != nil { + fmt.Printf("Failed to send audio data: %v\n", err) + } + } + } + } + + // Sleep for chunk duration (20ms) + time.Sleep(time.Duration(chunkDurationMs) * time.Millisecond) + } +} + +// processJitterBufferAudio continuously reads from jitter buffer and writes to audio stream +func (c *VoiceClient) processJitterBufferAudio() { + const chunkDurationMs = 20 + const vapiSampleRate = 16000 + const vapiSamplesPerChunk = (vapiSampleRate * chunkDurationMs) / 1000 // 320 samples at 16kHz + + ticker := time.NewTicker(time.Duration(chunkDurationMs) * time.Millisecond) + defer ticker.Stop() + + chunkCount := 0 + + for c.callState.Status == CallStatusConnected || c.callState.Status == CallStatusConnecting { + select { + case <-ticker.C: + chunkCount++ + + // Read processed audio from jitter buffer (16kHz) + jitterSamples := c.jitterBuffer.ReadAudio(vapiSamplesPerChunk) + + if len(jitterSamples) > 0 { + // Upsample from 16kHz to 48kHz using proper interpolation + upsampled := c.upsample16to48kHz(jitterSamples) + + // Write to audio stream + written := c.audioStream.WriteAudio(upsampled) + if written != len(upsampled) { + fmt.Printf("āš ļø Audio buffer overflow: Tried to write %d samples, only wrote %d\n", + len(upsampled), written) + } + } + + case <-time.After(100 * time.Millisecond): + // Timeout protection - continue if call is still active + if c.callState.Status != CallStatusConnected && c.callState.Status != CallStatusConnecting { + return + } + } + } +} + +// upsample16to48kHz performs proper interpolation from 16kHz to 48kHz +func (c *VoiceClient) upsample16to48kHz(samples []float32) []float32 { + // 3x upsampling with linear interpolation (better than simple repetition) + upsampled := make([]float32, len(samples)*3) + + for i := 0; i < len(samples); i++ { + // Current sample + current := samples[i] + + // Next sample (or repeat last if at end) + var next float32 + if i+1 < len(samples) { + next = samples[i+1] + } else { + next = current + } + + // Linear interpolation + upsampled[i*3] = current + upsampled[i*3+1] = current + (next-current)*0.33 + upsampled[i*3+2] = current + (next-current)*0.67 + } + + return upsampled +} diff --git a/pkg/voice/config.go b/pkg/voice/config.go new file mode 100644 index 0000000..cc64576 --- /dev/null +++ b/pkg/voice/config.go @@ -0,0 +1,69 @@ +package voice + +import ( + "time" +) + +// WebRTCConfig holds configuration for WebRTC functionality +type WebRTCConfig struct { + // Vapi API Configuration + VapiAPIKey string 
`mapstructure:"vapi_api_key"` // Private API key + VapiPublicAPIKey string `mapstructure:"vapi_public_api_key"` // Public API key for /call/web + VapiBaseURL string `mapstructure:"vapi_base_url"` + + // Daily.co Configuration (legacy - now handled by Vapi) + DailyAPIKey string `mapstructure:"daily_api_key"` + DailyDomain string `mapstructure:"daily_domain"` + + // WebRTC Configuration + STUNServers []string `mapstructure:"stun_servers"` + TURNServers []string `mapstructure:"turn_servers"` + + // Audio Configuration + AudioInputDevice string `mapstructure:"audio_input_device"` + AudioOutputDevice string `mapstructure:"audio_output_device"` + SampleRate int `mapstructure:"sample_rate"` + BufferSize int `mapstructure:"buffer_size"` + + // Call Configuration + CallTimeout time.Duration `mapstructure:"call_timeout"` + VideoEnabled bool `mapstructure:"video_enabled"` + + // Debug Configuration + AudioDebug bool `mapstructure:"audio_debug"` +} + +// DefaultWebRTCConfig returns default WebRTC configuration +func DefaultWebRTCConfig() *WebRTCConfig { + return &WebRTCConfig{ + // Default Vapi API configuration + VapiBaseURL: "https://api.vapi.ai", + + // Default to Vapi's Daily.co subdomain for WebRTC calls (legacy) + DailyDomain: "vapi", + STUNServers: []string{ + "stun:stun.l.google.com:19302", + "stun:stun1.l.google.com:19302", + }, + AudioInputDevice: "default", + AudioOutputDevice: "default", + SampleRate: 48000, + BufferSize: 480, + CallTimeout: 30 * time.Minute, + VideoEnabled: false, // Audio-only by default + AudioDebug: false, + } +} + +// getPrivateAPIKey returns the Vapi private API key for /call endpoint +func (c *WebRTCConfig) getPrivateAPIKey() string { + return c.VapiAPIKey +} + +// getAPIBaseURL returns the Vapi API base URL +func (c *WebRTCConfig) getAPIBaseURL() string { + if c.VapiBaseURL == "" { + return "https://api.vapi.ai" + } + return c.VapiBaseURL +} diff --git a/pkg/voice/debug.go b/pkg/voice/debug.go new file mode 100644 index 0000000..651a600 --- /dev/null +++ b/pkg/voice/debug.go @@ -0,0 +1,403 @@ +package voice + +import ( + "encoding/binary" + "fmt" + "os" + "sync" + "time" +) + +// AudioDebugger handles audio debugging and recording +type AudioDebugger struct { + enabled bool + inputFile *os.File + outputFile *os.File + inputMutex sync.Mutex + outputMutex sync.Mutex + sampleRate int + channels int + bitsPerSample int + + // Timing and flow tracking + lastInputTime time.Time + lastOutputTime time.Time + outputSampleCount int64 + silentChunks int + totalChunks int +} + +// NewAudioDebugger creates a new audio debugger +func NewAudioDebugger(enabled bool) *AudioDebugger { + return &AudioDebugger{ + enabled: enabled, + sampleRate: 48000, // Match your audio pipeline + channels: 1, // Mono + bitsPerSample: 16, + } +} + +// Start initializes debug recording files +func (d *AudioDebugger) Start() error { + if !d.enabled { + return nil + } + + timestamp := time.Now().Format("20060102-150405") + + // Create input debug file + inputPath := fmt.Sprintf("audio_debug_input_%s.wav", timestamp) + // #nosec G304 -- This is intentional file creation for debugging + inputFile, err := os.Create(inputPath) + if err != nil { + return fmt.Errorf("failed to create input debug file: %w", err) + } + d.inputFile = inputFile + + // Create output debug file + outputPath := fmt.Sprintf("audio_debug_output_%s.wav", timestamp) + // #nosec G304 -- This is intentional file creation for debugging + outputFile, err := os.Create(outputPath) + if err != nil { + if err := inputFile.Close(); err != nil { 
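+			// Best-effort cleanup: the outer Create error below is the one
+			// that gets reported; this close failure is only logged.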
+ fmt.Printf("Failed to close input file: %v\n", err) + } + return fmt.Errorf("failed to create output debug file: %w", err) + } + d.outputFile = outputFile + + // Write WAV headers (we'll update the size later) + if err := d.writeWAVHeader(d.inputFile); err != nil { + return fmt.Errorf("failed to write input WAV header: %w", err) + } + if err := d.writeWAVHeader(d.outputFile); err != nil { + return fmt.Errorf("failed to write output WAV header: %w", err) + } + + fmt.Printf("šŸ“ Audio debugging enabled:\n") + fmt.Printf(" Input: %s\n", inputPath) + fmt.Printf(" Output: %s\n", outputPath) + + return nil +} + +// WriteInput writes input audio samples to debug file +func (d *AudioDebugger) WriteInput(samples []float32) { + if !d.enabled || d.inputFile == nil { + return + } + + d.inputMutex.Lock() + defer d.inputMutex.Unlock() + + // Convert float32 to int16 and write + for _, sample := range samples { + // Check for clipping in float domain + if sample > 1.0 || sample < -1.0 { + fmt.Printf("āš ļø Input clipping detected: %.3f\n", sample) + } + + // Clamp to prevent overflow + if sample > 1.0 { + sample = 1.0 + } else if sample < -1.0 { + sample = -1.0 + } + + // Convert to int16 + int16Sample := int16(sample * 32767.0) + if err := binary.Write(d.inputFile, binary.LittleEndian, int16Sample); err != nil { + fmt.Printf("Failed to write input sample: %v\n", err) + } + } +} + +// WriteOutput writes output audio samples to debug file +func (d *AudioDebugger) WriteOutput(samples []float32) { + if !d.enabled || d.outputFile == nil { + return + } + + d.outputMutex.Lock() + defer d.outputMutex.Unlock() + + // Track timing and detect gaps + now := time.Now() + if !d.lastOutputTime.IsZero() { + timeSinceLastOutput := now.Sub(d.lastOutputTime) + expectedInterval := time.Duration(float64(len(samples)) / float64(d.sampleRate) * float64(time.Second)) + + // Detect significant gaps (more than 2x expected interval) + if timeSinceLastOutput > expectedInterval*2 { + fmt.Printf("šŸ”‡ OUTPUT GAP DETECTED: Expected %.2fms, got %.2fms (gap: %.2fms)\n", + float64(expectedInterval.Nanoseconds())/1e6, + float64(timeSinceLastOutput.Nanoseconds())/1e6, + float64((timeSinceLastOutput-expectedInterval).Nanoseconds())/1e6) + } + } + d.lastOutputTime = now + d.outputSampleCount += int64(len(samples)) + + // Check if this chunk is mostly silent + var silentSamples int + for _, sample := range samples { + if sample > -0.001 && sample < 0.001 { // Very quiet threshold + silentSamples++ + } + } + + d.totalChunks++ + if float64(silentSamples)/float64(len(samples)) > 0.95 { + d.silentChunks++ + if d.totalChunks%50 == 0 { // Log every 50 chunks + fmt.Printf("šŸ”‡ Output silence rate: %d/%d chunks (%.1f%%) - Current chunk: %d/%d silent\n", + d.silentChunks, d.totalChunks, + float64(d.silentChunks)/float64(d.totalChunks)*100, + silentSamples, len(samples)) + } + } + + // Convert float32 to int16 and write + for _, sample := range samples { + // Check for clipping in float domain + if sample > 1.0 || sample < -1.0 { + fmt.Printf("āš ļø Output clipping detected: %.3f\n", sample) + } + + // Clamp to prevent overflow + if sample > 1.0 { + sample = 1.0 + } else if sample < -1.0 { + sample = -1.0 + } + + // Convert to int16 + int16Sample := int16(sample * 32767.0) + if err := binary.Write(d.outputFile, binary.LittleEndian, int16Sample); err != nil { + fmt.Printf("Failed to write output sample: %v\n", err) + } + } +} + +// LogAudioStats logs statistics about audio samples +func (d *AudioDebugger) LogAudioStats(samples []float32, source 
string) { + if !d.enabled || len(samples) == 0 { + return + } + + // Calculate RMS + var sum float64 + var peak float32 + var clippedCount int + + for _, sample := range samples { + sum += float64(sample * sample) + + absSample := sample + if absSample < 0 { + absSample = -absSample + } + + if absSample > peak { + peak = absSample + } + + if sample > 1.0 || sample < -1.0 { + clippedCount++ + } + } + + rms := float32(sum / float64(len(samples))) + + if clippedCount > 0 || peak > 0.95 { + fmt.Printf("šŸ”Š %s Audio Stats: RMS=%.3f, Peak=%.3f, Clipped=%d/%d\n", + source, rms, peak, clippedCount, len(samples)) + } +} + +// Stop closes debug files and updates WAV headers +func (d *AudioDebugger) Stop() error { + if !d.enabled { + return nil + } + + var errs []error + + if d.inputFile != nil { + d.inputMutex.Lock() + if err := d.updateWAVHeader(d.inputFile); err != nil { + errs = append(errs, fmt.Errorf("failed to update input WAV header: %w", err)) + } + if err := d.inputFile.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close input file: %w", err)) + } + d.inputMutex.Unlock() + } + + if d.outputFile != nil { + d.outputMutex.Lock() + if err := d.updateWAVHeader(d.outputFile); err != nil { + errs = append(errs, fmt.Errorf("failed to update output WAV header: %w", err)) + } + if err := d.outputFile.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close output file: %w", err)) + } + d.outputMutex.Unlock() + } + + if len(errs) > 0 { + return fmt.Errorf("errors during stop: %v", errs) + } + + fmt.Println("šŸ“ Audio debug files saved") + return nil +} + +// writeWAVHeader writes a WAV file header +func (d *AudioDebugger) writeWAVHeader(file *os.File) error { + // WAV header structure + header := []byte{ + 'R', 'I', 'F', 'F', // ChunkID + 0, 0, 0, 0, // ChunkSize (to be filled later) + 'W', 'A', 'V', 'E', // Format + 'f', 'm', 't', ' ', // Subchunk1ID + 16, 0, 0, 0, // Subchunk1Size (16 for PCM) + 1, 0, // AudioFormat (1 = PCM) + byte(d.channels), byte(d.channels >> 8), // NumChannels + byte(d.sampleRate), byte(d.sampleRate >> 8), byte(d.sampleRate >> 16), byte(d.sampleRate >> 24), // SampleRate + 0, 0, 0, 0, // ByteRate (to be calculated) + 0, 0, // BlockAlign (to be calculated) + byte(d.bitsPerSample), byte(d.bitsPerSample >> 8), // BitsPerSample + 'd', 'a', 't', 'a', // Subchunk2ID + 0, 0, 0, 0, // Subchunk2Size (to be filled later) + } + + // Calculate ByteRate and BlockAlign + blockAlign := d.channels * d.bitsPerSample / 8 + byteRate := d.sampleRate * blockAlign + + // Update ByteRate + // #nosec G115 -- byteRate is calculated from safe constants + binary.LittleEndian.PutUint32(header[28:32], uint32(byteRate)) + // Update BlockAlign + // #nosec G115 -- blockAlign is calculated from safe constants + binary.LittleEndian.PutUint16(header[32:34], uint16(blockAlign)) + + _, err := file.Write(header) + return err +} + +// updateWAVHeader updates the WAV header with the correct file size +func (d *AudioDebugger) updateWAVHeader(file *os.File) error { + // Get file size + fileInfo, err := file.Stat() + if err != nil { + return err + } + + fileSize := fileInfo.Size() + + // Update ChunkSize (file size - 8) + if _, err := file.Seek(4, 0); err != nil { + return fmt.Errorf("failed to seek to chunk size position: %w", err) + } + // #nosec G115 -- fileSize is from file stat, safe for WAV header + if err := binary.Write(file, binary.LittleEndian, uint32(fileSize-8)); err != nil { + return fmt.Errorf("failed to write chunk size: %w", err) + } + + // Update Subchunk2Size (file 
size - 44) + if _, err := file.Seek(40, 0); err != nil { + return fmt.Errorf("failed to seek to subchunk size position: %w", err) + } + // #nosec G115 -- fileSize is from file stat, safe for WAV header + if err := binary.Write(file, binary.LittleEndian, uint32(fileSize-44)); err != nil { + return fmt.Errorf("failed to write subchunk size: %w", err) + } + + return nil +} + +// LogWebSocketAudio logs detailed information about incoming WebSocket audio +func (d *AudioDebugger) LogWebSocketAudio(samples []float32, timestamp time.Time) { + if !d.enabled || len(samples) == 0 { + return + } + + // Check for timing gaps in WebSocket audio + if !d.lastInputTime.IsZero() { + timeSinceLastWS := timestamp.Sub(d.lastInputTime) + expectedInterval := time.Duration(float64(len(samples)) / 16000.0 * float64(time.Second)) // 16kHz from Vapi + + if timeSinceLastWS > expectedInterval*3 { + fmt.Printf("🌐 WEBSOCKET AUDIO GAP: Expected %.2fms, got %.2fms (gap: %.2fms)\n", + float64(expectedInterval.Nanoseconds())/1e6, + float64(timeSinceLastWS.Nanoseconds())/1e6, + float64((timeSinceLastWS-expectedInterval).Nanoseconds())/1e6) + } + } + d.lastInputTime = timestamp + + // Analyze audio content + var silentSamples, clippedSamples int + var peak, rms float32 + for _, sample := range samples { + if sample > -0.001 && sample < 0.001 { + silentSamples++ + } + if sample > 1.0 || sample < -1.0 { + clippedSamples++ + } + + absSample := sample + if absSample < 0 { + absSample = -absSample + } + if absSample > peak { + peak = absSample + } + rms += sample * sample + } + rms /= float32(len(samples)) + + silenceRate := float64(silentSamples) / float64(len(samples)) + + // Log if significant silence or other issues + if silenceRate > 0.9 || clippedSamples > 0 || peak > 0.95 { + fmt.Printf("🌐 WebSocket Audio: %d samples, %.1f%% silent, peak=%.3f, rms=%.3f, clipped=%d\n", + len(samples), silenceRate*100, peak, rms, clippedSamples) + } +} + +// LogBufferState logs the current state of audio buffers +func (d *AudioDebugger) LogBufferState(inputAvailable, outputAvailable, inputSize, outputSize int) { + if !d.enabled { + return + } + + inputFill := float64(inputAvailable) / float64(inputSize) * 100 + outputFill := float64(outputAvailable) / float64(outputSize) * 100 + + // Log if buffers are getting too full or too empty + if inputFill < 10 || inputFill > 90 || outputFill < 10 || outputFill > 90 { + fmt.Printf("šŸ“Š Buffer State: Input %.1f%% (%d/%d), Output %.1f%% (%d/%d)\n", + inputFill, inputAvailable, inputSize, + outputFill, outputAvailable, outputSize) + } + + // Warn about potential underruns + if outputFill < 5 { + fmt.Printf("āš ļø OUTPUT BUFFER UNDERRUN RISK: Only %.1f%% filled (%d/%d samples)\n", + outputFill, outputAvailable, outputSize) + } +} + +// LogAudioFlow provides a comprehensive view of the audio pipeline state +func (d *AudioDebugger) LogAudioFlow(stage string, sampleCount int, timestamp time.Time) { + if !d.enabled { + return + } + + fmt.Printf("šŸŽµ Audio Flow [%s]: %d samples at %s\n", + stage, sampleCount, timestamp.Format("15:04:05.000")) +} diff --git a/pkg/voice/devices.go b/pkg/voice/devices.go new file mode 100644 index 0000000..f2f10a2 --- /dev/null +++ b/pkg/voice/devices.go @@ -0,0 +1,279 @@ +package voice + +import ( + "fmt" + "strings" + + "github.com/gordonklaus/portaudio" +) + +// AudioDevice represents an audio input or output device +type AudioDevice struct { + Index int + Name string + MaxInputChannels int + MaxOutputChannels int + DefaultSampleRate float64 + DefaultLowInputLatency float64 
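+	// Both latency fields are PortAudio's "default low" latency hints,
+	// converted to seconds when devices are enumerated.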
+ DefaultLowOutputLatency float64 + IsDefault bool +} + +// AudioDeviceManager manages audio device enumeration and selection +type AudioDeviceManager struct { + inputDevices []AudioDevice + outputDevices []AudioDevice + initialized bool +} + +// NewAudioDeviceManager creates a new audio device manager +func NewAudioDeviceManager() *AudioDeviceManager { + return &AudioDeviceManager{ + inputDevices: make([]AudioDevice, 0), + outputDevices: make([]AudioDevice, 0), + initialized: false, + } +} + +// Initialize initializes PortAudio and enumerates devices +func (m *AudioDeviceManager) Initialize() error { + if m.initialized { + return nil + } + + // Initialize PortAudio + if err := portaudio.Initialize(); err != nil { + return fmt.Errorf("failed to initialize PortAudio: %w", err) + } + + // Enumerate devices + if err := m.enumerateDevices(); err != nil { + if termErr := portaudio.Terminate(); termErr != nil { + fmt.Printf("Failed to terminate portaudio: %v\n", termErr) + } + return fmt.Errorf("failed to enumerate audio devices: %w", err) + } + + m.initialized = true + return nil +} + +// Terminate terminates PortAudio +func (m *AudioDeviceManager) Terminate() error { + if !m.initialized { + return nil + } + + if err := portaudio.Terminate(); err != nil { + return fmt.Errorf("failed to terminate PortAudio: %w", err) + } + + m.initialized = false + return nil +} + +// enumerateDevices discovers all available audio devices +func (m *AudioDeviceManager) enumerateDevices() error { + // Get default devices + defaultInput, err := portaudio.DefaultInputDevice() + if err != nil { + // Default input device might not be available + defaultInput = nil + } + + defaultOutput, err := portaudio.DefaultOutputDevice() + if err != nil { + return fmt.Errorf("failed to get default output device: %w", err) + } + + // Get all devices + devices, err := portaudio.Devices() + if err != nil { + return fmt.Errorf("failed to get audio devices: %w", err) + } + + // Clear existing device lists + m.inputDevices = make([]AudioDevice, 0) + m.outputDevices = make([]AudioDevice, 0) + + // Process each device + for i, device := range devices { + audioDevice := AudioDevice{ + Index: i, + Name: device.Name, + MaxInputChannels: device.MaxInputChannels, + MaxOutputChannels: device.MaxOutputChannels, + DefaultSampleRate: device.DefaultSampleRate, + DefaultLowInputLatency: device.DefaultLowInputLatency.Seconds(), + DefaultLowOutputLatency: device.DefaultLowOutputLatency.Seconds(), + IsDefault: false, + } + + // Check if this is the default input device + if defaultInput != nil && device == defaultInput { + audioDevice.IsDefault = true + } + + // Check if this is the default output device + if device == defaultOutput { + audioDevice.IsDefault = true + } + + // Add to appropriate device list + if device.MaxInputChannels > 0 { + m.inputDevices = append(m.inputDevices, audioDevice) + } + if device.MaxOutputChannels > 0 { + m.outputDevices = append(m.outputDevices, audioDevice) + } + } + + return nil +} + +// GetInputDevices returns all available input devices +func (m *AudioDeviceManager) GetInputDevices() ([]AudioDevice, error) { + if !m.initialized { + if err := m.Initialize(); err != nil { + return nil, err + } + } + return m.inputDevices, nil +} + +// GetOutputDevices returns all available output devices +func (m *AudioDeviceManager) GetOutputDevices() ([]AudioDevice, error) { + if !m.initialized { + if err := m.Initialize(); err != nil { + return nil, err + } + } + return m.outputDevices, nil +} + +// GetDefaultInputDevice returns the 
default input device
+func (m *AudioDeviceManager) GetDefaultInputDevice() (*AudioDevice, error) {
+	devices, err := m.GetInputDevices()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, device := range devices {
+		if device.IsDefault {
+			return &device, nil
+		}
+	}
+
+	// If no default found, return the first available input device
+	if len(devices) > 0 {
+		return &devices[0], nil
+	}
+
+	return nil, fmt.Errorf("no input devices available")
+}
+
+// GetDefaultOutputDevice returns the default output device
+func (m *AudioDeviceManager) GetDefaultOutputDevice() (*AudioDevice, error) {
+	devices, err := m.GetOutputDevices()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, device := range devices {
+		if device.IsDefault {
+			return &device, nil
+		}
+	}
+
+	// If no default found, return the first available output device
+	if len(devices) > 0 {
+		return &devices[0], nil
+	}
+
+	return nil, fmt.Errorf("no output devices available")
+}
+
+// FindInputDeviceByName finds an input device by name (case-insensitive partial match)
+func (m *AudioDeviceManager) FindInputDeviceByName(name string) (*AudioDevice, error) {
+	devices, err := m.GetInputDevices()
+	if err != nil {
+		return nil, err
+	}
+
+	name = strings.ToLower(name)
+
+	// First try exact match
+	for _, device := range devices {
+		if strings.EqualFold(device.Name, name) {
+			return &device, nil
+		}
+	}
+
+	// Then try partial match
+	for _, device := range devices {
+		if strings.Contains(strings.ToLower(device.Name), name) {
+			return &device, nil
+		}
+	}
+
+	return nil, fmt.Errorf("input device not found: %s", name)
+}
+
+// FindOutputDeviceByName finds an output device by name (case-insensitive partial match)
+func (m *AudioDeviceManager) FindOutputDeviceByName(name string) (*AudioDevice, error) {
+	devices, err := m.GetOutputDevices()
+	if err != nil {
+		return nil, err
+	}
+
+	name = strings.ToLower(name)
+
+	// First try exact match
+	for _, device := range devices {
+		if strings.EqualFold(device.Name, name) {
+			return &device, nil
+		}
+	}
+
+	// Then try partial match
+	for _, device := range devices {
+		if strings.Contains(strings.ToLower(device.Name), name) {
+			return &device, nil
+		}
+	}
+
+	return nil, fmt.Errorf("output device not found: %s", name)
+}
+
+// ListDevices returns a formatted string listing all audio devices
+func (m *AudioDeviceManager) ListDevices() (string, error) {
+	if !m.initialized {
+		if err := m.Initialize(); err != nil {
+			return "", err
+		}
+	}
+
+	var result strings.Builder
+
+	result.WriteString("šŸŽ¤ Input Devices:\n")
+	for _, device := range m.inputDevices {
+		defaultStr := ""
+		if device.IsDefault {
+			defaultStr = " (default)"
+		}
+		result.WriteString(fmt.Sprintf("  [%d] %s%s - %d channels, %.0f Hz\n",
+			device.Index, device.Name, defaultStr, device.MaxInputChannels, device.DefaultSampleRate))
+	}
+
+	result.WriteString("\nšŸ”Š Output Devices:\n")
+	for _, device := range m.outputDevices {
+		defaultStr := ""
+		if device.IsDefault {
+			defaultStr = " (default)"
+		}
+		result.WriteString(fmt.Sprintf("  [%d] %s%s - %d channels, %.0f Hz\n",
+			device.Index, device.Name, defaultStr, device.MaxOutputChannels, device.DefaultSampleRate))
+	}
+
+	return result.String(), nil
+}
diff --git a/pkg/voice/processor.go b/pkg/voice/processor.go
new file mode 100644
index 0000000..286955f
--- /dev/null
+++ b/pkg/voice/processor.go
@@ -0,0 +1,275 @@
+package voice
+
+// WebRTCAudioProcessor provides basic audio processing algorithms
+// (simple echo cancellation, a noise gate, and automatic gain control).
+// It is a simplified stand-in while full WebRTC integration is in progress.
+type 
WebRTCAudioProcessor struct { + enabled bool + sampleRate int + channels int + frameSize int + // Simple echo cancellation state + echoBuffer []float32 + adaptiveFilter []float32 + filterLength int + // Noise gate parameters + noiseGateThreshold float32 + noiseGateRatio float32 + // AGC (Automatic Gain Control) state + targetLevel float32 + currentGain float32 + agcEnabled bool +} + +// NewWebRTCAudioProcessor creates a new audio processor with basic algorithms +func NewWebRTCAudioProcessor(sampleRate, channels, frameSize int) (*WebRTCAudioProcessor, error) { + filterLength := 256 // Adaptive filter length for echo cancellation + processor := &WebRTCAudioProcessor{ + enabled: true, + sampleRate: sampleRate, + channels: channels, + frameSize: frameSize, + echoBuffer: make([]float32, filterLength), + adaptiveFilter: make([]float32, filterLength), + filterLength: filterLength, + noiseGateThreshold: 0.01, // -40dB + noiseGateRatio: 0.1, // 10:1 ratio + targetLevel: 0.5, // Target -6dB + currentGain: 1.0, + agcEnabled: true, + } + return processor, nil +} + +// ProcessMicrophoneAudio processes microphone input with basic audio processing +func (p *WebRTCAudioProcessor) ProcessMicrophoneAudio(micInput, speakerOutput []float32) []float32 { + if !p.enabled || len(micInput) == 0 { + return micInput + } + // Make a copy to avoid modifying original + processed := make([]float32, len(micInput)) + copy(processed, micInput) + // 1. Simple echo cancellation + if len(speakerOutput) > 0 { + processed = p.simpleEchoCancellation(processed, speakerOutput) + } + // 2. Noise gate + processed = p.noiseGate(processed) + // 3. Automatic Gain Control + if p.agcEnabled { + processed = p.automaticGainControl(processed) + } + return processed +} + +// ProcessSpeakerAudio processes speaker output (can add additional processing if needed) +func (p *WebRTCAudioProcessor) ProcessSpeakerAudio(input []float32) []float32 { + if !p.enabled { + return input + } + // For now, just pass through - could add gain control, EQ, etc. 
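+	// A speaker-side gain stage, if ever needed, would be a one-liner here
+	// (hypothetical, not enabled):
+	//
+	//	for i := range input { input[i] *= p.currentGain }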
+	return input
+}
+
+// SetEnabled enables or disables processing
+func (p *WebRTCAudioProcessor) SetEnabled(enabled bool) {
+	p.enabled = enabled
+}
+
+// Close cleans up the processor
+func (p *WebRTCAudioProcessor) Close() error {
+	// Reset buffers
+	p.echoBuffer = nil
+	p.adaptiveFilter = nil
+	return nil
+}
+
+// simpleEchoCancellation performs basic echo cancellation using adaptive filtering
+func (p *WebRTCAudioProcessor) simpleEchoCancellation(micInput, speakerOutput []float32) []float32 {
+	if len(speakerOutput) == 0 {
+		return micInput
+	}
+	processed := make([]float32, len(micInput))
+	for i, micSample := range micInput {
+		// Simple subtraction-based echo cancellation
+		// This is very basic - real echo cancellation is much more complex
+		var echo float32
+		if i < len(speakerOutput) {
+			// Apply a simple delay and attenuation
+			echo = speakerOutput[i] * 0.3 // 30% echo assumption
+		}
+		// Subtract estimated echo
+		processed[i] = micSample - echo
+		// Prevent over-cancellation
+		if processed[i] > 1.0 {
+			processed[i] = 1.0
+		} else if processed[i] < -1.0 {
+			processed[i] = -1.0
+		}
+	}
+	return processed
+}
+
+// noiseGate applies noise gating to reduce background noise
+func (p *WebRTCAudioProcessor) noiseGate(input []float32) []float32 {
+	processed := make([]float32, len(input))
+	for i, sample := range input {
+		amplitude := sample
+		if amplitude < 0 {
+			amplitude = -amplitude
+		}
+		if amplitude < p.noiseGateThreshold {
+			// Below threshold - apply ratio
+			processed[i] = sample * p.noiseGateRatio
+		} else {
+			// Above threshold - pass through
+			processed[i] = sample
+		}
+	}
+	return processed
+}
+
+// automaticGainControl maintains consistent audio levels
+func (p *WebRTCAudioProcessor) automaticGainControl(input []float32) []float32 {
+	if len(input) == 0 {
+		return input
+	}
+	// Sum the squared samples of the current frame
+	var sum float32
+	for _, sample := range input {
+		sum += sample * sample
+	}
+	// Mean-square power of the frame. A true RMS would take the square
+	// root; working on power keeps this dependency-free, and the gain
+	// clamps below keep the loop stable either way.
+	level := sum / float32(len(input))
+	// Adjust gain towards target level
+	if level > 0.001 { // Avoid division by zero
+		targetGain := p.targetLevel / level
+		// Smooth gain changes to avoid artifacts
+		alpha := float32(0.1) // Smoothing factor
+		p.currentGain = alpha*targetGain + (1-alpha)*p.currentGain
+		// Limit gain to reasonable range
+		if p.currentGain > 10.0 {
+			p.currentGain = 10.0
+		} else if p.currentGain < 0.1 {
+			p.currentGain = 0.1
+		}
+	}
+	// Apply gain
+	processed := make([]float32, len(input))
+	for i, sample := range input {
+		processed[i] = sample * p.currentGain
+		// Prevent clipping
+		if processed[i] > 1.0 {
+			processed[i] = 1.0
+		} else if processed[i] < -1.0 {
+			processed[i] = -1.0
+		}
+	}
+	return processed
+}
+
+// GetStats returns processing statistics
+func (p *WebRTCAudioProcessor) GetStats() map[string]interface{} {
+	return map[string]interface{}{
+		"enabled":     p.enabled,
+		"sample_rate": p.sampleRate,
+		"channels":    p.channels,
+		"frame_size":  p.frameSize,
+	}
+}
+
+// WebRTCResampler provides high-quality resampling using WebRTC algorithms
+type WebRTCResampler struct {
+	inputRate  int
+	outputRate int
+	channels   int
+}
+
+// NewWebRTCResampler creates a new WebRTC-based resampler
+func NewWebRTCResampler(inputRate, outputRate, channels int) (*WebRTCResampler, error) {
+	return &WebRTCResampler{
+		inputRate:  inputRate,
+		outputRate: outputRate,
+		channels:   channels,
+	}, nil
+}
+
+// Resample converts audio between the configured input and output rates.
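+// In this CLI the rates are fixed in practice: 48 kHz capture is reduced
+// 3:1 to Vapi's 16 kHz, and 16 kHz playback is expanded 1:3 back to
+// 48 kHz (see streamMicrophoneAudio and upsample16to48kHz in client.go).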
+func (r *WebRTCResampler) Resample(input []float32) ([]float32, error) {
+	if r.inputRate == r.outputRate {
+		return input, nil
+	}
+	// For now, fall back to linear-interpolation resampling
+	// TODO: Integrate with WebRTC's actual resampling when available
+	ratio := float64(r.outputRate) / float64(r.inputRate)
+	if ratio > 1.0 {
+		// Upsampling
+		return r.upsample(input, ratio), nil
+	} else {
+		// Downsampling
+		return r.downsample(input, ratio), nil
+	}
+}
+
+// upsample performs linear-interpolation upsampling
+func (r *WebRTCResampler) upsample(input []float32, ratio float64) []float32 {
+	outputLen := int(float64(len(input)) * ratio)
+	output := make([]float32, outputLen)
+	for i := 0; i < outputLen; i++ {
+		srcPos := float64(i) / ratio
+		srcIndex := int(srcPos)
+		frac := float32(srcPos - float64(srcIndex))
+		if srcIndex >= len(input)-1 {
+			output[i] = input[len(input)-1]
+		} else {
+			// Linear interpolation between the two neighbouring samples,
+			// so the weight reaches exactly 1.0 at the next sample
+			sample1 := input[srcIndex]
+			sample2 := input[srcIndex+1]
+			output[i] = sample1*(1-frac) + sample2*frac
+		}
+	}
+	return output
+}
+
+// downsample performs anti-aliased downsampling
+func (r *WebRTCResampler) downsample(input []float32, ratio float64) []float32 {
+	// Apply anti-aliasing filter first
+	filtered := r.antiAliasFilter(input, ratio)
+	outputLen := int(float64(len(filtered)) * ratio)
+	output := make([]float32, outputLen)
+	for i := 0; i < outputLen; i++ {
+		srcPos := float64(i) / ratio
+		srcIndex := int(srcPos + 0.5) // Round to nearest
+		if srcIndex >= len(filtered) {
+			srcIndex = len(filtered) - 1
+		}
+		output[i] = filtered[srcIndex]
+	}
+	return output
+}
+
+// antiAliasFilter applies a simple anti-aliasing filter before downsampling
+func (r *WebRTCResampler) antiAliasFilter(input []float32, ratio float64) []float32 {
+	if len(input) < 3 || ratio >= 1.0 {
+		return input
+	}
+	output := make([]float32, len(input))
+	// Simple 3-tap moving average filter
+	output[0] = input[0]
+	for i := 1; i < len(input)-1; i++ {
+		output[i] = 0.25*input[i-1] + 0.5*input[i] + 0.25*input[i+1]
+	}
+	output[len(output)-1] = input[len(input)-1]
+	return output
+}
+
+// Close cleans up the resampler
+func (r *WebRTCResampler) Close() error {
+	return nil
+}
diff --git a/pkg/voice/signaling.go b/pkg/voice/signaling.go
new file mode 100644
index 0000000..5de9735
--- /dev/null
+++ b/pkg/voice/signaling.go
@@ -0,0 +1,274 @@
+package voice
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/gorilla/websocket"
+)
+
+// VapiWebSocket handles WebSocket communication with Vapi transport
+type VapiWebSocket struct {
+	conn   *websocket.Conn
+	wsURL  string
+	events chan SignalingEvent
+
+	// Control
+	connected bool
+	mutex     sync.RWMutex
+	done      chan struct{}
+}
+
+// SignalingEvent represents a signaling event
+type SignalingEvent struct {
+	Type      string      `json:"type"`
+	Data      interface{} `json:"data"`
+	From      string      `json:"from,omitempty"`
+	Timestamp time.Time   `json:"timestamp"`
+}
+
+// WebSocket message types for Vapi transport
+const (
+	MSG_ROOM_JOINED = "room-joined"
+	MSG_ERROR       = "error"
+)
+
+// NewVapiWebSocket creates a new Vapi WebSocket client
+func NewVapiWebSocket() *VapiWebSocket {
+	return &VapiWebSocket{
+		events: make(chan SignalingEvent, 100),
+		done:   make(chan struct{}),
+	}
+}
+
+// Connect connects to Vapi WebSocket transport
+func (s *VapiWebSocket) Connect(wsURL string) error {
+	s.mutex.Lock()
+	defer 
s.mutex.Unlock() + + if s.connected { + return fmt.Errorf("already connected to WebSocket transport") + } + + if wsURL == "" { + return fmt.Errorf("WebSocket URL is required") + } + + s.wsURL = wsURL + + dialer := websocket.DefaultDialer + dialer.HandshakeTimeout = 10 * time.Second + + // Add authentication headers for Vapi WebSocket + headers := http.Header{} + + fmt.Printf("šŸ• Starting WebSocket handshake (timeout: 10s)...\n") + conn, resp, err := dialer.Dial(wsURL, headers) + if err != nil { + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // Error handling would complicate deferred cleanup + if body, readErr := io.ReadAll(resp.Body); readErr == nil { + return fmt.Errorf("WebSocket handshake failed (status %d): %s", resp.StatusCode, string(body)) + } + } + return fmt.Errorf("failed to connect to Vapi WebSocket: %w", err) + } + + s.conn = conn + s.connected = true + + fmt.Printf("āœ… WebSocket connection established successfully\n") + + // Start message handling for Vapi transport events + go s.handleMessages() + + return nil +} + +// handleMessages processes incoming WebSocket messages +func (s *VapiWebSocket) handleMessages() { + defer func() { + if r := recover(); r != nil { //nolint:staticcheck // Empty branch is intentional for panic recovery + // Panic recovery - websocket connection was closed + // Intentionally empty - we handle cleanup below + } + s.mutex.Lock() + s.connected = false + if s.conn != nil { + if err := s.conn.Close(); err != nil { + fmt.Printf("Failed to close WebSocket connection: %v\n", err) + } + } + s.mutex.Unlock() + }() + + for { + select { + case <-s.done: + return + default: + // Read message from WebSocket (blocking) + messageType, data, err := s.conn.ReadMessage() + if err != nil { + if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) { + // Normal closure + return + } + + // Send error event + s.events <- SignalingEvent{ + Type: "websocket_error", + Data: err.Error(), + Timestamp: time.Now(), + } + return + } + + switch messageType { + case websocket.TextMessage: + s.handleTextMessage(data) + case websocket.BinaryMessage: + s.handleBinaryMessage(data) + } + } + } +} + +// handleTextMessage processes JSON control messages from Vapi WebSocket transport +func (s *VapiWebSocket) handleTextMessage(data []byte) { + var message map[string]interface{} + if err := json.Unmarshal(data, &message); err != nil { + s.events <- SignalingEvent{ + Type: "parse_error", + Data: string(data), + Timestamp: time.Now(), + } + return + } + + // Vapi WebSocket transport messages + // Common types: speech-update, transcript, function-call, hang, etc. 
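+	// A typical text frame looks roughly like this (shape inferred from
+	// the handlers below, not from a published schema):
+	//
+	//	{"type":"transcript","role":"assistant","transcript":"Hello!"}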
+	msgType := "vapi_transport_event"
+	if eventType, ok := message["type"].(string); ok {
+		msgType = eventType
+	}
+
+	// Create signaling event for Vapi transport
+	event := SignalingEvent{
+		Type:      msgType,
+		Data:      message,
+		Timestamp: time.Now(),
+	}
+
+	// Send event to listeners
+	select {
+	case s.events <- event:
+	default:
+		// Channel full, drop event
+	}
+}
+
+// handleBinaryMessage processes binary audio data from Vapi WebSocket transport
+func (s *VapiWebSocket) handleBinaryMessage(data []byte) {
+	// Binary data is PCM audio from the assistant
+	// Convert to float32 samples for audio playback
+	if len(data)%2 != 0 {
+		return
+	}
+
+	// Convert PCM 16-bit little-endian to float32 samples
+	samples := make([]float32, len(data)/2)
+	for i := 0; i < len(samples); i++ {
+		// Read 16-bit little-endian sample
+		low := uint16(data[i*2])
+		high := uint16(data[i*2+1])
+		// Combine the bytes as uint16 first, then reinterpret as a signed sample
+		sample := int16(low | high<<8) //nolint:gosec // Safe conversion for audio data
+		// Convert to float32 (-1.0 to 1.0) with proper scaling
+		samples[i] = float32(sample) / 32767.0
+	}
+
+	// Send audio samples to output stream via event
+	s.events <- SignalingEvent{
+		Type:      "audio_data",
+		Data:      samples,
+		Timestamp: time.Now(),
+	}
+}
+
+// SendAudioData sends binary audio data to Vapi WebSocket transport
+func (s *VapiWebSocket) SendAudioData(samples []float32) error {
+	s.mutex.RLock()
+	conn := s.conn
+	connected := s.connected
+	s.mutex.RUnlock()
+
+	if !connected || conn == nil {
+		return fmt.Errorf("not connected to WebSocket transport")
+	}
+
+	// Convert float32 samples to PCM 16-bit little-endian
+	data := make([]byte, len(samples)*2)
+	for i, sample := range samples {
+		// Clamp to [-1.0, 1.0] and convert to int16
+		if sample > 1.0 {
+			sample = 1.0
+		} else if sample < -1.0 {
+			sample = -1.0
+		}
+
+		pcmSample := int16(sample * 32767.0)
+
+		// Write as little-endian
+		data[i*2] = byte(pcmSample & 0xFF)
+		data[i*2+1] = byte((pcmSample >> 8) & 0xFF)
+	}
+
+	return conn.WriteMessage(websocket.BinaryMessage, data)
+}
+
+// GetEvents returns the events channel
+func (s *VapiWebSocket) GetEvents() <-chan SignalingEvent {
+	return s.events
+}
+
+// IsConnected returns true if connected to the signaling server
+func (s *VapiWebSocket) IsConnected() bool {
+	s.mutex.RLock()
+	defer s.mutex.RUnlock()
+	return s.connected
+}
+
+// Close closes the signaling connection
+func (s *VapiWebSocket) Close() error {
+	s.mutex.Lock()
+	defer s.mutex.Unlock()
+
+	if !s.connected {
+		return nil
+	}
+
+	// Set connected to false first to stop message reading
+	s.connected = false
+
+	// Close connection immediately to interrupt any blocking reads
+	var err error
+	if s.conn != nil {
+		err = s.conn.Close()
+		s.conn = nil
+	}
+
+	// Signal shutdown to the handleMessages goroutine
+	select {
+	case <-s.done:
+		// already closed
+	default:
+		close(s.done)
+	}
+
+	return err
+}
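Reviewer note: a minimal consumer sketch for this client from outside the package. The URL is a placeholder (in the CLI it comes from the /call response), and the event loop never exits because GetEvents is never closed. Illustrative only, not part of this diff:

	// signaling_sketch.go (illustrative only)
	package main

	import (
		"fmt"
		"log"

		"github.com/VapiAI/cli/pkg/voice"
	)

	func main() {
		ws := voice.NewVapiWebSocket()
		if err := ws.Connect("wss://example.invalid/transport"); err != nil {
			log.Fatal(err)
		}
		defer func() { _ = ws.Close() }()

		// Send 20ms of silence upstream (320 samples at 16kHz)
		if err := ws.SendAudioData(make([]float32, 320)); err != nil {
			log.Println("send:", err)
		}

		// Drain events; "audio_data" carries []float32 PCM from the assistant
		for ev := range ws.GetEvents() {
			if samples, ok := ev.Data.([]float32); ok && ev.Type == "audio_data" {
				fmt.Printf("received %d samples\n", len(samples))
			}
		}
	}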
diff --git a/pkg/voice/terminal.go b/pkg/voice/terminal.go
new file mode 100644
index 0000000..76509c1
--- /dev/null
+++ b/pkg/voice/terminal.go
@@ -0,0 +1,291 @@
+package voice
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/charmbracelet/lipgloss"
+	"golang.org/x/term"
+)
+
+// TerminalUI manages the terminal interface for voice calls
+type TerminalUI struct {
+	client     *VoiceClient
+	done       chan bool
+	keyEvents  chan rune
+	uiUpdates  chan UIUpdate
+	callEvents chan CallEvent
+
+	// Styles
+	successStyle lipgloss.Style
+	errorStyle   lipgloss.Style
+	infoStyle    lipgloss.Style
+	headerStyle  lipgloss.Style
+
+	// Terminal state
+	origTermState  *term.State
+	rawModeEnabled bool
+	stdinFD        int
+}
+
+// UIUpdate represents a terminal UI update
+type UIUpdate struct {
+	Type string
+	Data interface{}
+}
+
+// NewTerminalUI creates a new terminal UI manager
+func NewTerminalUI(client *VoiceClient) *TerminalUI {
+	return &TerminalUI{
+		client: client,
+		// Buffered so handleCallEvent can signal shutdown from inside the UI
+		// loop without deadlocking on its own receiver
+		done:       make(chan bool, 1),
+		keyEvents:  make(chan rune),
+		uiUpdates:  make(chan UIUpdate),
+		callEvents: make(chan CallEvent),
+
+		// Initialize styles
+		successStyle: lipgloss.NewStyle().Foreground(lipgloss.Color("#00FF00")).Bold(true),
+		errorStyle:   lipgloss.NewStyle().Foreground(lipgloss.Color("#FF0000")).Bold(true),
+		infoStyle:    lipgloss.NewStyle().Foreground(lipgloss.Color("#00BFFF")),
+		headerStyle:  lipgloss.NewStyle().Foreground(lipgloss.Color("#FFFF00")).Bold(true),
+	}
+}
+
+// Run starts the terminal UI
+func (ui *TerminalUI) Run() error {
+	// Display initial header
+	ui.displayHeader()
+
+	// Set up signal handling for graceful shutdown
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
+
+	// Start event monitoring goroutines
+	go ui.monitorCallEvents()
+	go ui.handleKeyboardInput()
+
+	// Main event loop
+	for {
+		select {
+		case <-c:
+			// Interrupt signal received
+			fmt.Println(ui.infoStyle.Render("\nShutting down..."))
+			return ui.shutdown()
+
+		case event := <-ui.callEvents:
+			ui.handleCallEvent(event)
+
+		case update := <-ui.uiUpdates:
+			ui.handleUIUpdate(update)
+
+		case <-ui.done:
+			return nil
+		}
+	}
+}
+
+// displayHeader shows the initial UI header
+func (ui *TerminalUI) displayHeader() {
+	fmt.Println(ui.headerStyle.Render("šŸš€ Vapi Voice Call"))
+	fmt.Println()
+	fmt.Println(ui.infoStyle.Render("Starting voice call..."))
+	fmt.Println(ui.infoStyle.Render("Press Ctrl+C to end the call"))
+	fmt.Println(ui.infoStyle.Render("Controls: [s] Status [q] End call [h] Help"))
+	fmt.Println()
+}
+
+// monitorCallEvents monitors call events from the voice client
+func (ui *TerminalUI) monitorCallEvents() {
+	for event := range ui.client.GetCallEvents() {
+		ui.callEvents <- event
+	}
+}
+
+// handleCallEvent processes call events
+func (ui *TerminalUI) handleCallEvent(event CallEvent) {
+	timestamp := event.Timestamp.Format("15:04:05")
+
+	switch event.Type {
+	case "call_started":
+		fmt.Printf("[%s] %s Call started successfully\n",
+			timestamp, ui.successStyle.Render("āœ“"))
+		ui.displayCallStatus()
+
+	case "call_ended":
+		fmt.Printf("[%s] %s Call ended\n",
+			timestamp, ui.infoStyle.Render("•"))
+		ui.done <- true
+
+	case "ice_connection_state_change":
+		// Tolerate unexpected payload types instead of panicking on a failed assertion
+		state, _ := event.Data.(string)
+		fmt.Printf("[%s] %s Connection state: %s\n",
+			timestamp, ui.infoStyle.Render("•"), state)
+
+	case "ice_candidate":
+		fmt.Printf("[%s] %s Connection negotiation\n",
+			timestamp, ui.infoStyle.Render("•"))
+
+	case "offer_sent":
+		fmt.Printf("[%s] %s Audio connection established\n",
+			timestamp, ui.infoStyle.Render("•"))
+
+	case "room_connected":
+		fmt.Printf("[%s] %s Connected to Vapi WebSocket transport\n",
+			timestamp, ui.successStyle.Render("āœ“"))
+
+	case "participant_joined":
+		fmt.Printf("[%s] %s Participant joined call\n",
+			timestamp, ui.successStyle.Render("āœ“"))
+
+	case "connection_error":
+		fmt.Printf("[%s] %s Connection error: %v\n",
+			timestamp, ui.errorStyle.Render("āœ—"), event.Data)
+
+	case "signaling_room_joined":
+		fmt.Printf("[%s] %s Vapi WebSocket connected\n",
+			timestamp, ui.successStyle.Render("āœ“"))
+
+	default:
+		// Show all events for debugging
+		if 
event.Type != "" {
+			fmt.Printf("[%s] %s %s\n",
+				timestamp, ui.infoStyle.Render("•"), event.Type)
+		}
+	}
+}
+
+// handleUIUpdate processes UI updates
+func (ui *TerminalUI) handleUIUpdate(update UIUpdate) {
+	switch update.Type {
+	case "status_update":
+		ui.displayCallStatus()
+	case "error":
+		fmt.Printf("%s %v\n", ui.errorStyle.Render("āœ—"), update.Data)
+	}
+}
+
+// displayCallStatus shows current call status
+func (ui *TerminalUI) displayCallStatus() {
+	state := ui.client.GetCallState()
+
+	fmt.Println(ui.headerStyle.Render("šŸ“ž Call Status"))
+	fmt.Printf("  Call ID: %s\n", state.CallID)
+	fmt.Printf("  Assistant: %s\n", state.AssistantID)
+	fmt.Printf("  Status: %s\n", ui.formatStatus(state.Status))
+	fmt.Printf("  Duration: %s\n", ui.formatDuration(state.StartTime))
+
+	if state.WebSocketURL != "" {
+		fmt.Printf("  Room: %s\n", state.CallID)
+		fmt.Printf("  WebSocket URL: %s\n", state.WebSocketURL)
+	}
+
+	// Display audio status
+	if ui.client.IsAudioRunning() {
+		fmt.Printf("  Audio: %s\n", ui.successStyle.Render("Active"))
+	} else {
+		fmt.Printf("  Audio: %s\n", ui.errorStyle.Render("Inactive"))
+	}
+
+	fmt.Println()
+}
+
+// formatStatus formats call status with appropriate colors
+func (ui *TerminalUI) formatStatus(status CallStatus) string {
+	switch status {
+	case CallStatusConnected:
+		return ui.successStyle.Render(string(status))
+	case CallStatusFailed, CallStatusDisconnected:
+		return ui.errorStyle.Render(string(status))
+	case CallStatusIdle, CallStatusConnecting:
+		return ui.infoStyle.Render(string(status))
+	default:
+		return ui.infoStyle.Render(string(status))
+	}
+}
+
+// formatDuration formats call duration
+func (ui *TerminalUI) formatDuration(startTime time.Time) string {
+	if startTime.IsZero() {
+		return "00:00:00"
+	}
+
+	duration := time.Since(startTime)
+	hours := int(duration.Hours())
+	minutes := int(duration.Minutes()) % 60
+	seconds := int(duration.Seconds()) % 60
+
+	return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, seconds)
+}
+
+// handleKeyboardInput reads single-key controls ([s]tatus, [q]uit, [h]elp) in raw mode
+func (ui *TerminalUI) handleKeyboardInput() {
+	fd := int(os.Stdin.Fd())
+	ui.stdinFD = fd
+
+	if term.IsTerminal(fd) {
+		if oldState, err := term.MakeRaw(fd); err == nil {
+			ui.origTermState = oldState
+			ui.rawModeEnabled = true
+		}
+	}
+
+	// Ensure terminal is restored when this goroutine exits
+	defer func() {
+		if ui.rawModeEnabled && ui.origTermState != nil {
+			_ = term.Restore(ui.stdinFD, ui.origTermState)
+			ui.rawModeEnabled = false
+		}
+	}()
+
+	reader := bufio.NewReader(os.Stdin)
+	for {
+		b, err := reader.ReadByte()
+		if err != nil {
+			return
+		}
+		switch b {
+		case 'q', 'Q':
+			fmt.Println(ui.infoStyle.Render("\nEnding call (q pressed)..."))
+			_ = ui.shutdown()
+			return
+		case 's', 'S':
+			// Trigger a status update in the UI loop
+			ui.uiUpdates <- UIUpdate{Type: "status_update"}
+		case 'h', 'H':
+			fmt.Println(ui.infoStyle.Render("Controls: [s] Status [q] End call [h] Help"))
+		default:
+			// ignore other keys
+		}
+	}
+}
+
+// shutdown gracefully shuts down the terminal UI
+func (ui *TerminalUI) shutdown() error {
+	// Restore terminal if we enabled raw mode
+	if ui.rawModeEnabled && ui.origTermState != nil {
+		_ = term.Restore(ui.stdinFD, ui.origTermState)
+		ui.rawModeEnabled = false
+	}
+	fmt.Println(ui.infoStyle.Render("Ending voice call..."))
+
+	// End the call if still active
+	if ui.client.GetCallState().Status == CallStatusConnected {
+		if err := ui.client.EndCall(); err != nil {
+			fmt.Printf("%s Failed to end call: 
%v\n", ui.errorStyle.Render("āœ—"), err) + // Don't return error, continue with shutdown + } + } + + // Give a brief moment for cleanup to complete + time.Sleep(200 * time.Millisecond) + + fmt.Println(ui.successStyle.Render("āœ“ Voice call ended successfully")) + + // Force exit the process + os.Exit(0) + return nil // This line will never be reached, but Go requires it +} diff --git a/pkg/voice/webrtc_processor.go b/pkg/voice/webrtc_processor.go new file mode 100644 index 0000000..7143868 --- /dev/null +++ b/pkg/voice/webrtc_processor.go @@ -0,0 +1,155 @@ +package voice + +import ( + "fmt" + "math" + + "github.com/gorilla/websocket" +) + +// WebSocketAudioProcessor handles audio processing with basic echo cancellation +type WebSocketAudioProcessor struct { + // Echo cancellation state + echoBuffer []float32 + echoBufferSize int + adaptiveFilter []float32 + learningRate float32 + // Noise gate parameters + noiseGateThreshold float32 + gateRatio float32 +} + +// AudioPacket represents the structure for WebSocket audio data +type AudioPacket struct { + MicSamples []float32 `json:"micSamples"` + SpeakerSamples []float32 `json:"speakerSamples,omitempty"` + Timestamp int64 `json:"timestamp"` +} + +// NewWebSocketAudioProcessor creates a new audio processor with basic echo cancellation +func NewWebSocketAudioProcessor() (*WebSocketAudioProcessor, error) { + const echoBufferSizeMs = 200 // 200ms echo buffer + const sampleRate = 16000 // 16kHz sample rate (Vapi's format) + echoBufferSize := (sampleRate * echoBufferSizeMs) / 1000 + return &WebSocketAudioProcessor{ + echoBuffer: make([]float32, echoBufferSize), + echoBufferSize: echoBufferSize, + adaptiveFilter: make([]float32, 128), // 128-tap adaptive filter + learningRate: 0.01, + noiseGateThreshold: 0.01, // -40dB noise gate + gateRatio: 0.1, // 10:1 ratio + }, nil +} + +// ProcessAudio applies basic echo cancellation and noise reduction +func (wap *WebSocketAudioProcessor) ProcessAudio(micInput, speakerOutput []float32) []float32 { + if len(micInput) == 0 { + return micInput + } + processed := make([]float32, len(micInput)) + copy(processed, micInput) + // Apply basic echo cancellation if we have speaker output + if len(speakerOutput) > 0 { + processed = wap.applyEchoCancellation(processed, speakerOutput) + } + // Apply noise gate + processed = wap.applyNoiseGate(processed) + return processed +} + +// applyEchoCancellation implements a basic adaptive echo cancellation algorithm +func (wap *WebSocketAudioProcessor) applyEchoCancellation(micInput, speakerOutput []float32) []float32 { + result := make([]float32, len(micInput)) + for i, sample := range micInput { + // Store speaker output in echo buffer (circular buffer) + if len(speakerOutput) > i { + bufferIdx := (i) % wap.echoBufferSize + wap.echoBuffer[bufferIdx] = speakerOutput[i] + } + // Estimate echo using adaptive filter + var echoEstimate float32 + filterLen := len(wap.adaptiveFilter) + for j := 0; j < filterLen && j < wap.echoBufferSize; j++ { + bufferIdx := (i - j + wap.echoBufferSize) % wap.echoBufferSize + echoEstimate += wap.adaptiveFilter[j] * wap.echoBuffer[bufferIdx] + } + // Subtract estimated echo from microphone input + result[i] = sample - echoEstimate + // Update adaptive filter using LMS algorithm + errorSignal := result[i] + for j := 0; j < filterLen && j < wap.echoBufferSize; j++ { + bufferIdx := (i - j + wap.echoBufferSize) % wap.echoBufferSize + wap.adaptiveFilter[j] += wap.learningRate * errorSignal * wap.echoBuffer[bufferIdx] + } + } + return result +} + +// 
applyNoiseGate applies a simple noise gate to reduce background noise +func (wap *WebSocketAudioProcessor) applyNoiseGate(input []float32) []float32 { + result := make([]float32, len(input)) + for i, sample := range input { + amplitude := float32(math.Abs(float64(sample))) + if amplitude > wap.noiseGateThreshold { + // Above threshold - pass through + result[i] = sample + } else { + // Below threshold - apply gate ratio + result[i] = sample * wap.gateRatio + } + } + return result +} + +// HandleWebSocket processes WebSocket connections with audio processing +func (wap *WebSocketAudioProcessor) HandleWebSocket(ws *websocket.Conn) error { + defer func() { + if err := ws.Close(); err != nil { + fmt.Printf("Failed to close websocket: %v\n", err) + } + }() + for { + // Read audio data from WebSocket + var audioData AudioPacket + err := ws.ReadJSON(&audioData) + if err != nil { + if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) { + return fmt.Errorf("websocket read error: %w", err) + } + break + } + // Process audio with echo cancellation and noise reduction + processed := wap.ProcessAudio(audioData.MicSamples, audioData.SpeakerSamples) + // Send processed audio back via WebSocket + response := AudioPacket{ + MicSamples: processed, + Timestamp: audioData.Timestamp, + } + if err := ws.WriteJSON(response); err != nil { + return fmt.Errorf("websocket write error: %w", err) + } + } + return nil +} + +// Reset clears the processor's internal state +func (wap *WebSocketAudioProcessor) Reset() { + // Clear echo buffer + for i := range wap.echoBuffer { + wap.echoBuffer[i] = 0 + } + // Reset adaptive filter + for i := range wap.adaptiveFilter { + wap.adaptiveFilter[i] = 0 + } +} + +// SetNoiseGateThreshold adjusts the noise gate sensitivity +func (wap *WebSocketAudioProcessor) SetNoiseGateThreshold(threshold float32) { + wap.noiseGateThreshold = threshold +} + +// SetLearningRate adjusts the adaptive filter learning rate +func (wap *WebSocketAudioProcessor) SetLearningRate(rate float32) { + wap.learningRate = rate +} diff --git a/pkg/voice/websocket_jitter.go b/pkg/voice/websocket_jitter.go new file mode 100644 index 0000000..54c957b --- /dev/null +++ b/pkg/voice/websocket_jitter.go @@ -0,0 +1,327 @@ +package voice + +import ( + "fmt" + "log" + "sync" + "time" +) + +// WebSocketJitterBuffer provides adaptive jitter buffering for WebSocket audio +type WebSocketJitterBuffer struct { + // Configuration + targetDelay time.Duration + maxDelay time.Duration + minDelay time.Duration + sampleRate int + + // Buffer management + audioBuffer [][]float32 + bufferMutex sync.RWMutex + + // Timing control + lastWriteTime time.Time + lastReadTime time.Time + readInterval time.Duration + + // Adaptive delay + currentDelay time.Duration + delayMutex sync.RWMutex + + // Control + running bool + runMutex sync.RWMutex + + // Statistics + packetsReceived int64 + packetsDropped int64 + underruns int64 + overruns int64 + + // Read ticker for consistent output + ticker *time.Ticker + outputChan chan []float32 +} + +// WebSocketJitterConfig holds configuration for WebSocket jitter buffer +type WebSocketJitterConfig struct { + SampleRate int // Audio sample rate (16000 for Vapi) + MinDelay time.Duration // Minimum buffer delay + MaxDelay time.Duration // Maximum buffer delay + TargetDelay time.Duration // Initial target delay + PacketInterval time.Duration // Expected packet interval (20ms for Vapi) +} + +// DefaultWebSocketJitterConfig returns optimized config for Vapi WebSocket 
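+// At the 20ms packet interval, the 80ms target delay corresponds to holding
+// roughly four packets before playback begins - a common trade-off between
+// added latency and resilience to network jitter.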
+func DefaultWebSocketJitterConfig() *WebSocketJitterConfig { + return &WebSocketJitterConfig{ + SampleRate: 16000, + MinDelay: 40 * time.Millisecond, // Minimum 40ms buffering + MaxDelay: 200 * time.Millisecond, // Maximum 200ms buffering + TargetDelay: 80 * time.Millisecond, // Target 80ms - good for voice + PacketInterval: 20 * time.Millisecond, // Vapi sends 20ms packets + } +} + +// NewWebSocketJitterBuffer creates a new WebSocket-compatible jitter buffer +func NewWebSocketJitterBuffer(config *WebSocketJitterConfig) (*WebSocketJitterBuffer, error) { + if config == nil { + config = DefaultWebSocketJitterConfig() + } + + jb := &WebSocketJitterBuffer{ + targetDelay: config.TargetDelay, + maxDelay: config.MaxDelay, + minDelay: config.MinDelay, + sampleRate: config.SampleRate, + currentDelay: config.TargetDelay, + readInterval: config.PacketInterval, + audioBuffer: make([][]float32, 0, 50), // Pre-allocate for ~1 second + outputChan: make(chan []float32, 10), + } + + return jb, nil +} + +// WriteAudio adds audio samples to the jitter buffer +func (jb *WebSocketJitterBuffer) WriteAudio(samples []float32) error { + if !jb.IsRunning() { + return fmt.Errorf("jitter buffer not running") + } + + now := time.Now() + + jb.bufferMutex.Lock() + defer jb.bufferMutex.Unlock() + + // Copy samples to avoid any reference issues + sampleCopy := make([]float32, len(samples)) + copy(sampleCopy, samples) + + // Add to buffer + jb.audioBuffer = append(jb.audioBuffer, sampleCopy) + jb.packetsReceived++ + jb.lastWriteTime = now + + // Check for buffer overflow + maxBufferSize := int(jb.maxDelay / jb.readInterval) + if len(jb.audioBuffer) > maxBufferSize { + // Drop oldest packet + jb.audioBuffer = jb.audioBuffer[1:] + jb.overruns++ + if jb.overruns%25 == 0 { + log.Printf("āš ļø Jitter buffer overrun: dropped oldest packet (total: %d)", jb.overruns) + } + } + + // Adaptive delay adjustment based on buffer fill + jb.adjustDelay() + + return nil +} + +// adjustDelay adapts the buffer delay based on current conditions +func (jb *WebSocketJitterBuffer) adjustDelay() { + bufferSize := len(jb.audioBuffer) + targetBufferSize := int(jb.targetDelay / jb.readInterval) + + jb.delayMutex.Lock() + defer jb.delayMutex.Unlock() + + // Adjust target delay based on buffer fill + if bufferSize < targetBufferSize/2 { + // Buffer running low - increase delay slightly + jb.currentDelay += 5 * time.Millisecond + if jb.currentDelay > jb.maxDelay { + jb.currentDelay = jb.maxDelay + } + } else if bufferSize > targetBufferSize*2 { + // Buffer getting too full - decrease delay slightly + jb.currentDelay -= 5 * time.Millisecond + if jb.currentDelay < jb.minDelay { + jb.currentDelay = jb.minDelay + } + } +} + +// ReadAudio reads processed audio samples from the jitter buffer +func (jb *WebSocketJitterBuffer) ReadAudio(numSamples int) []float32 { + if !jb.IsRunning() { + return make([]float32, numSamples) // Return silence + } + + // Try to get samples from output channel with timeout + select { + case samples := <-jb.outputChan: + // Resize to requested length if needed + if len(samples) == numSamples { + return samples + } + + result := make([]float32, numSamples) + if len(samples) > 0 { + copy(result, samples) + } + return result + + case <-time.After(10 * time.Millisecond): + // Timeout - return silence to prevent blocking + jb.underruns++ + if jb.underruns%50 == 0 { + log.Printf("āš ļø Jitter buffer underrun: no data available (total: %d)", jb.underruns) + } + return make([]float32, numSamples) + } +} + +// Start begins jitter buffer 
operation +func (jb *WebSocketJitterBuffer) Start() error { + jb.runMutex.Lock() + defer jb.runMutex.Unlock() + + if jb.running { + return fmt.Errorf("jitter buffer already running") + } + + jb.running = true + + // Start read ticker for consistent output timing + jb.ticker = time.NewTicker(jb.readInterval) + go jb.readLoop() + + // Start stats monitoring + go jb.monitorStats() + + log.Printf("šŸŽµ WebSocket Jitter Buffer started (target delay: %v, interval: %v)", + jb.targetDelay, jb.readInterval) + return nil +} + +// readLoop continuously reads from buffer and outputs at regular intervals +func (jb *WebSocketJitterBuffer) readLoop() { + defer jb.ticker.Stop() + + initialDelay := jb.currentDelay + log.Printf("šŸŽµ Jitter buffer starting with %v initial delay", initialDelay) + + // Initial delay before starting to read + time.Sleep(initialDelay) + + for range jb.ticker.C { + if !jb.IsRunning() { + return + } + + jb.bufferMutex.RLock() + bufferLen := len(jb.audioBuffer) + + if bufferLen > 0 { + // Get the oldest packet + samples := jb.audioBuffer[0] + + // Remove from buffer + jb.bufferMutex.RUnlock() + jb.bufferMutex.Lock() + if len(jb.audioBuffer) > 0 { + jb.audioBuffer = jb.audioBuffer[1:] + } + jb.bufferMutex.Unlock() + + // Send to output channel (non-blocking) + select { + case jb.outputChan <- samples: + jb.lastReadTime = time.Now() + default: + // Output channel full - drop this packet + jb.packetsDropped++ + } + } else { + jb.bufferMutex.RUnlock() + // Buffer empty - output silence + silence := make([]float32, 320) // 20ms at 16kHz + select { + case jb.outputChan <- silence: + default: + // Output channel full - just skip + } + } + } +} + +// monitorStats logs periodic statistics +func (jb *WebSocketJitterBuffer) monitorStats() { + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + + for range ticker.C { + if !jb.IsRunning() { + return + } + jb.logStats() + } +} + +// logStats logs current buffer statistics +func (jb *WebSocketJitterBuffer) logStats() { + jb.bufferMutex.RLock() + bufferSize := len(jb.audioBuffer) + jb.bufferMutex.RUnlock() + + jb.delayMutex.RLock() + currentDelay := jb.currentDelay + jb.delayMutex.RUnlock() + + outputQueueSize := len(jb.outputChan) + + log.Printf("šŸ“Š WebSocket Jitter Buffer Stats: Buffer: %d packets, Delay: %v, Output queue: %d/10, Received: %d, Dropped: %d, Underruns: %d, Overruns: %d", + bufferSize, currentDelay, outputQueueSize, jb.packetsReceived, jb.packetsDropped, jb.underruns, jb.overruns) +} + +// Stop stops the jitter buffer +func (jb *WebSocketJitterBuffer) Stop() error { + jb.runMutex.Lock() + defer jb.runMutex.Unlock() + + if !jb.running { + return nil + } + + jb.running = false + + if jb.ticker != nil { + jb.ticker.Stop() + } + + log.Printf("šŸŽµ WebSocket Jitter Buffer stopped") + return nil +} + +// IsRunning returns true if the jitter buffer is running +func (jb *WebSocketJitterBuffer) IsRunning() bool { + jb.runMutex.RLock() + defer jb.runMutex.RUnlock() + return jb.running +} + +// GetStats returns current jitter buffer statistics +func (jb *WebSocketJitterBuffer) GetStats() map[string]interface{} { + jb.bufferMutex.RLock() + bufferSize := len(jb.audioBuffer) + jb.bufferMutex.RUnlock() + + jb.delayMutex.RLock() + currentDelay := jb.currentDelay + jb.delayMutex.RUnlock() + + return map[string]interface{}{ + "buffer_size": bufferSize, + "current_delay_ms": currentDelay.Milliseconds(), + "target_delay_ms": jb.targetDelay.Milliseconds(), + "packets_received": jb.packetsReceived, + "packets_dropped": 
jb.packetsDropped, + "underruns": jb.underruns, + "overruns": jb.overruns, + "output_queue_size": len(jb.outputChan), + "running": jb.IsRunning(), + } +} diff --git a/sample-assistant.json b/sample-assistant.json new file mode 100644 index 0000000..55e9030 --- /dev/null +++ b/sample-assistant.json @@ -0,0 +1,8 @@ +{ + "assistant_id": "550e8400-e29b-41d4-a716-446655440000", + "name": "Test Assistant", + "first_message": "Hello! I'm your test assistant. How can I help you today?", + "voice_id": "elliot", + "model": "gpt-4o", + "system_message": "You are a helpful and friendly assistant. Keep responses concise and conversational." +} \ No newline at end of file diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 260ee4e..cbbdb6f 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -261,6 +261,12 @@ function Main { Install-Vapi $version $platform Add-ToPath Test-Installation + + Write-Host "" + Write-Info "Note: For voice features (microphone/speaker), PortAudio must be installed." + Write-Host " Windows options:" -ForegroundColor White + Write-Host " - Install via vcpkg: vcpkg install portaudio" -ForegroundColor White + Write-Host " - Or download binaries and ensure portaudio.dll is alongside vapi.exe or on PATH" -ForegroundColor White } # Run main function diff --git a/scripts/install.sh b/scripts/install.sh index d208416..3712e5b 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -170,6 +170,23 @@ tildify() { success "vapi was installed successfully to $Bold_Green$(tildify "$exe")" +# PortAudio notice for voice features +echo +info "Note: For voice features (microphone/speaker), PortAudio must be installed." +case $platform in + 'Darwin x86_64'|'Darwin arm64') + info_bold " brew install portaudio" + ;; + 'Linux x86_64'|'Linux arm64'|'Linux aarch64') + info_bold " Debian/Ubuntu: sudo apt-get update && sudo apt-get install -y libportaudio2 portaudio19-dev" + info_bold " Fedora/RHEL: sudo dnf install -y portaudio portaudio-devel" + info_bold " Arch: sudo pacman -S portaudio" + ;; + 'MINGW64'* ) + info_bold " Windows: Install PortAudio and ensure portaudio.dll is on PATH (e.g., via vcpkg: vcpkg install portaudio)" + ;; +esac + if command -v vapi >/dev/null; then echo "Run 'vapi --help' to get started" exit diff --git a/transient-assistant.json b/transient-assistant.json new file mode 100644 index 0000000..a51642f --- /dev/null +++ b/transient-assistant.json @@ -0,0 +1,7 @@ +{ + "name": "Config-based Assistant", + "first_message": "Hi! I was created from a configuration file. What would you like to talk about?", + "voice_id": "jennifer", + "model": "gpt-4o", + "system_message": "You are an intelligent assistant created from a JSON configuration. Be helpful, creative, and engaging in conversations." +} \ No newline at end of file
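Reviewer note: both assistant config files above deserialize with a struct along these lines (the json tags mirror the file keys; the actual loader in cmd/voice.go may differ). Illustrative only, not part of this diff:

	// config_sketch.go (illustrative only)
	package main

	import (
		"encoding/json"
		"fmt"
		"log"
		"os"
	)

	type assistantConfig struct {
		AssistantID   string `json:"assistant_id,omitempty"`
		Name          string `json:"name"`
		FirstMessage  string `json:"first_message"`
		VoiceID       string `json:"voice_id"`
		Model         string `json:"model"`
		SystemMessage string `json:"system_message"`
	}

	func main() {
		data, err := os.ReadFile("transient-assistant.json")
		if err != nil {
			log.Fatal(err)
		}
		var cfg assistantConfig
		if err := json.Unmarshal(data, &cfg); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%s: %q\n", cfg.Name, cfg.FirstMessage)
	}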