From 32e2e47e9d27c823c819706ae8d50dacc6c49c5c Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Mon, 12 Jan 2026 05:34:16 +0000
Subject: [PATCH 1/3] Refactor audio storage to MongoDB chunks and enhance
 cleanup settings management

- Replaced the legacy AudioFile model with AudioChunkDocument for storing audio data in MongoDB, optimizing storage and retrieval.
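- Introduced soft deletion and restore for conversations and their audio chunks (permanent deletion remains available to admins), plus a CleanupSettings dataclass for managing soft-deletion configurations, including auto-cleanup and retention days.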
- Added admin API routes for retrieving and saving cleanup settings, ensuring better control over data retention policies.
- Updated audio processing workflows to utilize MongoDB chunks, removing dependencies on disk-based audio files.
- Enhanced tests to validate the new audio chunk storage and cleanup functionalities, ensuring robust integration with existing systems.
---
 .../src/advanced_omi_backend/app_factory.py   |   4 +-
 .../src/advanced_omi_backend/config.py        |  97 +++
 .../controllers/audio_controller.py           | 100 +--
 .../controllers/conversation_controller.py    | 227 ++++--
 .../controllers/system_controller.py          |  64 ++
 .../controllers/websocket_controller.py       |  46 +-
 .../models/audio_chunk.py                     | 158 ++++
 .../advanced_omi_backend/models/audio_file.py |  64 --
 .../models/conversation.py                    |  18 +-
 .../src/advanced_omi_backend/models/job.py    |   8 +-
 .../routers/api_router.py                     |   2 +
 .../routers/modules/__init__.py               |   3 +
 .../routers/modules/admin_routes.py           | 122 +++
 .../routers/modules/audio_routes.py           | 305 +++++++-
 .../routers/modules/conversation_routes.py    |  17 +-
 .../routers/modules/system_routes.py          |  22 +
 .../utils/audio_chunk_utils.py                | 739 ++++++++++++++++++
 .../advanced_omi_backend/utils/audio_utils.py |  17 -
 .../utils/conversation_utils.py               |  44 --
 .../utils/gdrive_audio_utils.py               |  13 +-
 .../workers/audio_jobs.py                     | 363 +++++----
 .../workers/cleanup_jobs.py                   | 138 ++++
 .../workers/conversation_jobs.py              |  31 +-
 .../workers/speaker_jobs.py                   | 114 ++-
 .../workers/transcription_jobs.py             |  69 +-
 .../tests/test_audio_persistence_mongodb.py   | 431 ++++++++++
 tests/.env.test                               |   4 +
 tests/endpoints/audio_upload_tests.robot      |  24 +-
 tests/infrastructure/infra_tests.robot        |  42 +-
 .../mongodb_audio_storage_tests.robot         | 106 +++
 tests/libs/mongodb_helper.py                  | 106 +++
 tests/resources/mongodb_keywords.robot        | 118 +++
 tests/test-requirements.txt                   |   1 +
 33 files changed, 3078 insertions(+), 539 deletions(-)
 create mode 100644 backends/advanced/src/advanced_omi_backend/models/audio_chunk.py
 delete mode 100644 backends/advanced/src/advanced_omi_backend/models/audio_file.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/utils/audio_chunk_utils.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py
 create mode 100644 backends/advanced/tests/test_audio_persistence_mongodb.py
 create mode 100644 tests/integration/mongodb_audio_storage_tests.robot
 create mode 100644 tests/libs/mongodb_helper.py
 create mode 100644 tests/resources/mongodb_keywords.robot

diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py
index 8a162cec..b8bc3e80 100644
--- a/backends/advanced/src/advanced_omi_backend/app_factory.py
+++ b/backends/advanced/src/advanced_omi_backend/app_factory.py
@@ -54,12 +54,12 @@ async def lifespan(app: FastAPI):
     try:
         from beanie import init_beanie
         from advanced_omi_backend.models.conversation import Conversation
-        from advanced_omi_backend.models.audio_file import AudioFile
+        from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
         from advanced_omi_backend.models.user import User
 
         await init_beanie(
             database=config.db,
-            document_models=[User, Conversation, AudioFile],
+            document_models=[User, Conversation, AudioChunkDocument],
         )
         application_logger.info("Beanie initialized for all document models")
     except Exception as e:
diff --git a/backends/advanced/src/advanced_omi_backend/config.py b/backends/advanced/src/advanced_omi_backend/config.py
index 2b07a8d4..f335b8be 100644
--- a/backends/advanced/src/advanced_omi_backend/config.py
+++ b/backends/advanced/src/advanced_omi_backend/config.py
@@ -9,7 +9,9 @@
 import logging
 import os
 import shutil
+from dataclasses import dataclass, asdict
 from pathlib import Path
+from typing import Optional
 
 logger = logging.getLogger(__name__)
 
@@ -131,6 +133,101 @@ def save_diarization_settings_to_file(settings):
         return False
 
 
+# ============================================================================
+# Cleanup Settings (JSON file-based with in-memory caching)
+# ============================================================================
+
+@dataclass
+class CleanupSettings:
+    """Cleanup configuration for soft-deleted conversations."""
+    auto_cleanup_enabled: bool = False
+    retention_days: int = 30
+
+# Global cache for cleanup settings
+_cleanup_settings: Optional[CleanupSettings] = None
+
+
+def get_cleanup_config_path() -> Path:
+    """Get path to cleanup settings JSON file."""
+    data_dir = Path(os.getenv("DATA_DIR", "/app/data"))
+    data_dir.mkdir(parents=True, exist_ok=True)
+    return data_dir / "cleanup_config.json"
+
+
+def load_cleanup_settings_from_file() -> CleanupSettings:
+    """
+    Load cleanup settings from JSON file or return defaults.
+
+    Returns cached settings if available, otherwise loads from file.
+    If the file doesn't exist or cannot be loaded, returns (and caches) default settings.
+    """
+    global _cleanup_settings
+
+    # Return cached settings if available
+    if _cleanup_settings is not None:
+        return _cleanup_settings
+
+    config_path = get_cleanup_config_path()
+
+    # Try to load from file
+    if config_path.exists():
+        try:
+            with open(config_path, "r") as f:
+                data = json.load(f)
+                _cleanup_settings = CleanupSettings(**data)
+                logger.info(f"✅ Loaded cleanup settings: auto_cleanup={_cleanup_settings.auto_cleanup_enabled}, retention={_cleanup_settings.retention_days}d")
+                return _cleanup_settings
+        except Exception as e:
+            logger.error(f"❌ Failed to load cleanup settings from {config_path}: {e}")
+
+    # Return defaults if file doesn't exist or failed to load
+    _cleanup_settings = CleanupSettings()
+    logger.info("Using default cleanup settings (auto_cleanup_enabled=False, retention_days=30)")
+    return _cleanup_settings
+
+
+def save_cleanup_settings_to_file(settings: CleanupSettings) -> None:
+    """
+    Save cleanup settings to JSON file and update in-memory cache.
+
+    Args:
+        settings: CleanupSettings to persist
+
+    Raises:
+        Exception: If file write fails
+    """
+    global _cleanup_settings
+
+    config_path = get_cleanup_config_path()
+
+    try:
+        # Save to JSON file
+        with open(config_path, "w") as f:
+            json.dump(asdict(settings), f, indent=2)
+
+        # Update in-memory cache
+        _cleanup_settings = settings
+
+        logger.info(f"✅ Saved cleanup settings: auto_cleanup={settings.auto_cleanup_enabled}, retention={settings.retention_days}d")
+    except Exception as e:
+        logger.error(f"❌ Failed to save cleanup settings to {config_path}: {e}")
+        raise
+
+
+def get_cleanup_settings() -> dict:
+    """
+    Get current cleanup settings as dict (for API responses).
+
+    Returns:
+        Dict with auto_cleanup_enabled and retention_days
+    """
+    settings = load_cleanup_settings_from_file()
+    return {
+        "auto_cleanup_enabled": settings.auto_cleanup_enabled,
+        "retention_days": settings.retention_days,
+    }
+
+
 def get_speech_detection_settings():
     """Get speech detection settings from environment or defaults."""
 
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
index e63dd883..143cb253 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
@@ -17,8 +17,9 @@
 
 from advanced_omi_backend.utils.audio_utils import (
     AudioValidationError,
-    write_audio_file,
+    validate_and_prepare_audio,
 )
+from advanced_omi_backend.utils.audio_chunk_utils import convert_audio_to_chunks
 from advanced_omi_backend.models.job import JobPriority
 from advanced_omi_backend.models.user import User
 from advanced_omi_backend.models.conversation import create_conversation
@@ -86,33 +87,19 @@ async def upload_and_process_audio_files(
                 # Generate audio UUID and timestamp
                 if source == "gdrive":
                     audio_uuid = getattr(file, "audio_uuid", None)
-                    if not audio_uuid: 
+                    if not audio_uuid:
                         audio_logger.error(f"Missing audio_uuid for gdrive file: {file.filename}")
-                        audio_uuid = str(uuid.uuid4()) 
-                else: 
+                        audio_uuid = str(uuid.uuid4())
+                else:
                     audio_uuid = str(uuid.uuid4())
                 timestamp = int(time.time() * 1000)
 
-                # Determine output directory (with optional subfolder)
-                from advanced_omi_backend.config import CHUNK_DIR
-                if folder:
-                    chunk_dir = CHUNK_DIR / folder
-                    chunk_dir.mkdir(parents=True, exist_ok=True)
-                else:
-                    chunk_dir = CHUNK_DIR
-
-                # Validate, write audio file and create AudioSession (all in one)
+                # Validate and prepare audio (read format from WAV file)
                 try:
-                    relative_audio_path, file_path, duration = await write_audio_file(
-                        raw_audio_data=content,
-                        audio_uuid=audio_uuid,
-                        source=source,
-                        client_id=client_id,
-                        user_id=user.user_id,
-                        user_email=user.email,
-                        timestamp=timestamp,
-                        chunk_dir=chunk_dir,
-                        validate=True,  # Validate WAV format, convert stereo→mono
+                    audio_data, sample_rate, sample_width, channels, duration = await validate_and_prepare_audio(
+                        audio_data=content,
+                        expected_sample_rate=16000,  # Expecting 16kHz
+                        convert_to_mono=True  # Convert stereo to mono
                     )
                 except AudioValidationError as e:
                     processed_files.append({
@@ -123,7 +110,7 @@ async def upload_and_process_audio_files(
                     continue
 
                 audio_logger.info(
-                    f"📊 {file.filename}: {duration:.1f}s → {relative_audio_path}"
+                    f"📊 {file.filename}: {duration:.1f}s ({sample_rate}Hz, {channels}ch, {sample_width} bytes/sample)"
                 )
 
                 # Create conversation immediately for uploaded files (conversation_id auto-generated)
@@ -139,20 +126,37 @@ async def upload_and_process_audio_files(
                     title=title,
                     summary="Processing uploaded audio file..."
                 )
-                # Use the relative path returned by write_audio_file (already includes folder prefix if applicable)
-                conversation.audio_path = relative_audio_path
                 await conversation.insert()
                 conversation_id = conversation.conversation_id  # Get the auto-generated ID
 
                 audio_logger.info(f"📝 Created conversation {conversation_id} for uploaded file")
 
+                # Convert audio directly to MongoDB chunks
+                try:
+                    num_chunks = await convert_audio_to_chunks(
+                        conversation_id=conversation_id,
+                        audio_data=audio_data,
+                        sample_rate=sample_rate,
+                        channels=channels,
+                        sample_width=sample_width,
+                    )
+                    audio_logger.info(
+                        f"📦 Converted uploaded file to {num_chunks} MongoDB chunks "
+                        f"(conversation {conversation_id[:12]})"
+                    )
+                except Exception as chunk_error:
+                    audio_logger.error(
+                        f"Failed to convert uploaded file to chunks: {chunk_error}",
+                        exc_info=True
+                    )
+
                 # Enqueue post-conversation processing job chain
                 from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs
 
                 job_ids = start_post_conversation_jobs(
                     conversation_id=conversation_id,
                     audio_uuid=audio_uuid,
-                    audio_file_path=file_path,
+                    audio_file_path=None,  # No file path - using MongoDB chunks
                     user_id=user.user_id,
                     post_transcription=True,  # Run batch transcription for uploads
                     client_id=client_id  # Pass client_id for UI tracking
@@ -217,45 +221,3 @@ async def upload_and_process_audio_files(
         return JSONResponse(
             status_code=500, content={"error": f"File upload failed: {str(e)}"}
         )
-
-
-async def get_conversation_audio_path(conversation_id: str, user: User) -> Path:
-    """
-    Get the file path for a conversation's audio file.
-
-    Args:
-        conversation_id: The conversation ID
-        user: The authenticated user
-
-    Returns:
-        Path object for the audio file
-
-    Raises:
-        ValueError: If conversation not found, access denied, or audio file not available
-    """
-    # Get conversation by conversation_id (UUID field, not _id)
-    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
-
-    if not conversation:
-        raise ValueError("Conversation not found")
-
-    # Check ownership (admins can access all files)
-    if not user.is_superuser and conversation.user_id != str(user.user_id):
-        raise ValueError("Access denied")
-
-    # Get the audio path
-    audio_path = conversation.audio_path
-
-    if not audio_path:
-        raise ValueError(f"No audio file available for this conversation")
-
-    # Build full file path
-    from advanced_omi_backend.app_config import get_audio_chunk_dir
-    audio_dir = get_audio_chunk_dir()
-    file_path = audio_dir / audio_path
-
-    # Check if file exists
-    if not file_path.exists() or not file_path.is_file():
-        raise ValueError("Audio file not found on disk")
-
-    return file_path
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
index 943d86bd..b26123f3 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
@@ -5,25 +5,20 @@
 import logging
 import time
 from pathlib import Path
-from typing import Optional
+
+from fastapi.responses import JSONResponse
 
 from advanced_omi_backend.client_manager import (
     ClientManager,
     client_belongs_to_user,
 )
-from advanced_omi_backend.models.audio_file import AudioFile
 from advanced_omi_backend.models.conversation import Conversation
+from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
 from advanced_omi_backend.users import User
-from fastapi.responses import JSONResponse
 
 logger = logging.getLogger(__name__)
 audio_logger = logging.getLogger("audio_processing")
 
-# Legacy audio_chunks collection is still used by some endpoints (speaker assignment, segment updates)
-# But conversation queries now use the Conversation model directly
-# Audio cropping operations are handled in audio_controller.py
-
-
 async def close_current_conversation(client_id: str, user: User, client_manager: ClientManager):
     """Close the current conversation for a specific client. Users can only close their own conversations."""
     # Validate client ownership
@@ -103,6 +98,9 @@ async def get_conversation(conversation_id: str, user: User):
             "user_id": conversation.user_id,
             "client_id": conversation.client_id,
             "audio_path": conversation.audio_path,
+            "audio_chunks_count": conversation.audio_chunks_count,
+            "audio_total_duration": conversation.audio_total_duration,
+            "audio_compression_ratio": conversation.audio_compression_ratio,
             "created_at": conversation.created_at.isoformat() if conversation.created_at else None,
             "deleted": conversation.deleted,
             "deletion_reason": conversation.deletion_reason,
@@ -153,6 +151,9 @@ async def get_conversations(user: User):
                 "user_id": conv.user_id,
                 "client_id": conv.client_id,
                 "audio_path": conv.audio_path,
+                "audio_chunks_count": conv.audio_chunks_count,
+                "audio_total_duration": conv.audio_total_duration,
+                "audio_compression_ratio": conv.audio_compression_ratio,
                 "created_at": conv.created_at.isoformat() if conv.created_at else None,
                 "deleted": conv.deleted,
                 "deletion_reason": conv.deletion_reason,
@@ -177,12 +178,87 @@ async def get_conversations(user: User):
         return JSONResponse(status_code=500, content={"error": "Error fetching conversations"})
 
 
-async def delete_conversation(conversation_id: str, user: User):
-    """Delete a conversation and its associated audio files. Users can only delete their own conversations."""
+async def _soft_delete_conversation(conversation: Conversation, user: User) -> JSONResponse:
+    """Mark conversation and chunks as deleted (soft delete)."""
+    conversation_id = conversation.conversation_id
+
+    # Mark conversation as deleted
+    conversation.deleted = True
+    conversation.deletion_reason = "user_deleted"
+    conversation.deleted_at = datetime.utcnow()
+    await conversation.save()
+
+    logger.info(f"Soft deleted conversation {conversation_id} for user {user.user_id}")
+
+    # Soft delete all associated audio chunks
+    result = await AudioChunkDocument.find(
+        AudioChunkDocument.conversation_id == conversation_id,
+        AudioChunkDocument.deleted == False  # Only update non-deleted chunks
+    ).update_many({
+        "$set": {
+            "deleted": True,
+            "deleted_at": datetime.utcnow()
+        }
+    })
+
+    deleted_chunks = result.modified_count
+    logger.info(f"Soft deleted {deleted_chunks} audio chunks for conversation {conversation_id}")
+
+    return JSONResponse(
+        status_code=200,
+        content={
+            "message": f"Successfully soft deleted conversation '{conversation_id}'",
+            "deleted_chunks": deleted_chunks,
+            "conversation_id": conversation_id,
+            "client_id": conversation.client_id,
+            "deleted_at": conversation.deleted_at.isoformat() if conversation.deleted_at else None
+        }
+    )
+
+
+async def _hard_delete_conversation(conversation: Conversation) -> JSONResponse:
+    """Permanently delete conversation and chunks (admin-only; the caller must enforce the permission check)."""
+    conversation_id = conversation.conversation_id
+    client_id = conversation.client_id
+    audio_uuid = conversation.audio_uuid
+
+    # Delete conversation document
+    await conversation.delete()
+    logger.info(f"Hard deleted conversation {conversation_id}")
+
+    # Delete all audio chunks
+    result = await AudioChunkDocument.find(
+        AudioChunkDocument.conversation_id == conversation_id
+    ).delete()
+
+    deleted_chunks = result.deleted_count
+    logger.info(f"Hard deleted {deleted_chunks} audio chunks for conversation {conversation_id}")
+
+    return JSONResponse(
+        status_code=200,
+        content={
+            "message": f"Successfully permanently deleted conversation '{conversation_id}'",
+            "deleted_chunks": deleted_chunks,
+            "conversation_id": conversation_id,
+            "client_id": client_id,
+            "audio_uuid": audio_uuid
+        }
+    )
+
+
+async def delete_conversation(conversation_id: str, user: User, permanent: bool = False):
+    """
+    Delete a conversation: soft delete by default (mark as deleted but keep data).
+
+    Args:
+        conversation_id: Conversation to delete
+        user: Requesting user
+        permanent: If True and the user is an admin, permanently delete; otherwise soft delete
+    """
     try:
         # Create masked identifier for logging
         masked_id = f"{conversation_id[:8]}...{conversation_id[-4:]}" if len(conversation_id) > 12 else "***"
-        logger.info(f"Attempting to delete conversation: {masked_id}")
+        logger.info(f"Attempting to {'permanently ' if permanent else ''}delete conversation: {masked_id}")
 
         # Find the conversation using Beanie
         conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
@@ -206,57 +282,91 @@ async def delete_conversation(conversation_id: str, user: User):
                 }
             )
 
-        # Get file paths before deletion
-        audio_path = conversation.audio_path
-        audio_uuid = conversation.audio_uuid
-        client_id = conversation.client_id
-
-        # Delete the conversation from database
-        await conversation.delete()
-        logger.info(f"Deleted conversation {conversation_id}")
-
-        # Also delete from legacy AudioFile collection if it exists (backward compatibility)
-        audio_file = await AudioFile.find_one(AudioFile.audio_uuid == audio_uuid)
-        if audio_file:
-            await audio_file.delete()
-            logger.info(f"Deleted legacy audio file record for {audio_uuid}")
-
-        # Delete associated audio files from disk
-        deleted_files = []
-        if audio_path:
-            try:
-                # Construct full path to audio file
-                full_audio_path = Path("/app/audio_chunks") / audio_path
-                if full_audio_path.exists():
-                    full_audio_path.unlink()
-                    deleted_files.append(str(full_audio_path))
-                    logger.info(f"Deleted audio file: {full_audio_path}")
-            except Exception as e:
-                logger.warning(f"Failed to delete audio file {audio_path}: {e}")
-
-        logger.info(f"Successfully deleted conversation {conversation_id} for user {user.user_id}")
-
-        # Prepare response message
-        delete_summary = ["conversation"]
-        if deleted_files:
-            delete_summary.append(f"{len(deleted_files)} audio file(s)")
+        # Hard delete (admin only, permanent flag)
+        if permanent and user.is_superuser:
+            return await _hard_delete_conversation(conversation)
+
+        # Soft delete (default)
+        return await _soft_delete_conversation(conversation, user)
+
+    except Exception as e:
+        logger.error(f"Error deleting conversation {conversation_id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Failed to delete conversation: {str(e)}"}
+        )
+
+
+async def restore_conversation(conversation_id: str, user: User) -> JSONResponse:
+    """
+    Restore a soft-deleted conversation.
+
+    Args:
+        conversation_id: Conversation to restore
+        user: Requesting user
+    """
+    try:
+        conversation = await Conversation.find_one(
+            Conversation.conversation_id == conversation_id
+        )
+
+        if not conversation:
+            return JSONResponse(
+                status_code=404,
+                content={"error": "Conversation not found"}
+            )
+
+        # Permission check
+        if not user.is_superuser and conversation.user_id != str(user.user_id):
+            return JSONResponse(
+                status_code=403,
+                content={"error": "Access denied"}
+            )
+
+        if not conversation.deleted:
+            return JSONResponse(
+                status_code=400,
+                content={"error": "Conversation is not deleted"}
+            )
+
+        # Restore conversation
+        conversation.deleted = False
+        conversation.deletion_reason = None
+        conversation.deleted_at = None
+        await conversation.save()
+
+        # Restore audio chunks
+        result = await AudioChunkDocument.find(
+            AudioChunkDocument.conversation_id == conversation_id,
+            AudioChunkDocument.deleted == True
+        ).update_many({
+            "$set": {
+                "deleted": False,
+                "deleted_at": None
+            }
+        })
+
+        restored_chunks = result.modified_count
+
+        logger.info(
+            f"Restored conversation {conversation_id} "
+            f"({restored_chunks} chunks) for user {user.user_id}"
+        )
 
         return JSONResponse(
             status_code=200,
             content={
-                "message": f"Successfully deleted {', '.join(delete_summary)} '{conversation_id}'",
-                "deleted_files": deleted_files,
-                "client_id": client_id,
+                "message": f"Successfully restored conversation '{conversation_id}'",
+                "restored_chunks": restored_chunks,
                 "conversation_id": conversation_id,
-                "audio_uuid": audio_uuid
             }
         )
 
     except Exception as e:
-        logger.error(f"Error deleting conversation {conversation_id}: {e}")
+        logger.error(f"Error restoring conversation {conversation_id}: {e}")
         return JSONResponse(
             status_code=500,
-            content={"error": f"Failed to delete conversation: {str(e)}"}
+            content={"error": f"Failed to restore conversation: {str(e)}"}
         )
 
 
@@ -308,10 +418,17 @@ async def reprocess_transcript(conversation_id: str, user: User):
         version_id = str(uuid.uuid4())
 
         # Enqueue job chain with RQ (transcription -> speaker recognition -> memory)
-        from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job
-        from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job
+        from advanced_omi_backend.controllers.queue_controller import (
+            JOB_RESULT_TTL,
+            default_queue,
+            memory_queue,
+            transcription_queue,
+        )
         from advanced_omi_backend.workers.memory_jobs import process_memory_job
-        from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL
+        from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job
+        from advanced_omi_backend.workers.transcription_jobs import (
+            transcribe_full_audio_job,
+        )
 
         # Job 1: Transcribe audio to text
         transcript_job = transcription_queue.enqueue(
@@ -414,8 +531,8 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use
         version_id = str(uuid.uuid4())
 
         # Enqueue memory processing job with RQ (RQ handles job tracking)
-        from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing
         from advanced_omi_backend.models.job import JobPriority
+        from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing
 
         job = enqueue_memory_processing(
             client_id=conversation_model.client_id,
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
index f5ff3275..46812a8a 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
@@ -127,6 +127,70 @@ async def save_diarization_settings(settings: dict):
         raise e
 
 
+async def get_cleanup_settings_controller(user: User) -> dict:
+    """
+    Get current cleanup settings (admin only).
+
+    Args:
+        user: Authenticated admin user
+
+    Returns:
+        Dict with cleanup settings
+    """
+    from advanced_omi_backend.config import get_cleanup_settings
+
+    return get_cleanup_settings()
+
+
+async def save_cleanup_settings_controller(
+    auto_cleanup_enabled: bool,
+    retention_days: int,
+    user: User
+) -> dict:
+    """
+    Save cleanup settings (admin only).
+
+    Args:
+        auto_cleanup_enabled: Enable/disable automatic cleanup
+        retention_days: Number of days to retain soft-deleted conversations
+        user: Authenticated admin user
+
+    Returns:
+        Updated cleanup settings
+
+    Raises:
+        ValueError: If validation fails
+    """
+    from advanced_omi_backend.config import CleanupSettings, save_cleanup_settings_to_file
+
+    # Validation
+    if not isinstance(auto_cleanup_enabled, bool):
+        raise ValueError("auto_cleanup_enabled must be a boolean")
+
+    if not isinstance(retention_days, int):
+        raise ValueError("retention_days must be an integer")
+
+    if retention_days < 1 or retention_days > 365:
+        raise ValueError("retention_days must be between 1 and 365")
+
+    # Create settings object
+    settings = CleanupSettings(
+        auto_cleanup_enabled=auto_cleanup_enabled,
+        retention_days=retention_days
+    )
+
+    # Save to file (also updates in-memory cache)
+    save_cleanup_settings_to_file(settings)
+
+    logger.info(f"Admin {user.email} updated cleanup settings: auto_cleanup={auto_cleanup_enabled}, retention={retention_days}d")
+
+    return {
+        "auto_cleanup_enabled": settings.auto_cleanup_enabled,
+        "retention_days": settings.retention_days,
+        "message": "Cleanup settings saved successfully"
+    }
+
+
 async def get_speaker_configuration(user: User):
     """Get current user's primary speakers configuration."""
     try:
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
index 28e9924f..ad856b2b 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
@@ -829,8 +829,8 @@ async def _process_batch_audio_complete(
         return
 
     try:
-        from advanced_omi_backend.utils.audio_utils import write_audio_file
         from advanced_omi_backend.models.conversation import create_conversation
+        from advanced_omi_backend.utils.audio_chunk_utils import convert_audio_to_chunks
 
         # Combine all chunks
         complete_audio = b''.join(client_state.batch_audio_chunks)
@@ -842,20 +842,17 @@ async def _process_batch_audio_complete(
         audio_uuid = str(uuid.uuid4())
         timestamp = int(time.time() * 1000)
 
-        # Write audio file and create AudioFile entry
-        relative_audio_path, file_path, duration = await write_audio_file(
-            raw_audio_data=complete_audio,
-            audio_uuid=audio_uuid,
-            source="websocket",
-            client_id=client_id,
-            user_id=user_id,
-            user_email=user_email,
-            timestamp=timestamp,
-            validate=False  # PCM data, not WAV
-        )
+        # Get audio format from batch metadata (set during audio-start)
+        audio_format = getattr(client_state, 'batch_audio_format', {})
+        sample_rate = audio_format.get('rate', OMI_SAMPLE_RATE)
+        sample_width = audio_format.get('width', OMI_SAMPLE_WIDTH)
+        channels = audio_format.get('channels', OMI_CHANNELS)
+
+        # Calculate audio duration
+        duration = len(complete_audio) / (sample_rate * sample_width * channels)
 
         application_logger.info(
-            f"✅ Batch mode: Wrote audio file {relative_audio_path} ({duration:.1f}s)"
+            f"✅ Batch mode: Processing audio ({duration:.1f}s)"
         )
 
         # Create conversation immediately for batch audio (conversation_id auto-generated)
@@ -868,19 +865,38 @@ async def _process_batch_audio_complete(
             title="Batch Recording",
             summary="Processing batch audio..."
         )
-        conversation.audio_path = relative_audio_path
         await conversation.insert()
         conversation_id = conversation.conversation_id  # Get the auto-generated ID
 
         application_logger.info(f"📝 Batch mode: Created conversation {conversation_id}")
 
+        # Convert audio directly to MongoDB chunks (no disk intermediary)
+        try:
+            num_chunks = await convert_audio_to_chunks(
+                conversation_id=conversation_id,
+                audio_data=complete_audio,
+                sample_rate=sample_rate,
+                channels=channels,
+                sample_width=sample_width,
+            )
+            application_logger.info(
+                f"📦 Batch mode: Converted to {num_chunks} MongoDB chunks "
+                f"(conversation {conversation_id[:12]})"
+            )
+        except Exception as chunk_error:
+            application_logger.error(
+                f"Failed to convert batch audio to chunks: {chunk_error}",
+                exc_info=True
+            )
+            # Continue anyway - transcription job will handle it
+
         # Enqueue post-conversation processing job chain
         from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs
 
         job_ids = start_post_conversation_jobs(
             conversation_id=conversation_id,
             audio_uuid=audio_uuid,
-            audio_file_path=file_path,
+            audio_file_path=None,  # No file path - using MongoDB chunks
             user_id=None,  # Will be read from conversation in DB by jobs
             post_transcription=True,  # Run batch transcription for uploads
             client_id=client_id  # Pass client_id for UI tracking
diff --git a/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py b/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py
new file mode 100644
index 00000000..cea20ef7
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py
@@ -0,0 +1,158 @@
+"""
+Audio chunk models for MongoDB-based audio storage.
+
+This module contains the AudioChunkDocument model for storing Opus-compressed
+audio chunks in MongoDB. Each chunk represents a 10-second segment of audio
+from a conversation.
+"""
+
+from datetime import datetime
+from typing import Optional
+from pydantic import ConfigDict, Field, field_serializer
+from beanie import Document, Indexed
+from bson import Binary
+
+
+class AudioChunkDocument(Document):
+    """
+    MongoDB document representing a 10-second audio chunk.
+
+    Audio chunks are stored in Opus-compressed format for ~94% storage reduction
+    compared to raw PCM. Chunks are sequentially numbered and can be reconstructed
+    into complete WAV files for playback or batch processing.
+
+    Storage Format:
+    - Encoding: Opus (24kbps VBR, optimized for speech)
+    - Chunk Duration: 10 seconds (configurable)
+    - Original Format: 16kHz, 16-bit, mono PCM
+    - Compression Ratio: ~0.047 (94% reduction)
+
+    Indexes:
+    - (conversation_id, chunk_index): Primary query pattern for reconstruction
+    - conversation_id: Conversation lookup and counting
+    - created_at, deleted: Maintenance/cleanup queries and soft-delete filtering
+    """
+
+    # Pydantic v2 configuration
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    # Primary identifiers
+    conversation_id: Indexed(str) = Field(
+        description="Parent conversation ID (UUID format)"
+    )
+    chunk_index: int = Field(
+        description="Sequential chunk number (0-based)",
+        ge=0
+    )
+
+    # Audio data
+    audio_data: bytes = Field(
+        description="Opus-encoded audio bytes (stored as BSON Binary in MongoDB)"
+    )
+
+    # Size tracking (both must be strictly positive)
+    original_size: int = Field(
+        description="Original PCM size in bytes (before compression)",
+        gt=0
+    )
+    compressed_size: int = Field(
+        description="Opus-encoded size in bytes (after compression)",
+        gt=0
+    )
+
+    # Time boundaries (validated individually; end_time > start_time is not enforced here)
+    start_time: float = Field(
+        description="Start time in seconds from conversation start",
+        ge=0.0
+    )
+    end_time: float = Field(
+        description="End time in seconds from conversation start",
+        gt=0.0
+    )
+    duration: float = Field(
+        description="Chunk duration in seconds (typically 10.0)",
+        gt=0.0
+    )
+
+    # Audio format
+    sample_rate: int = Field(
+        default=16000,
+        description="Original PCM sample rate (Hz)"
+    )
+    channels: int = Field(
+        default=1,
+        description="Number of audio channels (1=mono, 2=stereo)"
+    )
+
+    # Optional analysis
+    has_speech: Optional[bool] = Field(
+        default=None,
+        description="Voice Activity Detection result (if available)"
+    )
+
+    # Metadata
+    created_at: datetime = Field(
+        default_factory=datetime.utcnow,
+        description="Chunk creation timestamp"
+    )
+
+    # Soft delete fields
+    deleted: bool = Field(
+        default=False,
+        description="Whether this chunk was soft-deleted"
+    )
+    deleted_at: Optional[datetime] = Field(
+        default=None,
+        description="When the chunk was marked as deleted"
+    )
+
+    @field_serializer('audio_data')
+    def serialize_audio_data(self, v: bytes) -> Binary:
+        """
+        Convert bytes to BSON Binary for MongoDB storage.
+
+        MongoDB returns BSON Binary as plain bytes during deserialization,
+        but expects Binary type for serialization to ensure proper binary data handling.
+        """
+        if isinstance(v, bytes):
+            return Binary(v)
+        return v
+
+    class Settings:
+        """Beanie document settings."""
+        name = "audio_chunks"
+
+        indexes = [
+            # Primary query: Retrieve chunks in order for a conversation
+            [("conversation_id", 1), ("chunk_index", 1)],
+
+            # Conversation lookup and counting (the field is also declared Indexed above)
+            "conversation_id",
+
+            # Maintenance queries (cleanup, monitoring)
+            "created_at",
+
+            # Soft delete filtering
+            "deleted"
+        ]
+
+    @property
+    def compression_ratio(self) -> float:
+        """Return compressed_size / original_size, or 0.0 when original_size is 0."""
+        if self.original_size == 0:
+            return 0.0
+        return self.compressed_size / self.original_size
+
+    @property
+    def storage_savings_percent(self) -> float:
+        """Return the storage saved by compression as (1 - compression_ratio) * 100."""
+        return (1 - self.compression_ratio) * 100
+
+    def __repr__(self) -> str:
+        """Summarize the chunk: first 8 chars of conversation_id, index, duration, ratio."""
+        return (
+            f"AudioChunk(conversation={self.conversation_id[:8]}..., "
+            f"index={self.chunk_index}, "
+            f"duration={self.duration:.1f}s, "
+            f"compression={self.compression_ratio:.3f})"
+        )
diff --git a/backends/advanced/src/advanced_omi_backend/models/audio_file.py b/backends/advanced/src/advanced_omi_backend/models/audio_file.py
deleted file mode 100644
index ca154500..00000000
--- a/backends/advanced/src/advanced_omi_backend/models/audio_file.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-AudioFile models for Chronicle backend.
-
-This module contains the Beanie Document model for audio_chunks collection,
-which stores ALL audio files (both with and without speech). This is the
-storage layer - all audio gets stored here with its metadata.
-
-Note: Named AudioFile (not AudioChunk) to avoid confusion with wyoming.audio.AudioChunk
-which is the in-memory streaming audio data structure.
-"""
-
-from datetime import datetime
-from typing import Dict, List, Optional, Any
-from pydantic import BaseModel, Field
-
-from beanie import Document, Indexed
-
-
-class AudioFile(Document):
-    """
-    Audio file model representing persisted audio files in MongoDB.
-
-    The audio_chunks collection stores ALL raw audio files (both with and without speech).
-    This is just for audio file storage and metadata. If speech is detected, a
-    Conversation document is created which contains transcripts and memories.
-
-    This is different from wyoming.audio.AudioChunk which is for streaming audio data.
-    """
-
-    # Core identifiers
-    audio_uuid: Indexed(str, unique=True) = Field(description="Unique audio identifier")
-    source: Indexed(str) = Field(
-        default="upload",
-        description="Source of the audio (upload, gdrive, etc.)"
-    )
-    audio_path: str = Field(description="Path to raw audio file")
-    client_id: Indexed(str) = Field(description="Client device identifier")
-    timestamp: Indexed(int) = Field(description="Unix timestamp in milliseconds")
-
-    # User information
-    user_id: Indexed(str) = Field(description="User who owns this audio")
-    user_email: Optional[str] = Field(None, description="User email")
-
-    # Speech-driven conversation linking
-    conversation_id: Optional[str] = Field(
-        None,
-        description="Link to Conversation if speech was detected"
-    )
-    has_speech: bool = Field(default=False, description="Whether speech was detected")
-    speech_analysis: Dict[str, Any] = Field(
-        default_factory=dict,
-        description="Speech detection results"
-    )
-
-
-
-    class Settings:
-        name = "audio_chunks"
-        indexes = [
-            "audio_uuid",
-            "client_id",
-            "user_id",
-            "timestamp", 
-        ]
\ No newline at end of file
diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py
index 00178f10..24091ef8 100644
--- a/backends/advanced/src/advanced_omi_backend/models/conversation.py
+++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py
@@ -88,8 +88,22 @@ class MemoryVersion(BaseModel):
     user_id: Indexed(str) = Field(description="User who owns this conversation")
     client_id: Indexed(str) = Field(description="Client device identifier")
 
-    # Audio file reference
-    audio_path: Optional[str] = Field(None, description="Path to audio file (relative to CHUNK_DIR)")
+    # Legacy audio path field - no longer used, audio stored as MongoDB chunks
+    audio_path: Optional[str] = Field(None, description="Legacy field, not populated for new conversations")
+
+    # MongoDB chunk-based audio storage (new system)
+    audio_chunks_count: Optional[int] = Field(
+        None,
+        description="Total number of 10-second audio chunks stored in MongoDB"
+    )
+    audio_total_duration: Optional[float] = Field(
+        None,
+        description="Total audio duration in seconds (sum of all chunks)"
+    )
+    audio_compression_ratio: Optional[float] = Field(
+        None,
+        description="Compression ratio (compressed_size / original_size), typically ~0.047 for Opus"
+    )
 
     # Creation metadata
     created_at: Indexed(datetime) = Field(default_factory=datetime.utcnow, description="When the conversation was created")
diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py
index b295782c..763373d2 100644
--- a/backends/advanced/src/advanced_omi_backend/models/job.py
+++ b/backends/advanced/src/advanced_omi_backend/models/job.py
@@ -35,10 +35,10 @@ async def _ensure_beanie_initialized():
             from motor.motor_asyncio import AsyncIOMotorClient
             from beanie import init_beanie
             from advanced_omi_backend.models.conversation import Conversation
-            from advanced_omi_backend.models.audio_file import AudioFile
-            from advanced_omi_backend.models.user import User                       
+            from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
+            from advanced_omi_backend.models.user import User
             from pymongo.errors import ConfigurationError
-  
+
             # Get MongoDB URI from environment
             mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017")
 
@@ -54,7 +54,7 @@ async def _ensure_beanie_initialized():
             # Initialize Beanie
             await init_beanie(
                 database=database,
-                document_models=[User, Conversation, AudioFile],
+                document_models=[User, Conversation, AudioChunkDocument],
             )
 
             _beanie_initialized = True
diff --git a/backends/advanced/src/advanced_omi_backend/routers/api_router.py b/backends/advanced/src/advanced_omi_backend/routers/api_router.py
index 80c03eae..791bd5ca 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/api_router.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/api_router.py
@@ -11,6 +11,7 @@
 from fastapi import APIRouter
 
 from .modules import (
+    admin_router,
     audio_router,
     chat_router,
     client_router,
@@ -30,6 +31,7 @@
 router = APIRouter(prefix="/api", tags=["api"])
 
 # Include all sub-routers
+router.include_router(admin_router)
 router.include_router(audio_router)
 router.include_router(user_router)
 router.include_router(chat_router)
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py b/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py
index 21f89991..3c8e4ceb 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py
@@ -12,8 +12,10 @@
 - audio_routes: Audio file uploads and processing
 - health_routes: Health check endpoints
 - websocket_routes: WebSocket connection handling
+- admin_routes: Admin-only system management endpoints
 """
 
+from .admin_routes import router as admin_router
 from .audio_routes import router as audio_router
 from .chat_routes import router as chat_router
 from .client_routes import router as client_router
@@ -27,6 +29,7 @@
 from .websocket_routes import router as websocket_router
 
 __all__ = [
+   "admin_router",
    "audio_router",
    "chat_router",
    "client_router",
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py
new file mode 100644
index 00000000..6fbbfc56
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py
@@ -0,0 +1,122 @@
+"""
+Admin routes for Chronicle API.
+
+Provides admin-only endpoints for system management and cleanup operations.
+"""
+
+import logging
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Query, HTTPException
+from fastapi.responses import JSONResponse
+
+from advanced_omi_backend.auth import current_active_user
+from advanced_omi_backend.users import User
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/admin", tags=["admin"])
+
+
+def require_admin(current_user: User = Depends(current_active_user)) -> User:
+    """FastAPI dependency: pass superusers through, reject everyone else with 403."""
+    if current_user.is_superuser:
+        return current_user
+
+    # Authenticated but lacking the superuser flag: forbidden.
+    forbidden = HTTPException(status_code=403, detail="Admin permissions required")
+    raise forbidden
+
+
+@router.get("/cleanup/settings")
+async def get_cleanup_settings_admin(
+    admin: User = Depends(require_admin)
+):
+    """Return the current cleanup settings plus a storage-location note (admin only)."""
+    from advanced_omi_backend.config import get_cleanup_settings
+
+    # Copy the settings mapping first, then attach the informational note.
+    payload = dict(get_cleanup_settings())
+    payload["note"] = "Cleanup settings are stored in /app/data/cleanup_config.json"
+
+    return payload
+
+
+@router.post("/cleanup")
+async def trigger_cleanup(
+    dry_run: bool = Query(False, description="Preview what would be deleted"),
+    retention_days: Optional[int] = Query(None, description="Override retention period"),
+    admin: User = Depends(require_admin)
+):
+    """
+    Queue a background purge of soft-deleted conversations (admin only).
+
+    Enqueues purge_old_deleted_conversations on the "default" queue and returns
+    the job id right away; the purge itself runs in the worker. retention_days
+    is forwarded unchanged (None lets the job pick its configured default).
+    Returns a 500 JSON body if the job cannot be enqueued.
+    """
+    try:
+        from advanced_omi_backend.workers.cleanup_jobs import purge_old_deleted_conversations
+        from advanced_omi_backend.controllers.queue_controller import get_queue
+
+        job = get_queue("default").enqueue(
+            purge_old_deleted_conversations,
+            retention_days=retention_days,  # Will use config default if None
+            dry_run=dry_run,
+            job_timeout="30m",
+        )
+
+        # Compare against None so an explicit override of 0 is not reported as "default"
+        retention_label = "default" if retention_days is None else retention_days
+        logger.info(f"Admin {admin.email} triggered cleanup job {job.id} (dry_run={dry_run}, retention={retention_label})")
+
+        return JSONResponse(status_code=200, content={
+            "message": f"Cleanup job {'(dry run) ' if dry_run else ''}queued successfully",
+            "job_id": job.id,
+            "retention_days": "default (from config)" if retention_days is None else retention_days,
+            "dry_run": dry_run,
+            "note": "Check job status at /api/queue/jobs/{job_id}"
+        })
+    except Exception as e:
+        logger.exception(f"Failed to trigger cleanup: {e}")  # keep the traceback in the log
+        return JSONResponse(status_code=500, content={"error": f"Failed to trigger cleanup: {str(e)}"})
+
+
+@router.get("/cleanup/preview")
+async def preview_cleanup(
+    retention_days: Optional[int] = Query(None, description="Preview with specific retention period"),
+    admin: User = Depends(require_admin)
+):
+    """
+    Count the soft-deleted conversations a cleanup run would purge (admin only).
+
+    Read-only: nothing is deleted. Falls back to the retention period from the
+    cleanup config file when no override is supplied.
+    """
+    try:
+        from datetime import datetime, timedelta
+        from advanced_omi_backend.config import load_cleanup_settings_from_file
+        from advanced_omi_backend.models.conversation import Conversation
+
+        # No override supplied: take the retention period from the saved config
+        if retention_days is None:
+            retention_days = load_cleanup_settings_from_file().retention_days
+
+        retention_window = timedelta(days=retention_days)
+        cutoff_date = datetime.utcnow() - retention_window
+
+        # Soft-deleted conversations whose deletion predates the cutoff
+        purge_candidates = Conversation.find(
+            Conversation.deleted == True,
+            Conversation.deleted_at < cutoff_date
+        )
+        return {
+            "retention_days": retention_days,
+            "cutoff_date": cutoff_date.isoformat(),
+            "conversations_to_delete": await purge_candidates.count(),
+            "note": f"Conversations deleted before {cutoff_date.date()} would be purged"
+        }
+    except Exception as e:
+        logger.error(f"Failed to preview cleanup: {e}")
+        return JSONResponse(status_code=500, content={"error": f"Failed to preview cleanup: {str(e)}"})
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py
index 58a33ff5..afa2906f 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py
@@ -2,17 +2,26 @@
 Audio file upload and serving routes.
 
 Handles audio file uploads, processing job management, and audio file serving.
+Audio is served from MongoDB chunks with Opus compression.
 """
 
+import io
 from typing import Optional
 from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse, StreamingResponse
 
 from advanced_omi_backend.auth import current_superuser, current_active_user_optional, get_user_from_token_param
 from advanced_omi_backend.controllers import audio_controller
 from advanced_omi_backend.models.user import User
+from advanced_omi_backend.models.conversation import Conversation
 from advanced_omi_backend.app_config import get_audio_chunk_dir
 from advanced_omi_backend.utils.gdrive_audio_utils import download_audio_files_from_drive, AudioValidationError
+from advanced_omi_backend.utils.audio_chunk_utils import (
+    reconstruct_wav_from_conversation,
+    retrieve_audio_chunks,
+    concatenate_chunks_to_pcm,
+    build_wav_from_pcm,
+)
 
 router = APIRouter(prefix="/audio", tags=["audio"])
 
@@ -41,10 +50,13 @@ async def get_conversation_audio(
     current_user: Optional[User] = Depends(current_active_user_optional),
 ):
     """
-    Serve audio file for a conversation.
+    Serve complete audio file for a conversation from MongoDB chunks.
 
-    This endpoint uses conversation_id for direct lookup and ownership verification,
-    which is more efficient than querying by filename.
+    Reconstructs audio by:
+    1. Retrieving all Opus-compressed chunks from MongoDB
+    2. Decoding each chunk to PCM
+    3. Concatenating PCM data
+    4. Building complete WAV file with headers
 
     Supports both header-based auth (Authorization: Bearer) and query param token
     for <audio> element compatibility.
@@ -55,10 +67,10 @@ async def get_conversation_audio(
         current_user: Authenticated user (from header)
 
     Returns:
-        FileResponse with the audio file
+        StreamingResponse with complete WAV file
 
     Raises:
-        404: If conversation or audio file not found
+        404: If conversation or audio chunks not found
         403: If user doesn't own the conversation
         401: If not authenticated
     """
@@ -69,27 +81,272 @@ async def get_conversation_audio(
     if not current_user:
         raise HTTPException(status_code=401, detail="Authentication required")
 
-    # Get audio file path from controller
+    # Verify conversation exists and user has access
+    conversation = await Conversation.find_one(
+        Conversation.conversation_id == conversation_id
+    )
+
+    if not conversation:
+        raise HTTPException(status_code=404, detail="Conversation not found")
+
+    # Check ownership (admins can access all)
+    if not current_user.is_superuser and conversation.user_id != str(current_user.user_id):
+        raise HTTPException(status_code=403, detail="Access denied")
+
+    # Reconstruct WAV from MongoDB chunks
     try:
-        file_path = await audio_controller.get_conversation_audio_path(
-            conversation_id=conversation_id,
-            user=current_user
-        )
+        wav_data = await reconstruct_wav_from_conversation(conversation_id)
     except ValueError as e:
-        # Map ValueError messages to appropriate HTTP status codes
-        error_msg = str(e)
-        if "not found" in error_msg.lower():
-            raise HTTPException(status_code=404, detail=error_msg)
-        elif "access denied" in error_msg.lower():
-            raise HTTPException(status_code=403, detail=error_msg)
-        else:
-            raise HTTPException(status_code=404, detail=error_msg)
-
-    # Serve the file
-    return FileResponse(
-        path=str(file_path),
+        # No chunks found for conversation
+        raise HTTPException(status_code=404, detail=str(e))
+    except Exception as e:
+        # Reconstruction failed
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to reconstruct audio: {str(e)}"
+        )
+
+    # Serve as WAV file
+    return StreamingResponse(
+        io.BytesIO(wav_data),
+        media_type="audio/wav",
+        headers={
+            "Content-Disposition": f"inline; filename={conversation_id}.wav",
+            "Content-Length": str(len(wav_data)),
+            "X-Audio-Source": "mongodb-chunks",
+            "X-Chunk-Count": str(conversation.audio_chunks_count or 0),
+        }
+    )
+
+
+@router.get("/stream_audio/{conversation_id}")
+async def stream_conversation_audio(
+    conversation_id: str,
+    token: Optional[str] = Query(default=None, description="JWT token for audio element access"),
+    current_user: Optional[User] = Depends(current_active_user_optional),
+):
+    """
+    Stream audio file for a conversation with progressive chunk delivery.
+
+    Better UX for long conversations - starts playback before full download completes.
+
+    Uses cursor-based pagination to stream chunks in batches of 20, decoding
+    and serving each batch as it's retrieved.
+
+    Supports both header-based auth (Authorization: Bearer) and query param token
+    for <audio> element compatibility.
+
+    Args:
+        conversation_id: The conversation ID
+        token: Optional JWT token as query param (for audio elements)
+        current_user: Authenticated user (from header)
+
+    Returns:
+        StreamingResponse with chunked WAV data (Transfer-Encoding: chunked)
+
+    Raises:
+        404: If conversation or audio chunks not found
+        403: If user doesn't own the conversation
+        401: If not authenticated
+    """
+    # Try token param if header auth failed
+    if not current_user and token:
+        current_user = await get_user_from_token_param(token)
+
+    if not current_user:
+        raise HTTPException(status_code=401, detail="Authentication required")
+
+    # Verify conversation exists and user has access
+    conversation = await Conversation.find_one(
+        Conversation.conversation_id == conversation_id
+    )
+
+    if not conversation:
+        raise HTTPException(status_code=404, detail="Conversation not found")
+
+    # Check ownership (admins can access all)
+    if not current_user.is_superuser and conversation.user_id != str(current_user.user_id):
+        raise HTTPException(status_code=403, detail="Access denied")
+
+    # Check if chunks exist
+    if not conversation.audio_chunks_count or conversation.audio_chunks_count == 0:
+        raise HTTPException(status_code=404, detail="No audio data for this conversation")
+
+    async def stream_chunks():
+        """Generator that yields WAV data in batches."""
+        # First, yield a 44-byte WAV header. The total length is unknown while
+        # streaming, so both size fields carry the 0xFFFFFFFF "unknown" marker.
+        SAMPLE_RATE = 16000
+        CHANNELS = 1
+        SAMPLE_WIDTH = 2
+
+        # wave emits the canonical RIFF/fmt/data layout; with no frames it records size 0
+        wav_header = io.BytesIO()
+        import wave
+        with wave.open(wav_header, "wb") as wav:
+            wav.setnchannels(CHANNELS)
+            wav.setsampwidth(SAMPLE_WIDTH)
+            wav.setframerate(SAMPLE_RATE)
+            wav.writeframes(b"")
+
+        # Overwrite RIFF size (bytes 4-7) and data size (bytes 40-43), then yield
+        header = bytearray(wav_header.getvalue())
+        header[4:8] = header[40:44] = b"\xff\xff\xff\xff"
+        yield bytes(header)
+
+        # Stream chunks in batches of 20
+        start_index = 0
+        batch_size = 20
+
+        while start_index < conversation.audio_chunks_count:
+            # Retrieve batch of chunks
+            chunks = await retrieve_audio_chunks(
+                conversation_id=conversation_id,
+                start_index=start_index,
+                limit=batch_size
+            )
+
+            if not chunks:
+                break
+
+            # Decode and concatenate this batch
+            pcm_batch = await concatenate_chunks_to_pcm(chunks)
+
+            # Yield PCM data (client's WAV parser handles the stream)
+            yield pcm_batch
+
+            # Move to next batch
+            start_index += batch_size
+
+    return StreamingResponse(
+        stream_chunks(),
+        media_type="audio/wav",
+        headers={
+            "Content-Disposition": f"inline; filename={conversation_id}_stream.wav",
+            "X-Audio-Source": "mongodb-chunks-stream",
+            "X-Chunk-Count": str(conversation.audio_chunks_count or 0),
+            "X-Total-Duration": str(conversation.audio_total_duration or 0),
+        }
+    )
+
+
+@router.get("/chunks/{conversation_id}")
+async def get_audio_chunk_range(
+    conversation_id: str,
+    start_time: float = Query(..., description="Start time in seconds"),
+    end_time: float = Query(..., description="End time in seconds"),
+    token: Optional[str] = Query(default=None, description="JWT token for audio element access"),
+    current_user: Optional[User] = Depends(current_active_user_optional),
+):
+    """
+    Serve specific audio chunks by time range for seekable audio player.
+
+    Returns a WAV file covering the requested time range without decoding
+    the entire conversation. Enables efficient seeking in the UI player.
+
+    Example:
+        GET /api/audio/chunks/uuid?start_time=15.5&end_time=25.5&token=xxx
+        Returns: 10 seconds of audio from 15.5s to 25.5s
+
+    Args:
+        conversation_id: The conversation ID
+        start_time: Start time in seconds (inclusive)
+        end_time: End time in seconds (clamped to the recorded duration when known)
+        token: Optional JWT token as query param
+        current_user: Authenticated user (from header)
+
+    Returns:
+        StreamingResponse with WAV file for requested range
+
+    Raises:
+        404: If conversation or audio chunks not found, or the range starts past the audio
+        403: If user doesn't own the conversation
+        401: If not authenticated
+        400: If time range is invalid
+    """
+    # Try token param if header auth failed
+    if not current_user and token:
+        current_user = await get_user_from_token_param(token)
+
+    if not current_user:
+        raise HTTPException(status_code=401, detail="Authentication required")
+
+    # Verify conversation exists and user has access
+    conversation = await Conversation.find_one(
+        Conversation.conversation_id == conversation_id
+    )
+
+    if not conversation:
+        raise HTTPException(status_code=404, detail="Conversation not found")
+
+    # Check ownership (admins can access all)
+    if not current_user.is_superuser and conversation.user_id != str(current_user.user_id):
+        raise HTTPException(status_code=403, detail="Access denied")
+
+    # Validate time range
+    if start_time < 0 or end_time <= start_time:
+        raise HTTPException(status_code=400, detail="Invalid time range")
+
+    # Clamp to the recorded duration; a window that starts past the end of the audio is empty
+    if conversation.audio_total_duration and end_time > conversation.audio_total_duration:
+        end_time = conversation.audio_total_duration
+    if end_time <= start_time:
+        raise HTTPException(status_code=404, detail=f"No audio data in requested range ({start_time}s-{end_time}s)")
+
+    # Calculate which chunks are needed (each chunk is 10 seconds)
+    CHUNK_DURATION = 10.0
+    start_chunk = int(start_time / CHUNK_DURATION)
+    end_chunk = int(end_time / CHUNK_DURATION)
+    num_chunks = end_chunk - start_chunk + 1
+
+    # Retrieve only needed chunks
+    chunks = await retrieve_audio_chunks(
+        conversation_id=conversation_id,
+        start_index=start_chunk,
+        limit=num_chunks
+    )
+
+    if not chunks:
+        raise HTTPException(status_code=404, detail=f"No audio data in requested range ({start_time}s-{end_time}s)")
+
+    # Decode chunks and concatenate
+    pcm_buffer = await concatenate_chunks_to_pcm(chunks)
+
+    # Trim to exact time range within the chunks
+    SAMPLE_RATE = 16000
+    SAMPLE_WIDTH = 2  # 16-bit
+    CHANNELS = 1
+    frame_size = SAMPLE_WIDTH * CHANNELS
+
+    # Offsets are measured from the start of the buffer (= start of start_chunk),
+    # so a short or missing final chunk cannot shift the window. Working in
+    # whole frames keeps both cut points aligned to sample boundaries.
+    buffer_start_time = start_chunk * CHUNK_DURATION
+    start_frame = int((start_time - buffer_start_time) * SAMPLE_RATE)
+    end_frame = int((end_time - buffer_start_time) * SAMPLE_RATE)
+    start_byte = start_frame * frame_size
+    end_byte = min(end_frame * frame_size, len(pcm_buffer))
+
+    # Trim PCM data
+    trimmed_pcm = pcm_buffer[start_byte:end_byte]
+
+    # Build WAV file with trimmed audio
+    wav_data = await build_wav_from_pcm(
+        pcm_data=trimmed_pcm,
+        sample_rate=SAMPLE_RATE,
+        channels=CHANNELS
+    )
+
+    return StreamingResponse(
+        io.BytesIO(wav_data),
         media_type="audio/wav",
-        filename=file_path.name
+        headers={
+            "Content-Disposition": f"inline; filename=chunk_{start_time}_{end_time}.wav",
+            "Content-Length": str(len(wav_data)),
+            "X-Audio-Duration": str(end_time - start_time),
+            "X-Start-Time": str(start_time),
+            "X-End-Time": str(end_time),
+        }
     )
 
 
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py
index 2fc05425..c529162d 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py
@@ -91,7 +91,18 @@ async def get_conversation_version_history(
 
 @router.delete("/{conversation_id}")
 async def delete_conversation(
-    conversation_id: str, current_user: User = Depends(current_active_user)
+    conversation_id: str,
+    permanent: bool = Query(False, description="Permanently delete (admin only)"),
+    current_user: User = Depends(current_active_user)
+):
+    """Soft delete a conversation (or permanently delete if admin)."""
+    return await conversation_controller.delete_conversation(conversation_id, current_user, permanent)
+
+
+@router.post("/{conversation_id}/restore")
+async def restore_conversation(
+    conversation_id: str,
+    current_user: User = Depends(current_active_user)
 ):
-    """Delete a conversation and its associated audio file. Users can only delete their own conversations."""
-    return await conversation_controller.delete_conversation(conversation_id, current_user)
+    """Restore a soft-deleted conversation."""
+    return await conversation_controller.restore_conversation(conversation_id, current_user)
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
index 93e94817..9d1d2378 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
@@ -57,6 +57,28 @@ async def save_diarization_settings(
     return await system_controller.save_diarization_settings(settings)
 
 
+@router.get("/cleanup-settings")
+async def get_cleanup_settings(
+    current_user: User = Depends(current_superuser)
+):
+    """Return the cleanup settings from system_controller. Admin only (gated by current_superuser)."""
+    return await system_controller.get_cleanup_settings_controller(current_user)
+
+
+@router.post("/cleanup-settings")
+async def save_cleanup_settings(
+    auto_cleanup_enabled: bool = Body(..., description="Enable automatic cleanup of soft-deleted conversations"),
+    retention_days: int = Body(..., ge=1, le=365, description="Number of days to keep soft-deleted conversations"),
+    current_user: User = Depends(current_superuser)
+):
+    """Save the auto-cleanup flag and retention days (validated to 1-365) via system_controller. Admin only."""
+    return await system_controller.save_cleanup_settings_controller(
+        auto_cleanup_enabled=auto_cleanup_enabled,
+        retention_days=retention_days,
+        user=current_user
+    )
+
+
 @router.get("/speaker-configuration")
 async def get_speaker_configuration(current_user: User = Depends(current_active_user)):
     """Get current user's primary speakers configuration."""
diff --git a/backends/advanced/src/advanced_omi_backend/utils/audio_chunk_utils.py b/backends/advanced/src/advanced_omi_backend/utils/audio_chunk_utils.py
new file mode 100644
index 00000000..7d91495c
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/utils/audio_chunk_utils.py
@@ -0,0 +1,739 @@
+"""
+Audio chunk utilities for Opus encoding/decoding and WAV reconstruction.
+
+This module provides functions for:
+- Converting PCM audio to Opus-compressed format
+- Decoding Opus audio back to PCM
+- Building complete WAV files from PCM data
+- Retrieving audio chunks from MongoDB
+
+All FFmpeg operations use subprocess with proper error handling and cleanup.
+"""
+
+import asyncio
+import io
+import logging
+import tempfile
+import wave
+from pathlib import Path
+from typing import List, Optional
+
+from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
+
+logger = logging.getLogger(__name__)
+
+
+async def encode_pcm_to_opus(
+    pcm_data: bytes,
+    sample_rate: int = 16000,
+    channels: int = 1,
+    bitrate: int = 24,
+) -> bytes:
+    """
+    Encode raw PCM audio to Opus format using FFmpeg.
+
+    Args:
+        pcm_data: Raw PCM audio bytes (signed 16-bit little-endian)
+        sample_rate: Sample rate in Hz (default: 16000)
+        channels: Number of audio channels (default: 1 for mono)
+        bitrate: Opus bitrate in kbps (default: 24 for speech)
+
+    Returns:
+        Opus-encoded audio bytes
+
+    Raises:
+        RuntimeError: If FFmpeg encoding fails
+
+    Example:
+        >>> pcm_bytes = b"..."  # 10 seconds of 16kHz mono PCM
+        >>> opus_bytes = await encode_pcm_to_opus(pcm_bytes)
+        >>> # opus_bytes is ~30KB vs 320KB PCM (94% reduction)
+    """
+    # Create temporary files for FFmpeg I/O
+    with tempfile.NamedTemporaryFile(suffix=".pcm", delete=False) as pcm_file, \
+         tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as opus_file:
+
+        pcm_path = Path(pcm_file.name)
+        opus_path = Path(opus_file.name)
+
+        try:
+            # Write PCM data to temp file
+            pcm_file.write(pcm_data)
+            pcm_file.flush()
+
+            # FFmpeg command: PCM → Opus
+            # -f s16le: signed 16-bit little-endian PCM
+            # -ar: sample rate
+            # -ac: audio channels
+            # -c:a libopus: Opus encoder
+            # -b:a: bitrate
+            # -vbr on: variable bitrate for better quality
+            # -application voip: optimize for speech
+            cmd = [
+                "ffmpeg",
+                "-f", "s16le",
+                "-ar", str(sample_rate),
+                "-ac", str(channels),
+                "-i", str(pcm_path),
+                "-c:a", "libopus",
+                "-b:a", f"{bitrate}k",
+                "-vbr", "on",
+                "-application", "voip",
+                "-y",  # Overwrite output
+                str(opus_path),
+            ]
+
+            # Run FFmpeg
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+
+            _, stderr = await process.communicate()
+
+            if process.returncode != 0:
+                error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"  # never mask the RuntimeError with a decode error
+                logger.error(f"FFmpeg Opus encoding failed: {error_msg}")
+                raise RuntimeError(f"Opus encoding failed: {error_msg}")
+
+            # Read Opus output
+            with open(opus_path, "rb") as f:
+                opus_data = f.read()
+
+            logger.debug(
+                f"Encoded PCM ({len(pcm_data)} bytes) → Opus ({len(opus_data)} bytes), "
+                f"compression ratio: {len(opus_data)/max(len(pcm_data), 1):.3f}"  # max() avoids ZeroDivisionError on empty input
+            )
+
+            return opus_data
+
+        finally:
+            # Cleanup temporary files
+            pcm_path.unlink(missing_ok=True)
+            opus_path.unlink(missing_ok=True)
+
+
+async def decode_opus_to_pcm(
+    opus_data: bytes,
+    sample_rate: int = 16000,
+    channels: int = 1,
+) -> bytes:
+    """
+    Decode Opus audio to raw PCM format using FFmpeg.
+
+    Args:
+        opus_data: Opus-encoded audio bytes
+        sample_rate: Target sample rate in Hz (default: 16000)
+        channels: Target number of channels (default: 1 for mono)
+
+    Returns:
+        Raw PCM audio bytes (signed 16-bit little-endian)
+
+    Raises:
+        RuntimeError: If FFmpeg decoding fails
+
+    Example:
+        >>> opus_bytes = b"..."  # Opus-encoded audio
+        >>> pcm_bytes = await decode_opus_to_pcm(opus_bytes)
+        >>> # pcm_bytes can be played or concatenated
+    """
+    # Create temporary files for FFmpeg I/O
+    with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as opus_file, \
+         tempfile.NamedTemporaryFile(suffix=".pcm", delete=False) as pcm_file:
+
+        opus_path = Path(opus_file.name)
+        pcm_path = Path(pcm_file.name)
+
+        try:
+            # Write Opus data to temp file
+            opus_file.write(opus_data)
+            opus_file.flush()
+
+            # FFmpeg command: Opus → PCM
+            # -i: input Opus file
+            # -f s16le: output as signed 16-bit little-endian PCM
+            # -ar: resample to target sample rate
+            # -ac: convert to target channel count
+            cmd = [
+                "ffmpeg",
+                "-i", str(opus_path),
+                "-f", "s16le",
+                "-ar", str(sample_rate),
+                "-ac", str(channels),
+                "-y",  # Overwrite output
+                str(pcm_path),
+            ]
+
+            # Run FFmpeg
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+
+            _, stderr = await process.communicate()
+
+            if process.returncode != 0:
+                error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"  # never mask the RuntimeError with a decode error
+                logger.error(f"FFmpeg Opus decoding failed: {error_msg}")
+                raise RuntimeError(f"Opus decoding failed: {error_msg}")
+
+            # Read PCM output
+            with open(pcm_path, "rb") as f:
+                pcm_data = f.read()
+
+            logger.debug(
+                f"Decoded Opus ({len(opus_data)} bytes) → PCM ({len(pcm_data)} bytes)"
+            )
+
+            return pcm_data
+
+        finally:
+            # Cleanup temporary files
+            opus_path.unlink(missing_ok=True)
+            pcm_path.unlink(missing_ok=True)
+
+
+async def build_wav_from_pcm(
+    pcm_data: bytes,
+    sample_rate: int = 16000,
+    channels: int = 1,
+    sample_width: int = 2,
+) -> bytes:
+    """
+    Wrap raw PCM samples in a WAV container, entirely in memory.
+
+    Args:
+        pcm_data: Raw PCM frames to place in the WAV data section
+        sample_rate: Frame rate written to the header, in Hz (default: 16000)
+        channels: Channel count written to the header (default: 1)
+        sample_width: Bytes per sample written to the header (default: 2)
+
+    Returns:
+        The full WAV file (header followed by the PCM payload) as bytes
+
+    Example:
+        >>> pcm_bytes = b"..."  # Raw PCM audio
+        >>> wav_bytes = await build_wav_from_pcm(pcm_bytes)
+        >>> # wav_bytes can be served via StreamingResponse
+    """
+    # The wave module needs a seekable sink so it can patch the header sizes;
+    # an in-memory buffer provides that without touching disk.
+    with io.BytesIO() as sink:
+        writer = wave.open(sink, "wb")
+        try:
+            writer.setnchannels(channels)
+            writer.setsampwidth(sample_width)
+            writer.setframerate(sample_rate)
+            writer.writeframes(pcm_data)
+        finally:
+            # Closing the writer finalises the header; the sink stays open
+            # because the wave module did not open it itself.
+            writer.close()
+
+        # Snapshot the bytes before the with-block releases the buffer
+        encoded = sink.getvalue()
+        pcm_len = len(pcm_data)
+
+        logger.debug(
+            f"Built WAV file: {len(encoded)} bytes "
+            f"(PCM: {pcm_len}, header: {len(encoded) - pcm_len})"
+        )
+        return encoded
+
+
+async def retrieve_audio_chunks(
+    conversation_id: str,
+    start_index: int = 0,
+    limit: Optional[int] = None,
+) -> List[AudioChunkDocument]:
+    """
+    Fetch a conversation's audio chunk documents from MongoDB.
+
+    Results are ordered ascending by chunk_index.
+
+    Args:
+        conversation_id: ID of the conversation that owns the chunks
+        start_index: Lowest chunk_index to include (default: 0)
+        limit: Cap on the number of chunks returned (default: None, no cap)
+
+    Returns:
+        The matching AudioChunkDocument instances, ordered by chunk_index
+
+    Example:
+        >>> # Get all chunks for a conversation
+        >>> chunks = await retrieve_audio_chunks("550e8400-e29b-41d4...")
+        >>> # Get chunks 5-14 (10 chunks starting at index 5)
+        >>> chunks = await retrieve_audio_chunks("550e8400-e29b-41d4...", start_index=5, limit=10)
+    """
+    # Match this conversation's chunks at or after the requested index
+    filters = (
+        AudioChunkDocument.conversation_id == conversation_id,
+        AudioChunkDocument.chunk_index >= start_index,
+    )
+    cursor = AudioChunkDocument.find(*filters)
+    # A limit of None means "no cap", so only narrow the cursor when given one
+    if limit is not None:
+        cursor = cursor.limit(limit)
+
+    # Sort ascending by chunk_index and materialise the documents
+    ordered = cursor.sort("+chunk_index")
+    found = await ordered.to_list()
+    logger.debug(
+        f"Retrieved {len(found)} chunks for conversation {conversation_id[:8]}... "
+        f"(start_index={start_index}, limit={limit})"
+    )
+
+    return found
+
+
+async def concatenate_chunks_to_pcm(
+    chunks: List[AudioChunkDocument],
+) -> bytes:
+    """
+    Decode each chunk from Opus and join the results into one PCM byte string.
+
+    Args:
+        chunks: AudioChunkDocument instances, already in playback order
+
+    Returns:
+        The decoded PCM of every chunk, joined in list order (b"" if empty)
+
+    Example:
+        >>> chunks = await retrieve_audio_chunks(conversation_id)
+        >>> pcm_data = await concatenate_chunks_to_pcm(chunks)
+        >>> wav_data = await build_wav_from_pcm(pcm_data)
+    """
+    # Nothing to decode: return early, skipping FFmpeg work and the debug log
+    if not chunks:
+        return b""
+
+    # Decode strictly one chunk at a time, in list order, so the PCM
+    # segments line up exactly as the caller sorted them.
+    segments: List[bytes] = []
+    for doc in chunks:
+        decoded = await decode_opus_to_pcm(
+            opus_data=doc.audio_data,
+            sample_rate=doc.sample_rate,
+            channels=doc.channels,
+        )
+        segments.append(decoded)
+
+    joined = b"".join(segments)
+
+    logger.debug(
+        f"Concatenated {len(chunks)} chunks → {len(joined)} bytes PCM"
+    )
+    return joined
+
+
+async def reconstruct_wav_from_conversation(
+    conversation_id: str,
+    start_index: int = 0,
+    limit: Optional[int] = None,
+) -> bytes:
+    """
+    Rebuild a WAV file for a conversation from its stored MongoDB chunks.
+
+    Convenience wrapper that chains the lower-level helpers:
+    1. retrieve_audio_chunks: load the chunk documents
+    2. concatenate_chunks_to_pcm: decode Opus → PCM
+    3. (same call) join the PCM segments in order
+    4. build_wav_from_pcm: add the WAV headers
+
+    Args:
+        conversation_id: ID of the conversation that owns the chunks
+        start_index: First chunk index to include (default: 0)
+        limit: Maximum number of chunks to include (default: None for all)
+
+    Returns:
+        The reconstructed WAV file as bytes
+
+    Raises:
+        ValueError: If the query returns no chunks for the conversation
+
+    Example:
+        >>> # Get complete audio for conversation
+        >>> wav_data = await reconstruct_wav_from_conversation(conversation_id)
+        >>>
+        >>> # Get first 60 seconds (6 chunks @ 10s each)
+        >>> wav_data = await reconstruct_wav_from_conversation(conversation_id, limit=6)
+    """
+    # Step 1: pull the requested slice of chunk documents
+    docs = await retrieve_audio_chunks(
+        conversation_id=conversation_id,
+        start_index=start_index,
+        limit=limit,
+    )
+    if not docs:
+        missing_msg = f"No audio chunks found for conversation {conversation_id}"
+        raise ValueError(missing_msg)
+
+    # Step 2: the first chunk's format is taken as the format of the whole
+    # file (sample width stays at build_wav_from_pcm's default of 2 bytes)
+    first = docs[0]
+    rate, n_channels = first.sample_rate, first.channels
+
+    # Step 3: Opus -> PCM for every chunk, joined in order
+    raw_pcm = await concatenate_chunks_to_pcm(docs)
+
+    # Step 4: prepend WAV headers
+    wav_bytes = await build_wav_from_pcm(
+        pcm_data=raw_pcm,
+        sample_rate=rate,
+        channels=n_channels,
+    )
+
+    # Duration = bytes / rate / channels / 2 bytes-per-sample
+    seconds = len(raw_pcm) / rate / n_channels / 2
+    logger.info(
+        f"Reconstructed WAV for conversation {conversation_id[:8]}...: "
+        f"{len(docs)} chunks, {len(wav_bytes)} bytes, "
+        f"{seconds:.1f}s duration"
+    )
+    return wav_bytes
+
+
+async def convert_audio_to_chunks(
+    conversation_id: str,
+    audio_data: bytes,
+    sample_rate: int = 16000,
+    channels: int = 1,
+    sample_width: int = 2,
+    chunk_duration: float = 10.0,
+) -> int:
+    """
+    Convert raw PCM audio directly to MongoDB chunks without disk intermediary.
+
+    This is the preferred method as it avoids unnecessary disk I/O.
+    Used for both WebSocket streaming and file uploads.
+
+    Args:
+        conversation_id: Conversation ID to associate chunks with
+        audio_data: Raw PCM audio bytes (16-bit mono)
+        sample_rate: Audio sample rate (default: 16000 Hz)
+        channels: Number of channels (default: 1 = mono)
+        sample_width: Bytes per sample (default: 2 = 16-bit)
+        chunk_duration: Duration of each chunk in seconds (default: 10.0)
+
+    Returns:
+        Number of chunks created (also returned when the conversation is missing)
+
+    Example:
+        >>> # Convert from memory without disk write
+        >>> num_chunks = await convert_audio_to_chunks(
+        ...     conversation_id="550e8400-e29b-41d4...",
+        ...     audio_data=pcm_bytes,
+        ...     sample_rate=16000,
+        ...     channels=1,
+        ...     sample_width=2,
+        ... )
+        >>> print(f"Created {num_chunks} chunks")
+    """
+    from advanced_omi_backend.models.conversation import Conversation
+    from bson import Binary
+
+    logger.info(f"📦 Converting audio to MongoDB chunks: {len(audio_data)} bytes PCM")
+
+    # Calculate chunk size in bytes
+    bytes_per_second = sample_rate * sample_width * channels
+    chunk_size_bytes = int(chunk_duration * bytes_per_second)
+
+    # Split into chunks and store
+    chunk_index = 0
+    total_original_size = 0
+    total_compressed_size = 0
+    offset = 0
+
+    while offset < len(audio_data):
+        # Extract chunk PCM data
+        chunk_end = min(offset + chunk_size_bytes, len(audio_data))
+        chunk_pcm = audio_data[offset:chunk_end]
+
+        if len(chunk_pcm) == 0:
+            break
+
+        # Calculate chunk timing
+        chunk_start_time = offset / bytes_per_second
+        chunk_end_time = chunk_end / bytes_per_second
+        chunk_duration_actual = (chunk_end - offset) / bytes_per_second
+
+        # Encode to Opus
+        opus_data = await encode_pcm_to_opus(
+            pcm_data=chunk_pcm,
+            sample_rate=sample_rate,
+            channels=channels,
+            bitrate=24  # 24kbps for speech
+        )
+
+        # Create MongoDB document
+        audio_chunk = AudioChunkDocument(
+            conversation_id=conversation_id,
+            chunk_index=chunk_index,
+            audio_data=Binary(opus_data),
+            original_size=len(chunk_pcm),
+            compressed_size=len(opus_data),
+            start_time=chunk_start_time,
+            end_time=chunk_end_time,
+            duration=chunk_duration_actual,
+            sample_rate=sample_rate,
+            channels=channels,
+        )
+
+        # Save to MongoDB
+        await audio_chunk.insert()
+
+        # Update stats
+        total_original_size += len(chunk_pcm)
+        total_compressed_size += len(opus_data)
+        chunk_index += 1
+        offset = chunk_end
+
+        logger.debug(
+            f"💾 Saved chunk {chunk_index}: "
+            f"{len(chunk_pcm)} → {len(opus_data)} bytes"
+        )
+
+    # Computed before the lookup: the summary log below needs it even when the conversation is missing
+    compression_ratio = total_compressed_size / total_original_size if total_original_size > 0 else 0.0
+    conversation = await Conversation.find_one(
+        Conversation.conversation_id == conversation_id
+    )
+
+    if conversation:
+        total_duration = len(audio_data) / bytes_per_second
+
+        logger.info(f"🔍 DEBUG: Setting metadata - chunks={chunk_index}, duration={total_duration:.2f}s, ratio={compression_ratio:.3f}")
+
+        conversation.audio_chunks_count = chunk_index
+        conversation.audio_total_duration = total_duration
+        conversation.audio_compression_ratio = compression_ratio
+
+        logger.info(f"🔍 DEBUG: Before save - chunks={conversation.audio_chunks_count}, duration={conversation.audio_total_duration}")
+        await conversation.save()
+        logger.info(f"🔍 DEBUG: After save - metadata should be persisted")
+    else:
+        logger.error(f"❌ Conversation {conversation_id} not found for metadata update!")
+
+    logger.info(
+        f"✅ Converted audio to {chunk_index} MongoDB chunks: "
+        f"{total_original_size / 1024 / 1024:.2f} MB → "
+        f"{total_compressed_size / 1024 / 1024:.2f} MB "
+        f"(compression: {compression_ratio:.3f}, "
+        f"{(1 - compression_ratio) * 100:.1f}% savings)"
+    )
+
+    return chunk_index
+
+
+async def convert_wav_to_chunks(
+    conversation_id: str,
+    wav_file_path: Path,
+    chunk_duration: float = 10.0,
+) -> int:
+    """
+    Convert an existing WAV file to MongoDB audio chunks.
+
+    DEPRECATED: Use convert_audio_to_chunks() instead to avoid disk I/O.
+
+    Used for uploaded audio files to ensure consistency with streaming audio storage.
+    Reads WAV file, splits into 10-second chunks, encodes to Opus, and stores in MongoDB.
+
+    Args:
+        conversation_id: Conversation ID to associate chunks with
+        wav_file_path: Path to existing WAV file
+        chunk_duration: Duration of each chunk in seconds (default: 10.0)
+
+    Returns:
+        Number of chunks created (also returned when the conversation is missing)
+
+    Raises:
+        FileNotFoundError: If WAV file doesn't exist
+        ValueError: If WAV file is invalid
+
+    Example:
+        >>> # Convert uploaded file to chunks
+        >>> num_chunks = await convert_wav_to_chunks(
+        ...     conversation_id="550e8400-e29b-41d4...",
+        ...     wav_file_path=Path("/path/to/uploaded.wav")
+        ... )
+        >>> print(f"Created {num_chunks} chunks")
+    """
+    if not wav_file_path.exists():
+        raise FileNotFoundError(f"WAV file not found: {wav_file_path}")
+
+    from advanced_omi_backend.models.conversation import Conversation
+    from bson import Binary
+
+    logger.info(f"📦 Converting WAV file to MongoDB chunks: {wav_file_path}")
+
+    # Read WAV file
+    import wave
+    with wave.open(str(wav_file_path), "rb") as wav:
+        sample_rate = wav.getframerate()
+        channels = wav.getnchannels()
+        sample_width = wav.getsampwidth()
+        total_frames = wav.getnframes()
+
+        # Read all PCM data
+        pcm_data = wav.readframes(total_frames)
+
+    logger.info(
+        f"📁 Read WAV: {len(pcm_data)} bytes PCM, "
+        f"{sample_rate}Hz, {channels}ch, {sample_width*8}-bit"
+    )
+
+    # Calculate chunk size in bytes
+    bytes_per_second = sample_rate * sample_width * channels
+    chunk_size_bytes = int(chunk_duration * bytes_per_second)
+
+    # Split into chunks and store
+    chunk_index = 0
+    total_original_size = 0
+    total_compressed_size = 0
+    offset = 0
+
+    while offset < len(pcm_data):
+        # Extract chunk PCM data
+        chunk_end = min(offset + chunk_size_bytes, len(pcm_data))
+        chunk_pcm = pcm_data[offset:chunk_end]
+
+        if len(chunk_pcm) == 0:
+            break
+
+        # Calculate chunk timing
+        chunk_start_time = offset / bytes_per_second
+        chunk_end_time = chunk_end / bytes_per_second
+        chunk_duration_actual = (chunk_end - offset) / bytes_per_second
+
+        # Encode to Opus
+        opus_data = await encode_pcm_to_opus(
+            pcm_data=chunk_pcm,
+            sample_rate=sample_rate,
+            channels=channels,
+            bitrate=24  # 24kbps for speech
+        )
+
+        # Create MongoDB document
+        audio_chunk = AudioChunkDocument(
+            conversation_id=conversation_id,
+            chunk_index=chunk_index,
+            audio_data=Binary(opus_data),
+            original_size=len(chunk_pcm),
+            compressed_size=len(opus_data),
+            start_time=chunk_start_time,
+            end_time=chunk_end_time,
+            duration=chunk_duration_actual,
+            sample_rate=sample_rate,
+            channels=channels,
+        )
+
+        # Save to MongoDB
+        await audio_chunk.insert()
+
+        # Update stats
+        total_original_size += len(chunk_pcm)
+        total_compressed_size += len(opus_data)
+        chunk_index += 1
+        offset = chunk_end
+
+        logger.debug(
+            f"💾 Saved chunk {chunk_index}: "
+            f"{len(chunk_pcm)} → {len(opus_data)} bytes"
+        )
+
+    # Computed before the lookup: the summary log below needs it even when the conversation is missing
+    compression_ratio = total_compressed_size / total_original_size if total_original_size > 0 else 0.0
+    conversation = await Conversation.find_one(
+        Conversation.conversation_id == conversation_id
+    )
+
+    if conversation:
+        total_duration = len(pcm_data) / bytes_per_second
+
+        logger.info(f"🔍 DEBUG: Setting metadata - chunks={chunk_index}, duration={total_duration:.2f}s, ratio={compression_ratio:.3f}")
+
+        conversation.audio_chunks_count = chunk_index
+        conversation.audio_total_duration = total_duration
+        conversation.audio_compression_ratio = compression_ratio
+
+        logger.info(f"🔍 DEBUG: Before save - chunks={conversation.audio_chunks_count}, duration={conversation.audio_total_duration}")
+        await conversation.save()
+        logger.info(f"🔍 DEBUG: After save - metadata should be persisted")
+    else:
+        logger.error(f"❌ Conversation {conversation_id} not found for metadata update!")
+
+    logger.info(
+        f"✅ Converted WAV to {chunk_index} MongoDB chunks: "
+        f"{total_original_size / 1024 / 1024:.2f} MB → "
+        f"{total_compressed_size / 1024 / 1024:.2f} MB "
+        f"(compression: {compression_ratio:.3f}, "
+        f"{(1 - compression_ratio) * 100:.1f}% savings)"
+    )
+
+    return chunk_index
+
+
+async def wait_for_audio_chunks(
+    conversation_id: str,
+    max_wait_seconds: int = 30,
+    min_chunks: int = 1,
+) -> bool:
+    """
+    Wait for MongoDB audio chunks to be available for a conversation.
+
+    Replaces wait_for_audio_file() for MongoDB-based storage.
+    Polls MongoDB every 500ms until at least min_chunks chunks exist or the timeout elapses.
+
+    Args:
+        conversation_id: Conversation ID to check
+        max_wait_seconds: Maximum wait time in seconds (default: 30)
+        min_chunks: Minimum number of chunks required (default: 1)
+
+    Returns:
+        True if chunks are available, False if timeout
+
+    Example:
+        >>> # Wait for chunks before transcription
+        >>> if await wait_for_audio_chunks(conversation_id):
+        ...     await transcribe_full_audio_job(...)
+        ... else:
+        ...     logger.error("No audio chunks available")
+    """
+    import time
+    import asyncio
+
+    wait_start = time.time()
+
+    while time.time() - wait_start < max_wait_seconds:
+        # Fetch up to min_chunks documents; a fixed limit of 1 could never satisfy min_chunks > 1
+        chunks = await retrieve_audio_chunks(
+            conversation_id=conversation_id,
+            start_index=0,
+            limit=max(min_chunks, 1)  # Only as many as the threshold needs
+        )
+
+        if len(chunks) >= min_chunks:
+            wait_duration = time.time() - wait_start
+            logger.info(
+                f"✅ Audio chunks ready for conversation {conversation_id[:12]} "
+                f"after {wait_duration:.1f}s ({len(chunks)} chunks found)"
+            )
+            return True
+
+        # Log progress every 5 seconds
+        elapsed = time.time() - wait_start
+        if int(elapsed) % 5 == 0 and int(elapsed) > 0:
+            logger.info(
+                f"⏳ Waiting for audio chunks (conversation {conversation_id[:12]})... "
+                f"({elapsed:.0f}s elapsed)"
+            )
+
+        await asyncio.sleep(0.5)  # Check every 500ms
+
+    logger.error(
+        f"❌ Audio chunks not found after {max_wait_seconds}s "
+        f"(conversation: {conversation_id[:12]})"
+    )
+    return False
diff --git a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
index 4d3fa0ae..b4d5487f 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
@@ -137,7 +137,6 @@ async def write_audio_file(
     """
     from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink
     from advanced_omi_backend.config import CHUNK_DIR
-    from advanced_omi_backend.models.audio_file import AudioFile
 
     # Validate and prepare audio if needed
     if validate:
@@ -195,22 +194,6 @@ async def write_audio_file(
         f"✅ Wrote audio file: {wav_filename} ({len(audio_data)} bytes, {duration:.1f}s)"
     )
 
-    # Create AudioFile database entry using Beanie model
-    audio_file = AudioFile(
-        audio_uuid=audio_uuid,
-        source=source,
-        audio_path=wav_filename,
-        client_id=client_id,
-        timestamp=timestamp,
-        user_id=user_id,
-        user_email=user_email,
-        has_speech=False,  # Will be updated by transcription
-        speech_analysis={}, 
-    )
-    await audio_file.insert()
-
-    audio_logger.info(f"✅ Created AudioFile entry for {audio_uuid}")
-
     return relative_audio_path, str(file_path), duration
 
 
diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
index 3acba204..82ed1c90 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
@@ -515,50 +515,6 @@ async def update_job_progress_metadata(
     current_job.save_meta()
 
 
-async def wait_for_audio_file(
-    conversation_id: str, redis_client, max_wait_seconds: int = 30
-) -> Optional[str]:
-    """
-    Wait for audio persistence job to write audio file path to Redis.
-
-    Polls Redis for audio file path with configurable timeout.
-
-    Args:
-        conversation_id: Conversation ID
-        redis_client: Redis client instance
-        max_wait_seconds: Maximum wait time in seconds (default: 30)
-
-    Returns:
-        Audio file path (str) if ready, None if timeout
-    """
-    audio_file_key = f"audio:file:{conversation_id}"
-    wait_start = time.time()
-
-    while time.time() - wait_start < max_wait_seconds:
-        file_path_bytes = await redis_client.get(audio_file_key)
-        if file_path_bytes:
-            wait_duration = time.time() - wait_start
-            logger.info(f"✅ Audio file ready after {wait_duration:.1f}s")
-            return file_path_bytes.decode()
-
-        # Log progress every 5 seconds
-        elapsed = time.time() - wait_start
-        if elapsed % 5 == 0:
-            logger.info(
-                f"⏳ Waiting for audio file (conversation {conversation_id[:12]})... ({elapsed:.0f}s elapsed)"
-            )
-
-        await asyncio.sleep(0.5)  # Check every 500ms
-
-    logger.error(
-        f"❌ Audio file path not found in Redis after {max_wait_seconds}s (key: {audio_file_key})"
-    )
-    logger.warning(
-        "⚠️ Audio persistence job may not have rotated file yet - cannot enqueue batch transcription"
-    )
-    return None
-
-
 async def mark_conversation_deleted(conversation_id: str, deletion_reason: str) -> None:
     """
     Mark a conversation as deleted with a specific reason.
diff --git a/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py
index 46b0806d..41d353a4 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py
@@ -5,7 +5,7 @@
 from starlette.datastructures import UploadFile as StarletteUploadFile
 from googleapiclient.http import MediaIoBaseDownload
 from advanced_omi_backend.clients.gdrive_audio_client import get_google_drive_client
-from advanced_omi_backend.models.audio_file import AudioFile
+from advanced_omi_backend.models.conversation import Conversation
 from advanced_omi_backend.utils.audio_utils import AudioValidationError
 
 
@@ -88,12 +88,11 @@ async def download_audio_files_from_drive(folder_id: str) -> List[StarletteUploa
         
         for item in audio_files_metadata:
             file_id = item["id"] # Get the Google Drive File ID
-            
-            #  Check if the file is already processed
-            existing = await AudioFile.find_one({
-                "audio_uuid": file_id,
-                "source": "gdrive"
-            })
+
+            # Check if the file is already processed (check Conversation by audio_uuid)
+            existing = await Conversation.find_one(
+                Conversation.audio_uuid == file_id
+            )
 
             if existing:
                 audio_logger.info(f"Skipping already processed file: {item['name']}")
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
index 99f6dd53..58acad62 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
@@ -30,10 +30,10 @@ async def audio_streaming_persistence_job(
     redis_client=None
 ) -> Dict[str, Any]:
     """
-    Long-running RQ job that progressively writes audio chunks to disk as they arrive.
+    Long-running RQ job that stores audio chunks in MongoDB with Opus compression.
 
-    Opens a WAV file immediately and appends chunks in real-time, making the file
-    available for playback in the UI before the session completes.
+    Buffers incoming PCM audio from Redis Stream into 10-second chunks, encodes
+    them to Opus format, and stores in MongoDB audio_chunks collection.
 
     Runs in parallel with transcription processing to reduce memory pressure.
 
@@ -44,11 +44,11 @@ async def audio_streaming_persistence_job(
         redis_client: Redis client (injected by decorator)
 
     Returns:
-        Dict with audio_file_path, chunk_count, total_bytes, duration_seconds
+        Dict with chunk_count, total_bytes, compressed_bytes, duration_seconds
 
-    Note: user_email is fetched from the database when needed.
+    Note: Replaces disk-based WAV file storage with MongoDB chunk storage.
     """
-    logger.info(f"🎵 Starting audio persistence for session {session_id}")
+    logger.info(f"🎵 Starting MongoDB audio persistence for session {session_id}")
 
     # Setup audio persistence consumer group (separate from transcription consumer)
     audio_stream_name = f"audio:stream:{client_id}"
@@ -73,169 +73,253 @@ async def audio_streaming_persistence_job(
     max_runtime = 86340  # 24 hours - 60 seconds (graceful exit before RQ timeout)
     start_time = time.time()
 
-    from advanced_omi_backend.config import CHUNK_DIR
-    from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink
-    from wyoming.audio import AudioChunk
+    # Import MongoDB chunk utilities
+    from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
+    from advanced_omi_backend.models.conversation import Conversation
+    from advanced_omi_backend.utils.audio_chunk_utils import encode_pcm_to_opus
+    from bson import Binary
 
-    # Ensure directory exists
-    CHUNK_DIR.mkdir(parents=True, exist_ok=True)
-
-    # File rotation state
+    # Conversation rotation state
     current_conversation_id = None
-    file_sink = None
-    file_path = None
-    wav_filename = None
-    conversation_chunk_count = 0
     conversation_start_time = None
+    conversation_count = 0
 
-    # Audio collection stats (across all conversations in this session)
-    total_chunk_count = 0
-    total_bytes = 0
+    # PCM buffer for current 10-second chunk
+    pcm_buffer = bytearray()
+    chunk_index = 0  # Sequential chunk counter for current conversation
+    chunk_start_time = 0.0  # Start time of current buffered chunk
+
+    # Chunk configuration
+    SAMPLE_RATE = 16000
+    SAMPLE_WIDTH = 2  # 16-bit
+    CHANNELS = 1  # Mono
+    CHUNK_DURATION_SECONDS = 10.0
+    BYTES_PER_SECOND = SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS  # 32,000 bytes/sec
+    CHUNK_SIZE_BYTES = int(CHUNK_DURATION_SECONDS * BYTES_PER_SECOND)  # 320,000 bytes
+
+    # Session stats (across all conversations)
+    total_pcm_bytes = 0
+    total_compressed_bytes = 0
+    total_mongo_chunks_written = 0
     end_signal_received = False
     consecutive_empty_reads = 0
-    max_empty_reads = 3  # Exit after 3 consecutive empty reads (deterministic check)
-    conversation_count = 0
+    max_empty_reads = 3
 
     # Get current job for zombie detection
     from rq import get_current_job
     from advanced_omi_backend.utils.job_utils import check_job_alive
     current_job = get_current_job()
 
+    async def flush_pcm_buffer():
+        """
+        Flush current PCM buffer to MongoDB as an Opus-compressed chunk.
+
+        No-op when the buffer is empty or no conversation is active. Updates the
+        session totals and the conversation's chunk metadata. Errors are logged
+        rather than raised, and the buffer is left for the caller to reset.
+        """
+        nonlocal pcm_buffer, chunk_index, chunk_start_time
+        nonlocal total_pcm_bytes, total_compressed_bytes, total_mongo_chunks_written
+
+        if len(pcm_buffer) == 0 or not current_conversation_id:
+            return
+
+        try:
+            # Encode PCM → Opus
+            opus_data = await encode_pcm_to_opus(
+                pcm_data=bytes(pcm_buffer),
+                sample_rate=SAMPLE_RATE,
+                channels=CHANNELS,
+                bitrate=24  # 24kbps for speech
+            )
+
+            # Chunk metadata. Everything the log lines below use is computed here,
+            # so a failed conversation lookup cannot leave a name unbound.
+            original_size = len(pcm_buffer)
+            compressed_size = len(opus_data)
+            duration = original_size / BYTES_PER_SECOND
+            end_time = chunk_start_time + duration
+            total_duration = end_time  # chunk_start_time restarts at 0.0 per conversation
+            compression_ratio = compressed_size / original_size if original_size > 0 else 0.0
+
+            # Create MongoDB document
+            audio_chunk = AudioChunkDocument(
+                conversation_id=current_conversation_id,
+                chunk_index=chunk_index,
+                audio_data=Binary(opus_data),
+                original_size=original_size,
+                compressed_size=compressed_size,
+                start_time=chunk_start_time,
+                end_time=end_time,
+                duration=duration,
+                sample_rate=SAMPLE_RATE,
+                channels=CHANNELS,
+            )
+
+            # Save to MongoDB
+            await audio_chunk.insert()
+
+            # Update session stats
+            total_pcm_bytes += original_size
+            total_compressed_bytes += compressed_size
+            total_mongo_chunks_written += 1
+
+            # Update conversation metadata (skipped if the document is not found)
+            conversation = await Conversation.find_one(
+                Conversation.conversation_id == current_conversation_id
+            )
+
+            if conversation:
+                # chunks_count/total_duration are running totals; ratio is this chunk's
+                conversation.audio_chunks_count = chunk_index + 1
+                conversation.audio_total_duration = total_duration
+                conversation.audio_compression_ratio = compression_ratio
+                await conversation.save()
+
+            logger.debug(
+                f"💾 Saved chunk {chunk_index} for conversation {current_conversation_id[:12]}: "
+                f"{original_size} → {compressed_size} bytes ({compression_ratio:.3f} ratio), "
+                f"{duration:.1f}s duration"
+            )
+
+            # Log every 6 chunks (60 seconds) to avoid spam
+            if (chunk_index + 1) % 6 == 0:
+                logger.info(
+                    f"📦 Conversation {current_conversation_id[:12]}: "
+                    f"{chunk_index + 1} chunks, {total_duration:.1f}s total"
+                )
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save audio chunk {chunk_index}: {e}", exc_info=True)
+
     while True:
         # Check if job still exists in Redis (detect zombie state)
         if not await check_job_alive(redis_client, current_job, session_id):
-            if file_sink:
-                await file_sink.close()
+            # Flush remaining buffer before exit
+            if len(pcm_buffer) > 0:
+                await flush_pcm_buffer()
             break
 
         # Check timeout
         if time.time() - start_time > max_runtime:
             logger.warning(f"⏱️ Timeout reached for audio persistence {session_id}")
-            # Close current file if open
-            if file_sink:
-                await file_sink.close()
-                logger.info(f"✅ Closed file on timeout: {wav_filename}")
+            # Flush remaining buffer
+            if len(pcm_buffer) > 0:
+                await flush_pcm_buffer()
             break
 
-        # Check if session is finalizing (user stopped recording or WebSocket disconnected)
+        # Check if session is finalizing
         session_status = await redis_client.hget(session_key, "status")
         if session_status and session_status.decode() in ["finalizing", "complete"]:
-            logger.info(f"🛑 Session finalizing detected, writing final chunks...")
-            # Give a brief moment for any in-flight chunks to arrive
-            await asyncio.sleep(0.5)
-            # Do one final read to write remaining chunks to current file
-            if file_sink:
-                try:
-                    final_messages = await redis_client.xreadgroup(
-                        audio_group_name,
-                        audio_consumer_name,
-                        {audio_stream_name: ">"},
-                        count=50,
-                        block=500
-                    )
-                    if final_messages:
-                        for stream_name, msgs in final_messages:
-                            for message_id, fields in msgs:
-                                audio_data = fields.get(b"audio_data", b"")
-                                chunk_id = fields.get(b"chunk_id", b"").decode()
-                                if chunk_id != "END" and len(audio_data) > 0:
-                                    chunk = AudioChunk(
-                                        rate=16000,
-                                        width=2,
-                                        channels=1,
-                                        audio=audio_data
-                                    )
-                                    await file_sink.write(chunk)
-                                    conversation_chunk_count += 1
-                                    total_chunk_count += 1
-                                    total_bytes += len(audio_data)
-                                await redis_client.xack(audio_stream_name, audio_group_name, message_id)
-                        logger.info(f"📦 Final read wrote {len(final_messages[0][1]) if final_messages else 0} more chunks")
-                except Exception as e:
-                    logger.debug(f"Final audio read error (non-fatal): {e}")
-
-                # Close final file
-                await file_sink.close()
-                logger.info(f"✅ Closed final file: {wav_filename} ({conversation_chunk_count} chunks)")
+            logger.info(f"🛑 Session finalizing detected, flushing final chunks...")
+            await asyncio.sleep(0.5)  # Brief wait for in-flight chunks
+
+            # Final read to collect remaining chunks
+            try:
+                final_messages = await redis_client.xreadgroup(
+                    audio_group_name,
+                    audio_consumer_name,
+                    {audio_stream_name: ">"},
+                    count=50,
+                    block=500
+                )
+
+                if final_messages:
+                    for stream_name, msgs in final_messages:
+                        for message_id, fields in msgs:
+                            audio_data = fields.get(b"audio_data", b"")
+                            chunk_id = fields.get(b"chunk_id", b"").decode()
+
+                            if chunk_id != "END" and len(audio_data) > 0:
+                                pcm_buffer.extend(audio_data)
+
+                                # Flush if buffer reaches chunk size
+                                if len(pcm_buffer) >= CHUNK_SIZE_BYTES:
+                                    await flush_pcm_buffer()
+                                    # Reset for next chunk
+                                    pcm_buffer = bytearray()
+                                    chunk_index += 1
+                                    chunk_start_time += CHUNK_DURATION_SECONDS
+
+                            await redis_client.xack(audio_stream_name, audio_group_name, message_id)
+
+                    logger.info(f"📦 Final read processed {len(final_messages[0][1])} messages")
+
+            except Exception as e:
+                logger.debug(f"Final audio read error (non-fatal): {e}")
+
+            # Flush any remaining partial chunk
+            if len(pcm_buffer) > 0:
+                await flush_pcm_buffer()
+
             break
 
-        # Check for conversation change (file rotation signal)
+        # Check for conversation change (rotation signal)
         conversation_key = f"conversation:current:{session_id}"
         new_conversation_id = await redis_client.get(conversation_key)
 
         if new_conversation_id:
             new_conversation_id = new_conversation_id.decode()
 
-            # Conversation changed - rotate to new file
+            # Conversation changed - flush current buffer and rotate
             if new_conversation_id != current_conversation_id:
-                # Close previous file if exists
-                if file_sink:
-                    await file_sink.close()
-                    duration = (time.time() - conversation_start_time) if conversation_start_time else 0
+                # Flush remaining buffer from previous conversation
+                if len(pcm_buffer) > 0 and current_conversation_id:
+                    await flush_pcm_buffer()
                     logger.info(
-                        f"✅ Closed conversation {current_conversation_id[:12]} file: {wav_filename} "
-                        f"({conversation_chunk_count} chunks, {duration:.1f}s)"
+                        f"✅ Finalized conversation {current_conversation_id[:12]}: "
+                        f"{chunk_index + 1} chunks saved to MongoDB"
                     )
 
-                # Open new file for new conversation
+                # Start new conversation
                 current_conversation_id = new_conversation_id
                 conversation_count += 1
-                conversation_chunk_count = 0
                 conversation_start_time = time.time()
 
-                timestamp = int(time.time() * 1000)
-                wav_filename = f"{timestamp}_{client_id}_{current_conversation_id}.wav"
-                file_path = CHUNK_DIR / wav_filename
+                # Reset chunk state
+                pcm_buffer = bytearray()
+                chunk_index = 0
+                chunk_start_time = 0.0
 
-                file_sink = LocalFileSink(
-                    file_path=str(file_path),
-                    sample_rate=16000,
-                    channels=1,
-                    sample_width=2
-                )
-                await file_sink.open()
                 logger.info(
-                    f"📁 Opened new file for conversation #{conversation_count} ({current_conversation_id[:12]}): {file_path}"
+                    f"📁 Started MongoDB persistence for conversation #{conversation_count} "
+                    f"({current_conversation_id[:12]})"
                 )
-
-                # Store file path in Redis (keyed by conversation_id, not session_id)
-                audio_file_key = f"audio:file:{current_conversation_id}"
-                await redis_client.set(audio_file_key, str(file_path), ex=86400)  # 24 hour TTL
-                logger.info(f"💾 Stored audio file path in Redis: {audio_file_key}")
         else:
-            # Key deleted - conversation ended, close current file
-            if file_sink and current_conversation_id:
-                await file_sink.close()
+            # Conversation key deleted - conversation ended
+            if current_conversation_id and len(pcm_buffer) > 0:
+                # Flush final partial chunk
+                await flush_pcm_buffer()
                 duration = (time.time() - conversation_start_time) if conversation_start_time else 0
                 logger.info(
-                    f"✅ Closed conversation {current_conversation_id[:12]} file after conversation ended: {wav_filename} "
-                    f"({conversation_chunk_count} chunks, {duration:.1f}s)"
+                    f"✅ Conversation {current_conversation_id[:12]} ended: "
+                    f"{chunk_index + 1} chunks, {duration:.1f}s"
                 )
-                file_sink = None  # Clear sink to prevent writing to closed file
+
+                # Reset state
+                pcm_buffer = bytearray()
                 current_conversation_id = None
 
-        # If no file open yet, wait for conversation to be created
-        if not file_sink:
-            await asyncio.sleep(0.0001)  # Minimal sleep to yield to event loop
+        # Wait for conversation to be created
+        if not current_conversation_id:
+            await asyncio.sleep(0.0001)
             continue
 
-        # Read audio chunks from stream (non-blocking)
+        # Read audio chunks from Redis Stream
         try:
             audio_messages = await redis_client.xreadgroup(
                 audio_group_name,
                 audio_consumer_name,
                 {audio_stream_name: ">"},
-                count=20,  # Read up to 20 chunks at a time for efficiency
-                block=100  # 100ms timeout - more responsive
+                count=20,  # Read up to 20 chunks at a time
+                block=100  # 100ms timeout
             )
 
             if audio_messages:
-                # Reset empty read counter - we got messages
-                consecutive_empty_reads = 0
+                consecutive_empty_reads = 0  # Reset counter
 
                 for stream_name, msgs in audio_messages:
                     for message_id, fields in msgs:
-                        # Extract audio data
                         audio_data = fields.get(b"audio_data", b"")
                         chunk_id = fields.get(b"chunk_id", b"").decode()
 
@@ -244,57 +328,57 @@ async def audio_streaming_persistence_job(
                             logger.info(f"📡 Received END signal in audio persistence")
                             end_signal_received = True
                         elif len(audio_data) > 0:
-                            # Write chunk immediately to file
-                            chunk = AudioChunk(
-                                rate=16000,
-                                width=2,
-                                channels=1,
-                                audio=audio_data
-                            )
-                            await file_sink.write(chunk)
-                            conversation_chunk_count += 1
-                            total_chunk_count += 1
-                            total_bytes += len(audio_data)
-
-                            # Log every 40 chunks to avoid spam
-                            if total_chunk_count % 40 == 0:
-                                logger.info(
-                                    f"📦 Session {session_id[:12]}: {total_chunk_count} total chunks "
-                                    f"(conversation {current_conversation_id[:12]}: {conversation_chunk_count} chunks)"
-                                )
+                            # Append to PCM buffer
+                            pcm_buffer.extend(audio_data)
+
+                            # Flush if buffer reaches 10-second chunk size
+                            if len(pcm_buffer) >= CHUNK_SIZE_BYTES:
+                                await flush_pcm_buffer()
+
+                                # Reset for next chunk
+                                pcm_buffer = bytearray()
+                                chunk_index += 1
+                                chunk_start_time += CHUNK_DURATION_SECONDS
 
                         # ACK the message
                         await redis_client.xack(audio_stream_name, audio_group_name, message_id)
+
             else:
-                # No new messages - stream might be empty
+                # No new messages
                 if end_signal_received:
                     consecutive_empty_reads += 1
-                    logger.info(f"📭 No new messages ({consecutive_empty_reads}/{max_empty_reads} empty reads after END signal)")
+                    logger.info(f"📭 No new messages ({consecutive_empty_reads}/{max_empty_reads})")
 
                     if consecutive_empty_reads >= max_empty_reads:
-                        logger.info(f"✅ Stream empty after END signal - stopping audio collection")
+                        logger.info(f"✅ Stream empty after END signal - stopping")
+                        # Flush remaining buffer
+                        if len(pcm_buffer) > 0:
+                            await flush_pcm_buffer()
                         break
 
         except Exception as audio_error:
-            # Stream might not exist yet or other transient errors
             logger.debug(f"Audio stream read error (non-fatal): {audio_error}")
 
-        await asyncio.sleep(0.0001)  # Minimal sleep to yield to event loop
+        await asyncio.sleep(0.0001)
 
     # Job complete - calculate final stats
     runtime_seconds = time.time() - start_time
 
-    # Calculate duration (16kHz, 16-bit mono = 32000 bytes/second)
-    if total_bytes > 0:
-        duration = total_bytes / (16000 * 2 * 1)  # sample_rate * sample_width * channels
+    # Calculate total duration
+    if total_pcm_bytes > 0:
+        duration = total_pcm_bytes / BYTES_PER_SECOND
+        compression_ratio = total_compressed_bytes / total_pcm_bytes if total_pcm_bytes > 0 else 0.0
     else:
         logger.warning(f"⚠️ No audio chunks written for session {session_id}")
         duration = 0.0
+        compression_ratio = 0.0
 
     logger.info(
-        f"🎵 Audio persistence job complete for session {session_id}: "
-        f"{conversation_count} conversations, {total_chunk_count} total chunks, "
-        f"{total_bytes / 1024 / 1024:.2f} MB, {runtime_seconds:.1f}s runtime"
+        f"🎵 MongoDB audio persistence complete for session {session_id}: "
+        f"{conversation_count} conversations, {total_mongo_chunks_written} chunks, "
+        f"{total_pcm_bytes / 1024 / 1024:.2f} MB PCM → {total_compressed_bytes / 1024 / 1024:.2f} MB Opus "
+        f"(compression: {compression_ratio:.3f}, {(1 - compression_ratio) * 100:.1f}% savings), "
+        f"{runtime_seconds:.1f}s runtime"
     )
 
     # Clean up Redis tracking keys
@@ -307,9 +391,10 @@ async def audio_streaming_persistence_job(
     return {
         "session_id": session_id,
         "conversation_count": conversation_count,
-        "last_audio_file_path": str(file_path) if file_path else None,
-        "total_chunk_count": total_chunk_count,
-        "total_bytes": total_bytes,
+        "total_mongo_chunks": total_mongo_chunks_written,
+        "total_pcm_bytes": total_pcm_bytes,
+        "total_compressed_bytes": total_compressed_bytes,
+        "compression_ratio": compression_ratio,
         "duration_seconds": duration,
         "runtime_seconds": runtime_seconds
     }
diff --git a/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py
new file mode 100644
index 00000000..e0229457
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py
@@ -0,0 +1,138 @@
+"""
+Cleanup jobs for managing soft-deleted data.
+
+Provides manual cleanup of soft-deleted conversations and chunks.
+Auto-cleanup is controlled via admin API settings (stored in /app/data/cleanup_config.json).
+"""
+import logging
+from datetime import datetime, timedelta
+from typing import Optional
+
+from advanced_omi_backend.models.conversation import Conversation
+from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
+from advanced_omi_backend.models.job import async_job
+from advanced_omi_backend.config import load_cleanup_settings_from_file
+
+logger = logging.getLogger(__name__)
+
+
+@async_job(redis=False, beanie=True, timeout=1800)  # 30 minute timeout
+async def purge_old_deleted_conversations(
+    retention_days: Optional[int] = None,
+    dry_run: bool = False
+) -> dict:
+    """
+    Hard-delete conversations whose soft-deletion is older than the retention window.
+
+    Each matching conversation is removed together with its audio chunks. In
+    dry-run mode nothing is deleted; the counts describe what would be removed.
+
+    Args:
+        retention_days: Days to keep soft-deleted conversations; None means use
+            the value from the cleanup settings file
+        dry_run: If True, only count what would be deleted without actually deleting
+
+    Returns:
+        Dict with purged_conversations, purged_chunks, retention_days,
+        cutoff_date (ISO-8601 string) and dry_run
+    """
+    if retention_days is None:
+        retention_days = load_cleanup_settings_from_file().retention_days
+
+    cutoff_date = datetime.utcnow() - timedelta(days=retention_days)
+    logger.info(f"{'[DRY RUN] ' if dry_run else ''}Purging conversations deleted before {cutoff_date.isoformat()}")
+
+    # Soft-deleted conversations whose deletion timestamp precedes the cutoff
+    expired = await Conversation.find(
+        Conversation.deleted == True,
+        Conversation.deleted_at < cutoff_date
+    ).to_list()
+
+    purged_conversations = 0
+    purged_chunks = 0
+
+    for expired_doc in expired:
+        conversation_id = expired_doc.conversation_id
+        chunk_query = AudioChunkDocument.find(
+            AudioChunkDocument.conversation_id == conversation_id
+        )
+
+        if dry_run:
+            # Count only; nothing is removed
+            chunk_count = await chunk_query.count()
+            purged_chunks += chunk_count
+            purged_conversations += 1
+
+            logger.info(
+                f"[DRY RUN] Would purge conversation {conversation_id} "
+                f"(with {chunk_count} chunks)"
+            )
+            continue
+
+        # Chunks go first, then the conversation document itself
+        chunk_result = await chunk_query.delete()
+        purged_chunks += chunk_result.deleted_count
+        await expired_doc.delete()
+        purged_conversations += 1
+
+        logger.info(
+            f"Purged conversation {conversation_id} "
+            f"(deleted {chunk_result.deleted_count} chunks)"
+        )
+
+    logger.info(
+        f"{'[DRY RUN] Would purge' if dry_run else 'Purged'} "
+        f"{purged_conversations} conversations and {purged_chunks} chunks"
+    )
+
+    return {
+        "purged_conversations": purged_conversations,
+        "purged_chunks": purged_chunks,
+        "retention_days": retention_days,
+        "cutoff_date": cutoff_date.isoformat(),
+        "dry_run": dry_run,
+    }
+
+
+def schedule_cleanup_job(retention_days: Optional[int] = None) -> Optional[str]:
+    """
+    Queue a single run of the purge job, provided auto-cleanup is switched on.
+
+    The auto-cleanup flag comes from the admin-managed cleanup settings
+    (stored in /app/data/cleanup_config.json); when it is off nothing is queued.
+
+    For manual cleanup, use the admin API endpoint: POST /api/admin/cleanup
+
+    Args:
+        retention_days: Days to keep soft-deleted conversations; None means
+            use the configured retention_days
+
+    Returns:
+        ID of the enqueued job, or None when auto-cleanup is disabled or
+        enqueueing failed
+    """
+    settings = load_cleanup_settings_from_file()
+    if not settings.auto_cleanup_enabled:
+        logger.info("Auto-cleanup is disabled (auto_cleanup_enabled=false)")
+        return None
+
+    try:
+        # Import sits inside the try, so an import failure is logged and yields None too
+        from advanced_omi_backend.controllers.queue_controller import get_queue
+
+        if retention_days is None:
+            retention_days = settings.retention_days
+
+        job = get_queue("default").enqueue(
+            purge_old_deleted_conversations,
+            retention_days=retention_days,
+            dry_run=False,
+            job_timeout="30m",
+        )
+        logger.info(f"Scheduled cleanup job {job.id} with {retention_days}-day retention")
+        return job.id
+
+    except Exception as e:
+        logger.error(f"Failed to schedule cleanup job: {e}")
+        return None
+
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index 1d3f81f3..ed2ef1fe 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -492,18 +492,18 @@ async def open_conversation_job(
     # to avoid false negatives from aggregated results lacking proper word-level data
     logger.info("✅ Conversation has meaningful speech (validated during streaming), proceeding with post-processing")
 
-    # Wait for audio_streaming_persistence_job to complete and write the file path
-    from advanced_omi_backend.utils.conversation_utils import wait_for_audio_file
+    # Wait for audio_streaming_persistence_job to complete and write MongoDB chunks
+    from advanced_omi_backend.utils.audio_chunk_utils import wait_for_audio_chunks
 
-    file_path = await wait_for_audio_file(
-        conversation_id=conversation_id, redis_client=redis_client, max_wait_seconds=30
+    chunks_ready = await wait_for_audio_chunks(
+        conversation_id=conversation_id, max_wait_seconds=30, min_chunks=1
     )
 
-    if not file_path:
-        # Mark conversation as deleted - has speech but no audio file to process
+    if not chunks_ready:
+        # Mark conversation as deleted - has speech but no audio chunks to process
         await mark_conversation_deleted(
             conversation_id=conversation_id,
-            deletion_reason="audio_file_not_ready",
+            deletion_reason="audio_chunks_not_ready",
         )
 
         # Call shared cleanup/restart logic before returning
@@ -519,22 +519,7 @@ async def open_conversation_job(
             end_reason=end_reason,
         )
 
-    logger.info(f"📁 Retrieved audio file path: {file_path}")
-
-    # Update conversation with audio file path
-    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
-    if conversation:
-        # Store just the filename (relative to CHUNK_DIR)
-        from pathlib import Path
-
-        audio_filename = Path(file_path).name
-        conversation.audio_path = audio_filename
-        await conversation.save()
-        logger.info(
-            f"💾 Updated conversation {conversation_id[:12]} with audio_path: {audio_filename}"
-        )
-    else:
-        logger.warning(f"⚠️ Conversation {conversation_id} not found for audio_path update")
+    logger.info(f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}")
 
     # Enqueue post-conversation processing pipeline
     client_id = conversation.client_id if conversation else None
diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py
index 066d05c5..5ad398ea 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py
@@ -162,10 +162,6 @@ async def recognise_speakers_job(
     # Get user_id from conversation
     user_id = conversation.user_id
 
-    # Use the provided audio path
-    actual_audio_path = audio_path
-    logger.info(f"📁 Using audio for speaker recognition: {audio_path}")
-
     # Find the transcript version to update
     transcript_version = None
     for version in conversation.transcript_versions:
@@ -189,39 +185,101 @@ async def recognise_speakers_job(
             "processing_time_seconds": 0
         }
 
+    # Reconstruct audio from MongoDB chunks
+    import tempfile
+    from pathlib import Path
+    from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_wav_from_conversation
+
+    logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}")
+
     # Call speaker recognition service
     try:
-        logger.info(f"🎤 Calling speaker recognition service...")
+        # Reconstruct WAV from MongoDB chunks
+        wav_data = await reconstruct_wav_from_conversation(conversation_id)
+
+        # Write to temporary file for speaker recognition service
+        temp_wav_file = tempfile.NamedTemporaryFile(
+            suffix=".wav",
+            delete=False,
+            prefix=f"speaker_recog_{conversation_id[:8]}_"
+        )
 
-        # Read transcript text and words from the transcript version
-        # (Parameters may be empty if called via job dependency)
-        actual_transcript_text = transcript_text or transcript_version.transcript or ""
-        actual_words = words if words else []
+        try:
+            # Bind the path first so the `finally` cleanup can always reference it
+            temp_wav_path = temp_wav_file.name
+            with temp_wav_file:  # flushes and closes the handle even if write() fails
+                temp_wav_file.write(wav_data)
 
-        # If words not provided, we need to get them from metadata
-        if not actual_words and transcript_version.metadata:
-            actual_words = transcript_version.metadata.get("words", [])
+            logger.info(
+                f"📁 Created temporary WAV file for speaker recognition: {temp_wav_path} "
+                f"({len(wav_data) / 1024 / 1024:.2f} MB)"
+            )
 
-        if not actual_transcript_text:
-            logger.warning(f"🎤 No transcript text found in version {version_id}")
-            return {
-                "success": False,
-                "conversation_id": conversation_id,
-                "version_id": version_id,
-                "error": "No transcript text available",
-                "processing_time_seconds": 0
+            # Read transcript text and words from the transcript version
+            # (Parameters may be empty if called via job dependency)
+            actual_transcript_text = transcript_text or transcript_version.transcript or ""
+            actual_words = words if words else []
+
+            # If words not provided, we need to get them from metadata
+            if not actual_words and transcript_version.metadata:
+                actual_words = transcript_version.metadata.get("words", [])
+
+            if not actual_transcript_text:
+                logger.warning(f"🎤 No transcript text found in version {version_id}")
+                # Clean up temp file before returning
+                Path(temp_wav_path).unlink(missing_ok=True)
+                return {
+                    "success": False,
+                    "conversation_id": conversation_id,
+                    "version_id": version_id,
+                    "error": "No transcript text available",
+                    "processing_time_seconds": 0
+                }
+
+            transcript_data = {
+                "text": actual_transcript_text,
+                "words": actual_words
             }
 
-        transcript_data = {
-            "text": actual_transcript_text,
-            "words": actual_words
+            logger.info(f"🎤 Calling speaker recognition service...")
+
+            # Call speaker service with temporary file path
+            speaker_result = await speaker_client.diarize_identify_match(
+                audio_path=temp_wav_path,
+                transcript_data=transcript_data,
+                user_id=user_id
+            )
+
+        finally:
+            # Clean up temporary file
+            try:
+                Path(temp_wav_path).unlink(missing_ok=True)
+                logger.debug(f"🧹 Deleted temporary WAV file: {temp_wav_path}")
+            except Exception as cleanup_error:
+                logger.warning(f"Failed to delete temporary file {temp_wav_path}: {cleanup_error}")
+
+    except ValueError as e:
+        # No chunks found for conversation
+        logger.error(f"No audio chunks found for conversation {conversation_id}: {e}")
+        return {
+            "success": False,
+            "conversation_id": conversation_id,
+            "version_id": version_id,
+            "error": f"No audio chunks found: {e}",
+            "processing_time_seconds": time.time() - start_time
+        }
+    except Exception as audio_error:
+        logger.error(f"Failed to reconstruct audio from MongoDB: {audio_error}", exc_info=True)
+        return {
+            "success": False,
+            "conversation_id": conversation_id,
+            "version_id": version_id,
+            "error": f"Audio reconstruction failed: {audio_error}",
+            "processing_time_seconds": time.time() - start_time
         }
 
-        speaker_result = await speaker_client.diarize_identify_match(
-            audio_path=actual_audio_path,
-            transcript_data=transcript_data,
-            user_id=user_id
-        )
+    # Continue with speaker recognition result processing
+    try:
 
         # Check for errors from speaker service
         if speaker_result.get("error"):
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index b37f6454..fdafc6f9 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -173,10 +173,6 @@ async def transcribe_full_audio_job(
     user_id = str(conversation.user_id) if conversation.user_id else None
     client_id = conversation.client_id if hasattr(conversation, 'client_id') else None
 
-    # Use the provided audio path
-    actual_audio_path = audio_path
-    logger.info(f"📁 Using audio for transcription: {audio_path}")
-
     # Get the transcription provider
     provider = get_transcription_provider(mode="batch")
     if not provider:
@@ -185,19 +181,60 @@ async def transcribe_full_audio_job(
     provider_name = provider.name
     logger.info(f"Using transcription provider: {provider_name}")
 
-    # Read the audio file
-    audio_file_path = Path(actual_audio_path)
-    if not audio_file_path.exists():
-        raise FileNotFoundError(f"Audio file not found: {actual_audio_path}")
+    # Reconstruct audio from MongoDB chunks
+    import tempfile
+    from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_wav_from_conversation
 
-    # Load audio data
-    with open(audio_file_path, "rb") as f:
-        audio_data = f.read()
+    logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}")
 
-    # Transcribe the audio (assume 16kHz sample rate)
-    transcription_result = await provider.transcribe(
-        audio_data=audio_data, sample_rate=16000, diarize=True
-    )
+    try:
+        # Reconstruct WAV from MongoDB chunks
+        wav_data = await reconstruct_wav_from_conversation(conversation_id)
+
+        # Write to temporary file for transcription service
+        # (Services expect file paths, not bytes)
+        temp_wav_file = tempfile.NamedTemporaryFile(
+            suffix=".wav",
+            delete=False,
+            prefix=f"batch_transcribe_{conversation_id[:8]}_"
+        )
+
+        try:
+            # Bind the path first so the `finally` cleanup can always reference it
+            temp_wav_path = temp_wav_file.name
+            with temp_wav_file:  # flushes and closes the handle even if write() fails
+                temp_wav_file.write(wav_data)
+
+            logger.info(
+                f"📁 Created temporary WAV file: {temp_wav_path} "
+                f"({len(wav_data) / 1024 / 1024:.2f} MB)"
+            )
+
+            # Read the WAV bytes back from the temporary file.
+            # NOTE: only these bytes are handed to the provider below; the file
+            # path itself is never passed to provider.transcribe().
+            with open(temp_wav_path, "rb") as f:
+                audio_data = f.read()
+
+            # Transcribe the audio (assume 16kHz sample rate)
+            transcription_result = await provider.transcribe(
+                audio_data=audio_data, sample_rate=16000, diarize=True
+            )
+
+        finally:
+            # Clean up temporary file
+            try:
+                Path(temp_wav_path).unlink(missing_ok=True)
+                logger.debug(f"🧹 Deleted temporary WAV file: {temp_wav_path}")
+            except Exception as cleanup_error:
+                logger.warning(f"Failed to delete temporary file {temp_wav_path}: {cleanup_error}")
+
+    except ValueError as e:
+        # No chunks found for conversation; chain the cause so the original traceback is kept
+        raise FileNotFoundError(f"No audio chunks found for conversation {conversation_id}: {e}") from e
+    except Exception as e:
+        logger.error(f"Failed to reconstruct audio from MongoDB: {e}", exc_info=True)
+        raise RuntimeError(f"Audio reconstruction failed: {e}") from e
 
     # Extract results
     transcript_text = transcription_result.get("text", "")
@@ -517,7 +554,7 @@ async def transcribe_full_audio_job(
         "success": True,
         "conversation_id": conversation_id,
         "version_id": version_id,
-        "audio_path": str(audio_file_path),
+        "audio_source": "mongodb_chunks",  # Audio reconstructed from MongoDB, no permanent file
         "transcript": transcript_text,
         "segments": [seg.model_dump() for seg in speaker_segments],
         "words": words,  # Needed by speaker recognition
diff --git a/backends/advanced/tests/test_audio_persistence_mongodb.py b/backends/advanced/tests/test_audio_persistence_mongodb.py
new file mode 100644
index 00000000..30b5212e
--- /dev/null
+++ b/backends/advanced/tests/test_audio_persistence_mongodb.py
@@ -0,0 +1,431 @@
+"""
+Integration tests for MongoDB-based audio chunk persistence.
+
+These tests require a running MongoDB instance and test the complete
+audio chunk pipeline: encoding, storage, retrieval, and reconstruction.
+
+Run with: MONGODB_URI=mongodb://localhost:27018 pytest tests/test_audio_persistence_mongodb.py
+"""
+
+import asyncio
+import io
+import os
+import struct
+import wave
+from pathlib import Path
+
+import pytest
+from bson import Binary
+from motor.motor_asyncio import AsyncIOMotorClient
+from beanie import init_beanie
+
+from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
+from advanced_omi_backend.models.conversation import Conversation
+from advanced_omi_backend.utils.audio_chunk_utils import (
+    encode_pcm_to_opus,
+    decode_opus_to_pcm,
+    build_wav_from_pcm,
+    retrieve_audio_chunks,
+    concatenate_chunks_to_pcm,
+    reconstruct_wav_from_conversation,
+    convert_wav_to_chunks,
+    wait_for_audio_chunks,
+)
+
+
+# Test configuration
+
+def get_mongodb_url():
+    """Return the MongoDB URL from ``MONGODB_URI``, defaulting to mongodb://localhost:27018."""
+    return os.getenv("MONGODB_URI", "mongodb://localhost:27018")
+
+
+def get_test_db_name():
+    """Get test database name."""
+    return os.getenv("TEST_DB_NAME", "test_audio_chunks_db")
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Create event loop for async tests."""
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    yield loop
+    loop.close()
+
+
+@pytest.fixture(scope="session")
+async def mongodb_client():
+    """Create MongoDB client for tests."""
+    client = AsyncIOMotorClient(get_mongodb_url())
+    yield client
+    client.close()
+
+
+@pytest.fixture(scope="session")
+async def init_db(mongodb_client):
+    """Initialize Beanie with test database."""
+    db = mongodb_client[get_test_db_name()]
+
+    await init_beanie(
+        database=db,
+        document_models=[AudioChunkDocument, Conversation]
+    )
+
+    yield db
+
+    # Cleanup: Drop test database
+    await mongodb_client.drop_database(get_test_db_name())
+
+
+@pytest.fixture
+async def clean_db(init_db):
+    """Empty the audio-chunk and conversation collections before each test."""
+    # Delete every document of both models (this is delete_all(), not a collection drop)
+    await AudioChunkDocument.delete_all()
+    await Conversation.delete_all()
+    yield
+
+
+# Test data generators
+
+def generate_pcm_data(duration_seconds=1, sample_rate=16000):
+    """Generate deterministic 16-bit little-endian mono PCM test data.
+
+    The samples form a repeating sawtooth ramp: not real audio, just a valid
+    PCM structure. Returns ``sample_rate * duration_seconds`` samples as bytes.
+    """
+    num_samples = int(sample_rate * duration_seconds)
+    samples = [int(32767 * (i % 100) / 100) for i in range(num_samples)]
+    # Pack everything in one call: ``bytes +=`` in a loop copies the whole
+    # buffer on every iteration, which is quadratic for multi-second clips.
+    return struct.pack(f"<{num_samples}h", *samples)
+
+
+def create_wav_file(pcm_data, output_path, sample_rate=16000):
+    """Write raw PCM bytes to ``output_path`` as a mono 16-bit WAV file."""
+    # (nchannels, sampwidth, framerate, nframes, comptype, compname); writeframes() corrects nframes
+    wav_params = (1, 2, sample_rate, 0, "NONE", "not compressed")
+    with wave.open(str(output_path), "wb") as wav_writer:
+        wav_writer.setparams(wav_params)
+        wav_writer.writeframes(pcm_data)
+
+
+# Integration Tests
+
+@pytest.mark.asyncio
+class TestOpusCodecIntegration:
+    """Test Opus encoding/decoding with real data."""
+
+    async def test_encode_decode_roundtrip(self, clean_db):
+        """Test complete encode-decode cycle preserves data structure."""
+        # Generate 1 second of PCM
+        pcm_data = generate_pcm_data(duration_seconds=1)
+
+        # Encode to Opus
+        opus_data = await encode_pcm_to_opus(pcm_data)
+
+        # Verify compression
+        assert len(opus_data) < len(pcm_data) * 0.2  # At least 80% compression
+
+        # Decode back to PCM
+        decoded_pcm = await decode_opus_to_pcm(opus_data)
+
+        # Verify sizes match (allow small variance)
+        assert abs(len(decoded_pcm) - len(pcm_data)) < 1000
+
+    async def test_build_wav_from_pcm(self, clean_db):
+        """Test WAV file construction."""
+        pcm_data = generate_pcm_data(duration_seconds=1)
+
+        wav_data = await build_wav_from_pcm(pcm_data)
+
+        # Verify WAV structure
+        assert wav_data[:4] == b"RIFF"
+        assert b"WAVE" in wav_data
+
+        # Verify readable by wave module
+        wav_buffer = io.BytesIO(wav_data)
+        with wave.open(wav_buffer, "rb") as wav:
+            assert wav.getnchannels() == 1
+            assert wav.getframerate() == 16000
+            frames = wav.readframes(wav.getnframes())
+            assert len(frames) == len(pcm_data)
+
+
+@pytest.mark.asyncio
+class TestMongoDBChunkStorage:
+    """Test MongoDB chunk storage and retrieval."""
+
+    async def test_store_and_retrieve_single_chunk(self, clean_db):
+        """Test storing and retrieving a single audio chunk."""
+        conversation_id = "test-conv-001"
+        pcm_data = generate_pcm_data(duration_seconds=10)
+        opus_data = await encode_pcm_to_opus(pcm_data)
+
+        # Create and save chunk
+        chunk = AudioChunkDocument(
+            conversation_id=conversation_id,
+            chunk_index=0,
+            audio_data=Binary(opus_data),
+            original_size=len(pcm_data),
+            compressed_size=len(opus_data),
+            start_time=0.0,
+            end_time=10.0,
+            duration=10.0,
+            sample_rate=16000,
+            channels=1,
+        )
+        await chunk.insert()
+
+        # Retrieve chunk
+        chunks = await retrieve_audio_chunks(conversation_id)
+
+        assert len(chunks) == 1
+        assert chunks[0].conversation_id == conversation_id
+        assert chunks[0].chunk_index == 0
+        assert len(chunks[0].audio_data) == len(opus_data)
+
+    async def test_retrieve_multiple_chunks_in_order(self, clean_db):
+        """Test retrieving multiple chunks in correct order."""
+        conversation_id = "test-conv-002"
+        num_chunks = 5
+
+        # Create chunks in reverse order
+        for i in range(num_chunks - 1, -1, -1):
+            pcm_data = generate_pcm_data(duration_seconds=10)
+            opus_data = await encode_pcm_to_opus(pcm_data)
+
+            chunk = AudioChunkDocument(
+                conversation_id=conversation_id,
+                chunk_index=i,
+                audio_data=Binary(opus_data),
+                original_size=len(pcm_data),
+                compressed_size=len(opus_data),
+                start_time=float(i * 10),
+                end_time=float((i + 1) * 10),
+                duration=10.0,
+                sample_rate=16000,
+                channels=1,
+            )
+            await chunk.insert()
+
+        # Retrieve all chunks
+        chunks = await retrieve_audio_chunks(conversation_id)
+
+        assert len(chunks) == num_chunks
+        # Verify sorted by chunk_index
+        for i, chunk in enumerate(chunks):
+            assert chunk.chunk_index == i
+
+    async def test_retrieve_chunks_with_pagination(self, clean_db):
+        """Test chunk retrieval with start_index and limit."""
+        conversation_id = "test-conv-003"
+
+        # Create 10 chunks
+        for i in range(10):
+            pcm_data = generate_pcm_data(duration_seconds=10)
+            opus_data = await encode_pcm_to_opus(pcm_data)
+
+            chunk = AudioChunkDocument(
+                conversation_id=conversation_id,
+                chunk_index=i,
+                audio_data=Binary(opus_data),
+                original_size=len(pcm_data),
+                compressed_size=len(opus_data),
+                start_time=float(i * 10),
+                end_time=float((i + 1) * 10),
+                duration=10.0,
+            )
+            await chunk.insert()
+
+        # Retrieve chunks 5-7 (3 chunks starting at index 5)
+        chunks = await retrieve_audio_chunks(
+            conversation_id,
+            start_index=5,
+            limit=3
+        )
+
+        assert len(chunks) == 3
+        assert chunks[0].chunk_index == 5
+        assert chunks[1].chunk_index == 6
+        assert chunks[2].chunk_index == 7
+
+
+@pytest.mark.asyncio
+class TestWAVReconstruction:
+    """Test complete WAV reconstruction from MongoDB chunks."""
+
+    async def test_reconstruct_wav_from_single_chunk(self, clean_db):
+        """Test reconstructing WAV from a single chunk."""
+        conversation_id = "test-conv-004"
+        pcm_data = generate_pcm_data(duration_seconds=10)
+        opus_data = await encode_pcm_to_opus(pcm_data)
+
+        # Store chunk
+        chunk = AudioChunkDocument(
+            conversation_id=conversation_id,
+            chunk_index=0,
+            audio_data=Binary(opus_data),
+            original_size=len(pcm_data),
+            compressed_size=len(opus_data),
+            start_time=0.0,
+            end_time=10.0,
+            duration=10.0,
+        )
+        await chunk.insert()
+
+        # Reconstruct WAV
+        wav_data = await reconstruct_wav_from_conversation(conversation_id)
+
+        # Verify WAV
+        assert wav_data[:4] == b"RIFF"
+        wav_buffer = io.BytesIO(wav_data)
+        with wave.open(wav_buffer, "rb") as wav:
+            assert wav.getnchannels() == 1
+            assert wav.getframerate() == 16000
+
+    async def test_reconstruct_wav_from_multiple_chunks(self, clean_db):
+        """Test reconstructing WAV from multiple chunks."""
+        conversation_id = "test-conv-005"
+        num_chunks = 3
+
+        # Store 3 chunks (30 seconds total)
+        for i in range(num_chunks):
+            pcm_data = generate_pcm_data(duration_seconds=10)
+            opus_data = await encode_pcm_to_opus(pcm_data)
+
+            chunk = AudioChunkDocument(
+                conversation_id=conversation_id,
+                chunk_index=i,
+                audio_data=Binary(opus_data),
+                original_size=len(pcm_data),
+                compressed_size=len(opus_data),
+                start_time=float(i * 10),
+                end_time=float((i + 1) * 10),
+                duration=10.0,
+            )
+            await chunk.insert()
+
+        # Reconstruct complete WAV
+        wav_data = await reconstruct_wav_from_conversation(conversation_id)
+
+        # Verify WAV contains all chunks
+        wav_buffer = io.BytesIO(wav_data)
+        with wave.open(wav_buffer, "rb") as wav:
+            frames = wav.readframes(wav.getnframes())
+            # Should be approximately 30 seconds worth of data
+            expected_size = 16000 * 2 * 30  # sample_rate * bytes_per_sample * seconds
+            assert abs(len(frames) - expected_size) < 10000  # Allow some variance
+
+    async def test_reconstruct_no_chunks_raises_error(self, clean_db):
+        """Test reconstruction fails when no chunks exist."""
+        with pytest.raises(ValueError, match="No audio chunks found"):
+            await reconstruct_wav_from_conversation("nonexistent-conv")
+
+
+@pytest.mark.asyncio
+class TestWAVConversion:
+    """Test WAV file to MongoDB chunk conversion."""
+
+    async def test_convert_wav_to_chunks(self, clean_db, tmp_path):
+        """Test converting WAV file to MongoDB chunks."""
+        conversation_id = "test-conv-006"
+
+        # Create test WAV file (1 second)
+        pcm_data = generate_pcm_data(duration_seconds=1)
+        wav_path = tmp_path / "test.wav"
+        create_wav_file(pcm_data, wav_path)
+
+        # Create conversation
+        conversation = Conversation(
+            conversation_id=conversation_id,
+            audio_uuid="test-audio-001",
+            user_id="test-user",
+            client_id="test-client"
+        )
+        await conversation.insert()
+
+        # Convert to chunks
+        num_chunks = await convert_wav_to_chunks(conversation_id, wav_path)
+
+        assert num_chunks == 1  # 1 second = 1 chunk (10s chunks)
+
+        # Verify chunks in MongoDB
+        chunks = await retrieve_audio_chunks(conversation_id)
+        assert len(chunks) == 1
+
+        # Verify conversation metadata updated
+        updated_conv = await Conversation.find_one(
+            Conversation.conversation_id == conversation_id
+        )
+        assert updated_conv.audio_chunks_count == 1
+        assert updated_conv.audio_total_duration is not None
+        assert updated_conv.audio_compression_ratio is not None
+
+    async def test_convert_long_wav_creates_multiple_chunks(self, clean_db, tmp_path):
+        """Test converting long WAV creates multiple chunks."""
+        conversation_id = "test-conv-007"
+
+        # Create 25-second WAV file
+        pcm_data = generate_pcm_data(duration_seconds=25)
+        wav_path = tmp_path / "long_test.wav"
+        create_wav_file(pcm_data, wav_path)
+
+        # Create conversation
+        conversation = Conversation(
+            conversation_id=conversation_id,
+            audio_uuid="test-audio-002",
+            user_id="test-user",
+            client_id="test-client"
+        )
+        await conversation.insert()
+
+        # Convert to chunks
+        num_chunks = await convert_wav_to_chunks(conversation_id, wav_path)
+
+        assert num_chunks == 3  # 25 seconds = 3 chunks (0-10s, 10-20s, 20-25s)
+
+        # Verify all chunks stored
+        chunks = await retrieve_audio_chunks(conversation_id)
+        assert len(chunks) == 3
+
+
+@pytest.mark.asyncio
+class TestChunkWaiting:
+    """Test waiting for MongoDB chunks to become available."""
+
+    async def test_wait_for_chunks_immediate_success(self, clean_db):
+        """Test wait succeeds when chunks already exist."""
+        conversation_id = "test-conv-008"
+        pcm_data = generate_pcm_data(duration_seconds=10)
+        opus_data = await encode_pcm_to_opus(pcm_data)
+
+        # Create chunk
+        chunk = AudioChunkDocument(
+            conversation_id=conversation_id,
+            chunk_index=0,
+            audio_data=Binary(opus_data),
+            original_size=len(pcm_data),
+            compressed_size=len(opus_data),
+            start_time=0.0,
+            end_time=10.0,
+            duration=10.0,
+        )
+        await chunk.insert()
+
+        # Wait should succeed immediately
+        result = await wait_for_audio_chunks(conversation_id, max_wait_seconds=5)
+        assert result is True
+
+    async def test_wait_for_chunks_timeout(self, clean_db):
+        """Test wait times out when chunks don't exist."""
+        result = await wait_for_audio_chunks(
+            "nonexistent-conv",
+            max_wait_seconds=1
+        )
+        assert result is False
+
+
+# Run tests
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/.env.test b/tests/.env.test
index 5d981b68..652e028e 100644
--- a/tests/.env.test
+++ b/tests/.env.test
@@ -15,5 +15,9 @@ TEST_DEVICE_NAME=robot-test
 
 MEMORY_PROVIDER=chronicle
 
+# MongoDB Configuration (test environment)
+MONGODB_URI=mongodb://localhost:27018
+TEST_DB_NAME=test_db
+
 # Docker Compose Project Name (used by test_env.py to generate container names)
 COMPOSE_PROJECT_NAME=advanced-backend-test
\ No newline at end of file
diff --git a/tests/endpoints/audio_upload_tests.robot b/tests/endpoints/audio_upload_tests.robot
index aca5ca4f..bf2a0df5 100644
--- a/tests/endpoints/audio_upload_tests.robot
+++ b/tests/endpoints/audio_upload_tests.robot
@@ -41,14 +41,12 @@ Single Audio File Upload Test
     # Verify conversation structure
     Dictionary Should Contain Key    ${conversation}    conversation_id
     Dictionary Should Contain Key    ${conversation}    audio_uuid
-    Dictionary Should Contain Key    ${conversation}    audio_path
     Dictionary Should Contain Key    ${conversation}    transcript
     Dictionary Should Contain Key    ${conversation}    segments
 
-    # Verify audio_path is set (should be just filename, no folder prefix)
-    Should Not Be Empty    ${conversation}[audio_path]
-    Should Not Contain    ${conversation}[audio_path]    /    msg=audio_path should be relative, not absolute
-    Should Contain    ${conversation}[audio_path]    .wav    msg=audio_path should contain .wav extension
+    # audio_path is legacy field (None for MongoDB storage)
+    # Just verify it exists in the dictionary, can be None
+    Dictionary Should Contain Key    ${conversation}    audio_path
 
     # Verify transcript was generated
     ${transcript}=    Set Variable    ${conversation}[transcript]
@@ -56,7 +54,7 @@ Single Audio File Upload Test
     Should Be True    ${transcript_length} > 100    msg=Transcript too short: ${transcript_length} chars
 
     Log To Console    ✅ Uploaded audio file
-    Log To Console    📁 Audio path: ${conversation}[audio_path]
+    Log To Console    💾 Storage: MongoDB chunks (audio_path is legacy field)
     Log To Console    📝 Transcript: ${transcript_length} characters
     Log To Console    🆔 Conversation ID: ${conversation}[conversation_id]
 
@@ -65,24 +63,24 @@ Audio File Upload With Fixtures Folder Test
     [Documentation]    Test uploading audio file to fixtures subfolder
     ...
     ...                Verifies:
-    ...                - File is stored in fixtures/ subfolder
-    ...                - audio_path includes folder prefix
+    ...                - Folder parameter is accepted (for backward compatibility)
+    ...                - Audio is stored in MongoDB chunks
     ...                - Conversation is created correctly
     [Tags]    audio-upload
 
     # Upload audio file to fixtures folder
     ${conversation}=    Upload Audio File    ${TEST_AUDIO_FILE}    device_name=fixture-upload    folder=fixtures
 
-    # Verify audio_path includes fixtures/ prefix
-    Should Start With    ${conversation}[audio_path]    fixtures/    msg=audio_path should start with 'fixtures/'
-    Should Contain    ${conversation}[audio_path]    .wav    msg=audio_path should contain .wav extension
+    # audio_path is legacy field (None for MongoDB storage)
+    # Folder parameter is accepted but audio is stored in MongoDB
+    Dictionary Should Contain Key    ${conversation}    audio_path
 
     # Verify conversation was created
     Dictionary Should Contain Key    ${conversation}    conversation_id
     Dictionary Should Contain Key    ${conversation}    transcript
 
-    Log To Console    ✅ Uploaded audio file to fixtures folder
-    Log To Console    📁 Audio path: ${conversation}[audio_path]
+    Log To Console    ✅ Uploaded audio file with folder parameter
+    Log To Console    💾 Storage: MongoDB chunks (folder param backward compatible)
     Log To Console    🆔 Conversation ID: ${conversation}[conversation_id]
 
 
diff --git a/tests/infrastructure/infra_tests.robot b/tests/infrastructure/infra_tests.robot
index a5b84996..41a99de8 100644
--- a/tests/infrastructure/infra_tests.robot
+++ b/tests/infrastructure/infra_tests.robot
@@ -279,21 +279,35 @@ WebSocket Disconnect Conversation End Reason Test
     # Send audio fast (no realtime pacing) to trigger conversation creation
     Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=200
 
-    # Wait for conversation job to be created and conversation_id to be populated
-    # Transcription + speech analysis takes time (30-60s with queue)
-    ${conv_jobs}=    Wait Until Keyword Succeeds    60s    3s
-    ...    Job Type Exists For Client    open_conversation    ${client_id}
-
-    # Wait for conversation_id in job meta (created asynchronously)
-    ${conversation_id}=    Wait Until Keyword Succeeds    10s    0.5s
-    ...    Get Conversation ID From Job Meta    open_conversation    ${client_id}
-
-    # CRITICAL: Keep sending audio to prevent inactivity timeout (SPEECH_INACTIVITY_THRESHOLD_SECONDS=2)
-    # Send a few more chunks to keep the conversation alive before disconnect
-    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=50
+    # Send audio continuously while polling: SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 would otherwise end the conversation
+    ${conversation_id}=    Set Variable    ${None}    # pre-set so the post-loop check fails with its own message
+    FOR    ${i}    IN RANGE    20    # Send 20 batches while waiting
+        # Try to get conversation job
+        ${conv_jobs}=    Get Jobs By Type And Client    open_conversation    ${client_id}
+        ${has_job}=    Evaluate    len($conv_jobs) > 0
+
+        IF    ${has_job}
+            # Conversation job exists, try to get conversation_id
+            TRY
+                ${conversation_id}=    Get Conversation ID From Job Meta    open_conversation    ${client_id}
+                # Got conversation_id! Close websocket immediately to trigger disconnect
+                Log To Console    Conversation created (${conversation_id}), closing websocket NOW
+                Close Audio Stream    ${stream_id}
+                BREAK
+            EXCEPT
+                # conversation_id not set yet, keep sending audio
+                Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=50
+                Sleep    1s
+            END
+        ELSE
+            # No conversation job yet, keep sending audio
+            Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=50
+            Sleep    1s
+        END
+    END
 
-    # Simulate WebSocket disconnect (Bluetooth dropout)
-    Close Audio Stream    ${stream_id}
+    # Verify we got the conversation_id before loop ended
+    Should Not Be Equal    ${conversation_id}    ${None}    Failed to get conversation_id within timeout
 
     # Wait for job to complete (should be fast, not 3600s timeout)
     ${conv_jobs}=    Get Jobs By Type And Client    open_conversation    ${device_name}
diff --git a/tests/integration/mongodb_audio_storage_tests.robot b/tests/integration/mongodb_audio_storage_tests.robot
new file mode 100644
index 00000000..27e1c8d7
--- /dev/null
+++ b/tests/integration/mongodb_audio_storage_tests.robot
@@ -0,0 +1,106 @@
+*** Settings ***
+Documentation    MongoDB Audio Chunk Storage Integration Tests
+...
+...              Validates that audio is stored as MongoDB chunks
+...              instead of disk-based WAV files.
+Resource         ../resources/websocket_keywords.robot
+Resource         ../resources/audio_keywords.robot
+Resource         ../resources/conversation_keywords.robot
+Resource         ../resources/mongodb_keywords.robot
+Resource         ../resources/queue_keywords.robot
+Resource         ../setup/setup_keywords.robot
+Resource         ../setup/teardown_keywords.robot
+Variables        ../setup/test_data.py
+
+
+Suite Setup      Suite Setup
+Suite Teardown   Suite Teardown
+Test Setup       Test Cleanup
+
+
+*** Test Cases ***
+
+MongoDB Chunks Created From File Upload
+    [Documentation]    Verify that uploaded audio files are stored as MongoDB chunks
+    [Tags]    audio-upload
+
+    # Upload 1-minute test audio file
+    ${response}=    POST On Session    api    /api/audio/upload
+    ...             files=${{ {'files': open('${TEST_AUDIO_FILE}', 'rb')} }}
+    ...             params=device_name=upload-mongodb-test
+    ...             expected_status=200
+
+    ${upload_data}=    Set Variable    ${response.json()}
+    ${conversation_id}=    Set Variable    ${upload_data}[files][0][conversation_id]
+    Log    Uploaded conversation: ${conversation_id}
+
+    # Poll until the chunks are written to MongoDB rather than sleeping a fixed 5s:
+    # passes as soon as the data is there and tolerates a slow backend (up to 30s).
+    # Expect ~6 chunks for 1-minute audio.
+    ${chunks}=    Wait Until Keyword Succeeds    30s    1s
+    ...    Verify Audio Chunks Exist    ${conversation_id}    min_chunks=5
+
+    ${chunk_count}=    Get Length    ${chunks}
+    Log    ✅ Found ${chunk_count} MongoDB chunks for uploaded file
+
+
+MongoDB Chunks Are Sequential
+    [Documentation]    Verify chunks have sequential chunk_index values
+    [Tags]    audio-upload
+
+    ${response}=    POST On Session    api    /api/audio/upload
+    ...             files=${{ {'files': open('${TEST_AUDIO_FILE}', 'rb')} }}
+    ...             params=device_name=sequential-test
+    ...             expected_status=200
+
+    ${conversation_id}=    Set Variable    ${response.json()}[files][0][conversation_id]
+    Sleep    5s    # fixed wait, presumably for the chunks to be written to MongoDB
+
+    ${chunks}=    Get Audio Chunks For Conversation    ${conversation_id}
+
+    # Verify chunk_index runs 0, 1, 2, ... with no gaps (fails if there are no chunks)
+    Verify Chunks Are Sequential    ${chunks}
+
+    ${chunk_count}=    Get Length    ${chunks}
+    ${last_index}=    Evaluate    ${chunk_count} - 1
+    Log    ✅ Chunks are sequential (0 to ${last_index})
+
+
+Conversation Has MongoDB Chunk Metadata
+    [Documentation]    Verify conversation has chunk count and duration metadata
+    [Tags]    audio-upload
+
+    ${response}=    POST On Session    api    /api/audio/upload
+    ...             files=${{ {'files': open('${TEST_AUDIO_FILE}', 'rb')} }}
+    ...             params=device_name=metadata-test
+    ...             expected_status=200
+
+    ${conversation_id}=    Set Variable    ${response.json()}[files][0][conversation_id]
+    Sleep    5s    # fixed wait, presumably for chunk/metadata persistence to finish
+
+    # Fetch the conversation and check audio_chunks_count / audio_total_duration are present and > 0
+    ${conversation}=    Get Conversation By ID    ${conversation_id}
+    Verify Conversation Has Chunk Metadata    ${conversation}
+
+    Log    ✅ Conversation has chunk metadata: ${conversation}[audio_chunks_count] chunks, ${conversation}[audio_total_duration]s
+
+
+Each Chunk Has Valid Metadata
+    [Documentation]    Verify chunk documents have all required fields
+    [Tags]    audio-upload
+
+    ${response}=    POST On Session    api    /api/audio/upload
+    ...             files=${{ {'files': open('${TEST_AUDIO_FILE}', 'rb')} }}
+    ...             params=device_name=chunk-metadata-test
+    ...             expected_status=200
+
+    ${conversation_id}=    Set Variable    ${response.json()}[files][0][conversation_id]
+    Sleep    5s    # fixed wait, presumably for the chunks to be written to MongoDB
+
+    ${chunks}=    Get Audio Chunks For Conversation    ${conversation_id}
+
+    # Only the first chunk is inspected; indexing [0] errors out if no chunks were stored
+    ${first_chunk}=    Set Variable    ${chunks}[0]
+    Verify Audio Chunk Metadata    ${first_chunk}
+
+    Log    ✅ Chunk metadata is valid
diff --git a/tests/libs/mongodb_helper.py b/tests/libs/mongodb_helper.py
new file mode 100644
index 00000000..aaa91cd0
--- /dev/null
+++ b/tests/libs/mongodb_helper.py
@@ -0,0 +1,106 @@
+"""
+MongoDB helper functions for Robot Framework tests.
+
+Provides direct MongoDB access for verifying audio chunk storage.
+"""
+
+import os
+from pathlib import Path
+from pymongo import MongoClient
+from dotenv import load_dotenv
+
+# Load test environment variables
+tests_dir = Path(__file__).parent.parent
+load_dotenv(tests_dir / ".env.test", override=False)
+
+
+def get_mongodb_uri() -> str:
+    """Return the MongoDB URI from MONGODB_URI, or mongodb://localhost:27018 if unset."""
+    return os.getenv("MONGODB_URI", "mongodb://localhost:27018")
+
+
+def get_db_name() -> str:
+    """Return the database name from TEST_DB_NAME, or "test_db" if unset."""
+    return os.getenv("TEST_DB_NAME", "test_db")
+
+
+def get_audio_chunks(conversation_id) -> list:
+    """
+    Fetch every audio chunk of a conversation from MongoDB, ordered by chunk_index.
+
+    Args:
+        conversation_id: Conversation ID to query
+
+    Returns:
+        List of chunk dicts with `_id` as str and `audio_data` replaced by a size placeholder
+    """
+    client = MongoClient(get_mongodb_uri())
+    db = client[get_db_name()]
+
+    try:
+        # Query the audio_chunks collection, ascending by chunk_index
+        chunks = list(db.audio_chunks.find(
+            {"conversation_id": conversation_id},
+            sort=[("chunk_index", 1)]
+        ))
+
+        # Make each document Robot-friendly: stringify _id, summarize the binary payload
+        for chunk in chunks:
+            if "_id" in chunk:
+                chunk["_id"] = str(chunk["_id"])
+
+            # Record the payload size under a separate key (Robot can't handle binary)
+            if "audio_data" in chunk:
+                chunk["audio_data_length"] = len(chunk["audio_data"])
+                # Overwrite the payload with a printable placeholder; the raw bytes are dropped
+                chunk["audio_data"] = f"<Binary data: {len(chunk['audio_data'])} bytes>"
+
+        return chunks
+
+    finally:
+        client.close()
+
+
+def get_conversation_chunk_count(conversation_id) -> int:
+    """
+    Count the audio chunks stored for a conversation without fetching the documents.
+
+    Args:
+        conversation_id: Conversation ID to query
+
+    Returns:
+        Number of documents in audio_chunks whose conversation_id matches
+    """
+    client = MongoClient(get_mongodb_uri())
+    db = client[get_db_name()]
+
+    try:
+        count = db.audio_chunks.count_documents({"conversation_id": conversation_id})
+        return count
+    finally:
+        client.close()
+
+
+def verify_chunks_exist(conversation_id, min_chunks=1):
+    """
+    Verify that at least `min_chunks` audio chunks exist for a conversation.
+
+    Args:
+        conversation_id: Conversation ID to verify
+        min_chunks: Minimum number of chunks expected
+
+    Returns:
+        True if chunks exist and meet minimum count
+
+    Raises:
+        AssertionError if chunks don't meet requirements
+    """
+    # Count server-side; fetching every chunk (audio payload included) just for len() is wasteful
+    actual_count = get_conversation_chunk_count(conversation_id)
+
+    if actual_count < min_chunks:
+        raise AssertionError(
+            f"Expected at least {min_chunks} chunks, found {actual_count}"
+        )
+
+    return True
diff --git a/tests/resources/mongodb_keywords.robot b/tests/resources/mongodb_keywords.robot
new file mode 100644
index 00000000..58a1f991
--- /dev/null
+++ b/tests/resources/mongodb_keywords.robot
@@ -0,0 +1,118 @@
+*** Settings ***
+Documentation    MongoDB Audio Chunk Verification Keywords
+...
+...              Keywords for verifying MongoDB audio chunk storage.
+...              Used to test the MongoDB migration from disk-based WAV files.
+Library          Collections
+Library          ../libs/mongodb_helper.py
+Resource         session_keywords.robot
+Resource         conversation_keywords.robot
+
+
+*** Keywords ***
+
+Get Audio Chunks For Conversation
+    [Documentation]    Retrieve audio chunks from MongoDB for a conversation
+    [Arguments]    ${conversation_id}
+
+    ${chunks}=    Get Audio Chunks    ${conversation_id}    # get_audio_chunks() in libs/mongodb_helper.py
+    RETURN    ${chunks}
+
+
+Verify Audio Chunks Exist
+    [Documentation]    Verify that audio chunks exist in MongoDB for a conversation
+    [Arguments]    ${conversation_id}    ${min_chunks}=1
+
+    ${chunks}=    Get Audio Chunks For Conversation    ${conversation_id}
+    ${chunk_count}=    Get Length    ${chunks}
+
+    Should Be True    ${chunk_count} >= ${min_chunks}
+    ...    Expected at least ${min_chunks} chunks, found ${chunk_count}
+
+    Log    ✅ Found ${chunk_count} audio chunks in MongoDB for conversation ${conversation_id}
+    RETURN    ${chunks}    # hand back the fetched chunks so the caller can inspect them
+
+
+Verify Audio Chunk Metadata
+    [Documentation]    Verify chunk has correct metadata structure
+    [Arguments]    ${chunk}
+
+    # Verify every required key is present before reading any value
+    Dictionary Should Contain Key    ${chunk}    conversation_id
+    Dictionary Should Contain Key    ${chunk}    chunk_index
+    Dictionary Should Contain Key    ${chunk}    original_size
+    Dictionary Should Contain Key    ${chunk}    compressed_size
+    Dictionary Should Contain Key    ${chunk}    start_time
+    Dictionary Should Contain Key    ${chunk}    end_time
+    Dictionary Should Contain Key    ${chunk}    duration
+    Dictionary Should Contain Key    ${chunk}    sample_rate
+    Dictionary Should Contain Key    ${chunk}    channels
+
+    # Verify field values: non-negative index, positive sizes/duration, 16000 Hz, 1 channel
+    Should Be True    ${chunk}[chunk_index] >= 0
+    Should Be True    ${chunk}[original_size] > 0
+    Should Be True    ${chunk}[compressed_size] > 0
+    Should Be True    ${chunk}[duration] > 0
+    Should Be Equal As Integers    ${chunk}[sample_rate]    16000
+    Should Be Equal As Integers    ${chunk}[channels]    1
+
+    Log    ✅ Chunk ${chunk}[chunk_index]: ${chunk}[duration]s duration
+
+
+Verify Chunks Are Sequential
+    [Documentation]    Verify chunks have sequential chunk_index values
+    [Arguments]    ${chunks}
+
+    ${chunk_count}=    Get Length    ${chunks}
+    Should Be True    ${chunk_count} > 0    No chunks to verify
+
+    # Sort by chunk_index; $chunks passes the real list to Evaluate instead of pasting its repr
+    ${sorted_chunks}=    Evaluate    sorted($chunks, key=lambda x: x['chunk_index'])
+
+    # Verify sequential numbering starting from 0
+    FOR    ${i}    IN RANGE    ${chunk_count}
+        ${chunk}=    Set Variable    ${sorted_chunks}[${i}]
+        Should Be Equal As Integers    ${chunk}[chunk_index]    ${i}
+        ...    Chunk index mismatch: expected ${i}, got ${chunk}[chunk_index]
+    END
+
+    Log    ✅ ${chunk_count} chunks are sequential (0 to ${chunk_count - 1})
+
+
+Calculate Total Audio Size
+    [Documentation]    Calculate total original and compressed audio size from chunks
+    [Arguments]    ${chunks}
+
+    ${total_original}=    Set Variable    ${0}
+    ${total_compressed}=    Set Variable    ${0}
+
+    FOR    ${chunk}    IN    @{chunks}
+        ${total_original}=    Evaluate    ${total_original} + ${chunk}[original_size]
+        ${total_compressed}=    Evaluate    ${total_compressed} + ${chunk}[compressed_size]
+    END
+
+    ${overall_ratio}=    Evaluate    ${total_compressed} / ${total_original} if ${total_original} > 0 else 0    # no division by zero
+    ${savings_percent}=    Evaluate    (1 - ${overall_ratio}) * 100
+
+    Log    📦 Total audio: ${total_original} bytes (PCM) → ${total_compressed} bytes (Opus)
+    Log    📊 Compression: ${{ format($overall_ratio, '.3f') }} ratio (${{ format($savings_percent, '.1f') }}% savings)
+
+    RETURN    ${total_original}    ${total_compressed}    ${overall_ratio}
+
+
+Verify Conversation Has Chunk Metadata
+    [Documentation]    Verify conversation has correct MongoDB chunk metadata fields
+    [Arguments]    ${conversation}
+
+    # Both chunk summary keys must be present on the conversation dict
+    Dictionary Should Contain Key    ${conversation}    audio_chunks_count
+    Dictionary Should Contain Key    ${conversation}    audio_total_duration
+
+    # Both values must be strictly positive
+    Should Be True    ${conversation}[audio_chunks_count] > 0
+    ...    Conversation should have audio_chunks_count > 0
+
+    Should Be True    ${conversation}[audio_total_duration] > 0
+    ...    Conversation should have audio_total_duration > 0
+
+    Log    ✅ Conversation metadata: ${conversation}[audio_chunks_count] chunks, ${conversation}[audio_total_duration]s duration
diff --git a/tests/test-requirements.txt b/tests/test-requirements.txt
index ba6fb687..f32614e0 100644
--- a/tests/test-requirements.txt
+++ b/tests/test-requirements.txt
@@ -5,4 +5,5 @@ robotframework-browser
 robotframework-databaselibrary
 python-dotenv
 websockets
+pymongo
  
\ No newline at end of file

From e662f4674b4140d3acdcd84e8a7d24be45cd49a7 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Mon, 12 Jan 2026 07:25:45 +0000
Subject: [PATCH 2/3] Refactor audio processing to utilize MongoDB chunks and
 enhance job handling

- Removed audio file path parameters from various functions, transitioning to audio data retrieval from MongoDB chunks.
- Updated the `start_post_conversation_jobs` function to reflect changes in audio handling, ensuring jobs reconstruct audio from database chunks.
- Enhanced the `transcribe_full_audio_job` and `recognise_speakers_job` to process audio directly from memory, eliminating the need for temporary files.
- Improved error handling and logging for audio data retrieval, ensuring better feedback during processing.
- Added a new utility function for converting PCM data to WAV format in memory, streamlining audio format handling.
---
 .../controllers/audio_controller.py           |   1 -
 .../controllers/conversation_controller.py    |  41 +++----
 .../controllers/queue_controller.py           |   9 +-
 .../controllers/websocket_controller.py       |   1 -
 .../advanced_omi_backend/utils/audio_utils.py |  43 ++++++++
 .../workers/conversation_jobs.py              |   1 -
 .../workers/speaker_jobs.py                   | 102 +++++++-----------
 .../workers/transcription_jobs.py             |  60 +++--------
 tests/setup/test_manager_keywords.robot       |   9 ++
 9 files changed, 121 insertions(+), 146 deletions(-)

diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
index 143cb253..7ba3b900 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
@@ -156,7 +156,6 @@ async def upload_and_process_audio_files(
                 job_ids = start_post_conversation_jobs(
                     conversation_id=conversation_id,
                     audio_uuid=audio_uuid,
-                    audio_file_path=None,  # No file path - using MongoDB chunks
                     user_id=user.user_id,
                     post_transcription=True,  # Run batch transcription for uploads
                     client_id=client_id  # Pass client_id for UI tracking
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
index b26123f3..b6268c64 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
@@ -4,6 +4,7 @@
 
 import logging
 import time
+from datetime import datetime
 from pathlib import Path
 
 from fastapi.responses import JSONResponse
@@ -382,34 +383,20 @@ async def reprocess_transcript(conversation_id: str, user: User):
         if not user.is_superuser and conversation_model.user_id != str(user.user_id):
             return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."})
 
-        # Get audio_uuid and file path from conversation
+        # Get audio_uuid from conversation
         audio_uuid = conversation_model.audio_uuid
-        audio_path = conversation_model.audio_path
 
-        if not audio_path:
-            return JSONResponse(
-                status_code=400, content={"error": "No audio file found for this conversation"}
-            )
-
-        # Check if file exists - try multiple possible locations
-        possible_paths = [
-            Path("/app/audio_chunks") / audio_path,
-            Path(audio_path),  # fallback to relative path
-        ]
+        # Validate audio chunks exist in MongoDB
+        chunks = await AudioChunkDocument.find(
+            AudioChunkDocument.conversation_id == conversation_id
+        ).to_list()
 
-        full_audio_path = None
-        for path in possible_paths:
-            if path.exists():
-                full_audio_path = path
-                break
-
-        if not full_audio_path:
+        if not chunks:
             return JSONResponse(
-                status_code=422,
+                status_code=404,
                 content={
-                    "error": "Audio file not found on disk",
-                    "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations",
-                    "searched_paths": [str(p) for p in possible_paths]
+                    "error": "No audio data found for this conversation",
+                    "details": f"Conversation '{conversation_id}' exists but has no audio chunks in MongoDB"
                 }
             )
 
@@ -430,12 +417,11 @@ async def reprocess_transcript(conversation_id: str, user: User):
             transcribe_full_audio_job,
         )
 
-        # Job 1: Transcribe audio to text
+        # Job 1: Transcribe audio to text (reconstructs from MongoDB chunks)
         transcript_job = transcription_queue.enqueue(
             transcribe_full_audio_job,
             conversation_id,
             audio_uuid,
-            str(full_audio_path),
             version_id,
             "reprocess",
             job_timeout=600,
@@ -446,14 +432,11 @@ async def reprocess_transcript(conversation_id: str, user: User):
         )
         logger.info(f"📥 RQ: Enqueued transcription job {transcript_job.id}")
 
-        # Job 2: Recognize speakers (depends on transcription)
+        # Job 2: Recognize speakers (depends on transcription, reads data from DB)
         speaker_job = transcription_queue.enqueue(
             recognise_speakers_job,
             conversation_id,
             version_id,
-            str(full_audio_path),
-            "",  # transcript_text - will be read from DB
-            [],  # words - will be read from DB
             depends_on=transcript_job,
             job_timeout=600,
             result_ttl=JOB_RESULT_TTL,
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
index cd4f7455..b3875861 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
@@ -366,7 +366,6 @@ def start_streaming_jobs(
 def start_post_conversation_jobs(
     conversation_id: str,
     audio_uuid: str,
-    audio_file_path: str,
     user_id: str,
     post_transcription: bool = True,
     transcript_version_id: Optional[str] = None,
@@ -382,15 +381,17 @@ def start_post_conversation_jobs(
     3. Memory extraction job - Extracts memories from conversation (parallel)
     4. Title/summary generation job - Generates title and summary (parallel)
 
+    Note: Audio is reconstructed from MongoDB chunks, not files.
+
     Args:
         conversation_id: Conversation identifier
         audio_uuid: Audio UUID for job tracking
-        audio_file_path: Path to audio file
         user_id: User identifier
         post_transcription: If True, run batch transcription step (for uploads)
                            If False, skip transcription (streaming already has it)
         transcript_version_id: Transcript version ID (auto-generated if None)
         depends_on_job: Optional job dependency for first job
+        client_id: Client ID for UI tracking
 
     Returns:
         Dict with job IDs (transcription will be None if post_transcription=False)
@@ -416,7 +417,6 @@ def start_post_conversation_jobs(
         transcribe_full_audio_job,
         conversation_id,
         audio_uuid,
-        audio_file_path,
         version_id,
         "batch",  # trigger
         job_timeout=1800,  # 30 minutes
@@ -439,9 +439,6 @@ def start_post_conversation_jobs(
         recognise_speakers_job,
         conversation_id,
         version_id,
-        audio_file_path,
-        "",  # transcript_text - will be read from DB
-        [],  # words - will be read from DB
         job_timeout=1200,  # 20 minutes
         result_ttl=JOB_RESULT_TTL,
         depends_on=speaker_depends_on,
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
index ad856b2b..d27c2131 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
@@ -896,7 +896,6 @@ async def _process_batch_audio_complete(
         job_ids = start_post_conversation_jobs(
             conversation_id=conversation_id,
             audio_uuid=audio_uuid,
-            audio_file_path=None,  # No file path - using MongoDB chunks
             user_id=None,  # Will be read from conversation in DB by jobs
             post_transcription=True,  # Run batch transcription for uploads
             client_id=client_id  # Pass client_id for UI tracking
diff --git a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
index b4d5487f..ce81bbb8 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
@@ -258,6 +258,49 @@ async def process_audio_chunk(
         client_state.update_audio_received(chunk)
 
 
+def pcm_to_wav_bytes(
+    pcm_data: bytes,
+    sample_rate: int = 16000,
+    channels: int = 1,
+    sample_width: int = 2
+) -> bytes:
+    """
+    Wrap raw PCM audio bytes in a WAV container, entirely in memory (no disk I/O).
+
+    Args:
+        pcm_data: Raw PCM audio bytes, written as the WAV frames unchanged
+        sample_rate: Sample rate in Hz (default: 16000)
+        channels: Number of audio channels (default: 1 for mono)
+        sample_width: Sample width in bytes (default: 2 for 16-bit)
+
+    Returns:
+        Complete WAV file contents (header plus frames) as bytes
+    """
+    import wave
+    import io
+
+    logger.debug(
+        f"Converting PCM to WAV in memory: {len(pcm_data)} bytes "
+        f"(rate={sample_rate}, channels={channels}, width={sample_width})"
+    )
+
+    # BytesIO stands in for the output file so the wave module writes to memory
+    wav_buffer = io.BytesIO()
+
+    with wave.open(wav_buffer, 'wb') as wav_file:
+        wav_file.setnchannels(channels)
+        wav_file.setsampwidth(sample_width)
+        wav_file.setframerate(sample_rate)
+        wav_file.writeframes(pcm_data)
+
+    # Read the buffer only after the `with` block, once the wave writer has been closed
+    wav_bytes = wav_buffer.getvalue()
+
+    logger.debug(f"Created WAV in memory: {len(wav_bytes)} bytes")
+
+    return wav_bytes
+
+
 def write_pcm_to_wav(
     pcm_data: bytes,
     output_path: str,
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index ed2ef1fe..9430018e 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -527,7 +527,6 @@ async def open_conversation_job(
     job_ids = start_post_conversation_jobs(
         conversation_id=conversation_id,
         audio_uuid=session_id,
-        audio_file_path=file_path,
         user_id=user_id,
         post_transcription=True,  # Run batch transcription for streaming audio
         client_id=client_id  # Pass client_id for UI tracking
diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py
index 5ad398ea..3547674a 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py
@@ -121,9 +121,8 @@ async def check_enrolled_speakers_job(
 async def recognise_speakers_job(
     conversation_id: str,
     version_id: str,
-    audio_path: str,
-    transcript_text: str,
-    words: list,
+    transcript_text: str = "",
+    words: list = None,
     *,
     redis_client=None
 ) -> Dict[str, Any]:
@@ -131,16 +130,16 @@ async def recognise_speakers_job(
     RQ job function for identifying speakers in a transcribed conversation.
 
     This job runs after transcription and:
-    1. Calls speaker recognition service to identify speakers
-    2. Updates the transcript version with identified speaker labels
-    3. Returns results for downstream jobs (memory)
+    1. Reconstructs audio from MongoDB chunks
+    2. Calls speaker recognition service to identify speakers
+    3. Updates the transcript version with identified speaker labels
+    4. Returns results for downstream jobs (memory)
 
     Args:
         conversation_id: Conversation ID
         version_id: Transcript version ID to update
-        audio_path: Path to audio file
-        transcript_text: Transcript text from transcription job
-        words: Word-level timing data from transcription job
+        transcript_text: Transcript text from transcription job (optional, reads from DB if empty)
+        words: Word-level timing data from transcription job (optional, reads from DB if empty)
         redis_client: Redis client (injected by decorator)
 
     Returns:
@@ -186,77 +185,52 @@ async def recognise_speakers_job(
         }
 
     # Reconstruct audio from MongoDB chunks
-    import tempfile
-    from pathlib import Path
     from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_wav_from_conversation
 
     logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}")
 
     # Call speaker recognition service
     try:
-        # Reconstruct WAV from MongoDB chunks
+        # Reconstruct WAV from MongoDB chunks (already in memory as bytes)
         wav_data = await reconstruct_wav_from_conversation(conversation_id)
 
-        # Write to temporary file for speaker recognition service
-        temp_wav_file = tempfile.NamedTemporaryFile(
-            suffix=".wav",
-            delete=False,
-            prefix=f"speaker_recog_{conversation_id[:8]}_"
+        logger.info(
+            f"📦 Reconstructed audio from MongoDB chunks: "
+            f"{len(wav_data) / 1024 / 1024:.2f} MB"
         )
 
-        try:
-            temp_wav_file.write(wav_data)
-            temp_wav_file.flush()
-            temp_wav_path = temp_wav_file.name
-            temp_wav_file.close()
+        # Read transcript text and words from the transcript version
+        # (Parameters may be empty if called via job dependency)
+        actual_transcript_text = transcript_text or transcript_version.transcript or ""
+        actual_words = words if words else []
 
-            logger.info(
-                f"📁 Created temporary WAV file for speaker recognition: {temp_wav_path} "
-                f"({len(wav_data) / 1024 / 1024:.2f} MB)"
-            )
+        # If words not provided, we need to get them from metadata
+        if not actual_words and transcript_version.metadata:
+            actual_words = transcript_version.metadata.get("words", [])
 
-            # Read transcript text and words from the transcript version
-            # (Parameters may be empty if called via job dependency)
-            actual_transcript_text = transcript_text or transcript_version.transcript or ""
-            actual_words = words if words else []
-
-            # If words not provided, we need to get them from metadata
-            if not actual_words and transcript_version.metadata:
-                actual_words = transcript_version.metadata.get("words", [])
-
-            if not actual_transcript_text:
-                logger.warning(f"🎤 No transcript text found in version {version_id}")
-                # Clean up temp file before returning
-                Path(temp_wav_path).unlink(missing_ok=True)
-                return {
-                    "success": False,
-                    "conversation_id": conversation_id,
-                    "version_id": version_id,
-                    "error": "No transcript text available",
-                    "processing_time_seconds": 0
-                }
-
-            transcript_data = {
-                "text": actual_transcript_text,
-                "words": actual_words
+        if not actual_transcript_text:
+            logger.warning(f"🎤 No transcript text found in version {version_id}")
+            return {
+                "success": False,
+                "conversation_id": conversation_id,
+                "version_id": version_id,
+                "error": "No transcript text available",
+                "processing_time_seconds": 0
             }
 
-            logger.info(f"🎤 Calling speaker recognition service...")
+        transcript_data = {
+            "text": actual_transcript_text,
+            "words": actual_words
+        }
 
-            # Call speaker service with temporary file path
-            speaker_result = await speaker_client.diarize_identify_match(
-                audio_path=temp_wav_path,
-                transcript_data=transcript_data,
-                user_id=user_id
-            )
+        logger.info(f"🎤 Calling speaker recognition service...")
 
-        finally:
-            # Clean up temporary file
-            try:
-                Path(temp_wav_path).unlink(missing_ok=True)
-                logger.debug(f"🧹 Deleted temporary WAV file: {temp_wav_path}")
-            except Exception as cleanup_error:
-                logger.warning(f"Failed to delete temporary file {temp_wav_path}: {cleanup_error}")
+        # Call speaker service with in-memory audio data (no temp file needed!)
+        speaker_result = await speaker_client.diarize_identify_match(
+            audio_data=wav_data,  # Pass bytes directly, no disk I/O
+            transcript_data=transcript_data,
+            user_id=user_id
+        )
 
     except ValueError as e:
         # No chunks found for conversation
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index fdafc6f9..1d0da874 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -126,7 +126,6 @@ def get_speaker_at_time(timestamp: float, speaker_segments: list) -> str:
 async def transcribe_full_audio_job(
     conversation_id: str,
     audio_uuid: str,
-    audio_path: str,
     version_id: str,
     trigger: str = "reprocess",
     *,
@@ -136,17 +135,17 @@ async def transcribe_full_audio_job(
     RQ job function for transcribing full audio to text (transcription only, no speaker recognition).
 
     This job:
-    1. Transcribes audio to text with generic speaker labels (Speaker 0, Speaker 1, etc.)
-    2. Generates title and summary
-    3. Saves transcript version to conversation
-    4. Returns results for downstream jobs (speaker recognition, memory)
+    1. Reconstructs audio from MongoDB chunks
+    2. Transcribes audio to text with generic speaker labels (Speaker 0, Speaker 1, etc.)
+    3. Generates title and summary
+    4. Saves transcript version to conversation
+    5. Returns results for downstream jobs (speaker recognition, memory)
 
     Speaker recognition is handled by a separate job (recognise_speakers_job).
 
     Args:
         conversation_id: Conversation ID
         audio_uuid: Audio UUID (unused but kept for compatibility)
-        audio_path: Path to audio file
         version_id: Version ID for new transcript
         trigger: Trigger source
         redis_client: Redis client (injected by decorator)
@@ -182,52 +181,25 @@ async def transcribe_full_audio_job(
     logger.info(f"Using transcription provider: {provider_name}")
 
     # Reconstruct audio from MongoDB chunks
-    import tempfile
     from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_wav_from_conversation
 
     logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}")
 
     try:
-        # Reconstruct WAV from MongoDB chunks
+        # Reconstruct WAV from MongoDB chunks (already in memory as bytes)
         wav_data = await reconstruct_wav_from_conversation(conversation_id)
 
-        # Write to temporary file for transcription service
-        # (Services expect file paths, not bytes)
-        temp_wav_file = tempfile.NamedTemporaryFile(
-            suffix=".wav",
-            delete=False,
-            prefix=f"batch_transcribe_{conversation_id[:8]}_"
+        logger.info(
+            f"📦 Reconstructed audio from MongoDB chunks: "
+            f"{len(wav_data) / 1024 / 1024:.2f} MB"
         )
 
-        try:
-            temp_wav_file.write(wav_data)
-            temp_wav_file.flush()
-            temp_wav_path = temp_wav_file.name
-            temp_wav_file.close()
-
-            logger.info(
-                f"📁 Created temporary WAV file: {temp_wav_path} "
-                f"({len(wav_data) / 1024 / 1024:.2f} MB)"
-            )
-
-            # Read audio data for transcription
-            # Some providers need the file path, some need bytes
-            # Read both to support all provider types
-            with open(temp_wav_path, "rb") as f:
-                audio_data = f.read()
-
-            # Transcribe the audio (assume 16kHz sample rate)
-            transcription_result = await provider.transcribe(
-                audio_data=audio_data, sample_rate=16000, diarize=True
-            )
-
-        finally:
-            # Clean up temporary file
-            try:
-                Path(temp_wav_path).unlink(missing_ok=True)
-                logger.debug(f"🧹 Deleted temporary WAV file: {temp_wav_path}")
-            except Exception as cleanup_error:
-                logger.warning(f"Failed to delete temporary file {temp_wav_path}: {cleanup_error}")
+        # Transcribe the audio directly from memory (no disk I/O needed)
+        transcription_result = await provider.transcribe(
+            audio_data=wav_data,  # Pass bytes directly, already in memory
+            sample_rate=16000,
+            diarize=True
+        )
 
     except ValueError as e:
         # No chunks found for conversation
@@ -441,7 +413,7 @@ async def transcribe_full_audio_job(
     # Prepare metadata (transcription only - speaker recognition will add its own metadata)
     metadata = {
         "trigger": trigger,
-        "audio_file_size": len(audio_data),
+        "audio_file_size": len(wav_data),
         "segment_count": len(segments),
         "word_count": len(words),
         "words": words,  # Store words for speaker recognition job to read
diff --git a/tests/setup/test_manager_keywords.robot b/tests/setup/test_manager_keywords.robot
index 3de2728c..ed29f1f8 100644
--- a/tests/setup/test_manager_keywords.robot
+++ b/tests/setup/test_manager_keywords.robot
@@ -39,6 +39,14 @@ Clear Test Databases
     # Clear conversations except those tagged as fixtures
     Run Process    docker exec ${MONGO_CONTAINER} mongosh test_db --eval "db.conversations.deleteMany({\\$or: [{'is_fixture': {\\$exists: false}}, {'is_fixture': false}]})"    shell=True
 
+    # Delete old fixture conversations that don't have audio chunks (from pre-MongoDB-migration)
+    ${delete_fixtures_script}=    Set Variable    const fixturesWithoutChunks = db.conversations.find({'is_fixture': true}).toArray().filter(c => db.audio_chunks.countDocuments({conversation_id: c.conversation_id}) === 0).map(c => c.conversation_id); if (fixturesWithoutChunks.length > 0) { db.conversations.deleteMany({conversation_id: {$in: fixturesWithoutChunks}}); print('Deleted ' + fixturesWithoutChunks.length + ' old fixture(s)'); }
+    Run Process    docker    exec    ${MONGO_CONTAINER}    mongosh    test_db    --eval    ${delete_fixtures_script}    shell=True
+
+    # Clear audio chunks except those belonging to remaining fixture conversations
+    ${clear_chunks_script}=    Set Variable    const fixtureIds = db.conversations.find({'is_fixture': true}, {conversation_id: 1}).map(c => c.conversation_id); db.audio_chunks.deleteMany({conversation_id: {$nin: fixtureIds}})
+    Run Process    docker    exec    ${MONGO_CONTAINER}    mongosh    test_db    --eval    ${clear_chunks_script}    shell=True
+
     # Clear job references from remaining conversations to prevent "No such job" errors
     Run Process    docker exec ${MONGO_CONTAINER} mongosh test_db --eval "db.conversations.updateMany({}, {\\$unset: {'transcription_job_id': '', 'speaker_job_id': '', 'memory_job_id': ''}})"    shell=True
 
@@ -85,6 +93,7 @@ Clear All Test Data
     # Wipe all MongoDB collections
     Run Process    docker exec ${MONGO_CONTAINER} mongosh test_db --eval "db.users.deleteMany({})"    shell=True
     Run Process    docker exec ${MONGO_CONTAINER} mongosh test_db --eval "db.conversations.deleteMany({})"    shell=True
+    Run Process    docker exec ${MONGO_CONTAINER} mongosh test_db --eval "db.audio_chunks.deleteMany({})"    shell=True
     Log To Console    MongoDB completely cleared
 
     # Clear Qdrant

From d143fe7cef056705f6807ff0bcad2173fad72d59 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Mon, 12 Jan 2026 07:27:44 +0000
Subject: [PATCH 3/3] Refactor speaker recognition client to use in-memory
 audio data

- Updated methods to accept audio data as bytes instead of file paths, enhancing performance by eliminating disk I/O.
- Updated log messages to report the size of the in-memory audio (in MB) instead of a file path during speaker identification and diarization.
- Streamlined audio data handling in the `diarize_identify_match` and `diarize_and_identify` methods, ensuring consistency across the client.
- Removed temporary file handling, simplifying the audio processing workflow and reducing potential file system errors.
---
 .../speaker_recognition_client.py             | 327 ++++++++----------
 1 file changed, 151 insertions(+), 176 deletions(-)

diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py
index e829eff5..fb146c08 100644
--- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py
+++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py
@@ -67,14 +67,14 @@ def __init__(self, service_url: Optional[str] = None):
             logger.info("Speaker recognition client disabled (no service URL configured)")
 
     async def diarize_identify_match(
-        self, audio_path: str, transcript_data: Dict, user_id: Optional[str] = None
+        self, audio_data: bytes, transcript_data: Dict, user_id: Optional[str] = None
     ) -> Dict:
         """
         Perform diarization, speaker identification, and word-to-speaker matching.
         Routes to appropriate endpoint based on diarization source configuration.
 
         Args:
-            audio_path: Path to the audio file
+            audio_data: WAV audio data as bytes (in-memory)
             transcript_data: Dict containing words array and text from transcription
             user_id: Optional user ID for speaker identification
 
@@ -86,7 +86,7 @@ async def diarize_identify_match(
             return {}
 
         try:
-            logger.info(f"🎤 Identifying speakers for {audio_path}")
+            logger.info(f"🎤 Identifying speakers from in-memory audio ({len(audio_data) / 1024 / 1024:.2f} MB)")
 
             # Read diarization source from existing config system
             from advanced_omi_backend.config import load_diarization_settings_from_file
@@ -94,76 +94,75 @@ async def diarize_identify_match(
             diarization_source = config.get("diarization_source", "pyannote")
 
             async with aiohttp.ClientSession() as session:
-                # Prepare the audio file for upload
-                with open(audio_path, "rb") as audio_file:
-                    form_data = aiohttp.FormData()
-                    form_data.add_field(
-                        "file", audio_file, filename=Path(audio_path).name, content_type="audio/wav"
-                    )
-                    
-                    if diarization_source == "deepgram":
-                        # DEEPGRAM DIARIZATION PATH: We EXPECT transcript has speaker info from Deepgram
-                        # Only need speaker identification of existing segments
-                        logger.info("Using Deepgram diarization path - transcript should have speaker segments, identifying speakers")
-                        
-                        # TODO: Implement proper speaker identification for Deepgram segments
-                        # For now, use diarize-identify-match as fallback until we implement segment identification
-                        logger.warning("Deepgram segment identification not yet implemented, using diarize-identify-match as fallback")
-                        
-                        form_data.add_field("transcript_data", json.dumps(transcript_data))
-                        form_data.add_field("user_id", "1")  # TODO: Implement proper user mapping
-                        form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
-                        form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
-                        
-                        # Use /v1/diarize-identify-match endpoint as fallback
-                        endpoint = "/v1/diarize-identify-match"
-                        
-                    else:  # pyannote (default)
-                        # PYANNOTE PATH: Backend has transcript, need diarization + speaker identification
-                        logger.info("Using Pyannote path - diarizing backend transcript and identifying speakers")
-                        
-                        # Send existing transcript for diarization and speaker matching
-                        form_data.add_field("transcript_data", json.dumps(transcript_data))
-                        form_data.add_field("user_id", "1")  # TODO: Implement proper user mapping
-                        form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
-                        
-                        # Add pyannote diarization parameters
-                        form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
-                        form_data.add_field("collar", str(config.get("collar", 2.0)))
-                        form_data.add_field("min_duration_off", str(config.get("min_duration_off", 1.5)))
-                        if config.get("min_speakers"):
-                            form_data.add_field("min_speakers", str(config.get("min_speakers")))
-                        if config.get("max_speakers"):
-                            form_data.add_field("max_speakers", str(config.get("max_speakers")))
-                        
-                        # Use /v1/diarize-identify-match endpoint for backend integration
-                        endpoint = "/v1/diarize-identify-match"
-
-                    # Make the request to the consolidated endpoint
-                    request_url = f"{self.service_url}{endpoint}"
-                    logger.info(f"🎤 DEBUG: Making request to speaker service URL: {request_url}")
+                # Prepare the audio data for upload (no disk I/O!)
+                form_data = aiohttp.FormData()
+                form_data.add_field(
+                    "file", audio_data, filename="audio.wav", content_type="audio/wav"
+                )
 
-                    async with session.post(
-                        request_url,
-                        data=form_data,
-                        timeout=aiohttp.ClientTimeout(total=120),
-                    ) as response:
-                        logger.info(f"🎤 Speaker service response status: {response.status}")
+                if diarization_source == "deepgram":
+                    # DEEPGRAM DIARIZATION PATH: We EXPECT transcript has speaker info from Deepgram
+                    # Only need speaker identification of existing segments
+                    logger.info("Using Deepgram diarization path - transcript should have speaker segments, identifying speakers")
+
+                    # TODO: Implement proper speaker identification for Deepgram segments
+                    # For now, use diarize-identify-match as fallback until we implement segment identification
+                    logger.warning("Deepgram segment identification not yet implemented, using diarize-identify-match as fallback")
+
+                    form_data.add_field("transcript_data", json.dumps(transcript_data))
+                    form_data.add_field("user_id", "1")  # TODO: Implement proper user mapping
+                    form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
+                    form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
+
+                    # Use /v1/diarize-identify-match endpoint as fallback
+                    endpoint = "/v1/diarize-identify-match"
+
+                else:  # pyannote (default)
+                    # PYANNOTE PATH: Backend has transcript, need diarization + speaker identification
+                    logger.info("Using Pyannote path - diarizing backend transcript and identifying speakers")
+
+                    # Send existing transcript for diarization and speaker matching
+                    form_data.add_field("transcript_data", json.dumps(transcript_data))
+                    form_data.add_field("user_id", "1")  # TODO: Implement proper user mapping
+                    form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
+
+                    # Add pyannote diarization parameters
+                    form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
+                    form_data.add_field("collar", str(config.get("collar", 2.0)))
+                    form_data.add_field("min_duration_off", str(config.get("min_duration_off", 1.5)))
+                    if config.get("min_speakers"):
+                        form_data.add_field("min_speakers", str(config.get("min_speakers")))
+                    if config.get("max_speakers"):
+                        form_data.add_field("max_speakers", str(config.get("max_speakers")))
+
+                    # Use /v1/diarize-identify-match endpoint for backend integration
+                    endpoint = "/v1/diarize-identify-match"
+
+                # Make the request to the consolidated endpoint
+                request_url = f"{self.service_url}{endpoint}"
+                logger.info(f"🎤 DEBUG: Making request to speaker service URL: {request_url}")
+
+                async with session.post(
+                    request_url,
+                    data=form_data,
+                    timeout=aiohttp.ClientTimeout(total=120),
+                ) as response:
+                    logger.info(f"🎤 Speaker service response status: {response.status}")
 
-                        if response.status != 200:
-                            response_text = await response.text()
-                            logger.error(
-                                f"🎤 ❌ Speaker service returned status {response.status}: {response_text}"
-                            )
-                            return {}
+                    if response.status != 200:
+                        response_text = await response.text()
+                        logger.error(
+                            f"🎤 ❌ Speaker service returned status {response.status}: {response_text}"
+                        )
+                        return {}
 
-                        result = await response.json()
+                    result = await response.json()
 
-                        # Log basic result info
-                        num_segments = len(result.get("segments", []))
-                        logger.info(f"🎤 Speaker recognition returned {num_segments} segments")
+                    # Log basic result info
+                    num_segments = len(result.get("segments", []))
+                    logger.info(f"🎤 Speaker recognition returned {num_segments} segments")
 
-                        return result
+                    return result
 
         except ClientConnectorError as e:
             logger.error(f"🎤 Failed to connect to speaker recognition service: {e}")
@@ -179,13 +178,13 @@ async def diarize_identify_match(
             return {"error": "unknown_error", "message": str(e), "segments": []}
 
     async def diarize_and_identify(
-        self, audio_path: str, words: None, user_id: Optional[str] = None  # NOT IMPLEMENTED
+        self, audio_data: bytes, words: None, user_id: Optional[str] = None  # NOT IMPLEMENTED
     ) -> Dict:
         """
         Perform diarization and speaker identification using the speaker recognition service.
 
         Args:
-            audio_path: Path to the audio file
+            audio_data: WAV audio data as bytes (in-memory)
             words: Optional word-level data from transcription provider (for hints)
             user_id: Optional user ID for speaker identification
 
@@ -200,91 +199,86 @@ async def diarize_and_identify(
             return {}
 
         try:
-            logger.info(f"🎤 [DIARIZE] Starting diarization and identification for {audio_path}")
-
-            # Verify file exists and get info
-            if not os.path.exists(audio_path):
-                logger.error(f"🎤 [DIARIZE] ❌ Audio file does not exist: {audio_path}")
-                return {}
-
-            file_size = os.path.getsize(audio_path)
-            logger.info(f"🎤 [DIARIZE] Audio file size: {file_size} bytes")
+            logger.info(
+                f"🎤 [DIARIZE] Starting diarization and identification from in-memory audio "
+                f"({len(audio_data) / 1024 / 1024:.2f} MB)"
+            )
 
             # Call the speaker recognition service
             async with aiohttp.ClientSession() as session:
-                # Prepare the audio file for upload
-                with open(audio_path, "rb") as audio_file:
-                    form_data = aiohttp.FormData()
-                    form_data.add_field(
-                        "file", audio_file, filename=Path(audio_path).name, content_type="audio/wav"
-                    )
-                    # Get current diarization settings from config
-                    from advanced_omi_backend.config import load_diarization_settings_from_file
+                # Prepare the audio data for upload (no disk I/O!)
+                form_data = aiohttp.FormData()
+                form_data.add_field(
+                    "file", audio_data, filename="audio.wav", content_type="audio/wav"
+                )
 
-                    diarization_settings = load_diarization_settings_from_file()
+                # Get current diarization settings from config
+                from advanced_omi_backend.config import load_diarization_settings_from_file
 
-                    # Add all diarization parameters for the diarize-and-identify endpoint
-                    min_duration = diarization_settings.get("min_duration", 0.5)
-                    similarity_threshold = diarization_settings.get("similarity_threshold", 0.15)
-                    collar = diarization_settings.get("collar", 2.0)
-                    min_duration_off = diarization_settings.get("min_duration_off", 1.5)
+                diarization_settings = load_diarization_settings_from_file()
 
-                    form_data.add_field("min_duration", str(min_duration))
-                    form_data.add_field("similarity_threshold", str(similarity_threshold))
-                    form_data.add_field("collar", str(collar))
-                    form_data.add_field("min_duration_off", str(min_duration_off))
+                # Add all diarization parameters for the diarize-and-identify endpoint
+                min_duration = diarization_settings.get("min_duration", 0.5)
+                similarity_threshold = diarization_settings.get("similarity_threshold", 0.15)
+                collar = diarization_settings.get("collar", 2.0)
+                min_duration_off = diarization_settings.get("min_duration_off", 1.5)
 
-                    if diarization_settings.get("min_speakers"):
-                        form_data.add_field("min_speakers", str(diarization_settings["min_speakers"]))
-                    if diarization_settings.get("max_speakers"):
-                        form_data.add_field("max_speakers", str(diarization_settings["max_speakers"]))
+                form_data.add_field("min_duration", str(min_duration))
+                form_data.add_field("similarity_threshold", str(similarity_threshold))
+                form_data.add_field("collar", str(collar))
+                form_data.add_field("min_duration_off", str(min_duration_off))
 
-                    form_data.add_field("identify_only_enrolled", "false")
-                    # TODO: Implement proper user mapping between MongoDB ObjectIds and speaker service integer IDs
-                    # For now, hardcode to admin user (ID=1) since speaker service expects integer user_id
-                    form_data.add_field("user_id", "1")
-
-                    endpoint_url = f"{self.service_url}/diarize-and-identify"
-                    logger.info(f"🎤 [DIARIZE] Calling speaker service: {endpoint_url}")
-                    logger.info(
-                        f"🎤 [DIARIZE] Parameters: min_duration={min_duration}, "
-                        f"similarity_threshold={similarity_threshold}, collar={collar}, "
-                        f"min_duration_off={min_duration_off}, user_id=1"
-                    )
+                if diarization_settings.get("min_speakers"):
+                    form_data.add_field("min_speakers", str(diarization_settings["min_speakers"]))
+                if diarization_settings.get("max_speakers"):
+                    form_data.add_field("max_speakers", str(diarization_settings["max_speakers"]))
 
-                    # Make the request
-                    async with session.post(
-                        endpoint_url,
-                        data=form_data,
-                        timeout=aiohttp.ClientTimeout(total=120),
-                    ) as response:
-                        logger.info(f"🎤 [DIARIZE] Response status: {response.status}")
+                form_data.add_field("identify_only_enrolled", "false")
+                # TODO: Implement proper user mapping between MongoDB ObjectIds and speaker service integer IDs
+                # For now, hardcode to admin user (ID=1) since speaker service expects integer user_id
+                form_data.add_field("user_id", "1")
 
-                        if response.status != 200:
-                            response_text = await response.text()
-                            logger.warning(
-                                f"🎤 [DIARIZE] ❌ Speaker recognition service returned status {response.status}: {response_text}"
-                            )
-                            return {}
+                endpoint_url = f"{self.service_url}/diarize-and-identify"
+                logger.info(f"🎤 [DIARIZE] Calling speaker service: {endpoint_url}")
+                logger.info(
+                    f"🎤 [DIARIZE] Parameters: min_duration={min_duration}, "
+                    f"similarity_threshold={similarity_threshold}, collar={collar}, "
+                    f"min_duration_off={min_duration_off}, user_id=1"
+                )
 
-                        result = await response.json()
-                        segments_count = len(result.get('segments', []))
-                        logger.info(f"🎤 [DIARIZE] ✅ Speaker service returned {segments_count} segments")
-
-                        # Log details about identified speakers
-                        if segments_count > 0:
-                            identified_names = set()
-                            for seg in result.get('segments', []):
-                                identified_as = seg.get('identified_as')
-                                if identified_as and identified_as != 'Unknown':
-                                    identified_names.add(identified_as)
-
-                            if identified_names:
-                                logger.info(f"🎤 [DIARIZE] Identified speakers in segments: {identified_names}")
-                            else:
-                                logger.warning(f"🎤 [DIARIZE] No identified speakers found in {segments_count} segments")
+                # Make the request
+                async with session.post(
+                    endpoint_url,
+                    data=form_data,
+                    timeout=aiohttp.ClientTimeout(total=120),
+                ) as response:
+                    logger.info(f"🎤 [DIARIZE] Response status: {response.status}")
+
+                    if response.status != 200:
+                        response_text = await response.text()
+                        logger.warning(
+                            f"🎤 [DIARIZE] ❌ Speaker recognition service returned status {response.status}: {response_text}"
+                        )
+                        return {}
+
+                    result = await response.json()
+                    segments_count = len(result.get('segments', []))
+                    logger.info(f"🎤 [DIARIZE] ✅ Speaker service returned {segments_count} segments")
+
+                    # Log details about identified speakers
+                    if segments_count > 0:
+                        identified_names = set()
+                        for seg in result.get('segments', []):
+                            identified_as = seg.get('identified_as')
+                            if identified_as and identified_as != 'Unknown':
+                                identified_names.add(identified_as)
+
+                        if identified_names:
+                            logger.info(f"🎤 [DIARIZE] Identified speakers in segments: {identified_names}")
+                        else:
+                            logger.warning(f"🎤 [DIARIZE] No identified speakers found in {segments_count} segments")
 
-                        return result
+                    return result
 
         except ClientConnectorError as e:
             logger.error(f"🎤 [DIARIZE] ❌ Failed to connect to speaker recognition service at {self.service_url}: {e}")
@@ -495,11 +489,7 @@ async def check_if_enrolled_speaker_present(
             - enrolled_present: True if enrolled speaker detected, False otherwise
             - speaker_result: Full speaker recognition result dict with segments
         """
-        import tempfile
-        import uuid
-        from pathlib import Path
         from advanced_omi_backend.utils.audio_extraction import extract_audio_for_results
-        from advanced_omi_backend.utils.audio_utils import write_pcm_to_wav
 
         logger.info(f"🎤 [SPEAKER CHECK] Starting speaker check for session {session_id}")
         logger.info(f"🎤 [SPEAKER CHECK] Client: {client_id}, User: {user_id}")
@@ -516,44 +506,38 @@ async def check_if_enrolled_speaker_present(
             logger.warning("🎤 [SPEAKER CHECK] No enrolled speakers found, allowing conversation")
             return (True, {})  # If no enrolled speakers, allow all conversations
 
-        # Extract audio chunks
+        # Extract audio chunks (PCM format)
         logger.info(f"🎤 [SPEAKER CHECK] Extracting audio chunks from Redis...")
-        audio_data = await extract_audio_for_results(
+        pcm_data = await extract_audio_for_results(
             redis_client=redis_client,
             client_id=client_id,
             session_id=session_id,
             transcription_results=transcription_results
         )
 
-        if not audio_data:
+        if not pcm_data:
             logger.warning("🎤 [SPEAKER CHECK] No audio data extracted, skipping speaker check")
             return (False, {})
 
-        audio_size_kb = len(audio_data) / 1024
-        audio_duration_sec = len(audio_data) / (16000 * 2)  # 16kHz, 16-bit
+        audio_size_kb = len(pcm_data) / 1024
+        audio_duration_sec = len(pcm_data) / (16000 * 2)  # 16kHz, 16-bit
         logger.info(
             f"🎤 [SPEAKER CHECK] Extracted audio: {audio_size_kb:.1f} KB, ~{audio_duration_sec:.1f}s"
         )
 
-        # Write to temporary WAV file
-        temp_path = Path(tempfile.gettempdir()) / f"speech_check_{uuid.uuid4()}.wav"
-        logger.info(f"🎤 [SPEAKER CHECK] Writing audio to temp file: {temp_path}")
+        # Convert PCM to WAV in memory (no disk I/O!)
+        from advanced_omi_backend.utils.audio_utils import pcm_to_wav_bytes
 
-        try:
-            write_pcm_to_wav(audio_data, str(temp_path), sample_rate=16000, channels=1, sample_width=2)
+        logger.info(f"🎤 [SPEAKER CHECK] Converting PCM to WAV in memory...")
+        wav_data = pcm_to_wav_bytes(pcm_data, sample_rate=16000, channels=1, sample_width=2)
 
-            # Verify file was created
-            if temp_path.exists():
-                file_size = temp_path.stat().st_size
-                logger.info(f"🎤 [SPEAKER CHECK] Temp WAV file created: {file_size} bytes")
-            else:
-                logger.error(f"🎤 [SPEAKER CHECK] ❌ Temp WAV file was not created!")
-                return (False, {})
+        logger.info(f"🎤 [SPEAKER CHECK] WAV created in memory: {len(wav_data) / 1024 / 1024:.2f} MB")
 
-            # Run speaker recognition (diarize and identify)
-            logger.info(f"🎤 [SPEAKER CHECK] Calling diarize_and_identify with audio file...")
+        try:
+            # Run speaker recognition (diarize and identify) with in-memory audio
+            logger.info(f"🎤 [SPEAKER CHECK] Calling diarize_and_identify with in-memory audio...")
             result = await self.diarize_and_identify(
-                audio_path=str(temp_path),
+                audio_data=wav_data,  # Pass bytes directly, no temp file!
                 words=None,
                 user_id=user_id
             )
@@ -600,15 +584,6 @@ async def check_if_enrolled_speaker_present(
             logger.error(f"🎤 [SPEAKER CHECK] ❌ Speaker recognition check failed: {e}", exc_info=True)
             return (False, {})  # Fail closed - don't create conversation on error
 
-        finally:
-            # Clean up temp file
-            try:
-                if temp_path.exists():
-                    temp_path.unlink()
-                    logger.debug(f"🎤 [SPEAKER CHECK] Cleaned up temp file: {temp_path}")
-            except Exception as cleanup_error:
-                logger.warning(f"🎤 [SPEAKER CHECK] Failed to remove temp file {temp_path}: {cleanup_error}")
-
     async def health_check(self) -> bool:
         """
         Check if the speaker recognition service is healthy and responding.