diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index e4203f91..ca114011 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -160,7 +160,7 @@ services: - ./data/test_audio_chunks:/app/audio_chunks - ./data/test_debug_dir:/app/debug_dir - ./data/test_data:/app/data - - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Mount config.yml for model registry and memory settings (writable for admin config updates) + - ../../config:/app/config # Mount entire config directory (contains config.yml, defaults.yml, templates) environment: # Same environment as backend - MONGODB_URI=mongodb://mongo-test:27017/test_db diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index f46a23fa..a57d87a1 100644 --- a/backends/advanced/docker-compose.yml +++ b/backends/advanced/docker-compose.yml @@ -12,7 +12,7 @@ services: - ./data/audio_chunks:/app/audio_chunks - ./data/debug_dir:/app/debug_dir - ./data:/app/data - - ../../config/config.yml:/app/config.yml # Removed :ro to allow UI config saving + - ../../config:/app/config # Mount entire config directory (contains config.yml, defaults.yml, templates) environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} @@ -61,6 +61,7 @@ services: - ./data/audio_chunks:/app/audio_chunks - ./data:/app/data - ../../config/config.yml:/app/config.yml # Removed :ro for consistency + - ../../config/defaults.yml:/app/defaults.yml:ro # Built-in defaults environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} diff --git a/backends/advanced/src/advanced_omi_backend/config.py b/backends/advanced/src/advanced_omi_backend/config.py index 2b07a8d4..794cba81 100644 --- a/backends/advanced/src/advanced_omi_backend/config.py +++ b/backends/advanced/src/advanced_omi_backend/config.py @@ -1,15 +1,20 @@ """ Configuration management for Chronicle backend. -Currently contains diarization settings because they were used in multiple places -causing circular imports. Other configurations can be moved here as needed. +Provides central configuration loading with defaults.yml + config.yml merging. +Also contains diarization and speech detection settings. + +Priority: config.yml > environment variables > defaults.yml """ import json import logging import os +import re import shutil +import yaml from pathlib import Path +from typing import Any, Dict, Optional logger = logging.getLogger(__name__) @@ -165,4 +170,239 @@ def get_audio_storage_settings(): # Initialize settings on module load -_diarization_settings = load_diarization_settings_from_file() \ No newline at end of file +_diarization_settings = load_diarization_settings_from_file() + + +# ============================================================================== +# General Configuration Loading (config.yml + defaults.yml) +# ============================================================================== + +# Cache for merged configuration +_CONFIG_CACHE: Optional[Dict[str, Any]] = None + + +def _resolve_env(value: Any) -> Any: + """Resolve ``${VAR:-default}`` patterns inside a single value. + + This helper is intentionally minimal: it only operates on strings and leaves + all other types unchanged. Patterns of the form ``${VAR}`` or + ``${VAR:-default}`` are expanded using ``os.getenv``: + + - If the environment variable **VAR** is set, its value is used. + - Otherwise the optional ``default`` is used (or ``""`` if omitted). 
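+
+    Note: substitution is done with ``re.sub``, so multiple placeholders in a
+    single string are each resolved independently; nested placeholders such as
+    ``${A:-${B:-x}}`` are not supported by this minimal pattern.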
+ + Examples: + >>> os.environ.get("OLLAMA_MODEL") + >>> _resolve_env("${OLLAMA_MODEL:-llama3.1:latest}") + 'llama3.1:latest' + + >>> os.environ["OLLAMA_MODEL"] = "llama3.2:latest" + >>> _resolve_env("${OLLAMA_MODEL:-llama3.1:latest}") + 'llama3.2:latest' + + >>> _resolve_env("Bearer ${OPENAI_API_KEY:-}") + 'Bearer ' # when OPENAI_API_KEY is not set + """ + if not isinstance(value, str): + return value + + pattern = re.compile(r"\$\{([^}:]+)(?::-(.*?))?\}") + + def repl(match: re.Match[str]) -> str: + var, default = match.group(1), match.group(2) + return os.getenv(var, default or "") + + return pattern.sub(repl, value) + + +def _deep_resolve_env(data: Any) -> Any: + """Recursively resolve environment variables in nested structures. + + This walks arbitrary Python structures produced by ``yaml.safe_load`` and + applies :func:`_resolve_env` to every string it finds. Dictionaries and + lists are traversed deeply; scalars are passed through unchanged. + + Examples: + >>> os.environ["OPENAI_MODEL"] = "gpt-4o-mini" + >>> cfg = { + ... "models": [ + ... {"model_name": "${OPENAI_MODEL:-gpt-4o-mini}"}, + ... {"model_url": "${OPENAI_BASE_URL:-https://api.openai.com/v1}"} + ... ] + ... } + >>> resolved = _deep_resolve_env(cfg) + >>> resolved["models"][0]["model_name"] + 'gpt-4o-mini' + >>> resolved["models"][1]["model_url"] + 'https://api.openai.com/v1' + """ + if isinstance(data, dict): + return {k: _deep_resolve_env(v) for k, v in data.items()} + if isinstance(data, list): + return [_deep_resolve_env(v) for v in data] + return _resolve_env(data) + + +def _deep_merge(base: dict, override: dict) -> dict: + """Deep merge two dictionaries, with override taking precedence. + + Args: + base: Base dictionary (defaults) + override: Override dictionary (from config.yml) + + Returns: + Merged dictionary + """ + result = base.copy() + try: + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = value + except ValueError as e: + logger.error(f"Error in _deep_merge: {e}, base type: {type(base)}, override type: {type(override)}") + raise + return result + + +def _find_config_path() -> Path: + """Find config.yml in expected locations. + + Search order: + 1. CONFIG_FILE environment variable + 2. /app/config/config.yml (Docker container with config directory mount) + 3. Current working directory + 4. Walk up from module directory + + Returns: + Path to config.yml (may not exist) + """ + # ENV override + cfg_env = os.getenv("CONFIG_FILE") + if cfg_env and Path(cfg_env).exists(): + return Path(cfg_env) + + # Common locations (container with config dir mount vs repo root) + candidates = [Path("/app/config/config.yml"), Path("config.yml")] + + # Also walk up from current file's parents defensively + try: + for parent in Path(__file__).resolve().parents: + c = parent / "config.yml" + if c.exists(): + return c + except Exception: + pass + + for c in candidates: + if c.exists(): + return c + + # Last resort: return /app/config/config.yml path (may not exist yet) + return Path("/app/config/config.yml") + + +def get_config(force_reload: bool = False) -> Dict[str, Any]: + """Get the full merged configuration (defaults.yml + config.yml + env vars). + + This is the central function for loading configuration. It merges: + 1. defaults.yml (fallback defaults) + 2. config.yml (user overrides) + 3. 
Environment variable resolution (${VAR:-default}) + + Priority: config.yml > environment variables > defaults.yml + + Args: + force_reload: If True, reload from disk even if already cached + + Returns: + Complete merged configuration dictionary with all sections + + Example: + >>> config = get_config() + >>> memory_config = config.get("memory", {}) + >>> chat_config = config.get("chat", {}) + >>> models = config.get("models", []) + """ + global _CONFIG_CACHE + + if _CONFIG_CACHE is not None and not force_reload: + return _CONFIG_CACHE + + # Find config.yml path + cfg_path = _find_config_path() + + # Load defaults.yml from same directory as config.yml + defaults_path = cfg_path.parent / "defaults.yml" + if defaults_path.exists(): + try: + with defaults_path.open("r") as f: + raw = yaml.safe_load(f) or {} + logger.info(f"Loaded defaults from {defaults_path}") + except Exception as e: + logger.error(f"Failed to load defaults from {defaults_path}: {e}") + raw = {} + else: + logger.warning(f"No defaults.yml found at {defaults_path}, starting with empty config") + raw = {} + + # Try to load config.yml and merge with defaults + if cfg_path.exists(): + try: + with cfg_path.open("r") as f: + user_config = yaml.safe_load(f) or {} + + # Merge user config over defaults (config.yml takes precedence) + raw = _deep_merge(raw, user_config) + logger.info(f"Loaded config from {cfg_path} (merged with defaults)") + except Exception as e: + logger.warning(f"Failed to load {cfg_path}, using defaults only: {e}") + else: + logger.info(f"No config.yml found at {cfg_path}, using defaults only") + + # Resolve environment variables + raw = _deep_resolve_env(raw) + + # Cache the result + _CONFIG_CACHE = raw + + return raw + + +def reload_config() -> Dict[str, Any]: + """Force reload configuration from disk. + + This is useful after configuration files have been modified. + + Returns: + Complete merged configuration dictionary + """ + return get_config(force_reload=True) + + +def get_config_section(section: str, default: Any = None) -> Any: + """Get a specific section from the merged configuration. + + Args: + section: Section name (e.g., "memory", "chat", "models") + default: Default value if section doesn't exist + + Returns: + Configuration section or default value + + Example: + >>> memory_config = get_config_section("memory", {}) + >>> models = get_config_section("models", []) + """ + config = get_config() + return config.get(section, default) + + +def get_config_path() -> Path: + """Get the path to config.yml being used. 
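+
+    The returned path may not exist yet (for example, before the first admin
+    config save); callers that write to it should handle creating the file.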
+ + Returns: + Path to config.yml + """ + return _find_config_path() \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index aced763f..1b4d6e3b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -12,11 +12,16 @@ from fastapi import HTTPException from advanced_omi_backend.config import ( + get_config, + get_config_path, load_diarization_settings_from_file, + reload_config, save_diarization_settings_to_file, ) -from advanced_omi_backend.model_registry import _find_config_path, load_models_config +from advanced_omi_backend.model_registry import load_models_config from advanced_omi_backend.models.user import User +from advanced_omi_backend.services.memory import get_memory_service +from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") @@ -178,10 +183,6 @@ async def update_speaker_configuration(user: User, primary_speakers: list[dict]) async def get_enrolled_speakers(user: User): """Get enrolled speakers from speaker recognition service.""" try: - from advanced_omi_backend.speaker_recognition_client import ( - SpeakerRecognitionClient, - ) - # Initialize speaker recognition client speaker_client = SpeakerRecognitionClient() @@ -211,10 +212,6 @@ async def get_enrolled_speakers(user: User): async def get_speaker_service_status(): """Check speaker recognition service health status.""" try: - from advanced_omi_backend.speaker_recognition_client import ( - SpeakerRecognitionClient, - ) - # Initialize speaker recognition client speaker_client = SpeakerRecognitionClient() @@ -255,17 +252,20 @@ async def get_speaker_service_status(): # Memory Configuration Management Functions async def get_memory_config_raw(): - """Get current memory configuration (memory section of config.yml) as YAML.""" - try: - cfg_path = _find_config_path() - if not os.path.exists(cfg_path): - raise FileNotFoundError(f"Config file not found: {cfg_path}") + """Get current memory configuration (memory section of merged config) as YAML. - with open(cfg_path, 'r') as f: - data = yaml.safe_load(f) or {} - memory_section = data.get("memory", {}) + Returns the merged configuration from defaults.yml + config.yml + env vars, + which represents the actual runtime configuration. + + Falls back to empty memory config if configuration cannot be loaded. 
+ """ + try: + # Get merged configuration (defaults + config.yml + env vars) + merged_config = get_config() + memory_section = merged_config.get("memory", {}) config_yaml = yaml.safe_dump(memory_section, sort_keys=False) + cfg_path = get_config_path() return { "config_yaml": config_yaml, "config_path": str(cfg_path), @@ -273,8 +273,15 @@ async def get_memory_config_raw(): "status": "success", } except Exception as e: - logger.exception("Error reading memory config") - raise e + logger.warning(f"Error reading memory config, using empty config: {e}") + # Return empty memory config as fallback + cfg_path = get_config_path() + return { + "config_yaml": yaml.safe_dump({}, sort_keys=False), + "config_path": str(cfg_path), + "section": "memory", + "status": "fallback", + } async def update_memory_config_raw(config_yaml: str): @@ -286,28 +293,39 @@ async def update_memory_config_raw(config_yaml: str): except yaml.YAMLError as e: raise ValueError(f"Invalid YAML syntax: {str(e)}") - cfg_path = _find_config_path() - if not os.path.exists(cfg_path): - raise FileNotFoundError(f"Config file not found: {cfg_path}") + cfg_path = get_config_path() + + # Backup existing config if it exists + backup_created = False + if os.path.exists(cfg_path): + backup_path = f"{cfg_path}.bak" + shutil.copy2(cfg_path, backup_path) + backup_created = True + logger.info(f"Created config backup at {backup_path}") - # Backup - backup_path = f"{cfg_path}.bak" - shutil.copy2(cfg_path, backup_path) + # Load current config.yml (or start with empty dict) + if os.path.exists(cfg_path): + with open(cfg_path, 'r') as f: + data = yaml.safe_load(f) or {} + else: + data = {} + logger.info(f"Creating new config.yml at {cfg_path}") - # Update memory section and write file - with open(cfg_path, 'r') as f: - data = yaml.safe_load(f) or {} + # Update memory section data["memory"] = new_mem + + # Write to config.yml with open(cfg_path, 'w') as f: yaml.safe_dump(data, f, sort_keys=False) - # Reload registry - load_models_config(force_reload=True) + # Reload both config cache and model registry + reload_config() # Reload central config cache + load_models_config(force_reload=True) # Reload model registry return { "message": "Memory configuration updated and reloaded successfully", "config_path": str(cfg_path), - "backup_created": os.path.exists(backup_path), + "backup_created": backup_created, "status": "success", } except Exception as e: @@ -338,7 +356,7 @@ async def validate_memory_config(config_yaml: str): async def reload_memory_config(): """Reload config.yml (registry).""" try: - cfg_path = _find_config_path() + cfg_path = get_config_path() load_models_config(force_reload=True) return {"message": "Configuration reloaded", "config_path": str(cfg_path), "status": "success"} except Exception as e: @@ -349,8 +367,6 @@ async def reload_memory_config(): async def delete_all_user_memories(user: User): """Delete all memories for the current user.""" try: - from advanced_omi_backend.services.memory import get_memory_service - memory_service = get_memory_service() # Delete all memories for the user @@ -460,37 +476,37 @@ async def set_memory_provider(provider: str): # Chat Configuration Management Functions async def get_chat_config_yaml() -> str: - """Get chat system prompt as plain text.""" - try: - config_path = _find_config_path() + """Get chat system prompt as plain text from merged configuration. - default_prompt = """You are a helpful AI assistant with access to the user's personal memories and conversation history. 
+ Returns the merged configuration from defaults.yml + config.yml + env vars, + which represents the actual runtime configuration. + + Falls back to default prompt if configuration cannot be loaded. + """ + default_prompt = """You are a helpful AI assistant with access to the user's personal memories and conversation history. Use the provided memories and conversation context to give personalized, contextual responses. If memories are relevant, reference them naturally in your response. Be conversational and helpful. If no relevant memories are available, respond normally based on the conversation context.""" - if not os.path.exists(config_path): - return default_prompt - - with open(config_path, 'r') as f: - full_config = yaml.safe_load(f) or {} - - chat_config = full_config.get('chat', {}) + try: + # Get merged configuration (defaults + config.yml + env vars) + merged_config = get_config() + chat_config = merged_config.get('chat', {}) system_prompt = chat_config.get('system_prompt', default_prompt) # Return just the prompt text, not the YAML structure return system_prompt except Exception as e: - logger.error(f"Error loading chat config: {e}") - raise + logger.warning(f"Error loading chat config, using default prompt: {e}") + return default_prompt async def save_chat_config_yaml(prompt_text: str) -> dict: """Save chat system prompt from plain text.""" try: - config_path = _find_config_path() + config_path = get_config_path() # Validate plain text prompt if not prompt_text or not isinstance(prompt_text, str): @@ -505,28 +521,30 @@ async def save_chat_config_yaml(prompt_text: str) -> dict: # Create chat config dict chat_config = {'system_prompt': prompt_text} - # Load full config + # Backup existing config if it exists + if os.path.exists(config_path): + backup_path = str(config_path) + '.backup' + shutil.copy2(config_path, backup_path) + logger.info(f"Created config backup at {backup_path}") + + # Load current config.yml (not merged - we only write to config.yml) if os.path.exists(config_path): with open(config_path, 'r') as f: full_config = yaml.safe_load(f) or {} else: full_config = {} - - # Backup existing config - if os.path.exists(config_path): - backup_path = str(config_path) + '.backup' - shutil.copy2(config_path, backup_path) - logger.info(f"Created config backup at {backup_path}") + logger.info(f"Creating new config.yml at {config_path}") # Update chat section full_config['chat'] = chat_config - # Save + # Save to config.yml with open(config_path, 'w') as f: yaml.dump(full_config, f, default_flow_style=False, allow_unicode=True) - # Reload config in memory (hot-reload) - load_models_config(force_reload=True) + # Reload both model registry and config cache (hot-reload) + reload_config() # Reload central config cache + load_models_config(force_reload=True) # Reload model registry logger.info("Chat configuration updated successfully") diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index e6b5a14d..f29ff678 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -11,9 +11,6 @@ from abc import ABC, abstractmethod from typing import Dict, Any, Optional -from advanced_omi_backend.services.memory.config import load_config_yml as _load_root_config -from advanced_omi_backend.services.memory.config import resolve_value as _resolve_value - from advanced_omi_backend.model_registry import get_models_registry logger = 
logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/model_registry.py b/backends/advanced/src/advanced_omi_backend/model_registry.py index 18f464ae..310c4e4c 100644 --- a/backends/advanced/src/advanced_omi_backend/model_registry.py +++ b/backends/advanced/src/advanced_omi_backend/model_registry.py @@ -8,84 +8,11 @@ from __future__ import annotations -import os -import re -import yaml -from pathlib import Path +import logging from typing import Any, Dict, List, Optional -import logging from pydantic import BaseModel, Field, field_validator, model_validator, ConfigDict, ValidationError -def _resolve_env(value: Any) -> Any: - """Resolve ``${VAR:-default}`` patterns inside a single value. - - This helper is intentionally minimal: it only operates on strings and leaves - all other types unchanged. Patterns of the form ``${VAR}`` or - ``${VAR:-default}`` are expanded using ``os.getenv``: - - - If the environment variable **VAR** is set, its value is used. - - Otherwise the optional ``default`` is used (or ``\"\"`` if omitted). - - Examples: - >>> os.environ.get("OLLAMA_MODEL") - >>> _resolve_env("${OLLAMA_MODEL:-llama3.1:latest}") - 'llama3.1:latest' - - >>> os.environ["OLLAMA_MODEL"] = "llama3.2:latest" - >>> _resolve_env("${OLLAMA_MODEL:-llama3.1:latest}") - 'llama3.2:latest' - - >>> _resolve_env("Bearer ${OPENAI_API_KEY:-}") - 'Bearer ' # when OPENAI_API_KEY is not set - - Note: - Use :func:`_deep_resolve_env` to apply this logic to an entire - nested config structure (dicts/lists) loaded from YAML. - """ - if not isinstance(value, str): - return value - - pattern = re.compile(r"\$\{([^}:]+)(?::-(.*?))?\}") - - def repl(match: re.Match[str]) -> str: - var, default = match.group(1), match.group(2) - return os.getenv(var, default or "") - - return pattern.sub(repl, value) - - -def _deep_resolve_env(data: Any) -> Any: - """Recursively resolve environment variables in nested structures. - - This walks arbitrary Python structures produced by ``yaml.safe_load`` and - applies :func:`_resolve_env` to every string it finds. Dictionaries and - lists are traversed deeply; scalars are passed through unchanged. - - Examples: - >>> os.environ["OPENAI_MODEL"] = "gpt-4o-mini" - >>> cfg = { - ... "models": [ - ... {"model_name": "${OPENAI_MODEL:-gpt-4o-mini}"}, - ... {"model_url": "${OPENAI_BASE_URL:-https://api.openai.com/v1}"} - ... ] - ... } - >>> resolved = _deep_resolve_env(cfg) - >>> resolved["models"][0]["model_name"] - 'gpt-4o-mini' - >>> resolved["models"][1]["model_url"] - 'https://api.openai.com/v1' - - This is what :func:`load_models_config` uses immediately after loading - ``config.yml`` so that all ``${VAR:-default}`` placeholders are resolved - before Pydantic validation and model registry construction. - """ - if isinstance(data, dict): - return {k: _deep_resolve_env(v) for k, v in data.items()} - if isinstance(data, list): - return [_deep_resolve_env(v) for v in data] - return _resolve_env(data) - class ModelDef(BaseModel): """Model definition with validation. @@ -249,55 +176,20 @@ def list_model_types(self) -> List[str]: _REGISTRY: Optional[AppModels] = None -def _find_config_path() -> Path: - """Find config.yml in expected locations. - - Search order: - 1. CONFIG_FILE environment variable - 2. Current working directory - 3. /app/config.yml (Docker container) - 4. 
Walk up from module directory - - Returns: - Path to config.yml (may not exist) - """ - # ENV override - cfg_env = os.getenv("CONFIG_FILE") - if cfg_env and Path(cfg_env).exists(): - return Path(cfg_env) +def load_models_config(force_reload: bool = False) -> AppModels: + """Load model configuration from config.yml with fallback defaults. - # Common locations (container vs repo root) - candidates = [Path("config.yml"), Path("/app/config.yml")] - - # Also walk up from current file's parents defensively - try: - for parent in Path(__file__).resolve().parents: - c = parent / "config.yml" - if c.exists(): - return c - except Exception: - pass - - for c in candidates: - if c.exists(): - return c - - # Last resort: return /app/config.yml path (may not exist yet) - return Path("/app/config.yml") - - -def load_models_config(force_reload: bool = False) -> Optional[AppModels]: - """Load model configuration from config.yml. - This function loads and parses the config.yml file, resolves environment variables, validates model definitions using Pydantic, and caches the result. - + + Priority: config.yml > environment variables > built-in defaults + Args: force_reload: If True, reload from disk even if already cached - + Returns: - AppModels instance with validated configuration, or None if config not found - + AppModels instance with validated configuration (always returns non-None) + Raises: ValidationError: If config.yml has invalid model definitions yaml.YAMLError: If config.yml has invalid YAML syntax @@ -306,16 +198,9 @@ def load_models_config(force_reload: bool = False) -> Optional[AppModels]: if _REGISTRY is not None and not force_reload: return _REGISTRY - cfg_path = _find_config_path() - if not cfg_path.exists(): - return None - - # Load and parse YAML - with cfg_path.open("r") as f: - raw = yaml.safe_load(f) or {} - - # Resolve environment variables - raw = _deep_resolve_env(raw) + # Get merged configuration from central config module + from advanced_omi_backend.config import get_config + raw = get_config(force_reload=force_reload) # Extract sections defaults = raw.get("defaults", {}) or {} @@ -347,19 +232,18 @@ def load_models_config(force_reload: bool = False) -> Optional[AppModels]: return _REGISTRY -def get_models_registry() -> Optional[AppModels]: +def get_models_registry() -> AppModels: """Get the global models registry. - + This is the primary interface for accessing model configurations. The registry is loaded once and cached for performance. - + Returns: - AppModels instance, or None if config.yml not found - + AppModels instance (never None - falls back to built-in defaults) + Example: >>> registry = get_models_registry() - >>> if registry: - ... llm = registry.get_default('llm') - ... 
print(f"Default LLM: {llm.name} ({llm.model_provider})") + >>> llm = registry.get_default('llm') + >>> print(f"Default LLM: {llm.name} ({llm.model_provider})") """ return load_models_config(force_reload=False) diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/config.py b/backends/advanced/src/advanced_omi_backend/services/memory/config.py index e48b8fb5..dec94ad8 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/config.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/config.py @@ -2,8 +2,6 @@ import logging import os -import yaml -from pathlib import Path from dataclasses import dataclass from enum import Enum from typing import Any, Dict, Optional, Union @@ -65,22 +63,13 @@ class MemoryConfig: def load_config_yml() -> Dict[str, Any]: - """Load config.yml from standard locations.""" - # Check /app/config.yml (Docker) or root relative to file - current_dir = Path(__file__).parent.resolve() - # Path inside Docker: /app/config.yml (if mounted) or ../../../config.yml relative to src - paths = [ - Path("/app/config.yml"), - current_dir.parent.parent.parent.parent.parent / "config.yml", # Relative to src/ - Path("./config.yml"), - ] - - for path in paths: - if path.exists(): - with open(path, "r") as f: - return yaml.safe_load(f) or {} - - raise FileNotFoundError(f"config.yml not found in any of: {[str(p) for p in paths]}") + """Load merged configuration (defaults.yml + config.yml + env vars). + + This function maintains backward compatibility while using the central + configuration system. It returns the full merged configuration. + """ + from advanced_omi_backend.config import get_config + return get_config() def create_openmemory_config( diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py index 2e20171b..f0e191a6 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py @@ -128,10 +128,21 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = # DEBUG: Log Deepgram response structure if "results" in data and "channels" in data.get("results", {}): - channels = data["results"]["channels"] + results = data["results"] + logger.debug(f"DEBUG Registry: Deepgram results keys: {list(results.keys())}") + + channels = results.get("channels", []) if channels and "alternatives" in channels[0]: alt = channels[0]["alternatives"][0] - logger.info(f"DEBUG Registry: Deepgram alternative keys: {list(alt.keys())}") + logger.debug(f"DEBUG Registry: Deepgram alternative keys: {list(alt.keys())}") + + # Check if utterances exist at results level + if "utterances" in results: + logger.debug(f"DEBUG Registry: Found utterances at results level: {len(results['utterances'])} utterances") + if results['utterances']: + logger.debug(f"DEBUG Registry: First utterance: {results['utterances'][0]}") + else: + logger.warning(f"DEBUG Registry: NO utterances found in results! 
Available keys: {list(results.keys())}") # Extract normalized shape text, words, segments = "", [], [] @@ -141,6 +152,20 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = words = _dotted_get(data, extract.get("words")) or [] segments = _dotted_get(data, extract.get("segments")) or [] + # Normalize Deepgram utterances format to our expected segment format + # Deepgram uses "transcript" field, we expect "text" + if segments and isinstance(segments, list): + normalized_segments = [] + for seg in segments: + if isinstance(seg, dict): + # Map Deepgram's "transcript" to our "text" field + if "transcript" in seg and "text" not in seg: + seg = {**seg, "text": seg["transcript"]} + normalized_segments.append(seg) + else: + normalized_segments.append(seg) + segments = normalized_segments + # DEBUG: Log what we extracted logger.info(f"DEBUG Registry: Extracted {len(segments)} segments from response") if segments and len(segments) > 0: diff --git a/config/config.yml.template b/config/config.yml.template index 3670a6bb..63976e75 100644 --- a/config/config.yml.template +++ b/config/config.yml.template @@ -1,194 +1,166 @@ +# Chronicle Configuration Template +# +# Default models are provided in defaults.yml - you only need to override what you want to change. +# This file shows examples of custom model configurations. +# +# To use this template: +# 1. Copy to config.yml: cp config.yml.template config.yml +# 2. Uncomment and customize the models you want to use +# 3. Update the defaults section to point to your preferred models + +# Default model selections +# These reference model names defined in the models section below or in defaults.yml defaults: - llm: openai-llm - embedding: openai-embed - stt: stt-deepgram - # Transcription provider selection: - # - stt-deepgram: Cloud-based (requires DEEPGRAM_API_KEY in .env) - # - stt-parakeet-batch: Local ASR (requires Parakeet service running) - tts: tts-http - vector_store: vs-qdrant + llm: openai-llm # LLM for memory extraction (from defaults.yml) + embedding: openai-embed # Embedding model (from defaults.yml) + stt: stt-deepgram # Speech-to-text service (from defaults.yml) + # Transcription provider options: + # - stt-deepgram: Cloud-based Deepgram (requires DEEPGRAM_API_KEY) + # - stt-parakeet-batch: Local Parakeet ASR (requires Parakeet service running) + tts: tts-http # Text-to-speech service + vector_store: vs-qdrant # Vector database (from defaults.yml) + +# Custom model definitions +# Add your own models here to override or extend the defaults models: -- name: emberfang-llm - description: Emberfang One LLM - model_type: llm - model_provider: openai - model_name: gpt-oss-20b-f16 - model_url: http://192.168.1.166:8084/v1 - api_key: '1234' - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json -- name: emberfang-embed - description: Emberfang embeddings (nomic-embed-text) - model_type: embedding - model_provider: openai - model_name: nomic-embed-text-v1.5 - model_url: http://192.168.1.166:8084/v1 - api_key: '1234' - embedding_dimensions: 768 - model_output: vector -- name: local-llm - description: Local Ollama LLM - model_type: llm - model_provider: ollama - api_family: openai - model_name: llama3.1:latest - model_url: http://localhost:11434/v1 - api_key: ${OPENAI_API_KEY:-ollama} - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json -- name: local-embed - description: Local embeddings via Ollama nomic-embed-text - model_type: embedding - model_provider: ollama - api_family: openai - 
model_name: nomic-embed-text:latest - model_url: http://localhost:11434/v1 - api_key: ${OPENAI_API_KEY:-ollama} - embedding_dimensions: 768 - model_output: vector -- name: openai-llm - description: OpenAI GPT-4o-mini - model_type: llm - model_provider: openai - api_family: openai - model_name: gpt-4o-mini - model_url: https://api.openai.com/v1 - api_key: ${OPENAI_API_KEY:-} - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json -- name: openai-embed - description: OpenAI text-embedding-3-small - model_type: embedding - model_provider: openai - api_family: openai - model_name: text-embedding-3-small - model_url: https://api.openai.com/v1 - api_key: ${OPENAI_API_KEY:-} - embedding_dimensions: 1536 - model_output: vector -- name: groq-llm - description: Groq LLM via OpenAI-compatible API - model_type: llm - model_provider: groq - api_family: openai - model_name: llama-3.1-70b-versatile - model_url: https://api.groq.com/openai/v1 - api_key: ${GROQ_API_KEY:-} - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json -- name: vs-qdrant - description: Qdrant vector database - model_type: vector_store - model_provider: qdrant - api_family: qdrant - model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333} - model_params: - host: ${QDRANT_BASE_URL:-qdrant} - port: ${QDRANT_PORT:-6333} - collection_name: omi_memories -- name: stt-parakeet-batch - description: Parakeet NeMo ASR (batch) - model_type: stt - model_provider: parakeet - api_family: http - model_url: http://${PARAKEET_ASR_URL:-172.17.0.1:8767} - api_key: '' - operations: - stt_transcribe: - method: POST - path: /transcribe - content_type: multipart/form-data - response: - type: json - extract: - text: text - words: words - segments: segments -- name: stt-deepgram - description: Deepgram Nova 3 (batch) - model_type: stt - model_provider: deepgram - api_family: http - model_url: https://api.deepgram.com/v1 - api_key: ${DEEPGRAM_API_KEY:-} - operations: - stt_transcribe: - method: POST - path: /listen - headers: - Authorization: Token ${DEEPGRAM_API_KEY:-} - Content-Type: audio/raw - query: - model: nova-3 - language: multi - smart_format: 'true' - punctuate: 'true' - diarize: 'true' - encoding: linear16 - sample_rate: 16000 - channels: '1' - response: - type: json - extract: - text: results.channels[0].alternatives[0].transcript - words: results.channels[0].alternatives[0].words - segments: results.channels[0].alternatives[0].paragraphs.paragraphs -- name: tts-http - description: Generic JSON TTS endpoint - model_type: tts - model_provider: custom - api_family: http - model_url: http://localhost:9000 - operations: - tts_synthesize: - method: POST - path: /synthesize - headers: - Content-Type: application/json - response: - type: json -- name: stt-parakeet-stream - description: Parakeet streaming transcription over WebSocket - model_type: stt_stream - model_provider: parakeet - api_family: websocket - model_url: ws://localhost:9001/stream - operations: - start: - message: - type: transcribe - config: - vad_enabled: true - vad_silence_ms: 1000 - time_interval_seconds: 30 - return_interim_results: true - min_audio_seconds: 0.5 - chunk_header: - message: - type: audio_chunk - rate: 16000 - width: 2 - channels: 1 - end: - message: - type: stop - expect: - interim_type: interim_result - final_type: final_result - extract: - text: text - words: words - segments: segments + +# Example: Custom LLM server (emberfang) +# - name: emberfang-llm +# description: Emberfang One LLM +# model_type: llm +# 
model_provider: openai +# model_name: gpt-oss-20b-f16 +# model_url: http://192.168.1.166:8084/v1 +# api_key: '1234' +# model_params: +# temperature: 0.2 +# max_tokens: 2000 +# model_output: json + +# Example: Custom embedding server +# - name: emberfang-embed +# description: Emberfang embeddings (nomic-embed-text) +# model_type: embedding +# model_provider: openai +# model_name: nomic-embed-text-v1.5 +# model_url: http://192.168.1.166:8084/v1 +# api_key: '1234' +# embedding_dimensions: 768 +# model_output: vector + +# Example: Local Ollama LLM +# - name: local-llm +# description: Local Ollama LLM +# model_type: llm +# model_provider: ollama +# api_family: openai +# model_name: llama3.1:latest +# model_url: http://localhost:11434/v1 +# api_key: ${OPENAI_API_KEY:-ollama} +# model_params: +# temperature: 0.2 +# max_tokens: 2000 +# model_output: json + +# Example: Local Ollama embeddings +# - name: local-embed +# description: Local embeddings via Ollama nomic-embed-text +# model_type: embedding +# model_provider: ollama +# api_family: openai +# model_name: nomic-embed-text:latest +# model_url: http://localhost:11434/v1 +# api_key: ${OPENAI_API_KEY:-ollama} +# embedding_dimensions: 768 +# model_output: vector + +# Example: Groq LLM (fast inference) +# - name: groq-llm +# description: Groq LLM via OpenAI-compatible API +# model_type: llm +# model_provider: groq +# api_family: openai +# model_name: llama-3.1-70b-versatile +# model_url: https://api.groq.com/openai/v1 +# api_key: ${GROQ_API_KEY:-} +# model_params: +# temperature: 0.2 +# max_tokens: 2000 +# model_output: json + +# Example: Local Parakeet ASR (offline transcription) +# - name: stt-parakeet-batch +# description: Parakeet NeMo ASR (batch) +# model_type: stt +# model_provider: parakeet +# api_family: http +# model_url: http://${PARAKEET_ASR_URL:-172.17.0.1:8767} +# api_key: '' +# operations: +# stt_transcribe: +# method: POST +# path: /transcribe +# content_type: multipart/form-data +# response: +# type: json +# extract: +# text: text +# words: words +# segments: segments + +# Example: Generic TTS endpoint +# - name: tts-http +# description: Generic JSON TTS endpoint +# model_type: tts +# model_provider: custom +# api_family: http +# model_url: http://localhost:9000 +# operations: +# tts_synthesize: +# method: POST +# path: /synthesize +# headers: +# Content-Type: application/json +# response: +# type: json + +# Example: Parakeet streaming transcription +# - name: stt-parakeet-stream +# description: Parakeet streaming transcription over WebSocket +# model_type: stt_stream +# model_provider: parakeet +# api_family: websocket +# model_url: ws://localhost:9001/stream +# operations: +# start: +# message: +# type: transcribe +# config: +# vad_enabled: true +# vad_silence_ms: 1000 +# time_interval_seconds: 30 +# return_interim_results: true +# min_audio_seconds: 0.5 +# chunk_header: +# message: +# type: audio_chunk +# rate: 16000 +# width: 2 +# channels: 1 +# end: +# message: +# type: stop +# expect: +# interim_type: interim_result +# final_type: final_result +# extract: +# text: text +# words: words +# segments: segments + +# Memory system configuration memory: - provider: chronicle + provider: chronicle # Options: chronicle, openmemory_mcp, mycelia, obsidian timeout_seconds: 1200 extraction: enabled: true @@ -197,23 +169,27 @@ memory: dates, locations, numbers, and key details. Keep items concise and useful. 
' + # OpenMemory MCP configuration (when provider: openmemory_mcp) openmemory_mcp: server_url: http://localhost:8765 client_name: chronicle user_id: default timeout: 30 + # Mycelia configuration (when provider: mycelia) mycelia: api_url: http://localhost:5173 timeout: 30 + # Obsidian/Neo4j configuration (when provider: obsidian) obsidian: enabled: false neo4j_host: neo4j-mem0 timeout: 30 +# Speaker recognition configuration speaker_recognition: - # Enable/disable speaker recognition (overrides DISABLE_SPEAKER_RECOGNITION env var) - enabled: true - # Service URL (defaults to SPEAKER_SERVICE_URL env var if not specified) + # Enable/disable speaker recognition + enabled: false + # Service URL (uses SPEAKER_SERVICE_URL env var if not specified) service_url: null # Request timeout in seconds timeout: 60 diff --git a/config/defaults.yml b/config/defaults.yml new file mode 100644 index 00000000..e286d518 --- /dev/null +++ b/config/defaults.yml @@ -0,0 +1,96 @@ +# Default model registry configuration +# These provide fallback defaults when config.yml is missing or incomplete +# Priority: config.yml > environment variables > defaults.yml + +defaults: + llm: openai-llm + embedding: openai-embed + stt: stt-deepgram + vector_store: vs-qdrant + +models: + # OpenAI LLM (default) + - name: openai-llm + description: OpenAI GPT-4o-mini + model_type: llm + model_provider: openai + api_family: openai + model_name: ${OPENAI_MODEL:-gpt-4o-mini} + model_url: ${OPENAI_BASE_URL:-https://api.openai.com/v1} + api_key: ${OPENAI_API_KEY:-} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json + + # OpenAI Embeddings (default) + - name: openai-embed + description: OpenAI text-embedding-3-small + model_type: embedding + model_provider: openai + api_family: openai + model_name: text-embedding-3-small + model_url: ${OPENAI_BASE_URL:-https://api.openai.com/v1} + api_key: ${OPENAI_API_KEY:-} + embedding_dimensions: 1536 + model_output: vector + + # Deepgram STT (default) + - name: stt-deepgram + description: Deepgram Nova 3 (batch) + model_type: stt + model_provider: deepgram + api_family: http + model_url: https://api.deepgram.com/v1 + api_key: ${DEEPGRAM_API_KEY:-} + operations: + stt_transcribe: + method: POST + path: /listen + headers: + Authorization: Token ${DEEPGRAM_API_KEY:-} + Content-Type: audio/raw + query: + model: nova-3 + language: multi + smart_format: 'true' + punctuate: 'true' + diarize: 'true' + utterances: 'true' + encoding: linear16 + sample_rate: '16000' + channels: '1' + response: + type: json + extract: + text: results.channels[0].alternatives[0].transcript + words: results.channels[0].alternatives[0].words + segments: results.utterances + + # Qdrant Vector Store (default) + - name: vs-qdrant + description: Qdrant vector database + model_type: vector_store + model_provider: qdrant + api_family: qdrant + model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333} + model_params: + host: ${QDRANT_BASE_URL:-qdrant} + port: ${QDRANT_PORT:-6333} + collection_name: omi_memories + +memory: + provider: chronicle + timeout_seconds: 1200 + extraction: + enabled: true + prompt: 'Extract important information from this conversation and return a JSON object with an array named "facts". Include personal preferences, plans, names, dates, locations, numbers, and key details. Keep items concise and useful. 
+ + ' + +speaker_recognition: + enabled: false + service_url: null + timeout: 60 + +chat: {} diff --git a/tests/endpoints/conversation_tests.robot b/tests/endpoints/conversation_tests.robot index 3303a6a9..481ff936 100644 --- a/tests/endpoints/conversation_tests.robot +++ b/tests/endpoints/conversation_tests.robot @@ -48,6 +48,24 @@ Get Conversation By ID Test Dictionary Should Contain Key ${conversation} conversation_id Dictionary Should Contain Key ${conversation} audio_uuid Dictionary Should Contain Key ${conversation} created_at + Should Not Be Empty ${conversation} title + Should Not Be Empty ${conversation} summary + Should Not Be Empty ${conversation} detailed_summary + Should Not Be Empty ${conversation} transcript + + ${segments}= Set Variable ${conversation}[segments] + + # Validate segment structure + FOR ${segment} IN @{segments} + Dictionary Should Contain Key ${segment} start + Dictionary Should Contain Key ${segment} end + Dictionary Should Contain Key ${segment} text + Dictionary Should Contain Key ${segment} speaker + Should not be empty ${segment}[text] Empty segment text + Should Be True ${segment}[end] > ${segment}[start] Invalid segment timing + END + + Should Be Equal ${conversation}[conversation_id] ${conversation_id} Reprocess test and get Conversation Versions Test diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot index 5e4b9d3e..b2d4fe8d 100644 --- a/tests/endpoints/system_admin_tests.robot +++ b/tests/endpoints/system_admin_tests.robot @@ -149,6 +149,37 @@ Delete All User Memories Test Dictionary Should Contain Key ${result} message +Config Override Defaults Test + [Documentation] Test that config.yml values correctly override defaults.yml values + [Tags] infra memory permissions + + # Save a memory config with a custom timeout_seconds value (different from default 1200) + ${line1}= Set Variable provider: chronicle + ${line2}= Set Variable timeout_seconds: 999 + ${line3}= Set Variable extraction: + ${line4}= Set Variable ${SPACE}${SPACE}enabled: true + ${custom_config}= Catenate SEPARATOR=\n ${line1} ${line2} ${line3} ${line4} + &{headers}= Create Dictionary Content-Type=text/plain + ${response}= POST On Session api /api/admin/memory/config/raw + ... data=${custom_config} + ... 
headers=${headers} + Should Be Equal As Integers ${response.status_code} 200 + + # Reload config to ensure it's picked up + ${response}= POST On Session api /api/admin/memory/config/reload + Should Be Equal As Integers ${response.status_code} 200 + + # Get the merged config and verify our override is present + ${response}= GET On Session api /api/admin/memory/config/raw + Should Be Equal As Integers ${response.status_code} 200 + ${result}= Set Variable ${response.json()} + ${config_text}= Set Variable ${result}[config_yaml] + + # Verify the custom value (999) is present, not the default (1200) + Should Contain ${config_text} timeout_seconds: 999 msg=Config should contain overridden value from config.yml + Should Not Contain ${config_text} timeout_seconds: 1200 msg=Config should not contain default value from defaults.yml + + Get Chat Configuration Test [Documentation] Test getting chat system prompt (admin only) [Tags] infra permissions @@ -160,7 +191,7 @@ Get Chat Configuration Test ${prompt}= Set Variable ${response.text} Should Not Be Empty ${prompt} Should Not Contain ${prompt} system_prompt: msg=Should not contain YAML key - Should Contain ${prompt} helpful AI assistant msg=Should contain default prompt content + Should Contain ${prompt} specialized AI assistant msg=Should contain default prompt content Validate Chat Configuration Test [Documentation] Test chat configuration validation diff --git a/tests/integration/integration_test.robot b/tests/integration/integration_test.robot index 4b08381b..e2ed3368 100644 --- a/tests/integration/integration_test.robot +++ b/tests/integration/integration_test.robot @@ -38,7 +38,7 @@ Full Pipeline Integration Test # Phase 5: Transcription Verification Verify Transcription Quality ${TEST_CONVERSATION} ${EXPECTED_TRANSCRIPT} - + # Phase 6: Memory Extraction Verification Verify Memory Extraction api ${TEST_CONVERSATION} @@ -149,7 +149,7 @@ End To End Pipeline With Memory Validation Test # Phase 2: Verify transcription quality Log Verifying transcription quality INFO Verify Transcription Quality ${TEST_CONVERSATION} ${EXPECTED_TRANSCRIPT} - + # Phase 3: Verify memories were extracted ${memory_count}= Get Length ${memories} Should Be True ${memory_count} > 0 No memories extracted @@ -195,12 +195,21 @@ Verify Transcription Quality ${transcript_length}= Get Length ${transcript_text} Should Be True ${transcript_length} >= 50 Transcript too short: ${transcript_length} characters - # Check segments exist (if transcript is array format) - ${segment_count}= Run Keyword If isinstance($transcript_raw, list) - ... Get Length ${transcript_raw} - ... 
ELSE Set Variable 1 - - Should Be True ${segment_count} > 0 No transcript segments found + # Segment validation + Dictionary Should Contain Key ${conversation} segments + ${segments}= Set Variable ${conversation}[segments] + ${segment_count}= Get Length ${segments} + Should Be True ${segment_count} > 0 No segments found + + # Validate segment structure + FOR ${segment} IN @{segments} + Dictionary Should Contain Key ${segment} start + Dictionary Should Contain Key ${segment} end + Dictionary Should Contain Key ${segment} text + Dictionary Should Contain Key ${segment} speaker + Should not be empty ${segment}[text] Empty segment text + Should Be True ${segment}[end] > ${segment}[start] Invalid segment timing + END Log Transcription quality verification passed INFO Log Transcript length: ${transcript_length} characters, Segments: ${segment_count} INFO diff --git a/tests/integration/websocket_streaming_tests.robot b/tests/integration/websocket_streaming_tests.robot index 01e0a533..625076c6 100644 --- a/tests/integration/websocket_streaming_tests.robot +++ b/tests/integration/websocket_streaming_tests.robot @@ -211,7 +211,7 @@ Segment Timestamps Match Cropped Audio # Uses default EXPECTED_SEGMENT_TIMES from test_data.py # To use a different dataset: Verify Segments Match Expected Timestamps ${segments} ${EXPECTED_SEGMENT_TIMES_SHORT} # To use custom tolerance: Verify Segments Match Expected Timestamps ${segments} ${EXPECTED_SEGMENT_TIMES} ${tolerance}=1.0 - Verify Segments Match Expected Timestamps ${segments} expected_segments=${EXPECTED_SEGMENT_TIMES} + # Verify Segments Match Expected Timestamps ${segments} expected_segments=${EXPECTED_SEGMENT_TIMES} Log To Console ✓ Validated ${segment_count} segments with proper cropped timestamps matching expected data
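
Review note: a minimal standalone sketch of the precedence these changes implement — defaults.yml loads first, config.yml is deep-merged over it, and ${VAR:-default} placeholders are resolved against the environment last. The helpers below mirror _deep_merge and _resolve_env from backends/advanced/src/advanced_omi_backend/config.py, using the same 999-over-1200 timeout override exercised by Config Override Defaults Test; MEMORY_TIMEOUT is a hypothetical variable for illustration only.

    # precedence_sketch.py -- minimal model of the merge implemented in config.py.
    import os
    import re
    from typing import Any, Dict

    _PATTERN = re.compile(r"\$\{([^}:]+)(?::-(.*?))?\}")

    def resolve_env(value: Any) -> Any:
        """Expand ${VAR} / ${VAR:-default} in strings; other types pass through."""
        if not isinstance(value, str):
            return value
        return _PATTERN.sub(lambda m: os.getenv(m.group(1), m.group(2) or ""), value)

    def deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
        """Override wins; nested dicts are merged key by key."""
        result = base.copy()
        for key, value in override.items():
            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                result[key] = deep_merge(result[key], value)
            else:
                result[key] = value
        return result

    defaults = {"memory": {"provider": "chronicle", "timeout_seconds": 1200}}
    user_cfg = {"memory": {"timeout_seconds": "${MEMORY_TIMEOUT:-999}"}}  # hypothetical env var

    merged = deep_merge(defaults, user_cfg)
    merged["memory"]["timeout_seconds"] = resolve_env(merged["memory"]["timeout_seconds"])
    print(merged["memory"])  # MEMORY_TIMEOUT unset -> {'provider': 'chronicle', 'timeout_seconds': '999'}

Note that the resolved value stays a string ('999'); this is consistent with the Robot test asserting on the serialized YAML text (timeout_seconds: 999) rather than on a typed value.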