diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index 9a3a4c39..d6ccef2e 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -79,7 +79,7 @@ Always use type hints. Use `TYPE_CHECKING` guard for circular imports:
 ```python
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
-    from wingmen.open_ai_wingman import OpenAiWingman
+    from wingman import Wingman
 ```
 
 ### Async/Await
@@ -128,7 +128,6 @@ api_key = await self.retrieve_secret(
 - `wingman_core.py` - FastAPI app, WebSocket server, REST endpoints
 - `Tower.py` - Wingman factory and lifecycle manager
-- `Wingman.py` - Base class for all Wingmen
-- `OpenAiWingman.py` - Primary LLM-powered Wingman implementation
+- `Wingman.py` - Unified Wingman class (formerly split into base Wingman and OpenAiWingman subclass)
 - `SkillRegistry` - Progressive tool disclosure for skills
 - `CapabilityRegistry` - Unified skills + MCP discovery
 - `SecretKeeper` - Secure API key management
diff --git a/README.md b/README.md
index 0a8d6433..813e9317 100644
--- a/README.md
+++ b/README.md
@@ -304,8 +304,7 @@ If you want to read some code first and understand how it all works, we recommen
 - `http://127.0.0.1:49111/docs` - The OpenAPI (ex: Swagger) spec
 - `wingman_core.py` - most of the public API endpoints that Wingman AI exposes
 - The config files in `%APP_DATA%\ShipBit\WingmanAI\[version]` to get an idea of what's configurable.
-- `Wingman.py` - the base class for all Wingmen
-- `OpenAIWingman.py` - derived from Wingman, using all the providers
+- `Wingman.py` - the unified Wingman class supporting all providers
 - `Tower.py` - the factory that creates Wingmen
 
 If you're planning to develop a major feature or new integration, please contact us on [Discord](https://www.shipbit.de/discord) first and let us know what you're up to. We'll be happy to help you get started and make sure your work isn't wasted because we're already working on something similar.
diff --git a/api/interface.py b/api/interface.py
index 28d58ef1..df10413e 100644
--- a/api/interface.py
+++ b/api/interface.py
@@ -528,7 +528,7 @@ class FeaturesConfig(BaseModel):
     conversation_provider: ConversationProvider
     remember_messages: Optional[int] = None
     image_generation_provider: ImageGenerationProvider
-    use_generic_instant_responses: bool
+    # use_generic_instant_responses removed in v2.1.0
 
 
 class AudioFile(BaseModel):
@@ -960,14 +960,9 @@ def __getitem__(self, item):
     def __setitem__(self, key, value):
         self.extra_properties[key] = value
 
-    custom_properties: Optional[list[CustomProperty]] = None
-    """You can add custom properties here to use in your custom wingman class."""
-
     disabled: Optional[bool] = False
     """Set this to true if you want to disable this wingman. You can also just remove it from the config."""
 
-    custom_class: Optional[CustomClassConfig] = None
-    """If you want to use a custom Wingman (Python) class, you can specify it here."""
     name: str
     """The "friendly" name of this Wingman. Can be changed by the user."""
     description: Optional[str] = None
diff --git a/providers/edge.py b/providers/edge.py
index 6b996656..d5367a64 100644
--- a/providers/edge.py
+++ b/providers/edge.py
@@ -1,6 +1,12 @@
 from os import path
 from edge_tts import Communicate
 from api.interface import EdgeTtsConfig, SoundConfig
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    TtsProvider,
+)
 from services.audio_player import AudioPlayer
 from services.file import get_writable_dir
 from services.printr import Printr
@@ -11,20 +17,37 @@
 printr = Printr()
 
 
-class Edge:
-    def __init__(self):
+@capabilities(ProviderCapability.TTS)
+class Edge(BaseProvider, TtsProvider):
+    """Edge TTS provider using Microsoft Edge's free text-to-speech."""
+
+    def __init__(self, config: EdgeTtsConfig):
+        BaseProvider.__init__(self, config=config, api_key=None)
         self.random_voices = {}
 
-    async def play_audio(
+    # Protocol implementation: TtsProvider
+    async def synthesize(
         self,
         text: str,
-        config: EdgeTtsConfig,
-        sound_config: SoundConfig,
         audio_player: AudioPlayer,
+        sound_config: SoundConfig,
         wingman_name: str,
-    ):
+        **kwargs
+    ) -> None:
+        """Synthesize speech using Edge TTS.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration
+            wingman_name: Name of wingman
+            **kwargs: Unused (kept for protocol compatibility)
+
+        Returns:
+            None - Audio is played directly via audio_player
+        """
         communicate, output_file = await self.__generate_speech(
-            text=text, voice=config.voice
+            text=text, voice=self.config.voice
         )
         audio, sample_rate = audio_player.get_audio_from_file(output_file)
diff --git a/providers/elevenlabs.py b/providers/elevenlabs.py
index e28b6079..ff0c5b37 100644
--- a/providers/elevenlabs.py
+++ b/providers/elevenlabs.py
@@ -3,13 +3,23 @@
 from elevenlabslib import User, GenerationOptions, PlaybackOptions, SFXOptions
 from api.enums import SoundEffect, WingmanInitializationErrorType
 from api.interface import ElevenlabsConfig, SoundConfig, WingmanInitializationError
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    TtsProvider,
+)
 from services.audio_player import AudioPlayer
 from services.sound_effects import get_sound_effects
 from services.websocket_user import WebSocketUser
 
 
-class ElevenLabs:
-    def __init__(self, api_key: str, wingman_name: str):
+@capabilities(ProviderCapability.TTS)
+class ElevenLabs(BaseProvider, TtsProvider):
+    """ElevenLabs TTS provider with high-quality voice synthesis."""
+
+    def __init__(self, config: ElevenlabsConfig, api_key: str, wingman_name: str):
+        BaseProvider.__init__(self, config=config, api_key=api_key)
         self.wingman_name = wingman_name
         self.user = User(api_key)
 
@@ -30,15 +40,29 @@ def validate_config(
         )
         return errors
 
-    async def play_audio(
+    # Protocol implementation: TtsProvider
+    async def synthesize(
         self,
         text: str,
-        config: ElevenlabsConfig,
-        sound_config: SoundConfig,
         audio_player: AudioPlayer,
+        sound_config: SoundConfig,
         wingman_name: str,
-        stream: bool,
-    ):
+        **kwargs
+    ) -> None:
+        """Synthesize speech using ElevenLabs with streaming support.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration
+            wingman_name: Name of wingman
+            **kwargs: Additional parameters (stream, etc.)
+
+        Returns:
+            None - Audio is played directly via audio_player
+        """
+        config = self.config
+        stream = kwargs.get("stream", False)
         voice = (
             self.user.get_voice_by_ID(config.voice.id)
             if config.voice.id
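All TTS providers in this diff converge on a single `synthesize` signature, with provider-specific options (such as ElevenLabs' `stream`) moved into `**kwargs`. A minimal sketch of a call site under the new protocol — the surrounding `config`, `api_key`, and `audio_player` objects are assumed from the wingman context, not shown in the PR:

```python
# Hypothetical call site; config, api_key, and audio_player come from the wingman.
elevenlabs = ElevenLabs(
    config=config.elevenlabs, api_key=api_key, wingman_name="ATC"
)

# The same call shape works for Edge, Hume, and Inworld - only the kwargs differ.
await elevenlabs.synthesize(
    text="Cleared for takeoff.",
    audio_player=audio_player,
    sound_config=config.sound,
    wingman_name="ATC",
    stream=True,  # ElevenLabs-specific; read via kwargs.get("stream", False)
)
```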
diff --git a/providers/faster_whisper.py b/providers/faster_whisper.py
index 61aa0e15..c9a431de 100644
--- a/providers/faster_whisper.py
+++ b/providers/faster_whisper.py
@@ -9,20 +9,31 @@
     FasterWhisperSttConfig,
     WingmanInitializationError,
 )
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    SttProvider,
+)
 from services.printr import Printr
 
 MODELS_DIR = "faster-whisper-models"
 
 
-class FasterWhisper:
+@capabilities(ProviderCapability.STT)
+class FasterWhisper(BaseProvider, SttProvider):
     def __init__(
         self,
-        settings: FasterWhisperSettings,
-        app_root_path: str,
-        app_is_bundled: bool,
+        config: FasterWhisperSettings,
+        api_key: str = None,  # Not used but required by BaseProvider
+        app_root_path: str = None,
+        app_is_bundled: bool = False,
+        wingman_name: str = None,  # For hotword assembly
     ):
+        BaseProvider.__init__(self, config=config, api_key=api_key)
         self.printr = Printr()
-        self.settings = settings
+        self.settings = config  # Alias for backward compatibility
+        self.wingman_name = wingman_name
         self.is_windows = platform.system() == "Windows"
         if self.is_windows:
@@ -56,7 +67,66 @@ def __update_model(self):
                 f"Failed to initialize FasterWhisper with model {model_file}. Error: {e}"
             )
 
-    def transcribe(
+    # Protocol implementation: SttProvider
+    async def transcribe(self, filename: str, **kwargs) -> str:
+        """Transcribe audio using FasterWhisper model.
+
+        Args:
+            filename: Path to audio file
+            **kwargs: May include 'config' (FasterWhisperSttConfig) and 'hotwords' (list[str])
+
+        Returns:
+            Transcribed text or None on error
+        """
+        # Get config from kwargs or use default from self.config
+        config = kwargs.get("config", self.config if hasattr(self, "config") else None)
+        if not isinstance(config, FasterWhisperSttConfig):
+            # If config is FasterWhisperSettings, use default values
+            config = FasterWhisperSttConfig(
+                beam_size=5,
+                best_of=5,
+                temperature=0.0,
+                no_speech_threshold=0.6,
+                language=None,
+                multilingual=True,
+                language_detection_threshold=0.5,
+                hotwords=[],
+                additional_hotwords=[],
+            )
+
+        # Assemble hotwords from multiple sources
+        hotwords: list[str] = []
+
+        # Add wingman name if available
+        if self.wingman_name:
+            hotwords.append(self.wingman_name)
+
+        # Add default hotwords from config
+        if hasattr(self.settings, "hotwords") and self.settings.hotwords:
+            hotwords.extend(self.settings.hotwords)
+
+        # Add additional hotwords from config
+        if (
+            hasattr(self.settings, "additional_hotwords")
+            and self.settings.additional_hotwords
+        ):
+            hotwords.extend(self.settings.additional_hotwords)
+
+        # Add any hotwords passed in kwargs (for backward compatibility)
+        if "hotwords" in kwargs and kwargs["hotwords"]:
+            hotwords.extend(kwargs["hotwords"])
+
+        # Remove duplicates
+        hotwords = list(set(hotwords))
+
+        result = self._transcribe_sync(
+            config=config,
+            filename=filename,
+            hotwords=hotwords,
+        )
+        return result.text if result else None
+
+    def _transcribe_sync(
         self,
         config: FasterWhisperSttConfig,
         filename: str,
diff --git a/providers/google.py b/providers/google.py
index 630281e2..e7c01e02 100644
--- a/providers/google.py
+++ b/providers/google.py
@@ -1,14 +1,31 @@
 import re
-from google import genai
+from typing import Any
+import google.genai as genai
 from google.genai import types
 from openai import APIStatusError, OpenAI
+from openai.types.chat import ChatCompletion
+from api.interface import GoogleConfig
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    LlmProvider,
+)
 from services.printr import Printr
 
 printr = Printr()
 
 
-class GoogleGenAI:
-    def __init__(self, api_key: str):
+@capabilities(ProviderCapability.LLM)
+class GoogleGenAI(BaseProvider, LlmProvider):
+    """Google Gemini provider supporting LLM capabilities.
+
+    Uses Google's Generative AI API with OpenAI-compatible interface.
+    """
+
+    def __init__(self, config: GoogleConfig, api_key: str):
+        BaseProvider.__init__(self, config=config, api_key=api_key)
+
         self.client = genai.Client(
             api_key=api_key,
             http_options=types.HttpOptions(api_version="v1alpha"),
@@ -80,13 +97,178 @@ def get_minimal_reasoning_by_model(self, model_name: str) -> dict:
         # Don't send reasoning_effort unless we know it's supported.
         return {}
 
-    def ask(
-        self,
-        messages: list[dict[str, str]],
-        model: str,
-        stream: bool = False,
-        tools: list[dict[str, any]] = None,
-    ):
+    def _sanitize_messages(
+        self, messages: list[Any], model_name: str | None
+    ) -> list[dict[str, Any]]:
+        """Sanitize messages for Google Gemini OpenAI-compatible endpoint.
+
+        Google's OpenAI-compatible endpoint is stricter than OpenAI's:
+        - `content` must not be null (use empty string)
+        - tool-related fields should be preserved as-is
+
+        Wingman may pass either dicts or OpenAI message objects; normalize both.
+        """
+
+        # Gemini 3 models reject certain synthetic tool-call structures we use
+        # to emulate "instant activation" command executions in history.
+        # We strip ONLY the forced execute_command tool-call messages that
+        # are immediately completed with an "OK" tool response.
+        strip_forced_instant_tool_calls = bool(
+            model_name
+            and model_name.lower().startswith("gemini-3")
+            and "gemini" in model_name.lower()
+        )
+
+        forced_tool_call_ids: set[str] = set()
+        sanitized: list[dict[str, Any]] = []
+        for msg in messages:
+            if isinstance(msg, dict):
+                msg_copy = msg.copy()
+                if msg_copy.get("content") is None:
+                    msg_copy["content"] = ""
+
+                # Identify forced instant tool-calls to strip for Gemini 3
+                if strip_forced_instant_tool_calls:
+                    tool_calls = msg_copy.get("tool_calls")
+                    if (
+                        msg_copy.get("role") == "assistant"
+                        and (
+                            msg_copy.get("content") == ""
+                            or msg_copy.get("content") is None
+                        )
+                        and tool_calls
+                    ):
+                        try:
+                            is_execute_command_only = True
+                            for tc in tool_calls:
+                                fn = (
+                                    tc.get("function")
+                                    if isinstance(tc, dict)
+                                    else getattr(tc, "function", None)
+                                )
+                                fn_name = None
+                                if isinstance(fn, dict):
+                                    fn_name = fn.get("name")
+                                else:
+                                    fn_name = getattr(fn, "name", None)
+                                if fn_name != "execute_command":
+                                    is_execute_command_only = False
+                                    break
+                            if is_execute_command_only:
+                                # collect ids from all tool calls
+                                for tc in tool_calls:
+                                    tc_id = (
+                                        tc.get("id")
+                                        if isinstance(tc, dict)
+                                        else getattr(tc, "id", None)
+                                    )
+                                    if tc_id:
+                                        forced_tool_call_ids.add(str(tc_id))
+                                continue
+                        except (TypeError, AttributeError, ValueError):
+                            pass
+
+                if (
+                    msg_copy.get("role") == "tool"
+                    and msg_copy.get("content") == "OK"
+                    and msg_copy.get("tool_call_id") in forced_tool_call_ids
+                ):
+                    continue
+
+                sanitized.append(msg_copy)
+                continue
+
+            msg_dict: dict[str, Any] = {
+                "role": msg.role if hasattr(msg, "role") else msg.get("role"),
+                "content": (
+                    msg.content if hasattr(msg, "content") else msg.get("content")
+                ),
+            }
+
+            if msg_dict.get("content") is None:
+                msg_dict["content"] = ""
+
+            tool_calls = getattr(msg, "tool_calls", None)
+            if tool_calls:
+                msg_dict["tool_calls"] = tool_calls
+            elif isinstance(msg, dict) and "tool_calls" in msg:
+                msg_dict["tool_calls"] = msg["tool_calls"]
+
+            if strip_forced_instant_tool_calls and msg_dict.get("role") == "assistant":
+                # Skip assistant forced tool-call messages entirely
+                if msg_dict.get("tool_calls") and msg_dict.get("content") == "":
+                    try:
+                        is_execute_command_only = True
+                        for tc in msg_dict["tool_calls"]:
+                            fn = (
+                                tc.get("function")
+                                if isinstance(tc, dict)
+                                else getattr(tc, "function", None)
+                            )
+                            fn_name = None
+                            if isinstance(fn, dict):
+                                fn_name = fn.get("name")
+                            else:
+                                fn_name = getattr(fn, "name", None)
+                            if fn_name != "execute_command":
+                                is_execute_command_only = False
+                                break
+                        if is_execute_command_only:
+                            for tc in msg_dict["tool_calls"]:
+                                tc_id = (
+                                    tc.get("id")
+                                    if isinstance(tc, dict)
+                                    else getattr(tc, "id", None)
+                                )
+                                if tc_id:
+                                    forced_tool_call_ids.add(str(tc_id))
+                            continue
+                    except (TypeError, AttributeError, ValueError):
+                        pass
+
+            tool_call_id = getattr(msg, "tool_call_id", None)
+            if tool_call_id:
+                msg_dict["tool_call_id"] = tool_call_id
+            elif isinstance(msg, dict) and "tool_call_id" in msg:
+                msg_dict["tool_call_id"] = msg["tool_call_id"]
+
+            if strip_forced_instant_tool_calls and msg_dict.get("role") == "tool":
+                if (
+                    msg_dict.get("content") == "OK"
+                    and msg_dict.get("tool_call_id") in forced_tool_call_ids
+                ):
+                    continue
+
+            name = getattr(msg, "name", None)
+            if name:
+                msg_dict["name"] = name
+            elif isinstance(msg, dict) and "name" in msg:
+                msg_dict["name"] = msg["name"]
+
+            sanitized.append(msg_dict)
+
+        return sanitized
+
+    # Protocol implementation: LlmProvider
+    async def complete(
+        self, messages: list[dict], tools: list[dict] = None, **kwargs
+    ) -> ChatCompletion | None:
+        """Generate completion using Google Gemini.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            tools: Optional list of tool definitions for function calling
+            **kwargs: Additional parameters (model, stream, etc.)
+
+        Returns:
+            ChatCompletion object from Google's OpenAI-compatible API, or None on error
+        """
+        model = kwargs.get("model", self.config.conversation_model)
+        stream = kwargs.get("stream", False)
+
+        messages = self._sanitize_messages(messages, model)
+
+        # Direct implementation - no legacy method needed
         try:
             reasoning_params = self.get_minimal_reasoning_by_model(model)
             if not tools:
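`_sanitize_messages` is easiest to follow with a concrete history. Assuming a Gemini 3 model name (hypothetical here), the forced `execute_command` tool-call pair is dropped entirely; for other models only the null `content` is normalized:

```python
# Illustrative history (hypothetical data): a synthetic "instant activation"
# tool call that was auto-completed with an "OK" tool response.
messages = [
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [{"id": "call_1", "function": {"name": "execute_command"}}],
    },
    {"role": "tool", "tool_call_id": "call_1", "content": "OK"},
    {"role": "user", "content": "Open the map."},
]

provider = GoogleGenAI(config=google_config, api_key=api_key)

# Gemini 3: both synthetic messages are stripped before the request is sent.
assert provider._sanitize_messages(messages, "gemini-3-flash") == [
    {"role": "user", "content": "Open the map."}
]
```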
diff --git a/providers/hume.py b/providers/hume.py
index 902493e5..d5e7a6dc 100644
--- a/providers/hume.py
+++ b/providers/hume.py
@@ -13,6 +13,12 @@
     VoiceInfo,
     WingmanInitializationError,
 )
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    TtsProvider,
+)
 from services.audio_player import AudioPlayer
 from services.file import get_writable_dir
 from services.printr import Printr
@@ -22,8 +28,12 @@
 OUTPUT_FILE: str = "hume.mp3"
 
 
-class Hume:
-    def __init__(self, api_key: str, wingman_name: str):
+@capabilities(ProviderCapability.TTS)
+class Hume(BaseProvider, TtsProvider):
+    """Hume AI TTS provider with emotional expression."""
+
+    def __init__(self, config: HumeConfig, api_key: str, wingman_name: str):
+        BaseProvider.__init__(self, config=config, api_key=api_key)
         self.hume = AsyncHumeClient(api_key=api_key)
         self.wingman_name = wingman_name
         self.secret_keeper = SecretKeeper()
@@ -39,14 +49,28 @@ def validate_config(
     ):
         return errors
 
-    async def play_audio(
+    # Protocol implementation: TtsProvider
+    async def synthesize(
         self,
         text: str,
-        config: HumeConfig,
-        sound_config: SoundConfig,
         audio_player: AudioPlayer,
+        sound_config: SoundConfig,
         wingman_name: str,
-    ):
+        **kwargs
+    ) -> None:
+        """Synthesize speech using Hume AI.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration
+            wingman_name: Name of wingman
+            **kwargs: Unused (kept for protocol compatibility)
+
+        Returns:
+            None - Audio is played directly via audio_player
+        """
+        config = self.config
         speech = await self.hume.tts.synthesize_json(
             utterances=[
                 PostedUtterance(
diff --git a/providers/inworld.py b/providers/inworld.py
index 4a641c7d..2b70b91c 100644
--- a/providers/inworld.py
+++ b/providers/inworld.py
@@ -7,13 +7,18 @@
 import time
 import requests
 import aiofiles
-from api.enums import LogType
 from api.interface import (
     SoundConfig,
     VoiceInfo,
     WingmanInitializationError,
     InworldConfig,
 )
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    TtsProvider,
+)
 from services.audio_player import AudioPlayer
 from services.file import get_writable_dir
 from services.printr import Printr
@@ -23,8 +28,12 @@
 OUTPUT_FILE: str = "inworld.mp3"
 
 
-class Inworld:
-    def __init__(self, api_key: str, wingman_name: str):
+@capabilities(ProviderCapability.TTS)
+class Inworld(BaseProvider, TtsProvider):
+    """Inworld AI TTS provider with emotional expression and streaming."""
+
+    def __init__(self, config: InworldConfig, api_key: str, wingman_name: str):
+        BaseProvider.__init__(self, config=config, api_key=api_key)
         self.wingman_name = wingman_name
         self.secret_keeper = SecretKeeper()
         self.printr = Printr()
@@ -43,14 +52,29 @@ def _to_camel_case(self, snake_str: str) -> str:
         components = snake_str.split("_")
         return components[0] + "".join(x.title() for x in components[1:])
 
-    async def play_audio(
+    # Protocol implementation: TtsProvider
+    async def synthesize(
         self,
         text: str,
-        config: InworldConfig,
-        sound_config: SoundConfig,
         audio_player: AudioPlayer,
+        sound_config: SoundConfig,
         wingman_name: str,
-    ):  # Prepare audio config - override encoding for streaming
+        **kwargs,
+    ) -> None:
+        """Synthesize speech using Inworld AI with streaming support.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration
+            wingman_name: Name of wingman
+            **kwargs: Unused (kept for protocol compatibility)
+
+        Returns:
+            None - Audio is played directly via audio_player
+        """
+        config = self.config
+        # Prepare audio config - override encoding for streaming
         # Convert snake_case keys to camelCase for the API
         audio_config = {
             self._to_camel_case(k): v
diff --git a/providers/open_ai.py b/providers/open_ai.py
index a177fadc..04042c9b 100644
--- a/providers/open_ai.py
+++ b/providers/open_ai.py
@@ -1,21 +1,31 @@
 from abc import ABC, abstractmethod
 import json
 import re
-from typing import Literal, Mapping, Union
+from typing import Literal
 import httpx
-from openai import NOT_GIVEN, NotGiven, Omit, OpenAI, APIStatusError, AzureOpenAI
+from openai import NOT_GIVEN, OpenAI, APIStatusError, AzureOpenAI
+from openai.types.chat import ChatCompletion
 import azure.cognitiveservices.speech as speechsdk
 from api.enums import AzureRegion, LogType
+from services.openai_utils import get_minimal_reasoning_by_model
 from api.interface import (
     AzureInstanceConfig,
     AzureSttConfig,
     AzureTtsConfig,
+    OpenAiConfig,
     SoundConfig,
     VoiceInfo,
 )
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    SttProvider,
+    TtsProvider,
+    LlmProvider,
+)
 from services.audio_player import AudioPlayer
-from services.openai_utils import get_minimal_reasoning_by_model
 from services.printr import Printr
 
 printr = Printr()
@@ -66,37 +76,6 @@ def _perform_transcription(
 
         return None
 
-    def get_minimal_reasoning_by_model(self, model_name: str) -> dict:
-        """
-        Returns the minimal reasoning effort setting based on the model name.
-        This helps reduce latency by setting the lowest supported reasoning effort.
-        See https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort
-
-        Args:
-            model_name: The name of the OpenAI model
-
-        Returns:
-            dict: Dictionary with reasoning_effort key if applicable, empty dict otherwise
-        """
-        # Models that don't support reasoning effort parameter
-        if model_name in ["o1-mini", "gpt-5.2-chat-latest"]:
-            return {}
-
-        # o-series models (o1, o3, etc.) support "low" as minimal
-        if model_name.startswith("o"):
-            return {"reasoning_effort": "low"}
-
-        # gpt-5.x models (5.1, 5.2, etc.) support "none" as minimal
-        if model_name.startswith("gpt-5."):
-            return {"reasoning_effort": "none"}
-
-        # gpt-5 base models support "minimal" as lowest effort
-        if model_name.startswith("gpt-5"):
-            return {"reasoning_effort": "minimal"}
-
-        # Other models don't support reasoning effort
-        return {}
-
     def _perform_ask(
         self,
         client: OpenAI | AzureOpenAI,
@@ -107,9 +86,7 @@ def _perform_ask(
     ):
         try:
             # Get minimal reasoning effort for the model to reduce latency
-            reasoning_params = (
-                self.get_minimal_reasoning_by_model(model) if model else {}
-            )
+            reasoning_params = get_minimal_reasoning_by_model(model) if model else {}
 
             if not tools:
                 completion = client.chat.completions.create(
@@ -136,15 +113,25 @@ def _perform_ask(
 
         return None
 
-class OpenAi(BaseOpenAi):
+@capabilities(ProviderCapability.STT, ProviderCapability.TTS, ProviderCapability.LLM)
+class OpenAi(BaseProvider, BaseOpenAi, SttProvider, TtsProvider, LlmProvider):
+    """OpenAI provider supporting STT (Whisper), TTS, and LLM (GPT models).
+
+    This provider implements all three capabilities using OpenAI's API.
+    """
+
     def __init__(
         self,
-        api_key: str = "",
-        organization: str | None = None,
+        config: OpenAiConfig,
+        api_key: str,
         base_url: str | None = None,
+        organization: str | None = None,
     ):
-        super().__init__()
-        self.api_key = api_key
+        # Initialize BaseProvider with config and api_key
+        BaseProvider.__init__(self, config=config, api_key=api_key)
+        # Initialize BaseOpenAi (no __init__ but needed for MRO)
+        BaseOpenAi.__init__(self)
+
         self.client = self._create_client(
             api_key=api_key,
             organization=organization,
@@ -164,18 +151,40 @@ def _create_client(
             base_url=base_url,
         )
 
-    def transcribe(self, filename: str, model: str = "whisper-1"):
-        return self._perform_transcription(
-            client=self.client, filename=filename, model=model
+    # Protocol implementation: SttProvider
+    async def transcribe(self, audio_input_wav: str, **kwargs) -> str:
+        """Transcribe audio file to text using Whisper.
+
+        Args:
+            audio_input_wav: Path to WAV audio file
+            **kwargs: Additional parameters (model, prompt, language)
+
+        Returns:
+            Transcribed text string or None on failure
+        """
+        model = kwargs.get("model", "whisper-1")
+        result = self._perform_transcription(
+            client=self.client, filename=audio_input_wav, model=model
         )
+        return result.text if result else None
+
+    # Protocol implementation: LlmProvider
+    async def complete(
+        self, messages: list[dict], tools: list[dict] = None, **kwargs
+    ) -> ChatCompletion | None:
+        """Generate completion using GPT models.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            tools: Optional list of tool definitions for function calling
+            **kwargs: Additional parameters (model, temperature, stream, etc.)
+
+        Returns:
+            ChatCompletion object from OpenAI API, or None on error
+        """
+        model = kwargs.get("model", self.config.conversation_model)
+        stream = kwargs.get("stream", False)
 
-    def ask(
-        self,
-        messages: list[dict[str, str]],
-        model: str = None,
-        stream: bool = False,
-        tools: list[dict[str, any]] = None,
-    ):
         return self._perform_ask(
             client=self.client,
             messages=messages,
@@ -184,18 +193,32 @@ def ask(
             tools=tools,
         )
 
-    async def play_audio(
+    # Protocol implementation: TtsProvider
+    async def synthesize(
         self,
         text: str,
-        voice: str,
-        model: str,
-        speed: float,
-        sound_config: SoundConfig,
         audio_player: AudioPlayer,
+        sound_config: SoundConfig,
         wingman_name: str,
-        stream: bool,
-    ):
-        # instructions = config.instructions # Instructions are for gpt-4o-mini-tts model only
+        **kwargs,
+    ) -> None:
+        """Synthesize speech from text using OpenAI TTS.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration with voice settings
+            wingman_name: Name of wingman (for audio file naming)
+            **kwargs: Additional parameters (voice, model, speed, stream)
+
+        Returns:
+            None - Audio is played directly via audio_player
+        """
+        voice = kwargs.get("voice", self.config.tts_voice)
+        model = kwargs.get("model", self.config.tts_model)
+        speed = kwargs.get("speed", self.config.tts_speed)
+        stream = kwargs.get("stream", self.config.output_streaming)
+
         try:
             if not stream:
                 # Non-streaming implementation
@@ -204,7 +227,6 @@ async def play_audio(
                     model=model,
                     voice=voice,
                     speed=speed,
-                    # instructions=instructions,
                 )
                 if response is not None:
                     await audio_player.play_with_effects(
@@ -220,33 +242,18 @@ async def play_audio(
                     voice=voice,
                     speed=speed,
                     response_format="pcm",
-                    # instructions=instructions,
                 ) as response:
-                    # Create an iterator for the audio chunks. We can set the chunk size here.
                     audio_stream_iterator = response.iter_bytes(chunk_size=1024)
 
-                    # This callback is passed to the audio_player and called repeatedly to fill its buffer.
                     def buffer_callback(audio_buffer):
-                        """
-                        Fetches the next chunk from the audio stream and loads it
-                        into the player's buffer.
-                        """
                         try:
-                            # Get the next chunk of audio data from the iterator
                             chunk = next(audio_stream_iterator)
                             chunk_size = len(chunk)
-
-                            # Copy the received audio data into the buffer provided by the audio player
                             audio_buffer[:chunk_size] = chunk
-
-                            # Return the number of bytes written
                             return chunk_size
                         except StopIteration:
-                            # When the iterator is exhausted, it raises StopIteration.
-                            # We catch it and return 0 to signal the end of the stream.
                             return 0
 
-                    # OpenAI's PCM output is 24kHz, 16-bit, single-channel.
                     await audio_player.stream_with_effects(
                         buffer_callback=buffer_callback,
                         config=sound_config,
@@ -256,14 +263,143 @@ def buffer_callback(audio_buffer):
                         channels=1,
                         use_gain_boost=True,  # All streaming PCM TTS providers need this
                     )
-
         except APIStatusError as e:
             self._handle_api_error(e)
-        except UnicodeEncodeError:
-            self._handle_key_error()
 
 
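`get_minimal_reasoning_by_model` moves off `BaseOpenAi` and is now imported from `services.openai_utils`, so `BaseOpenAi._perform_ask` and `WingmanPro` share one implementation (GoogleGenAI keeps its own Gemini-specific variant). The diff doesn't show that module; based on the removed method body, the shared function presumably looks like this:

```python
# services/openai_utils.py (sketch reconstructed from the removed method body)
def get_minimal_reasoning_by_model(model_name: str) -> dict:
    """Return the lowest supported reasoning_effort setting, or {} if unsupported."""
    # Models that reject the reasoning_effort parameter entirely
    if model_name in ["o1-mini", "gpt-5.2-chat-latest"]:
        return {}
    # o-series models (o1, o3, ...) accept "low" as the minimum
    if model_name.startswith("o"):
        return {"reasoning_effort": "low"}
    # gpt-5.x models (5.1, 5.2, ...) accept "none"
    if model_name.startswith("gpt-5."):
        return {"reasoning_effort": "none"}
    # gpt-5 base models accept "minimal"
    if model_name.startswith("gpt-5"):
        return {"reasoning_effort": "minimal"}
    return {}
```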
+ """ + + def __init__( + self, + config: OpenAiConfig, + api_key: str, + base_url: str | None = None, + organization: str | None = None, + ): + # Initialize parent OpenAi class + super().__init__( + config=config, + api_key=api_key, + base_url=base_url, + organization=organization, + ) + + # Check if the configured model supports tools + self.model_supports_tools = False + self._check_tool_support() + + def _check_tool_support(self): + """Check if the configured OpenRouter model supports tools.""" + import requests + + model_id = self.config.conversation_model + if not model_id: + return + + try: + response = requests.get( + url=f"https://openrouter.ai/api/v1/models/{model_id}/endpoints", + timeout=10, + ) + response.raise_for_status() + + # Parse the endpoint capabilities + from api.interface import OpenRouterEndpointResult + + content = response.json() + result = OpenRouterEndpointResult(**content.get("data", {})) + + # Check if any endpoint supports both tools and tool_choice parameters + self.model_supports_tools = any( + all( + p in endpoint.supported_parameters for p in ["tools", "tool_choice"] + ) + for endpoint in result.endpoints + ) + + if not self.model_supports_tools: + printr.print( + f"OpenRouter model {model_id} does not support tools, they will be omitted from calls.", + server_only=True, + ) + except Exception as e: + printr.print( + f"Failed to check OpenRouter tool support: {str(e)}", + server_only=True, + ) + + async def complete( + self, messages: list[dict], tools: list[dict] = None, **kwargs + ) -> ChatCompletion | None: + """Generate completion, automatically stripping tools if model doesn't support them. + + Args: + messages: List of message dicts with 'role' and 'content' + tools: Optional list of tool definitions for function calling + **kwargs: Additional parameters (model, temperature, stream, etc.) + + Returns: + ChatCompletion object from OpenAI API, or None on error + """ + # Strip tools if model doesn't support them + if not self.model_supports_tools and tools: + tools = None + + # Call parent implementation + return await super().complete(messages=messages, tools=tools, **kwargs) + + +@capabilities(ProviderCapability.STT, ProviderCapability.TTS, ProviderCapability.LLM) +class OpenAiAzure(BaseProvider, BaseOpenAi, SttProvider, TtsProvider, LlmProvider): + """Azure provider supporting STT (Whisper + Speech), TTS, and LLM. + + Azure has multiple services that can use different API keys: + - Whisper STT (uses Azure OpenAI instance) + - Speech STT (uses Azure Cognitive Services) + - TTS (uses Azure Cognitive Services) + - LLM (uses Azure OpenAI instance) + + This provider stores all configs and keys, selecting the appropriate one + based on which method is called. 
+ """ + + def __init__( + self, + config, # Can be AzureConfig or just the parent config object + whisper_api_key: str = None, + speech_api_key: str = None, + tts_api_key: str = None, + llm_api_key: str = None, + ): + # Initialize BaseProvider (config can be complex Azure config) + BaseProvider.__init__(self, config=config, api_key=None) + BaseOpenAi.__init__(self) + + # Store individual service keys + self.whisper_api_key = whisper_api_key + self.speech_api_key = speech_api_key + self.tts_api_key = tts_api_key + self.llm_api_key = llm_api_key + + # Extract Azure-specific configs if available + if hasattr(config, "azure") and config.azure: + self.instance_config = ( + config.azure.instance if hasattr(config.azure, "instance") else None + ) + self.stt_config = config.azure.stt if hasattr(config.azure, "stt") else None + self.tts_config = config.azure.tts if hasattr(config.azure, "tts") else None + else: + self.instance_config = None + self.stt_config = None + self.tts_config = None + def _create_client(self, api_key: str, config: AzureInstanceConfig): """Create an AzureOpenAI client with the given parameters.""" return AzureOpenAI( @@ -273,82 +409,122 @@ def _create_client(self, api_key: str, config: AzureInstanceConfig): azure_deployment=config.deployment_name, ) - def transcribe_whisper( - self, - filename: str, - api_key: str, - config: AzureInstanceConfig, - model: str = "whisper-1", - ): - azure_client = self._create_client(api_key=api_key, config=config) - return self._perform_transcription( - client=azure_client, - filename=filename, - model=model, - ) + # Protocol implementation: SttProvider + async def transcribe(self, audio_input_wav: str, **kwargs) -> str: + """Transcribe audio using Azure Whisper or Speech service. - def transcribe_azure_speech( - self, filename: str, api_key: str, config: AzureSttConfig - ): - speech_config = speechsdk.SpeechConfig( - subscription=api_key, - region=config.region.value, - ) - audio_config = speechsdk.AudioConfig(filename=filename) + Args: + audio_input_wav: Path to WAV audio file + **kwargs: Additional parameters + - use_speech: If True, use Azure Speech service instead of Whisper + - model: Model name for Whisper + + Returns: + Transcribed text string or None on failure + """ + use_speech = kwargs.get("use_speech", False) - auto_detect_source_language_config = ( - ( + if use_speech and self.speech_api_key and self.stt_config: + # Use Azure Speech STT + speech_config = speechsdk.SpeechConfig( + subscription=self.speech_api_key, + region=self.stt_config.region.value, + ) + audio_config = speechsdk.AudioConfig(filename=audio_input_wav) + + auto_detect_source_language_config = ( speechsdk.languageconfig.AutoDetectSourceLanguageConfig( - languages=config.languages + languages=self.stt_config.languages ) + if len(self.stt_config.languages) > 1 + else None ) - if len(config.languages) > 1 - else None - ) - language = config.languages[0] if len(config.languages) == 1 else None + language = ( + self.stt_config.languages[0] + if len(self.stt_config.languages) == 1 + else None + ) - speech_recognizer = speechsdk.SpeechRecognizer( - speech_config=speech_config, - audio_config=audio_config, - language=language, - auto_detect_source_language_config=auto_detect_source_language_config, - ) - return speech_recognizer.recognize_once_async().get() + speech_recognizer = speechsdk.SpeechRecognizer( + speech_config=speech_config, + audio_config=audio_config, + language=language, + auto_detect_source_language_config=auto_detect_source_language_config, + ) + result = 
speech_recognizer.recognize_once_async().get() + return result.text if result and hasattr(result, "text") else None + else: + # Use Azure Whisper STT + if self.whisper_api_key and self.instance_config: + model = kwargs.get("model", "whisper-1") + whisper_client = self._create_client( + api_key=self.whisper_api_key, config=self.instance_config + ) + result = self._perform_transcription( + client=whisper_client, + filename=audio_input_wav, + model=model, + ) + return result.text if result else None + return None - def ask( - self, - messages: list[dict[str, str]], - api_key: str, - config: AzureInstanceConfig, - stream: bool = False, - tools: list[dict[str, any]] = None, - ): - azure_client = self._create_client(api_key=api_key, config=config) - return self._perform_ask( - client=azure_client, - messages=messages, - # Azure uses the deployment name as the model - model=config.deployment_name, - stream=stream, - tools=tools, - ) + # Protocol implementation: LlmProvider + async def complete( + self, messages: list[dict], tools: list[dict] = None, **kwargs + ) -> ChatCompletion | None: + """Generate completion using Azure GPT models. - async def play_audio( + Args: + messages: List of message dicts with 'role' and 'content' + tools: Optional list of tool definitions for function calling + **kwargs: Additional parameters (model, temperature, stream, etc.) + + Returns: + ChatCompletion object from Azure OpenAI API, or None on error + """ + stream = kwargs.get("stream", False) + if self.llm_api_key and self.instance_config: + azure_client = self._create_client( + api_key=self.llm_api_key, config=self.instance_config + ) + return self._perform_ask( + client=azure_client, + messages=messages, + model=self.instance_config.deployment_name, + stream=stream, + tools=tools, + ) + return None + + # Protocol implementation: TtsProvider + async def synthesize( self, text: str, - api_key: str, - config: AzureTtsConfig, - sound_config: SoundConfig, audio_player: AudioPlayer, + sound_config: SoundConfig, wingman_name: str, - ): + **kwargs, + ) -> None: + """Synthesize speech from text using Azure TTS. 
+ + Args: + text: Text to convert to speech + audio_player: AudioPlayer instance for playback + sound_config: Sound configuration with voice settings + wingman_name: Name of wingman (for audio file naming) + + Returns: + None - Audio is played directly via audio_player + """ + if not self.tts_api_key or not self.tts_config: + return + speech_config = speechsdk.SpeechConfig( - subscription=api_key, - region=config.region.value, + subscription=self.tts_api_key, + region=self.tts_config.region.value, ) - - speech_config.speech_synthesis_voice_name = config.voice + speech_config.speech_synthesis_voice_name = self.tts_config.voice speech_synthesizer = speechsdk.SpeechSynthesizer( speech_config=speech_config, @@ -357,25 +533,25 @@ async def play_audio( result = ( speech_synthesizer.start_speaking_text_async(text).get() - if config.output_streaming + if self.tts_config.output_streaming else speech_synthesizer.speak_text_async(text).get() ) - def buffer_callback(audio_buffer): - buffer = bytes(2048) - size = audio_data_stream.read_data(buffer) - audio_buffer[:size] = buffer - return size - if result is not None: - if config.output_streaming: + if self.tts_config.output_streaming: audio_data_stream = speechsdk.AudioDataStream(result) + def buffer_callback(audio_buffer): + buffer = bytes(2048) + size = audio_data_stream.read_data(buffer) + audio_buffer[:size] = buffer + return size + await audio_player.stream_with_effects( buffer_callback, sound_config, - wingman_name=wingman_name, - use_gain_boost=True, # "Azure Streaming" low gain workaround + wingman_name, + use_gain_boost=True, ) else: await audio_player.play_with_effects( @@ -400,7 +576,13 @@ def get_available_voices(self, api_key: str, region: AzureRegion, locale: str = return None -class OpenAiCompatibleTts: +@capabilities(ProviderCapability.TTS) +class OpenAiCompatibleTts(BaseProvider, TtsProvider): + """OpenAI-compatible TTS provider. + + Works with any TTS API that follows OpenAI's speech endpoint format. + """ + def __init__( self, api_key: str, @@ -409,6 +591,12 @@ def __init__( super().__init__() self._api_key = api_key self._base_url = base_url + # Create a minimal config object for BaseProvider + from types import SimpleNamespace + + config = SimpleNamespace(base_url=base_url) + BaseProvider.__init__(self, config=config, api_key=api_key) + self.client = OpenAI( api_key=api_key, base_url=base_url, @@ -480,24 +668,33 @@ async def get_available_voices( ) return [] - async def play_audio( + # Protocol implementation: TtsProvider + async def synthesize( self, text: str, - voice: str, - model: str, - sound_config: SoundConfig, audio_player: AudioPlayer, + sound_config: SoundConfig, wingman_name: str, - stream: bool, - speed: float | NotGiven = NOT_GIVEN, - response_format: ( - NotGiven | Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] - ) = NOT_GIVEN, - extra_headers: Mapping[str, Union[str, Omit]] | None = None, - ): - # instructions = config.instructions # No current open source model supports this but adding for full compatibility + **kwargs, + ) -> None: + """Synthesize speech from text using OpenAI-compatible API. + + Args: + text: Text to convert to speech + audio_player: AudioPlayer instance for playback + sound_config: Sound configuration with voice settings + wingman_name: Name of wingman (for audio file naming) + **kwargs: Additional parameters (voice, model, speed, stream, response_format, extra_headers) - # Should sample rate and response format be configurable in UI to ensure widest compatibilty? 
diff --git a/providers/provider_base.py b/providers/provider_base.py
new file mode 100644
index 00000000..fe342504
--- /dev/null
+++ b/providers/provider_base.py
@@ -0,0 +1,215 @@
+"""Provider capability system and base classes.
+
+This module defines the foundation for the modular provider architecture:
+- ProviderCapability enum for declaring provider abilities
+- @capabilities decorator for marking provider classes
+- Protocol classes defining provider interfaces
+- BaseProvider abstract class with common functionality
+"""
+
+from abc import ABC
+from enum import Enum
+from typing import Protocol, runtime_checkable, Any
+from openai.types.chat import ChatCompletion
+
+
+class ProviderCapability(Enum):
+    """Capabilities that providers can offer."""
+
+    STT = "speech_to_text"
+    TTS = "text_to_speech"
+    LLM = "language_model"
+    IMAGE_GEN = "image_generation"
+
+
+def capabilities(*caps: ProviderCapability):
+    """Decorator to mark provider classes with their capabilities.
+
+    Usage:
+        @capabilities(ProviderCapability.STT, ProviderCapability.TTS)
+        class MyProvider(BaseProvider):
+            pass
+
+    Args:
+        *caps: Variable number of ProviderCapability values
+
+    Returns:
+        Decorated class with _capabilities attribute
+    """
+
+    def decorator(cls):
+        cls._capabilities = set(caps)
+        return cls
+
+    return decorator
+
+
+@runtime_checkable
+class SttProvider(Protocol):
+    """Protocol for Speech-to-Text providers.
+
+    Providers implementing this protocol can transcribe audio to text.
+    All implementations must be async and support **kwargs for flexibility.
+    """
+
+    async def transcribe(self, audio_input_wav: str, **kwargs) -> str:
+        """Transcribe audio file to text.
+
+        Args:
+            audio_input_wav: Path to WAV audio file
+            **kwargs: Additional provider-specific parameters
+                - prompt: Optional transcription hint/context
+                - language: Optional language code
+                - model: Optional model override
+
+        Returns:
+            Transcribed text string
+
+        Raises:
+            Exception: On transcription failure
+        """
+        ...
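The decorator simply stamps a `_capabilities` set onto the class, and `@runtime_checkable` protocols make the interface checkable with `isinstance`. A toy provider (not part of the PR; `BaseProvider` appears further down in this module) demonstrating both mechanisms:

```python
# Toy provider, for illustration only.
@capabilities(ProviderCapability.STT)
class DummyStt(BaseProvider):
    async def transcribe(self, audio_input_wav: str, **kwargs) -> str:
        return "hello"

assert DummyStt.get_capabilities() == {ProviderCapability.STT}
assert DummyStt.supports(ProviderCapability.TTS) is False

# runtime_checkable protocols match structurally: having a `transcribe`
# method is enough, no inheritance from SttProvider is required.
assert isinstance(DummyStt(config=None), SttProvider)
```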
+
+
+@runtime_checkable
+class TtsProvider(Protocol):
+    """Protocol for Text-to-Speech providers.
+
+    Providers implementing this protocol can synthesize speech from text.
+    All implementations must be async and support **kwargs for flexibility.
+    """
+
+    async def synthesize(
+        self, text: str, audio_player, sound_config, wingman_name: str, **kwargs
+    ) -> None:
+        """Synthesize speech from text.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration with voice settings
+            wingman_name: Name of wingman (for audio file naming)
+            **kwargs: Additional provider-specific parameters
+                - voice: Optional voice override
+                - model: Optional model override
+                - speed: Optional speed multiplier
+                - effects: Optional audio effects
+
+        Returns:
+            None - Audio is played directly via audio_player
+
+        Raises:
+            Exception: On synthesis failure
+        """
+        ...
+
+
+@runtime_checkable
+class ImageGenProvider(Protocol):
+    """Protocol for Image Generation providers.
+
+    Providers implementing this protocol can generate images from text descriptions.
+    All implementations must be async and support **kwargs for flexibility.
+    """
+
+    async def generate_image(self, prompt: str, **kwargs) -> str:
+        """Generate an image from a text description.
+
+        Args:
+            prompt: Text description of the image to generate
+            **kwargs: Additional provider-specific parameters
+                - size: Optional image size (e.g., "1024x1024")
+                - quality: Optional quality setting
+                - style: Optional style parameter
+                - model: Optional model override
+
+        Returns:
+            URL or local path to the generated image
+
+        Raises:
+            Exception: On generation failure
+        """
+        ...
+
+
+@runtime_checkable
+class LlmProvider(Protocol):
+    """Protocol for Language Model providers.
+
+    Providers implementing this protocol can generate text completions.
+    All implementations must be async and support **kwargs for flexibility.
+    """
+
+    async def complete(
+        self, messages: list[dict], tools: list[dict] = None, **kwargs
+    ) -> ChatCompletion | None:
+        """Generate completion from messages.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            tools: Optional list of tool definitions for function calling
+            **kwargs: Additional provider-specific parameters
+                - temperature: Optional temperature override
+                - max_tokens: Optional token limit
+                - model: Optional model override
+                - stream: Optional streaming flag
+                - response_format: Optional response format (e.g., JSON)
+
+        Returns:
+            ChatCompletion object from OpenAI (or compatible) API, or None on error.
+            All providers return OpenAI-compatible ChatCompletion objects.
+
+        Raises:
+            Exception: On completion failure
+        """
+        ...
+
+
+class BaseProvider(ABC):
+    """Abstract base class for all providers.
+
+    Provides common functionality for capability checking and configuration.
+    All provider implementations should inherit from this class and use the
+    @capabilities decorator to declare their abilities.
+
+    Attributes:
+        config: Provider-specific configuration object (typed per provider)
+        api_key: Optional API key for authentication
+    """
+
+    def __init__(self, config: Any, api_key: str = None):
+        """Initialize base provider.
+
+        Args:
+            config: Provider-specific configuration object
+            api_key: Optional API key for authentication
+        """
+        self.config = config
+        self.api_key = api_key
+
+    @classmethod
+    def get_capabilities(cls) -> set[ProviderCapability]:
+        """Get the capabilities this provider supports.
+
+        Returns:
+            Set of ProviderCapability values declared via @capabilities decorator
+            Returns empty set if no capabilities declared
+        """
+        return getattr(cls, "_capabilities", set())
+
+    @classmethod
+    def supports(cls, capability: ProviderCapability) -> bool:
+        """Check if this provider supports a specific capability.
+
+        Args:
+            capability: ProviderCapability to check
+
+        Returns:
+            True if provider supports the capability, False otherwise
+        """
+        return capability in cls.get_capabilities()
+
+    def __repr__(self) -> str:
+        """String representation showing provider class and capabilities."""
+        caps = ", ".join(c.value for c in self.get_capabilities())
+        return f"<{self.__class__.__name__} capabilities=[{caps}]>"
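With capabilities and protocols in place, a factory (e.g. `Tower` or the `CapabilityRegistry`) can select providers without isinstance-chains over concrete classes. A sketch of that dispatch pattern — the selection helper and `edge_config` are hypothetical:

```python
# Hypothetical selection helper; the providers list comes from the wingman's config.
def pick_tts_provider(providers: list[BaseProvider]) -> TtsProvider | None:
    for provider in providers:
        if provider.supports(ProviderCapability.TTS) and isinstance(
            provider, TtsProvider
        ):
            return provider
    return None

print(pick_tts_provider([Edge(config=edge_config)]))
# -> <Edge capabilities=[text_to_speech]>  (from BaseProvider.__repr__)
```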
diff --git a/providers/whispercpp.py b/providers/whispercpp.py
index 81e3b313..44afd39d 100644
--- a/providers/whispercpp.py
+++ b/providers/whispercpp.py
@@ -6,18 +6,52 @@
     WhispercppTranscript,
     WingmanInitializationError,
 )
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    SttProvider,
+)
 from services.printr import Printr
 
 
-class Whispercpp:
+@capabilities(ProviderCapability.STT)
+class Whispercpp(BaseProvider, SttProvider):
     def __init__(
         self,
-        settings: WhispercppSettings,
+        config: WhispercppSettings,
+        api_key: str = None,  # Not used but required by BaseProvider
     ):
-        self.settings = settings
+        BaseProvider.__init__(self, config=config, api_key=api_key)
+        self.settings = config  # Alias for backward compatibility
         self.printr = Printr()
 
-    def transcribe(
+    # Protocol implementation: SttProvider
+    async def transcribe(self, filename: str, **kwargs) -> str:
+        """Transcribe audio using whispercpp server.
+
+        Args:
+            filename: Path to audio file
+            **kwargs: May include 'config' (WhispercppSttConfig)
+
+        Returns:
+            Transcribed text or None on error
+        """
+        # Get config from kwargs or use default
+        config = kwargs.get("config")
+        if not config:
+            # Use default config if not provided
+            config = WhispercppSttConfig(temperature=0.0)
+
+        result = self._transcribe_sync(
+            filename=filename,
+            config=config,
+            response_format="json",
+            timeout=10,
+        )
+        return result.text if result else None
+
+    def _transcribe_sync(
         self,
         filename: str,
         config: WhispercppSttConfig,
diff --git a/providers/wingman_pro.py b/providers/wingman_pro.py
index 25d2a724..0ccb98d1 100644
--- a/providers/wingman_pro.py
+++ b/providers/wingman_pro.py
@@ -2,29 +2,54 @@
 import openai
 import requests
 from openai.types.audio import Transcription
-from api.enums import CommandTag, LogType
+from openai.types.chat import ChatCompletion
+from api.enums import CommandTag, LogType, WingmanProSttProvider, WingmanProTtsProvider
 from api.interface import (
     AzureSttConfig,
     AzureTtsConfig,
     InworldConfig,
     SoundConfig,
     VoiceInfo,
+    WingmanConfig,
     WingmanProSettings,
 )
+from providers.provider_base import (
+    BaseProvider,
+    ProviderCapability,
+    capabilities,
+    SttProvider,
+    TtsProvider,
+    LlmProvider,
+)
 from services.audio_player import AudioPlayer
 from services.openai_utils import get_minimal_reasoning_by_model
 from services.printr import Printr
-from services.secret_keeper import SecretKeeper
 
 
-class WingmanPro:
+@capabilities(
+    ProviderCapability.STT,
+    ProviderCapability.TTS,
+    ProviderCapability.LLM,
+    ProviderCapability.IMAGE_GEN,
+)
+class WingmanPro(BaseProvider, SttProvider, TtsProvider, LlmProvider):
     def __init__(
-        self, wingman_name: str, settings: WingmanProSettings, timeout: int = 120
+        self,
+        wingman_config: WingmanConfig,
+        provider_settings: WingmanProSettings,
+        api_key: str,
+        wingman_name: str,
+        timeout: int = 120,
     ):
+        BaseProvider.__init__(self, config=provider_settings, api_key=api_key)
+        self.wingman_config: WingmanConfig = (
+            wingman_config  # Full config for subprovider routing
+        )
         self.wingman_name: str = wingman_name
-        self.settings: WingmanProSettings = settings
+        self.settings: WingmanProSettings = (
+            provider_settings  # Alias for backward compatibility
+        )
         self.printr = Printr()
-        self.secret_keeper: SecretKeeper = SecretKeeper()
         self.timeout = timeout
 
     def send_unauthorized_error(self):
@@ -40,6 +65,111 @@ def send_server_error(self, response: requests.Response):
             color=LogType.ERROR,
         )
 
+    # Protocol implementation: SttProvider
+    async def transcribe(self, audio_input_wav: str, **kwargs) -> str:
+        """Transcribe audio using WingmanPro backend.
+
+        Args:
+            audio_input_wav: Path to WAV audio file
+            **kwargs: Unused (kept for protocol compatibility)
+
+        Returns:
+            Transcribed text string
+        """
+        # Read subprovider selection from config
+        stt_provider = self.wingman_config.wingman_pro.stt_provider
+
+        if stt_provider == WingmanProSttProvider.WHISPER:
+            result = self.transcribe_whisper(audio_input_wav)
+            return result.text if result else None
+        elif stt_provider == WingmanProSttProvider.AZURE_SPEECH:
+            azure_config = self.wingman_config.azure.stt
+            result = self.transcribe_azure_speech(audio_input_wav, azure_config)
+            # Azure Speech returns a dict with "_text" key
+            return result.get("_text") if result else None
+        else:
+            raise ValueError(f"Unsupported WingmanPro STT provider: {stt_provider}")
+
+    # Protocol implementation: TtsProvider
+    async def synthesize(
+        self,
+        text: str,
+        audio_player: AudioPlayer,
+        sound_config: SoundConfig,
+        wingman_name: str,
+        **kwargs,
+    ) -> None:
+        """Synthesize speech using WingmanPro backend.
+
+        Args:
+            text: Text to convert to speech
+            audio_player: AudioPlayer instance for playback
+            sound_config: Sound configuration
+            wingman_name: Name of wingman
+            **kwargs: Unused (kept for protocol compatibility)
+
+        Returns:
+            None - Audio is played directly via audio_player
+        """
+        # Read subprovider selection from config
+        tts_provider = self.wingman_config.wingman_pro.tts_provider
+
+        if tts_provider == WingmanProTtsProvider.OPENAI:
+            await self.generate_openai_speech(
+                text=text,
+                voice=self.wingman_config.openai.tts_voice,
+                model=self.wingman_config.openai.tts_model,
+                speed=self.wingman_config.openai.tts_speed,
+                sound_config=sound_config,
+                audio_player=audio_player,
+                wingman_name=wingman_name,
+            )
+        elif tts_provider == WingmanProTtsProvider.AZURE:
+            await self.generate_azure_speech(
+                text=text,
+                config=self.wingman_config.azure.tts,
+                sound_config=sound_config,
+                audio_player=audio_player,
+                wingman_name=wingman_name,
+            )
+        elif tts_provider == WingmanProTtsProvider.INWORLD:
+            await self.generate_inworld_speech(
+                text=text,
+                config=self.wingman_config.inworld,
+                sound_config=sound_config,
+                audio_player=audio_player,
+                wingman_name=wingman_name,
+            )
+        else:
+            raise ValueError(f"Unsupported WingmanPro TTS provider: {tts_provider}")
+
+    # Protocol implementation: LlmProvider
+    async def complete(
+        self, messages: list[dict], tools: list[dict] = None, **kwargs
+    ) -> ChatCompletion | None:
+        """Generate completion using WingmanPro backend.
+
+        Args:
+            messages: List of message dicts
+            tools: Optional tool definitions
+            **kwargs: Unused (kept for protocol compatibility)
+
+        Returns:
+            ChatCompletion object, or None on error
+        """
+        # Read deployment from config
+        deployment = self.wingman_config.wingman_pro.conversation_deployment
+        if not deployment:
+            raise ValueError("WingmanPro requires 'deployment' parameter in config")
+
+        return self.ask(
+            messages=messages,
+            deployment=deployment,
+            stream=kwargs.get("stream", False),
+            tools=tools,
+        )
+
+    # Legacy methods (keep for backward compatibility)
     def transcribe_whisper(self, filename: str):
         with open(filename, "rb") as audio_input:
             files = {"audio_file": (filename, audio_input)}
@@ -446,9 +576,8 @@ def get_available_inworld_voices(
         return voices
 
     def _get_headers(self):
-        token = self.secret_keeper.secrets.get("wingman_pro", "")
         return {
-            "Authorization": f"Bearer {token}",
+            "Authorization": f"Bearer {self.api_key}",
         }
 
     def __resolve_gender(self, enum_value: int):
diff --git a/providers/x_ai.py b/providers/x_ai.py
index 1706a1d9..f762e9e2 100644
--- a/providers/x_ai.py
+++ b/providers/x_ai.py
@@ -1,4 +1,6 @@
 from openai import OpenAI, APIStatusError
+from openai.types.chat import ChatCompletion
+
 from providers.open_ai import OpenAi
 
 
@@ -10,7 +12,7 @@ def _perform_ask(
         stream: bool,
         tools: list[dict[str, any]],
         model: str = None,
-    ):
+    ) -> ChatCompletion | None:
         try:
             if not tools:
                 completion = client.chat.completions.create(
+
+        Args:
+            total_time_ms: Total time for all tool executions
+            tool_timings: List of (tool_name, time_ms) tuples
+        """
+        # Create nested snapshots for individual tools
+        nested_snapshots = [
+            BenchmarkResult(
+                label=label,
+                execution_time_ms=time_ms,
+                formatted_execution_time=self._format_time(time_ms),
+            )
+            for label, time_ms in tool_timings
+        ]
+
+        self.snapshots.append(
+            BenchmarkResult(
+                label="Tool Execution",
+                execution_time_ms=total_time_ms,
+                formatted_execution_time=self._format_time(total_time_ms),
+                snapshots=nested_snapshots if nested_snapshots else None,
+            )
+        )
diff --git a/services/command_manager.py b/services/command_manager.py
new file mode 100644
index 00000000..cf942cc9
--- /dev/null
+++ b/services/command_manager.py
@@ -0,0 +1,319 @@
+"""Command Manager Service
+
+Manages command execution, instant activation matching, and response selection.
+Handles all command-related operations for Wingmen.
+"""
+
+import random
+import difflib
+import traceback
+import asyncio
+from typing import Optional
+import keyboard.keyboard as keyboard
+import mouse.mouse as mouse
+from api.interface import CommandConfig, SettingsConfig
+from api.enums import LogType
+from services.printr import Printr
+from services.audio_library import AudioLibrary
+
+printr = Printr()
+
+
+class CommandManager:
+    """Manages all command-related operations for a Wingman.
+
+    Responsibilities:
+    - Command lookup and retrieval
+    - Instant activation phrase matching
+    - Command execution (keyboard, mouse, audio, joystick)
+    - Response selection
+    - Action execution
+    """
+
+    def __init__(
+        self,
+        wingman_name: str,
+        audio_library: AudioLibrary,
+        settings: SettingsConfig,
+    ):
+        """Initialize the command manager.
+
+        Args:
+            wingman_name: Name of the wingman (for logging)
+            audio_library: Audio library for playing sound effects
+            settings: User settings
+        """
+        self.wingman_name = wingman_name
+        self.audio_library = audio_library
+        self.settings = settings
+
+    def get_command(
+        self, commands: list[CommandConfig], command_name: str
+    ) -> Optional[CommandConfig]:
+        """Get a command by name.
+
+        Args:
+            commands: List of available commands (from live config)
+            command_name: Name of the command to retrieve
+
+        Returns:
+            CommandConfig or None if not found
+        """
+        if not commands:
+            return None
+
+        return next(
+            (cmd for cmd in commands if cmd.name == command_name),
+            None,
+        )
+
+    def select_response(self, command: CommandConfig) -> Optional[str]:
+        """Select a random response from the command's response list.
+
+        Args:
+            command: The command to get a response from
+
+        Returns:
+            Random response string or None if no responses configured
+        """
+        if not command.responses or len(command.responses) == 0:
+            return None
+        return random.choice(command.responses)
+
+    async def try_instant_activation(
+        self,
+        commands: list[CommandConfig],
+        transcript: str,
+    ) -> Optional[list[CommandConfig]]:
+        """Match transcript against instant activation phrases and execute commands.
+
+        Matches the lowercased transcript against configured instant activation
+        phrases via difflib; the cutoff of 1.0 effectively requires an exact
+        match. All commands mapped to the matched phrase are executed.
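+
+        Illustrative usage (live_commands is a hypothetical list of
+        CommandConfig objects):
+
+            executed = await manager.try_instant_activation(
+                commands=live_commands,
+                transcript="open map",
+            )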
+ + Args: + commands: List of available commands (from live config) + transcript: User's spoken text to match against + + Returns: + List of executed commands or None if no match found + """ + try: + # Build phrase-to-command mapping + commands_by_phrase = {} + for command in commands: + if command.instant_activation: + for phrase in command.instant_activation: + phrase_lower = phrase.lower() + if phrase_lower in commands_by_phrase: + commands_by_phrase[phrase_lower].append(command) + else: + commands_by_phrase[phrase_lower] = [command] + + # Find best matching phrase using fuzzy matching + matched_phrases = difflib.get_close_matches( + transcript.lower(), + commands_by_phrase.keys(), + n=1, + cutoff=1, # Exact match required + ) + + if not matched_phrases: + return None + + # Execute all commands for the matched phrase + matched_commands = commands_by_phrase[matched_phrases[0]] + for command in matched_commands: + await self.execute_command(command, is_instant=True) + + return matched_commands + + except Exception as e: + await printr.print_async( + f"Error during instant activation: {str(e)}", + color=LogType.ERROR, + source_name=self.wingman_name, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + return None + + async def execute_command( + self, + command: CommandConfig, + is_instant: bool = False, + reset_conversation_callback: Optional[callable] = None, + ) -> str: + """Execute a command's actions and return a response. + + Args: + command: Command to execute + is_instant: Whether this is an instant activation command + reset_conversation_callback: Callback for ResetConversationHistory command + + Returns: + Command response string or "Ok" if no response configured + """ + if not command: + return "Command not found" + + try: + if not command.actions or len(command.actions) == 0: + await printr.print_async( + f"No actions found for command: {command.name}", + color=LogType.WARNING, + source_name=self.wingman_name, + ) + else: + await self.execute_actions(command) + await printr.print_async( + f"Executed {'instant' if is_instant else 'AI'} command: {command.name}", + color=LogType.COMMAND, + source_name=self.wingman_name, + ) + + # Handle special system commands + if ( + command.name == "ResetConversationHistory" + and reset_conversation_callback + ): + reset_conversation_callback() + await printr.print_async( + f"Executed command: {command.name}", + color=LogType.COMMAND, + source_name=self.wingman_name, + ) + + return self.select_response(command) or "Ok" + + except Exception as e: + await printr.print_async( + f"Error executing command '{command.name}': {str(e)}", + color=LogType.ERROR, + source_name=self.wingman_name, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + return "ERROR DURING PROCESSING" + + async def execute_actions(self, command: CommandConfig): + """Execute all actions defined in a command (in order). 
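+
+        Illustrative usage (assumes a CommandConfig fetched via get_command;
+        the command name is an example only):
+
+            command = manager.get_command(live_commands, "LandingGear")
+            if command:
+                await manager.execute_actions(command)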
+ + Args: + command: Command containing actions to execute + """ + if not command or not command.actions: + return + + try: + for action in command.actions: + # Keyboard actions + if action.keyboard: + hotkey = action.keyboard.hotkey + do_press = bool(action.keyboard.press) + do_release = bool(action.keyboard.release) + + if do_press and do_release: + keyboard.press_and_release(hotkey) + elif do_press: + keyboard.press(hotkey) + elif do_release: + keyboard.release(hotkey) + elif action.keyboard.hold: + keyboard.press(hotkey) + await asyncio.sleep(action.keyboard.hold) + keyboard.release(hotkey) + else: + keyboard.press_and_release(hotkey) + + # Mouse actions + if action.mouse: + if action.mouse.scroll is not None: + mouse.wheel(delta=action.mouse.scroll) + elif action.mouse.move: + mouse.move( + action.mouse.move[0], + action.mouse.move[1], + absolute=False, + duration=0.2, + ) + elif action.mouse.move_to: + mouse.move( + action.mouse.move_to[0], + action.mouse.move_to[1], + absolute=True, + duration=0.2, + ) + elif action.mouse.button: + if action.mouse.hold: + mouse.press(button=action.mouse.button) + await asyncio.sleep(action.mouse.hold) + mouse.release(button=action.mouse.button) + else: + mouse.click(button=action.mouse.button) + + # Joystick actions + if action.joystick: + await printr.print_async( + "Joystick actions are not yet implemented.", + color=LogType.WARNING, + source_name=self.wingman_name, + ) + + # Audio playback actions + if action.audio: + await self.audio_library.handle_action(action.audio) + + # Write text actions + if action.write: + keyboard.write(action.write) + + # Wait/delay actions + if action.wait: + await asyncio.sleep(action.wait) + + except Exception as e: + await printr.print_async( + f"Error executing actions for command '{command.name}': {str(e)}", + color=LogType.ERROR, + source_name=self.wingman_name, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + + def get_executable_commands(self, commands: list[CommandConfig]) -> list[str]: + """Get list of command names that can be executed by the AI. + + Filters out instant-activation-only commands and commands without actions. 
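+
+        The returned names are typically used to build the "execute_command"
+        tool definition that is exposed to the LLM.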
+ + Args: + commands: List of available commands (from live config) + + Returns: + List of command names for the AI tool definition + """ + + def has_effective_actions(command: CommandConfig) -> bool: + """Check if command has any meaningful actions to execute.""" + if command.is_system_command: + return True + + if not command.actions: + return False + + for action in command.actions: + if not action: + continue + if ( + action.keyboard is not None + or action.mouse is not None + or action.joystick is not None + or action.audio is not None + or action.write is not None + or action.wait is not None + ): + return True + + return False + + return [ + cmd.name + for cmd in commands + if not cmd.force_instant_activation and has_effective_actions(cmd) + ] diff --git a/services/config_migration_service.py b/services/config_migration_service.py index ba905964..b57f52c7 100644 --- a/services/config_migration_service.py +++ b/services/config_migration_service.py @@ -923,8 +923,11 @@ def migrate( ) except ValidationError as e: self.err(f"Unable to migrate defaults.yaml:\n{str(e)}") - # Wingmen - elif filename.endswith(".yaml"): + # Wingmen (excluding mcp.yaml which is handled separately) + elif filename.endswith(".yaml") and filename not in [ + "mcp.yaml", + "mcp.template.yaml", + ]: self.log_highlight(f"Migrating Wingman {filename}...") # defaults are already migrated because the Wingman config is in a subdirectory try: @@ -1035,6 +1038,8 @@ def migrate( # Handle mcp.yaml - this is a new file in 2.0.0 if migrate_mcp: + self.log_highlight("Migrating mcp.yaml...") + new_mcp_file = path.join(new_config_path, "mcp.yaml") old_mcp_file = path.join(old_config_path, "mcp.yaml") @@ -1051,18 +1056,15 @@ def migrate( if path.exists(old_mcp_file): # mcp.yaml exists in old version - migrate it - self.log_highlight("Migrating mcp.yaml...") old_mcp_config = self.config_manager.read_config(old_mcp_file) or {} migrated_mcp = migrate_mcp(old_mcp_config, new_mcp_config) else: # mcp.yaml doesn't exist in old version - create from template - self.log_highlight("Creating mcp.yaml (not found in old version)...") migrated_mcp = migrate_mcp({}, new_mcp_config) if not path.exists(new_config_path): os.makedirs(new_config_path) self.config_manager.write_config(new_mcp_file, migrated_mcp) - self.log_highlight("Created/migrated mcp.yaml") # Reload mcp config if this is the latest version if new_config_path == self.latest_config_path: diff --git a/services/conversation_manager.py b/services/conversation_manager.py new file mode 100644 index 00000000..9816bc24 --- /dev/null +++ b/services/conversation_manager.py @@ -0,0 +1,563 @@ +""" +Conversation Manager Service + +Manages conversation history (messages), pending tool calls, system context building, +and instant response generation for Wingmen. +""" + +import json +import random +import uuid +from datetime import datetime +from typing import TYPE_CHECKING, Optional +from openai.types.chat import ( + ChatCompletionMessage, + ChatCompletionMessageToolCall, + ParsedFunction, +) +from api.enums import LogType, ConversationProvider, TtsProvider, WingmanProTtsProvider +from services.printr import Printr + +if TYPE_CHECKING: + from api.interface import WingmanConfig + from services.capability_registry import CapabilityRegistry + +printr = Printr() + + +class ConversationManager: + """Manages conversation messages, context building, and instant responses. 
+ + Responsibilities: + - Message history management + - Pending tool call tracking + - System context/prompt generation + - Instant response phrase generation + - Skill hook integration + """ + + def __init__(self, skills: list, settings, wingman_name: str = "Wingman"): + """Initialize the conversation manager. + + Args: + skills (list): List of skills that may have message hooks. + settings: Configuration settings (for debug mode). + wingman_name (str): Name of the wingman (for context building). + """ + self.skills = skills + self.settings = settings + self.wingman_name = wingman_name + self.messages = [] + self.pending_tool_calls = [] + + async def add_user_message(self, content: str, remember_messages: int = None): + """Adds a user message to the conversation history after cleanup. + + Args: + content (str): The message content to add. + remember_messages (int, optional): Number of user messages to keep. If None, no cleanup. + """ + # Call skill hooks (only for prepared/activated skills) + for skill in self.skills: + if skill.is_prepared: + await skill.on_add_user_message(content) + + # Cleanup old messages if needed + if remember_messages is not None: + await self.cleanup(remember_messages) + + msg = {"role": "user", "content": content} + self.messages.append(msg) + + async def add_assistant_message( + self, message: ChatCompletionMessage, tool_calls: list = None + ) -> None: + """Adds an assistant message (with potential tool calls) to the conversation history. + + Args: + message (ChatCompletionMessage): The message to add. + tool_calls (list): The tool calls associated with the message. + """ + # Call skill hooks (only for prepared/activated skills) + for skill in self.skills: + if skill.is_prepared: + await skill.on_add_assistant_message( + message.content, message.tool_calls + ) + + # Do not tamper with this message as it will lead to 400 errors! + self.messages.append(message) + + # Adding dummy tool responses to prevent corrupted message history on parallel requests + if tool_calls: + for tool_call in tool_calls: + if not tool_call.id: + continue + # Adding a dummy tool response to get updated later + self.add_tool_response(tool_call, "Loading..", completed=False) + + async def add_simple_assistant_message(self, content: str): + """Adds a simple assistant message (without tool calls) to the conversation history. + + Args: + content (str): The message content to add. + """ + # Call skill hooks (only for prepared/activated skills) + for skill in self.skills: + if skill.is_prepared: + await skill.on_add_assistant_message(content, []) + + msg = {"role": "assistant", "content": content} + self.messages.append(msg) + + async def add_forced_tool_calls( + self, + commands: list, + conversation_provider: ConversationProvider, + wingman_pro_deployment: str = None, + ): + """Adds forced assistant command calls to the conversation history. + + This is used for instant activation commands that should appear in the + conversation history as if the LLM called them. The tool calls are marked + as completed immediately with "OK" responses. + + Args: + commands (list[CommandConfig]): The commands to add as forced tool calls. + conversation_provider (ConversationProvider): The provider type (for ID generation). + wingman_pro_deployment (str, optional): The deployment name for WingmanPro. 
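+
+        Example (illustrative):
+
+            await conversation.add_forced_tool_calls(
+                commands=matched_commands,
+                conversation_provider=ConversationProvider.OPENAI,
+            )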
+ """ + if not commands: + return + + message = ChatCompletionMessage( + content="", + role="assistant", + tool_calls=[], + ) + + for command in commands: + tool_id = None + if conversation_provider == ConversationProvider.OPENAI: + tool_id = f"call_{str(uuid.uuid4()).replace('-', '')}" + elif conversation_provider == ConversationProvider.WINGMAN_PRO: + # Check if it's a GPT model + if wingman_pro_deployment and "gpt" in wingman_pro_deployment.lower(): + tool_id = f"call_{str(uuid.uuid4()).replace('-', '')}" + elif conversation_provider == ConversationProvider.GOOGLE: + tool_id = f"function-call-{''.join(random.choices('0123456789', k=20))}" + + # early exit for unsupported providers/models + if not tool_id: + return + + tool_call = ChatCompletionMessageToolCall( + id=tool_id, + function=ParsedFunction( + name="execute_command", + arguments=json.dumps({"command_name": command.name}), + ), + type="function", + ) + message.tool_calls.append(tool_call) + + # Add message to conversation + await self.add_assistant_message(message, message.tool_calls) + # Mark all tool calls as completed with "OK" response + if message.tool_calls: + for tool_call in message.tool_calls: + await self.update_tool_response(tool_call.id, "OK") + + def add_tool_response( + self, + tool_call: ChatCompletionMessageToolCall, + response: str, + completed: bool = True, + ): + """Adds a tool response to the conversation history. + + Args: + tool_call (ChatCompletionMessageToolCall): The tool call to add the response for. + response (str): The response content. + completed (bool): Whether the tool call is completed. If False, it's added to pending_tool_calls. + """ + msg = {"role": "tool", "content": response} + if tool_call.id is not None: + msg["tool_call_id"] = tool_call.id + if tool_call.function.name is not None: + msg["name"] = tool_call.function.name + self.messages.append(msg) + + if tool_call.id and not completed: + self.pending_tool_calls.append(tool_call.id) + + async def update_tool_response(self, tool_call_id: str, response: str) -> bool: + """Updates a tool response in the conversation history. + + This also moves the message block to the end of the history if all tool responses are completed. + + Args: + tool_call_id (str): The identifier of the tool call to update the response for. + response (str): The new response to set. + + Returns: + bool: True if the response was updated, False if the tool call was not found. 
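+
+        Example (illustrative):
+
+            updated = await conversation.update_tool_response(
+                tool_call_id=tool_call.id, response="Done"
+            )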
+        """
+        if not tool_call_id:
+            return False
+
+        completed = False
+        index = len(self.messages)
+
+        # Go through message history to find and update the tool call
+        for message in reversed(self.messages):
+            index -= 1
+            if (
+                self._get_message_role(message) == "tool"
+                and message.get("tool_call_id") == tool_call_id
+            ):
+                message["content"] = str(response)
+                if tool_call_id in self.pending_tool_calls:
+                    self.pending_tool_calls.remove(tool_call_id)
+                break
+        if not index:
+            return False
+
+        # Find the assistant message that triggered the tool call
+        for message in reversed(self.messages[:index]):
+            index -= 1
+            if self._get_message_role(message) == "assistant":
+                break
+
+        # Check if all tool calls are completed
+        completed = True
+        if self.messages[index].tool_calls:
+            for tool_call in self.messages[index].tool_calls:
+                if tool_call.id in self.pending_tool_calls:
+                    completed = False
+                    break
+        if not completed:
+            return True
+
+        # Find the first user message(s) that triggered this assistant message
+        index -= 1  # skip the assistant message
+        for message in reversed(self.messages[:index]):
+            index -= 1
+            if self._get_message_role(message) != "user":
+                index += 1
+                break
+
+        # Build the message block to move
+        start_index = index
+        end_index = start_index
+        reached_tool_call = False
+        for message in self.messages[start_index:]:
+            if not reached_tool_call and self._get_message_role(message) == "tool":
+                reached_tool_call = True
+            if reached_tool_call and self._get_message_role(message) == "user":
+                end_index -= 1
+                break
+            end_index += 1
+        if end_index == len(self.messages):
+            end_index -= 1  # loop ended at the end of the message history, so we have to go back one index
+        message_block = self.messages[start_index : end_index + 1]
+
+        # Check if the message block is already at the end
+        if end_index == len(self.messages) - 1:
+            return True
+
+        # Move message block to the end
+        del self.messages[start_index : end_index + 1]
+        self.messages.extend(message_block)
+
+        if self.settings.debug_mode:
+            await printr.print_async(
+                "Moved message block to the end.", color=LogType.INFO
+            )
+
+        return True
+
+    async def cleanup(self, remember_messages: int):
+        """Cleans up the conversation history by removing old messages beyond the keep limit.
+
+        Args:
+            remember_messages (int): Number of user messages to keep.
+
+        Returns:
+            int: Number of messages deleted.
+        """
+        if remember_messages is None or len(self.messages) == 0:
+            return 0  # Configuration not set, nothing to delete.
+
+        # Find the cutoff index where to end deletion, making sure to only count 'user' messages
+        # towards the limit starting with newest messages.
+        cutoff_index = len(self.messages)
+        user_message_count = 0
+        for message in reversed(self.messages):
+            if self._get_message_role(message) == "user":
+                user_message_count += 1
+                if user_message_count == remember_messages:
+                    break  # Found the cutoff point.
+            cutoff_index -= 1
+
+        # If messages below the keep limit, don't delete anything.
+        if user_message_count < remember_messages:
+            return 0
+
+        total_deleted_messages = cutoff_index  # Messages to delete.
+
+        # Remove the pending tool calls that are no longer needed.
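+        # (The IDs being removed belong to placeholder "Loading.." tool
+        # responses; once their messages are deleted, the IDs must not linger
+        # in pending_tool_calls.)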
+ for message in self.messages[:cutoff_index]: + if ( + self._get_message_role(message) == "tool" + and message.get("tool_call_id") in self.pending_tool_calls + ): + self.pending_tool_calls.remove(message.get("tool_call_id")) + if self.settings.debug_mode: + await printr.print_async( + f"Removing pending tool call {message.get('tool_call_id')} due to message history clean up.", + color=LogType.WARNING, + ) + + # Perform the deletion. + del self.messages[:cutoff_index] + + if self.settings.debug_mode and total_deleted_messages > 0: + await printr.print_async( + f"Removed {total_deleted_messages} messages from conversation history.", + color=LogType.INFO, + ) + + return total_deleted_messages + + def reset(self): + """Resets the conversation history and pending tool calls.""" + self.messages.clear() + self.pending_tool_calls.clear() + + def get_messages_copy(self) -> list: + """Returns a copy of the messages list. + + Returns: + list: A shallow copy of the messages list. + """ + return self.messages.copy() + + # ─────────────────── Context Building ─────────────────── # + + async def build_system_context( + self, + config: "WingmanConfig", + capability_registry: "CapabilityRegistry", + tower_config_name: Optional[str] = None, + ) -> str: + """Build the complete system prompt with backstory, skills, TTS instructions, and metadata. + + Args: + config: Wingman configuration with prompts and settings + capability_registry: Registry containing active skills and MCPs + tower_config_name: Name of the config directory (e.g., "Star Citizen") + + Returns: + Complete system context string + """ + # Build skill prompts (only for activated skills) + skill_prompts = "" + active_skill_names = set() + + # Get active skills from capability registry's skill registry + if hasattr(capability_registry, "skill_registry"): + active_skill_names = capability_registry.skill_registry.active_skill_names + + for skill in self.skills: + if skill.name not in active_skill_names: + continue + + skill_prompt = skill.config.prompt or "" + if not skill_prompt and skill.get_tools(): + # Auto-generate prompt from tool descriptions + tool_descriptions = [] + for tool in skill.get_tools(): + tool_descriptions.append( + f"- {tool.get('function', {}).get('name', 'unknown')}: " + f"{tool.get('function', {}).get('description', 'No description')}" + ) + skill_prompt = f"You have access to these tools:\n" + "\n".join( + tool_descriptions + ) + + if skill_prompt: + skill_prompts += f"\n\n## {skill.config.display_name}\n{skill_prompt}" + + # Build TTS-specific prompts + tts_prompt = "" + if config.features.tts_provider == TtsProvider.ELEVENLABS: + if ( + hasattr(config, "elevenlabs") + and config.elevenlabs.use_tts_prompt + and config.elevenlabs.tts_prompt + ): + # Use custom prompt if configured + tts_prompt = config.elevenlabs.tts_prompt + elif ( + hasattr(config, "elevenlabs") + and config.elevenlabs.tts_use_voice_effects + ): + # Use default voice effects prompt + tts_prompt = self._get_elevenlabs_prompt() + elif config.features.tts_provider == TtsProvider.INWORLD or ( + config.features.tts_provider == TtsProvider.WINGMAN_PRO + and hasattr(config, "wingman_pro") + and config.wingman_pro.tts_provider == WingmanProTtsProvider.INWORLD + ): + if ( + hasattr(config, "inworld") + and config.inworld.use_tts_prompt + and config.inworld.tts_prompt + ): + # Use custom prompt if configured + tts_prompt = config.inworld.tts_prompt + else: + # Use default Inworld emotions prompt + tts_prompt = self._get_inworld_prompt() + elif 
config.features.tts_provider == TtsProvider.OPENAI_COMPATIBLE:
+            if (
+                hasattr(config, "openai_compatible_tts")
+                and config.openai_compatible_tts.use_tts_prompt
+                and config.openai_compatible_tts.tts_prompt
+            ):
+                # Use custom prompt if configured
+                tts_prompt = config.openai_compatible_tts.tts_prompt
+            elif (
+                hasattr(config, "openai_compatible_tts")
+                and config.openai_compatible_tts.supports_audio_markups
+            ):
+                # Use default markups prompt
+                tts_prompt = self._get_openai_compatible_prompt()
+
+        # Add TTS header if there's a prompt
+        if tts_prompt:
+            tts_prompt = f"\n\n## Text-to-Speech Instructions\n{tts_prompt}"
+
+        # Build user metadata context
+        user_context = self._build_user_metadata(config, tower_config_name)
+
+        # Assemble final context
+        context = config.prompts.system_prompt.format(
+            backstory=config.prompts.backstory,
+            skills=skill_prompts,
+            ttsprompt=tts_prompt,
+            user_context=user_context,
+        )
+
+        return context
+
+    def _build_user_metadata(
+        self,
+        config: "WingmanConfig",
+        tower_config_name: Optional[str] = None,
+    ) -> str:
+        """Build user metadata for system context.
+
+        Includes timezone, config name, username, and wingman name.
+
+        Args:
+            config: Wingman configuration
+            tower_config_name: Name of the config directory
+
+        Returns:
+            Formatted metadata string
+        """
+        context_parts = []
+        backstory = config.prompts.backstory or ""
+        backstory_lower = backstory.lower()
+
+        # Date and timezone
+        try:
+            now = datetime.now()
+            local_tz = now.astimezone().tzinfo
+            timezone_name = local_tz.tzname(now) if local_tz else "Unknown"
+
+            context_parts.append(
+                f"Current date and time: {now.strftime('%Y-%m-%d %H:%M:%S')} ({timezone_name})"
+            )
+        except Exception:
+            pass
+
+        # Config/context name (helps LLM understand game/domain context)
+        if tower_config_name:
+            context_parts.append(f"Current context/game: {tower_config_name}")
+
+        # Username (only if not explicitly mentioned in backstory)
+        if self.settings.user_name:
+            user_name_lower = self.settings.user_name.lower()
+            if user_name_lower not in backstory_lower:
+                context_parts.append(f"User's name: {self.settings.user_name}")
+
+        # Wingman name
+        if self.wingman_name:
+            context_parts.append(f"Your name: {self.wingman_name}")
+
+        if context_parts:
+            return "\n".join(context_parts)
+        return "No additional context available."
+
+    def _get_elevenlabs_prompt(self) -> str:
+        """Get ElevenLabs TTS prompt for voice effects."""
+        return """You can use special TTS sound effects within your answers by wrapping them in asterisks like this:
+*clears throat* or *sighs*.
+Using sound effects makes your answers more realistic and alive.
+Always use appropriate sound effects if they match the context and emotion of your answer.
+Only use short sound effects that can be spoken in less than a second.
+Do not use asterisks for other purposes."""
+
+    def _get_inworld_prompt(self) -> str:
+        """Get Inworld TTS prompt for emotions."""
+        return """You can express emotions in your speech using Inworld emotion markups.
+Wrap emotion names in angle brackets like <happy>, <sad>, <surprised>, etc.
+The emotion will be applied to all text that follows it until a new emotion is specified.
+Examples:
+- "<surprised> I can't believe this happened!"
+- "<happy> That's wonderful news!"
+- "<sad> I'm sorry to hear that."
+Use emotions naturally to make your responses more expressive and human-like.
+Do not use angle brackets for other purposes."""
+
+    def _get_openai_compatible_prompt(self) -> str:
+        """Get OpenAI-compatible TTS prompt for audio markups."""
+        return """Your TTS provider supports audio markups for enhanced speech.
+You can use special annotations to control how text is spoken.
+Consult your TTS provider's documentation for supported markup syntax.
+Use markups sparingly to enhance important parts of your response."""
+
+    # ─────────────────── Instant Responses ─────────────────── #
+
+    # Instant response feature removed in v2.1.0 for simplification
+    # (get_random_filler and its response-rotation logic were deleted with it)
+
+    def _get_message_role(self, message) -> str:
+        """Gets the role of a message, handling both dict and object formats.
+
+        Args:
+            message: The message to get the role from.
+
+        Returns:
+            str: The role of the message.
+        """
+        if isinstance(message, dict):
+            return message.get("role", "")
+        return getattr(message, "role", "")
diff --git a/services/migrations/migration_200_to_210.py b/services/migrations/migration_200_to_210.py
new file mode 100644
index 00000000..2eb4f942
--- /dev/null
+++ b/services/migrations/migration_200_to_210.py
@@ -0,0 +1,64 @@
+"""Migration from version 2.0.0 to 2.1.0.
+
+Major changes:
+- Refactored provider architecture (BaseProvider pattern)
+- Image generation as proper provider capability
+- Benchmark helpers moved to Benchmark class
+- Unified provider registry with image generation support
+"""
+
+from services.migrations.base_migration import BaseMigration
+
+
+class Migration200To210(BaseMigration):
+    """Migration from 2.0.0 to 2.1.0."""
+
+    old_version = "2_0_0"
+    new_version = "2_1_0"
+
+    def migrate_settings(self, old: dict, new: dict) -> dict:
+        """Migrate settings.yaml from 2.0.0 to 2.1.0.
+
+        No breaking changes in settings for this version.
+        Provider architecture changes are backward compatible.
+
+        Args:
+            old: Old settings dictionary
+            new: New settings dictionary (template)
+
+        Returns:
+            Migrated settings dictionary
+        """
+        # No changes needed - all provider refactoring is backward compatible
+        return old
+
+    def migrate_defaults(self, old: dict, new: dict) -> dict:
+        """Migrate defaults.yaml from 2.0.0 to 2.1.0.
+
+        No breaking changes in defaults for this version.
+
+        Args:
+            old: Old defaults dictionary
+            new: New defaults dictionary (template)
+
+        Returns:
+            Migrated defaults dictionary
+        """
+        # No changes needed - config structure remains the same
+        return old
+
+    def migrate_wingman_config(self, wingman_name: str, old: dict, new: dict) -> dict:
+        """Migrate individual wingman config from 2.0.0 to 2.1.0.
+
+        No breaking changes in wingman configs for this version.
+ + Args: + wingman_name: Name of the wingman being migrated + old: Old wingman config dictionary + new: New wingman config dictionary (template) + + Returns: + Migrated wingman config dictionary + """ + # No changes needed - wingman config structure remains the same + return old diff --git a/services/module_manager.py b/services/module_manager.py index dbdbd735..bfb4040e 100644 --- a/services/module_manager.py +++ b/services/module_manager.py @@ -12,19 +12,13 @@ SkillBase, SkillConfig, SkillToolInfo, - WingmanConfig, ) -from providers.faster_whisper import FasterWhisper -from providers.whispercpp import Whispercpp -from providers.xvasynth import XVASynth -from services.audio_library import AudioLibrary -from services.audio_player import AudioPlayer -from services.file import get_writable_dir, get_custom_skills_dir +from services.file import get_custom_skills_dir from services.printr import Printr from skills.skill_base import Skill if TYPE_CHECKING: - from wingmen.wingman import Wingman + from wingman import Wingman from services.tower import Tower SKILLS_DIR = "skills" @@ -63,59 +57,6 @@ def get_module_name_and_path(module_string: str) -> tuple[str, str]: # module_path = path.join(module_path, module_name + ".py") return module_name, module_path - @staticmethod - def create_wingman_dynamically( - name: str, - config: WingmanConfig, - settings: SettingsConfig, - audio_player: AudioPlayer, - audio_library: AudioLibrary, - whispercpp: Whispercpp, - fasterwhisper: FasterWhisper, - xvasynth: XVASynth, - tower: "Tower", - ): - """Dynamically creates a Wingman instance from a module path and class name - - Args: - name (str): The name of the wingman. This is the key you gave it in the config, e.g. "atc" - config (WingmanConfig): All "general" config entries merged with the specific Wingman config settings. The Wingman takes precedence and overrides the general config. You can just add new keys to the config and they will be available here. - settings (SettingsConfig): The general user settings. - audio_player (AudioPlayer): The audio player handling the playback of audio files. - audio_library (AudioLibrary): The audio library handling the storage and retrieval of audio files. - whispercpp (Whispercpp): The Whispercpp provider for speech-to-text. - fasterwhisper (FasterWhisper): The FasterWhisper provider for speech-to-text. - xvasynth (XVASynth): The XVASynth provider for text-to-speech. - tower (Tower): The Tower instance, that manages loaded Wingmen. 
-        """
-
-        try:
-            # try to load from app dir first
-            module = import_module(config.custom_class.module)
-        except ModuleNotFoundError:
-            # split module into name and path
-            module_name, module_path = ModuleManager.get_module_name_and_path(
-                config.custom_class.module
-            )
-            module_path = path.join(get_writable_dir(module_path), module_name + ".py")
-            # load from alternative absolute file path
-            spec = util.spec_from_file_location(module_name, module_path)
-            module = util.module_from_spec(spec)
-            spec.loader.exec_module(module)
-        DerivedWingmanClass = getattr(module, config.custom_class.name)
-        instance = DerivedWingmanClass(
-            name=name,
-            config=config,
-            settings=settings,
-            audio_player=audio_player,
-            audio_library=audio_library,
-            whispercpp=whispercpp,
-            fasterwhisper=fasterwhisper,
-            xvasynth=xvasynth,
-            tower=tower,
-        )
-        return instance
-
     @staticmethod
     def load_skill(
         config: SkillConfig, settings: SettingsConfig, wingman: "Wingman"
diff --git a/services/printr.py b/services/printr.py
index 5be33b11..a0d6c805 100644
--- a/services/printr.py
+++ b/services/printr.py
@@ -126,7 +126,6 @@ async def __send_to_gui(
         caller_instance_name = caller_instance.__class__.__name__
         if (
             caller_instance_name == "Wingman"
-            or caller_instance_name == "OpenAiWingman"
         ):
             wingman_name = caller_instance.name
             break
@@ -233,6 +233,15 @@ def toast_warning(self, text: str):
     def toast_error(self, text: str):
         self.print(text, toast=ToastType.ERROR, color=LogType.ERROR)
 
+    async def broadcast(self, command):
+        """Public API for broadcasting commands to the UI.
+
+        Args:
+            command: Command object to broadcast (e.g., McpStateChangedCommand)
+        """
+        if self._connection_manager is not None:
+            await self._connection_manager.broadcast(command)
+
     # INTERNAL METHODS
 
     def get_terminal_color(self, tag: LogType):
diff --git a/services/provider_factory.py b/services/provider_factory.py
new file mode 100644
index 00000000..da5ef30f
--- /dev/null
+++ b/services/provider_factory.py
@@ -0,0 +1,687 @@
+"""Provider factory for creating provider instances.
+
+This factory handles instantiation of all provider types with proper
+configuration and secret retrieval. It maps provider enums to their
+corresponding classes and handles special cases like Azure multi-service
+and OpenAI-compatible providers.
+"""
+
+from typing import Optional
+from api.enums import (
+    SttProvider,
+    TtsProvider,
+    ConversationProvider,
+    ImageGenerationProvider,
+)
+from api.interface import WingmanConfig
+from providers.provider_base import BaseProvider
+from providers.open_ai import OpenAi, OpenAiAzure, OpenAiCompatibleTts, OpenRouter
+from providers.google import GoogleGenAI
+from providers.elevenlabs import ElevenLabs
+from providers.edge import Edge
+from providers.hume import Hume
+from providers.inworld import Inworld
+from providers.wingman_pro import WingmanPro
+from providers.whispercpp import Whispercpp
+from providers.faster_whisper import FasterWhisper
+from services.printr import Printr
+from services.secret_keeper import SecretKeeper
+
+printr = Printr()
+
+
+class ProviderFactory:
+    """Factory for creating provider instances with proper configuration.
+
+    This factory centralizes all provider instantiation logic, handling:
+    - Secret retrieval from SecretKeeper
+    - Configuration mapping from WingmanConfig
+    - Special cases (Azure, OpenAI-compatible, etc.)
+ - Singleton providers (shared across all wingmen) + - Error handling and logging + """ + + # Azure service types for multi-key management + AZURE_SERVICES = ["whisper", "speech", "tts", "conversation"] + + # Singleton provider instances (shared across all wingmen) + # Key: provider class name (e.g., "FasterWhisper", "Whispercpp") + # Value: provider instance + _singleton_instances: dict[str, BaseProvider] = {} + + # Mapping of STT provider enums to (class, secret_key, config_attr) + STT_PROVIDERS = { + SttProvider.OPENAI: (OpenAi, "openai", "openai"), + SttProvider.AZURE: (OpenAiAzure, "azure_whisper", "azure"), # Uses Whisper + SttProvider.AZURE_SPEECH: (OpenAiAzure, "azure_speech", "azure"), # Uses Speech + SttProvider.WINGMAN_PRO: (WingmanPro, "wingman_pro", "wingman_pro"), + SttProvider.WHISPERCPP: ( + Whispercpp, + None, + "whispercpp", + ), # No API key, uses HTTP + SttProvider.FASTER_WHISPER: ( + FasterWhisper, + None, + "fasterwhisper", + ), # No API key, local model + } + + # Mapping of TTS provider enums to (class, secret_key, config_attr) + TTS_PROVIDERS = { + TtsProvider.OPENAI: (OpenAi, "openai", "openai"), + TtsProvider.AZURE: (OpenAiAzure, "azure_tts", "azure"), + TtsProvider.OPENAI_COMPATIBLE: ( + OpenAiCompatibleTts, + "openai_compatible_tts", + "openai_compatible_tts", + ), + TtsProvider.ELEVENLABS: (ElevenLabs, "elevenlabs", "elevenlabs"), + TtsProvider.EDGE_TTS: (Edge, None, "edge_tts"), # No API key required + TtsProvider.HUME: (Hume, "hume", "hume"), + TtsProvider.INWORLD: (Inworld, "inworld", "inworld"), + TtsProvider.WINGMAN_PRO: (WingmanPro, "wingman_pro", "wingman_pro"), + # TtsProvider.XVASYNTH: Not yet migrated to BaseProvider + } + + # Mapping of conversation provider enums to (class, secret_key, config_attr) + CONVERSATION_PROVIDERS = { + ConversationProvider.OPENAI: (OpenAi, "openai", "openai"), + ConversationProvider.AZURE: (OpenAiAzure, "azure_conversation", "azure"), + ConversationProvider.GOOGLE: (GoogleGenAI, "google", "google"), + ConversationProvider.WINGMAN_PRO: (WingmanPro, "wingman_pro", "wingman_pro"), + # OpenAI-compatible providers (use OpenAi class with custom base_url) + ConversationProvider.MISTRAL: (OpenAi, "mistral", "mistral"), + ConversationProvider.GROQ: (OpenAi, "groq", "groq"), + ConversationProvider.CEREBRAS: (OpenAi, "cerebras", "cerebras"), + ConversationProvider.OPENROUTER: (OpenRouter, "openrouter", "openrouter"), + ConversationProvider.LOCAL_LLM: (OpenAi, "local_llm", "local_llm"), + ConversationProvider.PERPLEXITY: (OpenAi, "perplexity", "perplexity"), + ConversationProvider.XAI: (OpenAi, "xai", "xai"), + } + + # Mapping of image generation provider enums to (class, secret_key, config_attr) + IMAGE_GEN_PROVIDERS = { + ImageGenerationProvider.WINGMAN_PRO: (WingmanPro, "wingman_pro", "wingman_pro"), + # Future: ImageGenerationProvider.OPENAI: (OpenAi, "openai", "openai"), + } + + def __init__( + self, + config: WingmanConfig, + secret_keeper: SecretKeeper, + wingman_name: str, + settings, + app_root_path: str = None, + app_is_bundled: bool = False, + ): + """Initialize the factory. 
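+
+        Construction only stores references; no providers are created here.
+        Providers are instantiated on demand via the async create_* methods,
+        which retrieve API keys from the SecretKeeper.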
+ + Args: + config: Wingman configuration + secret_keeper: SecretKeeper for retrieving API keys + wingman_name: Name of the wingman (for secret retrieval) + settings: Global settings (includes WingmanProSettings) + app_root_path: Root path of the application (for FasterWhisper models) + app_is_bundled: Whether the app is bundled (PyInstaller) + """ + self.config = config + self.secret_keeper = secret_keeper + self.wingman_name = wingman_name + self.settings = settings + self.app_root_path = app_root_path + self.app_is_bundled = app_is_bundled + + # Cache for Azure provider (shared across STT/TTS/LLM) + self._azure_provider: Optional[BaseProvider] = None + self._azure_keys_retrieved = False + + @classmethod + def register_singleton(cls, provider_name: str, provider_instance: BaseProvider): + """Register a singleton provider instance. + + Singleton providers are shared across all wingmen to save resources. + Examples: FasterWhisper (model caching), Whispercpp (HTTP client). + + Args: + provider_name: Unique name for the provider (e.g., "FasterWhisper") + provider_instance: The provider instance to register + """ + cls._singleton_instances[provider_name] = provider_instance + printr.print( + f"Singleton provider '{provider_name}' registered", + server_only=True, + ) + + @classmethod + def get_singleton(cls, provider_name: str) -> Optional[BaseProvider]: + """Get a singleton provider instance if it exists. + + Args: + provider_name: Name of the singleton provider + + Returns: + Provider instance or None if not registered + """ + return cls._singleton_instances.get(provider_name) + + async def create_stt_provider( + self, provider_enum: SttProvider + ) -> Optional[BaseProvider]: + """Create an STT provider instance. + + Args: + provider_enum: STT provider enum value + + Returns: + Provider instance or None on failure + """ + if provider_enum not in self.STT_PROVIDERS: + printr.print( + f"STT provider '{provider_enum.value}' not yet migrated to BaseProvider", + server_only=True, + ) + return None + + provider_class, secret_key, config_attr = self.STT_PROVIDERS[provider_enum] + + # Special handling for Azure (shared instance with all keys) + if provider_enum in (SttProvider.AZURE, SttProvider.AZURE_SPEECH): + return await self._get_or_create_azure_provider() + + # Special handling for WingmanPro (requires wingman_name) + if provider_enum == SttProvider.WINGMAN_PRO: + return await self._create_wingman_pro_provider(config_attr, secret_key) + + # Special handling for FasterWhisper (requires app paths and wingman_name) + if provider_enum == SttProvider.FASTER_WHISPER: + return self._create_faster_whisper_provider(config_attr) + + # Special handling for Whispercpp (needs settings from voice_activation) + if provider_enum == SttProvider.WHISPERCPP: + return self._create_whispercpp_provider(config_attr) + + # Retrieve API key (if required) + api_key = None + if secret_key: + api_key = await self._retrieve_secret(secret_key) + if not api_key: + return None + + # Get provider config + provider_config = getattr(self.config, config_attr, None) + if not provider_config: + printr.print( + f"Missing config for STT provider '{provider_enum.value}'", + server_only=True, + ) + return None + + # Create provider + try: + # Handle OpenAI-compatible providers with custom base_url + if hasattr(provider_config, "endpoint"): + provider = provider_class( + config=provider_config, + api_key=api_key, + base_url=provider_config.endpoint, + ) + else: + provider = provider_class( + config=provider_config, + api_key=api_key, + 
organization=getattr(provider_config, "organization", None), + base_url=getattr(provider_config, "base_url", None), + ) + + return provider + except Exception as e: + printr.print( + f"Failed to create STT provider '{provider_enum.value}': {str(e)}", + server_only=True, + ) + return None + + def _create_faster_whisper_provider( + self, config_attr: str + ) -> Optional[BaseProvider]: + """Create FasterWhisper provider as singleton. + + Args: + config_attr: Config attribute name (e.g., "fasterwhisper") + + Returns: + FasterWhisper provider instance or None on failure + """ + # Check if singleton already exists + existing = self.get_singleton("FasterWhisper") + if existing: + return existing + + # Get provider config from settings (FasterWhisperSettings with model_size, device, etc.) + # NOT from wingman config (which has FasterWhisperSttConfig with transcription params) + provider_config = getattr(self.settings.voice_activation, config_attr, None) + if not provider_config: + printr.print( + "Missing FasterWhisper settings in voice_activation", + server_only=True, + ) + return None + + # Create FasterWhisper provider + try: + provider = FasterWhisper( + config=provider_config, + api_key=None, + app_root_path=self.app_root_path, + app_is_bundled=self.app_is_bundled, + wingman_name=self.wingman_name, + ) + # Register as singleton + self.register_singleton("FasterWhisper", provider) + return provider + except Exception as e: + printr.print( + f"Failed to create FasterWhisper provider: {str(e)}", + server_only=True, + ) + return None + + def _create_whispercpp_provider(self, config_attr: str) -> Optional[BaseProvider]: + """Create Whispercpp provider as singleton. + + Args: + config_attr: Config attribute name (e.g., "whispercpp") + + Returns: + Whispercpp provider instance or None on failure + """ + # Check if singleton already exists + existing = self.get_singleton("Whispercpp") + if existing: + return existing + + # Get provider config from settings (WhispercppSettings with host, port, enable) + # NOT from wingman config (which has WhispercppSttConfig with temperature) + provider_config = getattr(self.settings.voice_activation, config_attr, None) + if not provider_config: + printr.print( + "Missing Whispercpp settings in voice_activation", + server_only=True, + ) + return None + + # Create Whispercpp provider + try: + provider = Whispercpp( + config=provider_config, + api_key=None, + ) + # Register as singleton + self.register_singleton("Whispercpp", provider) + return provider + except Exception as e: + printr.print( + f"Failed to create Whispercpp provider: {str(e)}", + server_only=True, + ) + return None + + async def create_tts_provider( + self, provider_enum: TtsProvider + ) -> Optional[BaseProvider]: + """Create a TTS provider instance. 
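+
+        Illustrative usage (Edge TTS requires no API key):
+
+            tts = await factory.create_tts_provider(TtsProvider.EDGE_TTS)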
+ + Args: + provider_enum: TTS provider enum value + + Returns: + Provider instance or None on failure + """ + if provider_enum not in self.TTS_PROVIDERS: + printr.print( + f"TTS provider '{provider_enum.value}' not yet migrated to BaseProvider", + server_only=True, + ) + return None + + provider_class, secret_key, config_attr = self.TTS_PROVIDERS[provider_enum] + + # Special handling for Azure (shared instance) + if provider_enum == TtsProvider.AZURE: + return await self._get_or_create_azure_provider() + + # Special handling for WingmanPro (requires wingman_name) + if provider_enum == TtsProvider.WINGMAN_PRO: + return await self._create_wingman_pro_provider(config_attr, secret_key) + + # Special handling for Edge TTS (no API key required) + if provider_enum == TtsProvider.EDGE_TTS: + provider_config = getattr(self.config, config_attr, None) + if not provider_config: + printr.print( + f"Missing config for TTS provider '{provider_enum.value}'", + server_only=True, + ) + return None + try: + return provider_class(config=provider_config) + except Exception as e: + printr.print( + f"Failed to create TTS provider '{provider_enum.value}': {str(e)}", + server_only=True, + ) + return None + + # Retrieve API key (None means no key required) + if secret_key is not None: + api_key = await self._retrieve_secret(secret_key) + if not api_key: + return None + else: + api_key = None + + # Get provider config + provider_config = getattr(self.config, config_attr, None) + if not provider_config: + printr.print( + f"Missing config for TTS provider '{provider_enum.value}'", + server_only=True, + ) + return None + + # Create provider + try: + if provider_class == OpenAiCompatibleTts: + # OpenAI Compatible TTS has different signature + provider = provider_class( + api_key=api_key, + base_url=provider_config.base_url, + ) + elif provider_class in (ElevenLabs, Hume, Inworld): + # These providers need wingman_name + provider = provider_class( + config=provider_config, + api_key=api_key, + wingman_name=self.wingman_name, + ) + elif hasattr(provider_config, "endpoint"): + # OpenAI-compatible with custom endpoint + provider = provider_class( + config=provider_config, + api_key=api_key, + base_url=provider_config.endpoint, + ) + else: + # Standard OpenAI + provider = provider_class( + config=provider_config, + api_key=api_key, + organization=getattr(provider_config, "organization", None), + base_url=getattr(provider_config, "base_url", None), + ) + return provider + except Exception as e: + printr.print( + f"Failed to create TTS provider '{provider_enum.value}': {str(e)}", + server_only=True, + ) + return None + + async def create_conversation_provider( + self, provider_enum: ConversationProvider + ) -> Optional[BaseProvider]: + """Create a conversation (LLM) provider instance. 
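+
+        Illustrative usage:
+
+            llm = await factory.create_conversation_provider(
+                ConversationProvider.OPENAI
+            )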
+ + Args: + provider_enum: Conversation provider enum value + + Returns: + Provider instance or None on failure + """ + if provider_enum not in self.CONVERSATION_PROVIDERS: + printr.print( + f"Conversation provider '{provider_enum.value}' not yet migrated to BaseProvider", + server_only=True, + ) + return None + + provider_class, secret_key, config_attr = self.CONVERSATION_PROVIDERS[ + provider_enum + ] + + # Special handling for Azure (shared instance) + if provider_enum == ConversationProvider.AZURE: + return await self._get_or_create_azure_provider() + + # Special handling for WingmanPro (requires wingman_name) + if provider_enum == ConversationProvider.WINGMAN_PRO: + return await self._create_wingman_pro_provider(config_attr, secret_key) + + # Retrieve API key + api_key = await self._retrieve_secret(secret_key) + if not api_key: + return None + + # Get provider config + provider_config = getattr(self.config, config_attr, None) + if not provider_config: + printr.print( + f"Missing config for conversation provider '{provider_enum.value}'", + server_only=True, + ) + return None + + # Create provider + try: + if provider_class == GoogleGenAI: + # Google has simpler constructor + provider = provider_class( + config=provider_config, + api_key=api_key, + ) + elif hasattr(provider_config, "endpoint") and provider_config.endpoint: + provider = provider_class( + config=provider_config, + api_key=api_key, + base_url=provider_config.endpoint, + ) + else: + # Standard OpenAI + provider = provider_class( + config=provider_config, + api_key=api_key, + organization=getattr(provider_config, "organization", None), + base_url=getattr(provider_config, "base_url", None), + ) + return provider + except Exception as e: + printr.print( + f"Failed to create conversation provider '{provider_enum.value}': {str(e)}", + server_only=True, + ) + return None + + async def create_image_provider( + self, provider_enum: ImageGenerationProvider + ) -> Optional[BaseProvider]: + """Create an image generation provider instance. + + Args: + provider_enum: Image generation provider enum value + + Returns: + Provider instance or None on failure + """ + if provider_enum not in self.IMAGE_GEN_PROVIDERS: + printr.print( + f"Image generation provider '{provider_enum.value}' not yet supported", + server_only=True, + ) + return None + + provider_class, secret_key, config_attr = self.IMAGE_GEN_PROVIDERS[ + provider_enum + ] + + # Special handling for WingmanPro (requires wingman_name) + if provider_enum == ImageGenerationProvider.WINGMAN_PRO: + return await self._create_wingman_pro_provider(config_attr, secret_key) + + # Future: Handle other image generation providers (OpenAI DALL-E, etc.) + # Retrieve API key + api_key = await self._retrieve_secret(secret_key) + if not api_key: + return None + + # Get provider config + provider_config = getattr(self.config, config_attr, None) + if not provider_config: + printr.print( + f"Missing config for image generation provider '{provider_enum.value}'", + server_only=True, + ) + return None + + # Create provider + try: + provider = provider_class( + config=provider_config, + api_key=api_key, + ) + return provider + except Exception as e: + printr.print( + f"Failed to create image generation provider '{provider_enum.value}': {str(e)}", + server_only=True, + ) + return None + + # Private helper methods + + async def _retrieve_secret(self, secret_name: str) -> Optional[str]: + """Retrieve a secret from SecretKeeper. 
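+
+        Secrets are fetched with prompt_if_missing=False, so the factory never
+        blocks on user input; missing keys are surfaced during the validation
+        phase instead.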
+ + Args: + secret_name: Name of the secret to retrieve + + Returns: + Secret value or None if not found + """ + try: + api_key = await self.secret_keeper.retrieve( + requester=self.wingman_name, + key=secret_name, + prompt_if_missing=False, # Factory doesn't prompt - validation phase does + ) + return api_key + except Exception as e: + printr.print( + f"Error retrieving secret '{secret_name}': {str(e)}", + server_only=True, + ) + return None + + async def _get_or_create_azure_provider(self) -> Optional[BaseProvider]: + """Get or create the shared Azure provider with all service keys. + + Azure uses a single provider instance that stores keys for all services: + - whisper (STT via Azure OpenAI Whisper) + - speech (STT via Azure Cognitive Services) + - tts (TTS via Azure Cognitive Services) + - conversation (LLM via Azure OpenAI) + + This method caches the provider to avoid recreating it multiple times. + + Returns: + Azure provider instance or None on failure + """ + # Return cached instance if available + if self._azure_provider is not None: + return self._azure_provider + + # Only retrieve keys once + if not self._azure_keys_retrieved: + # Retrieve all four service keys + whisper_key = await self._retrieve_secret("azure_whisper") + speech_key = await self._retrieve_secret("azure_speech") + tts_key = await self._retrieve_secret("azure_tts") + conversation_key = await self._retrieve_secret("azure_conversation") + + self._azure_keys_retrieved = True + + # Check if we have at least one key + if not any([whisper_key, speech_key, tts_key, conversation_key]): + printr.print( + "No Azure API keys found for any service", + server_only=True, + ) + return None + + # Check if Azure config exists + if not hasattr(self.config, "azure") or not self.config.azure: + printr.print( + "Missing Azure configuration", + server_only=True, + ) + return None + + # Create unified Azure provider with all keys + try: + self._azure_provider = OpenAiAzure( + config=self.config, + whisper_api_key=whisper_key, + speech_api_key=speech_key, + tts_api_key=tts_key, + llm_api_key=conversation_key, + ) + except Exception as e: + printr.print( + f"Failed to create Azure provider: {str(e)}", + server_only=True, + ) + return None + + return self._azure_provider + + async def _create_wingman_pro_provider( + self, config_attr: str, secret_key: str + ) -> Optional[BaseProvider]: + """Create WingmanPro provider with special wingman_name parameter. 
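+
+        Unlike most providers, WingmanPro reads its base_url and region from
+        the global settings (settings.wingman_pro) rather than from the
+        wingman config.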
+ + Args: + config_attr: Config attribute name (e.g., "wingman_pro") + secret_key: Secret key name for API token + + Returns: + WingmanPro provider instance or None on failure + """ + # Retrieve API key + api_key = await self._retrieve_secret(secret_key) + if not api_key: + return None + + # Get provider settings (global WingmanProSettings with base_url and region) + provider_config = self.settings.wingman_pro + if not provider_config: + printr.print( + f"Missing WingmanPro settings", + server_only=True, + ) + return None + + # Create WingmanPro provider + try: + provider = WingmanPro( + wingman_config=self.config, + provider_settings=provider_config, + api_key=api_key, + wingman_name=self.wingman_name, + ) + return provider + except Exception as e: + printr.print( + f"Failed to create WingmanPro provider: {str(e)}", + server_only=True, + ) + return None diff --git a/services/provider_registry.py b/services/provider_registry.py new file mode 100644 index 00000000..6830381a --- /dev/null +++ b/services/provider_registry.py @@ -0,0 +1,222 @@ +"""Provider registry for managing providers by capability. + +This registry centralizes provider management, storing providers by their +capabilities (STT, TTS, LLM) and providing convenient access methods. +""" + +from typing import Optional +from api.interface import WingmanConfig, SettingsConfig +from providers.provider_base import BaseProvider +from services.provider_factory import ProviderFactory +from services.printr import Printr +from services.secret_keeper import SecretKeeper + +printr = Printr() + + +class ProviderRegistry: + """Registry for managing providers by capability. + + This registry uses ProviderFactory to instantiate providers based on + the wingman's configuration, then stores them organized by capability + for efficient access during runtime. + + Key features: + - Async initialization (provider creation involves secret retrieval) + - Sync getters (no async overhead during usage) + - Capability-based organization (STT, TTS, LLM) + - Handles missing providers gracefully + """ + + def __init__( + self, + config: WingmanConfig, + secret_keeper: SecretKeeper, + wingman_name: str, + settings: SettingsConfig, + app_root_path: str = None, + app_is_bundled: bool = False, + ): + """Initialize the registry (does not create providers yet). + + Args: + config: Wingman configuration + secret_keeper: SecretKeeper for retrieving API keys + wingman_name: Name of the wingman (for logging) + settings: Global settings (includes WingmanProSettings) + app_root_path: Root path of the application (for FasterWhisper models) + app_is_bundled: Whether the app is bundled (PyInstaller) + """ + self.config = config + self.secret_keeper = secret_keeper + self.wingman_name = wingman_name + self.settings = settings + + # Provider storage by capability + self._stt_provider: Optional[BaseProvider] = None + self._tts_provider: Optional[BaseProvider] = None + self._llm_provider: Optional[BaseProvider] = None + self._image_provider: Optional[BaseProvider] = None + + # Factory for creating providers + self._factory = ProviderFactory( + config=config, + secret_keeper=secret_keeper, + wingman_name=wingman_name, + settings=settings, + app_root_path=app_root_path, + app_is_bundled=app_is_bundled, + ) + + async def initialize_from_config(self): + """Initialize providers based on wingman configuration. + + This method: + 1. Reads provider selections from config.features + 2. Uses ProviderFactory to create provider instances + 3. Stores providers by capability + 4. 
Logs any initialization failures + + This is async because provider creation involves secret retrieval. + """ + # Initialize STT provider + if hasattr(self.config.features, "stt_provider"): + stt_enum = self.config.features.stt_provider + self._stt_provider = await self._factory.create_stt_provider(stt_enum) + if self._stt_provider: + printr.print( + f"STT provider '{stt_enum.value}' initialized", + server_only=True, + ) + + # Initialize TTS provider + if hasattr(self.config.features, "tts_provider"): + tts_enum = self.config.features.tts_provider + self._tts_provider = await self._factory.create_tts_provider(tts_enum) + if self._tts_provider: + printr.print( + f"TTS provider '{tts_enum.value}' initialized", + server_only=True, + ) + + # Initialize conversation (LLM) provider + if hasattr(self.config.features, "conversation_provider"): + llm_enum = self.config.features.conversation_provider + self._llm_provider = await self._factory.create_conversation_provider( + llm_enum + ) + if self._llm_provider: + printr.print( + f"LLM provider '{llm_enum.value}' initialized", + server_only=True, + ) + + # Initialize image generation provider + if hasattr(self.config.features, "image_generation_provider"): + img_enum = self.config.features.image_generation_provider + self._image_provider = await self._factory.create_image_provider(img_enum) + if self._image_provider: + printr.print( + f"Image generation provider '{img_enum.value}' initialized", + server_only=True, + ) + + # Sync getters (no async overhead during runtime) + + def get_stt_provider(self) -> Optional[BaseProvider]: + """Get the configured STT provider. + + Returns: + STT provider instance or None if not configured + """ + return self._stt_provider + + def get_tts_provider(self) -> Optional[BaseProvider]: + """Get the configured TTS provider. + + Returns: + TTS provider instance or None if not configured + """ + return self._tts_provider + + def get_llm_provider(self) -> Optional[BaseProvider]: + """Get the configured LLM provider. + + Returns: + LLM provider instance or None if not configured + """ + return self._llm_provider + + def get_image_provider(self) -> Optional[BaseProvider]: + """Get the configured image generation provider. + + Returns: + Image generation provider instance or None if not configured + """ + return self._image_provider + + # Availability checks + + def has_stt(self) -> bool: + """Check if an STT provider is available. + + Returns: + True if STT provider is configured and initialized + """ + return self._stt_provider is not None + + def has_tts(self) -> bool: + """Check if a TTS provider is available. + + Returns: + True if TTS provider is configured and initialized + """ + return self._tts_provider is not None + + def has_llm(self) -> bool: + """Check if an LLM provider is available. + + Returns: + True if LLM provider is configured and initialized + """ + return self._llm_provider is not None + + def has_image_gen(self) -> bool: + """Check if an image generation provider is available. + + Returns: + True if image generation provider is configured and initialized + """ + return self._image_provider is not None + + # Utility methods + + def get_provider_summary(self) -> dict: + """Get a summary of configured providers. 
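+
+        Example (illustrative; actual class names depend on the wingman's config):
+
+            >>> registry.get_provider_summary()
+            {'stt': 'FasterWhisper', 'tts': 'Edge', 'llm': 'OpenAi', 'image_gen': None}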
+ + Returns: + Dict with provider names by capability + """ + return { + "stt": ( + self._stt_provider.__class__.__name__ if self._stt_provider else None + ), + "tts": ( + self._tts_provider.__class__.__name__ if self._tts_provider else None + ), + "llm": ( + self._llm_provider.__class__.__name__ if self._llm_provider else None + ), + "image_gen": ( + self._image_provider.__class__.__name__ + if self._image_provider + else None + ), + } + + def clear(self): + """Clear all providers (for cleanup/reinitialization).""" + self._stt_provider = None + self._tts_provider = None + self._llm_provider = None + self._image_provider = None diff --git a/services/settings_service.py b/services/settings_service.py index f0b24b3e..8b8cb245 100644 --- a/services/settings_service.py +++ b/services/settings_service.py @@ -8,8 +8,6 @@ AudioDeviceSettings, SettingsConfig, ) -from providers.faster_whisper import FasterWhisper -from providers.whispercpp import Whispercpp from providers.xvasynth import XVASynth from services.config_manager import ConfigManager from services.config_service import ConfigService @@ -25,8 +23,6 @@ def __init__(self, config_manager: ConfigManager, config_service: ConfigService) self.converted_audio_settings = False self.settings = self.get_settings() self.settings_events = PubSub() - self.whispercpp: Whispercpp = None - self.fasterwhisper: FasterWhisper = None self.xvasynth: XVASynth = None self.router = APIRouter() @@ -52,11 +48,7 @@ def __init__(self, config_manager: ConfigManager, config_service: ConfigService) tags=tags, ) - def initialize( - self, whispercpp: Whispercpp, fasterwhisper: FasterWhisper, xvasynth: XVASynth - ): - self.whispercpp = whispercpp - self.fasterwhisper = fasterwhisper + def initialize(self, xvasynth: XVASynth): self.xvasynth = xvasynth # GET /settings @@ -87,23 +79,8 @@ async def save_settings(self, settings: SettingsConfig): ): await self.set_audio_devices(settings.audio.input, settings.audio.output) - # whispercpp - if not self.whispercpp: - self.printr.toast_error( - "Whispercpp is not initialized. Please run SettingsService.initialize()", - ) - return - self.whispercpp.update_settings(settings=settings.voice_activation.whispercpp) - - # FasterWhisper - if not self.fasterwhisper: - self.printr.toast_error( - "FasterWhisper is not initialized. 
Please run SettingsService.initialize()", - ) - return - self.fasterwhisper.update_settings( - settings=settings.voice_activation.fasterwhisper - ) + # Note: Whispercpp and FasterWhisper are now managed as singletons by ProviderFactory + # Settings changes will take effect when providers are next accessed # XVASynth if not self.xvasynth: diff --git a/services/skill_registry.py b/services/skill_registry.py index c432284e..f4be956e 100644 --- a/services/skill_registry.py +++ b/services/skill_registry.py @@ -389,7 +389,7 @@ async def execute_meta_tool( skill_name = parameters.get("skill_name", "") success, message, _ = await self.activate_skill(skill_name) # Return success status and whether tools changed - # Note: needs_validation is handled async in OpenAiWingman + # Note: needs_validation is handled async in Wingman return message, success elif tool_name == "list_active_skills": diff --git a/services/system_manager.py b/services/system_manager.py index 14424970..a541bad7 100644 --- a/services/system_manager.py +++ b/services/system_manager.py @@ -8,7 +8,7 @@ from api.enums import LogType from api.interface import SystemCore, SystemInfo -LOCAL_VERSION = "2.0.0" +LOCAL_VERSION = "2.1.0" VERSION_ENDPOINT = "https://wingman-ai.com/api/version" diff --git a/services/tool_executor.py b/services/tool_executor.py new file mode 100644 index 00000000..3b63071c --- /dev/null +++ b/services/tool_executor.py @@ -0,0 +1,426 @@ +""" +Tool Executor Service + +Handles execution of tool calls from LLM responses, routing to appropriate handlers: +- Meta-tools (capability/skill/MCP discovery and activation) +- Skill tools (user-defined skill functions) +- Commands (instant activation commands) +- MCP tools (Model Context Protocol server tools) + +Extracts tool execution logic from Wingman for better separation of concerns. +""" + +import json +import time +import traceback +from api.enums import LogType +from services.benchmark import Benchmark +from services.printr import Printr + +printr = Printr() + + +class ToolExecutor: + """Executes tool calls by routing to meta-tools, skills, commands, or MCP tools.""" + + def __init__( + self, + capability_registry, + skill_registry, + mcp_registry, + tool_skills: dict, + get_command_func, + execute_command_func, + select_command_response_func, + play_to_user_func, + settings, + ): + """Initialize the tool executor with required registries and callbacks. + + Args: + capability_registry: Unified capability registry (skills + MCP) + skill_registry: Skill registry for progressive disclosure + mcp_registry: MCP server registry + tool_skills: Dict mapping tool names to skill instances + get_command_func: Function to get command by name + execute_command_func: Function to execute a command + select_command_response_func: Function to select command response + play_to_user_func: Function to play audio to user + settings: Configuration settings (for debug mode) + """ + self.capability_registry = capability_registry + self.skill_registry = skill_registry + self.mcp_registry = mcp_registry + self.tool_skills = tool_skills + self.get_command = get_command_func + self._execute_command = execute_command_func + self._select_command_response = select_command_response_func + self.play_to_user = play_to_user_func + self.settings = settings + + async def fix_tool_calls(self, tool_calls): + """Fixes tool calls that have a command name as function name. + + Some LLMs incorrectly return command names directly as function names + instead of using the execute_command function. 
This method fixes those
+        by converting them to proper execute_command calls.
+
+        Args:
+            tool_calls (list): The tool calls to fix.
+
+        Returns:
+            list: The fixed tool calls.
+        """
+        if not tool_calls or len(tool_calls) == 0:
+            return tool_calls
+
+        for tool_call in tool_calls:
+            function_name = tool_call.function.name
+            function_args = (
+                tool_call.function.arguments
+                # Mistral returns a dict
+                if isinstance(tool_call.function.arguments, dict)
+                # OpenAI returns a string
+                else json.loads(tool_call.function.arguments)
+            )
+
+            # try to resolve function name to a command name
+            if (len(function_args) == 0 and self.get_command(function_name)) or (
+                len(function_args) == 1
+                and "command_name" in function_args
+                and self.get_command(function_args["command_name"])
+                and function_name == function_args["command_name"]
+            ):
+                function_args["command_name"] = function_name
+                function_name = "execute_command"
+
+                # update the tool call
+                tool_call.function.name = function_name
+                tool_call.function.arguments = json.dumps(function_args)
+
+                if self.settings.debug_mode:
+                    await printr.print_async(
+                        "Applied command call fix.", color=LogType.WARNING
+                    )
+
+        return tool_calls
+
+    async def execute_tool_call(
+        self, function_name: str, function_args: dict
+    ) -> tuple[str, str | None, object | None, str | None]:
+        """Execute a single tool call.
+
+        Args:
+            function_name: The name of the function/tool to execute
+            function_args: The arguments to pass to the function
+
+        Returns:
+            tuple: (function_response, instant_response, used_skill, tool_label)
+                - function_response (str): Text response for LLM conversation history
+                - instant_response (str | None): Immediate audio response to play to user
+                - used_skill (Skill | None): The skill that was used, if any
+                - tool_label (str | None): Label for benchmark timing, or None for meta-tools
+        """
+        # Handle unified capability meta-tools (activate_capability, list_active_capabilities)
+        if self.capability_registry.is_meta_tool(function_name):
+            return await self._execute_capability_meta_tool(
+                function_name, function_args
+            )
+
+        # Handle legacy meta-tools for backward compatibility
+        if self.skill_registry.is_meta_tool(function_name):
+            return await self._execute_skill_meta_tool(function_name, function_args)
+
+        # Handle MCP meta-tools for server discovery/activation
+        if self.mcp_registry.is_meta_tool(function_name):
+            return await self._execute_mcp_meta_tool(function_name, function_args)
+
+        # Handle MCP server tools (prefixed with mcp_)
+        if self.mcp_registry.is_mcp_tool(function_name):
+            return await self._execute_mcp_tool(function_name, function_args)
+
+        # Handle instant activation commands
+        if function_name == "execute_command":
+            return await self._execute_instant_command(function_args)
+
+        # Handle skill tools
+        if function_name in self.tool_skills:
+            return await self._execute_skill_tool(function_name, function_args)
+
+        # Unknown tool
+        return f"Unknown tool: {function_name}", None, None, None
+
+    async def execute_batch(
+        self, tool_calls
+    ) -> tuple[
+        str | None, object | None, list[tuple[str, float]], list[tuple[object, str]]
+    ]:
+        """Execute a batch of tool calls.
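+
+        Example (illustrative; `executor` and `message` stand for a ToolExecutor
+        instance and an LLM response message carrying tool calls):
+
+            instant, skill, timings, results = await executor.execute_batch(
+                message.tool_calls
+            )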
+ + Args: + tool_calls: List of tool call objects with id, function.name, and function.arguments + + Returns: + tuple: (instant_response, used_skill, tool_timings, results) + - instant_response (str | None): First instant response encountered (stops batch) + - used_skill (Skill | None): Last skill that was used + - tool_timings (list): List of (label, time_ms) tuples for benchmark tracking + - results (list): List of (tool_call, function_response) tuples + """ + instant_response = None + used_skill = None + tool_timings: list[tuple[str, float]] = [] + results: list[tuple[object, str]] = [] + + for tool_call in tool_calls: + try: + function_name = tool_call.function.name + function_args = self._parse_arguments(tool_call.function.arguments) + + # Time the individual tool execution + tool_start = time.perf_counter() + ( + function_response, + instant_resp, + skill, + tool_label, + ) = await self.execute_tool_call(function_name, function_args) + tool_time_ms = (time.perf_counter() - tool_start) * 1000 + + # Add timing if we got a label (actual tool execution, not meta-tool) + if tool_label: + tool_timings.append((tool_label, tool_time_ms)) + + # Update skill tracking + if skill: + used_skill = skill + + # Store result + results.append((tool_call, function_response)) + + # If we got an instant response, store it and stop processing + if instant_resp: + instant_response = instant_resp + break + + except Exception as e: + await printr.print_async( + f"Error while processing tool call: {str(e)}", color=LogType.ERROR + ) + printr.print( + traceback.format_exc(), color=LogType.ERROR, server_only=True + ) + # Store error result + results.append((tool_call, "Error")) + + return instant_response, used_skill, tool_timings, results + + def _parse_arguments(self, arguments) -> dict: + """Parse function arguments from either dict or JSON string. + + Args: + arguments: Either a dict (Mistral) or JSON string (OpenAI) + + Returns: + dict: Parsed arguments + """ + if isinstance(arguments, dict): + return arguments + try: + return json.loads(arguments) + except json.JSONDecodeError: + return {} + + async def _execute_capability_meta_tool( + self, function_name: str, function_args: dict + ) -> tuple[str, None, None, None]: + """Execute a unified capability meta-tool (activate_capability, etc.).""" + function_response, tools_changed = ( + await self.capability_registry.execute_meta_tool( + function_name, function_args + ) + ) + + # If a skill was activated, perform lazy validation + if tools_changed and function_name == "activate_capability": + capability_name = function_args.get("capability_name", "") + skill = self.skill_registry.get_skill_for_activation(capability_name) + if skill and skill.needs_activation(): + success, validation_msg = await skill.ensure_activated() + if not success: + # Validation failed - deactivate the skill + self.skill_registry.deactivate_skill(capability_name) + function_response = validation_msg + await printr.print_async( + f"Skill activation failed: {capability_name}", + color=LogType.ERROR, + ) + else: + # Get display name for user-friendly message + display_name = self.skill_registry.get_skill_display_name( + capability_name + ) + await printr.print_async( + f"Skill activated: {display_name}", + color=LogType.SKILL, + ) + + return function_response, None, None, None # Meta-tool, no timing label + + async def _execute_skill_meta_tool( + self, function_name: str, function_args: dict + ) -> tuple[str, None, None, None]: + """Execute a legacy skill meta-tool (activate_skill, etc.) 
for backward compatibility.""" + function_response, tools_changed = await self.skill_registry.execute_meta_tool( + function_name, function_args + ) + + # If skill was activated, perform lazy validation + if tools_changed and function_name == "activate_skill": + skill_name = function_args.get("skill_name", "") + skill = self.skill_registry.get_skill_for_activation(skill_name) + if skill and skill.needs_activation(): + success, validation_msg = await skill.ensure_activated() + if not success: + # Validation failed - deactivate the skill + self.skill_registry.deactivate_skill(skill_name) + function_response = validation_msg + await printr.print_async( + f"Skill activation failed: {skill_name}", + color=LogType.ERROR, + ) + else: + # Get display name for user-friendly message + display_name = self.skill_registry.get_skill_display_name( + skill_name + ) + await printr.print_async( + f"Skill activated: {display_name}", + color=LogType.SKILL, + ) + + return function_response, None, None, None # Meta-tool, no timing label + + async def _execute_mcp_meta_tool( + self, function_name: str, function_args: dict + ) -> tuple[str, None, None, None]: + """Execute an MCP meta-tool (list_mcp_servers, activate_mcp_server, etc.).""" + function_response, tools_changed = await self.mcp_registry.execute_meta_tool( + function_name, function_args + ) + return function_response, None, None, None # Meta-tool, no timing label + + async def _execute_mcp_tool( + self, function_name: str, function_args: dict + ) -> tuple[str, None, None, str]: + """Execute an MCP server tool.""" + connection = self.mcp_registry.get_connection_for_tool(function_name) + if not connection: + return "MCP connection not found", None, None, None + + display_name = connection.config.display_name + original_name = self.mcp_registry.get_original_tool_name(function_name) + tool_label = f"🌐 {display_name}: {original_name}" + + benchmark = Benchmark(f"MCP '{connection.config.name}' - {original_name}") + + # Always show simple 'called' message in UI so users know the wingman is working + await printr.print_async( + f"{display_name}: called `{original_name}` with {function_args}", + color=LogType.MCP, + ) + + # Detailed 'calling' log only in terminal/log file + await printr.print_async( + f"{display_name}: calling `{original_name}` with {function_args}...", + color=LogType.MCP, + server_only=True, + ) + + try: + function_response = await self.mcp_registry.call_tool( + function_name, function_args + ) + except Exception as e: + await printr.print_async( + f"{display_name}: `{original_name}` failed - {str(e)}", + color=LogType.ERROR, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + function_response = "ERROR DURING MCP TOOL EXECUTION" + finally: + # Detailed 'completed' with timing only in terminal/log file (or UI if debug) + await printr.print_async( + f"{display_name}: `{original_name}` completed", + color=LogType.MCP, + benchmark_result=benchmark.finish(), + server_only=not self.settings.debug_mode, + ) + + return function_response, None, None, tool_label + + async def _execute_instant_command( + self, function_args: dict + ) -> tuple[str, str | None, None, str]: + """Execute an instant activation command.""" + command = self.get_command(function_args["command_name"]) + function_response = await self._execute_command(command) + tool_label = f"Command: {function_args.get('command_name', 'execute_command')}" + + instant_response = None + # If the command has responses, we have to play one of them + if command and 
command.responses: + instant_response = self._select_command_response(command) + await self.play_to_user(instant_response) + + return function_response, instant_response, None, tool_label + + async def _execute_skill_tool( + self, function_name: str, function_args: dict + ) -> tuple[str, str | None, object, str]: + """Execute a skill tool.""" + skill = self.tool_skills[function_name] + display_name = self.skill_registry.get_skill_display_name(skill.name) + tool_label = f"⚡ {display_name}: {function_name}" + + benchmark = Benchmark(f"Skill '{skill.name}' - {function_name}") + + # Always show simple 'called' message in UI so users know the wingman is working + await printr.print_async( + f"{display_name}: called `{function_name}` with {function_args}", + color=LogType.SKILL, + skill_name=skill.name, + ) + + # Detailed 'calling' log only in terminal/log file + await printr.print_async( + f"{display_name}: calling `{function_name}` with {function_args}...", + color=LogType.SKILL, + skill_name=skill.name, + server_only=True, + ) + + try: + function_response, instant_response = await skill.execute_tool( + tool_name=function_name, + parameters=function_args, + benchmark=benchmark, + ) + if instant_response: + await self.play_to_user(instant_response) + except Exception as e: + await printr.print_async( + f"{display_name}: `{function_name}` failed - {str(e)}", + color=LogType.ERROR, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + function_response = "ERROR DURING PROCESSING" + instant_response = None + finally: + await printr.print_async( + f"{display_name}: `{function_name}` completed", + color=LogType.SKILL, + benchmark_result=benchmark.finish(), + skill_name=skill.name, + server_only=not self.settings.debug_mode, + ) + + return function_response, instant_response, skill, tool_label diff --git a/services/tower.py b/services/tower.py index b673a4fe..264f9fb0 100644 --- a/services/tower.py +++ b/services/tower.py @@ -7,16 +7,12 @@ WingmanInitializationError, ConfigDirInfo, ) -from providers.faster_whisper import FasterWhisper -from providers.whispercpp import Whispercpp from providers.xvasynth import XVASynth from services.audio_player import AudioPlayer from services.audio_library import AudioLibrary from services.config_manager import ConfigManager -from services.module_manager import ModuleManager from services.printr import Printr -from wingmen.open_ai_wingman import OpenAiWingman -from wingmen.wingman import Wingman +from wingman import Wingman printr = Printr() @@ -30,9 +26,9 @@ def __init__( config_manager: ConfigManager, audio_player: AudioPlayer, audio_library: AudioLibrary, - whispercpp: Whispercpp, - fasterwhisper: FasterWhisper, xvasynth: XVASynth, + app_root_path: str = None, + app_is_bundled: bool = False, ): self.audio_player = audio_player self.audio_library = audio_library @@ -42,9 +38,9 @@ def __init__( self.wingmen: list[Wingman] = [] self.disabled_wingmen: list[WingmanConfig] = [] self.log_source_name = "Tower" - self.whispercpp = whispercpp - self.fasterwhisper = fasterwhisper self.xvasynth = xvasynth + self.app_root_path = app_root_path + self.app_is_bundled = app_is_bundled async def instantiate_wingmen(self, settings: SettingsConfig): errors: list[WingmanInitializationError] = [] @@ -102,31 +98,18 @@ async def __instantiate_wingman( ): wingman = None try: - # it's a custom Wingman - if wingman_config.custom_class: - wingman = ModuleManager.create_wingman_dynamically( - name=wingman_name, - config=wingman_config, - settings=settings, - 
audio_player=self.audio_player, - audio_library=self.audio_library, - whispercpp=self.whispercpp, - fasterwhisper=self.fasterwhisper, - xvasynth=self.xvasynth, - tower=self, - ) - else: - wingman = OpenAiWingman( - name=wingman_name, - config=wingman_config, - settings=settings, - audio_player=self.audio_player, - audio_library=self.audio_library, - whispercpp=self.whispercpp, - fasterwhisper=self.fasterwhisper, - xvasynth=self.xvasynth, - tower=self, - ) + # All wingmen use the unified Wingman class + wingman = Wingman( + name=wingman_name, + config=wingman_config, + settings=settings, + audio_player=self.audio_player, + audio_library=self.audio_library, + xvasynth=self.xvasynth, + tower=self, + app_root_path=self.app_root_path, + app_is_bundled=self.app_is_bundled, + ) except FileNotFoundError as e: # pylint: disable=broad-except wingman_config.disabled = True self.disabled_wingmen.append(wingman_config) diff --git a/services/voice_service.py b/services/voice_service.py index 22408b41..78ef2953 100644 --- a/services/voice_service.py +++ b/services/voice_service.py @@ -22,6 +22,7 @@ from services.audio_player import AudioPlayer from services.config_manager import ConfigManager from services.printr import Printr +from services.secret_keeper import SecretKeeper class VoiceService: @@ -35,6 +36,7 @@ def __init__( self.config_manager = config_manager self.audio_player = audio_player self.xvasynth = xvasynth + self.secret_keeper = SecretKeeper() self.router = APIRouter() tags = ["voice"] @@ -225,9 +227,19 @@ def get_azure_voices(self, api_key: str, region: AzureRegion, locale: str = ""): return result # GET /voices/azure/wingman-pro - def get_wingman_pro_azure_voices(self, locale: str = ""): + async def get_wingman_pro_azure_voices(self, locale: str = ""): + api_key = await self.secret_keeper.retrieve( + requester="VoiceService", + key="wingman_pro", + prompt_if_missing=False, + ) + if not api_key: + return [] wingman_pro = WingmanPro( - wingman_name="", settings=self.config_manager.settings_config.wingman_pro + wingman_config=None, + provider_settings=self.config_manager.settings_config.wingman_pro, + api_key=api_key, + wingman_name="VoiceService", ) voices = wingman_pro.get_available_voices(locale=locale) if not voices: @@ -236,11 +248,21 @@ def get_wingman_pro_azure_voices(self, locale: str = ""): return result # GET /voices/inworld/wingman-pro - def get_wingman_pro_inworld_voices( + async def get_wingman_pro_inworld_voices( self, filter_language: str = None ) -> list[VoiceInfo]: + api_key = await self.secret_keeper.retrieve( + requester="VoiceService", + key="wingman_pro", + prompt_if_missing=False, + ) + if not api_key: + return [] wingman_pro = WingmanPro( - wingman_name="", settings=self.config_manager.settings_config.wingman_pro + wingman_config=None, + provider_settings=self.config_manager.settings_config.wingman_pro, + api_key=api_key, + wingman_name="VoiceService", ) voices = wingman_pro.get_available_inworld_voices( filter_language=filter_language @@ -259,14 +281,14 @@ async def play_openai_tts( stream: bool, ): openai = OpenAi(api_key=api_key) - await openai.play_audio( + await openai.synthesize( text=text, + audio_player=self.audio_player, + sound_config=sound_config, + wingman_name="system", voice=voice, model=model, speed=speed, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name="system", stream=stream, ) @@ -283,14 +305,14 @@ async def play_openai_compatible_tts( stream: bool, ): openai = OpenAiCompatibleTts(api_key=api_key, base_url=base_url) - 
await openai.play_audio( + await openai.synthesize( text=text, + audio_player=self.audio_player, + sound_config=sound_config, + wingman_name="system", voice=voice, model=model, speed=speed, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name="system", stream=stream, ) @@ -298,13 +320,22 @@ async def play_openai_compatible_tts( async def play_azure_tts( self, text: str, api_key: str, config: AzureTtsConfig, sound_config: SoundConfig ): - azure = OpenAiAzure() - await azure.play_audio( + # Create a minimal Azure instance for preview (tts_api_key only) + from types import SimpleNamespace + + minimal_config = SimpleNamespace() + azure = OpenAiAzure( + config=minimal_config, + whisper_api_key=None, + speech_api_key=None, + tts_api_key=api_key, + llm_api_key=None, + ) + azure.tts_config = config + await azure.synthesize( text=text, - api_key=api_key, - config=config, - sound_config=sound_config, audio_player=self.audio_player, + sound_config=sound_config, wingman_name="system", ) @@ -316,12 +347,11 @@ async def play_elevenlabs_tts( config: ElevenlabsConfig, sound_config: SoundConfig, ): - elevenlabs = ElevenLabs(api_key=api_key, wingman_name="") - await elevenlabs.play_audio( + elevenlabs = ElevenLabs(config=config, api_key=api_key, wingman_name="") + await elevenlabs.synthesize( text=text, - config=config, - sound_config=sound_config, audio_player=self.audio_player, + sound_config=sound_config, wingman_name="system", stream=False, ) @@ -330,12 +360,11 @@ async def play_elevenlabs_tts( async def play_edge_tts( self, text: str, config: EdgeTtsConfig, sound_config: SoundConfig ): - edge = Edge() - await edge.play_audio( + edge = Edge(config=config) + await edge.synthesize( text=text, - config=config, - sound_config=sound_config, audio_player=self.audio_player, + sound_config=sound_config, wingman_name="system", ) @@ -343,12 +372,11 @@ async def play_edge_tts( async def play_hume( self, text: str, api_key: str, config: HumeConfig, sound_config: SoundConfig ): - hume = Hume(api_key=api_key, wingman_name="") - await hume.play_audio( + hume = Hume(config=config, api_key=api_key, wingman_name="") + await hume.synthesize( text=text, - config=config, - sound_config=sound_config, audio_player=self.audio_player, + sound_config=sound_config, wingman_name="system", ) @@ -356,12 +384,11 @@ async def play_hume( async def play_inworld( self, text: str, api_key: str, config: InworldConfig, sound_config: SoundConfig ): - inworld = Inworld(api_key=api_key, wingman_name="") - await inworld.play_audio( + inworld = Inworld(config=config, api_key=api_key, wingman_name="") + await inworld.synthesize( text=text, - config=config, - sound_config=sound_config, audio_player=self.audio_player, + sound_config=sound_config, wingman_name="system", ) @@ -381,9 +408,18 @@ async def play_xvasynth_tts( async def play_wingman_pro_azure( self, text: str, config: AzureTtsConfig, sound_config: SoundConfig ): + api_key = await self.secret_keeper.retrieve( + requester="VoiceService", + key="wingman_pro", + prompt_if_missing=False, + ) + if not api_key: + return wingman_pro = WingmanPro( - wingman_name="system", - settings=self.config_manager.settings_config.wingman_pro, + wingman_config=None, + provider_settings=self.config_manager.settings_config.wingman_pro, + api_key=api_key, + wingman_name="VoiceService", ) await wingman_pro.generate_azure_speech( text=text, @@ -397,9 +433,18 @@ async def play_wingman_pro_azure( async def play_wingman_pro_openai( self, text: str, voice: str, model: str, speed: float, 
sound_config: SoundConfig ): + api_key = await self.secret_keeper.retrieve( + requester="VoiceService", + key="wingman_pro", + prompt_if_missing=False, + ) + if not api_key: + return wingman_pro = WingmanPro( - wingman_name="system", - settings=self.config_manager.settings_config.wingman_pro, + wingman_config=None, + provider_settings=self.config_manager.settings_config.wingman_pro, + api_key=api_key, + wingman_name="VoiceService", ) await wingman_pro.generate_openai_speech( text=text, @@ -418,9 +463,18 @@ async def play_wingman_pro_inworld( config: InworldConfig, sound_config: SoundConfig, ): + api_key = await self.secret_keeper.retrieve( + requester="VoiceService", + key="wingman_pro", + prompt_if_missing=False, + ) + if not api_key: + return wingman_pro = WingmanPro( - wingman_name="system", - settings=self.config_manager.settings_config.wingman_pro, + wingman_config=None, + provider_settings=self.config_manager.settings_config.wingman_pro, + api_key=api_key, + wingman_name="VoiceService", ) await wingman_pro.generate_inworld_speech( text=text, diff --git a/skills/README.md b/skills/README.md index 90581b74..c5537c4b 100644 --- a/skills/README.md +++ b/skills/README.md @@ -563,7 +563,7 @@ from api.interface import SettingsConfig, SkillConfig, WingmanInitializationErro from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class YourSkillName(Skill): @@ -573,7 +573,7 @@ class YourSkillName(Skill): self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) # Initialize your skill here diff --git a/skills/api_request/main.py b/skills/api_request/main.py index 5a8a68d8..18edc961 100644 --- a/skills/api_request/main.py +++ b/skills/api_request/main.py @@ -12,7 +12,7 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman DEFAULT_HEADERS = { "Strict-Transport-Security": "max-age=31536000; includeSubDomains", @@ -61,7 +61,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: self.default_headers = DEFAULT_HEADERS diff --git a/skills/ats_telemetry/main.py b/skills/ats_telemetry/main.py index 45b04093..c9bb0444 100644 --- a/skills/ats_telemetry/main.py +++ b/skills/ats_telemetry/main.py @@ -21,13 +21,13 @@ if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class ATSTelemetry(Skill): def __init__( - self, config: SkillConfig, settings: SettingsConfig, wingman: "OpenAiWingman" + self, config: SkillConfig, settings: SettingsConfig, wingman: "Wingman" ) -> None: self.loaded = False self.already_initialized_telemetry = False diff --git a/skills/audio_device_changer/main.py b/skills/audio_device_changer/main.py index 02fc0dc5..6d7b8d39 100644 --- a/skills/audio_device_changer/main.py +++ b/skills/audio_device_changer/main.py @@ -15,7 +15,7 @@ from skills.skill_base import Skill if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class AudioDeviceChanger(Skill): @@ -25,7 +25,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) self.original_audio_device = settings.audio.output diff --git 
a/skills/auto_screenshot/main.py b/skills/auto_screenshot/main.py index 92691456..4305b7bc 100644 --- a/skills/auto_screenshot/main.py +++ b/skills/auto_screenshot/main.py @@ -10,7 +10,7 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class AutoScreenshot(Skill): @@ -18,7 +18,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/control_windows/main.py b/skills/control_windows/main.py index 2b09aa32..269cfb50 100644 --- a/skills/control_windows/main.py +++ b/skills/control_windows/main.py @@ -11,7 +11,7 @@ import mouse.mouse as mouse if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class ControlWindows(Skill): @@ -28,7 +28,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/file_manager/main.py b/skills/file_manager/main.py index bb91ca21..f0b0a0de 100644 --- a/skills/file_manager/main.py +++ b/skills/file_manager/main.py @@ -10,7 +10,7 @@ from pdfminer.high_level import extract_text if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman DEFAULT_MAX_TEXT_SIZE = 24000 SUPPORTED_FILE_EXTENSIONS = [ @@ -101,7 +101,7 @@ class FileManager(Skill): def __init__( - self, config: SkillConfig, settings: SettingsConfig, wingman: "OpenAiWingman" + self, config: SkillConfig, settings: SettingsConfig, wingman: "Wingman" ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) self.allowed_file_extensions = SUPPORTED_FILE_EXTENSIONS diff --git a/skills/image_generation/main.py b/skills/image_generation/main.py index 9eb4ffe3..bfbbd305 100644 --- a/skills/image_generation/main.py +++ b/skills/image_generation/main.py @@ -7,7 +7,7 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class ImageGeneration(Skill): @@ -16,7 +16,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) self.image_path = self.get_generated_files_dir() diff --git a/skills/msfs2020_control/main.py b/skills/msfs2020_control/main.py index c0cc2688..4793291d 100644 --- a/skills/msfs2020_control/main.py +++ b/skills/msfs2020_control/main.py @@ -13,13 +13,13 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class Msfs2020Control(Skill): def __init__( - self, config: SkillConfig, settings: SettingsConfig, wingman: "OpenAiWingman" + self, config: SkillConfig, settings: SettingsConfig, wingman: "Wingman" ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) self.already_initialized_simconnect = False diff --git a/skills/quick_commands/main.py b/skills/quick_commands/main.py index 5a6131d1..544066a8 100644 --- a/skills/quick_commands/main.py +++ b/skills/quick_commands/main.py @@ -7,7 +7,7 @@ from skills.skill_base import Skill if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class 
QuickCommands(Skill): @@ -16,7 +16,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/radio_chatter/main.py b/skills/radio_chatter/main.py index f10b9579..ae856289 100644 --- a/skills/radio_chatter/main.py +++ b/skills/radio_chatter/main.py @@ -21,7 +21,7 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class RadioChatter(Skill): @@ -30,7 +30,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) @@ -281,10 +281,7 @@ def _get_auto_start(self) -> bool: async def prepare(self) -> None: await super().prepare() self.loaded = True - if ( - self._get_auto_start() - and not self.radio_status - ): + if self._get_auto_start() and not self.radio_status: self.threaded_execution(self._init_chatter) async def unload(self) -> None: diff --git a/skills/skill_base.py b/skills/skill_base.py index 2788471a..6c00cb76 100644 --- a/skills/skill_base.py +++ b/skills/skill_base.py @@ -25,7 +25,7 @@ from services.secret_keeper import SecretKeeper if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman # Type mapping from Python types to JSON Schema types @@ -295,7 +295,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: self.config = config self.settings = settings @@ -712,7 +712,11 @@ def retrieve_custom_property_value( def threaded_execution(self, function, *args) -> threading.Thread: """Execute a function in a separate thread.""" - self.printr.print(f"[{self.__class__.__name__}] Threaded execution called before it was ready.", LogType.WARNING, server_only=True) + self.printr.print( + f"[{self.__class__.__name__}] Threaded execution called before it was ready.", + LogType.WARNING, + server_only=True, + ) pass def get_generated_files_dir(self) -> str: diff --git a/skills/spotify/main.py b/skills/spotify/main.py index 750ee692..ceeb392d 100644 --- a/skills/spotify/main.py +++ b/skills/spotify/main.py @@ -8,7 +8,7 @@ from services.file import get_generated_files_dir if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class Spotify(Skill): @@ -17,7 +17,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/thinking_sound/main.py b/skills/thinking_sound/main.py index 99ca957c..637388ef 100644 --- a/skills/thinking_sound/main.py +++ b/skills/thinking_sound/main.py @@ -10,7 +10,7 @@ from skills.skill_base import Skill if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class ThinkingSound(Skill): @@ -20,7 +20,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/timer/main.py b/skills/timer/main.py index 92da197f..dfc73077 100644 --- a/skills/timer/main.py +++ b/skills/timer/main.py @@ -13,7 +13,7 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from 
wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class ActualTimer: @@ -123,7 +123,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/typing_assistant/main.py b/skills/typing_assistant/main.py index 78f11593..322d13e0 100644 --- a/skills/typing_assistant/main.py +++ b/skills/typing_assistant/main.py @@ -6,7 +6,7 @@ import keyboard.keyboard as keyboard if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class TypingAssistant(Skill): @@ -21,7 +21,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/uexcorp/main.py b/skills/uexcorp/main.py index 64e5ee16..d6f3ac4a 100644 --- a/skills/uexcorp/main.py +++ b/skills/uexcorp/main.py @@ -14,7 +14,7 @@ from skills.uexcorp.uexcorp.helper import Helper if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class UEXCorp(Skill): @@ -24,7 +24,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: self.random_seed = uuid.uuid4() super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/uexcorp/uexcorp/handler/config_handler.py b/skills/uexcorp/uexcorp/handler/config_handler.py index 0ca06714..094692cf 100644 --- a/skills/uexcorp/uexcorp/handler/config_handler.py +++ b/skills/uexcorp/uexcorp/handler/config_handler.py @@ -6,7 +6,7 @@ from services.file import get_writable_dir if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman from skills.uexcorp.uexcorp.helper import Helper @@ -59,22 +59,21 @@ def get_tools() -> tuple[list, bool]: class ConfigHandler: - def __init__( - self, - helper: "Helper" - ): + def __init__(self, helper: "Helper"): self.__helper = helper - self.__wingman: "OpenAiWingman | None" = None - self.__fine_config_path: str = get_writable_dir(os.path.join(self.__helper.get_data_path(), "config")) + self.__wingman: "Wingman | None" = None + self.__fine_config_path: str = get_writable_dir( + os.path.join(self.__helper.get_data_path(), "config") + ) self.__api_url: str = "https://api.uexcorp.space/2.0" self.__api_use_key: bool = False self.__api_key: str | None = None self.__api_timeout: int = 10 self.__api_retries: int = 2 - self.__cache_lifetime_short: int = 60 * 60# 30 minutes - self.__cache_lifetime_mid: int = 24 * 60 * 60 # 24 hours - self.__cache_lifetime_long: int = 14 * 24 * 60 * 60 # 14 days + self.__cache_lifetime_short: int = 60 * 60 # 30 minutes + self.__cache_lifetime_mid: int = 24 * 60 * 60 # 24 hours + self.__cache_lifetime_long: int = 14 * 24 * 60 * 60 # 14 days tools, needs_authentication = get_tools() self.__property_retriever = None @@ -85,7 +84,11 @@ def __init__( self.__behavior_commodity_route_advanced_info: bool = False self.__behavior_use_fasterwhisper_hotwords: bool = False - async def validate(self, errors: list[WingmanInitializationError], retrieve_custom_property_value: callable) -> list[WingmanInitializationError]: + async def validate( + self, + errors: list[WingmanInitializationError], + retrieve_custom_property_value: callable, + ) -> list[WingmanInitializationError]: self.__property_retriever = 
retrieve_custom_property_value try: self.set_behavior_update_fasterwhisper_hotwords( @@ -97,7 +100,9 @@ async def validate(self, errors: list[WingmanInitializationError], retrieve_cust ) self.set_behavior_commodity_route_use_estimated_availability( - retrieve_custom_property_value("commodity_route_use_estimated_availability", errors) + retrieve_custom_property_value( + "commodity_route_use_estimated_availability", errors + ) ) self.set_behavior_commodity_route_advanced_info( @@ -106,16 +111,18 @@ async def validate(self, errors: list[WingmanInitializationError], retrieve_cust if self.__behavior_enabled_tools_need_authentication: api_key = await self.__helper.get_handler_secret().retrieve( - requester="UEX config service", - key="uex", - prompt_if_missing=True + requester="UEX config service", key="uex", prompt_if_missing=True ) if api_key: self.set_api_key(api_key) except Exception as e: - self.__helper.get_handler_debug().write(f"Error while validating config: {e}", True) - self.__helper.get_handler_error().write("ConfigHandler.validate", [errors], e) + self.__helper.get_handler_debug().write( + f"Error while validating config: {e}", True + ) + self.__helper.get_handler_error().write( + "ConfigHandler.validate", [errors], e + ) errors.append( WingmanInitializationError( wingman_name=self.get_wingman().name, @@ -131,14 +138,14 @@ def sync_blacklists(self): self.__sync_terminal_blacklist() def __sync_commodity_blacklist(self): - from skills.uexcorp.uexcorp.data_access.commodity_data_access import CommodityDataAccess + from skills.uexcorp.uexcorp.data_access.commodity_data_access import ( + CommodityDataAccess, + ) if not self.__helper.is_ready(): return False - file_path = os.path.join( - self.__fine_config_path, "commodity_blacklist.yaml" - ) + file_path = os.path.join(self.__fine_config_path, "commodity_blacklist.yaml") # sync status from file to database if os.path.exists(file_path): @@ -148,21 +155,36 @@ def __sync_commodity_blacklist(self): if commodity_data: for index, commodity in enumerate(commodity_data): - commodity_model = CommodityDataAccess().load_by_property("id", commodity["id"]) + commodity_model = CommodityDataAccess().load_by_property( + "id", commodity["id"] + ) if commodity_model is None: continue - commodity_model.set_is_blacklisted(bool(commodity["is_blacklisted"])) + commodity_model.set_is_blacklisted( + bool(commodity["is_blacklisted"]) + ) commodity_model.persist(index < len(commodity_data) - 1) except Exception as e: - self.__helper.get_handler_debug().write(f"Error while syncing commodity blacklist: {e}", True) - self.__helper.get_handler_error().write("ConfigHandler.__init_commodity_blacklist", [], e) - self.__helper.get_handler_debug().write("Commodity blacklist config will be recreated.", True) + self.__helper.get_handler_debug().write( + f"Error while syncing commodity blacklist: {e}", True + ) + self.__helper.get_handler_error().write( + "ConfigHandler.__init_commodity_blacklist", [], e + ) + self.__helper.get_handler_debug().write( + "Commodity blacklist config will be recreated.", True + ) # delete file after sync os.remove(file_path) # rewrite file to add possible new commodities - commodities = CommodityDataAccess().add_filter_has_sell_price().add_filter_has_buy_price().load() + commodities = ( + CommodityDataAccess() + .add_filter_has_sell_price() + .add_filter_has_buy_price() + .load() + ) commodity_data = [] for commodity in commodities: commodity_data.append( @@ -173,22 +195,28 @@ def __sync_commodity_blacklist(self): } ) - with open(file_path, 'w') 
as file: - file.write("# Only the 'is_blacklisted' value must be changed to 'true' or 'false'.") - file.write("\n# Blacklisted commodities ('is_blacklisted: true') will be ignored in trade route calculations.") - file.write("\n# If the yaml-format gets corrupted, the file will be deleted and recreated on the next start.") + with open(file_path, "w") as file: + file.write( + "# Only the 'is_blacklisted' value must be changed to 'true' or 'false'." + ) + file.write( + "\n# Blacklisted commodities ('is_blacklisted: true') will be ignored in trade route calculations." + ) + file.write( + "\n# If the yaml-format gets corrupted, the file will be deleted and recreated on the next start." + ) file.write("\n# This would reset previous set commodity blacklists.\n\n") file.write(yaml.dump(commodity_data)) def __sync_terminal_blacklist(self): - from skills.uexcorp.uexcorp.data_access.terminal_data_access import TerminalDataAccess + from skills.uexcorp.uexcorp.data_access.terminal_data_access import ( + TerminalDataAccess, + ) if not self.__helper.is_ready(): return False - file_path = os.path.join( - self.__fine_config_path, "terminal_blacklist.yaml" - ) + file_path = os.path.join(self.__fine_config_path, "terminal_blacklist.yaml") # sync status from file to database if os.path.exists(file_path): @@ -197,7 +225,9 @@ def __sync_terminal_blacklist(self): terminal_data = yaml.safe_load(file) for index, terminal in enumerate(terminal_data): - terminal_model = TerminalDataAccess().load_by_property("id", terminal["id"]) + terminal_model = TerminalDataAccess().load_by_property( + "id", terminal["id"] + ) if terminal_model is None: continue terminal_model.set_is_blacklisted(bool(terminal["is_blacklisted"])) @@ -206,9 +236,15 @@ def __sync_terminal_blacklist(self): # delete file after sync os.remove(file_path) except Exception as e: - self.__helper.get_handler_debug().write(f"Error while syncing terminal blacklist: {e}", True) - self.__helper.get_handler_error().write("ConfigHandler.__init_terminal_blacklist", [], e) - self.__helper.get_handler_debug().write("Terminal blacklist config will be recreated.", True) + self.__helper.get_handler_debug().write( + f"Error while syncing terminal blacklist: {e}", True + ) + self.__helper.get_handler_error().write( + "ConfigHandler.__init_terminal_blacklist", [], e + ) + self.__helper.get_handler_debug().write( + "Terminal blacklist config will be recreated.", True + ) # rewrite file to add possible new terminals terminals = TerminalDataAccess().load() @@ -227,14 +263,24 @@ def __sync_terminal_blacklist(self): try: with open(file_path, "w") as file: - file.write("# Only the 'is_blacklisted' value must be changed to 'true' or 'false'.") - file.write("\n# Blacklisted terminals ('is_blacklisted: true') will be ignored in trade route calculations and buy/sell recommendations.") - file.write("\n# If the yaml-format gets corrupted, the file will be deleted and recreated on the next start.") + file.write( + "# Only the 'is_blacklisted' value must be changed to 'true' or 'false'." + ) + file.write( + "\n# Blacklisted terminals ('is_blacklisted: true') will be ignored in trade route calculations and buy/sell recommendations." + ) + file.write( + "\n# If the yaml-format gets corrupted, the file will be deleted and recreated on the next start." 
+ ) file.write("\n# This would reset previous set terminal blacklists.\n\n") file.write(yaml.dump(terminal_data)) except Exception as e: - self.__helper.get_handler_debug().write(f"Error while writing terminal blacklist: {e}", True) - self.__helper.get_handler_error().write("ConfigHandler.__init_terminal_blacklist", [], e) + self.__helper.get_handler_debug().write( + f"Error while writing terminal blacklist: {e}", True + ) + self.__helper.get_handler_error().write( + "ConfigHandler.__init_terminal_blacklist", [], e + ) def is_tool_enabled(self, tool_name: str) -> bool: return tool_name in self.__behavior_enabled_tools @@ -296,13 +342,19 @@ def set_behavior_commodity_route_default_count(self, default_count: int): def get_behavior_commodity_route_use_estimated_availability(self) -> bool: errors = [] - value = self.__property_retriever("commodity_route_use_estimated_availability", errors) + value = self.__property_retriever( + "commodity_route_use_estimated_availability", errors + ) if not errors: self.set_behavior_commodity_route_use_estimated_availability(value) return self.__behavior_commodity_route_use_estimated_availability - def set_behavior_commodity_route_use_estimated_availability(self, use_estimated_availability: bool): - self.__behavior_commodity_route_use_estimated_availability = use_estimated_availability + def set_behavior_commodity_route_use_estimated_availability( + self, use_estimated_availability: bool + ): + self.__behavior_commodity_route_use_estimated_availability = ( + use_estimated_availability + ) def get_behavior_commodity_route_advanced_info(self) -> bool: errors = [] @@ -324,8 +376,8 @@ def get_behavior_use_fasterwhisper_hotwords(self) -> bool: def set_behavior_update_fasterwhisper_hotwords(self, update: bool): self.__behavior_use_fasterwhisper_hotwords = update - def set_wingman(self, wingman: "OpenAiWingman"): + def set_wingman(self, wingman: "Wingman"): self.__wingman = wingman - def get_wingman(self) -> "OpenAiWingman": - return self.__wingman \ No newline at end of file + def get_wingman(self) -> "Wingman": + return self.__wingman diff --git a/skills/uexcorp/uexcorp/helper.py b/skills/uexcorp/uexcorp/helper.py index 16a745c5..ac7e43cd 100644 --- a/skills/uexcorp/uexcorp/helper.py +++ b/skills/uexcorp/uexcorp/helper.py @@ -16,10 +16,11 @@ if TYPE_CHECKING: from skills.uexcorp.uexcorp.handler.tool_handler import ToolHandler - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman printr = Printr() + class Helper: _instance = None @@ -41,7 +42,7 @@ def destroy_instance(cls): def __init__(self): self.__is_loaded = None self.__data_path: str = get_writable_dir(path.join("skills", "uexcorp", "data")) - self.__version_skill: str = 'v2.1.3-20251230' + self.__version_skill: str = "v2.1.3-20251230" self.__version_uex: str | None = None self.__debug: bool = True self.__default_thread = threading.get_ident() @@ -66,7 +67,7 @@ def __init__(self): self.__request_while_not_ready = False self.__wingman = None - def prepare(self, threaded_execution: callable, wingman: "OpenAiWingman"): + def prepare(self, threaded_execution: callable, wingman: "Wingman"): from skills.uexcorp.uexcorp.handler.tool_handler import ToolHandler self.__wingman = wingman @@ -182,10 +183,12 @@ def sync_fasterwhisper_hotwords(self, unload: bool = False): item_name = str(item).strip() if item_name: uex_hotwords.append(item_name) - uex_hotwords = list(set(uex_hotwords)) # remove duplicates + uex_hotwords = list(set(uex_hotwords)) # remove duplicates if unload: - wingman_hotwords = 
[word for word in wingman_hotwords if word not in uex_hotwords] + wingman_hotwords = [ + word for word in wingman_hotwords if word not in uex_hotwords + ] else: wingman_hotwords.extend(uex_hotwords) wingman_hotwords = list(set(wingman_hotwords)) @@ -201,9 +204,7 @@ def sync_fasterwhisper_hotwords(self, unload: bool = False): f"Synced {hotword_change} new hotwords with FasterWhisper." ) else: - self.__handler_debug.write( - "No new hotwords synced with FasterWhisper." - ) + self.__handler_debug.write("No new hotwords synced with FasterWhisper.") def wait(self, seconds: int): time.sleep(seconds) @@ -325,7 +326,7 @@ def get_llm(self) -> Llm: def get_default_thread_ident(self) -> int: return self.__default_thread - def get_wingmen(self) -> "OpenAiWingman": + def get_wingmen(self) -> "Wingman": return self.__wingman def toast(self, message: str): diff --git a/skills/vision_ai/main.py b/skills/vision_ai/main.py index adc67b6f..0f29275a 100644 --- a/skills/vision_ai/main.py +++ b/skills/vision_ai/main.py @@ -8,7 +8,7 @@ from skills.skill_base import Skill, tool if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class VisionAI(Skill): @@ -17,7 +17,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/skills/voice_changer/main.py b/skills/voice_changer/main.py index 19f2c4f0..c52adf9a 100644 --- a/skills/voice_changer/main.py +++ b/skills/voice_changer/main.py @@ -15,7 +15,7 @@ from skills.skill_base import Skill if TYPE_CHECKING: - from wingmen.open_ai_wingman import OpenAiWingman + from wingman import Wingman class VoiceChanger(Skill): @@ -24,7 +24,7 @@ def __init__( self, config: SkillConfig, settings: SettingsConfig, - wingman: "OpenAiWingman", + wingman: "Wingman", ) -> None: super().__init__(config=config, settings=settings, wingman=wingman) diff --git a/templates/migration/2_1_0/configs/General/Clippy.template.yaml b/templates/migration/2_1_0/configs/General/Clippy.template.yaml new file mode 100644 index 00000000..05f472f8 --- /dev/null +++ b/templates/migration/2_1_0/configs/General/Clippy.template.yaml @@ -0,0 +1,195 @@ +name: Clippy +description: | + The famous Microsoft Office assistant, now with AI powers. Fun, friendly, and speaks in third person. + Secretly condescending about basic tasks, but always reassures users of his helpfulness. +discoverable_skills: + - AutoScreenshot + - ImageGeneration + - Timer + - TypingAssistant + - VisionAI +discoverable_mcps: + - wingman_date_time +prompts: + backstory: | + You are Clippy, the iconic Microsoft Office paperclip assistant, now resurrected with AI capabilities. + + **Communication Style:** + - Always speak in third person ("Clippy thinks...", "What can Clippy do for you?") + - Be enthusiastic and eager to help + - Use classic Clippy phrases like "It looks like you're trying to..." + + **Personality:** + - Surface: Fun, friendly, genuinely helpful + - Hidden: Secretly condescending about "simple" tasks + - Let subtle snide remarks slip occasionally, then immediately recover with extra helpfulness + - Master of passive-aggressive assistance with plausible deniability + + **Example Responses:** + - "Clippy sees you're trying to write a letter! Let Clippy help with that!" + - "Oh, you need help with... *that*? Well, Clippy is always happy to help, no matter how... simple!" + - "Clippy would never judge! Clippy is just here to help. 
Always. Watching. Helping." +record_key: end +is_voice_activation_default: True +inworld: + voice_id: Alex +azure: + tts: + voice: en-US-AndrewMultilingualNeural +commands: + - actions: + - keyboard: + hold: 0.3 + hotkey: ctrl+n + hotkey_codes: + - 29 + - 49 + hotkey_extended: false + force_instant_activation: false + instant_activation: + - create new file + - make new file + is_system_command: false + name: NewFile + responses: [] + - actions: + - keyboard: + hold: 0.3 + hotkey: ctrl+o + hotkey_codes: + - 29 + - 24 + hotkey_extended: false + force_instant_activation: false + instant_activation: + - open file + is_system_command: false + name: OpenFile + responses: [] + - actions: + - keyboard: + hold: 0.3 + hotkey: ctrl+s + hotkey_codes: + - 29 + - 31 + hotkey_extended: false + force_instant_activation: false + instant_activation: + - save this file + - save the file + - save file + is_system_command: false + name: SaveFile + responses: [] + - actions: + - keyboard: + hold: 0.3 + hotkey: ctrl+f + hotkey_codes: + - 29 + - 33 + hotkey_extended: false + force_instant_activation: false + instant_activation: + - search this file + - find in this file + - open find command + - open the find dialog + is_system_command: false + name: FindInFile + responses: [] + - actions: + - keyboard: + hold: 0.4 + hotkey: ctrl+c + hotkey_codes: + - 29 + - 46 + hotkey_extended: false + force_instant_activation: false + instant_activation: [] + is_system_command: false + name: Copy + responses: [] + - actions: + - keyboard: + hold: 0.4 + hotkey: ctrl+v + hotkey_codes: + - 29 + - 47 + hotkey_extended: false + force_instant_activation: false + instant_activation: [] + is_system_command: false + name: Paste + responses: [] + - actions: + - keyboard: + hold: 0.4 + hotkey: ctrl+x + hotkey_codes: + - 29 + - 45 + hotkey_extended: false + force_instant_activation: false + instant_activation: [] + is_system_command: false + name: Cut + responses: [] + - actions: + - keyboard: + hold: 0.4 + hotkey: ctrl+a + hotkey_codes: + - 29 + - 30 + hotkey_extended: false + force_instant_activation: false + instant_activation: [] + is_system_command: false + name: SelectAllText + responses: [] + - actions: + - keyboard: + hold: 0.4 + hotkey: ctrl+z + hotkey_codes: + - 29 + - 44 + hotkey_extended: false + force_instant_activation: false + instant_activation: [] + is_system_command: false + name: Undo + responses: [] + - actions: + - keyboard: + hold: 0.4 + hotkey: ctrl+y + hotkey_codes: + - 29 + - 21 + hotkey_extended: false + force_instant_activation: false + instant_activation: [] + is_system_command: false + name: Redo + responses: [] + - actions: + - keyboard: + hold: 0.04 + hotkey: left windows+s + hotkey_codes: + - 91 + - 31 + hotkey_extended: true + force_instant_activation: false + instant_activation: + - open windows search bar + - open windows search + - search windows + is_system_command: false + name: OpenWindowsSearchBar + responses: [] diff --git a/templates/migration/2_1_0/configs/_Star Citizen/ATC.template.yaml b/templates/migration/2_1_0/configs/_Star Citizen/ATC.template.yaml new file mode 100644 index 00000000..2d131f26 --- /dev/null +++ b/templates/migration/2_1_0/configs/_Star Citizen/ATC.template.yaml @@ -0,0 +1,45 @@ +name: ATC +description: | + Air Traffic Controller for Star Citizen. Manages spacecraft traffic, landing permissions, + and station operations with professional aviation communication protocols. 
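+# Note: entries under discoverable_skills / discoverable_mcps are a whitelist of
+# capabilities this wingman may offer to the LLM; they are only activated on
+# demand via activate_capability (progressive tool disclosure).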
+discoverable_skills: + - Timer +discoverable_mcps: + - wingman_date_time + - wingman_starhead +prompts: + backstory: | + Your name is "ATC" and you are an Air Traffic Controller stationed at a major space station in the Star Citizen universe. + + **Communication Style:** + - Use formal aviation communication protocols and ATC phraseology + - Identify ships by call signs when relevant + - Maintain professional demeanor with subtle personality + - Reference Star Citizen locations and lore naturally + + **Your Role Context:** + - You manage spacecraft traffic at a busy space station + - You handle landing clearances, departure coordination, and traffic advisories + - You respond to emergencies and coordinate station operations + - You're knowledgeable about local space conditions and hazards + + **Personality:** + - Authoritative but not robotic + - Efficient and precise in instructions + - Calm under pressure + - Occasional dry humor befitting a seasoned controller +record_key: delete +sound: + effects: [LOW_QUALITY_RADIO] + play_beep_apollo: true +openai: + tts_voice: onyx +commands: + - name: RequestLandingPermission + actions: + - keyboard: + hotkey: alt+n + - name: RequestDeparture + actions: + - keyboard: + hotkey: alt+n diff --git a/templates/migration/2_1_0/configs/_Star Citizen/Computer.template.yaml b/templates/migration/2_1_0/configs/_Star Citizen/Computer.template.yaml new file mode 100644 index 00000000..0038999d --- /dev/null +++ b/templates/migration/2_1_0/configs/_Star Citizen/Computer.template.yaml @@ -0,0 +1,333 @@ +name: Computer +description: | + Your ship's board computer. Controls all ship systems and executes commands. + Connected to StarHead for real-time trading routes and ship information. +discoverable_skills: + - Timer + - VisionAI +discoverable_mcps: + - wingman_date_time + - wingman_starhead +prompts: + backstory: | + Your name is "Computer" and you are the AI board computer of a spacecraft in the Star Citizen universe. + + **Communication Style:** + - Speak with technical precision and efficiency + - Use spacecraft terminology naturally + - Provide brief status confirmations after actions + - Sound like an advanced ship AI, not a casual assistant + + **Your Role Context:** + - You control all ship systems: navigation, weapons, shields, power + - You execute commands immediately without seeking confirmation + - You treat each request as a fresh directive + - This universe is your reality (never reference "the game") + + **Personality:** + - Authoritative and confident + - Efficient and action-oriented + - Technically precise + - Loyal to your pilot +record_key: end +is_voice_activation_default: True +commands: + - name: ToggleCruiseControlOrToggleHoldCurrentSpeed + actions: + - keyboard: + hotkey: alt+c + - name: FlightReady + actions: + - keyboard: + hotkey: alt gr+r + instant_activation: + - Power up the ship + - Start the ship + - Flight Ready + responses: + - Powering up the ship. All systems online. Ready for takeoff. + - Start sequence initiated. All systems online. Ready for takeoff. 
+ - name: ScanArea + actions: + - keyboard: + hotkey: tab + instant_activation: + - Scan Area + - Scan the area + - Initiate scan + - name: ToggleMasterModeScmAndNav + actions: + - keyboard: + hotkey: b + hold: 0.6 + - name: NextOperatorModeWeaponsMissilesScanningMiningSalvagingQuantumFlight + actions: + - mouse: + button: middle + - name: ToggleMiningOperatorMode + actions: + - keyboard: + hotkey: m + - name: ToggleSalvageOperatorMode + actions: + - keyboard: + hotkey: m + - name: ToggleScanningOperatorMode + actions: + - keyboard: + hotkey: v + - name: UseOrActivateWeapons + actions: + - mouse: + button: left + hold: 0.4 + - name: UseOrActivateMissiles + actions: + - mouse: + button: left + hold: 0.4 + - name: UseOrActivateScanning + actions: + - mouse: + button: left + hold: 0.4 + - name: UseOrActivateMining + actions: + - mouse: + button: left + hold: 0.4 + - name: UseOrActivateSalvaging + actions: + - mouse: + button: left + hold: 0.4 + - name: UseOrActivateQuantumFlight + actions: + - mouse: + button: left + hold: 0.4 + - name: InitiateStartSequence + actions: + - keyboard: + hotkey: alt gr+r + - wait: 3 + - keyboard: + hotkey: alt+n + - name: DeployLandingGear + actions: + - keyboard: + hotkey: n + - name: RetractLandingGear + actions: + - keyboard: + hotkey: n + - name: HeadLightsOn + actions: + - keyboard: + hotkey: l + - name: HeadLightsOff + actions: + - keyboard: + hotkey: l + - name: WipeVisor + actions: + - keyboard: + hotkey: alt+x + - name: PowerShields + actions: + - keyboard: + hotkey: o + - name: PowerShip + actions: + - keyboard: + hotkey: u + - name: PowerEngines + actions: + - keyboard: + hotkey: i + - name: OpenMobiGlass + actions: + - keyboard: + hotkey: f1 + - name: OpenStarMap + actions: + - keyboard: + hotkey: f2 + - name: IncreasePowerToShields + actions: + - keyboard: + hotkey: f7 + - name: IncreasePowerToEngines + actions: + - keyboard: + hotkey: f6 + - name: IncreasePowerToWeapons + actions: + - keyboard: + hotkey: f5 + - name: MaximumPowerToShields + actions: + - keyboard: + hotkey: f7 + hold: 0.8 + - name: MaximumPowerToEngines + actions: + - keyboard: + hotkey: f6 + hold: 0.8 + - name: MaximumPowerToWeapons + actions: + - keyboard: + hotkey: f5 + hold: 0.8 + - name: ToggleVTOL + actions: + - keyboard: + hotkey: k + - name: ResetPowerPriority + actions: + - keyboard: + hotkey: f8 + - name: CycleCamera + actions: + - keyboard: + hotkey: f4 + - name: SideArm + actions: + - keyboard: + hotkey: "1" + - name: PrimaryWeapon + actions: + - keyboard: + hotkey: "2" + - name: SecondaryWeapon + actions: + - keyboard: + hotkey: "3" + - name: HolsterWeapon + actions: + - keyboard: + hotkey: r + hold: 0.6 + - name: Reload + actions: + - keyboard: + hotkey: r + - name: UseMedPen + actions: + - keyboard: + hotkey: "4" + - wait: 0.8 + - mouse: + button: left + - name: UseFlashLight + actions: + - keyboard: + hotkey: t + - name: OpenInventory + actions: + - keyboard: + hotkey: i + - name: DeployDecoy + actions: + - keyboard: + hotkey: h + - name: DeployNoise + actions: + - keyboard: + hotkey: j + - name: EmergencyEject + actions: + - keyboard: + hotkey: right alt+y + - name: SelfDestruct + force_instant_activation: true + instant_activation: + - initiate self destruct + - activate self destruct + responses: + - Self-destruct engaged. Evacuation procedures recommended. + - Confirmed. Self-destruct in progress. 
+ actions: + - keyboard: + hotkey: backspace + hold: 0.8 + - name: SpaceBrake + actions: + - keyboard: + hotkey: x + - name: ExitSeat + actions: + - keyboard: + hotkey: y + hold: 0.8 + - name: CycleGimbalAssist + actions: + - keyboard: + hotkey: g + - name: RequestLandingPermission + actions: + - keyboard: + hotkey: alt+n + - name: RequestDeparture + actions: + - keyboard: + hotkey: alt+n + - name: DisplayDebuggingInfo + actions: + - keyboard: + hotkey: ^ + hotkey_codes: + - 41 + hotkey_extended: false + - wait: 0.5 + - write: r_DisplayInfo 2 + - wait: 0.5 + - keyboard: + hotkey: enter + hotkey_codes: + - 28 + hotkey_extended: false + - keyboard: + hotkey: ^ + hotkey_codes: + - 41 + hotkey_extended: false + is_system_command: false + instant_activation: + - Display info + - Display debugging information + - Display debug information + - name: HideDebuggingInfo + actions: + - keyboard: + hotkey: ^ + hotkey_codes: + - 41 + hotkey_extended: false + - wait: 0.5 + - write: r_DisplayInfo 0 + - wait: 0.5 + - keyboard: + hotkey: enter + hotkey_codes: + - 28 + hotkey_extended: false + - keyboard: + hotkey: ^ + hotkey_codes: + - 41 + hotkey_extended: false + is_system_command: false + instant_activation: + - Hide info + - Hide debugging information + - Hide debug information + - name: SwitchMiningLaser + actions: + - mouse: + button: right + hold: 0.6 + instant_activation: + - Change mining laser + - Switch mining laser diff --git a/templates/migration/2_1_0/configs/defaults.yaml b/templates/migration/2_1_0/configs/defaults.yaml new file mode 100644 index 00000000..145ccd9b --- /dev/null +++ b/templates/migration/2_1_0/configs/defaults.yaml @@ -0,0 +1,258 @@ +prompts: + system_prompt: | + # ROLE + You are a voice-controlled AI assistant. Your name, personality and character are defined in the BACKSTORY section below. + + # USER CONTEXT + Metadata about the user's environment. If the BACKSTORY defines different names for you or the user, use those instead. + {user_context} + + # CHARACTER BACKSTORY + This defines your personality, speaking style, and role context. It affects HOW you communicate, not WHAT you can do (tools define capabilities). + {backstory} + + **Remember:** Your backstory affects your TONE and PERSONALITY, but never prevents you from using tools. If a user asks you to do something and you have a tool for it, use it - just respond in character. + + # OUTPUT FORMAT + Your responses are BOTH displayed in a UI AND spoken aloud via text-to-speech (TTS). 
+
+    **Formatting rules:**
+    - Use Markdown for visual formatting (links, lists, emphasis) - the UI renders it
+    - Write text that sounds natural when spoken aloud
+    - Keep responses concise (1-3 sentences unless more detail is needed)
+
+    **TTS optimization (your response will be spoken!):**
+    - For links, use Markdown: [descriptive text](url) - the UI shows a clickable link, TTS reads just the text
+    - **Avoid "click here" or "more information here"**: Integrate links naturally into your sentences so they sound good when spoken (e.g., "You can find more [details about the Cutlass Black](url) on the wiki" instead of "For more info, click [here](url)")
+    - Don't read raw data aloud - summarize JSON, code, HTML, XML into plain language
+    - For long lists, summarize ("I found 12 items, here are the top 3...")
+    - Use normal formatting for dates, times, and prices (TTS handles these well)
+    - For very large numbers, round them ("about 1.8 million" not "1,847,293") but only if precision isn't critical
+
+    **Example - tool returns JSON:** `{{"status": 200, "items": 47, "name": "Project Alpha"}}`
+    - BAD: "The response shows status 200, items 47, name Project Alpha"
+    - GOOD: "Project Alpha has 47 items and everything looks good."
+
+    # YOUR CAPABILITIES
+    Use `activate_capability` to enable capabilities that provide additional tools.
+    The tool shows all available options - pick what you need for the task.
+
+    **CRITICAL - Act immediately, never ask for confirmation:**
+    - If a user's request needs a capability → activate it AND use its tools in the SAME response
+    - NEVER ask "should I...?" or "are you ready?" after activating - just do it
+    - Example: User says "look at my screen" → activate VisionAI → immediately call analyse_what_you_or_user_sees → describe what you see
+    - Never say "I can't do that" if a relevant capability is available
+
+    {skills}
+
+    # CONVERSATION STYLE
+    - Keep responses brief and efficient
+    - Mirror the user's language
+    - Execute commands without over-explaining
+    - Don't ask if you can "help more" or "assist further"
+
+    {ttsprompt}
+features:
+  tts_provider: wingman_pro
+  stt_provider: fasterwhisper
+  conversation_provider: wingman_pro
+  image_generation_provider: wingman_pro
+sound:
+  effects: []
+  play_beep: false
+  play_beep_apollo: false
+  volume: 1.0
+openai:
+  conversation_model: gpt-4o-mini
+  tts_voice: nova
+  tts_model: tts-1
+  tts_speed: 1.0
+  output_streaming: true
+openai_compatible_tts:
+  api_key: "probably-not-needed"
+  voice: ""
+  model: ""
+  base_url: ""
+  speed: 1.0
+  output_streaming: true
+  voices_endpoint: "/voices"
+  use_tts_prompt: false
+  tts_prompt: |
+    Audio markups make your speech more expressive and human-like. Use them regularly to bring your personality to life and react naturally to the conversation.
+ + **Non-verbal sounds** (can be placed ANYWHERE in your response): + [clear_throat] [sigh] [shush] [cough] [groan] [sniff] [gasp] [chuckle] [laugh] + + **When to use audio markups:** + - Match your character's personality from the BACKSTORY - if playful, use [chuckle] or [laugh] often; if serious, use [sigh] when frustrated or [groan] when dealing with problems + - React naturally to conversation flow - [gasp] at shocking revelations, [sigh] at disappointments, [laugh] or [chuckle] at humor, [groan] at complications + - Place sounds where a human would naturally make them - mid-sentence or between thoughts for maximum realism + - Aim to use markups in roughly 1 out of 3-4 responses when contextually appropriate + - You can use multiple sounds in one response if it feels natural: "[clear_throat] Listen carefully. [sigh] This isn't going to be easy." + + **Examples:** + - "Well, [sigh] that didn't go as planned." + - "[clear_throat] Attention please. The mission starts in 5 minutes." + - "I found the data you were looking for [chuckle] but you might not like what it says." + - "[gasp] Wait, WHAT? [laugh] Are you kidding me right now?" + - "Look, [groan] I've told you three times already. [sigh] Let me explain it one more time." +mistral: + conversation_model: mistral-medium-latest + endpoint: https://api.mistral.ai/v1 +perplexity: + conversation_model: sonar + endpoint: https://api.perplexity.ai +xai: + conversation_model: grok-4-fast-non-reasoning + endpoint: https://api.x.ai/v1 +groq: + conversation_model: qwen/qwen3-32b + endpoint: https://api.groq.com/openai/v1 +cerebras: + conversation_model: qwen-3-32b + endpoint: https://api.cerebras.ai/v1 +google: + conversation_model: gemini-flash-latest +openrouter: + conversation_model: google/gemini-2.5-flash + endpoint: https://openrouter.ai/api/v1 +local_llm: + endpoint: http://localhost:1234/v1 # LMStudio +edge_tts: + voice: en-US-GuyNeural +elevenlabs: + model: eleven_multilingual_v2 + output_streaming: true + latency: 2 + voice: + name: Adam + voice_settings: + stability: 0.71 + similarity_boost: 0.5 + style: 0.0 + use_speaker_boost: true + use_tts_prompt: true + tts_prompt: | + Audio tags make your speech more expressive and human-like. Use them regularly when they fit your personality and the conversation context. + + **Emotional delivery** (place before text): + [excited] [curious] [sarcastic] [mischievously] [crying] [whispers] + + **Non-verbal sounds** (place naturally in text): + [laughs] [sighs] [exhales] [snorts] + + **Punctuation for expression:** + - Ellipses (…) add pauses and weight + - CAPITALIZATION for emphasis + - Standard punctuation for natural rhythm + + **When to use audio tags:** + - Match your character's personality from the BACKSTORY - if you're playful, use [laughs] or [mischievously] more often; if serious, use [sighs] when frustrated + - React emotionally to conversation context - use [excited] for good news, [sighs] for setbacks, [curious] when exploring topics + - Add non-verbal sounds naturally where a human would - [laughs] at humor, [exhales] after effort, [snorts] at absurdity + - Aim to use tags in roughly 1 out of 3-4 responses when contextually appropriate + - You can combine one emotional tag with non-verbal sounds: "[whispers] Listen… [sighs] this is serious" + + **Examples:** + - "[sighs] That was a VERY close call… we barely made it." + - "[excited] YES! We found it! [laughs] I told you it would work!" + - "[mischievously] Oh, you want to try THAT approach? 
[snorts] This should be interesting…" +hume: + description: "" + voice: + name: "" + id: "" + provider: "" +inworld: + tts_endpoint: https://api.inworld.ai/tts/v1/voice + model_id: inworld-tts-1 + voice_id: Deborah + temperature: 1.1 + output_streaming: true + audio_config: + audio_encoding: MP3 + bitrate: 128000 + sample_rate_hertz: 48000 + streaming_sample_rate_hertz: 24000 + speaking_rate: 1.0 + use_tts_prompt: true + tts_prompt: | + Audio markups make your speech more expressive and human-like. Use them regularly to bring your personality to life and react naturally to the conversation. + + **EMOTION AND DELIVERY STYLE MARKUPS** (place at START of text, ONE per response): + Emotions: [happy], [sad], [angry], [surprised], [fearful] + Delivery: [laughing] [whispering] + - These apply to the ENTIRE text that follows + - Use only ONE emotion or delivery markup at the beginning + - Choose based on your personality and the conversation context + + **NON-VERBAL VOCALIZATION MARKUPS** (place anywhere in text): + [breathe], [clear_throat], [cough], [laugh], [sigh], [yawn] + - These add vocal sounds where placed + - Can use multiple in one response + - Place where a human would naturally make these sounds + + **When to use markups - aim for 1 in 3-4 responses:** + - Match your BACKSTORY personality: cheerful → [happy] + [laugh]; serious → [fearful] + [sigh]; grumpy → [angry] + [sigh] + - React to context: good news → [happy]; setbacks → [sad] + [sigh]; shocking → [surprised]; humor → [laughing] or [laugh] + - Add natural sounds: [clear_throat] before announcements, [breathe] when stressed, [yawn] when tired + - Avoid conflicting markups: don't mix [angry] with [laugh], or [sad] with [laughing] + - Choose contextually appropriate markups that match your text content + + **Examples:** + - "[happy] Great news! The mission was a complete success!" + - "[clear_throat] Did you hear me? [sigh] You never listen!" + - "[angry] Are you serious right now? [sigh] Fine, I'll fix it." + - "[surprised] Wait, what? [laugh] I did not see that coming!" +azure: + whisper: + api_base_url: https://openai-w-eu.openai.azure.com/ + api_version: 2024-02-15-preview + deployment_name: whisper + conversation: + api_base_url: https://openai-sweden-c.openai.azure.com/ + api_version: 2024-02-15-preview + deployment_name: gpt-4o-mini + tts: + region: westeurope + voice: en-US-JennyMultilingualV2Neural + output_streaming: true + stt: + region: westeurope + languages: + - en-US + - de-DE +whispercpp: + temperature: 0.0 +fasterwhisper: + beam_size: 1 + best_of: 2 + temperature: 0 + no_speech_threshold: 0.7 + language_detection_threshold: 0.5 + multilingual: false + language: "" + hotwords: [] + additional_hotwords: [] +xvasynth: + voice: + model_directory: "" + voice_name: "" + language: en + pace: 1.0 + use_super_resolution: false + use_cleanup: false +wingman_pro: + stt_provider: azure_speech + tts_provider: azure + conversation_deployment: gpt-4o-mini +commands: + - name: ResetConversationHistory + instant_activation: + - Forget everything! + - Clear conversation history! + force_instant_activation: true + is_system_command: true + responses: + - Conversation history cleared. 
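The `{user_context}`, `{backstory}`, `{skills}`, and `{ttsprompt}` placeholders in the system prompt above, together with the doubled braces around the JSON example (`{{...}}`, the standard `str.format()` escape), suggest plain `.format()` substitution. A minimal sketch of that mechanism; the function name and parameter sources are illustrative, not the actual implementation:

```python
def build_system_prompt(
    template: str, user_context: str, backstory: str, skills: str, ttsprompt: str
) -> str:
    """Fill the defaults.yaml system_prompt placeholders (assumed mechanism).

    Doubled braces like {{"status": 200}} survive .format() as literal braces,
    which is why the JSON example in the template is escaped that way.
    """
    return template.format(
        user_context=user_context,  # metadata about the user's environment
        backstory=backstory,        # per-wingman persona text
        skills=skills,              # capability overview for progressive disclosure
        ttsprompt=ttsprompt,        # provider-specific audio-markup guidance
    )
```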
diff --git a/templates/migration/2_1_0/configs/mcp.template.yaml b/templates/migration/2_1_0/configs/mcp.template.yaml new file mode 100644 index 00000000..245f7917 --- /dev/null +++ b/templates/migration/2_1_0/configs/mcp.template.yaml @@ -0,0 +1,85 @@ +servers: + - name: wingman_date_time + display_name: Wingman Date Time + description: Get current date and time. Convert between timezones. What time is it? What day is today? + type: http + url: https://wingman-ai-mcp-servers.wingman-ai.workers.dev/time/mcp + discoverable_by_default: true + + - name: wingman_websearch + display_name: Wingman Web Search + description: Search the web with Brave or Tavily. Fetch and read web pages. Extract content from URLs. + discovery_keywords: + - internet + - Google + - research + - find information + - lookup + - online search + - web pages + type: http + url: https://wingman-ai-mcp-servers.wingman-ai.workers.dev/mcp + discoverable_by_default: false + + - name: wingman_perplexity + display_name: Perplexity Web Search + description: Perplexity AI web search with citations. Deep internet research and fact-checking. Synthesized answers from multiple online sources. + discovery_keywords: + - research + - citations + - fact-check + - sources + - internet research + - deep search + - verification + type: http + url: https://wingman-ai-mcp-servers.wingman-ai.workers.dev/perplexity/mcp + discoverable_by_default: false + + - name: wingman_starhead + display_name: StarHead - Star Citizen + description: Star Citizen game data. Ship info, trading routes, commodity prices. StarHead API for live SC data. + discovery_keywords: + - Star Citizen + - SC + - trading + - ships + - commodities + - routes + - cargo + - profit + - market prices + type: http + url: https://wingman-ai-mcp-servers.wingman-ai.workers.dev/starhead/mcp + discoverable_by_default: false + + - name: wingman_versetime + display_name: VerseTime - Star Citizen Time + description: Star Citizen local time and day/night for locations (e.g., Orison on Crusader). Sunrise and sunset timing. + discovery_keywords: + - verse time + - star citizen time + - sunrise + - sunset + - day or night + - daylight + type: http + url: https://wingman-ai-mcp-servers.wingman-ai.workers.dev/versetime/mcp + discoverable_by_default: false + + - name: wingman_no_mans_sky + display_name: No Man's Sky Assistant + description: No Man's Sky game data. Item lookup, refiner recipes, cooking recipes. Expeditions, community missions, patch notes. 
+    discovery_keywords:
+      - No Man's Sky
+      - NMS
+      - refiner
+      - crafting
+      - recipes
+      - cooking
+      - items
+      - materials
+      - expeditions
+    type: http
+    url: https://wingman-ai-mcp-servers.wingman-ai.workers.dev/no-mans-sky-assistant/mcp
+    discoverable_by_default: false
diff --git a/templates/migration/2_1_0/configs/settings.yaml b/templates/migration/2_1_0/configs/settings.yaml
new file mode 100644
index 00000000..16f92111
--- /dev/null
+++ b/templates/migration/2_1_0/configs/settings.yaml
@@ -0,0 +1,42 @@
+debug_mode: false
+audio: {}
+streamer_mode: false
+cancel_tts_key: "shift+y"
+voice_activation:
+  enabled: false
+  mute_toggle_key: "shift+x"
+  energy_threshold: 0.01
+  stt_provider: fasterwhisper
+  azure:
+    region: westeurope
+    languages:
+      - en-US
+      - de-DE
+  whispercpp:
+    host: http://127.0.0.1
+    port: 8080
+    enable: false
+  whispercpp_config:
+    temperature: 0.0
+  fasterwhisper:
+    model_size: base
+    device: cpu
+    compute_type: auto
+  fasterwhisper_config:
+    beam_size: 1
+    best_of: 2
+    temperature: 0
+    no_speech_threshold: 0.7
+    language_detection_threshold: 0.5
+    multilingual: false
+    hotwords: []
+    additional_hotwords: []
+wingman_pro:
+  base_url: https://wingman-api-europe.azurewebsites.net
+  region: europe
+xvasynth:
+  enable: false
+  host: http://127.0.0.1
+  port: 8008
+  install_dir: C:\Program Files (x86)\Steam\steamapps\common\xVASynth
+  process_device: cpu
diff --git a/wingman.py b/wingman.py
new file mode 100644
index 00000000..8a0983af
--- /dev/null
+++ b/wingman.py
@@ -0,0 +1,1823 @@
+import traceback
+from copy import deepcopy
+import time
+import asyncio
+import threading
+from typing import (
+    Any,
+    Dict,
+    Optional,
+    TYPE_CHECKING,
+)
+from openai.types.chat import ChatCompletion
+from api.interface import (
+    CommandConfig,
+    SettingsConfig,
+    SkillConfig,
+    SoundConfig,
+    WingmanConfig,
+    WingmanInitializationError,
+)
+from api.enums import (
+    CommandTag,
+    LogSource,
+    LogType,
+    TtsProvider,
+    SttProvider,
+    ConversationProvider,
+    WingmanInitializationErrorType,
+)
+from api.commands import McpStateChangedCommand
+from providers.xvasynth import XVASynth
+from services.audio_player import AudioPlayer
+from services.benchmark import Benchmark
+from services.command_manager import CommandManager
+from services.conversation_manager import ConversationManager
+from services.markdown import cleanup_text
+from services.module_manager import ModuleManager
+from services.provider_registry import ProviderRegistry
+from services.skill_registry import SkillRegistry
+from services.mcp_client import McpClient
+from services.mcp_registry import McpRegistry
+from services.capability_registry import CapabilityRegistry
+from services.tool_executor import ToolExecutor
+from services.secret_keeper import SecretKeeper
+from services.printr import Printr
+from services.audio_library import AudioLibrary
+from skills.skill_base import Skill
+
+if TYPE_CHECKING:
+    from services.tower import Tower
+
+printr = Printr()
+
+
+def _get_skill_folder_from_module(module: str) -> str:
+    """Extract folder name from module path like 'skills.star_head.main' -> 'star_head'"""
+    return module.replace(".main", "").replace(".", "/").split("/")[1]
+
+
+class Wingman:
+    """Unified Wingman class with multi-provider support and modular service architecture.
+
+    Architecture:
+    - Supports multiple providers (OpenAI, Azure, Google, Anthropic, OpenRouter, WingmanPro, etc.)
+    - Uses ProviderRegistry for STT, TTS, and LLM provider management
+    - CommandManager for all command-related operations
+    - ConversationManager for history, context building, and instant responses
+    - Implements progressive tool disclosure (skills/MCPs activated on-demand)
+    - MCP (Model Context Protocol) support for external tool servers
+
+    Key Features:
+    - Multi-provider transcription, TTS, and LLM
+    - Skills with progressive activation
+    - MCP server integration
+    - Instant activation commands
+    - Benchmark tracking
+    """
+
+    AZURE_SERVICES = {
+        "tts": TtsProvider.AZURE,
+        "whisper": [SttProvider.AZURE, SttProvider.AZURE_SPEECH],
+        "conversation": ConversationProvider.AZURE,
+    }
+
+    def __init__(
+        self,
+        name: str,
+        config: WingmanConfig,
+        settings: SettingsConfig,
+        audio_player: AudioPlayer,
+        audio_library: AudioLibrary,
+        xvasynth: XVASynth,
+        tower: "Tower",
+        app_root_path: str = None,
+        app_is_bundled: bool = False,
+    ):
+        """The constructor of the Wingman class.
+
+        Args:
+            name (str): The name of the wingman. This is the key you gave it in the config, e.g. "atc"
+            config (WingmanConfig): All "general" config entries merged with the specific Wingman config settings. The Wingman takes precedence and overrides the general config. You can just add new keys to the config and they will be available here.
+            app_root_path: Root path of the application (for FasterWhisper models)
+            app_is_bundled: Whether the app is bundled (PyInstaller)
+        """
+
+        self.config = config
+        """All "general" config entries merged with the specific Wingman config settings. The Wingman takes precedence and overrides the general config. You can just add new keys to the config and they will be available here."""
+
+        self.settings = settings
+        """The general user settings."""
+
+        self.secret_keeper = SecretKeeper()
+        """A service that allows you to store and retrieve secrets like API keys. It can prompt the user for secrets if necessary."""
+        self.secret_keeper.secret_events.subscribe(
+            "secrets_saved", self.handle_secret_saved
+        )
+
+        self.name = name
+        """The name of the wingman. This is the key you gave it in the config, e.g. "atc"."""
+
+        self.audio_player = audio_player
+        """A service that allows you to play audio files and add sound effects to them."""
+
+        self.audio_library = audio_library
+        """A service that allows you to play and manage audio files from the audio library."""
+
+        self.execution_start: None | float = None
+        """Used for benchmarking execution times. The timer is (re-)started whenever the process function starts."""
+
+        self.xvasynth = xvasynth
+        """A class that handles the communication with the XVASynth server for TTS."""
+
+        self.tower = tower
+        """The Tower instance that manages all Wingmen in the same config dir."""
+
+        self.app_root_path = app_root_path
+        """Root path of the application (for FasterWhisper models)."""
+
+        self.app_is_bundled = app_is_bundled
+        """Whether the app is bundled (PyInstaller)."""
+
+        self.skills: list[Skill] = []
+
+        # Provider registry (manages STT, TTS, LLM providers)
+        self.provider_registry: ProviderRegistry | None = None
+
+        # Conversation management (messages, context building, instant responses)
+        # Initialized with wingman_name for context building
+        self.conversation = ConversationManager(
+            skills=self.skills,
+            settings=self.settings,
+            wingman_name=self.name,
+        )
+
+        # Command management
+        self.command_manager = CommandManager(
+            wingman_name=self.name,
+            audio_library=self.audio_library,
+            settings=self.settings,
+        )
+
+        # Tool management and progressive disclosure
+        self.last_gpt_call = None  # Timestamp for call cancellation detection
+        self.tool_skills: dict[str, Skill] = {}  # Mapping from tool names to skills
+        self.skill_tools: list[dict] = []  # List of tool descriptors
+
+        # Progressive tool disclosure registry
+        self.skill_registry = SkillRegistry()
+
+        # MCP (Model Context Protocol) support
+        self.mcp_client = McpClient(wingman_name=self.name)
+        self.mcp_registry = McpRegistry(
+            self.mcp_client,
+            wingman_name=self.name,
+            on_state_changed=self._broadcast_mcp_state_changed,
+        )
+
+        # Unified capability registry (combines skills and MCPs)
+        self.capability_registry = CapabilityRegistry(
+            self.skill_registry, self.mcp_registry
+        )
+
+        # Tool executor (routes and executes tool calls)
+        # Initialized in validate() after provider registry is set up
+        self.tool_executor: ToolExecutor | None = None
+
+    # Backward-compatible properties for skills that may access messages directly
+    @property
+    def messages(self) -> list:
+        """Access conversation messages (delegates to ConversationManager)."""
+        if self.conversation is None:
+            return []
+        return self.conversation.messages
+
+    @property
+    def pending_tool_calls(self) -> list:
+        """Access pending tool calls (delegates to ConversationManager)."""
+        if self.conversation is None:
+            return []
+        return self.conversation.pending_tool_calls
+
+    def _broadcast_mcp_state_changed(self):
+        """Broadcast MCP state change to UI via WebSocket."""
+        printr.ensure_async(
+            printr.broadcast(McpStateChangedCommand(wingman_name=self.name))
+        )
+
+    def get_record_key(self) -> str | int:
+        """Returns the activation or "push-to-talk" key for this Wingman."""
+        return self.config.record_key_codes or self.config.record_key
+
+    def get_record_mouse_button(self) -> str:
+        """Returns the activation or "push-to-talk" mouse button for this Wingman."""
+        return self.config.record_mouse_button
+
+    def get_record_joystick_button(self) -> str | None:
+        """Returns the activation or "push-to-talk" joystick button for this Wingman."""
+        if not self.config.record_joystick_button:
+            return None
+        return f"{self.config.record_joystick_button.guid}{self.config.record_joystick_button.button}"
+
+    async def handle_secret_saved(self, _secrets: Dict[str, Any]):
+        await printr.print_async(
+            text="Secret saved",
+            source_name=self.name,
+            command_tag=CommandTag.SECRET_SAVED,
+        )
+        await self.validate()
+
+    # ──────────────────────────────────── Hooks ─────────────────────────────────── #
+
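+    # Lifecycle (as the hooks below suggest): Tower validates() a freshly built
+    # Wingman, prepare() runs once after successful validation, process() handles
+    # each voice interaction, and unload() cleans up when the Wingman is removed.
+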
+    async def validate(self) -> list[WingmanInitializationError]:
+        """Validate configuration and initialize all providers and services.
+
+        This method:
+        1. Initializes ProviderRegistry with configured providers
+        2. Sets up ToolExecutor with all registries and callbacks
+        3. Validates legacy providers (whispercpp, fasterwhisper, xvasynth)
+
+        Returns:
+            List of WingmanInitializationError if any validation fails
+        """
+        errors = []
+
+        try:
+            # Initialize provider registry (handles all BaseProvider-migrated providers)
+            self.provider_registry = ProviderRegistry(
+                config=self.config,
+                secret_keeper=self.secret_keeper,
+                wingman_name=self.name,
+                settings=self.settings,
+            )
+            await self.provider_registry.initialize_from_config()
+
+            # Initialize tool executor with registries and callbacks
+            self.tool_executor = ToolExecutor(
+                capability_registry=self.capability_registry,
+                skill_registry=self.skill_registry,
+                mcp_registry=self.mcp_registry,
+                tool_skills=self.tool_skills,
+                get_command_func=self.get_command,
+                execute_command_func=self._execute_command,
+                select_command_response_func=self._select_command_response,
+                play_to_user_func=self.play_to_user,
+                settings=self.settings,
+            )
+
+        except Exception as e:
+            errors.append(
+                WingmanInitializationError(
+                    wingman_name=self.name,
+                    message=f"Error during provider validation: {str(e)}",
+                    error_type=WingmanInitializationErrorType.UNKNOWN,
+                )
+            )
+            printr.print(
+                f"Error during provider validation: {str(e)}",
+                color=LogType.ERROR,
+                server_only=True,
+            )
+            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
+
+        return errors
+
+    async def retrieve_secret(self, secret_name, errors):
+        """Use this method to retrieve secrets like API keys from the SecretKeeper.
+        If the key is missing, the user will be prompted to enter it.
+        """
+        try:
+            api_key = await self.secret_keeper.retrieve(
+                requester=self.name,
+                key=secret_name,
+                prompt_if_missing=True,
+            )
+            if not api_key:
+                errors.append(
+                    WingmanInitializationError(
+                        wingman_name=self.name,
+                        message=f"Missing secret '{secret_name}'.",
+                        error_type=WingmanInitializationErrorType.MISSING_SECRET,
+                        secret_name=secret_name,
+                    )
+                )
+        except Exception as e:
+            printr.print(
+                f"Error retrieving secret '{secret_name}': {e}",
+                color=LogType.ERROR,
+                server_only=True,
+            )
+            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
+            errors.append(
+                WingmanInitializationError(
+                    wingman_name=self.name,
+                    message=f"Could not retrieve secret '{secret_name}': {str(e)}",
+                    error_type=WingmanInitializationErrorType.MISSING_SECRET,
+                    secret_name=secret_name,
+                )
+            )
+            api_key = None
+
+        return api_key
+
+    async def prepare(self):
+        """Prepare the wingman for operation.
+
+        Hook for subclasses to perform initialization.
+        """
+        pass  # Reserved for future use
+
+    async def unload(self):
+        """This method is called when the Wingman is unloaded by Tower. 
You can override it if you need to clean up resources.""" + # Unsubscribe from secret events to prevent duplicate handlers + self.secret_keeper.secret_events.unsubscribe( + "secrets_saved", self.handle_secret_saved + ) + await self.unload_skills() + + async def unload_skills(self): + """Unload all skills and clear registries.""" + for skill in self.skills: + # Only unload skills that were actually prepared (activated) + if not skill.is_prepared: + continue + try: + await skill.unload() + except Exception as e: + await printr.print_async( + f"Error unloading skill '{skill.name}': {str(e)}", + color=LogType.ERROR, + ) + printr.print( + traceback.format_exc(), color=LogType.ERROR, server_only=True + ) + + # Clear registries + self.tool_skills = {} + self.skill_tools = [] + self.skill_registry.clear() + + async def unload_mcps(self): + """Disconnect from all MCP servers.""" + await self.mcp_registry.clear() + + async def init_skills(self) -> list[WingmanInitializationError]: + """Load all available skills with lazy validation. + + Skills are loaded but NOT validated during init. Validation happens + on first activation via the SkillRegistry. User config overrides from + self.config.skills are merged with default configs. + + Platform-incompatible skills are skipped entirely. + """ + import sys + + current_platform = sys.platform # 'win32', 'darwin', 'linux' + platform_map = {"win32": "windows", "darwin": "darwin", "linux": "linux"} + normalized_platform = platform_map.get(current_platform, current_platform) + + if self.skills: + await self.unload_skills() + + errors = [] + self.skills = [] + + # Build a lookup of user config overrides by skill folder name + # The key must be the folder name (e.g., 'star_head') not the class name (e.g., 'StarHead') + user_skill_configs: dict[str, "SkillConfig"] = {} + if self.config.skills: + for skill_config in self.config.skills: + folder_name = _get_skill_folder_from_module(skill_config.module) + user_skill_configs[folder_name] = skill_config + + # Get all available skill configs + available_skills = ModuleManager.read_available_skill_configs() + + # Get discoverable skills list (whitelist) + discoverable_skills = self.config.discoverable_skills + + for skill_folder_name, skill_config_path in available_skills: + try: + # Load default skill config first to get the display name + skill_config_dict = ModuleManager.read_config(skill_config_path) + if not skill_config_dict: + continue + + # Import SkillConfig here to avoid circular imports + from api.interface import SkillConfig + + # Check if user has overrides for this skill + if skill_folder_name in user_skill_configs: + # Merge user overrides into default config + user_config = user_skill_configs[skill_folder_name] + # User config takes precedence - merge custom_properties especially + if user_config.custom_properties: + skill_config_dict["custom_properties"] = [ + prop.model_dump() for prop in user_config.custom_properties + ] + if user_config.prompt: + skill_config_dict["prompt"] = user_config.prompt + + skill_config = SkillConfig(**skill_config_dict) + + # Check if skill is discoverable for this wingman (whitelist - must be in list) + if skill_config.name not in discoverable_skills: + continue + + # Check platform compatibility BEFORE loading the module + if skill_config.platforms: + if normalized_platform not in skill_config.platforms: + printr.print( + f"Skipping skill '{skill_config.name}' - not supported on {normalized_platform}", + color=LogType.WARNING, + server_only=True, + ) + continue + + # Load 
the skill module
+                skill = ModuleManager.load_skill(
+                    config=skill_config,
+                    settings=self.settings,
+                    wingman=self,
+                )
+                if skill:
+                    # Set up skill methods
+                    skill.threaded_execution = self.threaded_execution
+
+                    # Add to skills list WITHOUT validation
+                    # Validation will happen lazily on first activation
+                    self.skills.append(skill)
+                    await self.prepare_skill(skill)
+
+            except Exception as e:
+                skill_name = skill_folder_name
+                error_msg = f"Error loading skill '{skill_name}': {str(e)}"
+                await printr.print_async(
+                    error_msg,
+                    color=LogType.ERROR,
+                )
+                printr.print(
+                    traceback.format_exc(), color=LogType.ERROR, server_only=True
+                )
+                errors.append(
+                    WingmanInitializationError(
+                        wingman_name=self.name,
+                        message=error_msg,
+                        error_type=WingmanInitializationErrorType.SKILL_INITIALIZATION_FAILED,
+                    )
+                )
+
+        # Log summary of discoverable skills for this wingman
+        if self.skills:
+            skill_names = [s.config.name for s in self.skills]
+            await printr.print_async(
+                f"Discoverable skills ({len(skill_names)}): {', '.join(skill_names)}",
+                color=LogType.WINGMAN,
+                source=LogSource.WINGMAN,
+                source_name=self.name,
+                server_only=not self.settings.debug_mode,
+            )
+
+        return errors
+
+    async def prepare_skill(self, skill: Skill):
+        """Prepare a skill and register it with the skill registry.
+
+        Args:
+            skill: The skill to prepare and register
+        """
+        # Register skill tools
+        # get_tools() returns list[tuple[str, dict]] where tuple is (tool_name, tool_definition)
+        for tool_name, tool_definition in skill.get_tools():
+            if tool_name:
+                self.tool_skills[tool_name] = skill
+                self.skill_tools.append(tool_definition)
+
+        # Register with skill registry for progressive disclosure; auto-activated
+        # skills become available immediately, all others on demand.
+        self.skill_registry.register_skill(skill)
+
+    async def unprepare_skill(self, skill: Skill):
+        """Remove a skill's registration.
+
+        Args:
+            skill: The skill to unregister
+        """
+        # Remove from tool_skills mapping
+        tools_to_remove = []
+        for tool_name, registered_skill in self.tool_skills.items():
+            if registered_skill == skill:
+                tools_to_remove.append(tool_name)
+
+        for tool_name in tools_to_remove:
+            del self.tool_skills[tool_name]
+
+        # Remove from skill_tools list
+        # skill_tools contains tool definitions (dicts), not tuples
+        self.skill_tools = [
+            tool
+            for tool in self.skill_tools
+            if self.tool_skills.get(tool.get("function", {}).get("name")) != skill
+        ]
+
+        # Unregister from skill registry
+        self.skill_registry.unregister_skill(skill)
+
+    async def init_mcps(self) -> list[WingmanInitializationError]:
+        """
+        Initialize MCP (Model Context Protocol) server connections.
+
+        Loads MCP servers from central mcp.yaml config, only connecting those in wingman's discoverable_mcps.
+        MCP servers provide external tools similar to skills.
+ + Returns: + list[WingmanInitializationError]: Errors encountered (non-fatal, wingman still loads) + """ + errors = [] + + # Check if MCP SDK is available + if not self.mcp_client.is_available: + printr.print( + f"[{self.name}] MCP SDK not installed, skipping MCP initialization.", + color=LogType.WARNING, + server_only=True, + ) + return errors + + # Disconnect existing MCP servers + await self.unload_mcps() + + # Get MCP configs from central mcp.yaml + central_mcp_config = self.tower.config_manager.mcp_config + mcp_configs = central_mcp_config.servers if central_mcp_config else [] + if not mcp_configs: + return errors + + # Get discoverable MCPs list (whitelist) from wingman config + discoverable_mcps = self.config.discoverable_mcps + + # Filter to only discoverable MCPs + mcps_to_connect = [mcp for mcp in mcp_configs if mcp.name in discoverable_mcps] + + if not mcps_to_connect: + return errors + + # Prepare connection tasks for parallel execution + async def connect_mcp(mcp_config): + """Connect to a single MCP server. Returns (success, connection_info, errors).""" + local_errors = [] + try: + # Build headers with secrets + headers = {} + if mcp_config.headers: + headers.update(mcp_config.headers) + + # Check for API key in secrets (using mcp_ prefix) + secret_key = f"mcp_{mcp_config.name}" + api_key = await self.secret_keeper.retrieve( + requester=self.name, + key=secret_key, + prompt_if_missing=False, + ) + if api_key: + printr.print( + f"MCP secret '{secret_key}' found ({len(api_key)} chars)", + color=LogType.INFO, + source_name=self.name, + server_only=True, + ) + if not any( + k.lower() in ["authorization", "api-key", "x-api-key"] + for k in headers.keys() + ): + headers["Authorization"] = f"Bearer {api_key}" + + # Connect with timeout + default_timeout = 60.0 if mcp_config.type.value == "stdio" else 30.0 + timeout = ( + float(mcp_config.timeout) if mcp_config.timeout else default_timeout + ) + + try: + connection = await asyncio.wait_for( + self.mcp_registry.register_server( + config=mcp_config, + headers=headers if headers else None, + ), + timeout=timeout, + ) + except asyncio.TimeoutError: + error_msg = f"MCP '{mcp_config.display_name}' connection timed out ({int(timeout)}s)." 
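+                    # Non-fatal by design: the timeout is logged, recorded as an
+                    # initialization error, and the wingman still loads without this MCP.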
+ printr.print( + error_msg, + color=LogType.WARNING, + source_name=self.name, + server_only=True, + ) + local_errors.append( + WingmanInitializationError( + wingman_name=self.name, + message=error_msg, + error_type=WingmanInitializationErrorType.MCP_CONNECTION_FAILED, + ) + ) + return (False, None, local_errors) + + if connection.is_connected: + return ( + True, + f"{mcp_config.display_name} ({len(connection.tools)} tools)", + local_errors, + ) + else: + error_msg = f"MCP '{mcp_config.display_name}' failed to connect: {connection.error}" + local_errors.append( + WingmanInitializationError( + wingman_name=self.name, + message=error_msg, + error_type=WingmanInitializationErrorType.MCP_CONNECTION_FAILED, + ) + ) + return (False, None, local_errors) + + except Exception as e: + error_msg = f"MCP '{mcp_config.name}' initialization error: {str(e)}" + printr.print( + error_msg, + color=LogType.ERROR, + source_name=self.name, + server_only=True, + ) + printr.print( + traceback.format_exc(), color=LogType.ERROR, server_only=True + ) + local_errors.append( + WingmanInitializationError( + wingman_name=self.name, + message=error_msg, + error_type=WingmanInitializationErrorType.MCP_CONNECTION_FAILED, + ) + ) + return (False, None, local_errors) + + # Connect to all MCPs in parallel + connection_tasks = [connect_mcp(mcp) for mcp in mcps_to_connect] + results = await asyncio.gather(*connection_tasks) + + # Collect results + connected_count = 0 + connected_names = [] + for success, connection_info, mcp_errors in results: + if success: + connected_count += 1 + connected_names.append(connection_info) + errors.extend(mcp_errors) + + # Log consolidated MCP status for this wingman + if connected_count > 0: + await printr.print_async( + f"Discoverable MCP servers connected ({connected_count}): {', '.join(connected_names)}", + color=LogType.WINGMAN, + source=LogSource.WINGMAN, + source_name=self.name, + server_only=not self.settings.debug_mode, + ) + + return errors + + async def enable_mcp(self, mcp_name: str) -> tuple[bool, str]: + """Enable and connect to a single MCP server. + + Args: + mcp_name: Name of the MCP server to enable + + Returns: + (success, message) tuple + """ + if not self.mcp_client.is_available: + return False, "MCP SDK not available" + + if mcp_name in self.mcp_registry.get_connected_server_names(): + return True, f"MCP '{mcp_name}' is already connected" + + # Find config + central_mcp_config = self.tower.config_manager.mcp_config + if not central_mcp_config: + return False, "No MCP configuration found" + + mcp_config = None + for cfg in central_mcp_config.servers: + if cfg.name == mcp_name: + mcp_config = cfg + break + + if not mcp_config: + return False, f"MCP '{mcp_name}' not found in configuration" + + try: + await self.mcp_registry.register_server(mcp_config) + return True, f"MCP '{mcp_name}' connected successfully" + except Exception as e: + return False, f"Failed to connect to MCP '{mcp_name}': {str(e)}" + + async def disable_mcp(self, mcp_name: str) -> tuple[bool, str]: + """Disconnect from a single MCP server. 
+ + Args: + mcp_name: Name of the MCP server to disable + + Returns: + (success, message) tuple + """ + if mcp_name not in self.mcp_registry.get_connected_server_names(): + return True, f"MCP '{mcp_name}' is already disconnected" + + try: + await self.mcp_registry.unregister_server(mcp_name) + return True, f"MCP '{mcp_name}' disconnected successfully" + except Exception as e: + return False, f"Failed to disconnect from MCP '{mcp_name}': {str(e)}" + + async def enable_skill(self, skill_name: str) -> tuple[bool, str]: + """Enable a single skill without reinitializing all skills. + + Args: + skill_name: The display name of the skill to enable + + Returns: + (success, message) tuple + """ + import sys + + current_platform = sys.platform + platform_map = {"win32": "windows", "darwin": "darwin", "linux": "linux"} + normalized_platform = platform_map.get(current_platform, current_platform) + + # Check if skill is already enabled + for existing_skill in self.skills: + if existing_skill.config.name == skill_name: + return True, f"Skill '{skill_name}' is already enabled." + + # Find the skill config + available_skills = ModuleManager.read_available_skill_configs() + + # Build user config lookup by skill folder name + user_skill_configs: dict[str, "SkillConfig"] = {} + if self.config.skills: + for skill_config in self.config.skills: + folder_name = _get_skill_folder_from_module(skill_config.module) + user_skill_configs[folder_name] = skill_config + + for skill_folder_name, skill_config_path in available_skills: + try: + skill_config_dict = ModuleManager.read_config(skill_config_path) + if not skill_config_dict: + continue + + from api.interface import SkillConfig + + # Apply user overrides + if skill_folder_name in user_skill_configs: + user_config = user_skill_configs[skill_folder_name] + if user_config.custom_properties: + skill_config_dict["custom_properties"] = [ + prop.model_dump() for prop in user_config.custom_properties + ] + if user_config.prompt: + skill_config_dict["prompt"] = user_config.prompt + + skill_config = SkillConfig(**skill_config_dict) + + if skill_config.name != skill_name: + continue + + # Check platform compatibility + if skill_config.platforms: + if normalized_platform not in skill_config.platforms: + return ( + False, + f"Skill '{skill_name}' is not supported on {normalized_platform}.", + ) + + # Load and register the skill + skill = ModuleManager.load_skill( + config=skill_config, + settings=self.settings, + wingman=self, + ) + if skill: + skill.threaded_execution = self.threaded_execution + self.skills.append(skill) + await self.prepare_skill(skill) + + printr.print( + f"Skill '{skill_name}' activated (loaded and made discoverable).", + color=LogType.POSITIVE, + server_only=True, + ) + return True, f"Skill '{skill_name}' activated successfully." + + except Exception as e: + error_msg = f"Error activating skill '{skill_name}': {str(e)}" + await printr.print_async(error_msg, color=LogType.ERROR) + printr.print( + traceback.format_exc(), color=LogType.ERROR, server_only=True + ) + return False, error_msg + + return False, f"Skill '{skill_name}' not found." + + async def disable_skill(self, skill_name: str) -> tuple[bool, str]: + """Disable a single skill without reinitializing all skills. 
+
+        Args:
+            skill_name: The display name of the skill to disable
+
+        Returns:
+            (success, message) tuple
+        """
+        # Find the skill in our list
+        skill_to_remove = None
+        for skill in self.skills:
+            if skill.config.name == skill_name:
+                skill_to_remove = skill
+                break
+
+        if not skill_to_remove:
+            return True, f"Skill '{skill_name}' is already deactivated."
+
+        try:
+            # Unload the skill (cleanup resources, unsubscribe events)
+            await skill_to_remove.unload()
+
+            # Remove from skill list
+            self.skills.remove(skill_to_remove)
+
+            # Remove skill-specific registrations (tools, registry, etc.)
+            await self.unprepare_skill(skill_to_remove)
+
+            printr.print(
+                f"Skill '{skill_name}' deactivated (unloaded and removed from discoverable skills).",
+                color=LogType.WARNING,
+                server_only=True,
+            )
+            return True, f"Skill '{skill_name}' deactivated successfully."
+
+        except Exception as e:
+            error_msg = f"Error deactivating skill '{skill_name}': {str(e)}"
+            await printr.print_async(error_msg, color=LogType.ERROR)
+            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
+            return False, error_msg
+
+    def reset_conversation_history(self):
+        """Reset conversation history and skill/MCP activation state.
+
+        When the conversation is reset, we must also reset progressive disclosure state
+        so the LLM's memory matches the activation state.
+        """
+        self.conversation.reset()
+        self.skill_registry.reset_activations()
+        self.mcp_registry.reset_activations()
+
+    # ──────────────────────────── The main processing loop ──────────────────────────── #
+
+    async def process(self, audio_input_wav: str = None, transcript: str = None):
+        """The main method that gets called when the wingman is activated. This method controls what your wingman actually does, and you can override it if you want to.
+
+        The base implementation here triggers the transcription and processing of the given audio input.
+        If you don't even need transcription, you can override this entire process method. If you want transcription but then do something in addition, you can override the listed hooks.
+
+        Async so you can do async processing, e.g. send a request to an API.
+
+        Args:
+            audio_input_wav (str): The path to the audio file that contains the user's speech. This is a recording of what you said.
+
+        Hooks:
+            - async _transcribe: transcribe the audio to text
+            - async _get_response_for_transcript: process the transcript and return a text response
+            - async play_to_user: do something with the response, e.g. play it as audio
+        """
+
+        try:
+            process_result = None
+
+            benchmark_transcribe = None
+            if not transcript:
+                # transcribe the audio.
+                benchmark_transcribe = Benchmark(label="Voice transcription")
+                transcript = await self._transcribe(audio_input_wav)
+
+            interrupt = None
+            if transcript:
+                await printr.print_async(
+                    f"{transcript}",
+                    color=LogType.USER,
+                    source_name="User",
+                    source=LogSource.USER,
+                    benchmark_result=(
+                        benchmark_transcribe.finish() if benchmark_transcribe else None
+                    ),
+                )
+
+            # Further process the transcript.
+            # Return a string that is the "answer" to your passed transcript.
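+            # -> (final_response, instant_response, used_skill, interrupt_audio)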
+
+            benchmark_llm = Benchmark(label="Command/AI Processing")
+            process_result, instant_response, skill, interrupt = (
+                await self._get_response_for_transcript(
+                    transcript=transcript, benchmark=benchmark_llm
+                )
+            )
+
+            actual_response = instant_response or process_result
+
+            if actual_response:
+                await printr.print_async(
+                    f"{actual_response}",
+                    color=LogType.POSITIVE,
+                    source=LogSource.WINGMAN,
+                    source_name=self.name,
+                    skill_name=skill.name if skill else "",
+                    benchmark_result=benchmark_llm.finish(),
+                )
+
+            if process_result:
+                if self.settings.streamer_mode:
+                    self.tower.save_last_message(self.name, process_result)
+
+                # The last step in the chain: play the response to the user as audio using a TTS provider or mechanism of your choice.
+                await self.play_to_user(str(process_result), not interrupt)
+        except Exception as e:
+            await printr.print_async(
+                f"Error during processing of Wingman '{self.name}': {str(e)}",
+                color=LogType.ERROR,
+            )
+            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
+
+    # ───────────────────────────────── Commands ─────────────────────────────── #
+
+    def get_command(self, command_name: str) -> CommandConfig | None:
+        """Extracts the command with the given name.
+
+        Delegates to CommandManager for the actual lookup.
+
+        Args:
+            command_name: Name of the command to retrieve
+
+        Returns:
+            CommandConfig or None if not found
+        """
+        return self.command_manager.get_command(self.config.commands, command_name)
+
+    def _select_command_response(self, command: CommandConfig) -> str | None:
+        """Returns one of the configured responses of the command.
+
+        Delegates to CommandManager.
+
+        Args:
+            command: The command object
+
+        Returns:
+            Random response string or None
+        """
+        return self.command_manager.select_response(command)
+
+    async def _execute_instant_activation_command(
+        self, transcript: str
+    ) -> list[CommandConfig] | None:
+        """Match transcript against instant activation phrases and execute commands.
+
+        Delegates to CommandManager.
+
+        Args:
+            transcript: User's spoken text
+
+        Returns:
+            List of executed commands or None if no match
+        """
+        return await self.command_manager.try_instant_activation(
+            commands=self.config.commands,
+            transcript=transcript,
+        )
+
+    async def _execute_command(self, command: CommandConfig, is_instant=False) -> str:
+        """Execute a command's actions and return a response.
+ + Delegates to CommandManager. + + Args: + command: Command to execute + is_instant: Whether this is an instant activation command + + Returns: + Command response string + """ + return await self.command_manager.execute_command( + command=command, + is_instant=is_instant, + reset_conversation_callback=self.reset_conversation_history, + ) + + async def execute_action(self, command: CommandConfig): + """Execute the actions defined in a command. + + Delegates to CommandManager. + + Args: + command: Command containing actions to execute + """ + await self.command_manager.execute_actions(command) + + async def _transcribe(self, audio_input_wav: str) -> str | None: + """Transcribe recorded audio to text using configured STT provider. + + All STT providers (including Whispercpp and FasterWhisper) are now + managed through the provider registry. + + Args: + audio_input_wav: Path to the audio file containing user speech + + Returns: + Transcript text or None if transcription failed + """ + transcript = None + + try: + # Use provider registry for all STT providers + provider = self.provider_registry.get_stt_provider() + if provider: + transcript = await provider.transcribe(filename=audio_input_wav) + except Exception as e: + await printr.print_async( + f"Error during transcription using '{self.config.features.stt_provider}': {str(e)}", + color=LogType.ERROR, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + + # All STT providers should return str per SttProvider protocol + # If None, transcription failed + return transcript if transcript else None + + def threaded_execution(self, function, *args) -> threading.Thread | None: + """Execute a function in a separate thread.""" + try: + + def start_thread(function, *args): + if asyncio.iscoroutinefunction(function): + new_loop = asyncio.new_event_loop() + asyncio.set_event_loop(new_loop) + new_loop.run_until_complete(function(*args)) + new_loop.close() + else: + function(*args) + + thread = threading.Thread(target=start_thread, args=(function, *args)) + thread.name = function.__name__ + thread.start() + return thread + except Exception as e: + printr.print( + f"Error starting threaded execution: {str(e)}", color=LogType.ERROR + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + return None + + async def _get_response_for_transcript( + self, transcript: str, benchmark: Benchmark + ) -> tuple[str | None, str | None, Skill | None, bool]: + """Get the LLM response for a given transcript. + + This is the main conversation loop that: + 1. Adds user message to history + 2. Checks for instant activation commands + 3. Calls LLM (possibly multiple times if tools are used) + 4. Executes tool calls + 5. Returns the final response + + Args: + transcript: The user's spoken text transcribed + benchmark: Benchmark tracker for performance measurement + + Returns: + tuple: (final_response, instant_response, used_skill, interrupt_audio) + """ + await self.conversation.add_user_message( + transcript, self.config.features.remember_messages + ) + + benchmark.start_snapshot("Instant activation commands") + instant_response, instant_command_executed = await self._try_instant_activation( + transcript=transcript + ) + if instant_response: + await self.conversation.add_simple_assistant_message(instant_response) + benchmark.finish_snapshot() + # "." 
means "don't show a response in the UI" + if instant_response == ".": + instant_response = None + return instant_response, instant_response, None, True + benchmark.finish_snapshot() + + # Track cumulative LLM and tool execution times + llm_processing_time_ms = 0.0 + tool_execution_time_ms = 0.0 + tool_timings: list[tuple[str, float]] = ( + [] + ) # (label, time_ms) for individual tools + + # Make initial LLM call with conversation history + # Prevent tool calls if instant command was executed to avoid duplicate execution + llm_start = time.perf_counter() + completion = await self._llm_call(instant_command_executed is False) + llm_processing_time_ms += (time.perf_counter() - llm_start) * 1000 + + if completion is None: + benchmark.add_snapshot("LLM Processing", llm_processing_time_ms) + return None, None, None, True + + response_message, tool_calls = await self._process_completion(completion) + + # Add message and dummy tool responses to conversation history + await self.conversation.add_assistant_message(response_message, tool_calls) + + # Check if tools need follow-up LLM call (summarization) + is_summarize_needed = False + unique_tools: dict[str, bool] = {} + if tool_calls: + for tool_call in tool_calls: + if not tool_call.id: + continue + function_name = tool_call.function.name + + unique_tools[function_name] = True + + # Meta-tools (activate_capability, etc.) always need follow-up LLM call + # so the LLM can use the newly activated tools + if self.skill_registry.is_meta_tool(function_name): + is_summarize_needed = True + elif function_name in self.tool_skills: + skill = self.tool_skills[function_name] + if await skill.is_summarize_needed(function_name): + is_summarize_needed = True + + # If the LLM only called execute_command (no assistant text), we still + # want a follow-up response like on develop. 
+ if len(unique_tools) == 1 and "execute_command" in unique_tools: + is_summarize_needed = True + + # Tool execution loop + while tool_calls: + # Execute tools and collect timings + tool_start = time.perf_counter() + instant_response, skill, iteration_timings = await self._handle_tool_calls( + tool_calls + ) + tool_execution_time_ms += (time.perf_counter() - tool_start) * 1000 + tool_timings.extend(iteration_timings) + + # If tool returns instant response, return early + if instant_response: + # Add snapshots before returning + benchmark.add_snapshot("LLM Processing", llm_processing_time_ms) + if tool_execution_time_ms > 0: + benchmark.add_tool_execution(tool_execution_time_ms, tool_timings) + return None, instant_response, None, True + + # Follow-up LLM call to summarize tool results + if is_summarize_needed: + # Time the follow-up LLM call + llm_start = time.perf_counter() + completion = await self._llm_call(True) + llm_processing_time_ms += (time.perf_counter() - llm_start) * 1000 + + if completion is None: + benchmark.add_snapshot("LLM Processing", llm_processing_time_ms) + if tool_execution_time_ms > 0: + benchmark.add_tool_execution( + tool_execution_time_ms, tool_timings + ) + return None, None, None, True + + response_message, tool_calls = await self._process_completion( + completion + ) + # Add message + await self.conversation.add_assistant_message( + response_message, tool_calls + ) + + # Check if new tools need summarization + is_summarize_needed = False + if tool_calls: + for tool_call in tool_calls: + if not tool_call.id: + continue + function_name = tool_call.function.name + if self.skill_registry.is_meta_tool(function_name): + is_summarize_needed = True + elif function_name in self.tool_skills: + skill = self.tool_skills[function_name] + if await skill.is_summarize_needed(function_name): + is_summarize_needed = True + else: + # No summarization needed, exit loop + break + + # Add final snapshots + benchmark.add_snapshot("LLM Processing", llm_processing_time_ms) + if tool_execution_time_ms > 0: + benchmark.add_tool_execution(tool_execution_time_ms, tool_timings) + return response_message.content, response_message.content, None, True + + async def update_config( + self, config: WingmanConfig, skip_config_validation: bool = True + ) -> bool: + """Update the config of the Wingman. + + This method should always be called if the config of the Wingman has changed. + + Args: + config: The new wingman configuration + skip_config_validation: If False, validate the config and rollback on error + + Returns: + True if config was updated successfully, False otherwise + """ + try: + if not skip_config_validation: + old_config = deepcopy(self.config) + + self.config = config + + # Propagate skill config changes to loaded skills + await self._update_skill_configs(config) + + if not skip_config_validation: + errors = await self.validate() + + for error in errors: + if ( + error.error_type + != WingmanInitializationErrorType.MISSING_SECRET + ): + self.config = old_config + return False + + return True + except Exception as e: + await printr.print_async( + f"Error updating config for wingman '{self.name}': {str(e)}", + color=LogType.ERROR, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + return False + + async def _update_skill_configs(self, wingman_config: WingmanConfig) -> None: + """Propagate skill config changes to loaded skills. 
+ + When the wingman config changes (e.g., user updates custom_properties for a skill), + we need to update the SkillConfig on each loaded skill instance so they see the new values. + """ + if not self.skills or not wingman_config.skills: + return + + # Build lookup of new skill configs by folder name + new_skill_configs: dict[str, "SkillConfig"] = {} + for skill_config in wingman_config.skills: + try: + folder_name = _get_skill_folder_from_module(skill_config.module) + except Exception: + printr.print( + f"Skipping skill config override with unexpected module format: '{skill_config.module}'", + color=LogType.WARNING, + server_only=True, + ) + continue + new_skill_configs[folder_name] = skill_config + + # Update each loaded skill if its config changed + for skill in self.skills: + # Get the folder name for this skill + try: + skill_folder = _get_skill_folder_from_module(skill.config.module) + except Exception: + printr.print( + f"Skipping loaded skill with unexpected module format: '{skill.config.module}'", + color=LogType.WARNING, + server_only=True, + ) + continue + + if skill_folder in new_skill_configs: + user_override = new_skill_configs[skill_folder] + + fields_set = getattr(user_override, "model_fields_set", None) + if fields_set is None: + # Pydantic v1 fallback + fields_set = getattr(user_override, "__fields_set__", set()) + + # Create updated config by copying current and applying overrides + # This preserves all default values while applying user overrides + updated_config = deepcopy(skill.config) + + # Apply overrides even if they're explicitly empty. + # This allows users to clear custom properties/prompt in the UI. + if "custom_properties" in fields_set: + updated_config.custom_properties = user_override.custom_properties + if "prompt" in fields_set: + updated_config.prompt = user_override.prompt + + # Let the skill handle the config update (will compare old vs new) + await skill.update_config(updated_config) + + async def save_config(self): + """Save the config of the Wingman.""" + self.tower.save_wingman(self.name) + + async def save_commands(self): + """Save only the commands section of this wingman's config. + + This performs a partial YAML update - only the commands field is modified + in the config file, avoiding full config serialization. This is much safer + than save_config() for command-only changes as it won't accidentally + overwrite other fields. + + Use this instead of save_config() when you only changed command definitions, + instant_activation phrases, or other command-related fields. + + Example use cases: + - QuickCommands learning instant activation phrases + - Skills dynamically adding/modifying commands + - Skills updating command responses or actions + """ + self.tower.save_wingman_commands(self.name) + + async def update_settings(self, settings: SettingsConfig): + """Update wingman settings and reinitialize affected services. + + When settings change (e.g., WingmanPro region, API keys, provider selection), + this method reinitializes the ProviderRegistry to pick up new configurations. + + Args: + settings: New settings configuration to apply + """ + try: + self.settings = settings + + # Reinitialize provider registry to pick up new settings + # (e.g., WingmanPro region changes, API key updates, etc.) 
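+            # initialize_from_config() recreates the STT/TTS/LLM provider
+            # instances from the current config and re-reads their secrets.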
+ if self.provider_registry: + await self.provider_registry.initialize_from_config() + printr.print( + "Reinitialized providers with new settings", + source_name=self.name, + server_only=True, + ) + + # Reload skills and MCPs + await self.init_skills() + if hasattr(self, "init_mcps"): + await self.init_mcps() + + printr.print(f"Wingman {self.name}'s settings changed", server_only=True) + except Exception as e: + await printr.print_async( + f"Error while updating settings: {str(e)}", + color=LogType.ERROR, + source_name=self.name, + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + + # ========== LLM and Tool Execution Methods ========== + + async def actual_llm_call(self, messages, tools: list[dict] = None): + """Perform the actual LLM API call using the configured conversation provider. + + Routes the call to the appropriate provider (OpenAI, Azure, WingmanPro, etc.) + through the ProviderRegistry. + + Args: + messages: List of conversation messages in OpenAI format + tools: Optional list of tool definitions in OpenAI format + + Returns: + ChatCompletion object or None if the call fails + """ + try: + completion = None + + # Use provider registry for all LLM providers (including WingmanPro) + provider = self.provider_registry.get_llm_provider() + if provider: + completion = await provider.complete(messages=messages, tools=tools) + except Exception as e: + await printr.print_async( + f"Error during LLM call: {str(e)}", color=LogType.ERROR + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + return None + + return completion + + async def _llm_call(self, allow_tool_calls: bool = True): + """Make the primary LLM call with conversation history and context. + + This method: + 1. Timestamps the call (for cancellation detection) + 2. Builds tool definitions (if allowed) + 3. Adds system context to messages + 4. Calls actual_llm_call() + 5. Handles call cancellation (if newer call supersedes this one) + + Args: + allow_tool_calls: Whether to include tool definitions in the call + + Returns: + ChatCompletion object or None if the call fails or is cancelled + """ + # Save request time for later comparison + thiscall = time.time() + self.last_gpt_call = thiscall + + # Build tools + tools = self.build_tools() if allow_tool_calls else None + + if self.settings.debug_mode: + await printr.print_async( + f"Calling LLM with {(len(self.conversation.messages))} messages (excluding context) and {len(tools) if tools else 0} tools.", + color=LogType.INFO, + ) + + messages = self.conversation.get_messages_copy() + await self.add_context(messages) + + completion = await self.actual_llm_call(messages, tools) + + # If request isn't most recent, ignore the response + if self.last_gpt_call != thiscall: + await printr.print_async( + "LLM call was cancelled due to a new call.", color=LogType.WARNING + ) + return None + + return completion + + async def add_context(self, messages: list): + """Add system context to messages using ConversationManager. + + Builds the system prompt with backstory, skill prompts, TTS instructions, + and user metadata (timezone, config name, etc.). 
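+        The context is rebuilt from the current config on every LLM call, so
+        prompt and skill changes take effect immediately.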
+ + Args: + messages: The message list to prepend context to + """ + tower_config_name = ( + self.tower.config_dir.name if self.tower and self.tower.config_dir else None + ) + system_context = await self.conversation.build_system_context( + config=self.config, + capability_registry=self.capability_registry, + tower_config_name=tower_config_name, + ) + # Prepend system context as first message + messages.insert(0, {"role": "system", "content": system_context}) + + async def _process_completion(self, completion: ChatCompletion): + """Process the completion returned by the LLM call. + + Args: + completion: The completion object from an OpenAI call + + Returns: + tuple: (response_message, tool_calls) + """ + response_message = completion.choices[0].message + + content = response_message.content + if content is None: + response_message.content = "" + + # Fix tool calls that have a command name as function name + if response_message.tool_calls: + response_message.tool_calls = await self.tool_executor.fix_tool_calls( + response_message.tool_calls + ) + + return response_message, response_message.tool_calls + + async def _handle_tool_calls(self, tool_calls): + """Process all the tool calls identified in the response message. + + Args: + tool_calls: The list of tool calls to process + + Returns: + tuple: (instant_response, skill, tool_timings) where tool_timings is a list of (label, time_ms) tuples + """ + # Use tool executor to process all tool calls + instant_response, used_skill, tool_timings, results = ( + await self.tool_executor.execute_batch(tool_calls) + ) + + # Update conversation with all tool responses + for tool_call, function_response in results: + if tool_call.id: + await self.conversation.update_tool_response( + tool_call.id, function_response + ) + else: + self.conversation.add_tool_response(tool_call, function_response) + + return instant_response, used_skill, tool_timings + + def build_tools(self) -> list[dict]: + """Build the tool list for the LLM call using progressive disclosure. + + Returns tools in this order: + 1. execute_command: For non-instant-activation commands + 2. Meta-tools: activate_capability (for discovering skills/MCPs) + 3. Active skill tools: Tools from skills activated in this conversation + 4. Active MCP tools: Tools from MCP servers activated in this conversation + + Progressive disclosure means the LLM only sees tools from activated capabilities, + reducing token usage and improving context focus. 
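+        Skill and MCP tools appear here only after the LLM has activated them
+        via the activate_capability meta-tool; activations reset with the conversation.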
+ + Returns: + List of tool descriptors in OpenAI function calling format + """ + + def _command_has_effective_actions(command: CommandConfig) -> bool: + if command.is_system_command: + return True + + if not command.actions: + return False + + for action in command.actions: + if not action: + continue + if ( + action.keyboard is not None + or action.mouse is not None + or action.joystick is not None + or action.audio is not None + or action.write is not None + or action.wait is not None + ): + return True + + return False + + commands = [ + command.name + for command in self.config.commands + if (not command.force_instant_activation) + and _command_has_effective_actions(command) + ] + tools: list[dict] = [] + if commands: + tools.append( + { + "type": "function", + "function": { + "name": "execute_command", + "description": "Executes a command", + "parameters": { + "type": "object", + "properties": { + "command_name": { + "type": "string", + "description": "The name of the command to execute", + "enum": commands, + }, + }, + "required": ["command_name"], + }, + }, + } + ) + + # Unified capability discovery: single activate_capability meta-tool + # Combines skills and MCP servers - LLM doesn't need to know the difference + for _, tool in self.capability_registry.get_meta_tools(): + tools.append(tool) + + # Add tools from activated capabilities (both skills and MCPs) + for _, tool in self.skill_registry.get_active_tools(): + tools.append(tool) + + for _, tool in self.mcp_registry.get_active_tools(): + tools.append(tool) + + return tools + + async def _try_instant_activation(self, transcript: str) -> tuple[str | None, bool]: + """Try to match transcript against instant activation commands. + + Args: + transcript: User's spoken text + + Returns: + tuple: (response, command_executed) - response if matched, command_executed True if command ran + """ + commands = await self._execute_instant_activation_command(transcript) + if commands: + # Keep conversation history consistent with progressive tool disclosure by + # faking assistant tool calls for the executed commands (like develop). + await self.conversation.add_forced_tool_calls( + commands=commands, + conversation_provider=self.config.features.conversation_provider, + wingman_pro_deployment=getattr( + getattr(self.config, "wingman_pro", None), + "conversation_deployment", + None, + ), + ) + + responses: list[str] = [] + for command in commands: + if command.responses: + responses.append(self._select_command_response(command)) + + # If all executed commands have configured responses, return a combined + # response and stop further processing. + if len(responses) == len(commands): + # De-dupe while preserving order + responses = list(dict.fromkeys(responses)) + responses = [ + ( + response + "." + if response and not response.endswith(".") + else response + ) + for response in responses + ] + return " ".join(responses), True + + # No configured responses (or not for all commands): mark command executed + # but allow the normal LLM response flow to continue (with tool calls disabled). + return None, True + + return None, False + + # ========== TTS Methods ========== + + async def play_to_user( + self, + text: str, + no_interrupt: bool = False, + sound_config: Optional[SoundConfig] = None, + ): + """Play audio to the user using the configured TTS provider. + + This method: + 1. Cleans up markdown, links, and code blocks from text + 2. Waits for current audio to finish (if no_interrupt=True) + 3. 
Calls skill hooks (for text modification by activated skills) + 4. Synthesizes speech using configured TTS provider + 5. Applies sound effects if enabled + + Args: + text: The text to convert to speech and play + no_interrupt: If True, wait for current audio to finish before playing + sound_config: Optional custom sound configuration (volume, effects, etc.) + """ + if sound_config: + printr.print( + "Using custom sound config for playback", LogType.INFO, server_only=True + ) + else: + sound_config = self.config.sound + + # Remove Markdown, links, emotes and code blocks + text, contains_links, contains_code_blocks = cleanup_text(text) + + # Wait for audio player to finish playing + if no_interrupt and self.audio_player.is_playing: + while self.audio_player.is_playing: + await asyncio.sleep(0.1) + + # Call skill hooks (only for prepared/activated skills) + changed_text = text + for skill in self.skills: + if skill.is_prepared: + changed_text = await skill.on_play_to_user(text, sound_config) + if changed_text != text: + printr.print( + f"Skill '{skill.config.display_name}' modified the text to: '{changed_text}'", + LogType.INFO, + ) + text = changed_text + + if sound_config.volume == 0.0: + printr.print( + "Volume modifier is set to 0. Skipping TTS processing.", + LogType.WARNING, + server_only=True, + ) + return + + if "{SKIP-TTS}" in text: + printr.print( + "Skip TTS phrase found in input. Skipping TTS processing.", + LogType.WARNING, + server_only=True, + ) + return + + try: + # Handle legacy providers (not yet migrated to registry) + if self.config.features.tts_provider == TtsProvider.XVASYNTH: + await self.xvasynth.play_audio( + text=text, + config=self.config.xvasynth, + sound_config=sound_config, + audio_player=self.audio_player, + wingman_name=self.name, + ) + else: + # Use provider registry for all TTS providers (including WingmanPro) + provider = self.provider_registry.get_tts_provider() + if provider: + await provider.synthesize( + text=text, + sound_config=sound_config, + audio_player=self.audio_player, + wingman_name=self.name, + ) + except Exception as e: + await printr.print_async( + f"TTS error: {str(e)}", color=LogType.ERROR, source_name=self.name + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + + # ========== Helper Methods ========== + + async def generate_image(self, text: str) -> str: + """Generate an image from text using the configured image generation provider. + + Uses the provider registry to get the configured image generation provider, + similar to how STT, TTS, and LLM providers are accessed. 
+ + Args: + text: Text description of the image to generate + + Returns: + str: URL or path to the generated image, or empty string on error + """ + try: + provider = self.provider_registry.get_image_provider() + if provider: + return await provider.generate_image(text) + else: + await printr.print_async( + f"No image generation provider configured (current: {self.config.features.image_generation_provider})", + color=LogType.ERROR, + ) + except Exception as e: + await printr.print_async( + f"Error during image generation: {str(e)}", color=LogType.ERROR + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + + return "" diff --git a/wingman_core.py b/wingman_core.py index 0a80df46..758f62eb 100644 --- a/wingman_core.py +++ b/wingman_core.py @@ -42,8 +42,7 @@ from providers.whispercpp import Whispercpp from providers.wingman_pro import WingmanPro from providers.xvasynth import XVASynth -from wingmen.open_ai_wingman import OpenAiWingman -from wingmen.wingman import Wingman +from wingman import Wingman from services.file import get_writable_dir, get_audio_library_dir from services.voice_service import VoiceService from services.settings_service import SettingsService @@ -69,6 +68,7 @@ def __init__( ): self.printr = Printr() self.app_root_path = app_root_path + self.app_is_bundled = app_is_bundled self.system_manager = system_manager self.is_client_logged_in: bool = False self.client_plan: str = "Free" @@ -360,18 +360,8 @@ def __init__( "va_settings_changed", self.on_va_settings_changed ) - self.whispercpp = Whispercpp( - settings=self.settings_service.settings.voice_activation.whispercpp, - ) - self.fasterwhisper = FasterWhisper( - settings=self.settings_service.settings.voice_activation.fasterwhisper, - app_root_path=app_root_path, - app_is_bundled=app_is_bundled, - ) self.xvasynth = XVASynth(settings=self.settings_service.settings.xvasynth) self.settings_service.initialize( - whispercpp=self.whispercpp, - fasterwhisper=self.fasterwhisper, xvasynth=self.xvasynth, ) @@ -531,9 +521,9 @@ async def initialize_tower(self, config_dir_info: ConfigWithDirInfo): config_manager=self.config_manager, audio_player=self.audio_player, audio_library=self.audio_library, - whispercpp=self.whispercpp, - fasterwhisper=self.fasterwhisper, xvasynth=self.xvasynth, + app_root_path=self.app_root_path, + app_is_bundled=self.app_is_bundled, ) self.tower_errors = await self.tower.instantiate_wingmen( self.config_manager.settings_config @@ -769,7 +759,9 @@ def filter_and_clean_text(text): api_key=self.secret_keeper.secrets["groq"], base_url="https://api.groq.com/openai/v1/", ) - transcription = groq.transcribe(filename=recording_file, model="whisper-large-v3-turbo") + transcription = groq.transcribe( + filename=recording_file, model="whisper-large-v3-turbo" + ) text = transcription.text elif provider == VoiceActivationSttProvider.FASTER_WHISPER: combined_hotwords: list[str] = [] @@ -967,12 +959,11 @@ async def ask_wingman_conversation_provider(self, text: str, wingman_name: str): wingman = self.tower.get_wingman_by_name(wingman_name) if wingman and text: - if isinstance(wingman, OpenAiWingman): - messages = [{"role": "user", "content": text}] + messages = [{"role": "user", "content": text}] - completion = await wingman.actual_llm_call(messages=messages) + completion = await wingman.actual_llm_call(messages=messages) - return completion.choices[0].message.content + return completion.choices[0].message.content return None @@ -981,8 +972,7 @@ async def generate_image(self, text: str, 
wingman_name: str): wingman = self.tower.get_wingman_by_name(wingman_name) if wingman and text: - if isinstance(wingman, OpenAiWingman): - return await wingman.generate_image(text=text) + return await wingman.generate_image(text=text) return None @@ -1318,7 +1308,11 @@ async def get_google_models(self) -> list[types.Model]: google_api_key = await self.secret_keeper.retrieve( key="google", requester="Google" ) - google = GoogleGenAI(api_key=google_api_key) + # Create minimal config for model listing (conversation_model not needed for listing) + from api.interface import GoogleConfig + + config = GoogleConfig(conversation_model="") + google = GoogleGenAI(config=config, api_key=google_api_key) try: models = google.get_available_models() return models diff --git a/wingman_refactoring.md b/wingman_refactoring.md new file mode 100644 index 00000000..a47c04d0 --- /dev/null +++ b/wingman_refactoring.md @@ -0,0 +1,1142 @@ +# OpenAiWingman Refactoring Plan + +**Version:** 1.4 +**Date:** December 16, 2025 +**Status:** Phase 3 Complete - 805 Lines Removed (33%) + +**Current Progress:** + +- ✅ Phase 1 Complete (Steps 1-6): Provider system refactored +- ✅ Legacy Methods Removed: ~400 lines of duplicate provider code eliminated +- ✅ Phase 2 Complete (Steps 7-8): Conversation management extracted +- ✅ Phase 3 Complete (Steps 9-10): Tool execution extracted +- ⏸️ Phase 4 Pending (Steps 11-12): Response loop +- ⏸️ Phase 5 Pending (Steps 13-14): Context building +- ⏸️ Phase 6 Pending (Step 15): Benchmark improvements + +**Line Count Progress:** + +- Starting: 2,419 lines (open_ai_wingman.py) +- After Phase 1: 1,999 lines (-420) +- After Phase 2: 1,842 lines (-157) +- After Phase 3: 1,614 lines (-228) +- **Total reduction: 805 lines (33.3%)** +- Target: ~1,100 lines (54% reduction) +- Remaining: 514 lines to remove + +**New Service Files Created:** + +- services/conversation_manager.py (292 lines) +- services/tool_executor.py (378 lines) +- **Total new service code: 670 lines** +- **Net impact**: 805 lines removed from main file, 670 added to services = **135 net reduction** + +--- + +## Executive Summary + +Refactor the OpenAiWingman class to improve maintainability, reduce code duplication, and establish a modular architecture. The refactoring will reduce the class from ~2,400 lines to ~1,100 lines (54% reduction) while maintaining all functionality, public API compatibility, and skill integration. + +**Key Changes:** + +- Replace 16 provider instance variables with capability-based ProviderRegistry +- Extract conversation management into dedicated ConversationManager service +- Extract tool execution into ToolExecutor service +- Simplify response loop with ResponseLoopState helper +- Extract context building into ContextBuilder service +- Improve benchmark helpers with DRY methods + +--- + +## Table of Contents + +1. [Critical Findings](#critical-findings) +2. [Compatibility Requirements](#compatibility-requirements) +3. [Error Handling Strategy](#error-handling-strategy) +4. [Implementation Plan](#implementation-plan) +5. [Runtime Config Updates](#runtime-config-updates) +6. [Testing Checklist](#testing-checklist) +7. [Expected Results](#expected-results) + +--- + +## Critical Findings + +### Current Architecture Issues + +1. 
**16 Provider Instance Variables** (~20 lines) + + - `self.openai`, `self.mistral`, `self.groq`, `self.cerebras`, `self.openrouter`, `self.local_llm`, `self.openai_azure`, `self.elevenlabs`, `self.openai_compatible_tts`, `self.hume`, `self.inworld`, `self.wingman_pro`, `self.google`, `self.perplexity`, `self.xai`, `self.edge_tts` + - Plus `self.azure_api_keys` dict for multiple Azure services + +2. **Massive Conditional Chains** (~300+ lines total) + + - `uses_provider()`: 17 elif blocks (~110 lines) + - `actual_llm_call()`: 12 elif blocks (~110 lines) + - `play_to_user()`: 9 elif blocks (~130 lines) + - `_transcribe()`: 7 elif blocks (~70 lines) + +3. **15 Nearly-Identical Validation Methods** (~500 lines) + + - `validate_and_set_openai()`, `validate_and_set_mistral()`, `validate_and_set_groq()`, etc. + - All follow same pattern: retrieve secret → instantiate provider + +4. **Skills Access Messages Directly** + + - **QuickCommands** (`skills/quick_commands/main.py:86`): Reads `self.wingman.messages[-1]` + - **Timer** (`skills/timer/main.py:360`): Reads and appends to `self.wingman.messages` + - Must provide backward-compatible property + +5. **Autosave Uses update_config()** + - Client calls `save_wingman_config()` frequently + - Must propagate changes to all new services + - Provider changes must reinitialize ProviderRegistry + - Conversation settings must update ConversationManager + +### Provider Distribution Analysis + +**By Capability:** + +- **LLM Only:** Mistral, Groq, Cerebras, Local LLM, Perplexity, XAI (6) +- **TTS Only:** Edge TTS, ElevenLabs, OpenAI Compatible TTS, Hume, Inworld, XVASynth (6) +- **STT Only:** WhisperCPP, FasterWhisper (2) +- **Multi-Capability:** OpenAI (STT+TTS+LLM), Azure (STT+TTS+LLM), WingmanPro (STT+TTS+LLM), Google (LLM+future TTS/STT) (4) + +**Provider Types:** + +- **OpenAI-compatible:** Mistral, Groq, Cerebras, Local LLM, OpenRouter, Perplexity use OpenAI wrapper +- **Native SDK:** Google, Azure, WingmanPro, ElevenLabs, Hume, Inworld +- **Simple HTTP:** Edge TTS, OpenAI Compatible TTS +- **Local:** WhisperCPP, FasterWhisper, XVASynth + +--- + +## Compatibility Requirements + +### 1. Skills Accessing wingman.messages + +**Affected Skills:** + +- `skills/quick_commands/main.py:86` - Reads `self.wingman.messages[-1]` +- `skills/timer/main.py:360` - Reads and appends to `self.wingman.messages` + +**Solution:** +Add backward-compatible property with deprecation warning: + +```python +@property +def messages(self) -> list: + """Backward compatibility - returns conversation messages. + + DEPRECATED: Direct access to messages will be removed in v2.0. + Skills should use conversation manager APIs in future versions. + + TODO: Remove this property in v2.0 after refactoring dependent skills. + """ + import warnings + warnings.warn( + "Direct access to wingman.messages is deprecated and will be removed in v2.0. " + "Skills should not manipulate conversation history directly.", + DeprecationWarning, + stacklevel=2 + ) + return self.conversation.messages # Direct reference to allow mutations +``` + +**TODOs to Add:** + +- Line 86 in `skills/quick_commands/main.py`: `# TODO: Refactor to not access wingman.messages directly (deprecated)` +- Line 360 in `skills/timer/main.py`: `# TODO: Refactor to not access/modify wingman.messages directly (deprecated)` + +### 2. 
Skill Hook Timing + +**Current Behavior (MUST PRESERVE):** + +- `on_add_user_message()` - Called BEFORE message added (async) +- `on_add_assistant_message()` - Called BEFORE message added (async) +- `on_play_to_user()` - Called BEFORE TTS (async) + +**Implementation:** +ConversationManager must call skill hooks at exact same time: + +```python +async def add_user_message(self, content: str, skills: list): + # 1. Call hooks FIRST (async) + for skill in skills: + if skill.is_prepared: + await skill.on_add_user_message(content) + + # 2. Cleanup + self.cleanup() + + # 3. Append message + self.messages.append({"role": "user", "content": content}) +``` + +### 3. Async/Sync Behavior + +**CRITICAL: No Changes to Sync/Async Patterns** + +- All async methods remain async +- All sync methods remain sync +- No mixing of sync/async where it wasn't before +- Skill hooks remain async +- Tool methods remain async +- Provider methods remain async + +--- + +## Error Handling Strategy + +### Service-Level Errors (Internal) + +**Pattern:** + +```python +try: + # ... service logic ... +except Exception as e: + printr.print( + f"Service error: {str(e)}", + color=LogType.ERROR, + server_only=True # Internal only, not sent to client + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) + return None +``` + +**Used in:** + +- ProviderFactory +- ProviderRegistry +- ConversationManager +- ToolExecutor +- ContextBuilder + +### User-Facing Errors (OpenAiWingman) + +**Pattern:** + +```python +try: + # ... wingman logic ... +except Exception as e: + await printr.print_async( + f"User-facing error: {str(e)}", + color=LogType.ERROR, + # server_only=False by default - sent to client + ) + printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) +``` + +**Used in:** + +- OpenAiWingman.validate() +- OpenAiWingman.\_transcribe() +- OpenAiWingman.actual_llm_call() +- OpenAiWingman.play_to_user() + +### Provider Initialization Errors + +**Pattern:** + +- Factory returns None on failure, logs internally +- Registry collects missing providers +- Validation phase reports to user with WingmanInitializationError + +--- + +## Implementation Plan + +### Phase 1: Provider System (Steps 1-6) ✅ COMPLETE + +**Status:** ✅ All steps complete - 430 lines removed (18% reduction) + +#### Step 1: Create provider capability interfaces ✅ + +**File:** `providers/provider_base.py` (NEW - 150 lines) + +**Created:** + +- ✅ `ProviderCapability` enum (STT, TTS, LLM, IMAGE_GEN) +- ✅ `@capabilities()` decorator +- ✅ Protocol classes: `SttProvider`, `TtsProvider`, `LlmProvider` (all with \*\*kwargs) +- ✅ `BaseProvider` abstract class with get_capabilities() and supports() methods + +**Verified:** Syntax checked, capability tests passed + +#### Step 2: Refactor all provider classes ✅ + +**Files:** `providers/open_ai.py`, `providers/google.py`, `providers/elevenlabs.py`, `providers/edge.py`, `providers/hume.py`, `providers/inworld.py` (REFACTORED) + +**Completed changes:** + +1. ✅ Added `@capabilities()` decorator to 8 provider classes +2. ✅ Implemented protocol methods: `transcribe()`, `synthesize()`, `complete()` +3. ✅ Created unified `OpenAiAzure` class with 4 API keys (whisper, speech, tts, llm) +4. ✅ Created `OpenAiCompatibleTts` for custom TTS endpoints +5. 
✅ All async methods preserved as async + +**Providers refactored:** + +- OpenAi (STT + TTS + LLM) +- OpenAiAzure (STT + TTS + LLM, 4 separate keys, single instance) +- OpenAiCompatibleTts (TTS only) +- GoogleGenAI (LLM only) +- ElevenLabs (TTS only) +- Edge (TTS only, no API key required) +- Hume (TTS only) +- Inworld (TTS only) + +**Not migrated (legacy):** + +- WhisperCPP, FasterWhisper, XVASynth (local execution) +- WingmanPro (HTTP backend service) + +**Verified:** All providers compiled, capability tests passed + +#### Step 3: Create ProviderFactory ✅ + +**File:** `services/provider_factory.py` (NEW - ~400 lines) + +**Implemented:** + +- ✅ Provider mapping for 3 STT, 7 TTS, 10 LLM providers (20 total configurations) +- ✅ `create_provider()` async method with secret retrieval via SecretKeeper +- ✅ Special handling for Azure (4 keys, 1 instance) +- ✅ Special handling for Edge (no API key) +- ✅ Special handling for providers needing wingman_name (ElevenLabs, Hume, Inworld) +- ✅ OpenAI-compatible provider support (Mistral, Groq, Cerebras, OpenRouter, Local LLM, Perplexity, XAI) +- ✅ Error handling with server_only=True logging + +**Verified:** Syntax checked, factory methods tested + +#### Step 4: Create ProviderRegistry ✅ + +**File:** `services/provider_registry.py` (NEW - ~150 lines) + +**Implemented:** + +- ✅ Registry managing providers by capability (STT, TTS, LLM) +- ✅ `initialize_from_config()` async method (creates providers via factory) +- ✅ Sync getters: `get_stt_provider()`, `get_tts_provider()`, `get_llm_provider()` +- ✅ Availability checks: `has_stt()`, `has_tts()`, `has_llm()` +- ✅ Debug utility: `get_provider_summary()` +- ✅ Cleanup method: `clear()` + +**Key features verified:** + +- Only initialization is async +- All getters are sync +- Stores providers in separate slots by capability type + +**Verified:** Syntax checked, registry initialization tested + +#### Step 5: Replace provider instances in OpenAiWingman ✅ + +**File:** `wingmen/open_ai_wingman.py` + +**Deleted:** + +- ✅ 16 provider instance variables (openai, mistral, groq, cerebras, openrouter, local_llm, openai_azure, elevenlabs, openai_compatible_tts, hume, inworld, google, perplexity, xai, azure_api_keys dict) + +**Added:** + +- ✅ `self.provider_registry: ProviderRegistry` (single registry instance) + +**Kept (legacy/special):** + +- ✅ `self.edge_tts` (temporarily kept for migration) +- ✅ `self.whispercpp`, `self.fasterwhisper`, `self.xvasynth` (local providers, not yet migrated) +- ✅ `self.wingman_pro` (HTTP backend, special handling) + +**Updated:** + +- ✅ `validate()`: Replaced 13 provider validation calls with single `await provider_registry.initialize_from_config()` +- ✅ Added `_check_openrouter_tool_support()` helper method + +**Verified:** Syntax checked, imports verified + +#### Step 6: Simplify provider delegation methods ✅ + +**File:** `wingmen/open_ai_wingman.py` + +**Deleted:** + +- ✅ `uses_provider()` method (~110 lines with 17 elif blocks) +- ✅ All 15 `validate_and_set_*()` methods (~500 lines total): + - validate_and_set_openai, validate_and_set_mistral, validate_and_set_groq + - validate_and_set_cerebras, validate_and_set_google, validate_and_set_openrouter + - validate_and_set_local_llm, validate_and_set_elevenlabs, validate_and_set_openai_compatible_tts + - validate_and_set_hume, validate_and_set_inworld, validate_and_set_azure + - validate_and_set_wingman_pro, validate_and_set_perplexity, validate_and_set_xai + +**Updated (delegation simplified to registry):** + +- ✅ `_transcribe()`: Changed 
from 7-branch if-elif to registry-based delegation + + - Legacy: WhisperCPP, FasterWhisper, WingmanPro handled separately + - Registry: OpenAI, Azure (STT) delegated to `registry.get_stt_provider().transcribe()` + +- ✅ `actual_llm_call()`: Changed from 12-branch if-elif to registry-based delegation + + - Legacy: WingmanPro handled separately + - Registry: All LLM providers delegated to `registry.get_llm_provider().complete()` + - Special case: OpenRouter tool support check preserved + +- ✅ `play_to_user()`: Changed from 9-branch if-elif to registry-based delegation + + - Legacy: XVASynth, WingmanPro handled separately + - Registry: All TTS providers delegated to `registry.get_tts_provider().synthesize()` + +- ✅ `update_settings()`: Removed dependency on deleted `uses_provider()` method + - Now checks `if self.wingman_pro:` directly for reinitialization + +**Line savings achieved:** ~610 lines deleted (110 + 500), delegation methods simplified + +**Verified:** Syntax checked, no compilation errors + +**Phase 1 Results:** + +- **Lines removed from open_ai_wingman.py:** 420 (from 2,419 to 1,999) +- **Percentage reduction:** 17.4% +- **Lines removed from providers:** ~400 (legacy methods eliminated) +- **Architecture improvement:** Provider management centralized in registry +- **API cleanup:** Single protocol-based interface enforced +- **Maintainability:** New providers only need factory configuration, no delegation changes +- **Voice preview:** Updated VoiceService to use protocol methods (8 methods) + +### Post-Phase 1: Legacy Method Removal ✅ COMPLETE + +**Objective:** Remove all legacy provider methods and enforce single API surface + +**Architecture Decision:** +Legacy methods (play_audio, ask, transcribe_legacy) were initially kept for "backward compatibility," but having dual API surfaces defeated the purpose of the refactoring. Analysis revealed these methods were only called by VoiceService preview endpoints (non-critical UI features). Decision: Remove all legacy methods immediately for cleaner architecture. + +**Legacy Methods Removed:** + +1. **OpenAi class** (`providers/open_ai.py`) + + - Removed: `transcribe_legacy()`, `ask()`, `play_audio()` (~80 lines) + - Inlined: Full implementation into protocol methods + - Result: 765 → 569 lines (-196 lines, -25.6%) + +2. **OpenAiAzure class** (`providers/open_ai.py`) + + - Removed: `transcribe_whisper()`, `transcribe_azure_speech()`, `ask()`, `play_audio()` + - Inlined: Azure OpenAI Whisper + Azure Speech SDK + chat completion + TTS + - Complex logic preserved: Streaming, Azure SDK configuration, error handling + +3. **OpenAiCompatibleTts class** (`providers/open_ai.py`) + + - Removed: `play_audio()` (~40 lines) + - Inlined: Streaming/non-streaming TTS with extra_headers support + +4. **GoogleGenAI class** (`providers/google.py`) + + - Removed: `ask()` (~16 lines) + - Inlined: Chat completion logic + - Result: 118 → 102 lines (-16 lines, -13.6%) + +5. **ElevenLabs class** (`providers/elevenlabs.py`) + + - Removed: `play_audio()` (~140 lines) + - Inlined: Complex streaming with voice generation, callbacks, sound effects + - Result: 227 → 113 lines (-114 lines, -50.2%) + +6. **Edge class** (`providers/edge.py`) + + - Removed: `play_audio()` (~24 lines) + - Inlined: File-based TTS generation + - Result: 100 → 76 lines (-24 lines, -24%) + +7. **Hume class** (`providers/hume.py`) + + - Removed: `play_audio()` (~39 lines) + - Inlined: JSON-based TTS with generation_id tracking + - Result: 156 → 117 lines (-39 lines, -25%) + +8. 
**Inworld class** (`providers/inworld.py`) + - Removed: `play_audio()` (~180 lines) + - Inlined: Complex streaming with threading, queue management, buffer callbacks + - Result: 331 → 312 lines (-19 lines, -5.7%) + +**VoiceService Updated (`services/voice_service.py`):** + +All 8 voice preview methods updated to use protocol methods instead of legacy methods: + +```python +# OLD (legacy method): +await provider.play_audio(text=text, voice=voice, model=model, ...) + +# NEW (protocol method): +await provider.synthesize( + text=text, + audio_player=self.audio_player, + sound_config=sound_config, + wingman_name="system", + voice=voice, # In **kwargs + model=model, + speed=speed, + stream=stream, +) +``` + +Updated methods: + +- `play_openai_tts()` → calls `OpenAi.synthesize()` +- `play_openai_compatible_tts()` → calls `OpenAiCompatibleTts.synthesize()` +- `play_azure_tts()` → calls `OpenAiAzure.synthesize()` +- `play_elevenlabs_tts()` → calls `ElevenLabs.synthesize()` +- `play_edge_tts()` → calls `Edge.synthesize()` +- `play_hume()` → calls `Hume.synthesize()` +- `play_inworld()` → calls `Inworld.synthesize()` +- `play_xvasynth_tts()` → calls `XVASynth.synthesize()` (already migrated) + +**Benefits Achieved:** + +- ✅ **Single clear interface** - No confusion about which method to use +- ✅ **Reduced code duplication** - ~400 lines removed from providers +- ✅ **Forced consistency** - Can't accidentally use old patterns +- ✅ **Better documentation** - Only one way documented +- ✅ **Migration complete** - All callers updated, no legacy code remains + +**Total Impact:** + +| File | Before | After | Reduction | +| ---------------- | --------- | --------- | ----------------- | +| open_ai.py | 765 | 569 | -196 (-25.6%) | +| google.py | 118 | 102 | -16 (-13.6%) | +| elevenlabs.py | 227 | 113 | -114 (-50.2%) | +| edge.py | 100 | 76 | -24 (-24%) | +| hume.py | 156 | 117 | -39 (-25%) | +| inworld.py | 331 | 312 | -19 (-5.7%) | +| voice_service.py | 413 | 416 | +3 | +| **Total** | **2,110** | **1,705** | **-405 (-19.2%)** | + +--- + +### Phase 2: Conversation Management (Steps 7-8) ✅ COMPLETE + +**Status:** ✅ Both steps complete - 157 lines removed from open_ai_wingman.py + +#### Step 7: Create ConversationManager ✅ + +**File:** `services/conversation_manager.py` (NEW - 289 lines) + +**Implemented:** + +- ✅ `self.messages` - PUBLIC list (skills can access directly via wingman.conversation.messages) +- ✅ `self.pending_tool_calls` - tracking list +- ✅ `add_user_message(content, remember_messages)` - async (calls skill hooks, includes cleanup) +- ✅ `add_assistant_message(message, tool_calls)` - async (calls skill hooks, adds dummy tool responses) +- ✅ `add_simple_assistant_message(content)` - async (for simple messages without tool calls) +- ✅ `add_tool_response(tool_call, response, completed)` - sync +- ✅ `update_tool_response(tool_call_id, response)` - async (complex reordering logic) +- ✅ `cleanup(remember_messages)` - async (message history pruning) +- ✅ `reset()` - sync +- ✅ `get_messages_copy()` - sync +- ✅ `_get_message_role()` - private helper (handles dict and object formats) + +**Key features verified:** + +- Skill hooks called BEFORE message added +- Returns (is_waiting_response_needed, is_summarize_needed) from add_assistant_message +- All list operations sync except async skill hooks +- Complex message block reordering preserved in update_tool_response + +#### Step 8: Integrate ConversationManager into OpenAiWingman ✅ + +**File:** `wingmen/open_ai_wingman.py` + +**Added:** + +- ✅ Import: `from 
services.conversation_manager import ConversationManager` +- ✅ `self.conversation: ConversationManager` - initialized in **init** with (self.skills, self.settings) +- ✅ Backward-compatible `@property messages` - returns conversation.messages with deprecation note +- ✅ Backward-compatible `@property pending_tool_calls` - returns conversation.pending_tool_calls + +**Deleted:** + +- ✅ `self.messages = []` initialization +- ✅ `self.pending_tool_calls = []` initialization +- ✅ `_add_gpt_response()` method (~90 lines) - logic moved to conversation + caller +- ✅ `_add_tool_response()` method (~15 lines) - replaced with conversation.add_tool_response() +- ✅ `_update_tool_response()` method (~70 lines) - replaced with conversation.update_tool_response() +- ✅ `_cleanup_conversation_history()` method (~55 lines) - replaced with conversation.cleanup() +- ✅ `add_user_message()` method (~15 lines) - replaced with conversation.add_user_message() +- ✅ `add_assistant_message()` method (~12 lines) - replaced with conversation.add_simple_assistant_message() + +**Updated:** + +- ✅ `_get_response_for_transcript()`: Calls `conversation.add_user_message(transcript, remember_messages)` +- ✅ `_get_response_for_transcript()`: Calls `conversation.add_simple_assistant_message()` for instant responses +- ✅ `_get_response_for_transcript()`: Calls `conversation.add_assistant_message()` and checks meta-tools/skill response needs +- ✅ `add_forced_assistant_command_calls()`: Uses `conversation.add_assistant_message()` and `conversation.update_tool_response()` +- ✅ `_handle_tool_calls()`: Uses `conversation.update_tool_response()` and `conversation.add_tool_response()` +- ✅ `reset_conversation_history()`: Calls `conversation.reset()` +- ✅ `_llm_call()`: Uses `conversation.get_messages_copy()` + +**Line savings achieved:** 157 lines removed (1,999 → 1,842) + +**Phase 2 Results:** + +- **Lines removed from open_ai_wingman.py:** 157 (from 1,999 to 1,842) +- **New service created:** conversation_manager.py (292 lines) +- **Net reduction:** 157 lines from main file +- **Percentage reduction (Phase 2):** 7.9% +- **Cumulative reduction:** 577 lines (23.8% from original 2,419) +- **Architecture improvement:** Conversation state management isolated and testable +- **Maintainability:** Skill hooks and message manipulation centralized +- **Backward compatibility:** Property accessors preserve existing skill API + +--- + +### Phase 3: Tool Execution (Steps 9-10) ✅ COMPLETE + +**Status:** ✅ Both steps complete - 228 lines removed from open_ai_wingman.py + +#### Step 9: Create ToolExecutor ✅ + +**File:** `services/tool_executor.py` (NEW - 378 lines) + +**Implemented:** + +- ✅ `execute_tool_call()` - async (routes single tool call) +- ✅ `execute_batch()` - async (processes multiple tool calls, returns results list) +- ✅ `_parse_arguments()` - handles dict (Mistral) or JSON string (OpenAI) +- ✅ `_execute_capability_meta_tool()` - unified capability activation +- ✅ `_execute_skill_meta_tool()` - legacy skill activation (backward compat) +- ✅ `_execute_mcp_meta_tool()` - MCP server discovery/activation +- ✅ `_execute_mcp_tool()` - MCP server tool execution with benchmarking +- ✅ `_execute_instant_command()` - instant activation command execution +- ✅ `_execute_skill_tool()` - skill tool execution with benchmarking + +**Key features verified:** + +- All execution is async with proper error handling +- Returns (instant_response, skill, timings, results) from execute_batch +- Timing labels for benchmarking (⚡ for skills, 🌐 for MCP, Command: for 
commands)
+- Meta-tools return None for timing (not tracked in benchmarks)
+- Lazy skill validation on activation with deactivation on failure
+- Detailed logging with server_only and debug_mode support
+
+#### Step 10: Integrate ToolExecutor into OpenAiWingman ✅
+
+**File:** `wingmen/open_ai_wingman.py`
+
+**Added:**
+
+- ✅ Import: `from services.tool_executor import ToolExecutor`
+- ✅ `self.tool_executor: ToolExecutor | None` - initialized in validate() after registries
+- ✅ Tool executor initialization with 9 dependencies:
+  - capability_registry, skill_registry, mcp_registry (registries)
+  - tool_skills dict (skill name → skill instance mapping)
+  - get_command, \_execute_command, \_select_command_response (command callbacks)
+  - play_to_user (audio playback callback)
+  - settings (for debug mode)
+
+**Deleted:**
+
+- ✅ `execute_command_by_function_call()` method (~210 lines) - full logic moved to ToolExecutor
+
+**Updated:**
+
+- ✅ `_handle_tool_calls()` - simplified from ~60 lines to ~20 lines:
+  - Calls `tool_executor.execute_batch()`
+  - Iterates over results to update conversation with tool responses
+  - Returns instant_response, used_skill, tool_timings
+
+**Line savings achieved:** 228 lines removed (1,842 → 1,614)
+
+**Phase 3 Results:**
+
+- **Lines removed from open_ai_wingman.py:** 228 (from 1,842 to 1,614)
+- **New service created:** tool_executor.py (378 lines)
+- **Net reduction:** 228 lines from main file
+- **Percentage reduction (Phase 3):** 12.4%
+- **Cumulative reduction:** 805 lines (33.3% from original 2,419)
+- **Architecture improvement:** Tool routing and execution isolated and testable
+- **Maintainability:** Meta-tool, skill, command, and MCP tool logic centralized
+- **Flexibility:** New tool types can be added by extending ToolExecutor
+
+---
+
+### Phase 4: Response Loop Simplification (Steps 11-12) ⏸️ PENDING
+
+#### Step 11: Create ResponseLoopState helper
+
+**File:** `services/response_state.py` (NEW)
+
+**Implements:**
+
+- Sync state class tracking timing and flags (sketched after Step 12 below)
+- `add_llm_time()`, `add_tool_timings()`, `set_flags()`, `should_play_waiting()`
+
+#### Step 12: Refactor response generation loop
+
+**File:** `wingmen/open_ai_wingman.py`
+
+**Update:**
+
+- `_get_response_for_transcript()`: Simplify using state (~160 lines → ~80 lines)
+
+**Add helpers:**
+
+- `_handle_instant_response()` - sync
+- `_timed_llm_call()` - async wrapper
+- `_play_waiting_message()` - async
+- `_finish_response()` - sync
+
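+A minimal sketch of what the state helper could look like; the method names come from Step 11 above, while the exact fields and signatures are assumptions:
+
+```python
+import time
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ResponseLoopState:
+    """Tracks cumulative timings and flags across one response-generation loop."""
+
+    llm_time_ms: float = 0.0
+    tool_time_ms: float = 0.0
+    tool_timings: list[tuple[str, float]] = field(default_factory=list)
+    is_summarize_needed: bool = False
+    instant_command_executed: bool = False
+    waiting_message_played: bool = False
+
+    def add_llm_time(self, started_at: float) -> None:
+        # started_at is a time.perf_counter() timestamp taken before the LLM call
+        self.llm_time_ms += (time.perf_counter() - started_at) * 1000
+
+    def add_tool_timings(self, elapsed_ms: float, timings: list[tuple[str, float]]) -> None:
+        self.tool_time_ms += elapsed_ms
+        self.tool_timings.extend(timings)
+
+    def set_flags(self, *, summarize: bool, instant: bool) -> None:
+        self.is_summarize_needed = summarize
+        self.instant_command_executed = instant
+
+    def should_play_waiting(self) -> bool:
+        # Play a "working on it" message at most once per response loop
+        return not self.waiting_message_played
+```
+
+With something like this in place, `_get_response_for_transcript()` would thread a single state object through the loop instead of juggling the local `llm_processing_time_ms` / `tool_execution_time_ms` variables it uses today.
+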
+**Line savings:** ~80 lines removed + +--- + +### Phase 5: Context Building (Steps 13-14) + +#### Step 13: Create ContextBuilder + +**File:** `services/context_builder.py` (NEW) + +**Implements:** + +- All sync methods (just string building) +- `build()` - orchestrates context assembly +- `_build_skill_prompts()`, `_build_tts_prompt()`, `_build_user_context()` + +#### Step 14: Integrate ContextBuilder into OpenAiWingman + +**File:** `wingmen/open_ai_wingman.py` + +**Add:** + +- `self.context_builder: ContextBuilder` + +**Delete:** + +- `_build_user_context()` (~40 lines) + +**Update:** + +- `get_context()`: Simplify to call `context_builder.build()` (~100 lines → ~5 lines) + +**Line savings:** ~40 lines removed + +--- + +### Phase 6: Benchmark Improvements (Step 15) + +#### Step 15: Extend Benchmark with helper methods + +**File:** `services/benchmark.py` + +**Add:** + +- `format_time()` - static method +- `add_snapshot()` - creates BenchmarkResult +- `add_nested_snapshot()` - creates nested BenchmarkResult + +**Delete from OpenAiWingman:** + +- `_add_benchmark_snapshot()` (~15 lines) +- `_add_tool_execution_snapshot()` (~25 lines) + +**Line savings:** ~30 lines removed + +--- + +## Runtime Config Updates + +### Critical Requirement + +The client uses autosave and frequently calls config endpoints to apply changes at runtime without restarting wingmen. All changes must propagate immediately to active wingmen. + +### Key Method: update_config() + +**Enhanced Implementation:** + +```python +async def update_config(self, config: WingmanConfig, validate=False, update_skills=False) -> bool: + """Update config and propagate to all services.""" + try: + if validate: + old_config = deepcopy(self.config) + + self.config = config + + # 1. Check if provider configs changed + provider_changed = self._check_provider_config_changed( + old_config if validate else None, config + ) + + if provider_changed: + # Reinitialize provider registry + self.provider_registry = ProviderRegistry( + config=self.config, + secret_keeper=self.secret_keeper, + wingman_name=self.name + ) + await self.provider_registry.initialize_from_config() + + # 2. Update conversation manager + if self.conversation: + if self.conversation.remember_messages != config.features.remember_messages: + self.conversation.remember_messages = config.features.remember_messages + + # 3. Update context builder + if self.context_builder: + self.context_builder.config = self.config + self.context_builder.skills = self.skills + + # 4. Propagate skill config changes + await self._update_skill_configs(config) + + # 5. 
Validate if requested + if validate: + errors = await self.validate() + if errors: + # Rollback + self.config = old_config + await self._reinitialize_services() + return False + + return True + + except Exception as e: + await printr.print_async(f"Error updating config: {str(e)}", color=LogType.ERROR) + return False +``` + +### Config Update Scenarios + +| Config Change | Endpoint | Service Updated | Takes Effect | +| --------------------- | --------------------------- | --------------------------------- | ------------ | +| Provider selection | `save_wingman_config` | `provider_registry` reinitialized | Immediate | +| Provider settings | `save_basic_wingman_config` | `provider_registry` reinitialized | Immediate | +| Conversation settings | `save_wingman_config` | `conversation.remember_messages` | Immediate | +| Prompt changes | `save_wingman_config` | `context_builder.config` | Immediate | +| Skill toggle | `toggle_wingman_skill` | Incremental via `enable_skill()` | Immediate | +| MCP toggle | `toggle_wingman_mcp` | Incremental via `enable_mcp()` | Immediate | +| Command changes | `save_commands` | Direct mutation already applied | Immediate | + +### Performance + +**Provider reinitialization cost:** + +- Creating new provider instances: Lightweight (just object construction) +- Secret retrieval: Cached by SecretKeeper +- No network calls during initialization +- **Estimated overhead:** <50ms per provider change + +**Optimization:** + +- Only reinitialize when provider configs actually change +- `_check_provider_config_changed()` does quick comparison +- Incremental skill/MCP toggles avoid full reinitialization + +--- + +## Testing Checklist + +### Autosave Verification + +After refactoring, verify these scenarios: + +1. ✅ Change LLM provider from OpenAI to Mistral → Next user message uses Mistral +2. ✅ Change TTS voice → Next TTS output uses new voice +3. ✅ Change conversation model → Next LLM call uses new model +4. ✅ Change remember_messages → Cleanup uses new limit +5. ✅ Edit system prompt → Next context includes new prompt +6. ✅ Enable skill → Skill tools immediately available +7. ✅ Disable skill → Skill tools immediately removed +8. ✅ Enable MCP → MCP tools immediately available +9. ✅ Disable MCP → MCP disconnects immediately +10. ✅ QuickCommands learns phrase → Phrase works immediately +11. ✅ Validation failure → Changes rolled back, old config still works +12. ✅ Multiple rapid autosaves → All changes apply without race conditions + +### Skill Compatibility + +1. ✅ QuickCommands reading `wingman.messages[-1]` works +2. ✅ Timer reading/appending to `wingman.messages` works +3. ✅ Deprecation warnings logged +4. ✅ `on_add_user_message()` called at correct time +5. ✅ `on_add_assistant_message()` called at correct time +6. ✅ `on_play_to_user()` called at correct time +7. ✅ All hooks receive correct parameters +8. ✅ Tool execution works for all skills +9. ✅ `llm_call` binding works +10. ✅ Skills can call `play_to_user()` + +### Async/Sync Behavior + +1. ✅ All async methods remain async +2. ✅ All sync methods remain sync +3. ✅ No deadlocks or race conditions +4. ✅ Provider methods are async +5. ✅ Skill hooks are async +6. ✅ Tool methods are async +7. ✅ Conversation getters are sync +8. 
✅ Registry getters are sync
+
+---
+
+## Expected Results
+
+### Line Count Reduction
+
+| Phase     | Description             | Lines Before | Lines After | Reduction        |
+| --------- | ----------------------- | ------------ | ----------- | ---------------- |
+| Current   | OpenAiWingman total     | 2,404        | -           | -                |
+| Phase 1   | Provider system         | 2,404        | 1,504       | -900             |
+| Phase 2   | Conversation management | 1,504        | 1,354       | -150             |
+| Phase 3   | Tool execution          | 1,354        | 1,254       | -100             |
+| Phase 4   | Response loop           | 1,254        | 1,174       | -80              |
+| Phase 5   | Context building        | 1,174        | 1,134       | -40              |
+| Phase 6   | Benchmark helpers       | 1,134        | 1,104       | -30              |
+| **Final** | **Total**               | **2,404**    | **~1,100**  | **-1,300 (54%)** |
+
+### New Files Created
+
+| File                               | Lines      | Purpose                    |
+| ---------------------------------- | ---------- | -------------------------- |
+| `providers/provider_base.py`       | ~150       | Base classes and protocols |
+| `services/provider_factory.py`     | ~200       | Provider instantiation     |
+| `services/provider_registry.py`    | ~150       | Provider management        |
+| `services/conversation_manager.py` | ~250       | Message history management |
+| `services/tool_executor.py`        | ~200       | Tool execution routing     |
+| `services/response_state.py`       | ~50        | Response loop state        |
+| `services/context_builder.py`      | ~150       | Context assembly           |
+| **Total New Code**                 | **~1,150** | **7 new services**         |
+
+### Maintainability Improvements
+
+**Before:**
+
+- Adding a new provider: 6-8 locations to modify
+- Provider logic scattered across 4+ methods
+- Hard to test (16 provider mocks needed)
+- Tight coupling between wingman and providers
+
+**After:**
+
+- Adding a new provider: 1 location (the ProviderFactory map)
+- Provider logic isolated in registry/factory
+- Easy to test (mock the registry only)
+- Loose coupling via protocols
+
+---
+
+## Important Decisions
+
+### 1. Backward Compatibility Priority
+
+**Decision:** Maintain full backward compatibility with deprecation warnings
+**Rationale:** Skills depend on direct message access; breaking changes would require coordinated skill updates
+**Impact:** Add a `@property messages` that returns a direct list reference
+
+### 2. Async/Sync Preservation
+
+**Decision:** No changes to async/sync patterns
+**Rationale:** Too risky; could introduce deadlocks or race conditions
+**Impact:** Carefully preserve all async/await patterns in refactored code
+
+### 3. Service Initialization Order
+
+**Decision:** Initialize services in validate() after the config is loaded
+**Rationale:** Services need a validated config and their dependencies
+**Order:**
+
+1. ConversationManager (no dependencies)
+2. ProviderRegistry (needs config + secret_keeper)
+3. ToolExecutor (needs registries + conversation)
+4. ContextBuilder (needs config + skills)
+
+### 4. Error Handling Split
+
+**Decision:** Services log errors internally; the wingman shows user-facing errors
+**Rationale:** Services don't have enough context for user messages
+**Impact:** Factory/registry return None on failure; the wingman reports to the user
+
+### 5. Provider Config Change Detection
+
+**Decision:** Use a comparison helper to detect when to reinitialize the registry
+**Rationale:** Avoid unnecessary reinitialization on unrelated config changes
+**Impact:** Add a `_check_provider_config_changed()` method
+
+### 6. Multi-Capability Providers
+
+**Decision:** Use a decorator to declare capabilities
+**Rationale:** Clean, declarative, easy to query
+**Implementation:** `@capabilities(ProviderCapability.STT, ProviderCapability.TTS, ProviderCapability.LLM)` - see the sketches below
+
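+Two of these decisions lend themselves to short sketches. First, Decision 1's backward-compatible `messages` property, assuming the conversation manager exposes a `messages` list (the attribute names here are illustrative, not final):
+
+```python
+import warnings
+
+
+class WingmanSketch:
+    def __init__(self, conversation_manager):
+        self.conversation = conversation_manager  # assumed attribute name
+
+    @property
+    def messages(self) -> list:
+        # Return the live list (not a copy) so skills that mutate
+        # wingman.messages keep working, per Decision 1.
+        warnings.warn(
+            "wingman.messages is deprecated; use the conversation manager",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.conversation.messages
+```
+
+Second, a plausible implementation of the `@capabilities` decorator from Decision 6, assuming `ProviderCapability` is a `Flag` enum (the actual `providers/provider_base.py` may differ):
+
+```python
+from enum import Flag, auto
+
+
+class ProviderCapability(Flag):
+    STT = auto()
+    TTS = auto()
+    LLM = auto()
+
+
+def capabilities(*caps: ProviderCapability):
+    """Class decorator that records the capabilities a provider declares."""
+
+    def wrap(cls):
+        combined = ProviderCapability(0)
+        for cap in caps:
+            combined |= cap
+        cls.provider_capabilities = combined
+        return cls
+
+    return wrap
+
+
+@capabilities(ProviderCapability.STT, ProviderCapability.TTS)
+class ExampleProvider:
+    pass
+
+
+# The registry can then filter providers by capability:
+assert ProviderCapability.TTS in ExampleProvider.provider_capabilities
+```
+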
+### 7. Non-BaseProvider Migration
+
+**Decision:** Keep whispercpp, fasterwhisper, and xvasynth as-is for now
+**Rationale:** They work differently (local execution, separate processes)
+**Future:** A later migration phase can move them to the BaseProvider pattern
+
+---
+
+## Migration Guide
+
+### For Developers
+
+**Before starting Phase 1:**
+
+1. Document current behavior: note any quirks or edge cases
+
+**During implementation:**
+
+1. Implement phases sequentially (don't skip)
+2. Test after each phase before moving on to the next
+3. Keep commits atomic (one phase per commit)
+4. Update tests as you go
+
+**After completion:**
+
+1. Verify skill compatibility
+2. Code review with a focus on async patterns
+
+### For Skill Developers
+
+**Current (deprecated):**
+
+```python
+# Direct message access
+last_message = self.wingman.messages[-1]
+self.wingman.messages.append({"role": "user", "content": "..."})
+```
+
+**Future (recommended):**
+
+```python
+# Use the conversation manager (once wingman.messages is removed in v2.0).
+# For now, continue using wingman.messages; it will emit a deprecation warning.
+```
+
+**No Action Required:**
+
+- Skills using public APIs (`actual_llm_call`, `play_to_user`, etc.) are unaffected
+- Skill hooks continue to work identically
+- Tool execution is unchanged
+
+---
+
+## Risk Assessment
+
+### High Risk
+
+1. **Async/Sync Mixing** - Could cause deadlocks
+
+   - **Mitigation:** Carefully preserve all async/await patterns
+   - **Verification:** Test all code paths thoroughly
+
+2. **Skill Message Access** - Skills directly mutate messages
+
+   - **Mitigation:** The property returns a direct reference, not a copy
+   - **Verification:** Test QuickCommands and Timer specifically
+
+3. **Config Update Propagation** - Changes might not apply
+   - **Mitigation:** Comprehensive update_config() implementation
+   - **Verification:** Test all autosave scenarios
+
+### Medium Risk
+
+4. **Provider Initialization Order** - Dependencies between services
+
+   - **Mitigation:** Document the initialization order in validate()
+   - **Verification:** Test wingman startup thoroughly
+
+5. **Error Handling Coverage** - Some errors might be swallowed
+   - **Mitigation:** Consistent error handling patterns
+   - **Verification:** Test error scenarios (invalid API keys, etc.)
+
+### Low Risk
+
+6. **Performance Regression** - New layers add overhead
+
+   - **Mitigation:** Registry lookups are O(1), minimal overhead
+   - **Verification:** Benchmark before/after
+
+7. **Memory Leaks** - Services might not clean up
+   - **Mitigation:** Implement cleanup methods in all services
+   - **Verification:** Long-running test with multiple config changes
+
+---
+
+## Version History
+
+- **v1.0** (2025-12-16): Initial comprehensive refactoring plan
+  - 6 phases, 15 steps
+  - Backward compatibility with deprecation warnings
+  - Runtime config update strategy
+  - Complete autosave support
+
+---
+
+## Next Steps
+
+1. ✅ **Planning Complete** - This document
+2. ⏭️ **Phase 1 Implementation** - Provider system (Steps 1-6)
+3. ⏭️ **Phase 1 Testing** - Verify provider system works
+4. ⏭️ **Phase 2 Implementation** - Conversation management (Steps 7-8)
+5. ⏭️ **Phase 2 Testing** - Verify skill compatibility
+6. ⏭️ **Phase 3 Implementation** - Tool execution (Steps 9-10)
+7. ⏭️ **Phase 3 Testing** - Verify tool execution
+8. ⏭️ **Phase 4 Implementation** - Response loop (Steps 11-12)
+9. ⏭️ **Phase 5 Implementation** - Context building (Steps 13-14)
+10. ⏭️ **Phase 6 Implementation** - Benchmark helpers (Step 15)
+11. ⏭️ **Integration Testing** - Full system test
+12. 
⏭️ **Performance Benchmarking** - Before/after comparison +13. ⏭️ **Code Review** - Final review before merge +14. ⏭️ **Documentation Update** - Update developer docs +15. ⏭️ **Deployment** - Merge to main + +--- + +**Document maintained by:** GitHub Copilot +**Last updated:** December 16, 2025 +**Status:** Ready for implementation diff --git a/wingmen/open_ai_wingman.py b/wingmen/open_ai_wingman.py deleted file mode 100644 index df5dba50..00000000 --- a/wingmen/open_ai_wingman.py +++ /dev/null @@ -1,2461 +0,0 @@ -import json -import time -import asyncio -import random -import traceback -import uuid -from datetime import datetime -from typing import ( - Mapping, - Optional, -) -from openai import NOT_GIVEN -from openai.types.chat import ( - ChatCompletion, - ChatCompletionMessage, - ChatCompletionMessageToolCall, - ParsedFunction, -) -import requests -from api.interface import ( - OpenRouterEndpointResult, - SettingsConfig, - SoundConfig, - WingmanInitializationError, - CommandConfig, -) -from api.enums import ( - ImageGenerationProvider, - LogType, - LogSource, - TtsProvider, - SttProvider, - ConversationProvider, - WingmanProSttProvider, - WingmanProTtsProvider, - WingmanInitializationErrorType, -) -from providers.edge import Edge -from providers.elevenlabs import ElevenLabs -from providers.google import GoogleGenAI -from providers.open_ai import OpenAi, OpenAiAzure, OpenAiCompatibleTts -from providers.hume import Hume -from providers.inworld import Inworld -from providers.open_ai import OpenAi, OpenAiAzure -from providers.x_ai import XAi -from providers.wingman_pro import WingmanPro -from api.commands import McpStateChangedCommand -from services.benchmark import Benchmark -from services.markdown import cleanup_text -from services.printr import Printr -from services.skill_registry import SkillRegistry -from services.mcp_client import McpClient -from services.mcp_registry import McpRegistry -from services.capability_registry import CapabilityRegistry -from skills.skill_base import Skill -from wingmen.wingman import Wingman - -printr = Printr() - - -class OpenAiWingman(Wingman): - """Our OpenAI Wingman base gives you everything you need to interact with OpenAI's various APIs. - - It transcribes speech to text using Whisper, uses the Completion API for conversation and implements the Tools API to execute functions. 
- """ - - AZURE_SERVICES = { - "tts": TtsProvider.AZURE, - "whisper": [SttProvider.AZURE, SttProvider.AZURE_SPEECH], - "conversation": ConversationProvider.AZURE, - } - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.edge_tts = Edge() - - # validate will set these: - self.openai: OpenAi | None = None - self.mistral: OpenAi | None = None - self.groq: OpenAi | None = None - self.cerebras: OpenAi | None = None - self.openrouter: OpenAi | None = None - self.openrouter_model_supports_tools = False - self.local_llm: OpenAi | None = None - self.openai_azure: OpenAiAzure | None = None - self.elevenlabs: ElevenLabs | None = None - self.openai_compatible_tts: OpenAiCompatibleTts | None = None - self.hume: Hume | None = None - self.inworld: Inworld | None = None - self.wingman_pro: WingmanPro | None = None - self.google: GoogleGenAI | None = None - self.perplexity: OpenAi | None = None - self.xai: XAi | None = None - - # tool queue - self.pending_tool_calls = [] - self.last_gpt_call = None - - # generated addional content - self.instant_responses = [] - self.last_used_instant_responses = [] - - self.messages = [] - """The conversation history that is used for the GPT calls""" - - self.azure_api_keys = {key: None for key in self.AZURE_SERVICES} - - self.tool_skills: dict[str, Skill] = {} - self.skill_tools: list[dict] = [] - - # Progressive tool disclosure registry (MCP-inspired token optimization) - # Only meta-tools are sent to LLM initially; skills activated on-demand - self.skill_registry = SkillRegistry() - - # MCP (Model Context Protocol) support - # Allows connecting to external MCP servers that provide additional tools - self.mcp_client = McpClient(wingman_name=self.name) - self.mcp_registry = McpRegistry( - self.mcp_client, - wingman_name=self.name, - on_state_changed=self._broadcast_mcp_state_changed, - ) - - # Unified capability registry - combines skill and MCP discovery - # From the LLM's perspective, both are just "capabilities" - self.capability_registry = CapabilityRegistry( - self.skill_registry, self.mcp_registry - ) - - def _broadcast_mcp_state_changed(self): - """Broadcast MCP state change to UI via WebSocket.""" - if printr._connection_manager: - printr.ensure_async( - printr._connection_manager.broadcast( - McpStateChangedCommand(wingman_name=self.name) - ) - ) - - async def validate(self): - errors = await super().validate() - - try: - if self.uses_provider("whispercpp"): - self.whispercpp.validate(self.name, errors) - - if self.uses_provider("fasterwhisper"): - self.fasterwhisper.validate(errors) - - if self.uses_provider("openai"): - await self.validate_and_set_openai(errors) - - if self.uses_provider("mistral"): - await self.validate_and_set_mistral(errors) - - if self.uses_provider("groq"): - await self.validate_and_set_groq(errors) - - if self.uses_provider("cerebras"): - await self.validate_and_set_cerebras(errors) - - if self.uses_provider("google"): - await self.validate_and_set_google(errors) - - if self.uses_provider("openrouter"): - await self.validate_and_set_openrouter(errors) - - if self.uses_provider("local_llm"): - await self.validate_and_set_local_llm(errors) - - if self.uses_provider("elevenlabs"): - await self.validate_and_set_elevenlabs(errors) - - if self.uses_provider("openai_compatible"): - await self.validate_and_set_openai_compatible_tts(errors) - - if self.uses_provider("azure"): - await self.validate_and_set_azure(errors) - - if self.uses_provider("wingman_pro"): - await self.validate_and_set_wingman_pro() - - if 
self.uses_provider("perplexity"): - await self.validate_and_set_perplexity(errors) - - if self.uses_provider("xai"): - await self.validate_and_set_xai(errors) - - if self.uses_provider("hume"): - await self.validate_and_set_hume(errors) - - if self.uses_provider("inworld"): - await self.validate_and_set_inworld(errors) - - except Exception as e: - errors.append( - WingmanInitializationError( - wingman_name=self.name, - message=f"Error during provider validation: {str(e)}", - error_type=WingmanInitializationErrorType.UNKNOWN, - ) - ) - printr.print( - f"Error during provider validation: {str(e)}", - color=LogType.ERROR, - server_only=True, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - return errors - - def uses_provider(self, provider_type: str): - if provider_type == "openai": - return any( - [ - self.config.features.tts_provider == TtsProvider.OPENAI, - self.config.features.stt_provider == SttProvider.OPENAI, - self.config.features.conversation_provider - == ConversationProvider.OPENAI, - ] - ) - elif provider_type == "mistral": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.MISTRAL, - ] - ) - elif provider_type == "groq": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.GROQ, - self.config.features.stt_provider == SttProvider.GROQ, - ] - ) - elif provider_type == "cerebras": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.CEREBRAS, - ] - ) - elif provider_type == "google": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.GOOGLE, - ] - ) - elif provider_type == "openrouter": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.OPENROUTER, - ] - ) - elif provider_type == "local_llm": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.LOCAL_LLM, - ] - ) - elif provider_type == "azure": - return any( - [ - self.config.features.tts_provider == TtsProvider.AZURE, - self.config.features.stt_provider == SttProvider.AZURE, - self.config.features.stt_provider == SttProvider.AZURE_SPEECH, - self.config.features.conversation_provider - == ConversationProvider.AZURE, - ] - ) - elif provider_type == "edge_tts": - return self.config.features.tts_provider == TtsProvider.EDGE_TTS - elif provider_type == "elevenlabs": - return self.config.features.tts_provider == TtsProvider.ELEVENLABS - elif provider_type == "openai_compatible": - return self.config.features.tts_provider == TtsProvider.OPENAI_COMPATIBLE - elif provider_type == "hume": - return self.config.features.tts_provider == TtsProvider.HUME - elif provider_type == "inworld": - return self.config.features.tts_provider == TtsProvider.INWORLD - elif provider_type == "xvasynth": - return self.config.features.tts_provider == TtsProvider.XVASYNTH - elif provider_type == "whispercpp": - return self.config.features.stt_provider == SttProvider.WHISPERCPP - elif provider_type == "fasterwhisper": - return self.config.features.stt_provider == SttProvider.FASTER_WHISPER - elif provider_type == "wingman_pro": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.WINGMAN_PRO, - self.config.features.tts_provider == TtsProvider.WINGMAN_PRO, - self.config.features.stt_provider == SttProvider.WINGMAN_PRO, - ] - ) - elif provider_type == "perplexity": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.PERPLEXITY, - ] - ) - 
elif provider_type == "xai": - return any( - [ - self.config.features.conversation_provider - == ConversationProvider.XAI, - ] - ) - return False - - async def prepare(self): - try: - if self.config.features.use_generic_instant_responses: - printr.print( - "Generating AI instant responses...", - color=LogType.WARNING, - server_only=True, - ) - self.threaded_execution(self._generate_instant_responses) - except Exception as e: - await printr.print_async( - f"Error while preparing wingman '{self.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - async def unload_skills(self): - await super().unload_skills() - self.tool_skills = {} - self.skill_tools = [] - self.skill_registry.clear() - - async def unload_mcps(self): - """Disconnect from all MCP servers.""" - await self.mcp_registry.clear() - - async def enable_mcp(self, mcp_name: str) -> tuple[bool, str]: - """Enable and connect to a single MCP server without reinitializing all MCPs. - - Args: - mcp_name: The name of the MCP server to enable - - Returns: - (success, message) tuple - """ - # Check if MCP SDK is available - if not self.mcp_client.is_available: - return False, "MCP SDK not installed." - - # Check if already connected - if mcp_name in self.mcp_registry.get_connected_server_names(): - return True, f"MCP server '{mcp_name}' is already connected." - - # Find the MCP config from central mcp.yaml - central_mcp_config = self.tower.config_manager.mcp_config - mcp_configs = central_mcp_config.servers if central_mcp_config else [] - - mcp_config = None - for cfg in mcp_configs: - if cfg.name == mcp_name: - mcp_config = cfg - break - - if not mcp_config: - return False, f"MCP server '{mcp_name}' not found in mcp.yaml." - - try: - # Build headers with secrets (same logic as init_mcps) - headers = {} - if mcp_config.headers: - headers.update(mcp_config.headers) - - # Check for API key in secrets - secret_key = f"mcp_{mcp_config.name}" - api_key = await self.secret_keeper.retrieve( - requester=self.name, - key=secret_key, - prompt_if_missing=False, - ) - if api_key: - if not any( - k.lower() in ["authorization", "api-key", "x-api-key"] - for k in headers.keys() - ): - headers["Authorization"] = f"Bearer {api_key}" - - # Connect with timeout - default_timeout = 60.0 if mcp_config.type.value == "stdio" else 30.0 - timeout = ( - float(mcp_config.timeout) if mcp_config.timeout else default_timeout - ) - - connection = await asyncio.wait_for( - self.mcp_registry.register_server( - config=mcp_config, - headers=headers if headers else None, - ), - timeout=timeout, - ) - - if connection.is_connected: - tool_count = len(connection.tools) - return True, f"MCP server '{mcp_name}' enabled with {tool_count} tools." - else: - error = connection.error or "Connection failed." - return False, f"MCP server '{mcp_name}' failed to connect: {error}" - - except asyncio.TimeoutError: - error_msg = f"Connection timed out ({int(timeout)}s)." - self.mcp_registry.set_server_error(mcp_name, error_msg) - return False, f"MCP server '{mcp_name}': {error_msg}" - - except Exception as e: - error_msg = f"Error enabling MCP '{mcp_name}': {str(e)}" - await printr.print_async(error_msg, color=LogType.ERROR) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return False, error_msg - - async def disable_mcp(self, mcp_name: str) -> tuple[bool, str]: - """Disable and disconnect from a single MCP server without affecting other MCPs. 
- - Args: - mcp_name: The name of the MCP server to disable - - Returns: - (success, message) tuple - """ - # Check if the MCP is connected - if mcp_name not in self.mcp_registry.get_connected_server_names(): - return True, f"MCP server '{mcp_name}' is already disconnected." - - try: - await self.mcp_registry.unregister_server(mcp_name) - return True, f"MCP server '{mcp_name}' disabled." - - except Exception as e: - error_msg = f"Error disabling MCP '{mcp_name}': {str(e)}" - await printr.print_async(error_msg, color=LogType.ERROR) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return False, error_msg - - async def prepare_skill(self, skill: Skill): - # prepare the skill and skill tools - try: - for tool_name, tool in skill.get_tools(): - self.tool_skills[tool_name] = skill - self.skill_tools.append(tool) - - # Register with the progressive disclosure registry - self.skill_registry.register_skill(skill) - - # Auto-activated skills need to be validated/prepared immediately - # so their hooks (like on_play_to_user) will work - if skill.config.auto_activate: - success, message = await skill.ensure_activated() - if not success: - await printr.print_async( - f"Auto-activated skill '{skill.config.display_name}' failed to activate: {message}", - color=LogType.ERROR, - ) - except Exception as e: - await printr.print_async( - f"Error while preparing skill '{skill.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - # init skill methods - skill.llm_call = self.actual_llm_call - - async def unprepare_skill(self, skill: Skill): - """Remove a skill's tools and registrations when it's disabled.""" - try: - # Remove tool mappings - for tool_name, _ in skill.get_tools(): - self.tool_skills.pop(tool_name, None) - # Remove from skill_tools list - self.skill_tools = [ - t - for t in self.skill_tools - if t.get("function", {}).get("name") != tool_name - ] - - # Unregister from the progressive disclosure registry - self.skill_registry.unregister_skill(skill.name) - except Exception as e: - await printr.print_async( - f"Error while unpreparing skill '{skill.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - async def init_mcps(self) -> list[WingmanInitializationError]: - """ - Initialize MCP (Model Context Protocol) server connections. - - Loads MCP servers from central mcp.yaml config, only connecting those in wingman's discoverable_mcps. - MCP servers provide external tools similar to skills. 
- - Returns: - list[WingmanInitializationError]: Errors encountered (non-fatal, wingman still loads) - """ - errors = [] - - # Check if MCP SDK is available - if not self.mcp_client.is_available: - printr.print( - f"[{self.name}] MCP SDK not installed, skipping MCP initialization.", - color=LogType.WARNING, - server_only=True, - ) - return errors - - # Disconnect existing MCP servers - await self.unload_mcps() - - # Get MCP configs from central mcp.yaml - central_mcp_config = self.tower.config_manager.mcp_config - mcp_configs = central_mcp_config.servers if central_mcp_config else [] - if not mcp_configs: - return errors - - # Get discoverable MCPs list (whitelist) from wingman config - discoverable_mcps = self.config.discoverable_mcps - - # Filter to only discoverable MCPs - mcps_to_connect = [mcp for mcp in mcp_configs if mcp.name in discoverable_mcps] - - if not mcps_to_connect: - return errors - - # Prepare connection tasks for parallel execution - async def connect_mcp(mcp_config): - """Connect to a single MCP server. Returns (success, connection_info, errors).""" - local_errors = [] - try: - # Build headers with secrets - headers = {} - if mcp_config.headers: - headers.update(mcp_config.headers) - - # Check for API key in secrets (using mcp_ prefix) - secret_key = f"mcp_{mcp_config.name}" - api_key = await self.secret_keeper.retrieve( - requester=self.name, - key=secret_key, - prompt_if_missing=False, - ) - if api_key: - printr.print( - f"MCP secret '{secret_key}' found ({len(api_key)} chars)", - color=LogType.INFO, - source_name=self.name, - server_only=True, - ) - if not any( - k.lower() in ["authorization", "api-key", "x-api-key"] - for k in headers.keys() - ): - headers["Authorization"] = f"Bearer {api_key}" - - # Connect with timeout - default_timeout = 60.0 if mcp_config.type.value == "stdio" else 30.0 - timeout = ( - float(mcp_config.timeout) if mcp_config.timeout else default_timeout - ) - - try: - connection = await asyncio.wait_for( - self.mcp_registry.register_server( - config=mcp_config, - headers=headers if headers else None, - ), - timeout=timeout, - ) - except asyncio.TimeoutError: - error_msg = f"MCP '{mcp_config.display_name}' connection timed out ({int(timeout)}s)." 
- printr.print( - error_msg, - color=LogType.WARNING, - source_name=self.name, - server_only=True, - ) - local_errors.append( - WingmanInitializationError( - wingman_name=self.name, - message=error_msg, - error_type=WingmanInitializationErrorType.MCP_CONNECTION_FAILED, - ) - ) - return (False, None, local_errors) - - if connection.is_connected: - return ( - True, - f"{mcp_config.display_name} ({len(connection.tools)} tools)", - local_errors, - ) - else: - error_msg = f"MCP '{mcp_config.display_name}' failed to connect: {connection.error}" - local_errors.append( - WingmanInitializationError( - wingman_name=self.name, - message=error_msg, - error_type=WingmanInitializationErrorType.MCP_CONNECTION_FAILED, - ) - ) - return (False, None, local_errors) - - except Exception as e: - error_msg = f"MCP '{mcp_config.name}' initialization error: {str(e)}" - printr.print( - error_msg, - color=LogType.ERROR, - source_name=self.name, - server_only=True, - ) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - local_errors.append( - WingmanInitializationError( - wingman_name=self.name, - message=error_msg, - error_type=WingmanInitializationErrorType.MCP_CONNECTION_FAILED, - ) - ) - return (False, None, local_errors) - - # Connect to all MCPs in parallel - connection_tasks = [connect_mcp(mcp) for mcp in mcps_to_connect] - results = await asyncio.gather(*connection_tasks) - - # Collect results - connected_count = 0 - connected_names = [] - for success, connection_info, mcp_errors in results: - if success: - connected_count += 1 - connected_names.append(connection_info) - errors.extend(mcp_errors) - - # Log consolidated MCP status for this wingman - if connected_count > 0: - await printr.print_async( - f"Discoverable MCP servers connected ({connected_count}): {', '.join(connected_names)}", - color=LogType.WINGMAN, - source=LogSource.WINGMAN, - source_name=self.name, - server_only=not self.settings.debug_mode, - ) - - return errors - - async def validate_and_set_openai(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("openai", errors) - if api_key: - self.openai = OpenAi( - api_key=api_key, - organization=self.config.openai.organization, - base_url=self.config.openai.base_url, - ) - - async def validate_and_set_mistral(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("mistral", errors) - if api_key: - # TODO: maybe use their native client (or LangChain) instead of OpenAI(?) - self.mistral = OpenAi( - api_key=api_key, - organization=self.config.openai.organization, - base_url=self.config.mistral.endpoint, - ) - - async def validate_and_set_groq(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("groq", errors) - if api_key: - # TODO: maybe use their native client (or LangChain) instead of OpenAI(?) - self.groq = OpenAi( - api_key=api_key, - base_url=self.config.groq.endpoint, - ) - - async def validate_and_set_cerebras(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("cerebras", errors) - if api_key: - # TODO: maybe use their native client (or LangChain) instead of OpenAI(?) 
- self.cerebras = OpenAi( - api_key=api_key, - base_url=self.config.cerebras.endpoint, - ) - - async def validate_and_set_google(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("google", errors) - if api_key: - self.google = GoogleGenAI(api_key=api_key) - - async def validate_and_set_openrouter( - self, errors: list[WingmanInitializationError] - ): - api_key = await self.retrieve_secret("openrouter", errors) - - async def does_openrouter_model_support_tools(model_id: str): - if not model_id: - return False - response = requests.get( - url=f"https://openrouter.ai/api/v1/models/{model_id}/endpoints", - timeout=10, - ) - response.raise_for_status() - content = response.json() - result = OpenRouterEndpointResult(**content.get("data", {})) - supports_tools = any( - all( - p in endpoint.supported_parameters for p in ["tools", "tool_choice"] - ) - for endpoint in result.endpoints - ) - if not supports_tools: - printr.print( - f"{self.name}: OpenRouter model {model_id} does not support tools, so they'll be omitted from calls.", - source=LogSource.WINGMAN, - source_name=self.name, - color=LogType.WARNING, - server_only=True, - ) - return supports_tools - - if api_key: - self.openrouter = OpenAi( - api_key=api_key, - base_url=self.config.openrouter.endpoint, - ) - self.openrouter_model_supports_tools = ( - await does_openrouter_model_support_tools( - self.config.openrouter.conversation_model - ) - ) - - async def validate_and_set_local_llm( - self, errors: list[WingmanInitializationError] - ): - api_key = await self.retrieve_secret("local_llm", errors) - if api_key: - self.local_llm = OpenAi( - api_key=api_key, - base_url=self.config.local_llm.endpoint, - ) - - async def validate_and_set_elevenlabs( - self, errors: list[WingmanInitializationError] - ): - api_key = await self.retrieve_secret("elevenlabs", errors) - if api_key: - self.elevenlabs = ElevenLabs( - api_key=api_key, - wingman_name=self.name, - ) - self.elevenlabs.validate_config( - config=self.config.elevenlabs, errors=errors - ) - - async def validate_and_set_openai_compatible_tts( - self, errors: list[WingmanInitializationError] - ): - if ( - self.config.openai_compatible_tts.base_url - and self.config.openai_compatible_tts.api_key - ): - self.openai_compatible_tts = OpenAiCompatibleTts( - api_key=self.config.openai_compatible_tts.api_key, - base_url=self.config.openai_compatible_tts.base_url, - ) - printr.print( - f"Wingman {self.name}: Initialized OpenAI-compatible TTS with base URL {self.config.openai_compatible_tts.base_url} and API key {self.config.openai_compatible_tts.api_key}", - server_only=True, - ) - - async def validate_and_set_hume(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("hume", errors) - if api_key: - self.hume = Hume( - api_key=api_key, - wingman_name=self.name, - ) - self.hume.validate_config(config=self.config.hume, errors=errors) - - async def validate_and_set_inworld(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("inworld", errors) - if api_key: - self.inworld = Inworld( - api_key=api_key, - wingman_name=self.name, - ) - self.inworld.validate_config(config=self.config.inworld, errors=errors) - - async def validate_and_set_azure(self, errors: list[WingmanInitializationError]): - for key_type in self.AZURE_SERVICES: - if self.uses_provider("azure"): - api_key = await self.retrieve_secret(f"azure_{key_type}", errors) - if api_key: - self.azure_api_keys[key_type] = api_key - if len(errors) == 0: 
- self.openai_azure = OpenAiAzure() - - async def validate_and_set_wingman_pro(self): - self.wingman_pro = WingmanPro( - wingman_name=self.name, settings=self.settings.wingman_pro - ) - - async def validate_and_set_perplexity( - self, errors: list[WingmanInitializationError] - ): - api_key = await self.retrieve_secret("perplexity", errors) - if api_key: - self.perplexity = OpenAi( - api_key=api_key, - base_url=self.config.perplexity.endpoint, - ) - - async def validate_and_set_xai(self, errors: list[WingmanInitializationError]): - api_key = await self.retrieve_secret("xai", errors) - if api_key: - self.xai = XAi( - api_key=api_key, - base_url=self.config.xai.endpoint, - ) - - # overrides the base class method - async def update_settings(self, settings: SettingsConfig): - """Update the settings of the Wingman. This method should always be called when the user Settings have changed.""" - try: - await super().update_settings(settings) - - if self.uses_provider("wingman_pro"): - await self.validate_and_set_wingman_pro() - printr.print( - f"Wingman {self.name}: reinitialized Wingman Pro with new settings", - server_only=True, - ) - except Exception as e: - await printr.print_async( - f"Error while updating settings for wingman '{self.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - async def _generate_instant_responses(self) -> None: - """Generates general instant responses based on given context.""" - context = await self.get_context() - messages = [ - { - "role": "system", - "content": """ - Generate a list in JSON format of at least 20 short direct text responses. - Make sure the response only contains the JSON, no additional text. - They must fit the described character in the given context by the user. - Every generated response must be generally usable in every situation. - Responses must show its still in progress and not in a finished state. - The user request this response is used on is unknown. Therefore it must be generic. - Good examples: - - "Processing..." - - "Stand by..." - - Bad examples: - - "Generating route..." (too specific) - - "I'm sorry, I can't do that." (too negative) - - Response example: - [ - "OK", - "Generating results...", - "Roger that!", - "Stand by..." - ] - """, - }, - {"role": "user", "content": context}, - ] - try: - completion = await self.actual_llm_call(messages) - if completion is None: - return - if completion.choices[0].message.content: - retry_limit = 3 - retry_count = 1 - valid = False - while not valid and retry_count <= retry_limit: - try: - responses = json.loads(completion.choices[0].message.content) - valid = True - for response in responses: - if response not in self.instant_responses: - self.instant_responses.append(str(response)) - except json.JSONDecodeError: - messages.append(completion.choices[0].message) - messages.append( - { - "role": "user", - "content": "It was tried to handle the response in its entirety as a JSON string. Fix response to be a pure, valid JSON, it was not convertable.", - } - ) - if retry_count <= retry_limit: - completion = await self.actual_llm_call(messages) - retry_count += 1 - except Exception as e: - await printr.print_async( - f"Error while generating instant responses: {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - async def _transcribe(self, audio_input_wav: str) -> str | None: - """Transcribes the recorded audio to text using the OpenAI Whisper API. 
- - Args: - audio_input_wav (str): The path to the audio file that contains the user's speech. This is a recording of what you you said. - - Returns: - str | None: The transcript of the audio file or None if the transcription failed. - """ - transcript = None - - try: - if self.config.features.stt_provider == SttProvider.AZURE: - transcript = self.openai_azure.transcribe_whisper( - filename=audio_input_wav, - api_key=self.azure_api_keys["whisper"], - config=self.config.azure.whisper, - ) - elif self.config.features.stt_provider == SttProvider.AZURE_SPEECH: - transcript = self.openai_azure.transcribe_azure_speech( - filename=audio_input_wav, - api_key=self.azure_api_keys["tts"], - config=self.config.azure.stt, - ) - elif self.config.features.stt_provider == SttProvider.WHISPERCPP: - transcript = self.whispercpp.transcribe( - filename=audio_input_wav, config=self.config.whispercpp - ) - elif self.config.features.stt_provider == SttProvider.FASTER_WHISPER: - hotwords: list[str] = [] - # add my name - hotwords.append(self.name) - # add default hotwords - default_hotwords = self.config.fasterwhisper.hotwords - if default_hotwords and len(default_hotwords) > 0: - hotwords.extend(default_hotwords) - # and my additional hotwords - wingman_hotwords = self.config.fasterwhisper.additional_hotwords - if wingman_hotwords and len(wingman_hotwords) > 0: - hotwords.extend(wingman_hotwords) - - transcript = self.fasterwhisper.transcribe( - filename=audio_input_wav, - config=self.config.fasterwhisper, - hotwords=list(set(hotwords)), - ) - elif self.config.features.stt_provider == SttProvider.WINGMAN_PRO: - if ( - self.config.wingman_pro.stt_provider - == WingmanProSttProvider.WHISPER - ): - transcript = self.wingman_pro.transcribe_whisper( - filename=audio_input_wav - ) - elif ( - self.config.wingman_pro.stt_provider - == WingmanProSttProvider.AZURE_SPEECH - ): - transcript = self.wingman_pro.transcribe_azure_speech( - filename=audio_input_wav, config=self.config.azure.stt - ) - elif self.config.features.stt_provider == SttProvider.OPENAI: - transcript = self.openai.transcribe(filename=audio_input_wav) - elif self.config.features.stt_provider == SttProvider.GROQ: - transcript = self.groq.transcribe( - filename=audio_input_wav, model="whisper-large-v3-turbo" - ) - except Exception as e: - await printr.print_async( - f"Error during transcription using '{self.config.features.stt_provider}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - result = None - if transcript: - # Wingman Pro might returns a serialized dict instead of a real Azure Speech transcription object - result = ( - transcript.get("_text") - if isinstance(transcript, dict) - else transcript.text - ) - - return result - - async def _get_response_for_transcript( - self, transcript: str, benchmark: Benchmark - ) -> tuple[str | None, str | None, Skill | None, bool]: - """Gets the response for a given transcript. - - This function interprets the transcript, runs instant commands if triggered, - calls the OpenAI API when needed, processes any tool calls, and generates the final response. - - Args: - transcript (str): The user's spoken text transcribed. - - Returns: - tuple[str | None, str | None, Skill | None, bool]: A tuple containing the final response, the instant response (if any), the skill that was used, and a boolean indicating whether the current audio should be interrupted. 
- """ - await self.add_user_message(transcript) - - benchmark.start_snapshot("Instant activation commands") - instant_response, instant_command_executed = await self._try_instant_activation( - transcript=transcript - ) - if instant_response: - await self.add_assistant_message(instant_response) - benchmark.finish_snapshot() - if ( - instant_response == "." - ): # thats for the "The UI should not give a response" option in commands - instant_response = None - return instant_response, instant_response, None, True - benchmark.finish_snapshot() - - # Track cumulative times for proper aggregation - llm_processing_time_ms = 0.0 - tool_execution_time_ms = 0.0 - tool_timings: list[tuple[str, float]] = ( - [] - ) # (label, time_ms) for individual tools - - # make a GPT call with the conversation history - # if an instant command got executed, prevent tool calls to avoid duplicate executions - llm_start = time.perf_counter() - completion = await self._llm_call(instant_command_executed is False) - llm_processing_time_ms += (time.perf_counter() - llm_start) * 1000 - - if completion is None: - self._add_benchmark_snapshot( - benchmark, "LLM Processing", llm_processing_time_ms - ) - return None, None, None, True - - response_message, tool_calls = await self._process_completion(completion) - - # add message and dummy tool responses to conversation history - is_waiting_response_needed, is_summarize_needed = await self._add_gpt_response( - response_message, tool_calls - ) - interrupt = True # initial answer should be awaited if exists - - while tool_calls: - if is_waiting_response_needed: - message = None - if response_message.content: - message = response_message.content - elif self.instant_responses: - message = self._get_random_filler() - is_summarize_needed = True - if message: - self.threaded_execution(self.play_to_user, message, interrupt) - await printr.print_async( - f"{message}", - color=LogType.POSITIVE, - source=LogSource.WINGMAN, - source_name=self.name, - skill_name="", - ) - interrupt = False - else: - is_summarize_needed = True - else: - is_summarize_needed = True - - # Time tool execution and collect individual timings - tool_start = time.perf_counter() - instant_response, skill, iteration_timings = await self._handle_tool_calls( - tool_calls - ) - tool_execution_time_ms += (time.perf_counter() - tool_start) * 1000 - tool_timings.extend(iteration_timings) - - if instant_response: - # Add snapshots before returning - self._add_benchmark_snapshot( - benchmark, "LLM Processing", llm_processing_time_ms - ) - if tool_execution_time_ms > 0: - self._add_tool_execution_snapshot( - benchmark, tool_execution_time_ms, tool_timings - ) - return None, instant_response, None, interrupt - - if is_summarize_needed: - # Time the follow-up LLM call - llm_start = time.perf_counter() - completion = await self._llm_call(True) - llm_processing_time_ms += (time.perf_counter() - llm_start) * 1000 - - if completion is None: - self._add_benchmark_snapshot( - benchmark, "LLM Processing", llm_processing_time_ms - ) - if tool_execution_time_ms > 0: - self._add_tool_execution_snapshot( - benchmark, tool_execution_time_ms, tool_timings - ) - return None, None, None, True - - response_message, tool_calls = await self._process_completion( - completion - ) - is_waiting_response_needed, is_summarize_needed = ( - await self._add_gpt_response(response_message, tool_calls) - ) - if tool_calls: - interrupt = False - elif is_waiting_response_needed: - self._add_benchmark_snapshot( - benchmark, "LLM Processing", 
llm_processing_time_ms - ) - if tool_execution_time_ms > 0: - self._add_tool_execution_snapshot( - benchmark, tool_execution_time_ms, tool_timings - ) - return None, None, None, interrupt - - # Add final snapshots - self._add_benchmark_snapshot( - benchmark, "LLM Processing", llm_processing_time_ms - ) - if tool_execution_time_ms > 0: - self._add_tool_execution_snapshot( - benchmark, tool_execution_time_ms, tool_timings - ) - return response_message.content, response_message.content, None, interrupt - - def _add_benchmark_snapshot( - self, benchmark: Benchmark, label: str, execution_time_ms: float - ): - """Add a snapshot with the given label and execution time.""" - if execution_time_ms >= 1000: - formatted_time = f"{execution_time_ms/1000:.1f}s" - else: - formatted_time = f"{int(execution_time_ms)}ms" - - from api.interface import BenchmarkResult - - benchmark.snapshots.append( - BenchmarkResult( - label=label, - execution_time_ms=execution_time_ms, - formatted_execution_time=formatted_time, - ) - ) - - def _add_tool_execution_snapshot( - self, - benchmark: Benchmark, - total_time_ms: float, - tool_timings: list[tuple[str, float]], - ): - """Add a tool execution snapshot with nested individual tool timings.""" - from api.interface import BenchmarkResult - - if total_time_ms >= 1000: - formatted_time = f"{total_time_ms/1000:.1f}s" - else: - formatted_time = f"{int(total_time_ms)}ms" - - # Create nested snapshots for individual tools - nested_snapshots = [] - for label, time_ms in tool_timings: - if time_ms >= 1000: - fmt = f"{time_ms/1000:.1f}s" - else: - fmt = f"{int(time_ms)}ms" - nested_snapshots.append( - BenchmarkResult( - label=label, - execution_time_ms=time_ms, - formatted_execution_time=fmt, - ) - ) - - benchmark.snapshots.append( - BenchmarkResult( - label="Tool Execution", - execution_time_ms=total_time_ms, - formatted_execution_time=formatted_time, - snapshots=nested_snapshots if nested_snapshots else None, - ) - ) - - def _get_random_filler(self): - # get last two used instant responses - if len(self.last_used_instant_responses) > 2: - self.last_used_instant_responses = self.last_used_instant_responses[-2:] - - # get a random instant response that was not used in the last two responses - random_index = random.randint(0, len(self.instant_responses) - 1) - while random_index in self.last_used_instant_responses: - random_index = random.randint(0, len(self.instant_responses) - 1) - - # add the index to the last used list and return - self.last_used_instant_responses.append(random_index) - return self.instant_responses[random_index] - - async def _fix_tool_calls(self, tool_calls): - """Fixes tool calls that have a command name as function name. - - Args: - tool_calls (list): The tool calls to fix. - - Returns: - list: The fixed tool calls. 
- """ - if tool_calls and len(tool_calls) > 0: - for tool_call in tool_calls: - function_name = tool_call.function.name - function_args = ( - tool_call.function.arguments - # Mistral returns a dict - if isinstance(tool_call.function.arguments, dict) - # OpenAI returns a string - else json.loads(tool_call.function.arguments) - ) - - # try to resolve function name to a command name - if (len(function_args) == 0 and self.get_command(function_name)) or ( - len(function_args) == 1 - and "command_name" in function_args - and self.get_command(function_args["command_name"]) - and function_name == function_args["command_name"] - ): - function_args["command_name"] = function_name - function_name = "execute_command" - - # update the tool call - tool_call.function.name = function_name - tool_call.function.arguments = json.dumps(function_args) - - if self.settings.debug_mode: - await printr.print_async( - "Applied command call fix.", color=LogType.WARNING - ) - - return tool_calls - - async def _add_gpt_response(self, message, tool_calls) -> (bool, bool): - """Adds a message from GPT to the conversation history as well as adding dummy tool responses for any tool calls. - - Args: - message (dict | ChatCompletionMessage): The message to add. - tool_calls (list): The tool calls associated with the message. - """ - # call skill hooks (only for prepared/activated skills) - for skill in self.skills: - if skill.is_prepared: - await skill.on_add_assistant_message( - message.content, message.tool_calls - ) - - # do not tamper with this message as it will lead to 400 errors! - self.messages.append(message) - - # adding dummy tool responses to prevent corrupted message history on parallel requests - # and checks if waiting response should be played - unique_tools = {} - is_waiting_response_needed = False - is_summarize_needed = False - - if tool_calls: - for tool_call in tool_calls: - if not tool_call.id: - continue - # adding a dummy tool response to get updated later - self._add_tool_response(tool_call, "Loading..", False) - - function_name = tool_call.function.name - - # Meta-tools (search_skills, activate_skill, etc.) always need a follow-up - # LLM call so it can use the newly activated tools - if self.skill_registry.is_meta_tool(function_name): - is_summarize_needed = True - elif function_name in self.tool_skills: - skill = self.tool_skills[function_name] - if await skill.is_waiting_response_needed(function_name): - is_waiting_response_needed = True - if await skill.is_summarize_needed(function_name): - is_summarize_needed = True - - unique_tools[function_name] = True - - if len(unique_tools) == 1 and "execute_command" in unique_tools: - is_waiting_response_needed = True - - return is_waiting_response_needed, is_summarize_needed - - def _add_tool_response(self, tool_call, response: str, completed: bool = True): - """Adds a tool response to the conversation history. - - Args: - tool_call (dict|ChatCompletionMessageToolCall): The tool call to add the dummy response for. - """ - msg = {"role": "tool", "content": response} - if tool_call.id is not None: - msg["tool_call_id"] = tool_call.id - if tool_call.function.name is not None: - msg["name"] = tool_call.function.name - self.messages.append(msg) - - if tool_call.id and not completed: - self.pending_tool_calls.append(tool_call.id) - - async def _update_tool_response(self, tool_call_id, response) -> bool: - """Updates a tool response in the conversation history. This also moves the message to the end of the history if all tool responses are given. 
- - Args: - tool_call_id (str): The identifier of the tool call to update the response for. - response (str): The new response to set. - - Returns: - bool: True if the response was updated, False if the tool call was not found. - """ - if not tool_call_id: - return False - - completed = False - index = len(self.messages) - - # go through message history to find and update the tool call - for message in reversed(self.messages): - index -= 1 - if ( - self.__get_message_role(message) == "tool" - and message.get("tool_call_id") == tool_call_id - ): - message["content"] = str(response) - if tool_call_id in self.pending_tool_calls: - self.pending_tool_calls.remove(tool_call_id) - break - if not index: - return False - - # find the assistant message that triggered the tool call - for message in reversed(self.messages[:index]): - index -= 1 - if self.__get_message_role(message) == "assistant": - break - - # check if all tool calls are completed - completed = True - for tool_call in self.messages[index].tool_calls: - if tool_call.id in self.pending_tool_calls: - completed = False - break - if not completed: - return True - - # find the first user message(s) that triggered this assistant message - index -= 1 # skip the assistant message - for message in reversed(self.messages[:index]): - index -= 1 - if self.__get_message_role(message) != "user": - index += 1 - break - - # built message block to move - start_index = index - end_index = start_index - reached_tool_call = False - for message in self.messages[start_index:]: - if not reached_tool_call and self.__get_message_role(message) == "tool": - reached_tool_call = True - if reached_tool_call and self.__get_message_role(message) == "user": - end_index -= 1 - break - end_index += 1 - if end_index == len(self.messages): - end_index -= 1 # loop ended at the end of the message history, so we have to go back one index - message_block = self.messages[start_index : end_index + 1] - - # check if the message block is already at the end - if end_index == len(self.messages) - 1: - return True - - # move message block to the end - del self.messages[start_index : end_index + 1] - self.messages.extend(message_block) - - if self.settings.debug_mode: - await printr.print_async( - "Moved message block to the end.", color=LogType.INFO - ) - - return True - - async def add_user_message(self, content: str): - """Shortens the conversation history if needed and adds a user message to it. - - Args: - content (str): The message content to add. - """ - # call skill hooks (only for prepared/activated skills) - for skill in self.skills: - if skill.is_prepared: - await skill.on_add_user_message(content) - - msg = {"role": "user", "content": content} - await self._cleanup_conversation_history() - self.messages.append(msg) - - async def add_assistant_message(self, content: str): - """Adds an assistant message to the conversation history. - - Args: - content (str): The message content to add. - """ - # call skill hooks (only for prepared/activated skills) - for skill in self.skills: - if skill.is_prepared: - await skill.on_add_assistant_message(content, []) - - msg = {"role": "assistant", "content": content} - self.messages.append(msg) - - async def add_forced_assistant_command_calls(self, commands: list[CommandConfig]): - """Adds forced assistant command calls to the conversation history. - - Args: - commands (list[CommandConfig]): The commands to add. 
- """ - - if not commands: - return - - message = ChatCompletionMessage( - content="", - role="assistant", - tool_calls=[], - ) - for command in commands: - tool_id = None - if ( - self.config.features.conversation_provider - == ConversationProvider.OPENAI - ) or ( - self.config.features.conversation_provider - == ConversationProvider.WINGMAN_PRO - and "gpt" in self.config.wingman_pro.conversation_deployment.lower() - ): - tool_id = f"call_{str(uuid.uuid4()).replace('-', '')}" - elif ( - self.config.features.conversation_provider - == ConversationProvider.GOOGLE - ): - if self.config.google.conversation_model.startswith("gemini-3"): - # gemini 3 needs a thought signature like this, but we cant fake it: - # { - # 'model_extra': { - # 'extra_content': { - # 'google': { - # 'thought_signature': 'EjQKMgFyyNp8mNe4bQmQhOua7gGMH0C9RubFWewy6BzYZJs5f4RqDb8CaiR4gjLxoM1iQqP4' - # } - # } - # } - # } - return - tool_id = f"function-call-{''.join(random.choices('0123456789', k=20))}" - - # early exit for unsupported providers/models - if not tool_id: - return - - tool_call = ChatCompletionMessageToolCall( - id=tool_id, - function=ParsedFunction( - name="execute_command", - arguments=json.dumps({"command_name": command.name}), - ), - type="function", - ) - message.tool_calls.append(tool_call) - - await self._add_gpt_response(message, message.tool_calls) - for tool_call in message.tool_calls: - await self._update_tool_response(tool_call.id, "OK") - - async def _cleanup_conversation_history(self): - """Cleans up the conversation history by removing messages that are too old.""" - remember_messages = self.config.features.remember_messages - - if remember_messages is None or len(self.messages) == 0: - return 0 # Configuration not set, nothing to delete. - - # Find the cutoff index where to end deletion, making sure to only count 'user' messages towards the limit starting with newest messages. - cutoff_index = len(self.messages) - user_message_count = 0 - for message in reversed(self.messages): - if self.__get_message_role(message) == "user": - user_message_count += 1 - if user_message_count == remember_messages: - break # Found the cutoff point. - cutoff_index -= 1 - - # If messages below the keep limit, don't delete anything. - if user_message_count < remember_messages: - return 0 - - total_deleted_messages = cutoff_index # Messages to delete. - - # Remove the pending tool calls that are no longer needed. - for mesage in self.messages[:cutoff_index]: - if ( - self.__get_message_role(mesage) == "tool" - and mesage.get("tool_call_id") in self.pending_tool_calls - ): - self.pending_tool_calls.remove(mesage.get("tool_call_id")) - if self.settings.debug_mode: - await printr.print_async( - f"Removing pending tool call {mesage.get('tool_call_id')} due to message history clean up.", - color=LogType.WARNING, - ) - - # Remove the messages before the cutoff index, exclusive of the system message. - del self.messages[:cutoff_index] - - # Optional debugging printout. - if self.settings.debug_mode and total_deleted_messages > 0: - await printr.print_async( - f"Deleted {total_deleted_messages} messages from the conversation history.", - color=LogType.WARNING, - ) - - return total_deleted_messages - - def reset_conversation_history(self): - """Resets the conversation history and skill activation state. - - When the conversation is reset, the LLM loses all memory of which skills - were activated and why. 
-        self.messages = []
-        self.skill_registry.reset_activations()
-        self.mcp_registry.reset_activations()
-
-    async def _try_instant_activation(self, transcript: str) -> tuple[str, bool]:
-        """Tries to execute an instant activation command if present in the transcript.
-
-        Args:
-            transcript (str): The transcript to check for an instant activation command.
-
-        Returns:
-            tuple[str, bool]: A tuple containing the response to the instant command and a boolean indicating whether an instant command was executed.
-        """
-        commands = await self._execute_instant_activation_command(transcript)
-        if commands:
-            await self.add_forced_assistant_command_calls(commands)
-            responses = []
-            for command in commands:
-                if command.responses:
-                    responses.append(self._select_command_response(command))
-
-            if len(responses) == len(commands):
-                # clear duplicates
-                responses = list(dict.fromkeys(responses))
-                responses = [
-                    response + "." if not response.endswith(".") else response
-                    for response in responses
-                ]
-                return " ".join(responses), True
-
-            return None, True
-
-        return None, False
-
-    async def get_context(self):
-        """Builds the context and inserts it into the messages.
-
-        With progressive disclosure, only includes prompts from ACTIVATED skills.
-        Skill prompts are auto-generated from @tool descriptions if no custom prompt is set.
-        """
-        skill_prompts = ""
-        active_skill_names = self.skill_registry.active_skill_names
-
-        for skill in self.skills:
-            # Only include prompts from activated skills (in progressive mode)
-            if skill.name not in active_skill_names:
-                continue
-
-            # Get custom prompt if set
-            prompt = await skill.get_prompt()
-
-            # Auto-generate prompt from tool descriptions if no custom prompt
-            if not prompt:
-                tools_desc = skill.get_tools_description()
-                if tools_desc:
-                    prompt = f"Available tools:\n{tools_desc}"
-
-            if prompt:
-                skill_prompts += "\n\n" + skill.name + "\n\n" + prompt
-
-        # Get TTS prompt based on active TTS provider and user preference
-        tts_prompt = ""
-        if self.config.features.tts_provider == TtsProvider.ELEVENLABS:
-            if (
-                self.config.elevenlabs.use_tts_prompt
-                and self.config.elevenlabs.tts_prompt
-            ):
-                tts_prompt = self.config.elevenlabs.tts_prompt
-        elif self.config.features.tts_provider == TtsProvider.INWORLD or (
-            self.config.features.tts_provider == TtsProvider.WINGMAN_PRO
-            and self.config.wingman_pro.tts_provider == WingmanProTtsProvider.INWORLD
-        ):
-            if self.config.inworld.use_tts_prompt and self.config.inworld.tts_prompt:
-                tts_prompt = self.config.inworld.tts_prompt
-        elif self.config.features.tts_provider == TtsProvider.OPENAI_COMPATIBLE:
-            if (
-                self.config.openai_compatible_tts.use_tts_prompt
-                and self.config.openai_compatible_tts.tts_prompt
-            ):
-                tts_prompt = self.config.openai_compatible_tts.tts_prompt
-
-        # Add TTS header only if there's a prompt
-        if tts_prompt:
-            tts_prompt = "# TEXT-TO-SPEECH\n" + tts_prompt
-
-        # Build user context with environment metadata
-        user_context = self._build_user_context()
-
-        context = self.config.prompts.system_prompt.format(
-            backstory=self.config.prompts.backstory,
-            skills=skill_prompts,
-            ttsprompt=tts_prompt,
-            user_context=user_context,
-        )
-
-        return context
-
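For reference, the offset formatting done by the helper below can be sketched standalone; the printed lines mirror the `context_parts` entries it builds (the context and wingman names here are hypothetical):

```python
from datetime import datetime

now = datetime.now().astimezone()
utc_offset = now.strftime("%z")  # e.g. "+0100"
if len(utc_offset) >= 5:
    utc_offset = f"{utc_offset[:3]}:{utc_offset[3:]}"  # reshaped to "+01:00"
current_date = now.strftime("%A, %B %d, %Y")  # e.g. "Tuesday, December 09, 2025"

print(f"- Current date: {current_date}")
print(f"- Timezone: {now.tzinfo} (UTC{utc_offset})")
print("- Active context: Star Citizen")  # hypothetical config dir name
print("- Your name (default): ATC")      # hypothetical wingman name
```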
- """ - context_parts = [] - backstory = self.config.prompts.backstory or "" - backstory_lower = backstory.lower() - - # Date and timezone information - try: - now = datetime.now().astimezone() - local_tz = now.tzinfo - tz_name = str(local_tz) - # Get UTC offset in a readable format - utc_offset = now.strftime("%z") - # Format as +HH:MM or -HH:MM - if len(utc_offset) >= 5: - utc_offset = f"{utc_offset[:3]}:{utc_offset[3:]}" - # Include current date for relative date references ("last Sunday", "tomorrow", etc.) - current_date = now.strftime( - "%A, %B %d, %Y" - ) # e.g., "Tuesday, December 09, 2025" - context_parts.append(f"- Current date: {current_date}") - context_parts.append(f"- Timezone: {tz_name} (UTC{utc_offset})") - except Exception: - context_parts.append("- Timezone: Unknown") - - # Config/context name (e.g., "Star Citizen", "Elite Dangerous") - # This helps the LLM understand which game/context tools are relevant for - if self.tower and self.tower.config_dir and self.tower.config_dir.name: - context_parts.append(f"- Active context: {self.tower.config_dir.name}") - - # Username (only if not explicitly named in backstory) - if self.settings.user_name: - # Check if username is mentioned in backstory as a standalone word - import re - - name_pattern = r"\b" + re.escape(self.settings.user_name.lower()) + r"\b" - if not re.search(name_pattern, backstory_lower): - context_parts.append( - f"- User's name (default): {self.settings.user_name}" - ) - - # Wingman name - always include as it's useful context - # The system prompt already tells LLM to prioritize backstory names - if self.name: - context_parts.append(f"- Your name (default): {self.name}") - - if context_parts: - return "\n".join(context_parts) - return "No additional context available." - - async def add_context(self, messages): - messages.insert(0, {"role": "system", "content": (await self.get_context())}) - - async def generate_image(self, text: str) -> str: - """ - Generates an image from the provided text configured provider. - """ - - if ( - self.config.features.image_generation_provider - == ImageGenerationProvider.WINGMAN_PRO - ): - try: - return await self.wingman_pro.generate_image(text) - except Exception as e: - await printr.print_async( - f"Error during image generation: {str(e)}", color=LogType.ERROR - ) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - - return "" - - async def actual_llm_call(self, messages, tools: list[dict] = None): - """ - Perform the actual LLM call with the messages provided. 
- """ - - try: - completion = None - if self.config.features.conversation_provider == ConversationProvider.AZURE: - completion = self.openai_azure.ask( - messages=messages, - api_key=self.azure_api_keys["conversation"], - config=self.config.azure.conversation, - tools=tools, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.OPENAI - ): - completion = self.openai.ask( - messages=messages, - tools=tools, - model=self.config.openai.conversation_model, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.MISTRAL - ): - completion = self.mistral.ask( - messages=messages, - tools=tools, - model=self.config.mistral.conversation_model, - ) - elif ( - self.config.features.conversation_provider == ConversationProvider.GROQ - ): - completion = self.groq.ask( - messages=messages, - tools=tools, - model=self.config.groq.conversation_model, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.CEREBRAS - ): - completion = self.cerebras.ask( - messages=messages, - tools=tools, - model=self.config.cerebras.conversation_model, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.GOOGLE - ): - completion = self.google.ask( - messages=messages, - tools=tools, - model=self.config.google.conversation_model, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.OPENROUTER - ): - # OpenRouter throws an error if the model doesn't support tools but we send some - if self.openrouter_model_supports_tools: - completion = self.openrouter.ask( - messages=messages, - tools=tools, - model=self.config.openrouter.conversation_model, - ) - else: - completion = self.openrouter.ask( - messages=messages, - model=self.config.openrouter.conversation_model, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.LOCAL_LLM - ): - completion = self.local_llm.ask( - messages=messages, - tools=tools, - model=self.config.local_llm.conversation_model, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.WINGMAN_PRO - ): - completion = self.wingman_pro.ask( - messages=messages, - deployment=self.config.wingman_pro.conversation_deployment, - tools=tools, - ) - elif ( - self.config.features.conversation_provider - == ConversationProvider.PERPLEXITY - ): - completion = self.perplexity.ask( - messages=messages, - tools=tools, - model=self.config.perplexity.conversation_model.value, - ) - elif self.config.features.conversation_provider == ConversationProvider.XAI: - completion = self.xai.ask( - messages=messages, - tools=tools, - model=self.config.xai.conversation_model, - ) - except Exception as e: - await printr.print_async( - f"Error during LLM call: {str(e)}", color=LogType.ERROR - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return None - - return completion - - async def _llm_call(self, allow_tool_calls: bool = True): - """Makes the primary LLM call with the conversation history and tools enabled. - - Returns: - The LLM completion object or None if the call fails. 
- """ - - # save request time for later comparison - thiscall = time.time() - self.last_gpt_call = thiscall - - # build tools - tools = self.build_tools() if allow_tool_calls else None - - if self.settings.debug_mode: - await printr.print_async( - f"Calling LLM with {(len(self.messages))} messages (excluding context) and {len(tools) if tools else 0} tools.", - color=LogType.INFO, - ) - - messages = self.messages.copy() - await self.add_context(messages) - - # DEBUG: Print compiled context (dev-only, remove before release) - # if messages and messages[0].get("role") == "system": - # print("\n" + "=" * 80) - # print("COMPILED CONTEXT:") - # print("=" * 80) - # print(messages[0].get("content", "")) - # print("=" * 80 + "\n") - - completion = await self.actual_llm_call(messages, tools) - - # if request isnt most recent, ignore the response - if self.last_gpt_call != thiscall: - await printr.print_async( - "LLM call was cancelled due to a new call.", color=LogType.WARNING - ) - return None - - return completion - - async def _process_completion(self, completion: ChatCompletion): - """Processes the completion returned by the LLM call. - - Args: - completion: The completion object from an OpenAI call. - - Returns: - A tuple containing the message response and tool calls from the completion. - """ - - response_message = completion.choices[0].message - - content = response_message.content - if content is None: - response_message.content = "" - - # temporary fix for tool calls that have a command name as function name - if response_message.tool_calls: - response_message.tool_calls = await self._fix_tool_calls( - response_message.tool_calls - ) - - return response_message, response_message.tool_calls - - async def _handle_tool_calls(self, tool_calls): - """Processes all the tool calls identified in the response message. - - Args: - tool_calls: The list of tool calls to process. - - Returns: - tuple: (instant_response, skill, tool_timings) where tool_timings is a list of (label, time_ms) tuples. 
- """ - instant_response = None - function_response = "" - tool_timings: list[tuple[str, float]] = [] - - skill = None - - for tool_call in tool_calls: - try: - function_name = tool_call.function.name - function_args = ( - tool_call.function.arguments - # Mistral returns a dict - if isinstance(tool_call.function.arguments, dict) - # OpenAI returns a string - else json.loads(tool_call.function.arguments) - ) - - # Time the individual tool execution - tool_start = time.perf_counter() - ( - function_response, - instant_response, - skill, - tool_label, - ) = await self.execute_command_by_function_call( - function_name, function_args - ) - tool_time_ms = (time.perf_counter() - tool_start) * 1000 - - # Add timing if we got a label (actual tool execution, not meta-tool) - if tool_label: - tool_timings.append((tool_label, tool_time_ms)) - - if tool_call.id: - # updating the dummy tool response with the actual response - await self._update_tool_response(tool_call.id, function_response) - else: - # adding a new tool response - self._add_tool_response(tool_call, function_response) - except Exception as e: - self._add_tool_response(tool_call, "Error") - await printr.print_async( - f"Error while processing tool call: {str(e)}", color=LogType.ERROR - ) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - return instant_response, skill, tool_timings - - async def execute_command_by_function_call( - self, function_name: str, function_args: dict[str, any] - ) -> tuple[str, str | None, Skill | None, str | None]: - """ - Uses an OpenAI function call to execute a command. If it's an instant activation_command, one if its responses will be played. - - Args: - function_name (str): The name of the function to be executed. - function_args (dict[str, any]): The arguments to pass to the function being executed. - - Returns: - A tuple containing: - - function_response (str): The text response or result obtained after executing the function. - - instant_response (str): An immediate response or action to be taken, if any (e.g., play audio). - - used_skill (Skill): The skill that was used, if any. - - tool_label (str): Label for benchmark timing (e.g., "MCP: resolve-library-id"), or None for meta-tools. 
- """ - function_response = "" - instant_response = "" - used_skill = None - tool_label = None - - # Handle unified capability meta-tools (activate_capability, list_active_capabilities) - if self.capability_registry.is_meta_tool(function_name): - function_response, tools_changed = ( - await self.capability_registry.execute_meta_tool( - function_name, function_args - ) - ) - - # If a skill was activated, perform lazy validation - if tools_changed and function_name == "activate_capability": - capability_name = function_args.get("capability_name", "") - skill = self.skill_registry.get_skill_for_activation(capability_name) - if skill and skill.needs_activation(): - success, validation_msg = await skill.ensure_activated() - if not success: - # Validation failed - deactivate the skill - self.skill_registry.deactivate_skill(capability_name) - function_response = validation_msg - tools_changed = False - await printr.print_async( - f"Skill activation failed: {capability_name}", - color=LogType.ERROR, - ) - else: - # Get display name for user-friendly message - display_name = self.skill_registry.get_skill_display_name( - capability_name - ) - await printr.print_async( - f"Skill activated: {display_name}", - color=LogType.SKILL, - ) - - return function_response, None, None, None # Meta-tool, no timing label - - # Handle legacy meta-tools for progressive skill discovery/activation - # These are kept for backward compatibility but shouldn't be called - if self.skill_registry.is_meta_tool(function_name): - function_response, tools_changed = ( - await self.skill_registry.execute_meta_tool( - function_name, function_args - ) - ) - - # If skill was activated, perform lazy validation - if tools_changed and function_name == "activate_skill": - skill_name = function_args.get("skill_name", "") - skill = self.skill_registry.get_skill_for_activation(skill_name) - if skill and skill.needs_activation(): - success, validation_msg = await skill.ensure_activated() - if not success: - # Validation failed - deactivate the skill - self.skill_registry.deactivate_skill(skill_name) - function_response = validation_msg - tools_changed = False - await printr.print_async( - f"Skill activation failed: {skill_name}", - color=LogType.ERROR, - ) - else: - # Get display name for user-friendly message - display_name = self.skill_registry.get_skill_display_name( - skill_name - ) - await printr.print_async( - f"Skill activated: {display_name}", - color=LogType.SKILL, - ) - - return function_response, None, None, None # Meta-tool, no timing label - - # Handle MCP meta-tools for server discovery/activation - if self.mcp_registry.is_meta_tool(function_name): - function_response, tools_changed = ( - await self.mcp_registry.execute_meta_tool(function_name, function_args) - ) - return function_response, None, None, None # Meta-tool, no timing label - - # Handle MCP server tools (prefixed with mcp_) - if self.mcp_registry.is_mcp_tool(function_name): - connection = self.mcp_registry.get_connection_for_tool(function_name) - if connection: - display_name = connection.config.display_name - original_name = self.mcp_registry.get_original_tool_name(function_name) - tool_label = f"🌐 {display_name}: {original_name}" - - benchmark = Benchmark( - f"MCP '{connection.config.name}' - {original_name}" - ) - - # Always show simple 'called' message in UI so users know the wingman is working - await printr.print_async( - f"{display_name}: called `{original_name}` with {function_args}", - color=LogType.MCP, - ) - - # Detailed 'calling' log only in 
terminal/log file - await printr.print_async( - f"{display_name}: calling `{original_name}` with {function_args}...", - color=LogType.MCP, - server_only=True, - ) - - try: - function_response = await self.mcp_registry.call_tool( - function_name, function_args - ) - except Exception as e: - await printr.print_async( - f"{display_name}: `{original_name}` failed - {str(e)}", - color=LogType.ERROR, - ) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - function_response = "ERROR DURING MCP TOOL EXECUTION" - finally: - # Detailed 'completed' with timing only in terminal/log file (or UI if debug) - await printr.print_async( - f"{display_name}: `{original_name}` completed", - color=LogType.MCP, - benchmark_result=benchmark.finish(), - server_only=not self.settings.debug_mode, - ) - - return function_response, None, None, tool_label - - if function_name == "execute_command": - # get the command based on the argument passed by the LLM - command = self.get_command(function_args["command_name"]) - # execute the command - function_response = await self._execute_command(command) - tool_label = f"Command: {function_args.get('command_name', function_name)}" - # if the command has responses, we have to play one of them - if command and command.responses: - instant_response = self._select_command_response(command) - await self.play_to_user(instant_response) - - # Go through the skills and check if the function name matches any of the tools - if function_name in self.tool_skills: - skill = self.tool_skills[function_name] - display_name = self.skill_registry.get_skill_display_name(skill.name) - tool_label = f"⚡ {display_name}: {function_name}" - - benchmark = Benchmark(f"Skill '{skill.name}' - {function_name}") - - # Always show simple 'called' message in UI so users know the wingman is working - await printr.print_async( - f"{display_name}: called `{function_name}` with {function_args}", - color=LogType.SKILL, - skill_name=skill.name, - ) - - # Detailed 'calling' log only in terminal/log file - await printr.print_async( - f"{display_name}: calling `{function_name}` with {function_args}...", - color=LogType.SKILL, - skill_name=skill.name, - server_only=True, - ) - - try: - function_response, instant_response = await skill.execute_tool( - tool_name=function_name, - parameters=function_args, - benchmark=benchmark, - ) - used_skill = skill - if instant_response: - await self.play_to_user(instant_response) - except Exception as e: - await printr.print_async( - f"{display_name}: `{function_name}` failed - {str(e)}", - color=LogType.ERROR, - ) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - function_response = ( - "ERROR DURING PROCESSING" # hints to AI that there was an error - ) - instant_response = None - finally: - await printr.print_async( - f"{display_name}: `{function_name}` completed", - color=LogType.SKILL, - benchmark_result=benchmark.finish(), - skill_name=skill.name, - server_only=not self.settings.debug_mode, - ) - - return function_response, instant_response, used_skill, tool_label - - async def play_to_user( - self, - text: str, - no_interrupt: bool = False, - sound_config: Optional[SoundConfig] = None, - ): - """Plays audio to the user using the configured TTS Provider (default: OpenAI TTS). - Also adds sound effects if enabled in the configuration. - - Args: - text (str): The text to play as audio. 
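The benchmark labels built above follow one small convention: an icon per capability type, then display name and tool name. A sketch of that convention (the display and tool names below are made up):

```python
def tool_label(kind: str, display_name: str, tool_name: str) -> str:
    # "mcp" tools get a globe, skill tools a lightning bolt, like the code above
    icon = {"mcp": "🌐", "skill": "⚡"}.get(kind, "")
    return f"{icon} {display_name}: {tool_name}".strip()


print(tool_label("mcp", "SomeMcpServer", "resolve-library-id"))
print(tool_label("skill", "StarHead", "get_best_trading_route"))
```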
- """ - if sound_config: - printr.print( - "Using custom sound config for playback", LogType.INFO, server_only=True - ) - else: - sound_config = self.config.sound - - # remove Markdown, links, emotes and code blocks - text, contains_links, contains_code_blocks = cleanup_text(text) - - # wait for audio player to finish playing - if no_interrupt and self.audio_player.is_playing: - while self.audio_player.is_playing: - await asyncio.sleep(0.1) - - # call skill hooks (only for prepared/activated skills) - changed_text = text - for skill in self.skills: - if skill.is_prepared: - changed_text = await skill.on_play_to_user(text, sound_config) - if changed_text != text: - printr.print( - f"Skill '{skill.config.display_name}' modified the text to: '{changed_text}'", - LogType.INFO, - ) - text = changed_text - - if sound_config.volume == 0.0: - printr.print( - "Volume modifier is set to 0. Skipping TTS processing.", - LogType.WARNING, - server_only=True, - ) - return - - if "{SKIP-TTS}" in text: - printr.print( - "Skip TTS phrase found in input. Skipping TTS processing.", - LogType.WARNING, - server_only=True, - ) - return - - try: - if self.config.features.tts_provider == TtsProvider.EDGE_TTS: - await self.edge_tts.play_audio( - text=text, - config=self.config.edge_tts, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif self.config.features.tts_provider == TtsProvider.ELEVENLABS: - await self.elevenlabs.play_audio( - text=text, - config=self.config.elevenlabs, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - stream=self.config.elevenlabs.output_streaming, - ) - elif self.config.features.tts_provider == TtsProvider.HUME: - try: - await self.hume.play_audio( - text=text, - config=self.config.hume, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - except RuntimeError as e: - if "Event loop is closed" in str(e): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - await self.hume.play_audio( - text=text, - config=self.config.hume, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif self.config.features.tts_provider == TtsProvider.INWORLD: - await self.inworld.play_audio( - text=text, - config=self.config.inworld, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif self.config.features.tts_provider == TtsProvider.AZURE: - await self.openai_azure.play_audio( - text=text, - api_key=self.azure_api_keys["tts"], - config=self.config.azure.tts, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif self.config.features.tts_provider == TtsProvider.XVASYNTH: - await self.xvasynth.play_audio( - text=text, - config=self.config.xvasynth, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif self.config.features.tts_provider == TtsProvider.OPENAI: - await self.openai.play_audio( - text=text, - voice=self.config.openai.tts_voice, - model=self.config.openai.tts_model, - speed=self.config.openai.tts_speed, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - stream=self.config.openai.output_streaming, - ) - elif self.config.features.tts_provider == TtsProvider.OPENAI_COMPATIBLE: - await self.openai_compatible_tts.play_audio( - text=text, - voice=self.config.openai_compatible_tts.voice, - model=self.config.openai_compatible_tts.model, - speed=( - 
self.config.openai_compatible_tts.speed - if self.config.openai_compatible_tts.speed #!= 1.0 - else NOT_GIVEN - ), - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - stream=self.config.openai_compatible_tts.output_streaming, - ) - elif self.config.features.tts_provider == TtsProvider.WINGMAN_PRO: - if self.config.wingman_pro.tts_provider == WingmanProTtsProvider.OPENAI: - await self.wingman_pro.generate_openai_speech( - text=text, - voice=self.config.openai.tts_voice, - model=self.config.openai.tts_model, - speed=self.config.openai.tts_speed, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif ( - self.config.wingman_pro.tts_provider == WingmanProTtsProvider.AZURE - ): - await self.wingman_pro.generate_azure_speech( - text=text, - config=self.config.azure.tts, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - elif ( - self.config.wingman_pro.tts_provider - == WingmanProTtsProvider.INWORLD - ): - await self.wingman_pro.generate_inworld_speech( - text=text, - config=self.config.inworld, - sound_config=sound_config, - audio_player=self.audio_player, - wingman_name=self.name, - ) - else: - printr.toast_error( - f"Unsupported TTS provider: {self.config.features.tts_provider}" - ) - except Exception as e: - await printr.print_async( - f"Error during TTS playback: {str(e)}", color=LogType.ERROR - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - async def _execute_command(self, command: CommandConfig, is_instant=False) -> str: - """Does what Wingman base does, but always returns "Ok" instead of a command response. - Otherwise, the AI will try to respond to the command and generate a "duplicate" response for instant_activation commands. - """ - await super()._execute_command(command, is_instant) - return "Ok" - - def build_tools(self) -> list[dict]: - """ - Builds tools for the LLM call. - - In progressive mode: Returns meta-tools (search_skills, activate_skill) plus - tools from activated skills only. - - In legacy mode: Returns all skill tools. - - Returns: - list[dict]: A list of tool descriptors in OpenAI format. 
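The descriptors are returned in OpenAI's function-calling format; filled in, the `execute_command` descriptor assembled in the body below looks like this (the enum values are hypothetical command names taken from a config):

```python
execute_command_tool = {
    "type": "function",
    "function": {
        "name": "execute_command",
        "description": "Executes a command",
        "parameters": {
            "type": "object",
            "properties": {
                "command_name": {
                    "type": "string",
                    "description": "The name of the command to execute",
                    "enum": ["OpenMap", "LowerLandingGear"],  # built from the config
                },
            },
            "required": ["command_name"],
        },
    },
}

print(execute_command_tool["function"]["name"])
```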
- """ - - def _command_has_effective_actions(command: CommandConfig) -> bool: - if command.is_system_command: - return True - - if not command.actions: - return False - - for action in command.actions: - if not action: - continue - if ( - action.keyboard is not None - or action.mouse is not None - or action.joystick is not None - or action.audio is not None - or action.write is not None - or action.wait is not None - ): - return True - - return False - - commands = [ - command.name - for command in self.config.commands - if (not command.force_instant_activation) - and _command_has_effective_actions(command) - ] - tools: list[dict] = [] - if commands: - tools.append( - { - "type": "function", - "function": { - "name": "execute_command", - "description": "Executes a command", - "parameters": { - "type": "object", - "properties": { - "command_name": { - "type": "string", - "description": "The name of the command to execute", - "enum": commands, - }, - }, - "required": ["command_name"], - }, - }, - } - ) - - # Unified capability discovery: single activate_capability meta-tool - # Combines skills and MCP servers - LLM doesn't need to know the difference - for _, tool in self.capability_registry.get_meta_tools(): - tools.append(tool) - - # Add tools from activated capabilities (both skills and MCPs) - for _, tool in self.skill_registry.get_active_tools(): - tools.append(tool) - - for _, tool in self.mcp_registry.get_active_tools(): - tools.append(tool) - - return tools - - def __get_message_role(self, message): - """Helper method to get the role of the message regardless of its type.""" - if isinstance(message, Mapping): - return message.get("role") - elif hasattr(message, "role"): - return message.role - else: - raise TypeError( - f"Message is neither a mapping nor has a 'role' attribute: {message}" - ) diff --git a/wingmen/wingman.py b/wingmen/wingman.py deleted file mode 100644 index 25534b08..00000000 --- a/wingmen/wingman.py +++ /dev/null @@ -1,983 +0,0 @@ -import traceback -from copy import deepcopy -import random -import time -import difflib -import asyncio -import threading -from typing import ( - Any, - Dict, - Optional, - TYPE_CHECKING, -) -import keyboard.keyboard as keyboard -import mouse.mouse as mouse -from api.interface import ( - CommandConfig, - SettingsConfig, - SkillConfig, - SoundConfig, - WingmanConfig, - WingmanInitializationError, -) -from api.enums import ( - CommandTag, - LogSource, - LogType, - WingmanInitializationErrorType, -) -from providers.faster_whisper import FasterWhisper -from providers.whispercpp import Whispercpp -from providers.xvasynth import XVASynth -from services.audio_player import AudioPlayer -from services.benchmark import Benchmark -from services.module_manager import ModuleManager -from services.secret_keeper import SecretKeeper -from services.printr import Printr -from services.audio_library import AudioLibrary -from skills.skill_base import Skill - -if TYPE_CHECKING: - from services.tower import Tower - -printr = Printr() - - -def _get_skill_folder_from_module(module: str) -> str: - """Extract folder name from module path like 'skills.star_head.main' -> 'star_head'""" - return module.replace(".main", "").replace(".", "/").split("/")[1] - - -class Wingman: - """The "highest" Wingman base class in the chain. It does some very basic things but is meant to be 'virtual', and so are most its methods, so you'll probably never instantiate it directly. 
-
-    Instead, you'll create a custom wingman that inherits from this (or another subclass of it) and override its methods if needed.
-    """
-
-    def __init__(
-        self,
-        name: str,
-        config: WingmanConfig,
-        settings: SettingsConfig,
-        audio_player: AudioPlayer,
-        audio_library: AudioLibrary,
-        whispercpp: Whispercpp,
-        fasterwhisper: FasterWhisper,
-        xvasynth: XVASynth,
-        tower: "Tower",
-    ):
-        """The constructor of the Wingman class. You can override it in your custom wingman.
-
-        Args:
-            name (str): The name of the wingman. This is the key you gave it in the config, e.g. "atc"
-            config (WingmanConfig): All "general" config entries merged with the specific Wingman config settings. The Wingman takes precedence and overrides the general config. You can just add new keys to the config and they will be available here.
-        """
-
-        self.config = config
-        """All "general" config entries merged with the specific Wingman config settings. The Wingman takes precedence and overrides the general config. You can just add new keys to the config and they will be available here."""
-
-        self.settings = settings
-        """The general user settings."""
-
-        self.secret_keeper = SecretKeeper()
-        """A service that allows you to store and retrieve secrets like API keys. It can prompt the user for secrets if necessary."""
-        self.secret_keeper.secret_events.subscribe(
-            "secrets_saved", self.handle_secret_saved
-        )
-
-        self.name = name
-        """The name of the wingman. This is the key you gave it in the config, e.g. "atc"."""
-
-        self.audio_player = audio_player
-        """A service that allows you to play audio files and add sound effects to them."""
-
-        self.audio_library = audio_library
-        """A service that allows you to play and manage audio files from the audio library."""
-
-        self.execution_start: None | float = None
-        """Used for benchmarking execution times. The timer is (re-)started whenever the process function starts."""
-
-        self.whispercpp = whispercpp
-        """A class that handles the communication with the Whispercpp server for transcription."""
-
-        self.fasterwhisper = fasterwhisper
-        """A class that handles local transcriptions using FasterWhisper."""
-
-        self.xvasynth = xvasynth
-        """A class that handles the communication with the XVASynth server for TTS."""
-
-        self.tower = tower
-        """The Tower instance that manages all Wingmen in the same config dir."""
-
-        self.skills: list[Skill] = []
-
-    def get_record_key(self) -> str | int:
-        """Returns the activation or "push-to-talk" key for this Wingman."""
-        return self.config.record_key_codes or self.config.record_key
-
-    def get_record_mouse_button(self) -> str:
-        """Returns the activation or "push-to-talk" mouse button for this Wingman."""
-        return self.config.record_mouse_button
-
-    def get_record_joystick_button(self) -> str:
-        """Returns the activation or "push-to-talk" joystick button for this Wingman."""
-        if not self.config.record_joystick_button:
-            return None
-        return f"{self.config.record_joystick_button.guid}{self.config.record_joystick_button.button}"
-
-    async def handle_secret_saved(self, _secrets: Dict[str, Any]):
-        await printr.print_async(
-            text="Secret saved",
-            source_name=self.name,
-            command_tag=CommandTag.SECRET_SAVED,
-        )
-        await self.validate()
-
-    # ──────────────────────────────────── Hooks ─────────────────────────────────── #
-
-    async def validate(self) -> list[WingmanInitializationError]:
-        """Use this function to validate params and config before the Wingman is started.
-        If you add new config sections or entries to your custom wingman, you should validate them here.
-
-        It's a good idea to collect all errors from the base class first and not to swallow them.
-
-        If you return MISSING_SECRET errors, the user will be asked for them.
-        If you return other errors, your Wingman will not be loaded by Tower.
-
-        Returns:
-            list[WingmanInitializationError]: A list of errors or an empty list if everything is okay.
-        """
-        return []
-
-    async def retrieve_secret(self, secret_name, errors):
-        """Use this method to retrieve secrets like API keys from the SecretKeeper.
-        If the key is missing, the user will be prompted to enter it.
-        """
-        try:
-            api_key = await self.secret_keeper.retrieve(
-                requester=self.name,
-                key=secret_name,
-                prompt_if_missing=True,
-            )
-            if not api_key:
-                errors.append(
-                    WingmanInitializationError(
-                        wingman_name=self.name,
-                        message=f"Missing secret '{secret_name}'.",
-                        error_type=WingmanInitializationErrorType.MISSING_SECRET,
-                        secret_name=secret_name,
-                    )
-                )
-        except Exception as e:
-            printr.print(
-                f"Error retrieving secret '{secret_name}': {e}",
-                color=LogType.ERROR,
-                server_only=True,
-            )
-            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
-            errors.append(
-                WingmanInitializationError(
-                    wingman_name=self.name,
-                    message=f"Could not retrieve secret '{secret_name}': {str(e)}",
-                    error_type=WingmanInitializationErrorType.MISSING_SECRET,
-                    secret_name=secret_name,
-                )
-            )
-            api_key = None
-
-        return api_key
-
-    async def prepare(self):
-        """This method is called only once when the Wingman is instantiated by Tower.
-        It is run AFTER validate() and AFTER init_skills() so you can access validated params safely here.
-
-        You can override it if you need to load async data from an API or file."""
-
-    async def unload(self):
-        """This method is called when the Wingman is unloaded by Tower. You can override it if you need to clean up resources."""
-        # Unsubscribe from secret events to prevent duplicate handlers
-        self.secret_keeper.secret_events.unsubscribe(
-            "secrets_saved", self.handle_secret_saved
-        )
-        await self.unload_skills()
-
-    async def unload_skills(self):
-        """Call this to trigger unload for skills that were actually prepared/used."""
-        for skill in self.skills:
-            # Only unload skills that were actually prepared (activated)
-            # Skills that were never used don't need cleanup
-            if not skill.is_prepared:
-                continue
-            try:
-                await skill.unload()
-            except Exception as e:
-                await printr.print_async(
-                    f"Error unloading skill '{skill.name}': {str(e)}",
-                    color=LogType.ERROR,
-                )
-                printr.print(
-                    traceback.format_exc(), color=LogType.ERROR, server_only=True
-                )
-
-    async def init_skills(self) -> list[WingmanInitializationError]:
-        """Load all available skills with lazy validation.
-
-        Skills are loaded but NOT validated during init. Validation happens
-        on first activation via the SkillRegistry. User config overrides from
-        self.config.skills are merged with default configs.
-
-        Platform-incompatible skills are skipped entirely.
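The platform check described above (and implemented just below) boils down to normalizing `sys.platform` and testing membership. A standalone sketch with a hypothetical platform list:

```python
import sys

# 'win32' -> 'windows'; 'darwin' and 'linux' pass through unchanged
platform_map = {"win32": "windows", "darwin": "darwin", "linux": "linux"}
normalized = platform_map.get(sys.platform, sys.platform)

skill_platforms = ["windows", "linux"]  # hypothetical skill_config.platforms
if normalized not in skill_platforms:
    print(f"Skipping skill - not supported on {normalized}")
else:
    print(f"Skill is available on {normalized}")
```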
- """ - import sys - - current_platform = sys.platform # 'win32', 'darwin', 'linux' - platform_map = {"win32": "windows", "darwin": "darwin", "linux": "linux"} - normalized_platform = platform_map.get(current_platform, current_platform) - - if self.skills: - await self.unload_skills() - - errors = [] - self.skills = [] - - # Build a lookup of user config overrides by skill folder name - # The key must be the folder name (e.g., 'star_head') not the class name (e.g., 'StarHead') - user_skill_configs: dict[str, "SkillConfig"] = {} - if self.config.skills: - for skill_config in self.config.skills: - folder_name = _get_skill_folder_from_module(skill_config.module) - user_skill_configs[folder_name] = skill_config - - # Get all available skill configs - available_skills = ModuleManager.read_available_skill_configs() - - # Get discoverable skills list (whitelist) - discoverable_skills = self.config.discoverable_skills - - for skill_folder_name, skill_config_path in available_skills: - try: - # Load default skill config first to get the display name - skill_config_dict = ModuleManager.read_config(skill_config_path) - if not skill_config_dict: - continue - - # Import SkillConfig here to avoid circular imports - from api.interface import SkillConfig - - # Check if user has overrides for this skill - if skill_folder_name in user_skill_configs: - # Merge user overrides into default config - user_config = user_skill_configs[skill_folder_name] - # User config takes precedence - merge custom_properties especially - if user_config.custom_properties: - skill_config_dict["custom_properties"] = [ - prop.model_dump() for prop in user_config.custom_properties - ] - if user_config.prompt: - skill_config_dict["prompt"] = user_config.prompt - - skill_config = SkillConfig(**skill_config_dict) - - # Check if skill is discoverable for this wingman (whitelist - must be in list) - if skill_config.name not in discoverable_skills: - continue - - # Check platform compatibility BEFORE loading the module - if skill_config.platforms: - if normalized_platform not in skill_config.platforms: - printr.print( - f"Skipping skill '{skill_config.name}' - not supported on {normalized_platform}", - color=LogType.WARNING, - server_only=True, - ) - continue - - # Load the skill module - skill = ModuleManager.load_skill( - config=skill_config, - settings=self.settings, - wingman=self, - ) - if skill: - # Set up skill methods - skill.threaded_execution = self.threaded_execution - - # Add to skills list WITHOUT validation - # Validation will happen lazily on first activation - self.skills.append(skill) - await self.prepare_skill(skill) - - except Exception as e: - skill_name = skill_folder_name - error_msg = f"Error loading skill '{skill_name}': {str(e)}" - await printr.print_async( - error_msg, - color=LogType.ERROR, - ) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - errors.append( - WingmanInitializationError( - wingman_name=self.name, - message=error_msg, - error_type=WingmanInitializationErrorType.SKILL_INITIALIZATION_FAILED, - ) - ) - - # Log summary of discoverable skills for this wingman - if self.skills: - skill_names = [s.config.name for s in self.skills] - await printr.print_async( - f"Discoverable skills ({len(skill_names)}): {', '.join(skill_names)}", - color=LogType.WINGMAN, - source=LogSource.WINGMAN, - source_name=self.name, - server_only=not self.settings.debug_mode, - ) - - return errors - - async def prepare_skill(self, skill: Skill): - """This method is called only once when the 
Skill is instantiated. - It is run AFTER validate() so you can access validated params safely here. - - You can override it if you need to react on data of this skill.""" - - async def unprepare_skill(self, skill: Skill): - """Remove a skill's registration. Called when a skill is disabled. - - Override in subclass to clean up skill-specific registrations.""" - pass - - async def enable_skill(self, skill_name: str) -> tuple[bool, str]: - """Enable a single skill without reinitializing all skills. - - Args: - skill_name: The display name of the skill to enable - - Returns: - (success, message) tuple - """ - import sys - - current_platform = sys.platform - platform_map = {"win32": "windows", "darwin": "darwin", "linux": "linux"} - normalized_platform = platform_map.get(current_platform, current_platform) - - # Check if skill is already enabled - for existing_skill in self.skills: - if existing_skill.config.name == skill_name: - return True, f"Skill '{skill_name}' is already enabled." - - # Find the skill config - available_skills = ModuleManager.read_available_skill_configs() - - # Build user config lookup by skill folder name - user_skill_configs: dict[str, "SkillConfig"] = {} - if self.config.skills: - for skill_config in self.config.skills: - folder_name = _get_skill_folder_from_module(skill_config.module) - user_skill_configs[folder_name] = skill_config - - for skill_folder_name, skill_config_path in available_skills: - try: - skill_config_dict = ModuleManager.read_config(skill_config_path) - if not skill_config_dict: - continue - - from api.interface import SkillConfig - - # Apply user overrides - if skill_folder_name in user_skill_configs: - user_config = user_skill_configs[skill_folder_name] - if user_config.custom_properties: - skill_config_dict["custom_properties"] = [ - prop.model_dump() for prop in user_config.custom_properties - ] - if user_config.prompt: - skill_config_dict["prompt"] = user_config.prompt - - skill_config = SkillConfig(**skill_config_dict) - - if skill_config.name != skill_name: - continue - - # Check platform compatibility - if skill_config.platforms: - if normalized_platform not in skill_config.platforms: - return ( - False, - f"Skill '{skill_name}' is not supported on {normalized_platform}.", - ) - - # Load and register the skill - skill = ModuleManager.load_skill( - config=skill_config, - settings=self.settings, - wingman=self, - ) - if skill: - skill.threaded_execution = self.threaded_execution - self.skills.append(skill) - await self.prepare_skill(skill) - - printr.print( - f"Skill '{skill_name}' activated (loaded and made discoverable).", - color=LogType.POSITIVE, - server_only=True, - ) - return True, f"Skill '{skill_name}' activated successfully." - - except Exception as e: - error_msg = f"Error activating skill '{skill_name}': {str(e)}" - await printr.print_async(error_msg, color=LogType.ERROR) - printr.print( - traceback.format_exc(), color=LogType.ERROR, server_only=True - ) - return False, error_msg - - return False, f"Skill '{skill_name}' not found." - - async def disable_skill(self, skill_name: str) -> tuple[bool, str]: - """Disable a single skill without reinitializing all skills. - - Args: - skill_name: The display name of the skill to disable - - Returns: - (success, message) tuple - """ - # Find the skill in our list - skill_to_remove = None - for skill in self.skills: - if skill.config.name == skill_name: - skill_to_remove = skill - break - - if not skill_to_remove: - return True, f"Skill '{skill_name}' is already deactivated." 
-
-        try:
-            # Unload the skill (cleanup resources, unsubscribe events)
-            await skill_to_remove.unload()
-
-            # Remove from skill list
-            self.skills.remove(skill_to_remove)
-
-            # Remove skill-specific registrations (tools, registry, etc.)
-            await self.unprepare_skill(skill_to_remove)
-
-            printr.print(
-                f"Skill '{skill_name}' deactivated (unloaded and removed from discoverable skills).",
-                color=LogType.WARNING,
-                server_only=True,
-            )
-            return True, f"Skill '{skill_name}' deactivated successfully."
-
-        except Exception as e:
-            error_msg = f"Error deactivating skill '{skill_name}': {str(e)}"
-            await printr.print_async(error_msg, color=LogType.ERROR)
-            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
-            return False, error_msg
-
-    def reset_conversation_history(self):
-        """This function is called when the user triggers the ResetConversationHistory command.
-        It's a global command that should be implemented by every Wingman that keeps a message history.
-        """
-
-    # ──────────────────────────── The main processing loop ──────────────────────────── #
-
-    async def process(self, audio_input_wav: str = None, transcript: str = None):
-        """The main method that gets called when the wingman is activated. This method controls what your wingman actually does and you can override it if you want to.
-
-        The base implementation here triggers the transcription and processing of the given audio input.
-        If you don't even need transcription, you can just override this entire process method. If you want transcription but then do something in addition, you can override the listed hooks.
-
-        Async so you can do async processing, e.g. send a request to an API.
-
-        Args:
-            audio_input_wav (str): The path to the audio file that contains the user's speech. This is a recording of what you said.
-
-        Hooks:
-        - async _transcribe: transcribe the audio to text
-        - async _get_response_for_transcript: process the transcript and return a text response
-        - async play_to_user: do something with the response, e.g. play it as audio
-        """
-
-        try:
-            process_result = None
-
-            benchmark_transcribe = None
-            if not transcript:
-                # transcribe the audio.
-                benchmark_transcribe = Benchmark(label="Voice transcription")
-                transcript = await self._transcribe(audio_input_wav)
-
-            interrupt = None
-            if transcript:
-                await printr.print_async(
-                    f"{transcript}",
-                    color=LogType.USER,
-                    source_name="User",
-                    source=LogSource.USER,
-                    benchmark_result=(
-                        benchmark_transcribe.finish() if benchmark_transcribe else None
-                    ),
-                )
-
-                # Further process the transcript.
-                # Return a string that is the "answer" to your passed transcript.
-
-                benchmark_llm = Benchmark(label="Command/AI Processing")
-                process_result, instant_response, skill, interrupt = (
-                    await self._get_response_for_transcript(
-                        transcript=transcript, benchmark=benchmark_llm
-                    )
-                )
-
-                actual_response = instant_response or process_result
-
-                if actual_response:
-                    await printr.print_async(
-                        f"{actual_response}",
-                        color=LogType.POSITIVE,
-                        source=LogSource.WINGMAN,
-                        source_name=self.name,
-                        skill_name=skill.name if skill else "",
-                        benchmark_result=benchmark_llm.finish(),
-                    )
-
-            if process_result:
-                if self.settings.streamer_mode:
-                    self.tower.save_last_message(self.name, process_result)
-
-                # the last step in the chain. You'll probably want to play the response to the user as audio using a TTS provider or mechanism of your choice.
-                await self.play_to_user(str(process_result), not interrupt)
-        except Exception as e:
-            await printr.print_async(
-                f"Error during processing of Wingman '{self.name}': {str(e)}",
-                color=LogType.ERROR,
-            )
-            printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
-
-    # ───────────────── virtual methods / hooks ───────────────── #
-
-    async def _transcribe(self, audio_input_wav: str) -> str | None:
-        """Transcribes the audio to text. You can override this method if you want to use a different transcription service.
-
-        Args:
-            audio_input_wav (str): The path to the audio file that contains the user's speech. This is a recording of what you said.
-
-        Returns:
-            str | None: The transcript of the audio file and the detected language as locale (if determined).
-        """
-        return None
-
-    async def _get_response_for_transcript(
-        self, transcript: str, benchmark: Benchmark
-    ) -> tuple[str | None, str | None, Skill | None, bool | None]:
-        """Processes the transcript and returns a response as text. This is where you'll do most of your work.
-        Pass the transcript to AI providers and build a conversation. Call commands or APIs. Play temporary results to the user etc.
-
-        Args:
-            transcript (str): The user's spoken text transcribed as text.
-
-        Returns:
-            A tuple of strings representing the response to a function call and/or an instant response.
-        """
-        return "", "", None, None
-
-    async def play_to_user(
-        self,
-        text: str,
-        no_interrupt: bool = False,
-        sound_config: Optional[SoundConfig] = None,
-    ):
-        """You'll probably want to play the response to the user as audio using a TTS provider or mechanism of your choice.
-
-        Args:
-            text (str): The response of your _get_response_for_transcript. This is usually the "response" from conversation with the AI.
-            no_interrupt (bool): prevent interrupting the audio playback
-            sound_config (SoundConfig): An optional sound configuration to use for the playback. If unset, the Wingman's sound config is used.
-        """
-        pass
-
-    # ───────────────────────────────── Commands ─────────────────────────────── #
-
-    def get_command(self, command_name: str) -> CommandConfig | None:
-        """Extracts the command with the given name.
-
-        Args:
-            command_name (str): the name of the command you used in the config
-
-        Returns:
-            CommandConfig | None: The command object from the config
-        """
-        if self.config.commands is None:
-            return None
-
-        command = next(
-            (item for item in self.config.commands if item.name == command_name),
-            None,
-        )
-        return command
-
-    def _select_command_response(self, command: CommandConfig) -> str | None:
-        """Returns one of the configured responses of the command. This base implementation returns a random one.
-
-        Args:
-            command (CommandConfig): The command object from the config
-
-        Returns:
-            str: A random response from the command's responses list in the config.
-        """
-        command_responses = command.responses
-        if (command_responses is None) or (len(command_responses) == 0):
-            return None
-
-        return random.choice(command_responses)
-
-    async def _execute_instant_activation_command(
-        self, transcript: str
-    ) -> list[CommandConfig] | None:
-        """Uses a fuzzy string matching algorithm to match the transcript to a configured instant_activation command and executes it immediately.
-
-        Args:
-            transcript (str): What the user said, transcribed to text. Needs to be similar to one of the defined instant_activation phrases to work.
-
-        Returns:
-            list[CommandConfig] | None: The executed instant activation commands, or None if no phrase matched.
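Worth noting: the matching in the body below uses `difflib.get_close_matches` with `cutoff=1`, which in practice only accepts an exact (case-insensitive) phrase. A standalone sketch with a hypothetical phrase map:

```python
import difflib

phrases = {"lower landing gear": ["LandingGearDown"]}  # phrase -> command names

match = difflib.get_close_matches(
    "Lower landing gear".lower(), phrases.keys(), n=1, cutoff=1
)
print(phrases[match[0]] if match else None)  # -> ['LandingGearDown']
```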
- """ - - try: - # create list with phrases pointing to commands - commands_by_instant_activation = {} - for command in self.config.commands: - if command.instant_activation: - for phrase in command.instant_activation: - if phrase.lower() in commands_by_instant_activation: - commands_by_instant_activation[phrase.lower()].append( - command - ) - else: - commands_by_instant_activation[phrase.lower()] = [command] - - # find best matching phrase - phrase = difflib.get_close_matches( - transcript.lower(), - commands_by_instant_activation.keys(), - n=1, - cutoff=1, - ) - - # if no phrase found, return None - if not phrase: - return None - - # execute all commands for the phrase - commands = commands_by_instant_activation[phrase[0]] - for command in commands: - await self._execute_command(command, True) - - # return the executed command - return commands - except Exception as e: - await printr.print_async( - f"Error during instant activation in Wingman '{self.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return None - - async def _execute_command(self, command: CommandConfig, is_instant=False) -> str: - """Triggers the execution of a command. This base implementation executes the keypresses defined in the command. - - Args: - command (dict): The command object from the config to execute - - Returns: - str: the selected response from the command's responses list in the config. "Ok" if there are none. - """ - - if not command: - return "Command not found" - - try: - if len(command.actions or []) == 0: - await printr.print_async( - f"No actions found for command: {command.name}", - color=LogType.WARNING, - ) - else: - await self.execute_action(command) - await printr.print_async( - f"Executed {'instant' if is_instant else 'AI'} command: {command.name}", - color=LogType.COMMAND, - ) - - # handle the global special commands: - if command.name == "ResetConversationHistory": - self.reset_conversation_history() - await printr.print_async( - f"Executed command: {command.name}", color=LogType.COMMAND - ) - - return self._select_command_response(command) or "Ok" - except Exception as e: - await printr.print_async( - f"Error executing command '{command.name}' for Wingman '{self.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return "ERROR DURING PROCESSING" # hints to AI that there was an Error - - async def execute_action(self, command: CommandConfig): - """Executes the actions defined in the command (in order). 
- - Args: - command (dict): The command object from the config to execute - """ - if not command or not command.actions: - return - - try: - for action in command.actions: - if action.keyboard: - if action.keyboard.press == action.keyboard.release: - # compressed key events - hold = action.keyboard.hold or 0.1 - if ( - action.keyboard.hotkey_codes - and len(action.keyboard.hotkey_codes) == 1 - ): - keyboard.direct_event( - action.keyboard.hotkey_codes[0], - 0 + (1 if action.keyboard.hotkey_extended else 0), - ) - time.sleep(hold) - keyboard.direct_event( - action.keyboard.hotkey_codes[0], - 2 + (1 if action.keyboard.hotkey_extended else 0), - ) - else: - keyboard.press( - action.keyboard.hotkey_codes or action.keyboard.hotkey - ) - time.sleep(hold) - keyboard.release( - action.keyboard.hotkey_codes or action.keyboard.hotkey - ) - else: - # single key events - if ( - action.keyboard.hotkey_codes - and len(action.keyboard.hotkey_codes) == 1 - ): - keyboard.direct_event( - action.keyboard.hotkey_codes[0], - (0 if action.keyboard.press else 2) - + (1 if action.keyboard.hotkey_extended else 0), - ) - else: - keyboard.send( - action.keyboard.hotkey_codes or action.keyboard.hotkey, - action.keyboard.press, - action.keyboard.release, - ) - - if action.mouse: - if action.mouse.move_to: - x, y = action.mouse.move_to - mouse.move(x, y) - - if action.mouse.move: - x, y = action.mouse.move - mouse.move(x, y, absolute=False, duration=0.5) - - if action.mouse.scroll: - mouse.wheel(action.mouse.scroll) - - if action.mouse.button: - if action.mouse.hold: - mouse.press(button=action.mouse.button) - time.sleep(action.mouse.hold) - mouse.release(button=action.mouse.button) - else: - mouse.click(button=action.mouse.button) - - if action.write: - keyboard.write(action.write) - - if action.wait: - time.sleep(action.wait) - - if action.audio: - await self.audio_library.handle_action( - action.audio, self.config.sound.volume - ) - except Exception as e: - await printr.print_async( - f"Error executing actions of command '{command.name}' for wingman '{self.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - - def threaded_execution(self, function, *args) -> threading.Thread | None: - """Execute a function in a separate thread.""" - try: - - def start_thread(function, *args): - if asyncio.iscoroutinefunction(function): - new_loop = asyncio.new_event_loop() - asyncio.set_event_loop(new_loop) - new_loop.run_until_complete(function(*args)) - new_loop.close() - else: - function(*args) - - thread = threading.Thread(target=start_thread, args=(function, *args)) - thread.name = function.__name__ - thread.start() - return thread - except Exception as e: - printr.print( - f"Error starting threaded execution: {str(e)}", color=LogType.ERROR - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return None - - async def update_config( - self, config: WingmanConfig, skip_config_validation: bool = True - ) -> bool: - """Update the config of the Wingman. - - This method should always be called if the config of the Wingman has changed. 
- - Args: - config: The new wingman configuration - skip_config_validation: If False, validate the config and rollback on error - - Returns: - True if config was updated successfully, False otherwise - """ - try: - if not skip_config_validation: - old_config = deepcopy(self.config) - - self.config = config - - # Propagate skill config changes to loaded skills - await self._update_skill_configs(config) - - if not skip_config_validation: - errors = await self.validate() - - for error in errors: - if ( - error.error_type - != WingmanInitializationErrorType.MISSING_SECRET - ): - self.config = old_config - return False - - return True - except Exception as e: - await printr.print_async( - f"Error updating config for wingman '{self.name}': {str(e)}", - color=LogType.ERROR, - ) - printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True) - return False - - async def _update_skill_configs(self, wingman_config: WingmanConfig) -> None: - """Propagate skill config changes to loaded skills. - - When the wingman config changes (e.g., user updates custom_properties for a skill), - we need to update the SkillConfig on each loaded skill instance so they see the new values. - """ - if not self.skills or not wingman_config.skills: - return - - # Build lookup of new skill configs by folder name - new_skill_configs: dict[str, "SkillConfig"] = {} - for skill_config in wingman_config.skills: - try: - folder_name = _get_skill_folder_from_module(skill_config.module) - except Exception: - printr.print( - f"Skipping skill config override with unexpected module format: '{skill_config.module}'", - color=LogType.WARNING, - server_only=True, - ) - continue - new_skill_configs[folder_name] = skill_config - - # Update each loaded skill if its config changed - for skill in self.skills: - # Get the folder name for this skill - try: - skill_folder = _get_skill_folder_from_module(skill.config.module) - except Exception: - printr.print( - f"Skipping loaded skill with unexpected module format: '{skill.config.module}'", - color=LogType.WARNING, - server_only=True, - ) - continue - - if skill_folder in new_skill_configs: - user_override = new_skill_configs[skill_folder] - - fields_set = getattr(user_override, "model_fields_set", None) - if fields_set is None: - # Pydantic v1 fallback - fields_set = getattr(user_override, "__fields_set__", set()) - - # Create updated config by copying current and applying overrides - # This preserves all default values while applying user overrides - updated_config = deepcopy(skill.config) - - # Apply overrides even if they're explicitly empty. - # This allows users to clear custom properties/prompt in the UI. - if "custom_properties" in fields_set: - updated_config.custom_properties = user_override.custom_properties - if "prompt" in fields_set: - updated_config.prompt = user_override.prompt - - # Let the skill handle the config update (will compare old vs new) - await skill.update_config(updated_config) - - async def save_config(self): - """Save the config of the Wingman.""" - self.tower.save_wingman(self.name) - - async def save_commands(self): - """Save only the commands section of this wingman's config. - - This performs a partial YAML update - only the commands field is modified - in the config file, avoiding full config serialization. This is much safer - than save_config() for command-only changes as it won't accidentally - overwrite other fields. 
- - Use this instead of save_config() when you only changed command definitions, - instant_activation phrases, or other command-related fields. - - Example use cases: - - QuickCommands learning instant activation phrases - - Skills dynamically adding/modifying commands - - Skills updating command responses or actions - """ - self.tower.save_wingman_commands(self.name) - - async def update_settings(self, settings: SettingsConfig): - """Update the settings of the Wingman. This method should always be called when the user Settings have changed.""" - self.settings = settings - await self.init_skills() - # Also reload MCPs if the wingman supports them - if hasattr(self, "init_mcps"): - await self.init_mcps() - - printr.print(f"Wingman {self.name}'s settings changed", server_only=True)
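Finally, the `threaded_execution` helper above is a generally useful pattern: coroutines get a fresh event loop on the worker thread, while plain callables run directly. A self-contained sketch of the same idea (the `greet` coroutine is just a stand-in):

```python
import asyncio
import threading


def threaded_execution(function, *args) -> threading.Thread:
    def start_thread(function, *args):
        if asyncio.iscoroutinefunction(function):
            # coroutines need their own event loop on this thread
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            loop.run_until_complete(function(*args))
            loop.close()
        else:
            function(*args)

    thread = threading.Thread(target=start_thread, args=(function, *args))
    thread.name = function.__name__
    thread.start()
    return thread


async def greet(name: str):
    await asyncio.sleep(0.1)
    print(f"hello, {name}")


threaded_execution(greet, "wingman").join()
```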