Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ Always use type hints. Use `TYPE_CHECKING` guard for circular imports:
```python
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from wingmen.open_ai_wingman import OpenAiWingman
from wingman import Wingman
```

### Async/Await
Expand Down Expand Up @@ -128,7 +128,7 @@ api_key = await self.retrieve_secret(
- `wingman_core.py` - FastAPI app, WebSocket server, REST endpoints
- `Tower.py` - Wingman factory and lifecycle manager
- `Wingman.py` - Base class for all Wingmen
- `OpenAiWingman.py` - Primary LLM-powered Wingman implementation
- `Wingman.py` - Unified Wingman class (formerly split into base Wingman and OpenAiWingman subclass)
- `SkillRegistry` - Progressive tool disclosure for skills
- `CapabilityRegistry` - Unified skills + MCP discovery
- `SecretKeeper` - Secure API key management
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,7 @@ If you want to read some code first and understand how it all works, we recommen
- `http://127.0.0.1:49111/docs` - The OpenAPI (formerly Swagger) spec
- `wingman_core.py` - most of the public API endpoints that Wingman AI exposes
- The config files in `%APP_DATA%\ShipBit\WingmanAI\[version]` to get an idea of what's configurable.
- `Wingman.py` - the base class for all Wingmen
- `OpenAIWingman.py` - derived from Wingman, using all the providers
- `Wingman.py` - the unified Wingman class supporting all providers
- `Tower.py` - the factory that creates Wingmen

If you're planning to develop a major feature or new integration, please contact us on [Discord](https://www.shipbit.de/discord) first and let us know what you're up to. We'll be happy to help you get started and make sure your work isn't wasted because we're already working on something similar.
Expand Down
7 changes: 1 addition & 6 deletions api/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ class FeaturesConfig(BaseModel):
conversation_provider: ConversationProvider
remember_messages: Optional[int] = None
image_generation_provider: ImageGenerationProvider
use_generic_instant_responses: bool
# use_generic_instant_responses removed in v2.1.0


class AudioFile(BaseModel):
Expand Down Expand Up @@ -960,14 +960,9 @@ def __getitem__(self, item):
def __setitem__(self, key, value):
self.extra_properties[key] = value

custom_properties: Optional[list[CustomProperty]] = None
"""You can add custom properties here to use in your custom wingman class."""

disabled: Optional[bool] = False
"""Set this to true if you want to disable this wingman. You can also just remove it from the config."""

custom_class: Optional[CustomClassConfig] = None
"""If you want to use a custom Wingman (Python) class, you can specify it here."""
name: str
"""The "friendly" name of this Wingman. Can be changed by the user."""
description: Optional[str] = None
Expand Down
37 changes: 30 additions & 7 deletions providers/edge.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from os import path
from edge_tts import Communicate
from api.interface import EdgeTtsConfig, SoundConfig
from providers.provider_base import (
BaseProvider,
ProviderCapability,
capabilities,
TtsProvider,
)
from services.audio_player import AudioPlayer
from services.file import get_writable_dir
from services.printr import Printr
Expand All @@ -11,20 +17,37 @@
printr = Printr()


class Edge:
def __init__(self):
@capabilities(ProviderCapability.TTS)
class Edge(BaseProvider, TtsProvider):
"""Edge TTS provider using Microsoft Edge's free text-to-speech."""

def __init__(self, config: EdgeTtsConfig) -> None:
    """Set up the Edge TTS provider.

    Args:
        config: Edge TTS configuration; forwarded to ``BaseProvider``.
    """
    # No API key is supplied for this provider, so the base class gets None.
    BaseProvider.__init__(self, config=config, api_key=None)
    # Starts out empty. NOTE(review): presumably filled later with randomly
    # chosen voices - confirm against the code that writes to it.
    self.random_voices = {}

async def play_audio(
# Protocol implementation: TtsProvider
async def synthesize(
self,
text: str,
config: EdgeTtsConfig,
sound_config: SoundConfig,
audio_player: AudioPlayer,
sound_config: SoundConfig,
wingman_name: str,
):
**kwargs
) -> None:
"""Synthesize speech using Edge TTS.

Args:
text: Text to convert to speech
audio_player: AudioPlayer instance for playback
sound_config: Sound configuration
wingman_name: Name of wingman
**kwargs: Unused (kept for protocol compatibility)

Returns:
None - Audio is played directly via audio_player
"""
communicate, output_file = await self.__generate_speech(
text=text, voice=config.voice
text=text, voice=self.config.voice
)
audio, sample_rate = audio_player.get_audio_from_file(output_file)

Expand Down
38 changes: 31 additions & 7 deletions providers/elevenlabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,23 @@
from elevenlabslib import User, GenerationOptions, PlaybackOptions, SFXOptions
from api.enums import SoundEffect, WingmanInitializationErrorType
from api.interface import ElevenlabsConfig, SoundConfig, WingmanInitializationError
from providers.provider_base import (
BaseProvider,
ProviderCapability,
capabilities,
TtsProvider,
)
from services.audio_player import AudioPlayer
from services.sound_effects import get_sound_effects
from services.websocket_user import WebSocketUser


class ElevenLabs:
def __init__(self, api_key: str, wingman_name: str):
@capabilities(ProviderCapability.TTS)
class ElevenLabs(BaseProvider, TtsProvider):
"""ElevenLabs TTS provider with high-quality voice synthesis."""

def __init__(self, config: ElevenlabsConfig, api_key: str, wingman_name: str):
BaseProvider.__init__(self, config=config, api_key=api_key)
self.wingman_name = wingman_name
self.user = User(api_key)

Expand All @@ -30,15 +40,29 @@ def validate_config(
)
return errors

async def play_audio(
# Protocol implementation: TtsProvider
async def synthesize(
self,
text: str,
config: ElevenlabsConfig,
sound_config: SoundConfig,
audio_player: AudioPlayer,
sound_config: SoundConfig,
wingman_name: str,
stream: bool,
):
**kwargs
) -> None:
"""Synthesize speech using ElevenLabs with streaming support.

Args:
text: Text to convert to speech
audio_player: AudioPlayer instance for playback
sound_config: Sound configuration
wingman_name: Name of wingman
**kwargs: Additional parameters (stream, etc.)

Returns:
None - Audio is played directly via audio_player
"""
config = self.config
stream = kwargs.get("stream", False)
voice = (
self.user.get_voice_by_ID(config.voice.id)
if config.voice.id
Expand Down
82 changes: 76 additions & 6 deletions providers/faster_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,31 @@
FasterWhisperSttConfig,
WingmanInitializationError,
)
from providers.provider_base import (
BaseProvider,
ProviderCapability,
capabilities,
SttProvider,
)
from services.printr import Printr

MODELS_DIR = "faster-whisper-models"


class FasterWhisper:
@capabilities(ProviderCapability.STT)
class FasterWhisper(BaseProvider, SttProvider):
def __init__(
self,
settings: FasterWhisperSettings,
app_root_path: str,
app_is_bundled: bool,
config: FasterWhisperSettings,
api_key: str = None, # Not used but required by BaseProvider
app_root_path: str = None,
app_is_bundled: bool = False,
wingman_name: str = None, # For hotword assembly
):
BaseProvider.__init__(self, config=config, api_key=api_key)
self.printr = Printr()
self.settings = settings
self.settings = config # Alias for backward compatibility
self.wingman_name = wingman_name

self.is_windows = platform.system() == "Windows"
if self.is_windows:
Expand Down Expand Up @@ -56,7 +67,66 @@ def __update_model(self):
f"Failed to initialize FasterWhisper with model {model_file}. Error: {e}"
)

def transcribe(
# Protocol implementation: SttProvider
async def transcribe(self, filename: str, **kwargs) -> str:
    """Transcribe an audio file with the FasterWhisper model.

    Args:
        filename: Path to the audio file.
        **kwargs: Optional per-call overrides:
            config: A ``FasterWhisperSttConfig`` to use for this call. Any
                other value (including the provider's own settings object)
                is replaced by the built-in defaults below.
            hotwords: Extra hotwords (``list[str]``) appended to the
                assembled list; kept for backward compatibility.

    Returns:
        The transcribed text, or ``None`` when ``_transcribe_sync`` returns
        a falsy result. The ``str`` annotation is kept unchanged so the
        method signature stays compatible with existing callers.
    """
    # Prefer an explicitly passed config; otherwise fall back to whatever
    # BaseProvider stored on the instance (if anything).
    config = kwargs.get("config", getattr(self, "config", None))
    if not isinstance(config, FasterWhisperSttConfig):
        # The provider-level object is not an STT config, so use defaults.
        config = FasterWhisperSttConfig(
            beam_size=5,
            best_of=5,
            temperature=0.0,
            no_speech_threshold=0.6,
            language=None,
            multilingual=True,
            language_detection_threshold=0.5,
            hotwords=[],
            additional_hotwords=[],
        )

    # Collect hotwords from every source, wingman name first.
    candidates: list[str] = []
    if self.wingman_name:
        candidates.append(self.wingman_name)

    # The resolved STT config carries `hotwords` / `additional_hotwords`;
    # the settings object is still consulted for backward compatibility.
    for source in (config, self.settings):
        for field_name in ("hotwords", "additional_hotwords"):
            candidates.extend(getattr(source, field_name, None) or [])

    # Hotwords handed in by the caller (backward compatibility).
    candidates.extend(kwargs.get("hotwords") or [])

    # De-duplicate while preserving first-seen order so the list handed to
    # the model is deterministic (a set would reorder it between runs).
    hotwords = list(dict.fromkeys(candidates))

    # NOTE(review): this call is synchronous inside a coroutine; if it is
    # long-running it will block the event loop - confirm whether callers
    # already offload it to a worker thread.
    result = self._transcribe_sync(
        config=config,
        filename=filename,
        hotwords=hotwords,
    )
    return result.text if result else None

def _transcribe_sync(
self,
config: FasterWhisperSttConfig,
filename: str,
Expand Down
Loading