From 8d609981cdaf69b39167b20fa6ea6b138ae5bda5 Mon Sep 17 00:00:00 2001 From: PierreGode Date: Wed, 1 Oct 2025 20:28:16 +0200 Subject: [PATCH] Add cross-platform audio input fallback --- aigirlfriend.py | 96 +++++++++++++++++++++++++++++++++++++----------- requirements.txt | 2 +- 2 files changed, 76 insertions(+), 22 deletions(-) diff --git a/aigirlfriend.py b/aigirlfriend.py index 247c4a0..56c9624 100644 --- a/aigirlfriend.py +++ b/aigirlfriend.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import platform import re import wave import tempfile @@ -11,13 +12,25 @@ import numpy as np import pyaudio -import sounddevice as sd from openai import OpenAI from langchain.memory import ConversationBufferMemory from langchain.schema import AIMessage, HumanMessage, SystemMessage +try: # pragma: no cover - optional dependency per platform + import sounddevice as sd # type: ignore +except ImportError: # pragma: no cover + sd = None + warnings.filterwarnings("ignore", category=DeprecationWarning) +SYSTEM_NAME = platform.system() or "Unknown" +if SYSTEM_NAME == "Windows" or sd is None: + AUDIO_INPUT_BACKEND = "pyaudio" +else: + AUDIO_INPUT_BACKEND = "sounddevice" + +print(f"🔊 Detected {SYSTEM_NAME}; using {AUDIO_INPUT_BACKEND} for recording.") + USER_NAME = "Love" MEMORY_FILE = "user_memory.txt" TTS_MODEL = "tts-1" @@ -96,6 +109,13 @@ def play_audio_stream(stream, audio_stream): for chunk in audio_stream.iter_bytes(chunk_size=1024): stream.write(chunk) +def _calculate_volume_db(samples: np.ndarray) -> float: + if samples.size == 0: + return -np.inf + rms = np.sqrt(np.mean(samples.astype(np.float32) ** 2)) + return 20 * np.log10(rms / 32768 + 1e-10) + 100 + + def record_audio_tempfile_vad( samplerate: int = 16000, db_threshold: float = 50.0, max_duration: int = 20 ) -> Optional[str]: @@ -108,31 +128,65 @@ def record_audio_tempfile_vad( """ print("🎙️ Listening…") - frames: list[np.ndarray] = [] + frames: list[bytes] = [] block_duration = 0.2 block_size = int(samplerate * block_duration) silence_duration = 0.0 speech_duration = 0.0 has_speech = False - with sd.InputStream(samplerate=samplerate, channels=1, dtype="int16") as stream: - while True: - block, _ = stream.read(block_size) - rms = np.sqrt(np.mean(block.astype(np.float32) ** 2)) - volume_db = 20 * np.log10(rms / 32768 + 1e-10) + 100 - - if volume_db >= db_threshold: - has_speech = True - silence_duration = 0.0 - speech_duration += block_duration - frames.append(block) - if speech_duration >= max_duration: - break - elif has_speech: - silence_duration += block_duration - frames.append(block) - if silence_duration > 1.0: - break + if AUDIO_INPUT_BACKEND == "sounddevice": + if sd is None: + raise RuntimeError("SoundDevice backend requested but module is not available.") + with sd.InputStream(samplerate=samplerate, channels=1, dtype="int16") as stream: + while True: + block, _ = stream.read(block_size) + block_array = block.reshape(-1) + volume_db = _calculate_volume_db(block_array) + + if volume_db >= db_threshold: + has_speech = True + silence_duration = 0.0 + speech_duration += block_duration + frames.append(block_array.astype(np.int16).tobytes()) + if speech_duration >= max_duration: + break + elif has_speech: + silence_duration += block_duration + frames.append(block_array.astype(np.int16).tobytes()) + if silence_duration > 1.0: + break + else: + p = pyaudio.PyAudio() + stream = p.open( + format=pyaudio.paInt16, + channels=1, + rate=samplerate, + input=True, + frames_per_buffer=block_size, + ) + try: + while True: + data = stream.read(block_size, exception_on_overflow=False) + block_array = np.frombuffer(data, dtype=np.int16) + volume_db = _calculate_volume_db(block_array) + + if volume_db >= db_threshold: + has_speech = True + silence_duration = 0.0 + speech_duration += block_duration + frames.append(data) + if speech_duration >= max_duration: + break + elif has_speech: + silence_duration += block_duration + frames.append(data) + if silence_duration > 1.0: + break + finally: + stream.stop_stream() + stream.close() + p.terminate() if not has_speech: return None @@ -143,7 +197,7 @@ def record_audio_tempfile_vad( wf.setnchannels(1) wf.setsampwidth(2) wf.setframerate(samplerate) - wf.writeframes(b"".join([b.tobytes() for b in frames])) + wf.writeframes(b"".join(frames)) return temp.name diff --git a/requirements.txt b/requirements.txt index 3993045..bf1f28f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ SpeechRecognition setuptools langchain-community scipy -sounddevice \ No newline at end of file +sounddevice