20 changes: 12 additions & 8 deletions .github/actions/python-uv-setup/action.yml
@@ -1,27 +1,31 @@
name: Python + uv setup
description: Checkout, set up uv/Python, and sync dev deps
description: Set up uv/Python and sync the requested dependency profile
inputs:
python-version:
description: Python version to install
required: false
default: "3.11.6"
sync-flags:
description: Extra flags for uv sync (e.g., --all-extras --dev --frozen)
description: Extra flags for uv sync (e.g., --locked --group test)
required: false
default: "--all-extras --dev"
default: "--locked"
install-system-packages:
description: Install Linux system packages required by audio / Raspberry Pi jobs
required: false
default: "false"
working-directory:
description: Directory to run in
required: false
default: "."
runs:
using: composite
steps:
- name: Install the latest version of uv
- name: Install uv
id: setup_uv
uses: astral-sh/setup-uv@v4
uses: astral-sh/setup-uv@v7
with:
python-version: ${{ inputs.python-version }}
version: "latest"
version: "0.11.2"
enable-cache: true
prune-cache: false
cache-suffix: ${{ inputs.python-version }}
@@ -33,9 +37,9 @@ runs:
shell: bash
run: |
echo "uv download cache hit: ${{ steps.setup_uv.outputs['cache-hit'] }}"
echo ".venv cache hit: ${{ steps.cache_venv.outputs.cache-hit }}"

- name: Install system dependencies
if: ${{ inputs.install-system-packages == 'true' }}
shell: bash
run: |
if command -v sudo &> /dev/null; then
@@ -44,7 +48,7 @@
SUDO=""
fi
$SUDO apt-get update
$SUDO apt-get install -y ffmpeg portaudio19-dev alsa-utils espeak-ng python3-pyaudio
$SUDO apt-get install -y ffmpeg portaudio19-dev alsa-utils

- name: Install the project dependencies
shell: bash
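For reference, a caller workflow would consume this composite action roughly as sketched below. The action path matches this repository's layout; the job name, Python version, and `sync-flags` value are illustrative only, and the explicit `actions/checkout` step reflects the new description, which no longer claims the action checks out the repository itself:

```yaml
# Hypothetical caller job; only the action path is taken from this diff.
jobs:
  example:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4  # checkout is now the caller's responsibility
      - uses: ./.github/actions/python-uv-setup
        with:
          python-version: "3.11.6"
          sync-flags: "--locked --group test"
          install-system-packages: "false"
```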
62 changes: 40 additions & 22 deletions .github/workflows/run_tests.yml
@@ -12,41 +12,28 @@ concurrency:
cancel-in-progress: true

env:
# If set, uv runs without updating the uv.lock file. Equivalent to `uv run --frozen`.
UV_FROZEN: "1"
# Set the default log level for the tests
LOG_LEVEL: info

jobs:
ruff:
name: Ruff
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
uses: ./.github/actions/python-uv-setup
with:
sync-flags: "--locked --no-default-groups --group lint"
- name: Run Ruff linter
run: uv run ruff check .
- name: Run Ruff formatter
run: uv run ruff format --check .

mypy:
name: Mypy
runs-on: ubuntu-24.04-arm
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
uses: ./.github/actions/python-uv-setup
# Restore mypy cache to speed up the run
- name: Restore mypy cache
uses: actions/cache/restore@v4
with:
path: .mypy_cache
key: mypy-cache-${{ hashFiles('pyproject.toml') }}

test:
name: Test "${{ inputs.marker }}"
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-latest
# env:
# ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
# GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
@@ -60,7 +47,38 @@ jobs:
run: uname -m
- name: Install dependencies
uses: ./.github/actions/python-uv-setup
- name: Sync for update
run: uv sync --extra all
with:
sync-flags: "--locked --no-default-groups --group test"
- name: Run core tests
run: uv run pytest -n auto -m "${{ inputs.marker }}" tests/
run: uv run pytest -n auto -m "${{ inputs.marker }}" --cov=src --cov-report=xml tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
file: ./coverage.xml
flags: unittests
name: codecov-umbrella
fail_ci_if_error: false

test-raspberry-pi:
name: Test Raspberry Pi profile
runs-on: ubuntu-24.04-arm
timeout-minutes: 30
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Check architecture
run: uname -m
- name: Install dependencies
uses: ./.github/actions/python-uv-setup
with:
install-system-packages: "true"
sync-flags: "--locked --no-default-groups --group test --extra raspberry-pi"
- name: Run Raspberry Pi profile tests
run: uv run pytest -n auto -m "${{ inputs.marker }}" --cov=src --cov-report=xml tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
file: ./coverage.xml
flags: unittests-raspberry-pi
name: codecov-umbrella
fail_ci_if_error: false
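Both test jobs select tests with `pytest -m "${{ inputs.marker }}"`, which relies on markers being registered so pytest can filter (and, with `--strict-markers`, reject typos). A hedged `pyproject.toml` sketch, where the marker names are examples only — the real values come from the workflow's `marker` input:

```toml
# Hypothetical marker registration; marker names are illustrative.
[tool.pytest.ini_options]
markers = [
    "core: fast tests that run on every push",
    "raspberry_pi: tests that need the Raspberry Pi audio stack",
]
addopts = "--strict-markers"
```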
5 changes: 2 additions & 3 deletions .gitignore
@@ -3,7 +3,6 @@ __pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
@@ -23,8 +22,7 @@ wheels/
# Virtual environments
venv/
env/
ENV/
.venv
.venv/

# IDE
.vscode/
@@ -50,6 +48,7 @@ node_modules/

# Dataset
cache/
.cache/

# Temporary files
*.tmp
25 changes: 21 additions & 4 deletions .pre-commit-config.yaml
@@ -1,14 +1,22 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.5
rev: v0.15.12
hooks:
- id: ruff-check
- id: ruff
name: ruff check --fix (commit)
stages: [pre-commit]
args: [--fix]
description: "Run 'ruff check' for extremely fast Python linting"
- id: ruff-format
name: ruff-format (commit)
stages: [pre-commit]
description: "Run 'ruff format' for extremely fast Python formatting"
- id: ruff
name: ruff check (push)
stages: [pre-push]
description: "Run 'ruff check' for extremely fast Python linting"
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v6.0.0
hooks:
- id: trailing-whitespace
description: "Run 'trailing-whitespace' for removing trailing whitespace"
@@ -25,8 +33,17 @@ repos:
- id: check-added-large-files
description: "Run 'check-added-large-files' for checking added large files"
- repo: https://github.com/compilerla/conventional-pre-commit
rev: v2.3.0
rev: v4.4.0
hooks:
- id: conventional-pre-commit
stages: [commit-msg]
description: "Run 'conventional-pre-commit' for conventional commit messages"
- repo: local
hooks:
- id: basedpyright
name: basedpyright strict type-check (push)
entry: uv run basedpyright --level error
language: system
types: [python]
pass_filenames: false
stages: [pre-push]
47 changes: 36 additions & 11 deletions config.yaml
@@ -7,25 +7,50 @@ paths:
cache: ".cache"
models: "models"

# Speech-to-Text configuration
stt:
engine: "whisper" # whisper, vosk, faster_whisper
model_size: "tiny"
language: "en"
transformers: false
transformers_engine: "huggingface"
# download_root: null # Let Python code build the path dynamically

# Text-to-Speech configuration
tts:
engine: "piper"
model_name: "jarvis-medium.onnx"
# model_path: null # Let Python code build the path dynamically
cli_mode: false
speed: 1.0
volume: 1.0

# Voice Activity Detection configuration
vad:
min_speech_duration_ms: 250
min_silence_duration_ms: 500
silence_timeout_seconds: 3
max_recording_seconds: 15
max_recording_seconds: 10
threshold: 0.5

# Automatic Speech Recognition (ASR) configuration
asr:
engine: "faster-whisper" # "whisper", "faster-whisper"
model_size: "tiny"
language: "en"
translate: false
transformers: false
transformers_engine: "huggingface" # "huggingface" or "onnxruntime"
device: "cpu" # Pi5 : CPU (or "hailo")
compute_type: "int8"
skip_native_teardown: true # Avoid native teardown segfaults on some ARM/PortAudio stacks
# download_root: null # Let Python code build the path dynamically

wake:
wake_word: "hey_jarvis"
model_name: "hey_jarvis_v0.1"
inference_framework: "onnx"
threshold: 0.5
cooldown_seconds: 2.0
noise_suppression: true

audio:
input_sample_rate: 44100
input_chunk_ms: 30 # audio chunk size in ms
input_chunk_size: 300
input_device_index: 2 # No input-capable PyAudio device detected on this machine; use system default when a mic is attached
volume: 0.8 # 80% of full volume
output_device_index: 0 # PyAudio output device: USB PnP Audio Device: Audio (hw:3,0)
output_sample_rate: 44100
output_chunk_ms: 30 # audio chunk size in ms
output_chunk_size: 500
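The audio section carries both a chunk duration (`*_chunk_ms`) and a chunk size; a chunk of `chunk_ms` milliseconds at `sample_rate` Hz contains `sample_rate * chunk_ms / 1000` frames. Note that the literal `input_chunk_size: 300` and `output_chunk_size: 500` values above are independent settings, not derived from this formula. A minimal sketch of the relationship — not the project's actual config loader:

```python
# Sketch of the sample-rate / chunk-duration relationship implied by the
# audio section above; nothing here is the project's actual loader.
def chunk_frames(sample_rate_hz: int, chunk_ms: int) -> int:
    """Number of audio frames in one chunk of `chunk_ms` milliseconds."""
    return sample_rate_hz * chunk_ms // 1000

# 30 ms at the configured 44.1 kHz input rate:
print(chunk_frames(44100, 30))  # 1323
```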
29 changes: 15 additions & 14 deletions docs/DEV_PROCESS.md
@@ -10,31 +10,26 @@ Here is a prioritization proposed for the context: Raspberry Pi 5, Hailo-8L, NVM
- **Offline Voice Recognition (USB Microphone)**
- *Why?* It is the core of user interaction, quick to set up, and resource-efficient.
- *Technical Impact:* Low to moderate, with comprehensive documentation, abundant examples, and existing projects.
- *Estimated Time:* 1 to 3 days for a proof of concept.

- **Offline Speech Synthesis**
- *Why?* Provides immediate audio feedback, easy to install, with natural-sounding voices.
- *Technical Impact:* Low, with direct integration into Python.
- *Estimated Time:* 1 to 2 days to test and integrate.

### 2. **Intermediate Priority: Computer Vision**

- **Facial Recognition (face_recognition + official camera)**
- *Why?* Adds a layer of personalization and security, but requires more resources and optimization.
- *Technical Impact:* Moderate; requires proper lighting and testing on a reduced dataset.
- *Estimated Time:* 3 to 5 days for reliable detection on a few faces.

- **Object Recognition**
- *Why?* Advanced functionality, but more complex to integrate and optimize.
- *Technical Impact:* High; requires model management, hardware acceleration, and extensive testing.
- *Estimated Time:* 5 to 8 days for smooth detection of common objects.

### 3. **Secondary Priority: Integration and User Experience**

- **Module Fusion (main script, command management)**
- *Why?* Necessary for a coherent assistant, but to be executed once the basic modules are functional.
- *Technical Impact:* Variable, depending on the complexity of the desired interface.
- *Estimated Time:* 3 to 5 days for a basic integration.

- **User Interface (screen, local web interface)**
- *Why?* To enhance user experience.
@@ -53,7 +48,11 @@ Here is a prioritization proposed for the context: Raspberry Pi 5, Hailo-8L, NVM

### Proposed Sequence to Meet Constraints and Integrate Modules

- **Audio Modules** (voice recognition and speech synthesis): they are quick to deploy and validate basic interaction.
- **Audio Modules**:
1. Confirm the USB microphone and speaker path with [audio_usb_test.md](audio_usb_test.md).
2. Validate offline speech recognition with [STT_offline.md](STT_offline.md).
3. Validate offline text-to-speech with [TTS_offline.md](TTS_offline.md).
4. Verify the integrated wake-word, ASR, and TTS loop with `examples/VAD/voice_agent_offline.py`.
- **Follow with Vision Modules**:
1. Start with facial recognition (simpler than object recognition).
2. Proceed to object detection.
@@ -64,16 +63,18 @@ Here is a prioritization proposed for the context: Raspberry Pi 5, Hailo-8L, NVM
### Sequenced Guides for Different Modules

- **Audio Modules**
1. [Offline Speech-to-Text (STT) Guide](STT_offline.md)
2. [Offline Text-to-Speech (TTS) Guide](TTS_offline.md)
3. [Offline Speech-to-Speech demo](https://github.com/chcavignx/AI-Autonomous-Assistant/blob/main/src/audio/voice_agent_offline.md)
Demo application that listens to voice input from the microphone and responds with speech for specific intents (src/audio/voice_agent_offline.py)
1. [Offline Speech Recognition (STT)](STT_offline.md)
2. [Offline Text-to-Speech (TTS)](TTS_offline.md)
3. [Voice stack and VAD models](STS_VAD_models.md)
4. [USB microphone and speaker test](audio_usb_test.md)
5. [Offline Speech-to-Speech demo](../examples/VAD/voice_agent_offline.md)
Demo application that listens for a wake word, transcribes the next utterance, generates a keyword response, and speaks it back (`examples/VAD/voice_agent_offline.py`)

- **Vision Modules**
1. [Facial Recognition Guide](facial_recognition.md **TO DO**)
2. [Object Recognition Guide](object_recognition.md **TO DO**)
3. [Object Recognition demo](object_recognition_demo.md **TO DO**)
Demo application that detects and labels objects in real-time. (**TO DO**)
1. [Facial Recognition Guide](facial_recognition.md **!TO DO!**)
2. [Object Recognition Guide](object_recognition.md **!TO DO!**)
3. [Object Recognition demo](object_recognition_demo.md **!TO DO!**)
Demo application that detects and labels objects in real-time. (**!TO DO!**)

- **Module Integration**
