From 7ab650f3e2f0b2d7deb7a0b778378cbd319bdc7f Mon Sep 17 00:00:00 2001 From: Sami Horttanainen Date: Tue, 17 Mar 2026 23:00:57 +0200 Subject: [PATCH 1/3] feat: Add audio amplifier to gemini live --- src/gemini_live.py | 18 ++++++++++++++++-- src/tests/gemini_live_test.py | 7 ++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/gemini_live.py b/src/gemini_live.py index a334699..00652de 100644 --- a/src/gemini_live.py +++ b/src/gemini_live.py @@ -1,6 +1,7 @@ import asyncio import json import time +import numpy as np from google import auth, genai @@ -51,7 +52,10 @@ "query": { "type": "string", "description": ( - "The text query that is used to query vector database" + "The text query that is used to query vector database." + "Only in english. Concise but enough text to have good query." + "Example: Client Elisa: budget of the project." + "Example: Elisa backend hire decision capacity requirements" ), }, "thinking_context": { @@ -90,7 +94,16 @@ ) -class GeminiLiveSession: # pylint: disable=too-many-instance-attributes +def amplify_chunk(pcm_chunk: bytes, gain: float = 2.0) -> bytes: + samples = np.frombuffer(pcm_chunk, dtype=np.int16).copy() + + # Amplify with clipping to avoid overflow + samples = np.clip(samples * gain, -32768, 32767).astype(np.int16) + + return samples.tobytes() + + +class GeminiLiveSession: # pylint: disable=too-many-instance-attributes def __init__(self, ws): self.ws = ws self._audio_queue: asyncio.Queue = asyncio.Queue(maxsize=10) @@ -121,6 +134,7 @@ def _log_dropped_audio_if_needed(self): def push_audio(self, chunk: bytes): try: + chunk = amplify_chunk(chunk, gain=45.0) self._audio_queue.put_nowait(chunk) except asyncio.QueueFull: self._log_dropped_audio_if_needed() diff --git a/src/tests/gemini_live_test.py b/src/tests/gemini_live_test.py index 43714fd..daf5464 100644 --- a/src/tests/gemini_live_test.py +++ b/src/tests/gemini_live_test.py @@ -30,17 +30,18 @@ def test_session_initialization(self, session, mock_websocket): def test_push_audio_adds_to_queue(self, session): """Test that audio chunks are added to the queue""" - audio_chunk = b"test audio data" + audio_chunk = b"\x00\x01" * 8 # valid int16 PCM bytes session.push_audio(audio_chunk) assert session._audio_queue.qsize() == 1 def test_push_audio_ignores_when_queue_full(self, session): """Test that audio is silently dropped when queue is full""" + pcm_chunk = b"\x00\x01" * 4 # valid int16 PCM bytes # Fill the queue for _ in range(10): - session.push_audio(b"data") + session.push_audio(pcm_chunk) # Try to add one more - session.push_audio(b"extra") + session.push_audio(pcm_chunk) assert session._audio_queue.qsize() == 10 @pytest.mark.asyncio From c4a51b2552c52559f95e03977b4770cb3b450521 Mon Sep 17 00:00:00 2001 From: Sami Horttanainen Date: Tue, 17 Mar 2026 23:04:21 +0200 Subject: [PATCH 2/3] fix: enhance prompt, change model and fix bug --- src/memory_extractor.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/memory_extractor.py b/src/memory_extractor.py index 2a8ec4d..b59b97d 100644 --- a/src/memory_extractor.py +++ b/src/memory_extractor.py @@ -25,7 +25,7 @@ def get_client(): CLIENT = genai.Client( vertexai=True, project=project, - location="europe-north1", + location="global", ) return CLIENT @@ -35,10 +35,21 @@ def get_client(): SAVE: deadlines, decisions, scope changes, budget figures, named responsibilities, technical blockers SKIP: how a decision was reached, confirmations of things already stated, small talk, food, office logistics, parking -For "name": create a short descriptive title that captures the key topic of the conversation. -A fact is worth saving only if forgetting it in 3 months would cause a mistake. -Do not save process steps (e.g. "escalation initiated"), save the outcome (e.g. "Lisa approved backend hire"). -Do not save the same fact twice with different wording. +Rules: +- Only save facts explicitly stated in the transcript. Never infer, assume, or fill in details not directly mentioned — especially numbers, names, and dates. +- Save only if forgetting in 3 months would cause a real mistake. +- Save outcomes, not process steps. Good: "Client Elisa approved a backend hire — team lacked capacity to meet current backend requirements." Bad: "Hiring process initiated." +- One atomic fact per vector. Do not combine multiple facts into one entry. +- Always store in English, regardless of the language of the transcript. +- Each fact must be self-contained: include who decided it and in what context. + Bad: "Priority 1: meeting summary feature." + Good: "Client Elisa set meeting summary as Priority 1 — save conversation summaries to the database and auto-share to Google Drive." +- Phrase for retrieval: include the actor, subject, and key detail so a semantic search for any of them finds the fact. +- Avoid generic statements. If a fact would be true of any similar project, add specifics that make it unique to this one. +- Do not save the same fact twice with different wording within this transcript. +- If there is nothing worth saving, return an empty vectors array. Never store meta-comments about the transcript itself (e.g. "no facts found", "transcript lacks content"). + +For "name": create a short English title capturing the key topic (e.g. "Elisa client meeting - March 2026"). """ @@ -69,7 +80,7 @@ async def memory_extractor_worker(transcript): """ client = get_client() response = await client.aio.models.generate_content( - model="gemini-2.5-flash-lite", + model="gemini-3.1-flash-lite-preview", contents=transcript, config=genai.types.GenerateContentConfig( system_instruction=SYSTEM_PROMPT, @@ -133,6 +144,10 @@ async def extract_and_save_information_to_database( except Exception as e: # pylint: disable=broad-exception-caught print(f"memory_extractor_worker failed: {e}") + if not extracted_vectors: + print("No vectors extracted, skipping store") + return + try: await asyncio.get_event_loop().run_in_executor( None, From 0a0050b6454a31ed39ecf190b57bf534dc6b0fbc Mon Sep 17 00:00:00 2001 From: Sami Horttanainen Date: Wed, 18 Mar 2026 09:49:49 +0200 Subject: [PATCH 3/3] decrease gain to 35 --- src/gemini_live.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gemini_live.py b/src/gemini_live.py index 00652de..22425f2 100644 --- a/src/gemini_live.py +++ b/src/gemini_live.py @@ -134,7 +134,7 @@ def _log_dropped_audio_if_needed(self): def push_audio(self, chunk: bytes): try: - chunk = amplify_chunk(chunk, gain=45.0) + chunk = amplify_chunk(chunk, gain=35.0) self._audio_queue.put_nowait(chunk) except asyncio.QueueFull: self._log_dropped_audio_if_needed()