Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions src/gemini_live.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import json
import time
import numpy as np

from google import auth, genai

Expand Down Expand Up @@ -51,7 +52,10 @@
"query": {
"type": "string",
"description": (
"The text query that is used to query vector database"
# Adjacent string literals are joined with no separator, so each sentence
# carries its own trailing space to keep the final description readable.
"The text query that is used to query vector database. "
"Only in english. Concise but enough text to have good query. "
"Example: Client Elisa: budget of the project. "
"Example: Elisa backend hire decision capacity requirements"
),
},
"thinking_context": {
Expand Down Expand Up @@ -90,7 +94,16 @@
)


class GeminiLiveSession: # pylint: disable=too-many-instance-attributes
def amplify_chunk(pcm_chunk: bytes, gain: float = 2.0) -> bytes:
    """Scale 16-bit PCM samples by ``gain``, saturating at the int16 limits.

    The buffer is read as native-endian signed 16-bit samples (``np.int16``).

    Args:
        pcm_chunk: Raw PCM bytes. If the length is odd, the dangling final
            byte cannot form a sample and is passed through unchanged.
        gain: Linear multiplier applied to every sample. May be an int or
            a float.

    Returns:
        A bytes object of the same length as ``pcm_chunk`` holding the
        amplified samples. Values outside [-32768, 32767] are clipped, and
        fractional results are truncated toward zero by the int16 cast.
    """
    # np.frombuffer raises ValueError unless the buffer is a whole number of
    # int16 samples, so only the aligned prefix is decoded.
    aligned_len = len(pcm_chunk) - (len(pcm_chunk) % 2)
    samples = np.frombuffer(pcm_chunk[:aligned_len], dtype=np.int16)

    # Widen to float before multiplying: with an integer gain the product
    # would otherwise stay int16 and wrap around before clip could saturate.
    scaled = samples.astype(np.float64) * gain
    amplified = np.clip(scaled, -32768, 32767).astype(np.int16)

    return amplified.tobytes() + bytes(pcm_chunk[aligned_len:])


class GeminiLiveSession: # pylint: disable=too-many-instance-attributes
def __init__(self, ws):
self.ws = ws
self._audio_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
Expand Down Expand Up @@ -121,6 +134,7 @@ def _log_dropped_audio_if_needed(self):

def push_audio(self, chunk: bytes):
try:
chunk = amplify_chunk(chunk, gain=35.0)
self._audio_queue.put_nowait(chunk)
except asyncio.QueueFull:
self._log_dropped_audio_if_needed()
Expand Down
27 changes: 21 additions & 6 deletions src/memory_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_client():
CLIENT = genai.Client(
vertexai=True,
project=project,
location="europe-north1",
location="global",
)
return CLIENT

Expand All @@ -35,10 +35,21 @@ def get_client():
SAVE: deadlines, decisions, scope changes, budget figures, named responsibilities, technical blockers
SKIP: how a decision was reached, confirmations of things already stated, small talk, food, office logistics, parking

For "name": create a short descriptive title that captures the key topic of the conversation.
A fact is worth saving only if forgetting it in 3 months would cause a mistake.
Do not save process steps (e.g. "escalation initiated"), save the outcome (e.g. "Lisa approved backend hire").
Do not save the same fact twice with different wording.
Rules:
- Only save facts explicitly stated in the transcript. Never infer, assume, or fill in details not directly mentioned — especially numbers, names, and dates.
- Save only if forgetting in 3 months would cause a real mistake.
- Save outcomes, not process steps. Good: "Client Elisa approved a backend hire — team lacked capacity to meet current backend requirements." Bad: "Hiring process initiated."
- One atomic fact per vector. Do not combine multiple facts into one entry.
- Always store in English, regardless of the language of the transcript.
- Each fact must be self-contained: include who decided it and in what context.
Bad: "Priority 1: meeting summary feature."
Good: "Client Elisa set meeting summary as Priority 1 — save conversation summaries to the database and auto-share to Google Drive."
- Phrase for retrieval: include the actor, subject, and key detail so a semantic search for any of them finds the fact.
- Avoid generic statements. If a fact would be true of any similar project, add specifics that make it unique to this one.
- Do not save the same fact twice with different wording within this transcript.
- If there is nothing worth saving, return an empty vectors array. Never store meta-comments about the transcript itself (e.g. "no facts found", "transcript lacks content").

For "name": create a short English title capturing the key topic (e.g. "Elisa client meeting - March 2026").
"""


Expand Down Expand Up @@ -69,7 +80,7 @@ async def memory_extractor_worker(transcript):
"""
client = get_client()
response = await client.aio.models.generate_content(
model="gemini-2.5-flash-lite",
model="gemini-3.1-flash-lite-preview",
contents=transcript,
config=genai.types.GenerateContentConfig(
system_instruction=SYSTEM_PROMPT,
Expand Down Expand Up @@ -133,6 +144,10 @@ async def extract_and_save_information_to_database(
except Exception as e: # pylint: disable=broad-exception-caught
print(f"memory_extractor_worker failed: {e}")

if not extracted_vectors:
print("No vectors extracted, skipping store")
return

try:
await asyncio.get_event_loop().run_in_executor(
None,
Expand Down
7 changes: 4 additions & 3 deletions src/tests/gemini_live_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,18 @@ def test_session_initialization(self, session, mock_websocket):

def test_push_audio_adds_to_queue(self, session):
"""Test that audio chunks are added to the queue"""
audio_chunk = b"test audio data"
audio_chunk = b"\x00\x01" * 8 # valid int16 PCM bytes
session.push_audio(audio_chunk)
assert session._audio_queue.qsize() == 1

def test_push_audio_ignores_when_queue_full(self, session):
"""Test that audio is silently dropped when queue is full"""
pcm_chunk = b"\x00\x01" * 4 # valid int16 PCM bytes
# Fill the queue
for _ in range(10):
session.push_audio(b"data")
session.push_audio(pcm_chunk)
# Try to add one more
session.push_audio(b"extra")
session.push_audio(pcm_chunk)
assert session._audio_queue.qsize() == 10

@pytest.mark.asyncio
Expand Down