diff --git a/plugins/memdebug/__init__.py b/plugins/memdebug/__init__.py new file mode 100644 index 00000000000..2030192a1ee --- /dev/null +++ b/plugins/memdebug/__init__.py @@ -0,0 +1,225 @@ +"""``/memdebug`` Discord slash command — read-only retrieval diagnostic (W2-4). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.2. + +Usage in chat: + + /memdebug -> top-8 from semantic_facts (curated) + /memdebug rawsearch -> top-8 from episodes (raw turns, forensics) + +The handler intentionally returns plain markdown text (not a Discord +embed): hermes-agent's ``register_command()`` surface is platform-neutral +and dispatches the same string to CLI / gateway / Slack. + +The ``rich-embed + 👍/👎 reaction buttons`` mode is open spec §8 work — we +ship the read-only diagnostic now so the F2 monitoring path (% of +top-1 hits judged useful) is unblocked. For v1, encourage the user +to react with 👍/👎 emoji on this message; a future cron will scrape +those reactions from the channel. +""" + +from __future__ import annotations + +import asyncio +import logging +import sqlite3 +import time +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + +def _resolve_hermes_home() -> Path: + """Use HERMES_HOME (set by hermes_constants) when available; else ~/.hermes.""" + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +_HERMES_HOME = _resolve_hermes_home() +DEFAULT_DB = _HERMES_HOME / "memories" / "memory.db" +DEFAULT_K = 8 +LOG_PATH = _HERMES_HOME / "logs" / "memory.log" + + +def _format_facts_block(facts) -> str: + lines = ["**🧠 /memdebug** — top {} from `semantic_facts`\n".format(len(facts))] + for i, f in enumerate(facts, start=1): + recency = max(0.0, 1.0 - f.age_days / 365.0) # display-only;rerank weight uses 90-day half-life + lines.append( + f"`{i}.` **[{f.entity or '—'}]** {_truncate(f.fact, 90)}\n" + f" score=`{f.score:.3f}` sim=`{f.sim:.3f}` " + f"age=`{int(f.age_days)}d` importance=`{f.importance}`" + ) + lines.append("\n_React 👍/👎 to flag this retrieval._") + return "\n".join(lines) + + +def _truncate(s: str, n: int) -> str: + s = s.replace("\n", " ") + return s if len(s) <= n else s[: n - 1] + "…" + + +def _format_episodes_block(rows: List[sqlite3.Row]) -> str: + if not rows: + return ( + "**🧠 /memdebug rawsearch** — `episodes` table is empty.\n\n" + "Episodes are written by W3 (per-turn write-back). After W3 " + "ships, this command will surface the raw conversation turns " + "behind any retrieval." + ) + lines = ["**🧠 /memdebug rawsearch** — top {} from `episodes`\n".format(len(rows))] + for i, r in enumerate(rows, start=1): + lines.append( + f"`{i}.` `[{r['ts']}]` `{r['channel']}/{r['role']}` " + f"{_truncate(r['text'], 120)}" + ) + return "\n".join(lines) + + +def _append_log(payload: dict) -> None: + """Append a /memdebug invocation to ~/.hermes/logs/memory.log.""" + import json + try: + LOG_PATH.parent.mkdir(parents=True, exist_ok=True) + with LOG_PATH.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +def _open_memory_db(path: Optional[Path] = None) -> Optional[sqlite3.Connection]: + """Open the sqlite_vec memory.db. 
Returns None if it doesn't exist yet.""" + path = path or DEFAULT_DB + if not path.exists(): + return None + from plugins.memory.sqlite_vec.store import open_db + return open_db(path, check_same_thread=False) + + +async def _do_semantic(query: str) -> str: + from plugins.memory.sqlite_vec.read import read_memory + + conn = _open_memory_db() + if not conn: + return ( + "**🧠 /memdebug** — memory database not yet initialised.\n\n" + f"Expected at `{DEFAULT_DB}`. Run `scripts/import_md.py --commit` " + "or wait for the first agent turn after W2-3 cutover." + ) + try: + facts = await read_memory(query, conn, k=DEFAULT_K) + finally: + conn.close() + if not facts: + return f"**🧠 /memdebug** — no facts matched `{_truncate(query, 60)}`." + _append_log({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "memdebug", + "q": query, + "n": len(facts), + "ids": [f.id for f in facts], + }) + return _format_facts_block(facts) + + +async def _do_rawsearch(query: str) -> str: + """Substring scan of episodes.text. No vector query — this is forensics + mode for 'did this conversation happen', not semantic recall.""" + conn = _open_memory_db() + if not conn: + return ( + "**🧠 /memdebug rawsearch** — memory database not yet initialised." + ) + try: + like = f"%{query}%" + rows = conn.execute( + "SELECT ts, channel, role, text FROM episodes " + "WHERE text LIKE ? ORDER BY ts DESC LIMIT ?", + (like, DEFAULT_K), + ).fetchall() + finally: + conn.close() + _append_log({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "memdebug-raw", + "q": query, + "n": len(rows), + }) + return _format_episodes_block(rows) + + +HELP_TEXT = ( + "**/memdebug** — inspect what `read_memory` would return.\n" + "Usage:\n" + " `/memdebug ` — top-8 from `semantic_facts` (curated)\n" + " `/memdebug rawsearch ` — substring scan of `episodes` (forensics)\n" +) + + +async def _handle_async(raw_args: str) -> str: + args = (raw_args or "").strip() + if not args: + return HELP_TEXT + if args.lower().startswith("rawsearch"): + rest = args[len("rawsearch"):].strip() + if not rest: + return HELP_TEXT + try: + return await _do_rawsearch(rest) + except Exception as exc: + logger.exception("memdebug rawsearch failed") + return f"**/memdebug rawsearch** error: `{exc}`" + try: + return await _do_semantic(args) + except Exception as exc: + logger.exception("memdebug semantic failed") + return f"**/memdebug** error: `{exc}`" + + +def _handle_memdebug(raw_args: str) -> str: + """Sync entry point. PluginContext.register_command supports async + handlers natively, but ours is dispatched on either pathway, so we + bridge via asyncio.run when no loop is running.""" + coro = _handle_async(raw_args) + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + if loop is None: + return asyncio.run(coro) + # Already in a running loop — schedule and wait via a worker thread. + import threading + import concurrent.futures + box = {} + + def runner(): + try: + box["r"] = asyncio.run(coro) + except BaseException as exc: + box["e"] = exc + + t = threading.Thread(target=runner, daemon=True, name="memdebug-handler") + t.start() + t.join(timeout=15.0) + if t.is_alive(): + return "**/memdebug** timed out (>15s)." 
+ if "e" in box: + return f"**/memdebug** error: `{box['e']}`" + return box.get("r", HELP_TEXT) + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + ctx.register_command( + "memdebug", + handler=_handle_memdebug, + description="Inspect Hermes long-term memory retrieval (top-8 + scores).", + args_hint=" | rawsearch ", + ) diff --git a/plugins/memdebug/plugin.yaml b/plugins/memdebug/plugin.yaml new file mode 100644 index 00000000000..1945104cff6 --- /dev/null +++ b/plugins/memdebug/plugin.yaml @@ -0,0 +1,4 @@ +name: memdebug +version: 0.1.0 +description: "/memdebug — inspect Hermes long-term memory retrieval. Read-only diagnostic for the sqlite_vec memory plugin (W2-4)." +author: "Li-yang Chen" diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py new file mode 100644 index 00000000000..3d54be27f66 --- /dev/null +++ b/plugins/memory/sqlite_vec/__init__.py @@ -0,0 +1,251 @@ +"""Hermes V3 memory plugin — sqlite-vec store with two-tier (hot/cold) design. + +Activate via $HERMES_HOME/config.yaml: + + memory: + provider: sqlite_vec + +Read path (W2-3): on each turn, ``prefetch(query)`` runs +``read_memory()`` in a worker thread (the gateway already owns the main +asyncio loop, so we can't ``asyncio.run`` inline) and returns a markdown +block prefixed with ``## Recent relevant memories``. The retrieved fact +IDs are cached per session and bumped via ``sync_turn()`` after the +reply is sent, per spec §4 hits accounting. + +Write path (W3-2): ``sync_turn`` now also fires ``write_episode`` — +records the raw turn into ``episodes``, runs Kimi extract, fast-tracks +short-lived facts directly into ``semantic_facts`` (≤ today + 30d), +stashes longer-lived facts into ``episodes.metadata.stashed_facts`` +for W3-3 weekly_promotion. Errors land in +``~/.hermes/logs/memory_write_failures.jsonl`` and never propagate. +""" + +from __future__ import annotations + +import asyncio +import logging +import threading +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +from agent.memory_provider import MemoryProvider + +from .read import ( + DEFAULT_K, + Fact, + bump_hits, + format_facts_for_prompt, + read_memory, +) +from .store import init_db +from .write import write_episode + +logger = logging.getLogger(__name__) + +PREFETCH_TIMEOUT_S = 5.0 # Voyage typical 200-400ms; 5s is the kill-switch. +# Write path: extract (~1-3s) + embed batch (~300ms) + INSERT (~5ms). +# 30s gives Kimi room to think while still bounding worst-case latency. +WRITE_TIMEOUT_S = 30.0 +RECALL_HEADER = "## Recent relevant memories" + + +def _mem_off_active() -> bool: + """True iff the global /mem off kill switch sentinel is present. + + Late import to avoid circular plugin loading: plugins.memreview can + import provider symbols indirectly via the slash-command surface. + """ + try: + from plugins.memreview import mem_off_active + return mem_off_active() + except Exception: + return False + + +def _default_db_path(hermes_home: str) -> Path: + return Path(hermes_home).expanduser() / "memories" / "memory.db" + + +def _run_coro_in_thread(coro_factory, timeout: float): + """Run an async coroutine in a worker thread with its own event loop. + + The hermes gateway runs its own asyncio loop, so ``asyncio.run`` from + this synchronous ABC method would raise "cannot be called from a + running event loop". 
We sidestep by spawning a dedicated thread with a
+    fresh loop, joining with a timeout. ``coro_factory`` is a zero-arg
+    callable that builds the coroutine inside the worker so the coroutine
+    is bound to the worker's loop.
+    """
+    box: Dict[str, Any] = {}
+
+    def runner():
+        loop = asyncio.new_event_loop()
+        try:
+            box["result"] = loop.run_until_complete(coro_factory())
+        except BaseException as exc:
+            box["error"] = exc
+        finally:
+            loop.close()
+
+    t = threading.Thread(target=runner, daemon=True, name="sqlite-vec-worker")
+    t.start()
+    t.join(timeout)
+    if t.is_alive():
+        raise TimeoutError(f"sqlite_vec worker exceeded {timeout}s")
+    if "error" in box:
+        raise box["error"]
+    return box.get("result")
+
+
+def _synth_msg_id(session_id: str, user: str, asst: str, ts: str) -> str:
+    """Stable per-turn external_id for ON CONFLICT idempotency.
+
+    We don't have the real Discord message ID at sync_turn time (the
+    ABC hook only exposes user/assistant content + session_id), so we
+    hash the turn into a 12-hex-char id. Bucketing ts to the minute
+    means a Discord redelivery within the same minute collapses; a
+    legitimate retry after >1 min would create a new row, which is
+    acceptable for episode-level forensics.
+    """
+    # Builtin hash() is salted per interpreter run (PYTHONHASHSEED), so it
+    # would not survive a gateway restart; use a stable digest instead.
+    import hashlib
+
+    raw = (session_id, user, asst, ts[:16])
+    return "h" + hashlib.sha1(repr(raw).encode("utf-8")).hexdigest()[:12]
+
+
+class SqliteVecMemoryProvider(MemoryProvider):
+    """Hermes V3 long-term memory provider (W2-3 read + W3-2 write)."""
+
+    def __init__(self) -> None:
+        self._conn = None
+        self._db_path: Optional[Path] = None
+        self._last_fact_ids: Dict[str, List[int]] = {}
+        self._lock = threading.Lock()
+
+    @property
+    def name(self) -> str:
+        return "sqlite_vec"
+
+    def is_available(self) -> bool:
+        try:
+            import sqlite_vec  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        hermes_home = kwargs.get("hermes_home")
+        if not hermes_home:
+            from hermes_constants import get_hermes_home
+            hermes_home = str(get_hermes_home())
+        self._db_path = _default_db_path(hermes_home)
+        self._conn = init_db(self._db_path, check_same_thread=False)
+        logger.info("sqlite_vec memory ready at %s", self._db_path)
+
+    def system_prompt_block(self) -> str:
+        # Persona stays in flat files (SOUL.md, USER.md, life-dimensions.md);
+        # the recall block is emitted from prefetch() per turn.
+        return ""
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Embed query, fetch top-k facts, format as a markdown block.
+
+        Returns "" on empty/trivial query, missing connection, or any
+        error (Voyage outage, rate limit, etc.) so the gateway never
+        blocks a reply on memory recall. Retrieved fact IDs are stashed
+        for the matching ``sync_turn()`` call to bump hits.
+        """
+        if not self._conn or not query or not query.strip():
+            return ""
+
+        conn = self._conn
+        db_lock = self._lock
+
+        async def _do() -> List[Fact]:
+            with db_lock:
+                return await read_memory(query, conn, k=DEFAULT_K)
+
+        try:
+            facts = _run_coro_in_thread(_do, timeout=PREFETCH_TIMEOUT_S)
+        except Exception as exc:
+            logger.warning("sqlite_vec prefetch error: %s", exc)
+            return ""
+
+        if not facts:
+            return ""
+
+        with self._lock:
+            self._last_fact_ids[session_id] = [f.id for f in facts]
+
+        body = format_facts_for_prompt(facts, with_meta=True)
+        return f"{RECALL_HEADER}\n{body}"
+
+    def sync_turn(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+    ) -> None:
+        """Bump hits on retrieved facts and persist the turn.
+ + Spec §4 + §5.1 — both happen AFTER the reply is delivered, so + this must never raise. ``bump_hits`` swallows its own DB errors; + ``write_episode`` swallows everything and writes failures to + ~/.hermes/logs/memory_write_failures.jsonl. + """ + if not self._conn: + return + conn = self._conn + db_lock = self._lock + + with self._lock: + ids = self._last_fact_ids.pop(session_id, []) + + ts = time.strftime("%Y-%m-%d %H:%M:%S") + msg_id = _synth_msg_id(session_id, user_content, assistant_content, ts) + channel = session_id or "unknown" + + async def _do_bump() -> None: + if ids: + with db_lock: + await bump_hits(ids, conn) + + async def _do_write() -> None: + with db_lock: + await write_episode( + user_msg=user_content, + reply=assistant_content, + channel=channel, + msg_id=msg_id, + ts=ts, + conn=conn, + ) + + try: + _run_coro_in_thread(_do_bump, timeout=PREFETCH_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec bump_hits worker error: %s", exc) + + if user_content or assistant_content: + # /mem off kill switch: skip write_episode entirely. The hot path + # bump_hits ran above (read-side accounting), but no new + # episodes / facts are persisted. Read remains unaffected. + if _mem_off_active(): + logger.info("sqlite_vec write_episode skipped (/mem off)") + else: + try: + _run_coro_in_thread(_do_write, timeout=WRITE_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec write_episode worker error: %s", exc) + + def get_tool_schemas(self) -> List[Dict[str, Any]]: + return [] + + def handle_tool_call(self, tool_name: str, args: Dict[str, Any]) -> Any: + from tools.registry import tool_error + return tool_error(f"sqlite_vec exposes no tools (got {tool_name!r})") + + def shutdown(self) -> None: + if getattr(self, "_conn", None): + self._conn.close() + self._conn = None diff --git a/plugins/memory/sqlite_vec/embed.py b/plugins/memory/sqlite_vec/embed.py new file mode 100644 index 00000000000..ae114ebf670 --- /dev/null +++ b/plugins/memory/sqlite_vec/embed.py @@ -0,0 +1,139 @@ +"""Voyage AI embedding wrapper for the sqlite_vec memory plugin. + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §1.4 (locked +decision) and §4 (read path) — voyage-3.5-lite, 512 dim, int8. + +Returns each embedding as a 512-byte BLOB ready to insert into +``semantic_facts.embedding``. The store-side trigger wraps the BLOB with +``vec_int8()`` when copying it into the ``vec_facts`` virtual table. + +Public API: + + await voyage_embed(["text 1", "text 2"]) -> [b"...512 bytes...", b"..."] +""" + +from __future__ import annotations + +import asyncio +import logging +import os +from typing import List, Optional, Sequence + +import httpx + +logger = logging.getLogger(__name__) + +VOYAGE_URL = "https://api.voyageai.com/v1/embeddings" +VOYAGE_MODEL = "voyage-3.5-lite" +VOYAGE_BATCH = 128 # Voyage API per-call ceiling +VOYAGE_DIM = 512 +VOYAGE_DTYPE = "int8" +DEFAULT_TIMEOUT = 30.0 +MAX_RETRIES = 3 + + +class VoyageError(RuntimeError): + """Raised when Voyage API repeatedly fails.""" + + +def _api_key() -> str: + key = os.environ.get("VOYAGE_API_KEY") + if not key: + raise VoyageError( + "VOYAGE_API_KEY is not set. Add it to ~/.hermes/.env and " + "expose it to the hermes container via docker-compose." 
+ ) + return key + + +def _to_int8_blob(values: Sequence[int]) -> bytes: + """Pack a list of int8 values (-128..127) into a raw 512-byte BLOB.""" + if len(values) != VOYAGE_DIM: + raise VoyageError( + f"Voyage returned {len(values)}-dim vector, expected {VOYAGE_DIM}" + ) + return bytes((v + 256) & 0xFF for v in values) # signed -> unsigned byte + + +async def _post_batch( + client: httpx.AsyncClient, + texts: List[str], + api_key: str, +) -> List[bytes]: + payload = { + "model": VOYAGE_MODEL, + "input": texts, + "output_dtype": VOYAGE_DTYPE, + "output_dimension": VOYAGE_DIM, + } + headers = {"Authorization": f"Bearer {api_key}"} + + for attempt in range(1, MAX_RETRIES + 1): + try: + r = await client.post( + VOYAGE_URL, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT + ) + except httpx.RequestError as exc: + if attempt == MAX_RETRIES: + raise VoyageError(f"network error: {exc}") from exc + await asyncio.sleep(2 ** (attempt - 1)) + continue + + if 500 <= r.status_code < 600: + if attempt == MAX_RETRIES: + raise VoyageError(f"Voyage 5xx: {r.status_code} {r.text[:200]}") + await asyncio.sleep(2 ** (attempt - 1)) + continue + + if r.status_code >= 400: + raise VoyageError(f"Voyage {r.status_code}: {r.text[:200]}") + + body = r.json() + items = body.get("data", []) + if len(items) != len(texts): + raise VoyageError( + f"Voyage returned {len(items)} items for {len(texts)} inputs" + ) + # Voyage returns embeddings in input order (per docs/index field). + items.sort(key=lambda d: d.get("index", 0)) + return [_to_int8_blob(d["embedding"]) for d in items] + + raise VoyageError("retry loop exhausted unexpectedly") + + +async def voyage_embed( + texts: List[str], + *, + dim: int = VOYAGE_DIM, + dtype: str = VOYAGE_DTYPE, + client: Optional[httpx.AsyncClient] = None, +) -> List[bytes]: + """Embed `texts` and return one int8 BLOB per input. + + Batches automatically at Voyage's 128-input ceiling. Retries 3x with + exponential backoff on 5xx and network errors. Raises VoyageError on + auth failure, 4xx, or repeated 5xx. + + `dim` and `dtype` are accepted for API symmetry but locked to the spec + values; passing different values raises immediately so config drift + fails loudly instead of silently corrupting embeddings. + """ + if dim != VOYAGE_DIM or dtype != VOYAGE_DTYPE: + raise VoyageError( + f"dim/dtype locked to {VOYAGE_DIM}/{VOYAGE_DTYPE} per spec §1.4" + ) + if not texts: + return [] + + api_key = _api_key() + owns_client = client is None + client = client or httpx.AsyncClient() + try: + out: List[bytes] = [] + for i in range(0, len(texts), VOYAGE_BATCH): + batch = texts[i : i + VOYAGE_BATCH] + out.extend(await _post_batch(client, batch, api_key)) + return out + finally: + if owns_client: + await client.aclose() diff --git a/plugins/memory/sqlite_vec/extract.py b/plugins/memory/sqlite_vec/extract.py new file mode 100644 index 00000000000..caeffb1e245 --- /dev/null +++ b/plugins/memory/sqlite_vec/extract.py @@ -0,0 +1,299 @@ +"""Kimi-driven extraction from a single Discord turn (W3-1). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.2. + +The ``EXTRACT_PROMPT`` constant is **verbatim** from the spec — do not +paraphrase. Drift here directly compromises the F2 monitoring path +(downstream weekly review will see noise). + +Two-stage flow: + + 1. Caller calls ``kimi_extract(user, assistant, channel, ts)``. + 2. We short-circuit to ``[]`` if ``channel`` is in + ``PHI_BLACKLIST_CHANNELS`` — never round-trip hospital data + through the cloud LLM. + 3. 
Otherwise we POST to synthetic.new's OpenAI-compatible
+       chat-completions endpoint with ``temperature=0.1`` and
+       ``response_format=json_object`` (Kimi K2.5 supports the OpenAI
+       structured-output flag).
+    4. Parse the JSON list, validate the per-item shape, return
+       ``list[ExtractedFact]``. Bad rows are dropped, not fatal.
+
+Token cost is logged to ``memory.log`` so weekly review can spot a
+runaway extract budget.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import List, Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Spec §1.4 lock — Kimi K2.5 via synthetic.new.
+SYNTHETIC_URL = "https://api.synthetic.new/v1/chat/completions"
+EXTRACT_MODEL = "hf:moonshotai/Kimi-K2.5"
+EXTRACT_TEMPERATURE = 0.1
+EXTRACT_TIMEOUT = 30.0
+EXTRACT_MAX_TOKENS = 1024  # extract output is a small JSON list
+
+# Spec §5.1 — channels whose content never leaves the host as PHI.
+PHI_BLACKLIST_CHANNELS = frozenset({"cmio", "cbme", "medicine"})
+
+# Spec §5.2 EXTRACT_PROMPT — copy verbatim. The {placeholders} are
+# substituted at call time.
+EXTRACT_PROMPT = """You extract durable memories about 禮揚 from this Discord turn.
+Output a JSON list. Empty list [] if nothing memorable.
+
+HARD RULES — these override everything else:
+1. NEVER extract: hospital data, patient names, 病歷號, 身分證字號, lab results,
+   diagnoses about real people, hospital policy specifics, hospital colleague names.
+2. NEVER extract pleasantries (好的/收到/早安/明白/thanks). Return [] if turn is just this.
+3. If turn metadata says synthetic=true (cron-produced), return [] UNLESS content
+   contains a NEW commitment by 禮揚 (e.g. "排了 5/22 跟 Y 開會").
+4. If unsure whether content violates rule 1, ERR ON THE SIDE OF NOT EXTRACTING.
+
+Each item:
+  type: "episodic" | "semantic"
+  text: short statement, zh-TW or English (match source language)
+  entity: nullable. Use ".家庭", ".工作", ".研究興趣", ".健康", etc. namespacing under "禮揚."
+  importance: 1-5
+  valid_to_hint: ISO date if turn implies expiry. "今晚"→tomorrow, "這週"→Sunday, "這個月"→end-of-month.
+
+Skip facts that duplicate something said in the last 5 turns.
+
+TURN:
+[{ts}] [{channel}] user: {user}
+[{ts}] [{channel}] assistant: {assistant}
+"""
+
+
+@dataclass
+class ExtractedFact:
+    """One fact extracted from a turn. Distinct from the read-side ``Fact``."""
+
+    type: str  # "episodic" | "semantic"
+    text: str
+    entity: Optional[str]
+    importance: int
+    valid_to_hint: Optional[str] = None
+    raw: dict = field(default_factory=dict)  # original Kimi output for forensics
+
+
+class ExtractError(RuntimeError):
+    """Raised when synthetic.new is unreachable or returns malformed payload."""
+
+
+def _resolve_hermes_home() -> Path:
+    try:
+        from hermes_constants import get_hermes_home
+        return Path(get_hermes_home())
+    except Exception:
+        return Path.home() / ".hermes"
+
+
+def _default_log_path() -> Path:
+    return _resolve_hermes_home() / "logs" / "memory.log"
+
+
+def _read_synthetic_api_key() -> str:
+    """Resolve the synthetic.new API key.
+
+    Priority:
+    1. ``SYNTHETIC_API_KEY`` env var (test-friendly override).
+    2. ``auth.json`` ``custom:synthetic`` pool, first non-expired token.
+
+    Raises ``ExtractError`` if no key is found — the caller decides
+    whether that should bubble up (W3-2 wraps and falls back).
+ """ + env = os.environ.get("SYNTHETIC_API_KEY") + if env: + return env + + auth_path = _resolve_hermes_home() / "auth.json" + if auth_path.exists(): + try: + data = json.loads(auth_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise ExtractError(f"auth.json parse: {exc}") from exc + # The real auth.json uses "credential_pool" (singular). Older or + # alternate layouts may use the plural form or top-level keys, so we + # check all three for resilience across hermes-agent versions. + pool = ( + (data.get("credential_pool") or {}).get("custom:synthetic") + or (data.get("credential_pools") or {}).get("custom:synthetic") + or data.get("custom:synthetic") + or [] + ) + for entry in pool: + tok = entry.get("access_token") + if tok: + return tok + + raise ExtractError( + "synthetic.new API key not found. Set SYNTHETIC_API_KEY or " + "ensure auth.json has a custom:synthetic credential." + ) + + +def _append_log(payload: dict, log_path: Optional[Path] = None) -> None: + log_path = log_path or _default_log_path() + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +def _coerce_fact(raw: dict) -> Optional[ExtractedFact]: + """Validate one Kimi-emitted fact dict; return None on shape errors.""" + t = raw.get("type") + text = raw.get("text") + if t not in ("episodic", "semantic"): + return None + if not isinstance(text, str) or not text.strip(): + return None + importance = raw.get("importance", 2) + try: + importance = int(importance) + except (TypeError, ValueError): + importance = 2 + importance = max(1, min(5, importance)) + entity = raw.get("entity") + if entity is not None and not isinstance(entity, str): + entity = None + valid_to_hint = raw.get("valid_to_hint") + if valid_to_hint is not None and not isinstance(valid_to_hint, str): + valid_to_hint = None + return ExtractedFact( + type=t, + text=text.strip(), + entity=entity, + importance=importance, + valid_to_hint=valid_to_hint, + raw=raw, + ) + + +async def kimi_extract( + user: str, + assistant: str, + channel: str, + ts: str, + *, + client: Optional[httpx.AsyncClient] = None, + log_path: Optional[Path] = None, +) -> List[ExtractedFact]: + """Extract durable memories from one Discord turn. + + Returns ``[]`` (no API call) when ``channel`` is PHI-blacklisted, when + both ``user`` and ``assistant`` are empty, or when Kimi returns + malformed JSON. Otherwise raises ``ExtractError`` on transport + failure or non-2xx response — caller (W3-2) is responsible for + fallback bookkeeping (failure JSONL log). 
+ """ + if channel in PHI_BLACKLIST_CHANNELS: + return [] + if not (user or "").strip() and not (assistant or "").strip(): + return [] + + api_key = _read_synthetic_api_key() + prompt = EXTRACT_PROMPT.format(ts=ts, channel=channel, user=user, assistant=assistant) + + payload = { + "model": EXTRACT_MODEL, + "messages": [{"role": "user", "content": prompt}], + "temperature": EXTRACT_TEMPERATURE, + "max_tokens": EXTRACT_MAX_TOKENS, + "response_format": {"type": "json_object"}, + } + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + owns_client = client is None + client = client or httpx.AsyncClient() + t0 = time.perf_counter() + try: + try: + r = await client.post( + SYNTHETIC_URL, headers=headers, json=payload, timeout=EXTRACT_TIMEOUT + ) + except httpx.RequestError as exc: + raise ExtractError(f"synthetic.new network error: {exc}") from exc + if r.status_code >= 400: + raise ExtractError(f"synthetic.new {r.status_code}: {r.text[:200]}") + body = r.json() + finally: + if owns_client: + await client.aclose() + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + + choice = (body.get("choices") or [{}])[0] + content = (choice.get("message") or {}).get("content", "") + usage = body.get("usage") or {} + + parsed = _parse_json_list(content) + facts = [f for f in (_coerce_fact(item) for item in parsed) if f is not None] + + _append_log( + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "kimi_extract", + "channel": channel, + "ms": round(elapsed_ms, 2), + "n_raw": len(parsed), + "n_kept": len(facts), + "tokens_in": usage.get("prompt_tokens"), + "tokens_out": usage.get("completion_tokens"), + }, + log_path=log_path, + ) + return facts + + +def _parse_json_list(content: str) -> list: + """Tolerantly extract a JSON list from Kimi's ``content`` field. + + The prompt asks for a JSON list, but Kimi may wrap it in an object + (when response_format=json_object) like ``{"facts": [...]}`` or + return ``{}`` for empty. We accept any of: + - bare ``[...]`` + - ``{"facts": [...]}`` / ``{"items": [...]}`` / ``{"results": [...]}`` + - ``{}`` (treated as empty list) + """ + if not content: + return [] + try: + data = json.loads(content) + except json.JSONDecodeError: + return [] + if isinstance(data, list): + return data + if isinstance(data, dict): + # Kimi K2.5 with response_format=json_object often wraps the + # answer in a dict like {"analysis": ..., "extracted_memories": [...]}. + # Try the canonical key names first, then fall back to the first list-valued field. + for key in ("facts", "items", "results", "memories", "extracted_memories", "data"): + v = data.get(key) + if isinstance(v, list): + return v + # Last-ditch fallback: any top-level list value wins. + for v in data.values(): + if isinstance(v, list): + return v + # Kimi sometimes returns a single fact as a flat dict (no list wrapper). + # Detect by the presence of the canonical fact keys. + if "type" in data and "text" in data: + return [data] + return [] + return [] diff --git a/plugins/memory/sqlite_vec/plugin.yaml b/plugins/memory/sqlite_vec/plugin.yaml new file mode 100644 index 00000000000..4e3b24133c4 --- /dev/null +++ b/plugins/memory/sqlite_vec/plugin.yaml @@ -0,0 +1,7 @@ +name: sqlite_vec +version: 0.1.0 +description: "Hermes V3 long-term memory — local sqlite-vec store with hot episodes / cold curated semantic_facts, weekly human-approved promotion." 
+pip_dependencies: + - sqlite-vec>=0.1.6 +hooks: + - on_pre_compress diff --git a/plugins/memory/sqlite_vec/promotion.py b/plugins/memory/sqlite_vec/promotion.py new file mode 100644 index 00000000000..6a4404b752e --- /dev/null +++ b/plugins/memory/sqlite_vec/promotion.py @@ -0,0 +1,862 @@ +"""Weekly promotion + apply core logic (W3-3). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.3 + §5.4. + +Two entry points, both invoked from cron-driven thin wrappers in +``~/.hermes/scripts/`` (so they sit inside HERMES_HOME/scripts, the only +location the hermes scheduler will exec): + + weekly_promotion() - reads 7 days of pending episodes, runs one + Kimi-thinking call to produce a promotion diff, + saves it to pending_diffs/.json, + renders + posts the digest to #memory-review. + Does NOT stamp episodes.promoted_at. + + weekly_apply() - purges pending_diffs older than 14 days, loads + the latest, checks for the rejection sentinel + file, and either archives-as-rejected or + applies the diff atomically (promote / dedup / + expire) and stamps episodes.promoted_at. + +The split lets the user reject Sunday's diff with /memreview reject + any time before Monday's apply fires. +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import sqlite3 +import struct +import time +from dataclasses import dataclass, field +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import httpx + +from .embed import voyage_embed +from .extract import ( + EXTRACT_TIMEOUT, + PHI_BLACKLIST_CHANNELS, + SYNTHETIC_URL, + _read_synthetic_api_key, +) + +logger = logging.getLogger(__name__) + +PROMOTION_MODEL = "hf:moonshotai/Kimi-K2-Thinking" +PROMOTION_FALLBACK_MODEL = "hf:moonshotai/Kimi-K2.5" +PROMOTION_TEMPERATURE = 0.2 +PROMOTION_MAX_TOKENS = 8192 # diff JSON can be substantial across 7 days +PROMOTION_TIMEOUT = 120.0 # thinking-mode + 100+ episodes + +PROMOTION_NEIGHBOR_K = 20 # spec §5.3: per-candidate vec_search k=20 +PROMOTION_LOOKBACK_DAYS = 7 +PENDING_DIFF_TTL_DAYS = 14 + +DISCORD_API = "https://discord.com/api/v10/channels/{channel_id}/messages" + + +# --------------------------------------------------------------------------- +# Prompt — designed to match spec §5.3 schema verbatim +# --------------------------------------------------------------------------- + +PROMOTION_PROMPT = """You are running the weekly memory promotion review for 禮揚's personal AI. + +Below is one week of conversation episodes that have not yet been reviewed. +Each candidate carries any 'stashed_facts' that the per-turn extractor +recorded in its metadata. You also see, per candidate, the top-20 existing +semantic_facts that are nearest by embedding distance — use these to decide +whether a candidate fact duplicates something already known. + +HARD RULES — these override everything else: +1. NEVER promote: hospital data, patient names, 病歷號, 身分證字號, lab results, + diagnoses about real people, hospital policy specifics, hospital colleague names. +2. Pleasantries (好的/收到/早安/明白/thanks) → drop_as_noise. +3. Synthetic episodes (synthetic=true) — promote ONLY if they contain a NEW + commitment by 禮揚 (a meeting scheduled, a habit declared, a decision made). +4. If a candidate stashed_fact is semantically captured by an existing fact + (sim ≥ 0.92), prefer dedup_hits over creating a new row. +5. Conservative importance: most facts are 2; only use 4-5 for permanent + identity / family / strong commitments. 
+ +For each candidate, decide one of four actions: + + A. PROMOTE — new fact worth keeping. Emit into "promote". + valid_to: ISO date or null (null = permanent). + importance: 1-5 (default 2). + source_episode_ids: which candidate episodes contributed. + + B. DEDUP_HIT — candidate fact reaffirms an existing fact. Emit into + "dedup_hits" with the existing fact id and action="bump_hits" + (just touch the timestamp) or "refine_text" (mild rephrasing + worth applying). + + C. EXPIRE — an existing fact is contradicted or has gone stale. + Emit into "expire" with existing_fact_id, valid_to=today, reason. + + D. DROP_AS_NOISE — pleasantry, low signal, or duplicates within the + week. Emit into "drop_as_noise" with the episode ids and reason. + +Every candidate episode_id must appear under exactly one action above +(in promote.source_episode_ids OR dedup_hits.source_episode_ids OR +drop_as_noise.episode_ids). The "expire" section can reference NEW +existing_fact_ids that are independent of this week's candidates — +that's fine. + +Output ONE JSON object with this exact schema: + +{{ + "digest_id": "{digest_id}", + "candidate_episode_ids": [], + "promote": [ + {{ + "entity": "禮揚.", + "fact": "single-sentence statement", + "importance": 1..5, + "valid_from": "{today}", + "valid_to": "YYYY-MM-DD" | null, + "source_episode_ids": [int, ...] + }} + ], + "dedup_hits": [ + {{ + "existing_fact_id": int, + "action": "bump_hits" | "refine_text", + "refined_text": "string only if action=refine_text", + "source_episode_ids": [int, ...] + }} + ], + "expire": [ + {{ + "existing_fact_id": int, + "valid_to": "{today}", + "reason": "short reason" + }} + ], + "drop_as_noise": [ + {{ + "episode_ids": [int, ...], + "reason": "short reason" + }} + ] +}} + +CANDIDATES (week of {week_label}): +{candidates_block} + +NEAREST-NEIGHBOR EXISTING FACTS (one block per candidate stashed_fact): +{neighbors_block} +""" + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + + +@dataclass +class WeekDigest: + """Loaded form of pending_diffs/.json.""" + + digest_id: str + candidate_episode_ids: List[int] + promote: List[Dict[str, Any]] = field(default_factory=list) + dedup_hits: List[Dict[str, Any]] = field(default_factory=list) + expire: List[Dict[str, Any]] = field(default_factory=list) + drop_as_noise: List[Dict[str, Any]] = field(default_factory=list) + raw: Dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "WeekDigest": + return cls( + digest_id=data.get("digest_id", ""), + candidate_episode_ids=list(data.get("candidate_episode_ids") or []), + promote=list(data.get("promote") or []), + dedup_hits=list(data.get("dedup_hits") or []), + expire=list(data.get("expire") or []), + drop_as_noise=list(data.get("drop_as_noise") or []), + raw=data, + ) + + def to_dict(self) -> Dict[str, Any]: + return { + "digest_id": self.digest_id, + "candidate_episode_ids": self.candidate_episode_ids, + "promote": self.promote, + "dedup_hits": self.dedup_hits, + "expire": self.expire, + "drop_as_noise": self.drop_as_noise, + } + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / 
".hermes" + + +def pending_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "pending_diffs" + p.mkdir(parents=True, exist_ok=True) + return p + + +def archive_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "diff_archive" + p.mkdir(parents=True, exist_ok=True) + return p + + +def memory_log_path() -> Path: + return _resolve_hermes_home() / "logs" / "memory.log" + + +def db_path() -> Path: + return _resolve_hermes_home() / "memories" / "memory.db" + + +def digest_id_for(today: Optional[date] = None) -> str: + """ISO date based digest id: wk-YYYY-MM-DD.""" + today = today or date.today() + return f"wk-{today.isoformat()}" + + +def rejection_sentinel(digest_id: str) -> Path: + return pending_dir() / f"{digest_id}.rejected" + + +def pending_path(digest_id: str) -> Path: + return pending_dir() / f"{digest_id}.json" + + +# --------------------------------------------------------------------------- +# Shared logging +# --------------------------------------------------------------------------- + + +def _log_event(payload: Dict[str, Any]) -> None: + p = memory_log_path() + try: + p.parent.mkdir(parents=True, exist_ok=True) + with p.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Promotion: candidate gathering + neighbor search +# --------------------------------------------------------------------------- + + +def _read_pending_episodes(conn: sqlite3.Connection, days: int = PROMOTION_LOOKBACK_DAYS) -> List[Dict[str, Any]]: + rows = conn.execute( + """ + SELECT id, ts, channel, role, text, metadata, synthetic + FROM episodes + WHERE promoted_at IS NULL + AND ts > datetime('now', ?) + ORDER BY ts + """, + (f"-{days} days",), + ).fetchall() + out: List[Dict[str, Any]] = [] + for r in rows: + meta = {} + if r["metadata"]: + try: + meta = json.loads(r["metadata"]) + except json.JSONDecodeError: + meta = {} + out.append({ + "id": r["id"], + "ts": r["ts"], + "channel": r["channel"], + "role": r["role"], + "text": r["text"], + "synthetic": bool(r["synthetic"]), + "stashed_facts": meta.get("stashed_facts") or [], + }) + return out + + +async def _vec_search(conn: sqlite3.Connection, query: str, k: int = PROMOTION_NEIGHBOR_K) -> List[Dict[str, Any]]: + """Find k nearest existing semantic_facts to ``query`` text. + + Returns rows like {id, fact, entity, importance, sim}. + """ + [qvec] = await voyage_embed([query]) + rows = conn.execute( + """ + WITH knn AS ( + SELECT fact_id, distance + FROM vec_facts + WHERE embedding MATCH vec_int8(?) AND k = ? + ) + SELECT sf.id, sf.fact, sf.entity, sf.importance, + (1 - knn.distance) AS sim + FROM knn + JOIN semantic_facts sf ON sf.id = knn.fact_id + WHERE sf.state = 'active' + AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) + ORDER BY sim DESC + """, + (qvec, k), + ).fetchall() + return [dict(r) for r in rows] + + +def _format_candidates_block(candidates: List[Dict[str, Any]]) -> str: + """Render candidate episodes as a compact block for the prompt.""" + lines = [] + for c in candidates: + marker = "🤖" if c["synthetic"] else "👤" + text = c["text"].replace("\n", " ") + if len(text) > 200: + text = text[:200] + "..." + line = f"#{c['id']} [{c['ts']}] {marker} {c['channel']}/{c['role']}: {text}" + lines.append(line) + for sf in c["stashed_facts"]: + sf_text = sf.get("text", "") + sf_entity = sf.get("entity") or "?" 
+ sf_vth = sf.get("valid_to_hint") or "permanent" + lines.append( + f" ↳ stashed: [{sf_entity}] {sf_text[:120]} " + f"(importance={sf.get('importance', 2)}, valid_to_hint={sf_vth})" + ) + return "\n".join(lines) if lines else "(no candidates)" + + +def _format_neighbors_block(neighbors_by_fact: Dict[str, List[Dict[str, Any]]]) -> str: + """One section per candidate stashed_fact, listing its k nearest existing facts.""" + if not neighbors_by_fact: + return "(no candidate stashed_facts to compare against)" + sections = [] + for stashed_text, rows in neighbors_by_fact.items(): + header = f"--- nearest to: {stashed_text[:120]} ---" + body_lines = [ + f" #{r['id']} sim={r['sim']:.3f} [{r['entity'] or '—'}] {r['fact'][:120]}" + for r in rows[:5] # top 5 per stashed fact keeps prompt short + ] + sections.append(header + "\n" + "\n".join(body_lines)) + return "\n\n".join(sections) + + +# --------------------------------------------------------------------------- +# Kimi thinking call +# --------------------------------------------------------------------------- + + +class PromotionError(RuntimeError): + pass + + +async def _call_kimi_thinking(prompt: str, *, client: Optional[httpx.AsyncClient] = None) -> Dict[str, Any]: + """Single Kimi call producing the promotion diff JSON object. + + Tries Kimi-K2-Thinking first; falls back to Kimi-K2.5 on 4xx model-not-found. + """ + api_key = _read_synthetic_api_key() + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + + payload = { + "model": PROMOTION_MODEL, + "messages": [{"role": "user", "content": prompt}], + "temperature": PROMOTION_TEMPERATURE, + "max_tokens": PROMOTION_MAX_TOKENS, + "response_format": {"type": "json_object"}, + } + + owns = client is None + client = client or httpx.AsyncClient() + try: + try: + r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) + except httpx.RequestError as exc: + raise PromotionError(f"synthetic.new network: {exc}") from exc + if r.status_code == 404 or (r.status_code == 400 and "model" in r.text.lower()): + logger.warning("Kimi-Thinking unavailable; falling back to %s", PROMOTION_FALLBACK_MODEL) + payload["model"] = PROMOTION_FALLBACK_MODEL + r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) + if r.status_code >= 400: + raise PromotionError(f"synthetic.new {r.status_code}: {r.text[:300]}") + body = r.json() + finally: + if owns: + await client.aclose() + + content = ((body.get("choices") or [{}])[0].get("message") or {}).get("content", "") + try: + diff = json.loads(content) + except json.JSONDecodeError as exc: + raise PromotionError(f"Kimi returned non-JSON: {exc}: {content[:200]}") from exc + if not isinstance(diff, dict): + raise PromotionError(f"Kimi returned non-object: {type(diff).__name__}") + + usage = body.get("usage") or {} + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_promotion_kimi", + "model": payload["model"], + "tokens_in": usage.get("prompt_tokens"), + "tokens_out": usage.get("completion_tokens"), + }) + return diff + + +# --------------------------------------------------------------------------- +# Digest rendering (spec §5.4) +# --------------------------------------------------------------------------- + + +def render_digest_markdown(diff: WeekDigest, candidates: List[Dict[str, Any]]) -> str: + n_user = sum(1 for c in candidates if not c["synthetic"]) + n_synth = sum(1 for c in candidates if c["synthetic"]) + header = ( + f"# 📚 Weekly Memory 
Review — {diff.digest_id.removeprefix('wk-')}\n" + f"{len(candidates)} episodes scanned this week " + f"({n_user} user/assistant + {n_synth} cron-synthetic).\n" + f"24 h to reject via `/memreview reject {diff.digest_id}`; default approve.\n" + ) + + sections = [] + + if diff.promote: + lines = [f"## ⬆️ Promote to permanent ({len(diff.promote)})"] + for p in diff.promote: + entity = p.get("entity", "?") + fact = p.get("fact", "") + importance = p.get("importance", 2) + valid_to = p.get("valid_to") or "永久" + srcs = p.get("source_episode_ids") or [] + src_str = ( + ", ".join(f"#{i}" for i in srcs[:5]) + + (f" +{len(srcs)-5}" if len(srcs) > 5 else "") + ) + lines.append(f"- 🆕 **{entity}**: \"{fact}\"") + lines.append(f" evidence: {src_str} | importance {importance} | valid_to: {valid_to}") + sections.append("\n".join(lines)) + + if diff.dedup_hits: + lines = [f"## 🔁 Dedup confirmations ({len(diff.dedup_hits)})"] + for d in diff.dedup_hits: + srcs = d.get("source_episode_ids") or [] + action = d.get("action", "bump_hits") + lines.append( + f"- existing #{d.get('existing_fact_id')} ← {len(srcs)} reaffirmation(s), action={action}" + ) + if action == "refine_text" and d.get("refined_text"): + lines.append(f" refined → \"{d['refined_text']}\"") + sections.append("\n".join(lines)) + + if diff.expire: + lines = [f"## 🪦 Expiring ({len(diff.expire)})"] + for e in diff.expire: + lines.append( + f"- existing #{e.get('existing_fact_id')} → valid_to={e.get('valid_to')} " + f"({e.get('reason', '—')})" + ) + sections.append("\n".join(lines)) + + if diff.drop_as_noise: + lines = [f"## 🗑️ Skipped as noise ({len(diff.drop_as_noise)})"] + for n in diff.drop_as_noise: + ids = n.get("episode_ids") or [] + lines.append(f"- {len(ids)} episode(s): {n.get('reason', '—')}") + sections.append("\n".join(lines)) + + if not sections: + sections.append("_No actions this week._") + + return header + "\n" + "\n\n".join(sections) + + +# --------------------------------------------------------------------------- +# Discord posting +# --------------------------------------------------------------------------- + + +def discord_post(content: str, channel_id: str, *, bot_token: Optional[str] = None) -> bool: + """POST a message to a Discord channel. Returns True on success.""" + bot_token = bot_token or os.environ.get("DISCORD_BOT_TOKEN") + if not bot_token or not channel_id: + logger.warning("discord_post missing bot_token or channel_id") + return False + # Discord rejects messages over 2000 chars; chunk if needed. + chunks: List[str] = [] + remaining = content + while remaining: + if len(remaining) <= 1990: + chunks.append(remaining) + break + # Split on the last newline before 1990 chars to avoid mid-line breaks. + cut = remaining.rfind("\n", 0, 1990) + if cut <= 0: + cut = 1990 + chunks.append(remaining[:cut]) + remaining = remaining[cut:].lstrip("\n") + + headers = { + "Authorization": f"Bot {bot_token}", + "Content-Type": "application/json", + } + url = DISCORD_API.format(channel_id=channel_id) + ok = True + with httpx.Client(timeout=20.0) as c: + for chunk in chunks: + r = c.post(url, headers=headers, json={"content": chunk}) + if r.status_code >= 400: + logger.warning("discord_post failed: %s %s", r.status_code, r.text[:200]) + ok = False + break + return ok + + +def memory_review_channel_id() -> Optional[str]: + """Resolve the Discord #memory-review channel id. + + Priority: + 1. MEMORY_REVIEW_CHANNEL_ID env var (test override) + 2. 
~/.hermes/channel_directory.json -> platforms.discord (list) + -> first entry whose name == "memory-review" + 3. Legacy flat layouts (defensive — older installs) + """ + env = os.environ.get("MEMORY_REVIEW_CHANNEL_ID") + if env: + return env + cdir = _resolve_hermes_home() / "channel_directory.json" + if not cdir.exists(): + return None + try: + data = json.loads(cdir.read_text(encoding="utf-8")) + except json.JSONDecodeError: + return None + + # Canonical layout: platforms.discord is a list of channel dicts. + plats = (data.get("platforms") or {}) + discord_chans = plats.get("discord") + if isinstance(discord_chans, list): + for c in discord_chans: + if isinstance(c, dict) and c.get("name") == "memory-review": + return c.get("id") + + # Defensive fallbacks for older / hand-edited layouts. + if isinstance(data.get("memory-review"), str): + return data["memory-review"] + chans = data.get("channels") or {} + m = chans.get("memory-review") if isinstance(chans, dict) else None + if isinstance(m, str): + return m + if isinstance(m, dict): + return m.get("id") or m.get("channel_id") + return None + + +# --------------------------------------------------------------------------- +# Main entry points +# --------------------------------------------------------------------------- + + +async def weekly_promotion( + conn: sqlite3.Connection, + *, + today: Optional[date] = None, + dry_run: bool = False, + discord_channel_id: Optional[str] = None, + kimi_fn=None, # injectable for tests + embed_fn=None, +) -> Dict[str, Any]: + """Run one weekly promotion cycle. Returns a summary dict.""" + today = today or date.today() + digest_id = digest_id_for(today) + + # /mem off kill switch — skip the entire weekly cycle. + try: + from plugins.memreview import mem_off_active + if mem_off_active(): + return { + "digest_id": digest_id, + "candidates": 0, + "skipped": "/mem off active", + } + except Exception: + pass + + candidates = _read_pending_episodes(conn) + if not candidates: + return {"digest_id": digest_id, "candidates": 0, "skipped": "no candidates"} + + # Build neighbor map per stashed_fact across the week. + neighbors_by_fact: Dict[str, List[Dict[str, Any]]] = {} + for c in candidates: + for sf in c["stashed_facts"]: + text = (sf or {}).get("text") or "" + if not text or text in neighbors_by_fact: + continue + try: + neighbors_by_fact[text] = await _vec_search(conn, text) + except Exception as exc: + logger.warning("vec_search failed for stashed fact: %s", exc) + neighbors_by_fact[text] = [] + + prompt = PROMOTION_PROMPT.format( + digest_id=digest_id, + today=today.isoformat(), + week_label=today.isoformat(), + candidates_block=_format_candidates_block(candidates), + neighbors_block=_format_neighbors_block(neighbors_by_fact), + ) + + kimi = kimi_fn or _call_kimi_thinking + try: + diff_dict = await kimi(prompt) + except Exception as exc: + logger.exception("Kimi promotion call failed") + return {"digest_id": digest_id, "candidates": len(candidates), "error": str(exc)} + + # Trust-but-verify: ensure digest_id matches and required keys exist. 
+ diff_dict.setdefault("digest_id", digest_id) + diff_dict.setdefault("candidate_episode_ids", [c["id"] for c in candidates]) + for k in ("promote", "dedup_hits", "expire", "drop_as_noise"): + diff_dict.setdefault(k, []) + + digest = WeekDigest.from_dict(diff_dict) + markdown = render_digest_markdown(digest, candidates) + + summary = { + "digest_id": digest_id, + "candidates": len(candidates), + "promote": len(digest.promote), + "dedup_hits": len(digest.dedup_hits), + "expire": len(digest.expire), + "drop_as_noise": len(digest.drop_as_noise), + "dry_run": dry_run, + } + + if dry_run: + summary["markdown_preview"] = markdown + return summary + + # Persist diff before posting so a Discord outage doesn't lose the work. + pending_path(digest_id).write_text( + json.dumps(digest.to_dict(), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + posted = False + if discord_channel_id: + posted = discord_post(markdown, discord_channel_id) + summary["discord_posted"] = posted + + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_promotion", + "digest_id": digest_id, + "summary": summary, + }) + return summary + + +def _purge_old_pending(today: date) -> int: + """Delete pending diffs older than PENDING_DIFF_TTL_DAYS.""" + cutoff = today - timedelta(days=PENDING_DIFF_TTL_DAYS) + n = 0 + for f in pending_dir().glob("*.json"): + try: + stem = f.stem.removeprefix("wk-") + d = datetime.strptime(stem, "%Y-%m-%d").date() + except ValueError: + continue + if d < cutoff: + try: + f.unlink() + # Also remove associated rejection sentinel if any. + rs = f.with_suffix(".rejected") + if rs.exists(): + rs.unlink() + n += 1 + except OSError: + pass + return n + + +def _latest_pending_diff() -> Optional[Path]: + files = sorted(pending_dir().glob("wk-*.json")) + return files[-1] if files else None + + +def _archive_diff(diff_path: Path, status: str) -> None: + target = archive_dir() / f"{diff_path.stem}.{status}.json" + diff_path.replace(target) + + +async def _apply_diff_atomic( + conn: sqlite3.Connection, + digest: WeekDigest, + today: date, + *, + embed_fn=None, +) -> Dict[str, int]: + """Apply promote / dedup / expire in one transaction; stamp promoted_at. + + Embeddings for promoted facts are computed BEFORE the transaction + opens, so the writer lock is held only for the duration of the + SQL statements themselves (~ms). Holding it across the Voyage HTTP + round-trip would block concurrent writes from the hot path. + + Returns counts of each action performed. + """ + counts = {"promoted": 0, "dedup_bumped": 0, "dedup_refined": 0, "expired": 0, "stamped": 0} + + # Pre-embed all promote texts (outside transaction). + embed = embed_fn or voyage_embed + promote_blobs: List[Optional[bytes]] = [] + promote_texts = [p.get("fact", "") for p in digest.promote] + non_empty = [t for t in promote_texts if t] + if non_empty: + embeddings = await embed(non_empty) + # Map back to original positions (None for empty fact strings). + emb_iter = iter(embeddings) + promote_blobs = [next(emb_iter) if t else None for t in promote_texts] + else: + promote_blobs = [None] * len(promote_texts) + + try: + conn.execute("BEGIN") + + # 1. promote — INSERT new semantic_facts. Trigger sf_after_insert + # mirrors each row into vec_facts automatically. 
+ for p, blob in zip(digest.promote, promote_blobs): + fact = p.get("fact", "") + if not fact or blob is None: + continue + conn.execute( + """ + INSERT INTO semantic_facts + (entity, fact, embedding, importance, valid_from, valid_to, + source_episode_ids) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + p.get("entity"), + fact, + blob, + int(p.get("importance", 2) or 2), + p.get("valid_from") or today.isoformat(), + p.get("valid_to"), + json.dumps(p.get("source_episode_ids") or []), + ), + ) + counts["promoted"] += 1 + + # 2. dedup_hits — bump the existing fact's hits + last_seen, optional refine. + for d in digest.dedup_hits: + fid = d.get("existing_fact_id") + if fid is None: + continue + if d.get("action") == "refine_text" and d.get("refined_text"): + conn.execute( + "UPDATE semantic_facts SET fact = ?, last_seen = datetime('now'), " + "hits = hits + 1 WHERE id = ?", + (d["refined_text"], fid), + ) + counts["dedup_refined"] += 1 + else: + conn.execute( + "UPDATE semantic_facts SET last_seen = datetime('now'), " + "hits = hits + 1 WHERE id = ?", + (fid,), + ) + counts["dedup_bumped"] += 1 + + # 3. expire — set valid_to (caller chose date). + for e in digest.expire: + fid = e.get("existing_fact_id") + if fid is None: + continue + conn.execute( + "UPDATE semantic_facts SET valid_to = ? WHERE id = ?", + (e.get("valid_to") or today.isoformat(), fid), + ) + counts["expired"] += 1 + + # 4. stamp promoted_at on every candidate episode. + if digest.candidate_episode_ids: + placeholders = ",".join("?" * len(digest.candidate_episode_ids)) + conn.execute( + f"UPDATE episodes SET promoted_at = date('now') WHERE id IN ({placeholders})", + digest.candidate_episode_ids, + ) + counts["stamped"] = len(digest.candidate_episode_ids) + + conn.commit() + except Exception: + conn.rollback() + raise + return counts + + +async def weekly_apply( + conn: sqlite3.Connection, + *, + today: Optional[date] = None, + embed_fn=None, +) -> Dict[str, Any]: + """Apply the latest pending diff (or archive-as-rejected). Returns summary.""" + today = today or date.today() + + purged = _purge_old_pending(today) + diff_path = _latest_pending_diff() + + if not diff_path: + return {"purged": purged, "applied": False, "reason": "no pending diff"} + + digest_id = diff_path.stem + sentinel = rejection_sentinel(digest_id) + if sentinel.exists(): + _archive_diff(diff_path, "rejected") + try: + sentinel.unlink() + except OSError: + pass + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_apply", + "digest_id": digest_id, + "result": "rejected", + }) + return {"purged": purged, "applied": False, "digest_id": digest_id, "reason": "rejected"} + + try: + diff_dict = json.loads(diff_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + return {"purged": purged, "applied": False, "error": f"diff load: {exc}"} + + digest = WeekDigest.from_dict(diff_dict) + counts = await _apply_diff_atomic(conn, digest, today, embed_fn=embed_fn) + _archive_diff(diff_path, "applied") + + summary = { + "purged": purged, + "applied": True, + "digest_id": digest_id, + **counts, + } + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_apply", + **summary, + }) + return summary diff --git a/plugins/memory/sqlite_vec/read.py b/plugins/memory/sqlite_vec/read.py new file mode 100644 index 00000000000..05a7e5b66d9 --- /dev/null +++ b/plugins/memory/sqlite_vec/read.py @@ -0,0 +1,175 @@ +"""Read path for the sqlite_vec memory plugin. 
+ +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §4 + +Two-step retrieval: + 1. vec0 prefilter: top k=50 by cosine distance on int8 embeddings + 2. SQL CTE rerank: score = (1 - distance) * 0.7 + exp(-age_days/90) * 0.3 + filter active state + valid_to NULL or future, ORDER BY score DESC LIMIT k + +`hits` bumping happens fire-and-forget after the reply is sent (caller's +responsibility to schedule). Errors are swallowed with a warning. + +p95 query latency is logged to ~/.hermes/logs/memory.log. The log path is +overridable via the constructor for testing. +""" + +from __future__ import annotations + +import logging +import sqlite3 +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional + +from .embed import voyage_embed + +logger = logging.getLogger(__name__) + +DEFAULT_K = 8 +PREFILTER_K = 50 + + +def _default_log_path() -> Path: + """Resolve the memory.log path lazily so HERMES_HOME (e.g. /opt/data + inside the container) wins over the worker thread's Path.home().""" + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) / "logs" / "memory.log" + except Exception: + return Path.home() / ".hermes" / "logs" / "memory.log" + + +DEFAULT_LOG_PATH = _default_log_path() + +# Spec §4 — SQL is locked. Do not edit weights without updating the spec +# and re-running the B1 worked example. +RETRIEVE_SQL = """ +WITH knn AS ( + SELECT fact_id, distance + FROM vec_facts + WHERE embedding MATCH vec_int8(?) AND k = {prefilter_k} +) +SELECT sf.id, sf.fact, sf.entity, sf.created_at, sf.importance, + (1 - knn.distance) AS sim, + (julianday('now') - julianday(sf.created_at)) AS age_days, + (1 - knn.distance) * 0.7 + + exp(-(julianday('now') - julianday(sf.created_at)) / 90.0) * 0.3 AS score +FROM knn +JOIN semantic_facts sf ON sf.id = knn.fact_id +WHERE sf.state = 'active' + AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) +ORDER BY score DESC +LIMIT ?; +""" + + +@dataclass +class Fact: + """A retrieved fact with score breakdown for prompt-injection or /memdebug.""" + + id: int + fact: str + entity: Optional[str] + created_at: str + importance: int + sim: float + age_days: float + score: float + + +def _append_log(log_path: Path, payload: dict) -> None: + """Append one JSON line to memory.log; never raise into the read path.""" + import json + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +async def read_memory( + query: str, + conn: sqlite3.Connection, + *, + k: int = DEFAULT_K, + log_path: Path = DEFAULT_LOG_PATH, +) -> List[Fact]: + """Embed `query`, retrieve top-`k` facts, log latency, return Fact list.""" + [qvec] = await voyage_embed([query]) + + sql = RETRIEVE_SQL.format(prefilter_k=PREFILTER_K) + t0 = time.perf_counter() + rows = conn.execute(sql, (qvec, k)).fetchall() + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + + facts = [ + Fact( + id=row["id"], + fact=row["fact"], + entity=row["entity"], + created_at=row["created_at"], + importance=row["importance"], + sim=float(row["sim"]), + age_days=float(row["age_days"]), + score=float(row["score"]), + ) + for row in rows + ] + + _append_log( + log_path, + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "q": query, + "k": k, + "n": len(facts), + "sql_ms": round(elapsed_ms, 2), + }, + ) + return facts + + +async def bump_hits(fact_ids: 
Iterable[int], conn: sqlite3.Connection) -> None: + """Fire-and-forget UPDATE; swallow errors with a warning log. + + Caller must wrap with ``asyncio.create_task()`` to avoid blocking the + reply. Per spec §4 hits-bump runs AFTER discord_send, so we keep this + cheap (single UPDATE … IN (…)) and never propagate errors. + """ + ids = list(fact_ids) + if not ids: + return + placeholders = ",".join("?" * len(ids)) + try: + conn.execute( + f"UPDATE semantic_facts SET hits = hits + 1, " + f"last_seen = datetime('now') WHERE id IN ({placeholders})", + ids, + ) + conn.commit() + except sqlite3.Error as exc: + logger.warning("bump_hits swallowed error for %d ids: %s", len(ids), exc) + + +def format_facts_for_prompt(facts: List[Fact], *, with_meta: bool = False) -> str: + """Render top-k facts as a markdown bullet list for system-prompt injection. + + Used by SqliteVecMemoryProvider.prefetch() (with_meta=True per W2-3 + spec) and /memdebug (with_meta=False for compact display). + + No header — the caller owns the section title. + """ + if not facts: + return "" + lines = [] + for f in facts: + prefix = f"[{f.entity}] " if f.entity else "" + suffix = ( + f" (importance: {f.importance}, age: {int(f.age_days)} days)" + if with_meta else "" + ) + lines.append(f"- {prefix}{f.fact}{suffix}") + return "\n".join(lines) diff --git a/plugins/memory/sqlite_vec/schema.sql b/plugins/memory/sqlite_vec/schema.sql new file mode 100644 index 00000000000..5910309543b --- /dev/null +++ b/plugins/memory/sqlite_vec/schema.sql @@ -0,0 +1,41 @@ +-- Hermes V3 memory schema — episodes (hot raw) + semantic_facts (cold curated) +-- Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §3 + +PRAGMA journal_mode = WAL; +PRAGMA synchronous = NORMAL; + +-- Hot tier: raw turn-by-turn record. All Discord turns + cron synthetic land here. +CREATE TABLE IF NOT EXISTS episodes ( + id INTEGER PRIMARY KEY, + ts TEXT NOT NULL, + channel TEXT NOT NULL, + external_id TEXT NOT NULL, + role TEXT NOT NULL CHECK (role IN ('user', 'assistant')), + text TEXT NOT NULL, + synthetic INTEGER NOT NULL DEFAULT 0, + embedding BLOB, + metadata TEXT, + promoted_at TEXT, + UNIQUE(channel, external_id) +); +CREATE INDEX IF NOT EXISTS idx_episodes_ts ON episodes(ts); +CREATE INDEX IF NOT EXISTS idx_episodes_promoted_pending + ON episodes(promoted_at, ts) WHERE promoted_at IS NULL; + +-- Cold tier: curated facts. Cattia's actual working memory queries this. +CREATE TABLE IF NOT EXISTS semantic_facts ( + id INTEGER PRIMARY KEY, + entity TEXT, + fact TEXT NOT NULL, + embedding BLOB NOT NULL, + source_episode_ids TEXT, + importance INTEGER DEFAULT 2, + hits INTEGER DEFAULT 0, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + last_seen TEXT, + state TEXT DEFAULT 'active' CHECK (state IN ('active', 'archived')), + valid_from TEXT NOT NULL DEFAULT (date('now')), + valid_to TEXT +); +CREATE INDEX IF NOT EXISTS idx_facts_entity ON semantic_facts(entity); +CREATE INDEX IF NOT EXISTS idx_facts_active ON semantic_facts(state, valid_to); diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py new file mode 100644 index 00000000000..97ec4c3e061 --- /dev/null +++ b/plugins/memory/sqlite_vec/store.py @@ -0,0 +1,82 @@ +"""sqlite-vec backed memory store: schema bootstrap + connection helper. + +W1 scope: schema only. Read/write paths come in W2/W3. 
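+
+Typical bootstrap (a minimal sketch; the path shown is illustrative)::
+
+ from pathlib import Path
+ from plugins.memory.sqlite_vec.store import init_db
+
+ # creates tables, the vec0 virtual table, and the sync triggers if missing
+ conn = init_db(Path("/opt/data/memories/memory.db"))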
+""" + +from __future__ import annotations + +import logging +import sqlite3 +from pathlib import Path +from typing import Optional + +import sqlite_vec + +logger = logging.getLogger(__name__) + +VEC_DIM = 512 # voyage-3.5-lite output dimension we store + +_SCHEMA_PATH = Path(__file__).parent / "schema.sql" + +_VEC_VIRTUAL_TABLE_SQL = f""" +CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts USING vec0( + fact_id INTEGER PRIMARY KEY, + embedding int8[{VEC_DIM}] distance_metric=cosine +); +""" + +# Triggers keep vec_facts in sync with semantic_facts. embedding is stored as +# raw int8 BLOB (512 bytes) on the relational side; vec0 needs vec_int8() +# wrapper to interpret it (without it, vec0 assumes float32). +_TRIGGERS_SQL = """ +CREATE TRIGGER IF NOT EXISTS sf_after_insert +AFTER INSERT ON semantic_facts +BEGIN + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); +END; + +CREATE TRIGGER IF NOT EXISTS sf_after_update_embedding +AFTER UPDATE OF embedding ON semantic_facts +BEGIN + -- vec0 int8 columns reject UPDATE even via vec_int8(); use DELETE+INSERT. + DELETE FROM vec_facts WHERE fact_id = NEW.id; + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); +END; + +CREATE TRIGGER IF NOT EXISTS sf_after_delete +AFTER DELETE ON semantic_facts +BEGIN + DELETE FROM vec_facts WHERE fact_id = OLD.id; +END; +""" + + +def open_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: + """Open a sqlite connection with sqlite-vec extension loaded. + + Pass ``check_same_thread=False`` when the connection will be reused + across threads (e.g. the provider's prefetch worker pool). Caller is + then responsible for serializing access via a lock. + """ + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(db_path), check_same_thread=check_same_thread) + conn.enable_load_extension(True) + sqlite_vec.load(conn) + conn.enable_load_extension(False) + conn.row_factory = sqlite3.Row + return conn + + +def bootstrap_schema(conn: sqlite3.Connection) -> None: + """Idempotently create tables, indexes, vec0 virtual table, and triggers.""" + conn.executescript(_SCHEMA_PATH.read_text()) + conn.executescript(_VEC_VIRTUAL_TABLE_SQL) + conn.executescript(_TRIGGERS_SQL) + conn.commit() + + +def init_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: + """Open + bootstrap. Returns a ready-to-use connection.""" + conn = open_db(db_path, check_same_thread=check_same_thread) + bootstrap_schema(conn) + return conn diff --git a/plugins/memory/sqlite_vec/write.py b/plugins/memory/sqlite_vec/write.py new file mode 100644 index 00000000000..227f2b35e1a --- /dev/null +++ b/plugins/memory/sqlite_vec/write.py @@ -0,0 +1,251 @@ +"""Per-turn write-back into the sqlite_vec memory store (W3-2). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.1. + +Hot-path flow per Discord turn: + + 1. PHI gate — if ``channel`` is in PHI_BLACKLIST_CHANNELS, raw episode + rows still land but extraction is skipped (no PHI to the cloud LLM). + 2. Extract — kimi_extract() returns 0..N ExtractedFacts. + 3. Embed — voyage_embed([user_msg, reply, *fact_texts]) in one batch. + 4. INSERT 2 episode rows (user, assistant) with + ``ON CONFLICT(channel, external_id) DO NOTHING`` for idempotency + under Discord redelivery / cron retries / container restarts. + 5. Fast-track facts whose ``valid_to_hint`` parses to ≤ today + 30d + directly into ``semantic_facts`` (the trigger mirrors them into + ``vec_facts``). 
Longer-lived / undated facts are JSON-stashed in + ``episodes.metadata.stashed_facts`` for W3-3 weekly_promotion. + 6. Any exception → append a JSONL line to + ``~/.hermes/logs/memory_write_failures.jsonl`` and swallow. + The reply was already sent before this fired; we never propagate. + +The function is fire-and-forget: the caller schedules it via +``asyncio.create_task`` (or in our case, a worker thread the provider +spawns) AFTER ``discord_send`` so write latency cannot stall the user. +""" + +from __future__ import annotations + +import json +import logging +import sqlite3 +import time +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +from .extract import ( + PHI_BLACKLIST_CHANNELS, + ExtractedFact, + kimi_extract, +) + +logger = logging.getLogger(__name__) + +# Spec §5.3 — fast-track threshold (raised from 7d to 30d): facts that +# expire within ~1 month land directly in semantic_facts so they're +# usable on the next turn instead of waiting up to 7 days for the +# weekly review. +FAST_TRACK_DAYS = 30 + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +def _failure_log_path() -> Path: + return _resolve_hermes_home() / "logs" / "memory_write_failures.jsonl" + + +def _append_failure(payload: Dict[str, Any], log_path: Optional[Path] = None) -> None: + log_path = log_path or _failure_log_path() + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") + except OSError as exc: + logger.warning("memory_write_failures.jsonl write failed: %s", exc) + + +def _parse_valid_to_hint(hint: Optional[str]) -> Optional[date]: + """Parse 'YYYY-MM-DD' tolerantly. Return None on bad / missing input.""" + if not hint: + return None + try: + return datetime.strptime(hint.strip(), "%Y-%m-%d").date() + except (ValueError, TypeError): + return None + + +def _fact_should_fast_track(fact: ExtractedFact, today: date) -> bool: + """True iff fact has a valid_to_hint within FAST_TRACK_DAYS of today.""" + expiry = _parse_valid_to_hint(fact.valid_to_hint) + if not expiry: + return False + return expiry <= today + timedelta(days=FAST_TRACK_DAYS) + + +# --------------------------------------------------------------------------- +# Main entry point +# --------------------------------------------------------------------------- + + +async def write_episode( + user_msg: str, + reply: str, + channel: str, + msg_id: str, + ts: str, + conn: sqlite3.Connection, + *, + embed_fn: Optional[Callable] = None, + extract_fn: Optional[Callable] = None, + failure_log_path: Optional[Path] = None, +) -> Dict[str, Any]: + """Persist one Discord turn to the memory store. + + Returns a summary dict for caller logging: + {episodes: 0|1|2, fast_tracked: N, stashed: N, skipped_extract: bool} + + Never raises. Errors land in ``memory_write_failures.jsonl``. + """ + summary: Dict[str, Any] = { + "episodes": 0, + "fast_tracked": 0, + "stashed": 0, + "skipped_extract": False, + } + skip_extract = channel in PHI_BLACKLIST_CHANNELS + summary["skipped_extract"] = skip_extract + + try: + # ---- 1. 
extract (skip on PHI channel) + if skip_extract or not (extract_fn or kimi_extract): + facts: List[ExtractedFact] = [] + else: + extractor = extract_fn or kimi_extract + try: + facts = await extractor(user_msg, reply, channel, ts) + except Exception as exc: + # Extract failure is non-fatal — we still record the + # raw episode so weekly_promotion can re-extract later. + logger.warning("kimi_extract failed; continuing without facts: %s", exc) + facts = [] + + # ---- 2. embed (raw turn + each fact text in one call) + embed = embed_fn + if embed is None: + from .embed import voyage_embed + embed = voyage_embed + + texts_to_embed = [user_msg, reply] + [f.text for f in facts] + # Filter empty strings — Voyage rejects them. + non_empty = [(i, t) for i, t in enumerate(texts_to_embed) if t and t.strip()] + if non_empty: + indices, texts = zip(*non_empty) + blobs_dense = await embed(list(texts)) + # Re-densify back to original positions; missing slots get None. + blobs: List[Optional[bytes]] = [None] * len(texts_to_embed) + for slot, blob in zip(indices, blobs_dense): + blobs[slot] = blob + else: + blobs = [None] * len(texts_to_embed) + + user_blob, reply_blob = blobs[0], blobs[1] + fact_blobs = blobs[2:] + + # ---- 3. partition facts into fast-track vs stash BEFORE INSERT + today = date.today() + fast_track: List[tuple] = [] # [(fact, blob), ...] + stashed: List[Dict[str, Any]] = [] # JSON-serialisable dicts + for f, blob in zip(facts, fact_blobs): + if _fact_should_fast_track(f, today): + if blob is not None: + fast_track.append((f, blob)) + else: + # No embedding for this fact → can't insert into + # semantic_facts (embedding is NOT NULL). Demote to stash. + stashed.append(f.raw or _fact_to_dict(f)) + else: + stashed.append(f.raw or _fact_to_dict(f)) + + metadata = {"stashed_facts": stashed} if stashed else {} + metadata_json = json.dumps(metadata, ensure_ascii=False) if metadata else None + + # ---- 4. INSERT episodes (atomic with fast-track inserts) + try: + conn.execute("BEGIN") + ep_rows = [ + (ts, channel, msg_id + ":user", "user", user_msg, 0, user_blob, metadata_json), + (ts, channel, msg_id + ":asst", "assistant", reply, 0, reply_blob, metadata_json), + ] + for row in ep_rows: + cur = conn.execute( + """ + INSERT INTO episodes + (ts, channel, external_id, role, text, synthetic, embedding, metadata) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(channel, external_id) DO NOTHING + """, + row, + ) + if cur.rowcount: + summary["episodes"] += 1 + + # ---- 5. fast-track facts → semantic_facts (trigger mirrors to vec_facts) + for f, blob in fast_track: + conn.execute( + """ + INSERT INTO semantic_facts + (entity, fact, embedding, importance, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + ( + f.entity, + f.text, + blob, + f.importance, + today.isoformat(), + f.valid_to_hint, + ), + ) + summary["fast_tracked"] += 1 + + summary["stashed"] = len(stashed) + conn.commit() + except Exception: + conn.rollback() + raise + + return summary + + except Exception as exc: + logger.warning("write_episode failed for msg_id=%s: %s", msg_id, exc) + _append_failure( + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "channel": channel, + "msg_id": msg_id, + "user": user_msg, + "reply": reply, + "error": str(exc), + "summary_so_far": summary, + }, + log_path=failure_log_path, + ) + return summary + + +def _fact_to_dict(f: ExtractedFact) -> Dict[str, Any]: + """Serialise an ExtractedFact for stashing in episodes.metadata.""" + return { + "type": f.type, + "text": f.text, + "entity": f.entity, + "importance": f.importance, + "valid_to_hint": f.valid_to_hint, + } diff --git a/plugins/memreview/__init__.py b/plugins/memreview/__init__.py new file mode 100644 index 00000000000..d8794fe74db --- /dev/null +++ b/plugins/memreview/__init__.py @@ -0,0 +1,227 @@ +"""``/memreview`` and ``/mem`` slash commands — admin / kill-switch (W3-4). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.1. + +Two commands: + + /memreview reject - per-digest opt-out. Writes a sentinel + file ``pending_diffs/.rejected`` + that ``weekly_apply`` reads on Monday + morning and archives the diff without + applying. + + /mem off - global kill switch. Writes ``MEM_OFF`` + in HERMES_HOME. Both ``write_episode`` + (hot path) and ``weekly_promotion`` + (cold path) check for this file at the + top of each call and short-circuit to + a no-op + warning log. + + /mem on - reverses the kill switch by deleting + ``MEM_OFF`` (companion to /mem off). + + /mem status - prints whether the kill switch is set + and lists pending diffs awaiting apply. + +Why slash commands and not Discord reactions: spec §7.1 explicitly chose +slash because reactions don't reliably trigger webhook events across all +bot adapters (silent kill-switch failure mode that's worse than no +switch). +""" + +from __future__ import annotations + +import asyncio +import logging +import os +import re +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +def _pending_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "pending_diffs" + p.mkdir(parents=True, exist_ok=True) + return p + + +def _archive_dir() -> Path: + return _resolve_hermes_home() / "memories" / "diff_archive" + + +def mem_off_path() -> Path: + """The global kill-switch sentinel.""" + return _resolve_hermes_home() / "MEM_OFF" + + +def mem_off_active() -> bool: + """Public predicate consumed by promotion.py + provider.sync_turn.""" + return mem_off_path().exists() + + +# --------------------------------------------------------------------------- +# /memreview +# --------------------------------------------------------------------------- + + +_MEMREVIEW_HELP = ( + "**/memreview** — review or reject the weekly memory promotion digest.\n" + "Usage:\n" + " `/memreview reject ` — write the rejection sentinel; " + "Monday's apply will archive the diff without applying it.\n" + " `/memreview pending` — list digests currently awaiting apply.\n" + " `/memreview status` — same as `pending`." 
+) + + +_DIGEST_ID_RE = re.compile(r"^wk-\d{4}-\d{2}-\d{2}$") + + +def _list_pending_diffs() -> List[str]: + out = [] + for f in sorted(_pending_dir().glob("wk-*.json")): + rejected = f.with_suffix(".rejected").exists() + flag = " (rejected — will be archived Mon)" if rejected else "" + out.append(f"- `{f.stem}`{flag}") + return out + + +def _handle_memreview(raw_args: str) -> str: + args = (raw_args or "").strip() + if not args: + return _MEMREVIEW_HELP + + parts = args.split(maxsplit=1) + sub = parts[0].lower() + + if sub in ("pending", "status", "list"): + items = _list_pending_diffs() + if not items: + return "**/memreview** — no pending diffs." + return "**/memreview** — pending diffs:\n" + "\n".join(items) + + if sub == "reject": + rest = parts[1].strip() if len(parts) > 1 else "" + if not _DIGEST_ID_RE.match(rest): + return ( + f"**/memreview reject** — digest_id must look like " + f"`wk-YYYY-MM-DD`. Got: `{rest!r}`" + ) + diff_path = _pending_dir() / f"{rest}.json" + if not diff_path.exists(): + return ( + f"**/memreview reject** — no pending diff named `{rest}`. " + f"Use `/memreview pending` to list available digest_ids." + ) + sentinel = _pending_dir() / f"{rest}.rejected" + try: + sentinel.write_text( + f"rejected via /memreview at {asyncio.get_event_loop().time()}", + encoding="utf-8", + ) + except (OSError, RuntimeError): + # No running loop in some sync entry paths — write a static marker. + try: + sentinel.write_text("rejected", encoding="utf-8") + except OSError as exc: + return f"**/memreview reject** error: cannot write sentinel: `{exc}`" + return ( + f"**Rejected.** Pending diff `{rest}` will be archived without " + f"applying. Episodes stay pending for next Sunday's review." + ) + + return _MEMREVIEW_HELP + + +# --------------------------------------------------------------------------- +# /mem +# --------------------------------------------------------------------------- + + +_MEM_HELP = ( + "**/mem** — global memory write-back kill switch.\n" + "Usage:\n" + " `/mem off` — disable per-turn write-back AND weekly promotion.\n" + " `/mem on` — re-enable.\n" + " `/mem status` — show whether the kill switch is currently set." +) + + +def _handle_mem(raw_args: str) -> str: + args = (raw_args or "").strip().lower() + if not args: + return _MEM_HELP + + sub = args.split()[0] + + if sub == "off": + try: + mem_off_path().write_text( + "set via /mem off\n", encoding="utf-8" + ) + except OSError as exc: + return f"**/mem off** error: `{exc}`" + return ( + "**🔇 Memory write-back disabled.**\n" + "Per-turn `write_episode` and weekly promotion will short-circuit " + "until you run `/mem on`. Read path is unaffected — Cattia still " + "retrieves from existing facts." + ) + + if sub == "on": + p = mem_off_path() + if not p.exists(): + return "**/mem on** — write-back was already enabled." + try: + p.unlink() + except OSError as exc: + return f"**/mem on** error: `{exc}`" + return "**🔊 Memory write-back enabled.** Hot + cold paths resume." 
+ + if sub == "status": + active = mem_off_active() + pending = _list_pending_diffs() + lines = [ + "**/mem status**", + f" write-back: {'🔇 OFF' if active else '🔊 ON'}", + f" MEM_OFF sentinel: `{mem_off_path()}`" + f" {'(present)' if active else '(absent)'}", + ] + if pending: + lines.append(" pending diffs:") + lines.extend(" " + p for p in pending) + else: + lines.append(" pending diffs: (none)") + return "\n".join(lines) + + return _MEM_HELP + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + ctx.register_command( + "memreview", + handler=_handle_memreview, + description="Review or reject the weekly Hermes memory promotion digest.", + args_hint="reject | pending | status", + ) + ctx.register_command( + "mem", + handler=_handle_mem, + description="Hermes memory kill switch (off / on / status).", + args_hint="off | on | status", + ) diff --git a/plugins/memreview/plugin.yaml b/plugins/memreview/plugin.yaml new file mode 100644 index 00000000000..66252043f4b --- /dev/null +++ b/plugins/memreview/plugin.yaml @@ -0,0 +1,4 @@ +name: memreview +version: 0.1.0 +description: "/memreview reject + /mem kill switch — admin slash commands for the Hermes V3 memory system (W3-4)." +author: "Li-yang Chen" diff --git a/scripts/cron/README.md b/scripts/cron/README.md new file mode 100644 index 00000000000..af5227bdc80 --- /dev/null +++ b/scripts/cron/README.md @@ -0,0 +1,19 @@ +# Hermes V3 cron scripts + +These scripts are invoked by hermes-agent's cron scheduler. The scheduler +hardcodes `HERMES_HOME/scripts/` as the only path it will exec from +(security: prevents arbitrary script execution via path traversal), so +runtime copies must live at `~/.hermes/scripts/.py` on each host. + +The canonical source lives here in version control. Deploy via: + + cp scripts/cron/weekly_promotion.py ~/.hermes/scripts/ + cp scripts/cron/weekly_apply.py ~/.hermes/scripts/ + +Cron entries are added by adding rows to `~/.hermes/cron/jobs.json` +(see the `Hermes Weekly Memory Promotion` / `Hermes Weekly Memory Apply` +entries; expressions are in UTC — `0 19 * * 6` = Sun 03:00 UTC+8). + +Both scripts emit `{"wakeAgent": false}` as the last stdout line so the +cron framework skips the agent run — delivery happens inside the script +via Discord HTTP POST. diff --git a/scripts/cron/weekly_apply.py b/scripts/cron/weekly_apply.py new file mode 100755 index 00000000000..14d1a18550e --- /dev/null +++ b/scripts/cron/weekly_apply.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +"""Cron entry point: Mon 03:00 UTC+8 weekly memory apply. + +Loads the latest pending diff (purges any older than 14 days first), +checks for a rejection sentinel file (written by /memreview reject), +and either archives the diff as rejected or applies its +promote / dedup / expire actions atomically and stamps +``episodes.promoted_at`` on the candidate rows. 
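+
+As in weekly_promotion.py, stdout ends with ``{"wakeAgent": false}`` so the
+cron framework skips the agent run after the script has handled its own
+reporting.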
+""" + +from __future__ import annotations + +import asyncio +import json +import sys + +sys.path.insert(0, "/opt/hermes") + +try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) +except Exception: + pass + +from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 + db_path, + weekly_apply, +) +from plugins.memory.sqlite_vec.store import open_db # noqa: E402 + + +def main() -> int: + conn = open_db(db_path(), check_same_thread=False) + summary = asyncio.run(weekly_apply(conn)) + print(json.dumps(summary, ensure_ascii=False, default=str)) + print('{"wakeAgent": false}') + conn.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/cron/weekly_promotion.py b/scripts/cron/weekly_promotion.py new file mode 100755 index 00000000000..55d86d1aa00 --- /dev/null +++ b/scripts/cron/weekly_promotion.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Cron entry point: Sun 03:00 UTC+8 weekly memory promotion. + +Reads the last 7 days of pending episodes, runs one Kimi-thinking call to +produce a promotion diff, persists the diff as +~/.hermes/memories/pending_diffs/wk-YYYY-MM-DD.json, renders the digest +markdown, and posts it to #memory-review for user review. + +Stdout ends with ``{"wakeAgent": false}`` so the cron framework skips +the agent run after we've handled delivery ourselves. +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from pathlib import Path + +# The hermes container exposes the source tree at /opt/hermes but does not +# add it to sys.path; cron exec'd scripts inherit nothing. Insert it +# manually so plugin imports resolve. +sys.path.insert(0, "/opt/hermes") + +# Load the user's .env so VOYAGE_API_KEY / DISCORD_BOT_TOKEN reach the +# plugin code; mirrors what run_agent.py does at module import. +try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) +except Exception: + pass + +from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 + db_path, + memory_review_channel_id, + weekly_promotion, +) +from plugins.memory.sqlite_vec.store import open_db # noqa: E402 + + +def main() -> int: + conn = open_db(db_path(), check_same_thread=False) + channel_id = memory_review_channel_id() + summary = asyncio.run( + weekly_promotion(conn, discord_channel_id=channel_id) + ) + # Print human-readable summary to stdout for cron logs. + print(json.dumps(summary, ensure_ascii=False, default=str)) + # Wake-gate: skip the agent run. + print('{"wakeAgent": false}') + conn.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/cutover/cutover.sh b/scripts/cutover/cutover.sh new file mode 100755 index 00000000000..268e7848e85 --- /dev/null +++ b/scripts/cutover/cutover.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +# W4-1 cutover script — run on chococlaw when you've decided to retire +# MEMORY.md flat-file injection and commit fully to the sqlite_vec +# memory plugin. +# +# Spec target date: 2026-05-24, *after* observing at least one successful +# weekly review cycle on the new system. +# +# Idempotent — safe to re-run if interrupted partway. 
+# +# Usage: +# ./scripts/cutover/cutover.sh # dry run, prints planned actions +# ./scripts/cutover/cutover.sh --commit # actually do the work + +set -euo pipefail + +DRY_RUN=true +if [[ "${1:-}" == "--commit" ]]; then + DRY_RUN=false +fi + +today() { date -u +%Y-%m-%d; } +say() { echo "[cutover] $*"; } +do_or_say() { + if $DRY_RUN; then + say "(dry-run) $*" + else + say "$*" + eval "$@" + fi +} + +HOME_DIR="${HERMES_HOME:-$HOME/.hermes}" +say "HERMES_HOME = ${HOME_DIR}" + +# ---- 1. Pre-flight checks -------------------------------------------------- + +say "1. Pre-flight checks" +[[ -d "${HOME_DIR}/memories" ]] || { say "ERR no ${HOME_DIR}/memories"; exit 1; } +[[ -f "${HOME_DIR}/memories/memory.db" ]] || { say "ERR no memory.db — W1 hasn't shipped"; exit 1; } +say " ✓ memory.db present" + +if ! command -v docker >/dev/null; then + say "WARN docker not on PATH — DB queries below will be skipped" +fi + +# Confirm the new system has been writing recently (last 7 days). +if command -v docker >/dev/null; then + ep_recent=$(docker exec hermes /opt/hermes/.venv/bin/python3 -c " +import sqlite3 +conn = sqlite3.connect('/opt/data/memories/memory.db') +n = conn.execute(\"SELECT count(*) FROM episodes WHERE ts > datetime('now','-7 days')\").fetchone()[0] +print(n) +" 2>/dev/null || echo 0) + if [[ "${ep_recent}" -lt 5 ]]; then + say "WARN only ${ep_recent} episodes in the last 7 days. Either the gateway" + say " hasn't been used much OR the write path isn't actually firing." + say " Fix that BEFORE cutover, or the new system has nothing to retrieve." + else + say " ✓ ${ep_recent} episodes recorded in the last 7 days" + fi +fi + +# ---- 2. Archive MEMORY.md -------------------------------------------------- + +ARCHIVE_NAME="MEMORY.md.archive-$(today)" +SRC="${HOME_DIR}/memories/MEMORY.md" +DST="${HOME_DIR}/memories/${ARCHIVE_NAME}" + +say "2. Archive MEMORY.md → ${ARCHIVE_NAME}" +if [[ ! -f "${SRC}" ]]; then + say " - ${SRC} does not exist — already archived?" +else + if [[ -f "${DST}" ]]; then + say " - ${DST} already exists — refusing to overwrite" + else + do_or_say "mv '${SRC}' '${DST}'" + do_or_say "chmod 444 '${DST}'" + fi +fi + +# ---- 3. config.yaml: confirm provider=sqlite_vec --------------------------- + +say "3. Confirm config.yaml memory.provider == sqlite_vec" +cfg="${HOME_DIR}/config.yaml" +if grep -qE '^[[:space:]]*provider:[[:space:]]*sqlite_vec' "${cfg}" 2>/dev/null; then + say " ✓ already set to sqlite_vec" +else + say " - provider not set — please edit ${cfg} manually:" + say " memory:" + say " provider: sqlite_vec" +fi + +# ---- 4. Disable legacy memory crons ---------------------------------------- + +say "4. Disable legacy memory crons in jobs.json" +do_or_say "/usr/bin/env python3 - <<'PY' +import json, pathlib +p = pathlib.Path('${HOME_DIR}/cron/jobs.json') +if not p.exists(): + print(' - no jobs.json'); raise SystemExit(0) +data = json.loads(p.read_text()) +legacy_names = { + 'Dimensions Memory Consolidation', + 'Forgetting Curve (Monthly Archive)', + 'Forgetting Curve', +} +changed = 0 +for j in data.get('jobs', []): + if j['name'] in legacy_names and j.get('enabled', False): + j['enabled'] = False + j['paused_at'] = '$(date -u +%Y-%m-%dT%H:%M:%SZ)' + j['paused_reason'] = 'W4 cutover — replaced by sqlite_vec weekly_promotion' + print(f' ✓ disabled: {j[\"name\"]}') + changed += 1 +if changed: + p.write_text(json.dumps(data, indent=2, ensure_ascii=False)) +else: + print(' - no legacy jobs found (already disabled, or never installed)') +PY" + +# ---- 5. 
Smoke test --------------------------------------------------------- + +say "5. Smoke test: provider initializes + retrieves" +if command -v docker >/dev/null; then + do_or_say "docker exec hermes /opt/hermes/.venv/bin/python3 -c ' +from hermes_cli.env_loader import load_hermes_dotenv +load_hermes_dotenv(hermes_home=\"/opt/data\", project_env=None) +from agent.memory_manager import MemoryManager +from plugins.memory import load_memory_provider +mm = MemoryManager() +mm.add_provider(load_memory_provider(\"sqlite_vec\")) +mm.initialize_all(session_id=\"cutover-smoke\", platform=\"cli\", hermes_home=\"/opt/data\", agent_context=\"primary\") +out = mm.prefetch_all(\"我太太生日\") +print(\"prefetch returned:\", \"OK\" if out else \"EMPTY\") +mm.shutdown_all() +'" +fi + +# ---- 6. Restart gateway ---------------------------------------------------- + +say "6. Restart gateway to pick up any config changes" +if command -v docker >/dev/null && [[ -d "${HOME}/Projects/hermes-agent" ]]; then + do_or_say "(cd ${HOME}/Projects/hermes-agent && docker compose restart gateway)" +fi + +# ---- Done ------------------------------------------------------------------ + +if $DRY_RUN; then + say "" + say "DRY RUN COMPLETE — no changes made. Re-run with --commit when ready." + say "" + say "After --commit, monitor for 24 hours via memory.log + #memory-review:" + say " - tail -f ~/.hermes/logs/memory.log" + say " - watch ~/.hermes/logs/memory_write_failures.jsonl size" + say " - confirm next Sunday's digest fires" + say "" + say "Rollback procedure: docs/runbooks/memory-rollback.md §3" +else + say "" + say "CUTOVER COMPLETE." + say " Archive at: ${DST}" + say " Legacy crons disabled in: ${HOME_DIR}/cron/jobs.json" + say " Gateway restarted." + say "" + say "Monitor for 24 hours then sanity-check via:" + say " docs/runbooks/memory-monitoring.md §6 (quick health check)" +fi diff --git a/scripts/import_md.py b/scripts/import_md.py new file mode 100755 index 00000000000..86743a5d8eb --- /dev/null +++ b/scripts/import_md.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +"""Seed `semantic_facts` from a flat ``MEMORY.md`` (W2-2). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §6.1. + +Format expected in ``~/.hermes/memories/MEMORY.md``:: + + Topic: content + § + Topic: another content + § + +Each entry becomes one row in ``semantic_facts``: + + entity = "禮揚." + slug(topic) # "Working style" -> "禮揚.working_style" + # "Tools & Access > Proton" -> "禮揚.tools_access.proton" + fact = content (verbatim) + importance = 2 + valid_from = '2026-05-10' + valid_to = NULL + +Idempotent: re-running with the same input does not duplicate rows. The +natural key is ``(entity, fact)`` and is enforced by a pre-INSERT lookup. + +Embeddings come from Voyage 3.5-lite via ``plugins.memory.sqlite_vec.embed``. +The trigger ``sf_after_insert`` keeps ``vec_facts`` synced automatically, so +this script writes only to ``semantic_facts``. 
+ +Usage:: + + docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ + scripts/import_md.py --dry-run + docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ + scripts/import_md.py --commit +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import re +import sqlite3 +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +DEFAULT_MEMORY_MD = Path.home() / ".hermes" / "memories" / "MEMORY.md" +DEFAULT_DB = Path("/opt/data") / "memories" / "memory.db" +DEFAULT_VALID_FROM = "2026-05-10" # spec §6.1 +DEFAULT_IMPORTANCE = 2 +DEFAULT_BATCH = 128 +ENTITY_PREFIX = "禮揚" +ENTRY_SEPARATOR = re.compile(r"^§\s*$", re.MULTILINE) + + +@dataclass +class Entry: + """One parsed MEMORY.md entry.""" + + topic: str + fact: str + + @property + def entity(self) -> str: + return f"{ENTITY_PREFIX}.{slugify_topic(self.topic)}" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +def slugify_topic(topic: str) -> str: + """Convert a human topic label to a stable entity-suffix slug. + + - Hierarchy markers ``>`` become ``.`` so prefix queries still work. + - Lowercase, ASCII alphanum kept; runs of other chars collapse to ``_``. + - CJK / unicode is preserved unchanged so 中文 topics stay readable. + + Examples: + "Working style" -> "working_style" + "Tools & Access > ProtonMail" -> "tools_access.protonmail" + "禮揚.家庭" -> "禮揚.家庭" (already a slug, untouched) + """ + parts = [p.strip() for p in topic.split(">")] + out_parts = [] + for part in parts: + s = part.strip().lower() + # Collapse non-alphanum (including '&', spaces, punctuation) to underscore. + # CJK characters are unicode word chars in Python regex with re.UNICODE + # (default for str patterns), so [^\w] excludes them = preserved. + s = re.sub(r"[^\w]+", "_", s, flags=re.UNICODE) + s = s.strip("_") + if s: + out_parts.append(s) + return ".".join(out_parts) if out_parts else "unknown" + + +def parse_memory_md(text: str) -> List[Entry]: + """Split MEMORY.md into Entry objects. + + Skips empty blocks and blocks with no ``Topic: content`` colon. Keeps + multi-line content (rare today but possible if a future entry wraps). + """ + entries: List[Entry] = [] + for raw_block in ENTRY_SEPARATOR.split(text): + block = raw_block.strip() + if not block: + continue + if ":" not in block: + logger.warning("skipping malformed block (no colon): %r", block[:60]) + continue + topic, _, content = block.partition(":") + topic = topic.strip() + content = content.strip() + if not topic or not content: + logger.warning("skipping empty topic or content: %r", block[:60]) + continue + entries.append(Entry(topic=topic, fact=content)) + return entries + + +# --------------------------------------------------------------------------- +# DB ops +# --------------------------------------------------------------------------- + + +def existing_keys(conn: sqlite3.Connection) -> set[Tuple[str, str]]: + """Return the (entity, fact) pairs already present, for idempotency.""" + rows = conn.execute("SELECT entity, fact FROM semantic_facts").fetchall() + return {(r[0], r[1]) for r in rows} + + +def insert_batch( + conn: sqlite3.Connection, + rows: List[Tuple[Entry, bytes]], + *, + valid_from: str, + importance: int, +) -> int: + """Insert one batch of (entry, embedding) pairs. 
Returns count inserted.""" + cur = conn.executemany( + """ + INSERT INTO semantic_facts(entity, fact, embedding, + importance, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, NULL) + """, + [ + (e.entity, e.fact, blob, importance, valid_from) + for e, blob in rows + ], + ) + return cur.rowcount + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +async def import_memory_md( + *, + md_path: Path, + db_path: Path, + dry_run: bool, + valid_from: str = DEFAULT_VALID_FROM, + importance: int = DEFAULT_IMPORTANCE, + batch_size: int = DEFAULT_BATCH, + embed_fn=None, # injectable for tests +) -> dict: + """Run the full import. + + Returns a summary dict: {parsed, new, skipped_dup, batches, dry_run}. + Does not return embeddings. + """ + text = md_path.read_text(encoding="utf-8") + entries = parse_memory_md(text) + + # Open DB and bootstrap if needed (idempotent — store.init_db handles that). + from plugins.memory.sqlite_vec.store import init_db + conn = init_db(db_path) + + have = existing_keys(conn) + new_entries = [e for e in entries if (e.entity, e.fact) not in have] + skipped = len(entries) - len(new_entries) + + if dry_run: + print(f"[dry-run] parsed={len(entries)} new={len(new_entries)} " + f"already_present={skipped}") + for e in new_entries[:10]: + print(f" + ({e.entity}) {e.fact[:80]!r}") + if len(new_entries) > 10: + print(f" … and {len(new_entries) - 10} more") + return { + "parsed": len(entries), + "new": len(new_entries), + "skipped_dup": skipped, + "batches": 0, + "dry_run": True, + } + + if not new_entries: + print(f"nothing to import (parsed={len(entries)}, all present)") + return { + "parsed": len(entries), + "new": 0, + "skipped_dup": skipped, + "batches": 0, + "dry_run": False, + } + + # Embed in batches; default uses real Voyage, tests inject a stub. 
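+ # All batches run inside a single transaction (BEGIN below), so a failed
+ # embed call rolls the whole import back and the script can simply be re-run.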
+ if embed_fn is None: + from plugins.memory.sqlite_vec.embed import voyage_embed + embed_fn = voyage_embed + + inserted = 0 + batches = 0 + try: + conn.execute("BEGIN") + for i in range(0, len(new_entries), batch_size): + chunk = new_entries[i : i + batch_size] + blobs = await embed_fn([e.fact for e in chunk]) + if len(blobs) != len(chunk): + raise RuntimeError( + f"embed returned {len(blobs)} for {len(chunk)} inputs" + ) + inserted += insert_batch( + conn, + list(zip(chunk, blobs)), + valid_from=valid_from, + importance=importance, + ) + batches += 1 + conn.commit() + except Exception: + conn.rollback() + raise + + print( + f"imported {inserted} entries in {batches} " + f"batch{'es' if batches != 1 else ''} " + f"(parsed={len(entries)}, skipped_dup={skipped})" + ) + return { + "parsed": len(entries), + "new": inserted, + "skipped_dup": skipped, + "batches": batches, + "dry_run": False, + } + + +def _build_arg_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(description=__doc__.split("\n")[0]) + p.add_argument( + "--memory-md", + type=Path, + default=DEFAULT_MEMORY_MD, + help="Path to MEMORY.md (default: ~/.hermes/memories/MEMORY.md)", + ) + p.add_argument( + "--db", + type=Path, + default=DEFAULT_DB, + help="Path to memory.db (default: /opt/data/memories/memory.db inside container)", + ) + g = p.add_mutually_exclusive_group(required=True) + g.add_argument("--dry-run", action="store_true", help="Show plan, do not write") + g.add_argument("--commit", action="store_true", help="Actually import") + p.add_argument("--valid-from", default=DEFAULT_VALID_FROM) + p.add_argument("--importance", type=int, default=DEFAULT_IMPORTANCE) + return p + + +def main(argv: Optional[List[str]] = None) -> int: + logging.basicConfig( + level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" + ) + args = _build_arg_parser().parse_args(argv) + + # Live import path: ensure VOYAGE_API_KEY is loaded from ~/.hermes/.env. + if args.commit: + try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) + except ImportError: + pass # tests / non-container contexts handle env themselves + + summary = asyncio.run( + import_memory_md( + md_path=args.memory_md, + db_path=args.db, + dry_run=args.dry_run, + valid_from=args.valid_from, + importance=args.importance, + ) + ) + return 0 if summary["new"] >= 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/plugins/memory/test_extract.py b/tests/plugins/memory/test_extract.py new file mode 100644 index 00000000000..45cf1d3bca1 --- /dev/null +++ b/tests/plugins/memory/test_extract.py @@ -0,0 +1,363 @@ +"""Tests for plugins/memory/sqlite_vec/extract.py (W3-1).""" + +from __future__ import annotations + +import asyncio +import json +from pathlib import Path +from unittest.mock import patch + +import httpx +import pytest + +from plugins.memory.sqlite_vec.extract import ( + EXTRACT_MODEL, + EXTRACT_PROMPT, + PHI_BLACKLIST_CHANNELS, + ExtractError, + ExtractedFact, + _coerce_fact, + _parse_json_list, + kimi_extract, +) + + +# --------------------------------------------------------------------------- +# Pure helpers +# --------------------------------------------------------------------------- + + +def test_extract_prompt_is_verbatim_spec_5_2(): + """Spec §5.2 marks EXTRACT_PROMPT as a behavioural contract — verbatim.""" + # Anchors that uniquely identify the spec's exact wording. + assert "You extract durable memories about 禮揚 from this Discord turn." 
in EXTRACT_PROMPT + assert "HARD RULES — these override everything else:" in EXTRACT_PROMPT + assert "ERR ON THE SIDE OF NOT EXTRACTING" in EXTRACT_PROMPT + assert "Skip facts that duplicate something said in the last 5 turns." in EXTRACT_PROMPT + # Placeholders must be preserved. + assert "{ts}" in EXTRACT_PROMPT and "{channel}" in EXTRACT_PROMPT + assert "{user}" in EXTRACT_PROMPT and "{assistant}" in EXTRACT_PROMPT + + +def test_phi_blacklist_matches_spec_5_1(): + assert PHI_BLACKLIST_CHANNELS == frozenset({"cmio", "cbme", "medicine"}) + + +def test_parse_json_list_bare_array(): + assert _parse_json_list('[{"type":"semantic","text":"a"}]') == [ + {"type": "semantic", "text": "a"} + ] + + +def test_parse_json_list_wrapped_object(): + assert _parse_json_list('{"facts": [{"type":"semantic","text":"a"}]}') == [ + {"type": "semantic", "text": "a"} + ] + assert _parse_json_list('{"items": [{"type":"semantic","text":"b"}]}') == [ + {"type": "semantic", "text": "b"} + ] + + +def test_parse_json_list_empty_object_returns_empty_list(): + assert _parse_json_list("{}") == [] + assert _parse_json_list("") == [] + assert _parse_json_list("not even json") == [] + + +def test_coerce_fact_drops_invalid_type(): + assert _coerce_fact({"type": "garbage", "text": "a"}) is None + assert _coerce_fact({"type": "semantic"}) is None # missing text + assert _coerce_fact({"type": "semantic", "text": " "}) is None # blank text + + +def test_coerce_fact_clamps_importance(): + f = _coerce_fact({"type": "semantic", "text": "a", "importance": 99}) + assert f.importance == 5 + f = _coerce_fact({"type": "semantic", "text": "a", "importance": -3}) + assert f.importance == 1 + f = _coerce_fact({"type": "semantic", "text": "a", "importance": "not-int"}) + assert f.importance == 2 # default fallback + + +def test_coerce_fact_round_trip_full_shape(): + raw = { + "type": "semantic", + "text": "致妤 7:30 才到家", + "entity": "禮揚.家庭", + "importance": 3, + "valid_to_hint": "2026-05-03", + } + f = _coerce_fact(raw) + assert isinstance(f, ExtractedFact) + assert f.text == "致妤 7:30 才到家" + assert f.entity == "禮揚.家庭" + assert f.importance == 3 + assert f.valid_to_hint == "2026-05-03" + + +# --------------------------------------------------------------------------- +# kimi_extract — short-circuits (no httpx call) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("channel", ["cmio", "cbme", "medicine"]) +def test_kimi_extract_phi_channel_returns_empty_no_call(channel, monkeypatch, tmp_path): + """Even with no API key, PHI channels never hit the network.""" + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + # Point auth.json lookup at an empty tmp dir so any leak would raise. 
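+ # (kimi_extract falls back to auth.json under HERMES_HOME when SYNTHETIC_API_KEY is unset.)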
+ monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + out = asyncio.run( + kimi_extract( + "病人的血壓 180/100", + "我建議轉診", + channel=channel, + ts="2026-05-02 09:00:00", + ) + ) + assert out == [] + + +def test_kimi_extract_empty_turn_returns_empty(monkeypatch, tmp_path): + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + out = asyncio.run( + kimi_extract("", "", channel="cattia", ts="2026-05-02 09:00:00") + ) + assert out == [] + + +# --------------------------------------------------------------------------- +# kimi_extract — mocked synthetic.new responses +# --------------------------------------------------------------------------- + + +def _mock_synthetic_response(facts: list, *, status: int = 200): + """Build a synthetic.new chat-completions JSON body wrapping `facts`.""" + body = { + "id": "test", + "choices": [ + { + "message": {"role": "assistant", "content": json.dumps(facts)}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 200, "completion_tokens": 80}, + } + return status, body + + +class _FakeTransport(httpx.MockTransport): + def __init__(self, status, body): + self.calls = [] + self._status = status + self._body = body + super().__init__(self._h) + + def _h(self, request: httpx.Request): + self.calls.append(request) + return httpx.Response(self._status, json=self._body) + + +def test_kimi_extract_pleasantry_returns_empty_after_call(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + status, body = _mock_synthetic_response([]) # Kimi correctly returns [] + transport = _FakeTransport(status, body) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "好的", "收到", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert out == [] + assert len(transport.calls) == 1 + log_line = (tmp_path / "memory.log").read_text().strip() + assert '"cmd": "kimi_extract"' in log_line + assert '"n_kept": 0' in log_line + + +def test_kimi_extract_short_lived_fact_with_valid_to_hint(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + facts = [ + { + "type": "semantic", + "text": "致妤今晚 (2026-05-02) 預計 7:30 才到家", + "entity": "禮揚.家庭/今晚", + "importance": 3, + "valid_to_hint": "2026-05-03", + } + ] + transport = _FakeTransport(*_mock_synthetic_response(facts)) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "今晚致妤會晚回來,大概 7:30 才到", "好喔", + channel="at-home", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert len(out) == 1 + f = out[0] + assert f.type == "semantic" + assert "7:30" in f.text + assert f.valid_to_hint == "2026-05-03" + assert f.importance == 3 + + +def test_kimi_extract_long_lived_fact_no_valid_to(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + facts = [ + { + "type": "semantic", + "text": "禮揚 最近在追 sleep medicine 的 RCT", + "entity": "禮揚.研究興趣", + "importance": 2, + } + ] + transport = _FakeTransport(*_mock_synthetic_response(facts)) + client = 
httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "最近在追 sleep medicine", "了解,要幫你 follow up 嗎", + channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert len(out) == 1 + assert out[0].valid_to_hint is None + assert out[0].entity == "禮揚.研究興趣" + + +def test_kimi_extract_drops_malformed_rows(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + facts = [ + {"type": "semantic", "text": "good fact"}, + {"type": "garbage", "text": "bad type"}, # dropped + {"type": "episodic"}, # missing text → dropped + {"type": "semantic", "text": " "}, # blank text → dropped + ] + transport = _FakeTransport(*_mock_synthetic_response(facts)) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "u", "a", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert len(out) == 1 + assert out[0].text == "good fact" + + +def test_kimi_extract_5xx_raises_extracterror(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + transport = _FakeTransport(503, {"error": "down"}) + client = httpx.AsyncClient(transport=transport) + with pytest.raises(ExtractError): + asyncio.run( + kimi_extract( + "u", "a", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + + +def test_kimi_extract_no_api_key_raises(monkeypatch, tmp_path): + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) # auth.json absent + with pytest.raises(ExtractError, match="API key"): + asyncio.run( + kimi_extract( + "u", "a", channel="cattia", ts="2026-05-02 09:00:00", + log_path=tmp_path / "memory.log", + ) + ) + + +def test_kimi_extract_reads_auth_json_when_no_env(monkeypatch, tmp_path): + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + auth = { + "credential_pool": { + "custom:synthetic": [ + {"id": "test", "access_token": "syn_test_xxx"}, + ] + } + } + (tmp_path / "auth.json").write_text(json.dumps(auth), encoding="utf-8") + transport = _FakeTransport(*_mock_synthetic_response([])) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "x", "y", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert out == [] + # The Authorization header carried the auth.json token. + assert transport.calls[0].headers["Authorization"] == "Bearer syn_test_xxx" + + + +# =========================================================================== +# Additional parser shapes discovered during live smoke test +# =========================================================================== + + +def test_parse_json_list_extracted_memories_key(): + """Kimi K2.5 with response_format=json_object often wraps the answer in + a dict with key 'extracted_memories' (sometimes alongside an 'analysis' + field showing its reasoning). 
Both must be parsed correctly.""" + payload = ( + '{"analysis": "the user mentions...", ' + '"extracted_memories": [{"type":"semantic","text":"a"}]}' + ) + out = _parse_json_list(payload) + assert out == [{"type": "semantic", "text": "a"}] + + +def test_parse_json_list_bare_single_fact_dict(): + """Kimi sometimes returns a single fact as a flat dict instead of a list. + We detect that shape by the presence of canonical fact keys.""" + payload = ( + '{"type": "episodic", "text": "致妤今晚 7:30", ' + '"entity": "禮揚.家庭", "importance": 2}' + ) + out = _parse_json_list(payload) + assert len(out) == 1 + assert out[0]["text"] == "致妤今晚 7:30" + + +def test_parse_json_list_arbitrary_dict_falls_back_to_first_list(): + """If neither canonical keys nor fact-shape is present, the first + list-valued field is returned. Defensive against future Kimi changes.""" + payload = '{"weird_unique_key": [{"type":"semantic","text":"x"}]}' + out = _parse_json_list(payload) + assert out == [{"type": "semantic", "text": "x"}] diff --git a/tests/plugins/memory/test_promotion.py b/tests/plugins/memory/test_promotion.py new file mode 100644 index 00000000000..6e452ba2132 --- /dev/null +++ b/tests/plugins/memory/test_promotion.py @@ -0,0 +1,397 @@ +"""Tests for plugins/memory/sqlite_vec/promotion.py (W3-3).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date, timedelta +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.promotion import ( + PENDING_DIFF_TTL_DAYS, + PROMOTION_PROMPT, + WeekDigest, + _apply_diff_atomic, + _format_candidates_block, + _format_neighbors_block, + _purge_old_pending, + digest_id_for, + pending_path, + rejection_sentinel, + render_digest_markdown, + weekly_apply, + weekly_promotion, +) +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Prompt + format helpers +# --------------------------------------------------------------------------- + + +def test_prompt_has_required_placeholders(): + """The prompt is .format()'d with these keys; missing any breaks promotion.""" + for key in ("{digest_id}", "{today}", "{week_label}", + "{candidates_block}", "{neighbors_block}"): + assert key in PROMOTION_PROMPT, f"missing placeholder: {key}" + + +def test_prompt_carries_hard_rules(): + assert "病歷號" in PROMOTION_PROMPT + assert "DROP_AS_NOISE" in PROMOTION_PROMPT + assert "PROMOTE" in PROMOTION_PROMPT + assert "DEDUP_HIT" in PROMOTION_PROMPT + assert "EXPIRE" in PROMOTION_PROMPT + + +def test_format_candidates_block_marks_synthetic(): + cands = [ + {"id": 1, "ts": "2026-05-02 09:00", "channel": "cattia", + "role": "user", "synthetic": False, "text": "hello", + "stashed_facts": [{"text": "禮揚 likes X", "entity": "禮揚.訓練", + "importance": 2, "valid_to_hint": None}]}, + {"id": 2, "ts": "2026-05-02 09:00", "channel": "cron", + "role": "assistant", "synthetic": True, "text": "cron output", + "stashed_facts": []}, + ] + out = _format_candidates_block(cands) + assert "👤" in out and "🤖" in out + assert "↳ stashed:" in out + + +def test_format_neighbors_block_truncates_to_top_5(): + neighbors = { + "topic": [ + {"id": i, "fact": f"fact {i}", "entity": "x", "sim": 0.9 - i * 0.01} + for i in range(10) + ] + } + out = _format_neighbors_block(neighbors) + # Only 5 
should appear. + assert out.count("#") == 5 + + +# --------------------------------------------------------------------------- +# digest_id + path helpers +# --------------------------------------------------------------------------- + + +def test_digest_id_format(): + assert digest_id_for(date(2026, 5, 11)) == "wk-2026-05-11" + + +# --------------------------------------------------------------------------- +# WeekDigest +# --------------------------------------------------------------------------- + + +def test_week_digest_round_trip(): + raw = { + "digest_id": "wk-2026-05-10", + "candidate_episode_ids": [1, 2, 3], + "promote": [{"entity": "禮揚.家庭", "fact": "x", "importance": 3}], + "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits"}], + "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10"}], + "drop_as_noise": [{"episode_ids": [4], "reason": "pleasantry"}], + } + d = WeekDigest.from_dict(raw) + assert d.digest_id == "wk-2026-05-10" + assert d.to_dict()["candidate_episode_ids"] == [1, 2, 3] + + +# --------------------------------------------------------------------------- +# render_digest_markdown +# --------------------------------------------------------------------------- + + +def test_render_digest_markdown_full_shape(): + candidates = [ + {"id": 1, "ts": "x", "channel": "c", "role": "user", + "synthetic": False, "text": "u", "stashed_facts": []}, + {"id": 2, "ts": "x", "channel": "cron", "role": "user", + "synthetic": True, "text": "u", "stashed_facts": []}, + ] + d = WeekDigest.from_dict({ + "digest_id": "wk-2026-05-10", + "candidate_episode_ids": [1, 2], + "promote": [{"entity": "禮揚.家庭", "fact": "致妤生日 3/19", + "importance": 5, "valid_to": None, + "source_episode_ids": [1]}], + "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits", + "source_episode_ids": [2]}], + "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10", + "reason": "stale"}], + "drop_as_noise": [{"episode_ids": [3], "reason": "好的"}], + }) + md = render_digest_markdown(d, candidates) + assert "Weekly Memory Review — 2026-05-10" in md + assert "(1 user/assistant + 1 cron-synthetic)" in md + assert "/memreview reject wk-2026-05-10" in md + assert "⬆️ Promote to permanent (1)" in md + assert "🔁 Dedup confirmations (1)" in md + assert "🪦 Expiring (1)" in md + assert "🗑️ Skipped as noise (1)" in md + assert "致妤生日 3/19" in md + assert "valid_to: 永久" in md # null valid_to + + +def test_render_digest_empty_sections_collapse(): + d = WeekDigest.from_dict({"digest_id": "wk-2026-05-10", + "candidate_episode_ids": []}) + md = render_digest_markdown(d, []) + assert "_No actions this week._" in md + + +# --------------------------------------------------------------------------- +# weekly_promotion (mocked Kimi) +# --------------------------------------------------------------------------- + + +def _seed_episodes(conn, today_iso: str = "2026-05-02 12:00:00"): + """Add 2 fixture episodes with stashed_facts.""" + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text, synthetic, metadata) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", + (today_iso, "cattia", "m1:user", "user", "我下週要去日本", 0, + json.dumps({"stashed_facts": [ + {"type": "semantic", "text": "禮揚下週去日本", "entity": "禮揚.家庭", + "importance": 3, "valid_to_hint": "2026-05-11"}]})), + ) + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (?, ?, ?, ?, ?)", + (today_iso, "cattia", "m1:asst", "assistant", "好的", ), + ) + conn.commit() + + +def test_weekly_promotion_no_candidates(tmp_path, monkeypatch): + 
monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + summary = asyncio.run(weekly_promotion(db)) + assert summary["candidates"] == 0 + assert "skipped" in summary + + +def test_weekly_promotion_dry_run_returns_markdown(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + async def fake_kimi(prompt): + # Sanity: prompt was actually formatted, not left with placeholders. + assert "{digest_id}" not in prompt + return { + "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", + "importance": 3, "valid_to": "2026-05-11", + "source_episode_ids": [1]}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + } + + async def fake_embed(texts): + return [_vec(50) for _ in texts] + + summary = asyncio.run(weekly_promotion( + db, dry_run=True, kimi_fn=fake_kimi, + embed_fn=fake_embed, + )) + assert summary["candidates"] == 2 + assert summary["promote"] == 1 + assert summary["dry_run"] is True + assert "markdown_preview" in summary + assert "下週去日本" in summary["markdown_preview"] + # Dry-run MUST NOT persist a pending diff or post to Discord. + assert not (tmp_path / "memories" / "pending_diffs").exists() or \ + not list((tmp_path / "memories" / "pending_diffs").glob("*.json")) + + +def test_weekly_promotion_persists_diff_on_real_run(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + async def fake_kimi(prompt): + return { + "promote": [], "dedup_hits": [], "expire": [], + "drop_as_noise": [{"episode_ids": [1, 2], "reason": "no signal"}], + } + + summary = asyncio.run(weekly_promotion( + db, dry_run=False, kimi_fn=fake_kimi, + )) + # Diff was written, even with no Discord channel configured. + files = list((tmp_path / "memories" / "pending_diffs").glob("*.json")) + assert len(files) == 1 + diff = json.loads(files[0].read_text()) + assert diff["candidate_episode_ids"] == [1, 2] + + +# --------------------------------------------------------------------------- +# weekly_apply +# --------------------------------------------------------------------------- + + +def test_weekly_apply_no_pending_diff(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + summary = asyncio.run(weekly_apply(db)) + assert summary["applied"] is False + assert "no pending diff" in summary.get("reason", "") + + +def test_weekly_apply_rejection_sentinel_archives_without_apply(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + rejection_sentinel(digest_id).write_text("rejected", encoding="utf-8") + + summary = asyncio.run(weekly_apply(db)) + assert summary["applied"] is False + assert summary["reason"] == "rejected" + # Diff moved to archive_dir, sentinel removed. 
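+    # (weekly_apply is expected to consume the sentinel: the diff is archived as
+    # <digest_id>.rejected.json under memories/diff_archive/ and the .rejected
+    # marker itself is removed, as asserted below.)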
+ assert not pending_path(digest_id).exists() + assert not rejection_sentinel(digest_id).exists() + archive = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) + assert len(archive) == 1 + + +def test_weekly_apply_promotes_inserts_and_stamps(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, + "candidate_episode_ids": [1, 2], + "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", + "importance": 3, "valid_from": "2026-05-02", + "valid_to": "2026-05-11", "source_episode_ids": [1]}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + + async def fake_embed(texts): + return [_vec(50) for _ in texts] + + summary = asyncio.run(weekly_apply(db, embed_fn=fake_embed)) + assert summary["applied"] is True + assert summary["promoted"] == 1 + assert summary["stamped"] == 2 + # New row in semantic_facts. + [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf == 1 + # Trigger mirrored into vec_facts. + [(vf,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vf == 1 + # Episodes stamped. + rows = db.execute("SELECT id, promoted_at FROM episodes ORDER BY id").fetchall() + assert all(r["promoted_at"] is not None for r in rows) + # Diff moved to archive. + archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) + assert len(archive) == 1 + + +def test_weekly_apply_dedup_bump_increments_hits(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding, hits) VALUES (?, ?, ?)", + ("禮揚 likes X", _vec(10), 0), + ) + db.commit() + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [ + {"existing_fact_id": 1, "action": "bump_hits", + "source_episode_ids": []} + ], "expire": [], "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db)) + assert summary["dedup_bumped"] == 1 + [(hits,)] = db.execute("SELECT hits FROM semantic_facts WHERE id=1").fetchall() + assert hits == 1 + + +def test_weekly_apply_expire_sets_valid_to(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("禮揚 watches paper X", _vec(10)), + ) + db.commit() + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], + "expire": [{"existing_fact_id": 1, "valid_to": "2026-05-02", + "reason": "stale"}], + "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) + assert summary["expired"] == 1 + [(vt,)] = db.execute("SELECT valid_to FROM semantic_facts WHERE id=1").fetchall() + assert vt == "2026-05-02" + + +def test_weekly_apply_purges_old_pending(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + + today = date(2026, 5, 2) + old = 
today - timedelta(days=PENDING_DIFF_TTL_DAYS + 5) + fresh = today - timedelta(days=2) + + pending_path(f"wk-{old.isoformat()}").write_text("{}") + pending_path(f"wk-{fresh.isoformat()}").write_text(json.dumps({ + "digest_id": f"wk-{fresh.isoformat()}", "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db, today=today)) + assert summary["purged"] == 1 + # Old gone, fresh applied + archived. + assert not pending_path(f"wk-{old.isoformat()}").exists() + archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) + assert len(archive) == 1 diff --git a/tests/plugins/memory/test_sqlite_vec_provider.py b/tests/plugins/memory/test_sqlite_vec_provider.py new file mode 100644 index 00000000000..9f8e8d438d1 --- /dev/null +++ b/tests/plugins/memory/test_sqlite_vec_provider.py @@ -0,0 +1,556 @@ +"""Tests for the sqlite_vec memory provider plugin (W1 scope: schema only). + +Covers: + • bootstrap_schema is idempotent (re-running does not error or duplicate) + • all 3 tables + 4 indexes + 1 virtual table + 3 triggers exist + • semantic_facts defaults work (created_at, valid_from, importance) + • vec0 virtual table answers MATCH queries with k=N prefilter + • triggers keep vec_facts synced with semantic_facts (insert/update/delete) + • SqliteVecMemoryProvider.is_available() / initialize() / shutdown() round-trip +""" + +from __future__ import annotations + +import struct +from pathlib import Path + +import pytest + +from plugins.memory.sqlite_vec import SqliteVecMemoryProvider +from plugins.memory.sqlite_vec.store import ( + VEC_DIM, + bootstrap_schema, + init_db, + open_db, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _vec(seed: int) -> bytes: + """Make a deterministic 512-d int8 BLOB for testing. + + int8 matches the locked decision in spec §1.4 (Voyage 3.5-lite, 512-dim, int8). + seed is the base value (clamped to int8 range) with a small per-dim offset + so different seeds produce different vectors but the same seed reproduces. 
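+    Worked example from the formula below: seed=10 yields 7, 8, 9, 10, 11, 12, 13,
+    then the same 7-value pattern repeats across all 512 dims.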
+ """ + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Schema bootstrap +# --------------------------------------------------------------------------- + + +def test_bootstrap_creates_all_objects(tmp_path): + db = init_db(tmp_path / "memory.db") + + table_names = { + row[0] + for row in db.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" + ) + } + assert "episodes" in table_names + assert "semantic_facts" in table_names + + index_names = { + row[0] + for row in db.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name NOT LIKE 'sqlite_%'" + ) + } + assert "idx_episodes_ts" in index_names + assert "idx_episodes_promoted_pending" in index_names + assert "idx_facts_entity" in index_names + assert "idx_facts_active" in index_names + + trigger_names = { + row[0] for row in db.execute("SELECT name FROM sqlite_master WHERE type='trigger'") + } + assert "sf_after_insert" in trigger_names + assert "sf_after_update_embedding" in trigger_names + assert "sf_after_delete" in trigger_names + + # vec0 virtual table is registered as a regular table internally + [(vec_count,)] = db.execute( + "SELECT count(*) FROM sqlite_master WHERE name='vec_facts'" + ).fetchall() + assert vec_count >= 1 + + +def test_bootstrap_is_idempotent(tmp_path): + path = tmp_path / "memory.db" + db = init_db(path) + bootstrap_schema(db) # second time + bootstrap_schema(db) # third time + # If we got here without error and tables still query, idempotency holds. + db.execute("SELECT count(*) FROM episodes").fetchone() + db.execute("SELECT count(*) FROM semantic_facts").fetchone() + + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- + + +def test_semantic_facts_defaults_are_populated(tmp_path): + db = init_db(tmp_path / "memory.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("禮揚 likes Starting Strength method", _vec(10)), + ) + db.commit() + + row = db.execute( + "SELECT importance, state, valid_from, valid_to, created_at FROM semantic_facts" + ).fetchone() + assert row["importance"] == 2 + assert row["state"] == "active" + assert row["valid_from"] is not None # default = date('now') + assert row["valid_to"] is None + assert row["created_at"] is not None + + +def test_role_check_constraint(tmp_path): + db = init_db(tmp_path / "memory.db") + with pytest.raises(Exception): + db.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (datetime('now'), 'cattia', 'msg-1', 'system', 'hi')" + ) + + +# --------------------------------------------------------------------------- +# Trigger sync between semantic_facts and vec_facts +# --------------------------------------------------------------------------- + + +def test_triggers_sync_insert_update_delete(tmp_path): + db = init_db(tmp_path / "memory.db") + + # INSERT + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("fact A", _vec(50)), + ) + db.commit() + [(count_after_insert,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert count_after_insert == 1 + + # UPDATE embedding + [fact_id] = db.execute("SELECT id FROM semantic_facts").fetchone() + new_vec = _vec(90) + db.execute("UPDATE semantic_facts SET embedding=? 
WHERE id=?", (new_vec, fact_id)) + db.commit() + [(after_update,)] = db.execute( + "SELECT count(*) FROM vec_facts WHERE fact_id=?", (fact_id,) + ).fetchall() + assert after_update == 1 + + # DELETE + db.execute("DELETE FROM semantic_facts WHERE id=?", (fact_id,)) + db.commit() + [(count_after_delete,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert count_after_delete == 0 + + +# --------------------------------------------------------------------------- +# vec0 retrieval +# --------------------------------------------------------------------------- + + +def test_vec0_match_returns_nearest(tmp_path): + db = init_db(tmp_path / "memory.db") + for seed, fact in [(10, "alpha"), (50, "beta"), (90, "gamma")]: + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + (fact, _vec(seed)), + ) + db.commit() + + query = _vec(51) + rows = db.execute( + "SELECT fact_id, distance FROM vec_facts WHERE embedding MATCH vec_int8(?) AND k = 2", + (query,), + ).fetchall() + assert len(rows) == 2 + # Closest must be the seed=0.5 row (beta) + closest_fact_id = rows[0]["fact_id"] + closest_fact = db.execute( + "SELECT fact FROM semantic_facts WHERE id=?", (closest_fact_id,) + ).fetchone()["fact"] + assert closest_fact == "beta" + + +# --------------------------------------------------------------------------- +# MemoryProvider lifecycle +# --------------------------------------------------------------------------- + + +def test_provider_lifecycle(tmp_path): + p = SqliteVecMemoryProvider() + assert p.name == "sqlite_vec" + assert p.is_available() is True + p.initialize(session_id="t1", hermes_home=str(tmp_path)) + assert (tmp_path / "memories" / "memory.db").exists() + assert p.prefetch("test query") == "" # W1: no-op + assert p.sync_turn("hi", "hello") is None # W1: no-op + assert p.get_tool_schemas() == [] + p.shutdown() + + + +# =========================================================================== +# W2-1: voyage_embed (mocked) + read_memory + bump_hits + format_facts +# =========================================================================== + +import asyncio +import sqlite3 +from unittest.mock import patch + +import httpx +import pytest + +from plugins.memory.sqlite_vec.embed import ( + VOYAGE_BATCH, + VOYAGE_DIM, + VoyageError, + voyage_embed, +) +from plugins.memory.sqlite_vec.read import ( + Fact, + bump_hits, + format_facts_for_prompt, + read_memory, +) + + +def _fake_voyage_response(texts): + """Build a fake Voyage JSON body where each embedding is dim=512 of zeros + except the first cell which carries the input index. 
Lets us round-trip + the input ordering through _to_int8_blob.""" + return { + "data": [ + {"index": i, "embedding": [(i % 200) - 100] + [0] * (VOYAGE_DIM - 1)} + for i, _ in enumerate(texts) + ] + } + + +class _MockTransport(httpx.MockTransport): + """httpx mock that records call count and returns programmable responses.""" + + def __init__(self, responses): + self.calls = [] + self._responses = list(responses) + super().__init__(self._handler) + + def _handler(self, request: httpx.Request) -> httpx.Response: + self.calls.append(request) + status, body = self._responses.pop(0) + if isinstance(body, dict): + return httpx.Response(status, json=body) + return httpx.Response(status, text=body) + + +# --------------------------------------------------------------------------- +# voyage_embed +# --------------------------------------------------------------------------- + + +def test_voyage_embed_success(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = ["hello", "world", "禮揚"] + transport = _MockTransport([(200, _fake_voyage_response(texts))]) + client = httpx.AsyncClient(transport=transport) + + blobs = asyncio.run(voyage_embed(texts, client=client)) + + assert len(blobs) == len(texts) + for b in blobs: + assert len(b) == VOYAGE_DIM + # First byte encodes the (signed) index value we baked into the fake response. + assert blobs[0][0] == (-100) & 0xFF # input index 0 -> -100 -> unsigned 156 + assert blobs[1][0] == (-99) & 0xFF + assert len(transport.calls) == 1 + + +def test_voyage_embed_batches_at_128(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = [f"t{i}" for i in range(200)] # > VOYAGE_BATCH=128 + # 2 calls: first 128, then 72. + transport = _MockTransport( + [ + (200, _fake_voyage_response(texts[:VOYAGE_BATCH])), + (200, _fake_voyage_response(texts[VOYAGE_BATCH:])), + ] + ) + client = httpx.AsyncClient(transport=transport) + + blobs = asyncio.run(voyage_embed(texts, client=client)) + assert len(blobs) == 200 + assert len(transport.calls) == 2 + + +def test_voyage_embed_retries_on_5xx(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = ["only"] + transport = _MockTransport( + [ + (502, "bad gateway"), + (503, "still bad"), + (200, _fake_voyage_response(texts)), + ] + ) + client = httpx.AsyncClient(transport=transport) + + # Patch sleep to avoid real backoff delay. + with patch("plugins.memory.sqlite_vec.embed.asyncio.sleep", return_value=None): + blobs = asyncio.run(voyage_embed(texts, client=client)) + + assert len(blobs) == 1 + assert len(transport.calls) == 3 + + +def test_voyage_embed_4xx_raises(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + transport = _MockTransport([(401, "unauthorized")]) + client = httpx.AsyncClient(transport=transport) + with pytest.raises(VoyageError): + asyncio.run(voyage_embed(["x"], client=client)) + + +def test_voyage_embed_missing_key(monkeypatch): + monkeypatch.delenv("VOYAGE_API_KEY", raising=False) + with pytest.raises(VoyageError, match="VOYAGE_API_KEY"): + asyncio.run(voyage_embed(["x"])) + + +def test_voyage_embed_empty_input_no_call(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + # No transport responses queued; if we make a call the test will explode. 
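+    # (Assumes voyage_embed returns [] for empty input before building any request.)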
+ transport = _MockTransport([]) + client = httpx.AsyncClient(transport=transport) + blobs = asyncio.run(voyage_embed([], client=client)) + assert blobs == [] + assert len(transport.calls) == 0 + + +# --------------------------------------------------------------------------- +# read_memory + bump_hits +# --------------------------------------------------------------------------- + + +def _seed_facts(db: sqlite3.Connection): + """Insert 3 facts at known created_at + int8 vectors that put 'beta' nearest to seed=51.""" + rows = [ + # fact text, entity, created_at, vec seed + ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), + ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), + ("gamma", None, "2025-12-01 09:00:00", 90), + ("expired", "禮揚.短期", "2026-05-01 09:00:00", 50), + ] + for fact, entity, created_at, seed in rows: + db.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at, valid_to) " + "VALUES (?, ?, ?, ?, ?)", + (fact, entity, _vec(seed), created_at, + "2026-01-01" if fact == "expired" else None), + ) + db.commit() + + +def test_read_memory_orders_by_score(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "memory.db") + _seed_facts(db) + + # Stub voyage_embed to return a fixed query vector close to seed=51. + async def fake_embed(texts, **kw): + assert len(texts) == 1 + return [_vec(51)] + + log_file = tmp_path / "memory.log" + with patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + facts = asyncio.run(read_memory("test query", db, k=8, log_path=log_file)) + + fact_texts = [f.fact for f in facts] + # 'expired' must be filtered (valid_to in past). + assert "expired" not in fact_texts + # 'beta' should rank first (closest vec, recent). + assert fact_texts[0] == "beta" + # All Fact fields populated. + assert all(isinstance(f, Fact) for f in facts) + assert all(f.score is not None and f.sim is not None for f in facts) + # Latency was logged. + assert log_file.exists() + log_line = log_file.read_text().strip().splitlines()[-1] + assert '"sql_ms"' in log_line and '"q": "test query"' in log_line + + +def test_bump_hits_increments_and_swallows(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "memory.db") + _seed_facts(db) + ids = [r["id"] for r in db.execute("SELECT id FROM semantic_facts ORDER BY id").fetchall()] + + asyncio.run(bump_hits(ids[:2], db)) + rows = db.execute( + "SELECT id, hits, last_seen FROM semantic_facts ORDER BY id" + ).fetchall() + assert rows[0]["hits"] == 1 and rows[1]["hits"] == 1 + assert rows[2]["hits"] == 0 # untouched + assert rows[0]["last_seen"] is not None + + # Closed connection -> bump_hits must swallow the sqlite3.Error. + db.close() + asyncio.run(bump_hits(ids[:1], db)) # should not raise + + +def test_bump_hits_empty_is_noop(tmp_path): + db = init_db(tmp_path / "memory.db") + # Should return immediately without touching the connection. 
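+    # (Assumes bump_hits early-returns on an empty id list.)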
+ asyncio.run(bump_hits([], db)) + + +def test_format_facts_for_prompt_shape(): + facts = [ + Fact(id=1, fact="禮揚 likes 5x5", entity="禮揚.訓練", + created_at="2026-05-01", importance=2, sim=0.8, age_days=1.0, score=0.9), + Fact(id=2, fact="致妤生日 3/19", entity=None, + created_at="2026-04-01", importance=3, sim=0.7, age_days=30.0, score=0.6), + ] + out = format_facts_for_prompt(facts) + assert "[禮揚.訓練] 禮揚 likes 5x5" in out + assert "- 致妤生日 3/19" in out # no entity prefix when None + assert format_facts_for_prompt([]) == "" + + + +# =========================================================================== +# W2-3: prefetch + sync_turn wiring +# =========================================================================== + +from unittest.mock import patch as _patch_w23 + +from plugins.memory.sqlite_vec import ( + PREFETCH_TIMEOUT_S, + RECALL_HEADER, + SqliteVecMemoryProvider, + _run_coro_in_thread, +) + + +def _stubbed_provider(tmp_path, monkeypatch, query_seed: int = 51): + """Build a provider with a real DB, real conn, but stubbed Voyage.""" + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + p = SqliteVecMemoryProvider() + p.initialize(session_id="t", hermes_home=str(tmp_path)) + # Seed 3 facts via the same trigger-driven pipeline used in production. + for fact, ent, ts, seed in [ + ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), + ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), + ("gamma", None, "2025-12-01 09:00:00", 90), + ]: + p._conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + (fact, ent, _vec(seed), ts), + ) + p._conn.commit() + + async def fake_embed(texts, **kw): + return [_vec(query_seed) for _ in texts] + + return p, fake_embed + + +def test_prefetch_returns_markdown_with_header(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = p.prefetch("when does my wife arrive home", session_id="s1") + assert out.startswith(RECALL_HEADER + "\n") + # Top fact 'beta' (seed=50) is closest to query (seed=51). + assert "beta" in out + # with_meta=True format includes importance + age. + assert "(importance:" in out and "days)" in out + # Fact ids cached for sync_turn to bump. + assert p._last_fact_ids["s1"] + p.shutdown() + + +def test_prefetch_empty_query_no_op(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + # No patch needed — should short-circuit before voyage_embed is reached. + assert p.prefetch("", session_id="s1") == "" + assert p.prefetch(" ", session_id="s1") == "" + assert "s1" not in p._last_fact_ids + p.shutdown() + + +def test_prefetch_swallows_voyage_error(tmp_path, monkeypatch): + p, _ = _stubbed_provider(tmp_path, monkeypatch) + + async def raise_embed(texts, **kw): + raise RuntimeError("voyage 503") + + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", raise_embed): + out = p.prefetch("anything", session_id="s1") + assert out == "" # Reply is never blocked on memory-recall failure. 
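+    # A failed recall must also leave no cached ids behind; otherwise a later
+    # sync_turn would bump hits for facts that were never surfaced.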
+ assert "s1" not in p._last_fact_ids + p.shutdown() + + +def test_sync_turn_bumps_hits_then_clears_cache(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + p.prefetch("query", session_id="s1") + cached_ids = list(p._last_fact_ids["s1"]) + assert cached_ids + + p.sync_turn("user said hi", "asst replied", session_id="s1") + # Cache cleared + assert "s1" not in p._last_fact_ids + # Hits incremented for exactly the cached IDs. + placeholders = ",".join("?" * len(cached_ids)) + rows = p._conn.execute( + f"SELECT id, hits FROM semantic_facts WHERE id IN ({placeholders}) ORDER BY id", + cached_ids, + ).fetchall() + assert all(r["hits"] == 1 for r in rows), [(r["id"], r["hits"]) for r in rows] + + # Second sync_turn for same session is a no-op (cache empty). + p.sync_turn("u", "a", session_id="s1") + rows2 = p._conn.execute( + f"SELECT hits FROM semantic_facts WHERE id IN ({placeholders})", cached_ids + ).fetchall() + assert all(r["hits"] == 1 for r in rows2) + p.shutdown() + + +def test_run_coro_in_thread_timeout(): + import asyncio as _asyncio + + async def slow(): + await _asyncio.sleep(2.0) + return "ok" + + import pytest + with pytest.raises(TimeoutError): + _run_coro_in_thread(slow, timeout=0.05) + + +def test_format_with_meta_shape(): + facts = [ + Fact(id=1, fact="致妤生日 3/19", entity="禮揚.家庭", + created_at="2026-05-01", importance=3, sim=0.7, + age_days=5.4, score=0.6), + ] + out = format_facts_for_prompt(facts, with_meta=True) + assert "(importance: 3, age: 5 days)" in out + out_compact = format_facts_for_prompt(facts, with_meta=False) + assert "importance" not in out_compact diff --git a/tests/plugins/memory/test_write.py b/tests/plugins/memory/test_write.py new file mode 100644 index 00000000000..5bf2462739b --- /dev/null +++ b/tests/plugins/memory/test_write.py @@ -0,0 +1,322 @@ +"""Tests for plugins/memory/sqlite_vec/write.py (W3-2).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date, timedelta +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.extract import ExtractedFact +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memory.sqlite_vec.write import ( + FAST_TRACK_DAYS, + _fact_should_fast_track, + _parse_valid_to_hint, + write_episode, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +def _stub_embed_factory(): + """Returns (stub, call_log) — stub yields deterministic int8 blobs.""" + calls = [] + + async def stub(texts): + calls.append(list(texts)) + return [_vec(10 + i) for i in range(len(texts))] + + return stub, calls + + +def _stub_extract_factory(facts: list): + async def stub(user, asst, channel, ts): + return list(facts) + + return stub + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def test_parse_valid_to_hint(): + assert _parse_valid_to_hint("2026-05-03") == date(2026, 5, 3) + assert _parse_valid_to_hint("not-a-date") is None + assert _parse_valid_to_hint("") is None + assert _parse_valid_to_hint(None) is None + + +def test_fact_should_fast_track_threshold(): + today = date(2026, 5, 2) + f_in = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + 
valid_to_hint=(today + timedelta(days=10)).isoformat()) + f_edge = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + valid_to_hint=(today + timedelta(days=FAST_TRACK_DAYS)).isoformat()) + f_out = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + valid_to_hint=(today + timedelta(days=60)).isoformat()) + f_none = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + valid_to_hint=None) + assert _fact_should_fast_track(f_in, today) is True + assert _fact_should_fast_track(f_edge, today) is True + assert _fact_should_fast_track(f_out, today) is False + assert _fact_should_fast_track(f_none, today) is False + + +# --------------------------------------------------------------------------- +# write_episode — happy paths +# --------------------------------------------------------------------------- + + +def _bootstrap_db(tmp_path): + return init_db(tmp_path / "m.db") + + +def test_writes_two_episode_rows_per_turn(tmp_path): + db = _bootstrap_db(tmp_path) + embed, calls = _stub_embed_factory() + extract = _stub_extract_factory([]) + + summary = asyncio.run(write_episode( + user_msg="hello", reply="hi back", + channel="cattia", msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 + rows = db.execute( + "SELECT role, channel, external_id, text FROM episodes ORDER BY id" + ).fetchall() + assert [r["role"] for r in rows] == ["user", "assistant"] + assert rows[0]["external_id"] == "m1:user" + assert rows[1]["external_id"] == "m1:asst" + # Single embed call covered both turn texts (no fact texts). + assert len(calls) == 1 + assert calls[0] == ["hello", "hi back"] + + +def test_phi_channel_records_episode_but_skips_extract(tmp_path): + db = _bootstrap_db(tmp_path) + embed, calls = _stub_embed_factory() + + def extract_should_not_be_called(*a, **kw): + raise AssertionError("extract called for PHI channel") + + summary = asyncio.run(write_episode( + user_msg="病人 [姓名] 血壓 180/100", reply="建議轉診", + channel="cmio", msg_id="phi-1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract_should_not_be_called, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["skipped_extract"] is True + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 + rows = db.execute("SELECT count(*) FROM episodes").fetchone() + assert rows[0] == 2 # raw episode rows still recorded + + +def test_idempotent_on_duplicate_msg_id(tmp_path): + """Re-running with the same msg_id collapses via ON CONFLICT.""" + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + extract = _stub_extract_factory([]) + + args = dict( + user_msg="x", reply="y", channel="cattia", + msg_id="dup-1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + ) + asyncio.run(write_episode(**args)) + summary2 = asyncio.run(write_episode(**args)) + assert summary2["episodes"] == 0 # nothing new inserted + [(count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() + assert count == 2 + + +# --------------------------------------------------------------------------- +# Fast-track vs stash partitioning +# --------------------------------------------------------------------------- + + +def test_short_lived_fact_fast_tracks_to_semantic_facts(tmp_path): + db = _bootstrap_db(tmp_path) 
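+    # Fast-track rule under test: a valid_to_hint within FAST_TRACK_DAYS of today
+    # is written straight into semantic_facts; longer-lived or undated facts are
+    # stashed in episode metadata for the weekly review instead.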
+ embed, _ = _stub_embed_factory() + today = date.today() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", + text="致妤今晚 7:30 才到家", + entity="禮揚.家庭", + importance=3, + valid_to_hint=(today + timedelta(days=1)).isoformat(), + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="今晚致妤 7:30 才到", reply="了解", + channel="at-home", msg_id="m1", ts="2026-05-02 17:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["fast_tracked"] == 1 + assert summary["stashed"] == 0 + [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf_count == 1 + [(vf_count,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vf_count == 1 # trigger mirrored the row + row = db.execute( + "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts" + ).fetchone() + assert row["entity"] == "禮揚.家庭" + assert row["valid_to"] == (today + timedelta(days=1)).isoformat() + + +def test_long_lived_fact_stashes_in_episode_metadata(tmp_path): + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", + text="禮揚 likes Starting Strength", + entity="禮揚.訓練", + importance=2, + valid_to_hint=None, # permanent → stash + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="我練 SS 一年了", reply="酷", + channel="cattia", msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["stashed"] == 1 + assert summary["fast_tracked"] == 0 + [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf_count == 0 # nothing fast-tracked + metadata_rows = db.execute( + "SELECT metadata FROM episodes WHERE metadata IS NOT NULL" + ).fetchall() + assert len(metadata_rows) == 2 # both user + assistant rows carry the same metadata + md = json.loads(metadata_rows[0]["metadata"]) + assert md["stashed_facts"][0]["text"] == "禮揚 likes Starting Strength" + assert md["stashed_facts"][0]["entity"] == "禮揚.訓練" + + +def test_mixed_facts_partition_correctly(tmp_path): + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + today = date.today() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", text="short", + entity="禮揚.短期", importance=2, + valid_to_hint=(today + timedelta(days=2)).isoformat(), + ), + ExtractedFact( + type="semantic", text="long", + entity="禮揚.長期", importance=3, + valid_to_hint=None, + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["fast_tracked"] == 1 + assert summary["stashed"] == 1 + + +# --------------------------------------------------------------------------- +# Failure path +# --------------------------------------------------------------------------- + + +def test_embed_failure_appends_to_jsonl(tmp_path): + db = _bootstrap_db(tmp_path) + + async def failing_embed(texts): + raise RuntimeError("voyage exploded") + + extract = _stub_extract_factory([]) + fail_log = tmp_path / "fail.jsonl" + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=failing_embed, extract_fn=extract, + failure_log_path=fail_log, + )) + + # Caller never sees the exception. 
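+    # (write_episode is expected to catch the embed error, roll back the partial
+    # transaction, and append a failure record to the JSONL instead of raising.)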
+ assert summary["episodes"] == 0 # rolled back + [(ep_count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() + assert ep_count == 0 + # Failure record landed in the JSONL. + assert fail_log.exists() + line = json.loads(fail_log.read_text().strip().splitlines()[-1]) + assert line["channel"] == "cattia" + assert line["msg_id"] == "m1" + assert "voyage exploded" in line["error"] + + +def test_extract_failure_still_records_episode(tmp_path): + """If kimi_extract raises, we still land the raw episode rows. The + weekly_promotion (W3-3) can re-extract from the raw text later.""" + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + + async def failing_extract(*a, **kw): + raise RuntimeError("synthetic.new 503") + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=failing_extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 + assert summary["stashed"] == 0 + + +def test_empty_turn_records_no_rows(tmp_path): + """Both user_msg and reply blank → no work done, no embed call.""" + db = _bootstrap_db(tmp_path) + + embed_called = [] + + async def embed(texts): + embed_called.append(texts) + return [] + + extract = _stub_extract_factory([]) + summary = asyncio.run(write_episode( + user_msg="", reply="", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + # No embed call (both texts empty), but the schema accepts NULL embeddings + # for episodes so we still INSERT 2 rows. + assert embed_called == [] + assert summary["episodes"] == 2 diff --git a/tests/plugins/test_memdebug.py b/tests/plugins/test_memdebug.py new file mode 100644 index 00000000000..65380e271c4 --- /dev/null +++ b/tests/plugins/test_memdebug.py @@ -0,0 +1,175 @@ +"""Tests for plugins/memdebug/ — /memdebug slash command (W2-4).""" + +from __future__ import annotations + +import asyncio +import struct +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memdebug import ( + HELP_TEXT, + _do_rawsearch, + _do_semantic, + _format_facts_block, + _handle_async, + _handle_memdebug, + _truncate, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def test_truncate_short_string_unchanged(): + assert _truncate("abc", 10) == "abc" + + +def test_truncate_long_string_ellipsis(): + out = _truncate("a" * 100, 10) + assert out.endswith("…") and len(out) == 10 + + +# --------------------------------------------------------------------------- +# Help / empty / unknown args +# --------------------------------------------------------------------------- + + +def test_handle_empty_returns_help(): + assert _handle_memdebug("") == HELP_TEXT + assert _handle_memdebug(" ") == HELP_TEXT + + +def test_handle_rawsearch_empty_returns_help(): + assert _handle_memdebug("rawsearch") == HELP_TEXT + assert _handle_memdebug("rawsearch ") == HELP_TEXT + + +# --------------------------------------------------------------------------- +# Semantic / rawsearch via direct async helpers (so we 
control DB path) +# --------------------------------------------------------------------------- + + +def _seed_db(tmp_path): + """Seed a fixture memory.db on tmp_path and return its path.""" + db_path = tmp_path / "memories" / "memory.db" + conn = init_db(db_path) + conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + ("致妤生日 3/19", "禮揚.家庭", _vec(50), "2026-05-02 09:00:00"), + ) + conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + ("AI as digital twin", "禮揚.工作", _vec(60), "2026-05-01 09:00:00"), + ) + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (?, ?, ?, ?, ?)", + ("2026-05-02 17:00:00", "cattia", "msg-1", "user", "晚餐幾點開"), + ) + conn.commit() + conn.close() + return db_path + + +def test_do_semantic_returns_score_breakdown(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db_path = _seed_db(tmp_path) + + async def fake_embed(texts, **kw): + return [_vec(51) for _ in texts] + + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ + patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = asyncio.run(_do_semantic("when does my wife get home")) + + assert "/memdebug" in out + assert "致妤生日 3/19" in out # closest fact + # Score breakdown labels present. + assert "score=" in out and "sim=" in out and "age=" in out + # Reaction prompt present (until rich-embed UX lands). + assert "👍" in out and "👎" in out + # Log line written. + log_path = tmp_path / "memory.log" + assert log_path.exists() + last_line = log_path.read_text().strip().splitlines()[-1] + assert '"cmd": "memdebug"' in last_line + + +def test_do_semantic_db_missing_returns_friendly_message(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + missing = tmp_path / "absent.db" + with patch("plugins.memdebug.DEFAULT_DB", missing): + out = asyncio.run(_do_semantic("anything")) + assert "not yet initialised" in out + + +def test_do_rawsearch_finds_substring(tmp_path): + db_path = _seed_db(tmp_path) + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): + out = asyncio.run(_do_rawsearch("晚餐")) + assert "rawsearch" in out + assert "晚餐幾點開" in out + assert "cattia/user" in out + + +def test_do_rawsearch_empty_episodes_message(tmp_path): + db_path = tmp_path / "memories" / "memory.db" + init_db(db_path).close() # bootstrap schema, no rows + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): + out = asyncio.run(_do_rawsearch("anything")) + assert "rawsearch" in out + assert "Episodes are written by W3" in out + + +# --------------------------------------------------------------------------- +# Sync entry point + register() +# --------------------------------------------------------------------------- + + +def test_handle_memdebug_sync_dispatches_semantic(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db_path = _seed_db(tmp_path) + + async def fake_embed(texts, **kw): + return [_vec(51) for _ in texts] + + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ + patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = _handle_memdebug("when does my wife get home") + assert "致妤生日" in out + + +def test_register_calls_register_command(): + 
"""register(ctx) must call ctx.register_command with the right name.""" + from plugins.memdebug import register + + captured = {} + + class FakeCtx: + def register_command(self, name, handler, description="", args_hint=""): + captured["name"] = name + captured["handler"] = handler + captured["args_hint"] = args_hint + captured["description"] = description + + register(FakeCtx()) + assert captured["name"] == "memdebug" + assert captured["args_hint"] == " | rawsearch " + assert callable(captured["handler"]) + # The handler must accept a single positional argument (raw_args). + assert captured["handler"].__code__.co_argcount == 1 diff --git a/tests/plugins/test_memreview.py b/tests/plugins/test_memreview.py new file mode 100644 index 00000000000..f20e7341790 --- /dev/null +++ b/tests/plugins/test_memreview.py @@ -0,0 +1,272 @@ +"""Tests for plugins/memreview/ — /memreview reject + /mem kill switch (W3-4).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memreview import ( + _MEMREVIEW_HELP, + _MEM_HELP, + _handle_mem, + _handle_memreview, + mem_off_active, + mem_off_path, + register, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# /memreview help / pending +# --------------------------------------------------------------------------- + + +def test_memreview_empty_returns_help(): + assert _handle_memreview("") == _MEMREVIEW_HELP + assert _handle_memreview(" ") == _MEMREVIEW_HELP + + +def test_memreview_pending_no_diffs(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("pending") + assert "no pending diffs" in out + + +def test_memreview_pending_lists_diffs(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + (pdir / "wk-2026-05-02.json").write_text("{}") + (pdir / "wk-2026-05-09.json").write_text("{}") + (pdir / "wk-2026-05-09.rejected").write_text("rejected") + + out = _handle_memreview("pending") + assert "wk-2026-05-02" in out + assert "wk-2026-05-09" in out + # Rejected one carries a flag. 
+ assert "(rejected — will be archived Mon)" in out + + +# --------------------------------------------------------------------------- +# /memreview reject +# --------------------------------------------------------------------------- + + +def test_memreview_reject_invalid_digest_id(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("reject not-a-digest") + assert "must look like" in out + + +def test_memreview_reject_unknown_digest(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("reject wk-2026-05-02") + assert "no pending diff" in out + + +def test_memreview_reject_writes_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + diff_path = pdir / "wk-2026-05-02.json" + diff_path.write_text("{}") + + out = _handle_memreview("reject wk-2026-05-02") + assert "Rejected." in out + sentinel = pdir / "wk-2026-05-02.rejected" + assert sentinel.exists() + assert "rejected" in sentinel.read_text().lower() + + +# --------------------------------------------------------------------------- +# /mem off / on / status +# --------------------------------------------------------------------------- + + +def test_mem_off_creates_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("off") + assert "disabled" in out + assert mem_off_path().exists() + assert mem_off_active() is True + + +def test_mem_on_removes_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + mem_off_path().write_text("set", encoding="utf-8") + out = _handle_mem("on") + assert "enabled" in out + assert not mem_off_path().exists() + + +def test_mem_on_when_already_on_idempotent(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("on") + assert "already enabled" in out + + +def test_mem_status_off(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("status") + assert "🔊 ON" in out # default state + assert "(absent)" in out + + +def test_mem_status_on_with_pending(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + mem_off_path().write_text("set") + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + (pdir / "wk-2026-05-02.json").write_text("{}") + + out = _handle_mem("status") + assert "🔇 OFF" in out + assert "(present)" in out + assert "wk-2026-05-02" in out + + +def test_mem_help_on_unknown_subcommand(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("frobnicate") + assert "/mem off" in out and "/mem on" in out + + +# --------------------------------------------------------------------------- +# register() wires both commands +# --------------------------------------------------------------------------- + + +def test_register_registers_both_commands(): + captured = [] + + class FakeCtx: + def register_command(self, name, handler, description="", args_hint=""): + captured.append((name, args_hint)) + + register(FakeCtx()) + names = [c[0] for c in 
captured] + assert "memreview" in names + assert "mem" in names + + +# --------------------------------------------------------------------------- +# End-to-end: /memreview reject then weekly_apply archives as rejected +# --------------------------------------------------------------------------- + + +def test_reject_then_apply_archives_as_rejected(tmp_path, monkeypatch): + """Full flow: write pending diff -> /memreview reject -> weekly_apply + sees the sentinel and archives the diff with status=rejected.""" + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", + lambda: tmp_path, + ) + + db = init_db(tmp_path / "m.db") + digest_id = "wk-2026-05-02" + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + diff_payload = { + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [{"entity": "禮揚.x", "fact": "f", "importance": 2, + "valid_from": "2026-05-02", "valid_to": None, + "source_episode_ids": []}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + } + (pdir / f"{digest_id}.json").write_text(json.dumps(diff_payload)) + + # User runs /memreview reject. + reply = _handle_memreview(f"reject {digest_id}") + assert "Rejected." in reply + + # Apply step picks up the sentinel. + from plugins.memory.sqlite_vec.promotion import weekly_apply + summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) + assert summary["applied"] is False + assert summary["reason"] == "rejected" + + # No new semantic_facts row (the promote was discarded). + [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf == 0 + + # Archive carries the .rejected suffix. + archived = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) + assert len(archived) == 1 + + +def test_mem_off_short_circuits_weekly_promotion(tmp_path, monkeypatch): + """Kill switch: /mem off must stop weekly_promotion from running its + Kimi call (which would otherwise burn tokens and write a diff).""" + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", + lambda: tmp_path, + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text, metadata) " + "VALUES (?, ?, ?, ?, ?, ?)", + ("2026-05-02 09:00", "cattia", "x", "user", "hi", + json.dumps({"stashed_facts": [{"text": "禮揚 likes X", + "entity": "禮揚.x", + "importance": 2}]})), + ) + db.commit() + + # Activate kill switch. + _handle_mem("off") + assert mem_off_active() is True + + kimi_called = [] + + async def kimi_should_not_be_called(prompt): + kimi_called.append(prompt) + return {} + + from plugins.memory.sqlite_vec.promotion import weekly_promotion + summary = asyncio.run(weekly_promotion(db, kimi_fn=kimi_should_not_be_called)) + assert summary["candidates"] == 0 + assert summary["skipped"] == "/mem off active" + # Kimi must not have been called. + assert kimi_called == [] diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/scripts/test_import_md.py b/tests/scripts/test_import_md.py new file mode 100644 index 00000000000..617b38f9c13 --- /dev/null +++ b/tests/scripts/test_import_md.py @@ -0,0 +1,210 @@ +"""Tests for ``scripts/import_md.py`` (W2-2 — MEMORY.md → semantic_facts). 
+ +Uses a stub embed_fn so no network is hit; live integration is exercised +end-to-end on chococlaw via the post-test ``--commit`` smoke run. +""" + +from __future__ import annotations + +import asyncio +import struct +from pathlib import Path + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from scripts.import_md import ( + Entry, + import_memory_md, + parse_memory_md, + slugify_topic, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Slugify +# --------------------------------------------------------------------------- + + +def test_slugify_simple(): + assert slugify_topic("People") == "people" + assert slugify_topic("Working style") == "working_style" + assert slugify_topic("Privacy constraints") == "privacy_constraints" + + +def test_slugify_hierarchy_uses_dot(): + assert ( + slugify_topic("Tools & Access > ProtonMail Access") + == "tools_access.protonmail_access" + ) + + +def test_slugify_preserves_cjk(): + # CJK characters survive the punct->underscore collapse; only > is hierarchy. + assert slugify_topic("醫院 > 新樓") == "醫院.新樓" + assert slugify_topic("家庭 生活") == "家庭_生活" + + +def test_slugify_handles_empty_or_punct_only(): + assert slugify_topic("") == "unknown" + assert slugify_topic("!!!") == "unknown" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +SAMPLE_MD = """People: 禮揚 — physician +§ +Working style: digital twin model +§ +Privacy constraints: never include real PHI +§ +Tools & Access > ProtonMail: D4303@sinlau.org.tw +§ +""" + + +def test_parse_memory_md_basic(): + entries = parse_memory_md(SAMPLE_MD) + assert len(entries) == 4 + assert entries[0].topic == "People" + assert entries[0].fact == "禮揚 — physician" + assert entries[0].entity == "禮揚.people" + assert entries[3].entity == "禮揚.tools_access.protonmail" + + +def test_parse_skips_blocks_without_colon(): + md = "first entry: ok\n§\n\nno colon here\n§\nsecond: also ok\n§\n" + entries = parse_memory_md(md) + assert [e.topic for e in entries] == ["first entry", "second"] + + +def test_parse_handles_no_trailing_separator(): + md = "topic: content" + entries = parse_memory_md(md) + assert len(entries) == 1 + assert entries[0].fact == "content" + + +# --------------------------------------------------------------------------- +# import_memory_md (with stub embed) +# --------------------------------------------------------------------------- + + +def _make_stub_embed(): + counter = {"n": 0} + + async def stub(texts): + counter["n"] += 1 + return [_vec(i + 1) for i, _ in enumerate(texts)] + + return stub, counter + + +def test_dry_run_does_not_write(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=True) + ) + assert summary == { + "parsed": 4, "new": 4, "skipped_dup": 0, + "batches": 0, "dry_run": True, + } + # DB still empty (init_db ran but no inserts). 
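+    # (Re-opening with init_db is safe here: bootstrap_schema is idempotent, so
+    # the count below reflects only what the importer wrote.)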
+ conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 0 + + +def test_commit_inserts_and_populates_vec_facts(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary["new"] == 4 + assert summary["batches"] == 1 + assert counter["n"] == 1 # one Voyage call for 4 entries + + conn = init_db(db) + rows = conn.execute( + "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts ORDER BY id" + ).fetchall() + assert len(rows) == 4 + assert rows[0]["entity"] == "禮揚.people" + assert rows[0]["importance"] == 2 + assert rows[0]["valid_from"] == "2026-05-10" + assert rows[0]["valid_to"] is None + + # Trigger sf_after_insert mirrored every row into vec_facts. + [(vec_count,)] = conn.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vec_count == 4 + + +def test_idempotent_rerun_inserts_nothing_new(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) + assert counter["n"] == 1 + + summary2 = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary2["new"] == 0 + assert summary2["skipped_dup"] == 4 + assert counter["n"] == 1 # second run made zero embed calls (no new rows) + + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 4 + + +def test_partial_update_only_embeds_new(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) + assert counter["n"] == 1 + + md.write_text(SAMPLE_MD + "\nNew topic: brand new fact\n§\n", encoding="utf-8") + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary["new"] == 1 + assert summary["skipped_dup"] == 4 + assert counter["n"] == 2 # one extra call for the one new entry + + +def test_rollback_on_embed_failure_leaves_db_unchanged(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + + async def failing(texts): + raise RuntimeError("voyage exploded") + + with pytest.raises(RuntimeError, match="voyage exploded"): + asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=failing) + ) + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 0 # transaction rolled back