From 20a62e704f18b14c08384c01b0453831307e230d Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 08:07:11 +0000 Subject: [PATCH 01/11] feat(memory): bootstrap sqlite_vec plugin schema (W1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a new MemoryProvider plugin implementing Hermes V3 long-term memory design (two-tier: hot episodes + cold curated semantic_facts, weekly human-approved promotion). W1 scope is schema only — no read or write path yet: - plugins/memory/sqlite_vec/{__init__,store,plugin.yaml,schema.sql} - episodes table (hot raw turn record, channel-scoped idempotent) - semantic_facts table (cold curated, with valid_from/valid_to validity windows borrowed from the MemPalace temporal-triple pattern) - vec_facts vec0 virtual table (512-dim float32) + 3 sync triggers - SqliteVecMemoryProvider class registers with MemoryProvider ABC but prefetch/sync_turn are no-ops until W2/W3 wire them. Tests (7/7 passing inside running hermes container): - bootstrap creates all expected tables/indexes/triggers - bootstrap is idempotent - semantic_facts column defaults populate (created_at, valid_from) - role CHECK constraint rejects values other than user/assistant - triggers keep vec_facts in sync on insert/update/delete - vec0 MATCH+k returns nearest neighbour - provider lifecycle round-trips Activates via $HERMES_HOME/config.yaml memory.provider: sqlite_vec (deferred; W4 cutover only). 
Refs liyoungc/hermes-memory#2 (W1-1) Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/memory/sqlite_vec/__init__.py | 78 +++++++ plugins/memory/sqlite_vec/plugin.yaml | 7 + plugins/memory/sqlite_vec/schema.sql | 41 ++++ plugins/memory/sqlite_vec/store.py | 74 +++++++ .../memory/test_sqlite_vec_provider.py | 203 ++++++++++++++++++ 5 files changed, 403 insertions(+) create mode 100644 plugins/memory/sqlite_vec/__init__.py create mode 100644 plugins/memory/sqlite_vec/plugin.yaml create mode 100644 plugins/memory/sqlite_vec/schema.sql create mode 100644 plugins/memory/sqlite_vec/store.py create mode 100644 tests/plugins/memory/test_sqlite_vec_provider.py diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py new file mode 100644 index 00000000000..71a76c08d4a --- /dev/null +++ b/plugins/memory/sqlite_vec/__init__.py @@ -0,0 +1,78 @@ +"""Hermes V3 memory plugin — sqlite-vec store with two-tier (hot/cold) design. + +W1 scope (this commit): schema bootstrap + provider stub registering with +the MemoryProvider ABC. Read path / write path / weekly promotion arrive in +W2 and W3 per spec docs/superpowers/specs/2026-05-02-hermes-memory-design.md. 
+ +Activate via $HERMES_HOME/config.yaml: + + memory: + provider: sqlite_vec +""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any, Dict, List, Optional + +from agent.memory_provider import MemoryProvider + +from .store import init_db + +logger = logging.getLogger(__name__) + + +def _default_db_path(hermes_home: str) -> Path: + return Path(hermes_home).expanduser() / "memories" / "memory.db" + + +class SqliteVecMemoryProvider(MemoryProvider): + """Hermes V3 long-term memory provider (W1 = schema only).""" + + @property + def name(self) -> str: + return "sqlite_vec" + + def is_available(self) -> bool: + try: + import sqlite_vec # noqa: F401 + except ImportError: + return False + return True + + def initialize(self, session_id: str, **kwargs) -> None: + hermes_home = kwargs.get("hermes_home") + if not hermes_home: + from hermes_constants import get_hermes_home + hermes_home = str(get_hermes_home()) + self._db_path = _default_db_path(hermes_home) + self._conn = init_db(self._db_path) + logger.info("sqlite_vec memory ready at %s", self._db_path) + + def system_prompt_block(self) -> str: + # W1: no system-prompt contribution. Persona stays in flat files + # (SOUL.md, USER.md, life-dimensions.md) handled by built-in memory. + return "" + + def prefetch(self, query: str, *, session_id: str = "") -> str: + # W2 will implement actual retrieval. Empty return for now keeps + # the plugin a no-op until we wire read_memory(). + return "" + + def sync_turn(self, user: str, assistant: str, **kwargs) -> None: + # W3 will implement async write-back. No-op for W1. + return None + + def get_tool_schemas(self) -> List[Dict[str, Any]]: + # No model-facing tools; memory is implicit. 
+ return [] + + def handle_tool_call(self, tool_name: str, args: Dict[str, Any]) -> Any: + from tools.registry import tool_error + return tool_error(f"sqlite_vec exposes no tools (got {tool_name!r})") + + def shutdown(self) -> None: + if getattr(self, "_conn", None): + self._conn.close() + self._conn = None diff --git a/plugins/memory/sqlite_vec/plugin.yaml b/plugins/memory/sqlite_vec/plugin.yaml new file mode 100644 index 00000000000..4e3b24133c4 --- /dev/null +++ b/plugins/memory/sqlite_vec/plugin.yaml @@ -0,0 +1,7 @@ +name: sqlite_vec +version: 0.1.0 +description: "Hermes V3 long-term memory — local sqlite-vec store with hot episodes / cold curated semantic_facts, weekly human-approved promotion." +pip_dependencies: + - sqlite-vec>=0.1.6 +hooks: + - on_pre_compress diff --git a/plugins/memory/sqlite_vec/schema.sql b/plugins/memory/sqlite_vec/schema.sql new file mode 100644 index 00000000000..5910309543b --- /dev/null +++ b/plugins/memory/sqlite_vec/schema.sql @@ -0,0 +1,41 @@ +-- Hermes V3 memory schema — episodes (hot raw) + semantic_facts (cold curated) +-- Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §3 + +PRAGMA journal_mode = WAL; +PRAGMA synchronous = NORMAL; + +-- Hot tier: raw turn-by-turn record. All Discord turns + cron synthetic land here. +CREATE TABLE IF NOT EXISTS episodes ( + id INTEGER PRIMARY KEY, + ts TEXT NOT NULL, + channel TEXT NOT NULL, + external_id TEXT NOT NULL, + role TEXT NOT NULL CHECK (role IN ('user', 'assistant')), + text TEXT NOT NULL, + synthetic INTEGER NOT NULL DEFAULT 0, + embedding BLOB, + metadata TEXT, + promoted_at TEXT, + UNIQUE(channel, external_id) +); +CREATE INDEX IF NOT EXISTS idx_episodes_ts ON episodes(ts); +CREATE INDEX IF NOT EXISTS idx_episodes_promoted_pending + ON episodes(promoted_at, ts) WHERE promoted_at IS NULL; + +-- Cold tier: curated facts. Cattia's actual working memory queries this. 
+CREATE TABLE IF NOT EXISTS semantic_facts ( + id INTEGER PRIMARY KEY, + entity TEXT, + fact TEXT NOT NULL, + embedding BLOB NOT NULL, + source_episode_ids TEXT, + importance INTEGER DEFAULT 2, + hits INTEGER DEFAULT 0, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + last_seen TEXT, + state TEXT DEFAULT 'active' CHECK (state IN ('active', 'archived')), + valid_from TEXT NOT NULL DEFAULT (date('now')), + valid_to TEXT +); +CREATE INDEX IF NOT EXISTS idx_facts_entity ON semantic_facts(entity); +CREATE INDEX IF NOT EXISTS idx_facts_active ON semantic_facts(state, valid_to); diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py new file mode 100644 index 00000000000..2d4856f4351 --- /dev/null +++ b/plugins/memory/sqlite_vec/store.py @@ -0,0 +1,74 @@ +"""sqlite-vec backed memory store: schema bootstrap + connection helper. + +W1 scope: schema only. Read/write paths come in W2/W3. +""" + +from __future__ import annotations + +import logging +import sqlite3 +from pathlib import Path +from typing import Optional + +import sqlite_vec + +logger = logging.getLogger(__name__) + +VEC_DIM = 512 # voyage-3.5-lite output dimension we store + +_SCHEMA_PATH = Path(__file__).parent / "schema.sql" + +_VEC_VIRTUAL_TABLE_SQL = f""" +CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts USING vec0( + fact_id INTEGER PRIMARY KEY, + embedding FLOAT[{VEC_DIM}] +); +""" + +# Triggers keep vec_facts in sync with semantic_facts. embedding is stored as +# raw float32 BLOB on the relational side; vec0 reads the same bytes natively. 
+_TRIGGERS_SQL = """ +CREATE TRIGGER IF NOT EXISTS sf_after_insert +AFTER INSERT ON semantic_facts +BEGIN + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, NEW.embedding); +END; + +CREATE TRIGGER IF NOT EXISTS sf_after_update_embedding +AFTER UPDATE OF embedding ON semantic_facts +BEGIN + UPDATE vec_facts SET embedding = NEW.embedding WHERE fact_id = NEW.id; +END; + +CREATE TRIGGER IF NOT EXISTS sf_after_delete +AFTER DELETE ON semantic_facts +BEGIN + DELETE FROM vec_facts WHERE fact_id = OLD.id; +END; +""" + + +def open_db(db_path: Path) -> sqlite3.Connection: + """Open a sqlite connection with sqlite-vec extension loaded.""" + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(db_path)) + conn.enable_load_extension(True) + sqlite_vec.load(conn) + conn.enable_load_extension(False) + conn.row_factory = sqlite3.Row + return conn + + +def bootstrap_schema(conn: sqlite3.Connection) -> None: + """Idempotently create tables, indexes, vec0 virtual table, and triggers.""" + conn.executescript(_SCHEMA_PATH.read_text()) + conn.executescript(_VEC_VIRTUAL_TABLE_SQL) + conn.executescript(_TRIGGERS_SQL) + conn.commit() + + +def init_db(db_path: Path) -> sqlite3.Connection: + """Open + bootstrap. Returns a ready-to-use connection.""" + conn = open_db(db_path) + bootstrap_schema(conn) + return conn diff --git a/tests/plugins/memory/test_sqlite_vec_provider.py b/tests/plugins/memory/test_sqlite_vec_provider.py new file mode 100644 index 00000000000..a922a4e93c4 --- /dev/null +++ b/tests/plugins/memory/test_sqlite_vec_provider.py @@ -0,0 +1,203 @@ +"""Tests for the sqlite_vec memory provider plugin (W1 scope: schema only). 
+ +Covers: + • bootstrap_schema is idempotent (re-running does not error or duplicate) + • all 3 tables + 4 indexes + 1 virtual table + 3 triggers exist + • semantic_facts defaults work (created_at, valid_from, importance) + • vec0 virtual table answers MATCH queries with k=N prefilter + • triggers keep vec_facts synced with semantic_facts (insert/update/delete) + • SqliteVecMemoryProvider.is_available() / initialize() / shutdown() round-trip +""" + +from __future__ import annotations + +import struct +from pathlib import Path + +import pytest + +from plugins.memory.sqlite_vec import SqliteVecMemoryProvider +from plugins.memory.sqlite_vec.store import ( + VEC_DIM, + bootstrap_schema, + init_db, + open_db, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _vec(seed: float) -> bytes: + """Make a deterministic 512-d float32 BLOB for testing. + + seed is broadcast across all dimensions and then perturbed slightly so + different seeds produce different vectors but the same seed always + yields the same bytes. 
+ """ + return struct.pack(f"{VEC_DIM}f", *[seed + i * 1e-4 for i in range(VEC_DIM)]) + + +# --------------------------------------------------------------------------- +# Schema bootstrap +# --------------------------------------------------------------------------- + + +def test_bootstrap_creates_all_objects(tmp_path): + db = init_db(tmp_path / "memory.db") + + table_names = { + row[0] + for row in db.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" + ) + } + assert "episodes" in table_names + assert "semantic_facts" in table_names + + index_names = { + row[0] + for row in db.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name NOT LIKE 'sqlite_%'" + ) + } + assert "idx_episodes_ts" in index_names + assert "idx_episodes_promoted_pending" in index_names + assert "idx_facts_entity" in index_names + assert "idx_facts_active" in index_names + + trigger_names = { + row[0] for row in db.execute("SELECT name FROM sqlite_master WHERE type='trigger'") + } + assert "sf_after_insert" in trigger_names + assert "sf_after_update_embedding" in trigger_names + assert "sf_after_delete" in trigger_names + + # vec0 virtual table is registered as a regular table internally + [(vec_count,)] = db.execute( + "SELECT count(*) FROM sqlite_master WHERE name='vec_facts'" + ).fetchall() + assert vec_count >= 1 + + +def test_bootstrap_is_idempotent(tmp_path): + path = tmp_path / "memory.db" + db = init_db(path) + bootstrap_schema(db) # second time + bootstrap_schema(db) # third time + # If we got here without error and tables still query, idempotency holds. 
+ db.execute("SELECT count(*) FROM episodes").fetchone() + db.execute("SELECT count(*) FROM semantic_facts").fetchone() + + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- + + +def test_semantic_facts_defaults_are_populated(tmp_path): + db = init_db(tmp_path / "memory.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("禮揚 likes Starting Strength method", _vec(0.1)), + ) + db.commit() + + row = db.execute( + "SELECT importance, state, valid_from, valid_to, created_at FROM semantic_facts" + ).fetchone() + assert row["importance"] == 2 + assert row["state"] == "active" + assert row["valid_from"] is not None # default = date('now') + assert row["valid_to"] is None + assert row["created_at"] is not None + + +def test_role_check_constraint(tmp_path): + db = init_db(tmp_path / "memory.db") + with pytest.raises(Exception): + db.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (datetime('now'), 'cattia', 'msg-1', 'system', 'hi')" + ) + + +# --------------------------------------------------------------------------- +# Trigger sync between semantic_facts and vec_facts +# --------------------------------------------------------------------------- + + +def test_triggers_sync_insert_update_delete(tmp_path): + db = init_db(tmp_path / "memory.db") + + # INSERT + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("fact A", _vec(0.5)), + ) + db.commit() + [(count_after_insert,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert count_after_insert == 1 + + # UPDATE embedding + [fact_id] = db.execute("SELECT id FROM semantic_facts").fetchone() + new_vec = _vec(0.9) + db.execute("UPDATE semantic_facts SET embedding=? 
WHERE id=?", (new_vec, fact_id)) + db.commit() + [(after_update,)] = db.execute( + "SELECT count(*) FROM vec_facts WHERE fact_id=?", (fact_id,) + ).fetchall() + assert after_update == 1 + + # DELETE + db.execute("DELETE FROM semantic_facts WHERE id=?", (fact_id,)) + db.commit() + [(count_after_delete,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert count_after_delete == 0 + + +# --------------------------------------------------------------------------- +# vec0 retrieval +# --------------------------------------------------------------------------- + + +def test_vec0_match_returns_nearest(tmp_path): + db = init_db(tmp_path / "memory.db") + for seed, fact in [(0.1, "alpha"), (0.5, "beta"), (0.9, "gamma")]: + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + (fact, _vec(seed)), + ) + db.commit() + + query = _vec(0.51) + rows = db.execute( + "SELECT fact_id, distance FROM vec_facts WHERE embedding MATCH ? AND k = 2", + (query,), + ).fetchall() + assert len(rows) == 2 + # Closest must be the seed=0.5 row (beta) + closest_fact_id = rows[0]["fact_id"] + closest_fact = db.execute( + "SELECT fact FROM semantic_facts WHERE id=?", (closest_fact_id,) + ).fetchone()["fact"] + assert closest_fact == "beta" + + +# --------------------------------------------------------------------------- +# MemoryProvider lifecycle +# --------------------------------------------------------------------------- + + +def test_provider_lifecycle(tmp_path): + p = SqliteVecMemoryProvider() + assert p.name == "sqlite_vec" + assert p.is_available() is True + p.initialize(session_id="t1", hermes_home=str(tmp_path)) + assert (tmp_path / "memories" / "memory.db").exists() + assert p.prefetch("test query") == "" # W1: no-op + assert p.sync_turn("hi", "hello") is None # W1: no-op + assert p.get_tool_schemas() == [] + p.shutdown() From 71858e5dd8d8bfc3f184519667c250ab31afe4ff Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 08:40:20 +0000 
Subject: [PATCH 02/11] feat(memory): voyage_embed + read_memory + bump_hits (W2-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the read path for the sqlite_vec memory plugin per docs/superpowers/specs/2026-05-02-hermes-memory-design.md §4. embed.py: async voyage_embed() with httpx.AsyncClient, 128-text batching, 3x exponential-backoff retry on 5xx, fail-loud on missing VOYAGE_API_KEY or 4xx. dim/dtype locked to spec values (512/int8) so config drift fails fast. read.py: Fact dataclass + async read_memory() using vec0 prefilter (k=50) and the SQL CTE rerank with locked weights 0.7*sim + 0.3*recency (90-day half-life). bump_hits() is a fire-and-forget UPDATE that swallows sqlite errors with a warning. p95 latency logged as a JSON line to ~/.hermes/logs/memory.log. W1 schema fix: vec_facts changed from FLOAT[512] to int8[512] to match spec §1.4 (Voyage 3.5-lite, 512-dim, int8). vec0 int8 columns require the vec_int8() SQL wrapper on INSERT, and reject UPDATE entirely even with the wrapper, so sf_after_update_embedding now does DELETE+INSERT. Tests: 10 new cases (mock httpx for voyage success/batching/5xx-retry/ 4xx/missing-key/empty-input; read_memory orders by score and filters expired; bump_hits increments and swallows errors; format_facts shape). 17/17 green. 
Refs liyoungc/hermes-memory#4 --- plugins/memory/sqlite_vec/embed.py | 139 ++++++++++ plugins/memory/sqlite_vec/read.py | 157 +++++++++++ plugins/memory/sqlite_vec/store.py | 11 +- .../memory/test_sqlite_vec_provider.py | 252 +++++++++++++++++- 4 files changed, 543 insertions(+), 16 deletions(-) create mode 100644 plugins/memory/sqlite_vec/embed.py create mode 100644 plugins/memory/sqlite_vec/read.py diff --git a/plugins/memory/sqlite_vec/embed.py b/plugins/memory/sqlite_vec/embed.py new file mode 100644 index 00000000000..ae114ebf670 --- /dev/null +++ b/plugins/memory/sqlite_vec/embed.py @@ -0,0 +1,139 @@ +"""Voyage AI embedding wrapper for the sqlite_vec memory plugin. + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §1.4 (locked +decision) and §4 (read path) — voyage-3.5-lite, 512 dim, int8. + +Returns each embedding as a 512-byte BLOB ready to insert into +``semantic_facts.embedding``. The store-side trigger wraps the BLOB with +``vec_int8()`` when copying it into the ``vec_facts`` virtual table. + +Public API: + + await voyage_embed(["text 1", "text 2"]) -> [b"...512 bytes...", b"..."] +""" + +from __future__ import annotations + +import asyncio +import logging +import os +from typing import List, Optional, Sequence + +import httpx + +logger = logging.getLogger(__name__) + +VOYAGE_URL = "https://api.voyageai.com/v1/embeddings" +VOYAGE_MODEL = "voyage-3.5-lite" +VOYAGE_BATCH = 128 # Voyage API per-call ceiling +VOYAGE_DIM = 512 +VOYAGE_DTYPE = "int8" +DEFAULT_TIMEOUT = 30.0 +MAX_RETRIES = 3 + + +class VoyageError(RuntimeError): + """Raised when Voyage API repeatedly fails.""" + + +def _api_key() -> str: + key = os.environ.get("VOYAGE_API_KEY") + if not key: + raise VoyageError( + "VOYAGE_API_KEY is not set. Add it to ~/.hermes/.env and " + "expose it to the hermes container via docker-compose." 
+ ) + return key + + +def _to_int8_blob(values: Sequence[int]) -> bytes: + """Pack a list of int8 values (-128..127) into a raw 512-byte BLOB.""" + if len(values) != VOYAGE_DIM: + raise VoyageError( + f"Voyage returned {len(values)}-dim vector, expected {VOYAGE_DIM}" + ) + return bytes((v + 256) & 0xFF for v in values) # signed -> unsigned byte + + +async def _post_batch( + client: httpx.AsyncClient, + texts: List[str], + api_key: str, +) -> List[bytes]: + payload = { + "model": VOYAGE_MODEL, + "input": texts, + "output_dtype": VOYAGE_DTYPE, + "output_dimension": VOYAGE_DIM, + } + headers = {"Authorization": f"Bearer {api_key}"} + + for attempt in range(1, MAX_RETRIES + 1): + try: + r = await client.post( + VOYAGE_URL, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT + ) + except httpx.RequestError as exc: + if attempt == MAX_RETRIES: + raise VoyageError(f"network error: {exc}") from exc + await asyncio.sleep(2 ** (attempt - 1)) + continue + + if 500 <= r.status_code < 600: + if attempt == MAX_RETRIES: + raise VoyageError(f"Voyage 5xx: {r.status_code} {r.text[:200]}") + await asyncio.sleep(2 ** (attempt - 1)) + continue + + if r.status_code >= 400: + raise VoyageError(f"Voyage {r.status_code}: {r.text[:200]}") + + body = r.json() + items = body.get("data", []) + if len(items) != len(texts): + raise VoyageError( + f"Voyage returned {len(items)} items for {len(texts)} inputs" + ) + # Voyage returns embeddings in input order (per docs/index field). + items.sort(key=lambda d: d.get("index", 0)) + return [_to_int8_blob(d["embedding"]) for d in items] + + raise VoyageError("retry loop exhausted unexpectedly") + + +async def voyage_embed( + texts: List[str], + *, + dim: int = VOYAGE_DIM, + dtype: str = VOYAGE_DTYPE, + client: Optional[httpx.AsyncClient] = None, +) -> List[bytes]: + """Embed `texts` and return one int8 BLOB per input. + + Batches automatically at Voyage's 128-input ceiling. Retries 3x with + exponential backoff on 5xx and network errors. 
Raises VoyageError on + auth failure, 4xx, or repeated 5xx. + + `dim` and `dtype` are accepted for API symmetry but locked to the spec + values; passing different values raises immediately so config drift + fails loudly instead of silently corrupting embeddings. + """ + if dim != VOYAGE_DIM or dtype != VOYAGE_DTYPE: + raise VoyageError( + f"dim/dtype locked to {VOYAGE_DIM}/{VOYAGE_DTYPE} per spec §1.4" + ) + if not texts: + return [] + + api_key = _api_key() + owns_client = client is None + client = client or httpx.AsyncClient() + try: + out: List[bytes] = [] + for i in range(0, len(texts), VOYAGE_BATCH): + batch = texts[i : i + VOYAGE_BATCH] + out.extend(await _post_batch(client, batch, api_key)) + return out + finally: + if owns_client: + await client.aclose() diff --git a/plugins/memory/sqlite_vec/read.py b/plugins/memory/sqlite_vec/read.py new file mode 100644 index 00000000000..5d3dc6c29a9 --- /dev/null +++ b/plugins/memory/sqlite_vec/read.py @@ -0,0 +1,157 @@ +"""Read path for the sqlite_vec memory plugin. + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §4 + +Two-step retrieval: + 1. vec0 prefilter: top k=50 by cosine distance on int8 embeddings + 2. SQL CTE rerank: score = (1 - distance) * 0.7 + exp(-age_days/90) * 0.3 + filter active state + valid_to NULL or future, ORDER BY score DESC LIMIT k + +`hits` bumping happens fire-and-forget after the reply is sent (caller's +responsibility to schedule). Errors are swallowed with a warning. + +p95 query latency is logged to ~/.hermes/logs/memory.log. The log path is +overridable via the constructor for testing. 
+""" + +from __future__ import annotations + +import logging +import sqlite3 +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional + +from .embed import voyage_embed + +logger = logging.getLogger(__name__) + +DEFAULT_K = 8 +PREFILTER_K = 50 +DEFAULT_LOG_PATH = Path.home() / ".hermes" / "logs" / "memory.log" + +# Spec §4 — SQL is locked. Do not edit weights without updating the spec +# and re-running the B1 worked example. +RETRIEVE_SQL = """ +WITH knn AS ( + SELECT fact_id, distance + FROM vec_facts + WHERE embedding MATCH vec_int8(?) AND k = {prefilter_k} +) +SELECT sf.id, sf.fact, sf.entity, sf.created_at, sf.importance, + (1 - knn.distance) AS sim, + (julianday('now') - julianday(sf.created_at)) AS age_days, + (1 - knn.distance) * 0.7 + + exp(-(julianday('now') - julianday(sf.created_at)) / 90.0) * 0.3 AS score +FROM knn +JOIN semantic_facts sf ON sf.id = knn.fact_id +WHERE sf.state = 'active' + AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) +ORDER BY score DESC +LIMIT ?; +""" + + +@dataclass +class Fact: + """A retrieved fact with score breakdown for prompt-injection or /memdebug.""" + + id: int + fact: str + entity: Optional[str] + created_at: str + importance: int + sim: float + age_days: float + score: float + + +def _append_log(log_path: Path, payload: dict) -> None: + """Append one JSON line to memory.log; never raise into the read path.""" + import json + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +async def read_memory( + query: str, + conn: sqlite3.Connection, + *, + k: int = DEFAULT_K, + log_path: Path = DEFAULT_LOG_PATH, +) -> List[Fact]: + """Embed `query`, retrieve top-`k` facts, log latency, return Fact list.""" + [qvec] = await voyage_embed([query]) + + sql = 
RETRIEVE_SQL.format(prefilter_k=PREFILTER_K) + t0 = time.perf_counter() + rows = conn.execute(sql, (qvec, k)).fetchall() + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + + facts = [ + Fact( + id=row["id"], + fact=row["fact"], + entity=row["entity"], + created_at=row["created_at"], + importance=row["importance"], + sim=float(row["sim"]), + age_days=float(row["age_days"]), + score=float(row["score"]), + ) + for row in rows + ] + + _append_log( + log_path, + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "q": query, + "k": k, + "n": len(facts), + "sql_ms": round(elapsed_ms, 2), + }, + ) + return facts + + +async def bump_hits(fact_ids: Iterable[int], conn: sqlite3.Connection) -> None: + """Fire-and-forget UPDATE; swallow errors with a warning log. + + Caller must wrap with ``asyncio.create_task()`` to avoid blocking the + reply. Per spec §4 hits-bump runs AFTER discord_send, so we keep this + cheap (single UPDATE … IN (…)) and never propagate errors. + """ + ids = list(fact_ids) + if not ids: + return + placeholders = ",".join("?" * len(ids)) + try: + conn.execute( + f"UPDATE semantic_facts SET hits = hits + 1, " + f"last_seen = datetime('now') WHERE id IN ({placeholders})", + ids, + ) + conn.commit() + except sqlite3.Error as exc: + logger.warning("bump_hits swallowed error for %d ids: %s", len(ids), exc) + + +def format_facts_for_prompt(facts: List[Fact]) -> str: + """Render top-k facts as a markdown bullet list for system-prompt injection. + + Used by SqliteVecMemoryProvider.prefetch() in W2-3. Compact, no header — + the surrounding prompt template owns the section title. 
+ """ + if not facts: + return "" + lines = [] + for f in facts: + prefix = f"[{f.entity}] " if f.entity else "" + lines.append(f"- {prefix}{f.fact}") + return "\n".join(lines) diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py index 2d4856f4351..e619b7ff4ec 100644 --- a/plugins/memory/sqlite_vec/store.py +++ b/plugins/memory/sqlite_vec/store.py @@ -21,23 +21,26 @@ _VEC_VIRTUAL_TABLE_SQL = f""" CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts USING vec0( fact_id INTEGER PRIMARY KEY, - embedding FLOAT[{VEC_DIM}] + embedding int8[{VEC_DIM}] ); """ # Triggers keep vec_facts in sync with semantic_facts. embedding is stored as -# raw float32 BLOB on the relational side; vec0 reads the same bytes natively. +# raw int8 BLOB (512 bytes) on the relational side; vec0 needs vec_int8() +# wrapper to interpret it (without it, vec0 assumes float32). _TRIGGERS_SQL = """ CREATE TRIGGER IF NOT EXISTS sf_after_insert AFTER INSERT ON semantic_facts BEGIN - INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, NEW.embedding); + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); END; CREATE TRIGGER IF NOT EXISTS sf_after_update_embedding AFTER UPDATE OF embedding ON semantic_facts BEGIN - UPDATE vec_facts SET embedding = NEW.embedding WHERE fact_id = NEW.id; + -- vec0 int8 columns reject UPDATE even via vec_int8(); use DELETE+INSERT. 
+ DELETE FROM vec_facts WHERE fact_id = NEW.id; + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); END; CREATE TRIGGER IF NOT EXISTS sf_after_delete diff --git a/tests/plugins/memory/test_sqlite_vec_provider.py b/tests/plugins/memory/test_sqlite_vec_provider.py index a922a4e93c4..42c631aee3e 100644 --- a/tests/plugins/memory/test_sqlite_vec_provider.py +++ b/tests/plugins/memory/test_sqlite_vec_provider.py @@ -30,14 +30,15 @@ # --------------------------------------------------------------------------- -def _vec(seed: float) -> bytes: - """Make a deterministic 512-d float32 BLOB for testing. +def _vec(seed: int) -> bytes: + """Make a deterministic 512-d int8 BLOB for testing. - seed is broadcast across all dimensions and then perturbed slightly so - different seeds produce different vectors but the same seed always - yields the same bytes. + int8 matches the locked decision in spec §1.4 (Voyage 3.5-lite, 512-dim, int8). + seed is the base value (clamped to int8 range) with a small per-dim offset + so different seeds produce different vectors but the same seed reproduces. 
""" - return struct.pack(f"{VEC_DIM}f", *[seed + i * 1e-4 for i in range(VEC_DIM)]) + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) # --------------------------------------------------------------------------- @@ -101,7 +102,7 @@ def test_semantic_facts_defaults_are_populated(tmp_path): db = init_db(tmp_path / "memory.db") db.execute( "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", - ("禮揚 likes Starting Strength method", _vec(0.1)), + ("禮揚 likes Starting Strength method", _vec(10)), ) db.commit() @@ -135,7 +136,7 @@ def test_triggers_sync_insert_update_delete(tmp_path): # INSERT db.execute( "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", - ("fact A", _vec(0.5)), + ("fact A", _vec(50)), ) db.commit() [(count_after_insert,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() @@ -143,7 +144,7 @@ def test_triggers_sync_insert_update_delete(tmp_path): # UPDATE embedding [fact_id] = db.execute("SELECT id FROM semantic_facts").fetchone() - new_vec = _vec(0.9) + new_vec = _vec(90) db.execute("UPDATE semantic_facts SET embedding=? WHERE id=?", (new_vec, fact_id)) db.commit() [(after_update,)] = db.execute( @@ -165,16 +166,16 @@ def test_triggers_sync_insert_update_delete(tmp_path): def test_vec0_match_returns_nearest(tmp_path): db = init_db(tmp_path / "memory.db") - for seed, fact in [(0.1, "alpha"), (0.5, "beta"), (0.9, "gamma")]: + for seed, fact in [(10, "alpha"), (50, "beta"), (90, "gamma")]: db.execute( "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", (fact, _vec(seed)), ) db.commit() - query = _vec(0.51) + query = _vec(51) rows = db.execute( - "SELECT fact_id, distance FROM vec_facts WHERE embedding MATCH ? AND k = 2", + "SELECT fact_id, distance FROM vec_facts WHERE embedding MATCH vec_int8(?) 
AND k = 2", (query,), ).fetchall() assert len(rows) == 2 @@ -201,3 +202,230 @@ def test_provider_lifecycle(tmp_path): assert p.sync_turn("hi", "hello") is None # W1: no-op assert p.get_tool_schemas() == [] p.shutdown() + + + +# =========================================================================== +# W2-1: voyage_embed (mocked) + read_memory + bump_hits + format_facts +# =========================================================================== + +import asyncio +import sqlite3 +from unittest.mock import patch + +import httpx +import pytest + +from plugins.memory.sqlite_vec.embed import ( + VOYAGE_BATCH, + VOYAGE_DIM, + VoyageError, + voyage_embed, +) +from plugins.memory.sqlite_vec.read import ( + Fact, + bump_hits, + format_facts_for_prompt, + read_memory, +) + + +def _fake_voyage_response(texts): + """Build a fake Voyage JSON body where each embedding is dim=512 of zeros + except the first cell which carries the input index. Lets us round-trip + the input ordering through _to_int8_blob.""" + return { + "data": [ + {"index": i, "embedding": [(i % 200) - 100] + [0] * (VOYAGE_DIM - 1)} + for i, _ in enumerate(texts) + ] + } + + +class _MockTransport(httpx.MockTransport): + """httpx mock that records call count and returns programmable responses.""" + + def __init__(self, responses): + self.calls = [] + self._responses = list(responses) + super().__init__(self._handler) + + def _handler(self, request: httpx.Request) -> httpx.Response: + self.calls.append(request) + status, body = self._responses.pop(0) + if isinstance(body, dict): + return httpx.Response(status, json=body) + return httpx.Response(status, text=body) + + +# --------------------------------------------------------------------------- +# voyage_embed +# --------------------------------------------------------------------------- + + +def test_voyage_embed_success(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = ["hello", "world", "禮揚"] + transport = _MockTransport([(200, 
_fake_voyage_response(texts))]) + client = httpx.AsyncClient(transport=transport) + + blobs = asyncio.run(voyage_embed(texts, client=client)) + + assert len(blobs) == len(texts) + for b in blobs: + assert len(b) == VOYAGE_DIM + # First byte encodes the (signed) index value we baked into the fake response. + assert blobs[0][0] == (-100) & 0xFF # input index 0 -> -100 -> unsigned 156 + assert blobs[1][0] == (-99) & 0xFF + assert len(transport.calls) == 1 + + +def test_voyage_embed_batches_at_128(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = [f"t{i}" for i in range(200)] # > VOYAGE_BATCH=128 + # 2 calls: first 128, then 72. + transport = _MockTransport( + [ + (200, _fake_voyage_response(texts[:VOYAGE_BATCH])), + (200, _fake_voyage_response(texts[VOYAGE_BATCH:])), + ] + ) + client = httpx.AsyncClient(transport=transport) + + blobs = asyncio.run(voyage_embed(texts, client=client)) + assert len(blobs) == 200 + assert len(transport.calls) == 2 + + +def test_voyage_embed_retries_on_5xx(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = ["only"] + transport = _MockTransport( + [ + (502, "bad gateway"), + (503, "still bad"), + (200, _fake_voyage_response(texts)), + ] + ) + client = httpx.AsyncClient(transport=transport) + + # Patch sleep to avoid real backoff delay. 
+ with patch("plugins.memory.sqlite_vec.embed.asyncio.sleep", return_value=None): + blobs = asyncio.run(voyage_embed(texts, client=client)) + + assert len(blobs) == 1 + assert len(transport.calls) == 3 + + +def test_voyage_embed_4xx_raises(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + transport = _MockTransport([(401, "unauthorized")]) + client = httpx.AsyncClient(transport=transport) + with pytest.raises(VoyageError): + asyncio.run(voyage_embed(["x"], client=client)) + + +def test_voyage_embed_missing_key(monkeypatch): + monkeypatch.delenv("VOYAGE_API_KEY", raising=False) + with pytest.raises(VoyageError, match="VOYAGE_API_KEY"): + asyncio.run(voyage_embed(["x"])) + + +def test_voyage_embed_empty_input_no_call(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + # No transport responses queued; if we make a call the test will explode. + transport = _MockTransport([]) + client = httpx.AsyncClient(transport=transport) + blobs = asyncio.run(voyage_embed([], client=client)) + assert blobs == [] + assert len(transport.calls) == 0 + + +# --------------------------------------------------------------------------- +# read_memory + bump_hits +# --------------------------------------------------------------------------- + + +def _seed_facts(db: sqlite3.Connection): + """Insert 3 facts at known created_at + int8 vectors that put 'beta' nearest to seed=51.""" + rows = [ + # fact text, entity, created_at, vec seed + ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), + ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), + ("gamma", None, "2025-12-01 09:00:00", 90), + ("expired", "禮揚.短期", "2026-05-01 09:00:00", 50), + ] + for fact, entity, created_at, seed in rows: + db.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at, valid_to) " + "VALUES (?, ?, ?, ?, ?)", + (fact, entity, _vec(seed), created_at, + "2026-01-01" if fact == "expired" else None), + ) + db.commit() + + +def test_read_memory_orders_by_score(tmp_path, monkeypatch): 
+ monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "memory.db") + _seed_facts(db) + + # Stub voyage_embed to return a fixed query vector close to seed=51. + async def fake_embed(texts, **kw): + assert len(texts) == 1 + return [_vec(51)] + + log_file = tmp_path / "memory.log" + with patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + facts = asyncio.run(read_memory("test query", db, k=8, log_path=log_file)) + + fact_texts = [f.fact for f in facts] + # 'expired' must be filtered (valid_to in past). + assert "expired" not in fact_texts + # 'beta' should rank first (closest vec, recent). + assert fact_texts[0] == "beta" + # All Fact fields populated. + assert all(isinstance(f, Fact) for f in facts) + assert all(f.score is not None and f.sim is not None for f in facts) + # Latency was logged. + assert log_file.exists() + log_line = log_file.read_text().strip().splitlines()[-1] + assert '"sql_ms"' in log_line and '"q": "test query"' in log_line + + +def test_bump_hits_increments_and_swallows(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "memory.db") + _seed_facts(db) + ids = [r["id"] for r in db.execute("SELECT id FROM semantic_facts ORDER BY id").fetchall()] + + asyncio.run(bump_hits(ids[:2], db)) + rows = db.execute( + "SELECT id, hits, last_seen FROM semantic_facts ORDER BY id" + ).fetchall() + assert rows[0]["hits"] == 1 and rows[1]["hits"] == 1 + assert rows[2]["hits"] == 0 # untouched + assert rows[0]["last_seen"] is not None + + # Closed connection -> bump_hits must swallow the sqlite3.Error. + db.close() + asyncio.run(bump_hits(ids[:1], db)) # should not raise + + +def test_bump_hits_empty_is_noop(tmp_path): + db = init_db(tmp_path / "memory.db") + # Should return immediately without touching the connection. 
+ asyncio.run(bump_hits([], db)) + + +def test_format_facts_for_prompt_shape(): + facts = [ + Fact(id=1, fact="禮揚 likes 5x5", entity="禮揚.訓練", + created_at="2026-05-01", importance=2, sim=0.8, age_days=1.0, score=0.9), + Fact(id=2, fact="致妤生日 3/19", entity=None, + created_at="2026-04-01", importance=3, sim=0.7, age_days=30.0, score=0.6), + ] + out = format_facts_for_prompt(facts) + assert "[禮揚.訓練] 禮揚 likes 5x5" in out + assert "- 致妤生日 3/19" in out # no entity prefix when None + assert format_facts_for_prompt([]) == "" From 2f11c761e1254c5b97077c6ee72762a9503bdf3b Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 11:58:42 +0000 Subject: [PATCH 03/11] feat(memory): import_md.py + cosine distance fix (W2-2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scripts/import_md.py seeds semantic_facts from ~/.hermes/memories/MEMORY.md per spec §6.1. Each "Topic: content §"-delimited entry maps to one semantic_fact row with entity prefix "禮揚." plus a slug of the topic, importance=2, valid_from=2026-05-10, valid_to=NULL. Hierarchical topics like "Tools & Access > ProtonMail" become entity "禮揚.tools_access.protonmail" so prefix queries still work. Embeds in batches of 128 via Voyage 3.5-lite. Idempotent: pre-INSERT (entity, fact) lookup skips duplicates so re-runs are safe. Wraps the batch-insert in BEGIN/COMMIT and rolls back on embed failure so partial imports never land. Supports --dry-run for preview and --commit for the real write. W1 schema fix bundled: vec0 column now declares distance_metric=cosine. Without this, the default L2 distance on int8 vectors produces sim values in the hundreds, breaking the 0.7*sim + 0.3*recency rerank formula entirely. 
Verified end-to-end on chococlaw: Q: "我太太生日" -> top hit "**生日**: 3/19" sim=0.604 OK Q: "AI as digital twin" -> top hit "Think of AI as a digital twin" sim=0.607 OK Tests: 12 new cases for import_md (slugify simple/hierarchy/CJK/empty; parse colon-missing/no-trailing-§; dry-run no-write; commit populates vec_facts via trigger; idempotent re-run; partial update embeds only new; rollback on embed failure leaves DB unchanged). 29/29 green including W1 + W2-1. Live import: 25 entries, 1 Voyage batch, all visible in semantic_facts and vec_facts on chococlaw:/opt/data/memories/memory.db. Refs liyoungc/hermes-memory#5 --- plugins/memory/sqlite_vec/store.py | 2 +- scripts/import_md.py | 307 +++++++++++++++++++++++++++++ tests/scripts/__init__.py | 0 tests/scripts/test_import_md.py | 210 ++++++++++++++++++++ 4 files changed, 518 insertions(+), 1 deletion(-) create mode 100755 scripts/import_md.py create mode 100644 tests/scripts/__init__.py create mode 100644 tests/scripts/test_import_md.py diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py index e619b7ff4ec..cd975d1b9f9 100644 --- a/plugins/memory/sqlite_vec/store.py +++ b/plugins/memory/sqlite_vec/store.py @@ -21,7 +21,7 @@ _VEC_VIRTUAL_TABLE_SQL = f""" CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts USING vec0( fact_id INTEGER PRIMARY KEY, - embedding int8[{VEC_DIM}] + embedding int8[{VEC_DIM}] distance_metric=cosine ); """ diff --git a/scripts/import_md.py b/scripts/import_md.py new file mode 100755 index 00000000000..86743a5d8eb --- /dev/null +++ b/scripts/import_md.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +"""Seed `semantic_facts` from a flat ``MEMORY.md`` (W2-2). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §6.1. + +Format expected in ``~/.hermes/memories/MEMORY.md``:: + + Topic: content + § + Topic: another content + § + +Each entry becomes one row in ``semantic_facts``: + + entity = "禮揚." 
+ slug(topic) # "Working style" -> "禮揚.working_style" + # "Tools & Access > Proton" -> "禮揚.tools_access.proton" + fact = content (verbatim) + importance = 2 + valid_from = '2026-05-10' + valid_to = NULL + +Idempotent: re-running with the same input does not duplicate rows. The +natural key is ``(entity, fact)`` and is enforced by a pre-INSERT lookup. + +Embeddings come from Voyage 3.5-lite via ``plugins.memory.sqlite_vec.embed``. +The trigger ``sf_after_insert`` keeps ``vec_facts`` synced automatically, so +this script writes only to ``semantic_facts``. + +Usage:: + + docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ + scripts/import_md.py --dry-run + docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ + scripts/import_md.py --commit +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import re +import sqlite3 +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +DEFAULT_MEMORY_MD = Path.home() / ".hermes" / "memories" / "MEMORY.md" +DEFAULT_DB = Path("/opt/data") / "memories" / "memory.db" +DEFAULT_VALID_FROM = "2026-05-10" # spec §6.1 +DEFAULT_IMPORTANCE = 2 +DEFAULT_BATCH = 128 +ENTITY_PREFIX = "禮揚" +ENTRY_SEPARATOR = re.compile(r"^§\s*$", re.MULTILINE) + + +@dataclass +class Entry: + """One parsed MEMORY.md entry.""" + + topic: str + fact: str + + @property + def entity(self) -> str: + return f"{ENTITY_PREFIX}.{slugify_topic(self.topic)}" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +def slugify_topic(topic: str) -> str: + """Convert a human topic label to a stable entity-suffix slug. + + - Hierarchy markers ``>`` become ``.`` so prefix queries still work. + - Lowercase, ASCII alphanum kept; runs of other chars collapse to ``_``. 
+ - CJK / unicode is preserved unchanged so 中文 topics stay readable. + + Examples: + "Working style" -> "working_style" + "Tools & Access > ProtonMail" -> "tools_access.protonmail" + "禮揚.家庭" -> "禮揚.家庭" (already a slug, untouched) + """ + parts = [p.strip() for p in topic.split(">")] + out_parts = [] + for part in parts: + s = part.strip().lower() + # Collapse non-alphanum (including '&', spaces, punctuation) to underscore. + # CJK characters are unicode word chars in Python regex with re.UNICODE + # (default for str patterns), so [^\w] excludes them = preserved. + s = re.sub(r"[^\w]+", "_", s, flags=re.UNICODE) + s = s.strip("_") + if s: + out_parts.append(s) + return ".".join(out_parts) if out_parts else "unknown" + + +def parse_memory_md(text: str) -> List[Entry]: + """Split MEMORY.md into Entry objects. + + Skips empty blocks and blocks with no ``Topic: content`` colon. Keeps + multi-line content (rare today but possible if a future entry wraps). + """ + entries: List[Entry] = [] + for raw_block in ENTRY_SEPARATOR.split(text): + block = raw_block.strip() + if not block: + continue + if ":" not in block: + logger.warning("skipping malformed block (no colon): %r", block[:60]) + continue + topic, _, content = block.partition(":") + topic = topic.strip() + content = content.strip() + if not topic or not content: + logger.warning("skipping empty topic or content: %r", block[:60]) + continue + entries.append(Entry(topic=topic, fact=content)) + return entries + + +# --------------------------------------------------------------------------- +# DB ops +# --------------------------------------------------------------------------- + + +def existing_keys(conn: sqlite3.Connection) -> set[Tuple[str, str]]: + """Return the (entity, fact) pairs already present, for idempotency.""" + rows = conn.execute("SELECT entity, fact FROM semantic_facts").fetchall() + return {(r[0], r[1]) for r in rows} + + +def insert_batch( + conn: sqlite3.Connection, + rows: List[Tuple[Entry, bytes]], 
+ *, + valid_from: str, + importance: int, +) -> int: + """Insert one batch of (entry, embedding) pairs. Returns count inserted.""" + cur = conn.executemany( + """ + INSERT INTO semantic_facts(entity, fact, embedding, + importance, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, NULL) + """, + [ + (e.entity, e.fact, blob, importance, valid_from) + for e, blob in rows + ], + ) + return cur.rowcount + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +async def import_memory_md( + *, + md_path: Path, + db_path: Path, + dry_run: bool, + valid_from: str = DEFAULT_VALID_FROM, + importance: int = DEFAULT_IMPORTANCE, + batch_size: int = DEFAULT_BATCH, + embed_fn=None, # injectable for tests +) -> dict: + """Run the full import. + + Returns a summary dict: {parsed, new, skipped_dup, batches, dry_run}. + Does not return embeddings. + """ + text = md_path.read_text(encoding="utf-8") + entries = parse_memory_md(text) + + # Open DB and bootstrap if needed (idempotent — store.init_db handles that). 
+ from plugins.memory.sqlite_vec.store import init_db + conn = init_db(db_path) + + have = existing_keys(conn) + new_entries = [e for e in entries if (e.entity, e.fact) not in have] + skipped = len(entries) - len(new_entries) + + if dry_run: + print(f"[dry-run] parsed={len(entries)} new={len(new_entries)} " + f"already_present={skipped}") + for e in new_entries[:10]: + print(f" + ({e.entity}) {e.fact[:80]!r}") + if len(new_entries) > 10: + print(f" … and {len(new_entries) - 10} more") + return { + "parsed": len(entries), + "new": len(new_entries), + "skipped_dup": skipped, + "batches": 0, + "dry_run": True, + } + + if not new_entries: + print(f"nothing to import (parsed={len(entries)}, all present)") + return { + "parsed": len(entries), + "new": 0, + "skipped_dup": skipped, + "batches": 0, + "dry_run": False, + } + + # Embed in batches; default uses real Voyage, tests inject a stub. + if embed_fn is None: + from plugins.memory.sqlite_vec.embed import voyage_embed + embed_fn = voyage_embed + + inserted = 0 + batches = 0 + try: + conn.execute("BEGIN") + for i in range(0, len(new_entries), batch_size): + chunk = new_entries[i : i + batch_size] + blobs = await embed_fn([e.fact for e in chunk]) + if len(blobs) != len(chunk): + raise RuntimeError( + f"embed returned {len(blobs)} for {len(chunk)} inputs" + ) + inserted += insert_batch( + conn, + list(zip(chunk, blobs)), + valid_from=valid_from, + importance=importance, + ) + batches += 1 + conn.commit() + except Exception: + conn.rollback() + raise + + print( + f"imported {inserted} entries in {batches} " + f"batch{'es' if batches != 1 else ''} " + f"(parsed={len(entries)}, skipped_dup={skipped})" + ) + return { + "parsed": len(entries), + "new": inserted, + "skipped_dup": skipped, + "batches": batches, + "dry_run": False, + } + + +def _build_arg_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(description=__doc__.split("\n")[0]) + p.add_argument( + "--memory-md", + type=Path, + 
default=DEFAULT_MEMORY_MD, + help="Path to MEMORY.md (default: ~/.hermes/memories/MEMORY.md)", + ) + p.add_argument( + "--db", + type=Path, + default=DEFAULT_DB, + help="Path to memory.db (default: /opt/data/memories/memory.db inside container)", + ) + g = p.add_mutually_exclusive_group(required=True) + g.add_argument("--dry-run", action="store_true", help="Show plan, do not write") + g.add_argument("--commit", action="store_true", help="Actually import") + p.add_argument("--valid-from", default=DEFAULT_VALID_FROM) + p.add_argument("--importance", type=int, default=DEFAULT_IMPORTANCE) + return p + + +def main(argv: Optional[List[str]] = None) -> int: + logging.basicConfig( + level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" + ) + args = _build_arg_parser().parse_args(argv) + + # Live import path: ensure VOYAGE_API_KEY is loaded from ~/.hermes/.env. + if args.commit: + try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) + except ImportError: + pass # tests / non-container contexts handle env themselves + + summary = asyncio.run( + import_memory_md( + md_path=args.memory_md, + db_path=args.db, + dry_run=args.dry_run, + valid_from=args.valid_from, + importance=args.importance, + ) + ) + return 0 if summary["new"] >= 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/scripts/test_import_md.py b/tests/scripts/test_import_md.py new file mode 100644 index 00000000000..617b38f9c13 --- /dev/null +++ b/tests/scripts/test_import_md.py @@ -0,0 +1,210 @@ +"""Tests for ``scripts/import_md.py`` (W2-2 — MEMORY.md → semantic_facts). + +Uses a stub embed_fn so no network is hit; live integration is exercised +end-to-end on chococlaw via the post-test ``--commit`` smoke run. 
+""" + +from __future__ import annotations + +import asyncio +import struct +from pathlib import Path + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from scripts.import_md import ( + Entry, + import_memory_md, + parse_memory_md, + slugify_topic, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Slugify +# --------------------------------------------------------------------------- + + +def test_slugify_simple(): + assert slugify_topic("People") == "people" + assert slugify_topic("Working style") == "working_style" + assert slugify_topic("Privacy constraints") == "privacy_constraints" + + +def test_slugify_hierarchy_uses_dot(): + assert ( + slugify_topic("Tools & Access > ProtonMail Access") + == "tools_access.protonmail_access" + ) + + +def test_slugify_preserves_cjk(): + # CJK characters survive the punct->underscore collapse; only > is hierarchy. 
+ assert slugify_topic("醫院 > 新樓") == "醫院.新樓" + assert slugify_topic("家庭 生活") == "家庭_生活" + + +def test_slugify_handles_empty_or_punct_only(): + assert slugify_topic("") == "unknown" + assert slugify_topic("!!!") == "unknown" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +SAMPLE_MD = """People: 禮揚 — physician +§ +Working style: digital twin model +§ +Privacy constraints: never include real PHI +§ +Tools & Access > ProtonMail: D4303@sinlau.org.tw +§ +""" + + +def test_parse_memory_md_basic(): + entries = parse_memory_md(SAMPLE_MD) + assert len(entries) == 4 + assert entries[0].topic == "People" + assert entries[0].fact == "禮揚 — physician" + assert entries[0].entity == "禮揚.people" + assert entries[3].entity == "禮揚.tools_access.protonmail" + + +def test_parse_skips_blocks_without_colon(): + md = "first entry: ok\n§\n\nno colon here\n§\nsecond: also ok\n§\n" + entries = parse_memory_md(md) + assert [e.topic for e in entries] == ["first entry", "second"] + + +def test_parse_handles_no_trailing_separator(): + md = "topic: content" + entries = parse_memory_md(md) + assert len(entries) == 1 + assert entries[0].fact == "content" + + +# --------------------------------------------------------------------------- +# import_memory_md (with stub embed) +# --------------------------------------------------------------------------- + + +def _make_stub_embed(): + counter = {"n": 0} + + async def stub(texts): + counter["n"] += 1 + return [_vec(i + 1) for i, _ in enumerate(texts)] + + return stub, counter + + +def test_dry_run_does_not_write(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=True) + ) + assert summary == { + "parsed": 4, "new": 4, "skipped_dup": 0, + "batches": 0, "dry_run": True, + } + # DB still empty 
(init_db ran but no inserts). + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 0 + + +def test_commit_inserts_and_populates_vec_facts(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary["new"] == 4 + assert summary["batches"] == 1 + assert counter["n"] == 1 # one Voyage call for 4 entries + + conn = init_db(db) + rows = conn.execute( + "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts ORDER BY id" + ).fetchall() + assert len(rows) == 4 + assert rows[0]["entity"] == "禮揚.people" + assert rows[0]["importance"] == 2 + assert rows[0]["valid_from"] == "2026-05-10" + assert rows[0]["valid_to"] is None + + # Trigger sf_after_insert mirrored every row into vec_facts. + [(vec_count,)] = conn.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vec_count == 4 + + +def test_idempotent_rerun_inserts_nothing_new(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) + assert counter["n"] == 1 + + summary2 = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary2["new"] == 0 + assert summary2["skipped_dup"] == 4 + assert counter["n"] == 1 # second run made zero embed calls (no new rows) + + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 4 + + +def test_partial_update_only_embeds_new(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + asyncio.run(import_memory_md(md_path=md, 
db_path=db, dry_run=False, embed_fn=stub)) + assert counter["n"] == 1 + + md.write_text(SAMPLE_MD + "\nNew topic: brand new fact\n§\n", encoding="utf-8") + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary["new"] == 1 + assert summary["skipped_dup"] == 4 + assert counter["n"] == 2 # one extra call for the one new entry + + +def test_rollback_on_embed_failure_leaves_db_unchanged(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + + async def failing(texts): + raise RuntimeError("voyage exploded") + + with pytest.raises(RuntimeError, match="voyage exploded"): + asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=failing) + ) + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 0 # transaction rolled back From f162aa5a9ca396834116184019d07b426964c88f Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 12:54:28 +0000 Subject: [PATCH 04/11] feat(memory): wire prefetch + sync_turn through plugin (W2-3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SqliteVecMemoryProvider.prefetch() now embeds the user message via Voyage 3.5-lite, runs read_memory() (vec0 prefilter k=50, SQL CTE rerank with cosine sim + 90-day half-life), and returns a markdown block: ## Recent relevant memories - [entity.slug] fact text (importance: N, age: D days) ... Activation is via config.yaml (memory.provider: sqlite_vec) — no env var gate. Per spec §4 the persona files (SOUL.md, USER.md, life-dimensions.md) stay in flat-file injection above this block; the gateway's existing prompt assembler handles ordering. Hits accounting (spec §4): retrieved fact IDs are stashed per session_id. sync_turn() runs bump_hits() on the cached IDs *after* the reply is delivered, so the UPDATE never sits on the user-facing latency path. 
Errors are swallowed. Async-in-sync bridge: the ABC's prefetch/sync_turn are sync, but the gateway already owns the asyncio loop, so asyncio.run inline raises. Solution is a worker thread with its own event loop and a 5s timeout kill-switch. To make sqlite3 cross-thread access legal, the connection opens with check_same_thread=False and self._lock serializes both read_memory and bump_hits. open_db()/init_db() now take a keyword-only check_same_thread param (default True; provider passes False). format_facts_for_prompt() gained a with_meta=True flag that appends "(importance: N, age: D days)" per fact, used by prefetch. /memdebug will keep the compact (with_meta=False) form. Tests: 6 new cases (markdown header, empty/trivial query no-op, voyage error swallow, sync_turn bumps then clears cache, worker timeout, with_meta format). 35/35 green including W1, W2-1, W2-2. Live activation verified on chococlaw: config.yaml memory.provider: '' -> sqlite_vec docker compose restart gateway Memory provider 'sqlite_vec' registered (0 tools) sqlite_vec memory ready at /opt/data/memories/memory.db End-to-end via MemoryManager.prefetch_all() against the real DB: "我太太生日" returns the full 8-fact markdown block top-1 = "**生日**: 3/19". Refs liyoungc/hermes-memory#6 --- plugins/memory/sqlite_vec/__init__.py | 141 +++++++++++++++--- plugins/memory/sqlite_vec/read.py | 14 +- plugins/memory/sqlite_vec/store.py | 15 +- .../memory/test_sqlite_vec_provider.py | 125 ++++++++++++++++ 4 files changed, 269 insertions(+), 26 deletions(-) diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py index 71a76c08d4a..b8d6e5a4a73 100644 --- a/plugins/memory/sqlite_vec/__init__.py +++ b/plugins/memory/sqlite_vec/__init__.py @@ -1,34 +1,88 @@ """Hermes V3 memory plugin — sqlite-vec store with two-tier (hot/cold) design. -W1 scope (this commit): schema bootstrap + provider stub registering with -the MemoryProvider ABC. 
Read path / write path / weekly promotion arrive in -W2 and W3 per spec docs/superpowers/specs/2026-05-02-hermes-memory-design.md. - Activate via $HERMES_HOME/config.yaml: - memory: - provider: sqlite_vec + memory: + provider: sqlite_vec + +Read path (W2-3): on each turn, ``prefetch(query)`` runs +``read_memory()`` in a worker thread (the gateway already owns the main +asyncio loop, so we can't ``asyncio.run`` inline) and returns a markdown +block prefixed with ``## Recent relevant memories``. The retrieved fact +IDs are cached per session and bumped via ``sync_turn()`` after the +reply is sent, per spec §4 hits accounting. + +Write path (W3) is still a no-op here — ``sync_turn`` only bumps hits. """ from __future__ import annotations +import asyncio import logging +import threading from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider +from .read import ( + DEFAULT_K, + Fact, + bump_hits, + format_facts_for_prompt, + read_memory, +) from .store import init_db logger = logging.getLogger(__name__) +PREFETCH_TIMEOUT_S = 5.0 # Voyage typical 200-400ms; 5s is the kill-switch. +RECALL_HEADER = "## Recent relevant memories" + def _default_db_path(hermes_home: str) -> Path: return Path(hermes_home).expanduser() / "memories" / "memory.db" +def _run_coro_in_thread(coro_factory, timeout: float): + """Run an async coroutine in a worker thread with its own event loop. + + The hermes gateway runs its own asyncio loop, so ``asyncio.run`` from + this synchronous ABC method would raise "cannot be called from a + running event loop". We sidestep by spawning a dedicated thread with a + fresh loop, joining with a timeout. ``coro_factory`` is a zero-arg + callable that builds the coroutine inside the worker so the coroutine + is bound to the worker's loop. 
+ """ + box: Dict[str, Any] = {} + + def runner(): + loop = asyncio.new_event_loop() + try: + box["result"] = loop.run_until_complete(coro_factory()) + except BaseException as exc: + box["error"] = exc + finally: + loop.close() + + t = threading.Thread(target=runner, daemon=True, name="sqlite-vec-prefetch") + t.start() + t.join(timeout) + if t.is_alive(): + raise TimeoutError(f"sqlite_vec worker exceeded {timeout}s") + if "error" in box: + raise box["error"] + return box.get("result") + + class SqliteVecMemoryProvider(MemoryProvider): - """Hermes V3 long-term memory provider (W1 = schema only).""" + """Hermes V3 long-term memory provider (W2-3 = read path live).""" + + def __init__(self) -> None: + self._conn = None + self._db_path: Optional[Path] = None + self._last_fact_ids: Dict[str, List[int]] = {} + self._lock = threading.Lock() @property def name(self) -> str: @@ -47,25 +101,78 @@ def initialize(self, session_id: str, **kwargs) -> None: from hermes_constants import get_hermes_home hermes_home = str(get_hermes_home()) self._db_path = _default_db_path(hermes_home) - self._conn = init_db(self._db_path) + self._conn = init_db(self._db_path, check_same_thread=False) logger.info("sqlite_vec memory ready at %s", self._db_path) def system_prompt_block(self) -> str: - # W1: no system-prompt contribution. Persona stays in flat files - # (SOUL.md, USER.md, life-dimensions.md) handled by built-in memory. + # Persona stays in flat files (SOUL.md, USER.md, life-dimensions.md); + # the recall block is emitted from prefetch() per turn. return "" def prefetch(self, query: str, *, session_id: str = "") -> str: - # W2 will implement actual retrieval. Empty return for now keeps - # the plugin a no-op until we wire read_memory(). - return "" + """Embed query, fetch top-k facts, format as a markdown block. + + Returns "" on empty/trivial query, missing connection, or any + error (Voyage outage, rate limit, etc.) so the gateway never + blocks a reply on memory recall. 
Retrieved fact IDs are stashed + for the matching ``sync_turn()`` call to bump hits. + """ + if not self._conn or not query or not query.strip(): + return "" + + conn = self._conn + + db_lock = self._lock - def sync_turn(self, user: str, assistant: str, **kwargs) -> None: - # W3 will implement async write-back. No-op for W1. - return None + async def _do() -> List[Fact]: + with db_lock: + return await read_memory(query, conn, k=DEFAULT_K) + + try: + facts = _run_coro_in_thread(_do, timeout=PREFETCH_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec prefetch error: %s", exc) + return "" + + if not facts: + return "" + + with self._lock: + self._last_fact_ids[session_id] = [f.id for f in facts] + + body = format_facts_for_prompt(facts, with_meta=True) + return f"{RECALL_HEADER}\n{body}" + + def sync_turn( + self, + user_content: str, + assistant_content: str, + *, + session_id: str = "", + ) -> None: + """Bump hits on facts retrieved during the matching prefetch. + + Per spec §4 this fires AFTER the reply is delivered, so it must + never raise. Errors are swallowed by ``bump_hits`` itself. + """ + with self._lock: + ids = self._last_fact_ids.pop(session_id, []) + if not ids or not self._conn: + return + conn = self._conn + + db_lock = self._lock + + async def _do() -> None: + with db_lock: + await bump_hits(ids, conn) + + try: + _run_coro_in_thread(_do, timeout=PREFETCH_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec bump_hits worker error: %s", exc) def get_tool_schemas(self) -> List[Dict[str, Any]]: - # No model-facing tools; memory is implicit. 
return [] def handle_tool_call(self, tool_name: str, args: Dict[str, Any]) -> Any: diff --git a/plugins/memory/sqlite_vec/read.py b/plugins/memory/sqlite_vec/read.py index 5d3dc6c29a9..154ae14111f 100644 --- a/plugins/memory/sqlite_vec/read.py +++ b/plugins/memory/sqlite_vec/read.py @@ -142,16 +142,22 @@ async def bump_hits(fact_ids: Iterable[int], conn: sqlite3.Connection) -> None: logger.warning("bump_hits swallowed error for %d ids: %s", len(ids), exc) -def format_facts_for_prompt(facts: List[Fact]) -> str: +def format_facts_for_prompt(facts: List[Fact], *, with_meta: bool = False) -> str: """Render top-k facts as a markdown bullet list for system-prompt injection. - Used by SqliteVecMemoryProvider.prefetch() in W2-3. Compact, no header — - the surrounding prompt template owns the section title. + Used by SqliteVecMemoryProvider.prefetch() (with_meta=True per W2-3 + spec) and /memdebug (with_meta=False for compact display). + + No header — the caller owns the section title. """ if not facts: return "" lines = [] for f in facts: prefix = f"[{f.entity}] " if f.entity else "" - lines.append(f"- {prefix}{f.fact}") + suffix = ( + f" (importance: {f.importance}, age: {int(f.age_days)} days)" + if with_meta else "" + ) + lines.append(f"- {prefix}{f.fact}{suffix}") return "\n".join(lines) diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py index cd975d1b9f9..97ec4c3e061 100644 --- a/plugins/memory/sqlite_vec/store.py +++ b/plugins/memory/sqlite_vec/store.py @@ -51,10 +51,15 @@ """ -def open_db(db_path: Path) -> sqlite3.Connection: - """Open a sqlite connection with sqlite-vec extension loaded.""" +def open_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: + """Open a sqlite connection with sqlite-vec extension loaded. + + Pass ``check_same_thread=False`` when the connection will be reused + across threads (e.g. the provider's prefetch worker pool). Caller is + then responsible for serializing access via a lock. 
+ """ db_path.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(str(db_path)) + conn = sqlite3.connect(str(db_path), check_same_thread=check_same_thread) conn.enable_load_extension(True) sqlite_vec.load(conn) conn.enable_load_extension(False) @@ -70,8 +75,8 @@ def bootstrap_schema(conn: sqlite3.Connection) -> None: conn.commit() -def init_db(db_path: Path) -> sqlite3.Connection: +def init_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: """Open + bootstrap. Returns a ready-to-use connection.""" - conn = open_db(db_path) + conn = open_db(db_path, check_same_thread=check_same_thread) bootstrap_schema(conn) return conn diff --git a/tests/plugins/memory/test_sqlite_vec_provider.py b/tests/plugins/memory/test_sqlite_vec_provider.py index 42c631aee3e..9f8e8d438d1 100644 --- a/tests/plugins/memory/test_sqlite_vec_provider.py +++ b/tests/plugins/memory/test_sqlite_vec_provider.py @@ -429,3 +429,128 @@ def test_format_facts_for_prompt_shape(): assert "[禮揚.訓練] 禮揚 likes 5x5" in out assert "- 致妤生日 3/19" in out # no entity prefix when None assert format_facts_for_prompt([]) == "" + + + +# =========================================================================== +# W2-3: prefetch + sync_turn wiring +# =========================================================================== + +from unittest.mock import patch as _patch_w23 + +from plugins.memory.sqlite_vec import ( + PREFETCH_TIMEOUT_S, + RECALL_HEADER, + SqliteVecMemoryProvider, + _run_coro_in_thread, +) + + +def _stubbed_provider(tmp_path, monkeypatch, query_seed: int = 51): + """Build a provider with a real DB, real conn, but stubbed Voyage.""" + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + p = SqliteVecMemoryProvider() + p.initialize(session_id="t", hermes_home=str(tmp_path)) + # Seed 3 facts via the same trigger-driven pipeline used in production. 
+ for fact, ent, ts, seed in [ + ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), + ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), + ("gamma", None, "2025-12-01 09:00:00", 90), + ]: + p._conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + (fact, ent, _vec(seed), ts), + ) + p._conn.commit() + + async def fake_embed(texts, **kw): + return [_vec(query_seed) for _ in texts] + + return p, fake_embed + + +def test_prefetch_returns_markdown_with_header(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = p.prefetch("when does my wife arrive home", session_id="s1") + assert out.startswith(RECALL_HEADER + "\n") + # Top fact 'beta' (seed=50) is closest to query (seed=51). + assert "beta" in out + # with_meta=True format includes importance + age. + assert "(importance:" in out and "days)" in out + # Fact ids cached for sync_turn to bump. + assert p._last_fact_ids["s1"] + p.shutdown() + + +def test_prefetch_empty_query_no_op(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + # No patch needed — should short-circuit before voyage_embed is reached. + assert p.prefetch("", session_id="s1") == "" + assert p.prefetch(" ", session_id="s1") == "" + assert "s1" not in p._last_fact_ids + p.shutdown() + + +def test_prefetch_swallows_voyage_error(tmp_path, monkeypatch): + p, _ = _stubbed_provider(tmp_path, monkeypatch) + + async def raise_embed(texts, **kw): + raise RuntimeError("voyage 503") + + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", raise_embed): + out = p.prefetch("anything", session_id="s1") + assert out == "" # Reply is never blocked on memory-recall failure. 
+ assert "s1" not in p._last_fact_ids + p.shutdown() + + +def test_sync_turn_bumps_hits_then_clears_cache(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + p.prefetch("query", session_id="s1") + cached_ids = list(p._last_fact_ids["s1"]) + assert cached_ids + + p.sync_turn("user said hi", "asst replied", session_id="s1") + # Cache cleared + assert "s1" not in p._last_fact_ids + # Hits incremented for exactly the cached IDs. + placeholders = ",".join("?" * len(cached_ids)) + rows = p._conn.execute( + f"SELECT id, hits FROM semantic_facts WHERE id IN ({placeholders}) ORDER BY id", + cached_ids, + ).fetchall() + assert all(r["hits"] == 1 for r in rows), [(r["id"], r["hits"]) for r in rows] + + # Second sync_turn for same session is a no-op (cache empty). + p.sync_turn("u", "a", session_id="s1") + rows2 = p._conn.execute( + f"SELECT hits FROM semantic_facts WHERE id IN ({placeholders})", cached_ids + ).fetchall() + assert all(r["hits"] == 1 for r in rows2) + p.shutdown() + + +def test_run_coro_in_thread_timeout(): + import asyncio as _asyncio + + async def slow(): + await _asyncio.sleep(2.0) + return "ok" + + import pytest + with pytest.raises(TimeoutError): + _run_coro_in_thread(slow, timeout=0.05) + + +def test_format_with_meta_shape(): + facts = [ + Fact(id=1, fact="致妤生日 3/19", entity="禮揚.家庭", + created_at="2026-05-01", importance=3, sim=0.7, + age_days=5.4, score=0.6), + ] + out = format_facts_for_prompt(facts, with_meta=True) + assert "(importance: 3, age: 5 days)" in out + out_compact = format_facts_for_prompt(facts, with_meta=False) + assert "importance" not in out_compact From 89df8dc0e23bab4c53edabd0d9117a5a9bc42e01 Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 13:07:07 +0000 Subject: [PATCH 05/11] feat(memory): /memdebug slash command + LOG_PATH fix (W2-4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit plugins/memdebug/ is a standalone plugin that registers the /memdebug slash command via the hermes-agent ctx.register_command() surface. Memory plugins live in plugins/memory/ and load through the exclusive loader, which doesn't pass through the slash-command registry — keeping /memdebug separate is the cleanest split. Behaviour (spec §7.2): /memdebug -> short usage help /memdebug -> top-8 from semantic_facts with score + sim + age + importance breakdown /memdebug rawsearch -> substring scan of episodes (forensics) Each invocation logs to ~/.hermes/logs/memory.log as a JSON line so the F2 monitoring path (% top-1 hits judged useful) can aggregate weekly. Reaction logging deferred: the issue acceptance criterion calls for 👍/👎 reaction prompts on the embed message, but Discord-native rich embeds + reaction collectors require gateway-side plumbing (gateway/platforms/discord.py) that the spec §8 marks as iterate-after-W2 work. v1 emits a textual "React 👍/👎 to flag this retrieval." cue and relies on manual user reactions for now. LOG_PATH bug fix bundled: both this plugin and plugins/memory/sqlite_vec/ were resolving the log path via Path.home(), which inside the hermes container resolves to /home/hermes — not the /opt/data mount. Switched to hermes_constants.get_hermes_home() so logs land in the mounted ~/.hermes/logs/memory.log on the host. Confirmed live: $ tail -2 ~/.hermes/logs/memory.log {"ts": "2026-05-02T13:06:17", "q": "今晚晚餐", "k": 8, "n": 8, "sql_ms": 2.81} {"ts": "2026-05-02T13:06:17", "cmd": "memdebug", "q": "今晚晚餐", "n": 8, "ids": [...]} Also fixed a Python default-arg gotcha: _open_memory_db(path=DEFAULT_DB) bound DEFAULT_DB at def-time so monkeypatching the module global didn't take effect. Switched to lazy lookup (path = path or DEFAULT_DB). 
Tests: 10 new for memdebug (truncate, help/empty/rawsearch-no-arg, semantic with score breakdown, db-missing friendly message, rawsearch finds substring, rawsearch empty, sync entry-point dispatch, register() wires the right name + handler shape). 45/45 green including W1, W2-1, W2-2, W2-3. Live verification on chococlaw: /memdebug -> help text /memdebug 我太太生日 -> top-1 = "**生日**: 3/19" (sim=0.604) /memdebug rawsearch 致妤 -> "Episodes are written by W3" (placeholder) Refs liyoungc/hermes-memory#7 --- plugins/memdebug/__init__.py | 225 ++++++++++++++++++++++++++++++ plugins/memdebug/plugin.yaml | 4 + plugins/memory/sqlite_vec/read.py | 14 +- tests/plugins/test_memdebug.py | 175 +++++++++++++++++++++++ 4 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 plugins/memdebug/__init__.py create mode 100644 plugins/memdebug/plugin.yaml create mode 100644 tests/plugins/test_memdebug.py diff --git a/plugins/memdebug/__init__.py b/plugins/memdebug/__init__.py new file mode 100644 index 00000000000..2030192a1ee --- /dev/null +++ b/plugins/memdebug/__init__.py @@ -0,0 +1,225 @@ +"""``/memdebug`` Discord slash command — read-only retrieval diagnostic (W2-4). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.2. + +Usage in chat: + + /memdebug -> top-8 from semantic_facts (curated) + /memdebug rawsearch -> top-8 from episodes (raw turns, forensics) + +The handler intentionally returns plain markdown text (not a Discord +embed): hermes-agent's ``register_command()`` surface is platform-neutral +and dispatches the same string to CLI / gateway / Slack. + +The ``rich-embed + 👍/👎 reaction buttons`` mode is open spec §8 work — we +ship the read-only diagnostic now so the F2 monitoring path (% of +top-1 hits judged useful) is unblocked. For v1, encourage the user +to react with 👍/👎 emoji on this message; a future cron will scrape +those reactions from the channel. 
+""" + +from __future__ import annotations + +import asyncio +import logging +import sqlite3 +import time +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + +def _resolve_hermes_home() -> Path: + """Use HERMES_HOME (set by hermes_constants) when available; else ~/.hermes.""" + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +_HERMES_HOME = _resolve_hermes_home() +DEFAULT_DB = _HERMES_HOME / "memories" / "memory.db" +DEFAULT_K = 8 +LOG_PATH = _HERMES_HOME / "logs" / "memory.log" + + +def _format_facts_block(facts) -> str: + lines = ["**🧠 /memdebug** — top {} from `semantic_facts`\n".format(len(facts))] + for i, f in enumerate(facts, start=1): + recency = max(0.0, 1.0 - f.age_days / 365.0) # display-only;rerank weight uses 90-day half-life + lines.append( + f"`{i}.` **[{f.entity or '—'}]** {_truncate(f.fact, 90)}\n" + f" score=`{f.score:.3f}` sim=`{f.sim:.3f}` " + f"age=`{int(f.age_days)}d` importance=`{f.importance}`" + ) + lines.append("\n_React 👍/👎 to flag this retrieval._") + return "\n".join(lines) + + +def _truncate(s: str, n: int) -> str: + s = s.replace("\n", " ") + return s if len(s) <= n else s[: n - 1] + "…" + + +def _format_episodes_block(rows: List[sqlite3.Row]) -> str: + if not rows: + return ( + "**🧠 /memdebug rawsearch** — `episodes` table is empty.\n\n" + "Episodes are written by W3 (per-turn write-back). After W3 " + "ships, this command will surface the raw conversation turns " + "behind any retrieval." 
+ ) + lines = ["**🧠 /memdebug rawsearch** — top {} from `episodes`\n".format(len(rows))] + for i, r in enumerate(rows, start=1): + lines.append( + f"`{i}.` `[{r['ts']}]` `{r['channel']}/{r['role']}` " + f"{_truncate(r['text'], 120)}" + ) + return "\n".join(lines) + + +def _append_log(payload: dict) -> None: + """Append a /memdebug invocation to ~/.hermes/logs/memory.log.""" + import json + try: + LOG_PATH.parent.mkdir(parents=True, exist_ok=True) + with LOG_PATH.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +def _open_memory_db(path: Optional[Path] = None) -> Optional[sqlite3.Connection]: + """Open the sqlite_vec memory.db. Returns None if it doesn't exist yet.""" + path = path or DEFAULT_DB + if not path.exists(): + return None + from plugins.memory.sqlite_vec.store import open_db + return open_db(path, check_same_thread=False) + + +async def _do_semantic(query: str) -> str: + from plugins.memory.sqlite_vec.read import read_memory + + conn = _open_memory_db() + if not conn: + return ( + "**🧠 /memdebug** — memory database not yet initialised.\n\n" + f"Expected at `{DEFAULT_DB}`. Run `scripts/import_md.py --commit` " + "or wait for the first agent turn after W2-3 cutover." + ) + try: + facts = await read_memory(query, conn, k=DEFAULT_K) + finally: + conn.close() + if not facts: + return f"**🧠 /memdebug** — no facts matched `{_truncate(query, 60)}`." + _append_log({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "memdebug", + "q": query, + "n": len(facts), + "ids": [f.id for f in facts], + }) + return _format_facts_block(facts) + + +async def _do_rawsearch(query: str) -> str: + """Substring scan of episodes.text. No vector query — this is forensics + mode for 'did this conversation happen', not semantic recall.""" + conn = _open_memory_db() + if not conn: + return ( + "**🧠 /memdebug rawsearch** — memory database not yet initialised." 
+ ) + try: + like = f"%{query}%" + rows = conn.execute( + "SELECT ts, channel, role, text FROM episodes " + "WHERE text LIKE ? ORDER BY ts DESC LIMIT ?", + (like, DEFAULT_K), + ).fetchall() + finally: + conn.close() + _append_log({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "memdebug-raw", + "q": query, + "n": len(rows), + }) + return _format_episodes_block(rows) + + +HELP_TEXT = ( + "**/memdebug** — inspect what `read_memory` would return.\n" + "Usage:\n" + " `/memdebug ` — top-8 from `semantic_facts` (curated)\n" + " `/memdebug rawsearch ` — substring scan of `episodes` (forensics)\n" +) + + +async def _handle_async(raw_args: str) -> str: + args = (raw_args or "").strip() + if not args: + return HELP_TEXT + if args.lower().startswith("rawsearch"): + rest = args[len("rawsearch"):].strip() + if not rest: + return HELP_TEXT + try: + return await _do_rawsearch(rest) + except Exception as exc: + logger.exception("memdebug rawsearch failed") + return f"**/memdebug rawsearch** error: `{exc}`" + try: + return await _do_semantic(args) + except Exception as exc: + logger.exception("memdebug semantic failed") + return f"**/memdebug** error: `{exc}`" + + +def _handle_memdebug(raw_args: str) -> str: + """Sync entry point. PluginContext.register_command supports async + handlers natively, but ours is dispatched on either pathway, so we + bridge via asyncio.run when no loop is running.""" + coro = _handle_async(raw_args) + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + if loop is None: + return asyncio.run(coro) + # Already in a running loop — schedule and wait via a worker thread. + import threading + import concurrent.futures + box = {} + + def runner(): + try: + box["r"] = asyncio.run(coro) + except BaseException as exc: + box["e"] = exc + + t = threading.Thread(target=runner, daemon=True, name="memdebug-handler") + t.start() + t.join(timeout=15.0) + if t.is_alive(): + return "**/memdebug** timed out (>15s)." 
+ if "e" in box: + return f"**/memdebug** error: `{box['e']}`" + return box.get("r", HELP_TEXT) + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + ctx.register_command( + "memdebug", + handler=_handle_memdebug, + description="Inspect Hermes long-term memory retrieval (top-8 + scores).", + args_hint=" | rawsearch ", + ) diff --git a/plugins/memdebug/plugin.yaml b/plugins/memdebug/plugin.yaml new file mode 100644 index 00000000000..1945104cff6 --- /dev/null +++ b/plugins/memdebug/plugin.yaml @@ -0,0 +1,4 @@ +name: memdebug +version: 0.1.0 +description: "/memdebug — inspect Hermes long-term memory retrieval. Read-only diagnostic for the sqlite_vec memory plugin (W2-4)." +author: "Li-yang Chen" diff --git a/plugins/memory/sqlite_vec/read.py b/plugins/memory/sqlite_vec/read.py index 154ae14111f..05a7e5b66d9 100644 --- a/plugins/memory/sqlite_vec/read.py +++ b/plugins/memory/sqlite_vec/read.py @@ -29,7 +29,19 @@ DEFAULT_K = 8 PREFILTER_K = 50 -DEFAULT_LOG_PATH = Path.home() / ".hermes" / "logs" / "memory.log" + + +def _default_log_path() -> Path: + """Resolve the memory.log path lazily so HERMES_HOME (e.g. /opt/data + inside the container) wins over the worker thread's Path.home().""" + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) / "logs" / "memory.log" + except Exception: + return Path.home() / ".hermes" / "logs" / "memory.log" + + +DEFAULT_LOG_PATH = _default_log_path() # Spec §4 — SQL is locked. Do not edit weights without updating the spec # and re-running the B1 worked example. 
diff --git a/tests/plugins/test_memdebug.py b/tests/plugins/test_memdebug.py new file mode 100644 index 00000000000..65380e271c4 --- /dev/null +++ b/tests/plugins/test_memdebug.py @@ -0,0 +1,175 @@ +"""Tests for plugins/memdebug/ — /memdebug slash command (W2-4).""" + +from __future__ import annotations + +import asyncio +import struct +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memdebug import ( + HELP_TEXT, + _do_rawsearch, + _do_semantic, + _format_facts_block, + _handle_async, + _handle_memdebug, + _truncate, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def test_truncate_short_string_unchanged(): + assert _truncate("abc", 10) == "abc" + + +def test_truncate_long_string_ellipsis(): + out = _truncate("a" * 100, 10) + assert out.endswith("…") and len(out) == 10 + + +# --------------------------------------------------------------------------- +# Help / empty / unknown args +# --------------------------------------------------------------------------- + + +def test_handle_empty_returns_help(): + assert _handle_memdebug("") == HELP_TEXT + assert _handle_memdebug(" ") == HELP_TEXT + + +def test_handle_rawsearch_empty_returns_help(): + assert _handle_memdebug("rawsearch") == HELP_TEXT + assert _handle_memdebug("rawsearch ") == HELP_TEXT + + +# --------------------------------------------------------------------------- +# Semantic / rawsearch via direct async helpers (so we control DB path) +# --------------------------------------------------------------------------- + + +def _seed_db(tmp_path): + """Seed a fixture memory.db on tmp_path and return its path.""" + db_path = 
tmp_path / "memories" / "memory.db" + conn = init_db(db_path) + conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + ("致妤生日 3/19", "禮揚.家庭", _vec(50), "2026-05-02 09:00:00"), + ) + conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + ("AI as digital twin", "禮揚.工作", _vec(60), "2026-05-01 09:00:00"), + ) + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (?, ?, ?, ?, ?)", + ("2026-05-02 17:00:00", "cattia", "msg-1", "user", "晚餐幾點開"), + ) + conn.commit() + conn.close() + return db_path + + +def test_do_semantic_returns_score_breakdown(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db_path = _seed_db(tmp_path) + + async def fake_embed(texts, **kw): + return [_vec(51) for _ in texts] + + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ + patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = asyncio.run(_do_semantic("when does my wife get home")) + + assert "/memdebug" in out + assert "致妤生日 3/19" in out # closest fact + # Score breakdown labels present. + assert "score=" in out and "sim=" in out and "age=" in out + # Reaction prompt present (until rich-embed UX lands). + assert "👍" in out and "👎" in out + # Log line written. 
+ log_path = tmp_path / "memory.log" + assert log_path.exists() + last_line = log_path.read_text().strip().splitlines()[-1] + assert '"cmd": "memdebug"' in last_line + + +def test_do_semantic_db_missing_returns_friendly_message(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + missing = tmp_path / "absent.db" + with patch("plugins.memdebug.DEFAULT_DB", missing): + out = asyncio.run(_do_semantic("anything")) + assert "not yet initialised" in out + + +def test_do_rawsearch_finds_substring(tmp_path): + db_path = _seed_db(tmp_path) + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): + out = asyncio.run(_do_rawsearch("晚餐")) + assert "rawsearch" in out + assert "晚餐幾點開" in out + assert "cattia/user" in out + + +def test_do_rawsearch_empty_episodes_message(tmp_path): + db_path = tmp_path / "memories" / "memory.db" + init_db(db_path).close() # bootstrap schema, no rows + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): + out = asyncio.run(_do_rawsearch("anything")) + assert "rawsearch" in out + assert "Episodes are written by W3" in out + + +# --------------------------------------------------------------------------- +# Sync entry point + register() +# --------------------------------------------------------------------------- + + +def test_handle_memdebug_sync_dispatches_semantic(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db_path = _seed_db(tmp_path) + + async def fake_embed(texts, **kw): + return [_vec(51) for _ in texts] + + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ + patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = _handle_memdebug("when does my wife get home") + assert "致妤生日" in out + + +def test_register_calls_register_command(): + """register(ctx) must call 
ctx.register_command with the right name.""" + from plugins.memdebug import register + + captured = {} + + class FakeCtx: + def register_command(self, name, handler, description="", args_hint=""): + captured["name"] = name + captured["handler"] = handler + captured["args_hint"] = args_hint + captured["description"] = description + + register(FakeCtx()) + assert captured["name"] == "memdebug" + assert captured["args_hint"] == " | rawsearch " + assert callable(captured["handler"]) + # The handler must accept a single positional argument (raw_args). + assert captured["handler"].__code__.co_argcount == 1 From 2edf8c9afbc9acf2f425e59aa1e76d35e784b4e2 Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 13:22:03 +0000 Subject: [PATCH 06/11] feat(memory): kimi_extract + EXTRACT_PROMPT (W3-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plugins/memory/sqlite_vec/extract.py implements the per-turn extraction stage of the write path. EXTRACT_PROMPT is a verbatim copy of spec §5.2 (HARD RULES 1-4 + JSON shape contract); paraphrasing here would compromise the F2 monitoring contract that downstream weekly review depends on. PHI_BLACKLIST_CHANNELS = {"cmio", "cbme", "medicine"} short-circuits to [] before any network call so hospital data never round-trips through synthetic.new. kimi_extract(user, assistant, channel, ts) calls Kimi K2.5 via synthetic.new's OpenAI-compatible endpoint with temperature=0.1, response_format=json_object, max_tokens=1024. Token usage is logged to ~/.hermes/logs/memory.log so weekly review can spot a runaway extract budget. JSON parser is intentionally tolerant: in live testing Kimi K2.5 returned three different shapes for the same prompt at temperature=0.1: 1. bare list [{...}] 2. wrapped object {"analysis": "...", "extracted_memories": [...]} 3. 
flat single fact {"type":"episodic","text":"...","entity":...} _parse_json_list() handles all three, falls back to the first list-valued field, and detects single-fact dicts by canonical key presence. Credential resolution: SYNTHETIC_API_KEY env var first (test override), then auth.json's credential_pool["custom:synthetic"] (canonical key on chococlaw). Older / alternate layouts (credential_pools, top-level) also accepted for resilience. Coercion drops malformed rows (bad type / blank text / unparseable importance), clamps importance to 1-5, and validates entity / valid_to_hint types. Only well-formed facts reach the caller. Tests: 22 cases (prompt verbatim assertions, PHI blacklist (3), parser shapes (5), coercion (3), short-circuits (2), mocked synthetic.new full flow (5), error paths (2), auth.json round-trip). 213/213 green across all memory + scripts tests. Live smoke test on chococlaw against real synthetic.new + Kimi K2.5: pleasantry ("好的") -> 0 facts ✓ long-lived ("追 sleep RCT") -> 1 fact (semantic, 禮揚.研究興趣) ✓ phi-channel ("cmio") -> 0 facts (short-circuit) ✓ short-lived ("致妤 7:30") -> 0 facts ⚠ (Kimi judges "about 致妤, not about 禮揚") The short-lived miss is a spec-level prompt issue, not an extract.py bug — the prompt says "memories about 禮揚" and Kimi reads that strictly. Spec §4.1's B1 acceptance example expects this turn to extract; matching B1 will require a spec edit (e.g. clarifying "about 禮揚 includes 禮揚's life context"). W3-3 weekly_promotion runs a separate thinking-mode Kimi pass over a week of episodes, which is the spec's intended catch for hot-path misses. 
Refs liyoungc/hermes-memory#8 --- plugins/memory/sqlite_vec/extract.py | 299 ++++++++++++++++++++++ tests/plugins/memory/test_extract.py | 363 +++++++++++++++++++++++++++ 2 files changed, 662 insertions(+) create mode 100644 plugins/memory/sqlite_vec/extract.py create mode 100644 tests/plugins/memory/test_extract.py diff --git a/plugins/memory/sqlite_vec/extract.py b/plugins/memory/sqlite_vec/extract.py new file mode 100644 index 00000000000..caeffb1e245 --- /dev/null +++ b/plugins/memory/sqlite_vec/extract.py @@ -0,0 +1,299 @@ +"""Kimi-driven extraction from a single Discord turn (W3-1). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.2. + +The ``EXTRACT_PROMPT`` constant is **verbatim** from the spec — do not +paraphrase. Drift here directly compromises the F2 monitoring path +(downstream weekly review will see noise). + +Two-stage flow: + + 1. Caller calls ``kimi_extract(user, assistant, channel, ts)``. + 2. We short-circuit to ``[]`` if ``channel`` is in + ``PHI_BLACKLIST_CHANNELS`` — never round-trip hospital data + through the cloud LLM. + 3. Otherwise we POST to synthetic.new'\\''s OpenAI-compatible + chat-completions endpoint with ``temperature=0.1`` and + ``response_format=json_object`` (Kimi K2.5 supports the OpenAI + structured-output flag). + 4. Parse the JSON list, validate the per-item shape, return + ``list[ExtractedFact]``. Bad rows are dropped, not fatal. + +Token cost is logged to ``memory.log`` so weekly review can spot a +runaway extract budget. +""" + +from __future__ import annotations + +import json +import logging +import os +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Optional + +import httpx + +logger = logging.getLogger(__name__) + +# Spec §1.4 lock — Kimi K2.5 via synthetic.new. 
SYNTHETIC_URL = "https://api.synthetic.new/v1/chat/completions"
EXTRACT_MODEL = "hf:moonshotai/Kimi-K2.5"
EXTRACT_TEMPERATURE = 0.1
EXTRACT_TIMEOUT = 30.0
EXTRACT_MAX_TOKENS = 1024  # extract output is a small JSON list

# Spec §5.1 — channels whose content never leaves the host as PHI.
PHI_BLACKLIST_CHANNELS = frozenset({"cmio", "cbme", "medicine"})

# Spec §5.2 EXTRACT_PROMPT — copy verbatim. The {placeholders} are
# substituted at call time.
EXTRACT_PROMPT = """You extract durable memories about 禮揚 from this Discord turn.
Output a JSON list. Empty list [] if nothing memorable.

HARD RULES — these override everything else:
1. NEVER extract: hospital data, patient names, 病歷號, 身分證字號, lab results,
   diagnoses about real people, hospital policy specifics, hospital colleague names.
2. NEVER extract pleasantries (好的/收到/早安/明白/thanks). Return [] if turn is just this.
3. If turn metadata says synthetic=true (cron-produced), return [] UNLESS content
   contains a NEW commitment by 禮揚 (e.g. "排了 5/22 跟 Y 開會").
4. If unsure whether content violates rule 1, ERR ON THE SIDE OF NOT EXTRACTING.

Each item:
  type: "episodic" | "semantic"
  text: short statement, zh-TW or English (match source language)
  entity: nullable. Use ".家庭", ".工作", ".研究興趣", ".健康", etc. namespacing under "禮揚."
  importance: 1-5
  valid_to_hint: ISO date if turn implies expiry. "今晚"→tomorrow, "這週"→Sunday, "這個月"→end-of-month.

Skip facts that duplicate something said in the last 5 turns.

TURN:
[{ts}] [{channel}] user: {user}
[{ts}] [{channel}] assistant: {assistant}
"""


@dataclass
class ExtractedFact:
    """One fact extracted from a turn. Distinct from the read-side ``Fact``."""

    type: str  # "episodic" | "semantic"
    text: str
    entity: Optional[str]
    importance: int
    valid_to_hint: Optional[str] = None
    raw: dict = field(default_factory=dict)  # original Kimi output for forensics


class ExtractError(RuntimeError):
    """Raised when synthetic.new is unreachable or returns malformed payload."""


def _resolve_hermes_home() -> Path:
    """Best-effort HERMES_HOME lookup; falls back to ~/.hermes when the
    project constants module is unavailable (e.g. in isolated tests)."""
    try:
        from hermes_constants import get_hermes_home
        return Path(get_hermes_home())
    except Exception:
        return Path.home() / ".hermes"


def _default_log_path() -> Path:
    """Default destination for the structured extract log."""
    return _resolve_hermes_home() / "logs" / "memory.log"


def _read_synthetic_api_key() -> str:
    """Resolve the synthetic.new API key.

    Priority:
      1. ``SYNTHETIC_API_KEY`` env var (test-friendly override).
      2. ``auth.json`` ``custom:synthetic`` pool, first non-expired token.

    Raises ``ExtractError`` if no key is found — the caller decides
    whether that should bubble up (W3-2 wraps and falls back).
    """
    env = os.environ.get("SYNTHETIC_API_KEY")
    if env:
        return env

    auth_path = _resolve_hermes_home() / "auth.json"
    if auth_path.exists():
        try:
            data = json.loads(auth_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            raise ExtractError(f"auth.json parse: {exc}") from exc
        # The real auth.json uses "credential_pool" (singular). Older or
        # alternate layouts may use the plural form or top-level keys, so we
        # check all three for resilience across hermes-agent versions.
        pool = (
            (data.get("credential_pool") or {}).get("custom:synthetic")
            or (data.get("credential_pools") or {}).get("custom:synthetic")
            or data.get("custom:synthetic")
            or []
        )
        for entry in pool:
            # Defensive: a malformed pool entry (e.g. a bare string) must
            # not crash key resolution with an AttributeError.
            if not isinstance(entry, dict):
                continue
            tok = entry.get("access_token")
            if tok:
                return tok

    raise ExtractError(
        "synthetic.new API key not found. Set SYNTHETIC_API_KEY or "
        "ensure auth.json has a custom:synthetic credential."
    )


def _append_log(payload: dict, log_path: Optional[Path] = None) -> None:
    """Append one JSON line to the memory log; never raises on I/O errors."""
    log_path = log_path or _default_log_path()
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        with log_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
    except OSError as exc:
        logger.warning("memory.log write failed: %s", exc)


def _coerce_fact(raw: dict) -> Optional[ExtractedFact]:
    """Validate one Kimi-emitted fact dict; return None on shape errors.

    Importance is clamped to [1, 5] with a default of 2; non-string
    ``entity`` / ``valid_to_hint`` values are dropped rather than rejected.
    """
    t = raw.get("type")
    text = raw.get("text")
    if t not in ("episodic", "semantic"):
        return None
    if not isinstance(text, str) or not text.strip():
        return None
    importance = raw.get("importance", 2)
    try:
        importance = int(importance)
    except (TypeError, ValueError):
        importance = 2
    importance = max(1, min(5, importance))
    entity = raw.get("entity")
    if entity is not None and not isinstance(entity, str):
        entity = None
    valid_to_hint = raw.get("valid_to_hint")
    if valid_to_hint is not None and not isinstance(valid_to_hint, str):
        valid_to_hint = None
    return ExtractedFact(
        type=t,
        text=text.strip(),
        entity=entity,
        importance=importance,
        valid_to_hint=valid_to_hint,
        raw=raw,
    )


async def kimi_extract(
    user: str,
    assistant: str,
    channel: str,
    ts: str,
    *,
    client: Optional[httpx.AsyncClient] = None,
    log_path: Optional[Path] = None,
) -> List[ExtractedFact]:
    """Extract durable memories from one Discord turn.

    Returns ``[]`` (no API call) when ``channel`` is PHI-blacklisted, when
    both ``user`` and ``assistant`` are empty, or when Kimi returns
    malformed JSON *content*. Raises ``ExtractError`` on transport
    failure, non-2xx response, or a malformed (non-JSON) response body —
    caller (W3-2) is responsible for fallback bookkeeping (failure JSONL
    log).
    """
    if channel in PHI_BLACKLIST_CHANNELS:
        return []
    if not (user or "").strip() and not (assistant or "").strip():
        return []

    api_key = _read_synthetic_api_key()
    prompt = EXTRACT_PROMPT.format(ts=ts, channel=channel, user=user, assistant=assistant)

    payload = {
        "model": EXTRACT_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": EXTRACT_TEMPERATURE,
        "max_tokens": EXTRACT_MAX_TOKENS,
        "response_format": {"type": "json_object"},
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    owns_client = client is None
    client = client or httpx.AsyncClient()
    t0 = time.perf_counter()
    try:
        try:
            r = await client.post(
                SYNTHETIC_URL, headers=headers, json=payload, timeout=EXTRACT_TIMEOUT
            )
        except httpx.RequestError as exc:
            raise ExtractError(f"synthetic.new network error: {exc}") from exc
        if r.status_code >= 400:
            raise ExtractError(f"synthetic.new {r.status_code}: {r.text[:200]}")
        try:
            body = r.json()
        except ValueError as exc:
            # A 2xx response with a non-JSON body is a malformed payload:
            # surface it as ExtractError per this module's contract instead
            # of leaking a raw json.JSONDecodeError to the caller.
            raise ExtractError(f"synthetic.new malformed body: {exc}") from exc
    finally:
        if owns_client:
            await client.aclose()
    elapsed_ms = (time.perf_counter() - t0) * 1000.0

    choice = (body.get("choices") or [{}])[0]
    content = (choice.get("message") or {}).get("content", "")
    usage = body.get("usage") or {}

    parsed = _parse_json_list(content)
    facts = [f for f in (_coerce_fact(item) for item in parsed) if f is not None]

    _append_log(
        {
            # NOTE(review): local time, no timezone — confirm whether the
            # rest of the memory pipeline logs in UTC.
            "ts": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "cmd": "kimi_extract",
            "channel": channel,
            "ms": round(elapsed_ms, 2),
            "n_raw": len(parsed),
            "n_kept": len(facts),
            "tokens_in": usage.get("prompt_tokens"),
            "tokens_out": usage.get("completion_tokens"),
        },
        log_path=log_path,
    )
    return facts


def _parse_json_list(content: str) -> list:
    """Tolerantly extract a JSON list from Kimi's ``content`` field.

    The prompt asks for a JSON list, but Kimi may wrap it in an object
    (when response_format=json_object) like ``{"facts": [...]}`` or
    return ``{}`` for empty. We accept any of:
      - bare ``[...]``
      - ``{"facts": [...]}`` / ``{"items": [...]}`` / ``{"results": [...]}``
      - ``{}`` (treated as empty list)
    """
    if not content:
        return []
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        # Kimi K2.5 with response_format=json_object often wraps the
        # answer in a dict like {"analysis": ..., "extracted_memories": [...]}.
        # Try the canonical key names first, then fall back to the first
        # list-valued field.
        for key in ("facts", "items", "results", "memories", "extracted_memories", "data"):
            v = data.get(key)
            if isinstance(v, list):
                return v
        # Last-ditch fallback: any top-level list value wins.
        for v in data.values():
            if isinstance(v, list):
                return v
        # Kimi sometimes returns a single fact as a flat dict (no list
        # wrapper). Detect by the presence of the canonical fact keys.
        if "type" in data and "text" in data:
            return [data]
        return []
    return []
"""Tests for plugins/memory/sqlite_vec/extract.py (W3-1)."""

from __future__ import annotations

import asyncio
import json
from pathlib import Path
from unittest.mock import patch

import httpx
import pytest

from plugins.memory.sqlite_vec.extract import (
    EXTRACT_MODEL,
    EXTRACT_PROMPT,
    PHI_BLACKLIST_CHANNELS,
    ExtractError,
    ExtractedFact,
    _coerce_fact,
    _parse_json_list,
    kimi_extract,
)


# ---------------------------------------------------------------------------
# Pure helpers
# ---------------------------------------------------------------------------


def test_extract_prompt_is_verbatim_spec_5_2():
    """Spec §5.2 declares EXTRACT_PROMPT a behavioural contract — verify verbatim."""
    # Phrases that only appear in the spec's exact wording.
    assert "You extract durable memories about 禮揚 from this Discord turn." in EXTRACT_PROMPT
    assert "HARD RULES — these override everything else:" in EXTRACT_PROMPT
    assert "ERR ON THE SIDE OF NOT EXTRACTING" in EXTRACT_PROMPT
    assert "Skip facts that duplicate something said in the last 5 turns." in EXTRACT_PROMPT
    # All four substitution placeholders must survive intact.
    assert "{ts}" in EXTRACT_PROMPT and "{channel}" in EXTRACT_PROMPT
    assert "{user}" in EXTRACT_PROMPT and "{assistant}" in EXTRACT_PROMPT


def test_phi_blacklist_matches_spec_5_1():
    assert PHI_BLACKLIST_CHANNELS == frozenset({"cmio", "cbme", "medicine"})


def test_parse_json_list_bare_array():
    expected = [{"type": "semantic", "text": "a"}]
    assert _parse_json_list('[{"type":"semantic","text":"a"}]') == expected


def test_parse_json_list_wrapped_object():
    assert _parse_json_list('{"facts": [{"type":"semantic","text":"a"}]}') == [
        {"type": "semantic", "text": "a"}
    ]
    assert _parse_json_list('{"items": [{"type":"semantic","text":"b"}]}') == [
        {"type": "semantic", "text": "b"}
    ]


def test_parse_json_list_empty_object_returns_empty_list():
    for degenerate in ("{}", "", "not even json"):
        assert _parse_json_list(degenerate) == []


def test_coerce_fact_drops_invalid_type():
    assert _coerce_fact({"type": "garbage", "text": "a"}) is None
    assert _coerce_fact({"type": "semantic"}) is None  # missing text
    assert _coerce_fact({"type": "semantic", "text": " "}) is None  # blank text


def test_coerce_fact_clamps_importance():
    over = _coerce_fact({"type": "semantic", "text": "a", "importance": 99})
    assert over.importance == 5
    under = _coerce_fact({"type": "semantic", "text": "a", "importance": -3})
    assert under.importance == 1
    bogus = _coerce_fact({"type": "semantic", "text": "a", "importance": "not-int"})
    assert bogus.importance == 2  # default fallback


def test_coerce_fact_round_trip_full_shape():
    raw = {
        "type": "semantic",
        "text": "致妤 7:30 才到家",
        "entity": "禮揚.家庭",
        "importance": 3,
        "valid_to_hint": "2026-05-03",
    }
    fact = _coerce_fact(raw)
    assert isinstance(fact, ExtractedFact)
    assert fact.text == "致妤 7:30 才到家"
    assert fact.entity == "禮揚.家庭"
    assert fact.importance == 3
    assert fact.valid_to_hint == "2026-05-03"


# ---------------------------------------------------------------------------
# kimi_extract — short-circuits (no httpx call)
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("channel", ["cmio", "cbme", "medicine"])
def test_kimi_extract_phi_channel_returns_empty_no_call(channel, monkeypatch, tmp_path):
    """Even with no API key, PHI channels never hit the network."""
    monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False)
    # Point auth.json lookup at an empty tmp dir so any leak would raise.
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    result = asyncio.run(
        kimi_extract(
            "病人的血壓 180/100",
            "我建議轉診",
            channel=channel,
            ts="2026-05-02 09:00:00",
        )
    )
    assert result == []


def test_kimi_extract_empty_turn_returns_empty(monkeypatch, tmp_path):
    monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False)
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    result = asyncio.run(
        kimi_extract("", "", channel="cattia", ts="2026-05-02 09:00:00")
    )
    assert result == []


# ---------------------------------------------------------------------------
# kimi_extract — mocked synthetic.new responses
# ---------------------------------------------------------------------------


def _mock_synthetic_response(facts: list, *, status: int = 200):
    """Build a synthetic.new chat-completions JSON body wrapping `facts`."""
    body = {
        "id": "test",
        "choices": [
            {
                "message": {"role": "assistant", "content": json.dumps(facts)},
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 200, "completion_tokens": 80},
    }
    return status, body


class _FakeTransport(httpx.MockTransport):
    """Canned-response transport that records every outgoing request."""

    def __init__(self, status, body):
        self.calls = []
        self._status = status
        self._body = body
        super().__init__(self._handle)

    def _handle(self, request: httpx.Request):
        self.calls.append(request)
        return httpx.Response(self._status, json=self._body)


def test_kimi_extract_pleasantry_returns_empty_after_call(monkeypatch, tmp_path):
    monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key")
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    status, body = _mock_synthetic_response([])  # Kimi correctly returns []
    fake = _FakeTransport(status, body)
    client = httpx.AsyncClient(transport=fake)

    result = asyncio.run(
        kimi_extract(
            "好的", "收到", channel="cattia", ts="2026-05-02 09:00:00",
            client=client, log_path=tmp_path / "memory.log",
        )
    )
    assert result == []
    assert len(fake.calls) == 1
    log_line = (tmp_path / "memory.log").read_text().strip()
    assert '"cmd": "kimi_extract"' in log_line
    assert '"n_kept": 0' in log_line


def test_kimi_extract_short_lived_fact_with_valid_to_hint(monkeypatch, tmp_path):
    monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key")
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    canned = [
        {
            "type": "semantic",
            "text": "致妤今晚 (2026-05-02) 預計 7:30 才到家",
            "entity": "禮揚.家庭/今晚",
            "importance": 3,
            "valid_to_hint": "2026-05-03",
        }
    ]
    fake = _FakeTransport(*_mock_synthetic_response(canned))
    client = httpx.AsyncClient(transport=fake)

    result = asyncio.run(
        kimi_extract(
            "今晚致妤會晚回來,大概 7:30 才到", "好喔",
            channel="at-home", ts="2026-05-02 09:00:00",
            client=client, log_path=tmp_path / "memory.log",
        )
    )
    assert len(result) == 1
    fact = result[0]
    assert fact.type == "semantic"
    assert "7:30" in fact.text
    assert fact.valid_to_hint == "2026-05-03"
    assert fact.importance == 3


def test_kimi_extract_long_lived_fact_no_valid_to(monkeypatch, tmp_path):
    monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key")
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    canned = [
        {
            "type": "semantic",
            "text": "禮揚 最近在追 sleep medicine 的 RCT",
            "entity": "禮揚.研究興趣",
            "importance": 2,
        }
    ]
    fake = _FakeTransport(*_mock_synthetic_response(canned))
    client = httpx.AsyncClient(transport=fake)

    result = asyncio.run(
        kimi_extract(
            "最近在追 sleep medicine", "了解,要幫你 follow up 嗎",
            channel="cattia", ts="2026-05-02 09:00:00",
            client=client, log_path=tmp_path / "memory.log",
        )
    )
    assert len(result) == 1
    assert result[0].valid_to_hint is None
    assert result[0].entity == "禮揚.研究興趣"


def test_kimi_extract_drops_malformed_rows(monkeypatch, tmp_path):
    monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key")
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    canned = [
        {"type": "semantic", "text": "good fact"},
        {"type": "garbage", "text": "bad type"},  # dropped
        {"type": "episodic"},  # missing text → dropped
        {"type": "semantic", "text": " "},  # blank text → dropped
    ]
    fake = _FakeTransport(*_mock_synthetic_response(canned))
    client = httpx.AsyncClient(transport=fake)

    result = asyncio.run(
        kimi_extract(
            "u", "a", channel="cattia", ts="2026-05-02 09:00:00",
            client=client, log_path=tmp_path / "memory.log",
        )
    )
    assert len(result) == 1
    assert result[0].text == "good fact"


def test_kimi_extract_5xx_raises_extracterror(monkeypatch, tmp_path):
    monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key")
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    fake = _FakeTransport(503, {"error": "down"})
    client = httpx.AsyncClient(transport=fake)
    with pytest.raises(ExtractError):
        asyncio.run(
            kimi_extract(
                "u", "a", channel="cattia", ts="2026-05-02 09:00:00",
                client=client, log_path=tmp_path / "memory.log",
            )
        )


def test_kimi_extract_no_api_key_raises(monkeypatch, tmp_path):
    monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False)
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )  # auth.json absent
    with pytest.raises(ExtractError, match="API key"):
        asyncio.run(
            kimi_extract(
                "u", "a", channel="cattia", ts="2026-05-02 09:00:00",
                log_path=tmp_path / "memory.log",
            )
        )


def test_kimi_extract_reads_auth_json_when_no_env(monkeypatch, tmp_path):
    monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False)
    monkeypatch.setattr(
        "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path
    )
    auth = {
        "credential_pool": {
            "custom:synthetic": [
                {"id": "test", "access_token": "syn_test_xxx"},
            ]
        }
    }
    (tmp_path / "auth.json").write_text(json.dumps(auth), encoding="utf-8")
    fake = _FakeTransport(*_mock_synthetic_response([]))
    client = httpx.AsyncClient(transport=fake)

    result = asyncio.run(
        kimi_extract(
            "x", "y", channel="cattia", ts="2026-05-02 09:00:00",
            client=client, log_path=tmp_path / "memory.log",
        )
    )
    assert result == []
    # The Authorization header carried the auth.json token.
    assert fake.calls[0].headers["Authorization"] == "Bearer syn_test_xxx"


# ===========================================================================
# Additional parser shapes discovered during live smoke test
# ===========================================================================


def test_parse_json_list_extracted_memories_key():
    """Kimi K2.5 with response_format=json_object often wraps the answer in
    a dict with key 'extracted_memories' (sometimes alongside an 'analysis'
    field showing its reasoning). Both must be parsed correctly."""
    payload = (
        '{"analysis": "the user mentions...", '
        '"extracted_memories": [{"type":"semantic","text":"a"}]}'
    )
    assert _parse_json_list(payload) == [{"type": "semantic", "text": "a"}]


def test_parse_json_list_bare_single_fact_dict():
    """Kimi sometimes returns a single fact as a flat dict instead of a list.
    We detect that shape by the presence of canonical fact keys."""
    payload = (
        '{"type": "episodic", "text": "致妤今晚 7:30", '
        '"entity": "禮揚.家庭", "importance": 2}'
    )
    parsed = _parse_json_list(payload)
    assert len(parsed) == 1
    assert parsed[0]["text"] == "致妤今晚 7:30"


def test_parse_json_list_arbitrary_dict_falls_back_to_first_list():
    """If neither canonical keys nor fact-shape is present, the first
    list-valued field is returned. Defensive against future Kimi changes."""
    payload = '{"weird_unique_key": [{"type":"semantic","text":"x"}]}'
    assert _parse_json_list(payload) == [{"type": "semantic", "text": "x"}]
INSERT 2 rows into episodes (user, assistant) inside a single BEGIN/COMMIT, with ON CONFLICT(channel, external_id) DO NOTHING for idempotent Discord redelivery / cron-retry / restart-replay. 5. Per-fact partition into fast-track vs stash: * valid_to_hint parses to <= today + 30 days -> INSERT into semantic_facts directly (the trigger mirrors into vec_facts so the next turn's prefetch can retrieve it). * everything else -> JSON-stash in episodes.metadata.stashed_facts for W3-3 weekly_promotion. 6. Any exception -> rollback + append the turn (raw text, ts, channel, msg_id, error) to ~/.hermes/logs/memory_write_failures.jsonl. The reply was already sent; we never propagate the error. Threshold rationale (spec §5.3): raised from the original 7d to 30d so short-lived facts ("下週會去日本玩五天") don't sit in metadata for a week before the next Sunday review fires. Provider wiring (plugins/memory/sqlite_vec/__init__.py): sync_turn() now schedules two worker-thread coroutines after the reply lands: bump_hits (5s budget) and write_episode (30s budget). The thread reuses self._lock so cross-thread sqlite3 access remains serialized. msg_id is synthesized by hashing (session_id, user, assistant, ts-to-the-minute) so Discord redeliveries within the same minute collapse via ON CONFLICT. No env-var gate (matches W2-3): activation is the same config.yaml memory.provider: sqlite_vec. Rolling back the write path specifically would require code change (or temporarily clearing the provider config), but the hot-path failure mode is a JSONL log entry, not a stalled reply, so the rollback risk is low. Tests: 11 new (parse_valid_to_hint edge cases, fast-track threshold edge / interior / over / null, two episode rows per turn, PHI skips extract but records, idempotent dup msg_id, short-lived fast-tracks + mirrors to vec_facts, long-lived stashes in metadata, mixed partition, embed failure -> JSONL + rollback, extract failure still records raw, empty turn no embed call). 
205/205 green across all memory + memdebug + import tests. Live verification on chococlaw: Turn A: "今晚致妤大概 7:30 才到家" / "了解" -> 2 episodes, 0 facts (Kimi judged "about 致妤 not 禮揚", same prompt-wording observation logged in W3-1) Turn B: "我下週會去日本玩五天" / "酷..." -> 2 episodes, 1 fact fast-tracked: (.家庭) "下週會去日本玩五天" valid_from=2026-05-02 valid_to=2026-05-11 -> vec_facts auto-mirrored via trigger (semantic_facts 25 -> 26). -> Kimi correctly inferred valid_to from "下週" + "五天". Cleanup: smoke test data deleted from production DB before commit. Refs liyoungc/hermes-memory#9 --- plugins/memory/sqlite_vec/__init__.py | 75 ++++-- plugins/memory/sqlite_vec/write.py | 251 ++++++++++++++++++++ tests/plugins/memory/test_write.py | 322 ++++++++++++++++++++++++++ 3 files changed, 634 insertions(+), 14 deletions(-) create mode 100644 plugins/memory/sqlite_vec/write.py create mode 100644 tests/plugins/memory/test_write.py diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py index b8d6e5a4a73..b6470de2adb 100644 --- a/plugins/memory/sqlite_vec/__init__.py +++ b/plugins/memory/sqlite_vec/__init__.py @@ -12,7 +12,12 @@ IDs are cached per session and bumped via ``sync_turn()`` after the reply is sent, per spec §4 hits accounting. -Write path (W3) is still a no-op here — ``sync_turn`` only bumps hits. +Write path (W3-2): ``sync_turn`` now also fires ``write_episode`` — +records the raw turn into ``episodes``, runs Kimi extract, fast-tracks +short-lived facts directly into ``semantic_facts`` (≤ today + 30d), +stashes longer-lived facts into ``episodes.metadata.stashed_facts`` +for W3-3 weekly_promotion. Errors land in +``~/.hermes/logs/memory_write_failures.jsonl`` and never propagate. 
""" from __future__ import annotations @@ -20,6 +25,7 @@ import asyncio import logging import threading +import time from pathlib import Path from typing import Any, Dict, List, Optional @@ -33,10 +39,14 @@ read_memory, ) from .store import init_db +from .write import write_episode logger = logging.getLogger(__name__) PREFETCH_TIMEOUT_S = 5.0 # Voyage typical 200-400ms; 5s is the kill-switch. +# Write path: extract (~1-3s) + embed batch (~300ms) + INSERT (~5ms). +# 30s gives Kimi room to think while still bounding worst-case latency. +WRITE_TIMEOUT_S = 30.0 RECALL_HEADER = "## Recent relevant memories" @@ -65,7 +75,7 @@ def runner(): finally: loop.close() - t = threading.Thread(target=runner, daemon=True, name="sqlite-vec-prefetch") + t = threading.Thread(target=runner, daemon=True, name="sqlite-vec-worker") t.start() t.join(timeout) if t.is_alive(): @@ -75,8 +85,22 @@ def runner(): return box.get("result") +def _synth_msg_id(session_id: str, user: str, asst: str, ts: str) -> str: + """Stable per-turn external_id for ON CONFLICT idempotency. + + We don't have the real Discord message ID at sync_turn time (the + ABC hook only exposes user/assistant content + session_id), so we + hash the turn into a 12-hex-char id. Bucketing ts to the minute + means a Discord redelivery within the same minute collapses; a + legitimate retry after >1 min would create a new row, which is + acceptable for episode-level forensics. 
+ """ + raw = (session_id, user, asst, ts[:16]) + return "h" + hex(abs(hash(raw)) & 0xFFFFFFFFFFFF)[2:] + + class SqliteVecMemoryProvider(MemoryProvider): - """Hermes V3 long-term memory provider (W2-3 = read path live).""" + """Hermes V3 long-term memory provider (W2-3 read + W3-2 write).""" def __init__(self) -> None: self._conn = None @@ -121,7 +145,6 @@ def prefetch(self, query: str, *, session_id: str = "") -> str: return "" conn = self._conn - db_lock = self._lock async def _do() -> List[Fact]: @@ -150,28 +173,52 @@ def sync_turn( *, session_id: str = "", ) -> None: - """Bump hits on facts retrieved during the matching prefetch. + """Bump hits on retrieved facts and persist the turn. - Per spec §4 this fires AFTER the reply is delivered, so it must - never raise. Errors are swallowed by ``bump_hits`` itself. + Spec §4 + §5.1 — both happen AFTER the reply is delivered, so + this must never raise. ``bump_hits`` swallows its own DB errors; + ``write_episode`` swallows everything and writes failures to + ~/.hermes/logs/memory_write_failures.jsonl. 
""" - with self._lock: - ids = self._last_fact_ids.pop(session_id, []) - if not ids or not self._conn: + if not self._conn: return conn = self._conn - db_lock = self._lock - async def _do() -> None: + with self._lock: + ids = self._last_fact_ids.pop(session_id, []) + + ts = time.strftime("%Y-%m-%d %H:%M:%S") + msg_id = _synth_msg_id(session_id, user_content, assistant_content, ts) + channel = session_id or "unknown" + + async def _do_bump() -> None: + if ids: + with db_lock: + await bump_hits(ids, conn) + + async def _do_write() -> None: with db_lock: - await bump_hits(ids, conn) + await write_episode( + user_msg=user_content, + reply=assistant_content, + channel=channel, + msg_id=msg_id, + ts=ts, + conn=conn, + ) try: - _run_coro_in_thread(_do, timeout=PREFETCH_TIMEOUT_S) + _run_coro_in_thread(_do_bump, timeout=PREFETCH_TIMEOUT_S) except Exception as exc: logger.warning("sqlite_vec bump_hits worker error: %s", exc) + if user_content or assistant_content: + try: + _run_coro_in_thread(_do_write, timeout=WRITE_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec write_episode worker error: %s", exc) + def get_tool_schemas(self) -> List[Dict[str, Any]]: return [] diff --git a/plugins/memory/sqlite_vec/write.py b/plugins/memory/sqlite_vec/write.py new file mode 100644 index 00000000000..227f2b35e1a --- /dev/null +++ b/plugins/memory/sqlite_vec/write.py @@ -0,0 +1,251 @@ +"""Per-turn write-back into the sqlite_vec memory store (W3-2). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.1. + +Hot-path flow per Discord turn: + + 1. PHI gate — if ``channel`` is in PHI_BLACKLIST_CHANNELS, raw episode + rows still land but extraction is skipped (no PHI to the cloud LLM). + 2. Extract — kimi_extract() returns 0..N ExtractedFacts. + 3. Embed — voyage_embed([user_msg, reply, *fact_texts]) in one batch. + 4. 
"""Per-turn write-back into the sqlite_vec memory store (W3-2).

Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.1.

Hot-path flow per Discord turn:

  1. PHI gate — if ``channel`` is in PHI_BLACKLIST_CHANNELS, raw episode
     rows still land but extraction is skipped (no PHI to the cloud LLM).
  2. Extract — kimi_extract() returns 0..N ExtractedFacts.
  3. Embed — voyage_embed([user_msg, reply, *fact_texts]) in one batch.
  4. INSERT 2 episode rows (user, assistant) with
     ``ON CONFLICT(channel, external_id) DO NOTHING`` for idempotency
     under Discord redelivery / cron retries / container restarts.
  5. Fast-track facts whose ``valid_to_hint`` parses to ≤ today + 30d
     directly into ``semantic_facts`` (the trigger mirrors them into
     ``vec_facts``). Longer-lived / undated facts are JSON-stashed in
     ``episodes.metadata.stashed_facts`` for W3-3 weekly_promotion.
  6. Any exception → append a JSONL line to
     ``~/.hermes/logs/memory_write_failures.jsonl`` and swallow.
     The reply was already sent before this fired; we never propagate.

The function is fire-and-forget: the caller schedules it via
``asyncio.create_task`` (or in our case, a worker thread the provider
spawns) AFTER ``discord_send`` so write latency cannot stall the user.
"""

from __future__ import annotations

import json
import logging
import sqlite3
import time
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

from .extract import (
    PHI_BLACKLIST_CHANNELS,
    ExtractedFact,
    kimi_extract,
)

logger = logging.getLogger(__name__)

# Spec §5.3 — fast-track threshold (raised from 7d to 30d): facts that
# expire within ~1 month land directly in semantic_facts so they're
# usable on the next turn instead of waiting up to 7 days for the
# weekly review.
FAST_TRACK_DAYS = 30


def _resolve_hermes_home() -> Path:
    """Best-effort HERMES_HOME lookup; falls back to ~/.hermes when the
    project constants module is unavailable (e.g. in isolated tests)."""
    try:
        from hermes_constants import get_hermes_home
        return Path(get_hermes_home())
    except Exception:
        return Path.home() / ".hermes"


def _failure_log_path() -> Path:
    """Default destination for the write-failure JSONL log."""
    return _resolve_hermes_home() / "logs" / "memory_write_failures.jsonl"


def _append_failure(payload: Dict[str, Any], log_path: Optional[Path] = None) -> None:
    """Append one failure record as a JSONL line; never raises on I/O errors."""
    log_path = log_path or _failure_log_path()
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        with log_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n")
    except OSError as exc:
        logger.warning("memory_write_failures.jsonl write failed: %s", exc)


def _parse_valid_to_hint(hint: Optional[str]) -> Optional[date]:
    """Parse 'YYYY-MM-DD' tolerantly. Return None on bad / missing input."""
    if not hint:
        return None
    try:
        return datetime.strptime(hint.strip(), "%Y-%m-%d").date()
    except (ValueError, TypeError):
        return None


def _fact_should_fast_track(fact: ExtractedFact, today: date) -> bool:
    """True iff fact has a valid_to_hint within FAST_TRACK_DAYS of today."""
    expiry = _parse_valid_to_hint(fact.valid_to_hint)
    if not expiry:
        return False
    return expiry <= today + timedelta(days=FAST_TRACK_DAYS)


# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------


async def write_episode(
    user_msg: str,
    reply: str,
    channel: str,
    msg_id: str,
    ts: str,
    conn: sqlite3.Connection,
    *,
    embed_fn: Optional[Callable] = None,
    extract_fn: Optional[Callable] = None,
    failure_log_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """Persist one Discord turn to the memory store.

    Returns a summary dict for caller logging:
    {episodes: 0|1|2, fast_tracked: N, stashed: N, skipped_extract: bool}

    Never raises. Errors land in ``memory_write_failures.jsonl``.
    """
    summary: Dict[str, Any] = {
        "episodes": 0,
        "fast_tracked": 0,
        "stashed": 0,
        "skipped_extract": False,
    }
    skip_extract = channel in PHI_BLACKLIST_CHANNELS
    summary["skipped_extract"] = skip_extract

    try:
        # ---- 1. extract (skipped entirely on PHI channels)
        # Note: the previous `not (extract_fn or kimi_extract)` guard was
        # dead code — kimi_extract is always truthy — so the only real
        # condition is the PHI gate.
        facts: List[ExtractedFact] = []
        if not skip_extract:
            extractor = extract_fn or kimi_extract
            try:
                facts = await extractor(user_msg, reply, channel, ts)
            except Exception as exc:
                # Extract failure is non-fatal — we still record the
                # raw episode so weekly_promotion can re-extract later.
                logger.warning("kimi_extract failed; continuing without facts: %s", exc)
                facts = []

        # ---- 2. embed (raw turn + each fact text in one call)
        embed = embed_fn
        if embed is None:
            from .embed import voyage_embed
            embed = voyage_embed

        texts_to_embed = [user_msg, reply] + [f.text for f in facts]
        # Filter empty strings — Voyage rejects them.
        non_empty = [(i, t) for i, t in enumerate(texts_to_embed) if t and t.strip()]
        if non_empty:
            indices, texts = zip(*non_empty)
            blobs_dense = await embed(list(texts))
            # Re-densify back to original positions; missing slots get None.
            blobs: List[Optional[bytes]] = [None] * len(texts_to_embed)
            for slot, blob in zip(indices, blobs_dense):
                blobs[slot] = blob
        else:
            blobs = [None] * len(texts_to_embed)

        user_blob, reply_blob = blobs[0], blobs[1]
        fact_blobs = blobs[2:]

        # ---- 3. partition facts into fast-track vs stash BEFORE INSERT
        today = date.today()
        fast_track: List[tuple] = []  # [(fact, blob), ...]
        stashed: List[Dict[str, Any]] = []  # JSON-serialisable dicts
        for f, blob in zip(facts, fact_blobs):
            if _fact_should_fast_track(f, today):
                if blob is not None:
                    fast_track.append((f, blob))
                else:
                    # No embedding for this fact → can't insert into
                    # semantic_facts (embedding is NOT NULL). Demote to stash.
                    stashed.append(f.raw or _fact_to_dict(f))
            else:
                stashed.append(f.raw or _fact_to_dict(f))

        # NOTE(review): the same stash is attached to BOTH episode rows
        # below — confirm W3-3 weekly_promotion deduplicates, or it will
        # see every stashed fact twice.
        metadata = {"stashed_facts": stashed} if stashed else {}
        metadata_json = json.dumps(metadata, ensure_ascii=False) if metadata else None

        # ---- 4. INSERT episodes (atomic with fast-track inserts)
        try:
            # NOTE(review): explicit "BEGIN" raises OperationalError if a
            # transaction is already open on this connection — assumes
            # init_db leaves the connection in autocommit; confirm.
            conn.execute("BEGIN")
            ep_rows = [
                (ts, channel, msg_id + ":user", "user", user_msg, 0, user_blob, metadata_json),
                (ts, channel, msg_id + ":asst", "assistant", reply, 0, reply_blob, metadata_json),
            ]
            for row in ep_rows:
                cur = conn.execute(
                    """
                    INSERT INTO episodes
                      (ts, channel, external_id, role, text, synthetic, embedding, metadata)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT(channel, external_id) DO NOTHING
                    """,
                    row,
                )
                if cur.rowcount:
                    summary["episodes"] += 1

            # ---- 5. fast-track facts → semantic_facts (trigger mirrors to vec_facts)
            for f, blob in fast_track:
                conn.execute(
                    """
                    INSERT INTO semantic_facts
                      (entity, fact, embedding, importance, valid_from, valid_to)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (
                        f.entity,
                        f.text,
                        blob,
                        f.importance,
                        today.isoformat(),
                        f.valid_to_hint,
                    ),
                )
                summary["fast_tracked"] += 1

            summary["stashed"] = len(stashed)
            conn.commit()
        except Exception:
            conn.rollback()
            raise

        return summary

    except Exception as exc:
        logger.warning("write_episode failed for msg_id=%s: %s", msg_id, exc)
        _append_failure(
            {
                "ts": time.strftime("%Y-%m-%dT%H:%M:%S"),
                "channel": channel,
                "msg_id": msg_id,
                "user": user_msg,
                "reply": reply,
                "error": str(exc),
                "summary_so_far": summary,
            },
            log_path=failure_log_path,
        )
        return summary


def _fact_to_dict(f: ExtractedFact) -> Dict[str, Any]:
    """Serialise an ExtractedFact for stashing in episodes.metadata."""
    return {
        "type": f.type,
        "text": f.text,
        "entity": f.entity,
        "importance": f.importance,
        "valid_to_hint": f.valid_to_hint,
    }
def test_parse_valid_to_hint():
    """A strict ISO date parses; malformed, empty, or missing hints all
    degrade to None rather than raising."""
    assert _parse_valid_to_hint("2026-05-03") == date(2026, 5, 3)
    assert _parse_valid_to_hint("not-a-date") is None
    assert _parse_valid_to_hint("") is None
    assert _parse_valid_to_hint(None) is None
failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 + rows = db.execute( + "SELECT role, channel, external_id, text FROM episodes ORDER BY id" + ).fetchall() + assert [r["role"] for r in rows] == ["user", "assistant"] + assert rows[0]["external_id"] == "m1:user" + assert rows[1]["external_id"] == "m1:asst" + # Single embed call covered both turn texts (no fact texts). + assert len(calls) == 1 + assert calls[0] == ["hello", "hi back"] + + +def test_phi_channel_records_episode_but_skips_extract(tmp_path): + db = _bootstrap_db(tmp_path) + embed, calls = _stub_embed_factory() + + def extract_should_not_be_called(*a, **kw): + raise AssertionError("extract called for PHI channel") + + summary = asyncio.run(write_episode( + user_msg="病人 [姓名] 血壓 180/100", reply="建議轉診", + channel="cmio", msg_id="phi-1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract_should_not_be_called, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["skipped_extract"] is True + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 + rows = db.execute("SELECT count(*) FROM episodes").fetchone() + assert rows[0] == 2 # raw episode rows still recorded + + +def test_idempotent_on_duplicate_msg_id(tmp_path): + """Re-running with the same msg_id collapses via ON CONFLICT.""" + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + extract = _stub_extract_factory([]) + + args = dict( + user_msg="x", reply="y", channel="cattia", + msg_id="dup-1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + ) + asyncio.run(write_episode(**args)) + summary2 = asyncio.run(write_episode(**args)) + assert summary2["episodes"] == 0 # nothing new inserted + [(count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() + assert count == 2 + + +# 
def test_short_lived_fact_fast_tracks_to_semantic_facts(tmp_path):
    """A fact whose valid_to_hint falls inside the fast-track window goes
    straight into semantic_facts (mirrored into vec_facts by the schema
    trigger) instead of waiting for the weekly promotion pass."""
    db = _bootstrap_db(tmp_path)
    embed, _ = _stub_embed_factory()
    today = date.today()
    # valid_to_hint of tomorrow is well within FAST_TRACK_DAYS.
    extract = _stub_extract_factory([
        ExtractedFact(
            type="semantic",
            text="致妤今晚 7:30 才到家",
            entity="禮揚.家庭",
            importance=3,
            valid_to_hint=(today + timedelta(days=1)).isoformat(),
        ),
    ])

    summary = asyncio.run(write_episode(
        user_msg="今晚致妤 7:30 才到", reply="了解",
        channel="at-home", msg_id="m1", ts="2026-05-02 17:00:00",
        conn=db, embed_fn=embed, extract_fn=extract,
        failure_log_path=tmp_path / "fail.jsonl",
    ))

    assert summary["fast_tracked"] == 1
    assert summary["stashed"] == 0
    [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall()
    assert sf_count == 1
    [(vf_count,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall()
    assert vf_count == 1  # trigger mirrored the row
    # Column values round-trip: entity verbatim, valid_to from the hint.
    row = db.execute(
        "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts"
    ).fetchone()
    assert row["entity"] == "禮揚.家庭"
    assert row["valid_to"] == (today + timedelta(days=1)).isoformat()
semantic_facts").fetchall() + assert sf_count == 0 # nothing fast-tracked + metadata_rows = db.execute( + "SELECT metadata FROM episodes WHERE metadata IS NOT NULL" + ).fetchall() + assert len(metadata_rows) == 2 # both user + assistant rows carry the same metadata + md = json.loads(metadata_rows[0]["metadata"]) + assert md["stashed_facts"][0]["text"] == "禮揚 likes Starting Strength" + assert md["stashed_facts"][0]["entity"] == "禮揚.訓練" + + +def test_mixed_facts_partition_correctly(tmp_path): + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + today = date.today() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", text="short", + entity="禮揚.短期", importance=2, + valid_to_hint=(today + timedelta(days=2)).isoformat(), + ), + ExtractedFact( + type="semantic", text="long", + entity="禮揚.長期", importance=3, + valid_to_hint=None, + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["fast_tracked"] == 1 + assert summary["stashed"] == 1 + + +# --------------------------------------------------------------------------- +# Failure path +# --------------------------------------------------------------------------- + + +def test_embed_failure_appends_to_jsonl(tmp_path): + db = _bootstrap_db(tmp_path) + + async def failing_embed(texts): + raise RuntimeError("voyage exploded") + + extract = _stub_extract_factory([]) + fail_log = tmp_path / "fail.jsonl" + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=failing_embed, extract_fn=extract, + failure_log_path=fail_log, + )) + + # Caller never sees the exception. 
+ assert summary["episodes"] == 0 # rolled back + [(ep_count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() + assert ep_count == 0 + # Failure record landed in the JSONL. + assert fail_log.exists() + line = json.loads(fail_log.read_text().strip().splitlines()[-1]) + assert line["channel"] == "cattia" + assert line["msg_id"] == "m1" + assert "voyage exploded" in line["error"] + + +def test_extract_failure_still_records_episode(tmp_path): + """If kimi_extract raises, we still land the raw episode rows. The + weekly_promotion (W3-3) can re-extract from the raw text later.""" + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + + async def failing_extract(*a, **kw): + raise RuntimeError("synthetic.new 503") + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=failing_extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 + assert summary["stashed"] == 0 + + +def test_empty_turn_records_no_rows(tmp_path): + """Both user_msg and reply blank → no work done, no embed call.""" + db = _bootstrap_db(tmp_path) + + embed_called = [] + + async def embed(texts): + embed_called.append(texts) + return [] + + extract = _stub_extract_factory([]) + summary = asyncio.run(write_episode( + user_msg="", reply="", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + # No embed call (both texts empty), but the schema accepts NULL embeddings + # for episodes so we still INSERT 2 rows. 
+ assert embed_called == [] + assert summary["episodes"] == 2 From f0bd4abae33ea844510142b062ce11713958f386 Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 13:53:09 +0000 Subject: [PATCH 08/11] feat(memory): weekly_promotion + weekly_apply + 2 cron entries (W3-3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the cold-path of the memory system per spec §5.3 + §5.4. Two scripts (entry points in ~/.hermes/scripts/): scripts/weekly_promotion.py - cron Sun 03:00 UTC+8 (cron expr "0 19 * * 6" in UTC). Reads last 7 days of pending episodes, runs one Kimi call to produce a promotion diff, persists the diff to ~/.hermes/memories/pending_diffs/wk-YYYY-MM-DD.json, renders the digest markdown per spec §5.4, posts it to #memory-review via raw Discord HTTP. Does NOT stamp episodes.promoted_at. scripts/weekly_apply.py - cron Mon 03:00 UTC+8 ("0 19 * * 0" UTC). Purges pending_diffs/*.json older than 14 days at start. Loads the latest pending diff. If a .rejected sentinel file exists (written by /memreview reject in W3-4), archives the diff as rejected and exits. Otherwise applies promote / dedup / expire atomically and stamps episodes.promoted_at on the candidate rows. Both scripts emit a final stdout line {"wakeAgent": false} so the cron framework's wake gate skips the agent run — delivery is handled inside the script via the Discord HTTP POST helper, no LLM round-trip needed for the cron job itself. Core logic lives in plugins/memory/sqlite_vec/promotion.py: - PROMOTION_PROMPT designed to mirror EXTRACT_PROMPT style: same HARD RULES (PHI blacklist, pleasantry filter, synthetic handling, err-on-side-of-not-promoting), four explicit actions (PROMOTE / DEDUP_HIT / EXPIRE / DROP_AS_NOISE), and a verbatim output schema. - Per-candidate vec_search prefilter k=20 keeps the prompt small (only nearest-neighbor existing facts, not the whole active set, so prompt stays bounded as semantic_facts grows past 500 rows). 
- WeekDigest dataclass round-trips JSON, render_digest_markdown matches spec §5.4 layout (Promote / Dedup / Expire / Noise sections, emoji icons, character-truncated chunks for Discord 2000-char limit). - discord_post chunks long messages on newline boundaries before 1990 chars to stay under Discord's per-message ceiling. - memory_review_channel_id resolves the live channel from ~/.hermes/channel_directory.json (which stores platforms.discord as a list of {id, name, guild, type} dicts on chococlaw). Critical refactor: _apply_diff_atomic embeds promote-fact texts BEFORE opening the BEGIN/COMMIT, then writes blobs into the transaction. Holding the writer lock open across a Voyage HTTP round-trip would block hot-path write_episode for the duration of the call (300ms+). Live verification on chococlaw: Inserted 4 fixture episodes -> weekly_promotion -> Kimi call: Kimi-K2-Thinking 404'd on synthetic.new; auto-fallback to K2.5. Returned: 2 promote, 0 dedup, 0 expire, 1 drop_as_noise. weekly_apply applied diff: promoted=2 stamped=4 semantic_facts: 25 -> 27 (then back to 25 after smoke cleanup) Discord post test to #memory-review (channel 1483958144596967464): posted=True, format renders correctly with all four sections. Cron entries added to ~/.hermes/cron/jobs.json: Hermes Weekly Memory Promotion - 0 19 * * 6 (Sun 03:00 UTC+8) Hermes Weekly Memory Apply - 0 19 * * 0 (Mon 03:00 UTC+8) Both enabled, deliver=discord, script-driven (wake-gate=false). Tests: 17 new for promotion (prompt placeholders, hard-rule presence, candidate / neighbor formatting, digest_id format, WeekDigest round-trip, markdown renders all 4 sections, empty-section collapse, no-candidates short-circuit, dry-run no-write, real-run persists diff, no-pending-diff exit, rejection sentinel archives without applying, promote inserts + mirrors to vec_facts + stamps episodes, dedup bumps hits, expire sets valid_to, purge_old_pending). 222/222 green across all memory + memdebug + import + scripts tests. 
Operational notes: - Kimi-K2-Thinking unavailable on synthetic.new (404) - we auto-fallback to Kimi-K2.5 with temp=0.2. Quality looks acceptable; revisit if promotion misses obvious dedup opportunities. - The hot-path write_episode keeps stashing long-lived facts into episodes.metadata.stashed_facts, so the first real Sunday firing on a chocoprod week will draw from real data. Refs liyoungc/hermes-memory#10 --- plugins/memory/sqlite_vec/promotion.py | 850 +++++++++++++++++++++++++ tests/plugins/memory/test_promotion.py | 397 ++++++++++++ 2 files changed, 1247 insertions(+) create mode 100644 plugins/memory/sqlite_vec/promotion.py create mode 100644 tests/plugins/memory/test_promotion.py diff --git a/plugins/memory/sqlite_vec/promotion.py b/plugins/memory/sqlite_vec/promotion.py new file mode 100644 index 00000000000..08aefc5b717 --- /dev/null +++ b/plugins/memory/sqlite_vec/promotion.py @@ -0,0 +1,850 @@ +"""Weekly promotion + apply core logic (W3-3). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.3 + §5.4. + +Two entry points, both invoked from cron-driven thin wrappers in +``~/.hermes/scripts/`` (so they sit inside HERMES_HOME/scripts, the only +location the hermes scheduler will exec): + + weekly_promotion() - reads 7 days of pending episodes, runs one + Kimi-thinking call to produce a promotion diff, + saves it to pending_diffs/.json, + renders + posts the digest to #memory-review. + Does NOT stamp episodes.promoted_at. + + weekly_apply() - purges pending_diffs older than 14 days, loads + the latest, checks for the rejection sentinel + file, and either archives-as-rejected or + applies the diff atomically (promote / dedup / + expire) and stamps episodes.promoted_at. + +The split lets the user reject Sunday's diff with /memreview reject + any time before Monday's apply fires. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import sqlite3 +import struct +import time +from dataclasses import dataclass, field +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import httpx + +from .embed import voyage_embed +from .extract import ( + EXTRACT_TIMEOUT, + PHI_BLACKLIST_CHANNELS, + SYNTHETIC_URL, + _read_synthetic_api_key, +) + +logger = logging.getLogger(__name__) + +PROMOTION_MODEL = "hf:moonshotai/Kimi-K2-Thinking" +PROMOTION_FALLBACK_MODEL = "hf:moonshotai/Kimi-K2.5" +PROMOTION_TEMPERATURE = 0.2 +PROMOTION_MAX_TOKENS = 8192 # diff JSON can be substantial across 7 days +PROMOTION_TIMEOUT = 120.0 # thinking-mode + 100+ episodes + +PROMOTION_NEIGHBOR_K = 20 # spec §5.3: per-candidate vec_search k=20 +PROMOTION_LOOKBACK_DAYS = 7 +PENDING_DIFF_TTL_DAYS = 14 + +DISCORD_API = "https://discord.com/api/v10/channels/{channel_id}/messages" + + +# --------------------------------------------------------------------------- +# Prompt — designed to match spec §5.3 schema verbatim +# --------------------------------------------------------------------------- + +PROMOTION_PROMPT = """You are running the weekly memory promotion review for 禮揚's personal AI. + +Below is one week of conversation episodes that have not yet been reviewed. +Each candidate carries any 'stashed_facts' that the per-turn extractor +recorded in its metadata. You also see, per candidate, the top-20 existing +semantic_facts that are nearest by embedding distance — use these to decide +whether a candidate fact duplicates something already known. + +HARD RULES — these override everything else: +1. NEVER promote: hospital data, patient names, 病歷號, 身分證字號, lab results, + diagnoses about real people, hospital policy specifics, hospital colleague names. +2. Pleasantries (好的/收到/早安/明白/thanks) → drop_as_noise. +3. 
Synthetic episodes (synthetic=true) — promote ONLY if they contain a NEW + commitment by 禮揚 (a meeting scheduled, a habit declared, a decision made). +4. If a candidate stashed_fact is semantically captured by an existing fact + (sim ≥ 0.92), prefer dedup_hits over creating a new row. +5. Conservative importance: most facts are 2; only use 4-5 for permanent + identity / family / strong commitments. + +For each candidate, decide one of four actions: + + A. PROMOTE — new fact worth keeping. Emit into "promote". + valid_to: ISO date or null (null = permanent). + importance: 1-5 (default 2). + source_episode_ids: which candidate episodes contributed. + + B. DEDUP_HIT — candidate fact reaffirms an existing fact. Emit into + "dedup_hits" with the existing fact id and action="bump_hits" + (just touch the timestamp) or "refine_text" (mild rephrasing + worth applying). + + C. EXPIRE — an existing fact is contradicted or has gone stale. + Emit into "expire" with existing_fact_id, valid_to=today, reason. + + D. DROP_AS_NOISE — pleasantry, low signal, or duplicates within the + week. Emit into "drop_as_noise" with the episode ids and reason. + +Every candidate episode_id must appear under exactly one action above +(in promote.source_episode_ids OR dedup_hits.source_episode_ids OR +drop_as_noise.episode_ids). The "expire" section can reference NEW +existing_fact_ids that are independent of this week's candidates — +that's fine. + +Output ONE JSON object with this exact schema: + +{{ + "digest_id": "{digest_id}", + "candidate_episode_ids": [], + "promote": [ + {{ + "entity": "禮揚.", + "fact": "single-sentence statement", + "importance": 1..5, + "valid_from": "{today}", + "valid_to": "YYYY-MM-DD" | null, + "source_episode_ids": [int, ...] + }} + ], + "dedup_hits": [ + {{ + "existing_fact_id": int, + "action": "bump_hits" | "refine_text", + "refined_text": "string only if action=refine_text", + "source_episode_ids": [int, ...] 
@dataclass
class WeekDigest:
    """Loaded form of pending_diffs/<digest_id>.json.

    ``raw`` keeps the original parsed payload untouched so nothing the
    model emitted is lost even if it used extra keys.
    """

    digest_id: str
    candidate_episode_ids: List[int]
    promote: List[Dict[str, Any]] = field(default_factory=list)
    dedup_hits: List[Dict[str, Any]] = field(default_factory=list)
    expire: List[Dict[str, Any]] = field(default_factory=list)
    drop_as_noise: List[Dict[str, Any]] = field(default_factory=list)
    raw: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "WeekDigest":
        """Build a digest from a parsed JSON dict, tolerating missing/None lists."""
        def _as_list(key: str) -> list:
            # Missing key or explicit null both normalise to a fresh list copy.
            return list(data.get(key) or [])

        return cls(
            digest_id=data.get("digest_id", ""),
            candidate_episode_ids=_as_list("candidate_episode_ids"),
            promote=_as_list("promote"),
            dedup_hits=_as_list("dedup_hits"),
            expire=_as_list("expire"),
            drop_as_noise=_as_list("drop_as_noise"),
            raw=data,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialisable form — every field except ``raw``."""
        keys = (
            "digest_id",
            "candidate_episode_ids",
            "promote",
            "dedup_hits",
            "expire",
            "drop_as_noise",
        )
        return {k: getattr(self, k) for k in keys}
Path.home() / ".hermes" + + +def pending_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "pending_diffs" + p.mkdir(parents=True, exist_ok=True) + return p + + +def archive_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "diff_archive" + p.mkdir(parents=True, exist_ok=True) + return p + + +def memory_log_path() -> Path: + return _resolve_hermes_home() / "logs" / "memory.log" + + +def db_path() -> Path: + return _resolve_hermes_home() / "memories" / "memory.db" + + +def digest_id_for(today: Optional[date] = None) -> str: + """ISO date based digest id: wk-YYYY-MM-DD.""" + today = today or date.today() + return f"wk-{today.isoformat()}" + + +def rejection_sentinel(digest_id: str) -> Path: + return pending_dir() / f"{digest_id}.rejected" + + +def pending_path(digest_id: str) -> Path: + return pending_dir() / f"{digest_id}.json" + + +# --------------------------------------------------------------------------- +# Shared logging +# --------------------------------------------------------------------------- + + +def _log_event(payload: Dict[str, Any]) -> None: + p = memory_log_path() + try: + p.parent.mkdir(parents=True, exist_ok=True) + with p.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Promotion: candidate gathering + neighbor search +# --------------------------------------------------------------------------- + + +def _read_pending_episodes(conn: sqlite3.Connection, days: int = PROMOTION_LOOKBACK_DAYS) -> List[Dict[str, Any]]: + rows = conn.execute( + """ + SELECT id, ts, channel, role, text, metadata, synthetic + FROM episodes + WHERE promoted_at IS NULL + AND ts > datetime('now', ?) 
def _read_pending_episodes(conn: sqlite3.Connection, days: int = PROMOTION_LOOKBACK_DAYS) -> List[Dict[str, Any]]:
    """Return un-promoted episodes from the last *days* days, oldest first.

    Each row is flattened to a plain dict; ``metadata`` is parsed leniently
    so a single corrupt JSON blob cannot break the weekly run.
    """
    fetched = conn.execute(
        """
        SELECT id, ts, channel, role, text, metadata, synthetic
        FROM episodes
        WHERE promoted_at IS NULL
          AND ts > datetime('now', ?)
        ORDER BY ts
        """,
        (f"-{days} days",),
    ).fetchall()

    def _to_candidate(row) -> Dict[str, Any]:
        # Missing or unparseable metadata degrades to an empty dict.
        try:
            meta = json.loads(row["metadata"]) if row["metadata"] else {}
        except json.JSONDecodeError:
            meta = {}
        return {
            "id": row["id"],
            "ts": row["ts"],
            "channel": row["channel"],
            "role": row["role"],
            "text": row["text"],
            "synthetic": bool(row["synthetic"]),
            "stashed_facts": meta.get("stashed_facts") or [],
        }

    return [_to_candidate(r) for r in fetched]
+ sf_vth = sf.get("valid_to_hint") or "permanent" + lines.append( + f" ↳ stashed: [{sf_entity}] {sf_text[:120]} " + f"(importance={sf.get('importance', 2)}, valid_to_hint={sf_vth})" + ) + return "\n".join(lines) if lines else "(no candidates)" + + +def _format_neighbors_block(neighbors_by_fact: Dict[str, List[Dict[str, Any]]]) -> str: + """One section per candidate stashed_fact, listing its k nearest existing facts.""" + if not neighbors_by_fact: + return "(no candidate stashed_facts to compare against)" + sections = [] + for stashed_text, rows in neighbors_by_fact.items(): + header = f"--- nearest to: {stashed_text[:120]} ---" + body_lines = [ + f" #{r['id']} sim={r['sim']:.3f} [{r['entity'] or '—'}] {r['fact'][:120]}" + for r in rows[:5] # top 5 per stashed fact keeps prompt short + ] + sections.append(header + "\n" + "\n".join(body_lines)) + return "\n\n".join(sections) + + +# --------------------------------------------------------------------------- +# Kimi thinking call +# --------------------------------------------------------------------------- + + +class PromotionError(RuntimeError): + pass + + +async def _call_kimi_thinking(prompt: str, *, client: Optional[httpx.AsyncClient] = None) -> Dict[str, Any]: + """Single Kimi call producing the promotion diff JSON object. + + Tries Kimi-K2-Thinking first; falls back to Kimi-K2.5 on 4xx model-not-found. 
+ """ + api_key = _read_synthetic_api_key() + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + + payload = { + "model": PROMOTION_MODEL, + "messages": [{"role": "user", "content": prompt}], + "temperature": PROMOTION_TEMPERATURE, + "max_tokens": PROMOTION_MAX_TOKENS, + "response_format": {"type": "json_object"}, + } + + owns = client is None + client = client or httpx.AsyncClient() + try: + try: + r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) + except httpx.RequestError as exc: + raise PromotionError(f"synthetic.new network: {exc}") from exc + if r.status_code == 404 or (r.status_code == 400 and "model" in r.text.lower()): + logger.warning("Kimi-Thinking unavailable; falling back to %s", PROMOTION_FALLBACK_MODEL) + payload["model"] = PROMOTION_FALLBACK_MODEL + r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) + if r.status_code >= 400: + raise PromotionError(f"synthetic.new {r.status_code}: {r.text[:300]}") + body = r.json() + finally: + if owns: + await client.aclose() + + content = ((body.get("choices") or [{}])[0].get("message") or {}).get("content", "") + try: + diff = json.loads(content) + except json.JSONDecodeError as exc: + raise PromotionError(f"Kimi returned non-JSON: {exc}: {content[:200]}") from exc + if not isinstance(diff, dict): + raise PromotionError(f"Kimi returned non-object: {type(diff).__name__}") + + usage = body.get("usage") or {} + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_promotion_kimi", + "model": payload["model"], + "tokens_in": usage.get("prompt_tokens"), + "tokens_out": usage.get("completion_tokens"), + }) + return diff + + +# --------------------------------------------------------------------------- +# Digest rendering (spec §5.4) +# --------------------------------------------------------------------------- + + +def render_digest_markdown(diff: WeekDigest, candidates: 
def render_digest_markdown(diff: WeekDigest, candidates: List[Dict[str, Any]]) -> str:
    """Render the weekly digest as Discord-flavoured markdown (spec §5.4).

    Produces a header plus up to four sections (Promote / Dedup / Expire /
    Noise); empty sections are omitted, and a placeholder line is emitted
    when there are no actions at all.
    """
    n_user = sum(1 for c in candidates if not c["synthetic"])
    n_synth = sum(1 for c in candidates if c["synthetic"])
    header = (
        f"# 📚 Weekly Memory Review — {diff.digest_id.removeprefix('wk-')}\n"
        f"{len(candidates)} episodes scanned this week "
        f"({n_user} user/assistant + {n_synth} cron-synthetic).\n"
        f"24 h to reject via `/memreview reject {diff.digest_id}`; default approve.\n"
    )

    sections = []

    if diff.promote:
        lines = [f"## ⬆️ Promote to permanent ({len(diff.promote)})"]
        for p in diff.promote:
            entity = p.get("entity", "?")
            fact = p.get("fact", "")
            importance = p.get("importance", 2)
            # Missing/None valid_to renders as 永久 ("permanent").
            valid_to = p.get("valid_to") or "永久"
            srcs = p.get("source_episode_ids") or []
            # Show at most 5 evidence ids, then a "+N" overflow marker.
            src_str = (
                ", ".join(f"#{i}" for i in srcs[:5])
                + (f" +{len(srcs)-5}" if len(srcs) > 5 else "")
            )
            lines.append(f"- 🆕 **{entity}**: \"{fact}\"")
            lines.append(f" evidence: {src_str} | importance {importance} | valid_to: {valid_to}")
        sections.append("\n".join(lines))

    if diff.dedup_hits:
        lines = [f"## 🔁 Dedup confirmations ({len(diff.dedup_hits)})"]
        for d in diff.dedup_hits:
            srcs = d.get("source_episode_ids") or []
            action = d.get("action", "bump_hits")
            lines.append(
                f"- existing #{d.get('existing_fact_id')} ← {len(srcs)} reaffirmation(s), action={action}"
            )
            # refine_text entries additionally show the proposed rewording.
            if action == "refine_text" and d.get("refined_text"):
                lines.append(f" refined → \"{d['refined_text']}\"")
        sections.append("\n".join(lines))

    if diff.expire:
        lines = [f"## 🪦 Expiring ({len(diff.expire)})"]
        for e in diff.expire:
            lines.append(
                f"- existing #{e.get('existing_fact_id')} → valid_to={e.get('valid_to')} "
                f"({e.get('reason', '—')})"
            )
        sections.append("\n".join(lines))

    if diff.drop_as_noise:
        lines = [f"## 🗑️ Skipped as noise ({len(diff.drop_as_noise)})"]
        for n in diff.drop_as_noise:
            ids = n.get("episode_ids") or []
            lines.append(f"- {len(ids)} episode(s): {n.get('reason', '—')}")
        sections.append("\n".join(lines))

    # All-empty diff still yields a readable digest.
    if not sections:
        sections.append("_No actions this week._")

    return header + "\n" + "\n\n".join(sections)
def discord_post(content: str, channel_id: str, *, bot_token: Optional[str] = None) -> bool:
    """POST a message to a Discord channel. Returns True on success.

    Args:
        content: markdown body; split into multiple messages if long.
        channel_id: numeric Discord channel id as a string.
        bot_token: explicit token, or DISCORD_BOT_TOKEN from the environment.

    Returns:
        True when every chunk posted with a 2xx/3xx status; False on a
        missing token/channel or the first failed chunk (remaining chunks
        are not attempted).
    """
    bot_token = bot_token or os.environ.get("DISCORD_BOT_TOKEN")
    if not bot_token or not channel_id:
        logger.warning("discord_post missing bot_token or channel_id")
        return False
    # Discord rejects messages over 2000 chars; chunk if needed.
    # 1990 leaves a small safety margin below the hard limit.
    chunks: List[str] = []
    remaining = content
    while remaining:
        if len(remaining) <= 1990:
            chunks.append(remaining)
            break
        # Split on the last newline before 1990 chars to avoid mid-line breaks.
        cut = remaining.rfind("\n", 0, 1990)
        if cut <= 0:
            # No newline in range → hard cut mid-line.
            cut = 1990
        chunks.append(remaining[:cut])
        remaining = remaining[cut:].lstrip("\n")

    headers = {
        "Authorization": f"Bot {bot_token}",
        "Content-Type": "application/json",
    }
    url = DISCORD_API.format(channel_id=channel_id)
    ok = True
    with httpx.Client(timeout=20.0) as c:
        for chunk in chunks:
            r = c.post(url, headers=headers, json={"content": chunk})
            if r.status_code >= 400:
                logger.warning("discord_post failed: %s %s", r.status_code, r.text[:200])
                ok = False
                break
    return ok
def memory_review_channel_id() -> Optional[str]:
    """Resolve the Discord #memory-review channel id.

    Priority:
    1. MEMORY_REVIEW_CHANNEL_ID env var (test override)
    2. ~/.hermes/channel_directory.json -> platforms.discord (list)
       -> first entry whose name == "memory-review"
    3. Legacy flat layouts (defensive — older installs)

    Returns None when no layout yields an id (callers must handle this).
    """
    env = os.environ.get("MEMORY_REVIEW_CHANNEL_ID")
    if env:
        return env
    cdir = _resolve_hermes_home() / "channel_directory.json"
    if not cdir.exists():
        return None
    # A corrupt directory file degrades to "not found" rather than raising.
    try:
        data = json.loads(cdir.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return None

    # Canonical layout: platforms.discord is a list of channel dicts.
    plats = (data.get("platforms") or {})
    discord_chans = plats.get("discord")
    if isinstance(discord_chans, list):
        for c in discord_chans:
            if isinstance(c, dict) and c.get("name") == "memory-review":
                return c.get("id")

    # Defensive fallbacks for older / hand-edited layouts:
    # either a top-level "memory-review": "<id>" string, or a "channels"
    # mapping whose value is a string id or a dict with id/channel_id.
    if isinstance(data.get("memory-review"), str):
        return data["memory-review"]
    chans = data.get("channels") or {}
    m = chans.get("memory-review") if isinstance(chans, dict) else None
    if isinstance(m, str):
        return m
    if isinstance(m, dict):
        return m.get("id") or m.get("channel_id")
    return None
+ neighbors_by_fact: Dict[str, List[Dict[str, Any]]] = {} + for c in candidates: + for sf in c["stashed_facts"]: + text = (sf or {}).get("text") or "" + if not text or text in neighbors_by_fact: + continue + try: + neighbors_by_fact[text] = await _vec_search(conn, text) + except Exception as exc: + logger.warning("vec_search failed for stashed fact: %s", exc) + neighbors_by_fact[text] = [] + + prompt = PROMOTION_PROMPT.format( + digest_id=digest_id, + today=today.isoformat(), + week_label=today.isoformat(), + candidates_block=_format_candidates_block(candidates), + neighbors_block=_format_neighbors_block(neighbors_by_fact), + ) + + kimi = kimi_fn or _call_kimi_thinking + try: + diff_dict = await kimi(prompt) + except Exception as exc: + logger.exception("Kimi promotion call failed") + return {"digest_id": digest_id, "candidates": len(candidates), "error": str(exc)} + + # Trust-but-verify: ensure digest_id matches and required keys exist. + diff_dict.setdefault("digest_id", digest_id) + diff_dict.setdefault("candidate_episode_ids", [c["id"] for c in candidates]) + for k in ("promote", "dedup_hits", "expire", "drop_as_noise"): + diff_dict.setdefault(k, []) + + digest = WeekDigest.from_dict(diff_dict) + markdown = render_digest_markdown(digest, candidates) + + summary = { + "digest_id": digest_id, + "candidates": len(candidates), + "promote": len(digest.promote), + "dedup_hits": len(digest.dedup_hits), + "expire": len(digest.expire), + "drop_as_noise": len(digest.drop_as_noise), + "dry_run": dry_run, + } + + if dry_run: + summary["markdown_preview"] = markdown + return summary + + # Persist diff before posting so a Discord outage doesn't lose the work. 
+ pending_path(digest_id).write_text( + json.dumps(digest.to_dict(), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + posted = False + if discord_channel_id: + posted = discord_post(markdown, discord_channel_id) + summary["discord_posted"] = posted + + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_promotion", + "digest_id": digest_id, + "summary": summary, + }) + return summary + + +def _purge_old_pending(today: date) -> int: + """Delete pending diffs older than PENDING_DIFF_TTL_DAYS.""" + cutoff = today - timedelta(days=PENDING_DIFF_TTL_DAYS) + n = 0 + for f in pending_dir().glob("*.json"): + try: + stem = f.stem.removeprefix("wk-") + d = datetime.strptime(stem, "%Y-%m-%d").date() + except ValueError: + continue + if d < cutoff: + try: + f.unlink() + # Also remove associated rejection sentinel if any. + rs = f.with_suffix(".rejected") + if rs.exists(): + rs.unlink() + n += 1 + except OSError: + pass + return n + + +def _latest_pending_diff() -> Optional[Path]: + files = sorted(pending_dir().glob("wk-*.json")) + return files[-1] if files else None + + +def _archive_diff(diff_path: Path, status: str) -> None: + target = archive_dir() / f"{diff_path.stem}.{status}.json" + diff_path.replace(target) + + +async def _apply_diff_atomic( + conn: sqlite3.Connection, + digest: WeekDigest, + today: date, + *, + embed_fn=None, +) -> Dict[str, int]: + """Apply promote / dedup / expire in one transaction; stamp promoted_at. + + Embeddings for promoted facts are computed BEFORE the transaction + opens, so the writer lock is held only for the duration of the + SQL statements themselves (~ms). Holding it across the Voyage HTTP + round-trip would block concurrent writes from the hot path. + + Returns counts of each action performed. + """ + counts = {"promoted": 0, "dedup_bumped": 0, "dedup_refined": 0, "expired": 0, "stamped": 0} + + # Pre-embed all promote texts (outside transaction). 
+ embed = embed_fn or voyage_embed + promote_blobs: List[Optional[bytes]] = [] + promote_texts = [p.get("fact", "") for p in digest.promote] + non_empty = [t for t in promote_texts if t] + if non_empty: + embeddings = await embed(non_empty) + # Map back to original positions (None for empty fact strings). + emb_iter = iter(embeddings) + promote_blobs = [next(emb_iter) if t else None for t in promote_texts] + else: + promote_blobs = [None] * len(promote_texts) + + try: + conn.execute("BEGIN") + + # 1. promote — INSERT new semantic_facts. Trigger sf_after_insert + # mirrors each row into vec_facts automatically. + for p, blob in zip(digest.promote, promote_blobs): + fact = p.get("fact", "") + if not fact or blob is None: + continue + conn.execute( + """ + INSERT INTO semantic_facts + (entity, fact, embedding, importance, valid_from, valid_to, + source_episode_ids) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + p.get("entity"), + fact, + blob, + int(p.get("importance", 2) or 2), + p.get("valid_from") or today.isoformat(), + p.get("valid_to"), + json.dumps(p.get("source_episode_ids") or []), + ), + ) + counts["promoted"] += 1 + + # 2. dedup_hits — bump the existing fact's hits + last_seen, optional refine. + for d in digest.dedup_hits: + fid = d.get("existing_fact_id") + if fid is None: + continue + if d.get("action") == "refine_text" and d.get("refined_text"): + conn.execute( + "UPDATE semantic_facts SET fact = ?, last_seen = datetime('now'), " + "hits = hits + 1 WHERE id = ?", + (d["refined_text"], fid), + ) + counts["dedup_refined"] += 1 + else: + conn.execute( + "UPDATE semantic_facts SET last_seen = datetime('now'), " + "hits = hits + 1 WHERE id = ?", + (fid,), + ) + counts["dedup_bumped"] += 1 + + # 3. expire — set valid_to (caller chose date). + for e in digest.expire: + fid = e.get("existing_fact_id") + if fid is None: + continue + conn.execute( + "UPDATE semantic_facts SET valid_to = ? 
WHERE id = ?", + (e.get("valid_to") or today.isoformat(), fid), + ) + counts["expired"] += 1 + + # 4. stamp promoted_at on every candidate episode. + if digest.candidate_episode_ids: + placeholders = ",".join("?" * len(digest.candidate_episode_ids)) + conn.execute( + f"UPDATE episodes SET promoted_at = date('now') WHERE id IN ({placeholders})", + digest.candidate_episode_ids, + ) + counts["stamped"] = len(digest.candidate_episode_ids) + + conn.commit() + except Exception: + conn.rollback() + raise + return counts + + +async def weekly_apply( + conn: sqlite3.Connection, + *, + today: Optional[date] = None, + embed_fn=None, +) -> Dict[str, Any]: + """Apply the latest pending diff (or archive-as-rejected). Returns summary.""" + today = today or date.today() + + purged = _purge_old_pending(today) + diff_path = _latest_pending_diff() + + if not diff_path: + return {"purged": purged, "applied": False, "reason": "no pending diff"} + + digest_id = diff_path.stem + sentinel = rejection_sentinel(digest_id) + if sentinel.exists(): + _archive_diff(diff_path, "rejected") + try: + sentinel.unlink() + except OSError: + pass + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_apply", + "digest_id": digest_id, + "result": "rejected", + }) + return {"purged": purged, "applied": False, "digest_id": digest_id, "reason": "rejected"} + + try: + diff_dict = json.loads(diff_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + return {"purged": purged, "applied": False, "error": f"diff load: {exc}"} + + digest = WeekDigest.from_dict(diff_dict) + counts = await _apply_diff_atomic(conn, digest, today, embed_fn=embed_fn) + _archive_diff(diff_path, "applied") + + summary = { + "purged": purged, + "applied": True, + "digest_id": digest_id, + **counts, + } + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_apply", + **summary, + }) + return summary diff --git a/tests/plugins/memory/test_promotion.py 
b/tests/plugins/memory/test_promotion.py new file mode 100644 index 00000000000..6e452ba2132 --- /dev/null +++ b/tests/plugins/memory/test_promotion.py @@ -0,0 +1,397 @@ +"""Tests for plugins/memory/sqlite_vec/promotion.py (W3-3).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date, timedelta +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.promotion import ( + PENDING_DIFF_TTL_DAYS, + PROMOTION_PROMPT, + WeekDigest, + _apply_diff_atomic, + _format_candidates_block, + _format_neighbors_block, + _purge_old_pending, + digest_id_for, + pending_path, + rejection_sentinel, + render_digest_markdown, + weekly_apply, + weekly_promotion, +) +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Prompt + format helpers +# --------------------------------------------------------------------------- + + +def test_prompt_has_required_placeholders(): + """The prompt is .format()'d with these keys; missing any breaks promotion.""" + for key in ("{digest_id}", "{today}", "{week_label}", + "{candidates_block}", "{neighbors_block}"): + assert key in PROMOTION_PROMPT, f"missing placeholder: {key}" + + +def test_prompt_carries_hard_rules(): + assert "病歷號" in PROMOTION_PROMPT + assert "DROP_AS_NOISE" in PROMOTION_PROMPT + assert "PROMOTE" in PROMOTION_PROMPT + assert "DEDUP_HIT" in PROMOTION_PROMPT + assert "EXPIRE" in PROMOTION_PROMPT + + +def test_format_candidates_block_marks_synthetic(): + cands = [ + {"id": 1, "ts": "2026-05-02 09:00", "channel": "cattia", + "role": "user", "synthetic": False, "text": "hello", + "stashed_facts": [{"text": "禮揚 likes X", "entity": "禮揚.訓練", + "importance": 2, "valid_to_hint": None}]}, + 
{"id": 2, "ts": "2026-05-02 09:00", "channel": "cron", + "role": "assistant", "synthetic": True, "text": "cron output", + "stashed_facts": []}, + ] + out = _format_candidates_block(cands) + assert "👤" in out and "🤖" in out + assert "↳ stashed:" in out + + +def test_format_neighbors_block_truncates_to_top_5(): + neighbors = { + "topic": [ + {"id": i, "fact": f"fact {i}", "entity": "x", "sim": 0.9 - i * 0.01} + for i in range(10) + ] + } + out = _format_neighbors_block(neighbors) + # Only 5 should appear. + assert out.count("#") == 5 + + +# --------------------------------------------------------------------------- +# digest_id + path helpers +# --------------------------------------------------------------------------- + + +def test_digest_id_format(): + assert digest_id_for(date(2026, 5, 11)) == "wk-2026-05-11" + + +# --------------------------------------------------------------------------- +# WeekDigest +# --------------------------------------------------------------------------- + + +def test_week_digest_round_trip(): + raw = { + "digest_id": "wk-2026-05-10", + "candidate_episode_ids": [1, 2, 3], + "promote": [{"entity": "禮揚.家庭", "fact": "x", "importance": 3}], + "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits"}], + "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10"}], + "drop_as_noise": [{"episode_ids": [4], "reason": "pleasantry"}], + } + d = WeekDigest.from_dict(raw) + assert d.digest_id == "wk-2026-05-10" + assert d.to_dict()["candidate_episode_ids"] == [1, 2, 3] + + +# --------------------------------------------------------------------------- +# render_digest_markdown +# --------------------------------------------------------------------------- + + +def test_render_digest_markdown_full_shape(): + candidates = [ + {"id": 1, "ts": "x", "channel": "c", "role": "user", + "synthetic": False, "text": "u", "stashed_facts": []}, + {"id": 2, "ts": "x", "channel": "cron", "role": "user", + "synthetic": True, "text": "u", "stashed_facts": []}, 
+ ] + d = WeekDigest.from_dict({ + "digest_id": "wk-2026-05-10", + "candidate_episode_ids": [1, 2], + "promote": [{"entity": "禮揚.家庭", "fact": "致妤生日 3/19", + "importance": 5, "valid_to": None, + "source_episode_ids": [1]}], + "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits", + "source_episode_ids": [2]}], + "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10", + "reason": "stale"}], + "drop_as_noise": [{"episode_ids": [3], "reason": "好的"}], + }) + md = render_digest_markdown(d, candidates) + assert "Weekly Memory Review — 2026-05-10" in md + assert "(1 user/assistant + 1 cron-synthetic)" in md + assert "/memreview reject wk-2026-05-10" in md + assert "⬆️ Promote to permanent (1)" in md + assert "🔁 Dedup confirmations (1)" in md + assert "🪦 Expiring (1)" in md + assert "🗑️ Skipped as noise (1)" in md + assert "致妤生日 3/19" in md + assert "valid_to: 永久" in md # null valid_to + + +def test_render_digest_empty_sections_collapse(): + d = WeekDigest.from_dict({"digest_id": "wk-2026-05-10", + "candidate_episode_ids": []}) + md = render_digest_markdown(d, []) + assert "_No actions this week._" in md + + +# --------------------------------------------------------------------------- +# weekly_promotion (mocked Kimi) +# --------------------------------------------------------------------------- + + +def _seed_episodes(conn, today_iso: str = "2026-05-02 12:00:00"): + """Add 2 fixture episodes with stashed_facts.""" + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text, synthetic, metadata) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", + (today_iso, "cattia", "m1:user", "user", "我下週要去日本", 0, + json.dumps({"stashed_facts": [ + {"type": "semantic", "text": "禮揚下週去日本", "entity": "禮揚.家庭", + "importance": 3, "valid_to_hint": "2026-05-11"}]})), + ) + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (?, ?, ?, ?, ?)", + (today_iso, "cattia", "m1:asst", "assistant", "好的", ), + ) + conn.commit() + + +def 
test_weekly_promotion_no_candidates(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + summary = asyncio.run(weekly_promotion(db)) + assert summary["candidates"] == 0 + assert "skipped" in summary + + +def test_weekly_promotion_dry_run_returns_markdown(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + async def fake_kimi(prompt): + # Sanity: prompt was actually formatted, not left with placeholders. + assert "{digest_id}" not in prompt + return { + "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", + "importance": 3, "valid_to": "2026-05-11", + "source_episode_ids": [1]}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + } + + async def fake_embed(texts): + return [_vec(50) for _ in texts] + + summary = asyncio.run(weekly_promotion( + db, dry_run=True, kimi_fn=fake_kimi, + embed_fn=fake_embed, + )) + assert summary["candidates"] == 2 + assert summary["promote"] == 1 + assert summary["dry_run"] is True + assert "markdown_preview" in summary + assert "下週去日本" in summary["markdown_preview"] + # Dry-run MUST NOT persist a pending diff or post to Discord. 
+ assert not (tmp_path / "memories" / "pending_diffs").exists() or \ + not list((tmp_path / "memories" / "pending_diffs").glob("*.json")) + + +def test_weekly_promotion_persists_diff_on_real_run(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + async def fake_kimi(prompt): + return { + "promote": [], "dedup_hits": [], "expire": [], + "drop_as_noise": [{"episode_ids": [1, 2], "reason": "no signal"}], + } + + summary = asyncio.run(weekly_promotion( + db, dry_run=False, kimi_fn=fake_kimi, + )) + # Diff was written, even with no Discord channel configured. + files = list((tmp_path / "memories" / "pending_diffs").glob("*.json")) + assert len(files) == 1 + diff = json.loads(files[0].read_text()) + assert diff["candidate_episode_ids"] == [1, 2] + + +# --------------------------------------------------------------------------- +# weekly_apply +# --------------------------------------------------------------------------- + + +def test_weekly_apply_no_pending_diff(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + summary = asyncio.run(weekly_apply(db)) + assert summary["applied"] is False + assert "no pending diff" in summary.get("reason", "") + + +def test_weekly_apply_rejection_sentinel_archives_without_apply(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + rejection_sentinel(digest_id).write_text("rejected", encoding="utf-8") + + summary = 
asyncio.run(weekly_apply(db)) + assert summary["applied"] is False + assert summary["reason"] == "rejected" + # Diff moved to archive_dir, sentinel removed. + assert not pending_path(digest_id).exists() + assert not rejection_sentinel(digest_id).exists() + archive = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) + assert len(archive) == 1 + + +def test_weekly_apply_promotes_inserts_and_stamps(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, + "candidate_episode_ids": [1, 2], + "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", + "importance": 3, "valid_from": "2026-05-02", + "valid_to": "2026-05-11", "source_episode_ids": [1]}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + + async def fake_embed(texts): + return [_vec(50) for _ in texts] + + summary = asyncio.run(weekly_apply(db, embed_fn=fake_embed)) + assert summary["applied"] is True + assert summary["promoted"] == 1 + assert summary["stamped"] == 2 + # New row in semantic_facts. + [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf == 1 + # Trigger mirrored into vec_facts. + [(vf,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vf == 1 + # Episodes stamped. + rows = db.execute("SELECT id, promoted_at FROM episodes ORDER BY id").fetchall() + assert all(r["promoted_at"] is not None for r in rows) + # Diff moved to archive. 
+ archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) + assert len(archive) == 1 + + +def test_weekly_apply_dedup_bump_increments_hits(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding, hits) VALUES (?, ?, ?)", + ("禮揚 likes X", _vec(10), 0), + ) + db.commit() + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [ + {"existing_fact_id": 1, "action": "bump_hits", + "source_episode_ids": []} + ], "expire": [], "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db)) + assert summary["dedup_bumped"] == 1 + [(hits,)] = db.execute("SELECT hits FROM semantic_facts WHERE id=1").fetchall() + assert hits == 1 + + +def test_weekly_apply_expire_sets_valid_to(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("禮揚 watches paper X", _vec(10)), + ) + db.commit() + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], + "expire": [{"existing_fact_id": 1, "valid_to": "2026-05-02", + "reason": "stale"}], + "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) + assert summary["expired"] == 1 + [(vt,)] = db.execute("SELECT valid_to FROM semantic_facts WHERE id=1").fetchall() + assert vt == "2026-05-02" + + +def test_weekly_apply_purges_old_pending(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + + 
today = date(2026, 5, 2) + old = today - timedelta(days=PENDING_DIFF_TTL_DAYS + 5) + fresh = today - timedelta(days=2) + + pending_path(f"wk-{old.isoformat()}").write_text("{}") + pending_path(f"wk-{fresh.isoformat()}").write_text(json.dumps({ + "digest_id": f"wk-{fresh.isoformat()}", "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db, today=today)) + assert summary["purged"] == 1 + # Old gone, fresh applied + archived. + assert not pending_path(f"wk-{old.isoformat()}").exists() + archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) + assert len(archive) == 1 From 6080343bbcacef073e284f51c90a0eb00191b053 Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 13:53:44 +0000 Subject: [PATCH 09/11] docs(memory): commit cron-script sources for W3-3 The hermes scheduler hard-binds ~/.hermes/scripts/ as the only exec path for cron jobs, so the runtime copies must live there per-host. Keeping the canonical sources in the repo means PR review can see them and a fresh chococlaw rebuild is a 2-line cp + jobs.json patch. Refs liyoungc/hermes-memory#10 --- scripts/cron/README.md | 19 +++++++++++ scripts/cron/weekly_apply.py | 42 ++++++++++++++++++++++++ scripts/cron/weekly_promotion.py | 56 ++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 scripts/cron/README.md create mode 100755 scripts/cron/weekly_apply.py create mode 100755 scripts/cron/weekly_promotion.py diff --git a/scripts/cron/README.md b/scripts/cron/README.md new file mode 100644 index 00000000000..af5227bdc80 --- /dev/null +++ b/scripts/cron/README.md @@ -0,0 +1,19 @@ +# Hermes V3 cron scripts + +These scripts are invoked by hermes-agent's cron scheduler. 
The scheduler +hardcodes `HERMES_HOME/scripts/` as the only path it will exec from +(security: prevents arbitrary script execution via path traversal), so +runtime copies must live at `~/.hermes/scripts/<name>.py` on each host. + +The canonical source lives here in version control. Deploy via: + + cp scripts/cron/weekly_promotion.py ~/.hermes/scripts/ + cp scripts/cron/weekly_apply.py ~/.hermes/scripts/ + +Cron entries are registered by adding rows to `~/.hermes/cron/jobs.json` +(see the `Hermes Weekly Memory Promotion` / `Hermes Weekly Memory Apply` +entries; expressions are in UTC — `0 19 * * 6` = Sun 03:00 UTC+8). + +Both scripts emit `{"wakeAgent": false}` as the last stdout line so the +cron framework skips the agent run — delivery happens inside the script +via Discord HTTP POST. diff --git a/scripts/cron/weekly_apply.py b/scripts/cron/weekly_apply.py new file mode 100755 index 00000000000..14d1a18550e --- /dev/null +++ b/scripts/cron/weekly_apply.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +"""Cron entry point: Mon 03:00 UTC+8 weekly memory apply. + +Loads the latest pending diff (purges any older than 14 days first), +checks for a rejection sentinel file (written by /memreview reject), +and either archives the diff as rejected or applies its +promote / dedup / expire actions atomically and stamps +``episodes.promoted_at`` on the candidate rows.
+""" + +from __future__ import annotations + +import asyncio +import json +import sys + +sys.path.insert(0, "/opt/hermes") + +try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) +except Exception: + pass + +from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 + db_path, + weekly_apply, +) +from plugins.memory.sqlite_vec.store import open_db # noqa: E402 + + +def main() -> int: + conn = open_db(db_path(), check_same_thread=False) + summary = asyncio.run(weekly_apply(conn)) + print(json.dumps(summary, ensure_ascii=False, default=str)) + print('{"wakeAgent": false}') + conn.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/cron/weekly_promotion.py b/scripts/cron/weekly_promotion.py new file mode 100755 index 00000000000..55d86d1aa00 --- /dev/null +++ b/scripts/cron/weekly_promotion.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Cron entry point: Sun 03:00 UTC+8 weekly memory promotion. + +Reads the last 7 days of pending episodes, runs one Kimi-thinking call to +produce a promotion diff, persists the diff as +~/.hermes/memories/pending_diffs/wk-YYYY-MM-DD.json, renders the digest +markdown, and posts it to #memory-review for user review. + +Stdout ends with ``{"wakeAgent": false}`` so the cron framework skips +the agent run after we've handled delivery ourselves. +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from pathlib import Path + +# The hermes container exposes the source tree at /opt/hermes but does not +# add it to sys.path; cron exec'd scripts inherit nothing. Insert it +# manually so plugin imports resolve. +sys.path.insert(0, "/opt/hermes") + +# Load the user's .env so VOYAGE_API_KEY / DISCORD_BOT_TOKEN reach the +# plugin code; mirrors what run_agent.py does at module import. 
+try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) +except Exception: + pass + +from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 + db_path, + memory_review_channel_id, + weekly_promotion, +) +from plugins.memory.sqlite_vec.store import open_db # noqa: E402 + + +def main() -> int: + conn = open_db(db_path(), check_same_thread=False) + channel_id = memory_review_channel_id() + summary = asyncio.run( + weekly_promotion(conn, discord_channel_id=channel_id) + ) + # Print human-readable summary to stdout for cron logs. + print(json.dumps(summary, ensure_ascii=False, default=str)) + # Wake-gate: skip the agent run. + print('{"wakeAgent": false}') + conn.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 6a6874cef92d7702e8a81da9f496b6ab0d1708ad Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 14:00:37 +0000 Subject: [PATCH 10/11] feat(memory): /memreview reject + /mem kill switch (W3-4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plugins/memreview/ is a standalone slash-command plugin registering two commands per spec §7.1: /memreview reject - writes ~/.hermes/memories/pending_diffs/.rejected Monday's weekly_apply reads this sentinel and archives the diff without applying any of its promote / dedup / expire actions; candidate episodes stay unstamped for next Sunday's window. /memreview pending - lists all pending digest_ids, flagging any that already carry a rejection sentinel. /mem off - global kill switch. Writes HERMES_HOME/MEM_OFF. Both SqliteVecMemoryProvider.sync_turn (hot path) and weekly_promotion (cold path) check for this file at the top of each call and short- circuit. Read path is unaffected. /mem on - removes the sentinel. /mem status - human-readable state of the kill switch + pending diff list. 
Why slash commands rather than Discord reactions: spec §7.1 explicitly chose slash because reactions don't reliably trigger webhook events across all bot adapters — a silent kill-switch failure is worse than no switch. Sentinel file design rationale: file-system state (rather than in-memory process flags) survives container restart, cross-thread visibility without locks, and gives the user a manual recovery path (touch / rm the file directly). Wired into the write paths: - plugins/memory/sqlite_vec/__init__.py: sync_turn now checks _mem_off_active() before scheduling the write_episode worker. bump_hits still fires (it's read-side accounting). - plugins/memory/sqlite_vec/promotion.py: weekly_promotion checks mem_off_active() at the top of the function and returns a "skipped: /mem off active" summary without reading episodes, calling Kimi, or persisting any diff. Both call sites import lazily from plugins.memreview so the memory plugin still loads cleanly even if memreview is uninstalled. Tests: 15 new (help text, pending list with/without rejected flag, reject invalid/unknown/valid digest_id, /mem off+on creates/deletes sentinel, /mem on idempotent, /mem status with and without pending, register() wires both commands, end-to-end reject -> apply archives without applying, /mem off short-circuits weekly_promotion before Kimi is called). 522/522 green across all plugin tests. Live verification on chococlaw: 1. wrote fake pending diff wk-2026-05-02.json (with a "should NEVER land" promote entry). 2. /memreview pending — listed it. 3. /memreview reject wk-2026-05-02 — sentinel created, confirmation reply. 4. weekly_apply — archived as wk-2026-05-02.rejected.json, sentinel auto-cleaned. semantic_facts unchanged (25 -> 25). The promote was correctly discarded. 5. /mem off / status / on cycle — sentinel toggled at /opt/data/MEM_OFF. 
Refs liyoungc/hermes-memory#11 --- plugins/memory/sqlite_vec/__init__.py | 27 ++- plugins/memory/sqlite_vec/promotion.py | 12 ++ plugins/memreview/__init__.py | 227 +++++++++++++++++++++ plugins/memreview/plugin.yaml | 4 + tests/plugins/test_memreview.py | 272 +++++++++++++++++++++++++ 5 files changed, 538 insertions(+), 4 deletions(-) create mode 100644 plugins/memreview/__init__.py create mode 100644 plugins/memreview/plugin.yaml create mode 100644 tests/plugins/test_memreview.py diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py index b6470de2adb..3d54be27f66 100644 --- a/plugins/memory/sqlite_vec/__init__.py +++ b/plugins/memory/sqlite_vec/__init__.py @@ -50,6 +50,19 @@ RECALL_HEADER = "## Recent relevant memories" +def _mem_off_active() -> bool: + """True iff the global /mem off kill switch sentinel is present. + + Late import to avoid circular plugin loading: plugins.memreview can + import provider symbols indirectly via the slash-command surface. + """ + try: + from plugins.memreview import mem_off_active + return mem_off_active() + except Exception: + return False + + def _default_db_path(hermes_home: str) -> Path: return Path(hermes_home).expanduser() / "memories" / "memory.db" @@ -214,10 +227,16 @@ async def _do_write() -> None: logger.warning("sqlite_vec bump_hits worker error: %s", exc) if user_content or assistant_content: - try: - _run_coro_in_thread(_do_write, timeout=WRITE_TIMEOUT_S) - except Exception as exc: - logger.warning("sqlite_vec write_episode worker error: %s", exc) + # /mem off kill switch: skip write_episode entirely. The hot path + # bump_hits ran above (read-side accounting), but no new + # episodes / facts are persisted. Read remains unaffected. 
+ if _mem_off_active(): + logger.info("sqlite_vec write_episode skipped (/mem off)") + else: + try: + _run_coro_in_thread(_do_write, timeout=WRITE_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec write_episode worker error: %s", exc) def get_tool_schemas(self) -> List[Dict[str, Any]]: return [] diff --git a/plugins/memory/sqlite_vec/promotion.py b/plugins/memory/sqlite_vec/promotion.py index 08aefc5b717..6a4404b752e 100644 --- a/plugins/memory/sqlite_vec/promotion.py +++ b/plugins/memory/sqlite_vec/promotion.py @@ -582,6 +582,18 @@ async def weekly_promotion( today = today or date.today() digest_id = digest_id_for(today) + # /mem off kill switch — skip the entire weekly cycle. + try: + from plugins.memreview import mem_off_active + if mem_off_active(): + return { + "digest_id": digest_id, + "candidates": 0, + "skipped": "/mem off active", + } + except Exception: + pass + candidates = _read_pending_episodes(conn) if not candidates: return {"digest_id": digest_id, "candidates": 0, "skipped": "no candidates"} diff --git a/plugins/memreview/__init__.py b/plugins/memreview/__init__.py new file mode 100644 index 00000000000..d8794fe74db --- /dev/null +++ b/plugins/memreview/__init__.py @@ -0,0 +1,227 @@ +"""``/memreview`` and ``/mem`` slash commands — admin / kill-switch (W3-4). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.1. + +Two commands: + + /memreview reject - per-digest opt-out. Writes a sentinel + file ``pending_diffs/.rejected`` + that ``weekly_apply`` reads on Monday + morning and archives the diff without + applying. + + /mem off - global kill switch. Writes ``MEM_OFF`` + in HERMES_HOME. Both ``write_episode`` + (hot path) and ``weekly_promotion`` + (cold path) check for this file at the + top of each call and short-circuit to + a no-op + warning log. + + /mem on - reverses the kill switch by deleting + ``MEM_OFF`` (companion to /mem off). 
+ + /mem status - prints whether the kill switch is set + and lists pending diffs awaiting apply. + +Why slash commands and not Discord reactions: spec §7.1 explicitly chose +slash because reactions don't reliably trigger webhook events across all +bot adapters (silent kill-switch failure mode that's worse than no +switch). +""" + +from __future__ import annotations + +import asyncio +import logging +import os +import re +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +def _pending_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "pending_diffs" + p.mkdir(parents=True, exist_ok=True) + return p + + +def _archive_dir() -> Path: + return _resolve_hermes_home() / "memories" / "diff_archive" + + +def mem_off_path() -> Path: + """The global kill-switch sentinel.""" + return _resolve_hermes_home() / "MEM_OFF" + + +def mem_off_active() -> bool: + """Public predicate consumed by promotion.py + provider.sync_turn.""" + return mem_off_path().exists() + + +# --------------------------------------------------------------------------- +# /memreview +# --------------------------------------------------------------------------- + + +_MEMREVIEW_HELP = ( + "**/memreview** — review or reject the weekly memory promotion digest.\n" + "Usage:\n" + " `/memreview reject ` — write the rejection sentinel; " + "Monday's apply will archive the diff without applying it.\n" + " `/memreview pending` — list digests currently awaiting apply.\n" + " `/memreview status` — same as `pending`." 
+) + + +_DIGEST_ID_RE = re.compile(r"^wk-\d{4}-\d{2}-\d{2}$") + + +def _list_pending_diffs() -> List[str]: + out = [] + for f in sorted(_pending_dir().glob("wk-*.json")): + rejected = f.with_suffix(".rejected").exists() + flag = " (rejected — will be archived Mon)" if rejected else "" + out.append(f"- `{f.stem}`{flag}") + return out + + +def _handle_memreview(raw_args: str) -> str: + args = (raw_args or "").strip() + if not args: + return _MEMREVIEW_HELP + + parts = args.split(maxsplit=1) + sub = parts[0].lower() + + if sub in ("pending", "status", "list"): + items = _list_pending_diffs() + if not items: + return "**/memreview** — no pending diffs." + return "**/memreview** — pending diffs:\n" + "\n".join(items) + + if sub == "reject": + rest = parts[1].strip() if len(parts) > 1 else "" + if not _DIGEST_ID_RE.match(rest): + return ( + f"**/memreview reject** — digest_id must look like " + f"`wk-YYYY-MM-DD`. Got: `{rest!r}`" + ) + diff_path = _pending_dir() / f"{rest}.json" + if not diff_path.exists(): + return ( + f"**/memreview reject** — no pending diff named `{rest}`. " + f"Use `/memreview pending` to list available digest_ids." + ) + sentinel = _pending_dir() / f"{rest}.rejected" + try: + sentinel.write_text( + f"rejected via /memreview at {asyncio.get_event_loop().time()}", + encoding="utf-8", + ) + except (OSError, RuntimeError): + # No running loop in some sync entry paths — write a static marker. + try: + sentinel.write_text("rejected", encoding="utf-8") + except OSError as exc: + return f"**/memreview reject** error: cannot write sentinel: `{exc}`" + return ( + f"**Rejected.** Pending diff `{rest}` will be archived without " + f"applying. Episodes stay pending for next Sunday's review." 
+ ) + + return _MEMREVIEW_HELP + + +# --------------------------------------------------------------------------- +# /mem +# --------------------------------------------------------------------------- + + +_MEM_HELP = ( + "**/mem** — global memory write-back kill switch.\n" + "Usage:\n" + " `/mem off` — disable per-turn write-back AND weekly promotion.\n" + " `/mem on` — re-enable.\n" + " `/mem status` — show whether the kill switch is currently set." +) + + +def _handle_mem(raw_args: str) -> str: + args = (raw_args or "").strip().lower() + if not args: + return _MEM_HELP + + sub = args.split()[0] + + if sub == "off": + try: + mem_off_path().write_text( + "set via /mem off\n", encoding="utf-8" + ) + except OSError as exc: + return f"**/mem off** error: `{exc}`" + return ( + "**🔇 Memory write-back disabled.**\n" + "Per-turn `write_episode` and weekly promotion will short-circuit " + "until you run `/mem on`. Read path is unaffected — Cattia still " + "retrieves from existing facts." + ) + + if sub == "on": + p = mem_off_path() + if not p.exists(): + return "**/mem on** — write-back was already enabled." + try: + p.unlink() + except OSError as exc: + return f"**/mem on** error: `{exc}`" + return "**🔊 Memory write-back enabled.** Hot + cold paths resume." 
+ + if sub == "status": + active = mem_off_active() + pending = _list_pending_diffs() + lines = [ + "**/mem status**", + f" write-back: {'🔇 OFF' if active else '🔊 ON'}", + f" MEM_OFF sentinel: `{mem_off_path()}`" + f" {'(present)' if active else '(absent)'}", + ] + if pending: + lines.append(" pending diffs:") + lines.extend(" " + p for p in pending) + else: + lines.append(" pending diffs: (none)") + return "\n".join(lines) + + return _MEM_HELP + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + ctx.register_command( + "memreview", + handler=_handle_memreview, + description="Review or reject the weekly Hermes memory promotion digest.", + args_hint="reject | pending | status", + ) + ctx.register_command( + "mem", + handler=_handle_mem, + description="Hermes memory kill switch (off / on / status).", + args_hint="off | on | status", + ) diff --git a/plugins/memreview/plugin.yaml b/plugins/memreview/plugin.yaml new file mode 100644 index 00000000000..66252043f4b --- /dev/null +++ b/plugins/memreview/plugin.yaml @@ -0,0 +1,4 @@ +name: memreview +version: 0.1.0 +description: "/memreview reject + /mem kill switch — admin slash commands for the Hermes V3 memory system (W3-4)." 
+author: "Li-yang Chen" diff --git a/tests/plugins/test_memreview.py b/tests/plugins/test_memreview.py new file mode 100644 index 00000000000..f20e7341790 --- /dev/null +++ b/tests/plugins/test_memreview.py @@ -0,0 +1,272 @@ +"""Tests for plugins/memreview/ — /memreview reject + /mem kill switch (W3-4).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memreview import ( + _MEMREVIEW_HELP, + _MEM_HELP, + _handle_mem, + _handle_memreview, + mem_off_active, + mem_off_path, + register, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# /memreview help / pending +# --------------------------------------------------------------------------- + + +def test_memreview_empty_returns_help(): + assert _handle_memreview("") == _MEMREVIEW_HELP + assert _handle_memreview(" ") == _MEMREVIEW_HELP + + +def test_memreview_pending_no_diffs(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("pending") + assert "no pending diffs" in out + + +def test_memreview_pending_lists_diffs(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + (pdir / "wk-2026-05-02.json").write_text("{}") + (pdir / "wk-2026-05-09.json").write_text("{}") + (pdir / "wk-2026-05-09.rejected").write_text("rejected") + + out = _handle_memreview("pending") + assert "wk-2026-05-02" in out + assert "wk-2026-05-09" in out + # Rejected one carries a flag. 
+ assert "(rejected — will be archived Mon)" in out + + +# --------------------------------------------------------------------------- +# /memreview reject +# --------------------------------------------------------------------------- + + +def test_memreview_reject_invalid_digest_id(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("reject not-a-digest") + assert "must look like" in out + + +def test_memreview_reject_unknown_digest(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("reject wk-2026-05-02") + assert "no pending diff" in out + + +def test_memreview_reject_writes_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + diff_path = pdir / "wk-2026-05-02.json" + diff_path.write_text("{}") + + out = _handle_memreview("reject wk-2026-05-02") + assert "Rejected." 
in out + sentinel = pdir / "wk-2026-05-02.rejected" + assert sentinel.exists() + assert "rejected" in sentinel.read_text().lower() + + +# --------------------------------------------------------------------------- +# /mem off / on / status +# --------------------------------------------------------------------------- + + +def test_mem_off_creates_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("off") + assert "disabled" in out + assert mem_off_path().exists() + assert mem_off_active() is True + + +def test_mem_on_removes_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + mem_off_path().write_text("set", encoding="utf-8") + out = _handle_mem("on") + assert "enabled" in out + assert not mem_off_path().exists() + + +def test_mem_on_when_already_on_idempotent(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("on") + assert "already enabled" in out + + +def test_mem_status_off(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("status") + assert "🔊 ON" in out # default state + assert "(absent)" in out + + +def test_mem_status_on_with_pending(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + mem_off_path().write_text("set") + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + (pdir / "wk-2026-05-02.json").write_text("{}") + + out = _handle_mem("status") + assert "🔇 OFF" in out + assert "(present)" in out + assert "wk-2026-05-02" in out + + +def test_mem_help_on_unknown_subcommand(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("frobnicate") + assert "/mem off" in out and "/mem 
on" in out + + +# --------------------------------------------------------------------------- +# register() wires both commands +# --------------------------------------------------------------------------- + + +def test_register_registers_both_commands(): + captured = [] + + class FakeCtx: + def register_command(self, name, handler, description="", args_hint=""): + captured.append((name, args_hint)) + + register(FakeCtx()) + names = [c[0] for c in captured] + assert "memreview" in names + assert "mem" in names + + +# --------------------------------------------------------------------------- +# End-to-end: /memreview reject then weekly_apply archives as rejected +# --------------------------------------------------------------------------- + + +def test_reject_then_apply_archives_as_rejected(tmp_path, monkeypatch): + """Full flow: write pending diff -> /memreview reject -> weekly_apply + sees the sentinel and archives the diff with status=rejected.""" + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", + lambda: tmp_path, + ) + + db = init_db(tmp_path / "m.db") + digest_id = "wk-2026-05-02" + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + diff_payload = { + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [{"entity": "禮揚.x", "fact": "f", "importance": 2, + "valid_from": "2026-05-02", "valid_to": None, + "source_episode_ids": []}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + } + (pdir / f"{digest_id}.json").write_text(json.dumps(diff_payload)) + + # User runs /memreview reject. + reply = _handle_memreview(f"reject {digest_id}") + assert "Rejected." in reply + + # Apply step picks up the sentinel. 
+ from plugins.memory.sqlite_vec.promotion import weekly_apply + summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) + assert summary["applied"] is False + assert summary["reason"] == "rejected" + + # No new semantic_facts row (the promote was discarded). + [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf == 0 + + # Archive carries the .rejected suffix. + archived = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) + assert len(archived) == 1 + + +def test_mem_off_short_circuits_weekly_promotion(tmp_path, monkeypatch): + """Kill switch: /mem off must stop weekly_promotion from running its + Kimi call (which would otherwise burn tokens and write a diff).""" + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", + lambda: tmp_path, + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text, metadata) " + "VALUES (?, ?, ?, ?, ?, ?)", + ("2026-05-02 09:00", "cattia", "x", "user", "hi", + json.dumps({"stashed_facts": [{"text": "禮揚 likes X", + "entity": "禮揚.x", + "importance": 2}]})), + ) + db.commit() + + # Activate kill switch. + _handle_mem("off") + assert mem_off_active() is True + + kimi_called = [] + + async def kimi_should_not_be_called(prompt): + kimi_called.append(prompt) + return {} + + from plugins.memory.sqlite_vec.promotion import weekly_promotion + summary = asyncio.run(weekly_promotion(db, kimi_fn=kimi_should_not_be_called)) + assert summary["candidates"] == 0 + assert summary["skipped"] == "/mem off active" + # Kimi must not have been called. 
+ assert kimi_called == [] From b3b51f23d3aa8662ff5ffa97c1f9c071d13b076b Mon Sep 17 00:00:00 2001 From: Li-yang Chen Date: Sat, 2 May 2026 14:06:18 +0000 Subject: [PATCH 11/11] =?UTF-8?q?feat(memory):=20scripts/cutover/cutover.s?= =?UTF-8?q?h=20=E2=80=94=20W4-1=20prep=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Idempotent bash script that performs the W4 cutover steps when run with --commit. Default invocation is dry-run. Steps: 1. Pre-flight (verify memory.db exists, recent episodes accumulated) 2. Archive ~/.hermes/memories/MEMORY.md → MEMORY.md.archive-YYYY-MM-DD (chmod 444 for read-only) 3. Confirm config.yaml memory.provider == sqlite_vec 4. Disable legacy memory crons (Dimensions Memory Consolidation, Forgetting Curve) by flipping enabled=false in jobs.json 5. Smoke test the new provider end-to-end 6. Restart gateway Spec target date 2026-05-24, after observing one successful weekly review cycle. Caller is the user; script is non-destructive in dry-run mode and refuses to overwrite existing archives so re-running mid-fail is safe. Rollback procedure documented in hermes-memory/docs/runbooks/memory-rollback.md §3. Refs liyoungc/hermes-memory#12 --- scripts/cutover/cutover.sh | 168 +++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100755 scripts/cutover/cutover.sh diff --git a/scripts/cutover/cutover.sh b/scripts/cutover/cutover.sh new file mode 100755 index 00000000000..268e7848e85 --- /dev/null +++ b/scripts/cutover/cutover.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +# W4-1 cutover script — run on chococlaw when you've decided to retire +# MEMORY.md flat-file injection and commit fully to the sqlite_vec +# memory plugin. +# +# Spec target date: 2026-05-24, *after* observing at least one successful +# weekly review cycle on the new system. +# +# Idempotent — safe to re-run if interrupted partway. 
+# +# Usage: +# ./scripts/cutover/cutover.sh # dry run, prints planned actions +# ./scripts/cutover/cutover.sh --commit # actually do the work + +set -euo pipefail + +DRY_RUN=true +if [[ "${1:-}" == "--commit" ]]; then + DRY_RUN=false +fi + +today() { date -u +%Y-%m-%d; } +say() { echo "[cutover] $*"; } +do_or_say() { + if $DRY_RUN; then + say "(dry-run) $*" + else + say "$*" + eval "$@" + fi +} + +HOME_DIR="${HERMES_HOME:-$HOME/.hermes}" +say "HERMES_HOME = ${HOME_DIR}" + +# ---- 1. Pre-flight checks -------------------------------------------------- + +say "1. Pre-flight checks" +[[ -d "${HOME_DIR}/memories" ]] || { say "ERR no ${HOME_DIR}/memories"; exit 1; } +[[ -f "${HOME_DIR}/memories/memory.db" ]] || { say "ERR no memory.db — W1 hasn't shipped"; exit 1; } +say " ✓ memory.db present" + +if ! command -v docker >/dev/null; then + say "WARN docker not on PATH — DB queries below will be skipped" +fi + +# Confirm the new system has been writing recently (last 7 days). +if command -v docker >/dev/null; then + ep_recent=$(docker exec hermes /opt/hermes/.venv/bin/python3 -c " +import sqlite3 +conn = sqlite3.connect('/opt/data/memories/memory.db') +n = conn.execute(\"SELECT count(*) FROM episodes WHERE ts > datetime('now','-7 days')\").fetchone()[0] +print(n) +" 2>/dev/null || echo 0) + if [[ "${ep_recent}" -lt 5 ]]; then + say "WARN only ${ep_recent} episodes in the last 7 days. Either the gateway" + say " hasn't been used much OR the write path isn't actually firing." + say " Fix that BEFORE cutover, or the new system has nothing to retrieve." + else + say " ✓ ${ep_recent} episodes recorded in the last 7 days" + fi +fi + +# ---- 2. Archive MEMORY.md -------------------------------------------------- + +ARCHIVE_NAME="MEMORY.md.archive-$(today)" +SRC="${HOME_DIR}/memories/MEMORY.md" +DST="${HOME_DIR}/memories/${ARCHIVE_NAME}" + +say "2. Archive MEMORY.md → ${ARCHIVE_NAME}" +if [[ ! -f "${SRC}" ]]; then + say " - ${SRC} does not exist — already archived?" 
+else + if [[ -f "${DST}" ]]; then + say " - ${DST} already exists — refusing to overwrite" + else + do_or_say "mv '${SRC}' '${DST}'" + do_or_say "chmod 444 '${DST}'" + fi +fi + +# ---- 3. config.yaml: confirm provider=sqlite_vec --------------------------- + +say "3. Confirm config.yaml memory.provider == sqlite_vec" +cfg="${HOME_DIR}/config.yaml" +if grep -qE '^[[:space:]]*provider:[[:space:]]*sqlite_vec' "${cfg}" 2>/dev/null; then + say " ✓ already set to sqlite_vec" +else + say " - provider not set — please edit ${cfg} manually:" + say " memory:" + say " provider: sqlite_vec" +fi + +# ---- 4. Disable legacy memory crons ---------------------------------------- + +say "4. Disable legacy memory crons in jobs.json" +do_or_say "/usr/bin/env python3 - <<'PY' +import json, pathlib +p = pathlib.Path('${HOME_DIR}/cron/jobs.json') +if not p.exists(): + print(' - no jobs.json'); raise SystemExit(0) +data = json.loads(p.read_text()) +legacy_names = { + 'Dimensions Memory Consolidation', + 'Forgetting Curve (Monthly Archive)', + 'Forgetting Curve', +} +changed = 0 +for j in data.get('jobs', []): + if j['name'] in legacy_names and j.get('enabled', False): + j['enabled'] = False + j['paused_at'] = '$(date -u +%Y-%m-%dT%H:%M:%SZ)' + j['paused_reason'] = 'W4 cutover — replaced by sqlite_vec weekly_promotion' + print(f' ✓ disabled: {j[\"name\"]}') + changed += 1 +if changed: + p.write_text(json.dumps(data, indent=2, ensure_ascii=False)) +else: + print(' - no legacy jobs found (already disabled, or never installed)') +PY" + +# ---- 5. Smoke test --------------------------------------------------------- + +say "5. 
Smoke test: provider initializes + retrieves" +if command -v docker >/dev/null; then + do_or_say "docker exec hermes /opt/hermes/.venv/bin/python3 -c ' +from hermes_cli.env_loader import load_hermes_dotenv +load_hermes_dotenv(hermes_home=\"/opt/data\", project_env=None) +from agent.memory_manager import MemoryManager +from plugins.memory import load_memory_provider +mm = MemoryManager() +mm.add_provider(load_memory_provider(\"sqlite_vec\")) +mm.initialize_all(session_id=\"cutover-smoke\", platform=\"cli\", hermes_home=\"/opt/data\", agent_context=\"primary\") +out = mm.prefetch_all(\"我太太生日\") +print(\"prefetch returned:\", \"OK\" if out else \"EMPTY\") +mm.shutdown_all() +'" +fi + +# ---- 6. Restart gateway ---------------------------------------------------- + +say "6. Restart gateway to pick up any config changes" +if command -v docker >/dev/null && [[ -d "${HOME}/Projects/hermes-agent" ]]; then + do_or_say "(cd ${HOME}/Projects/hermes-agent && docker compose restart gateway)" +fi + +# ---- Done ------------------------------------------------------------------ + +if $DRY_RUN; then + say "" + say "DRY RUN COMPLETE — no changes made. Re-run with --commit when ready." + say "" + say "After --commit, monitor for 24 hours via memory.log + #memory-review:" + say " - tail -f ~/.hermes/logs/memory.log" + say " - watch ~/.hermes/logs/memory_write_failures.jsonl size" + say " - confirm next Sunday's digest fires" + say "" + say "Rollback procedure: docs/runbooks/memory-rollback.md §3" +else + say "" + say "CUTOVER COMPLETE." + say " Archive at: ${DST}" + say " Legacy crons disabled in: ${HOME_DIR}/cron/jobs.json" + say " Gateway restarted." + say "" + say "Monitor for 24 hours then sanity-check via:" + say " docs/runbooks/memory-monitoring.md §6 (quick health check)" +fi