diff --git a/plugins/memdebug/__init__.py b/plugins/memdebug/__init__.py new file mode 100644 index 00000000000..2030192a1ee --- /dev/null +++ b/plugins/memdebug/__init__.py @@ -0,0 +1,225 @@ +"""``/memdebug`` Discord slash command — read-only retrieval diagnostic (W2-4). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.2. + +Usage in chat: + + /memdebug -> top-8 from semantic_facts (curated) + /memdebug rawsearch -> top-8 from episodes (raw turns, forensics) + +The handler intentionally returns plain markdown text (not a Discord +embed): hermes-agent's ``register_command()`` surface is platform-neutral +and dispatches the same string to CLI / gateway / Slack. + +The ``rich-embed + 👍/👎 reaction buttons`` mode is open spec §8 work — we +ship the read-only diagnostic now so the F2 monitoring path (% of +top-1 hits judged useful) is unblocked. For v1, encourage the user +to react with 👍/👎 emoji on this message; a future cron will scrape +those reactions from the channel. +""" + +from __future__ import annotations + +import asyncio +import logging +import sqlite3 +import time +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + +def _resolve_hermes_home() -> Path: + """Use HERMES_HOME (set by hermes_constants) when available; else ~/.hermes.""" + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +_HERMES_HOME = _resolve_hermes_home() +DEFAULT_DB = _HERMES_HOME / "memories" / "memory.db" +DEFAULT_K = 8 +LOG_PATH = _HERMES_HOME / "logs" / "memory.log" + + +def _format_facts_block(facts) -> str: + lines = ["**🧠 /memdebug** — top {} from `semantic_facts`\n".format(len(facts))] + for i, f in enumerate(facts, start=1): + recency = max(0.0, 1.0 - f.age_days / 365.0) # display-only;rerank weight uses 90-day half-life + lines.append( + f"`{i}.` **[{f.entity or '—'}]** {_truncate(f.fact, 90)}\n" + f" score=`{f.score:.3f}` sim=`{f.sim:.3f}` " + f"age=`{int(f.age_days)}d` importance=`{f.importance}`" + ) + lines.append("\n_React 👍/👎 to flag this retrieval._") + return "\n".join(lines) + + +def _truncate(s: str, n: int) -> str: + s = s.replace("\n", " ") + return s if len(s) <= n else s[: n - 1] + "…" + + +def _format_episodes_block(rows: List[sqlite3.Row]) -> str: + if not rows: + return ( + "**🧠 /memdebug rawsearch** — `episodes` table is empty.\n\n" + "Episodes are written by W3 (per-turn write-back). After W3 " + "ships, this command will surface the raw conversation turns " + "behind any retrieval." + ) + lines = ["**🧠 /memdebug rawsearch** — top {} from `episodes`\n".format(len(rows))] + for i, r in enumerate(rows, start=1): + lines.append( + f"`{i}.` `[{r['ts']}]` `{r['channel']}/{r['role']}` " + f"{_truncate(r['text'], 120)}" + ) + return "\n".join(lines) + + +def _append_log(payload: dict) -> None: + """Append a /memdebug invocation to ~/.hermes/logs/memory.log.""" + import json + try: + LOG_PATH.parent.mkdir(parents=True, exist_ok=True) + with LOG_PATH.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +def _open_memory_db(path: Optional[Path] = None) -> Optional[sqlite3.Connection]: + """Open the sqlite_vec memory.db. 
Returns None if it doesn't exist yet.""" + path = path or DEFAULT_DB + if not path.exists(): + return None + from plugins.memory.sqlite_vec.store import open_db + return open_db(path, check_same_thread=False) + + +async def _do_semantic(query: str) -> str: + from plugins.memory.sqlite_vec.read import read_memory + + conn = _open_memory_db() + if not conn: + return ( + "**🧠 /memdebug** — memory database not yet initialised.\n\n" + f"Expected at `{DEFAULT_DB}`. Run `scripts/import_md.py --commit` " + "or wait for the first agent turn after W2-3 cutover." + ) + try: + facts = await read_memory(query, conn, k=DEFAULT_K) + finally: + conn.close() + if not facts: + return f"**🧠 /memdebug** — no facts matched `{_truncate(query, 60)}`." + _append_log({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "memdebug", + "q": query, + "n": len(facts), + "ids": [f.id for f in facts], + }) + return _format_facts_block(facts) + + +async def _do_rawsearch(query: str) -> str: + """Substring scan of episodes.text. No vector query — this is forensics + mode for 'did this conversation happen', not semantic recall.""" + conn = _open_memory_db() + if not conn: + return ( + "**🧠 /memdebug rawsearch** — memory database not yet initialised." + ) + try: + like = f"%{query}%" + rows = conn.execute( + "SELECT ts, channel, role, text FROM episodes " + "WHERE text LIKE ? ORDER BY ts DESC LIMIT ?", + (like, DEFAULT_K), + ).fetchall() + finally: + conn.close() + _append_log({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "memdebug-raw", + "q": query, + "n": len(rows), + }) + return _format_episodes_block(rows) + + +HELP_TEXT = ( + "**/memdebug** — inspect what `read_memory` would return.\n" + "Usage:\n" + " `/memdebug ` — top-8 from `semantic_facts` (curated)\n" + " `/memdebug rawsearch ` — substring scan of `episodes` (forensics)\n" +) + + +async def _handle_async(raw_args: str) -> str: + args = (raw_args or "").strip() + if not args: + return HELP_TEXT + if args.lower().startswith("rawsearch"): + rest = args[len("rawsearch"):].strip() + if not rest: + return HELP_TEXT + try: + return await _do_rawsearch(rest) + except Exception as exc: + logger.exception("memdebug rawsearch failed") + return f"**/memdebug rawsearch** error: `{exc}`" + try: + return await _do_semantic(args) + except Exception as exc: + logger.exception("memdebug semantic failed") + return f"**/memdebug** error: `{exc}`" + + +def _handle_memdebug(raw_args: str) -> str: + """Sync entry point. PluginContext.register_command supports async + handlers natively, but ours is dispatched on either pathway, so we + bridge via asyncio.run when no loop is running.""" + coro = _handle_async(raw_args) + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + if loop is None: + return asyncio.run(coro) + # Already in a running loop — schedule and wait via a worker thread. + import threading + import concurrent.futures + box = {} + + def runner(): + try: + box["r"] = asyncio.run(coro) + except BaseException as exc: + box["e"] = exc + + t = threading.Thread(target=runner, daemon=True, name="memdebug-handler") + t.start() + t.join(timeout=15.0) + if t.is_alive(): + return "**/memdebug** timed out (>15s)." 
+ if "e" in box: + return f"**/memdebug** error: `{box['e']}`" + return box.get("r", HELP_TEXT) + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + ctx.register_command( + "memdebug", + handler=_handle_memdebug, + description="Inspect Hermes long-term memory retrieval (top-8 + scores).", + args_hint=" | rawsearch ", + ) diff --git a/plugins/memdebug/plugin.yaml b/plugins/memdebug/plugin.yaml new file mode 100644 index 00000000000..1945104cff6 --- /dev/null +++ b/plugins/memdebug/plugin.yaml @@ -0,0 +1,4 @@ +name: memdebug +version: 0.1.0 +description: "/memdebug — inspect Hermes long-term memory retrieval. Read-only diagnostic for the sqlite_vec memory plugin (W2-4)." +author: "Li-yang Chen" diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py new file mode 100644 index 00000000000..3d54be27f66 --- /dev/null +++ b/plugins/memory/sqlite_vec/__init__.py @@ -0,0 +1,251 @@ +"""Hermes V3 memory plugin — sqlite-vec store with two-tier (hot/cold) design. + +Activate via $HERMES_HOME/config.yaml: + + memory: + provider: sqlite_vec + +Read path (W2-3): on each turn, ``prefetch(query)`` runs +``read_memory()`` in a worker thread (the gateway already owns the main +asyncio loop, so we can't ``asyncio.run`` inline) and returns a markdown +block prefixed with ``## Recent relevant memories``. The retrieved fact +IDs are cached per session and bumped via ``sync_turn()`` after the +reply is sent, per spec §4 hits accounting. + +Write path (W3-2): ``sync_turn`` now also fires ``write_episode`` — +records the raw turn into ``episodes``, runs Kimi extract, fast-tracks +short-lived facts directly into ``semantic_facts`` (≤ today + 30d), +stashes longer-lived facts into ``episodes.metadata.stashed_facts`` +for W3-3 weekly_promotion. Errors land in +``~/.hermes/logs/memory_write_failures.jsonl`` and never propagate. +""" + +from __future__ import annotations + +import asyncio +import logging +import threading +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +from agent.memory_provider import MemoryProvider + +from .read import ( + DEFAULT_K, + Fact, + bump_hits, + format_facts_for_prompt, + read_memory, +) +from .store import init_db +from .write import write_episode + +logger = logging.getLogger(__name__) + +PREFETCH_TIMEOUT_S = 5.0 # Voyage typical 200-400ms; 5s is the kill-switch. +# Write path: extract (~1-3s) + embed batch (~300ms) + INSERT (~5ms). +# 30s gives Kimi room to think while still bounding worst-case latency. +WRITE_TIMEOUT_S = 30.0 +RECALL_HEADER = "## Recent relevant memories" + + +def _mem_off_active() -> bool: + """True iff the global /mem off kill switch sentinel is present. + + Late import to avoid circular plugin loading: plugins.memreview can + import provider symbols indirectly via the slash-command surface. + """ + try: + from plugins.memreview import mem_off_active + return mem_off_active() + except Exception: + return False + + +def _default_db_path(hermes_home: str) -> Path: + return Path(hermes_home).expanduser() / "memories" / "memory.db" + + +def _run_coro_in_thread(coro_factory, timeout: float): + """Run an async coroutine in a worker thread with its own event loop. + + The hermes gateway runs its own asyncio loop, so ``asyncio.run`` from + this synchronous ABC method would raise "cannot be called from a + running event loop". 
We sidestep by spawning a dedicated thread with a
+    fresh loop, joining with a timeout. ``coro_factory`` is a zero-arg
+    callable that builds the coroutine inside the worker so the coroutine
+    is bound to the worker's loop.
+    """
+    box: Dict[str, Any] = {}
+
+    def runner():
+        loop = asyncio.new_event_loop()
+        try:
+            box["result"] = loop.run_until_complete(coro_factory())
+        except BaseException as exc:
+            box["error"] = exc
+        finally:
+            loop.close()
+
+    t = threading.Thread(target=runner, daemon=True, name="sqlite-vec-worker")
+    t.start()
+    t.join(timeout)
+    if t.is_alive():
+        raise TimeoutError(f"sqlite_vec worker exceeded {timeout}s")
+    if "error" in box:
+        raise box["error"]
+    return box.get("result")
+
+
+def _synth_msg_id(session_id: str, user: str, asst: str, ts: str) -> str:
+    """Stable per-turn external_id for ON CONFLICT idempotency.
+
+    We don't have the real Discord message ID at sync_turn time (the
+    ABC hook only exposes user/assistant content + session_id), so we
+    hash the turn into a 12-hex-char id. Bucketing ts to the minute
+    means a Discord redelivery within the same minute collapses; a
+    legitimate retry after >1 min would create a new row, which is
+    acceptable for episode-level forensics.
+    """
+    # Builtin hash() is salted per interpreter run (PYTHONHASHSEED), so it
+    # would not survive a gateway restart; use a stable digest instead.
+    import hashlib
+
+    raw = (session_id, user, asst, ts[:16])
+    return "h" + hashlib.sha1(repr(raw).encode("utf-8")).hexdigest()[:12]
+
+
+class SqliteVecMemoryProvider(MemoryProvider):
+    """Hermes V3 long-term memory provider (W2-3 read + W3-2 write)."""
+
+    def __init__(self) -> None:
+        self._conn = None
+        self._db_path: Optional[Path] = None
+        self._last_fact_ids: Dict[str, List[int]] = {}
+        self._lock = threading.Lock()
+
+    @property
+    def name(self) -> str:
+        return "sqlite_vec"
+
+    def is_available(self) -> bool:
+        try:
+            import sqlite_vec  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        hermes_home = kwargs.get("hermes_home")
+        if not hermes_home:
+            from hermes_constants import get_hermes_home
+            hermes_home = str(get_hermes_home())
+        self._db_path = _default_db_path(hermes_home)
+        self._conn = init_db(self._db_path, check_same_thread=False)
+        logger.info("sqlite_vec memory ready at %s", self._db_path)
+
+    def system_prompt_block(self) -> str:
+        # Persona stays in flat files (SOUL.md, USER.md, life-dimensions.md);
+        # the recall block is emitted from prefetch() per turn.
+        return ""
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Embed query, fetch top-k facts, format as a markdown block.
+
+        Returns "" on empty/trivial query, missing connection, or any
+        error (Voyage outage, rate limit, etc.) so the gateway never
+        blocks a reply on memory recall. Retrieved fact IDs are stashed
+        for the matching ``sync_turn()`` call to bump hits.
+        """
+        if not self._conn or not query or not query.strip():
+            return ""
+
+        conn = self._conn
+        db_lock = self._lock
+
+        async def _do() -> List[Fact]:
+            with db_lock:
+                return await read_memory(query, conn, k=DEFAULT_K)
+
+        try:
+            facts = _run_coro_in_thread(_do, timeout=PREFETCH_TIMEOUT_S)
+        except Exception as exc:
+            logger.warning("sqlite_vec prefetch error: %s", exc)
+            return ""
+
+        if not facts:
+            return ""
+
+        with self._lock:
+            self._last_fact_ids[session_id] = [f.id for f in facts]
+
+        body = format_facts_for_prompt(facts, with_meta=True)
+        return f"{RECALL_HEADER}\n{body}"
+
+    def sync_turn(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+    ) -> None:
+        """Bump hits on retrieved facts and persist the turn.
+ + Spec §4 + §5.1 — both happen AFTER the reply is delivered, so + this must never raise. ``bump_hits`` swallows its own DB errors; + ``write_episode`` swallows everything and writes failures to + ~/.hermes/logs/memory_write_failures.jsonl. + """ + if not self._conn: + return + conn = self._conn + db_lock = self._lock + + with self._lock: + ids = self._last_fact_ids.pop(session_id, []) + + ts = time.strftime("%Y-%m-%d %H:%M:%S") + msg_id = _synth_msg_id(session_id, user_content, assistant_content, ts) + channel = session_id or "unknown" + + async def _do_bump() -> None: + if ids: + with db_lock: + await bump_hits(ids, conn) + + async def _do_write() -> None: + with db_lock: + await write_episode( + user_msg=user_content, + reply=assistant_content, + channel=channel, + msg_id=msg_id, + ts=ts, + conn=conn, + ) + + try: + _run_coro_in_thread(_do_bump, timeout=PREFETCH_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec bump_hits worker error: %s", exc) + + if user_content or assistant_content: + # /mem off kill switch: skip write_episode entirely. The hot path + # bump_hits ran above (read-side accounting), but no new + # episodes / facts are persisted. Read remains unaffected. + if _mem_off_active(): + logger.info("sqlite_vec write_episode skipped (/mem off)") + else: + try: + _run_coro_in_thread(_do_write, timeout=WRITE_TIMEOUT_S) + except Exception as exc: + logger.warning("sqlite_vec write_episode worker error: %s", exc) + + def get_tool_schemas(self) -> List[Dict[str, Any]]: + return [] + + def handle_tool_call(self, tool_name: str, args: Dict[str, Any]) -> Any: + from tools.registry import tool_error + return tool_error(f"sqlite_vec exposes no tools (got {tool_name!r})") + + def shutdown(self) -> None: + if getattr(self, "_conn", None): + self._conn.close() + self._conn = None diff --git a/plugins/memory/sqlite_vec/embed.py b/plugins/memory/sqlite_vec/embed.py new file mode 100644 index 00000000000..ae114ebf670 --- /dev/null +++ b/plugins/memory/sqlite_vec/embed.py @@ -0,0 +1,139 @@ +"""Voyage AI embedding wrapper for the sqlite_vec memory plugin. + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §1.4 (locked +decision) and §4 (read path) — voyage-3.5-lite, 512 dim, int8. + +Returns each embedding as a 512-byte BLOB ready to insert into +``semantic_facts.embedding``. The store-side trigger wraps the BLOB with +``vec_int8()`` when copying it into the ``vec_facts`` virtual table. + +Public API: + + await voyage_embed(["text 1", "text 2"]) -> [b"...512 bytes...", b"..."] +""" + +from __future__ import annotations + +import asyncio +import logging +import os +from typing import List, Optional, Sequence + +import httpx + +logger = logging.getLogger(__name__) + +VOYAGE_URL = "https://api.voyageai.com/v1/embeddings" +VOYAGE_MODEL = "voyage-3.5-lite" +VOYAGE_BATCH = 128 # Voyage API per-call ceiling +VOYAGE_DIM = 512 +VOYAGE_DTYPE = "int8" +DEFAULT_TIMEOUT = 30.0 +MAX_RETRIES = 3 + + +class VoyageError(RuntimeError): + """Raised when Voyage API repeatedly fails.""" + + +def _api_key() -> str: + key = os.environ.get("VOYAGE_API_KEY") + if not key: + raise VoyageError( + "VOYAGE_API_KEY is not set. Add it to ~/.hermes/.env and " + "expose it to the hermes container via docker-compose." 
+ ) + return key + + +def _to_int8_blob(values: Sequence[int]) -> bytes: + """Pack a list of int8 values (-128..127) into a raw 512-byte BLOB.""" + if len(values) != VOYAGE_DIM: + raise VoyageError( + f"Voyage returned {len(values)}-dim vector, expected {VOYAGE_DIM}" + ) + return bytes((v + 256) & 0xFF for v in values) # signed -> unsigned byte + + +async def _post_batch( + client: httpx.AsyncClient, + texts: List[str], + api_key: str, +) -> List[bytes]: + payload = { + "model": VOYAGE_MODEL, + "input": texts, + "output_dtype": VOYAGE_DTYPE, + "output_dimension": VOYAGE_DIM, + } + headers = {"Authorization": f"Bearer {api_key}"} + + for attempt in range(1, MAX_RETRIES + 1): + try: + r = await client.post( + VOYAGE_URL, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT + ) + except httpx.RequestError as exc: + if attempt == MAX_RETRIES: + raise VoyageError(f"network error: {exc}") from exc + await asyncio.sleep(2 ** (attempt - 1)) + continue + + if 500 <= r.status_code < 600: + if attempt == MAX_RETRIES: + raise VoyageError(f"Voyage 5xx: {r.status_code} {r.text[:200]}") + await asyncio.sleep(2 ** (attempt - 1)) + continue + + if r.status_code >= 400: + raise VoyageError(f"Voyage {r.status_code}: {r.text[:200]}") + + body = r.json() + items = body.get("data", []) + if len(items) != len(texts): + raise VoyageError( + f"Voyage returned {len(items)} items for {len(texts)} inputs" + ) + # Voyage returns embeddings in input order (per docs/index field). + items.sort(key=lambda d: d.get("index", 0)) + return [_to_int8_blob(d["embedding"]) for d in items] + + raise VoyageError("retry loop exhausted unexpectedly") + + +async def voyage_embed( + texts: List[str], + *, + dim: int = VOYAGE_DIM, + dtype: str = VOYAGE_DTYPE, + client: Optional[httpx.AsyncClient] = None, +) -> List[bytes]: + """Embed `texts` and return one int8 BLOB per input. + + Batches automatically at Voyage's 128-input ceiling. Retries 3x with + exponential backoff on 5xx and network errors. Raises VoyageError on + auth failure, 4xx, or repeated 5xx. + + `dim` and `dtype` are accepted for API symmetry but locked to the spec + values; passing different values raises immediately so config drift + fails loudly instead of silently corrupting embeddings. + """ + if dim != VOYAGE_DIM or dtype != VOYAGE_DTYPE: + raise VoyageError( + f"dim/dtype locked to {VOYAGE_DIM}/{VOYAGE_DTYPE} per spec §1.4" + ) + if not texts: + return [] + + api_key = _api_key() + owns_client = client is None + client = client or httpx.AsyncClient() + try: + out: List[bytes] = [] + for i in range(0, len(texts), VOYAGE_BATCH): + batch = texts[i : i + VOYAGE_BATCH] + out.extend(await _post_batch(client, batch, api_key)) + return out + finally: + if owns_client: + await client.aclose() diff --git a/plugins/memory/sqlite_vec/extract.py b/plugins/memory/sqlite_vec/extract.py new file mode 100644 index 00000000000..caeffb1e245 --- /dev/null +++ b/plugins/memory/sqlite_vec/extract.py @@ -0,0 +1,299 @@ +"""Kimi-driven extraction from a single Discord turn (W3-1). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.2. + +The ``EXTRACT_PROMPT`` constant is **verbatim** from the spec — do not +paraphrase. Drift here directly compromises the F2 monitoring path +(downstream weekly review will see noise). + +Two-stage flow: + + 1. Caller calls ``kimi_extract(user, assistant, channel, ts)``. + 2. We short-circuit to ``[]`` if ``channel`` is in + ``PHI_BLACKLIST_CHANNELS`` — never round-trip hospital data + through the cloud LLM. + 3. 
Otherwise we POST to synthetic.new's OpenAI-compatible
+       chat-completions endpoint with ``temperature=0.1`` and
+       ``response_format=json_object`` (Kimi K2.5 supports the OpenAI
+       structured-output flag).
+    4. Parse the JSON list, validate the per-item shape, return
+       ``list[ExtractedFact]``. Bad rows are dropped, not fatal.
+
+Token cost is logged to ``memory.log`` so weekly review can spot a
+runaway extract budget.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import List, Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Spec §1.4 lock — Kimi K2.5 via synthetic.new.
+SYNTHETIC_URL = "https://api.synthetic.new/v1/chat/completions"
+EXTRACT_MODEL = "hf:moonshotai/Kimi-K2.5"
+EXTRACT_TEMPERATURE = 0.1
+EXTRACT_TIMEOUT = 30.0
+EXTRACT_MAX_TOKENS = 1024  # extract output is a small JSON list
+
+# Spec §5.1 — channels whose content never leaves the host as PHI.
+PHI_BLACKLIST_CHANNELS = frozenset({"cmio", "cbme", "medicine"})
+
+# Spec §5.2 EXTRACT_PROMPT — copy verbatim. The {placeholders} are
+# substituted at call time.
+EXTRACT_PROMPT = """You extract durable memories about 禮揚 from this Discord turn.
+Output a JSON list. Empty list [] if nothing memorable.
+
+HARD RULES — these override everything else:
+1. NEVER extract: hospital data, patient names, 病歷號, 身分證字號, lab results,
+   diagnoses about real people, hospital policy specifics, hospital colleague names.
+2. NEVER extract pleasantries (好的/收到/早安/明白/thanks). Return [] if turn is just this.
+3. If turn metadata says synthetic=true (cron-produced), return [] UNLESS content
+   contains a NEW commitment by 禮揚 (e.g. "排了 5/22 跟 Y 開會").
+4. If unsure whether content violates rule 1, ERR ON THE SIDE OF NOT EXTRACTING.
+
+Each item:
+  type: "episodic" | "semantic"
+  text: short statement, zh-TW or English (match source language)
+  entity: nullable. Use ".家庭", ".工作", ".研究興趣", ".健康", etc. namespacing under "禮揚."
+  importance: 1-5
+  valid_to_hint: ISO date if turn implies expiry. "今晚"→tomorrow, "這週"→Sunday, "這個月"→end-of-month.
+
+Skip facts that duplicate something said in the last 5 turns.
+
+TURN:
+[{ts}] [{channel}] user: {user}
+[{ts}] [{channel}] assistant: {assistant}
+"""
+
+
+@dataclass
+class ExtractedFact:
+    """One fact extracted from a turn. Distinct from the read-side ``Fact``."""
+
+    type: str  # "episodic" | "semantic"
+    text: str
+    entity: Optional[str]
+    importance: int
+    valid_to_hint: Optional[str] = None
+    raw: dict = field(default_factory=dict)  # original Kimi output for forensics
+
+
+class ExtractError(RuntimeError):
+    """Raised when synthetic.new is unreachable or returns malformed payload."""
+
+
+def _resolve_hermes_home() -> Path:
+    try:
+        from hermes_constants import get_hermes_home
+        return Path(get_hermes_home())
+    except Exception:
+        return Path.home() / ".hermes"
+
+
+def _default_log_path() -> Path:
+    return _resolve_hermes_home() / "logs" / "memory.log"
+
+
+def _read_synthetic_api_key() -> str:
+    """Resolve the synthetic.new API key.
+
+    Priority:
+    1. ``SYNTHETIC_API_KEY`` env var (test-friendly override).
+    2. ``auth.json`` ``custom:synthetic`` pool, first non-expired token.
+
+    Raises ``ExtractError`` if no key is found — the caller decides
+    whether that should bubble up (W3-2 wraps and falls back).
+ """ + env = os.environ.get("SYNTHETIC_API_KEY") + if env: + return env + + auth_path = _resolve_hermes_home() / "auth.json" + if auth_path.exists(): + try: + data = json.loads(auth_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise ExtractError(f"auth.json parse: {exc}") from exc + # The real auth.json uses "credential_pool" (singular). Older or + # alternate layouts may use the plural form or top-level keys, so we + # check all three for resilience across hermes-agent versions. + pool = ( + (data.get("credential_pool") or {}).get("custom:synthetic") + or (data.get("credential_pools") or {}).get("custom:synthetic") + or data.get("custom:synthetic") + or [] + ) + for entry in pool: + tok = entry.get("access_token") + if tok: + return tok + + raise ExtractError( + "synthetic.new API key not found. Set SYNTHETIC_API_KEY or " + "ensure auth.json has a custom:synthetic credential." + ) + + +def _append_log(payload: dict, log_path: Optional[Path] = None) -> None: + log_path = log_path or _default_log_path() + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +def _coerce_fact(raw: dict) -> Optional[ExtractedFact]: + """Validate one Kimi-emitted fact dict; return None on shape errors.""" + t = raw.get("type") + text = raw.get("text") + if t not in ("episodic", "semantic"): + return None + if not isinstance(text, str) or not text.strip(): + return None + importance = raw.get("importance", 2) + try: + importance = int(importance) + except (TypeError, ValueError): + importance = 2 + importance = max(1, min(5, importance)) + entity = raw.get("entity") + if entity is not None and not isinstance(entity, str): + entity = None + valid_to_hint = raw.get("valid_to_hint") + if valid_to_hint is not None and not isinstance(valid_to_hint, str): + valid_to_hint = None + return ExtractedFact( + type=t, + text=text.strip(), + entity=entity, + importance=importance, + valid_to_hint=valid_to_hint, + raw=raw, + ) + + +async def kimi_extract( + user: str, + assistant: str, + channel: str, + ts: str, + *, + client: Optional[httpx.AsyncClient] = None, + log_path: Optional[Path] = None, +) -> List[ExtractedFact]: + """Extract durable memories from one Discord turn. + + Returns ``[]`` (no API call) when ``channel`` is PHI-blacklisted, when + both ``user`` and ``assistant`` are empty, or when Kimi returns + malformed JSON. Otherwise raises ``ExtractError`` on transport + failure or non-2xx response — caller (W3-2) is responsible for + fallback bookkeeping (failure JSONL log). 
+ """ + if channel in PHI_BLACKLIST_CHANNELS: + return [] + if not (user or "").strip() and not (assistant or "").strip(): + return [] + + api_key = _read_synthetic_api_key() + prompt = EXTRACT_PROMPT.format(ts=ts, channel=channel, user=user, assistant=assistant) + + payload = { + "model": EXTRACT_MODEL, + "messages": [{"role": "user", "content": prompt}], + "temperature": EXTRACT_TEMPERATURE, + "max_tokens": EXTRACT_MAX_TOKENS, + "response_format": {"type": "json_object"}, + } + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + owns_client = client is None + client = client or httpx.AsyncClient() + t0 = time.perf_counter() + try: + try: + r = await client.post( + SYNTHETIC_URL, headers=headers, json=payload, timeout=EXTRACT_TIMEOUT + ) + except httpx.RequestError as exc: + raise ExtractError(f"synthetic.new network error: {exc}") from exc + if r.status_code >= 400: + raise ExtractError(f"synthetic.new {r.status_code}: {r.text[:200]}") + body = r.json() + finally: + if owns_client: + await client.aclose() + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + + choice = (body.get("choices") or [{}])[0] + content = (choice.get("message") or {}).get("content", "") + usage = body.get("usage") or {} + + parsed = _parse_json_list(content) + facts = [f for f in (_coerce_fact(item) for item in parsed) if f is not None] + + _append_log( + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "kimi_extract", + "channel": channel, + "ms": round(elapsed_ms, 2), + "n_raw": len(parsed), + "n_kept": len(facts), + "tokens_in": usage.get("prompt_tokens"), + "tokens_out": usage.get("completion_tokens"), + }, + log_path=log_path, + ) + return facts + + +def _parse_json_list(content: str) -> list: + """Tolerantly extract a JSON list from Kimi's ``content`` field. + + The prompt asks for a JSON list, but Kimi may wrap it in an object + (when response_format=json_object) like ``{"facts": [...]}`` or + return ``{}`` for empty. We accept any of: + - bare ``[...]`` + - ``{"facts": [...]}`` / ``{"items": [...]}`` / ``{"results": [...]}`` + - ``{}`` (treated as empty list) + """ + if not content: + return [] + try: + data = json.loads(content) + except json.JSONDecodeError: + return [] + if isinstance(data, list): + return data + if isinstance(data, dict): + # Kimi K2.5 with response_format=json_object often wraps the + # answer in a dict like {"analysis": ..., "extracted_memories": [...]}. + # Try the canonical key names first, then fall back to the first list-valued field. + for key in ("facts", "items", "results", "memories", "extracted_memories", "data"): + v = data.get(key) + if isinstance(v, list): + return v + # Last-ditch fallback: any top-level list value wins. + for v in data.values(): + if isinstance(v, list): + return v + # Kimi sometimes returns a single fact as a flat dict (no list wrapper). + # Detect by the presence of the canonical fact keys. + if "type" in data and "text" in data: + return [data] + return [] + return [] diff --git a/plugins/memory/sqlite_vec/plugin.yaml b/plugins/memory/sqlite_vec/plugin.yaml new file mode 100644 index 00000000000..4e3b24133c4 --- /dev/null +++ b/plugins/memory/sqlite_vec/plugin.yaml @@ -0,0 +1,7 @@ +name: sqlite_vec +version: 0.1.0 +description: "Hermes V3 long-term memory — local sqlite-vec store with hot episodes / cold curated semantic_facts, weekly human-approved promotion." 
+pip_dependencies: + - sqlite-vec>=0.1.6 +hooks: + - on_pre_compress diff --git a/plugins/memory/sqlite_vec/promotion.py b/plugins/memory/sqlite_vec/promotion.py new file mode 100644 index 00000000000..6a4404b752e --- /dev/null +++ b/plugins/memory/sqlite_vec/promotion.py @@ -0,0 +1,862 @@ +"""Weekly promotion + apply core logic (W3-3). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.3 + §5.4. + +Two entry points, both invoked from cron-driven thin wrappers in +``~/.hermes/scripts/`` (so they sit inside HERMES_HOME/scripts, the only +location the hermes scheduler will exec): + + weekly_promotion() - reads 7 days of pending episodes, runs one + Kimi-thinking call to produce a promotion diff, + saves it to pending_diffs/.json, + renders + posts the digest to #memory-review. + Does NOT stamp episodes.promoted_at. + + weekly_apply() - purges pending_diffs older than 14 days, loads + the latest, checks for the rejection sentinel + file, and either archives-as-rejected or + applies the diff atomically (promote / dedup / + expire) and stamps episodes.promoted_at. + +The split lets the user reject Sunday's diff with /memreview reject + any time before Monday's apply fires. +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import sqlite3 +import struct +import time +from dataclasses import dataclass, field +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import httpx + +from .embed import voyage_embed +from .extract import ( + EXTRACT_TIMEOUT, + PHI_BLACKLIST_CHANNELS, + SYNTHETIC_URL, + _read_synthetic_api_key, +) + +logger = logging.getLogger(__name__) + +PROMOTION_MODEL = "hf:moonshotai/Kimi-K2-Thinking" +PROMOTION_FALLBACK_MODEL = "hf:moonshotai/Kimi-K2.5" +PROMOTION_TEMPERATURE = 0.2 +PROMOTION_MAX_TOKENS = 8192 # diff JSON can be substantial across 7 days +PROMOTION_TIMEOUT = 120.0 # thinking-mode + 100+ episodes + +PROMOTION_NEIGHBOR_K = 20 # spec §5.3: per-candidate vec_search k=20 +PROMOTION_LOOKBACK_DAYS = 7 +PENDING_DIFF_TTL_DAYS = 14 + +DISCORD_API = "https://discord.com/api/v10/channels/{channel_id}/messages" + + +# --------------------------------------------------------------------------- +# Prompt — designed to match spec §5.3 schema verbatim +# --------------------------------------------------------------------------- + +PROMOTION_PROMPT = """You are running the weekly memory promotion review for 禮揚's personal AI. + +Below is one week of conversation episodes that have not yet been reviewed. +Each candidate carries any 'stashed_facts' that the per-turn extractor +recorded in its metadata. You also see, per candidate, the top-20 existing +semantic_facts that are nearest by embedding distance — use these to decide +whether a candidate fact duplicates something already known. + +HARD RULES — these override everything else: +1. NEVER promote: hospital data, patient names, 病歷號, 身分證字號, lab results, + diagnoses about real people, hospital policy specifics, hospital colleague names. +2. Pleasantries (好的/收到/早安/明白/thanks) → drop_as_noise. +3. Synthetic episodes (synthetic=true) — promote ONLY if they contain a NEW + commitment by 禮揚 (a meeting scheduled, a habit declared, a decision made). +4. If a candidate stashed_fact is semantically captured by an existing fact + (sim ≥ 0.92), prefer dedup_hits over creating a new row. +5. Conservative importance: most facts are 2; only use 4-5 for permanent + identity / family / strong commitments. 
+ +For each candidate, decide one of four actions: + + A. PROMOTE — new fact worth keeping. Emit into "promote". + valid_to: ISO date or null (null = permanent). + importance: 1-5 (default 2). + source_episode_ids: which candidate episodes contributed. + + B. DEDUP_HIT — candidate fact reaffirms an existing fact. Emit into + "dedup_hits" with the existing fact id and action="bump_hits" + (just touch the timestamp) or "refine_text" (mild rephrasing + worth applying). + + C. EXPIRE — an existing fact is contradicted or has gone stale. + Emit into "expire" with existing_fact_id, valid_to=today, reason. + + D. DROP_AS_NOISE — pleasantry, low signal, or duplicates within the + week. Emit into "drop_as_noise" with the episode ids and reason. + +Every candidate episode_id must appear under exactly one action above +(in promote.source_episode_ids OR dedup_hits.source_episode_ids OR +drop_as_noise.episode_ids). The "expire" section can reference NEW +existing_fact_ids that are independent of this week's candidates — +that's fine. + +Output ONE JSON object with this exact schema: + +{{ + "digest_id": "{digest_id}", + "candidate_episode_ids": [], + "promote": [ + {{ + "entity": "禮揚.", + "fact": "single-sentence statement", + "importance": 1..5, + "valid_from": "{today}", + "valid_to": "YYYY-MM-DD" | null, + "source_episode_ids": [int, ...] + }} + ], + "dedup_hits": [ + {{ + "existing_fact_id": int, + "action": "bump_hits" | "refine_text", + "refined_text": "string only if action=refine_text", + "source_episode_ids": [int, ...] + }} + ], + "expire": [ + {{ + "existing_fact_id": int, + "valid_to": "{today}", + "reason": "short reason" + }} + ], + "drop_as_noise": [ + {{ + "episode_ids": [int, ...], + "reason": "short reason" + }} + ] +}} + +CANDIDATES (week of {week_label}): +{candidates_block} + +NEAREST-NEIGHBOR EXISTING FACTS (one block per candidate stashed_fact): +{neighbors_block} +""" + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + + +@dataclass +class WeekDigest: + """Loaded form of pending_diffs/.json.""" + + digest_id: str + candidate_episode_ids: List[int] + promote: List[Dict[str, Any]] = field(default_factory=list) + dedup_hits: List[Dict[str, Any]] = field(default_factory=list) + expire: List[Dict[str, Any]] = field(default_factory=list) + drop_as_noise: List[Dict[str, Any]] = field(default_factory=list) + raw: Dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "WeekDigest": + return cls( + digest_id=data.get("digest_id", ""), + candidate_episode_ids=list(data.get("candidate_episode_ids") or []), + promote=list(data.get("promote") or []), + dedup_hits=list(data.get("dedup_hits") or []), + expire=list(data.get("expire") or []), + drop_as_noise=list(data.get("drop_as_noise") or []), + raw=data, + ) + + def to_dict(self) -> Dict[str, Any]: + return { + "digest_id": self.digest_id, + "candidate_episode_ids": self.candidate_episode_ids, + "promote": self.promote, + "dedup_hits": self.dedup_hits, + "expire": self.expire, + "drop_as_noise": self.drop_as_noise, + } + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / 
".hermes" + + +def pending_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "pending_diffs" + p.mkdir(parents=True, exist_ok=True) + return p + + +def archive_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "diff_archive" + p.mkdir(parents=True, exist_ok=True) + return p + + +def memory_log_path() -> Path: + return _resolve_hermes_home() / "logs" / "memory.log" + + +def db_path() -> Path: + return _resolve_hermes_home() / "memories" / "memory.db" + + +def digest_id_for(today: Optional[date] = None) -> str: + """ISO date based digest id: wk-YYYY-MM-DD.""" + today = today or date.today() + return f"wk-{today.isoformat()}" + + +def rejection_sentinel(digest_id: str) -> Path: + return pending_dir() / f"{digest_id}.rejected" + + +def pending_path(digest_id: str) -> Path: + return pending_dir() / f"{digest_id}.json" + + +# --------------------------------------------------------------------------- +# Shared logging +# --------------------------------------------------------------------------- + + +def _log_event(payload: Dict[str, Any]) -> None: + p = memory_log_path() + try: + p.parent.mkdir(parents=True, exist_ok=True) + with p.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Promotion: candidate gathering + neighbor search +# --------------------------------------------------------------------------- + + +def _read_pending_episodes(conn: sqlite3.Connection, days: int = PROMOTION_LOOKBACK_DAYS) -> List[Dict[str, Any]]: + rows = conn.execute( + """ + SELECT id, ts, channel, role, text, metadata, synthetic + FROM episodes + WHERE promoted_at IS NULL + AND ts > datetime('now', ?) + ORDER BY ts + """, + (f"-{days} days",), + ).fetchall() + out: List[Dict[str, Any]] = [] + for r in rows: + meta = {} + if r["metadata"]: + try: + meta = json.loads(r["metadata"]) + except json.JSONDecodeError: + meta = {} + out.append({ + "id": r["id"], + "ts": r["ts"], + "channel": r["channel"], + "role": r["role"], + "text": r["text"], + "synthetic": bool(r["synthetic"]), + "stashed_facts": meta.get("stashed_facts") or [], + }) + return out + + +async def _vec_search(conn: sqlite3.Connection, query: str, k: int = PROMOTION_NEIGHBOR_K) -> List[Dict[str, Any]]: + """Find k nearest existing semantic_facts to ``query`` text. + + Returns rows like {id, fact, entity, importance, sim}. + """ + [qvec] = await voyage_embed([query]) + rows = conn.execute( + """ + WITH knn AS ( + SELECT fact_id, distance + FROM vec_facts + WHERE embedding MATCH vec_int8(?) AND k = ? + ) + SELECT sf.id, sf.fact, sf.entity, sf.importance, + (1 - knn.distance) AS sim + FROM knn + JOIN semantic_facts sf ON sf.id = knn.fact_id + WHERE sf.state = 'active' + AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) + ORDER BY sim DESC + """, + (qvec, k), + ).fetchall() + return [dict(r) for r in rows] + + +def _format_candidates_block(candidates: List[Dict[str, Any]]) -> str: + """Render candidate episodes as a compact block for the prompt.""" + lines = [] + for c in candidates: + marker = "🤖" if c["synthetic"] else "👤" + text = c["text"].replace("\n", " ") + if len(text) > 200: + text = text[:200] + "..." + line = f"#{c['id']} [{c['ts']}] {marker} {c['channel']}/{c['role']}: {text}" + lines.append(line) + for sf in c["stashed_facts"]: + sf_text = sf.get("text", "") + sf_entity = sf.get("entity") or "?" 
+ sf_vth = sf.get("valid_to_hint") or "permanent" + lines.append( + f" ↳ stashed: [{sf_entity}] {sf_text[:120]} " + f"(importance={sf.get('importance', 2)}, valid_to_hint={sf_vth})" + ) + return "\n".join(lines) if lines else "(no candidates)" + + +def _format_neighbors_block(neighbors_by_fact: Dict[str, List[Dict[str, Any]]]) -> str: + """One section per candidate stashed_fact, listing its k nearest existing facts.""" + if not neighbors_by_fact: + return "(no candidate stashed_facts to compare against)" + sections = [] + for stashed_text, rows in neighbors_by_fact.items(): + header = f"--- nearest to: {stashed_text[:120]} ---" + body_lines = [ + f" #{r['id']} sim={r['sim']:.3f} [{r['entity'] or '—'}] {r['fact'][:120]}" + for r in rows[:5] # top 5 per stashed fact keeps prompt short + ] + sections.append(header + "\n" + "\n".join(body_lines)) + return "\n\n".join(sections) + + +# --------------------------------------------------------------------------- +# Kimi thinking call +# --------------------------------------------------------------------------- + + +class PromotionError(RuntimeError): + pass + + +async def _call_kimi_thinking(prompt: str, *, client: Optional[httpx.AsyncClient] = None) -> Dict[str, Any]: + """Single Kimi call producing the promotion diff JSON object. + + Tries Kimi-K2-Thinking first; falls back to Kimi-K2.5 on 4xx model-not-found. + """ + api_key = _read_synthetic_api_key() + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + + payload = { + "model": PROMOTION_MODEL, + "messages": [{"role": "user", "content": prompt}], + "temperature": PROMOTION_TEMPERATURE, + "max_tokens": PROMOTION_MAX_TOKENS, + "response_format": {"type": "json_object"}, + } + + owns = client is None + client = client or httpx.AsyncClient() + try: + try: + r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) + except httpx.RequestError as exc: + raise PromotionError(f"synthetic.new network: {exc}") from exc + if r.status_code == 404 or (r.status_code == 400 and "model" in r.text.lower()): + logger.warning("Kimi-Thinking unavailable; falling back to %s", PROMOTION_FALLBACK_MODEL) + payload["model"] = PROMOTION_FALLBACK_MODEL + r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) + if r.status_code >= 400: + raise PromotionError(f"synthetic.new {r.status_code}: {r.text[:300]}") + body = r.json() + finally: + if owns: + await client.aclose() + + content = ((body.get("choices") or [{}])[0].get("message") or {}).get("content", "") + try: + diff = json.loads(content) + except json.JSONDecodeError as exc: + raise PromotionError(f"Kimi returned non-JSON: {exc}: {content[:200]}") from exc + if not isinstance(diff, dict): + raise PromotionError(f"Kimi returned non-object: {type(diff).__name__}") + + usage = body.get("usage") or {} + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_promotion_kimi", + "model": payload["model"], + "tokens_in": usage.get("prompt_tokens"), + "tokens_out": usage.get("completion_tokens"), + }) + return diff + + +# --------------------------------------------------------------------------- +# Digest rendering (spec §5.4) +# --------------------------------------------------------------------------- + + +def render_digest_markdown(diff: WeekDigest, candidates: List[Dict[str, Any]]) -> str: + n_user = sum(1 for c in candidates if not c["synthetic"]) + n_synth = sum(1 for c in candidates if c["synthetic"]) + header = ( + f"# 📚 Weekly Memory 
Review — {diff.digest_id.removeprefix('wk-')}\n" + f"{len(candidates)} episodes scanned this week " + f"({n_user} user/assistant + {n_synth} cron-synthetic).\n" + f"24 h to reject via `/memreview reject {diff.digest_id}`; default approve.\n" + ) + + sections = [] + + if diff.promote: + lines = [f"## ⬆️ Promote to permanent ({len(diff.promote)})"] + for p in diff.promote: + entity = p.get("entity", "?") + fact = p.get("fact", "") + importance = p.get("importance", 2) + valid_to = p.get("valid_to") or "永久" + srcs = p.get("source_episode_ids") or [] + src_str = ( + ", ".join(f"#{i}" for i in srcs[:5]) + + (f" +{len(srcs)-5}" if len(srcs) > 5 else "") + ) + lines.append(f"- 🆕 **{entity}**: \"{fact}\"") + lines.append(f" evidence: {src_str} | importance {importance} | valid_to: {valid_to}") + sections.append("\n".join(lines)) + + if diff.dedup_hits: + lines = [f"## 🔁 Dedup confirmations ({len(diff.dedup_hits)})"] + for d in diff.dedup_hits: + srcs = d.get("source_episode_ids") or [] + action = d.get("action", "bump_hits") + lines.append( + f"- existing #{d.get('existing_fact_id')} ← {len(srcs)} reaffirmation(s), action={action}" + ) + if action == "refine_text" and d.get("refined_text"): + lines.append(f" refined → \"{d['refined_text']}\"") + sections.append("\n".join(lines)) + + if diff.expire: + lines = [f"## 🪦 Expiring ({len(diff.expire)})"] + for e in diff.expire: + lines.append( + f"- existing #{e.get('existing_fact_id')} → valid_to={e.get('valid_to')} " + f"({e.get('reason', '—')})" + ) + sections.append("\n".join(lines)) + + if diff.drop_as_noise: + lines = [f"## 🗑️ Skipped as noise ({len(diff.drop_as_noise)})"] + for n in diff.drop_as_noise: + ids = n.get("episode_ids") or [] + lines.append(f"- {len(ids)} episode(s): {n.get('reason', '—')}") + sections.append("\n".join(lines)) + + if not sections: + sections.append("_No actions this week._") + + return header + "\n" + "\n\n".join(sections) + + +# --------------------------------------------------------------------------- +# Discord posting +# --------------------------------------------------------------------------- + + +def discord_post(content: str, channel_id: str, *, bot_token: Optional[str] = None) -> bool: + """POST a message to a Discord channel. Returns True on success.""" + bot_token = bot_token or os.environ.get("DISCORD_BOT_TOKEN") + if not bot_token or not channel_id: + logger.warning("discord_post missing bot_token or channel_id") + return False + # Discord rejects messages over 2000 chars; chunk if needed. + chunks: List[str] = [] + remaining = content + while remaining: + if len(remaining) <= 1990: + chunks.append(remaining) + break + # Split on the last newline before 1990 chars to avoid mid-line breaks. + cut = remaining.rfind("\n", 0, 1990) + if cut <= 0: + cut = 1990 + chunks.append(remaining[:cut]) + remaining = remaining[cut:].lstrip("\n") + + headers = { + "Authorization": f"Bot {bot_token}", + "Content-Type": "application/json", + } + url = DISCORD_API.format(channel_id=channel_id) + ok = True + with httpx.Client(timeout=20.0) as c: + for chunk in chunks: + r = c.post(url, headers=headers, json={"content": chunk}) + if r.status_code >= 400: + logger.warning("discord_post failed: %s %s", r.status_code, r.text[:200]) + ok = False + break + return ok + + +def memory_review_channel_id() -> Optional[str]: + """Resolve the Discord #memory-review channel id. + + Priority: + 1. MEMORY_REVIEW_CHANNEL_ID env var (test override) + 2. 
~/.hermes/channel_directory.json -> platforms.discord (list) + -> first entry whose name == "memory-review" + 3. Legacy flat layouts (defensive — older installs) + """ + env = os.environ.get("MEMORY_REVIEW_CHANNEL_ID") + if env: + return env + cdir = _resolve_hermes_home() / "channel_directory.json" + if not cdir.exists(): + return None + try: + data = json.loads(cdir.read_text(encoding="utf-8")) + except json.JSONDecodeError: + return None + + # Canonical layout: platforms.discord is a list of channel dicts. + plats = (data.get("platforms") or {}) + discord_chans = plats.get("discord") + if isinstance(discord_chans, list): + for c in discord_chans: + if isinstance(c, dict) and c.get("name") == "memory-review": + return c.get("id") + + # Defensive fallbacks for older / hand-edited layouts. + if isinstance(data.get("memory-review"), str): + return data["memory-review"] + chans = data.get("channels") or {} + m = chans.get("memory-review") if isinstance(chans, dict) else None + if isinstance(m, str): + return m + if isinstance(m, dict): + return m.get("id") or m.get("channel_id") + return None + + +# --------------------------------------------------------------------------- +# Main entry points +# --------------------------------------------------------------------------- + + +async def weekly_promotion( + conn: sqlite3.Connection, + *, + today: Optional[date] = None, + dry_run: bool = False, + discord_channel_id: Optional[str] = None, + kimi_fn=None, # injectable for tests + embed_fn=None, +) -> Dict[str, Any]: + """Run one weekly promotion cycle. Returns a summary dict.""" + today = today or date.today() + digest_id = digest_id_for(today) + + # /mem off kill switch — skip the entire weekly cycle. + try: + from plugins.memreview import mem_off_active + if mem_off_active(): + return { + "digest_id": digest_id, + "candidates": 0, + "skipped": "/mem off active", + } + except Exception: + pass + + candidates = _read_pending_episodes(conn) + if not candidates: + return {"digest_id": digest_id, "candidates": 0, "skipped": "no candidates"} + + # Build neighbor map per stashed_fact across the week. + neighbors_by_fact: Dict[str, List[Dict[str, Any]]] = {} + for c in candidates: + for sf in c["stashed_facts"]: + text = (sf or {}).get("text") or "" + if not text or text in neighbors_by_fact: + continue + try: + neighbors_by_fact[text] = await _vec_search(conn, text) + except Exception as exc: + logger.warning("vec_search failed for stashed fact: %s", exc) + neighbors_by_fact[text] = [] + + prompt = PROMOTION_PROMPT.format( + digest_id=digest_id, + today=today.isoformat(), + week_label=today.isoformat(), + candidates_block=_format_candidates_block(candidates), + neighbors_block=_format_neighbors_block(neighbors_by_fact), + ) + + kimi = kimi_fn or _call_kimi_thinking + try: + diff_dict = await kimi(prompt) + except Exception as exc: + logger.exception("Kimi promotion call failed") + return {"digest_id": digest_id, "candidates": len(candidates), "error": str(exc)} + + # Trust-but-verify: ensure digest_id matches and required keys exist. 
+ diff_dict.setdefault("digest_id", digest_id) + diff_dict.setdefault("candidate_episode_ids", [c["id"] for c in candidates]) + for k in ("promote", "dedup_hits", "expire", "drop_as_noise"): + diff_dict.setdefault(k, []) + + digest = WeekDigest.from_dict(diff_dict) + markdown = render_digest_markdown(digest, candidates) + + summary = { + "digest_id": digest_id, + "candidates": len(candidates), + "promote": len(digest.promote), + "dedup_hits": len(digest.dedup_hits), + "expire": len(digest.expire), + "drop_as_noise": len(digest.drop_as_noise), + "dry_run": dry_run, + } + + if dry_run: + summary["markdown_preview"] = markdown + return summary + + # Persist diff before posting so a Discord outage doesn't lose the work. + pending_path(digest_id).write_text( + json.dumps(digest.to_dict(), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + posted = False + if discord_channel_id: + posted = discord_post(markdown, discord_channel_id) + summary["discord_posted"] = posted + + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_promotion", + "digest_id": digest_id, + "summary": summary, + }) + return summary + + +def _purge_old_pending(today: date) -> int: + """Delete pending diffs older than PENDING_DIFF_TTL_DAYS.""" + cutoff = today - timedelta(days=PENDING_DIFF_TTL_DAYS) + n = 0 + for f in pending_dir().glob("*.json"): + try: + stem = f.stem.removeprefix("wk-") + d = datetime.strptime(stem, "%Y-%m-%d").date() + except ValueError: + continue + if d < cutoff: + try: + f.unlink() + # Also remove associated rejection sentinel if any. + rs = f.with_suffix(".rejected") + if rs.exists(): + rs.unlink() + n += 1 + except OSError: + pass + return n + + +def _latest_pending_diff() -> Optional[Path]: + files = sorted(pending_dir().glob("wk-*.json")) + return files[-1] if files else None + + +def _archive_diff(diff_path: Path, status: str) -> None: + target = archive_dir() / f"{diff_path.stem}.{status}.json" + diff_path.replace(target) + + +async def _apply_diff_atomic( + conn: sqlite3.Connection, + digest: WeekDigest, + today: date, + *, + embed_fn=None, +) -> Dict[str, int]: + """Apply promote / dedup / expire in one transaction; stamp promoted_at. + + Embeddings for promoted facts are computed BEFORE the transaction + opens, so the writer lock is held only for the duration of the + SQL statements themselves (~ms). Holding it across the Voyage HTTP + round-trip would block concurrent writes from the hot path. + + Returns counts of each action performed. + """ + counts = {"promoted": 0, "dedup_bumped": 0, "dedup_refined": 0, "expired": 0, "stamped": 0} + + # Pre-embed all promote texts (outside transaction). + embed = embed_fn or voyage_embed + promote_blobs: List[Optional[bytes]] = [] + promote_texts = [p.get("fact", "") for p in digest.promote] + non_empty = [t for t in promote_texts if t] + if non_empty: + embeddings = await embed(non_empty) + # Map back to original positions (None for empty fact strings). + emb_iter = iter(embeddings) + promote_blobs = [next(emb_iter) if t else None for t in promote_texts] + else: + promote_blobs = [None] * len(promote_texts) + + try: + conn.execute("BEGIN") + + # 1. promote — INSERT new semantic_facts. Trigger sf_after_insert + # mirrors each row into vec_facts automatically. 
+ for p, blob in zip(digest.promote, promote_blobs): + fact = p.get("fact", "") + if not fact or blob is None: + continue + conn.execute( + """ + INSERT INTO semantic_facts + (entity, fact, embedding, importance, valid_from, valid_to, + source_episode_ids) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + p.get("entity"), + fact, + blob, + int(p.get("importance", 2) or 2), + p.get("valid_from") or today.isoformat(), + p.get("valid_to"), + json.dumps(p.get("source_episode_ids") or []), + ), + ) + counts["promoted"] += 1 + + # 2. dedup_hits — bump the existing fact's hits + last_seen, optional refine. + for d in digest.dedup_hits: + fid = d.get("existing_fact_id") + if fid is None: + continue + if d.get("action") == "refine_text" and d.get("refined_text"): + conn.execute( + "UPDATE semantic_facts SET fact = ?, last_seen = datetime('now'), " + "hits = hits + 1 WHERE id = ?", + (d["refined_text"], fid), + ) + counts["dedup_refined"] += 1 + else: + conn.execute( + "UPDATE semantic_facts SET last_seen = datetime('now'), " + "hits = hits + 1 WHERE id = ?", + (fid,), + ) + counts["dedup_bumped"] += 1 + + # 3. expire — set valid_to (caller chose date). + for e in digest.expire: + fid = e.get("existing_fact_id") + if fid is None: + continue + conn.execute( + "UPDATE semantic_facts SET valid_to = ? WHERE id = ?", + (e.get("valid_to") or today.isoformat(), fid), + ) + counts["expired"] += 1 + + # 4. stamp promoted_at on every candidate episode. + if digest.candidate_episode_ids: + placeholders = ",".join("?" * len(digest.candidate_episode_ids)) + conn.execute( + f"UPDATE episodes SET promoted_at = date('now') WHERE id IN ({placeholders})", + digest.candidate_episode_ids, + ) + counts["stamped"] = len(digest.candidate_episode_ids) + + conn.commit() + except Exception: + conn.rollback() + raise + return counts + + +async def weekly_apply( + conn: sqlite3.Connection, + *, + today: Optional[date] = None, + embed_fn=None, +) -> Dict[str, Any]: + """Apply the latest pending diff (or archive-as-rejected). Returns summary.""" + today = today or date.today() + + purged = _purge_old_pending(today) + diff_path = _latest_pending_diff() + + if not diff_path: + return {"purged": purged, "applied": False, "reason": "no pending diff"} + + digest_id = diff_path.stem + sentinel = rejection_sentinel(digest_id) + if sentinel.exists(): + _archive_diff(diff_path, "rejected") + try: + sentinel.unlink() + except OSError: + pass + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_apply", + "digest_id": digest_id, + "result": "rejected", + }) + return {"purged": purged, "applied": False, "digest_id": digest_id, "reason": "rejected"} + + try: + diff_dict = json.loads(diff_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + return {"purged": purged, "applied": False, "error": f"diff load: {exc}"} + + digest = WeekDigest.from_dict(diff_dict) + counts = await _apply_diff_atomic(conn, digest, today, embed_fn=embed_fn) + _archive_diff(diff_path, "applied") + + summary = { + "purged": purged, + "applied": True, + "digest_id": digest_id, + **counts, + } + _log_event({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "cmd": "weekly_apply", + **summary, + }) + return summary diff --git a/plugins/memory/sqlite_vec/read.py b/plugins/memory/sqlite_vec/read.py new file mode 100644 index 00000000000..05a7e5b66d9 --- /dev/null +++ b/plugins/memory/sqlite_vec/read.py @@ -0,0 +1,175 @@ +"""Read path for the sqlite_vec memory plugin. 
+ +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §4 + +Two-step retrieval: + 1. vec0 prefilter: top k=50 by cosine distance on int8 embeddings + 2. SQL CTE rerank: score = (1 - distance) * 0.7 + exp(-age_days/90) * 0.3 + filter active state + valid_to NULL or future, ORDER BY score DESC LIMIT k + +`hits` bumping happens fire-and-forget after the reply is sent (caller's +responsibility to schedule). Errors are swallowed with a warning. + +p95 query latency is logged to ~/.hermes/logs/memory.log. The log path is +overridable via the constructor for testing. +""" + +from __future__ import annotations + +import logging +import sqlite3 +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional + +from .embed import voyage_embed + +logger = logging.getLogger(__name__) + +DEFAULT_K = 8 +PREFILTER_K = 50 + + +def _default_log_path() -> Path: + """Resolve the memory.log path lazily so HERMES_HOME (e.g. /opt/data + inside the container) wins over the worker thread's Path.home().""" + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) / "logs" / "memory.log" + except Exception: + return Path.home() / ".hermes" / "logs" / "memory.log" + + +DEFAULT_LOG_PATH = _default_log_path() + +# Spec §4 — SQL is locked. Do not edit weights without updating the spec +# and re-running the B1 worked example. +RETRIEVE_SQL = """ +WITH knn AS ( + SELECT fact_id, distance + FROM vec_facts + WHERE embedding MATCH vec_int8(?) AND k = {prefilter_k} +) +SELECT sf.id, sf.fact, sf.entity, sf.created_at, sf.importance, + (1 - knn.distance) AS sim, + (julianday('now') - julianday(sf.created_at)) AS age_days, + (1 - knn.distance) * 0.7 + + exp(-(julianday('now') - julianday(sf.created_at)) / 90.0) * 0.3 AS score +FROM knn +JOIN semantic_facts sf ON sf.id = knn.fact_id +WHERE sf.state = 'active' + AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) +ORDER BY score DESC +LIMIT ?; +""" + + +@dataclass +class Fact: + """A retrieved fact with score breakdown for prompt-injection or /memdebug.""" + + id: int + fact: str + entity: Optional[str] + created_at: str + importance: int + sim: float + age_days: float + score: float + + +def _append_log(log_path: Path, payload: dict) -> None: + """Append one JSON line to memory.log; never raise into the read path.""" + import json + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False) + "\n") + except OSError as exc: + logger.warning("memory.log write failed: %s", exc) + + +async def read_memory( + query: str, + conn: sqlite3.Connection, + *, + k: int = DEFAULT_K, + log_path: Path = DEFAULT_LOG_PATH, +) -> List[Fact]: + """Embed `query`, retrieve top-`k` facts, log latency, return Fact list.""" + [qvec] = await voyage_embed([query]) + + sql = RETRIEVE_SQL.format(prefilter_k=PREFILTER_K) + t0 = time.perf_counter() + rows = conn.execute(sql, (qvec, k)).fetchall() + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + + facts = [ + Fact( + id=row["id"], + fact=row["fact"], + entity=row["entity"], + created_at=row["created_at"], + importance=row["importance"], + sim=float(row["sim"]), + age_days=float(row["age_days"]), + score=float(row["score"]), + ) + for row in rows + ] + + _append_log( + log_path, + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "q": query, + "k": k, + "n": len(facts), + "sql_ms": round(elapsed_ms, 2), + }, + ) + return facts + + +async def bump_hits(fact_ids: 
Iterable[int], conn: sqlite3.Connection) -> None: + """Fire-and-forget UPDATE; swallow errors with a warning log. + + Caller must wrap with ``asyncio.create_task()`` to avoid blocking the + reply. Per spec §4 hits-bump runs AFTER discord_send, so we keep this + cheap (single UPDATE … IN (…)) and never propagate errors. + """ + ids = list(fact_ids) + if not ids: + return + placeholders = ",".join("?" * len(ids)) + try: + conn.execute( + f"UPDATE semantic_facts SET hits = hits + 1, " + f"last_seen = datetime('now') WHERE id IN ({placeholders})", + ids, + ) + conn.commit() + except sqlite3.Error as exc: + logger.warning("bump_hits swallowed error for %d ids: %s", len(ids), exc) + + +def format_facts_for_prompt(facts: List[Fact], *, with_meta: bool = False) -> str: + """Render top-k facts as a markdown bullet list for system-prompt injection. + + Used by SqliteVecMemoryProvider.prefetch() (with_meta=True per W2-3 + spec) and /memdebug (with_meta=False for compact display). + + No header — the caller owns the section title. + """ + if not facts: + return "" + lines = [] + for f in facts: + prefix = f"[{f.entity}] " if f.entity else "" + suffix = ( + f" (importance: {f.importance}, age: {int(f.age_days)} days)" + if with_meta else "" + ) + lines.append(f"- {prefix}{f.fact}{suffix}") + return "\n".join(lines) diff --git a/plugins/memory/sqlite_vec/schema.sql b/plugins/memory/sqlite_vec/schema.sql new file mode 100644 index 00000000000..5910309543b --- /dev/null +++ b/plugins/memory/sqlite_vec/schema.sql @@ -0,0 +1,41 @@ +-- Hermes V3 memory schema — episodes (hot raw) + semantic_facts (cold curated) +-- Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §3 + +PRAGMA journal_mode = WAL; +PRAGMA synchronous = NORMAL; + +-- Hot tier: raw turn-by-turn record. All Discord turns + cron synthetic land here. +CREATE TABLE IF NOT EXISTS episodes ( + id INTEGER PRIMARY KEY, + ts TEXT NOT NULL, + channel TEXT NOT NULL, + external_id TEXT NOT NULL, + role TEXT NOT NULL CHECK (role IN ('user', 'assistant')), + text TEXT NOT NULL, + synthetic INTEGER NOT NULL DEFAULT 0, + embedding BLOB, + metadata TEXT, + promoted_at TEXT, + UNIQUE(channel, external_id) +); +CREATE INDEX IF NOT EXISTS idx_episodes_ts ON episodes(ts); +CREATE INDEX IF NOT EXISTS idx_episodes_promoted_pending + ON episodes(promoted_at, ts) WHERE promoted_at IS NULL; + +-- Cold tier: curated facts. Cattia's actual working memory queries this. +CREATE TABLE IF NOT EXISTS semantic_facts ( + id INTEGER PRIMARY KEY, + entity TEXT, + fact TEXT NOT NULL, + embedding BLOB NOT NULL, + source_episode_ids TEXT, + importance INTEGER DEFAULT 2, + hits INTEGER DEFAULT 0, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + last_seen TEXT, + state TEXT DEFAULT 'active' CHECK (state IN ('active', 'archived')), + valid_from TEXT NOT NULL DEFAULT (date('now')), + valid_to TEXT +); +CREATE INDEX IF NOT EXISTS idx_facts_entity ON semantic_facts(entity); +CREATE INDEX IF NOT EXISTS idx_facts_active ON semantic_facts(state, valid_to); diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py new file mode 100644 index 00000000000..97ec4c3e061 --- /dev/null +++ b/plugins/memory/sqlite_vec/store.py @@ -0,0 +1,82 @@ +"""sqlite-vec backed memory store: schema bootstrap + connection helper. + +W1 scope: schema only. Read/write paths come in W2/W3. 
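+
+Typical bootstrap (a minimal sketch; the path shown is illustrative)::
+
+ from pathlib import Path
+ from plugins.memory.sqlite_vec.store import init_db
+
+ # creates tables, the vec0 virtual table, and the sync triggers if missing
+ conn = init_db(Path("/opt/data/memories/memory.db"))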
+""" + +from __future__ import annotations + +import logging +import sqlite3 +from pathlib import Path +from typing import Optional + +import sqlite_vec + +logger = logging.getLogger(__name__) + +VEC_DIM = 512 # voyage-3.5-lite output dimension we store + +_SCHEMA_PATH = Path(__file__).parent / "schema.sql" + +_VEC_VIRTUAL_TABLE_SQL = f""" +CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts USING vec0( + fact_id INTEGER PRIMARY KEY, + embedding int8[{VEC_DIM}] distance_metric=cosine +); +""" + +# Triggers keep vec_facts in sync with semantic_facts. embedding is stored as +# raw int8 BLOB (512 bytes) on the relational side; vec0 needs vec_int8() +# wrapper to interpret it (without it, vec0 assumes float32). +_TRIGGERS_SQL = """ +CREATE TRIGGER IF NOT EXISTS sf_after_insert +AFTER INSERT ON semantic_facts +BEGIN + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); +END; + +CREATE TRIGGER IF NOT EXISTS sf_after_update_embedding +AFTER UPDATE OF embedding ON semantic_facts +BEGIN + -- vec0 int8 columns reject UPDATE even via vec_int8(); use DELETE+INSERT. + DELETE FROM vec_facts WHERE fact_id = NEW.id; + INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); +END; + +CREATE TRIGGER IF NOT EXISTS sf_after_delete +AFTER DELETE ON semantic_facts +BEGIN + DELETE FROM vec_facts WHERE fact_id = OLD.id; +END; +""" + + +def open_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: + """Open a sqlite connection with sqlite-vec extension loaded. + + Pass ``check_same_thread=False`` when the connection will be reused + across threads (e.g. the provider's prefetch worker pool). Caller is + then responsible for serializing access via a lock. + """ + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(db_path), check_same_thread=check_same_thread) + conn.enable_load_extension(True) + sqlite_vec.load(conn) + conn.enable_load_extension(False) + conn.row_factory = sqlite3.Row + return conn + + +def bootstrap_schema(conn: sqlite3.Connection) -> None: + """Idempotently create tables, indexes, vec0 virtual table, and triggers.""" + conn.executescript(_SCHEMA_PATH.read_text()) + conn.executescript(_VEC_VIRTUAL_TABLE_SQL) + conn.executescript(_TRIGGERS_SQL) + conn.commit() + + +def init_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: + """Open + bootstrap. Returns a ready-to-use connection.""" + conn = open_db(db_path, check_same_thread=check_same_thread) + bootstrap_schema(conn) + return conn diff --git a/plugins/memory/sqlite_vec/write.py b/plugins/memory/sqlite_vec/write.py new file mode 100644 index 00000000000..227f2b35e1a --- /dev/null +++ b/plugins/memory/sqlite_vec/write.py @@ -0,0 +1,251 @@ +"""Per-turn write-back into the sqlite_vec memory store (W3-2). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.1. + +Hot-path flow per Discord turn: + + 1. PHI gate — if ``channel`` is in PHI_BLACKLIST_CHANNELS, raw episode + rows still land but extraction is skipped (no PHI to the cloud LLM). + 2. Extract — kimi_extract() returns 0..N ExtractedFacts. + 3. Embed — voyage_embed([user_msg, reply, *fact_texts]) in one batch. + 4. INSERT 2 episode rows (user, assistant) with + ``ON CONFLICT(channel, external_id) DO NOTHING`` for idempotency + under Discord redelivery / cron retries / container restarts. + 5. Fast-track facts whose ``valid_to_hint`` parses to ≤ today + 30d + directly into ``semantic_facts`` (the trigger mirrors them into + ``vec_facts``). 
Longer-lived / undated facts are JSON-stashed in + ``episodes.metadata.stashed_facts`` for W3-3 weekly_promotion. + 6. Any exception → append a JSONL line to + ``~/.hermes/logs/memory_write_failures.jsonl`` and swallow. + The reply was already sent before this fired; we never propagate. + +The function is fire-and-forget: the caller schedules it via +``asyncio.create_task`` (or in our case, a worker thread the provider +spawns) AFTER ``discord_send`` so write latency cannot stall the user. +""" + +from __future__ import annotations + +import json +import logging +import sqlite3 +import time +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +from .extract import ( + PHI_BLACKLIST_CHANNELS, + ExtractedFact, + kimi_extract, +) + +logger = logging.getLogger(__name__) + +# Spec §5.3 — fast-track threshold (raised from 7d to 30d): facts that +# expire within ~1 month land directly in semantic_facts so they're +# usable on the next turn instead of waiting up to 7 days for the +# weekly review. +FAST_TRACK_DAYS = 30 + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +def _failure_log_path() -> Path: + return _resolve_hermes_home() / "logs" / "memory_write_failures.jsonl" + + +def _append_failure(payload: Dict[str, Any], log_path: Optional[Path] = None) -> None: + log_path = log_path or _failure_log_path() + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") + except OSError as exc: + logger.warning("memory_write_failures.jsonl write failed: %s", exc) + + +def _parse_valid_to_hint(hint: Optional[str]) -> Optional[date]: + """Parse 'YYYY-MM-DD' tolerantly. Return None on bad / missing input.""" + if not hint: + return None + try: + return datetime.strptime(hint.strip(), "%Y-%m-%d").date() + except (ValueError, TypeError): + return None + + +def _fact_should_fast_track(fact: ExtractedFact, today: date) -> bool: + """True iff fact has a valid_to_hint within FAST_TRACK_DAYS of today.""" + expiry = _parse_valid_to_hint(fact.valid_to_hint) + if not expiry: + return False + return expiry <= today + timedelta(days=FAST_TRACK_DAYS) + + +# --------------------------------------------------------------------------- +# Main entry point +# --------------------------------------------------------------------------- + + +async def write_episode( + user_msg: str, + reply: str, + channel: str, + msg_id: str, + ts: str, + conn: sqlite3.Connection, + *, + embed_fn: Optional[Callable] = None, + extract_fn: Optional[Callable] = None, + failure_log_path: Optional[Path] = None, +) -> Dict[str, Any]: + """Persist one Discord turn to the memory store. + + Returns a summary dict for caller logging: + {episodes: 0|1|2, fast_tracked: N, stashed: N, skipped_extract: bool} + + Never raises. Errors land in ``memory_write_failures.jsonl``. + """ + summary: Dict[str, Any] = { + "episodes": 0, + "fast_tracked": 0, + "stashed": 0, + "skipped_extract": False, + } + skip_extract = channel in PHI_BLACKLIST_CHANNELS + summary["skipped_extract"] = skip_extract + + try: + # ---- 1. 
extract (skip on PHI channel) + if skip_extract or not (extract_fn or kimi_extract): + facts: List[ExtractedFact] = [] + else: + extractor = extract_fn or kimi_extract + try: + facts = await extractor(user_msg, reply, channel, ts) + except Exception as exc: + # Extract failure is non-fatal — we still record the + # raw episode so weekly_promotion can re-extract later. + logger.warning("kimi_extract failed; continuing without facts: %s", exc) + facts = [] + + # ---- 2. embed (raw turn + each fact text in one call) + embed = embed_fn + if embed is None: + from .embed import voyage_embed + embed = voyage_embed + + texts_to_embed = [user_msg, reply] + [f.text for f in facts] + # Filter empty strings — Voyage rejects them. + non_empty = [(i, t) for i, t in enumerate(texts_to_embed) if t and t.strip()] + if non_empty: + indices, texts = zip(*non_empty) + blobs_dense = await embed(list(texts)) + # Re-densify back to original positions; missing slots get None. + blobs: List[Optional[bytes]] = [None] * len(texts_to_embed) + for slot, blob in zip(indices, blobs_dense): + blobs[slot] = blob + else: + blobs = [None] * len(texts_to_embed) + + user_blob, reply_blob = blobs[0], blobs[1] + fact_blobs = blobs[2:] + + # ---- 3. partition facts into fast-track vs stash BEFORE INSERT + today = date.today() + fast_track: List[tuple] = [] # [(fact, blob), ...] + stashed: List[Dict[str, Any]] = [] # JSON-serialisable dicts + for f, blob in zip(facts, fact_blobs): + if _fact_should_fast_track(f, today): + if blob is not None: + fast_track.append((f, blob)) + else: + # No embedding for this fact → can't insert into + # semantic_facts (embedding is NOT NULL). Demote to stash. + stashed.append(f.raw or _fact_to_dict(f)) + else: + stashed.append(f.raw or _fact_to_dict(f)) + + metadata = {"stashed_facts": stashed} if stashed else {} + metadata_json = json.dumps(metadata, ensure_ascii=False) if metadata else None + + # ---- 4. INSERT episodes (atomic with fast-track inserts) + try: + conn.execute("BEGIN") + ep_rows = [ + (ts, channel, msg_id + ":user", "user", user_msg, 0, user_blob, metadata_json), + (ts, channel, msg_id + ":asst", "assistant", reply, 0, reply_blob, metadata_json), + ] + for row in ep_rows: + cur = conn.execute( + """ + INSERT INTO episodes + (ts, channel, external_id, role, text, synthetic, embedding, metadata) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(channel, external_id) DO NOTHING + """, + row, + ) + if cur.rowcount: + summary["episodes"] += 1 + + # ---- 5. fast-track facts → semantic_facts (trigger mirrors to vec_facts) + for f, blob in fast_track: + conn.execute( + """ + INSERT INTO semantic_facts + (entity, fact, embedding, importance, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + ( + f.entity, + f.text, + blob, + f.importance, + today.isoformat(), + f.valid_to_hint, + ), + ) + summary["fast_tracked"] += 1 + + summary["stashed"] = len(stashed) + conn.commit() + except Exception: + conn.rollback() + raise + + return summary + + except Exception as exc: + logger.warning("write_episode failed for msg_id=%s: %s", msg_id, exc) + _append_failure( + { + "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), + "channel": channel, + "msg_id": msg_id, + "user": user_msg, + "reply": reply, + "error": str(exc), + "summary_so_far": summary, + }, + log_path=failure_log_path, + ) + return summary + + +def _fact_to_dict(f: ExtractedFact) -> Dict[str, Any]: + """Serialise an ExtractedFact for stashing in episodes.metadata.""" + return { + "type": f.type, + "text": f.text, + "entity": f.entity, + "importance": f.importance, + "valid_to_hint": f.valid_to_hint, + } diff --git a/plugins/memreview/__init__.py b/plugins/memreview/__init__.py new file mode 100644 index 00000000000..d8794fe74db --- /dev/null +++ b/plugins/memreview/__init__.py @@ -0,0 +1,227 @@ +"""``/memreview`` and ``/mem`` slash commands — admin / kill-switch (W3-4). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.1. + +Two commands: + + /memreview reject - per-digest opt-out. Writes a sentinel + file ``pending_diffs/.rejected`` + that ``weekly_apply`` reads on Monday + morning and archives the diff without + applying. + + /mem off - global kill switch. Writes ``MEM_OFF`` + in HERMES_HOME. Both ``write_episode`` + (hot path) and ``weekly_promotion`` + (cold path) check for this file at the + top of each call and short-circuit to + a no-op + warning log. + + /mem on - reverses the kill switch by deleting + ``MEM_OFF`` (companion to /mem off). + + /mem status - prints whether the kill switch is set + and lists pending diffs awaiting apply. + +Why slash commands and not Discord reactions: spec §7.1 explicitly chose +slash because reactions don't reliably trigger webhook events across all +bot adapters (silent kill-switch failure mode that's worse than no +switch). +""" + +from __future__ import annotations + +import asyncio +import logging +import os +import re +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + + +def _resolve_hermes_home() -> Path: + try: + from hermes_constants import get_hermes_home + return Path(get_hermes_home()) + except Exception: + return Path.home() / ".hermes" + + +def _pending_dir() -> Path: + p = _resolve_hermes_home() / "memories" / "pending_diffs" + p.mkdir(parents=True, exist_ok=True) + return p + + +def _archive_dir() -> Path: + return _resolve_hermes_home() / "memories" / "diff_archive" + + +def mem_off_path() -> Path: + """The global kill-switch sentinel.""" + return _resolve_hermes_home() / "MEM_OFF" + + +def mem_off_active() -> bool: + """Public predicate consumed by promotion.py + provider.sync_turn.""" + return mem_off_path().exists() + + +# --------------------------------------------------------------------------- +# /memreview +# --------------------------------------------------------------------------- + + +_MEMREVIEW_HELP = ( + "**/memreview** — review or reject the weekly memory promotion digest.\n" + "Usage:\n" + " `/memreview reject ` — write the rejection sentinel; " + "Monday's apply will archive the diff without applying it.\n" + " `/memreview pending` — list digests currently awaiting apply.\n" + " `/memreview status` — same as `pending`." 
+) + + +_DIGEST_ID_RE = re.compile(r"^wk-\d{4}-\d{2}-\d{2}$") + + +def _list_pending_diffs() -> List[str]: + out = [] + for f in sorted(_pending_dir().glob("wk-*.json")): + rejected = f.with_suffix(".rejected").exists() + flag = " (rejected — will be archived Mon)" if rejected else "" + out.append(f"- `{f.stem}`{flag}") + return out + + +def _handle_memreview(raw_args: str) -> str: + args = (raw_args or "").strip() + if not args: + return _MEMREVIEW_HELP + + parts = args.split(maxsplit=1) + sub = parts[0].lower() + + if sub in ("pending", "status", "list"): + items = _list_pending_diffs() + if not items: + return "**/memreview** — no pending diffs." + return "**/memreview** — pending diffs:\n" + "\n".join(items) + + if sub == "reject": + rest = parts[1].strip() if len(parts) > 1 else "" + if not _DIGEST_ID_RE.match(rest): + return ( + f"**/memreview reject** — digest_id must look like " + f"`wk-YYYY-MM-DD`. Got: `{rest!r}`" + ) + diff_path = _pending_dir() / f"{rest}.json" + if not diff_path.exists(): + return ( + f"**/memreview reject** — no pending diff named `{rest}`. " + f"Use `/memreview pending` to list available digest_ids." + ) + sentinel = _pending_dir() / f"{rest}.rejected" + try: + sentinel.write_text( + f"rejected via /memreview at {asyncio.get_event_loop().time()}", + encoding="utf-8", + ) + except (OSError, RuntimeError): + # No running loop in some sync entry paths — write a static marker. + try: + sentinel.write_text("rejected", encoding="utf-8") + except OSError as exc: + return f"**/memreview reject** error: cannot write sentinel: `{exc}`" + return ( + f"**Rejected.** Pending diff `{rest}` will be archived without " + f"applying. Episodes stay pending for next Sunday's review." + ) + + return _MEMREVIEW_HELP + + +# --------------------------------------------------------------------------- +# /mem +# --------------------------------------------------------------------------- + + +_MEM_HELP = ( + "**/mem** — global memory write-back kill switch.\n" + "Usage:\n" + " `/mem off` — disable per-turn write-back AND weekly promotion.\n" + " `/mem on` — re-enable.\n" + " `/mem status` — show whether the kill switch is currently set." +) + + +def _handle_mem(raw_args: str) -> str: + args = (raw_args or "").strip().lower() + if not args: + return _MEM_HELP + + sub = args.split()[0] + + if sub == "off": + try: + mem_off_path().write_text( + "set via /mem off\n", encoding="utf-8" + ) + except OSError as exc: + return f"**/mem off** error: `{exc}`" + return ( + "**🔇 Memory write-back disabled.**\n" + "Per-turn `write_episode` and weekly promotion will short-circuit " + "until you run `/mem on`. Read path is unaffected — Cattia still " + "retrieves from existing facts." + ) + + if sub == "on": + p = mem_off_path() + if not p.exists(): + return "**/mem on** — write-back was already enabled." + try: + p.unlink() + except OSError as exc: + return f"**/mem on** error: `{exc}`" + return "**🔊 Memory write-back enabled.** Hot + cold paths resume." 
+ + if sub == "status": + active = mem_off_active() + pending = _list_pending_diffs() + lines = [ + "**/mem status**", + f" write-back: {'🔇 OFF' if active else '🔊 ON'}", + f" MEM_OFF sentinel: `{mem_off_path()}`" + f" {'(present)' if active else '(absent)'}", + ] + if pending: + lines.append(" pending diffs:") + lines.extend(" " + p for p in pending) + else: + lines.append(" pending diffs: (none)") + return "\n".join(lines) + + return _MEM_HELP + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + ctx.register_command( + "memreview", + handler=_handle_memreview, + description="Review or reject the weekly Hermes memory promotion digest.", + args_hint="reject | pending | status", + ) + ctx.register_command( + "mem", + handler=_handle_mem, + description="Hermes memory kill switch (off / on / status).", + args_hint="off | on | status", + ) diff --git a/plugins/memreview/plugin.yaml b/plugins/memreview/plugin.yaml new file mode 100644 index 00000000000..66252043f4b --- /dev/null +++ b/plugins/memreview/plugin.yaml @@ -0,0 +1,4 @@ +name: memreview +version: 0.1.0 +description: "/memreview reject + /mem kill switch — admin slash commands for the Hermes V3 memory system (W3-4)." +author: "Li-yang Chen" diff --git a/scripts/cron/README.md b/scripts/cron/README.md new file mode 100644 index 00000000000..af5227bdc80 --- /dev/null +++ b/scripts/cron/README.md @@ -0,0 +1,19 @@ +# Hermes V3 cron scripts + +These scripts are invoked by hermes-agent's cron scheduler. The scheduler +hardcodes `HERMES_HOME/scripts/` as the only path it will exec from +(security: prevents arbitrary script execution via path traversal), so +runtime copies must live at `~/.hermes/scripts/.py` on each host. + +The canonical source lives here in version control. Deploy via: + + cp scripts/cron/weekly_promotion.py ~/.hermes/scripts/ + cp scripts/cron/weekly_apply.py ~/.hermes/scripts/ + +Cron entries are added by adding rows to `~/.hermes/cron/jobs.json` +(see the `Hermes Weekly Memory Promotion` / `Hermes Weekly Memory Apply` +entries; expressions are in UTC — `0 19 * * 6` = Sun 03:00 UTC+8). + +Both scripts emit `{"wakeAgent": false}` as the last stdout line so the +cron framework skips the agent run — delivery happens inside the script +via Discord HTTP POST. diff --git a/scripts/cron/weekly_apply.py b/scripts/cron/weekly_apply.py new file mode 100755 index 00000000000..14d1a18550e --- /dev/null +++ b/scripts/cron/weekly_apply.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +"""Cron entry point: Mon 03:00 UTC+8 weekly memory apply. + +Loads the latest pending diff (purges any older than 14 days first), +checks for a rejection sentinel file (written by /memreview reject), +and either archives the diff as rejected or applies its +promote / dedup / expire actions atomically and stamps +``episodes.promoted_at`` on the candidate rows. 
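+
+As in weekly_promotion.py, stdout ends with ``{"wakeAgent": false}`` so the
+cron framework skips the agent run after the script has handled its own
+reporting.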
+""" + +from __future__ import annotations + +import asyncio +import json +import sys + +sys.path.insert(0, "/opt/hermes") + +try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) +except Exception: + pass + +from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 + db_path, + weekly_apply, +) +from plugins.memory.sqlite_vec.store import open_db # noqa: E402 + + +def main() -> int: + conn = open_db(db_path(), check_same_thread=False) + summary = asyncio.run(weekly_apply(conn)) + print(json.dumps(summary, ensure_ascii=False, default=str)) + print('{"wakeAgent": false}') + conn.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/cron/weekly_promotion.py b/scripts/cron/weekly_promotion.py new file mode 100755 index 00000000000..55d86d1aa00 --- /dev/null +++ b/scripts/cron/weekly_promotion.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Cron entry point: Sun 03:00 UTC+8 weekly memory promotion. + +Reads the last 7 days of pending episodes, runs one Kimi-thinking call to +produce a promotion diff, persists the diff as +~/.hermes/memories/pending_diffs/wk-YYYY-MM-DD.json, renders the digest +markdown, and posts it to #memory-review for user review. + +Stdout ends with ``{"wakeAgent": false}`` so the cron framework skips +the agent run after we've handled delivery ourselves. +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from pathlib import Path + +# The hermes container exposes the source tree at /opt/hermes but does not +# add it to sys.path; cron exec'd scripts inherit nothing. Insert it +# manually so plugin imports resolve. +sys.path.insert(0, "/opt/hermes") + +# Load the user's .env so VOYAGE_API_KEY / DISCORD_BOT_TOKEN reach the +# plugin code; mirrors what run_agent.py does at module import. +try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) +except Exception: + pass + +from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 + db_path, + memory_review_channel_id, + weekly_promotion, +) +from plugins.memory.sqlite_vec.store import open_db # noqa: E402 + + +def main() -> int: + conn = open_db(db_path(), check_same_thread=False) + channel_id = memory_review_channel_id() + summary = asyncio.run( + weekly_promotion(conn, discord_channel_id=channel_id) + ) + # Print human-readable summary to stdout for cron logs. + print(json.dumps(summary, ensure_ascii=False, default=str)) + # Wake-gate: skip the agent run. + print('{"wakeAgent": false}') + conn.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/cutover/cutover.sh b/scripts/cutover/cutover.sh new file mode 100755 index 00000000000..268e7848e85 --- /dev/null +++ b/scripts/cutover/cutover.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +# W4-1 cutover script — run on chococlaw when you've decided to retire +# MEMORY.md flat-file injection and commit fully to the sqlite_vec +# memory plugin. +# +# Spec target date: 2026-05-24, *after* observing at least one successful +# weekly review cycle on the new system. +# +# Idempotent — safe to re-run if interrupted partway. 
+# +# Usage: +# ./scripts/cutover/cutover.sh # dry run, prints planned actions +# ./scripts/cutover/cutover.sh --commit # actually do the work + +set -euo pipefail + +DRY_RUN=true +if [[ "${1:-}" == "--commit" ]]; then + DRY_RUN=false +fi + +today() { date -u +%Y-%m-%d; } +say() { echo "[cutover] $*"; } +do_or_say() { + if $DRY_RUN; then + say "(dry-run) $*" + else + say "$*" + eval "$@" + fi +} + +HOME_DIR="${HERMES_HOME:-$HOME/.hermes}" +say "HERMES_HOME = ${HOME_DIR}" + +# ---- 1. Pre-flight checks -------------------------------------------------- + +say "1. Pre-flight checks" +[[ -d "${HOME_DIR}/memories" ]] || { say "ERR no ${HOME_DIR}/memories"; exit 1; } +[[ -f "${HOME_DIR}/memories/memory.db" ]] || { say "ERR no memory.db — W1 hasn't shipped"; exit 1; } +say " ✓ memory.db present" + +if ! command -v docker >/dev/null; then + say "WARN docker not on PATH — DB queries below will be skipped" +fi + +# Confirm the new system has been writing recently (last 7 days). +if command -v docker >/dev/null; then + ep_recent=$(docker exec hermes /opt/hermes/.venv/bin/python3 -c " +import sqlite3 +conn = sqlite3.connect('/opt/data/memories/memory.db') +n = conn.execute(\"SELECT count(*) FROM episodes WHERE ts > datetime('now','-7 days')\").fetchone()[0] +print(n) +" 2>/dev/null || echo 0) + if [[ "${ep_recent}" -lt 5 ]]; then + say "WARN only ${ep_recent} episodes in the last 7 days. Either the gateway" + say " hasn't been used much OR the write path isn't actually firing." + say " Fix that BEFORE cutover, or the new system has nothing to retrieve." + else + say " ✓ ${ep_recent} episodes recorded in the last 7 days" + fi +fi + +# ---- 2. Archive MEMORY.md -------------------------------------------------- + +ARCHIVE_NAME="MEMORY.md.archive-$(today)" +SRC="${HOME_DIR}/memories/MEMORY.md" +DST="${HOME_DIR}/memories/${ARCHIVE_NAME}" + +say "2. Archive MEMORY.md → ${ARCHIVE_NAME}" +if [[ ! -f "${SRC}" ]]; then + say " - ${SRC} does not exist — already archived?" +else + if [[ -f "${DST}" ]]; then + say " - ${DST} already exists — refusing to overwrite" + else + do_or_say "mv '${SRC}' '${DST}'" + do_or_say "chmod 444 '${DST}'" + fi +fi + +# ---- 3. config.yaml: confirm provider=sqlite_vec --------------------------- + +say "3. Confirm config.yaml memory.provider == sqlite_vec" +cfg="${HOME_DIR}/config.yaml" +if grep -qE '^[[:space:]]*provider:[[:space:]]*sqlite_vec' "${cfg}" 2>/dev/null; then + say " ✓ already set to sqlite_vec" +else + say " - provider not set — please edit ${cfg} manually:" + say " memory:" + say " provider: sqlite_vec" +fi + +# ---- 4. Disable legacy memory crons ---------------------------------------- + +say "4. Disable legacy memory crons in jobs.json" +do_or_say "/usr/bin/env python3 - <<'PY' +import json, pathlib +p = pathlib.Path('${HOME_DIR}/cron/jobs.json') +if not p.exists(): + print(' - no jobs.json'); raise SystemExit(0) +data = json.loads(p.read_text()) +legacy_names = { + 'Dimensions Memory Consolidation', + 'Forgetting Curve (Monthly Archive)', + 'Forgetting Curve', +} +changed = 0 +for j in data.get('jobs', []): + if j['name'] in legacy_names and j.get('enabled', False): + j['enabled'] = False + j['paused_at'] = '$(date -u +%Y-%m-%dT%H:%M:%SZ)' + j['paused_reason'] = 'W4 cutover — replaced by sqlite_vec weekly_promotion' + print(f' ✓ disabled: {j[\"name\"]}') + changed += 1 +if changed: + p.write_text(json.dumps(data, indent=2, ensure_ascii=False)) +else: + print(' - no legacy jobs found (already disabled, or never installed)') +PY" + +# ---- 5. 
Smoke test --------------------------------------------------------- + +say "5. Smoke test: provider initializes + retrieves" +if command -v docker >/dev/null; then + do_or_say "docker exec hermes /opt/hermes/.venv/bin/python3 -c ' +from hermes_cli.env_loader import load_hermes_dotenv +load_hermes_dotenv(hermes_home=\"/opt/data\", project_env=None) +from agent.memory_manager import MemoryManager +from plugins.memory import load_memory_provider +mm = MemoryManager() +mm.add_provider(load_memory_provider(\"sqlite_vec\")) +mm.initialize_all(session_id=\"cutover-smoke\", platform=\"cli\", hermes_home=\"/opt/data\", agent_context=\"primary\") +out = mm.prefetch_all(\"我太太生日\") +print(\"prefetch returned:\", \"OK\" if out else \"EMPTY\") +mm.shutdown_all() +'" +fi + +# ---- 6. Restart gateway ---------------------------------------------------- + +say "6. Restart gateway to pick up any config changes" +if command -v docker >/dev/null && [[ -d "${HOME}/Projects/hermes-agent" ]]; then + do_or_say "(cd ${HOME}/Projects/hermes-agent && docker compose restart gateway)" +fi + +# ---- Done ------------------------------------------------------------------ + +if $DRY_RUN; then + say "" + say "DRY RUN COMPLETE — no changes made. Re-run with --commit when ready." + say "" + say "After --commit, monitor for 24 hours via memory.log + #memory-review:" + say " - tail -f ~/.hermes/logs/memory.log" + say " - watch ~/.hermes/logs/memory_write_failures.jsonl size" + say " - confirm next Sunday's digest fires" + say "" + say "Rollback procedure: docs/runbooks/memory-rollback.md §3" +else + say "" + say "CUTOVER COMPLETE." + say " Archive at: ${DST}" + say " Legacy crons disabled in: ${HOME_DIR}/cron/jobs.json" + say " Gateway restarted." + say "" + say "Monitor for 24 hours then sanity-check via:" + say " docs/runbooks/memory-monitoring.md §6 (quick health check)" +fi diff --git a/scripts/import_md.py b/scripts/import_md.py new file mode 100755 index 00000000000..86743a5d8eb --- /dev/null +++ b/scripts/import_md.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +"""Seed `semantic_facts` from a flat ``MEMORY.md`` (W2-2). + +Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §6.1. + +Format expected in ``~/.hermes/memories/MEMORY.md``:: + + Topic: content + § + Topic: another content + § + +Each entry becomes one row in ``semantic_facts``: + + entity = "禮揚." + slug(topic) # "Working style" -> "禮揚.working_style" + # "Tools & Access > Proton" -> "禮揚.tools_access.proton" + fact = content (verbatim) + importance = 2 + valid_from = '2026-05-10' + valid_to = NULL + +Idempotent: re-running with the same input does not duplicate rows. The +natural key is ``(entity, fact)`` and is enforced by a pre-INSERT lookup. + +Embeddings come from Voyage 3.5-lite via ``plugins.memory.sqlite_vec.embed``. +The trigger ``sf_after_insert`` keeps ``vec_facts`` synced automatically, so +this script writes only to ``semantic_facts``. 
+ +Usage:: + + docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ + scripts/import_md.py --dry-run + docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ + scripts/import_md.py --commit +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import re +import sqlite3 +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +DEFAULT_MEMORY_MD = Path.home() / ".hermes" / "memories" / "MEMORY.md" +DEFAULT_DB = Path("/opt/data") / "memories" / "memory.db" +DEFAULT_VALID_FROM = "2026-05-10" # spec §6.1 +DEFAULT_IMPORTANCE = 2 +DEFAULT_BATCH = 128 +ENTITY_PREFIX = "禮揚" +ENTRY_SEPARATOR = re.compile(r"^§\s*$", re.MULTILINE) + + +@dataclass +class Entry: + """One parsed MEMORY.md entry.""" + + topic: str + fact: str + + @property + def entity(self) -> str: + return f"{ENTITY_PREFIX}.{slugify_topic(self.topic)}" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +def slugify_topic(topic: str) -> str: + """Convert a human topic label to a stable entity-suffix slug. + + - Hierarchy markers ``>`` become ``.`` so prefix queries still work. + - Lowercase, ASCII alphanum kept; runs of other chars collapse to ``_``. + - CJK / unicode is preserved unchanged so 中文 topics stay readable. + + Examples: + "Working style" -> "working_style" + "Tools & Access > ProtonMail" -> "tools_access.protonmail" + "禮揚.家庭" -> "禮揚.家庭" (already a slug, untouched) + """ + parts = [p.strip() for p in topic.split(">")] + out_parts = [] + for part in parts: + s = part.strip().lower() + # Collapse non-alphanum (including '&', spaces, punctuation) to underscore. + # CJK characters are unicode word chars in Python regex with re.UNICODE + # (default for str patterns), so [^\w] excludes them = preserved. + s = re.sub(r"[^\w]+", "_", s, flags=re.UNICODE) + s = s.strip("_") + if s: + out_parts.append(s) + return ".".join(out_parts) if out_parts else "unknown" + + +def parse_memory_md(text: str) -> List[Entry]: + """Split MEMORY.md into Entry objects. + + Skips empty blocks and blocks with no ``Topic: content`` colon. Keeps + multi-line content (rare today but possible if a future entry wraps). + """ + entries: List[Entry] = [] + for raw_block in ENTRY_SEPARATOR.split(text): + block = raw_block.strip() + if not block: + continue + if ":" not in block: + logger.warning("skipping malformed block (no colon): %r", block[:60]) + continue + topic, _, content = block.partition(":") + topic = topic.strip() + content = content.strip() + if not topic or not content: + logger.warning("skipping empty topic or content: %r", block[:60]) + continue + entries.append(Entry(topic=topic, fact=content)) + return entries + + +# --------------------------------------------------------------------------- +# DB ops +# --------------------------------------------------------------------------- + + +def existing_keys(conn: sqlite3.Connection) -> set[Tuple[str, str]]: + """Return the (entity, fact) pairs already present, for idempotency.""" + rows = conn.execute("SELECT entity, fact FROM semantic_facts").fetchall() + return {(r[0], r[1]) for r in rows} + + +def insert_batch( + conn: sqlite3.Connection, + rows: List[Tuple[Entry, bytes]], + *, + valid_from: str, + importance: int, +) -> int: + """Insert one batch of (entry, embedding) pairs. 
Returns count inserted.""" + cur = conn.executemany( + """ + INSERT INTO semantic_facts(entity, fact, embedding, + importance, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, NULL) + """, + [ + (e.entity, e.fact, blob, importance, valid_from) + for e, blob in rows + ], + ) + return cur.rowcount + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +async def import_memory_md( + *, + md_path: Path, + db_path: Path, + dry_run: bool, + valid_from: str = DEFAULT_VALID_FROM, + importance: int = DEFAULT_IMPORTANCE, + batch_size: int = DEFAULT_BATCH, + embed_fn=None, # injectable for tests +) -> dict: + """Run the full import. + + Returns a summary dict: {parsed, new, skipped_dup, batches, dry_run}. + Does not return embeddings. + """ + text = md_path.read_text(encoding="utf-8") + entries = parse_memory_md(text) + + # Open DB and bootstrap if needed (idempotent — store.init_db handles that). + from plugins.memory.sqlite_vec.store import init_db + conn = init_db(db_path) + + have = existing_keys(conn) + new_entries = [e for e in entries if (e.entity, e.fact) not in have] + skipped = len(entries) - len(new_entries) + + if dry_run: + print(f"[dry-run] parsed={len(entries)} new={len(new_entries)} " + f"already_present={skipped}") + for e in new_entries[:10]: + print(f" + ({e.entity}) {e.fact[:80]!r}") + if len(new_entries) > 10: + print(f" … and {len(new_entries) - 10} more") + return { + "parsed": len(entries), + "new": len(new_entries), + "skipped_dup": skipped, + "batches": 0, + "dry_run": True, + } + + if not new_entries: + print(f"nothing to import (parsed={len(entries)}, all present)") + return { + "parsed": len(entries), + "new": 0, + "skipped_dup": skipped, + "batches": 0, + "dry_run": False, + } + + # Embed in batches; default uses real Voyage, tests inject a stub. 
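+ # All batches run inside a single transaction (BEGIN below), so a failed
+ # embed call rolls the whole import back and the script can simply be re-run.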
+ if embed_fn is None: + from plugins.memory.sqlite_vec.embed import voyage_embed + embed_fn = voyage_embed + + inserted = 0 + batches = 0 + try: + conn.execute("BEGIN") + for i in range(0, len(new_entries), batch_size): + chunk = new_entries[i : i + batch_size] + blobs = await embed_fn([e.fact for e in chunk]) + if len(blobs) != len(chunk): + raise RuntimeError( + f"embed returned {len(blobs)} for {len(chunk)} inputs" + ) + inserted += insert_batch( + conn, + list(zip(chunk, blobs)), + valid_from=valid_from, + importance=importance, + ) + batches += 1 + conn.commit() + except Exception: + conn.rollback() + raise + + print( + f"imported {inserted} entries in {batches} " + f"batch{'es' if batches != 1 else ''} " + f"(parsed={len(entries)}, skipped_dup={skipped})" + ) + return { + "parsed": len(entries), + "new": inserted, + "skipped_dup": skipped, + "batches": batches, + "dry_run": False, + } + + +def _build_arg_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(description=__doc__.split("\n")[0]) + p.add_argument( + "--memory-md", + type=Path, + default=DEFAULT_MEMORY_MD, + help="Path to MEMORY.md (default: ~/.hermes/memories/MEMORY.md)", + ) + p.add_argument( + "--db", + type=Path, + default=DEFAULT_DB, + help="Path to memory.db (default: /opt/data/memories/memory.db inside container)", + ) + g = p.add_mutually_exclusive_group(required=True) + g.add_argument("--dry-run", action="store_true", help="Show plan, do not write") + g.add_argument("--commit", action="store_true", help="Actually import") + p.add_argument("--valid-from", default=DEFAULT_VALID_FROM) + p.add_argument("--importance", type=int, default=DEFAULT_IMPORTANCE) + return p + + +def main(argv: Optional[List[str]] = None) -> int: + logging.basicConfig( + level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" + ) + args = _build_arg_parser().parse_args(argv) + + # Live import path: ensure VOYAGE_API_KEY is loaded from ~/.hermes/.env. + if args.commit: + try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv(hermes_home="/opt/data", project_env=None) + except ImportError: + pass # tests / non-container contexts handle env themselves + + summary = asyncio.run( + import_memory_md( + md_path=args.memory_md, + db_path=args.db, + dry_run=args.dry_run, + valid_from=args.valid_from, + importance=args.importance, + ) + ) + return 0 if summary["new"] >= 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/plugins/memory/test_extract.py b/tests/plugins/memory/test_extract.py new file mode 100644 index 00000000000..45cf1d3bca1 --- /dev/null +++ b/tests/plugins/memory/test_extract.py @@ -0,0 +1,363 @@ +"""Tests for plugins/memory/sqlite_vec/extract.py (W3-1).""" + +from __future__ import annotations + +import asyncio +import json +from pathlib import Path +from unittest.mock import patch + +import httpx +import pytest + +from plugins.memory.sqlite_vec.extract import ( + EXTRACT_MODEL, + EXTRACT_PROMPT, + PHI_BLACKLIST_CHANNELS, + ExtractError, + ExtractedFact, + _coerce_fact, + _parse_json_list, + kimi_extract, +) + + +# --------------------------------------------------------------------------- +# Pure helpers +# --------------------------------------------------------------------------- + + +def test_extract_prompt_is_verbatim_spec_5_2(): + """Spec §5.2 marks EXTRACT_PROMPT as a behavioural contract — verbatim.""" + # Anchors that uniquely identify the spec's exact wording. + assert "You extract durable memories about 禮揚 from this Discord turn." 
in EXTRACT_PROMPT + assert "HARD RULES — these override everything else:" in EXTRACT_PROMPT + assert "ERR ON THE SIDE OF NOT EXTRACTING" in EXTRACT_PROMPT + assert "Skip facts that duplicate something said in the last 5 turns." in EXTRACT_PROMPT + # Placeholders must be preserved. + assert "{ts}" in EXTRACT_PROMPT and "{channel}" in EXTRACT_PROMPT + assert "{user}" in EXTRACT_PROMPT and "{assistant}" in EXTRACT_PROMPT + + +def test_phi_blacklist_matches_spec_5_1(): + assert PHI_BLACKLIST_CHANNELS == frozenset({"cmio", "cbme", "medicine"}) + + +def test_parse_json_list_bare_array(): + assert _parse_json_list('[{"type":"semantic","text":"a"}]') == [ + {"type": "semantic", "text": "a"} + ] + + +def test_parse_json_list_wrapped_object(): + assert _parse_json_list('{"facts": [{"type":"semantic","text":"a"}]}') == [ + {"type": "semantic", "text": "a"} + ] + assert _parse_json_list('{"items": [{"type":"semantic","text":"b"}]}') == [ + {"type": "semantic", "text": "b"} + ] + + +def test_parse_json_list_empty_object_returns_empty_list(): + assert _parse_json_list("{}") == [] + assert _parse_json_list("") == [] + assert _parse_json_list("not even json") == [] + + +def test_coerce_fact_drops_invalid_type(): + assert _coerce_fact({"type": "garbage", "text": "a"}) is None + assert _coerce_fact({"type": "semantic"}) is None # missing text + assert _coerce_fact({"type": "semantic", "text": " "}) is None # blank text + + +def test_coerce_fact_clamps_importance(): + f = _coerce_fact({"type": "semantic", "text": "a", "importance": 99}) + assert f.importance == 5 + f = _coerce_fact({"type": "semantic", "text": "a", "importance": -3}) + assert f.importance == 1 + f = _coerce_fact({"type": "semantic", "text": "a", "importance": "not-int"}) + assert f.importance == 2 # default fallback + + +def test_coerce_fact_round_trip_full_shape(): + raw = { + "type": "semantic", + "text": "致妤 7:30 才到家", + "entity": "禮揚.家庭", + "importance": 3, + "valid_to_hint": "2026-05-03", + } + f = _coerce_fact(raw) + assert isinstance(f, ExtractedFact) + assert f.text == "致妤 7:30 才到家" + assert f.entity == "禮揚.家庭" + assert f.importance == 3 + assert f.valid_to_hint == "2026-05-03" + + +# --------------------------------------------------------------------------- +# kimi_extract — short-circuits (no httpx call) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("channel", ["cmio", "cbme", "medicine"]) +def test_kimi_extract_phi_channel_returns_empty_no_call(channel, monkeypatch, tmp_path): + """Even with no API key, PHI channels never hit the network.""" + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + # Point auth.json lookup at an empty tmp dir so any leak would raise. 
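+ # (kimi_extract falls back to auth.json under HERMES_HOME when SYNTHETIC_API_KEY is unset.)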
+ monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + out = asyncio.run( + kimi_extract( + "病人的血壓 180/100", + "我建議轉診", + channel=channel, + ts="2026-05-02 09:00:00", + ) + ) + assert out == [] + + +def test_kimi_extract_empty_turn_returns_empty(monkeypatch, tmp_path): + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + out = asyncio.run( + kimi_extract("", "", channel="cattia", ts="2026-05-02 09:00:00") + ) + assert out == [] + + +# --------------------------------------------------------------------------- +# kimi_extract — mocked synthetic.new responses +# --------------------------------------------------------------------------- + + +def _mock_synthetic_response(facts: list, *, status: int = 200): + """Build a synthetic.new chat-completions JSON body wrapping `facts`.""" + body = { + "id": "test", + "choices": [ + { + "message": {"role": "assistant", "content": json.dumps(facts)}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 200, "completion_tokens": 80}, + } + return status, body + + +class _FakeTransport(httpx.MockTransport): + def __init__(self, status, body): + self.calls = [] + self._status = status + self._body = body + super().__init__(self._h) + + def _h(self, request: httpx.Request): + self.calls.append(request) + return httpx.Response(self._status, json=self._body) + + +def test_kimi_extract_pleasantry_returns_empty_after_call(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + status, body = _mock_synthetic_response([]) # Kimi correctly returns [] + transport = _FakeTransport(status, body) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "好的", "收到", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert out == [] + assert len(transport.calls) == 1 + log_line = (tmp_path / "memory.log").read_text().strip() + assert '"cmd": "kimi_extract"' in log_line + assert '"n_kept": 0' in log_line + + +def test_kimi_extract_short_lived_fact_with_valid_to_hint(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + facts = [ + { + "type": "semantic", + "text": "致妤今晚 (2026-05-02) 預計 7:30 才到家", + "entity": "禮揚.家庭/今晚", + "importance": 3, + "valid_to_hint": "2026-05-03", + } + ] + transport = _FakeTransport(*_mock_synthetic_response(facts)) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "今晚致妤會晚回來,大概 7:30 才到", "好喔", + channel="at-home", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert len(out) == 1 + f = out[0] + assert f.type == "semantic" + assert "7:30" in f.text + assert f.valid_to_hint == "2026-05-03" + assert f.importance == 3 + + +def test_kimi_extract_long_lived_fact_no_valid_to(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + facts = [ + { + "type": "semantic", + "text": "禮揚 最近在追 sleep medicine 的 RCT", + "entity": "禮揚.研究興趣", + "importance": 2, + } + ] + transport = _FakeTransport(*_mock_synthetic_response(facts)) + client = 
httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "最近在追 sleep medicine", "了解,要幫你 follow up 嗎", + channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert len(out) == 1 + assert out[0].valid_to_hint is None + assert out[0].entity == "禮揚.研究興趣" + + +def test_kimi_extract_drops_malformed_rows(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + facts = [ + {"type": "semantic", "text": "good fact"}, + {"type": "garbage", "text": "bad type"}, # dropped + {"type": "episodic"}, # missing text → dropped + {"type": "semantic", "text": " "}, # blank text → dropped + ] + transport = _FakeTransport(*_mock_synthetic_response(facts)) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "u", "a", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert len(out) == 1 + assert out[0].text == "good fact" + + +def test_kimi_extract_5xx_raises_extracterror(monkeypatch, tmp_path): + monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + transport = _FakeTransport(503, {"error": "down"}) + client = httpx.AsyncClient(transport=transport) + with pytest.raises(ExtractError): + asyncio.run( + kimi_extract( + "u", "a", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + + +def test_kimi_extract_no_api_key_raises(monkeypatch, tmp_path): + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) # auth.json absent + with pytest.raises(ExtractError, match="API key"): + asyncio.run( + kimi_extract( + "u", "a", channel="cattia", ts="2026-05-02 09:00:00", + log_path=tmp_path / "memory.log", + ) + ) + + +def test_kimi_extract_reads_auth_json_when_no_env(monkeypatch, tmp_path): + monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path + ) + auth = { + "credential_pool": { + "custom:synthetic": [ + {"id": "test", "access_token": "syn_test_xxx"}, + ] + } + } + (tmp_path / "auth.json").write_text(json.dumps(auth), encoding="utf-8") + transport = _FakeTransport(*_mock_synthetic_response([])) + client = httpx.AsyncClient(transport=transport) + + out = asyncio.run( + kimi_extract( + "x", "y", channel="cattia", ts="2026-05-02 09:00:00", + client=client, log_path=tmp_path / "memory.log", + ) + ) + assert out == [] + # The Authorization header carried the auth.json token. + assert transport.calls[0].headers["Authorization"] == "Bearer syn_test_xxx" + + + +# =========================================================================== +# Additional parser shapes discovered during live smoke test +# =========================================================================== + + +def test_parse_json_list_extracted_memories_key(): + """Kimi K2.5 with response_format=json_object often wraps the answer in + a dict with key 'extracted_memories' (sometimes alongside an 'analysis' + field showing its reasoning). 
Both must be parsed correctly.""" + payload = ( + '{"analysis": "the user mentions...", ' + '"extracted_memories": [{"type":"semantic","text":"a"}]}' + ) + out = _parse_json_list(payload) + assert out == [{"type": "semantic", "text": "a"}] + + +def test_parse_json_list_bare_single_fact_dict(): + """Kimi sometimes returns a single fact as a flat dict instead of a list. + We detect that shape by the presence of canonical fact keys.""" + payload = ( + '{"type": "episodic", "text": "致妤今晚 7:30", ' + '"entity": "禮揚.家庭", "importance": 2}' + ) + out = _parse_json_list(payload) + assert len(out) == 1 + assert out[0]["text"] == "致妤今晚 7:30" + + +def test_parse_json_list_arbitrary_dict_falls_back_to_first_list(): + """If neither canonical keys nor fact-shape is present, the first + list-valued field is returned. Defensive against future Kimi changes.""" + payload = '{"weird_unique_key": [{"type":"semantic","text":"x"}]}' + out = _parse_json_list(payload) + assert out == [{"type": "semantic", "text": "x"}] diff --git a/tests/plugins/memory/test_promotion.py b/tests/plugins/memory/test_promotion.py new file mode 100644 index 00000000000..6e452ba2132 --- /dev/null +++ b/tests/plugins/memory/test_promotion.py @@ -0,0 +1,397 @@ +"""Tests for plugins/memory/sqlite_vec/promotion.py (W3-3).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date, timedelta +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.promotion import ( + PENDING_DIFF_TTL_DAYS, + PROMOTION_PROMPT, + WeekDigest, + _apply_diff_atomic, + _format_candidates_block, + _format_neighbors_block, + _purge_old_pending, + digest_id_for, + pending_path, + rejection_sentinel, + render_digest_markdown, + weekly_apply, + weekly_promotion, +) +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Prompt + format helpers +# --------------------------------------------------------------------------- + + +def test_prompt_has_required_placeholders(): + """The prompt is .format()'d with these keys; missing any breaks promotion.""" + for key in ("{digest_id}", "{today}", "{week_label}", + "{candidates_block}", "{neighbors_block}"): + assert key in PROMOTION_PROMPT, f"missing placeholder: {key}" + + +def test_prompt_carries_hard_rules(): + assert "病歷號" in PROMOTION_PROMPT + assert "DROP_AS_NOISE" in PROMOTION_PROMPT + assert "PROMOTE" in PROMOTION_PROMPT + assert "DEDUP_HIT" in PROMOTION_PROMPT + assert "EXPIRE" in PROMOTION_PROMPT + + +def test_format_candidates_block_marks_synthetic(): + cands = [ + {"id": 1, "ts": "2026-05-02 09:00", "channel": "cattia", + "role": "user", "synthetic": False, "text": "hello", + "stashed_facts": [{"text": "禮揚 likes X", "entity": "禮揚.訓練", + "importance": 2, "valid_to_hint": None}]}, + {"id": 2, "ts": "2026-05-02 09:00", "channel": "cron", + "role": "assistant", "synthetic": True, "text": "cron output", + "stashed_facts": []}, + ] + out = _format_candidates_block(cands) + assert "👤" in out and "🤖" in out + assert "↳ stashed:" in out + + +def test_format_neighbors_block_truncates_to_top_5(): + neighbors = { + "topic": [ + {"id": i, "fact": f"fact {i}", "entity": "x", "sim": 0.9 - i * 0.01} + for i in range(10) + ] + } + out = _format_neighbors_block(neighbors) + # Only 5 
should appear. + assert out.count("#") == 5 + + +# --------------------------------------------------------------------------- +# digest_id + path helpers +# --------------------------------------------------------------------------- + + +def test_digest_id_format(): + assert digest_id_for(date(2026, 5, 11)) == "wk-2026-05-11" + + +# --------------------------------------------------------------------------- +# WeekDigest +# --------------------------------------------------------------------------- + + +def test_week_digest_round_trip(): + raw = { + "digest_id": "wk-2026-05-10", + "candidate_episode_ids": [1, 2, 3], + "promote": [{"entity": "禮揚.家庭", "fact": "x", "importance": 3}], + "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits"}], + "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10"}], + "drop_as_noise": [{"episode_ids": [4], "reason": "pleasantry"}], + } + d = WeekDigest.from_dict(raw) + assert d.digest_id == "wk-2026-05-10" + assert d.to_dict()["candidate_episode_ids"] == [1, 2, 3] + + +# --------------------------------------------------------------------------- +# render_digest_markdown +# --------------------------------------------------------------------------- + + +def test_render_digest_markdown_full_shape(): + candidates = [ + {"id": 1, "ts": "x", "channel": "c", "role": "user", + "synthetic": False, "text": "u", "stashed_facts": []}, + {"id": 2, "ts": "x", "channel": "cron", "role": "user", + "synthetic": True, "text": "u", "stashed_facts": []}, + ] + d = WeekDigest.from_dict({ + "digest_id": "wk-2026-05-10", + "candidate_episode_ids": [1, 2], + "promote": [{"entity": "禮揚.家庭", "fact": "致妤生日 3/19", + "importance": 5, "valid_to": None, + "source_episode_ids": [1]}], + "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits", + "source_episode_ids": [2]}], + "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10", + "reason": "stale"}], + "drop_as_noise": [{"episode_ids": [3], "reason": "好的"}], + }) + md = render_digest_markdown(d, candidates) + assert "Weekly Memory Review — 2026-05-10" in md + assert "(1 user/assistant + 1 cron-synthetic)" in md + assert "/memreview reject wk-2026-05-10" in md + assert "⬆️ Promote to permanent (1)" in md + assert "🔁 Dedup confirmations (1)" in md + assert "🪦 Expiring (1)" in md + assert "🗑️ Skipped as noise (1)" in md + assert "致妤生日 3/19" in md + assert "valid_to: 永久" in md # null valid_to + + +def test_render_digest_empty_sections_collapse(): + d = WeekDigest.from_dict({"digest_id": "wk-2026-05-10", + "candidate_episode_ids": []}) + md = render_digest_markdown(d, []) + assert "_No actions this week._" in md + + +# --------------------------------------------------------------------------- +# weekly_promotion (mocked Kimi) +# --------------------------------------------------------------------------- + + +def _seed_episodes(conn, today_iso: str = "2026-05-02 12:00:00"): + """Add 2 fixture episodes with stashed_facts.""" + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text, synthetic, metadata) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", + (today_iso, "cattia", "m1:user", "user", "我下週要去日本", 0, + json.dumps({"stashed_facts": [ + {"type": "semantic", "text": "禮揚下週去日本", "entity": "禮揚.家庭", + "importance": 3, "valid_to_hint": "2026-05-11"}]})), + ) + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (?, ?, ?, ?, ?)", + (today_iso, "cattia", "m1:asst", "assistant", "好的", ), + ) + conn.commit() + + +def test_weekly_promotion_no_candidates(tmp_path, monkeypatch): + 
monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + summary = asyncio.run(weekly_promotion(db)) + assert summary["candidates"] == 0 + assert "skipped" in summary + + +def test_weekly_promotion_dry_run_returns_markdown(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + async def fake_kimi(prompt): + # Sanity: prompt was actually formatted, not left with placeholders. + assert "{digest_id}" not in prompt + return { + "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", + "importance": 3, "valid_to": "2026-05-11", + "source_episode_ids": [1]}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + } + + async def fake_embed(texts): + return [_vec(50) for _ in texts] + + summary = asyncio.run(weekly_promotion( + db, dry_run=True, kimi_fn=fake_kimi, + embed_fn=fake_embed, + )) + assert summary["candidates"] == 2 + assert summary["promote"] == 1 + assert summary["dry_run"] is True + assert "markdown_preview" in summary + assert "下週去日本" in summary["markdown_preview"] + # Dry-run MUST NOT persist a pending diff or post to Discord. + assert not (tmp_path / "memories" / "pending_diffs").exists() or \ + not list((tmp_path / "memories" / "pending_diffs").glob("*.json")) + + +def test_weekly_promotion_persists_diff_on_real_run(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + async def fake_kimi(prompt): + return { + "promote": [], "dedup_hits": [], "expire": [], + "drop_as_noise": [{"episode_ids": [1, 2], "reason": "no signal"}], + } + + summary = asyncio.run(weekly_promotion( + db, dry_run=False, kimi_fn=fake_kimi, + )) + # Diff was written, even with no Discord channel configured. + files = list((tmp_path / "memories" / "pending_diffs").glob("*.json")) + assert len(files) == 1 + diff = json.loads(files[0].read_text()) + assert diff["candidate_episode_ids"] == [1, 2] + + +# --------------------------------------------------------------------------- +# weekly_apply +# --------------------------------------------------------------------------- + + +def test_weekly_apply_no_pending_diff(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + summary = asyncio.run(weekly_apply(db)) + assert summary["applied"] is False + assert "no pending diff" in summary.get("reason", "") + + +def test_weekly_apply_rejection_sentinel_archives_without_apply(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + rejection_sentinel(digest_id).write_text("rejected", encoding="utf-8") + + summary = asyncio.run(weekly_apply(db)) + assert summary["applied"] is False + assert summary["reason"] == "rejected" + # Diff moved to archive_dir, sentinel removed. 
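+    # (weekly_apply is expected to consume the sentinel: the diff is archived as
+    # <digest_id>.rejected.json under memories/diff_archive/ and the .rejected
+    # marker itself is removed, as asserted below.)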
+ assert not pending_path(digest_id).exists() + assert not rejection_sentinel(digest_id).exists() + archive = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) + assert len(archive) == 1 + + +def test_weekly_apply_promotes_inserts_and_stamps(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "m.db") + _seed_episodes(db) + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, + "candidate_episode_ids": [1, 2], + "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", + "importance": 3, "valid_from": "2026-05-02", + "valid_to": "2026-05-11", "source_episode_ids": [1]}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + + async def fake_embed(texts): + return [_vec(50) for _ in texts] + + summary = asyncio.run(weekly_apply(db, embed_fn=fake_embed)) + assert summary["applied"] is True + assert summary["promoted"] == 1 + assert summary["stamped"] == 2 + # New row in semantic_facts. + [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf == 1 + # Trigger mirrored into vec_facts. + [(vf,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vf == 1 + # Episodes stamped. + rows = db.execute("SELECT id, promoted_at FROM episodes ORDER BY id").fetchall() + assert all(r["promoted_at"] is not None for r in rows) + # Diff moved to archive. + archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) + assert len(archive) == 1 + + +def test_weekly_apply_dedup_bump_increments_hits(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding, hits) VALUES (?, ?, ?)", + ("禮揚 likes X", _vec(10), 0), + ) + db.commit() + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [ + {"existing_fact_id": 1, "action": "bump_hits", + "source_episode_ids": []} + ], "expire": [], "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db)) + assert summary["dedup_bumped"] == 1 + [(hits,)] = db.execute("SELECT hits FROM semantic_facts WHERE id=1").fetchall() + assert hits == 1 + + +def test_weekly_apply_expire_sets_valid_to(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("禮揚 watches paper X", _vec(10)), + ) + db.commit() + + digest_id = "wk-2026-05-02" + pending_path(digest_id).write_text(json.dumps({ + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], + "expire": [{"existing_fact_id": 1, "valid_to": "2026-05-02", + "reason": "stale"}], + "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) + assert summary["expired"] == 1 + [(vt,)] = db.execute("SELECT valid_to FROM semantic_facts WHERE id=1").fetchall() + assert vt == "2026-05-02" + + +def test_weekly_apply_purges_old_pending(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path + ) + db = init_db(tmp_path / "m.db") + + today = date(2026, 5, 2) + old = 
today - timedelta(days=PENDING_DIFF_TTL_DAYS + 5) + fresh = today - timedelta(days=2) + + pending_path(f"wk-{old.isoformat()}").write_text("{}") + pending_path(f"wk-{fresh.isoformat()}").write_text(json.dumps({ + "digest_id": f"wk-{fresh.isoformat()}", "candidate_episode_ids": [], + "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], + })) + + summary = asyncio.run(weekly_apply(db, today=today)) + assert summary["purged"] == 1 + # Old gone, fresh applied + archived. + assert not pending_path(f"wk-{old.isoformat()}").exists() + archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) + assert len(archive) == 1 diff --git a/tests/plugins/memory/test_sqlite_vec_provider.py b/tests/plugins/memory/test_sqlite_vec_provider.py new file mode 100644 index 00000000000..9f8e8d438d1 --- /dev/null +++ b/tests/plugins/memory/test_sqlite_vec_provider.py @@ -0,0 +1,556 @@ +"""Tests for the sqlite_vec memory provider plugin (W1 scope: schema only). + +Covers: + • bootstrap_schema is idempotent (re-running does not error or duplicate) + • all 3 tables + 4 indexes + 1 virtual table + 3 triggers exist + • semantic_facts defaults work (created_at, valid_from, importance) + • vec0 virtual table answers MATCH queries with k=N prefilter + • triggers keep vec_facts synced with semantic_facts (insert/update/delete) + • SqliteVecMemoryProvider.is_available() / initialize() / shutdown() round-trip +""" + +from __future__ import annotations + +import struct +from pathlib import Path + +import pytest + +from plugins.memory.sqlite_vec import SqliteVecMemoryProvider +from plugins.memory.sqlite_vec.store import ( + VEC_DIM, + bootstrap_schema, + init_db, + open_db, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _vec(seed: int) -> bytes: + """Make a deterministic 512-d int8 BLOB for testing. + + int8 matches the locked decision in spec §1.4 (Voyage 3.5-lite, 512-dim, int8). + seed is the base value (clamped to int8 range) with a small per-dim offset + so different seeds produce different vectors but the same seed reproduces. 
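+    Worked example from the formula below: seed=10 yields 7, 8, 9, 10, 11, 12, 13,
+    then the same 7-value pattern repeats across all 512 dims.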
+ """ + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Schema bootstrap +# --------------------------------------------------------------------------- + + +def test_bootstrap_creates_all_objects(tmp_path): + db = init_db(tmp_path / "memory.db") + + table_names = { + row[0] + for row in db.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" + ) + } + assert "episodes" in table_names + assert "semantic_facts" in table_names + + index_names = { + row[0] + for row in db.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name NOT LIKE 'sqlite_%'" + ) + } + assert "idx_episodes_ts" in index_names + assert "idx_episodes_promoted_pending" in index_names + assert "idx_facts_entity" in index_names + assert "idx_facts_active" in index_names + + trigger_names = { + row[0] for row in db.execute("SELECT name FROM sqlite_master WHERE type='trigger'") + } + assert "sf_after_insert" in trigger_names + assert "sf_after_update_embedding" in trigger_names + assert "sf_after_delete" in trigger_names + + # vec0 virtual table is registered as a regular table internally + [(vec_count,)] = db.execute( + "SELECT count(*) FROM sqlite_master WHERE name='vec_facts'" + ).fetchall() + assert vec_count >= 1 + + +def test_bootstrap_is_idempotent(tmp_path): + path = tmp_path / "memory.db" + db = init_db(path) + bootstrap_schema(db) # second time + bootstrap_schema(db) # third time + # If we got here without error and tables still query, idempotency holds. + db.execute("SELECT count(*) FROM episodes").fetchone() + db.execute("SELECT count(*) FROM semantic_facts").fetchone() + + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- + + +def test_semantic_facts_defaults_are_populated(tmp_path): + db = init_db(tmp_path / "memory.db") + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("禮揚 likes Starting Strength method", _vec(10)), + ) + db.commit() + + row = db.execute( + "SELECT importance, state, valid_from, valid_to, created_at FROM semantic_facts" + ).fetchone() + assert row["importance"] == 2 + assert row["state"] == "active" + assert row["valid_from"] is not None # default = date('now') + assert row["valid_to"] is None + assert row["created_at"] is not None + + +def test_role_check_constraint(tmp_path): + db = init_db(tmp_path / "memory.db") + with pytest.raises(Exception): + db.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (datetime('now'), 'cattia', 'msg-1', 'system', 'hi')" + ) + + +# --------------------------------------------------------------------------- +# Trigger sync between semantic_facts and vec_facts +# --------------------------------------------------------------------------- + + +def test_triggers_sync_insert_update_delete(tmp_path): + db = init_db(tmp_path / "memory.db") + + # INSERT + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + ("fact A", _vec(50)), + ) + db.commit() + [(count_after_insert,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert count_after_insert == 1 + + # UPDATE embedding + [fact_id] = db.execute("SELECT id FROM semantic_facts").fetchone() + new_vec = _vec(90) + db.execute("UPDATE semantic_facts SET embedding=? 
WHERE id=?", (new_vec, fact_id)) + db.commit() + [(after_update,)] = db.execute( + "SELECT count(*) FROM vec_facts WHERE fact_id=?", (fact_id,) + ).fetchall() + assert after_update == 1 + + # DELETE + db.execute("DELETE FROM semantic_facts WHERE id=?", (fact_id,)) + db.commit() + [(count_after_delete,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert count_after_delete == 0 + + +# --------------------------------------------------------------------------- +# vec0 retrieval +# --------------------------------------------------------------------------- + + +def test_vec0_match_returns_nearest(tmp_path): + db = init_db(tmp_path / "memory.db") + for seed, fact in [(10, "alpha"), (50, "beta"), (90, "gamma")]: + db.execute( + "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", + (fact, _vec(seed)), + ) + db.commit() + + query = _vec(51) + rows = db.execute( + "SELECT fact_id, distance FROM vec_facts WHERE embedding MATCH vec_int8(?) AND k = 2", + (query,), + ).fetchall() + assert len(rows) == 2 + # Closest must be the seed=0.5 row (beta) + closest_fact_id = rows[0]["fact_id"] + closest_fact = db.execute( + "SELECT fact FROM semantic_facts WHERE id=?", (closest_fact_id,) + ).fetchone()["fact"] + assert closest_fact == "beta" + + +# --------------------------------------------------------------------------- +# MemoryProvider lifecycle +# --------------------------------------------------------------------------- + + +def test_provider_lifecycle(tmp_path): + p = SqliteVecMemoryProvider() + assert p.name == "sqlite_vec" + assert p.is_available() is True + p.initialize(session_id="t1", hermes_home=str(tmp_path)) + assert (tmp_path / "memories" / "memory.db").exists() + assert p.prefetch("test query") == "" # W1: no-op + assert p.sync_turn("hi", "hello") is None # W1: no-op + assert p.get_tool_schemas() == [] + p.shutdown() + + + +# =========================================================================== +# W2-1: voyage_embed (mocked) + read_memory + bump_hits + format_facts +# =========================================================================== + +import asyncio +import sqlite3 +from unittest.mock import patch + +import httpx +import pytest + +from plugins.memory.sqlite_vec.embed import ( + VOYAGE_BATCH, + VOYAGE_DIM, + VoyageError, + voyage_embed, +) +from plugins.memory.sqlite_vec.read import ( + Fact, + bump_hits, + format_facts_for_prompt, + read_memory, +) + + +def _fake_voyage_response(texts): + """Build a fake Voyage JSON body where each embedding is dim=512 of zeros + except the first cell which carries the input index. 
Lets us round-trip + the input ordering through _to_int8_blob.""" + return { + "data": [ + {"index": i, "embedding": [(i % 200) - 100] + [0] * (VOYAGE_DIM - 1)} + for i, _ in enumerate(texts) + ] + } + + +class _MockTransport(httpx.MockTransport): + """httpx mock that records call count and returns programmable responses.""" + + def __init__(self, responses): + self.calls = [] + self._responses = list(responses) + super().__init__(self._handler) + + def _handler(self, request: httpx.Request) -> httpx.Response: + self.calls.append(request) + status, body = self._responses.pop(0) + if isinstance(body, dict): + return httpx.Response(status, json=body) + return httpx.Response(status, text=body) + + +# --------------------------------------------------------------------------- +# voyage_embed +# --------------------------------------------------------------------------- + + +def test_voyage_embed_success(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = ["hello", "world", "禮揚"] + transport = _MockTransport([(200, _fake_voyage_response(texts))]) + client = httpx.AsyncClient(transport=transport) + + blobs = asyncio.run(voyage_embed(texts, client=client)) + + assert len(blobs) == len(texts) + for b in blobs: + assert len(b) == VOYAGE_DIM + # First byte encodes the (signed) index value we baked into the fake response. + assert blobs[0][0] == (-100) & 0xFF # input index 0 -> -100 -> unsigned 156 + assert blobs[1][0] == (-99) & 0xFF + assert len(transport.calls) == 1 + + +def test_voyage_embed_batches_at_128(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = [f"t{i}" for i in range(200)] # > VOYAGE_BATCH=128 + # 2 calls: first 128, then 72. + transport = _MockTransport( + [ + (200, _fake_voyage_response(texts[:VOYAGE_BATCH])), + (200, _fake_voyage_response(texts[VOYAGE_BATCH:])), + ] + ) + client = httpx.AsyncClient(transport=transport) + + blobs = asyncio.run(voyage_embed(texts, client=client)) + assert len(blobs) == 200 + assert len(transport.calls) == 2 + + +def test_voyage_embed_retries_on_5xx(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + texts = ["only"] + transport = _MockTransport( + [ + (502, "bad gateway"), + (503, "still bad"), + (200, _fake_voyage_response(texts)), + ] + ) + client = httpx.AsyncClient(transport=transport) + + # Patch sleep to avoid real backoff delay. + with patch("plugins.memory.sqlite_vec.embed.asyncio.sleep", return_value=None): + blobs = asyncio.run(voyage_embed(texts, client=client)) + + assert len(blobs) == 1 + assert len(transport.calls) == 3 + + +def test_voyage_embed_4xx_raises(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + transport = _MockTransport([(401, "unauthorized")]) + client = httpx.AsyncClient(transport=transport) + with pytest.raises(VoyageError): + asyncio.run(voyage_embed(["x"], client=client)) + + +def test_voyage_embed_missing_key(monkeypatch): + monkeypatch.delenv("VOYAGE_API_KEY", raising=False) + with pytest.raises(VoyageError, match="VOYAGE_API_KEY"): + asyncio.run(voyage_embed(["x"])) + + +def test_voyage_embed_empty_input_no_call(monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + # No transport responses queued; if we make a call the test will explode. 
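+    # (Assumes voyage_embed returns [] for empty input before building any request.)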
+ transport = _MockTransport([]) + client = httpx.AsyncClient(transport=transport) + blobs = asyncio.run(voyage_embed([], client=client)) + assert blobs == [] + assert len(transport.calls) == 0 + + +# --------------------------------------------------------------------------- +# read_memory + bump_hits +# --------------------------------------------------------------------------- + + +def _seed_facts(db: sqlite3.Connection): + """Insert 3 facts at known created_at + int8 vectors that put 'beta' nearest to seed=51.""" + rows = [ + # fact text, entity, created_at, vec seed + ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), + ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), + ("gamma", None, "2025-12-01 09:00:00", 90), + ("expired", "禮揚.短期", "2026-05-01 09:00:00", 50), + ] + for fact, entity, created_at, seed in rows: + db.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at, valid_to) " + "VALUES (?, ?, ?, ?, ?)", + (fact, entity, _vec(seed), created_at, + "2026-01-01" if fact == "expired" else None), + ) + db.commit() + + +def test_read_memory_orders_by_score(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "memory.db") + _seed_facts(db) + + # Stub voyage_embed to return a fixed query vector close to seed=51. + async def fake_embed(texts, **kw): + assert len(texts) == 1 + return [_vec(51)] + + log_file = tmp_path / "memory.log" + with patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + facts = asyncio.run(read_memory("test query", db, k=8, log_path=log_file)) + + fact_texts = [f.fact for f in facts] + # 'expired' must be filtered (valid_to in past). + assert "expired" not in fact_texts + # 'beta' should rank first (closest vec, recent). + assert fact_texts[0] == "beta" + # All Fact fields populated. + assert all(isinstance(f, Fact) for f in facts) + assert all(f.score is not None and f.sim is not None for f in facts) + # Latency was logged. + assert log_file.exists() + log_line = log_file.read_text().strip().splitlines()[-1] + assert '"sql_ms"' in log_line and '"q": "test query"' in log_line + + +def test_bump_hits_increments_and_swallows(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db = init_db(tmp_path / "memory.db") + _seed_facts(db) + ids = [r["id"] for r in db.execute("SELECT id FROM semantic_facts ORDER BY id").fetchall()] + + asyncio.run(bump_hits(ids[:2], db)) + rows = db.execute( + "SELECT id, hits, last_seen FROM semantic_facts ORDER BY id" + ).fetchall() + assert rows[0]["hits"] == 1 and rows[1]["hits"] == 1 + assert rows[2]["hits"] == 0 # untouched + assert rows[0]["last_seen"] is not None + + # Closed connection -> bump_hits must swallow the sqlite3.Error. + db.close() + asyncio.run(bump_hits(ids[:1], db)) # should not raise + + +def test_bump_hits_empty_is_noop(tmp_path): + db = init_db(tmp_path / "memory.db") + # Should return immediately without touching the connection. 
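+    # (Assumes bump_hits early-returns on an empty id list.)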
+ asyncio.run(bump_hits([], db)) + + +def test_format_facts_for_prompt_shape(): + facts = [ + Fact(id=1, fact="禮揚 likes 5x5", entity="禮揚.訓練", + created_at="2026-05-01", importance=2, sim=0.8, age_days=1.0, score=0.9), + Fact(id=2, fact="致妤生日 3/19", entity=None, + created_at="2026-04-01", importance=3, sim=0.7, age_days=30.0, score=0.6), + ] + out = format_facts_for_prompt(facts) + assert "[禮揚.訓練] 禮揚 likes 5x5" in out + assert "- 致妤生日 3/19" in out # no entity prefix when None + assert format_facts_for_prompt([]) == "" + + + +# =========================================================================== +# W2-3: prefetch + sync_turn wiring +# =========================================================================== + +from unittest.mock import patch as _patch_w23 + +from plugins.memory.sqlite_vec import ( + PREFETCH_TIMEOUT_S, + RECALL_HEADER, + SqliteVecMemoryProvider, + _run_coro_in_thread, +) + + +def _stubbed_provider(tmp_path, monkeypatch, query_seed: int = 51): + """Build a provider with a real DB, real conn, but stubbed Voyage.""" + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + p = SqliteVecMemoryProvider() + p.initialize(session_id="t", hermes_home=str(tmp_path)) + # Seed 3 facts via the same trigger-driven pipeline used in production. + for fact, ent, ts, seed in [ + ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), + ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), + ("gamma", None, "2025-12-01 09:00:00", 90), + ]: + p._conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + (fact, ent, _vec(seed), ts), + ) + p._conn.commit() + + async def fake_embed(texts, **kw): + return [_vec(query_seed) for _ in texts] + + return p, fake_embed + + +def test_prefetch_returns_markdown_with_header(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = p.prefetch("when does my wife arrive home", session_id="s1") + assert out.startswith(RECALL_HEADER + "\n") + # Top fact 'beta' (seed=50) is closest to query (seed=51). + assert "beta" in out + # with_meta=True format includes importance + age. + assert "(importance:" in out and "days)" in out + # Fact ids cached for sync_turn to bump. + assert p._last_fact_ids["s1"] + p.shutdown() + + +def test_prefetch_empty_query_no_op(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + # No patch needed — should short-circuit before voyage_embed is reached. + assert p.prefetch("", session_id="s1") == "" + assert p.prefetch(" ", session_id="s1") == "" + assert "s1" not in p._last_fact_ids + p.shutdown() + + +def test_prefetch_swallows_voyage_error(tmp_path, monkeypatch): + p, _ = _stubbed_provider(tmp_path, monkeypatch) + + async def raise_embed(texts, **kw): + raise RuntimeError("voyage 503") + + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", raise_embed): + out = p.prefetch("anything", session_id="s1") + assert out == "" # Reply is never blocked on memory-recall failure. 
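+    # A failed recall must also leave no cached ids behind; otherwise a later
+    # sync_turn would bump hits for facts that were never surfaced.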
+ assert "s1" not in p._last_fact_ids + p.shutdown() + + +def test_sync_turn_bumps_hits_then_clears_cache(tmp_path, monkeypatch): + p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) + with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + p.prefetch("query", session_id="s1") + cached_ids = list(p._last_fact_ids["s1"]) + assert cached_ids + + p.sync_turn("user said hi", "asst replied", session_id="s1") + # Cache cleared + assert "s1" not in p._last_fact_ids + # Hits incremented for exactly the cached IDs. + placeholders = ",".join("?" * len(cached_ids)) + rows = p._conn.execute( + f"SELECT id, hits FROM semantic_facts WHERE id IN ({placeholders}) ORDER BY id", + cached_ids, + ).fetchall() + assert all(r["hits"] == 1 for r in rows), [(r["id"], r["hits"]) for r in rows] + + # Second sync_turn for same session is a no-op (cache empty). + p.sync_turn("u", "a", session_id="s1") + rows2 = p._conn.execute( + f"SELECT hits FROM semantic_facts WHERE id IN ({placeholders})", cached_ids + ).fetchall() + assert all(r["hits"] == 1 for r in rows2) + p.shutdown() + + +def test_run_coro_in_thread_timeout(): + import asyncio as _asyncio + + async def slow(): + await _asyncio.sleep(2.0) + return "ok" + + import pytest + with pytest.raises(TimeoutError): + _run_coro_in_thread(slow, timeout=0.05) + + +def test_format_with_meta_shape(): + facts = [ + Fact(id=1, fact="致妤生日 3/19", entity="禮揚.家庭", + created_at="2026-05-01", importance=3, sim=0.7, + age_days=5.4, score=0.6), + ] + out = format_facts_for_prompt(facts, with_meta=True) + assert "(importance: 3, age: 5 days)" in out + out_compact = format_facts_for_prompt(facts, with_meta=False) + assert "importance" not in out_compact diff --git a/tests/plugins/memory/test_write.py b/tests/plugins/memory/test_write.py new file mode 100644 index 00000000000..5bf2462739b --- /dev/null +++ b/tests/plugins/memory/test_write.py @@ -0,0 +1,322 @@ +"""Tests for plugins/memory/sqlite_vec/write.py (W3-2).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date, timedelta +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.extract import ExtractedFact +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memory.sqlite_vec.write import ( + FAST_TRACK_DAYS, + _fact_should_fast_track, + _parse_valid_to_hint, + write_episode, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +def _stub_embed_factory(): + """Returns (stub, call_log) — stub yields deterministic int8 blobs.""" + calls = [] + + async def stub(texts): + calls.append(list(texts)) + return [_vec(10 + i) for i in range(len(texts))] + + return stub, calls + + +def _stub_extract_factory(facts: list): + async def stub(user, asst, channel, ts): + return list(facts) + + return stub + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def test_parse_valid_to_hint(): + assert _parse_valid_to_hint("2026-05-03") == date(2026, 5, 3) + assert _parse_valid_to_hint("not-a-date") is None + assert _parse_valid_to_hint("") is None + assert _parse_valid_to_hint(None) is None + + +def test_fact_should_fast_track_threshold(): + today = date(2026, 5, 2) + f_in = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + 
valid_to_hint=(today + timedelta(days=10)).isoformat()) + f_edge = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + valid_to_hint=(today + timedelta(days=FAST_TRACK_DAYS)).isoformat()) + f_out = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + valid_to_hint=(today + timedelta(days=60)).isoformat()) + f_none = ExtractedFact(type="semantic", text="x", entity=None, importance=2, + valid_to_hint=None) + assert _fact_should_fast_track(f_in, today) is True + assert _fact_should_fast_track(f_edge, today) is True + assert _fact_should_fast_track(f_out, today) is False + assert _fact_should_fast_track(f_none, today) is False + + +# --------------------------------------------------------------------------- +# write_episode — happy paths +# --------------------------------------------------------------------------- + + +def _bootstrap_db(tmp_path): + return init_db(tmp_path / "m.db") + + +def test_writes_two_episode_rows_per_turn(tmp_path): + db = _bootstrap_db(tmp_path) + embed, calls = _stub_embed_factory() + extract = _stub_extract_factory([]) + + summary = asyncio.run(write_episode( + user_msg="hello", reply="hi back", + channel="cattia", msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 + rows = db.execute( + "SELECT role, channel, external_id, text FROM episodes ORDER BY id" + ).fetchall() + assert [r["role"] for r in rows] == ["user", "assistant"] + assert rows[0]["external_id"] == "m1:user" + assert rows[1]["external_id"] == "m1:asst" + # Single embed call covered both turn texts (no fact texts). + assert len(calls) == 1 + assert calls[0] == ["hello", "hi back"] + + +def test_phi_channel_records_episode_but_skips_extract(tmp_path): + db = _bootstrap_db(tmp_path) + embed, calls = _stub_embed_factory() + + def extract_should_not_be_called(*a, **kw): + raise AssertionError("extract called for PHI channel") + + summary = asyncio.run(write_episode( + user_msg="病人 [姓名] 血壓 180/100", reply="建議轉診", + channel="cmio", msg_id="phi-1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract_should_not_be_called, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["skipped_extract"] is True + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 + rows = db.execute("SELECT count(*) FROM episodes").fetchone() + assert rows[0] == 2 # raw episode rows still recorded + + +def test_idempotent_on_duplicate_msg_id(tmp_path): + """Re-running with the same msg_id collapses via ON CONFLICT.""" + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + extract = _stub_extract_factory([]) + + args = dict( + user_msg="x", reply="y", channel="cattia", + msg_id="dup-1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + ) + asyncio.run(write_episode(**args)) + summary2 = asyncio.run(write_episode(**args)) + assert summary2["episodes"] == 0 # nothing new inserted + [(count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() + assert count == 2 + + +# --------------------------------------------------------------------------- +# Fast-track vs stash partitioning +# --------------------------------------------------------------------------- + + +def test_short_lived_fact_fast_tracks_to_semantic_facts(tmp_path): + db = _bootstrap_db(tmp_path) 
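+    # Fast-track rule under test: a valid_to_hint within FAST_TRACK_DAYS of today
+    # is written straight into semantic_facts; longer-lived or undated facts are
+    # stashed in episode metadata for the weekly review instead.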
+ embed, _ = _stub_embed_factory() + today = date.today() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", + text="致妤今晚 7:30 才到家", + entity="禮揚.家庭", + importance=3, + valid_to_hint=(today + timedelta(days=1)).isoformat(), + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="今晚致妤 7:30 才到", reply="了解", + channel="at-home", msg_id="m1", ts="2026-05-02 17:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["fast_tracked"] == 1 + assert summary["stashed"] == 0 + [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf_count == 1 + [(vf_count,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vf_count == 1 # trigger mirrored the row + row = db.execute( + "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts" + ).fetchone() + assert row["entity"] == "禮揚.家庭" + assert row["valid_to"] == (today + timedelta(days=1)).isoformat() + + +def test_long_lived_fact_stashes_in_episode_metadata(tmp_path): + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", + text="禮揚 likes Starting Strength", + entity="禮揚.訓練", + importance=2, + valid_to_hint=None, # permanent → stash + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="我練 SS 一年了", reply="酷", + channel="cattia", msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["stashed"] == 1 + assert summary["fast_tracked"] == 0 + [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf_count == 0 # nothing fast-tracked + metadata_rows = db.execute( + "SELECT metadata FROM episodes WHERE metadata IS NOT NULL" + ).fetchall() + assert len(metadata_rows) == 2 # both user + assistant rows carry the same metadata + md = json.loads(metadata_rows[0]["metadata"]) + assert md["stashed_facts"][0]["text"] == "禮揚 likes Starting Strength" + assert md["stashed_facts"][0]["entity"] == "禮揚.訓練" + + +def test_mixed_facts_partition_correctly(tmp_path): + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + today = date.today() + extract = _stub_extract_factory([ + ExtractedFact( + type="semantic", text="short", + entity="禮揚.短期", importance=2, + valid_to_hint=(today + timedelta(days=2)).isoformat(), + ), + ExtractedFact( + type="semantic", text="long", + entity="禮揚.長期", importance=3, + valid_to_hint=None, + ), + ]) + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + + assert summary["fast_tracked"] == 1 + assert summary["stashed"] == 1 + + +# --------------------------------------------------------------------------- +# Failure path +# --------------------------------------------------------------------------- + + +def test_embed_failure_appends_to_jsonl(tmp_path): + db = _bootstrap_db(tmp_path) + + async def failing_embed(texts): + raise RuntimeError("voyage exploded") + + extract = _stub_extract_factory([]) + fail_log = tmp_path / "fail.jsonl" + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=failing_embed, extract_fn=extract, + failure_log_path=fail_log, + )) + + # Caller never sees the exception. 
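+    # (write_episode is expected to catch the embed error, roll back the partial
+    # transaction, and append a failure record to the JSONL instead of raising.)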
+ assert summary["episodes"] == 0 # rolled back + [(ep_count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() + assert ep_count == 0 + # Failure record landed in the JSONL. + assert fail_log.exists() + line = json.loads(fail_log.read_text().strip().splitlines()[-1]) + assert line["channel"] == "cattia" + assert line["msg_id"] == "m1" + assert "voyage exploded" in line["error"] + + +def test_extract_failure_still_records_episode(tmp_path): + """If kimi_extract raises, we still land the raw episode rows. The + weekly_promotion (W3-3) can re-extract from the raw text later.""" + db = _bootstrap_db(tmp_path) + embed, _ = _stub_embed_factory() + + async def failing_extract(*a, **kw): + raise RuntimeError("synthetic.new 503") + + summary = asyncio.run(write_episode( + user_msg="u", reply="a", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=failing_extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + assert summary["episodes"] == 2 + assert summary["fast_tracked"] == 0 + assert summary["stashed"] == 0 + + +def test_empty_turn_records_no_rows(tmp_path): + """Both user_msg and reply blank → no work done, no embed call.""" + db = _bootstrap_db(tmp_path) + + embed_called = [] + + async def embed(texts): + embed_called.append(texts) + return [] + + extract = _stub_extract_factory([]) + summary = asyncio.run(write_episode( + user_msg="", reply="", channel="cattia", + msg_id="m1", ts="2026-05-02 09:00:00", + conn=db, embed_fn=embed, extract_fn=extract, + failure_log_path=tmp_path / "fail.jsonl", + )) + # No embed call (both texts empty), but the schema accepts NULL embeddings + # for episodes so we still INSERT 2 rows. + assert embed_called == [] + assert summary["episodes"] == 2 diff --git a/tests/plugins/test_memdebug.py b/tests/plugins/test_memdebug.py new file mode 100644 index 00000000000..65380e271c4 --- /dev/null +++ b/tests/plugins/test_memdebug.py @@ -0,0 +1,175 @@ +"""Tests for plugins/memdebug/ — /memdebug slash command (W2-4).""" + +from __future__ import annotations + +import asyncio +import struct +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memdebug import ( + HELP_TEXT, + _do_rawsearch, + _do_semantic, + _format_facts_block, + _handle_async, + _handle_memdebug, + _truncate, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def test_truncate_short_string_unchanged(): + assert _truncate("abc", 10) == "abc" + + +def test_truncate_long_string_ellipsis(): + out = _truncate("a" * 100, 10) + assert out.endswith("…") and len(out) == 10 + + +# --------------------------------------------------------------------------- +# Help / empty / unknown args +# --------------------------------------------------------------------------- + + +def test_handle_empty_returns_help(): + assert _handle_memdebug("") == HELP_TEXT + assert _handle_memdebug(" ") == HELP_TEXT + + +def test_handle_rawsearch_empty_returns_help(): + assert _handle_memdebug("rawsearch") == HELP_TEXT + assert _handle_memdebug("rawsearch ") == HELP_TEXT + + +# --------------------------------------------------------------------------- +# Semantic / rawsearch via direct async helpers (so we 
control DB path) +# --------------------------------------------------------------------------- + + +def _seed_db(tmp_path): + """Seed a fixture memory.db on tmp_path and return its path.""" + db_path = tmp_path / "memories" / "memory.db" + conn = init_db(db_path) + conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + ("致妤生日 3/19", "禮揚.家庭", _vec(50), "2026-05-02 09:00:00"), + ) + conn.execute( + "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", + ("AI as digital twin", "禮揚.工作", _vec(60), "2026-05-01 09:00:00"), + ) + conn.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text) " + "VALUES (?, ?, ?, ?, ?)", + ("2026-05-02 17:00:00", "cattia", "msg-1", "user", "晚餐幾點開"), + ) + conn.commit() + conn.close() + return db_path + + +def test_do_semantic_returns_score_breakdown(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db_path = _seed_db(tmp_path) + + async def fake_embed(texts, **kw): + return [_vec(51) for _ in texts] + + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ + patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = asyncio.run(_do_semantic("when does my wife get home")) + + assert "/memdebug" in out + assert "致妤生日 3/19" in out # closest fact + # Score breakdown labels present. + assert "score=" in out and "sim=" in out and "age=" in out + # Reaction prompt present (until rich-embed UX lands). + assert "👍" in out and "👎" in out + # Log line written. + log_path = tmp_path / "memory.log" + assert log_path.exists() + last_line = log_path.read_text().strip().splitlines()[-1] + assert '"cmd": "memdebug"' in last_line + + +def test_do_semantic_db_missing_returns_friendly_message(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + missing = tmp_path / "absent.db" + with patch("plugins.memdebug.DEFAULT_DB", missing): + out = asyncio.run(_do_semantic("anything")) + assert "not yet initialised" in out + + +def test_do_rawsearch_finds_substring(tmp_path): + db_path = _seed_db(tmp_path) + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): + out = asyncio.run(_do_rawsearch("晚餐")) + assert "rawsearch" in out + assert "晚餐幾點開" in out + assert "cattia/user" in out + + +def test_do_rawsearch_empty_episodes_message(tmp_path): + db_path = tmp_path / "memories" / "memory.db" + init_db(db_path).close() # bootstrap schema, no rows + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): + out = asyncio.run(_do_rawsearch("anything")) + assert "rawsearch" in out + assert "Episodes are written by W3" in out + + +# --------------------------------------------------------------------------- +# Sync entry point + register() +# --------------------------------------------------------------------------- + + +def test_handle_memdebug_sync_dispatches_semantic(tmp_path, monkeypatch): + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + db_path = _seed_db(tmp_path) + + async def fake_embed(texts, **kw): + return [_vec(51) for _ in texts] + + with patch("plugins.memdebug.DEFAULT_DB", db_path), \ + patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ + patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): + out = _handle_memdebug("when does my wife get home") + assert "致妤生日" in out + + +def test_register_calls_register_command(): + 
"""register(ctx) must call ctx.register_command with the right name.""" + from plugins.memdebug import register + + captured = {} + + class FakeCtx: + def register_command(self, name, handler, description="", args_hint=""): + captured["name"] = name + captured["handler"] = handler + captured["args_hint"] = args_hint + captured["description"] = description + + register(FakeCtx()) + assert captured["name"] == "memdebug" + assert captured["args_hint"] == " | rawsearch " + assert callable(captured["handler"]) + # The handler must accept a single positional argument (raw_args). + assert captured["handler"].__code__.co_argcount == 1 diff --git a/tests/plugins/test_memreview.py b/tests/plugins/test_memreview.py new file mode 100644 index 00000000000..f20e7341790 --- /dev/null +++ b/tests/plugins/test_memreview.py @@ -0,0 +1,272 @@ +"""Tests for plugins/memreview/ — /memreview reject + /mem kill switch (W3-4).""" + +from __future__ import annotations + +import asyncio +import json +import struct +from datetime import date +from pathlib import Path +from unittest.mock import patch + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from plugins.memreview import ( + _MEMREVIEW_HELP, + _MEM_HELP, + _handle_mem, + _handle_memreview, + mem_off_active, + mem_off_path, + register, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# /memreview help / pending +# --------------------------------------------------------------------------- + + +def test_memreview_empty_returns_help(): + assert _handle_memreview("") == _MEMREVIEW_HELP + assert _handle_memreview(" ") == _MEMREVIEW_HELP + + +def test_memreview_pending_no_diffs(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("pending") + assert "no pending diffs" in out + + +def test_memreview_pending_lists_diffs(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + (pdir / "wk-2026-05-02.json").write_text("{}") + (pdir / "wk-2026-05-09.json").write_text("{}") + (pdir / "wk-2026-05-09.rejected").write_text("rejected") + + out = _handle_memreview("pending") + assert "wk-2026-05-02" in out + assert "wk-2026-05-09" in out + # Rejected one carries a flag. 
+ assert "(rejected — will be archived Mon)" in out + + +# --------------------------------------------------------------------------- +# /memreview reject +# --------------------------------------------------------------------------- + + +def test_memreview_reject_invalid_digest_id(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("reject not-a-digest") + assert "must look like" in out + + +def test_memreview_reject_unknown_digest(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_memreview("reject wk-2026-05-02") + assert "no pending diff" in out + + +def test_memreview_reject_writes_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + diff_path = pdir / "wk-2026-05-02.json" + diff_path.write_text("{}") + + out = _handle_memreview("reject wk-2026-05-02") + assert "Rejected." in out + sentinel = pdir / "wk-2026-05-02.rejected" + assert sentinel.exists() + assert "rejected" in sentinel.read_text().lower() + + +# --------------------------------------------------------------------------- +# /mem off / on / status +# --------------------------------------------------------------------------- + + +def test_mem_off_creates_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("off") + assert "disabled" in out + assert mem_off_path().exists() + assert mem_off_active() is True + + +def test_mem_on_removes_sentinel(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + mem_off_path().write_text("set", encoding="utf-8") + out = _handle_mem("on") + assert "enabled" in out + assert not mem_off_path().exists() + + +def test_mem_on_when_already_on_idempotent(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("on") + assert "already enabled" in out + + +def test_mem_status_off(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("status") + assert "🔊 ON" in out # default state + assert "(absent)" in out + + +def test_mem_status_on_with_pending(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + mem_off_path().write_text("set") + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + (pdir / "wk-2026-05-02.json").write_text("{}") + + out = _handle_mem("status") + assert "🔇 OFF" in out + assert "(present)" in out + assert "wk-2026-05-02" in out + + +def test_mem_help_on_unknown_subcommand(tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + out = _handle_mem("frobnicate") + assert "/mem off" in out and "/mem on" in out + + +# --------------------------------------------------------------------------- +# register() wires both commands +# --------------------------------------------------------------------------- + + +def test_register_registers_both_commands(): + captured = [] + + class FakeCtx: + def register_command(self, name, handler, description="", args_hint=""): + captured.append((name, args_hint)) + + register(FakeCtx()) + names = [c[0] for c in 
captured] + assert "memreview" in names + assert "mem" in names + + +# --------------------------------------------------------------------------- +# End-to-end: /memreview reject then weekly_apply archives as rejected +# --------------------------------------------------------------------------- + + +def test_reject_then_apply_archives_as_rejected(tmp_path, monkeypatch): + """Full flow: write pending diff -> /memreview reject -> weekly_apply + sees the sentinel and archives the diff with status=rejected.""" + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", + lambda: tmp_path, + ) + + db = init_db(tmp_path / "m.db") + digest_id = "wk-2026-05-02" + pdir = tmp_path / "memories" / "pending_diffs" + pdir.mkdir(parents=True) + diff_payload = { + "digest_id": digest_id, "candidate_episode_ids": [], + "promote": [{"entity": "禮揚.x", "fact": "f", "importance": 2, + "valid_from": "2026-05-02", "valid_to": None, + "source_episode_ids": []}], + "dedup_hits": [], "expire": [], "drop_as_noise": [], + } + (pdir / f"{digest_id}.json").write_text(json.dumps(diff_payload)) + + # User runs /memreview reject. + reply = _handle_memreview(f"reject {digest_id}") + assert "Rejected." in reply + + # Apply step picks up the sentinel. + from plugins.memory.sqlite_vec.promotion import weekly_apply + summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) + assert summary["applied"] is False + assert summary["reason"] == "rejected" + + # No new semantic_facts row (the promote was discarded). + [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert sf == 0 + + # Archive carries the .rejected suffix. + archived = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) + assert len(archived) == 1 + + +def test_mem_off_short_circuits_weekly_promotion(tmp_path, monkeypatch): + """Kill switch: /mem off must stop weekly_promotion from running its + Kimi call (which would otherwise burn tokens and write a diff).""" + monkeypatch.setattr( + "plugins.memreview._resolve_hermes_home", lambda: tmp_path + ) + monkeypatch.setattr( + "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", + lambda: tmp_path, + ) + monkeypatch.setenv("VOYAGE_API_KEY", "test-key") + + db = init_db(tmp_path / "m.db") + db.execute( + "INSERT INTO episodes(ts, channel, external_id, role, text, metadata) " + "VALUES (?, ?, ?, ?, ?, ?)", + ("2026-05-02 09:00", "cattia", "x", "user", "hi", + json.dumps({"stashed_facts": [{"text": "禮揚 likes X", + "entity": "禮揚.x", + "importance": 2}]})), + ) + db.commit() + + # Activate kill switch. + _handle_mem("off") + assert mem_off_active() is True + + kimi_called = [] + + async def kimi_should_not_be_called(prompt): + kimi_called.append(prompt) + return {} + + from plugins.memory.sqlite_vec.promotion import weekly_promotion + summary = asyncio.run(weekly_promotion(db, kimi_fn=kimi_should_not_be_called)) + assert summary["candidates"] == 0 + assert summary["skipped"] == "/mem off active" + # Kimi must not have been called. + assert kimi_called == [] diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/scripts/test_import_md.py b/tests/scripts/test_import_md.py new file mode 100644 index 00000000000..617b38f9c13 --- /dev/null +++ b/tests/scripts/test_import_md.py @@ -0,0 +1,210 @@ +"""Tests for ``scripts/import_md.py`` (W2-2 — MEMORY.md → semantic_facts). 
+ +Uses a stub embed_fn so no network is hit; live integration is exercised +end-to-end on chococlaw via the post-test ``--commit`` smoke run. +""" + +from __future__ import annotations + +import asyncio +import struct +from pathlib import Path + +import pytest + +from plugins.memory.sqlite_vec.store import VEC_DIM, init_db +from scripts.import_md import ( + Entry, + import_memory_md, + parse_memory_md, + slugify_topic, +) + + +def _vec(seed: int) -> bytes: + vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] + return struct.pack(f"{VEC_DIM}b", *vals) + + +# --------------------------------------------------------------------------- +# Slugify +# --------------------------------------------------------------------------- + + +def test_slugify_simple(): + assert slugify_topic("People") == "people" + assert slugify_topic("Working style") == "working_style" + assert slugify_topic("Privacy constraints") == "privacy_constraints" + + +def test_slugify_hierarchy_uses_dot(): + assert ( + slugify_topic("Tools & Access > ProtonMail Access") + == "tools_access.protonmail_access" + ) + + +def test_slugify_preserves_cjk(): + # CJK characters survive the punct->underscore collapse; only > is hierarchy. + assert slugify_topic("醫院 > 新樓") == "醫院.新樓" + assert slugify_topic("家庭 生活") == "家庭_生活" + + +def test_slugify_handles_empty_or_punct_only(): + assert slugify_topic("") == "unknown" + assert slugify_topic("!!!") == "unknown" + + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + + +SAMPLE_MD = """People: 禮揚 — physician +§ +Working style: digital twin model +§ +Privacy constraints: never include real PHI +§ +Tools & Access > ProtonMail: D4303@sinlau.org.tw +§ +""" + + +def test_parse_memory_md_basic(): + entries = parse_memory_md(SAMPLE_MD) + assert len(entries) == 4 + assert entries[0].topic == "People" + assert entries[0].fact == "禮揚 — physician" + assert entries[0].entity == "禮揚.people" + assert entries[3].entity == "禮揚.tools_access.protonmail" + + +def test_parse_skips_blocks_without_colon(): + md = "first entry: ok\n§\n\nno colon here\n§\nsecond: also ok\n§\n" + entries = parse_memory_md(md) + assert [e.topic for e in entries] == ["first entry", "second"] + + +def test_parse_handles_no_trailing_separator(): + md = "topic: content" + entries = parse_memory_md(md) + assert len(entries) == 1 + assert entries[0].fact == "content" + + +# --------------------------------------------------------------------------- +# import_memory_md (with stub embed) +# --------------------------------------------------------------------------- + + +def _make_stub_embed(): + counter = {"n": 0} + + async def stub(texts): + counter["n"] += 1 + return [_vec(i + 1) for i, _ in enumerate(texts)] + + return stub, counter + + +def test_dry_run_does_not_write(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=True) + ) + assert summary == { + "parsed": 4, "new": 4, "skipped_dup": 0, + "batches": 0, "dry_run": True, + } + # DB still empty (init_db ran but no inserts). 
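+    # (Re-opening with init_db is safe here: bootstrap_schema is idempotent, so
+    # the count below reflects only what the importer wrote.)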
+ conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 0 + + +def test_commit_inserts_and_populates_vec_facts(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary["new"] == 4 + assert summary["batches"] == 1 + assert counter["n"] == 1 # one Voyage call for 4 entries + + conn = init_db(db) + rows = conn.execute( + "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts ORDER BY id" + ).fetchall() + assert len(rows) == 4 + assert rows[0]["entity"] == "禮揚.people" + assert rows[0]["importance"] == 2 + assert rows[0]["valid_from"] == "2026-05-10" + assert rows[0]["valid_to"] is None + + # Trigger sf_after_insert mirrored every row into vec_facts. + [(vec_count,)] = conn.execute("SELECT count(*) FROM vec_facts").fetchall() + assert vec_count == 4 + + +def test_idempotent_rerun_inserts_nothing_new(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) + assert counter["n"] == 1 + + summary2 = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary2["new"] == 0 + assert summary2["skipped_dup"] == 4 + assert counter["n"] == 1 # second run made zero embed calls (no new rows) + + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 4 + + +def test_partial_update_only_embeds_new(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + stub, counter = _make_stub_embed() + + asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) + assert counter["n"] == 1 + + md.write_text(SAMPLE_MD + "\nNew topic: brand new fact\n§\n", encoding="utf-8") + summary = asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) + ) + assert summary["new"] == 1 + assert summary["skipped_dup"] == 4 + assert counter["n"] == 2 # one extra call for the one new entry + + +def test_rollback_on_embed_failure_leaves_db_unchanged(tmp_path): + md = tmp_path / "MEMORY.md" + md.write_text(SAMPLE_MD, encoding="utf-8") + db = tmp_path / "m.db" + + async def failing(texts): + raise RuntimeError("voyage exploded") + + with pytest.raises(RuntimeError, match="voyage exploded"): + asyncio.run( + import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=failing) + ) + conn = init_db(db) + [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() + assert count == 0 # transaction rolled back