diff --git a/plugins/memdebug/__init__.py b/plugins/memdebug/__init__.py deleted file mode 100644 index 2030192a1ee..00000000000 --- a/plugins/memdebug/__init__.py +++ /dev/null @@ -1,225 +0,0 @@ -"""``/memdebug`` Discord slash command — read-only retrieval diagnostic (W2-4). - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.2. - -Usage in chat: - - /memdebug <query> -> top-8 from semantic_facts (curated) - /memdebug rawsearch <query> -> top-8 from episodes (raw turns, forensics) - -The handler intentionally returns plain markdown text (not a Discord -embed): hermes-agent's ``register_command()`` surface is platform-neutral -and dispatches the same string to CLI / gateway / Slack. - -The ``rich-embed + 👍/👎 reaction buttons`` mode is open spec §8 work — we -ship the read-only diagnostic now so the F2 monitoring path (% of -top-1 hits judged useful) is unblocked. For v1, encourage the user -to react with 👍/👎 emoji on this message; a future cron will scrape -those reactions from the channel. -""" - -from __future__ import annotations - -import asyncio -import logging -import sqlite3 -import time -from pathlib import Path -from typing import List, Optional - -logger = logging.getLogger(__name__) - -def _resolve_hermes_home() -> Path: - """Use HERMES_HOME (set by hermes_constants) when available; else ~/.hermes.""" - try: - from hermes_constants import get_hermes_home - return Path(get_hermes_home()) - except Exception: - return Path.home() / ".hermes" - - -_HERMES_HOME = _resolve_hermes_home() -DEFAULT_DB = _HERMES_HOME / "memories" / "memory.db" -DEFAULT_K = 8 -LOG_PATH = _HERMES_HOME / "logs" / "memory.log" - - -def _format_facts_block(facts) -> str: - lines = ["**🧠 /memdebug** — top {} from `semantic_facts`\n".format(len(facts))] - for i, f in enumerate(facts, start=1): - recency = max(0.0, 1.0 - f.age_days / 365.0) # display-only; rerank weight uses 90-day half-life - lines.append( - f"`{i}.` **[{f.entity or '—'}]** {_truncate(f.fact, 90)}\n" - f" score=`{f.score:.3f}` sim=`{f.sim:.3f}` " - f"age=`{int(f.age_days)}d` importance=`{f.importance}` recency=`{recency:.2f}`" - ) - lines.append("\n_React 👍/👎 to flag this retrieval._") - return "\n".join(lines) - - -def _truncate(s: str, n: int) -> str: - s = s.replace("\n", " ") - return s if len(s) <= n else s[: n - 1] + "…" - - -def _format_episodes_block(rows: List[sqlite3.Row]) -> str: - if not rows: - return ( - "**🧠 /memdebug rawsearch** — `episodes` table is empty.\n\n" - "Episodes are written by W3 (per-turn write-back). After W3 " - "ships, this command will surface the raw conversation turns " - "behind any retrieval." - ) - lines = ["**🧠 /memdebug rawsearch** — top {} from `episodes`\n".format(len(rows))] - for i, r in enumerate(rows, start=1): - lines.append( - f"`{i}.` `[{r['ts']}]` `{r['channel']}/{r['role']}` " - f"{_truncate(r['text'], 120)}" - ) - return "\n".join(lines) - - -def _append_log(payload: dict) -> None: - """Append a /memdebug invocation to ~/.hermes/logs/memory.log.""" - import json - try: - LOG_PATH.parent.mkdir(parents=True, exist_ok=True) - with LOG_PATH.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload, ensure_ascii=False) + "\n") - except OSError as exc: - logger.warning("memory.log write failed: %s", exc) - - -def _open_memory_db(path: Optional[Path] = None) -> Optional[sqlite3.Connection]: - """Open the sqlite_vec memory.db. 
Returns None if it doesn't exist yet.""" - path = path or DEFAULT_DB - if not path.exists(): - return None - from plugins.memory.sqlite_vec.store import open_db - return open_db(path, check_same_thread=False) - - -async def _do_semantic(query: str) -> str: - from plugins.memory.sqlite_vec.read import read_memory - - conn = _open_memory_db() - if not conn: - return ( - "**🧠 /memdebug** — memory database not yet initialised.\n\n" - f"Expected at `{DEFAULT_DB}`. Run `scripts/import_md.py --commit` " - "or wait for the first agent turn after W2-3 cutover." - ) - try: - facts = await read_memory(query, conn, k=DEFAULT_K) - finally: - conn.close() - if not facts: - return f"**🧠 /memdebug** — no facts matched `{_truncate(query, 60)}`." - _append_log({ - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "memdebug", - "q": query, - "n": len(facts), - "ids": [f.id for f in facts], - }) - return _format_facts_block(facts) - - -async def _do_rawsearch(query: str) -> str: - """Substring scan of episodes.text. No vector query — this is forensics - mode for 'did this conversation happen', not semantic recall.""" - conn = _open_memory_db() - if not conn: - return ( - "**🧠 /memdebug rawsearch** — memory database not yet initialised." - ) - try: - like = f"%{query}%" - rows = conn.execute( - "SELECT ts, channel, role, text FROM episodes " - "WHERE text LIKE ? ORDER BY ts DESC LIMIT ?", - (like, DEFAULT_K), - ).fetchall() - finally: - conn.close() - _append_log({ - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "memdebug-raw", - "q": query, - "n": len(rows), - }) - return _format_episodes_block(rows) - - -HELP_TEXT = ( - "**/memdebug** — inspect what `read_memory` would return.\n" - "Usage:\n" - " `/memdebug <query>` — top-8 from `semantic_facts` (curated)\n" - " `/memdebug rawsearch <query>` — substring scan of `episodes` (forensics)\n" -) - - -async def _handle_async(raw_args: str) -> str: - args = (raw_args or "").strip() - if not args: - return HELP_TEXT - if args.lower().startswith("rawsearch"): - rest = args[len("rawsearch"):].strip() - if not rest: - return HELP_TEXT - try: - return await _do_rawsearch(rest) - except Exception as exc: - logger.exception("memdebug rawsearch failed") - return f"**/memdebug rawsearch** error: `{exc}`" - try: - return await _do_semantic(args) - except Exception as exc: - logger.exception("memdebug semantic failed") - return f"**/memdebug** error: `{exc}`" - - -def _handle_memdebug(raw_args: str) -> str: - """Sync entry point. PluginContext.register_command supports async - handlers natively, but ours may be dispatched on either pathway, so we - bridge via asyncio.run when no loop is running.""" - coro = _handle_async(raw_args) - try: - loop = asyncio.get_running_loop() - except RuntimeError: - loop = None - if loop is None: - return asyncio.run(coro) - # Already in a running loop — schedule and wait via a worker thread. - import threading - box = {} - - def runner(): - try: - box["r"] = asyncio.run(coro) - except BaseException as exc: - box["e"] = exc - - t = threading.Thread(target=runner, daemon=True, name="memdebug-handler") - t.start() - t.join(timeout=15.0) - if t.is_alive(): - return "**/memdebug** timed out (>15s)." 
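    # On timeout the daemon worker is abandoned, not cancelled; if it finishes
    # later, its boxed result is simply discarded.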
- if "e" in box: - return f"**/memdebug** error: `{box['e']}`" - return box.get("r", HELP_TEXT) - - -# --------------------------------------------------------------------------- -# Plugin registration -# --------------------------------------------------------------------------- - - -def register(ctx) -> None: - ctx.register_command( - "memdebug", - handler=_handle_memdebug, - description="Inspect Hermes long-term memory retrieval (top-8 + scores).", - args_hint=" | rawsearch ", - ) diff --git a/plugins/memdebug/plugin.yaml b/plugins/memdebug/plugin.yaml deleted file mode 100644 index 1945104cff6..00000000000 --- a/plugins/memdebug/plugin.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: memdebug -version: 0.1.0 -description: "/memdebug — inspect Hermes long-term memory retrieval. Read-only diagnostic for the sqlite_vec memory plugin (W2-4)." -author: "Li-yang Chen" diff --git a/plugins/memory/sqlite_vec/__init__.py b/plugins/memory/sqlite_vec/__init__.py deleted file mode 100644 index 3d54be27f66..00000000000 --- a/plugins/memory/sqlite_vec/__init__.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Hermes V3 memory plugin — sqlite-vec store with two-tier (hot/cold) design. - -Activate via $HERMES_HOME/config.yaml: - - memory: - provider: sqlite_vec - -Read path (W2-3): on each turn, ``prefetch(query)`` runs -``read_memory()`` in a worker thread (the gateway already owns the main -asyncio loop, so we can't ``asyncio.run`` inline) and returns a markdown -block prefixed with ``## Recent relevant memories``. The retrieved fact -IDs are cached per session and bumped via ``sync_turn()`` after the -reply is sent, per spec §4 hits accounting. - -Write path (W3-2): ``sync_turn`` now also fires ``write_episode`` — -records the raw turn into ``episodes``, runs Kimi extract, fast-tracks -short-lived facts directly into ``semantic_facts`` (≤ today + 30d), -stashes longer-lived facts into ``episodes.metadata.stashed_facts`` -for W3-3 weekly_promotion. Errors land in -``~/.hermes/logs/memory_write_failures.jsonl`` and never propagate. -""" - -from __future__ import annotations - -import asyncio -import logging -import threading -import time -from pathlib import Path -from typing import Any, Dict, List, Optional - -from agent.memory_provider import MemoryProvider - -from .read import ( - DEFAULT_K, - Fact, - bump_hits, - format_facts_for_prompt, - read_memory, -) -from .store import init_db -from .write import write_episode - -logger = logging.getLogger(__name__) - -PREFETCH_TIMEOUT_S = 5.0 # Voyage typical 200-400ms; 5s is the kill-switch. -# Write path: extract (~1-3s) + embed batch (~300ms) + INSERT (~5ms). -# 30s gives Kimi room to think while still bounding worst-case latency. -WRITE_TIMEOUT_S = 30.0 -RECALL_HEADER = "## Recent relevant memories" - - -def _mem_off_active() -> bool: - """True iff the global /mem off kill switch sentinel is present. - - Late import to avoid circular plugin loading: plugins.memreview can - import provider symbols indirectly via the slash-command surface. - """ - try: - from plugins.memreview import mem_off_active - return mem_off_active() - except Exception: - return False - - -def _default_db_path(hermes_home: str) -> Path: - return Path(hermes_home).expanduser() / "memories" / "memory.db" - - -def _run_coro_in_thread(coro_factory, timeout: float): - """Run an async coroutine in a worker thread with its own event loop. - - The hermes gateway runs its own asyncio loop, so ``asyncio.run`` from - this synchronous ABC method would raise "cannot be called from a - running event loop". 
We sidestep by spawning a dedicated thread with a - fresh loop, joining with a timeout. ``coro_factory`` is a zero-arg - callable that builds the coroutine inside the worker so the coroutine - is bound to the worker's loop. - """ - box: Dict[str, Any] = {} - - def runner(): - loop = asyncio.new_event_loop() - try: - box["result"] = loop.run_until_complete(coro_factory()) - except BaseException as exc: - box["error"] = exc - finally: - loop.close() - - t = threading.Thread(target=runner, daemon=True, name="sqlite-vec-worker") - t.start() - t.join(timeout) - if t.is_alive(): - raise TimeoutError(f"sqlite_vec worker exceeded {timeout}s") - if "error" in box: - raise box["error"] - return box.get("result") - - -def _synth_msg_id(session_id: str, user: str, asst: str, ts: str) -> str: - """Stable per-turn external_id for ON CONFLICT idempotency. - - We don't have the real Discord message ID at sync_turn time (the - ABC hook only exposes user/assistant content + session_id), so we - hash the turn into a 12-hex-char id via sha1 (the built-in ``hash()`` - is salted per process, so it could not give a stable id across - restarts). Bucketing ts to the minute - means a Discord redelivery within the same minute collapses; a - legitimate retry after >1 min would create a new row, which is - acceptable for episode-level forensics. - """ - raw = "\x1f".join((session_id, user, asst, ts[:16])) - return "h" + hashlib.sha1(raw.encode("utf-8")).hexdigest()[:12] - - -class SqliteVecMemoryProvider(MemoryProvider): - """Hermes V3 long-term memory provider (W2-3 read + W3-2 write).""" - - def __init__(self) -> None: - self._conn = None - self._db_path: Optional[Path] = None - self._last_fact_ids: Dict[str, List[int]] = {} - self._lock = threading.Lock() - - @property - def name(self) -> str: - return "sqlite_vec" - - def is_available(self) -> bool: - try: - import sqlite_vec # noqa: F401 - except ImportError: - return False - return True - - def initialize(self, session_id: str, **kwargs) -> None: - hermes_home = kwargs.get("hermes_home") - if not hermes_home: - from hermes_constants import get_hermes_home - hermes_home = str(get_hermes_home()) - self._db_path = _default_db_path(hermes_home) - self._conn = init_db(self._db_path, check_same_thread=False) - logger.info("sqlite_vec memory ready at %s", self._db_path) - - def system_prompt_block(self) -> str: - # Persona stays in flat files (SOUL.md, USER.md, life-dimensions.md); - # the recall block is emitted from prefetch() per turn. - return "" - - def prefetch(self, query: str, *, session_id: str = "") -> str: - """Embed query, fetch top-k facts, format as a markdown block. - - Returns "" on empty/trivial query, missing connection, or any - error (Voyage outage, rate limit, etc.) so the gateway never - blocks a reply on memory recall. Retrieved fact IDs are stashed - for the matching ``sync_turn()`` call to bump hits. - """ - if not self._conn or not query or not query.strip(): - return "" - - conn = self._conn - db_lock = self._lock - - async def _do() -> List[Fact]: - with db_lock: - return await read_memory(query, conn, k=DEFAULT_K) - - try: - facts = _run_coro_in_thread(_do, timeout=PREFETCH_TIMEOUT_S) - except Exception as exc: - logger.warning("sqlite_vec prefetch error: %s", exc) - return "" - - if not facts: - return "" - - with self._lock: - self._last_fact_ids[session_id] = [f.id for f in facts] - - body = format_facts_for_prompt(facts, with_meta=True) - return f"{RECALL_HEADER}\n{body}" - - def sync_turn( - self, - user_content: str, - assistant_content: str, - *, - session_id: str = "", - ) -> None: - """Bump hits on retrieved facts and persist the turn. 
- - Spec §4 + §5.1 — both happen AFTER the reply is delivered, so - this must never raise. ``bump_hits`` swallows its own DB errors; - ``write_episode`` swallows everything and writes failures to - ~/.hermes/logs/memory_write_failures.jsonl. - """ - if not self._conn: - return - conn = self._conn - db_lock = self._lock - - with self._lock: - ids = self._last_fact_ids.pop(session_id, []) - - ts = time.strftime("%Y-%m-%d %H:%M:%S") - msg_id = _synth_msg_id(session_id, user_content, assistant_content, ts) - channel = session_id or "unknown" - - async def _do_bump() -> None: - if ids: - with db_lock: - await bump_hits(ids, conn) - - async def _do_write() -> None: - with db_lock: - await write_episode( - user_msg=user_content, - reply=assistant_content, - channel=channel, - msg_id=msg_id, - ts=ts, - conn=conn, - ) - - try: - _run_coro_in_thread(_do_bump, timeout=PREFETCH_TIMEOUT_S) - except Exception as exc: - logger.warning("sqlite_vec bump_hits worker error: %s", exc) - - if user_content or assistant_content: - # /mem off kill switch: skip write_episode entirely. The hot path - # bump_hits ran above (read-side accounting), but no new - # episodes / facts are persisted. Read remains unaffected. - if _mem_off_active(): - logger.info("sqlite_vec write_episode skipped (/mem off)") - else: - try: - _run_coro_in_thread(_do_write, timeout=WRITE_TIMEOUT_S) - except Exception as exc: - logger.warning("sqlite_vec write_episode worker error: %s", exc) - - def get_tool_schemas(self) -> List[Dict[str, Any]]: - return [] - - def handle_tool_call(self, tool_name: str, args: Dict[str, Any]) -> Any: - from tools.registry import tool_error - return tool_error(f"sqlite_vec exposes no tools (got {tool_name!r})") - - def shutdown(self) -> None: - if getattr(self, "_conn", None): - self._conn.close() - self._conn = None diff --git a/plugins/memory/sqlite_vec/embed.py b/plugins/memory/sqlite_vec/embed.py deleted file mode 100644 index ae114ebf670..00000000000 --- a/plugins/memory/sqlite_vec/embed.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Voyage AI embedding wrapper for the sqlite_vec memory plugin. - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §1.4 (locked -decision) and §4 (read path) — voyage-3.5-lite, 512 dim, int8. - -Returns each embedding as a 512-byte BLOB ready to insert into -``semantic_facts.embedding``. The store-side trigger wraps the BLOB with -``vec_int8()`` when copying it into the ``vec_facts`` virtual table. - -Public API: - - await voyage_embed(["text 1", "text 2"]) -> [b"...512 bytes...", b"..."] -""" - -from __future__ import annotations - -import asyncio -import logging -import os -from typing import List, Optional, Sequence - -import httpx - -logger = logging.getLogger(__name__) - -VOYAGE_URL = "https://api.voyageai.com/v1/embeddings" -VOYAGE_MODEL = "voyage-3.5-lite" -VOYAGE_BATCH = 128 # Voyage API per-call ceiling -VOYAGE_DIM = 512 -VOYAGE_DTYPE = "int8" -DEFAULT_TIMEOUT = 30.0 -MAX_RETRIES = 3 - - -class VoyageError(RuntimeError): - """Raised when Voyage API repeatedly fails.""" - - -def _api_key() -> str: - key = os.environ.get("VOYAGE_API_KEY") - if not key: - raise VoyageError( - "VOYAGE_API_KEY is not set. Add it to ~/.hermes/.env and " - "expose it to the hermes container via docker-compose." 
) - return key - - -def _to_int8_blob(values: Sequence[int]) -> bytes: - """Pack a list of int8 values (-128..127) into a raw 512-byte BLOB.""" - if len(values) != VOYAGE_DIM: - raise VoyageError( - f"Voyage returned {len(values)}-dim vector, expected {VOYAGE_DIM}" - ) - return bytes((v + 256) & 0xFF for v in values) # signed -> unsigned byte - - -async def _post_batch( - client: httpx.AsyncClient, - texts: List[str], - api_key: str, -) -> List[bytes]: - payload = { - "model": VOYAGE_MODEL, - "input": texts, - "output_dtype": VOYAGE_DTYPE, - "output_dimension": VOYAGE_DIM, - } - headers = {"Authorization": f"Bearer {api_key}"} - - for attempt in range(1, MAX_RETRIES + 1): - try: - r = await client.post( - VOYAGE_URL, headers=headers, json=payload, timeout=DEFAULT_TIMEOUT - ) - except httpx.RequestError as exc: - if attempt == MAX_RETRIES: - raise VoyageError(f"network error: {exc}") from exc - await asyncio.sleep(2 ** (attempt - 1)) - continue - - if 500 <= r.status_code < 600: - if attempt == MAX_RETRIES: - raise VoyageError(f"Voyage 5xx: {r.status_code} {r.text[:200]}") - await asyncio.sleep(2 ** (attempt - 1)) - continue - - if r.status_code >= 400: - raise VoyageError(f"Voyage {r.status_code}: {r.text[:200]}") - - body = r.json() - items = body.get("data", []) - if len(items) != len(texts): - raise VoyageError( - f"Voyage returned {len(items)} items for {len(texts)} inputs" - ) - # Voyage returns embeddings in input order (per docs/index field). - items.sort(key=lambda d: d.get("index", 0)) - return [_to_int8_blob(d["embedding"]) for d in items] - - raise VoyageError("retry loop exhausted unexpectedly") - - -async def voyage_embed( - texts: List[str], - *, - dim: int = VOYAGE_DIM, - dtype: str = VOYAGE_DTYPE, - client: Optional[httpx.AsyncClient] = None, -) -> List[bytes]: - """Embed `texts` and return one int8 BLOB per input. - - Batches automatically at Voyage's 128-input ceiling. Retries 3x with - exponential backoff on 5xx and network errors. Raises VoyageError on - auth failure, 4xx, or repeated 5xx. - - `dim` and `dtype` are accepted for API symmetry but locked to the spec - values; passing different values raises immediately so config drift - fails loudly instead of silently corrupting embeddings. - """ - if dim != VOYAGE_DIM or dtype != VOYAGE_DTYPE: - raise VoyageError( - f"dim/dtype locked to {VOYAGE_DIM}/{VOYAGE_DTYPE} per spec §1.4" - ) - if not texts: - return [] - - api_key = _api_key() - owns_client = client is None - client = client or httpx.AsyncClient() - try: - out: List[bytes] = [] - for i in range(0, len(texts), VOYAGE_BATCH): - batch = texts[i : i + VOYAGE_BATCH] - out.extend(await _post_batch(client, batch, api_key)) - return out - finally: - if owns_client: - await client.aclose() diff --git a/plugins/memory/sqlite_vec/extract.py b/plugins/memory/sqlite_vec/extract.py deleted file mode 100644 index caeffb1e245..00000000000 --- a/plugins/memory/sqlite_vec/extract.py +++ /dev/null @@ -1,299 +0,0 @@ -"""Kimi-driven extraction from a single Discord turn (W3-1). - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.2. - -The ``EXTRACT_PROMPT`` constant is **verbatim** from the spec — do not -paraphrase. Drift here directly compromises the F2 monitoring path -(downstream weekly review will see noise). - -Four-step flow: - - 1. Caller calls ``kimi_extract(user, assistant, channel, ts)``. - 2. We short-circuit to ``[]`` if ``channel`` is in - ``PHI_BLACKLIST_CHANNELS`` — never round-trip hospital data - through the cloud LLM. - 3. 
Otherwise we POST to synthetic.new's OpenAI-compatible - chat-completions endpoint with ``temperature=0.1`` and - ``response_format=json_object`` (Kimi K2.5 supports the OpenAI - structured-output flag). - 4. Parse the JSON list, validate the per-item shape, return - ``list[ExtractedFact]``. Bad rows are dropped, not fatal. - -Token cost is logged to ``memory.log`` so weekly review can spot a -runaway extract budget. -""" - -from __future__ import annotations - -import json -import logging -import os -import time -from dataclasses import dataclass, field -from pathlib import Path -from typing import List, Optional - -import httpx - -logger = logging.getLogger(__name__) - -# Spec §1.4 lock — Kimi K2.5 via synthetic.new. -SYNTHETIC_URL = "https://api.synthetic.new/v1/chat/completions" -EXTRACT_MODEL = "hf:moonshotai/Kimi-K2.5" -EXTRACT_TEMPERATURE = 0.1 -EXTRACT_TIMEOUT = 30.0 -EXTRACT_MAX_TOKENS = 1024 # extract output is a small JSON list - -# Spec §5.1 — channels whose content never leaves the host as PHI. -PHI_BLACKLIST_CHANNELS = frozenset({"cmio", "cbme", "medicine"}) - -# Spec §5.2 EXTRACT_PROMPT — copy verbatim. The {placeholders} are -# substituted at call time. -EXTRACT_PROMPT = """You extract durable memories about 禮揚 from this Discord turn. -Output a JSON list. Empty list [] if nothing memorable. - -HARD RULES — these override everything else: -1. NEVER extract: hospital data, patient names, 病歷號, 身分證字號, lab results, - diagnoses about real people, hospital policy specifics, hospital colleague names. -2. NEVER extract pleasantries (好的/收到/早安/明白/thanks). Return [] if turn is just this. -3. If turn metadata says synthetic=true (cron-produced), return [] UNLESS content - contains a NEW commitment by 禮揚 (e.g. "排了 5/22 跟 Y 開會"). -4. If unsure whether content violates rule 1, ERR ON THE SIDE OF NOT EXTRACTING. - -Each item: - type: "episodic" | "semantic" - text: short statement, zh-TW or English (match source language) - entity: nullable. Use ".家庭", ".工作", ".研究興趣", ".健康", etc. namespacing under "禮揚." - importance: 1-5 - valid_to_hint: ISO date if turn implies expiry. "今晚"→tomorrow, "這週"→Sunday, "這個月"→end-of-month. - -Skip facts that duplicate something said in the last 5 turns. - -TURN: -[{ts}] [{channel}] user: {user} -[{ts}] [{channel}] assistant: {assistant} -""" - - -@dataclass -class ExtractedFact: - """One fact extracted from a turn. Distinct from the read-side ``Fact``.""" - - type: str # "episodic" | "semantic" - text: str - entity: Optional[str] - importance: int - valid_to_hint: Optional[str] = None - raw: dict = field(default_factory=dict) # original Kimi output for forensics - - -class ExtractError(RuntimeError): - """Raised when synthetic.new is unreachable or returns malformed payload.""" - - -def _resolve_hermes_home() -> Path: - try: - from hermes_constants import get_hermes_home - return Path(get_hermes_home()) - except Exception: - return Path.home() / ".hermes" - - -def _default_log_path() -> Path: - return _resolve_hermes_home() / "logs" / "memory.log" - - -def _read_synthetic_api_key() -> str: - """Resolve the synthetic.new API key. - - Priority: - 1. ``SYNTHETIC_API_KEY`` env var (test-friendly override). - 2. ``auth.json`` ``custom:synthetic`` pool, first non-expired token. - - Raises ``ExtractError`` if no key is found — the caller decides - whether that should bubble up (W3-2 wraps and falls back). 
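
    A minimal auth.json shape this resolver accepts (illustrative sketch;
    only the keys actually read below are shown):

        {"credential_pool": {"custom:synthetic": [{"access_token": "sk-..."}]}}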
- """ - env = os.environ.get("SYNTHETIC_API_KEY") - if env: - return env - - auth_path = _resolve_hermes_home() / "auth.json" - if auth_path.exists(): - try: - data = json.loads(auth_path.read_text(encoding="utf-8")) - except json.JSONDecodeError as exc: - raise ExtractError(f"auth.json parse: {exc}") from exc - # The real auth.json uses "credential_pool" (singular). Older or - # alternate layouts may use the plural form or top-level keys, so we - # check all three for resilience across hermes-agent versions. - pool = ( - (data.get("credential_pool") or {}).get("custom:synthetic") - or (data.get("credential_pools") or {}).get("custom:synthetic") - or data.get("custom:synthetic") - or [] - ) - for entry in pool: - tok = entry.get("access_token") - if tok: - return tok - - raise ExtractError( - "synthetic.new API key not found. Set SYNTHETIC_API_KEY or " - "ensure auth.json has a custom:synthetic credential." - ) - - -def _append_log(payload: dict, log_path: Optional[Path] = None) -> None: - log_path = log_path or _default_log_path() - try: - log_path.parent.mkdir(parents=True, exist_ok=True) - with log_path.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload, ensure_ascii=False) + "\n") - except OSError as exc: - logger.warning("memory.log write failed: %s", exc) - - -def _coerce_fact(raw: dict) -> Optional[ExtractedFact]: - """Validate one Kimi-emitted fact dict; return None on shape errors.""" - t = raw.get("type") - text = raw.get("text") - if t not in ("episodic", "semantic"): - return None - if not isinstance(text, str) or not text.strip(): - return None - importance = raw.get("importance", 2) - try: - importance = int(importance) - except (TypeError, ValueError): - importance = 2 - importance = max(1, min(5, importance)) - entity = raw.get("entity") - if entity is not None and not isinstance(entity, str): - entity = None - valid_to_hint = raw.get("valid_to_hint") - if valid_to_hint is not None and not isinstance(valid_to_hint, str): - valid_to_hint = None - return ExtractedFact( - type=t, - text=text.strip(), - entity=entity, - importance=importance, - valid_to_hint=valid_to_hint, - raw=raw, - ) - - -async def kimi_extract( - user: str, - assistant: str, - channel: str, - ts: str, - *, - client: Optional[httpx.AsyncClient] = None, - log_path: Optional[Path] = None, -) -> List[ExtractedFact]: - """Extract durable memories from one Discord turn. - - Returns ``[]`` (no API call) when ``channel`` is PHI-blacklisted, when - both ``user`` and ``assistant`` are empty, or when Kimi returns - malformed JSON. Otherwise raises ``ExtractError`` on transport - failure or non-2xx response — caller (W3-2) is responsible for - fallback bookkeeping (failure JSONL log). 
- """ - if channel in PHI_BLACKLIST_CHANNELS: - return [] - if not (user or "").strip() and not (assistant or "").strip(): - return [] - - api_key = _read_synthetic_api_key() - prompt = EXTRACT_PROMPT.format(ts=ts, channel=channel, user=user, assistant=assistant) - - payload = { - "model": EXTRACT_MODEL, - "messages": [{"role": "user", "content": prompt}], - "temperature": EXTRACT_TEMPERATURE, - "max_tokens": EXTRACT_MAX_TOKENS, - "response_format": {"type": "json_object"}, - } - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - } - - owns_client = client is None - client = client or httpx.AsyncClient() - t0 = time.perf_counter() - try: - try: - r = await client.post( - SYNTHETIC_URL, headers=headers, json=payload, timeout=EXTRACT_TIMEOUT - ) - except httpx.RequestError as exc: - raise ExtractError(f"synthetic.new network error: {exc}") from exc - if r.status_code >= 400: - raise ExtractError(f"synthetic.new {r.status_code}: {r.text[:200]}") - body = r.json() - finally: - if owns_client: - await client.aclose() - elapsed_ms = (time.perf_counter() - t0) * 1000.0 - - choice = (body.get("choices") or [{}])[0] - content = (choice.get("message") or {}).get("content", "") - usage = body.get("usage") or {} - - parsed = _parse_json_list(content) - facts = [f for f in (_coerce_fact(item) for item in parsed) if f is not None] - - _append_log( - { - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "kimi_extract", - "channel": channel, - "ms": round(elapsed_ms, 2), - "n_raw": len(parsed), - "n_kept": len(facts), - "tokens_in": usage.get("prompt_tokens"), - "tokens_out": usage.get("completion_tokens"), - }, - log_path=log_path, - ) - return facts - - -def _parse_json_list(content: str) -> list: - """Tolerantly extract a JSON list from Kimi's ``content`` field. - - The prompt asks for a JSON list, but Kimi may wrap it in an object - (when response_format=json_object) like ``{"facts": [...]}`` or - return ``{}`` for empty. We accept any of: - - bare ``[...]`` - - ``{"facts": [...]}`` / ``{"items": [...]}`` / ``{"results": [...]}`` - - ``{}`` (treated as empty list) - """ - if not content: - return [] - try: - data = json.loads(content) - except json.JSONDecodeError: - return [] - if isinstance(data, list): - return data - if isinstance(data, dict): - # Kimi K2.5 with response_format=json_object often wraps the - # answer in a dict like {"analysis": ..., "extracted_memories": [...]}. - # Try the canonical key names first, then fall back to the first list-valued field. - for key in ("facts", "items", "results", "memories", "extracted_memories", "data"): - v = data.get(key) - if isinstance(v, list): - return v - # Last-ditch fallback: any top-level list value wins. - for v in data.values(): - if isinstance(v, list): - return v - # Kimi sometimes returns a single fact as a flat dict (no list wrapper). - # Detect by the presence of the canonical fact keys. - if "type" in data and "text" in data: - return [data] - return [] - return [] diff --git a/plugins/memory/sqlite_vec/plugin.yaml b/plugins/memory/sqlite_vec/plugin.yaml deleted file mode 100644 index 4e3b24133c4..00000000000 --- a/plugins/memory/sqlite_vec/plugin.yaml +++ /dev/null @@ -1,7 +0,0 @@ -name: sqlite_vec -version: 0.1.0 -description: "Hermes V3 long-term memory — local sqlite-vec store with hot episodes / cold curated semantic_facts, weekly human-approved promotion." 
-pip_dependencies: - - sqlite-vec>=0.1.6 -hooks: - - on_pre_compress diff --git a/plugins/memory/sqlite_vec/promotion.py b/plugins/memory/sqlite_vec/promotion.py deleted file mode 100644 index 6a4404b752e..00000000000 --- a/plugins/memory/sqlite_vec/promotion.py +++ /dev/null @@ -1,862 +0,0 @@ -"""Weekly promotion + apply core logic (W3-3). - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.3 + §5.4. - -Two entry points, both invoked from cron-driven thin wrappers in -``~/.hermes/scripts/`` (so they sit inside HERMES_HOME/scripts, the only -location the hermes scheduler will exec): - - weekly_promotion() - reads 7 days of pending episodes, runs one - Kimi-thinking call to produce a promotion diff, - saves it to pending_diffs/<digest-id>.json, - renders + posts the digest to #memory-review. - Does NOT stamp episodes.promoted_at. - - weekly_apply() - purges pending_diffs older than 14 days, loads - the latest, checks for the rejection sentinel - file, and either archives-as-rejected or - applies the diff atomically (promote / dedup / - expire) and stamps episodes.promoted_at. - -The split lets the user reject Sunday's diff with /memreview reject <digest-id> -any time before Monday's apply fires. -""" - -from __future__ import annotations - -import asyncio -import json -import logging -import os -import sqlite3 -import struct -import time -from dataclasses import dataclass, field -from datetime import date, datetime, timedelta -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -import httpx - -from .embed import voyage_embed -from .extract import ( - EXTRACT_TIMEOUT, - PHI_BLACKLIST_CHANNELS, - SYNTHETIC_URL, - _read_synthetic_api_key, -) - -logger = logging.getLogger(__name__) - -PROMOTION_MODEL = "hf:moonshotai/Kimi-K2-Thinking" -PROMOTION_FALLBACK_MODEL = "hf:moonshotai/Kimi-K2.5" -PROMOTION_TEMPERATURE = 0.2 -PROMOTION_MAX_TOKENS = 8192 # diff JSON can be substantial across 7 days -PROMOTION_TIMEOUT = 120.0 # thinking-mode + 100+ episodes - -PROMOTION_NEIGHBOR_K = 20 # spec §5.3: per-candidate vec_search k=20 -PROMOTION_LOOKBACK_DAYS = 7 -PENDING_DIFF_TTL_DAYS = 14 - -DISCORD_API = "https://discord.com/api/v10/channels/{channel_id}/messages" - - -# --------------------------------------------------------------------------- -# Prompt — designed to match spec §5.3 schema verbatim -# --------------------------------------------------------------------------- - -PROMOTION_PROMPT = """You are running the weekly memory promotion review for 禮揚's personal AI. - -Below is one week of conversation episodes that have not yet been reviewed. -Each candidate carries any 'stashed_facts' that the per-turn extractor -recorded in its metadata. You also see, per candidate, the top-20 existing -semantic_facts that are nearest by embedding distance — use these to decide -whether a candidate fact duplicates something already known. - -HARD RULES — these override everything else: -1. NEVER promote: hospital data, patient names, 病歷號, 身分證字號, lab results, - diagnoses about real people, hospital policy specifics, hospital colleague names. -2. Pleasantries (好的/收到/早安/明白/thanks) → drop_as_noise. -3. Synthetic episodes (synthetic=true) — promote ONLY if they contain a NEW - commitment by 禮揚 (a meeting scheduled, a habit declared, a decision made). -4. If a candidate stashed_fact is semantically captured by an existing fact - (sim ≥ 0.92), prefer dedup_hits over creating a new row. -5. 
Conservative importance: most facts are 2; only use 4-5 for permanent - identity / family / strong commitments. - -For each candidate, decide one of four actions: - - A. PROMOTE — new fact worth keeping. Emit into "promote". - valid_to: ISO date or null (null = permanent). - importance: 1-5 (default 2). - source_episode_ids: which candidate episodes contributed. - - B. DEDUP_HIT — candidate fact reaffirms an existing fact. Emit into - "dedup_hits" with the existing fact id and action="bump_hits" - (just touch the timestamp) or "refine_text" (mild rephrasing - worth applying). - - C. EXPIRE — an existing fact is contradicted or has gone stale. - Emit into "expire" with existing_fact_id, valid_to=today, reason. - - D. DROP_AS_NOISE — pleasantry, low signal, or duplicates within the - week. Emit into "drop_as_noise" with the episode ids and reason. - -Every candidate episode_id must appear under exactly one action above -(in promote.source_episode_ids OR dedup_hits.source_episode_ids OR -drop_as_noise.episode_ids). The "expire" section can reference NEW -existing_fact_ids that are independent of this week's candidates — -that's fine. - -Output ONE JSON object with this exact schema: - -{{ - "digest_id": "{digest_id}", - "candidate_episode_ids": [], - "promote": [ - {{ - "entity": "禮揚.", - "fact": "single-sentence statement", - "importance": 1..5, - "valid_from": "{today}", - "valid_to": "YYYY-MM-DD" | null, - "source_episode_ids": [int, ...] - }} - ], - "dedup_hits": [ - {{ - "existing_fact_id": int, - "action": "bump_hits" | "refine_text", - "refined_text": "string only if action=refine_text", - "source_episode_ids": [int, ...] - }} - ], - "expire": [ - {{ - "existing_fact_id": int, - "valid_to": "{today}", - "reason": "short reason" - }} - ], - "drop_as_noise": [ - {{ - "episode_ids": [int, ...], - "reason": "short reason" - }} - ] -}} - -CANDIDATES (week of {week_label}): -{candidates_block} - -NEAREST-NEIGHBOR EXISTING FACTS (one block per candidate stashed_fact): -{neighbors_block} -""" - - -# --------------------------------------------------------------------------- -# Data classes -# --------------------------------------------------------------------------- - - -@dataclass -class WeekDigest: - """Loaded form of pending_diffs/<digest-id>.json.""" - - digest_id: str - candidate_episode_ids: List[int] - promote: List[Dict[str, Any]] = field(default_factory=list) - dedup_hits: List[Dict[str, Any]] = field(default_factory=list) - expire: List[Dict[str, Any]] = field(default_factory=list) - drop_as_noise: List[Dict[str, Any]] = field(default_factory=list) - raw: Dict[str, Any] = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "WeekDigest": - return cls( - digest_id=data.get("digest_id", ""), - candidate_episode_ids=list(data.get("candidate_episode_ids") or []), - promote=list(data.get("promote") or []), - dedup_hits=list(data.get("dedup_hits") or []), - expire=list(data.get("expire") or []), - drop_as_noise=list(data.get("drop_as_noise") or []), - raw=data, - ) - - def to_dict(self) -> Dict[str, Any]: - return { - "digest_id": self.digest_id, - "candidate_episode_ids": self.candidate_episode_ids, - "promote": self.promote, - "dedup_hits": self.dedup_hits, - "expire": self.expire, - "drop_as_noise": self.drop_as_noise, - } - - -# --------------------------------------------------------------------------- -# Path helpers -# --------------------------------------------------------------------------- - - -def _resolve_hermes_home() -> Path: - try: - from hermes_constants import get_hermes_home - return Path(get_hermes_home()) - except Exception: - return Path.home() / ".hermes" - - -def pending_dir() -> Path: - p = _resolve_hermes_home() / "memories" / "pending_diffs" - p.mkdir(parents=True, exist_ok=True) - return p - - -def archive_dir() -> Path: - p = _resolve_hermes_home() / "memories" / "diff_archive" - p.mkdir(parents=True, exist_ok=True) - return p - - -def memory_log_path() -> Path: - return _resolve_hermes_home() / "logs" / "memory.log" - - -def db_path() -> Path: - return _resolve_hermes_home() / "memories" / "memory.db" - - -def digest_id_for(today: Optional[date] = None) -> str: - """ISO-date-based digest id: wk-YYYY-MM-DD.""" - today = today or date.today() - return f"wk-{today.isoformat()}" - - -def rejection_sentinel(digest_id: str) -> Path: - return pending_dir() / f"{digest_id}.rejected" - - -def pending_path(digest_id: str) -> Path: - return pending_dir() / f"{digest_id}.json" - - -# --------------------------------------------------------------------------- -# Shared logging -# --------------------------------------------------------------------------- - - -def _log_event(payload: Dict[str, Any]) -> None: - p = memory_log_path() - try: - p.parent.mkdir(parents=True, exist_ok=True) - with p.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") - except OSError as exc: - logger.warning("memory.log write failed: %s", exc) - - -# --------------------------------------------------------------------------- -# Promotion: candidate gathering + neighbor search -# --------------------------------------------------------------------------- - - -def _read_pending_episodes(conn: sqlite3.Connection, days: int = PROMOTION_LOOKBACK_DAYS) -> List[Dict[str, Any]]: - rows = conn.execute( - """ - SELECT id, ts, channel, role, text, metadata, synthetic - FROM episodes - WHERE promoted_at IS NULL - AND ts > datetime('now', ?) - ORDER BY ts - """, - (f"-{days} days",), - ).fetchall() - out: List[Dict[str, Any]] = [] - for r in rows: - meta = {} - if r["metadata"]: - try: - meta = json.loads(r["metadata"]) - except json.JSONDecodeError: - meta = {} - out.append({ - "id": r["id"], - "ts": r["ts"], - "channel": r["channel"], - "role": r["role"], - "text": r["text"], - "synthetic": bool(r["synthetic"]), - "stashed_facts": meta.get("stashed_facts") or [], - }) - return out - - -async def _vec_search(conn: sqlite3.Connection, query: str, k: int = PROMOTION_NEIGHBOR_K) -> List[Dict[str, Any]]: - """Find k nearest existing semantic_facts to ``query`` text. - - Returns rows like {id, fact, entity, importance, sim}. - """ - [qvec] = await voyage_embed([query]) - rows = conn.execute( - """ - WITH knn AS ( - SELECT fact_id, distance - FROM vec_facts - WHERE embedding MATCH vec_int8(?) AND k = ? - ) - SELECT sf.id, sf.fact, sf.entity, sf.importance, - (1 - knn.distance) AS sim - FROM knn - JOIN semantic_facts sf ON sf.id = knn.fact_id - WHERE sf.state = 'active' - AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) - ORDER BY sim DESC - """, - (qvec, k), - ).fetchall() - return [dict(r) for r in rows] - - -def _format_candidates_block(candidates: List[Dict[str, Any]]) -> str: - """Render candidate episodes as a compact block for the prompt.""" - lines = [] - for c in candidates: - marker = "🤖" if c["synthetic"] else "👤" - text = c["text"].replace("\n", " ") - if len(text) > 200: - text = text[:200] + "..." 
- line = f"#{c['id']} [{c['ts']}] {marker} {c['channel']}/{c['role']}: {text}" - lines.append(line) - for sf in c["stashed_facts"]: - sf_text = sf.get("text", "") - sf_entity = sf.get("entity") or "?" - sf_vth = sf.get("valid_to_hint") or "permanent" - lines.append( - f" ↳ stashed: [{sf_entity}] {sf_text[:120]} " - f"(importance={sf.get('importance', 2)}, valid_to_hint={sf_vth})" - ) - return "\n".join(lines) if lines else "(no candidates)" - - -def _format_neighbors_block(neighbors_by_fact: Dict[str, List[Dict[str, Any]]]) -> str: - """One section per candidate stashed_fact, listing its k nearest existing facts.""" - if not neighbors_by_fact: - return "(no candidate stashed_facts to compare against)" - sections = [] - for stashed_text, rows in neighbors_by_fact.items(): - header = f"--- nearest to: {stashed_text[:120]} ---" - body_lines = [ - f" #{r['id']} sim={r['sim']:.3f} [{r['entity'] or '—'}] {r['fact'][:120]}" - for r in rows[:5] # top 5 per stashed fact keeps prompt short - ] - sections.append(header + "\n" + "\n".join(body_lines)) - return "\n\n".join(sections) - - -# --------------------------------------------------------------------------- -# Kimi thinking call -# --------------------------------------------------------------------------- - - -class PromotionError(RuntimeError): - pass - - -async def _call_kimi_thinking(prompt: str, *, client: Optional[httpx.AsyncClient] = None) -> Dict[str, Any]: - """Single Kimi call producing the promotion diff JSON object. - - Tries Kimi-K2-Thinking first; falls back to Kimi-K2.5 on 4xx model-not-found. - """ - api_key = _read_synthetic_api_key() - headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} - - payload = { - "model": PROMOTION_MODEL, - "messages": [{"role": "user", "content": prompt}], - "temperature": PROMOTION_TEMPERATURE, - "max_tokens": PROMOTION_MAX_TOKENS, - "response_format": {"type": "json_object"}, - } - - owns = client is None - client = client or httpx.AsyncClient() - try: - try: - r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) - except httpx.RequestError as exc: - raise PromotionError(f"synthetic.new network: {exc}") from exc - if r.status_code == 404 or (r.status_code == 400 and "model" in r.text.lower()): - logger.warning("Kimi-Thinking unavailable; falling back to %s", PROMOTION_FALLBACK_MODEL) - payload["model"] = PROMOTION_FALLBACK_MODEL - r = await client.post(SYNTHETIC_URL, headers=headers, json=payload, timeout=PROMOTION_TIMEOUT) - if r.status_code >= 400: - raise PromotionError(f"synthetic.new {r.status_code}: {r.text[:300]}") - body = r.json() - finally: - if owns: - await client.aclose() - - content = ((body.get("choices") or [{}])[0].get("message") or {}).get("content", "") - try: - diff = json.loads(content) - except json.JSONDecodeError as exc: - raise PromotionError(f"Kimi returned non-JSON: {exc}: {content[:200]}") from exc - if not isinstance(diff, dict): - raise PromotionError(f"Kimi returned non-object: {type(diff).__name__}") - - usage = body.get("usage") or {} - _log_event({ - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "weekly_promotion_kimi", - "model": payload["model"], - "tokens_in": usage.get("prompt_tokens"), - "tokens_out": usage.get("completion_tokens"), - }) - return diff - - -# --------------------------------------------------------------------------- -# Digest rendering (spec §5.4) -# --------------------------------------------------------------------------- - - -def render_digest_markdown(diff: 
WeekDigest, candidates: List[Dict[str, Any]]) -> str: - n_user = sum(1 for c in candidates if not c["synthetic"]) - n_synth = sum(1 for c in candidates if c["synthetic"]) - header = ( - f"# 📚 Weekly Memory Review — {diff.digest_id.removeprefix('wk-')}\n" - f"{len(candidates)} episodes scanned this week " - f"({n_user} user/assistant + {n_synth} cron-synthetic).\n" - f"24 h to reject via `/memreview reject {diff.digest_id}`; default approve.\n" - ) - - sections = [] - - if diff.promote: - lines = [f"## ⬆️ Promote to permanent ({len(diff.promote)})"] - for p in diff.promote: - entity = p.get("entity", "?") - fact = p.get("fact", "") - importance = p.get("importance", 2) - valid_to = p.get("valid_to") or "永久" - srcs = p.get("source_episode_ids") or [] - src_str = ( - ", ".join(f"#{i}" for i in srcs[:5]) - + (f" +{len(srcs)-5}" if len(srcs) > 5 else "") - ) - lines.append(f"- 🆕 **{entity}**: \"{fact}\"") - lines.append(f" evidence: {src_str} | importance {importance} | valid_to: {valid_to}") - sections.append("\n".join(lines)) - - if diff.dedup_hits: - lines = [f"## 🔁 Dedup confirmations ({len(diff.dedup_hits)})"] - for d in diff.dedup_hits: - srcs = d.get("source_episode_ids") or [] - action = d.get("action", "bump_hits") - lines.append( - f"- existing #{d.get('existing_fact_id')} ← {len(srcs)} reaffirmation(s), action={action}" - ) - if action == "refine_text" and d.get("refined_text"): - lines.append(f" refined → \"{d['refined_text']}\"") - sections.append("\n".join(lines)) - - if diff.expire: - lines = [f"## 🪦 Expiring ({len(diff.expire)})"] - for e in diff.expire: - lines.append( - f"- existing #{e.get('existing_fact_id')} → valid_to={e.get('valid_to')} " - f"({e.get('reason', '—')})" - ) - sections.append("\n".join(lines)) - - if diff.drop_as_noise: - lines = [f"## 🗑️ Skipped as noise ({len(diff.drop_as_noise)})"] - for n in diff.drop_as_noise: - ids = n.get("episode_ids") or [] - lines.append(f"- {len(ids)} episode(s): {n.get('reason', '—')}") - sections.append("\n".join(lines)) - - if not sections: - sections.append("_No actions this week._") - - return header + "\n" + "\n\n".join(sections) - - -# --------------------------------------------------------------------------- -# Discord posting -# --------------------------------------------------------------------------- - - -def discord_post(content: str, channel_id: str, *, bot_token: Optional[str] = None) -> bool: - """POST a message to a Discord channel. Returns True on success.""" - bot_token = bot_token or os.environ.get("DISCORD_BOT_TOKEN") - if not bot_token or not channel_id: - logger.warning("discord_post missing bot_token or channel_id") - return False - # Discord rejects messages over 2000 chars; chunk if needed. - chunks: List[str] = [] - remaining = content - while remaining: - if len(remaining) <= 1990: - chunks.append(remaining) - break - # Split on the last newline before 1990 chars to avoid mid-line breaks. 
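        # Worked example (illustrative): a 3500-char digest becomes two chunks,
        # roughly chars 0..1990 (cut back to the last newline) and the rest.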
- cut = remaining.rfind("\n", 0, 1990) - if cut <= 0: - cut = 1990 - chunks.append(remaining[:cut]) - remaining = remaining[cut:].lstrip("\n") - - headers = { - "Authorization": f"Bot {bot_token}", - "Content-Type": "application/json", - } - url = DISCORD_API.format(channel_id=channel_id) - ok = True - with httpx.Client(timeout=20.0) as c: - for chunk in chunks: - r = c.post(url, headers=headers, json={"content": chunk}) - if r.status_code >= 400: - logger.warning("discord_post failed: %s %s", r.status_code, r.text[:200]) - ok = False - break - return ok - - -def memory_review_channel_id() -> Optional[str]: - """Resolve the Discord #memory-review channel id. - - Priority: - 1. MEMORY_REVIEW_CHANNEL_ID env var (test override) - 2. ~/.hermes/channel_directory.json -> platforms.discord (list) - -> first entry whose name == "memory-review" - 3. Legacy flat layouts (defensive — older installs) - """ - env = os.environ.get("MEMORY_REVIEW_CHANNEL_ID") - if env: - return env - cdir = _resolve_hermes_home() / "channel_directory.json" - if not cdir.exists(): - return None - try: - data = json.loads(cdir.read_text(encoding="utf-8")) - except json.JSONDecodeError: - return None - - # Canonical layout: platforms.discord is a list of channel dicts. - plats = (data.get("platforms") or {}) - discord_chans = plats.get("discord") - if isinstance(discord_chans, list): - for c in discord_chans: - if isinstance(c, dict) and c.get("name") == "memory-review": - return c.get("id") - - # Defensive fallbacks for older / hand-edited layouts. - if isinstance(data.get("memory-review"), str): - return data["memory-review"] - chans = data.get("channels") or {} - m = chans.get("memory-review") if isinstance(chans, dict) else None - if isinstance(m, str): - return m - if isinstance(m, dict): - return m.get("id") or m.get("channel_id") - return None - - -# --------------------------------------------------------------------------- -# Main entry points -# --------------------------------------------------------------------------- - - -async def weekly_promotion( - conn: sqlite3.Connection, - *, - today: Optional[date] = None, - dry_run: bool = False, - discord_channel_id: Optional[str] = None, - kimi_fn=None, # injectable for tests - embed_fn=None, -) -> Dict[str, Any]: - """Run one weekly promotion cycle. Returns a summary dict.""" - today = today or date.today() - digest_id = digest_id_for(today) - - # /mem off kill switch — skip the entire weekly cycle. - try: - from plugins.memreview import mem_off_active - if mem_off_active(): - return { - "digest_id": digest_id, - "candidates": 0, - "skipped": "/mem off active", - } - except Exception: - pass - - candidates = _read_pending_episodes(conn) - if not candidates: - return {"digest_id": digest_id, "candidates": 0, "skipped": "no candidates"} - - # Build neighbor map per stashed_fact across the week. 
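    # Keyed by fact text, so a fact stashed by several episodes costs only one
    # vec_search (and one Voyage embed) per unique string.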
- neighbors_by_fact: Dict[str, List[Dict[str, Any]]] = {} - for c in candidates: - for sf in c["stashed_facts"]: - text = (sf or {}).get("text") or "" - if not text or text in neighbors_by_fact: - continue - try: - neighbors_by_fact[text] = await _vec_search(conn, text) - except Exception as exc: - logger.warning("vec_search failed for stashed fact: %s", exc) - neighbors_by_fact[text] = [] - - prompt = PROMOTION_PROMPT.format( - digest_id=digest_id, - today=today.isoformat(), - week_label=today.isoformat(), - candidates_block=_format_candidates_block(candidates), - neighbors_block=_format_neighbors_block(neighbors_by_fact), - ) - - kimi = kimi_fn or _call_kimi_thinking - try: - diff_dict = await kimi(prompt) - except Exception as exc: - logger.exception("Kimi promotion call failed") - return {"digest_id": digest_id, "candidates": len(candidates), "error": str(exc)} - - # Trust-but-verify: ensure digest_id matches and required keys exist. - diff_dict.setdefault("digest_id", digest_id) - diff_dict.setdefault("candidate_episode_ids", [c["id"] for c in candidates]) - for k in ("promote", "dedup_hits", "expire", "drop_as_noise"): - diff_dict.setdefault(k, []) - - digest = WeekDigest.from_dict(diff_dict) - markdown = render_digest_markdown(digest, candidates) - - summary = { - "digest_id": digest_id, - "candidates": len(candidates), - "promote": len(digest.promote), - "dedup_hits": len(digest.dedup_hits), - "expire": len(digest.expire), - "drop_as_noise": len(digest.drop_as_noise), - "dry_run": dry_run, - } - - if dry_run: - summary["markdown_preview"] = markdown - return summary - - # Persist diff before posting so a Discord outage doesn't lose the work. - pending_path(digest_id).write_text( - json.dumps(digest.to_dict(), ensure_ascii=False, indent=2), - encoding="utf-8", - ) - - posted = False - if discord_channel_id: - posted = discord_post(markdown, discord_channel_id) - summary["discord_posted"] = posted - - _log_event({ - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "weekly_promotion", - "digest_id": digest_id, - "summary": summary, - }) - return summary - - -def _purge_old_pending(today: date) -> int: - """Delete pending diffs older than PENDING_DIFF_TTL_DAYS.""" - cutoff = today - timedelta(days=PENDING_DIFF_TTL_DAYS) - n = 0 - for f in pending_dir().glob("*.json"): - try: - stem = f.stem.removeprefix("wk-") - d = datetime.strptime(stem, "%Y-%m-%d").date() - except ValueError: - continue - if d < cutoff: - try: - f.unlink() - # Also remove associated rejection sentinel if any. - rs = f.with_suffix(".rejected") - if rs.exists(): - rs.unlink() - n += 1 - except OSError: - pass - return n - - -def _latest_pending_diff() -> Optional[Path]: - files = sorted(pending_dir().glob("wk-*.json")) - return files[-1] if files else None - - -def _archive_diff(diff_path: Path, status: str) -> None: - target = archive_dir() / f"{diff_path.stem}.{status}.json" - diff_path.replace(target) - - -async def _apply_diff_atomic( - conn: sqlite3.Connection, - digest: WeekDigest, - today: date, - *, - embed_fn=None, -) -> Dict[str, int]: - """Apply promote / dedup / expire in one transaction; stamp promoted_at. - - Embeddings for promoted facts are computed BEFORE the transaction - opens, so the writer lock is held only for the duration of the - SQL statements themselves (~ms). Holding it across the Voyage HTTP - round-trip would block concurrent writes from the hot path. - - Returns counts of each action performed. 
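
    Example return value (illustrative counts):

        {"promoted": 3, "dedup_bumped": 2, "dedup_refined": 1,
         "expired": 0, "stamped": 41}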
- """ - counts = {"promoted": 0, "dedup_bumped": 0, "dedup_refined": 0, "expired": 0, "stamped": 0} - - # Pre-embed all promote texts (outside transaction). - embed = embed_fn or voyage_embed - promote_blobs: List[Optional[bytes]] = [] - promote_texts = [p.get("fact", "") for p in digest.promote] - non_empty = [t for t in promote_texts if t] - if non_empty: - embeddings = await embed(non_empty) - # Map back to original positions (None for empty fact strings). - emb_iter = iter(embeddings) - promote_blobs = [next(emb_iter) if t else None for t in promote_texts] - else: - promote_blobs = [None] * len(promote_texts) - - try: - conn.execute("BEGIN") - - # 1. promote — INSERT new semantic_facts. Trigger sf_after_insert - # mirrors each row into vec_facts automatically. - for p, blob in zip(digest.promote, promote_blobs): - fact = p.get("fact", "") - if not fact or blob is None: - continue - conn.execute( - """ - INSERT INTO semantic_facts - (entity, fact, embedding, importance, valid_from, valid_to, - source_episode_ids) - VALUES (?, ?, ?, ?, ?, ?, ?) - """, - ( - p.get("entity"), - fact, - blob, - int(p.get("importance", 2) or 2), - p.get("valid_from") or today.isoformat(), - p.get("valid_to"), - json.dumps(p.get("source_episode_ids") or []), - ), - ) - counts["promoted"] += 1 - - # 2. dedup_hits — bump the existing fact's hits + last_seen, optional refine. - for d in digest.dedup_hits: - fid = d.get("existing_fact_id") - if fid is None: - continue - if d.get("action") == "refine_text" and d.get("refined_text"): - conn.execute( - "UPDATE semantic_facts SET fact = ?, last_seen = datetime('now'), " - "hits = hits + 1 WHERE id = ?", - (d["refined_text"], fid), - ) - counts["dedup_refined"] += 1 - else: - conn.execute( - "UPDATE semantic_facts SET last_seen = datetime('now'), " - "hits = hits + 1 WHERE id = ?", - (fid,), - ) - counts["dedup_bumped"] += 1 - - # 3. expire — set valid_to (caller chose date). - for e in digest.expire: - fid = e.get("existing_fact_id") - if fid is None: - continue - conn.execute( - "UPDATE semantic_facts SET valid_to = ? WHERE id = ?", - (e.get("valid_to") or today.isoformat(), fid), - ) - counts["expired"] += 1 - - # 4. stamp promoted_at on every candidate episode. - if digest.candidate_episode_ids: - placeholders = ",".join("?" * len(digest.candidate_episode_ids)) - conn.execute( - f"UPDATE episodes SET promoted_at = date('now') WHERE id IN ({placeholders})", - digest.candidate_episode_ids, - ) - counts["stamped"] = len(digest.candidate_episode_ids) - - conn.commit() - except Exception: - conn.rollback() - raise - return counts - - -async def weekly_apply( - conn: sqlite3.Connection, - *, - today: Optional[date] = None, - embed_fn=None, -) -> Dict[str, Any]: - """Apply the latest pending diff (or archive-as-rejected). 
Returns summary.""" - today = today or date.today() - - purged = _purge_old_pending(today) - diff_path = _latest_pending_diff() - - if not diff_path: - return {"purged": purged, "applied": False, "reason": "no pending diff"} - - digest_id = diff_path.stem - sentinel = rejection_sentinel(digest_id) - if sentinel.exists(): - _archive_diff(diff_path, "rejected") - try: - sentinel.unlink() - except OSError: - pass - _log_event({ - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "weekly_apply", - "digest_id": digest_id, - "result": "rejected", - }) - return {"purged": purged, "applied": False, "digest_id": digest_id, "reason": "rejected"} - - try: - diff_dict = json.loads(diff_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError) as exc: - return {"purged": purged, "applied": False, "error": f"diff load: {exc}"} - - digest = WeekDigest.from_dict(diff_dict) - counts = await _apply_diff_atomic(conn, digest, today, embed_fn=embed_fn) - _archive_diff(diff_path, "applied") - - summary = { - "purged": purged, - "applied": True, - "digest_id": digest_id, - **counts, - } - _log_event({ - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "cmd": "weekly_apply", - **summary, - }) - return summary diff --git a/plugins/memory/sqlite_vec/read.py b/plugins/memory/sqlite_vec/read.py deleted file mode 100644 index 05a7e5b66d9..00000000000 --- a/plugins/memory/sqlite_vec/read.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Read path for the sqlite_vec memory plugin. - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §4 - -Two-step retrieval: - 1. vec0 prefilter: top k=50 by cosine distance on int8 embeddings - 2. SQL CTE rerank: score = (1 - distance) * 0.7 + exp(-age_days/90) * 0.3 - filter active state + valid_to NULL or future, ORDER BY score DESC LIMIT k - -`hits` bumping happens fire-and-forget after the reply is sent (caller's -responsibility to schedule). Errors are swallowed with a warning. - -Per-query latency is logged to ~/.hermes/logs/memory.log (p95 is tracked -downstream). The log path is overridable via the ``log_path`` parameter -for testing. -""" - -from __future__ import annotations - -import logging -import sqlite3 -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable, List, Optional - -from .embed import voyage_embed - -logger = logging.getLogger(__name__) - -DEFAULT_K = 8 -PREFILTER_K = 50 - - -def _default_log_path() -> Path: - """Resolve the memory.log path lazily so HERMES_HOME (e.g. /opt/data - inside the container) wins over the worker thread's Path.home().""" - try: - from hermes_constants import get_hermes_home - return Path(get_hermes_home()) / "logs" / "memory.log" - except Exception: - return Path.home() / ".hermes" / "logs" / "memory.log" - - -DEFAULT_LOG_PATH = _default_log_path() - -# Spec §4 — SQL is locked. Do not edit weights without updating the spec -# and re-running the B1 worked example. -RETRIEVE_SQL = """ -WITH knn AS ( - SELECT fact_id, distance - FROM vec_facts - WHERE embedding MATCH vec_int8(?) 
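      -- Worked example for the score formula below (illustrative numbers):
      -- sim 0.75 (distance 0.25), age 30d -> 0.75*0.7 + exp(-30/90)*0.3 ≈ 0.740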
AND k = {prefilter_k} -) -SELECT sf.id, sf.fact, sf.entity, sf.created_at, sf.importance, - (1 - knn.distance) AS sim, - (julianday('now') - julianday(sf.created_at)) AS age_days, - (1 - knn.distance) * 0.7 - + exp(-(julianday('now') - julianday(sf.created_at)) / 90.0) * 0.3 AS score -FROM knn -JOIN semantic_facts sf ON sf.id = knn.fact_id -WHERE sf.state = 'active' - AND (sf.valid_to IS NULL OR sf.valid_to > date('now')) -ORDER BY score DESC -LIMIT ?; -""" - - -@dataclass -class Fact: - """A retrieved fact with score breakdown for prompt-injection or /memdebug.""" - - id: int - fact: str - entity: Optional[str] - created_at: str - importance: int - sim: float - age_days: float - score: float - - -def _append_log(log_path: Path, payload: dict) -> None: - """Append one JSON line to memory.log; never raise into the read path.""" - import json - try: - log_path.parent.mkdir(parents=True, exist_ok=True) - with log_path.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload, ensure_ascii=False) + "\n") - except OSError as exc: - logger.warning("memory.log write failed: %s", exc) - - -async def read_memory( - query: str, - conn: sqlite3.Connection, - *, - k: int = DEFAULT_K, - log_path: Path = DEFAULT_LOG_PATH, -) -> List[Fact]: - """Embed `query`, retrieve top-`k` facts, log latency, return Fact list.""" - [qvec] = await voyage_embed([query]) - - sql = RETRIEVE_SQL.format(prefilter_k=PREFILTER_K) - t0 = time.perf_counter() - rows = conn.execute(sql, (qvec, k)).fetchall() - elapsed_ms = (time.perf_counter() - t0) * 1000.0 - - facts = [ - Fact( - id=row["id"], - fact=row["fact"], - entity=row["entity"], - created_at=row["created_at"], - importance=row["importance"], - sim=float(row["sim"]), - age_days=float(row["age_days"]), - score=float(row["score"]), - ) - for row in rows - ] - - _append_log( - log_path, - { - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "q": query, - "k": k, - "n": len(facts), - "sql_ms": round(elapsed_ms, 2), - }, - ) - return facts - - -async def bump_hits(fact_ids: Iterable[int], conn: sqlite3.Connection) -> None: - """Fire-and-forget UPDATE; swallow errors with a warning log. - - Caller must wrap with ``asyncio.create_task()`` to avoid blocking the - reply. Per spec §4 hits-bump runs AFTER discord_send, so we keep this - cheap (single UPDATE … IN (…)) and never propagate errors. - """ - ids = list(fact_ids) - if not ids: - return - placeholders = ",".join("?" * len(ids)) - try: - conn.execute( - f"UPDATE semantic_facts SET hits = hits + 1, " - f"last_seen = datetime('now') WHERE id IN ({placeholders})", - ids, - ) - conn.commit() - except sqlite3.Error as exc: - logger.warning("bump_hits swallowed error for %d ids: %s", len(ids), exc) - - -def format_facts_for_prompt(facts: List[Fact], *, with_meta: bool = False) -> str: - """Render top-k facts as a markdown bullet list for system-prompt injection. - - Used by SqliteVecMemoryProvider.prefetch() (with_meta=True per W2-3 - spec) and /memdebug (with_meta=False for compact display). - - No header — the caller owns the section title. 
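-
-    Illustrative rendering with ``with_meta=True`` (facts invented):
-
-        - [禮揚.家庭] 致妤生日 3/19 (importance: 5, age: 12 days)
-        - 下週去日本 (importance: 3, age: 2 days)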
- """ - if not facts: - return "" - lines = [] - for f in facts: - prefix = f"[{f.entity}] " if f.entity else "" - suffix = ( - f" (importance: {f.importance}, age: {int(f.age_days)} days)" - if with_meta else "" - ) - lines.append(f"- {prefix}{f.fact}{suffix}") - return "\n".join(lines) diff --git a/plugins/memory/sqlite_vec/schema.sql b/plugins/memory/sqlite_vec/schema.sql deleted file mode 100644 index 5910309543b..00000000000 --- a/plugins/memory/sqlite_vec/schema.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Hermes V3 memory schema — episodes (hot raw) + semantic_facts (cold curated) --- Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §3 - -PRAGMA journal_mode = WAL; -PRAGMA synchronous = NORMAL; - --- Hot tier: raw turn-by-turn record. All Discord turns + cron synthetic land here. -CREATE TABLE IF NOT EXISTS episodes ( - id INTEGER PRIMARY KEY, - ts TEXT NOT NULL, - channel TEXT NOT NULL, - external_id TEXT NOT NULL, - role TEXT NOT NULL CHECK (role IN ('user', 'assistant')), - text TEXT NOT NULL, - synthetic INTEGER NOT NULL DEFAULT 0, - embedding BLOB, - metadata TEXT, - promoted_at TEXT, - UNIQUE(channel, external_id) -); -CREATE INDEX IF NOT EXISTS idx_episodes_ts ON episodes(ts); -CREATE INDEX IF NOT EXISTS idx_episodes_promoted_pending - ON episodes(promoted_at, ts) WHERE promoted_at IS NULL; - --- Cold tier: curated facts. Cattia's actual working memory queries this. -CREATE TABLE IF NOT EXISTS semantic_facts ( - id INTEGER PRIMARY KEY, - entity TEXT, - fact TEXT NOT NULL, - embedding BLOB NOT NULL, - source_episode_ids TEXT, - importance INTEGER DEFAULT 2, - hits INTEGER DEFAULT 0, - created_at TEXT NOT NULL DEFAULT (datetime('now')), - last_seen TEXT, - state TEXT DEFAULT 'active' CHECK (state IN ('active', 'archived')), - valid_from TEXT NOT NULL DEFAULT (date('now')), - valid_to TEXT -); -CREATE INDEX IF NOT EXISTS idx_facts_entity ON semantic_facts(entity); -CREATE INDEX IF NOT EXISTS idx_facts_active ON semantic_facts(state, valid_to); diff --git a/plugins/memory/sqlite_vec/store.py b/plugins/memory/sqlite_vec/store.py deleted file mode 100644 index 97ec4c3e061..00000000000 --- a/plugins/memory/sqlite_vec/store.py +++ /dev/null @@ -1,82 +0,0 @@ -"""sqlite-vec backed memory store: schema bootstrap + connection helper. - -W1 scope: schema only. Read/write paths come in W2/W3. -""" - -from __future__ import annotations - -import logging -import sqlite3 -from pathlib import Path -from typing import Optional - -import sqlite_vec - -logger = logging.getLogger(__name__) - -VEC_DIM = 512 # voyage-3.5-lite output dimension we store - -_SCHEMA_PATH = Path(__file__).parent / "schema.sql" - -_VEC_VIRTUAL_TABLE_SQL = f""" -CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts USING vec0( - fact_id INTEGER PRIMARY KEY, - embedding int8[{VEC_DIM}] distance_metric=cosine -); -""" - -# Triggers keep vec_facts in sync with semantic_facts. embedding is stored as -# raw int8 BLOB (512 bytes) on the relational side; vec0 needs vec_int8() -# wrapper to interpret it (without it, vec0 assumes float32). -_TRIGGERS_SQL = """ -CREATE TRIGGER IF NOT EXISTS sf_after_insert -AFTER INSERT ON semantic_facts -BEGIN - INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); -END; - -CREATE TRIGGER IF NOT EXISTS sf_after_update_embedding -AFTER UPDATE OF embedding ON semantic_facts -BEGIN - -- vec0 int8 columns reject UPDATE even via vec_int8(); use DELETE+INSERT. 
- DELETE FROM vec_facts WHERE fact_id = NEW.id; - INSERT INTO vec_facts(fact_id, embedding) VALUES (NEW.id, vec_int8(NEW.embedding)); -END; - -CREATE TRIGGER IF NOT EXISTS sf_after_delete -AFTER DELETE ON semantic_facts -BEGIN - DELETE FROM vec_facts WHERE fact_id = OLD.id; -END; -""" - - -def open_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: - """Open a sqlite connection with sqlite-vec extension loaded. - - Pass ``check_same_thread=False`` when the connection will be reused - across threads (e.g. the provider's prefetch worker pool). Caller is - then responsible for serializing access via a lock. - """ - db_path.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(str(db_path), check_same_thread=check_same_thread) - conn.enable_load_extension(True) - sqlite_vec.load(conn) - conn.enable_load_extension(False) - conn.row_factory = sqlite3.Row - return conn - - -def bootstrap_schema(conn: sqlite3.Connection) -> None: - """Idempotently create tables, indexes, vec0 virtual table, and triggers.""" - conn.executescript(_SCHEMA_PATH.read_text()) - conn.executescript(_VEC_VIRTUAL_TABLE_SQL) - conn.executescript(_TRIGGERS_SQL) - conn.commit() - - -def init_db(db_path: Path, *, check_same_thread: bool = True) -> sqlite3.Connection: - """Open + bootstrap. Returns a ready-to-use connection.""" - conn = open_db(db_path, check_same_thread=check_same_thread) - bootstrap_schema(conn) - return conn diff --git a/plugins/memory/sqlite_vec/write.py b/plugins/memory/sqlite_vec/write.py deleted file mode 100644 index 227f2b35e1a..00000000000 --- a/plugins/memory/sqlite_vec/write.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Per-turn write-back into the sqlite_vec memory store (W3-2). - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §5.1. - -Hot-path flow per Discord turn: - - 1. PHI gate — if ``channel`` is in PHI_BLACKLIST_CHANNELS, raw episode - rows still land but extraction is skipped (no PHI to the cloud LLM). - 2. Extract — kimi_extract() returns 0..N ExtractedFacts. - 3. Embed — voyage_embed([user_msg, reply, *fact_texts]) in one batch. - 4. INSERT 2 episode rows (user, assistant) with - ``ON CONFLICT(channel, external_id) DO NOTHING`` for idempotency - under Discord redelivery / cron retries / container restarts. - 5. Fast-track facts whose ``valid_to_hint`` parses to ≤ today + 30d - directly into ``semantic_facts`` (the trigger mirrors them into - ``vec_facts``). Longer-lived / undated facts are JSON-stashed in - ``episodes.metadata.stashed_facts`` for W3-3 weekly_promotion. - 6. Any exception → append a JSONL line to - ``~/.hermes/logs/memory_write_failures.jsonl`` and swallow. - The reply was already sent before this fired; we never propagate. - -The function is fire-and-forget: the caller schedules it via -``asyncio.create_task`` (or in our case, a worker thread the provider -spawns) AFTER ``discord_send`` so write latency cannot stall the user. -""" - -from __future__ import annotations - -import json -import logging -import sqlite3 -import time -from datetime import date, datetime, timedelta -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional - -from .extract import ( - PHI_BLACKLIST_CHANNELS, - ExtractedFact, - kimi_extract, -) - -logger = logging.getLogger(__name__) - -# Spec §5.3 — fast-track threshold (raised from 7d to 30d): facts that -# expire within ~1 month land directly in semantic_facts so they're -# usable on the next turn instead of waiting up to 7 days for the -# weekly review. 
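-# Worked example (illustrative): with today = 2026-05-02 the cutoff is
-# 2026-06-01, so a fact whose valid_to_hint parses to 2026-05-20 is
-# fast-tracked into semantic_facts, while one hinting 2026-07-01, or
-# carrying no parseable hint at all, is stashed for the Sunday review.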
-FAST_TRACK_DAYS = 30 - - -def _resolve_hermes_home() -> Path: - try: - from hermes_constants import get_hermes_home - return Path(get_hermes_home()) - except Exception: - return Path.home() / ".hermes" - - -def _failure_log_path() -> Path: - return _resolve_hermes_home() / "logs" / "memory_write_failures.jsonl" - - -def _append_failure(payload: Dict[str, Any], log_path: Optional[Path] = None) -> None: - log_path = log_path or _failure_log_path() - try: - log_path.parent.mkdir(parents=True, exist_ok=True) - with log_path.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n") - except OSError as exc: - logger.warning("memory_write_failures.jsonl write failed: %s", exc) - - -def _parse_valid_to_hint(hint: Optional[str]) -> Optional[date]: - """Parse 'YYYY-MM-DD' tolerantly. Return None on bad / missing input.""" - if not hint: - return None - try: - return datetime.strptime(hint.strip(), "%Y-%m-%d").date() - except (ValueError, TypeError): - return None - - -def _fact_should_fast_track(fact: ExtractedFact, today: date) -> bool: - """True iff fact has a valid_to_hint within FAST_TRACK_DAYS of today.""" - expiry = _parse_valid_to_hint(fact.valid_to_hint) - if not expiry: - return False - return expiry <= today + timedelta(days=FAST_TRACK_DAYS) - - -# --------------------------------------------------------------------------- -# Main entry point -# --------------------------------------------------------------------------- - - -async def write_episode( - user_msg: str, - reply: str, - channel: str, - msg_id: str, - ts: str, - conn: sqlite3.Connection, - *, - embed_fn: Optional[Callable] = None, - extract_fn: Optional[Callable] = None, - failure_log_path: Optional[Path] = None, -) -> Dict[str, Any]: - """Persist one Discord turn to the memory store. - - Returns a summary dict for caller logging: - {episodes: 0|1|2, fast_tracked: N, stashed: N, skipped_extract: bool} - - Never raises. Errors land in ``memory_write_failures.jsonl``. - """ - summary: Dict[str, Any] = { - "episodes": 0, - "fast_tracked": 0, - "stashed": 0, - "skipped_extract": False, - } - skip_extract = channel in PHI_BLACKLIST_CHANNELS - summary["skipped_extract"] = skip_extract - - try: - # ---- 1. extract (skip on PHI channel) - if skip_extract or not (extract_fn or kimi_extract): - facts: List[ExtractedFact] = [] - else: - extractor = extract_fn or kimi_extract - try: - facts = await extractor(user_msg, reply, channel, ts) - except Exception as exc: - # Extract failure is non-fatal — we still record the - # raw episode so weekly_promotion can re-extract later. - logger.warning("kimi_extract failed; continuing without facts: %s", exc) - facts = [] - - # ---- 2. embed (raw turn + each fact text in one call) - embed = embed_fn - if embed is None: - from .embed import voyage_embed - embed = voyage_embed - - texts_to_embed = [user_msg, reply] + [f.text for f in facts] - # Filter empty strings — Voyage rejects them. - non_empty = [(i, t) for i, t in enumerate(texts_to_embed) if t and t.strip()] - if non_empty: - indices, texts = zip(*non_empty) - blobs_dense = await embed(list(texts)) - # Re-densify back to original positions; missing slots get None. - blobs: List[Optional[bytes]] = [None] * len(texts_to_embed) - for slot, blob in zip(indices, blobs_dense): - blobs[slot] = blob - else: - blobs = [None] * len(texts_to_embed) - - user_blob, reply_blob = blobs[0], blobs[1] - fact_blobs = blobs[2:] - - # ---- 3. 
partition facts into fast-track vs stash BEFORE INSERT - today = date.today() - fast_track: List[tuple] = [] # [(fact, blob), ...] - stashed: List[Dict[str, Any]] = [] # JSON-serialisable dicts - for f, blob in zip(facts, fact_blobs): - if _fact_should_fast_track(f, today): - if blob is not None: - fast_track.append((f, blob)) - else: - # No embedding for this fact → can't insert into - # semantic_facts (embedding is NOT NULL). Demote to stash. - stashed.append(f.raw or _fact_to_dict(f)) - else: - stashed.append(f.raw or _fact_to_dict(f)) - - metadata = {"stashed_facts": stashed} if stashed else {} - metadata_json = json.dumps(metadata, ensure_ascii=False) if metadata else None - - # ---- 4. INSERT episodes (atomic with fast-track inserts) - try: - conn.execute("BEGIN") - ep_rows = [ - (ts, channel, msg_id + ":user", "user", user_msg, 0, user_blob, metadata_json), - (ts, channel, msg_id + ":asst", "assistant", reply, 0, reply_blob, metadata_json), - ] - for row in ep_rows: - cur = conn.execute( - """ - INSERT INTO episodes - (ts, channel, external_id, role, text, synthetic, embedding, metadata) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT(channel, external_id) DO NOTHING - """, - row, - ) - if cur.rowcount: - summary["episodes"] += 1 - - # ---- 5. fast-track facts → semantic_facts (trigger mirrors to vec_facts) - for f, blob in fast_track: - conn.execute( - """ - INSERT INTO semantic_facts - (entity, fact, embedding, importance, valid_from, valid_to) - VALUES (?, ?, ?, ?, ?, ?) - """, - ( - f.entity, - f.text, - blob, - f.importance, - today.isoformat(), - f.valid_to_hint, - ), - ) - summary["fast_tracked"] += 1 - - summary["stashed"] = len(stashed) - conn.commit() - except Exception: - conn.rollback() - raise - - return summary - - except Exception as exc: - logger.warning("write_episode failed for msg_id=%s: %s", msg_id, exc) - _append_failure( - { - "ts": time.strftime("%Y-%m-%dT%H:%M:%S"), - "channel": channel, - "msg_id": msg_id, - "user": user_msg, - "reply": reply, - "error": str(exc), - "summary_so_far": summary, - }, - log_path=failure_log_path, - ) - return summary - - -def _fact_to_dict(f: ExtractedFact) -> Dict[str, Any]: - """Serialise an ExtractedFact for stashing in episodes.metadata.""" - return { - "type": f.type, - "text": f.text, - "entity": f.entity, - "importance": f.importance, - "valid_to_hint": f.valid_to_hint, - } diff --git a/plugins/memreview/__init__.py b/plugins/memreview/__init__.py deleted file mode 100644 index d8794fe74db..00000000000 --- a/plugins/memreview/__init__.py +++ /dev/null @@ -1,227 +0,0 @@ -"""``/memreview`` and ``/mem`` slash commands — admin / kill-switch (W3-4). - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §7.1. - -Two commands: - - /memreview reject - per-digest opt-out. Writes a sentinel - file ``pending_diffs/.rejected`` - that ``weekly_apply`` reads on Monday - morning and archives the diff without - applying. - - /mem off - global kill switch. Writes ``MEM_OFF`` - in HERMES_HOME. Both ``write_episode`` - (hot path) and ``weekly_promotion`` - (cold path) check for this file at the - top of each call and short-circuit to - a no-op + warning log. - - /mem on - reverses the kill switch by deleting - ``MEM_OFF`` (companion to /mem off). - - /mem status - prints whether the kill switch is set - and lists pending diffs awaiting apply. 
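-
-Sentinel layout on disk (illustrative; digest ids follow wk-YYYY-MM-DD):
-
-    ~/.hermes/memories/pending_diffs/wk-2026-05-10.json       pending diff
-    ~/.hermes/memories/pending_diffs/wk-2026-05-10.rejected   after /memreview reject
-    ~/.hermes/MEM_OFF                                         after /mem off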
- -Why slash commands and not Discord reactions: spec §7.1 explicitly chose -slash because reactions don't reliably trigger webhook events across all -bot adapters (silent kill-switch failure mode that's worse than no -switch). -""" - -from __future__ import annotations - -import asyncio -import logging -import os -import re -from pathlib import Path -from typing import List, Optional - -logger = logging.getLogger(__name__) - - -def _resolve_hermes_home() -> Path: - try: - from hermes_constants import get_hermes_home - return Path(get_hermes_home()) - except Exception: - return Path.home() / ".hermes" - - -def _pending_dir() -> Path: - p = _resolve_hermes_home() / "memories" / "pending_diffs" - p.mkdir(parents=True, exist_ok=True) - return p - - -def _archive_dir() -> Path: - return _resolve_hermes_home() / "memories" / "diff_archive" - - -def mem_off_path() -> Path: - """The global kill-switch sentinel.""" - return _resolve_hermes_home() / "MEM_OFF" - - -def mem_off_active() -> bool: - """Public predicate consumed by promotion.py + provider.sync_turn.""" - return mem_off_path().exists() - - -# --------------------------------------------------------------------------- -# /memreview -# --------------------------------------------------------------------------- - - -_MEMREVIEW_HELP = ( - "**/memreview** — review or reject the weekly memory promotion digest.\n" - "Usage:\n" - " `/memreview reject ` — write the rejection sentinel; " - "Monday's apply will archive the diff without applying it.\n" - " `/memreview pending` — list digests currently awaiting apply.\n" - " `/memreview status` — same as `pending`." -) - - -_DIGEST_ID_RE = re.compile(r"^wk-\d{4}-\d{2}-\d{2}$") - - -def _list_pending_diffs() -> List[str]: - out = [] - for f in sorted(_pending_dir().glob("wk-*.json")): - rejected = f.with_suffix(".rejected").exists() - flag = " (rejected — will be archived Mon)" if rejected else "" - out.append(f"- `{f.stem}`{flag}") - return out - - -def _handle_memreview(raw_args: str) -> str: - args = (raw_args or "").strip() - if not args: - return _MEMREVIEW_HELP - - parts = args.split(maxsplit=1) - sub = parts[0].lower() - - if sub in ("pending", "status", "list"): - items = _list_pending_diffs() - if not items: - return "**/memreview** — no pending diffs." - return "**/memreview** — pending diffs:\n" + "\n".join(items) - - if sub == "reject": - rest = parts[1].strip() if len(parts) > 1 else "" - if not _DIGEST_ID_RE.match(rest): - return ( - f"**/memreview reject** — digest_id must look like " - f"`wk-YYYY-MM-DD`. Got: `{rest!r}`" - ) - diff_path = _pending_dir() / f"{rest}.json" - if not diff_path.exists(): - return ( - f"**/memreview reject** — no pending diff named `{rest}`. " - f"Use `/memreview pending` to list available digest_ids." - ) - sentinel = _pending_dir() / f"{rest}.rejected" - try: - sentinel.write_text( - f"rejected via /memreview at {asyncio.get_event_loop().time()}", - encoding="utf-8", - ) - except (OSError, RuntimeError): - # No running loop in some sync entry paths — write a static marker. - try: - sentinel.write_text("rejected", encoding="utf-8") - except OSError as exc: - return f"**/memreview reject** error: cannot write sentinel: `{exc}`" - return ( - f"**Rejected.** Pending diff `{rest}` will be archived without " - f"applying. Episodes stay pending for next Sunday's review." 
- ) - - return _MEMREVIEW_HELP - - -# --------------------------------------------------------------------------- -# /mem -# --------------------------------------------------------------------------- - - -_MEM_HELP = ( - "**/mem** — global memory write-back kill switch.\n" - "Usage:\n" - " `/mem off` — disable per-turn write-back AND weekly promotion.\n" - " `/mem on` — re-enable.\n" - " `/mem status` — show whether the kill switch is currently set." -) - - -def _handle_mem(raw_args: str) -> str: - args = (raw_args or "").strip().lower() - if not args: - return _MEM_HELP - - sub = args.split()[0] - - if sub == "off": - try: - mem_off_path().write_text( - "set via /mem off\n", encoding="utf-8" - ) - except OSError as exc: - return f"**/mem off** error: `{exc}`" - return ( - "**🔇 Memory write-back disabled.**\n" - "Per-turn `write_episode` and weekly promotion will short-circuit " - "until you run `/mem on`. Read path is unaffected — Cattia still " - "retrieves from existing facts." - ) - - if sub == "on": - p = mem_off_path() - if not p.exists(): - return "**/mem on** — write-back was already enabled." - try: - p.unlink() - except OSError as exc: - return f"**/mem on** error: `{exc}`" - return "**🔊 Memory write-back enabled.** Hot + cold paths resume." - - if sub == "status": - active = mem_off_active() - pending = _list_pending_diffs() - lines = [ - "**/mem status**", - f" write-back: {'🔇 OFF' if active else '🔊 ON'}", - f" MEM_OFF sentinel: `{mem_off_path()}`" - f" {'(present)' if active else '(absent)'}", - ] - if pending: - lines.append(" pending diffs:") - lines.extend(" " + p for p in pending) - else: - lines.append(" pending diffs: (none)") - return "\n".join(lines) - - return _MEM_HELP - - -# --------------------------------------------------------------------------- -# Plugin registration -# --------------------------------------------------------------------------- - - -def register(ctx) -> None: - ctx.register_command( - "memreview", - handler=_handle_memreview, - description="Review or reject the weekly Hermes memory promotion digest.", - args_hint="reject | pending | status", - ) - ctx.register_command( - "mem", - handler=_handle_mem, - description="Hermes memory kill switch (off / on / status).", - args_hint="off | on | status", - ) diff --git a/plugins/memreview/plugin.yaml b/plugins/memreview/plugin.yaml deleted file mode 100644 index 66252043f4b..00000000000 --- a/plugins/memreview/plugin.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: memreview -version: 0.1.0 -description: "/memreview reject + /mem kill switch — admin slash commands for the Hermes V3 memory system (W3-4)." -author: "Li-yang Chen" diff --git a/scripts/cron/README.md b/scripts/cron/README.md deleted file mode 100644 index af5227bdc80..00000000000 --- a/scripts/cron/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Hermes V3 cron scripts - -These scripts are invoked by hermes-agent's cron scheduler. The scheduler -hardcodes `HERMES_HOME/scripts/` as the only path it will exec from -(security: prevents arbitrary script execution via path traversal), so -runtime copies must live at `~/.hermes/scripts/.py` on each host. - -The canonical source lives here in version control. Deploy via: - - cp scripts/cron/weekly_promotion.py ~/.hermes/scripts/ - cp scripts/cron/weekly_apply.py ~/.hermes/scripts/ - -Cron entries are added by adding rows to `~/.hermes/cron/jobs.json` -(see the `Hermes Weekly Memory Promotion` / `Hermes Weekly Memory Apply` -entries; expressions are in UTC — `0 19 * * 6` = Sun 03:00 UTC+8). 
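-
-A pending row might look roughly like this (`name` and `enabled` are the
-only key names confirmed by the cutover tooling; `schedule` and `script`
-are illustrative, so check the scheduler's actual schema):
-
-    {
-      "name": "Hermes Weekly Memory Promotion",
-      "schedule": "0 19 * * 6",
-      "script": "weekly_promotion.py",
-      "enabled": true
-    }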
- -Both scripts emit `{"wakeAgent": false}` as the last stdout line so the -cron framework skips the agent run — delivery happens inside the script -via Discord HTTP POST. diff --git a/scripts/cron/weekly_apply.py b/scripts/cron/weekly_apply.py deleted file mode 100755 index 14d1a18550e..00000000000 --- a/scripts/cron/weekly_apply.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -"""Cron entry point: Mon 03:00 UTC+8 weekly memory apply. - -Loads the latest pending diff (purges any older than 14 days first), -checks for a rejection sentinel file (written by /memreview reject), -and either archives the diff as rejected or applies its -promote / dedup / expire actions atomically and stamps -``episodes.promoted_at`` on the candidate rows. -""" - -from __future__ import annotations - -import asyncio -import json -import sys - -sys.path.insert(0, "/opt/hermes") - -try: - from hermes_cli.env_loader import load_hermes_dotenv - load_hermes_dotenv(hermes_home="/opt/data", project_env=None) -except Exception: - pass - -from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 - db_path, - weekly_apply, -) -from plugins.memory.sqlite_vec.store import open_db # noqa: E402 - - -def main() -> int: - conn = open_db(db_path(), check_same_thread=False) - summary = asyncio.run(weekly_apply(conn)) - print(json.dumps(summary, ensure_ascii=False, default=str)) - print('{"wakeAgent": false}') - conn.close() - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/cron/weekly_promotion.py b/scripts/cron/weekly_promotion.py deleted file mode 100755 index 55d86d1aa00..00000000000 --- a/scripts/cron/weekly_promotion.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -"""Cron entry point: Sun 03:00 UTC+8 weekly memory promotion. - -Reads the last 7 days of pending episodes, runs one Kimi-thinking call to -produce a promotion diff, persists the diff as -~/.hermes/memories/pending_diffs/wk-YYYY-MM-DD.json, renders the digest -markdown, and posts it to #memory-review for user review. - -Stdout ends with ``{"wakeAgent": false}`` so the cron framework skips -the agent run after we've handled delivery ourselves. -""" - -from __future__ import annotations - -import asyncio -import json -import sys -from pathlib import Path - -# The hermes container exposes the source tree at /opt/hermes but does not -# add it to sys.path; cron exec'd scripts inherit nothing. Insert it -# manually so plugin imports resolve. -sys.path.insert(0, "/opt/hermes") - -# Load the user's .env so VOYAGE_API_KEY / DISCORD_BOT_TOKEN reach the -# plugin code; mirrors what run_agent.py does at module import. -try: - from hermes_cli.env_loader import load_hermes_dotenv - load_hermes_dotenv(hermes_home="/opt/data", project_env=None) -except Exception: - pass - -from plugins.memory.sqlite_vec.promotion import ( # noqa: E402 - db_path, - memory_review_channel_id, - weekly_promotion, -) -from plugins.memory.sqlite_vec.store import open_db # noqa: E402 - - -def main() -> int: - conn = open_db(db_path(), check_same_thread=False) - channel_id = memory_review_channel_id() - summary = asyncio.run( - weekly_promotion(conn, discord_channel_id=channel_id) - ) - # Print human-readable summary to stdout for cron logs. - print(json.dumps(summary, ensure_ascii=False, default=str)) - # Wake-gate: skip the agent run. 
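-    # The cron framework reads the LAST stdout line as the wake-gate
-    # JSON, so this must stay the final print in main().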
- print('{"wakeAgent": false}') - conn.close() - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/cutover/cutover.sh b/scripts/cutover/cutover.sh deleted file mode 100755 index 268e7848e85..00000000000 --- a/scripts/cutover/cutover.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env bash -# W4-1 cutover script — run on chococlaw when you've decided to retire -# MEMORY.md flat-file injection and commit fully to the sqlite_vec -# memory plugin. -# -# Spec target date: 2026-05-24, *after* observing at least one successful -# weekly review cycle on the new system. -# -# Idempotent — safe to re-run if interrupted partway. -# -# Usage: -# ./scripts/cutover/cutover.sh # dry run, prints planned actions -# ./scripts/cutover/cutover.sh --commit # actually do the work - -set -euo pipefail - -DRY_RUN=true -if [[ "${1:-}" == "--commit" ]]; then - DRY_RUN=false -fi - -today() { date -u +%Y-%m-%d; } -say() { echo "[cutover] $*"; } -do_or_say() { - if $DRY_RUN; then - say "(dry-run) $*" - else - say "$*" - eval "$@" - fi -} - -HOME_DIR="${HERMES_HOME:-$HOME/.hermes}" -say "HERMES_HOME = ${HOME_DIR}" - -# ---- 1. Pre-flight checks -------------------------------------------------- - -say "1. Pre-flight checks" -[[ -d "${HOME_DIR}/memories" ]] || { say "ERR no ${HOME_DIR}/memories"; exit 1; } -[[ -f "${HOME_DIR}/memories/memory.db" ]] || { say "ERR no memory.db — W1 hasn't shipped"; exit 1; } -say " ✓ memory.db present" - -if ! command -v docker >/dev/null; then - say "WARN docker not on PATH — DB queries below will be skipped" -fi - -# Confirm the new system has been writing recently (last 7 days). -if command -v docker >/dev/null; then - ep_recent=$(docker exec hermes /opt/hermes/.venv/bin/python3 -c " -import sqlite3 -conn = sqlite3.connect('/opt/data/memories/memory.db') -n = conn.execute(\"SELECT count(*) FROM episodes WHERE ts > datetime('now','-7 days')\").fetchone()[0] -print(n) -" 2>/dev/null || echo 0) - if [[ "${ep_recent}" -lt 5 ]]; then - say "WARN only ${ep_recent} episodes in the last 7 days. Either the gateway" - say " hasn't been used much OR the write path isn't actually firing." - say " Fix that BEFORE cutover, or the new system has nothing to retrieve." - else - say " ✓ ${ep_recent} episodes recorded in the last 7 days" - fi -fi - -# ---- 2. Archive MEMORY.md -------------------------------------------------- - -ARCHIVE_NAME="MEMORY.md.archive-$(today)" -SRC="${HOME_DIR}/memories/MEMORY.md" -DST="${HOME_DIR}/memories/${ARCHIVE_NAME}" - -say "2. Archive MEMORY.md → ${ARCHIVE_NAME}" -if [[ ! -f "${SRC}" ]]; then - say " - ${SRC} does not exist — already archived?" -else - if [[ -f "${DST}" ]]; then - say " - ${DST} already exists — refusing to overwrite" - else - do_or_say "mv '${SRC}' '${DST}'" - do_or_say "chmod 444 '${DST}'" - fi -fi - -# ---- 3. config.yaml: confirm provider=sqlite_vec --------------------------- - -say "3. Confirm config.yaml memory.provider == sqlite_vec" -cfg="${HOME_DIR}/config.yaml" -if grep -qE '^[[:space:]]*provider:[[:space:]]*sqlite_vec' "${cfg}" 2>/dev/null; then - say " ✓ already set to sqlite_vec" -else - say " - provider not set — please edit ${cfg} manually:" - say " memory:" - say " provider: sqlite_vec" -fi - -# ---- 4. Disable legacy memory crons ---------------------------------------- - -say "4. 
Disable legacy memory crons in jobs.json" -do_or_say "/usr/bin/env python3 - <<'PY' -import json, pathlib -p = pathlib.Path('${HOME_DIR}/cron/jobs.json') -if not p.exists(): - print(' - no jobs.json'); raise SystemExit(0) -data = json.loads(p.read_text()) -legacy_names = { - 'Dimensions Memory Consolidation', - 'Forgetting Curve (Monthly Archive)', - 'Forgetting Curve', -} -changed = 0 -for j in data.get('jobs', []): - if j['name'] in legacy_names and j.get('enabled', False): - j['enabled'] = False - j['paused_at'] = '$(date -u +%Y-%m-%dT%H:%M:%SZ)' - j['paused_reason'] = 'W4 cutover — replaced by sqlite_vec weekly_promotion' - print(f' ✓ disabled: {j[\"name\"]}') - changed += 1 -if changed: - p.write_text(json.dumps(data, indent=2, ensure_ascii=False)) -else: - print(' - no legacy jobs found (already disabled, or never installed)') -PY" - -# ---- 5. Smoke test --------------------------------------------------------- - -say "5. Smoke test: provider initializes + retrieves" -if command -v docker >/dev/null; then - do_or_say "docker exec hermes /opt/hermes/.venv/bin/python3 -c ' -from hermes_cli.env_loader import load_hermes_dotenv -load_hermes_dotenv(hermes_home=\"/opt/data\", project_env=None) -from agent.memory_manager import MemoryManager -from plugins.memory import load_memory_provider -mm = MemoryManager() -mm.add_provider(load_memory_provider(\"sqlite_vec\")) -mm.initialize_all(session_id=\"cutover-smoke\", platform=\"cli\", hermes_home=\"/opt/data\", agent_context=\"primary\") -out = mm.prefetch_all(\"我太太生日\") -print(\"prefetch returned:\", \"OK\" if out else \"EMPTY\") -mm.shutdown_all() -'" -fi - -# ---- 6. Restart gateway ---------------------------------------------------- - -say "6. Restart gateway to pick up any config changes" -if command -v docker >/dev/null && [[ -d "${HOME}/Projects/hermes-agent" ]]; then - do_or_say "(cd ${HOME}/Projects/hermes-agent && docker compose restart gateway)" -fi - -# ---- Done ------------------------------------------------------------------ - -if $DRY_RUN; then - say "" - say "DRY RUN COMPLETE — no changes made. Re-run with --commit when ready." - say "" - say "After --commit, monitor for 24 hours via memory.log + #memory-review:" - say " - tail -f ~/.hermes/logs/memory.log" - say " - watch ~/.hermes/logs/memory_write_failures.jsonl size" - say " - confirm next Sunday's digest fires" - say "" - say "Rollback procedure: docs/runbooks/memory-rollback.md §3" -else - say "" - say "CUTOVER COMPLETE." - say " Archive at: ${DST}" - say " Legacy crons disabled in: ${HOME_DIR}/cron/jobs.json" - say " Gateway restarted." - say "" - say "Monitor for 24 hours then sanity-check via:" - say " docs/runbooks/memory-monitoring.md §6 (quick health check)" -fi diff --git a/scripts/import_md.py b/scripts/import_md.py deleted file mode 100755 index 86743a5d8eb..00000000000 --- a/scripts/import_md.py +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env python3 -"""Seed `semantic_facts` from a flat ``MEMORY.md`` (W2-2). - -Spec: docs/superpowers/specs/2026-05-02-hermes-memory-design.md §6.1. - -Format expected in ``~/.hermes/memories/MEMORY.md``:: - - Topic: content - § - Topic: another content - § - -Each entry becomes one row in ``semantic_facts``: - - entity = "禮揚." + slug(topic) # "Working style" -> "禮揚.working_style" - # "Tools & Access > Proton" -> "禮揚.tools_access.proton" - fact = content (verbatim) - importance = 2 - valid_from = '2026-05-10' - valid_to = NULL - -Idempotent: re-running with the same input does not duplicate rows. 
The -natural key is ``(entity, fact)`` and is enforced by a pre-INSERT lookup. - -Embeddings come from Voyage 3.5-lite via ``plugins.memory.sqlite_vec.embed``. -The trigger ``sf_after_insert`` keeps ``vec_facts`` synced automatically, so -this script writes only to ``semantic_facts``. - -Usage:: - - docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ - scripts/import_md.py --dry-run - docker exec -w /opt/hermes hermes /opt/hermes/.venv/bin/python3 \ - scripts/import_md.py --commit -""" - -from __future__ import annotations - -import argparse -import asyncio -import logging -import re -import sqlite3 -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable, List, Optional, Tuple - -logger = logging.getLogger(__name__) - -DEFAULT_MEMORY_MD = Path.home() / ".hermes" / "memories" / "MEMORY.md" -DEFAULT_DB = Path("/opt/data") / "memories" / "memory.db" -DEFAULT_VALID_FROM = "2026-05-10" # spec §6.1 -DEFAULT_IMPORTANCE = 2 -DEFAULT_BATCH = 128 -ENTITY_PREFIX = "禮揚" -ENTRY_SEPARATOR = re.compile(r"^§\s*$", re.MULTILINE) - - -@dataclass -class Entry: - """One parsed MEMORY.md entry.""" - - topic: str - fact: str - - @property - def entity(self) -> str: - return f"{ENTITY_PREFIX}.{slugify_topic(self.topic)}" - - -# --------------------------------------------------------------------------- -# Parsing -# --------------------------------------------------------------------------- - - -def slugify_topic(topic: str) -> str: - """Convert a human topic label to a stable entity-suffix slug. - - - Hierarchy markers ``>`` become ``.`` so prefix queries still work. - - Lowercase, ASCII alphanum kept; runs of other chars collapse to ``_``. - - CJK / unicode is preserved unchanged so 中文 topics stay readable. - - Examples: - "Working style" -> "working_style" - "Tools & Access > ProtonMail" -> "tools_access.protonmail" - "禮揚.家庭" -> "禮揚.家庭" (already a slug, untouched) - """ - parts = [p.strip() for p in topic.split(">")] - out_parts = [] - for part in parts: - s = part.strip().lower() - # Collapse non-alphanum (including '&', spaces, punctuation) to underscore. - # CJK characters are unicode word chars in Python regex with re.UNICODE - # (default for str patterns), so [^\w] excludes them = preserved. - s = re.sub(r"[^\w]+", "_", s, flags=re.UNICODE) - s = s.strip("_") - if s: - out_parts.append(s) - return ".".join(out_parts) if out_parts else "unknown" - - -def parse_memory_md(text: str) -> List[Entry]: - """Split MEMORY.md into Entry objects. - - Skips empty blocks and blocks with no ``Topic: content`` colon. Keeps - multi-line content (rare today but possible if a future entry wraps). 
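-
-    Illustrative round trip (entries invented)::
-
-        Working style: prefers bullet summaries
-        §
-        Tools & Access > ProtonMail: uses Proton for personal mail
-        §
-
-    yields two entries whose ``entity`` properties slug to
-    ``禮揚.working_style`` and ``禮揚.tools_access.protonmail``.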
- """ - entries: List[Entry] = [] - for raw_block in ENTRY_SEPARATOR.split(text): - block = raw_block.strip() - if not block: - continue - if ":" not in block: - logger.warning("skipping malformed block (no colon): %r", block[:60]) - continue - topic, _, content = block.partition(":") - topic = topic.strip() - content = content.strip() - if not topic or not content: - logger.warning("skipping empty topic or content: %r", block[:60]) - continue - entries.append(Entry(topic=topic, fact=content)) - return entries - - -# --------------------------------------------------------------------------- -# DB ops -# --------------------------------------------------------------------------- - - -def existing_keys(conn: sqlite3.Connection) -> set[Tuple[str, str]]: - """Return the (entity, fact) pairs already present, for idempotency.""" - rows = conn.execute("SELECT entity, fact FROM semantic_facts").fetchall() - return {(r[0], r[1]) for r in rows} - - -def insert_batch( - conn: sqlite3.Connection, - rows: List[Tuple[Entry, bytes]], - *, - valid_from: str, - importance: int, -) -> int: - """Insert one batch of (entry, embedding) pairs. Returns count inserted.""" - cur = conn.executemany( - """ - INSERT INTO semantic_facts(entity, fact, embedding, - importance, valid_from, valid_to) - VALUES (?, ?, ?, ?, ?, NULL) - """, - [ - (e.entity, e.fact, blob, importance, valid_from) - for e, blob in rows - ], - ) - return cur.rowcount - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - - -async def import_memory_md( - *, - md_path: Path, - db_path: Path, - dry_run: bool, - valid_from: str = DEFAULT_VALID_FROM, - importance: int = DEFAULT_IMPORTANCE, - batch_size: int = DEFAULT_BATCH, - embed_fn=None, # injectable for tests -) -> dict: - """Run the full import. - - Returns a summary dict: {parsed, new, skipped_dup, batches, dry_run}. - Does not return embeddings. - """ - text = md_path.read_text(encoding="utf-8") - entries = parse_memory_md(text) - - # Open DB and bootstrap if needed (idempotent — store.init_db handles that). - from plugins.memory.sqlite_vec.store import init_db - conn = init_db(db_path) - - have = existing_keys(conn) - new_entries = [e for e in entries if (e.entity, e.fact) not in have] - skipped = len(entries) - len(new_entries) - - if dry_run: - print(f"[dry-run] parsed={len(entries)} new={len(new_entries)} " - f"already_present={skipped}") - for e in new_entries[:10]: - print(f" + ({e.entity}) {e.fact[:80]!r}") - if len(new_entries) > 10: - print(f" … and {len(new_entries) - 10} more") - return { - "parsed": len(entries), - "new": len(new_entries), - "skipped_dup": skipped, - "batches": 0, - "dry_run": True, - } - - if not new_entries: - print(f"nothing to import (parsed={len(entries)}, all present)") - return { - "parsed": len(entries), - "new": 0, - "skipped_dup": skipped, - "batches": 0, - "dry_run": False, - } - - # Embed in batches; default uses real Voyage, tests inject a stub. 
- if embed_fn is None: - from plugins.memory.sqlite_vec.embed import voyage_embed - embed_fn = voyage_embed - - inserted = 0 - batches = 0 - try: - conn.execute("BEGIN") - for i in range(0, len(new_entries), batch_size): - chunk = new_entries[i : i + batch_size] - blobs = await embed_fn([e.fact for e in chunk]) - if len(blobs) != len(chunk): - raise RuntimeError( - f"embed returned {len(blobs)} for {len(chunk)} inputs" - ) - inserted += insert_batch( - conn, - list(zip(chunk, blobs)), - valid_from=valid_from, - importance=importance, - ) - batches += 1 - conn.commit() - except Exception: - conn.rollback() - raise - - print( - f"imported {inserted} entries in {batches} " - f"batch{'es' if batches != 1 else ''} " - f"(parsed={len(entries)}, skipped_dup={skipped})" - ) - return { - "parsed": len(entries), - "new": inserted, - "skipped_dup": skipped, - "batches": batches, - "dry_run": False, - } - - -def _build_arg_parser() -> argparse.ArgumentParser: - p = argparse.ArgumentParser(description=__doc__.split("\n")[0]) - p.add_argument( - "--memory-md", - type=Path, - default=DEFAULT_MEMORY_MD, - help="Path to MEMORY.md (default: ~/.hermes/memories/MEMORY.md)", - ) - p.add_argument( - "--db", - type=Path, - default=DEFAULT_DB, - help="Path to memory.db (default: /opt/data/memories/memory.db inside container)", - ) - g = p.add_mutually_exclusive_group(required=True) - g.add_argument("--dry-run", action="store_true", help="Show plan, do not write") - g.add_argument("--commit", action="store_true", help="Actually import") - p.add_argument("--valid-from", default=DEFAULT_VALID_FROM) - p.add_argument("--importance", type=int, default=DEFAULT_IMPORTANCE) - return p - - -def main(argv: Optional[List[str]] = None) -> int: - logging.basicConfig( - level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" - ) - args = _build_arg_parser().parse_args(argv) - - # Live import path: ensure VOYAGE_API_KEY is loaded from ~/.hermes/.env. - if args.commit: - try: - from hermes_cli.env_loader import load_hermes_dotenv - load_hermes_dotenv(hermes_home="/opt/data", project_env=None) - except ImportError: - pass # tests / non-container contexts handle env themselves - - summary = asyncio.run( - import_memory_md( - md_path=args.memory_md, - db_path=args.db, - dry_run=args.dry_run, - valid_from=args.valid_from, - importance=args.importance, - ) - ) - return 0 if summary["new"] >= 0 else 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tests/plugins/memory/test_extract.py b/tests/plugins/memory/test_extract.py deleted file mode 100644 index 45cf1d3bca1..00000000000 --- a/tests/plugins/memory/test_extract.py +++ /dev/null @@ -1,363 +0,0 @@ -"""Tests for plugins/memory/sqlite_vec/extract.py (W3-1).""" - -from __future__ import annotations - -import asyncio -import json -from pathlib import Path -from unittest.mock import patch - -import httpx -import pytest - -from plugins.memory.sqlite_vec.extract import ( - EXTRACT_MODEL, - EXTRACT_PROMPT, - PHI_BLACKLIST_CHANNELS, - ExtractError, - ExtractedFact, - _coerce_fact, - _parse_json_list, - kimi_extract, -) - - -# --------------------------------------------------------------------------- -# Pure helpers -# --------------------------------------------------------------------------- - - -def test_extract_prompt_is_verbatim_spec_5_2(): - """Spec §5.2 marks EXTRACT_PROMPT as a behavioural contract — verbatim.""" - # Anchors that uniquely identify the spec's exact wording. - assert "You extract durable memories about 禮揚 from this Discord turn." 
in EXTRACT_PROMPT - assert "HARD RULES — these override everything else:" in EXTRACT_PROMPT - assert "ERR ON THE SIDE OF NOT EXTRACTING" in EXTRACT_PROMPT - assert "Skip facts that duplicate something said in the last 5 turns." in EXTRACT_PROMPT - # Placeholders must be preserved. - assert "{ts}" in EXTRACT_PROMPT and "{channel}" in EXTRACT_PROMPT - assert "{user}" in EXTRACT_PROMPT and "{assistant}" in EXTRACT_PROMPT - - -def test_phi_blacklist_matches_spec_5_1(): - assert PHI_BLACKLIST_CHANNELS == frozenset({"cmio", "cbme", "medicine"}) - - -def test_parse_json_list_bare_array(): - assert _parse_json_list('[{"type":"semantic","text":"a"}]') == [ - {"type": "semantic", "text": "a"} - ] - - -def test_parse_json_list_wrapped_object(): - assert _parse_json_list('{"facts": [{"type":"semantic","text":"a"}]}') == [ - {"type": "semantic", "text": "a"} - ] - assert _parse_json_list('{"items": [{"type":"semantic","text":"b"}]}') == [ - {"type": "semantic", "text": "b"} - ] - - -def test_parse_json_list_empty_object_returns_empty_list(): - assert _parse_json_list("{}") == [] - assert _parse_json_list("") == [] - assert _parse_json_list("not even json") == [] - - -def test_coerce_fact_drops_invalid_type(): - assert _coerce_fact({"type": "garbage", "text": "a"}) is None - assert _coerce_fact({"type": "semantic"}) is None # missing text - assert _coerce_fact({"type": "semantic", "text": " "}) is None # blank text - - -def test_coerce_fact_clamps_importance(): - f = _coerce_fact({"type": "semantic", "text": "a", "importance": 99}) - assert f.importance == 5 - f = _coerce_fact({"type": "semantic", "text": "a", "importance": -3}) - assert f.importance == 1 - f = _coerce_fact({"type": "semantic", "text": "a", "importance": "not-int"}) - assert f.importance == 2 # default fallback - - -def test_coerce_fact_round_trip_full_shape(): - raw = { - "type": "semantic", - "text": "致妤 7:30 才到家", - "entity": "禮揚.家庭", - "importance": 3, - "valid_to_hint": "2026-05-03", - } - f = _coerce_fact(raw) - assert isinstance(f, ExtractedFact) - assert f.text == "致妤 7:30 才到家" - assert f.entity == "禮揚.家庭" - assert f.importance == 3 - assert f.valid_to_hint == "2026-05-03" - - -# --------------------------------------------------------------------------- -# kimi_extract — short-circuits (no httpx call) -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("channel", ["cmio", "cbme", "medicine"]) -def test_kimi_extract_phi_channel_returns_empty_no_call(channel, monkeypatch, tmp_path): - """Even with no API key, PHI channels never hit the network.""" - monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) - # Point auth.json lookup at an empty tmp dir so any leak would raise. 
- monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - out = asyncio.run( - kimi_extract( - "病人的血壓 180/100", - "我建議轉診", - channel=channel, - ts="2026-05-02 09:00:00", - ) - ) - assert out == [] - - -def test_kimi_extract_empty_turn_returns_empty(monkeypatch, tmp_path): - monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - out = asyncio.run( - kimi_extract("", "", channel="cattia", ts="2026-05-02 09:00:00") - ) - assert out == [] - - -# --------------------------------------------------------------------------- -# kimi_extract — mocked synthetic.new responses -# --------------------------------------------------------------------------- - - -def _mock_synthetic_response(facts: list, *, status: int = 200): - """Build a synthetic.new chat-completions JSON body wrapping `facts`.""" - body = { - "id": "test", - "choices": [ - { - "message": {"role": "assistant", "content": json.dumps(facts)}, - "finish_reason": "stop", - } - ], - "usage": {"prompt_tokens": 200, "completion_tokens": 80}, - } - return status, body - - -class _FakeTransport(httpx.MockTransport): - def __init__(self, status, body): - self.calls = [] - self._status = status - self._body = body - super().__init__(self._h) - - def _h(self, request: httpx.Request): - self.calls.append(request) - return httpx.Response(self._status, json=self._body) - - -def test_kimi_extract_pleasantry_returns_empty_after_call(monkeypatch, tmp_path): - monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - status, body = _mock_synthetic_response([]) # Kimi correctly returns [] - transport = _FakeTransport(status, body) - client = httpx.AsyncClient(transport=transport) - - out = asyncio.run( - kimi_extract( - "好的", "收到", channel="cattia", ts="2026-05-02 09:00:00", - client=client, log_path=tmp_path / "memory.log", - ) - ) - assert out == [] - assert len(transport.calls) == 1 - log_line = (tmp_path / "memory.log").read_text().strip() - assert '"cmd": "kimi_extract"' in log_line - assert '"n_kept": 0' in log_line - - -def test_kimi_extract_short_lived_fact_with_valid_to_hint(monkeypatch, tmp_path): - monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - facts = [ - { - "type": "semantic", - "text": "致妤今晚 (2026-05-02) 預計 7:30 才到家", - "entity": "禮揚.家庭/今晚", - "importance": 3, - "valid_to_hint": "2026-05-03", - } - ] - transport = _FakeTransport(*_mock_synthetic_response(facts)) - client = httpx.AsyncClient(transport=transport) - - out = asyncio.run( - kimi_extract( - "今晚致妤會晚回來,大概 7:30 才到", "好喔", - channel="at-home", ts="2026-05-02 09:00:00", - client=client, log_path=tmp_path / "memory.log", - ) - ) - assert len(out) == 1 - f = out[0] - assert f.type == "semantic" - assert "7:30" in f.text - assert f.valid_to_hint == "2026-05-03" - assert f.importance == 3 - - -def test_kimi_extract_long_lived_fact_no_valid_to(monkeypatch, tmp_path): - monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - facts = [ - { - "type": "semantic", - "text": "禮揚 最近在追 sleep medicine 的 RCT", - "entity": "禮揚.研究興趣", - "importance": 2, - } - ] - transport = _FakeTransport(*_mock_synthetic_response(facts)) - client = 
httpx.AsyncClient(transport=transport) - - out = asyncio.run( - kimi_extract( - "最近在追 sleep medicine", "了解,要幫你 follow up 嗎", - channel="cattia", ts="2026-05-02 09:00:00", - client=client, log_path=tmp_path / "memory.log", - ) - ) - assert len(out) == 1 - assert out[0].valid_to_hint is None - assert out[0].entity == "禮揚.研究興趣" - - -def test_kimi_extract_drops_malformed_rows(monkeypatch, tmp_path): - monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - facts = [ - {"type": "semantic", "text": "good fact"}, - {"type": "garbage", "text": "bad type"}, # dropped - {"type": "episodic"}, # missing text → dropped - {"type": "semantic", "text": " "}, # blank text → dropped - ] - transport = _FakeTransport(*_mock_synthetic_response(facts)) - client = httpx.AsyncClient(transport=transport) - - out = asyncio.run( - kimi_extract( - "u", "a", channel="cattia", ts="2026-05-02 09:00:00", - client=client, log_path=tmp_path / "memory.log", - ) - ) - assert len(out) == 1 - assert out[0].text == "good fact" - - -def test_kimi_extract_5xx_raises_extracterror(monkeypatch, tmp_path): - monkeypatch.setenv("SYNTHETIC_API_KEY", "test-key") - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - transport = _FakeTransport(503, {"error": "down"}) - client = httpx.AsyncClient(transport=transport) - with pytest.raises(ExtractError): - asyncio.run( - kimi_extract( - "u", "a", channel="cattia", ts="2026-05-02 09:00:00", - client=client, log_path=tmp_path / "memory.log", - ) - ) - - -def test_kimi_extract_no_api_key_raises(monkeypatch, tmp_path): - monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) # auth.json absent - with pytest.raises(ExtractError, match="API key"): - asyncio.run( - kimi_extract( - "u", "a", channel="cattia", ts="2026-05-02 09:00:00", - log_path=tmp_path / "memory.log", - ) - ) - - -def test_kimi_extract_reads_auth_json_when_no_env(monkeypatch, tmp_path): - monkeypatch.delenv("SYNTHETIC_API_KEY", raising=False) - monkeypatch.setattr( - "plugins.memory.sqlite_vec.extract._resolve_hermes_home", lambda: tmp_path - ) - auth = { - "credential_pool": { - "custom:synthetic": [ - {"id": "test", "access_token": "syn_test_xxx"}, - ] - } - } - (tmp_path / "auth.json").write_text(json.dumps(auth), encoding="utf-8") - transport = _FakeTransport(*_mock_synthetic_response([])) - client = httpx.AsyncClient(transport=transport) - - out = asyncio.run( - kimi_extract( - "x", "y", channel="cattia", ts="2026-05-02 09:00:00", - client=client, log_path=tmp_path / "memory.log", - ) - ) - assert out == [] - # The Authorization header carried the auth.json token. - assert transport.calls[0].headers["Authorization"] == "Bearer syn_test_xxx" - - - -# =========================================================================== -# Additional parser shapes discovered during live smoke test -# =========================================================================== - - -def test_parse_json_list_extracted_memories_key(): - """Kimi K2.5 with response_format=json_object often wraps the answer in - a dict with key 'extracted_memories' (sometimes alongside an 'analysis' - field showing its reasoning). 
Both must be parsed correctly.""" - payload = ( - '{"analysis": "the user mentions...", ' - '"extracted_memories": [{"type":"semantic","text":"a"}]}' - ) - out = _parse_json_list(payload) - assert out == [{"type": "semantic", "text": "a"}] - - -def test_parse_json_list_bare_single_fact_dict(): - """Kimi sometimes returns a single fact as a flat dict instead of a list. - We detect that shape by the presence of canonical fact keys.""" - payload = ( - '{"type": "episodic", "text": "致妤今晚 7:30", ' - '"entity": "禮揚.家庭", "importance": 2}' - ) - out = _parse_json_list(payload) - assert len(out) == 1 - assert out[0]["text"] == "致妤今晚 7:30" - - -def test_parse_json_list_arbitrary_dict_falls_back_to_first_list(): - """If neither canonical keys nor fact-shape is present, the first - list-valued field is returned. Defensive against future Kimi changes.""" - payload = '{"weird_unique_key": [{"type":"semantic","text":"x"}]}' - out = _parse_json_list(payload) - assert out == [{"type": "semantic", "text": "x"}] diff --git a/tests/plugins/memory/test_promotion.py b/tests/plugins/memory/test_promotion.py deleted file mode 100644 index 6e452ba2132..00000000000 --- a/tests/plugins/memory/test_promotion.py +++ /dev/null @@ -1,397 +0,0 @@ -"""Tests for plugins/memory/sqlite_vec/promotion.py (W3-3).""" - -from __future__ import annotations - -import asyncio -import json -import struct -from datetime import date, timedelta -from pathlib import Path -from unittest.mock import patch - -import pytest - -from plugins.memory.sqlite_vec.promotion import ( - PENDING_DIFF_TTL_DAYS, - PROMOTION_PROMPT, - WeekDigest, - _apply_diff_atomic, - _format_candidates_block, - _format_neighbors_block, - _purge_old_pending, - digest_id_for, - pending_path, - rejection_sentinel, - render_digest_markdown, - weekly_apply, - weekly_promotion, -) -from plugins.memory.sqlite_vec.store import VEC_DIM, init_db - - -def _vec(seed: int) -> bytes: - vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] - return struct.pack(f"{VEC_DIM}b", *vals) - - -# --------------------------------------------------------------------------- -# Prompt + format helpers -# --------------------------------------------------------------------------- - - -def test_prompt_has_required_placeholders(): - """The prompt is .format()'d with these keys; missing any breaks promotion.""" - for key in ("{digest_id}", "{today}", "{week_label}", - "{candidates_block}", "{neighbors_block}"): - assert key in PROMOTION_PROMPT, f"missing placeholder: {key}" - - -def test_prompt_carries_hard_rules(): - assert "病歷號" in PROMOTION_PROMPT - assert "DROP_AS_NOISE" in PROMOTION_PROMPT - assert "PROMOTE" in PROMOTION_PROMPT - assert "DEDUP_HIT" in PROMOTION_PROMPT - assert "EXPIRE" in PROMOTION_PROMPT - - -def test_format_candidates_block_marks_synthetic(): - cands = [ - {"id": 1, "ts": "2026-05-02 09:00", "channel": "cattia", - "role": "user", "synthetic": False, "text": "hello", - "stashed_facts": [{"text": "禮揚 likes X", "entity": "禮揚.訓練", - "importance": 2, "valid_to_hint": None}]}, - {"id": 2, "ts": "2026-05-02 09:00", "channel": "cron", - "role": "assistant", "synthetic": True, "text": "cron output", - "stashed_facts": []}, - ] - out = _format_candidates_block(cands) - assert "👤" in out and "🤖" in out - assert "↳ stashed:" in out - - -def test_format_neighbors_block_truncates_to_top_5(): - neighbors = { - "topic": [ - {"id": i, "fact": f"fact {i}", "entity": "x", "sim": 0.9 - i * 0.01} - for i in range(10) - ] - } - out = _format_neighbors_block(neighbors) - # Only 
5 should appear. - assert out.count("#") == 5 - - -# --------------------------------------------------------------------------- -# digest_id + path helpers -# --------------------------------------------------------------------------- - - -def test_digest_id_format(): - assert digest_id_for(date(2026, 5, 11)) == "wk-2026-05-11" - - -# --------------------------------------------------------------------------- -# WeekDigest -# --------------------------------------------------------------------------- - - -def test_week_digest_round_trip(): - raw = { - "digest_id": "wk-2026-05-10", - "candidate_episode_ids": [1, 2, 3], - "promote": [{"entity": "禮揚.家庭", "fact": "x", "importance": 3}], - "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits"}], - "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10"}], - "drop_as_noise": [{"episode_ids": [4], "reason": "pleasantry"}], - } - d = WeekDigest.from_dict(raw) - assert d.digest_id == "wk-2026-05-10" - assert d.to_dict()["candidate_episode_ids"] == [1, 2, 3] - - -# --------------------------------------------------------------------------- -# render_digest_markdown -# --------------------------------------------------------------------------- - - -def test_render_digest_markdown_full_shape(): - candidates = [ - {"id": 1, "ts": "x", "channel": "c", "role": "user", - "synthetic": False, "text": "u", "stashed_facts": []}, - {"id": 2, "ts": "x", "channel": "cron", "role": "user", - "synthetic": True, "text": "u", "stashed_facts": []}, - ] - d = WeekDigest.from_dict({ - "digest_id": "wk-2026-05-10", - "candidate_episode_ids": [1, 2], - "promote": [{"entity": "禮揚.家庭", "fact": "致妤生日 3/19", - "importance": 5, "valid_to": None, - "source_episode_ids": [1]}], - "dedup_hits": [{"existing_fact_id": 5, "action": "bump_hits", - "source_episode_ids": [2]}], - "expire": [{"existing_fact_id": 7, "valid_to": "2026-05-10", - "reason": "stale"}], - "drop_as_noise": [{"episode_ids": [3], "reason": "好的"}], - }) - md = render_digest_markdown(d, candidates) - assert "Weekly Memory Review — 2026-05-10" in md - assert "(1 user/assistant + 1 cron-synthetic)" in md - assert "/memreview reject wk-2026-05-10" in md - assert "⬆️ Promote to permanent (1)" in md - assert "🔁 Dedup confirmations (1)" in md - assert "🪦 Expiring (1)" in md - assert "🗑️ Skipped as noise (1)" in md - assert "致妤生日 3/19" in md - assert "valid_to: 永久" in md # null valid_to - - -def test_render_digest_empty_sections_collapse(): - d = WeekDigest.from_dict({"digest_id": "wk-2026-05-10", - "candidate_episode_ids": []}) - md = render_digest_markdown(d, []) - assert "_No actions this week._" in md - - -# --------------------------------------------------------------------------- -# weekly_promotion (mocked Kimi) -# --------------------------------------------------------------------------- - - -def _seed_episodes(conn, today_iso: str = "2026-05-02 12:00:00"): - """Add 2 fixture episodes with stashed_facts.""" - conn.execute( - "INSERT INTO episodes(ts, channel, external_id, role, text, synthetic, metadata) " - "VALUES (?, ?, ?, ?, ?, ?, ?)", - (today_iso, "cattia", "m1:user", "user", "我下週要去日本", 0, - json.dumps({"stashed_facts": [ - {"type": "semantic", "text": "禮揚下週去日本", "entity": "禮揚.家庭", - "importance": 3, "valid_to_hint": "2026-05-11"}]})), - ) - conn.execute( - "INSERT INTO episodes(ts, channel, external_id, role, text) " - "VALUES (?, ?, ?, ?, ?)", - (today_iso, "cattia", "m1:asst", "assistant", "好的", ), - ) - conn.commit() - - -def test_weekly_promotion_no_candidates(tmp_path, monkeypatch): - 
monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - db = init_db(tmp_path / "m.db") - summary = asyncio.run(weekly_promotion(db)) - assert summary["candidates"] == 0 - assert "skipped" in summary - - -def test_weekly_promotion_dry_run_returns_markdown(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - db = init_db(tmp_path / "m.db") - _seed_episodes(db) - - async def fake_kimi(prompt): - # Sanity: prompt was actually formatted, not left with placeholders. - assert "{digest_id}" not in prompt - return { - "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", - "importance": 3, "valid_to": "2026-05-11", - "source_episode_ids": [1]}], - "dedup_hits": [], "expire": [], "drop_as_noise": [], - } - - async def fake_embed(texts): - return [_vec(50) for _ in texts] - - summary = asyncio.run(weekly_promotion( - db, dry_run=True, kimi_fn=fake_kimi, - embed_fn=fake_embed, - )) - assert summary["candidates"] == 2 - assert summary["promote"] == 1 - assert summary["dry_run"] is True - assert "markdown_preview" in summary - assert "下週去日本" in summary["markdown_preview"] - # Dry-run MUST NOT persist a pending diff or post to Discord. - assert not (tmp_path / "memories" / "pending_diffs").exists() or \ - not list((tmp_path / "memories" / "pending_diffs").glob("*.json")) - - -def test_weekly_promotion_persists_diff_on_real_run(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - db = init_db(tmp_path / "m.db") - _seed_episodes(db) - - async def fake_kimi(prompt): - return { - "promote": [], "dedup_hits": [], "expire": [], - "drop_as_noise": [{"episode_ids": [1, 2], "reason": "no signal"}], - } - - summary = asyncio.run(weekly_promotion( - db, dry_run=False, kimi_fn=fake_kimi, - )) - # Diff was written, even with no Discord channel configured. - files = list((tmp_path / "memories" / "pending_diffs").glob("*.json")) - assert len(files) == 1 - diff = json.loads(files[0].read_text()) - assert diff["candidate_episode_ids"] == [1, 2] - - -# --------------------------------------------------------------------------- -# weekly_apply -# --------------------------------------------------------------------------- - - -def test_weekly_apply_no_pending_diff(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - db = init_db(tmp_path / "m.db") - summary = asyncio.run(weekly_apply(db)) - assert summary["applied"] is False - assert "no pending diff" in summary.get("reason", "") - - -def test_weekly_apply_rejection_sentinel_archives_without_apply(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - db = init_db(tmp_path / "m.db") - - digest_id = "wk-2026-05-02" - pending_path(digest_id).write_text(json.dumps({ - "digest_id": digest_id, "candidate_episode_ids": [], - "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [], - })) - rejection_sentinel(digest_id).write_text("rejected", encoding="utf-8") - - summary = asyncio.run(weekly_apply(db)) - assert summary["applied"] is False - assert summary["reason"] == "rejected" - # Diff moved to archive_dir, sentinel removed. 
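-    # A sketch of the sentinel layout, inferred from the pending_path()/
-    # rejection_sentinel() calls above rather than from promotion.py itself:
-    #     pending_diffs/wk-2026-05-02.json      <- diff awaiting apply
-    #     pending_diffs/wk-2026-05-02.rejected  <- written by /memreview reject
-    # weekly_apply reads "json + sentinel" as a rejection and archives the diff.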
- assert not pending_path(digest_id).exists() - assert not rejection_sentinel(digest_id).exists() - archive = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) - assert len(archive) == 1 - - -def test_weekly_apply_promotes_inserts_and_stamps(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - db = init_db(tmp_path / "m.db") - _seed_episodes(db) - - digest_id = "wk-2026-05-02" - pending_path(digest_id).write_text(json.dumps({ - "digest_id": digest_id, - "candidate_episode_ids": [1, 2], - "promote": [{"entity": "禮揚.家庭", "fact": "下週去日本", - "importance": 3, "valid_from": "2026-05-02", - "valid_to": "2026-05-11", "source_episode_ids": [1]}], - "dedup_hits": [], "expire": [], "drop_as_noise": [], - })) - - async def fake_embed(texts): - return [_vec(50) for _ in texts] - - summary = asyncio.run(weekly_apply(db, embed_fn=fake_embed)) - assert summary["applied"] is True - assert summary["promoted"] == 1 - assert summary["stamped"] == 2 - # New row in semantic_facts. - [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert sf == 1 - # Trigger mirrored into vec_facts. - [(vf,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() - assert vf == 1 - # Episodes stamped. - rows = db.execute("SELECT id, promoted_at FROM episodes ORDER BY id").fetchall() - assert all(r["promoted_at"] is not None for r in rows) - # Diff moved to archive. - archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json")) - assert len(archive) == 1 - - -def test_weekly_apply_dedup_bump_increments_hits(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - db = init_db(tmp_path / "m.db") - db.execute( - "INSERT INTO semantic_facts(fact, embedding, hits) VALUES (?, ?, ?)", - ("禮揚 likes X", _vec(10), 0), - ) - db.commit() - - digest_id = "wk-2026-05-02" - pending_path(digest_id).write_text(json.dumps({ - "digest_id": digest_id, "candidate_episode_ids": [], - "promote": [], "dedup_hits": [ - {"existing_fact_id": 1, "action": "bump_hits", - "source_episode_ids": []} - ], "expire": [], "drop_as_noise": [], - })) - - summary = asyncio.run(weekly_apply(db)) - assert summary["dedup_bumped"] == 1 - [(hits,)] = db.execute("SELECT hits FROM semantic_facts WHERE id=1").fetchall() - assert hits == 1 - - -def test_weekly_apply_expire_sets_valid_to(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - db = init_db(tmp_path / "m.db") - db.execute( - "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", - ("禮揚 watches paper X", _vec(10)), - ) - db.commit() - - digest_id = "wk-2026-05-02" - pending_path(digest_id).write_text(json.dumps({ - "digest_id": digest_id, "candidate_episode_ids": [], - "promote": [], "dedup_hits": [], - "expire": [{"existing_fact_id": 1, "valid_to": "2026-05-02", - "reason": "stale"}], - "drop_as_noise": [], - })) - - summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) - assert summary["expired"] == 1 - [(vt,)] = db.execute("SELECT valid_to FROM semantic_facts WHERE id=1").fetchall() - assert vt == "2026-05-02" - - -def test_weekly_apply_purges_old_pending(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", lambda: tmp_path - ) - db = init_db(tmp_path / "m.db") - - today = date(2026, 5, 2) - old = 
today - timedelta(days=PENDING_DIFF_TTL_DAYS + 5)
-    fresh = today - timedelta(days=2)
-
-    pending_path(f"wk-{old.isoformat()}").write_text("{}")
-    pending_path(f"wk-{fresh.isoformat()}").write_text(json.dumps({
-        "digest_id": f"wk-{fresh.isoformat()}", "candidate_episode_ids": [],
-        "promote": [], "dedup_hits": [], "expire": [], "drop_as_noise": [],
-    }))
-
-    summary = asyncio.run(weekly_apply(db, today=today))
-    assert summary["purged"] == 1
-    # Old gone, fresh applied + archived.
-    assert not pending_path(f"wk-{old.isoformat()}").exists()
-    archive = list((tmp_path / "memories" / "diff_archive").glob("*.applied.json"))
-    assert len(archive) == 1
diff --git a/tests/plugins/memory/test_sqlite_vec_provider.py b/tests/plugins/memory/test_sqlite_vec_provider.py
deleted file mode 100644
index 9f8e8d438d1..00000000000
--- a/tests/plugins/memory/test_sqlite_vec_provider.py
+++ /dev/null
@@ -1,556 +0,0 @@
-"""Tests for the sqlite_vec memory provider plugin (W1 scope: schema only).
-
-Covers:
-  • bootstrap_schema is idempotent (re-running does not error or duplicate)
-  • all 3 tables + 4 indexes + 1 virtual table + 3 triggers exist
-  • semantic_facts defaults work (created_at, valid_from, importance)
-  • vec0 virtual table answers MATCH queries with k=N prefilter
-  • triggers keep vec_facts synced with semantic_facts (insert/update/delete)
-  • SqliteVecMemoryProvider.is_available() / initialize() / shutdown() round-trip
-"""
-
-from __future__ import annotations
-
-import struct
-from pathlib import Path
-
-import pytest
-
-from plugins.memory.sqlite_vec import SqliteVecMemoryProvider
-from plugins.memory.sqlite_vec.store import (
-    VEC_DIM,
-    bootstrap_schema,
-    init_db,
-    open_db,
-)
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _vec(seed: int) -> bytes:
-    """Make a deterministic 512-d int8 BLOB for testing.
-
-    int8 matches the locked decision in spec §1.4 (Voyage 3.5-lite, 512-dim, int8).
-    seed is the base value (clamped to int8 range) with a small per-dim offset
-    so different seeds produce different vectors but the same seed reproduces.
- """ - vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] - return struct.pack(f"{VEC_DIM}b", *vals) - - -# --------------------------------------------------------------------------- -# Schema bootstrap -# --------------------------------------------------------------------------- - - -def test_bootstrap_creates_all_objects(tmp_path): - db = init_db(tmp_path / "memory.db") - - table_names = { - row[0] - for row in db.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" - ) - } - assert "episodes" in table_names - assert "semantic_facts" in table_names - - index_names = { - row[0] - for row in db.execute( - "SELECT name FROM sqlite_master WHERE type='index' AND name NOT LIKE 'sqlite_%'" - ) - } - assert "idx_episodes_ts" in index_names - assert "idx_episodes_promoted_pending" in index_names - assert "idx_facts_entity" in index_names - assert "idx_facts_active" in index_names - - trigger_names = { - row[0] for row in db.execute("SELECT name FROM sqlite_master WHERE type='trigger'") - } - assert "sf_after_insert" in trigger_names - assert "sf_after_update_embedding" in trigger_names - assert "sf_after_delete" in trigger_names - - # vec0 virtual table is registered as a regular table internally - [(vec_count,)] = db.execute( - "SELECT count(*) FROM sqlite_master WHERE name='vec_facts'" - ).fetchall() - assert vec_count >= 1 - - -def test_bootstrap_is_idempotent(tmp_path): - path = tmp_path / "memory.db" - db = init_db(path) - bootstrap_schema(db) # second time - bootstrap_schema(db) # third time - # If we got here without error and tables still query, idempotency holds. - db.execute("SELECT count(*) FROM episodes").fetchone() - db.execute("SELECT count(*) FROM semantic_facts").fetchone() - - -# --------------------------------------------------------------------------- -# Defaults -# --------------------------------------------------------------------------- - - -def test_semantic_facts_defaults_are_populated(tmp_path): - db = init_db(tmp_path / "memory.db") - db.execute( - "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", - ("禮揚 likes Starting Strength method", _vec(10)), - ) - db.commit() - - row = db.execute( - "SELECT importance, state, valid_from, valid_to, created_at FROM semantic_facts" - ).fetchone() - assert row["importance"] == 2 - assert row["state"] == "active" - assert row["valid_from"] is not None # default = date('now') - assert row["valid_to"] is None - assert row["created_at"] is not None - - -def test_role_check_constraint(tmp_path): - db = init_db(tmp_path / "memory.db") - with pytest.raises(Exception): - db.execute( - "INSERT INTO episodes(ts, channel, external_id, role, text) " - "VALUES (datetime('now'), 'cattia', 'msg-1', 'system', 'hi')" - ) - - -# --------------------------------------------------------------------------- -# Trigger sync between semantic_facts and vec_facts -# --------------------------------------------------------------------------- - - -def test_triggers_sync_insert_update_delete(tmp_path): - db = init_db(tmp_path / "memory.db") - - # INSERT - db.execute( - "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", - ("fact A", _vec(50)), - ) - db.commit() - [(count_after_insert,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() - assert count_after_insert == 1 - - # UPDATE embedding - [fact_id] = db.execute("SELECT id FROM semantic_facts").fetchone() - new_vec = _vec(90) - db.execute("UPDATE semantic_facts SET embedding=? 
WHERE id=?", (new_vec, fact_id)) - db.commit() - [(after_update,)] = db.execute( - "SELECT count(*) FROM vec_facts WHERE fact_id=?", (fact_id,) - ).fetchall() - assert after_update == 1 - - # DELETE - db.execute("DELETE FROM semantic_facts WHERE id=?", (fact_id,)) - db.commit() - [(count_after_delete,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() - assert count_after_delete == 0 - - -# --------------------------------------------------------------------------- -# vec0 retrieval -# --------------------------------------------------------------------------- - - -def test_vec0_match_returns_nearest(tmp_path): - db = init_db(tmp_path / "memory.db") - for seed, fact in [(10, "alpha"), (50, "beta"), (90, "gamma")]: - db.execute( - "INSERT INTO semantic_facts(fact, embedding) VALUES (?, ?)", - (fact, _vec(seed)), - ) - db.commit() - - query = _vec(51) - rows = db.execute( - "SELECT fact_id, distance FROM vec_facts WHERE embedding MATCH vec_int8(?) AND k = 2", - (query,), - ).fetchall() - assert len(rows) == 2 - # Closest must be the seed=0.5 row (beta) - closest_fact_id = rows[0]["fact_id"] - closest_fact = db.execute( - "SELECT fact FROM semantic_facts WHERE id=?", (closest_fact_id,) - ).fetchone()["fact"] - assert closest_fact == "beta" - - -# --------------------------------------------------------------------------- -# MemoryProvider lifecycle -# --------------------------------------------------------------------------- - - -def test_provider_lifecycle(tmp_path): - p = SqliteVecMemoryProvider() - assert p.name == "sqlite_vec" - assert p.is_available() is True - p.initialize(session_id="t1", hermes_home=str(tmp_path)) - assert (tmp_path / "memories" / "memory.db").exists() - assert p.prefetch("test query") == "" # W1: no-op - assert p.sync_turn("hi", "hello") is None # W1: no-op - assert p.get_tool_schemas() == [] - p.shutdown() - - - -# =========================================================================== -# W2-1: voyage_embed (mocked) + read_memory + bump_hits + format_facts -# =========================================================================== - -import asyncio -import sqlite3 -from unittest.mock import patch - -import httpx -import pytest - -from plugins.memory.sqlite_vec.embed import ( - VOYAGE_BATCH, - VOYAGE_DIM, - VoyageError, - voyage_embed, -) -from plugins.memory.sqlite_vec.read import ( - Fact, - bump_hits, - format_facts_for_prompt, - read_memory, -) - - -def _fake_voyage_response(texts): - """Build a fake Voyage JSON body where each embedding is dim=512 of zeros - except the first cell which carries the input index. 
Lets us round-trip - the input ordering through _to_int8_blob.""" - return { - "data": [ - {"index": i, "embedding": [(i % 200) - 100] + [0] * (VOYAGE_DIM - 1)} - for i, _ in enumerate(texts) - ] - } - - -class _MockTransport(httpx.MockTransport): - """httpx mock that records call count and returns programmable responses.""" - - def __init__(self, responses): - self.calls = [] - self._responses = list(responses) - super().__init__(self._handler) - - def _handler(self, request: httpx.Request) -> httpx.Response: - self.calls.append(request) - status, body = self._responses.pop(0) - if isinstance(body, dict): - return httpx.Response(status, json=body) - return httpx.Response(status, text=body) - - -# --------------------------------------------------------------------------- -# voyage_embed -# --------------------------------------------------------------------------- - - -def test_voyage_embed_success(monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - texts = ["hello", "world", "禮揚"] - transport = _MockTransport([(200, _fake_voyage_response(texts))]) - client = httpx.AsyncClient(transport=transport) - - blobs = asyncio.run(voyage_embed(texts, client=client)) - - assert len(blobs) == len(texts) - for b in blobs: - assert len(b) == VOYAGE_DIM - # First byte encodes the (signed) index value we baked into the fake response. - assert blobs[0][0] == (-100) & 0xFF # input index 0 -> -100 -> unsigned 156 - assert blobs[1][0] == (-99) & 0xFF - assert len(transport.calls) == 1 - - -def test_voyage_embed_batches_at_128(monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - texts = [f"t{i}" for i in range(200)] # > VOYAGE_BATCH=128 - # 2 calls: first 128, then 72. - transport = _MockTransport( - [ - (200, _fake_voyage_response(texts[:VOYAGE_BATCH])), - (200, _fake_voyage_response(texts[VOYAGE_BATCH:])), - ] - ) - client = httpx.AsyncClient(transport=transport) - - blobs = asyncio.run(voyage_embed(texts, client=client)) - assert len(blobs) == 200 - assert len(transport.calls) == 2 - - -def test_voyage_embed_retries_on_5xx(monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - texts = ["only"] - transport = _MockTransport( - [ - (502, "bad gateway"), - (503, "still bad"), - (200, _fake_voyage_response(texts)), - ] - ) - client = httpx.AsyncClient(transport=transport) - - # Patch sleep to avoid real backoff delay. - with patch("plugins.memory.sqlite_vec.embed.asyncio.sleep", return_value=None): - blobs = asyncio.run(voyage_embed(texts, client=client)) - - assert len(blobs) == 1 - assert len(transport.calls) == 3 - - -def test_voyage_embed_4xx_raises(monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - transport = _MockTransport([(401, "unauthorized")]) - client = httpx.AsyncClient(transport=transport) - with pytest.raises(VoyageError): - asyncio.run(voyage_embed(["x"], client=client)) - - -def test_voyage_embed_missing_key(monkeypatch): - monkeypatch.delenv("VOYAGE_API_KEY", raising=False) - with pytest.raises(VoyageError, match="VOYAGE_API_KEY"): - asyncio.run(voyage_embed(["x"])) - - -def test_voyage_embed_empty_input_no_call(monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - # No transport responses queued; if we make a call the test will explode. 
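-    # The guard being exercised is presumably an early return in embed.py,
-    # roughly:
-    #     if not texts:
-    #         return []
-    # Any stray request would make _MockTransport._handler pop from an empty
-    # responses list and raise IndexError, failing the test loudly.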
-    transport = _MockTransport([])
-    client = httpx.AsyncClient(transport=transport)
-    blobs = asyncio.run(voyage_embed([], client=client))
-    assert blobs == []
-    assert len(transport.calls) == 0
-
-
-# ---------------------------------------------------------------------------
-# read_memory + bump_hits
-# ---------------------------------------------------------------------------
-
-
-def _seed_facts(db: sqlite3.Connection):
-    """Insert 4 facts (3 active + 1 already expired) at known created_at, with
-    int8 vectors that put 'beta' nearest to a seed=51 query."""
-    rows = [
-        # fact text, entity, created_at, vec seed
-        ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10),
-        ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50),
-        ("gamma", None, "2025-12-01 09:00:00", 90),
-        ("expired", "禮揚.短期", "2026-05-01 09:00:00", 50),
-    ]
-    for fact, entity, created_at, seed in rows:
-        db.execute(
-            "INSERT INTO semantic_facts(fact, entity, embedding, created_at, valid_to) "
-            "VALUES (?, ?, ?, ?, ?)",
-            (fact, entity, _vec(seed), created_at,
-             "2026-01-01" if fact == "expired" else None),
-        )
-    db.commit()
-
-
-def test_read_memory_orders_by_score(tmp_path, monkeypatch):
-    monkeypatch.setenv("VOYAGE_API_KEY", "test-key")
-    db = init_db(tmp_path / "memory.db")
-    _seed_facts(db)
-
-    # Stub voyage_embed to return a fixed query vector close to seed=51.
-    async def fake_embed(texts, **kw):
-        assert len(texts) == 1
-        return [_vec(51)]
-
-    log_file = tmp_path / "memory.log"
-    with patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed):
-        facts = asyncio.run(read_memory("test query", db, k=8, log_path=log_file))
-
-    fact_texts = [f.fact for f in facts]
-    # 'expired' must be filtered (valid_to in past).
-    assert "expired" not in fact_texts
-    # 'beta' should rank first (closest vec, recent).
-    assert fact_texts[0] == "beta"
-    # All Fact fields populated.
-    assert all(isinstance(f, Fact) for f in facts)
-    assert all(f.score is not None and f.sim is not None for f in facts)
-    # Latency was logged.
-    assert log_file.exists()
-    log_line = log_file.read_text().strip().splitlines()[-1]
-    assert '"sql_ms"' in log_line and '"q": "test query"' in log_line
-
-
-def test_bump_hits_increments_and_swallows(tmp_path, monkeypatch):
-    monkeypatch.setenv("VOYAGE_API_KEY", "test-key")
-    db = init_db(tmp_path / "memory.db")
-    _seed_facts(db)
-    ids = [r["id"] for r in db.execute("SELECT id FROM semantic_facts ORDER BY id").fetchall()]
-
-    asyncio.run(bump_hits(ids[:2], db))
-    rows = db.execute(
-        "SELECT id, hits, last_seen FROM semantic_facts ORDER BY id"
-    ).fetchall()
-    assert rows[0]["hits"] == 1 and rows[1]["hits"] == 1
-    assert rows[2]["hits"] == 0  # untouched
-    assert rows[0]["last_seen"] is not None
-
-    # Closed connection -> bump_hits must swallow the sqlite3.Error.
-    db.close()
-    asyncio.run(bump_hits(ids[:1], db))  # should not raise
-
-
-def test_bump_hits_empty_is_noop(tmp_path):
-    db = init_db(tmp_path / "memory.db")
-    # Should return immediately without touching the connection.
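-    # Assumed shape of the no-op branch in read.py (a sketch, not the
-    # verified source):
-    #     async def bump_hits(ids, conn):
-    #         if not ids:
-    #             return
-    #         ...  # otherwise UPDATE semantic_facts SET hits = hits + 1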
- asyncio.run(bump_hits([], db)) - - -def test_format_facts_for_prompt_shape(): - facts = [ - Fact(id=1, fact="禮揚 likes 5x5", entity="禮揚.訓練", - created_at="2026-05-01", importance=2, sim=0.8, age_days=1.0, score=0.9), - Fact(id=2, fact="致妤生日 3/19", entity=None, - created_at="2026-04-01", importance=3, sim=0.7, age_days=30.0, score=0.6), - ] - out = format_facts_for_prompt(facts) - assert "[禮揚.訓練] 禮揚 likes 5x5" in out - assert "- 致妤生日 3/19" in out # no entity prefix when None - assert format_facts_for_prompt([]) == "" - - - -# =========================================================================== -# W2-3: prefetch + sync_turn wiring -# =========================================================================== - -from unittest.mock import patch as _patch_w23 - -from plugins.memory.sqlite_vec import ( - PREFETCH_TIMEOUT_S, - RECALL_HEADER, - SqliteVecMemoryProvider, - _run_coro_in_thread, -) - - -def _stubbed_provider(tmp_path, monkeypatch, query_seed: int = 51): - """Build a provider with a real DB, real conn, but stubbed Voyage.""" - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - p = SqliteVecMemoryProvider() - p.initialize(session_id="t", hermes_home=str(tmp_path)) - # Seed 3 facts via the same trigger-driven pipeline used in production. - for fact, ent, ts, seed in [ - ("alpha", "禮揚.工作", "2026-04-01 09:00:00", 10), - ("beta", "禮揚.家庭", "2026-05-02 09:00:00", 50), - ("gamma", None, "2025-12-01 09:00:00", 90), - ]: - p._conn.execute( - "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", - (fact, ent, _vec(seed), ts), - ) - p._conn.commit() - - async def fake_embed(texts, **kw): - return [_vec(query_seed) for _ in texts] - - return p, fake_embed - - -def test_prefetch_returns_markdown_with_header(tmp_path, monkeypatch): - p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) - with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): - out = p.prefetch("when does my wife arrive home", session_id="s1") - assert out.startswith(RECALL_HEADER + "\n") - # Top fact 'beta' (seed=50) is closest to query (seed=51). - assert "beta" in out - # with_meta=True format includes importance + age. - assert "(importance:" in out and "days)" in out - # Fact ids cached for sync_turn to bump. - assert p._last_fact_ids["s1"] - p.shutdown() - - -def test_prefetch_empty_query_no_op(tmp_path, monkeypatch): - p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) - # No patch needed — should short-circuit before voyage_embed is reached. - assert p.prefetch("", session_id="s1") == "" - assert p.prefetch(" ", session_id="s1") == "" - assert "s1" not in p._last_fact_ids - p.shutdown() - - -def test_prefetch_swallows_voyage_error(tmp_path, monkeypatch): - p, _ = _stubbed_provider(tmp_path, monkeypatch) - - async def raise_embed(texts, **kw): - raise RuntimeError("voyage 503") - - with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", raise_embed): - out = p.prefetch("anything", session_id="s1") - assert out == "" # Reply is never blocked on memory-recall failure. 
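-        # A failed prefetch must also leave no cached fact ids behind;
-        # otherwise the next sync_turn would bump hits for facts that were
-        # never shown to the model. Hence the cache assertion below.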
- assert "s1" not in p._last_fact_ids - p.shutdown() - - -def test_sync_turn_bumps_hits_then_clears_cache(tmp_path, monkeypatch): - p, fake_embed = _stubbed_provider(tmp_path, monkeypatch) - with _patch_w23("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): - p.prefetch("query", session_id="s1") - cached_ids = list(p._last_fact_ids["s1"]) - assert cached_ids - - p.sync_turn("user said hi", "asst replied", session_id="s1") - # Cache cleared - assert "s1" not in p._last_fact_ids - # Hits incremented for exactly the cached IDs. - placeholders = ",".join("?" * len(cached_ids)) - rows = p._conn.execute( - f"SELECT id, hits FROM semantic_facts WHERE id IN ({placeholders}) ORDER BY id", - cached_ids, - ).fetchall() - assert all(r["hits"] == 1 for r in rows), [(r["id"], r["hits"]) for r in rows] - - # Second sync_turn for same session is a no-op (cache empty). - p.sync_turn("u", "a", session_id="s1") - rows2 = p._conn.execute( - f"SELECT hits FROM semantic_facts WHERE id IN ({placeholders})", cached_ids - ).fetchall() - assert all(r["hits"] == 1 for r in rows2) - p.shutdown() - - -def test_run_coro_in_thread_timeout(): - import asyncio as _asyncio - - async def slow(): - await _asyncio.sleep(2.0) - return "ok" - - import pytest - with pytest.raises(TimeoutError): - _run_coro_in_thread(slow, timeout=0.05) - - -def test_format_with_meta_shape(): - facts = [ - Fact(id=1, fact="致妤生日 3/19", entity="禮揚.家庭", - created_at="2026-05-01", importance=3, sim=0.7, - age_days=5.4, score=0.6), - ] - out = format_facts_for_prompt(facts, with_meta=True) - assert "(importance: 3, age: 5 days)" in out - out_compact = format_facts_for_prompt(facts, with_meta=False) - assert "importance" not in out_compact diff --git a/tests/plugins/memory/test_write.py b/tests/plugins/memory/test_write.py deleted file mode 100644 index 5bf2462739b..00000000000 --- a/tests/plugins/memory/test_write.py +++ /dev/null @@ -1,322 +0,0 @@ -"""Tests for plugins/memory/sqlite_vec/write.py (W3-2).""" - -from __future__ import annotations - -import asyncio -import json -import struct -from datetime import date, timedelta -from pathlib import Path -from unittest.mock import patch - -import pytest - -from plugins.memory.sqlite_vec.extract import ExtractedFact -from plugins.memory.sqlite_vec.store import VEC_DIM, init_db -from plugins.memory.sqlite_vec.write import ( - FAST_TRACK_DAYS, - _fact_should_fast_track, - _parse_valid_to_hint, - write_episode, -) - - -def _vec(seed: int) -> bytes: - vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] - return struct.pack(f"{VEC_DIM}b", *vals) - - -def _stub_embed_factory(): - """Returns (stub, call_log) — stub yields deterministic int8 blobs.""" - calls = [] - - async def stub(texts): - calls.append(list(texts)) - return [_vec(10 + i) for i in range(len(texts))] - - return stub, calls - - -def _stub_extract_factory(facts: list): - async def stub(user, asst, channel, ts): - return list(facts) - - return stub - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def test_parse_valid_to_hint(): - assert _parse_valid_to_hint("2026-05-03") == date(2026, 5, 3) - assert _parse_valid_to_hint("not-a-date") is None - assert _parse_valid_to_hint("") is None - assert _parse_valid_to_hint(None) is None - - -def test_fact_should_fast_track_threshold(): - today = date(2026, 5, 2) - f_in = ExtractedFact(type="semantic", text="x", entity=None, importance=2, - 
valid_to_hint=(today + timedelta(days=10)).isoformat()) - f_edge = ExtractedFact(type="semantic", text="x", entity=None, importance=2, - valid_to_hint=(today + timedelta(days=FAST_TRACK_DAYS)).isoformat()) - f_out = ExtractedFact(type="semantic", text="x", entity=None, importance=2, - valid_to_hint=(today + timedelta(days=60)).isoformat()) - f_none = ExtractedFact(type="semantic", text="x", entity=None, importance=2, - valid_to_hint=None) - assert _fact_should_fast_track(f_in, today) is True - assert _fact_should_fast_track(f_edge, today) is True - assert _fact_should_fast_track(f_out, today) is False - assert _fact_should_fast_track(f_none, today) is False - - -# --------------------------------------------------------------------------- -# write_episode — happy paths -# --------------------------------------------------------------------------- - - -def _bootstrap_db(tmp_path): - return init_db(tmp_path / "m.db") - - -def test_writes_two_episode_rows_per_turn(tmp_path): - db = _bootstrap_db(tmp_path) - embed, calls = _stub_embed_factory() - extract = _stub_extract_factory([]) - - summary = asyncio.run(write_episode( - user_msg="hello", reply="hi back", - channel="cattia", msg_id="m1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=extract, - failure_log_path=tmp_path / "fail.jsonl", - )) - - assert summary["episodes"] == 2 - assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 - rows = db.execute( - "SELECT role, channel, external_id, text FROM episodes ORDER BY id" - ).fetchall() - assert [r["role"] for r in rows] == ["user", "assistant"] - assert rows[0]["external_id"] == "m1:user" - assert rows[1]["external_id"] == "m1:asst" - # Single embed call covered both turn texts (no fact texts). - assert len(calls) == 1 - assert calls[0] == ["hello", "hi back"] - - -def test_phi_channel_records_episode_but_skips_extract(tmp_path): - db = _bootstrap_db(tmp_path) - embed, calls = _stub_embed_factory() - - def extract_should_not_be_called(*a, **kw): - raise AssertionError("extract called for PHI channel") - - summary = asyncio.run(write_episode( - user_msg="病人 [姓名] 血壓 180/100", reply="建議轉診", - channel="cmio", msg_id="phi-1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=extract_should_not_be_called, - failure_log_path=tmp_path / "fail.jsonl", - )) - - assert summary["skipped_extract"] is True - assert summary["episodes"] == 2 - assert summary["fast_tracked"] == 0 and summary["stashed"] == 0 - rows = db.execute("SELECT count(*) FROM episodes").fetchone() - assert rows[0] == 2 # raw episode rows still recorded - - -def test_idempotent_on_duplicate_msg_id(tmp_path): - """Re-running with the same msg_id collapses via ON CONFLICT.""" - db = _bootstrap_db(tmp_path) - embed, _ = _stub_embed_factory() - extract = _stub_extract_factory([]) - - args = dict( - user_msg="x", reply="y", channel="cattia", - msg_id="dup-1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=extract, - failure_log_path=tmp_path / "fail.jsonl", - ) - asyncio.run(write_episode(**args)) - summary2 = asyncio.run(write_episode(**args)) - assert summary2["episodes"] == 0 # nothing new inserted - [(count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() - assert count == 2 - - -# --------------------------------------------------------------------------- -# Fast-track vs stash partitioning -# --------------------------------------------------------------------------- - - -def test_short_lived_fact_fast_tracks_to_semantic_facts(tmp_path): - db = _bootstrap_db(tmp_path) 
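-    # FAST_TRACK_DAYS (exercised by the threshold test above) is the
-    # partition point: a valid_to_hint inside that window is written
-    # straight to semantic_facts, while longer-lived facts are stashed in
-    # episode metadata for the weekly promotion pass to review.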
- embed, _ = _stub_embed_factory() - today = date.today() - extract = _stub_extract_factory([ - ExtractedFact( - type="semantic", - text="致妤今晚 7:30 才到家", - entity="禮揚.家庭", - importance=3, - valid_to_hint=(today + timedelta(days=1)).isoformat(), - ), - ]) - - summary = asyncio.run(write_episode( - user_msg="今晚致妤 7:30 才到", reply="了解", - channel="at-home", msg_id="m1", ts="2026-05-02 17:00:00", - conn=db, embed_fn=embed, extract_fn=extract, - failure_log_path=tmp_path / "fail.jsonl", - )) - - assert summary["fast_tracked"] == 1 - assert summary["stashed"] == 0 - [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert sf_count == 1 - [(vf_count,)] = db.execute("SELECT count(*) FROM vec_facts").fetchall() - assert vf_count == 1 # trigger mirrored the row - row = db.execute( - "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts" - ).fetchone() - assert row["entity"] == "禮揚.家庭" - assert row["valid_to"] == (today + timedelta(days=1)).isoformat() - - -def test_long_lived_fact_stashes_in_episode_metadata(tmp_path): - db = _bootstrap_db(tmp_path) - embed, _ = _stub_embed_factory() - extract = _stub_extract_factory([ - ExtractedFact( - type="semantic", - text="禮揚 likes Starting Strength", - entity="禮揚.訓練", - importance=2, - valid_to_hint=None, # permanent → stash - ), - ]) - - summary = asyncio.run(write_episode( - user_msg="我練 SS 一年了", reply="酷", - channel="cattia", msg_id="m1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=extract, - failure_log_path=tmp_path / "fail.jsonl", - )) - - assert summary["stashed"] == 1 - assert summary["fast_tracked"] == 0 - [(sf_count,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert sf_count == 0 # nothing fast-tracked - metadata_rows = db.execute( - "SELECT metadata FROM episodes WHERE metadata IS NOT NULL" - ).fetchall() - assert len(metadata_rows) == 2 # both user + assistant rows carry the same metadata - md = json.loads(metadata_rows[0]["metadata"]) - assert md["stashed_facts"][0]["text"] == "禮揚 likes Starting Strength" - assert md["stashed_facts"][0]["entity"] == "禮揚.訓練" - - -def test_mixed_facts_partition_correctly(tmp_path): - db = _bootstrap_db(tmp_path) - embed, _ = _stub_embed_factory() - today = date.today() - extract = _stub_extract_factory([ - ExtractedFact( - type="semantic", text="short", - entity="禮揚.短期", importance=2, - valid_to_hint=(today + timedelta(days=2)).isoformat(), - ), - ExtractedFact( - type="semantic", text="long", - entity="禮揚.長期", importance=3, - valid_to_hint=None, - ), - ]) - - summary = asyncio.run(write_episode( - user_msg="u", reply="a", channel="cattia", - msg_id="m1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=extract, - failure_log_path=tmp_path / "fail.jsonl", - )) - - assert summary["fast_tracked"] == 1 - assert summary["stashed"] == 1 - - -# --------------------------------------------------------------------------- -# Failure path -# --------------------------------------------------------------------------- - - -def test_embed_failure_appends_to_jsonl(tmp_path): - db = _bootstrap_db(tmp_path) - - async def failing_embed(texts): - raise RuntimeError("voyage exploded") - - extract = _stub_extract_factory([]) - fail_log = tmp_path / "fail.jsonl" - - summary = asyncio.run(write_episode( - user_msg="u", reply="a", channel="cattia", - msg_id="m1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=failing_embed, extract_fn=extract, - failure_log_path=fail_log, - )) - - # Caller never sees the exception. 
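-    # Assumed failure path in write.py, inferred from the assertions below
-    # rather than from the source: the embed exception is caught, the
-    # episode INSERTs are rolled back, and one JSONL record along the lines of
-    #     {"channel": "cattia", "msg_id": "m1", "error": "voyage exploded"}
-    # is appended to failure_log_path.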
- assert summary["episodes"] == 0 # rolled back - [(ep_count,)] = db.execute("SELECT count(*) FROM episodes").fetchall() - assert ep_count == 0 - # Failure record landed in the JSONL. - assert fail_log.exists() - line = json.loads(fail_log.read_text().strip().splitlines()[-1]) - assert line["channel"] == "cattia" - assert line["msg_id"] == "m1" - assert "voyage exploded" in line["error"] - - -def test_extract_failure_still_records_episode(tmp_path): - """If kimi_extract raises, we still land the raw episode rows. The - weekly_promotion (W3-3) can re-extract from the raw text later.""" - db = _bootstrap_db(tmp_path) - embed, _ = _stub_embed_factory() - - async def failing_extract(*a, **kw): - raise RuntimeError("synthetic.new 503") - - summary = asyncio.run(write_episode( - user_msg="u", reply="a", channel="cattia", - msg_id="m1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=failing_extract, - failure_log_path=tmp_path / "fail.jsonl", - )) - assert summary["episodes"] == 2 - assert summary["fast_tracked"] == 0 - assert summary["stashed"] == 0 - - -def test_empty_turn_records_no_rows(tmp_path): - """Both user_msg and reply blank → no work done, no embed call.""" - db = _bootstrap_db(tmp_path) - - embed_called = [] - - async def embed(texts): - embed_called.append(texts) - return [] - - extract = _stub_extract_factory([]) - summary = asyncio.run(write_episode( - user_msg="", reply="", channel="cattia", - msg_id="m1", ts="2026-05-02 09:00:00", - conn=db, embed_fn=embed, extract_fn=extract, - failure_log_path=tmp_path / "fail.jsonl", - )) - # No embed call (both texts empty), but the schema accepts NULL embeddings - # for episodes so we still INSERT 2 rows. - assert embed_called == [] - assert summary["episodes"] == 2 diff --git a/tests/plugins/test_memdebug.py b/tests/plugins/test_memdebug.py deleted file mode 100644 index 65380e271c4..00000000000 --- a/tests/plugins/test_memdebug.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Tests for plugins/memdebug/ — /memdebug slash command (W2-4).""" - -from __future__ import annotations - -import asyncio -import struct -from pathlib import Path -from unittest.mock import patch - -import pytest - -from plugins.memory.sqlite_vec.store import VEC_DIM, init_db -from plugins.memdebug import ( - HELP_TEXT, - _do_rawsearch, - _do_semantic, - _format_facts_block, - _handle_async, - _handle_memdebug, - _truncate, -) - - -def _vec(seed: int) -> bytes: - vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] - return struct.pack(f"{VEC_DIM}b", *vals) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def test_truncate_short_string_unchanged(): - assert _truncate("abc", 10) == "abc" - - -def test_truncate_long_string_ellipsis(): - out = _truncate("a" * 100, 10) - assert out.endswith("…") and len(out) == 10 - - -# --------------------------------------------------------------------------- -# Help / empty / unknown args -# --------------------------------------------------------------------------- - - -def test_handle_empty_returns_help(): - assert _handle_memdebug("") == HELP_TEXT - assert _handle_memdebug(" ") == HELP_TEXT - - -def test_handle_rawsearch_empty_returns_help(): - assert _handle_memdebug("rawsearch") == HELP_TEXT - assert _handle_memdebug("rawsearch ") == HELP_TEXT - - -# --------------------------------------------------------------------------- -# Semantic / rawsearch via direct async helpers (so 
we control DB path) -# --------------------------------------------------------------------------- - - -def _seed_db(tmp_path): - """Seed a fixture memory.db on tmp_path and return its path.""" - db_path = tmp_path / "memories" / "memory.db" - conn = init_db(db_path) - conn.execute( - "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", - ("致妤生日 3/19", "禮揚.家庭", _vec(50), "2026-05-02 09:00:00"), - ) - conn.execute( - "INSERT INTO semantic_facts(fact, entity, embedding, created_at) VALUES (?,?,?,?)", - ("AI as digital twin", "禮揚.工作", _vec(60), "2026-05-01 09:00:00"), - ) - conn.execute( - "INSERT INTO episodes(ts, channel, external_id, role, text) " - "VALUES (?, ?, ?, ?, ?)", - ("2026-05-02 17:00:00", "cattia", "msg-1", "user", "晚餐幾點開"), - ) - conn.commit() - conn.close() - return db_path - - -def test_do_semantic_returns_score_breakdown(tmp_path, monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - db_path = _seed_db(tmp_path) - - async def fake_embed(texts, **kw): - return [_vec(51) for _ in texts] - - with patch("plugins.memdebug.DEFAULT_DB", db_path), \ - patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ - patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): - out = asyncio.run(_do_semantic("when does my wife get home")) - - assert "/memdebug" in out - assert "致妤生日 3/19" in out # closest fact - # Score breakdown labels present. - assert "score=" in out and "sim=" in out and "age=" in out - # Reaction prompt present (until rich-embed UX lands). - assert "👍" in out and "👎" in out - # Log line written. - log_path = tmp_path / "memory.log" - assert log_path.exists() - last_line = log_path.read_text().strip().splitlines()[-1] - assert '"cmd": "memdebug"' in last_line - - -def test_do_semantic_db_missing_returns_friendly_message(tmp_path, monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - missing = tmp_path / "absent.db" - with patch("plugins.memdebug.DEFAULT_DB", missing): - out = asyncio.run(_do_semantic("anything")) - assert "not yet initialised" in out - - -def test_do_rawsearch_finds_substring(tmp_path): - db_path = _seed_db(tmp_path) - with patch("plugins.memdebug.DEFAULT_DB", db_path), \ - patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): - out = asyncio.run(_do_rawsearch("晚餐")) - assert "rawsearch" in out - assert "晚餐幾點開" in out - assert "cattia/user" in out - - -def test_do_rawsearch_empty_episodes_message(tmp_path): - db_path = tmp_path / "memories" / "memory.db" - init_db(db_path).close() # bootstrap schema, no rows - with patch("plugins.memdebug.DEFAULT_DB", db_path), \ - patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"): - out = asyncio.run(_do_rawsearch("anything")) - assert "rawsearch" in out - assert "Episodes are written by W3" in out - - -# --------------------------------------------------------------------------- -# Sync entry point + register() -# --------------------------------------------------------------------------- - - -def test_handle_memdebug_sync_dispatches_semantic(tmp_path, monkeypatch): - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - db_path = _seed_db(tmp_path) - - async def fake_embed(texts, **kw): - return [_vec(51) for _ in texts] - - with patch("plugins.memdebug.DEFAULT_DB", db_path), \ - patch("plugins.memdebug.LOG_PATH", tmp_path / "memory.log"), \ - patch("plugins.memory.sqlite_vec.read.voyage_embed", fake_embed): - out = _handle_memdebug("when does my wife get home") - assert "致妤生日" in out - - -def test_register_calls_register_command(): - 
"""register(ctx) must call ctx.register_command with the right name.""" - from plugins.memdebug import register - - captured = {} - - class FakeCtx: - def register_command(self, name, handler, description="", args_hint=""): - captured["name"] = name - captured["handler"] = handler - captured["args_hint"] = args_hint - captured["description"] = description - - register(FakeCtx()) - assert captured["name"] == "memdebug" - assert captured["args_hint"] == " | rawsearch " - assert callable(captured["handler"]) - # The handler must accept a single positional argument (raw_args). - assert captured["handler"].__code__.co_argcount == 1 diff --git a/tests/plugins/test_memreview.py b/tests/plugins/test_memreview.py deleted file mode 100644 index f20e7341790..00000000000 --- a/tests/plugins/test_memreview.py +++ /dev/null @@ -1,272 +0,0 @@ -"""Tests for plugins/memreview/ — /memreview reject + /mem kill switch (W3-4).""" - -from __future__ import annotations - -import asyncio -import json -import struct -from datetime import date -from pathlib import Path -from unittest.mock import patch - -import pytest - -from plugins.memory.sqlite_vec.store import VEC_DIM, init_db -from plugins.memreview import ( - _MEMREVIEW_HELP, - _MEM_HELP, - _handle_mem, - _handle_memreview, - mem_off_active, - mem_off_path, - register, -) - - -def _vec(seed: int) -> bytes: - vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] - return struct.pack(f"{VEC_DIM}b", *vals) - - -# --------------------------------------------------------------------------- -# /memreview help / pending -# --------------------------------------------------------------------------- - - -def test_memreview_empty_returns_help(): - assert _handle_memreview("") == _MEMREVIEW_HELP - assert _handle_memreview(" ") == _MEMREVIEW_HELP - - -def test_memreview_pending_no_diffs(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_memreview("pending") - assert "no pending diffs" in out - - -def test_memreview_pending_lists_diffs(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - pdir = tmp_path / "memories" / "pending_diffs" - pdir.mkdir(parents=True) - (pdir / "wk-2026-05-02.json").write_text("{}") - (pdir / "wk-2026-05-09.json").write_text("{}") - (pdir / "wk-2026-05-09.rejected").write_text("rejected") - - out = _handle_memreview("pending") - assert "wk-2026-05-02" in out - assert "wk-2026-05-09" in out - # Rejected one carries a flag. 
- assert "(rejected — will be archived Mon)" in out - - -# --------------------------------------------------------------------------- -# /memreview reject -# --------------------------------------------------------------------------- - - -def test_memreview_reject_invalid_digest_id(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_memreview("reject not-a-digest") - assert "must look like" in out - - -def test_memreview_reject_unknown_digest(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_memreview("reject wk-2026-05-02") - assert "no pending diff" in out - - -def test_memreview_reject_writes_sentinel(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - pdir = tmp_path / "memories" / "pending_diffs" - pdir.mkdir(parents=True) - diff_path = pdir / "wk-2026-05-02.json" - diff_path.write_text("{}") - - out = _handle_memreview("reject wk-2026-05-02") - assert "Rejected." in out - sentinel = pdir / "wk-2026-05-02.rejected" - assert sentinel.exists() - assert "rejected" in sentinel.read_text().lower() - - -# --------------------------------------------------------------------------- -# /mem off / on / status -# --------------------------------------------------------------------------- - - -def test_mem_off_creates_sentinel(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_mem("off") - assert "disabled" in out - assert mem_off_path().exists() - assert mem_off_active() is True - - -def test_mem_on_removes_sentinel(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - mem_off_path().write_text("set", encoding="utf-8") - out = _handle_mem("on") - assert "enabled" in out - assert not mem_off_path().exists() - - -def test_mem_on_when_already_on_idempotent(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_mem("on") - assert "already enabled" in out - - -def test_mem_status_off(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_mem("status") - assert "🔊 ON" in out # default state - assert "(absent)" in out - - -def test_mem_status_on_with_pending(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - mem_off_path().write_text("set") - pdir = tmp_path / "memories" / "pending_diffs" - pdir.mkdir(parents=True) - (pdir / "wk-2026-05-02.json").write_text("{}") - - out = _handle_mem("status") - assert "🔇 OFF" in out - assert "(present)" in out - assert "wk-2026-05-02" in out - - -def test_mem_help_on_unknown_subcommand(tmp_path, monkeypatch): - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - out = _handle_mem("frobnicate") - assert "/mem off" in out and "/mem on" in out - - -# --------------------------------------------------------------------------- -# register() wires both commands -# --------------------------------------------------------------------------- - - -def test_register_registers_both_commands(): - captured = [] - - class FakeCtx: - def register_command(self, name, handler, description="", args_hint=""): - captured.append((name, args_hint)) - - register(FakeCtx()) - names = [c[0] for c in 
captured] - assert "memreview" in names - assert "mem" in names - - -# --------------------------------------------------------------------------- -# End-to-end: /memreview reject then weekly_apply archives as rejected -# --------------------------------------------------------------------------- - - -def test_reject_then_apply_archives_as_rejected(tmp_path, monkeypatch): - """Full flow: write pending diff -> /memreview reject -> weekly_apply - sees the sentinel and archives the diff with status=rejected.""" - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", - lambda: tmp_path, - ) - - db = init_db(tmp_path / "m.db") - digest_id = "wk-2026-05-02" - pdir = tmp_path / "memories" / "pending_diffs" - pdir.mkdir(parents=True) - diff_payload = { - "digest_id": digest_id, "candidate_episode_ids": [], - "promote": [{"entity": "禮揚.x", "fact": "f", "importance": 2, - "valid_from": "2026-05-02", "valid_to": None, - "source_episode_ids": []}], - "dedup_hits": [], "expire": [], "drop_as_noise": [], - } - (pdir / f"{digest_id}.json").write_text(json.dumps(diff_payload)) - - # User runs /memreview reject. - reply = _handle_memreview(f"reject {digest_id}") - assert "Rejected." in reply - - # Apply step picks up the sentinel. - from plugins.memory.sqlite_vec.promotion import weekly_apply - summary = asyncio.run(weekly_apply(db, today=date(2026, 5, 2))) - assert summary["applied"] is False - assert summary["reason"] == "rejected" - - # No new semantic_facts row (the promote was discarded). - [(sf,)] = db.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert sf == 0 - - # Archive carries the .rejected suffix. - archived = list((tmp_path / "memories" / "diff_archive").glob("*.rejected.json")) - assert len(archived) == 1 - - -def test_mem_off_short_circuits_weekly_promotion(tmp_path, monkeypatch): - """Kill switch: /mem off must stop weekly_promotion from running its - Kimi call (which would otherwise burn tokens and write a diff).""" - monkeypatch.setattr( - "plugins.memreview._resolve_hermes_home", lambda: tmp_path - ) - monkeypatch.setattr( - "plugins.memory.sqlite_vec.promotion._resolve_hermes_home", - lambda: tmp_path, - ) - monkeypatch.setenv("VOYAGE_API_KEY", "test-key") - - db = init_db(tmp_path / "m.db") - db.execute( - "INSERT INTO episodes(ts, channel, external_id, role, text, metadata) " - "VALUES (?, ?, ?, ?, ?, ?)", - ("2026-05-02 09:00", "cattia", "x", "user", "hi", - json.dumps({"stashed_facts": [{"text": "禮揚 likes X", - "entity": "禮揚.x", - "importance": 2}]})), - ) - db.commit() - - # Activate kill switch. - _handle_mem("off") - assert mem_off_active() is True - - kimi_called = [] - - async def kimi_should_not_be_called(prompt): - kimi_called.append(prompt) - return {} - - from plugins.memory.sqlite_vec.promotion import weekly_promotion - summary = asyncio.run(weekly_promotion(db, kimi_fn=kimi_should_not_be_called)) - assert summary["candidates"] == 0 - assert summary["skipped"] == "/mem off active" - # Kimi must not have been called. - assert kimi_called == [] diff --git a/tests/scripts/test_import_md.py b/tests/scripts/test_import_md.py deleted file mode 100644 index 617b38f9c13..00000000000 --- a/tests/scripts/test_import_md.py +++ /dev/null @@ -1,210 +0,0 @@ -"""Tests for ``scripts/import_md.py`` (W2-2 — MEMORY.md → semantic_facts). 
- -Uses a stub embed_fn so no network is hit; live integration is exercised -end-to-end on chococlaw via the post-test ``--commit`` smoke run. -""" - -from __future__ import annotations - -import asyncio -import struct -from pathlib import Path - -import pytest - -from plugins.memory.sqlite_vec.store import VEC_DIM, init_db -from scripts.import_md import ( - Entry, - import_memory_md, - parse_memory_md, - slugify_topic, -) - - -def _vec(seed: int) -> bytes: - vals = [max(-128, min(127, seed + (i % 7) - 3)) for i in range(VEC_DIM)] - return struct.pack(f"{VEC_DIM}b", *vals) - - -# --------------------------------------------------------------------------- -# Slugify -# --------------------------------------------------------------------------- - - -def test_slugify_simple(): - assert slugify_topic("People") == "people" - assert slugify_topic("Working style") == "working_style" - assert slugify_topic("Privacy constraints") == "privacy_constraints" - - -def test_slugify_hierarchy_uses_dot(): - assert ( - slugify_topic("Tools & Access > ProtonMail Access") - == "tools_access.protonmail_access" - ) - - -def test_slugify_preserves_cjk(): - # CJK characters survive the punct->underscore collapse; only > is hierarchy. - assert slugify_topic("醫院 > 新樓") == "醫院.新樓" - assert slugify_topic("家庭 生活") == "家庭_生活" - - -def test_slugify_handles_empty_or_punct_only(): - assert slugify_topic("") == "unknown" - assert slugify_topic("!!!") == "unknown" - - -# --------------------------------------------------------------------------- -# Parsing -# --------------------------------------------------------------------------- - - -SAMPLE_MD = """People: 禮揚 — physician -§ -Working style: digital twin model -§ -Privacy constraints: never include real PHI -§ -Tools & Access > ProtonMail: D4303@sinlau.org.tw -§ -""" - - -def test_parse_memory_md_basic(): - entries = parse_memory_md(SAMPLE_MD) - assert len(entries) == 4 - assert entries[0].topic == "People" - assert entries[0].fact == "禮揚 — physician" - assert entries[0].entity == "禮揚.people" - assert entries[3].entity == "禮揚.tools_access.protonmail" - - -def test_parse_skips_blocks_without_colon(): - md = "first entry: ok\n§\n\nno colon here\n§\nsecond: also ok\n§\n" - entries = parse_memory_md(md) - assert [e.topic for e in entries] == ["first entry", "second"] - - -def test_parse_handles_no_trailing_separator(): - md = "topic: content" - entries = parse_memory_md(md) - assert len(entries) == 1 - assert entries[0].fact == "content" - - -# --------------------------------------------------------------------------- -# import_memory_md (with stub embed) -# --------------------------------------------------------------------------- - - -def _make_stub_embed(): - counter = {"n": 0} - - async def stub(texts): - counter["n"] += 1 - return [_vec(i + 1) for i, _ in enumerate(texts)] - - return stub, counter - - -def test_dry_run_does_not_write(tmp_path): - md = tmp_path / "MEMORY.md" - md.write_text(SAMPLE_MD, encoding="utf-8") - db = tmp_path / "m.db" - - summary = asyncio.run( - import_memory_md(md_path=md, db_path=db, dry_run=True) - ) - assert summary == { - "parsed": 4, "new": 4, "skipped_dup": 0, - "batches": 0, "dry_run": True, - } - # DB still empty (init_db ran but no inserts). 
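-    # "batches": 0 in the summary above implies the dry run never invokes
-    # embed_fn, so it should be runnable without a VOYAGE_API_KEY set.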
- conn = init_db(db) - [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert count == 0 - - -def test_commit_inserts_and_populates_vec_facts(tmp_path): - md = tmp_path / "MEMORY.md" - md.write_text(SAMPLE_MD, encoding="utf-8") - db = tmp_path / "m.db" - stub, counter = _make_stub_embed() - - summary = asyncio.run( - import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) - ) - assert summary["new"] == 4 - assert summary["batches"] == 1 - assert counter["n"] == 1 # one Voyage call for 4 entries - - conn = init_db(db) - rows = conn.execute( - "SELECT entity, fact, importance, valid_from, valid_to FROM semantic_facts ORDER BY id" - ).fetchall() - assert len(rows) == 4 - assert rows[0]["entity"] == "禮揚.people" - assert rows[0]["importance"] == 2 - assert rows[0]["valid_from"] == "2026-05-10" - assert rows[0]["valid_to"] is None - - # Trigger sf_after_insert mirrored every row into vec_facts. - [(vec_count,)] = conn.execute("SELECT count(*) FROM vec_facts").fetchall() - assert vec_count == 4 - - -def test_idempotent_rerun_inserts_nothing_new(tmp_path): - md = tmp_path / "MEMORY.md" - md.write_text(SAMPLE_MD, encoding="utf-8") - db = tmp_path / "m.db" - stub, counter = _make_stub_embed() - - asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) - assert counter["n"] == 1 - - summary2 = asyncio.run( - import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) - ) - assert summary2["new"] == 0 - assert summary2["skipped_dup"] == 4 - assert counter["n"] == 1 # second run made zero embed calls (no new rows) - - conn = init_db(db) - [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert count == 4 - - -def test_partial_update_only_embeds_new(tmp_path): - md = tmp_path / "MEMORY.md" - md.write_text(SAMPLE_MD, encoding="utf-8") - db = tmp_path / "m.db" - stub, counter = _make_stub_embed() - - asyncio.run(import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub)) - assert counter["n"] == 1 - - md.write_text(SAMPLE_MD + "\nNew topic: brand new fact\n§\n", encoding="utf-8") - summary = asyncio.run( - import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=stub) - ) - assert summary["new"] == 1 - assert summary["skipped_dup"] == 4 - assert counter["n"] == 2 # one extra call for the one new entry - - -def test_rollback_on_embed_failure_leaves_db_unchanged(tmp_path): - md = tmp_path / "MEMORY.md" - md.write_text(SAMPLE_MD, encoding="utf-8") - db = tmp_path / "m.db" - - async def failing(texts): - raise RuntimeError("voyage exploded") - - with pytest.raises(RuntimeError, match="voyage exploded"): - asyncio.run( - import_memory_md(md_path=md, db_path=db, dry_run=False, embed_fn=failing) - ) - conn = init_db(db) - [(count,)] = conn.execute("SELECT count(*) FROM semantic_facts").fetchall() - assert count == 0 # transaction rolled back