diff --git a/.gitignore b/.gitignore
index 69f1b88..07f1f73 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@ build/
dist/
debug_payload.json
brainstorm_outputs/
-reference/
\ No newline at end of file
+reference/
+.env
+.llmwiki.yaml
diff --git a/README.md b/README.md
index 88c66b9..5e904be 100644
--- a/README.md
+++ b/README.md
@@ -980,6 +980,7 @@ Detailed guides have been moved to [`docs/guides/`](docs/guides/) to keep this R
| [**Advanced**](docs/guides/advanced.md) | Brainstorm, SSJ Developer Mode, Tmux, Proactive monitoring, Checkpoints, Plan mode, Session management, Cloud sync |
| [**Recipes**](docs/guides/recipes.md) | 12 step-by-step examples: code review, Telegram remote control, autonomous research, bug fix, brainstorm, session search, browse web pages, email, PDF/Excel analysis, and more |
| [**Plugin Authoring**](docs/guides/plugin-authoring.md) | Build your own plugin: tools, commands, skills, MCP servers, publishing checklist |
+| [**llmwiki Memory Plugin**](docs/guides/llmwiki.md) | Persistent memory via llmwiki-py: install, configure, update, and use WikiRead/Write/Search/Append |
| [**Example Plugin**](examples/example-plugin/) | Copy-and-edit starter template with working tools, commands, and skills |
| [**Contributing**](CONTRIBUTING.md) | Project structure, architecture guide, PR checklist |
diff --git a/agent.py b/agent.py
index 033100d..15ec779 100644
--- a/agent.py
+++ b/agent.py
@@ -102,7 +102,13 @@ def run(
# Compact context if approaching window limit
try:
- maybe_compact(state, config)
+ from ui.render import _start_tool_spinner, _stop_tool_spinner, set_spinner_phrase
+ set_spinner_phrase("compacting context…")
+ _start_tool_spinner()
+ try:
+ maybe_compact(state, config)
+ finally:
+ _stop_tool_spinner()
except Exception as _compact_err:
_log.warn("compact_failed", error=str(_compact_err))
@@ -136,6 +142,8 @@ def run(
tool_schemas=get_tool_schemas(),
config=config,
):
+ if cancel_check and cancel_check():
+ return
if isinstance(event, (TextChunk, ThinkingChunk)):
yield event
elif isinstance(event, AssistantTurn):
@@ -242,6 +250,8 @@ def _exec_one(tc):
"""Execute a single tool call, return (tc, result, permitted)."""
tid = tc["id"]
permitted = permissions[tid]
+ if cancel_check and cancel_check():
+ return tc, "[Interrupted by user]", False
if not permitted:
if config.get("permission_mode") == "plan":
plan_file = runtime.get_ctx(config).plan_file or ""
diff --git a/bootstrap.py b/bootstrap.py
index 6eae678..9126e40 100644
--- a/bootstrap.py
+++ b/bootstrap.py
@@ -39,7 +39,12 @@ def bootstrap(config: dict) -> None:
import tools as _tools # noqa: F401
_log.debug("bootstrap_tools_ready")
- # ── 3. Health-check HTTP server ────────────────────────────────────────
+ # ── 3. MCP servers ─────────────────────────────────────────────────────
+ # Importing cc_mcp.tools triggers background connection + tool registration.
+ import cc_mcp.tools as _mcp_tools # noqa: F401
+ _log.debug("bootstrap_mcp_connecting")
+
+ # ── 4. Health-check HTTP server ────────────────────────────────────────
port = config.get("health_check_port")
if port:
try:
diff --git a/cc_mcp/client.py b/cc_mcp/client.py
index 910c1f1..afcfecf 100644
--- a/cc_mcp/client.py
+++ b/cc_mcp/client.py
@@ -8,6 +8,7 @@
import time
from typing import Any, Dict, List, Optional
+from .oauth import acquire_token, get_cached_token
from .types import (
MCPServerConfig, MCPServerState, MCPTool, MCPTransport,
INIT_PARAMS, make_notification, make_request,
@@ -148,21 +149,48 @@ def __init__(self, config: MCPServerConfig):
self._sse_pending: Dict[int, dict] = {}
self._running = False
- def _get_client(self):
- if self._client is None:
- try:
- import httpx
- self._client = httpx.Client(
- headers=self._config.headers,
- timeout=self._config.timeout,
- follow_redirects=True,
- )
- except ImportError:
- raise RuntimeError("httpx is required for HTTP/SSE MCP transport: pip install httpx")
+ def _get_client(self, oauth_token: Optional[str] = None):
+ try:
+ import httpx
+ except ImportError:
+ raise RuntimeError("httpx is required for HTTP/SSE MCP transport: pip install httpx")
+ headers = {
+ "Content-Type": "application/json",
+ "Accept": "application/json, text/event-stream",
+ **self._config.headers,
+ }
+ # Inject cached OAuth token if no Authorization header already configured
+ if oauth_token and "Authorization" not in self._config.headers:
+ headers["Authorization"] = f"Bearer {oauth_token}"
+ if self._client is None or oauth_token:
+ if self._client:
+ try:
+ self._client.close()
+ except Exception:
+ pass
+ self._client = httpx.Client(
+ headers=headers,
+ timeout=self._config.timeout,
+ follow_redirects=True,
+ )
return self._client
+ def _needs_oauth(self) -> bool:
+ """True if this server has no static auth header configured."""
+ return "Authorization" not in self._config.headers
+
+ def _try_oauth(self, www_auth_header: str) -> None:
+ """Run OAuth flow and rebuild the HTTP client with the new token."""
+ token = acquire_token(self._config.url, www_auth_header)
+ self._get_client(oauth_token=token)
+
def start(self) -> None:
"""For SSE transport: connect to the /sse endpoint and get session URL."""
+ # Inject a cached OAuth token before first connection if available
+ if self._needs_oauth():
+ cached = get_cached_token(self._config.url)
+ if cached:
+ self._get_client(oauth_token=cached)
if self._config.transport == MCPTransport.SSE:
self._start_sse()
else:
@@ -241,8 +269,24 @@ def request(self, method: str, params: Optional[dict] = None, timeout: Optional[
else:
# For HTTP: POST and get response directly
resp = client.post(self._session_url or self._config.url, json=msg, timeout=wait_secs)
+
+ # OAuth: on 401 with no static auth, run browser flow and retry once
+ if resp.status_code == 401 and self._needs_oauth():
+ www_auth = resp.headers.get("www-authenticate", "")
+ self._try_oauth(www_auth)
+ resp = self._client.post(self._session_url or self._config.url, json=msg, timeout=wait_secs)
+
resp.raise_for_status()
- result = resp.json()
+ ct = resp.headers.get("content-type", "")
+ if "text/event-stream" in ct:
+ # Server returned SSE envelope — extract the data: line
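+ # (typical SSE body: 'event: message\ndata: {"jsonrpc":"2.0","id":1,...}\n\n')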
+ result = None
+ for line in resp.text.splitlines():
+ if line.startswith("data:"):
+ result = json.loads(line[5:].strip())
+ break
+ else:
+ result = resp.json()
if result is None:
raise TimeoutError(f"MCP server '{self._config.name}' timed out on '{method}'")
@@ -307,6 +351,21 @@ def connect(self) -> None:
self._transport.start()
self._handshake()
self.state = MCPServerState.CONNECTED
+ except RuntimeError as e:
+ # If handshake failed due to OAuth being triggered mid-connect, retry once
+ if "401" in str(e) or "Unauthorized" in str(e):
+ try:
+ self._transport.stop()
+ self._transport = self._make_transport()
+ self._transport.start()
+ self._handshake()
+ self.state = MCPServerState.CONNECTED
+ return
+ except Exception:
+ pass
+ self.state = MCPServerState.ERROR
+ self._error = str(e)
+ raise
except Exception as e:
self.state = MCPServerState.ERROR
self._error = str(e)
@@ -492,16 +551,19 @@ def all_tools(self) -> List[MCPTool]:
def call_tool(self, qualified_name: str, arguments: dict) -> str:
"""Dispatch a tool call by qualified name (mcp__server__tool)."""
- # Parse server and tool name from qualified name
parts = qualified_name.split("__", 2)
if len(parts) != 3 or parts[0] != "mcp":
raise ValueError(f"Invalid MCP tool name: {qualified_name}")
- server_name = parts[1]
+ server_name_sanitized = parts[1]
tool_name = parts[2]
- client = self._clients.get(server_name)
+ client = next(
+ (c for c in self._clients.values()
+ if "".join(ch if ch.isalnum() or ch == "_" else "_" for ch in c.config.name) == server_name_sanitized),
+ None,
+ )
if client is None:
- raise RuntimeError(f"MCP server '{server_name}' not configured")
+ raise RuntimeError(f"MCP server '{server_name_sanitized}' not configured")
# Auto-reconnect if dropped
if not client.alive:
diff --git a/cc_mcp/oauth.py b/cc_mcp/oauth.py
new file mode 100644
index 0000000..3ea8730
--- /dev/null
+++ b/cc_mcp/oauth.py
@@ -0,0 +1,272 @@
+"""OAuth 2.0 + PKCE flow for MCP HTTP servers.
+
+Handles:
+ - Dynamic client registration (RFC 7591)
+ - Authorization Code + PKCE (S256)
+ - Token refresh
+ - Token persistence in ~/.cheetahclaws/mcp_oauth_tokens.json
+"""
+from __future__ import annotations
+
+import base64
+import hashlib
+import http.server
+import json
+import re
+import secrets
+import threading
+import time
+import urllib.parse
+import webbrowser
+from pathlib import Path
+from typing import Optional
+
+TOKEN_STORE = Path.home() / ".cheetahclaws" / "mcp_oauth_tokens.json"
+REDIRECT_PORT = 54321
+REDIRECT_URI = f"http://localhost:{REDIRECT_PORT}/callback"
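+
+# Typical call pattern (sketch, mirroring how cc_mcp/client.py uses this module):
+#   token = get_cached_token(url) or acquire_token(url, resp.headers.get("www-authenticate", ""))
+#   headers["Authorization"] = f"Bearer {token}"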
+
+
+def _http():
+ try:
+ import httpx
+ return httpx
+ except ImportError:
+ raise RuntimeError("httpx is required for MCP OAuth: pip install httpx")
+
+
+# ── Token persistence ─────────────────────────────────────────────────────────
+
+def _load_tokens() -> dict:
+ if TOKEN_STORE.exists():
+ try:
+ return json.loads(TOKEN_STORE.read_text())
+ except Exception:
+ pass
+ return {}
+
+
+def _save_tokens(data: dict) -> None:
+ TOKEN_STORE.parent.mkdir(parents=True, exist_ok=True)
+ TOKEN_STORE.write_text(json.dumps(data, indent=2))
+
+
+def get_cached_token(server_url: str) -> Optional[str]:
+ data = _load_tokens()
+ entry = data.get(server_url)
+ if not entry:
+ return None
+ if entry.get("expires_at", 0) < time.time() + 60:
+ refreshed = _try_refresh(server_url, entry)
+ if refreshed:
+ return refreshed
+ return None
+ return entry.get("access_token")
+
+
+def _try_refresh(server_url: str, entry: dict) -> Optional[str]:
+ refresh_token = entry.get("refresh_token")
+ token_uri = entry.get("token_uri")
+ client_id = entry.get("client_id")
+ if not (refresh_token and token_uri and client_id):
+ return None
+ try:
+ httpx = _http()
+ resp = httpx.post(token_uri, data={
+ "grant_type": "refresh_token",
+ "refresh_token": refresh_token,
+ "client_id": client_id,
+ }, timeout=15)
+ resp.raise_for_status()
+ token_data = resp.json()
+ _cache_token(server_url, token_data, token_uri, client_id)
+ return token_data.get("access_token")
+ except Exception:
+ return None
+
+
+def _cache_token(server_url: str, token_data: dict, token_uri: str, client_id: str) -> None:
+ data = _load_tokens()
+ expires_in = token_data.get("expires_in", 3600)
+ data[server_url] = {
+ "access_token": token_data["access_token"],
+ "refresh_token": token_data.get("refresh_token"),
+ "expires_at": time.time() + expires_in,
+ "token_uri": token_uri,
+ "client_id": client_id,
+ }
+ _save_tokens(data)
+
+
+# ── OAuth metadata discovery ──────────────────────────────────────────────────
+
+def _parse_www_authenticate(header: str) -> dict:
+ result = {}
+ for match in re.finditer(r'(\w+)="([^"]*)"', header):
+ result[match.group(1)] = match.group(2)
+ return result
+
+
+def _fetch_json(url: str) -> dict:
+ try:
+ httpx = _http()
+ resp = httpx.get(url, timeout=10)
+ resp.raise_for_status()
+ return resp.json()
+ except Exception:
+ return {}
+
+
+def _discover_endpoints(www_auth_header: str) -> tuple[str, str]:
+ params = _parse_www_authenticate(www_auth_header)
+ auth_uri = params.get("authorization_uri", "")
+ token_uri = params.get("token_uri", "")
+
+ if not auth_uri or not token_uri:
+ metadata_url = params.get("resource_metadata", "")
+ if metadata_url:
+ resource_meta = _fetch_json(metadata_url)
+ for as_url in resource_meta.get("authorization_servers", []):
+ as_meta = _fetch_json(f"{as_url.rstrip('/')}/.well-known/oauth-authorization-server")
+ if as_meta:
+ auth_uri = auth_uri or as_meta.get("authorization_endpoint", "")
+ token_uri = token_uri or as_meta.get("token_endpoint", "")
+ break
+
+ return auth_uri, token_uri
+
+
+# ── Dynamic client registration ───────────────────────────────────────────────
+
+def _register_client(registration_endpoint: str) -> str:
+ httpx = _http()
+ resp = httpx.post(registration_endpoint, json={
+ "client_name": "cheetahclaws",
+ "redirect_uris": [REDIRECT_URI],
+ "grant_types": ["authorization_code", "refresh_token"],
+ "response_types": ["code"],
+ "token_endpoint_auth_method": "none",
+ }, timeout=15)
+ resp.raise_for_status()
+ return resp.json()["client_id"]
+
+
+def _get_or_register_client(server_url: str, token_uri: str) -> str:
+ data = _load_tokens()
+ reg_key = f"__client__{server_url}"
+ if reg_key in data:
+ return data[reg_key]["client_id"]
+
+ base = token_uri.rsplit("/token", 1)[0]
+ registration_endpoint = f"{base}/register"
+ try:
+ client_id = _register_client(registration_endpoint)
+ except Exception as e:
+ raise RuntimeError(
+ f"Dynamic client registration failed for {server_url}: {e}\n"
+ "Add a static 'Authorization' header to this server's entry in mcp.json."
+ )
+
+ data[reg_key] = {"client_id": client_id}
+ _save_tokens(data)
+ return client_id
+
+
+# ── PKCE helpers ──────────────────────────────────────────────────────────────
+
+def _pkce_pair() -> tuple[str, str]:
+ verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
+ challenge = base64.urlsafe_b64encode(
+ hashlib.sha256(verifier.encode()).digest()
+ ).rstrip(b"=").decode()
+ return verifier, challenge
+
+
+# ── Local callback server ─────────────────────────────────────────────────────
+
+class _CallbackHandler(http.server.BaseHTTPRequestHandler):
+ code: Optional[str] = None
+ error: Optional[str] = None
+ done = threading.Event()
+
+ def do_GET(self):
+ parsed = urllib.parse.urlparse(self.path)
+ params = dict(urllib.parse.parse_qsl(parsed.query))
+ if "code" in params:
+ _CallbackHandler.code = params["code"]
+ else:
+ _CallbackHandler.error = params.get("error", "unknown error")
+ _CallbackHandler.done.set()
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html")
+ self.end_headers()
+ self.wfile.write(b"Authentication complete. You can close this tab.")
+
+ def log_message(self, *_):
+ pass
+
+
+def _run_callback_server() -> http.server.HTTPServer:
+ _CallbackHandler.code = None
+ _CallbackHandler.error = None
+ _CallbackHandler.done.clear()
+ server = http.server.HTTPServer(("localhost", REDIRECT_PORT), _CallbackHandler)
+ t = threading.Thread(target=server.serve_forever, daemon=True)
+ t.start()
+ return server
+
+
+# ── Main OAuth flow ───────────────────────────────────────────────────────────
+
+def acquire_token(server_url: str, www_auth_header: str) -> str:
+ """Run the full OAuth 2.0 + PKCE browser flow and return an access token."""
+ auth_uri, token_uri = _discover_endpoints(www_auth_header)
+ if not auth_uri or not token_uri:
+ raise RuntimeError(
+ f"Cannot discover OAuth endpoints for {server_url}. "
+ "Set an Authorization header manually in mcp.json."
+ )
+
+ client_id = _get_or_register_client(server_url, token_uri)
+ verifier, challenge = _pkce_pair()
+ state = secrets.token_urlsafe(16)
+
+ auth_url = auth_uri + "?" + urllib.parse.urlencode({
+ "response_type": "code",
+ "client_id": client_id,
+ "redirect_uri": REDIRECT_URI,
+ "scope": "mcp",
+ "state": state,
+ "code_challenge": challenge,
+ "code_challenge_method": "S256",
+ })
+
+ callback_server = _run_callback_server()
+ print(f"\n[MCP OAuth] Opening browser for {server_url} ...")
+ print(f"[MCP OAuth] If the browser doesn't open, visit:\n {auth_url}\n")
+ webbrowser.open(auth_url)
+
+ if not _CallbackHandler.done.wait(timeout=120):
+ callback_server.shutdown()
+ raise RuntimeError(f"OAuth timeout waiting for callback from {server_url}")
+ callback_server.shutdown()
+
+ if _CallbackHandler.error:
+ raise RuntimeError(f"OAuth error from {server_url}: {_CallbackHandler.error}")
+
+ httpx = _http()
+ resp = httpx.post(token_uri, data={
+ "grant_type": "authorization_code",
+ "code": _CallbackHandler.code,
+ "redirect_uri": REDIRECT_URI,
+ "client_id": client_id,
+ "code_verifier": verifier,
+ }, timeout=15)
+ resp.raise_for_status()
+ token_data = resp.json()
+
+ if "access_token" not in token_data:
+ raise RuntimeError(f"Token exchange failed for {server_url}: {token_data}")
+
+ _cache_token(server_url, token_data, token_uri, client_id)
+ print(f"[MCP OAuth] Authenticated successfully with {server_url}\n")
+ return token_data["access_token"]
diff --git a/cc_mcp/types.py b/cc_mcp/types.py
index 8a4451e..5f18fd5 100644
--- a/cc_mcp/types.py
+++ b/cc_mcp/types.py
@@ -1,11 +1,18 @@
"""MCP type definitions: server configs, tool descriptors, connection state."""
from __future__ import annotations
+import os
+import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional
+def _expand(value: str) -> str:
+ """Expand ${VAR} and $VAR references using os.environ."""
+ return re.sub(r'\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)', lambda m: os.environ.get(m.group(1) or m.group(2), m.group(0)), value)
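+
+# Example (illustrative): with MCP_API_KEY=abc123 in the environment,
+#   _expand("Bearer ${MCP_API_KEY}") -> "Bearer abc123"
+# Unresolved references are left untouched (the original text is kept).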
+
+
# ── Server config ─────────────────────────────────────────────────────────────
class MCPTransport(str, Enum):
@@ -51,11 +58,11 @@ def from_dict(cls, name: str, d: dict) -> "MCPServerConfig":
return cls(
name=name,
transport=transport,
- command=d.get("command", ""),
- args=d.get("args", []),
- env=d.get("env", {}),
- url=d.get("url", ""),
- headers=d.get("headers", {}),
+ command=_expand(d.get("command", "")),
+ args=[_expand(a) for a in d.get("args", [])],
+ env={k: _expand(v) for k, v in d.get("env", {}).items()},
+ url=_expand(d.get("url", "")),
+ headers={k: _expand(v) for k, v in d.get("headers", {}).items()},
timeout=int(d.get("timeout", 30)),
disabled=bool(d.get("disabled", False)),
)
diff --git a/cheetahclaws.py b/cheetahclaws.py
index 07e3823..6180fcb 100755
--- a/cheetahclaws.py
+++ b/cheetahclaws.py
@@ -113,6 +113,20 @@
# ── Standard library ───────────────────────────────────────────────────────
import os
+
+# Load .env from the cheetahclaws directory if present (before any other imports read os.environ)
+def _load_env() -> None:
+ env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
+ if not os.path.exists(env_path):
+ return
+ with open(env_path) as _f:
+ for _line in _f:
+ _line = _line.strip()
+ if not _line or _line.startswith("#") or "=" not in _line:
+ continue
+ _k, _, _v = _line.partition("=")
+ os.environ.setdefault(_k.strip(), _v.strip())
+_load_env()
import re
import sys
import uuid
@@ -521,6 +535,7 @@ def handle_slash(line: str, state, config) -> Union[bool, tuple]:
"circuit": ("Show / reset per-provider circuit breakers", ["status", "reset"]),
"web": ("Start the web terminal / chat UI in background", ["status", "--no-auth", "--host"]),
"setup": ("Run interactive setup wizard", []),
+ "btw": ("Ask a quick question while agent runs (side-session overlay)", []),
"exit": ("Exit cheetahclaws", []),
"quit": ("Exit (alias for /exit)", []),
"resume": ("Resume last session", []),
@@ -722,6 +737,9 @@ def repl(config: dict, initial_prompt: str = None):
query_lock = threading.RLock()
+ # ── Concurrency primitives for interactive ESC / queue / /btw ─────────
+ from ui.agent_state import _cancel_event, _input_queue, _agent_running
+
# Apply rich_live config: disable in-place Live streaming if terminal has issues.
# Auto-detect environments where ANSI cursor-up / live-rewrite doesn't work:
# - SSH sessions (cursor-up fails across network PTY)
@@ -733,7 +751,11 @@ def repl(config: dict, initial_prompt: str = None):
_is_dumb = (console is not None and getattr(console, "is_dumb_terminal", False))
_is_macos_terminal = (_plat.system() == "Darwin"
and _os.environ.get("TERM_PROGRAM", "") in ("Apple_Terminal", ""))
- _rich_live_default = not _in_ssh and not _is_dumb and not _is_macos_terminal
+ # Disable Rich Live when prompt_toolkit is active: patch_stdout + Rich Live
+ # conflict in a background-thread setup, causing every streaming update to
+ # re-print the full accumulated text (visible duplication on scroll-up).
+ from ui.input import HAS_PROMPT_TOOLKIT as _HAS_PT
+ _rich_live_default = (not _in_ssh and not _is_dumb and not _is_macos_terminal and not _HAS_PT)
set_rich_live(config.get("rich_live", _rich_live_default))
# Initialize proactive polling state via RuntimeContext (defaults already set)
@@ -744,6 +766,15 @@ def repl(config: dict, initial_prompt: str = None):
t.start()
def run_query(user_input: str, is_background: bool = False):
+ # Clear any leftover cancel signal from a previous turn
+ _cancel_event.clear()
+ _agent_running.set()
+ try:
+ _run_query_inner(user_input, is_background)
+ finally:
+ _agent_running.clear()
+
+ def _run_query_inner(user_input: str, is_background: bool = False):
nonlocal verbose
with query_lock:
@@ -768,7 +799,8 @@ def run_query(user_input: str, is_background: bool = False):
_duplicate_suppressed = False
try:
- for event in run(user_input, state, config, system_prompt):
+ for event in run(user_input, state, config, system_prompt,
+ cancel_check=lambda: _cancel_event.is_set()):
# Stop spinner only when visible output arrives
if spinner_shown:
show_thinking = isinstance(event, ThinkingChunk) and verbose
@@ -1131,6 +1163,27 @@ def _read_input(prompt: str) -> str:
return first
+ def _run_query_in_thread(user_input: str) -> None:
+ """Start the agent on a background thread and return immediately.
+
+ The main thread stays in _read_input / _SESSION.prompt() so the »
+ prompt bar is visible at the bottom (via patch_stdout) while the
+ agent prints output above it — exactly as Claude Code behaves.
+
+ Queued messages are drained by the agent thread itself, not the main
+ thread, so the main loop keeps re-entering the prompt each time.
+ """
+ def _run_and_drain(msg: str) -> None:
+ run_query(msg)
+ while _input_queue:
+ next_msg = _input_queue.popleft()
+ run_query(next_msg)
+
+ t = threading.Thread(target=_run_and_drain, args=(user_input,), daemon=True)
+ t.start()
+ # Return immediately — do NOT join. The main loop will call _read_input
+ # right away, showing the prompt bar while the agent thread runs.
+
while True:
# Show notifications for background agents that finished
_print_background_notifications()
@@ -1152,6 +1205,12 @@ def _read_input(prompt: str) -> str:
except Exception:
pass
prompt = clr(f"\n[{cwd_short}]", "dim") + ctx_hint + clr(" ", "dim") + clr("» ", "cyan", "bold")
+ try:
+ import shutil as _shutil
+ _w = _shutil.get_terminal_size((80, 24)).columns
+ print(clr("─" * _w, "dim"), flush=True)
+ except Exception:
+ pass
user_input = _read_input(prompt)
except (EOFError, KeyboardInterrupt):
print()
@@ -1422,12 +1481,17 @@ def _spin_and_query(phrase, prompt):
if result:
continue
- try:
- run_query(user_input)
- except KeyboardInterrupt:
- _track_ctrl_c()
- print(clr("\n (interrupted)", "yellow"))
- # Keep conversation history up to the interruption
+ # ── /btw overlay: quick side-session while agent runs ──────────────
+ if user_input.startswith("/btw ") or user_input == "/btw":
+ question = user_input[5:].strip() if user_input.startswith("/btw ") else ""
+ if not question:
+ warn("/btw requires a question, e.g. /btw what does this function do?")
+ continue
+ from ui.btw_overlay import run_btw_overlay
+ run_btw_overlay(question, config.get("model", "claude-sonnet-4-6"), config)
+ continue
+
+ _run_query_in_thread(user_input)
# ── Entry point ────────────────────────────────────────────────────────────
@@ -1447,6 +1511,8 @@ def main():
help="Never ask permission (accept all operations)")
parser.add_argument("--verbose", action="store_true",
help="Show thinking + token counts")
+ parser.add_argument("--no-live", action="store_true",
+ help="Disable Rich Live streaming (plain text output, no duplication on scroll)")
parser.add_argument("--thinking", action="store_true",
help="Enable extended thinking")
parser.add_argument("--version", action="store_true", help="Print version")
@@ -1501,6 +1567,8 @@ def main():
config["permission_mode"] = "accept-all"
if args.verbose:
config["verbose"] = True
+ if args.no_live:
+ config["rich_live"] = False
if args.thinking:
config["thinking"] = True
diff --git a/commands/advanced.py b/commands/advanced.py
index e65514f..53bb79b 100644
--- a/commands/advanced.py
+++ b/commands/advanced.py
@@ -897,7 +897,7 @@ def cmd_mcp(args: str, _state, config) -> bool:
}.get(client.state.value, "dim")
print(f" {clr(client.status_line(), status_color)}")
for tool in client._tools:
- print(f" {clr(tool.qualified_name, 'cyan')} {tool.description[:60]}")
+ print(f" {clr(tool.qualified_name, 'cyan')} {tool.description}")
total_tools += 1
if total_tools:
diff --git a/commands/core.py b/commands/core.py
index acbe1d9..f9d397d 100644
--- a/commands/core.py
+++ b/commands/core.py
@@ -107,12 +107,14 @@ def cmd_cost(_args: str, state, config) -> bool:
def cmd_compact(args: str, state, config) -> bool:
"""Manually compact conversation history."""
from compaction import manual_compact
+ from ui.render import _start_tool_spinner, _stop_tool_spinner, set_spinner_phrase
focus = args.strip()
- if focus:
- info(f"Compacting with focus: {focus}")
- else:
- info("Compacting conversation...")
- success, msg = manual_compact(state, config, focus=focus)
+ set_spinner_phrase(f"compacting conversation{' [' + focus + ']' if focus else ''}…")
+ _start_tool_spinner()
+ try:
+ success, msg = manual_compact(state, config, focus=focus)
+ finally:
+ _stop_tool_spinner()
if success:
info(msg)
else:
diff --git a/docs/guides/llmwiki.md b/docs/guides/llmwiki.md
new file mode 100644
index 0000000..e3bee7d
--- /dev/null
+++ b/docs/guides/llmwiki.md
@@ -0,0 +1,334 @@
+# Memory Management with llmwiki-py — Setup Tutorial
+
+This guide walks you through every step: installing llmwiki-py, initialising a wiki, wiring credentials, installing the CheetahClaws plugin, and running your first memory operation.
+
+Estimated time: ~10 minutes.
+
+---
+
+## Prerequisites
+
+- Python 3.11+
+- `pip` on your `PATH`
+- CheetahClaws installed and working (`cheetahclaws` launches the REPL)
+- Git (for cloning and for the optional git-backend)
+
+---
+
+## Step 1 — Install llmwiki-py
+
+Install directly from GitHub — no manual clone needed:
+
+```bash
+pip install "git+https://github.com/yamaceay/llmwiki-py.git#egg=llmwiki"
+```
+
+Verify the CLI landed on your PATH:
+
+```bash
+wiki --help
+```
+
+Expected first line: `Usage: wiki [OPTIONS] COMMAND [ARGS]...`
+
+> **Python mismatch warning.** If you run CheetahClaws with a specific interpreter (e.g. `python3.11 cheetahclaws.py`), install with the same one:
+> ```bash
+> python3.11 -m pip install "git+https://github.com/yamaceay/llmwiki-py.git#egg=llmwiki"
+> ```
+
+> **Want a local editable copy instead?** If you prefer to clone and hack on the source:
+> ```bash
+> git clone https://github.com/yamaceay/llmwiki-py
+> pip install -e ./llmwiki-py
+> ```
+
+---
+
+## Step 2 — Create and initialise a wiki
+
+### 2a. Choose a directory
+
+The wiki root is just a folder on disk. Create it wherever you like:
+
+```bash
+mkdir -p ~/my-wiki
+```
+
+### 2b. Set the active wiki
+
+llmwiki-py resolves the active wiki through three mechanisms in order:
+
+1. `--wiki <name>` CLI flag (per-command override)
+2. `.llmwiki.yaml` found by walking up from the current directory
+3. Registry default (`~/.config/llmwiki/registry.yaml`)
+
+`wiki init` registers the wiki automatically. Make it the default:
+
+```bash
+wiki use openclaude-memory
+```
+
+> **Note on `LLMWIKI_WIKI_ROOT`:** this env var is only read when calling the Python API directly (`Wiki(root=None)`). The `wiki` CLI ignores it — registry default is what controls the CLI.
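+
+If you do use the Python API, here is a minimal sketch of that path (the `from llmwiki import Wiki` import is an assumption; check the llmwiki-py README for the exact import):
+
+```python
+# Hypothetical sketch: the Python-API case where LLMWIKI_WIKI_ROOT applies.
+import os
+from llmwiki import Wiki  # assumed import path
+
+os.environ.setdefault("LLMWIKI_WIKI_ROOT", os.path.expanduser("~/my-wiki"))
+wiki = Wiki(root=None)  # root=None → resolved from LLMWIKI_WIKI_ROOT
+```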
+
+### 2c. Initialise
+
+```bash
+wiki init "$LLMWIKI_WIKI_ROOT" --name "my-wiki" --domain general
+```
+
+This creates `~/my-wiki/.llmwiki.yaml` — the wiki's config file. Check it:
+
+```bash
+cat ~/my-wiki/.llmwiki.yaml
+```
+
+Expected output:
+
+```yaml
+backend: filesystem
+created: '2026-04-30'
+domain: general
+name: my-wiki
+paths:
+ raw: raw
+ schema: SCHEMA.md
+ wiki: wiki
+```
+
+The `wiki/` subdirectory (at `~/my-wiki/wiki/`) is where all pages live. The `raw/` subdirectory holds unprocessed source material.
+
+---
+
+## Step 3 — Credentials (git backend only)
+
+If you chose `--backend filesystem` (the default above) **skip this step entirely** — no credentials needed.
+
+If you want the wiki backed by a GitHub repo so it survives across machines:
+
+### 3a. Create a GitHub Personal Access Token
+
+1. Go to **GitHub → Settings → Developer settings → Personal access tokens → Fine-grained tokens**
+2. Click **Generate new token**
+3. Scopes needed: `Contents: Read and Write` on the target repo
+4. Copy the token — you will not see it again
+
+### 3b. Store the token
+
+Never put a token in `.llmwiki.yaml` — it would end up in git history. Use an env var instead. llmwiki-py checks these in order:
+
+| Env var | Notes |
+|---|---|
+| `LLMWIKI_GIT_TOKEN` | llmwiki-specific, takes highest priority |
+| `GITHUB_TOKEN` | standard CI variable, works if already set |
+| `GIT_TOKEN` | generic fallback |
+
+Add to your shell profile:
+
+```bash
+echo 'export LLMWIKI_GIT_TOKEN="ghp_xxxxxxxxxxxxxxxxxxxx"' >> ~/.zshrc
+```
+
+Replace `ghp_xxxx…` with your actual token. Then reload:
+
+```bash
+source ~/.zshrc
+```
+
+### 3c. Re-initialise with git backend
+
+```bash
+wiki init "$LLMWIKI_WIKI_ROOT" \
+ --name "my-wiki" \
+ --domain general \
+ --backend git \
+ --git-repo "yourusername/your-wiki-repo"
+```
+
+The token is read from the env var at runtime — do **not** pass `--git-token` on the command line (it would appear in shell history).
+
+---
+
+## Step 4 — Verify the wiki works
+
+Run a quick smoke test before involving CheetahClaws:
+
+```bash
+# Check status
+wiki status
+
+# Write a test page
+echo "# Hello\n\nFirst wiki page." | wiki write concepts/hello.md
+
+# Read it back
+wiki read concepts/hello.md
+
+# List all pages
+wiki list --tree
+
+# Search
+wiki search "hello"
+```
+
+All five commands should succeed without errors. If `wiki status` complains about a missing wiki, double-check that your wiki is the registry default (re-run `wiki use my-wiki`) or that `LLMWIKI_WIKI_ROOT` points at the wiki directory.
+
+---
+
+## Step 5 — Install the CheetahClaws plugin
+
+### 5a. Copy the plugin manifest
+
+```bash
+cp -r ~/aicore/openclaude/examples/llmwiki-plugin \
+ ~/.cheetahclaws/plugins/llmwiki
+```
+
+The plugin directory must be named `llmwiki`. Verify:
+
+```bash
+ls ~/.cheetahclaws/plugins/llmwiki/
+# plugin.json tools.py README.md
+```
+
+### 5b. Enable the plugin
+
+Launch CheetahClaws and run:
+
+```
+/plugin enable llmwiki
+```
+
+Then confirm it's registered:
+
+```
+/plugin
+```
+
+Expected line: `llmwiki [user] enabled Persistent memory management via llmwiki-py`
+
+Restart CheetahClaws if the tools don't appear immediately.
+
+---
+
+## Step 6 — First memory operation
+
+Inside the CheetahClaws REPL, tell the AI to write something:
+
+```
+remember that the auth service uses JWT with a 24h expiry
+```
+
+The AI will call `WikiWrite` to persist this. Then verify it was stored:
+
+```
+show me everything you know about auth
+```
+
+The AI will call `WikiSearch` and `WikiRead` and surface the note.
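+
+Under the hood, the tool call the model issues looks roughly like this (illustrative sketch; the model chooses the actual page path):
+
+```python
+# Roughly the input WikiWrite receives (illustrative):
+tool_input = {
+    "path": "concepts/auth.md",
+    "content": "# Auth service\n\nUses JWT with a 24h expiry.",
+}
+# The plugin pipes the content into: wiki write concepts/auth.md
+```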
+
+You can also check the file directly:
+
+```bash
+wiki list --tree
+wiki read <path>
+```
+
+---
+
+## Step 7 — Update llmwiki-py
+
+```bash
+pip install --upgrade "git+https://github.com/yamaceay/llmwiki-py.git#egg=llmwiki"
+```
+
+If you installed from a local clone instead:
+
+```bash
+cd ~/aicore/llmwiki-py
+git pull
+pip install -e .  # only needed if dependencies changed
+```
+
+No CheetahClaws restart needed after updating the package.
+
+To pick up a newer plugin manifest from openclaude (e.g. after a `git pull` in openclaude):
+
+```bash
+cp -r ~/aicore/openclaude/examples/llmwiki-plugin \
+ ~/.cheetahclaws/plugins/llmwiki
+```
+
+Then restart CheetahClaws.
+
+---
+
+## Setting environment variables via openclaude
+
+You do not need to set variables in your shell profile. openclaude loads a `.env` file from its own directory at startup — before any plugin code runs — so variables defined there are available to every tool, including the wiki CLI subprocess.
+
+Create the file (it is already in `.gitignore`, so it will never be committed):
+
+```bash
+# ~/aicore/openclaude/.env
+LLMWIKI_WIKI_ROOT=/Users/you/my-wiki
+# Only needed for the git backend:
+LLMWIKI_GIT_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
+```
+
+Rules:
+- One `KEY=VALUE` per line, no quotes needed around values
+- Lines starting with `#` are comments
+- If the variable is already set in your shell, the shell value takes precedence (`.env` is a fallback, not an override)
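+
+The fallback behaviour comes from the loader using `setdefault` rather than plain assignment (see `_load_env` in `cheetahclaws.py`); conceptually:
+
+```python
+# What the .env loader does per line: a no-op if the shell already set the variable.
+import os
+
+os.environ.setdefault("LLMWIKI_WIKI_ROOT", "/Users/you/my-wiki")
+```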
+
+After editing `.env`, restart CheetahClaws once for the values to take effect.
+
+---
+
+## Environment variable reference
+
+| Variable | Required | Default | Purpose |
+|---|---|---|---|
+| `LLMWIKI_WIKI_ROOT` | Yes (simplest path) | — | Points directly at a wiki directory |
+| `LLMWIKI_WIKI` | No | registry default | Selects a named wiki from the registry |
+| `LLMWIKI_CONFIG_DIR` | No | `~/.config/llmwiki` | Location of `registry.yaml` |
+| `LLMWIKI_GIT_TOKEN` | Only for git backend | — | GitHub PAT for push/pull |
+| `GITHUB_TOKEN` | No | — | Fallback git token (standard CI var) |
+| `GIT_TOKEN` | No | — | Second fallback git token |
+| `LLMWIKI_GITHUB_API_URL` | No | `https://api.github.com` | Override for GitHub Enterprise |
+
+These variables can live in your shell profile (`~/.zshrc` or `~/.bashrc`), taking effect after `source ~/.zshrc` or a new terminal, or in the openclaude `.env` file described above.
+
+---
+
+## Troubleshooting
+
+**`wiki: command not found`**
+
+```bash
+# Find where pip installed it
+python -m llmwiki.cli.main --help
+# Add its bin dir to PATH, or use the module form:
+alias wiki="python -m llmwiki.cli.main"
+```
+
+**`Error: no wiki found. Run 'wiki init' first.`**
+
+No default wiki is registered and `LLMWIKI_WIKI_ROOT` is not set in the current shell. Run `wiki use my-wiki` to set the registry default, or run `echo $LLMWIKI_WIKI_ROOT` to check the variable and `source ~/.zshrc` to reload your profile.
+
+**`WikiRead` / `WikiWrite` return `"wiki command not found"` inside CheetahClaws**
+
+CheetahClaws launched in a shell environment where `wiki` is not on `PATH`. Fix by using an absolute path in the plugin's `tools.py`, or ensure your shell profile exports `PATH` correctly and CheetahClaws inherits it.
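+
+One way to pin the binary is a small edit to the plugin's `_run` helper (sketch; the fallback path below is a placeholder for wherever pip installed the `wiki` script):
+
+```python
+# In ~/.cheetahclaws/plugins/llmwiki/tools.py, inside _run():
+import shutil
+
+wiki_bin = shutil.which("wiki") or "/absolute/path/to/venv/bin/wiki"  # placeholder fallback
+```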
+
+**Tools disappear after a restart**
+
+The plugin is enabled but CheetahClaws is losing the state. Check `~/.cheetahclaws/plugins.json` — the `llmwiki` entry should have `"enabled": true`.
+
+**Git backend: authentication failed**
+
+`LLMWIKI_GIT_TOKEN` is not reaching the process. Verify with `printenv LLMWIKI_GIT_TOKEN`. If empty, re-source your profile in the same terminal where you launch CheetahClaws.
+
+---
+
+## Reference
+
+- [Plugin authoring guide](./plugin-authoring.md) — how the plugin system works
+- [llmwiki-py repo](https://github.com/yamaceay/llmwiki-py) — source and issue tracker
+- Plugin manifest: `examples/llmwiki-plugin/plugin.json`
+- Plugin tools: `examples/llmwiki-plugin/tools.py`
diff --git a/examples/llmwiki-plugin/README.md b/examples/llmwiki-plugin/README.md
new file mode 100644
index 0000000..59eb071
--- /dev/null
+++ b/examples/llmwiki-plugin/README.md
@@ -0,0 +1,45 @@
+# llmwiki plugin
+
+Memory management for CheetahClaws via [llmwiki-py](https://github.com/yamaceay/llmwiki-py).
+
+## Quick install
+
+```bash
+# 1. Install llmwiki-py (once, from its source directory)
+cd /path/to/llmwiki-py
+pip install -e .
+
+# 2. Create and initialise a wiki
+export LLMWIKI_WIKI_ROOT="$HOME/my-wiki"
+wiki init "$LLMWIKI_WIKI_ROOT" --name my-wiki --domain general
+
+# 3. Copy this plugin into CheetahClaws
+cp -r /path/to/openclaude/examples/llmwiki-plugin ~/.cheetahclaws/plugins/llmwiki
+
+# 4. Enable it
+cheetahclaws
+/plugin enable llmwiki
+```
+
+## Tools registered
+
+| Tool | What it does |
+|---|---|
+| `WikiRead` | Read a page by path |
+| `WikiWrite` | Create or overwrite a page |
+| `WikiAppend` | Append to an existing page |
+| `WikiSearch` | Full-text search with snippets |
+| `WikiList` | Directory tree of all pages |
+| `WikiStatus` | Wiki health check |
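+
+Each tool shells out to the `wiki` CLI (see `tools.py`). For example, `WikiRead` is roughly equivalent to:
+
+```python
+import subprocess
+
+# What WikiRead does under the hood (sketch; the page path is illustrative):
+out = subprocess.run(["wiki", "read", "concepts/auth.md"], capture_output=True, text=True)
+print(out.stdout)
+```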
+
+## Updating llmwiki-py
+
+```bash
+cd /path/to/llmwiki-py
+git pull
+pip install -e . # only if dependencies changed
+```
+
+## Full documentation
+
+See [docs/guides/llmwiki.md](../../docs/guides/llmwiki.md).
diff --git a/examples/llmwiki-plugin/plugin.json b/examples/llmwiki-plugin/plugin.json
new file mode 100644
index 0000000..f19a05d
--- /dev/null
+++ b/examples/llmwiki-plugin/plugin.json
@@ -0,0 +1,10 @@
+{
+ "name": "llmwiki",
+ "version": "0.1.0",
+ "description": "Persistent memory management via llmwiki-py (read/write/search/append a local wiki)",
+ "author": "openclaude",
+ "tags": ["memory", "wiki", "knowledge-base"],
+ "tools": ["tools"],
+ "dependencies": [],
+ "homepage": "https://github.com/yamaceay/llmwiki-py"
+}
diff --git a/examples/llmwiki-plugin/tools.py b/examples/llmwiki-plugin/tools.py
new file mode 100644
index 0000000..42d34e8
--- /dev/null
+++ b/examples/llmwiki-plugin/tools.py
@@ -0,0 +1,195 @@
+"""llmwiki plugin tools — wraps the `wiki` CLI for AI tool use."""
+from __future__ import annotations
+
+import os
+import subprocess
+from tool_registry import ToolDef
+
+
+def _run(args: list[str], stdin: str | None = None) -> tuple[int, str]:
+ """Run the wiki CLI and return (returncode, stdout+stderr)."""
+ try:
+ import shutil
+ wiki_bin = shutil.which("wiki") or "wiki"
+ result = subprocess.run(
+ [wiki_bin, *args],
+ input=stdin,
+ capture_output=True,
+ text=True,
+ env={**os.environ},
+ )
+ output = result.stdout
+ if result.returncode != 0 and result.stderr:
+ output = result.stderr.strip()
+ return result.returncode, output
+ except FileNotFoundError:
+ return 1, (
+ "The `wiki` command was not found. Install llmwiki-py with:\n"
+ " pip install \"git+https://github.com/yamaceay/llmwiki-py.git#egg=llmwiki\""
+ )
+
+
+def _wiki_read(params: dict, config: dict) -> str:
+ _, out = _run(["read", params["path"]])
+ return out or "(empty page)"
+
+
+def _wiki_write(params: dict, config: dict) -> str:
+ _, out = _run(["write", params["path"]], stdin=params["content"])
+ return out
+
+
+def _wiki_append(params: dict, config: dict) -> str:
+ _, out = _run(["append", params["path"]], stdin=params["content"])
+ return out
+
+
+def _wiki_search(params: dict, config: dict) -> str:
+ args = ["search", params["query"]]
+ if "limit" in params:
+ args += ["--limit", str(params["limit"])]
+ _, out = _run(args)
+ return out or "No results."
+
+
+def _wiki_list(params: dict, config: dict) -> str:
+ args = ["list", "--tree"]
+ if params.get("dir"):
+ args.append(params["dir"])
+ _, out = _run(args)
+ return out or "(wiki is empty)"
+
+
+def _wiki_status(params: dict, config: dict) -> str:
+ _, out = _run(["status"])
+ return out
+
+
+TOOL_DEFS = [
+ ToolDef(
+ name="WikiRead",
+ schema={
+ "name": "WikiRead",
+ "description": "Read a page from the wiki knowledge base.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "Wiki page path, e.g. 'concepts/auth.md'",
+ }
+ },
+ "required": ["path"],
+ },
+ },
+ func=_wiki_read,
+ read_only=True,
+ concurrent_safe=True,
+ ),
+ ToolDef(
+ name="WikiWrite",
+ schema={
+ "name": "WikiWrite",
+ "description": "Write (create or overwrite) a wiki page with new content.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "Wiki page path, e.g. 'concepts/auth.md'",
+ },
+ "content": {
+ "type": "string",
+ "description": "Full Markdown content to write",
+ },
+ },
+ "required": ["path", "content"],
+ },
+ },
+ func=_wiki_write,
+ read_only=False,
+ concurrent_safe=False,
+ ),
+ ToolDef(
+ name="WikiAppend",
+ schema={
+ "name": "WikiAppend",
+ "description": "Append content to an existing wiki page without overwriting it.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "Wiki page path",
+ },
+ "content": {
+ "type": "string",
+ "description": "Markdown content to append",
+ },
+ },
+ "required": ["path", "content"],
+ },
+ },
+ func=_wiki_append,
+ read_only=False,
+ concurrent_safe=False,
+ ),
+ ToolDef(
+ name="WikiSearch",
+ schema={
+ "name": "WikiSearch",
+ "description": "Full-text search across all wiki pages. Returns matching pages with snippets.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "type": "string",
+ "description": "Search query",
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Max results to return (default: 10)",
+ "default": 10,
+ },
+ },
+ "required": ["query"],
+ },
+ },
+ func=_wiki_search,
+ read_only=True,
+ concurrent_safe=True,
+ ),
+ ToolDef(
+ name="WikiList",
+ schema={
+ "name": "WikiList",
+ "description": "List all wiki pages as a directory tree.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "dir": {
+ "type": "string",
+ "description": "Subdirectory to list (optional, defaults to root)",
+ }
+ },
+ },
+ },
+ func=_wiki_list,
+ read_only=True,
+ concurrent_safe=True,
+ ),
+ ToolDef(
+ name="WikiStatus",
+ schema={
+ "name": "WikiStatus",
+ "description": "Show wiki health: page count, index status, git backend status.",
+ "input_schema": {
+ "type": "object",
+ "properties": {},
+ },
+ },
+ func=_wiki_status,
+ read_only=True,
+ concurrent_safe=True,
+ ),
+]
diff --git a/plugin/store.py b/plugin/store.py
index 729c337..8cec063 100644
--- a/plugin/store.py
+++ b/plugin/store.py
@@ -170,7 +170,7 @@ def install_plugin(
try:
if source is None:
# No source → treat name as a local path if it exists, else error
- local = Path(name)
+ local = Path(name).expanduser()
if local.exists() and local.is_dir():
source = str(local.resolve())
else:
@@ -188,7 +188,7 @@ def install_plugin(
if not ok:
return False, msg
else:
- local_src = Path(source)
+ local_src = Path(source).expanduser()
if not local_src.exists():
return False, f"Local path not found: {source}"
shutil.copytree(str(local_src), str(plugin_dir))
diff --git a/providers.py b/providers.py
index 867f8d4..c628e06 100644
--- a/providers.py
+++ b/providers.py
@@ -512,7 +512,13 @@ def stream_anthropic(
) -> Generator:
"""Stream from Anthropic API. Yields TextChunk/ThinkingChunk, then AssistantTurn."""
import anthropic as _ant
- client = _ant.Anthropic(api_key=api_key)
+ import os as _os
+ _base_url = (config.get("anthropic_base_url")
+ or _os.environ.get("ANTHROPIC_BASE_URL", ""))
+ _client_kwargs = {"api_key": api_key}
+ if _base_url:
+ _client_kwargs["base_url"] = _base_url
+ client = _ant.Anthropic(**_client_kwargs)
_mt = resolve_max_tokens(config, "anthropic", model) or 8192
kwargs = {
diff --git a/tools/__init__.py b/tools/__init__.py
index 5bcaf74..75b31c2 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -616,7 +616,7 @@ def _register_builtins() -> None:
"memory.tools",
"multi_agent.tools",
"skill.tools",
- "cc_mcp.tools",
+ "mcp.tools",
"task.tools",
]
diff --git a/tools/interaction.py b/tools/interaction.py
index 6457ff4..47a50bb 100644
--- a/tools/interaction.py
+++ b/tools/interaction.py
@@ -180,12 +180,40 @@ def ask_input_interactive(prompt: str, config: dict,
return text
# ── Terminal ────────────────────────────────────────────────────────────
+ # The agent runs in a background thread; prompt_toolkit owns stdin on the
+ # main thread. Signal the question via shared state so the main thread's
+ # Enter binding collects the answer and unblocks us — no stdin fighting.
try:
- rl_prompt = _re.sub(r'(\x1b\[[0-9;]*m)', r'\001\1\002', prompt)
- return input(rl_prompt)
- except (KeyboardInterrupt, EOFError):
- print()
- return ""
+ # Import the module itself (not individual names) so we can mutate its
+ # shared state attributes from this thread.
+ import ui.agent_state as _st
+
+ clean_prompt = _re.sub(r'(\x1b\[[0-9;]*m)', '', prompt).strip()
+
+ # Print the question above the prompt bar (goes via patch_stdout proxy)
+ import sys as _sys
+ _sys.stdout.write(f"\n\033[1;35m❯ {clean_prompt}\033[0m\n")
+ _sys.stdout.flush()
+
+ # Signal the Enter binding
+ _st._answer_event.clear()
+ _st._answer_value = ""
+ _st._pending_question = clean_prompt
+
+ if not _st._answer_event.wait(timeout=_INPUT_WAIT_TIMEOUT):
+ _st._pending_question = ""
+ return "(timeout)"
+
+ return _st._answer_value
+
+ except Exception:
+ try:
+ return input(_re.sub(r'(\x1b\[[0-9;]*m)', r'\001\1\002', prompt))
+ except (KeyboardInterrupt, EOFError):
+ return ""
# ── SleepTimer ────────────────────────────────────────────────────────────
diff --git a/ui/agent_state.py b/ui/agent_state.py
new file mode 100644
index 0000000..6460c38
--- /dev/null
+++ b/ui/agent_state.py
@@ -0,0 +1,26 @@
+"""ui/agent_state.py — Shared concurrency primitives for the interactive REPL.
+
+Three module-level singletons coordinate the main (input) thread and the
+agent (run_query) thread:
+
+ _cancel_event — ESC sets this; agent loop checks it to stop early.
+ _input_queue — typed-ahead messages buffered while agent is running.
+ _agent_running — set while a run_query() turn is in progress.
+
+Question routing (AskUserQuestion):
+ _pending_question — non-empty string while agent is waiting for an answer.
+ _answer_event — agent thread blocks on this; Enter binding sets it.
+ _answer_value — the user's answer, written by Enter binding before set().
+"""
+from __future__ import annotations
+
+import collections
+import threading
+
+_cancel_event: threading.Event = threading.Event()
+_input_queue: collections.deque = collections.deque()
+_agent_running: threading.Event = threading.Event()
+
+_pending_question: str = ""
+_answer_event: threading.Event = threading.Event()
+_answer_value: str = ""
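+
+# Usage sketch (agent-thread side, mirroring run_query in cheetahclaws.py):
+#   _cancel_event.clear(); _agent_running.set()
+#   try:
+#       ... run one turn, checking _cancel_event.is_set() between tool calls ...
+#   finally:
+#       _agent_running.clear()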
diff --git a/ui/btw_overlay.py b/ui/btw_overlay.py
new file mode 100644
index 0000000..dbfc963
--- /dev/null
+++ b/ui/btw_overlay.py
@@ -0,0 +1,92 @@
+"""ui/btw_overlay.py — /btw Rich overlay: side-session Q&A while agent runs.
+
+Opens a three-column layout:
+ left — dim padding (agent output scrolls behind via patch_stdout)
+ center — focused white-on-dark panel; streams the answer
+ right — dim padding (mirror)
+
+The question is answered by a fresh no-tools API call so it never
+touches the main conversation state.
+"""
+from __future__ import annotations
+
+import sys
+from typing import Iterator
+
+
+def _side_stream(question: str, model: str, config: dict) -> Iterator[str]:
+ """Stream a quick no-tools answer for the /btw question."""
+ from providers import stream as _stream, AssistantTurn, TextChunk
+ messages = [{"role": "user", "content": question}]
+ system = "Answer concisely and helpfully. No markdown headers needed."
+ for event in _stream(
+ model=model,
+ system=system,
+ messages=messages,
+ tool_schemas=[],
+ config=config,
+ ):
+ if isinstance(event, TextChunk):
+ yield event.text
+
+
+def run_btw_overlay(question: str, model: str, config: dict) -> None:
+ """Render the /btw overlay and stream an answer into it."""
+ try:
+ from rich.console import Console
+ from rich.layout import Layout
+ from rich.live import Live
+ from rich.panel import Panel
+ from rich.markdown import Markdown
+ from rich.text import Text
+ except ImportError:
+ # Rich not available — plain fallback
+ print(f"\n\033[35m[/btw]\033[0m {question}")
+ for chunk in _side_stream(question, model, config):
+ sys.stdout.write(chunk)
+ sys.stdout.flush()
+ print()
+ return
+
+ console = Console()
+ layout = Layout()
+ layout.split_row(
+ Layout(name="left", ratio=1),
+ Layout(name="center", ratio=3),
+ Layout(name="right", ratio=1),
+ )
+ dim_panel = Panel(Text(""), border_style="dim")
+ layout["left"].update(dim_panel)
+ layout["right"].update(dim_panel)
+ layout["center"].update(
+ Panel(Text("…", style="dim"), title="[cyan]/btw[/cyan]", border_style="cyan")
+ )
+
+ buf: list[str] = []
+
+ def _render_center():
+ text = "".join(buf)
+ try:
+ renderable = Markdown(text) if any(c in text for c in ("#", "*", "`", "_", "[")) else text
+ except Exception:
+ renderable = text
+ layout["center"].update(
+ Panel(renderable, title="[cyan]/btw[/cyan]", border_style="cyan")
+ )
+
+ with Live(layout, console=console, auto_refresh=True, refresh_per_second=12,
+ vertical_overflow="visible"):
+ try:
+ for chunk in _side_stream(question, model, config):
+ buf.append(chunk)
+ _render_center()
+ except KeyboardInterrupt:
+ pass
+
+ # Leave a static copy so the user can scroll back
+ final = "".join(buf).strip()
+ if final:
+ console.print(
+ Panel(Markdown(final) if any(c in final for c in ("#", "*", "`", "_", "[")) else final,
+ title="[cyan]/btw[/cyan]", border_style="dim")
+ )
diff --git a/ui/input.py b/ui/input.py
index ed726ae..d4420d3 100644
--- a/ui/input.py
+++ b/ui/input.py
@@ -153,10 +153,12 @@ def _ghost_text_acceptable() -> bool:
return True
def _build_key_bindings() -> "KeyBindings":
- """Tab accepts the gray history ghost-text when one is shown.
+ """Key bindings for the REPL input line.
- Falls through to the default Tab binding (slash-menu cycling) when the
- filter doesn't match, so `/cmd` completion behavior is unchanged.
+ Tab — accept gray history ghost-text (falls through to slash-menu cycling).
+ Escape — hard-stop the running agent turn (sets _cancel_event).
+ Enter — if agent is running, queue the typed message instead of submitting.
+ /btw submits immediately as an overlay request.
"""
kb = KeyBindings()
@@ -165,6 +167,48 @@ def _(event):
buf = event.current_buffer
buf.insert_text(buf.suggestion.text)
+ @kb.add("escape")
+ def _esc(event):
+ from ui.agent_state import _cancel_event, _agent_running
+ if _agent_running.is_set():
+ _cancel_event.set()
+ import sys
+ sys.stdout.write("\n\033[33m (interrupted — finishing current operation…)\033[0m\n")
+ sys.stdout.flush()
+ event.current_buffer.reset()
+
+ @kb.add("enter")
+ def _enter(event):
+ import ui.agent_state as _st
+ buf = event.current_buffer
+ text = buf.text.strip()
+
+ # ── Answer mode: agent is waiting for a reply ──────────────────
+ if _st._pending_question:
+ _st._answer_value = text
+ _st._pending_question = ""
+ _st._answer_event.set()
+ buf.reset()
+ return
+
+ if not text:
+ event.current_buffer.validate_and_handle()
+ return
+ # /btw goes straight to overlay (handled by read_line caller)
+ if text.startswith("/btw ") or text == "/btw":
+ event.app.exit(result=text)
+ return
+ if _st._agent_running.is_set():
+ # Queue the message for after the current turn finishes
+ _st._input_queue.append(text)
+ buf.reset()
+ import sys
+ short = text[:60] + ("…" if len(text) > 60 else "")
+ sys.stdout.write(f"\n\033[2m queued: {short}\033[0m\n")
+ sys.stdout.flush()
+ else:
+ event.app.exit(result=text)
+
return kb
@@ -192,6 +236,33 @@ def _build_session(history_path: Optional[Path]):
"completion-menu.meta.completion.current": "bg:#005f87 #eeeeee",
"auto-suggestion": "#606060 italic",
})
+ # Disable CPR (Cursor Position Report): prompt_toolkit sends ESC[6n to
+ # detect cursor position, and the terminal's response leaks back into stdin
+ # as garbage characters (e.g. "7;1R"). Disabling CPR prevents this entirely.
+ try:
+ import sys as _sys
+ import io as _io
+ from prompt_toolkit.output.vt100 import Vt100_Output
+ from prompt_toolkit.output.color_depth import ColorDepth
+
+ def _get_size():
+ from prompt_toolkit.utils import get_cwidth as _ # noqa
+ try:
+ from prompt_toolkit.output.vt100 import _get_size as _gs
+ rows, cols = _gs(_sys.stdout.fileno())
+ except Exception:
+ rows, cols = 24, 80
+ from prompt_toolkit.data_structures import Size
+ return Size(rows=rows or 24, columns=cols or 80)
+
+ _output = Vt100_Output(
+ _sys.stdout,
+ _get_size,
+ enable_cpr=False,
+ )
+ except Exception:
+ _output = None
+
return PromptSession(
history=history,
completer=completer,
@@ -201,6 +272,7 @@ def _build_session(history_path: Optional[Path]):
mouse_support=False,
style=style,
key_bindings=_build_key_bindings(),
+ output=_output,
)
@@ -217,5 +289,6 @@ def read_line(prompt_ansi: str, history_path: Optional[Path] = None) -> str:
if _SESSION is None:
_SESSION = _build_session(history_path)
_SESSION_HISTORY_PATH = history_path
+
with patch_stdout(raw=True):
return _SESSION.prompt(ANSI(prompt_ansi))
diff --git a/ui/render.py b/ui/render.py
index c776ced..5bab8cc 100644
--- a/ui/render.py
+++ b/ui/render.py
@@ -105,11 +105,10 @@ def _start_live() -> None:
def stream_text(chunk: str) -> None:
- """Buffer chunk; update Live in-place when Rich available, else print directly.
+ """Buffer chunk; render formatted output at flush_response() time.
- Safety: if accumulated text exceeds _LIVE_LINE_LIMIT lines, auto-switch
- from Rich Live to plain streaming to prevent terminal re-render duplication
- on terminals that can't handle large Live areas (macOS Terminal, etc.).
+ When Rich Live is enabled: update in-place on each chunk.
+ When disabled (patch_stdout active): buffer silently, render once at flush.
"""
global _current_live
_accumulated_text.append(chunk)
@@ -122,8 +121,6 @@ def stream_text(chunk: str) -> None:
if _current_live is not None and line_count > _LIVE_LINE_LIMIT:
_current_live.stop()
_current_live = None
- # Print the full text once (Live already displayed partial content,
- # but stopping Live clears it — so we re-print cleanly)
console.print(_make_renderable(full))
return
@@ -132,10 +129,8 @@ def stream_text(chunk: str) -> None:
_start_live()
_current_live.update(_make_renderable(full), refresh=True)
else:
- # Already past limit, no Live — just append new chunk
print(chunk, end="", flush=True)
- else:
- print(chunk, end="", flush=True)
+ # Non-Live: buffer only — flush_response() renders the complete text once
def stream_thinking(chunk: str, verbose: bool):
if verbose:
@@ -144,17 +139,18 @@ def stream_thinking(chunk: str, verbose: bool):
print(f"{C['dim']}{clean_chunk}", end="", flush=True)
def flush_response() -> None:
- """Commit buffered text to screen: stop Live (freezes rendered Markdown in place)."""
+ """Commit buffered text to screen with Rich Markdown formatting."""
global _current_live
full = "".join(_accumulated_text)
_accumulated_text.clear()
if _current_live is not None:
_current_live.stop()
_current_live = None
- elif _RICH and _RICH_LIVE and full.strip():
+ elif _RICH and full.strip():
console.print(_make_renderable(full))
- else:
- print() # ensure newline after plain-text stream
+ elif full:
+ print(full, end="", flush=True)
+ print()
# ── Spinner ────────────────────────────────────────────────────────────────
@@ -276,25 +272,43 @@ def _tool_desc(name: str, inputs: dict) -> str:
def print_tool_start(name: str, inputs: dict, verbose: bool):
- """Show tool invocation."""
- desc = _tool_desc(name, inputs)
- print(clr(f" ⚙ {desc}", "dim", "cyan"), flush=True)
if verbose:
+ desc = _tool_desc(name, inputs)
+ print(clr(f" ⚙ {desc}", "dim", "cyan"), flush=True)
print(clr(f" inputs: {json.dumps(inputs, ensure_ascii=False)[:200]}", "dim"))
+ return
+ # Non-verbose: overwrite the spinner line with the tool name, no newline
+ desc = _tool_desc(name, inputs)
+ sys.stdout.write(f"\r \033[2m⚙ {desc[:80]}\033[0m" + " " * 10 + "\r")
+ sys.stdout.flush()
def print_tool_end(name: str, result: str, verbose: bool):
- lines = result.count("\n") + 1
- size = len(result)
- summary = f"→ {lines} lines ({size} chars)"
- if not result.startswith("Error") and not result.startswith("Denied"):
- print(clr(f" ✓ {summary}", "dim", "green"), flush=True)
- if name in ("Edit", "Write") and _has_diff(result):
- parts = result.split("\n\n", 1)
- if len(parts) == 2:
- print(clr(f" {parts[0]}", "dim"))
- render_diff(parts[1])
- else:
+ if verbose:
+ lines = result.count("\n") + 1
+ size = len(result)
+ summary = f"→ {lines} lines ({size} chars)"
+ if not result.startswith("Error") and not result.startswith("Denied"):
+ print(clr(f" ✓ {summary}", "dim", "green"), flush=True)
+ if name in ("Edit", "Write") and _has_diff(result):
+ parts = result.split("\n\n", 1)
+ if len(parts) == 2:
+ print(clr(f" {parts[0]}", "dim"))
+ render_diff(parts[1])
+ preview = result[:500] + ("…" if len(result) > 500 else "")
+ print(clr(f" {preview.replace(chr(10), chr(10)+' ')}", "dim"))
+ else:
+ print(clr(f" ✗ {result[:120]}", "dim", "red"), flush=True)
+ return
+ # Non-verbose: show diffs for Edit/Write (permanent output), errors; erase all else
+ if name in ("Edit", "Write") and _has_diff(result):
+ sys.stdout.write("\r" + " " * 60 + "\r")
+ sys.stdout.flush()
+ parts = result.split("\n\n", 1)
+ if len(parts) == 2:
+ print(clr(f" {parts[0]}", "dim"))
+ render_diff(parts[1])
+ elif result.startswith("Error") or result.startswith("Denied"):
+ sys.stdout.write("\r" + " " * 60 + "\r")
+ sys.stdout.flush()
print(clr(f" ✗ {result[:120]}", "dim", "red"), flush=True)
- if verbose and not result.startswith("Denied"):
- preview = result[:500] + ("…" if len(result) > 500 else "")
- print(clr(f" {preview.replace(chr(10), chr(10)+' ')}", "dim"))
+ # Otherwise: just erase the tool-start line; spinner will overwrite next