diff --git a/.gitignore b/.gitignore index ccd633d..1a1cb48 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ memex/__pycache__/ .memex/ .env dist/ -tests/__pycache__/ \ No newline at end of file +tests/__pycache__/ +.mcp.json \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index a297f1d..82b72e5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -50,11 +50,12 @@ memex/ │ ├── writer.py # Renders KnowledgeRecord to .md and commits it │ ├── action.py # GitHub Action entry point — reads env vars, orchestrates │ ├── adr.py # ADR parser — find_adr_files, parse_adr, index_adrs -│ ├── cli.py # Click CLI — `memex configure/init/update/index/query` +│ ├── cli.py # Click CLI — `memex configure/init/update/index/query/serve` │ ├── config.py # API key resolution — load_api_key, save_api_key, CONFIG_FILE │ ├── nudge.py # Low-confidence nudge comment — should_nudge, post_nudge_comment │ ├── init.py # `memex init` — bootstrap from repo scan -│ └── update.py # `memex update` — incremental extraction from git history +│ ├── update.py # `memex update` — incremental extraction from git history +│ └── mcp_server.py # MCP server — memex_query, memex_get_decision, memex_list_recent ├── tests/ │ └── ... ├── pyproject.toml @@ -86,14 +87,16 @@ The index cache lives at: | Structured output | `instructor` + `pydantic` | Guaranteed schema compliance, auto-retry | | Vector search | `numpy` cosine similarity over `index.json` | No database needed at MVP scale (<5k records) | | CLI | `click` | Standard, simple | +| MCP server | `mcp` (official SDK, `mcp.server.fastmcp`) | Exposes knowledge tools to AI coding agents via stdio | | GitHub API | `gh` CLI in Actions, `PyGithub` if needed in Python | Already available in Actions runner | **There is no database.** Knowledge records are markdown files in the repo. The index is a JSON file. Do not introduce SQLite, PostgreSQL, Redis, or any other persistence layer in Phase 1. 
-**There is no server.** The Action runs in GitHub's infrastructure. The CLI runs -locally. Do not introduce FastAPI, Flask, or any web framework in Phase 1. +**There is no HTTP server.** The Action runs in GitHub's infrastructure. The CLI runs +locally. The MCP server uses stdio transport (subprocess-based, no network port). +Do not introduce FastAPI, Flask, or any web framework in Phase 1. **One API key.** Everything goes through `ANTHROPIC_API_KEY`. Do not introduce OpenAI, Cohere, or any other LLM provider dependency. @@ -213,6 +216,7 @@ memex index # embed all .md files in knowledge/, memex query "why did we move off MongoDB" # cosine similarity search, top 3 results memex query --min-score 0.5 "..." # broaden search by lowering the relevance threshold memex query --expand "vague question" # rewrite query via Claude Haiku before embedding +memex serve # start the MCP server (stdio) for AI coding agents ``` `memex index` should be incremental — only embed files whose content has changed since @@ -288,6 +292,7 @@ When you make any of the changes below, update CLAUDE.md **in the same commit**: |---|---| | New/removed/renamed `.py` in `memex/` | File structure section | | New/removed `@cli.command()` in `cli.py` | CLI behaviour section | +| New/removed `@mcp.tool()` in `mcp_server.py` | File structure section | | `model=` string in `extractor.py` or `init.py` | Tech stack table + decisions section | | New dependency in `pyproject.toml` | Tech stack table | | New `os.environ["VAR"]` in `action.py` | Environment variables table | diff --git a/README.md b/README.md index 4b6a1d4..db02afe 100644 --- a/README.md +++ b/README.md @@ -275,10 +275,47 @@ memex query QUESTION Semantic search over indexed knowledge --top N Return top N results (default: 3) --min-score F Hide results below this similarity score (default: 0.70) --expand Rewrite query via Claude Haiku before searching +memex serve Start the MCP server (stdio) for AI coding agents ``` --- +## MCP server (AI agent 
integration) + +`memex serve` exposes your knowledge index as an [MCP](https://modelcontextprotocol.io) server, making it available to AI coding agents (Claude Code, Cursor, Copilot, Windsurf) as a set of callable tools. The agent can query your team's decisions automatically before suggesting architectural changes — without you having to prompt it. + +Three tools are exposed: + +| Tool | Description | +|---|---| +| `memex_query(question, top, min_score)` | Semantic search — same as `memex query` but callable by the agent; default `min_score` is 0.5 so borderline matches are surfaced with their score | +| `memex_get_decision(id)` | Fetch the full text of a specific record by file path or title slug | +| `memex_list_recent(domain, limit)` | List recent decisions, optionally filtered by a domain keyword (e.g. `"auth"`, `"database"`) | + +### Setup + +Run `memex index` first so the server has an index to query. + +Create `.mcp.json` in your repo root (this file is git-ignored — paths are machine-specific): + +```json +{ + "mcpServers": { + "memex": { + "command": "/path/to/python3.12", + "args": ["-m", "memex.mcp_server"], + "cwd": "/path/to/your/repo" + } + } +} +``` + +Replace `/path/to/python3.12` with the Python 3.12+ interpreter that has `memex-oss` installed (`which python3.12` or `which python3`), and `/path/to/your/repo` with the absolute path to your repo. + +Reload your editor and the three tools will appear in the agent's tool list automatically. + +--- + ## Querying your knowledge `memex query` runs a local semantic search — no data leaves your machine. diff --git a/memex/cli.py b/memex/cli.py index 283bd9d..77fd8fb 100644 --- a/memex/cli.py +++ b/memex/cli.py @@ -370,6 +370,26 @@ def index(force, include_adrs): click.echo(f"Done. {len(existing)} records total.") +@cli.command() +def serve(): + """Start the MCP server (stdio transport) for AI coding agents. + + Exposes three tools: memex_query, memex_get_decision, memex_list_recent. 
+ + \b + Configure in .mcp.json or claude_desktop_config.json: + { + "mcpServers": { + "memex": {"command": "memex", "args": ["serve"], "cwd": "/your/repo"} + } + } + + Run `memex index` first so the server has an index to query. + """ + from .mcp_server import mcp + mcp.run() + + @cli.command() @click.argument("query", nargs=-1) @click.option("--top", default=3, help="Number of results") diff --git a/memex/mcp_server.py b/memex/mcp_server.py new file mode 100644 index 0000000..43ebc3d --- /dev/null +++ b/memex/mcp_server.py @@ -0,0 +1,160 @@ +"""MCP server for Memex — exposes institutional knowledge to AI coding agents. + +Three tools: + memex_query — semantic search over indexed decisions + memex_get_decision — fetch a specific record by path/slug + memex_list_recent — browse recent decisions, optionally filtered by domain + +Start with: memex serve +Configure in .mcp.json / claude_desktop_config.json: + {"mcpServers": {"memex": {"command": "memex", "args": ["serve"], "cwd": "/your/repo"}}} +""" +from __future__ import annotations + +from pathlib import Path + +from mcp.server.fastmcp import FastMCP + +from .cli import ( + KNOWLEDGE_DIR, + cosine_similarity, + embed, + extract_confidence, + load_index, +) + +mcp = FastMCP("memex") + +_NO_INDEX = ( + "No index found. Run `memex index` first to embed your knowledge records." +) + + +@mcp.tool() +def memex_query(question: str, top: int = 3, min_score: float = 0.5) -> str: + """Semantic search over indexed architectural decisions. + + Returns the most relevant decisions matching the question. + Low-confidence records are included with their confidence score surfaced + so you can hedge your answer. Default min_score is 0.5 (lower than the CLI + default of 0.7 — agents benefit from seeing borderline matches). + Run `memex index` first if the index is empty. 
+ """ + index = load_index() + if not index: + return _NO_INDEX + + [query_embedding] = embed([question]) + + scored = [ + (cosine_similarity(query_embedding, entry["embedding"]), entry) + for entry in index.values() + ] + scored.sort(key=lambda x: x[0], reverse=True) + results = [(s, e) for s, e in scored if s >= min_score][:top] + + if not results: + return ( + f"No results above similarity threshold {min_score:.2f}. " + "Try a lower threshold or rephrase your question." + ) + + lines = [f"Results for: {question}\n"] + for i, (score, entry) in enumerate(results, 1): + confidence = entry.get("confidence", 1.0) + if confidence < 0.65: + conf_note = " ⚠️ limited rationale" + elif confidence < 0.80: + conf_note = " 💡 partial rationale" + else: + conf_note = "" + + lines.append(f"{i}. {entry['title']} [score {score:.2f}{conf_note}]") + if entry.get("excerpt"): + lines.append(f" {entry['excerpt'][:300]}") + lines.append(f" {entry['path']}\n") + + return "\n".join(lines) + + +@mcp.tool() +def memex_get_decision(id: str) -> str: + """Fetch the full text of a specific decision record by path or title slug. + + `id` can be an exact file path + (e.g. 'knowledge/decisions/2024-11-14-migrate-billing.md'), + a filename fragment (e.g. 'migrate-billing'), or any partial path suffix. + Returns the raw markdown including frontmatter. + """ + # Exact path first + exact = Path(id) + if exact.exists(): + return exact.read_text() + + # Match against indexed paths + index = load_index() + for path_str in index: + if id in path_str: + p = Path(path_str) + if p.exists(): + return p.read_text() + + # Fallback: glob knowledge dir directly (works even without an index) + matches = list(KNOWLEDGE_DIR.rglob(f"*{id}*.md")) + if matches: + return matches[0].read_text() + + return ( + f"No record found matching '{id}'. " + "Use memex_query to search by topic, then pass the returned path here." 
+ ) + + +@mcp.tool() +def memex_list_recent(domain: str = "", limit: int = 10) -> str: + """List recent architectural decisions, optionally filtered by domain keyword. + + `domain` is matched case-insensitively against each record's title and excerpt + (e.g. 'auth', 'database', 'api', 'infra'). Returns up to `limit` records + sorted most-recent first (by filename date). + """ + index = load_index() + if not index: + return _NO_INDEX + + entries = list(index.values()) + + if domain: + kw = domain.lower() + entries = [ + e for e in entries + if kw in e.get("title", "").lower() or kw in e.get("excerpt", "").lower() + ] + if not entries: + return f"No decisions found matching domain '{domain}'." + + def _date_key(entry: dict) -> str: + stem = Path(entry["path"]).stem + return stem[:10] if len(stem) >= 10 else "0000-00-00" + + entries.sort(key=_date_key, reverse=True) + entries = entries[:limit] + + header = "Recent decisions" + if domain: + header += f" in '{domain}'" + header += f" ({len(entries)} shown):\n" + + lines = [header] + for entry in entries: + confidence = entry.get("confidence", 1.0) + conf_note = " ⚠️" if confidence < 0.65 else "" + date_str = _date_key(entry) + lines.append(f" {date_str} {entry['title']}{conf_note}") + lines.append(f" {entry['path']}\n") + + return "\n".join(lines) + + +if __name__ == "__main__": + mcp.run() diff --git a/pyproject.toml b/pyproject.toml index c13f28f..2c97bc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "pydantic>=2.8.0", "numpy>=1.26.0", "click>=8.1.0", + "mcp>=1.0.0", ] [project.optional-dependencies] diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py new file mode 100644 index 0000000..d2fc549 --- /dev/null +++ b/tests/test_mcp_server.py @@ -0,0 +1,215 @@ +"""Unit tests for memex/mcp_server.py — three MCP tools.""" +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +# 
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

# High-confidence record; its embedding is the unit vector on the first axis.
RECORD_A = dict(
    embedding=[1.0, 0.0, 0.0],
    title="Migrate billing store to PostgreSQL",
    excerpt="Unbounded schema flexibility was causing silent data corruption.",
    confidence=0.92,
    path="knowledge/decisions/2024-11-14-migrate-billing.md",
    content_hash="abc123",
)

# Low-confidence record (0.45); orthogonal to RECORD_A in embedding space.
RECORD_B = dict(
    embedding=[0.0, 1.0, 0.0],
    title="Switch event queue from SQS to Redis Streams",
    excerpt="SQS 256KB limit was consistently hit as event payloads grew.",
    confidence=0.45,
    path="knowledge/decisions/2024-09-01-switch-event-queue.md",
    content_hash="def456",
)

# The index maps each record's path to the record itself.
FIXTURE_INDEX = {record["path"]: record for record in (RECORD_A, RECORD_B)}

FULL_MARKDOWN = """\
---
title: "Migrate billing store to PostgreSQL"
date: 2024-11-14
author: "srajan"
source: "https://github.com/acme/api-core/pull/2847"
confidence: 0.92
tags: []
---

# Migrate billing store to PostgreSQL

## Context

The billing team hit repeated data integrity issues with MongoDB.

## Decision

Migrate the billing store to PostgreSQL.
"""


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _import_tools():
    """Return (memex_query, memex_get_decision, memex_list_recent).

    The import happens at call time rather than at module import.
    """
    import memex.mcp_server as server

    return server.memex_query, server.memex_get_decision, server.memex_list_recent


# ---------------------------------------------------------------------------
# memex_query
# ---------------------------------------------------------------------------

class TestMemexQuery:
    @staticmethod
    def _ask(question, *, index=FIXTURE_INDEX, vector=None, **tool_kwargs):
        """Call memex_query with load_index (and optionally embed) stubbed.

        `vector`, when given, is what the stubbed embed() returns for the
        question; when omitted, embed is left unpatched.
        """
        query_tool = _import_tools()[0]
        stubs = {"load_index": MagicMock(return_value=index)}
        if vector is not None:
            stubs["embed"] = MagicMock(return_value=[vector])
        with patch.multiple("memex.mcp_server", **stubs):
            return query_tool(question, **tool_kwargs)

    def test_returns_results_above_threshold(self):
        # The query vector equals RECORD_A's embedding, so similarity is 1.0.
        output = self._ask("why did we move off MongoDB", vector=[1.0, 0.0, 0.0])

        for expected in (
            "Migrate billing store to PostgreSQL",
            "score 1.00",
            "knowledge/decisions/2024-11-14-migrate-billing.md",
        ):
            assert expected in output

    def test_low_confidence_surfaced(self):
        # The query vector lines up with RECORD_B, whose confidence is 0.45.
        output = self._ask("event queue choice", vector=[0.0, 1.0, 0.0])

        assert "Switch event queue from SQS to Redis Streams" in output
        assert "limited rationale" in output  # confidence < 0.65 annotated

    def test_no_results_below_threshold(self):
        # A third-axis vector has zero similarity to both fixture records.
        output = self._ask(
            "unrelated question", vector=[0.0, 0.0, 1.0], min_score=0.5
        )

        assert "No results" in output

    def test_empty_index_returns_guidance(self):
        output = self._ask("anything", index={})

        assert "memex index" in output

    def test_top_n_limits_results(self):
        output = self._ask(
            "question", vector=[1.0, 1.0, 0.0], top=1, min_score=0.0
        )

        # Exactly one path line means exactly one result was rendered.
        # NOTE(review): the whitespace inside this literal was reconstructed
        # from a whitespace-collapsed diff — confirm against the repository.
        assert output.count("\n knowledge/") == 1


# ---------------------------------------------------------------------------
# memex_get_decision
# ---------------------------------------------------------------------------

class TestMemexGetDecision:
    def test_exact_path_match(self, tmp_path):
        record_file = tmp_path / "decision.md"
        record_file.write_text(FULL_MARKDOWN)
        get_decision = _import_tools()[1]

        with patch("memex.mcp_server.load_index", return_value={}):
            body = get_decision(str(record_file))

        assert "Migrate billing store to PostgreSQL" in body

    def test_partial_path_match_from_index(self, tmp_path):
        record_file = tmp_path / "2024-11-14-migrate-billing.md"
        record_file.write_text(FULL_MARKDOWN)
        location = str(record_file)
        indexed = {location: {**RECORD_A, "path": location}}
        get_decision = _import_tools()[1]

        with patch("memex.mcp_server.load_index", return_value=indexed):
            body = get_decision("migrate-billing")

        assert "Migrate billing store to PostgreSQL" in body

    def test_not_found_returns_guidance(self):
        get_decision = _import_tools()[1]
        stubs = {
            "load_index": MagicMock(return_value={}),
            "KNOWLEDGE_DIR": Path("/nonexistent"),
        }

        with patch.multiple("memex.mcp_server", **stubs):
            message = get_decision("nonexistent-slug-xyz")

        assert "No record found" in message
        assert "memex_query" in message
# ---------------------------------------------------------------------------
# memex_list_recent
# ---------------------------------------------------------------------------

class TestMemexListRecent:
    @staticmethod
    def _listing(index=FIXTURE_INDEX, **tool_kwargs):
        """Call memex_list_recent with load_index stubbed to return `index`."""
        list_recent = _import_tools()[2]
        with patch("memex.mcp_server.load_index", return_value=index):
            return list_recent(**tool_kwargs)

    def test_lists_all_records_sorted_by_date(self):
        listing = self._listing()

        # RECORD_A (2024-11-14) must be listed ahead of RECORD_B (2024-09-01).
        newer_at = listing.index("Migrate billing")
        older_at = listing.index("Switch event queue")
        assert newer_at < older_at

    def test_domain_filter_matches_title(self):
        listing = self._listing(domain="billing")

        assert "Migrate billing store to PostgreSQL" in listing
        assert "Switch event queue" not in listing

    def test_domain_filter_matches_excerpt(self):
        listing = self._listing(domain="SQS")

        assert "Switch event queue from SQS to Redis Streams" in listing
        assert "Migrate billing" not in listing

    def test_domain_no_match_returns_message(self):
        listing = self._listing(domain="kubernetes")

        assert "No decisions found" in listing

    def test_low_confidence_flagged(self):
        listing = self._listing()

        # RECORD_B's confidence is 0.45, so the warning flag must be present.
        assert "⚠️" in listing

    def test_limit_respected(self):
        listing = self._listing(limit=1)

        # With limit=1 only the newest record is rendered.
        assert "Migrate billing store to PostgreSQL" in listing
        assert "Switch event queue" not in listing

    def test_empty_index_returns_guidance(self):
        listing = self._listing(index={})

        assert "memex index" in listing