Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,26 @@ Commit a single compressed file to your repo and your teammates skip the reindex

The result is similar in spirit to graphify's `graphify-out/` directory, but as a single compressed file with explicit two-tier export, integrity-checked import, and zero merge friction.

## Publish to understand-quickly (opt-in)

[`looptech-ai/understand-quickly`](https://github.com/looptech-ai/understand-quickly) is a public registry of code-knowledge graphs that ships its own MCP server. `scripts/uq-publish.py` calls the codebase-memory-mcp binary's existing MCP tools (no new C code, no rebuild needed), projects the result to a `gitnexus@1`-shaped JSON graph at `.codebase-memory/graph.json`, stamps `metadata.{tool, tool_version, generated_at, commit}`, and — when `UNDERSTAND_QUICKLY_TOKEN` is set — fires a `repository_dispatch` event so the registry resyncs the entry.

```bash
python3 scripts/uq-publish.py
```

Without the token, only the local file is written. The recommended CI step is the [`looptech-ai/uq-publish-action`](https://github.com/looptech-ai/uq-publish-action) Marketplace Action:

```yaml
- uses: looptech-ai/uq-publish-action@v0.1.0
with:
graph-path: '.codebase-memory/graph.json'
format: 'gitnexus@1'
token: ${{ secrets.UNDERSTAND_QUICKLY_TOKEN }}
```

Submitting via the publish path is governed by the [Understand-Quickly Data License 1.0](https://github.com/looptech-ai/understand-quickly/blob/main/DATA-LICENSE.md). It is opt-in and gated on the user explicitly setting the token.

## How It Works

codebase-memory-mcp is a **structural analysis backend** — it builds and queries the knowledge graph. It does **not** include an LLM. Instead, it relies on your MCP client (Claude Code, or any MCP-compatible agent) to be the intelligence layer.
Expand Down
96 changes: 96 additions & 0 deletions scripts/test_uq_publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""Unit tests for scripts/uq-publish.py.

Stdlib-only — no pytest. Run:
python3 scripts/test_uq_publish.py
"""
from __future__ import annotations

import importlib.util
import json
import os
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
from unittest import mock

# The script under test has a hyphenated filename ("uq-publish.py"), so it
# cannot be imported with a plain `import` statement — load it via importlib
# under the module name "uq_publish" instead.
_SCRIPT = Path(__file__).resolve().parent / "uq-publish.py"
_spec = importlib.util.spec_from_file_location("uq_publish", _SCRIPT)
uq = importlib.util.module_from_spec(_spec)
# Register the module before executing it so it is reachable by name.
sys.modules["uq_publish"] = uq
_spec.loader.exec_module(uq)


class StampMetadataTests(unittest.TestCase):
    """stamp_metadata must fill tool, tool_version, generated_at, and commit."""

    def test_required_fields_set(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Path(tmpdir)
            # Build a minimal one-commit repo so HEAD resolves to a real SHA.
            setup_cmds = (
                ["git", "init", "-q", "-b", "main"],
                ["git", "config", "user.email", "t@t"],
                ["git", "config", "user.name", "t"],
            )
            for cmd in setup_cmds:
                subprocess.run(cmd, cwd=repo, check=True)
            (repo / "f").write_text("x")
            subprocess.run(["git", "add", "."], cwd=repo, check=True)
            subprocess.run(["git", "commit", "-q", "-m", "i", "--no-gpg-sign"],
                           cwd=repo, check=True)
            head = subprocess.run(["git", "rev-parse", "HEAD"], cwd=repo,
                                  capture_output=True, text=True, check=True
                                  ).stdout.strip()

            graph = {"schema": "gitnexus@1", "nodes": [], "links": []}
            md = uq.stamp_metadata(graph, repo_dir=repo, tool_version="0.0.0-test")

            self.assertEqual(md["tool"], "codebase-memory-mcp")
            self.assertEqual(md["tool_version"], "0.0.0-test")
            self.assertEqual(md["commit"], head)
            self.assertTrue(md["generated_at"].endswith("Z"))


class BuildGraphTests(unittest.TestCase):
    """build_graph must project query_graph rows into a gitnexus@1 graph."""

    def test_projects_query_graph_rows_to_gitnexus_at_1(self) -> None:
        fake_nodes = [{"id": 1, "name": "AuthService", "kind": "Module"}]
        fake_edges = [{"source": 1, "target": 2, "kind": "DEPENDS_ON"}]

        def fake_query_rows(_binary: str, _project: str, query: str):
            # Only the node query selects `id(n)`; anything else is the edge query.
            if "id(n)" in query:
                return fake_nodes
            return fake_edges

        with mock.patch.object(uq, "_query_rows", side_effect=fake_query_rows):
            graph = uq.build_graph("/fake/binary", "demo")

        self.assertEqual(graph["schema"], "gitnexus@1")
        self.assertEqual(len(graph["nodes"]), 1)
        node = graph["nodes"][0]
        self.assertEqual(node["label"], "AuthService")
        self.assertEqual(node["kind"], "Module")
        self.assertEqual(len(graph["links"]), 1)
        link = graph["links"][0]
        self.assertEqual(link["source"], "1")
        self.assertEqual(link["kind"], "DEPENDS_ON")


class McpCallUnwrapTests(unittest.TestCase):
    """`_mcp_call` must unwrap the MCP `{content:[{type:'text',text:'...'}]}` envelope."""

    def _mcp_response(self, payload: dict, is_error: bool = False) -> bytes:
        # Two newline-delimited JSON-RPC replies: the initialize ack (id=1)
        # followed by the tools/call result (id=2) wrapping *payload* as text.
        init_reply = {"jsonrpc": "2.0", "id": 1, "result": {}}
        call_reply = {
            "jsonrpc": "2.0", "id": 2,
            "result": {
                "content": [{"type": "text", "text": json.dumps(payload)}],
                "isError": is_error,
            },
        }
        lines = [json.dumps(init_reply).encode(), json.dumps(call_reply).encode()]
        return b"\n".join(lines) + b"\n"

    def test_unwraps_text_content_and_decodes_json(self) -> None:
        stdout = self._mcp_response({"rows": [{"a": 1}]})
        fake_proc = mock.MagicMock(stdout=stdout)
        with mock.patch.object(uq.subprocess, "run", return_value=fake_proc):
            result = uq._mcp_call("/fake/bin", "query_graph",
                                  {"project": "p", "query": "q"})
        self.assertEqual(result, {"rows": [{"a": 1}]})

    def test_is_error_raises(self) -> None:
        stdout = self._mcp_response({"msg": "boom"}, is_error=True)
        fake_proc = mock.MagicMock(stdout=stdout)
        with mock.patch.object(uq.subprocess, "run", return_value=fake_proc):
            with self.assertRaises(RuntimeError):
                uq._mcp_call("/fake/bin", "query_graph",
                             {"project": "p", "query": "q"})


if __name__ == "__main__":
    # Direct execution: `python3 scripts/test_uq_publish.py` runs the suite.
    unittest.main()
243 changes: 243 additions & 0 deletions scripts/uq-publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#!/usr/bin/env python3
"""uq-publish.py — opt-in publish to the understand-quickly registry.

Uses the existing codebase-memory-mcp binary's MCP surface to extract a
node/edge graph for the current project, projects it to a `gitnexus@1`-shaped
JSON file at `.codebase-memory/graph.json`, stamps
`metadata.{tool, tool_version, generated_at, commit}`, and (when
`UNDERSTAND_QUICKLY_TOKEN` is set) fires a `repository_dispatch` event at
`looptech-ai/understand-quickly`.

Stdlib-only — no new dependencies. Mirrors the style of other
`scripts/test_mcp_rapid_init.py` helpers in this repo.

Usage:
python3 scripts/uq-publish.py [/path/to/binary] [--project <name>]

Spec: https://github.com/looptech-ai/understand-quickly/blob/main/docs/spec/code-graph-protocol.md
Action: https://github.com/looptech-ai/uq-publish-action
"""
from __future__ import annotations

import argparse
import datetime as _dt
import json
import os
import shutil
import subprocess # nosec B404 — fixed argv, no shell
import sys
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any, Optional

TOOL_NAME = "codebase-memory-mcp"
REGISTRY_REPO = "looptech-ai/understand-quickly"
TOKEN_ENV = "UNDERSTAND_QUICKLY_TOKEN"
DISPATCH_EVENT_TYPE = "uq-publish"
DEFAULT_OUT = Path(".codebase-memory") / "graph.json"


def _git(args: list, cwd: Path) -> Optional[str]:
try:
r = subprocess.run( # nosec B603
["git", *args], cwd=str(cwd), capture_output=True, text=True,
check=False, timeout=5,
)
except (FileNotFoundError, subprocess.SubprocessError):
return None
return r.stdout.strip() if r.returncode == 0 else None


def _detect_repo_slug(cwd: Path) -> Optional[str]:
    """Map the git ``origin`` remote URL to an ``owner/repo`` GitHub slug.

    Returns None when there is no origin remote or it is not a GitHub URL.
    """
    remote = _git(["remote", "get-url", "origin"], cwd) or ""
    github_prefixes = ("https://github.com/", "git@github.com:")
    for pfx in github_prefixes:
        if not remote.startswith(pfx):
            continue
        slug = remote[len(pfx):]
        if slug.endswith(".git"):
            slug = slug[: -len(".git")]
        return slug or None
    return None


def _mcp_call(binary: str, tool: str, args: dict, timeout: float = 30.0) -> Any:
"""Spawn the cbm binary, run a single tool call over stdio, return decoded result.

MCP `tools/call` results have shape ``{"content":[{"type":"text","text":"..."}],
"isError":bool}``. This helper unwraps the text payload and ``json.loads()``-es
it, raising ``RuntimeError`` on ``isError`` or unparseable text.
"""
msgs = (
b'{"jsonrpc":"2.0","id":1,"method":"initialize","params":'
b'{"protocolVersion":"2025-11-25","capabilities":{}}}\n'
b'{"jsonrpc":"2.0","method":"notifications/initialized"}\n'
+ json.dumps({
"jsonrpc": "2.0", "id": 2, "method": "tools/call",
"params": {"name": tool, "arguments": args},
}).encode("utf-8") + b"\n"
)
proc = subprocess.run( # nosec B603
[binary], input=msgs, capture_output=True, timeout=timeout, check=False,
)
result = None
for line in proc.stdout.splitlines():
try:
obj = json.loads(line)
except json.JSONDecodeError:
continue
if obj.get("id") == 2:
result = obj.get("result", {})
break
if result is None:
raise RuntimeError(f"no response from {binary} for tool {tool}")
content = result.get("content") if isinstance(result, dict) else None
if isinstance(content, list) and content and isinstance(content[0], dict):
text = content[0].get("text", "")
if result.get("isError"):
raise RuntimeError(f"{tool} returned isError: {text[:200]}")
try:
return json.loads(text) if text else {}
except json.JSONDecodeError:
# Tool returned a plain (non-JSON) text payload — surface as-is.
return {"text": text}
return result # already-decoded shape (e.g. test mocks)


def _query_rows(binary: str, project: str, query: str) -> list:
    """Run a Cypher *query* via ``query_graph``; return the rows list ([] on any odd shape)."""
    payload = _mcp_call(binary, "query_graph", {"project": project, "query": query})
    if not isinstance(payload, dict):
        return []
    # Different server builds name the list differently; take the first truthy one.
    for key in ("rows", "results", "data"):
        candidate = payload.get(key)
        if candidate:
            break
    else:
        candidate = []
    return candidate if isinstance(candidate, list) else []


def build_graph(binary: str, project: str) -> dict:
    """Project the in-memory KG to a `gitnexus@1`-shaped graph via `query_graph`.

    Runs two Cypher queries — Module-level nodes and their dependency edges —
    and normalizes every field to a string. Rows that are not dicts are
    skipped; when the queries return nothing, the graph is simply empty.
    """
    node_query = (
        "MATCH (n:Module) RETURN id(n) AS id, n.name AS name, labels(n)[0] AS kind "
        "LIMIT 5000"
    )
    edge_query = (
        "MATCH (a:Module)-[r]->(b:Module) "
        "RETURN id(a) AS source, id(b) AS target, type(r) AS kind LIMIT 20000"
    )
    raw_nodes = _query_rows(binary, project, node_query)
    raw_edges = _query_rows(binary, project, edge_query)

    nodes = []
    for idx, row in enumerate(raw_nodes):
        if not isinstance(row, dict):
            continue
        nodes.append({
            # Fall back to name, then positional index, when id is absent.
            "id": str(row.get("id", row.get("name", idx))),
            "label": str(row.get("name", "") or ""),
            "kind": str(row.get("kind", "Module") or "Module"),
        })

    links = []
    for row in raw_edges:
        if not isinstance(row, dict):
            continue
        links.append({
            "source": str(row.get("source", "")),
            "target": str(row.get("target", "")),
            "kind": str(row.get("kind", "DEPENDS_ON") or "DEPENDS_ON"),
        })

    return {"schema": "gitnexus@1", "nodes": nodes, "links": links}


def stamp_metadata(graph: dict, *, repo_dir: Path, tool_version: str) -> dict:
    """Set ``metadata.{tool, tool_version, generated_at[, commit]}`` on *graph*.

    Preserves any pre-existing metadata keys, writes the merged dict back onto
    ``graph["metadata"]``, and returns it. ``commit`` is only stamped when HEAD
    resolves to a full 40-character SHA.
    """
    existing = graph.get("metadata") or {}
    stamped = {
        **existing,
        "tool": TOOL_NAME,
        "tool_version": tool_version,
        "generated_at": _dt.datetime.now(_dt.timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        ),
    }
    head = _git(["rev-parse", "HEAD"], repo_dir) or ""
    if len(head) == 40:
        stamped["commit"] = head
    graph["metadata"] = stamped
    return stamped


def dispatch(repo_slug: str, *, token: str, schema: str, graph_path: str,
             commit: Optional[str], tool_version: str, timeout: float = 10.0) -> int:
    """POST a ``repository_dispatch`` event to the registry repo.

    Returns the HTTP status code; any HTTP/network error propagates to the
    caller as the usual ``urllib`` exceptions.
    """
    client_payload = {
        "repo": repo_slug, "schema": schema, "graph_path": graph_path,
        "tool": TOOL_NAME, "tool_version": tool_version,
    }
    if commit:
        client_payload["commit"] = commit
    body = json.dumps({
        "event_type": DISPATCH_EVENT_TYPE,
        "client_payload": client_payload,
    }).encode("utf-8")
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": f"{TOOL_NAME}/{tool_version}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    req = urllib.request.Request(  # nosec B310
        f"https://api.github.com/repos/{REGISTRY_REPO}/dispatches",
        data=body,
        headers=headers,
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:  # nosec B310
        return resp.status


def main() -> int:
    """CLI entry point: extract the graph, write it locally, optionally dispatch.

    Returns a process exit code: 1 when graph extraction fails, 0 otherwise.
    Dispatch problems are reported but never fail the run — the local file has
    already been written by that point.

    Fix: removed a stray GitHub review-UI line ("Comment on lines ...") that
    had been pasted into the function body and broke the script's syntax.
    """
    p = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    # Binary resolution: explicit positional arg > PATH lookup (long name,
    # then short alias) > bare name, letting subprocess surface a clear error.
    p.add_argument("binary", nargs="?", default=shutil.which("codebase-memory-mcp")
                   or shutil.which("cbm") or "codebase-memory-mcp")
    p.add_argument("--project", default=os.path.basename(os.getcwd()))
    p.add_argument("--out", type=Path, default=DEFAULT_OUT)
    p.add_argument("--tool-version", default=os.environ.get("CBM_VERSION", "unknown"))
    args = p.parse_args()

    cwd = Path.cwd()
    try:
        graph = build_graph(args.binary, args.project)
    except Exception as exc:
        print(f"[uq-publish] could not extract graph via {args.binary}: {exc}",
              file=sys.stderr)
        return 1

    metadata = stamp_metadata(graph, repo_dir=cwd, tool_version=args.tool_version)
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(json.dumps(graph, indent=2), encoding="utf-8")
    print(f"[uq-publish] wrote {args.out} ({len(graph['nodes'])} nodes, "
          f"{len(graph['links'])} edges)")

    # Publishing is opt-in: without the token, stop after the local write.
    token = os.environ.get(TOKEN_ENV, "").strip()
    if not token:
        print(f"[uq-publish] ${TOKEN_ENV} unset — local file stamped, "
              f"skipping dispatch (use looptech-ai/uq-publish-action in CI).")
        return 0

    repo_slug = _detect_repo_slug(cwd)
    if not repo_slug:
        print("[uq-publish] no github 'origin' remote — skipping dispatch.")
        return 0

    try:
        status = dispatch(
            repo_slug, token=token, schema="gitnexus@1",
            graph_path=str(args.out), commit=metadata.get("commit"),
            tool_version=args.tool_version,
        )
    except urllib.error.HTTPError as exc:
        # 404 from the dispatches endpoint: the repo isn't registered yet —
        # point at the one-time registration flow rather than failing.
        if exc.code == 404:
            print(f"[uq-publish] {repo_slug} not in registry — register once "
                  "with: npx @understand-quickly/cli add")
            return 0
        print(f"[uq-publish] dispatch failed ({exc.code}); local file stamped.")
        return 0
    except (urllib.error.URLError, OSError) as exc:
        print(f"[uq-publish] dispatch failed ({exc}); local file stamped.")
        return 0

    print(f"[uq-publish] dispatched to {REGISTRY_REPO} (HTTP {status}) for "
          f"{repo_slug}.")
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    sys.exit(main())