diff --git a/.coveragerc-unit b/.coveragerc-unit
new file mode 100644
index 0000000..b5d5a89
--- /dev/null
+++ b/.coveragerc-unit
@@ -0,0 +1,7 @@
+[run]
+omit =
+ src/theow/_codegraph/*
+ src/theow/codegraph.py
+
+[report]
+fail_under = 85
diff --git a/README.md b/README.md
index 06401df..9fea54d 100644
--- a/README.md
+++ b/README.md
@@ -391,6 +391,21 @@ def run_safe_command(cmd: str) -> dict:
This is the key to secure automation. You define the blast radius. The LLM operates within those boundaries.
+### CodeGraph
+
+CodeGraph gives the explorer structural awareness of your codebase. Instead of reading entire files to orient itself, the LLM queries a tree-sitter-based graph for symbols, call chains, imports, and class hierarchies.
+
+```python
+from theow.codegraph import CodeGraph
+
+graph = CodeGraph(root="./src")
+pipeline_agent.tool()(graph.search_code)
+```
+
+The LLM gets a single `search_code` tool that supports multiple scopes: find symbols by name, trace callers/callees, list file contents, follow class hierarchies, and find paths between symbols.
+
+CodeGraph is an optional dependency — install with `pip install theow[codegraph]`. See the [CodeGraph README](src/theow/_codegraph/README.md) for full documentation.
+
## LLM Based Actions
Rules can invoke the LLM directly on match instead of running a deterministic action. Useful for failures that need dynamic investigation rather than a fixed fix.
diff --git a/pyproject.toml b/pyproject.toml
index ddeacc4..7e7b6ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,11 @@ theow = "theow._cli:app"
daemon = [
# Future: trio, httpx, etc. for server mode
]
+codegraph = [
+ "tree-sitter>=0.23",
+ "tree-sitter-python>=0.23",
+ "tree-sitter-go>=0.23",
+]
[dependency-groups]
dev = [
@@ -45,6 +50,9 @@ path = "src/theow/_version.py"
[tool.hatch.build.targets.wheel]
packages = ["src/theow"]
+[tool.ty.src]
+exclude = ["src/theow/_codegraph/examples/"]
+
[tool.ruff]
line-length = 100
target-version = "py312"
diff --git a/src/theow/_codegraph/README.md b/src/theow/_codegraph/README.md
new file mode 100644
index 0000000..27cb9c3
--- /dev/null
+++ b/src/theow/_codegraph/README.md
@@ -0,0 +1,106 @@
+
+
+# CodeGraph
+
+Tree-sitter-based code structure graph for Theow's LLM explorer. Instead of reading entire files to orient itself (~4000+ tokens), the explorer queries the graph for symbols, call chains, imports, and class hierarchies (~260 tokens).
+
+
+
+
+
+## Install
+
+CodeGraph is an optional dependency:
+
+```bash
+pip install theow[codegraph]
+```
+
+## Usage
+
+```python
+from theow import Theow
+from theow.codegraph import CodeGraph
+
+graph = CodeGraph(root="./src")
+
+engine = Theow(theow_dir=".theow", llm="anthropic/claude-sonnet-4-20250514")
+engine.tool()(graph.search_code)
+```
+
+The graph is built automatically on the first `search_code` call. The LLM gets a single tool that covers all navigation needs.
+
+## `search_code` API
+
+| Parameter | Description |
+|-----------|-------------|
+| `query` | Symbol name or substring to search for |
+| `kind` | Filter by type: `"function"`, `"class"`, `"module"` |
+| `scope` | What to search (see below) |
+| `file` | Filter to a specific file, or the file to list for the `"file"` scope (path relative to `root`, matched exactly) |
+| `line` | Line number in `file`; used with the `"definition"` scope to find the symbol at that location |
+| `target` | Target symbol for `"path"` scope |
+
+### Scopes
+
+| Scope | Description | Example |
+|-------|-------------|---------|
+| `symbol` | Find symbols by name (default) | `search_code(query="Rule", kind="class")` |
+| `callers` | Who calls this symbol? | `search_code(query="matches", scope="callers")` |
+| `callees` | What does this symbol call? | `search_code(query="build", scope="callees")` |
+| `references` | All incoming/outgoing relationships | `search_code(query="LLMGateway", scope="references")` |
+| `definition` | Where is this symbol defined? | `search_code(scope="definition", file="models.py", line=42)` |
+| `file` | List all symbols in a file | `search_code(scope="file", file="_core/_models.py")` |
+| `path` | Find relationship path between two symbols | `search_code(query="module.py", scope="path", target="Rule")` |
+
+## Language Support
+
+Visitors extract structure from source files using tree-sitter. Currently supported:
+
+- **Python** — functions, classes, methods, imports, calls, decorators, docstrings
+- **Go** — functions, methods with receivers, structs, interfaces, imports, calls, struct embedding
+
+Languages are configured explicitly:
+
+```python
+graph = CodeGraph(root="./src", languages=["python", "go"])
+```
+
+Defaults to `["python"]` if not specified.
+
+## Configuration
+
+```python
+graph = CodeGraph(
+ root="./src",
+ languages=["python", "go"], # languages to parse
+ excludes={"vendor", "testdata"}, # directories to skip
+ max_file_size=1_000_000, # skip files larger than this (bytes)
+)
+```
+
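+Default excludes: `__pycache__`, `.git`, `.tox`, `.venv`, `venv`, `node_modules`, `dist`, `build`, `.mypy_cache`, `.ruff_cache`, `.pytest_cache`. Passing `excludes` replaces this set rather than extending it, so include any defaults you still want skipped.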
+
+## Serialization
+
+```python
+# Save to JSON
+graph.to_json("graph.json")
+
+# Load from cache
+graph = CodeGraph.from_json("graph.json")
+
+# Get JSON string
+json_str = graph.to_json()
+```
+
+## How It Works
+
+1. **Parse**: Tree-sitter visitors walk source files and extract `Node` (symbols) and `Edge` (relationships) objects
+2. **Index**: Nodes are indexed by file path and short name for fast lookup
+3. **Resolve**: Symbolic call and inheritance targets (short names like `helper`) are resolved to fully qualified node IDs, preferring same-file matches
+4. **Query**: `search_code` navigates the graph using adjacency lists and BFS — no external graph library needed
diff --git a/src/theow/_codegraph/__init__.py b/src/theow/_codegraph/__init__.py
new file mode 100644
index 0000000..7e631d9
--- /dev/null
+++ b/src/theow/_codegraph/__init__.py
@@ -0,0 +1,5 @@
+"""CodeGraph: tree-sitter based code structure graph for LLM exploration."""
+
+from theow._codegraph._graph import CodeGraph
+
+__all__ = ["CodeGraph"]
diff --git a/src/theow/_codegraph/_graph.py b/src/theow/_codegraph/_graph.py
new file mode 100644
index 0000000..85b4a92
--- /dev/null
+++ b/src/theow/_codegraph/_graph.py
@@ -0,0 +1,457 @@
+"""CodeGraph: tree-sitter based code structure graph."""
+
+import json
+from collections import deque
+from pathlib import Path
+
+from theow._codegraph._models import Edge, Node, SearchResult
+from theow._codegraph._visitors import LanguageVisitor, load_visitors
+from theow._core._logging import get_logger
+
+logger = get_logger(__name__)
+
+DEFAULT_EXCLUDES = frozenset(
+ {
+ "__pycache__",
+ ".git",
+ ".tox",
+ ".venv",
+ "venv",
+ "node_modules",
+ "dist",
+ "build",
+ ".mypy_cache",
+ ".ruff_cache",
+ ".pytest_cache",
+ }
+)
+
+
+class CodeGraph:
+ """A queryable graph of code symbols and relationships.
+
+ Parses source files with tree-sitter visitors and builds a directed graph
+ of modules, classes, functions, and their relationships (calls, imports,
+ inheritance, containment).
+
+ Args:
+ root: Root directory of the codebase to index.
+ languages: Language names to load visitors for (e.g. ["python", "go"]).
+ Defaults to ["python"].
+ visitors: Pre-built visitor instances. Overrides ``languages`` if given.
+ excludes: Directory names to skip during traversal.
+ max_file_size: Skip files larger than this (bytes).
+ """
+
+ def __init__(
+ self,
+ root: str | Path,
+ languages: list[str] | None = None,
+ visitors: list[LanguageVisitor] | None = None,
+ excludes: set[str] | None = None,
+ max_file_size: int = 1_000_000,
+ ) -> None:
+ self._root = Path(root).resolve()
+ self._excludes = excludes if excludes is not None else set(DEFAULT_EXCLUDES)
+ self._max_file_size = max_file_size
+ self._built = False
+
+ if visitors is not None:
+ self._visitors = list(visitors)
+ else:
+ self._visitors = load_visitors(languages or ["python"])
+
+ # Build extension -> visitor lookup
+ self._ext_map: dict[str, LanguageVisitor] = {}
+ for v in self._visitors:
+ for ext in v.extensions:
+ self._ext_map[ext] = v
+
+ # Internal state — adjacency lists for directed edges
+ self._fwd: dict[str, dict[str, dict]] = {} # source -> {target -> edge_data}
+ self._rev: dict[str, dict[str, dict]] = {} # target -> {source -> edge_data}
+ self._nodes: dict[str, Node] = {}
+ self._file_index: dict[str, list[str]] = {}
+ self._name_index: dict[str, list[str]] = {}
+
+ def build(self) -> None:
+ """Parse all files and build the graph. Idempotent."""
+ if self._built:
+ self._fwd.clear()
+ self._rev.clear()
+ self._nodes.clear()
+ self._file_index.clear()
+ self._name_index.clear()
+
+ files_parsed = 0
+ for path in self._iter_files():
+ self.build_file(path)
+ files_parsed += 1
+
+ resolved, unresolved = self._resolve_edges()
+ self._built = True
+ logger.info(
+ "Code graph built",
+ files=files_parsed,
+ nodes=len(self._nodes),
+ edges=self._edge_count(),
+ resolved_refs=resolved,
+ unresolved_refs=unresolved,
+ )
+
+ def build_file(self, path: Path) -> None:
+ """Parse a single file and add its nodes/edges to the graph."""
+ try:
+ source = path.read_text(encoding="utf-8", errors="replace")
+ except OSError:
+ return
+
+ relative_path = str(path.relative_to(self._root))
+ ext = path.suffix
+ visitor = self._ext_map.get(ext)
+ if not visitor:
+ return
+
+ try:
+ nodes, edges = visitor.parse_file(path, source, relative_path)
+ except Exception as e:
+ logger.warning("Parse failed", file=relative_path, error=str(e))
+ return
+
+ for node in nodes:
+ self._nodes[node.id] = node
+ self._file_index.setdefault(node.file, []).append(node.id)
+ self._name_index.setdefault(node.name, []).append(node.id)
+
+ for edge in edges:
+ self._add_edge(edge.source, edge.target, kind=edge.kind, line=edge.line)
+
+ def _iter_files(self):
+ """Walk root directory, yielding files that match visitor extensions."""
+ for path in sorted(self._root.rglob("*")):
+ if not path.is_file():
+ continue
+ if any(part in self._excludes for part in path.parts):
+ continue
+ if path.stat().st_size > self._max_file_size:
+ logger.debug("Skipping large file", file=str(path.relative_to(self._root)))
+ continue
+ if path.suffix in self._ext_map:
+ yield path
+
+ def _resolve_edges(self) -> tuple[int, int]:
+ """Resolve symbolic call/inheritance targets to fully qualified node IDs.
+
+ For "calls" and "inherits" edges whose target is not a known node ID
+ (e.g. the short name "helper"), try to resolve it to a fully qualified
+ node ID. Same-file preference breaks ambiguity.
+
+ Returns:
+ (resolved_count, unresolved_count) tuple.
+ """
+ edges_to_resolve = []
+ for source, targets in self._fwd.items():
+ for target, data in targets.items():
+ if data.get("kind") in ("calls", "inherits") and target not in self._nodes:
+ edges_to_resolve.append((source, target, data))
+
+ resolved_count = 0
+ for source, target, data in edges_to_resolve:
+ resolved = self._resolve_name(target, source)
+ if resolved and resolved != target:
+ self._remove_edge(source, target)
+ self._add_edge(source, resolved, **data)
+ resolved_count += 1
+
+ return resolved_count, len(edges_to_resolve) - resolved_count
+
+ def _resolve_name(self, name: str, context_id: str) -> str | None:
+ """Resolve a short name to a node ID, preferring same-file matches."""
+ # For attribute access like "self.method" or "obj.method", use the last part
+ short = name.rsplit(".", 1)[-1] if "." in name else name
+
+ candidates = self._name_index.get(short, [])
+ if not candidates:
+ return None
+ if len(candidates) == 1:
+ return candidates[0]
+
+ # Prefer same-file
+ context_file = self._nodes[context_id].file if context_id in self._nodes else ""
+ for cid in candidates:
+ if cid in self._nodes and self._nodes[cid].file == context_file:
+ return cid
+
+ return candidates[0]
+
+ def _add_edge(self, source: str, target: str, **data) -> None:
+ self._fwd.setdefault(source, {})[target] = data
+ self._rev.setdefault(target, {})[source] = data
+
+ def _remove_edge(self, source: str, target: str) -> None:
+ if source in self._fwd:
+ self._fwd[source].pop(target, None)
+ if target in self._rev:
+ self._rev[target].pop(source, None)
+
+ def _edge_count(self) -> int:
+ return sum(len(targets) for targets in self._fwd.values())
+
+ def _bfs_path(self, source: str, target: str) -> list[str] | None:
+ """BFS shortest path from source to target."""
+ if source == target:
+ return [source]
+ visited = {source}
+ queue: deque[list[str]] = deque([[source]])
+ while queue:
+ path = queue.popleft()
+ for neighbor in self._fwd.get(path[-1], {}):
+ if neighbor == target:
+ return path + [neighbor]
+ if neighbor not in visited:
+ visited.add(neighbor)
+ queue.append(path + [neighbor])
+ return None
+
+ def search_code(
+ self,
+ query: str = "",
+ kind: str = "",
+ scope: str = "symbol",
+ file: str = "",
+ line: int = 0,
+ target: str = "",
+ ) -> list[dict]:
+ """Search the codebase structure. Navigate classes, functions, imports, and call relationships.
+
+ Args:
+ query: Symbol name or substring to search for.
+ kind: Filter by type: "function", "class", or "module".
+ scope: What to search for:
+ - "symbol": Find symbols by name (default)
+ - "callers": Who calls this symbol?
+ - "callees": What does this symbol call?
+ - "references": All incoming/outgoing relationships
+ - "definition": Where is this symbol defined?
+ - "file": List all symbols in a file
+ - "path": Find relationship path between query and target
+ file: Filter to a specific file, or target file for "file" scope.
+ line: Find the symbol at this line number in file.
+ target: Target symbol for "path" scope.
+ """
+ if not self._built:
+ self.build()
+
+ logger.debug("search_code", scope=scope, query=query, kind=kind, file=file)
+
+ if scope == "file":
+ return self._search_file(file or query)
+ if scope == "callers":
+ return self._search_callers(query, kind)
+ if scope == "callees":
+ return self._search_callees(query, kind)
+ if scope == "references":
+ return self._search_references(query)
+ if scope == "definition":
+ return self._search_definition(query, file, line)
+ if scope == "path":
+ return self._search_path(query, target)
+ # Default: symbol search
+ return self._search_symbol(query, kind, file)
+
+ def _search_symbol(self, query: str, kind: str, file: str) -> list[dict]:
+ """Find symbols matching query by name."""
+ results: list[SearchResult] = []
+ for node_id, node in self._nodes.items():
+ if kind and node.kind != kind:
+ continue
+ if file and node.file != file:
+ continue
+ if query and query.lower() not in node.name.lower() and query.lower() not in node_id.lower():
+ continue
+ relevance = "exact" if node.name == query else "substring"
+ results.append(SearchResult(node=node, relevance=relevance))
+
+ # Sort: exact matches first, then by file + line
+ results.sort(key=lambda r: (r.relevance != "exact", r.node.file, r.node.line))
+ return [r.to_dict() for r in results[:50]]
+
+ def _search_file(self, file: str) -> list[dict]:
+ """List all symbols in a file."""
+ node_ids = self._file_index.get(file, [])
+ results = []
+ for nid in node_ids:
+ node = self._nodes[nid]
+ results.append(SearchResult(node=node).to_dict())
+ results.sort(key=lambda r: r["line"])
+ return results
+
+ def _search_callers(self, query: str, kind: str) -> list[dict]:
+ """Find symbols that call the queried symbol."""
+ target_ids = self._find_node_ids(query, kind)
+ results: list[dict] = []
+ for tid in target_ids:
+ for pred, edge_data in self._rev.get(tid, {}).items():
+ if edge_data.get("kind") == "calls" and pred in self._nodes:
+ results.append(
+ SearchResult(
+ node=self._nodes[pred],
+ context=f"calls {tid}",
+ relevance="caller",
+ ).to_dict()
+ )
+ return results
+
+ def _search_callees(self, query: str, kind: str) -> list[dict]:
+ """Find symbols that the queried symbol calls."""
+ source_ids = self._find_node_ids(query, kind)
+ results: list[dict] = []
+ for sid in source_ids:
+ for succ, edge_data in self._fwd.get(sid, {}).items():
+ if edge_data.get("kind") == "calls" and succ in self._nodes:
+ results.append(
+ SearchResult(
+ node=self._nodes[succ],
+ context=f"called by {sid}",
+ relevance="callee",
+ ).to_dict()
+ )
+ return results
+
+ def _search_references(self, query: str) -> list[dict]:
+ """Find all incoming/outgoing relationships for a symbol."""
+ node_ids = self._find_node_ids(query, "")
+ results: list[dict] = []
+ for nid in node_ids:
+ # Incoming
+ for pred, edge_data in self._rev.get(nid, {}).items():
+ edge_kind = edge_data.get("kind", "unknown")
+ if pred in self._nodes:
+ results.append(
+ SearchResult(
+ node=self._nodes[pred],
+ context=f"{edge_kind} -> {nid}",
+ relevance="incoming",
+ ).to_dict()
+ )
+ # Outgoing
+ for succ, edge_data in self._fwd.get(nid, {}).items():
+ edge_kind = edge_data.get("kind", "unknown")
+ if succ in self._nodes:
+ results.append(
+ SearchResult(
+ node=self._nodes[succ],
+ context=f"{nid} {edge_kind} ->",
+ relevance="outgoing",
+ ).to_dict()
+ )
+ return results
+
+ def _search_definition(self, query: str, file: str, line: int) -> list[dict]:
+ """Find the definition of a symbol."""
+ if file and line:
+ # Find symbol at file:line
+ node_ids = self._file_index.get(file, [])
+ for nid in node_ids:
+ node = self._nodes[nid]
+ if node.line <= line <= (node.end_line or node.line):
+ if node.kind != "module":
+ return [SearchResult(node=node, relevance="exact").to_dict()]
+
+ # Fall back to name search
+ return self._search_symbol(query, "", file)
+
+ def _search_path(self, query: str, target: str) -> list[dict]:
+ """Find relationship path between two symbols."""
+ source_ids = self._find_node_ids(query, "")
+ target_ids = self._find_node_ids(target, "")
+
+ for sid in source_ids:
+ for tid in target_ids:
+ path = self._bfs_path(sid, tid)
+ if path is None:
+ continue
+
+ results: list[dict] = []
+ for i, nid in enumerate(path):
+ if nid in self._nodes:
+ ctx = ""
+ if i < len(path) - 1:
+ edge_data = self._fwd.get(nid, {}).get(path[i + 1], {})
+ ctx = f"--{edge_data.get('kind', '?')}--> {path[i + 1]}"
+ results.append(
+ SearchResult(
+ node=self._nodes[nid],
+ context=ctx,
+ relevance=f"step {i}",
+ ).to_dict()
+ )
+ return results
+
+ return []
+
+ def _find_node_ids(self, query: str, kind: str) -> list[str]:
+ """Find node IDs matching a query string."""
+ # Exact ID match
+ if query in self._nodes:
+ return [query]
+
+ # Exact name match
+ candidates = self._name_index.get(query, [])
+ if candidates:
+ if kind:
+ candidates = [c for c in candidates if self._nodes[c].kind == kind]
+ return candidates
+
+ # Substring search
+ results = []
+ for nid, node in self._nodes.items():
+ if kind and node.kind != kind:
+ continue
+ if query.lower() in node.name.lower():
+ results.append(nid)
+ return results
+
+ def to_json(self, path: str | Path | None = None) -> str:
+ """Serialize graph to JSON."""
+ edges = []
+ for source, targets in self._fwd.items():
+ for target, data in targets.items():
+ edges.append(
+ Edge(
+ source=source,
+ target=target,
+ kind=data.get("kind", ""),
+ line=data.get("line", 0),
+ ).to_dict()
+ )
+ data = {
+ "root": str(self._root),
+ "nodes": [n.to_dict() for n in self._nodes.values()],
+ "edges": edges,
+ }
+ result = json.dumps(data, indent=2)
+ if path:
+ Path(path).write_text(result)
+ return result
+
+ @classmethod
+ def from_json(cls, path: str | Path) -> "CodeGraph":
+ """Load a cached graph from JSON."""
+ raw = json.loads(Path(path).read_text())
+ graph = cls(root=raw["root"], visitors=[])
+ for node_data in raw["nodes"]:
+ node = Node.from_dict(node_data)
+ graph._nodes[node.id] = node
+ graph._file_index.setdefault(node.file, []).append(node.id)
+ graph._name_index.setdefault(node.name, []).append(node.id)
+ for edge_data in raw["edges"]:
+ edge = Edge.from_dict(edge_data)
+ graph._add_edge(edge.source, edge.target, kind=edge.kind, line=edge.line)
+ graph._built = True
+ logger.info(
+ "Code graph loaded from cache",
+ nodes=len(graph._nodes),
+ edges=graph._edge_count(),
+ )
+ return graph
diff --git a/src/theow/_codegraph/_models.py b/src/theow/_codegraph/_models.py
new file mode 100644
index 0000000..2301a54
--- /dev/null
+++ b/src/theow/_codegraph/_models.py
@@ -0,0 +1,113 @@
+"""Data models for the code graph."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass(frozen=True)
+class Node:
+ """A symbol in the code graph (module, class, or function)."""
+
+ id: str
+ kind: str # "module" | "class" | "function"
+ name: str
+ file: str
+ line: int
+ end_line: int = 0
+ signature: str = ""
+ docstring: str = ""
+ parent: str = ""
+
+ def to_dict(self) -> dict[str, Any]:
+ """Convert to dictionary."""
+ d: dict[str, Any] = {
+ "id": self.id,
+ "kind": self.kind,
+ "name": self.name,
+ "file": self.file,
+ "line": self.line,
+ }
+ if self.end_line:
+ d["end_line"] = self.end_line
+ if self.signature:
+ d["signature"] = self.signature
+ if self.docstring:
+ d["docstring"] = self.docstring
+ if self.parent:
+ d["parent"] = self.parent
+ return d
+
+ @classmethod
+ def from_dict(cls, data: dict[str, Any]) -> Node:
+ """Create Node from dictionary."""
+ return cls(
+ id=data["id"],
+ kind=data["kind"],
+ name=data["name"],
+ file=data["file"],
+ line=data["line"],
+ end_line=data.get("end_line", 0),
+ signature=data.get("signature", ""),
+ docstring=data.get("docstring", ""),
+ parent=data.get("parent", ""),
+ )
+
+
+@dataclass(frozen=True)
+class Edge:
+ """A relationship between two nodes in the code graph."""
+
+ source: str
+ target: str
+ kind: str # "defines" | "contains" | "calls" | "imports" | "inherits"
+ line: int = 0
+
+ def to_dict(self) -> dict[str, Any]:
+ """Convert to dictionary."""
+ d: dict[str, Any] = {
+ "source": self.source,
+ "target": self.target,
+ "kind": self.kind,
+ }
+ if self.line:
+ d["line"] = self.line
+ return d
+
+ @classmethod
+ def from_dict(cls, data: dict[str, Any]) -> Edge:
+ """Create Edge from dictionary."""
+ return cls(
+ source=data["source"],
+ target=data["target"],
+ kind=data["kind"],
+ line=data.get("line", 0),
+ )
+
+
+@dataclass(frozen=True)
+class SearchResult:
+ """A search result wrapping a Node with context."""
+
+ node: Node
+ context: str = ""
+ relevance: str = ""
+
+ def to_dict(self) -> dict[str, Any]:
+ """Convert to dictionary."""
+ d: dict[str, Any] = self.node.to_dict()
+ if self.context:
+ d["context"] = self.context
+ if self.relevance:
+ d["relevance"] = self.relevance
+ return d
+
+ @classmethod
+ def from_dict(cls, data: dict[str, Any]) -> SearchResult:
+ """Create SearchResult from dictionary."""
+ return cls(
+ node=Node.from_dict(data),
+ context=data.get("context", ""),
+ relevance=data.get("relevance", ""),
+ )
diff --git a/src/theow/_codegraph/_visitors/__init__.py b/src/theow/_codegraph/_visitors/__init__.py
new file mode 100644
index 0000000..f77bdde
--- /dev/null
+++ b/src/theow/_codegraph/_visitors/__init__.py
@@ -0,0 +1,50 @@
+"""Language visitor protocol and registry."""
+
+from __future__ import annotations
+
+import importlib
+from pathlib import Path
+from typing import Protocol
+
+from theow._codegraph._models import Edge, Node
+
+
+class LanguageVisitor(Protocol):
+ """Protocol for language-specific tree-sitter visitors."""
+
+ extensions: list[str]
+
+ def parse_file(
+ self, path: Path, source: str, relative_path: str
+ ) -> tuple[list[Node], list[Edge]]: ...
+
+
+# Registry mapping language name -> (module_path, class_name)
+BUILTIN_VISITORS: dict[str, tuple[str, str]] = {
+ "python": ("theow._codegraph._visitors._python", "PythonVisitor"),
+ "go": ("theow._codegraph._visitors._go", "GoVisitor"),
+}
+
+
+def load_visitors(languages: list[str]) -> list[LanguageVisitor]:
+ """Load visitors for the specified languages.
+
+ Args:
+ languages: Language names to load (e.g. ["python", "go"]).
+
+ Raises:
+ ValueError: If a language name is not in the registry.
+ ImportError: If a language's tree-sitter grammar is not installed.
+ """
+ visitors: list[LanguageVisitor] = []
+ for lang in languages:
+ entry = BUILTIN_VISITORS.get(lang)
+ if entry is None:
+ raise ValueError(
+ f"Unknown language: {lang!r}. Available: {sorted(BUILTIN_VISITORS)}"
+ )
+ module_path, class_name = entry
+ module = importlib.import_module(module_path)
+ cls = getattr(module, class_name)
+ visitors.append(cls())
+ return visitors
diff --git a/src/theow/_codegraph/_visitors/_go.py b/src/theow/_codegraph/_visitors/_go.py
new file mode 100644
index 0000000..8791525
--- /dev/null
+++ b/src/theow/_codegraph/_visitors/_go.py
@@ -0,0 +1,302 @@
+"""Go language visitor using tree-sitter."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import tree_sitter_go as tsgo
+from tree_sitter import Language, Node as TSNode, Parser
+
+from theow._codegraph._models import Edge, Node
+from theow._codegraph._visitors._utils import child_by_type, last_child_by_type, text
+
+GO_LANGUAGE = Language(tsgo.language())
+
+
+class GoVisitor:
+ """Extract nodes and edges from Go source files."""
+
+ extensions: list[str] = [".go"]
+
+ def __init__(self) -> None:
+ self._parser = Parser(GO_LANGUAGE)
+
+ def parse_file(
+ self, path: Path, source: str, relative_path: str
+ ) -> tuple[list[Node], list[Edge]]:
+ """Parse a Go file and extract structural nodes and edges."""
+ tree = self._parser.parse(source.encode())
+ nodes: list[Node] = []
+ edges: list[Edge] = []
+
+ module_id = relative_path
+ nodes.append(
+ Node(
+ id=module_id,
+ kind="module",
+ name=Path(relative_path).stem,
+ file=relative_path,
+ line=1,
+ end_line=source.count("\n") + 1,
+ )
+ )
+
+ for child in tree.root_node.children:
+ if child.type == "function_declaration":
+ self._extract_function(child, module_id, relative_path, nodes, edges)
+ elif child.type == "method_declaration":
+ self._extract_method(child, module_id, relative_path, nodes, edges)
+ elif child.type == "type_declaration":
+ self._extract_type_declaration(child, module_id, relative_path, nodes, edges)
+ elif child.type == "import_declaration":
+ self._extract_imports(child, module_id, relative_path, edges)
+
+ return nodes, edges
+
+ def _extract_function(
+ self,
+ node: TSNode,
+ parent_id: str,
+ relative_path: str,
+ nodes: list[Node],
+ edges: list[Edge],
+ ) -> None:
+ name_node = child_by_type(node, "identifier")
+ if not name_node:
+ return
+
+ name = text(name_node)
+ func_id = f"{parent_id}::{name}"
+
+ params = child_by_type(node, "parameter_list")
+ result = child_by_type(node, "result")
+ sig = f"func {name}"
+ if params:
+ sig += text(params)
+ if result:
+ sig += " " + text(result)
+
+ start_line = node.start_point[0] + 1
+ end_line = node.end_point[0] + 1
+
+ nodes.append(
+ Node(
+ id=func_id,
+ kind="function",
+ name=name,
+ file=relative_path,
+ line=start_line,
+ end_line=end_line,
+ signature=sig,
+ parent=parent_id,
+ )
+ )
+ edges.append(Edge(source=parent_id, target=func_id, kind="contains", line=start_line))
+
+ body = child_by_type(node, "block")
+ if body:
+ self._extract_calls_in_body(body, func_id, relative_path, edges)
+
+ def _extract_method(
+ self,
+ node: TSNode,
+ module_id: str,
+ relative_path: str,
+ nodes: list[Node],
+ edges: list[Edge],
+ ) -> None:
+ name_node = child_by_type(node, "field_identifier")
+ if not name_node:
+ return
+
+ name = text(name_node)
+
+ receiver_node = child_by_type(node, "parameter_list")
+ receiver_type = ""
+ if receiver_node:
+ for ch in receiver_node.children:
+ if ch.type == "parameter_declaration":
+ type_node = last_child_by_type(
+ ch, "type_identifier"
+ ) or last_child_by_type(ch, "pointer_type")
+ if type_node:
+ receiver_type = text(type_node).lstrip("*")
+ break
+
+ receiver_id = f"{module_id}::{receiver_type}" if receiver_type else module_id
+ method_id = f"{receiver_id}::{name}"
+
+ params_nodes = [c for c in node.children if c.type == "parameter_list"]
+ params_text = text(params_nodes[1]) if len(params_nodes) > 1 else "()"
+ result = child_by_type(node, "result")
+ sig = f"func ({receiver_type}) {name}{params_text}"
+ if result:
+ sig += " " + text(result)
+
+ start_line = node.start_point[0] + 1
+ end_line = node.end_point[0] + 1
+
+ nodes.append(
+ Node(
+ id=method_id,
+ kind="function",
+ name=name,
+ file=relative_path,
+ line=start_line,
+ end_line=end_line,
+ signature=sig,
+ parent=receiver_id,
+ )
+ )
+ edges.append(
+ Edge(source=receiver_id, target=method_id, kind="contains", line=start_line)
+ )
+
+ body = child_by_type(node, "block")
+ if body:
+ self._extract_calls_in_body(body, method_id, relative_path, edges)
+
+ def _extract_type_declaration(
+ self,
+ node: TSNode,
+ parent_id: str,
+ relative_path: str,
+ nodes: list[Node],
+ edges: list[Edge],
+ ) -> None:
+ for child in node.children:
+ if child.type == "type_spec":
+ self._extract_type_spec(child, parent_id, relative_path, nodes, edges)
+
+ def _extract_type_spec(
+ self,
+ node: TSNode,
+ parent_id: str,
+ relative_path: str,
+ nodes: list[Node],
+ edges: list[Edge],
+ ) -> None:
+ name_node = child_by_type(node, "type_identifier")
+ if not name_node:
+ return
+
+ name = text(name_node)
+ type_id = f"{parent_id}::{name}"
+
+ struct_type = child_by_type(node, "struct_type")
+ kind = "class"
+
+ start_line = node.start_point[0] + 1
+ end_line = node.end_point[0] + 1
+
+ nodes.append(
+ Node(
+ id=type_id,
+ kind=kind,
+ name=name,
+ file=relative_path,
+ line=start_line,
+ end_line=end_line,
+ parent=parent_id,
+ )
+ )
+ edges.append(Edge(source=parent_id, target=type_id, kind="contains", line=start_line))
+
+ if struct_type:
+ field_list = child_by_type(struct_type, "field_declaration_list")
+ if field_list:
+ for field in field_list.children:
+ if field.type == "field_declaration":
+ children = [c for c in field.children if c.type != "comment"]
+ if len(children) == 1 and children[0].type == "type_identifier":
+ embedded = text(children[0])
+ edges.append(
+ Edge(
+ source=type_id,
+ target=embedded,
+ kind="inherits",
+ line=field.start_point[0] + 1,
+ )
+ )
+
+ def _extract_imports(
+ self,
+ node: TSNode,
+ parent_id: str,
+ relative_path: str,
+ edges: list[Edge],
+ ) -> None:
+ """Extract import declarations."""
+ for child in node.children:
+ if child.type == "import_spec":
+ path_node = child_by_type(child, "interpreted_string_literal")
+ if path_node:
+ import_path = text(path_node).strip('"')
+ edges.append(
+ Edge(
+ source=parent_id,
+ target=import_path,
+ kind="imports",
+ line=child.start_point[0] + 1,
+ )
+ )
+ elif child.type == "import_spec_list":
+ for spec in child.children:
+ if spec.type == "import_spec":
+ path_node = child_by_type(spec, "interpreted_string_literal")
+ if path_node:
+ import_path = text(path_node).strip('"')
+ edges.append(
+ Edge(
+ source=parent_id,
+ target=import_path,
+ kind="imports",
+ line=spec.start_point[0] + 1,
+ )
+ )
+
+ def _extract_calls_in_body(
+ self,
+ node: TSNode,
+ parent_id: str,
+ relative_path: str,
+ edges: list[Edge],
+ ) -> None:
+ """Recursively find call expressions in a body block."""
+ for child in node.children:
+ if child.type == "call_expression":
+ self._extract_call(child, parent_id, relative_path, edges)
+ else:
+ self._extract_calls_in_body(child, parent_id, relative_path, edges)
+
+ def _extract_call(
+ self,
+ node: TSNode,
+ parent_id: str,
+ relative_path: str,
+ edges: list[Edge],
+ ) -> None:
+ """Extract a call expression into an edge."""
+ func = node.children[0] if node.children else None
+ if not func:
+ return
+
+ if func.type == "identifier":
+ target = text(func)
+ elif func.type == "selector_expression":
+ target = text(func)
+ else:
+ return
+
+ edges.append(
+ Edge(
+ source=parent_id,
+ target=target,
+ kind="calls",
+ line=node.start_point[0] + 1,
+ )
+ )
+
+ arg_list = child_by_type(node, "argument_list")
+ if arg_list:
+ self._extract_calls_in_body(arg_list, parent_id, relative_path, edges)
diff --git a/src/theow/_codegraph/_visitors/_python.py b/src/theow/_codegraph/_visitors/_python.py
new file mode 100644
index 0000000..ef8c955
--- /dev/null
+++ b/src/theow/_codegraph/_visitors/_python.py
@@ -0,0 +1,289 @@
+"""Python language visitor using tree-sitter."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import tree_sitter_python as tspython
+from tree_sitter import Language, Node as TSNode, Parser
+
+from theow._codegraph._models import Edge, Node
+from theow._codegraph._visitors._utils import child_by_type, text
+
+PY_LANGUAGE = Language(tspython.language())
+
+
+class PythonVisitor:
+    """Extract nodes and edges from Python source files.
+
+    Each parsed file yields one ``module`` node plus ``class`` and ``function``
+    nodes, connected by ``contains``, ``imports``, ``inherits`` and ``calls``
+    edges. Targets of import, inheritance and call edges are the names as
+    written in the source; this class does not resolve them to node IDs.
+    """
+
+    extensions: list[str] = [".py"]
+
+    def __init__(self) -> None:
+        """Create a tree-sitter parser bound to the Python grammar."""
+        self._parser = Parser(PY_LANGUAGE)
+
+    def parse_file(
+        self, path: Path, source: str, relative_path: str
+    ) -> tuple[list[Node], list[Edge]]:
+        """Parse a Python file and extract structural nodes and edges.
+
+        Args:
+            path: Location of the file. It is not read here; ``source`` is parsed.
+            source: Full text of the file.
+            relative_path: Used as the module node ID and as the ``file`` of
+                every node produced.
+
+        Returns:
+            ``(nodes, edges)``, with the module node first in ``nodes``.
+        """
+        tree = self._parser.parse(source.encode())
+        nodes: list[Node] = []
+        edges: list[Edge] = []
+
+        module_id = relative_path
+        nodes.append(
+            Node(
+                id=module_id,
+                kind="module",
+                name=Path(relative_path).stem,
+                file=relative_path,
+                line=1,
+                end_line=source.count("\n") + 1,
+            )
+        )
+
+        self._walk(tree.root_node, module_id, relative_path, nodes, edges)
+        return nodes, edges
+
+    def _walk(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        nodes: list[Node],
+        edges: list[Edge],
+    ) -> None:
+        """Extract symbols from the direct children of ``node``.
+
+        Handles function and class definitions (plain or decorated), import
+        statements, and expression statements whose first child is a call.
+        Other statement kinds are not descended into.
+        """
+        for child in node.children:
+            kind = child.type
+            if kind == "function_definition":
+                self._extract_function(child, parent_id, relative_path, nodes, edges)
+            elif kind == "class_definition":
+                self._extract_class(child, parent_id, relative_path, nodes, edges)
+            elif kind == "decorated_definition":
+                # The definition itself sits next to its decorator nodes.
+                for inner in child.children:
+                    if inner.type == "function_definition":
+                        self._extract_function(inner, parent_id, relative_path, nodes, edges)
+                    elif inner.type == "class_definition":
+                        self._extract_class(inner, parent_id, relative_path, nodes, edges)
+            elif kind == "import_statement":
+                self._extract_import(child, parent_id, relative_path, edges)
+            elif kind == "import_from_statement":
+                self._extract_import_from(child, parent_id, relative_path, edges)
+            elif kind == "expression_statement":
+                expr = child.children[0] if child.children else None
+                if expr is not None and expr.type == "call":
+                    self._extract_call(expr, parent_id, relative_path, edges)
+
+    def _extract_function(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        nodes: list[Node],
+        edges: list[Edge],
+    ) -> None:
+        """Add a ``function`` node, its ``contains`` edge, and its call edges.
+
+        The node ID is ``<parent_id>::<name>``. Calls found anywhere in the
+        body block are attributed to this function.
+        """
+        name_node = child_by_type(node, "identifier")
+        if name_node is None:
+            return
+
+        name = text(name_node)
+        func_id = f"{parent_id}::{name}"
+
+        params_node = child_by_type(node, "parameters")
+        signature = f"def {name}{text(params_node)}" if params_node else f"def {name}()"
+
+        start_line = node.start_point[0] + 1
+        nodes.append(
+            Node(
+                id=func_id,
+                kind="function",
+                name=name,
+                file=relative_path,
+                line=start_line,
+                end_line=node.end_point[0] + 1,
+                signature=signature,
+                docstring=_extract_docstring(node),
+                parent=parent_id,
+            )
+        )
+        edges.append(Edge(source=parent_id, target=func_id, kind="contains", line=start_line))
+
+        body = child_by_type(node, "block")
+        if body is not None:
+            self._extract_calls_in_body(body, func_id, relative_path, edges)
+
+    def _extract_class(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        nodes: list[Node],
+        edges: list[Edge],
+    ) -> None:
+        """Add a ``class`` node with its ``inherits`` and ``contains`` edges.
+
+        Base classes written as a bare name (``Base``) or a dotted name
+        (``abc.ABC``) each produce an ``inherits`` edge; keyword arguments
+        such as ``metaclass=...`` do not. The class body is then walked with
+        the class as parent, so methods become children of the class.
+        """
+        name_node = child_by_type(node, "identifier")
+        if name_node is None:
+            return
+
+        name = text(name_node)
+        class_id = f"{parent_id}::{name}"
+
+        arg_list = child_by_type(node, "argument_list")
+        if arg_list is not None:
+            for arg in arg_list.children:
+                # "attribute" covers dotted bases, which would otherwise be lost.
+                if arg.type in ("identifier", "attribute"):
+                    edges.append(
+                        Edge(
+                            source=class_id,
+                            target=text(arg),
+                            kind="inherits",
+                            line=arg.start_point[0] + 1,
+                        )
+                    )
+
+        start_line = node.start_point[0] + 1
+        nodes.append(
+            Node(
+                id=class_id,
+                kind="class",
+                name=name,
+                file=relative_path,
+                line=start_line,
+                end_line=node.end_point[0] + 1,
+                docstring=_extract_docstring(node),
+                parent=parent_id,
+            )
+        )
+        edges.append(Edge(source=parent_id, target=class_id, kind="contains", line=start_line))
+
+        body = child_by_type(node, "block")
+        if body is not None:
+            self._walk(body, class_id, relative_path, nodes, edges)
+
+    def _extract_import(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        edges: list[Edge],
+    ) -> None:
+        """Extract `import foo` statements, including `import foo as bar`.
+
+        One ``imports`` edge is emitted per imported module; for an aliased
+        import the target is the module name, not the alias.
+        """
+        for child in node.children:
+            if child.type == "aliased_import":
+                # `import a.b as c` wraps the module name in an aliased_import node.
+                name_node = child_by_type(child, "dotted_name")
+            elif child.type == "dotted_name":
+                name_node = child
+            else:
+                name_node = None
+
+            if name_node is None:
+                continue
+            edges.append(
+                Edge(
+                    source=parent_id,
+                    target=text(name_node),
+                    kind="imports",
+                    line=node.start_point[0] + 1,
+                )
+            )
+
+    def _extract_import_from(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        edges: list[Edge],
+    ) -> None:
+        """Extract `from foo import bar` statements.
+
+        A single ``imports`` edge targets the source module: the first dotted
+        name, or the text of the relative-import node when one is present.
+        The imported names themselves are not recorded.
+        """
+        module_name = ""
+        for child in node.children:
+            if child.type == "dotted_name" and not module_name:
+                module_name = text(child)
+            elif child.type == "relative_import":
+                module_name = text(child)
+
+        if module_name:
+            edges.append(
+                Edge(
+                    source=parent_id,
+                    target=module_name,
+                    kind="imports",
+                    line=node.start_point[0] + 1,
+                )
+            )
+
+    def _extract_calls_in_body(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        edges: list[Edge],
+    ) -> None:
+        """Recursively find call expressions beneath ``node``.
+
+        ``call`` children go to ``_extract_call``; every other child is
+        searched recursively.
+        """
+        for child in node.children:
+            if child.type == "call":
+                self._extract_call(child, parent_id, relative_path, edges)
+            else:
+                self._extract_calls_in_body(child, parent_id, relative_path, edges)
+
+    def _extract_call(
+        self,
+        node: TSNode,
+        parent_id: str,
+        relative_path: str,
+        edges: list[Edge],
+    ) -> None:
+        """Record a ``calls`` edge for a call, then scan inside the call.
+
+        The edge target is the callee's source text when the callee is a bare
+        name or an attribute access; other callee shapes produce no edge for
+        the call itself. Calls nested in the arguments or in a chained
+        receiver (``a.b().c()``) are recorded as well.
+        """
+        if not node.children:
+            return
+
+        func = node.children[0]
+        if func.type in ("identifier", "attribute"):
+            edges.append(
+                Edge(
+                    source=parent_id,
+                    target=text(func),
+                    kind="calls",
+                    line=node.start_point[0] + 1,
+                )
+            )
+
+        # Descend into callee and arguments so nested calls are not dropped.
+        self._extract_calls_in_body(node, parent_id, relative_path, edges)
+
+
+def _extract_docstring(node: TSNode) -> str:
+    """Extract the docstring from a function or class definition.
+
+    The docstring is the string expression forming the first statement of the
+    definition's ``block``. Comment nodes ahead of it are skipped, and a raw or
+    unicode prefix (``r``/``u`` in either case) is dropped before the quotes
+    are removed.
+
+    Returns:
+        The docstring without quotes and surrounding whitespace, ``""`` when
+        the definition has none, or the unmodified literal text when its
+        quoting is not recognised.
+    """
+    body = child_by_type(node, "block")
+    if body is None:
+        return ""
+
+    # Comments are not statements, so one placed before the docstring must not hide it.
+    first_stmt = next((c for c in body.children if c.type != "comment"), None)
+    if first_stmt is None or first_stmt.type != "expression_statement":
+        return ""
+
+    expr = first_stmt.children[0] if first_stmt.children else None
+    if expr is None or expr.type != "string":
+        return ""
+
+    raw = text(expr)
+    # r"""...""" and u"..." are valid docstrings; strip the prefix first.
+    unprefixed = raw.lstrip("rRuU")
+    # Triple quotes are tried first so they are never mistaken for single quotes.
+    for quote in ('"""', "'''", '"', "'"):
+        if (
+            len(unprefixed) >= 2 * len(quote)
+            and unprefixed.startswith(quote)
+            and unprefixed.endswith(quote)
+        ):
+            return unprefixed[len(quote) : -len(quote)].strip()
+    return raw
diff --git a/src/theow/_codegraph/_visitors/_utils.py b/src/theow/_codegraph/_visitors/_utils.py
new file mode 100644
index 0000000..bbf1fec
--- /dev/null
+++ b/src/theow/_codegraph/_visitors/_utils.py
@@ -0,0 +1,27 @@
+"""Shared tree-sitter utilities for language visitors."""
+
+from __future__ import annotations
+
+from tree_sitter import Node as TSNode
+
+
+def text(node: TSNode) -> str:
+    """Return the node's source text decoded to ``str`` (``""`` if it has none)."""
+    raw = node.text
+    if not raw:
+        return ""
+    return raw.decode()
+
+
+def child_by_type(node: TSNode, type_name: str) -> TSNode | None:
+    """Return the first direct child whose type is ``type_name``, else ``None``."""
+    matching = (candidate for candidate in node.children if candidate.type == type_name)
+    return next(matching, None)
+
+
+def last_child_by_type(node: TSNode, type_name: str) -> TSNode | None:
+    """Return the last direct child whose type is ``type_name``, else ``None``."""
+    # Scanning from the end lets the first hit be the last match.
+    for candidate in reversed(node.children):
+        if candidate.type == type_name:
+            return candidate
+    return None
diff --git a/src/theow/_codegraph/examples/theow_graph.html b/src/theow/_codegraph/examples/theow_graph.html
new file mode 100644
index 0000000..05ae2a5
--- /dev/null
+++ b/src/theow/_codegraph/examples/theow_graph.html
@@ -0,0 +1,258 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/theow/_codegraph/examples/theow_graph.png b/src/theow/_codegraph/examples/theow_graph.png
new file mode 100644
index 0000000..4c5946c
Binary files /dev/null and b/src/theow/_codegraph/examples/theow_graph.png differ
diff --git a/src/theow/_codegraph/examples/visualize.py b/src/theow/_codegraph/examples/visualize.py
new file mode 100644
index 0000000..e2083f0
--- /dev/null
+++ b/src/theow/_codegraph/examples/visualize.py
@@ -0,0 +1,63 @@
+"""Visualize theow's code graph with pyvis. Run: uv run --extra codegraph src/theow/_codegraph/examples/visualize.py"""
+
+from pathlib import Path
+
+from pyvis.network import Network
+
+from theow.codegraph import CodeGraph
+
+# Fill colour per node kind.
+COLORS = {
+    "module": "#bd93f9",
+    "class": "#ffb86c",
+    "function": "#50fa7b",
+}
+
+# Stroke colour per edge kind.
+EDGE_COLORS = {
+    "contains": "#6272a4",
+    "calls": "#f1fa8c",
+    "imports": "#6272a4",
+    "inherits": "#ff5555",
+}
+
+# Five directory levels above this file, then back down into src/theow.
+graph = CodeGraph(root=Path(__file__).resolve().parents[4] / "src" / "theow")
+graph.build()
+
+net = Network(height="100vh", width="100%", directed=True, bgcolor="#282a36", font_color="#f8f8f2")
+net.barnes_hut(gravity=-3000, spring_length=150)
+
+# One vis node per graph node; the hover title carries id, kind and location.
+for node in graph._nodes.values():
+    tooltip = f"{node.id}\n{node.kind}\n{node.file}:{node.line}"
+    if node.signature:
+        tooltip += f"\n{node.signature}"
+    if node.docstring:
+        tooltip += f"\n\n{node.docstring[:200]}"
+
+    net.add_node(
+        node.id,
+        label=node.name,
+        title=tooltip,
+        color=COLORS.get(node.kind, "#999"),
+        size={"module": 25, "class": 18, "function": 12}.get(node.kind, 10),
+        group=node.kind,
+    )
+
+# Only edges whose target is a known node are drawn.
+known_nodes = graph._nodes
+for src_id, outgoing in graph._fwd.items():
+    for dst_id, attrs in outgoing.items():
+        if dst_id in known_nodes:
+            kind = attrs.get("kind", "")
+            net.add_edge(
+                src_id,
+                dst_id,
+                title=kind,
+                color=EDGE_COLORS.get(kind, "#ccc"),
+                arrows="to",
+                width=2 if kind in ("calls", "inherits") else 1,
+            )
+
+out = Path(__file__).resolve().parent / "theow_graph.html"
+net.save_graph(str(out))
+print(f"Graph saved to {out}")
+print(f"Nodes: {len(graph._nodes)}, Edges: {graph._edge_count()}")
diff --git a/src/theow/_core/_logging.py b/src/theow/_core/_logging.py
index a4b1e5c..e247cef 100644
--- a/src/theow/_core/_logging.py
+++ b/src/theow/_core/_logging.py
@@ -20,6 +20,7 @@
"theow._core._chroma_store": "chroma",
"theow._core._recover": "recovery",
"theow._cli._run": "cli",
+ "theow._codegraph._graph": "codegraph",
}
diff --git a/src/theow/codegraph.py b/src/theow/codegraph.py
new file mode 100644
index 0000000..959208a
--- /dev/null
+++ b/src/theow/codegraph.py
@@ -0,0 +1,13 @@
+"""Code graph for consumer opt-in registration.
+
+Usage:
+ from theow.codegraph import CodeGraph
+
+ graph = CodeGraph(root="./src")
+ engine = Theow(theow_dir=".theow", llm="anthropic/claude-sonnet-4-20250514")
+ engine.tool()(graph.search_code)
+"""
+
+from theow._codegraph import CodeGraph
+
+__all__ = ["CodeGraph"]
diff --git a/tests/codegraph/__init__.py b/tests/codegraph/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/codegraph/conftest.py b/tests/codegraph/conftest.py
new file mode 100644
index 0000000..e734360
--- /dev/null
+++ b/tests/codegraph/conftest.py
@@ -0,0 +1,22 @@
+"""Shared fixtures for codegraph tests."""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+# Theow's own source tree as test corpus
+THEOW_SRC = Path(__file__).resolve().parent.parent.parent / "src" / "theow"
+
+
+@pytest.fixture
+def theow_src():
+    """Provide the root of theow's own source package, used as the test corpus."""
+    corpus_root = THEOW_SRC
+    return corpus_root
+
+
+@pytest.fixture
+def empty_project():
+    """Yield a fresh, empty temporary directory; it is removed after the test."""
+    with tempfile.TemporaryDirectory() as scratch:
+        project_root = Path(scratch)
+        yield project_root
diff --git a/tests/codegraph/test_graph.py b/tests/codegraph/test_graph.py
new file mode 100644
index 0000000..6d98abd
--- /dev/null
+++ b/tests/codegraph/test_graph.py
@@ -0,0 +1,241 @@
+"""Tests for CodeGraph using theow's own source."""
+
+import json
+import tempfile
+from pathlib import Path
+
+from theow._codegraph._graph import CodeGraph
+
+
+def test_build_on_theow_src(theow_src):
+    """A build over theow's own source yields a populated, built graph."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    node_total = len(code_graph._nodes)
+    edge_total = code_graph._edge_count()
+    assert node_total > 50
+    assert edge_total > 50
+    assert code_graph._built is True
+
+
+def test_build_idempotent(theow_src):
+    """Rebuilding an already built graph leaves the node count unchanged."""
+    code_graph = CodeGraph(root=theow_src)
+
+    node_counts = []
+    for _ in range(2):
+        code_graph.build()
+        node_counts.append(len(code_graph._nodes))
+
+    first_count, second_count = node_counts
+    assert first_count == second_count
+
+
+def test_build_empty_project(empty_project):
+    """Building over an empty directory succeeds and produces no nodes."""
+    code_graph = CodeGraph(root=empty_project)
+    code_graph.build()
+    node_total = len(code_graph._nodes)
+    assert node_total == 0
+
+
+def test_auto_build_on_first_search(theow_src):
+    """The first search_code call builds the graph when build() was never called."""
+    code_graph = CodeGraph(root=theow_src)
+    assert code_graph._built is False
+
+    hits = code_graph.search_code(query="Rule", kind="class")
+
+    assert code_graph._built is True
+    assert len(hits) > 0
+
+
+def test_search_symbol_exact(theow_src):
+    """An exact class-name query returns that class, ranked first as 'exact'."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="Rule", kind="class")
+
+    hit_names = [hit["name"] for hit in hits]
+    assert "Rule" in hit_names
+    top_hit = hits[0]
+    assert top_hit["relevance"] == "exact"
+
+
+def test_search_symbol_substring(theow_src):
+    """A partial name also matches symbols that merely contain it."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="Gateway")
+
+    assert any(hit["name"] == "LLMGateway" for hit in hits)
+
+
+def test_search_symbol_by_kind(theow_src):
+    """A kind filter restricts every result to that kind."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="", kind="class")
+
+    assert all(hit["kind"] == "class" for hit in hits)
+
+
+def test_search_file_scope(theow_src):
+    """File scope lists the symbols of one file, ordered by line number."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(scope="file", file="_core/_models.py")
+
+    assert len(hits) > 5
+    for hit in hits:
+        assert hit["file"] == "_core/_models.py"
+
+    # Results are expected in ascending line order.
+    line_numbers = [hit["line"] for hit in hits]
+    assert sorted(line_numbers) == line_numbers
+
+
+def test_search_callers(theow_src):
+    """Callers scope returns the functions calling the target, tagged 'caller'."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="matches", scope="callers")
+
+    # At least one function in the corpus calls matches().
+    assert len(hits) > 0
+    for hit in hits:
+        assert hit.get("relevance") == "caller"
+
+
+def test_search_callees(theow_src):
+    """Callees scope should return a well-formed, possibly empty, result list.
+
+    Whether any callee of ``matches`` resolves depends on edge resolution, so
+    only the shape of the response is checked, not its size.
+    """
+    graph = CodeGraph(root=theow_src)
+    # Fact.matches calls re.search — look for callees of matches
+    results = graph.search_code(query="matches", scope="callees")
+
+    # `len(results) >= 0` can never fail; assert the response shape instead.
+    assert isinstance(results, list)
+    assert all(isinstance(r, dict) for r in results)
+
+
+def test_search_references(theow_src):
+    """References scope returns results for a symbol that has edges."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="Rule", scope="references")
+
+    # Rule is contained by its module and has edges of its own.
+    hit_total = len(hits)
+    assert hit_total > 0
+
+
+def test_search_definition_by_file_line(theow_src):
+    """Definition scope resolves a file and line to the symbol defined there."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    # Look up Rule's line from the graph rather than hard-coding it.
+    rule = code_graph._nodes.get("_core/_models.py::Rule")
+    assert rule is not None
+
+    hits = code_graph.search_code(scope="definition", file="_core/_models.py", line=rule.line)
+    assert len(hits) > 0
+    first_hit = hits[0]
+    assert first_hit["name"] == "Rule"
+
+
+def test_search_path(theow_src):
+    """Path scope returns the chain of symbols linking source and target."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    # The _core/_models.py module contains Rule, so a chain of two must exist.
+    chain = code_graph.search_code(query="_core/_models.py", scope="path", target="Rule")
+    assert len(chain) >= 2
+
+
+def test_search_no_results(theow_src):
+    """A query matching no symbol yields an empty list."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="XyzNonExistent12345")
+    assert hits == []
+
+
+def test_excludes(theow_src):
+    """No node may originate from a __pycache__ directory."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    assert all("__pycache__" not in node.file for node in code_graph._nodes.values())
+
+
+def test_max_file_size(theow_src):
+    """Files larger than max_file_size are left out of the graph."""
+    code_graph = CodeGraph(root=theow_src, max_file_size=100)
+    code_graph.build()
+
+    # A 100-byte cap excludes nearly every source file.
+    node_total = len(code_graph._nodes)
+    assert node_total < 10
+
+
+def test_file_index(theow_src):
+    """The file index maps a file path to the IDs of the nodes defined in it."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    file_index = code_graph._file_index
+    assert "_core/_models.py" in file_index
+    ids_in_models = file_index["_core/_models.py"]
+    assert any("Rule" in node_id for node_id in ids_in_models)
+
+
+def test_name_index(theow_src):
+    """The name index maps a short symbol name to at least one node ID."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    name_index = code_graph._name_index
+    assert "Rule" in name_index
+    assert len(name_index["Rule"]) >= 1
+
+
+def test_json_roundtrip(theow_src):
+    """Graph should survive JSON serialization and deserialization."""
+    graph = CodeGraph(root=theow_src)
+    graph.build()
+
+    # TemporaryDirectory removes the file even when a later step raises;
+    # a manual unlink() at the end would be skipped on failure.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        json_path = str(Path(tmpdir) / "graph.json")
+        graph.to_json(json_path)
+        loaded = CodeGraph.from_json(json_path)
+
+    assert len(loaded._nodes) == len(graph._nodes)
+    assert loaded._edge_count() == graph._edge_count()
+    assert loaded._built is True
+
+
+def test_json_to_string(theow_src):
+    """Without a path, to_json returns the serialized graph as a JSON string."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    payload = json.loads(code_graph.to_json())
+    for section in ("nodes", "edges"):
+        assert section in payload
+    assert len(payload["nodes"]) == len(code_graph._nodes)
+
+
+def test_custom_excludes(theow_src):
+    """A caller-supplied exclude set keeps matching paths out of the graph."""
+    code_graph = CodeGraph(root=theow_src, excludes={"_core"})
+    code_graph.build()
+
+    assert all("_core" not in node.file for node in code_graph._nodes.values())
+
+
+def test_edge_resolution(theow_src):
+    """After a build, at least one call edge points at a known node ID."""
+    code_graph = CodeGraph(root=theow_src)
+    code_graph.build()
+
+    # Count call edges whose target is a node in the graph.
+    resolved_calls = sum(
+        1
+        for targets in code_graph._fwd.values()
+        for target, data in targets.items()
+        if data.get("kind") == "calls" and target in code_graph._nodes
+    )
+
+    assert resolved_calls > 0
diff --git a/tests/codegraph/test_models.py b/tests/codegraph/test_models.py
new file mode 100644
index 0000000..8d00818
--- /dev/null
+++ b/tests/codegraph/test_models.py
@@ -0,0 +1,95 @@
+"""Tests for codegraph data models."""
+
+from theow._codegraph._models import Edge, Node, SearchResult
+
+
+def test_node_to_dict_minimal():
+    """Optional fields left at their defaults are omitted from to_dict()."""
+    payload = Node(
+        id="mod.py::func", kind="function", name="func", file="mod.py", line=10
+    ).to_dict()
+
+    assert payload["id"] == "mod.py::func"
+    assert payload["kind"] == "function"
+    assert payload["line"] == 10
+    for optional_key in ("end_line", "signature", "docstring", "parent"):
+        assert optional_key not in payload
+
+
+def test_node_to_dict_full():
+    """Optional fields that are set appear in to_dict() with their values."""
+    optional_fields = {
+        "end_line": 15,
+        "signature": "def method(self, x)",
+        "docstring": "Do stuff.",
+        "parent": "mod.py::Cls",
+    }
+    node = Node(
+        id="mod.py::Cls.method",
+        kind="function",
+        name="method",
+        file="mod.py",
+        line=5,
+        **optional_fields,
+    )
+
+    payload = node.to_dict()
+    for key, expected in optional_fields.items():
+        assert payload[key] == expected
+
+
+def test_node_roundtrip():
+    """from_dict(to_dict(node)) reproduces an equal Node."""
+    original = Node(
+        id="a.py::X",
+        kind="class",
+        name="X",
+        file="a.py",
+        line=1,
+        end_line=20,
+        signature="",
+        docstring="A class.",
+        parent="a.py",
+    )
+    restored = Node.from_dict(original.to_dict())
+    assert restored == original
+
+
+def test_node_frozen():
+    """Assigning to a field of a Node must raise AttributeError."""
+    import pytest
+
+    node = Node(id="x", kind="module", name="x", file="x.py", line=1)
+    # pytest.raises fails the test when nothing is raised. The previous
+    # `assert False` guard is stripped under `python -O`, letting a mutable
+    # Node pass unnoticed.
+    with pytest.raises(AttributeError):
+        node.name = "y"  # type: ignore[misc]
+
+
+def test_edge_to_dict_minimal():
+    """An Edge without a line serializes to exactly source, target and kind."""
+    payload = Edge(source="a", target="b", kind="calls").to_dict()
+    assert payload == {"source": "a", "target": "b", "kind": "calls"}
+    assert "line" not in payload
+
+
+def test_edge_to_dict_with_line():
+    """An Edge's line number is included in to_dict() when it is set."""
+    payload = Edge(source="a", target="b", kind="imports", line=7).to_dict()
+    assert payload["line"] == 7
+
+
+def test_edge_roundtrip():
+    """from_dict(to_dict(edge)) reproduces an equal Edge."""
+    original = Edge(source="a", target="b", kind="contains", line=3)
+    restored = Edge.from_dict(original.to_dict())
+    assert restored == original
+
+
+def test_search_result_to_dict():
+    """SearchResult.to_dict() carries the node ID plus context and relevance."""
+    target = Node(id="f.py::go", kind="function", name="go", file="f.py", line=1)
+    payload = SearchResult(node=target, context="called by main", relevance="caller").to_dict()
+
+    assert payload["id"] == "f.py::go"
+    assert payload["context"] == "called by main"
+    assert payload["relevance"] == "caller"
+
+
+def test_search_result_to_dict_no_extras():
+    """Context and relevance are omitted from to_dict() when they are not set."""
+    target = Node(id="f.py", kind="module", name="f", file="f.py", line=1)
+    payload = SearchResult(node=target).to_dict()
+
+    for absent_key in ("context", "relevance"):
+        assert absent_key not in payload
+
+
+def test_search_result_roundtrip():
+    """from_dict(to_dict(result)) restores node, context and relevance."""
+    target = Node(id="x.py::Y", kind="class", name="Y", file="x.py", line=2)
+    original = SearchResult(node=target, context="ctx", relevance="exact")
+
+    restored = SearchResult.from_dict(original.to_dict())
+
+    assert restored.node == target
+    assert restored.context == "ctx"
+    assert restored.relevance == "exact"
diff --git a/tests/codegraph/test_tool.py b/tests/codegraph/test_tool.py
new file mode 100644
index 0000000..af596db
--- /dev/null
+++ b/tests/codegraph/test_tool.py
@@ -0,0 +1,87 @@
+"""Tests for codegraph tool registration using theow's own source."""
+
+import inspect
+
+from theow._codegraph._graph import CodeGraph
+from theow._gateway._base import build_tool_declaration
+
+
+def test_search_code_callable(theow_src):
+    """graph.search_code is callable and returns a list of dicts."""
+    code_graph = CodeGraph(root=theow_src)
+    assert callable(code_graph.search_code)
+
+    hits = code_graph.search_code(query="Rule")
+    assert isinstance(hits, list)
+    for hit in hits:
+        assert isinstance(hit, dict)
+
+
+def test_search_code_has_docstring(theow_src):
+    """search_code carries a docstring, which mentions 'symbol'."""
+    code_graph = CodeGraph(root=theow_src)
+    description = inspect.getdoc(code_graph.search_code)
+
+    assert description is not None
+    assert "symbol" in description.lower()
+
+
+def test_build_tool_declaration_skips_self(theow_src):
+    """For a bound method, the schema lists every parameter except ``self``."""
+    code_graph = CodeGraph(root=theow_src)
+    declaration = build_tool_declaration("search_code", code_graph.search_code)
+    schema_props = declaration["input_schema"]["properties"]
+
+    assert "self" not in schema_props
+    for param in ("query", "scope", "kind", "file", "line", "target"):
+        assert param in schema_props
+
+
+def test_build_tool_declaration_types(theow_src):
+    """Parameter annotations map to the matching JSON schema types."""
+    code_graph = CodeGraph(root=theow_src)
+    declaration = build_tool_declaration("search_code", code_graph.search_code)
+    schema_props = declaration["input_schema"]["properties"]
+
+    expected_types = (("query", "string"), ("line", "integer"), ("scope", "string"))
+    for param, json_type in expected_types:
+        assert schema_props[param]["type"] == json_type
+
+
+def test_build_tool_declaration_no_required(theow_src):
+    """Every search_code parameter has a default, so the required list is empty."""
+    code_graph = CodeGraph(root=theow_src)
+    declaration = build_tool_declaration("search_code", code_graph.search_code)
+
+    required_params = declaration["input_schema"]["required"]
+    assert required_params == []
+
+
+def test_build_tool_declaration_has_description(theow_src):
+    """The declaration's description is a non-trivial piece of text."""
+    code_graph = CodeGraph(root=theow_src)
+    declaration = build_tool_declaration("search_code", code_graph.search_code)
+
+    description_length = len(declaration["description"])
+    assert description_length > 50
+
+
+def test_public_import():
+    """theow.codegraph re-exports the very same CodeGraph class."""
+    import theow.codegraph as public_api
+
+    assert public_api.CodeGraph is CodeGraph
+
+
+def test_search_code_returns_dicts_with_expected_keys(theow_src):
+    """The first search result exposes the standard node fields."""
+    code_graph = CodeGraph(root=theow_src)
+    hits = code_graph.search_code(query="Rule", kind="class")
+
+    assert len(hits) > 0
+    first_hit = hits[0]
+    for field in ("id", "kind", "name", "file", "line"):
+        assert field in first_hit
diff --git a/tests/codegraph/test_visitor.py b/tests/codegraph/test_visitor.py
new file mode 100644
index 0000000..95d5f57
--- /dev/null
+++ b/tests/codegraph/test_visitor.py
@@ -0,0 +1,322 @@
+"""Tests for language visitors using theow's own source."""
+
+import tempfile
+from pathlib import Path
+
+from theow._codegraph._visitors._go import GoVisitor
+from theow._codegraph._visitors._python import PythonVisitor
+
+# Go fixture for the GoVisitor tests: two imports, an interface, struct
+# embedding (Base inside Dog), a constructor, two methods on *Dog, and a
+# main function that calls them.
+GO_SOURCE = '''\
+package main
+
+import (
+\t"fmt"
+\t"strings"
+)
+
+type Animal interface {
+\tSpeak() string
+}
+
+type Base struct {
+\tName string
+}
+
+type Dog struct {
+\tBase
+\tBreed string
+}
+
+func NewDog(name string, breed string) *Dog {
+\treturn &Dog{Base: Base{Name: name}, Breed: breed}
+}
+
+func (d *Dog) Speak() string {
+\treturn fmt.Sprintf("Woof! I am %s", d.Name)
+}
+
+func (d *Dog) Fetch(item string) string {
+\treturn strings.ToUpper(item)
+}
+
+func main() {
+\tdog := NewDog("Rex", "Labrador")
+\tfmt.Println(dog.Speak())
+\tfmt.Println(dog.Fetch("ball"))
+}
+'''
+
+
+def test_python_visitor_parses_models(theow_src):
+    """Parsing _core/_models.py yields its known classes and all node kinds."""
+    models_path = theow_src / "_core" / "_models.py"
+    nodes, _edges = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    found_names = {n.name for n in nodes}
+    for expected in ("Rule", "Fact", "Action", "LLMConfig"):
+        assert expected in found_names
+
+    # A module node plus class and function nodes must all be present.
+    found_kinds = {n.kind for n in nodes}
+    for expected in ("module", "class", "function"):
+        assert expected in found_kinds
+
+
+def test_python_visitor_extracts_methods(theow_src):
+    """Methods defined in a class body have that class as their parent."""
+    relative = "_core/_models.py"
+    models_path = theow_src / "_core" / "_models.py"
+    nodes, _ = PythonVisitor().parse_file(models_path, models_path.read_text(), relative)
+
+    # Names of the nodes whose parent is the Fact class.
+    fact_id = f"{relative}::Fact"
+    fact_method_names = {n.name for n in nodes if n.parent == fact_id}
+    for expected in ("to_dict", "from_dict", "matches"):
+        assert expected in fact_method_names
+
+
+def test_python_visitor_extracts_imports(theow_src):
+    """Module-level import statements become ``imports`` edges."""
+    models_path = theow_src / "_core" / "_models.py"
+    _, edges = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    imported_modules = {e.target for e in edges if e.kind == "imports"}
+    assert "yaml" in imported_modules
+    assert "re" in imported_modules
+
+
+def test_python_visitor_extracts_inheritance(theow_src):
+    """A class with base classes produces ``inherits`` edges to them."""
+    # _gateway/_base.py defines LLMGateway(ABC).
+    base_path = theow_src / "_gateway" / "_base.py"
+    _, edges = PythonVisitor().parse_file(base_path, base_path.read_text(), "_gateway/_base.py")
+
+    inherited_from = [e.target for e in edges if e.kind == "inherits"]
+    assert "ABC" in inherited_from
+
+
+def test_python_visitor_extracts_docstrings(theow_src):
+    """The docstring of a class is captured on its node."""
+    models_path = theow_src / "_core" / "_models.py"
+    nodes, _ = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    rule = next(n for n in nodes if n.name == "Rule")
+    assert "production rule" in rule.docstring.lower()
+
+
+def test_python_visitor_extracts_signatures(theow_src):
+    """A method's node carries its ``def`` signature including ``self``."""
+    models_path = theow_src / "_core" / "_models.py"
+    nodes, _ = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    matches = next(n for n in nodes if n.name == "matches" and "Fact" in n.parent)
+    for fragment in ("def matches", "self"):
+        assert fragment in matches.signature
+
+
+def test_python_visitor_extracts_calls(theow_src):
+    """Call sites in function bodies become ``calls`` edges."""
+    models_path = theow_src / "_core" / "_models.py"
+    _, edges = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    called = {e.target for e in edges if e.kind == "calls"}
+    # Fact.matches uses re.search.
+    assert any("re.search" in target for target in called)
+
+
+def test_python_visitor_line_numbers(theow_src):
+    """Every node starts at line 1 or later and does not end before it starts."""
+    models_path = theow_src / "_core" / "_models.py"
+    nodes, _ = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    for parsed in nodes:
+        assert parsed.line >= 1
+        if not parsed.end_line:
+            continue
+        assert parsed.end_line >= parsed.line
+
+
+def test_python_visitor_decorated_functions(theow_src):
+    """A decorated definition is extracted like an undecorated one."""
+    models_path = theow_src / "_core" / "_models.py"
+    nodes, _ = PythonVisitor().parse_file(
+        models_path, models_path.read_text(), "_core/_models.py"
+    )
+
+    # Rule.is_ephemeral is declared with @property.
+    assert any(n.name == "is_ephemeral" for n in nodes)
+
+
+def test_python_visitor_extensions():
+    """PythonVisitor declares ``.py`` as its only extension."""
+    declared = PythonVisitor().extensions
+    assert declared == [".py"]
+
+
+def test_load_visitors_python():
+    """Requesting 'python' loads exactly one visitor, handling ``.py``."""
+    from theow._codegraph._visitors import load_visitors
+
+    loaded = load_visitors(["python"])
+    assert len(loaded) == 1
+    only_visitor = loaded[0]
+    assert only_visitor.extensions == [".py"]
+
+
+def test_load_visitors_unknown():
+    """An unsupported language name raises ValueError mentioning 'Unknown language'."""
+    import pytest
+    from theow._codegraph._visitors import load_visitors
+
+    unsupported = ["rust"]
+    with pytest.raises(ValueError, match="Unknown language"):
+        load_visitors(unsupported)
+
+
+# --- Go visitor tests ---
+
+
+def _parse_go(source=GO_SOURCE, filename="main.go"):
+    """Parse Go ``source`` with ``GoVisitor`` and return ``(nodes, edges)``.
+
+    The source is also written to a temporary ``.go`` file whose path is passed
+    to ``parse_file``. The file is removed afterwards, even if parsing raises.
+    """
+    visitor = GoVisitor()
+    with tempfile.NamedTemporaryFile(suffix=".go", mode="w", delete=False) as f:
+        f.write(source)
+        path = Path(f.name)
+    try:
+        nodes, edges = visitor.parse_file(path, source, filename)
+    finally:
+        # delete=False means nothing else cleans this file up.
+        path.unlink()
+    return nodes, edges
+
+
+def test_go_visitor_extensions():
+    """GoVisitor declares ``.go`` as its only extension."""
+    declared = GoVisitor().extensions
+    assert declared == [".go"]
+
+
+def test_go_visitor_extracts_functions():
+    """Top-level Go functions become ``function`` nodes."""
+    nodes, _ = _parse_go()
+    function_names = {n.name for n in nodes if n.kind == "function"}
+    for expected in ("NewDog", "main"):
+        assert expected in function_names
+
+
+def test_go_visitor_extracts_methods():
+    """A method's parent and signature both name its receiver type."""
+    nodes, _ = _parse_go()
+    speak = next(n for n in nodes if n.name == "Speak")
+
+    assert "Dog" in speak.parent
+    # Either the pointer or the value receiver form is accepted.
+    accepted_forms = ("func (*Dog) Speak", "func (Dog) Speak")
+    assert any(form in speak.signature for form in accepted_forms)
+
+
+def test_go_visitor_extracts_structs():
+    """Go structs are reported as ``class`` nodes."""
+    nodes, _ = _parse_go()
+    class_names = {n.name for n in nodes if n.kind == "class"}
+    for expected in ("Dog", "Base"):
+        assert expected in class_names
+
+
+def test_go_visitor_extracts_interfaces():
+    """A Go interface is reported as a ``class`` node."""
+    nodes, _ = _parse_go()
+    class_names = [n.name for n in nodes if n.kind == "class"]
+    assert "Animal" in class_names
+
+
+def test_go_visitor_extracts_imports():
+    """Each import spec becomes an ``imports`` edge to the unquoted path."""
+    _, edges = _parse_go()
+    imported_paths = {e.target for e in edges if e.kind == "imports"}
+    for expected in ("fmt", "strings"):
+        assert expected in imported_paths
+
+
+def test_go_visitor_extracts_calls():
+    """Plain and method calls both become ``calls`` edges."""
+    _, edges = _parse_go()
+    called = {e.target for e in edges if e.kind == "calls"}
+
+    assert "NewDog" in called
+    # Method calls such as dog.Speak() keep the selector text as the target.
+    assert any("Speak" in target for target in called)
+
+
+def test_go_visitor_extracts_embedding():
+    """Embedding Base in Dog yields an ``inherits`` edge from Dog to Base."""
+    _, edges = _parse_go()
+    embedding_edges = (e for e in edges if e.kind == "inherits")
+    assert any(e.target == "Base" and "Dog" in e.source for e in embedding_edges)
+
+
+def test_go_visitor_function_signatures():
+    """A function's signature includes its name and parameter types."""
+    nodes, _ = _parse_go()
+    constructor = next(n for n in nodes if n.name == "NewDog")
+    for fragment in ("func NewDog", "string"):
+        assert fragment in constructor.signature
+
+
+def test_go_visitor_method_signatures():
+    """A method's signature includes ``func``, the receiver type and parameters."""
+    nodes, _ = _parse_go()
+    fetch = next(n for n in nodes if n.name == "Fetch")
+    for fragment in ("func", "Dog", "item"):
+        assert fragment in fetch.signature
+
+
+def test_go_visitor_line_numbers():
+    """Every Go node starts at line 1 or later and does not end before it starts."""
+    nodes, _ = _parse_go()
+    for parsed in nodes:
+        assert parsed.line >= 1
+        if not parsed.end_line:
+            continue
+        assert parsed.end_line >= parsed.line
+
+
+def test_go_visitor_contains_edges():
+    """``contains`` edges exist and at least one starts at the module."""
+    _, edges = _parse_go()
+    containment = [e for e in edges if e.kind == "contains"]
+
+    assert len(containment) > 0
+    # The module node's ID is the file name passed to parse_file.
+    containing_ids = [e.source for e in containment]
+    assert "main.go" in containing_ids
+
+
+def test_load_visitors_go():
+    """Requesting 'go' loads exactly one visitor, handling ``.go``."""
+    from theow._codegraph._visitors import load_visitors
+
+    loaded = load_visitors(["go"])
+    assert len(loaded) == 1
+    only_visitor = loaded[0]
+    assert only_visitor.extensions == [".go"]
+
+
+def test_load_visitors_both():
+    """Requesting two languages loads one visitor for each."""
+    from theow._codegraph._visitors import load_visitors
+
+    requested = ["python", "go"]
+    loaded = load_visitors(requested)
+    assert len(loaded) == 2
diff --git a/tests/core/test_logging.py b/tests/core/test_logging.py
index d00d2c6..eaab811 100644
--- a/tests/core/test_logging.py
+++ b/tests/core/test_logging.py
@@ -78,6 +78,7 @@ def test_component_map_coverage():
"engine",
"chroma",
"cli",
+ "codegraph",
}
actual_components = set(COMPONENT_MAP.values())
assert actual_components == expected_components
diff --git a/tox.ini b/tox.ini
index 366ffc4..bd8d0ed 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,7 @@
[tox]
no_package = True
skip_missing_interpreters = True
-env_list = fmt,lint,static,unit
+env_list = fmt,lint,static,unit,codegraph
min_version = 4.0.0
[vars]
@@ -29,9 +29,14 @@ commands =
[testenv:static]
description = Run static type checking with ty
commands =
- uv run {[vars]uv_flags} ty check {[vars]src}
+ uv run {[vars]uv_flags} --all-extras ty check {[vars]src}
[testenv:unit]
-description = Run unit tests with coverage
+description = Run core unit tests with coverage
commands =
- uv run {[vars]uv_flags} pytest {[vars]tests} -v --cov={[vars]src}/theow --cov-report=term-missing
+ uv run {[vars]uv_flags} pytest {[vars]tests}/core {[vars]tests}/gateway {[vars]tests}/cli -v --cov={[vars]src}/theow --cov-report=term-missing --cov-config={tox_root}/.coveragerc-unit
+
+[testenv:codegraph]
+description = Run codegraph tests with coverage
+commands =
+ uv run {[vars]uv_flags} --extra codegraph pytest {[vars]tests}/codegraph -v --cov={[vars]src}/theow/_codegraph --cov-report=term-missing
diff --git a/uv.lock b/uv.lock
index 9f3ecb5..f1a9d3c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1876,6 +1876,13 @@ dependencies = [
{ name = "typer" },
]
+[package.optional-dependencies]
+codegraph = [
+ { name = "tree-sitter" },
+ { name = "tree-sitter-go" },
+ { name = "tree-sitter-python" },
+]
+
[package.dev-dependencies]
dev = [
{ name = "pytest" },
@@ -1896,9 +1903,12 @@ requires-dist = [
{ name = "pyyaml", specifier = ">=6.0" },
{ name = "rich", specifier = ">=13.0" },
{ name = "structlog", specifier = ">=24.0" },
+ { name = "tree-sitter", marker = "extra == 'codegraph'", specifier = ">=0.23" },
+ { name = "tree-sitter-go", marker = "extra == 'codegraph'", specifier = ">=0.23" },
+ { name = "tree-sitter-python", marker = "extra == 'codegraph'", specifier = ">=0.23" },
{ name = "typer", specifier = ">=0.12" },
]
-provides-extras = ["daemon"]
+provides-extras = ["codegraph", "daemon"]
[package.metadata.requires-dev]
dev = [
@@ -1982,6 +1992,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
]
+[[package]]
+name = "tree-sitter"
+version = "0.25.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941, upload-time = "2025-09-25T17:37:34.813Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699, upload-time = "2025-09-25T17:37:36.349Z" },
+ { url = "https://files.pythonhosted.org/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125, upload-time = "2025-09-25T17:37:37.725Z" },
+ { url = "https://files.pythonhosted.org/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418, upload-time = "2025-09-25T17:37:38.922Z" },
+ { url = "https://files.pythonhosted.org/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250, upload-time = "2025-09-25T17:37:40.039Z" },
+ { url = "https://files.pythonhosted.org/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156, upload-time = "2025-09-25T17:37:41.132Z" },
+ { url = "https://files.pythonhosted.org/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984, upload-time = "2025-09-25T17:37:42.074Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/67/67492014ce32729b63d7ef318a19f9cfedd855d677de5773476caf771e96/tree_sitter-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0628671f0de69bb279558ef6b640bcfc97864fe0026d840f872728a86cd6b6cd", size = 146926, upload-time = "2025-09-25T17:37:43.041Z" },
+ { url = "https://files.pythonhosted.org/packages/4e/9c/a278b15e6b263e86c5e301c82a60923fa7c59d44f78d7a110a89a413e640/tree_sitter-0.25.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f5ddcd3e291a749b62521f71fc953f66f5fd9743973fd6dd962b092773569601", size = 137712, upload-time = "2025-09-25T17:37:44.039Z" },
+ { url = "https://files.pythonhosted.org/packages/54/9a/423bba15d2bf6473ba67846ba5244b988cd97a4b1ea2b146822162256794/tree_sitter-0.25.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd88fbb0f6c3a0f28f0a68d72df88e9755cf5215bae146f5a1bdc8362b772053", size = 607873, upload-time = "2025-09-25T17:37:45.477Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/4c/b430d2cb43f8badfb3a3fa9d6cd7c8247698187b5674008c9d67b2a90c8e/tree_sitter-0.25.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b878e296e63661c8e124177cc3084b041ba3f5936b43076d57c487822426f614", size = 636313, upload-time = "2025-09-25T17:37:46.68Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/27/5f97098dbba807331d666a0997662e82d066e84b17d92efab575d283822f/tree_sitter-0.25.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d77605e0d353ba3fe5627e5490f0fbfe44141bafa4478d88ef7954a61a848dae", size = 631370, upload-time = "2025-09-25T17:37:47.993Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/3c/87caaed663fabc35e18dc704cd0e9800a0ee2f22bd18b9cbe7c10799895d/tree_sitter-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:463c032bd02052d934daa5f45d183e0521ceb783c2548501cf034b0beba92c9b", size = 127157, upload-time = "2025-09-25T17:37:48.967Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/23/f8467b408b7988aff4ea40946a4bd1a2c1a73d17156a9d039bbaff1e2ceb/tree_sitter-0.25.2-cp313-cp313-win_arm64.whl", hash = "sha256:b3f63a1796886249bd22c559a5944d64d05d43f2be72961624278eff0dcc5cb8", size = 113975, upload-time = "2025-09-25T17:37:49.922Z" },
+]
+
+[[package]]
+name = "tree-sitter-go"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/05/727308adbbc79bcb1c92fc0ea10556a735f9d0f0a5435a18f59d40f7fd77/tree_sitter_go-0.25.0.tar.gz", hash = "sha256:a7466e9b8d94dda94cae8d91629f26edb2d26166fd454d4831c3bf6dfa2e8d68", size = 93890, upload-time = "2025-08-29T06:20:25.044Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ca/aa/0984707acc2b9bb461fe4a41e7e0fc5b2b1e245c32820f0c83b3c602957c/tree_sitter_go-0.25.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b852993063a3429a443e7bd0aa376dd7dd329d595819fabf56ac4cf9d7257b54", size = 47117, upload-time = "2025-08-29T06:20:14.286Z" },
+ { url = "https://files.pythonhosted.org/packages/32/16/dd4cb124b35e99239ab3624225da07d4cb8da4d8564ed81d03fcb3a6ba9f/tree_sitter_go-0.25.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:503b81a2b4c31e302869a1de3a352ad0912ccab3df9ac9950197b0a9ceeabd8f", size = 48674, upload-time = "2025-08-29T06:20:17.557Z" },
+ { url = "https://files.pythonhosted.org/packages/86/fb/b30d63a08044115d8b8bd196c6c2ab4325fb8db5757249a4ef0563966e2e/tree_sitter_go-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04b3b3cb4aff18e74e28d49b716c6f24cb71ddfdd66768987e26e4d0fa812f74", size = 66418, upload-time = "2025-08-29T06:20:18.345Z" },
+ { url = "https://files.pythonhosted.org/packages/26/21/d3d88a30ad007419b2c97b3baeeef7431407faf9f686195b6f1cad0aedf9/tree_sitter_go-0.25.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:148255aca2f54b90d48c48a9dbb4c7faad6cad310a980b2c5a5a9822057ed145", size = 72006, upload-time = "2025-08-29T06:20:19.14Z" },
+ { url = "https://files.pythonhosted.org/packages/cd/d0/0dd6442353ced8a88bbda9e546f4ea29e381b59b5a40b122e5abb586bb6c/tree_sitter_go-0.25.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4d338116cdf8a6c6ff990d2441929b41323ef17c710407abe0993c13417d6aad", size = 70603, upload-time = "2025-08-29T06:20:21.544Z" },
+ { url = "https://files.pythonhosted.org/packages/01/e2/ee5e09f63504fc286539535d374d2eaa0e7d489b80f8f744bb3962aff22a/tree_sitter_go-0.25.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5608e089d2a29fa8d2b327abeb2ad1cdb8e223c440a6b0ceab0d3fa80bdeebae", size = 66088, upload-time = "2025-08-29T06:20:22.336Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/b6/d9142583374720e79aca9ccb394b3795149a54c012e1dfd80738df2d984e/tree_sitter_go-0.25.0-cp310-abi3-win_amd64.whl", hash = "sha256:30d4ada57a223dfc2c32d942f44d284d40f3d1215ddcf108f96807fd36d53022", size = 48152, upload-time = "2025-08-29T06:20:23.089Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/00/9a2638e7339236f5b01622952a4d71c1474dd3783d1982a89555fc1f03b1/tree_sitter_go-0.25.0-cp310-abi3-win_arm64.whl", hash = "sha256:d5d62362059bf79997340773d47cc7e7e002883b527a05cca829c46e40b70ded", size = 46752, upload-time = "2025-08-29T06:20:24.235Z" },
+]
+
+[[package]]
+name = "tree-sitter-python"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b8/8b/c992ff0e768cb6768d5c96234579bf8842b3a633db641455d86dd30d5dac/tree_sitter_python-0.25.0.tar.gz", hash = "sha256:b13e090f725f5b9c86aa455a268553c65cadf325471ad5b65cd29cac8a1a68ac", size = 159845, upload-time = "2025-09-11T06:47:58.159Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cf/64/a4e503c78a4eb3ac46d8e72a29c1b1237fa85238d8e972b063e0751f5a94/tree_sitter_python-0.25.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:14a79a47ddef72f987d5a2c122d148a812169d7484ff5c75a3db9609d419f361", size = 73790, upload-time = "2025-09-11T06:47:47.652Z" },
+ { url = "https://files.pythonhosted.org/packages/e6/1d/60d8c2a0cc63d6ec4ba4e99ce61b802d2e39ef9db799bdf2a8f932a6cd4b/tree_sitter_python-0.25.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:480c21dbd995b7fe44813e741d71fed10ba695e7caab627fb034e3828469d762", size = 76691, upload-time = "2025-09-11T06:47:49.038Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/cb/d9b0b67d037922d60cbe0359e0c86457c2da721bc714381a63e2c8e35eba/tree_sitter_python-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:86f118e5eecad616ecdb81d171a36dde9bef5a0b21ed71ea9c3e390813c3baf5", size = 108133, upload-time = "2025-09-11T06:47:50.499Z" },
+ { url = "https://files.pythonhosted.org/packages/40/bd/bf4787f57e6b2860f3f1c8c62f045b39fb32d6bac4b53d7a9e66de968440/tree_sitter_python-0.25.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be71650ca2b93b6e9649e5d65c6811aad87a7614c8c1003246b303f6b150f61b", size = 110603, upload-time = "2025-09-11T06:47:51.985Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/25/feff09f5c2f32484fbce15db8b49455c7572346ce61a699a41972dea7318/tree_sitter_python-0.25.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e6d5b5799628cc0f24691ab2a172a8e676f668fe90dc60468bee14084a35c16d", size = 108998, upload-time = "2025-09-11T06:47:53.046Z" },
+ { url = "https://files.pythonhosted.org/packages/75/69/4946da3d6c0df316ccb938316ce007fb565d08f89d02d854f2d308f0309f/tree_sitter_python-0.25.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:71959832fc5d9642e52c11f2f7d79ae520b461e63334927e93ca46cd61cd9683", size = 107268, upload-time = "2025-09-11T06:47:54.388Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/a2/996fc2dfa1076dc460d3e2f3c75974ea4b8f02f6bc925383aaae519920e8/tree_sitter_python-0.25.0-cp310-abi3-win_amd64.whl", hash = "sha256:9bcde33f18792de54ee579b00e1b4fe186b7926825444766f849bf7181793a76", size = 76073, upload-time = "2025-09-11T06:47:55.773Z" },
+ { url = "https://files.pythonhosted.org/packages/07/19/4b5569d9b1ebebb5907d11554a96ef3fa09364a30fcfabeff587495b512f/tree_sitter_python-0.25.0-cp310-abi3-win_arm64.whl", hash = "sha256:0fbf6a3774ad7e89ee891851204c2e2c47e12b63a5edbe2e9156997731c128bb", size = 74169, upload-time = "2025-09-11T06:47:56.747Z" },
+]
+
[[package]]
name = "ty"
version = "0.0.18"