diff --git a/.coveragerc-unit b/.coveragerc-unit new file mode 100644 index 0000000..b5d5a89 --- /dev/null +++ b/.coveragerc-unit @@ -0,0 +1,7 @@ +[run] +omit = + src/theow/_codegraph/* + src/theow/codegraph.py + +[report] +fail_under = 85 diff --git a/README.md b/README.md index 06401df..9fea54d 100644 --- a/README.md +++ b/README.md @@ -391,6 +391,21 @@ def run_safe_command(cmd: str) -> dict: This is the key to secure automation. You define the blast radius. The LLM operates within those boundaries. +### CodeGraph + +CodeGraph gives the explorer structural awareness of your codebase. Instead of reading entire files to orient, the LLM queries a tree-sitter based graph for symbols, call chains, imports, and class hierarchies. + +```python +from theow.codegraph import CodeGraph + +graph = CodeGraph(root="./src") +pipeline_agent.tool()(graph.search_code) +``` + +The LLM gets a single `search_code` tool that supports multiple scopes: find symbols by name, trace callers/callees, list file contents, follow class hierarchies, and find paths between symbols. + +CodeGraph is an optional dependency — install with `pip install theow[codegraph]`. See the [CodeGraph README](src/theow/_codegraph/README.md) for full documentation. + ## LLM Based Actions Rules can invoke the LLM directly on match instead of running a deterministic action. Useful for failures that need dynamic investigation rather than a fixed fix. diff --git a/pyproject.toml b/pyproject.toml index ddeacc4..7e7b6ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,11 @@ theow = "theow._cli:app" daemon = [ # Future: trio, httpx, etc. 
for server mode ] +codegraph = [ + "tree-sitter>=0.23", + "tree-sitter-python>=0.23", + "tree-sitter-go>=0.23", +] [dependency-groups] dev = [ @@ -45,6 +50,9 @@ path = "src/theow/_version.py" [tool.hatch.build.targets.wheel] packages = ["src/theow"] +[tool.ty.src] +exclude = ["src/theow/_codegraph/examples/"] + [tool.ruff] line-length = 100 target-version = "py312" diff --git a/src/theow/_codegraph/README.md b/src/theow/_codegraph/README.md new file mode 100644 index 0000000..27cb9c3 --- /dev/null +++ b/src/theow/_codegraph/README.md @@ -0,0 +1,106 @@ +
+ +# CodeGraph + +Tree-sitter based code structure graph for Theow's LLM explorer. Instead of reading entire files to orient (~4000+ tokens), the explorer queries the graph for symbols, call chains, imports, and class hierarchies (~260 tokens). + +
+ +
+Theow CodeGraph +
+Theow's own code graph — generated with examples/visualize.py +
+ +## Install + +CodeGraph is an optional dependency: + +```bash +pip install theow[codegraph] +``` + +## Usage + +```python +from theow import Theow +from theow.codegraph import CodeGraph + +graph = CodeGraph(root="./src") + +engine = Theow(theow_dir=".theow", llm="anthropic/claude-sonnet-4-20250514") +engine.tool()(graph.search_code) +``` + +The graph builds automatically on first `search_code` call. The LLM gets a single tool that covers all navigation needs. + +## `search_code` API + +| Parameter | Description | +|-----------|-------------| +| `query` | Symbol name or substring to search for | +| `kind` | Filter by type: `"function"`, `"class"`, `"module"` | +| `scope` | What to search (see below) | +| `file` | Filter to a specific file | +| `line` | Find the symbol at this line number in file | +| `target` | Target symbol for `"path"` scope | + +### Scopes + +| Scope | Description | Example | +|-------|-------------|---------| +| `symbol` | Find symbols by name (default) | `search_code(query="Rule", kind="class")` | +| `callers` | Who calls this symbol? | `search_code(query="matches", scope="callers")` | +| `callees` | What does this symbol call? | `search_code(query="build", scope="callees")` | +| `references` | All incoming/outgoing relationships | `search_code(query="LLMGateway", scope="references")` | +| `definition` | Where is this symbol defined? | `search_code(scope="definition", file="models.py", line=42)` | +| `file` | List all symbols in a file | `search_code(scope="file", file="_core/_models.py")` | +| `path` | Find relationship path between two symbols | `search_code(query="module.py", scope="path", target="Rule")` | + +## Language Support + +Visitors extract structure from source files using tree-sitter. 
Currently supported: + +- **Python** — functions, classes, methods, imports, calls, decorators, docstrings +- **Go** — functions, methods with receivers, structs, interfaces, imports, calls, struct embedding + +Languages are configured explicitly: + +```python +graph = CodeGraph(root="./src", languages=["python", "go"]) +``` + +Defaults to `["python"]` if not specified. + +## Configuration + +```python +graph = CodeGraph( + root="./src", + languages=["python", "go"], # languages to parse + excludes={"vendor", "testdata"}, # directories to skip + max_file_size=1_000_000, # skip files larger than this (bytes) +) +``` + +Default excludes: `__pycache__`, `.git`, `.tox`, `.venv`, `venv`, `node_modules`, `dist`, `build`, `.mypy_cache`, `.ruff_cache`, `.pytest_cache`. + +## Serialization + +```python +# Save to JSON +graph.to_json("graph.json") + +# Load from cache +graph = CodeGraph.from_json("graph.json") + +# Get JSON string +json_str = graph.to_json() +``` + +## How It Works + +1. **Parse**: Tree-sitter visitors walk source files and extract `Node` (symbols) and `Edge` (relationships) objects +2. **Index**: Nodes are indexed by file path and short name for fast lookup +3. **Resolve**: Symbolic call targets (short names like `helper`) are resolved to fully qualified node IDs, preferring same-file matches +4. 
**Query**: `search_code` navigates the graph using adjacency lists and BFS — no external graph library needed diff --git a/src/theow/_codegraph/__init__.py b/src/theow/_codegraph/__init__.py new file mode 100644 index 0000000..7e631d9 --- /dev/null +++ b/src/theow/_codegraph/__init__.py @@ -0,0 +1,5 @@ +"""CodeGraph: tree-sitter based code structure graph for LLM exploration.""" + +from theow._codegraph._graph import CodeGraph + +__all__ = ["CodeGraph"] diff --git a/src/theow/_codegraph/_graph.py b/src/theow/_codegraph/_graph.py new file mode 100644 index 0000000..85b4a92 --- /dev/null +++ b/src/theow/_codegraph/_graph.py @@ -0,0 +1,457 @@ +"""CodeGraph: tree-sitter based code structure graph.""" + +import json +from collections import deque +from pathlib import Path + +from theow._codegraph._models import Edge, Node, SearchResult +from theow._codegraph._visitors import LanguageVisitor, load_visitors +from theow._core._logging import get_logger + +logger = get_logger(__name__) + +DEFAULT_EXCLUDES = frozenset( + { + "__pycache__", + ".git", + ".tox", + ".venv", + "venv", + "node_modules", + "dist", + "build", + ".mypy_cache", + ".ruff_cache", + ".pytest_cache", + } +) + + +class CodeGraph: + """A queryable graph of code symbols and relationships. + + Parses source files with tree-sitter visitors and builds a directed graph + of modules, classes, functions, and their relationships (calls, imports, + inheritance, containment). + + Args: + root: Root directory of the codebase to index. + languages: Language names to load visitors for (e.g. ["python", "go"]). + Defaults to ["python"]. + visitors: Pre-built visitor instances. Overrides ``languages`` if given. + excludes: Directory names to skip during traversal. + max_file_size: Skip files larger than this (bytes). 
def __init__(
    self,
    root: str | Path,
    languages: list[str] | None = None,
    visitors: list[LanguageVisitor] | None = None,
    excludes: set[str] | None = None,
    max_file_size: int = 1_000_000,
) -> None:
    """Configure the graph; nothing is parsed until ``build()`` runs.

    Args:
        root: Root directory of the codebase; stored resolved to an absolute path.
        languages: Language names passed to ``load_visitors``; ``["python"]``
            is used when this is None or empty.
        visitors: Ready-made visitor instances. When given (even an empty
            list) ``languages`` is ignored.
        excludes: Directory names to skip. A copy of ``DEFAULT_EXCLUDES`` is
            used when None; a caller-supplied set is stored as-is.
        max_file_size: Size threshold in bytes above which files are skipped.
    """
    # Graph storage: node table, two lookup indexes, and forward/reverse
    # adjacency maps (source -> {target -> edge_data} and the mirror image).
    self._nodes: dict[str, Node] = {}
    self._file_index: dict[str, list[str]] = {}
    self._name_index: dict[str, list[str]] = {}
    self._fwd: dict[str, dict[str, dict]] = {}
    self._rev: dict[str, dict[str, dict]] = {}

    self._built = False
    self._max_file_size = max_file_size
    self._root = Path(root).resolve()

    if excludes is None:
        self._excludes = set(DEFAULT_EXCLUDES)
    else:
        self._excludes = excludes

    self._visitors = (
        load_visitors(languages or ["python"]) if visitors is None else list(visitors)
    )

    # Extension -> visitor lookup; when two visitors claim the same extension
    # the later one in the list wins.
    self._ext_map: dict[str, LanguageVisitor] = {
        extension: visitor for visitor in self._visitors for extension in visitor.extensions
    }
Idempotent.""" + if self._built: + self._fwd.clear() + self._rev.clear() + self._nodes.clear() + self._file_index.clear() + self._name_index.clear() + + files_parsed = 0 + for path in self._iter_files(): + self.build_file(path) + files_parsed += 1 + + resolved, unresolved = self._resolve_edges() + self._built = True + logger.info( + "Code graph built", + files=files_parsed, + nodes=len(self._nodes), + edges=self._edge_count(), + resolved_refs=resolved, + unresolved_refs=unresolved, + ) + + def build_file(self, path: Path) -> None: + """Parse a single file and add its nodes/edges to the graph.""" + try: + source = path.read_text(encoding="utf-8", errors="replace") + except OSError: + return + + relative_path = str(path.relative_to(self._root)) + ext = path.suffix + visitor = self._ext_map.get(ext) + if not visitor: + return + + try: + nodes, edges = visitor.parse_file(path, source, relative_path) + except Exception as e: + logger.warning("Parse failed", file=relative_path, error=str(e)) + return + + for node in nodes: + self._nodes[node.id] = node + self._file_index.setdefault(node.file, []).append(node.id) + self._name_index.setdefault(node.name, []).append(node.id) + + for edge in edges: + self._add_edge(edge.source, edge.target, kind=edge.kind, line=edge.line) + + def _iter_files(self): + """Walk root directory, yielding files that match visitor extensions.""" + for path in sorted(self._root.rglob("*")): + if not path.is_file(): + continue + if any(part in self._excludes for part in path.parts): + continue + if path.stat().st_size > self._max_file_size: + logger.debug("Skipping large file", file=str(path.relative_to(self._root))) + continue + if path.suffix in self._ext_map: + yield path + + def _resolve_edges(self) -> tuple[int, int]: + """Resolve symbolic call/import targets to fully qualified node IDs. + + For call edges where the target is a short name (e.g. "helper"), + try to resolve it to a fully qualified node ID. Same-file preference + breaks ambiguity. 
+ + Returns: + (resolved_count, unresolved_count) tuple. + """ + edges_to_resolve = [] + for source, targets in self._fwd.items(): + for target, data in targets.items(): + if data.get("kind") in ("calls", "inherits") and target not in self._nodes: + edges_to_resolve.append((source, target, data)) + + resolved_count = 0 + for source, target, data in edges_to_resolve: + resolved = self._resolve_name(target, source) + if resolved and resolved != target: + self._remove_edge(source, target) + self._add_edge(source, resolved, **data) + resolved_count += 1 + + return resolved_count, len(edges_to_resolve) - resolved_count + + def _resolve_name(self, name: str, context_id: str) -> str | None: + """Resolve a short name to a node ID, preferring same-file matches.""" + # For attribute access like "self.method" or "obj.method", use the last part + short = name.rsplit(".", 1)[-1] if "." in name else name + + candidates = self._name_index.get(short, []) + if not candidates: + return None + if len(candidates) == 1: + return candidates[0] + + # Prefer same-file + context_file = self._nodes[context_id].file if context_id in self._nodes else "" + for cid in candidates: + if cid in self._nodes and self._nodes[cid].file == context_file: + return cid + + return candidates[0] + + def _add_edge(self, source: str, target: str, **data) -> None: + self._fwd.setdefault(source, {})[target] = data + self._rev.setdefault(target, {})[source] = data + + def _remove_edge(self, source: str, target: str) -> None: + if source in self._fwd: + self._fwd[source].pop(target, None) + if target in self._rev: + self._rev[target].pop(source, None) + + def _edge_count(self) -> int: + return sum(len(targets) for targets in self._fwd.values()) + + def _bfs_path(self, source: str, target: str) -> list[str] | None: + """BFS shortest path from source to target.""" + if source == target: + return [source] + visited = {source} + queue: deque[list[str]] = deque([[source]]) + while queue: + path = queue.popleft() + for 
neighbor in self._fwd.get(path[-1], {}): + if neighbor == target: + return path + [neighbor] + if neighbor not in visited: + visited.add(neighbor) + queue.append(path + [neighbor]) + return None + + def search_code( + self, + query: str = "", + kind: str = "", + scope: str = "symbol", + file: str = "", + line: int = 0, + target: str = "", + ) -> list[dict]: + """Search the codebase structure. Navigate classes, functions, imports, and call relationships. + + Args: + query: Symbol name or substring to search for. + kind: Filter by type: "function", "class", "module", or "import". + scope: What to search for: + - "symbol": Find symbols by name (default) + - "callers": Who calls this symbol? + - "callees": What does this symbol call? + - "references": All incoming/outgoing relationships + - "definition": Where is this symbol defined? + - "file": List all symbols in a file + - "path": Find relationship path between query and target + file: Filter to a specific file, or target file for "file" scope. + line: Find the symbol at this line number in file. + target: Target symbol for "path" scope. 
+ """ + if not self._built: + self.build() + + logger.debug("search_code", scope=scope, query=query, kind=kind, file=file) + + if scope == "file": + return self._search_file(file or query) + if scope == "callers": + return self._search_callers(query, kind) + if scope == "callees": + return self._search_callees(query, kind) + if scope == "references": + return self._search_references(query) + if scope == "definition": + return self._search_definition(query, file, line) + if scope == "path": + return self._search_path(query, target) + # Default: symbol search + return self._search_symbol(query, kind, file) + + def _search_symbol(self, query: str, kind: str, file: str) -> list[dict]: + """Find symbols matching query by name.""" + results: list[SearchResult] = [] + for node_id, node in self._nodes.items(): + if kind and node.kind != kind: + continue + if file and node.file != file: + continue + if query and query.lower() not in node.name.lower() and query.lower() not in node_id.lower(): + continue + relevance = "exact" if node.name == query else "substring" + results.append(SearchResult(node=node, relevance=relevance)) + + # Sort: exact matches first, then by file + line + results.sort(key=lambda r: (r.relevance != "exact", r.node.file, r.node.line)) + return [r.to_dict() for r in results[:50]] + + def _search_file(self, file: str) -> list[dict]: + """List all symbols in a file.""" + node_ids = self._file_index.get(file, []) + results = [] + for nid in node_ids: + node = self._nodes[nid] + results.append(SearchResult(node=node).to_dict()) + results.sort(key=lambda r: r["line"]) + return results + + def _search_callers(self, query: str, kind: str) -> list[dict]: + """Find symbols that call the queried symbol.""" + target_ids = self._find_node_ids(query, kind) + results: list[dict] = [] + for tid in target_ids: + for pred, edge_data in self._rev.get(tid, {}).items(): + if edge_data.get("kind") == "calls" and pred in self._nodes: + results.append( + SearchResult( + 
def _search_definition(self, query: str, file: str, line: int) -> list[dict]:
    """Find the definition of a symbol.

    When both ``file`` and ``line`` are given, return the innermost non-module
    symbol in that file whose line span contains ``line``. Otherwise, or when
    no symbol spans that line, fall back to a name search for ``query``
    restricted to ``file``.

    Returns:
        A one-element list for a file:line hit, else the name-search results.
    """
    if file and line:
        enclosing = [
            node
            for node in (self._nodes[nid] for nid in self._file_index.get(file, []))
            if node.kind != "module" and node.line <= line <= (node.end_line or node.line)
        ]
        if enclosing:
            # Several symbols can span the line (a class and one of its
            # methods). The file index lists the class first, so returning the
            # first hit would always report the class; pick the tightest span.
            innermost = min(enclosing, key=lambda n: (n.end_line or n.line) - n.line)
            return [SearchResult(node=innermost, relevance="exact").to_dict()]

    # Fall back to name search
    return self._search_symbol(query, "", file)
source_ids = self._find_node_ids(query, "") + target_ids = self._find_node_ids(target, "") + + for sid in source_ids: + for tid in target_ids: + path = self._bfs_path(sid, tid) + if path is None: + continue + + results: list[dict] = [] + for i, nid in enumerate(path): + if nid in self._nodes: + ctx = "" + if i < len(path) - 1: + edge_data = self._fwd.get(nid, {}).get(path[i + 1], {}) + ctx = f"--{edge_data.get('kind', '?')}--> {path[i + 1]}" + results.append( + SearchResult( + node=self._nodes[nid], + context=ctx, + relevance=f"step {i}", + ).to_dict() + ) + return results + + return [] + + def _find_node_ids(self, query: str, kind: str) -> list[str]: + """Find node IDs matching a query string.""" + # Exact ID match + if query in self._nodes: + return [query] + + # Exact name match + candidates = self._name_index.get(query, []) + if candidates: + if kind: + candidates = [c for c in candidates if self._nodes[c].kind == kind] + return candidates + + # Substring search + results = [] + for nid, node in self._nodes.items(): + if kind and node.kind != kind: + continue + if query.lower() in node.name.lower(): + results.append(nid) + return results + + def to_json(self, path: str | Path | None = None) -> str: + """Serialize graph to JSON.""" + edges = [] + for source, targets in self._fwd.items(): + for target, data in targets.items(): + edges.append( + Edge( + source=source, + target=target, + kind=data.get("kind", ""), + line=data.get("line", 0), + ).to_dict() + ) + data = { + "root": str(self._root), + "nodes": [n.to_dict() for n in self._nodes.values()], + "edges": edges, + } + result = json.dumps(data, indent=2) + if path: + Path(path).write_text(result) + return result + + @classmethod + def from_json(cls, path: str | Path) -> "CodeGraph": + """Load a cached graph from JSON.""" + raw = json.loads(Path(path).read_text()) + graph = cls(root=raw["root"], visitors=[]) + for node_data in raw["nodes"]: + node = Node.from_dict(node_data) + graph._nodes[node.id] = node + 
@dataclass(frozen=True)
class Node:
    """A symbol in the code graph (module, class, or function)."""

    id: str
    kind: str  # "module" | "class" | "function"
    name: str
    file: str
    line: int
    end_line: int = 0
    signature: str = ""
    docstring: str = ""
    parent: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict, leaving out optional fields that are empty/zero."""
        payload: dict[str, Any] = {
            key: getattr(self, key) for key in ("id", "kind", "name", "file", "line")
        }
        for key in ("end_line", "signature", "docstring", "parent"):
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Node:
        """Build a Node from a dict; missing optional keys take their defaults."""
        required = {key: data[key] for key in ("id", "kind", "name", "file", "line")}
        optional = {
            "end_line": data.get("end_line", 0),
            "signature": data.get("signature", ""),
            "docstring": data.get("docstring", ""),
            "parent": data.get("parent", ""),
        }
        return cls(**required, **optional)


@dataclass(frozen=True)
class Edge:
    """A relationship between two nodes in the code graph."""

    source: str
    target: str
    kind: str  # "defines" | "contains" | "calls" | "imports" | "inherits"
    line: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; ``line`` is included only when non-zero."""
        payload: dict[str, Any] = {
            "source": self.source,
            "target": self.target,
            "kind": self.kind,
        }
        if self.line:
            payload["line"] = self.line
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Edge:
        """Build an Edge from a dict; a missing ``line`` becomes 0."""
        line_number = data.get("line", 0)
        return cls(data["source"], data["target"], data["kind"], line_number)


@dataclass(frozen=True)
class SearchResult:
    """A search result wrapping a Node with context."""

    node: Node
    context: str = ""
    relevance: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the node's dict plus non-empty ``context`` / ``relevance`` keys."""
        payload: dict[str, Any] = self.node.to_dict()
        extras = (("context", self.context), ("relevance", self.relevance))
        payload.update({key: value for key, value in extras if value})
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SearchResult:
        """Rebuild a SearchResult from the flat dict produced by ``to_dict``."""
        wrapped = Node.from_dict(data)
        return cls(
            node=wrapped,
            context=data.get("context", ""),
            relevance=data.get("relevance", ""),
        )
# Registry mapping language name -> (module_path, class_name)
BUILTIN_VISITORS: dict[str, tuple[str, str]] = {
    "python": ("theow._codegraph._visitors._python", "PythonVisitor"),
    "go": ("theow._codegraph._visitors._go", "GoVisitor"),
}


def load_visitors(languages: list[str]) -> list[LanguageVisitor]:
    """Instantiate one visitor per requested language, in the order given.

    Visitor modules are imported on demand, so a language's tree-sitter
    grammar is only needed when that language is actually requested.

    Args:
        languages: Language names to load (e.g. ["python", "go"]).

    Returns:
        A list of freshly constructed visitor instances.

    Raises:
        ValueError: If a language name is not in the registry.
        ImportError: If a language's tree-sitter grammar is not installed.
    """
    loaded: list[LanguageVisitor] = []
    for language in languages:
        if language not in BUILTIN_VISITORS:
            raise ValueError(
                f"Unknown language: {language!r}. Available: {sorted(BUILTIN_VISITORS)}"
            )
        module_name, visitor_name = BUILTIN_VISITORS[language]
        visitor_cls = getattr(importlib.import_module(module_name), visitor_name)
        loaded.append(visitor_cls())
    return loaded
def _extract_function(
    self,
    node: TSNode,
    parent_id: str,
    relative_path: str,
    nodes: list[Node],
    edges: list[Edge],
) -> None:
    """Record a top-level Go function and the calls made in its body.

    Appends a "function" Node (ID ``<parent_id>::<name>``) and a "contains"
    Edge from ``parent_id``, then scans the body block for call expressions.
    Declarations without an identifier child are ignored.

    Args:
        node: The ``function_declaration`` syntax node.
        parent_id: ID of the enclosing module node.
        relative_path: File path recorded on the new node.
        nodes: Output list the new Node is appended to.
        edges: Output list the new edges are appended to.
    """
    name_node = child_by_type(node, "identifier")
    if not name_node:
        return

    name = text(name_node)
    func_id = f"{parent_id}::{name}"

    params = child_by_type(node, "parameter_list")
    # In the Go grammar "result" is a *field* of function_declaration, not a
    # node type (the result node is a type or a parameter_list), so it has to
    # be fetched by field name. NOTE(review): this assumes child_by_type
    # matches on node.type, in which case the old lookup never found a result
    # and return types were silently dropped from signatures — confirm.
    result = node.child_by_field_name("result")
    sig = f"func {name}"
    if params:
        sig += text(params)
    if result:
        sig += " " + text(result)

    # tree-sitter rows are 0-based; graph lines are 1-based.
    start_line = node.start_point[0] + 1
    end_line = node.end_point[0] + 1

    nodes.append(
        Node(
            id=func_id,
            kind="function",
            name=name,
            file=relative_path,
            line=start_line,
            end_line=end_line,
            signature=sig,
            parent=parent_id,
        )
    )
    edges.append(Edge(source=parent_id, target=func_id, kind="contains", line=start_line))

    body = child_by_type(node, "block")
    if body:
        self._extract_calls_in_body(body, func_id, relative_path, edges)
receiver_node: + for ch in receiver_node.children: + if ch.type == "parameter_declaration": + type_node = last_child_by_type( + ch, "type_identifier" + ) or last_child_by_type(ch, "pointer_type") + if type_node: + receiver_type = text(type_node).lstrip("*") + break + + receiver_id = f"{module_id}::{receiver_type}" if receiver_type else module_id + method_id = f"{receiver_id}::{name}" + + params_nodes = [c for c in node.children if c.type == "parameter_list"] + params_text = text(params_nodes[1]) if len(params_nodes) > 1 else "()" + result = child_by_type(node, "result") + sig = f"func ({receiver_type}) {name}{params_text}" + if result: + sig += " " + text(result) + + start_line = node.start_point[0] + 1 + end_line = node.end_point[0] + 1 + + nodes.append( + Node( + id=method_id, + kind="function", + name=name, + file=relative_path, + line=start_line, + end_line=end_line, + signature=sig, + parent=receiver_id, + ) + ) + edges.append( + Edge(source=receiver_id, target=method_id, kind="contains", line=start_line) + ) + + body = child_by_type(node, "block") + if body: + self._extract_calls_in_body(body, method_id, relative_path, edges) + + def _extract_type_declaration( + self, + node: TSNode, + parent_id: str, + relative_path: str, + nodes: list[Node], + edges: list[Edge], + ) -> None: + for child in node.children: + if child.type == "type_spec": + self._extract_type_spec(child, parent_id, relative_path, nodes, edges) + + def _extract_type_spec( + self, + node: TSNode, + parent_id: str, + relative_path: str, + nodes: list[Node], + edges: list[Edge], + ) -> None: + name_node = child_by_type(node, "type_identifier") + if not name_node: + return + + name = text(name_node) + type_id = f"{parent_id}::{name}" + + struct_type = child_by_type(node, "struct_type") + kind = "class" + + start_line = node.start_point[0] + 1 + end_line = node.end_point[0] + 1 + + nodes.append( + Node( + id=type_id, + kind=kind, + name=name, + file=relative_path, + line=start_line, + 
end_line=end_line, + parent=parent_id, + ) + ) + edges.append(Edge(source=parent_id, target=type_id, kind="contains", line=start_line)) + + if struct_type: + field_list = child_by_type(struct_type, "field_declaration_list") + if field_list: + for field in field_list.children: + if field.type == "field_declaration": + children = [c for c in field.children if c.type != "comment"] + if len(children) == 1 and children[0].type == "type_identifier": + embedded = text(children[0]) + edges.append( + Edge( + source=type_id, + target=embedded, + kind="inherits", + line=field.start_point[0] + 1, + ) + ) + + def _extract_imports( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + """Extract import declarations.""" + for child in node.children: + if child.type == "import_spec": + path_node = child_by_type(child, "interpreted_string_literal") + if path_node: + import_path = text(path_node).strip('"') + edges.append( + Edge( + source=parent_id, + target=import_path, + kind="imports", + line=child.start_point[0] + 1, + ) + ) + elif child.type == "import_spec_list": + for spec in child.children: + if spec.type == "import_spec": + path_node = child_by_type(spec, "interpreted_string_literal") + if path_node: + import_path = text(path_node).strip('"') + edges.append( + Edge( + source=parent_id, + target=import_path, + kind="imports", + line=spec.start_point[0] + 1, + ) + ) + + def _extract_calls_in_body( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + """Recursively find call expressions in a body block.""" + for child in node.children: + if child.type == "call_expression": + self._extract_call(child, parent_id, relative_path, edges) + else: + self._extract_calls_in_body(child, parent_id, relative_path, edges) + + def _extract_call( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + """Extract a call expression into an edge.""" + func = 
def parse_file(
    self, path: Path, source: str, relative_path: str
) -> tuple[list[Node], list[Edge]]:
    """Parse a Python file and extract structural nodes and edges.

    The first node returned is always the module itself: its ID is
    ``relative_path``, its name is the file stem, and it spans line 1 to the
    newline count plus one. Everything else is collected by ``_walk``.

    Args:
        path: Path of the file (not read here; ``source`` is used instead).
        source: Full text of the file.
        relative_path: Path recorded as the module ID and as each node's file.

    Returns:
        ``(nodes, edges)`` extracted from the file.
    """
    syntax_tree = self._parser.parse(source.encode())

    module_node = Node(
        id=relative_path,
        kind="module",
        name=Path(relative_path).stem,
        file=relative_path,
        line=1,
        end_line=source.count("\n") + 1,
    )
    collected_nodes: list[Node] = [module_node]
    collected_edges: list[Edge] = []

    self._walk(
        syntax_tree.root_node,
        module_node.id,
        relative_path,
        collected_nodes,
        collected_edges,
    )
    return collected_nodes, collected_edges
symbols.""" + for child in node.children: + if child.type == "function_definition": + self._extract_function(child, parent_id, relative_path, nodes, edges) + elif child.type == "decorated_definition": + for inner in child.children: + if inner.type == "function_definition": + self._extract_function( + inner, parent_id, relative_path, nodes, edges + ) + elif inner.type == "class_definition": + self._extract_class( + inner, parent_id, relative_path, nodes, edges + ) + elif child.type == "class_definition": + self._extract_class(child, parent_id, relative_path, nodes, edges) + elif child.type == "import_statement": + self._extract_import(child, parent_id, relative_path, edges) + elif child.type == "import_from_statement": + self._extract_import_from(child, parent_id, relative_path, edges) + elif child.type == "expression_statement": + expr = child.children[0] if child.children else None + if expr and expr.type == "call": + self._extract_call(expr, parent_id, relative_path, edges) + + def _extract_function( + self, + node: TSNode, + parent_id: str, + relative_path: str, + nodes: list[Node], + edges: list[Edge], + ) -> None: + name_node = child_by_type(node, "identifier") + if not name_node: + return + + name = text(name_node) + func_id = f"{parent_id}::{name}" + + params_node = child_by_type(node, "parameters") + signature = f"def {name}{text(params_node)}" if params_node else f"def {name}()" + + docstring = _extract_docstring(node) + + start_line = node.start_point[0] + 1 + end_line = node.end_point[0] + 1 + + nodes.append( + Node( + id=func_id, + kind="function", + name=name, + file=relative_path, + line=start_line, + end_line=end_line, + signature=signature, + docstring=docstring, + parent=parent_id, + ) + ) + edges.append(Edge(source=parent_id, target=func_id, kind="contains", line=start_line)) + + body = child_by_type(node, "block") + if body: + self._extract_calls_in_body(body, func_id, relative_path, edges) + + def _extract_class( + self, + node: TSNode, + 
parent_id: str, + relative_path: str, + nodes: list[Node], + edges: list[Edge], + ) -> None: + name_node = child_by_type(node, "identifier") + if not name_node: + return + + name = text(name_node) + class_id = f"{parent_id}::{name}" + + arg_list = child_by_type(node, "argument_list") + if arg_list: + for arg in arg_list.children: + if arg.type == "identifier": + edges.append( + Edge( + source=class_id, + target=text(arg), + kind="inherits", + line=arg.start_point[0] + 1, + ) + ) + + docstring = _extract_docstring(node) + start_line = node.start_point[0] + 1 + end_line = node.end_point[0] + 1 + + nodes.append( + Node( + id=class_id, + kind="class", + name=name, + file=relative_path, + line=start_line, + end_line=end_line, + docstring=docstring, + parent=parent_id, + ) + ) + edges.append(Edge(source=parent_id, target=class_id, kind="contains", line=start_line)) + + body = child_by_type(node, "block") + if body: + self._walk(body, class_id, relative_path, nodes, edges) + + def _extract_import( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + """Extract `import foo` statements.""" + for child in node.children: + if child.type == "dotted_name": + edges.append( + Edge( + source=parent_id, + target=text(child), + kind="imports", + line=node.start_point[0] + 1, + ) + ) + + def _extract_import_from( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + """Extract `from foo import bar` statements.""" + module_name = "" + for child in node.children: + if child.type == "dotted_name" and not module_name: + module_name = text(child) + elif child.type == "relative_import": + module_name = text(child) + + if module_name: + edges.append( + Edge( + source=parent_id, + target=module_name, + kind="imports", + line=node.start_point[0] + 1, + ) + ) + + def _extract_calls_in_body( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + 
"""Recursively find call expressions in a body block.""" + for child in node.children: + if child.type == "call": + self._extract_call(child, parent_id, relative_path, edges) + elif child.type == "expression_statement": + for inner in child.children: + if inner.type == "call": + self._extract_call(inner, parent_id, relative_path, edges) + else: + self._extract_calls_in_body(inner, parent_id, relative_path, edges) + else: + self._extract_calls_in_body(child, parent_id, relative_path, edges) + + def _extract_call( + self, + node: TSNode, + parent_id: str, + relative_path: str, + edges: list[Edge], + ) -> None: + """Extract a call expression into an edge.""" + func = node.children[0] if node.children else None + if not func: + return + + if func.type == "identifier": + target = text(func) + elif func.type == "attribute": + target = text(func) + else: + return + + edges.append( + Edge( + source=parent_id, + target=target, + kind="calls", + line=node.start_point[0] + 1, + ) + ) + + +def _extract_docstring(node: TSNode) -> str: + """Extract docstring from a function or class definition.""" + body = child_by_type(node, "block") + if not body: + return "" + + children = body.children + if not children: + return "" + + first_stmt = children[0] + if first_stmt.type == "expression_statement": + expr = first_stmt.children[0] if first_stmt.children else None + if expr and expr.type == "string": + raw = text(expr) + # Strip triple quotes + for q in ('"""', "'''"): + if raw.startswith(q) and raw.endswith(q): + return raw[3:-3].strip() + # Strip single quotes + for q in ('"', "'"): + if raw.startswith(q) and raw.endswith(q): + return raw[1:-1].strip() + return raw + return "" diff --git a/src/theow/_codegraph/_visitors/_utils.py b/src/theow/_codegraph/_visitors/_utils.py new file mode 100644 index 0000000..bbf1fec --- /dev/null +++ b/src/theow/_codegraph/_visitors/_utils.py @@ -0,0 +1,27 @@ +"""Shared tree-sitter utilities for language visitors.""" + +from __future__ import 
annotations + +from tree_sitter import Node as TSNode + + +def text(node: TSNode) -> str: + """Get node text as a decoded string.""" + return node.text.decode() if node.text else "" + + +def child_by_type(node: TSNode, type_name: str) -> TSNode | None: + """Find first child of a given type.""" + for child in node.children: + if child.type == type_name: + return child + return None + + +def last_child_by_type(node: TSNode, type_name: str) -> TSNode | None: + """Find last child of a given type.""" + result = None + for child in node.children: + if child.type == type_name: + result = child + return result diff --git a/src/theow/_codegraph/examples/theow_graph.html b/src/theow/_codegraph/examples/theow_graph.html new file mode 100644 index 0000000..05ae2a5 --- /dev/null +++ b/src/theow/_codegraph/examples/theow_graph.html @@ -0,0 +1,258 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + +
+
+
0%
+
+
+
+
+
+ + + + + + \ No newline at end of file diff --git a/src/theow/_codegraph/examples/theow_graph.png b/src/theow/_codegraph/examples/theow_graph.png new file mode 100644 index 0000000..4c5946c Binary files /dev/null and b/src/theow/_codegraph/examples/theow_graph.png differ diff --git a/src/theow/_codegraph/examples/visualize.py b/src/theow/_codegraph/examples/visualize.py new file mode 100644 index 0000000..e2083f0 --- /dev/null +++ b/src/theow/_codegraph/examples/visualize.py @@ -0,0 +1,63 @@ +"""Visualize theow's code graph with pyvis. Run: uv run --extra codegraph src/theow/_codegraph/examples/visualize.py""" + +from pathlib import Path + +from pyvis.network import Network + +from theow.codegraph import CodeGraph + +COLORS = { + "module": "#bd93f9", + "class": "#ffb86c", + "function": "#50fa7b", +} + +EDGE_COLORS = { + "contains": "#6272a4", + "calls": "#f1fa8c", + "imports": "#6272a4", + "inherits": "#ff5555", +} + +graph = CodeGraph(root=Path(__file__).resolve().parent.parent.parent.parent.parent / "src" / "theow") +graph.build() + +net = Network(height="100vh", width="100%", directed=True, bgcolor="#282a36", font_color="#f8f8f2") +net.barnes_hut(gravity=-3000, spring_length=150) + +for node in graph._nodes.values(): + label = node.name + title = f"{node.id}\n{node.kind}\n{node.file}:{node.line}" + if node.signature: + title += f"\n{node.signature}" + if node.docstring: + title += f"\n\n{node.docstring[:200]}" + + size = {"module": 25, "class": 18, "function": 12}.get(node.kind, 10) + net.add_node( + node.id, + label=label, + title=title, + color=COLORS.get(node.kind, "#999"), + size=size, + group=node.kind, + ) + +for source, targets in graph._fwd.items(): + for target, data in targets.items(): + if target not in graph._nodes: + continue + kind = data.get("kind", "") + net.add_edge( + source, + target, + title=kind, + color=EDGE_COLORS.get(kind, "#ccc"), + arrows="to", + width=2 if kind in ("calls", "inherits") else 1, + ) + +out = 
Path(__file__).resolve().parent / "theow_graph.html" +net.save_graph(str(out)) +print(f"Graph saved to {out}") +print(f"Nodes: {len(graph._nodes)}, Edges: {graph._edge_count()}") diff --git a/src/theow/_core/_logging.py b/src/theow/_core/_logging.py index a4b1e5c..e247cef 100644 --- a/src/theow/_core/_logging.py +++ b/src/theow/_core/_logging.py @@ -20,6 +20,7 @@ "theow._core._chroma_store": "chroma", "theow._core._recover": "recovery", "theow._cli._run": "cli", + "theow._codegraph._graph": "codegraph", } diff --git a/src/theow/codegraph.py b/src/theow/codegraph.py new file mode 100644 index 0000000..959208a --- /dev/null +++ b/src/theow/codegraph.py @@ -0,0 +1,13 @@ +"""Code graph for consumer opt-in registration. + +Usage: + from theow.codegraph import CodeGraph + + graph = CodeGraph(root="./src") + engine = Theow(theow_dir=".theow", llm="anthropic/claude-sonnet-4-20250514") + engine.tool()(graph.search_code) +""" + +from theow._codegraph import CodeGraph + +__all__ = ["CodeGraph"] diff --git a/tests/codegraph/__init__.py b/tests/codegraph/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/codegraph/conftest.py b/tests/codegraph/conftest.py new file mode 100644 index 0000000..e734360 --- /dev/null +++ b/tests/codegraph/conftest.py @@ -0,0 +1,22 @@ +"""Shared fixtures for codegraph tests.""" + +import tempfile +from pathlib import Path + +import pytest + +# Theow's own source tree as test corpus +THEOW_SRC = Path(__file__).resolve().parent.parent.parent / "src" / "theow" + + +@pytest.fixture +def theow_src(): + """Path to theow's own source tree.""" + return THEOW_SRC + + +@pytest.fixture +def empty_project(): + """Empty directory for edge-case tests.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) diff --git a/tests/codegraph/test_graph.py b/tests/codegraph/test_graph.py new file mode 100644 index 0000000..6d98abd --- /dev/null +++ b/tests/codegraph/test_graph.py @@ -0,0 +1,241 @@ +"""Tests for CodeGraph using 
theow's own source.""" + +import json +import tempfile +from pathlib import Path + +from theow._codegraph._graph import CodeGraph + + +def test_build_on_theow_src(theow_src): + """Building a graph on theow's source should produce real nodes and edges.""" + graph = CodeGraph(root=theow_src) + graph.build() + + assert len(graph._nodes) > 50 + assert graph._edge_count() > 50 + assert graph._built is True + + +def test_build_idempotent(theow_src): + """Calling build() twice should produce the same graph.""" + graph = CodeGraph(root=theow_src) + graph.build() + count_1 = len(graph._nodes) + + graph.build() + count_2 = len(graph._nodes) + + assert count_1 == count_2 + + +def test_build_empty_project(empty_project): + """Empty directory should build an empty graph without errors.""" + graph = CodeGraph(root=empty_project) + graph.build() + assert len(graph._nodes) == 0 + + +def test_auto_build_on_first_search(theow_src): + """search_code should trigger build() if not already built.""" + graph = CodeGraph(root=theow_src) + assert graph._built is False + + results = graph.search_code(query="Rule", kind="class") + assert graph._built is True + assert len(results) > 0 + + +def test_search_symbol_exact(theow_src): + """Exact name match for a known class.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="Rule", kind="class") + + assert any(r["name"] == "Rule" for r in results) + assert results[0]["relevance"] == "exact" + + +def test_search_symbol_substring(theow_src): + """Substring match should find partial name matches.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="Gateway") + + names = {r["name"] for r in results} + assert "LLMGateway" in names + + +def test_search_symbol_by_kind(theow_src): + """Filtering by kind should only return that kind.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="", kind="class") + + for r in results: + assert r["kind"] == "class" + + +def 
test_search_file_scope(theow_src): + """File scope should list all symbols in a specific file.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(scope="file", file="_core/_models.py") + + assert len(results) > 5 + assert all(r["file"] == "_core/_models.py" for r in results) + # Should be sorted by line number + lines = [r["line"] for r in results] + assert lines == sorted(lines) + + +def test_search_callers(theow_src): + """Callers scope should find functions that call the target.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="matches", scope="callers") + + # Something should call matches() + assert len(results) > 0 + assert all(r.get("relevance") == "caller" for r in results) + + +def test_search_callees(theow_src): + """Callees scope should find functions that the target calls.""" + graph = CodeGraph(root=theow_src) + # Fact.matches calls re.search — look for callees of matches + results = graph.search_code(query="matches", scope="callees") + + # matches() should call something + assert len(results) >= 0 # may or may not resolve depending on edge resolution + + +def test_search_references(theow_src): + """References scope should show both incoming and outgoing edges.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="Rule", scope="references") + + # Rule has both incoming (contains) and outgoing edges + assert len(results) > 0 + + +def test_search_definition_by_file_line(theow_src): + """Definition scope with file+line should find the symbol at that location.""" + graph = CodeGraph(root=theow_src) + graph.build() + + # Find Rule's line number first + rule_node = graph._nodes.get("_core/_models.py::Rule") + assert rule_node is not None + + results = graph.search_code( + scope="definition", file="_core/_models.py", line=rule_node.line + ) + assert len(results) > 0 + assert results[0]["name"] == "Rule" + + +def test_search_path(theow_src): + """Path scope should find a relationship chain 
between two symbols.""" + graph = CodeGraph(root=theow_src) + graph.build() + + # _core/_models.py module contains Rule class + results = graph.search_code( + query="_core/_models.py", scope="path", target="Rule" + ) + # Should find a path (module --contains--> Rule) + assert len(results) >= 2 + + +def test_search_no_results(theow_src): + """Searching for a nonexistent symbol returns empty list.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="XyzNonExistent12345") + assert results == [] + + +def test_excludes(theow_src): + """Excluded directories should not appear in the graph.""" + graph = CodeGraph(root=theow_src) + graph.build() + + for node in graph._nodes.values(): + assert "__pycache__" not in node.file + + +def test_max_file_size(theow_src): + """Files above max_file_size should be skipped.""" + graph = CodeGraph(root=theow_src, max_file_size=100) + graph.build() + + # With 100 byte limit, most files should be skipped + assert len(graph._nodes) < 10 + + +def test_file_index(theow_src): + """File index should map files to their node IDs.""" + graph = CodeGraph(root=theow_src) + graph.build() + + assert "_core/_models.py" in graph._file_index + node_ids = graph._file_index["_core/_models.py"] + assert any("Rule" in nid for nid in node_ids) + + +def test_name_index(theow_src): + """Name index should map short names to node IDs.""" + graph = CodeGraph(root=theow_src) + graph.build() + + assert "Rule" in graph._name_index + assert len(graph._name_index["Rule"]) >= 1 + + +def test_json_roundtrip(theow_src): + """Graph should survive JSON serialization and deserialization.""" + graph = CodeGraph(root=theow_src) + graph.build() + + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + json_path = f.name + + graph.to_json(json_path) + loaded = CodeGraph.from_json(json_path) + + assert len(loaded._nodes) == len(graph._nodes) + assert loaded._edge_count() == graph._edge_count() + assert loaded._built is True + + 
Path(json_path).unlink() + + +def test_json_to_string(theow_src): + """to_json without path should return a JSON string.""" + graph = CodeGraph(root=theow_src) + graph.build() + + result = graph.to_json() + data = json.loads(result) + assert "nodes" in data + assert "edges" in data + assert len(data["nodes"]) == len(graph._nodes) + + +def test_custom_excludes(theow_src): + """Custom excludes should override defaults.""" + graph = CodeGraph(root=theow_src, excludes={"_core"}) + graph.build() + + for node in graph._nodes.values(): + assert "_core" not in node.file + + +def test_edge_resolution(theow_src): + """Edge resolution should resolve some symbolic names to node IDs.""" + graph = CodeGraph(root=theow_src) + graph.build() + + # After resolution, some call edges should point to known nodes + resolved_calls = 0 + for source, targets in graph._fwd.items(): + for target, data in targets.items(): + if data.get("kind") == "calls" and target in graph._nodes: + resolved_calls += 1 + + assert resolved_calls > 0 diff --git a/tests/codegraph/test_models.py b/tests/codegraph/test_models.py new file mode 100644 index 0000000..8d00818 --- /dev/null +++ b/tests/codegraph/test_models.py @@ -0,0 +1,95 @@ +"""Tests for codegraph data models.""" + +from theow._codegraph._models import Edge, Node, SearchResult + + +def test_node_to_dict_minimal(): + node = Node(id="mod.py::func", kind="function", name="func", file="mod.py", line=10) + d = node.to_dict() + assert d["id"] == "mod.py::func" + assert d["kind"] == "function" + assert d["line"] == 10 + assert "end_line" not in d + assert "signature" not in d + assert "docstring" not in d + assert "parent" not in d + + +def test_node_to_dict_full(): + node = Node( + id="mod.py::Cls.method", + kind="function", + name="method", + file="mod.py", + line=5, + end_line=15, + signature="def method(self, x)", + docstring="Do stuff.", + parent="mod.py::Cls", + ) + d = node.to_dict() + assert d["end_line"] == 15 + assert d["signature"] == "def 
method(self, x)" + assert d["docstring"] == "Do stuff." + assert d["parent"] == "mod.py::Cls" + + +def test_node_roundtrip(): + node = Node( + id="a.py::X", kind="class", name="X", file="a.py", line=1, + end_line=20, signature="", docstring="A class.", parent="a.py", + ) + assert Node.from_dict(node.to_dict()) == node + + +def test_node_frozen(): + node = Node(id="x", kind="module", name="x", file="x.py", line=1) + try: + node.name = "y" # type: ignore[misc] + assert False, "Should be frozen" + except AttributeError: + pass + + +def test_edge_to_dict_minimal(): + edge = Edge(source="a", target="b", kind="calls") + d = edge.to_dict() + assert d == {"source": "a", "target": "b", "kind": "calls"} + assert "line" not in d + + +def test_edge_to_dict_with_line(): + edge = Edge(source="a", target="b", kind="imports", line=7) + d = edge.to_dict() + assert d["line"] == 7 + + +def test_edge_roundtrip(): + edge = Edge(source="a", target="b", kind="contains", line=3) + assert Edge.from_dict(edge.to_dict()) == edge + + +def test_search_result_to_dict(): + node = Node(id="f.py::go", kind="function", name="go", file="f.py", line=1) + sr = SearchResult(node=node, context="called by main", relevance="caller") + d = sr.to_dict() + assert d["id"] == "f.py::go" + assert d["context"] == "called by main" + assert d["relevance"] == "caller" + + +def test_search_result_to_dict_no_extras(): + node = Node(id="f.py", kind="module", name="f", file="f.py", line=1) + sr = SearchResult(node=node) + d = sr.to_dict() + assert "context" not in d + assert "relevance" not in d + + +def test_search_result_roundtrip(): + node = Node(id="x.py::Y", kind="class", name="Y", file="x.py", line=2) + sr = SearchResult(node=node, context="ctx", relevance="exact") + restored = SearchResult.from_dict(sr.to_dict()) + assert restored.node == node + assert restored.context == "ctx" + assert restored.relevance == "exact" diff --git a/tests/codegraph/test_tool.py b/tests/codegraph/test_tool.py new file mode 100644 
index 0000000..af596db --- /dev/null +++ b/tests/codegraph/test_tool.py @@ -0,0 +1,87 @@ +"""Tests for codegraph tool registration using theow's own source.""" + +import inspect + +from theow._codegraph._graph import CodeGraph +from theow._gateway._base import build_tool_declaration + + +def test_search_code_callable(theow_src): + """graph.search_code should be a callable that returns list[dict].""" + graph = CodeGraph(root=theow_src) + assert callable(graph.search_code) + + results = graph.search_code(query="Rule") + assert isinstance(results, list) + assert all(isinstance(r, dict) for r in results) + + +def test_search_code_has_docstring(theow_src): + """search_code should have a docstring for LLM tool schema.""" + graph = CodeGraph(root=theow_src) + doc = inspect.getdoc(graph.search_code) + assert doc is not None + assert "symbol" in doc.lower() + + +def test_build_tool_declaration_skips_self(theow_src): + """build_tool_declaration should skip self for bound methods.""" + graph = CodeGraph(root=theow_src) + decl = build_tool_declaration("search_code", graph.search_code) + + props = decl["input_schema"]["properties"] + assert "self" not in props + assert "query" in props + assert "scope" in props + assert "kind" in props + assert "file" in props + assert "line" in props + assert "target" in props + + +def test_build_tool_declaration_types(theow_src): + """Tool declaration should have correct JSON schema types.""" + graph = CodeGraph(root=theow_src) + decl = build_tool_declaration("search_code", graph.search_code) + + props = decl["input_schema"]["properties"] + assert props["query"]["type"] == "string" + assert props["line"]["type"] == "integer" + assert props["scope"]["type"] == "string" + + +def test_build_tool_declaration_no_required(theow_src): + """All search_code params have defaults, so none should be required.""" + graph = CodeGraph(root=theow_src) + decl = build_tool_declaration("search_code", graph.search_code) + + assert 
decl["input_schema"]["required"] == [] + + +def test_build_tool_declaration_has_description(theow_src): + """Tool declaration should carry the docstring as description.""" + graph = CodeGraph(root=theow_src) + decl = build_tool_declaration("search_code", graph.search_code) + + assert len(decl["description"]) > 50 + + +def test_public_import(): + """CodeGraph should be importable from the public API.""" + from theow.codegraph import CodeGraph as PublicCodeGraph + + assert PublicCodeGraph is CodeGraph + + +def test_search_code_returns_dicts_with_expected_keys(theow_src): + """Results should contain the standard node fields.""" + graph = CodeGraph(root=theow_src) + results = graph.search_code(query="Rule", kind="class") + + assert len(results) > 0 + r = results[0] + assert "id" in r + assert "kind" in r + assert "name" in r + assert "file" in r + assert "line" in r diff --git a/tests/codegraph/test_visitor.py b/tests/codegraph/test_visitor.py new file mode 100644 index 0000000..95d5f57 --- /dev/null +++ b/tests/codegraph/test_visitor.py @@ -0,0 +1,322 @@ +"""Tests for language visitors using theow's own source.""" + +import tempfile +from pathlib import Path + +from theow._codegraph._visitors._go import GoVisitor +from theow._codegraph._visitors._python import PythonVisitor + +GO_SOURCE = '''\ +package main + +import ( +\t"fmt" +\t"strings" +) + +type Animal interface { +\tSpeak() string +} + +type Base struct { +\tName string +} + +type Dog struct { +\tBase +\tBreed string +} + +func NewDog(name string, breed string) *Dog { +\treturn &Dog{Base: Base{Name: name}, Breed: breed} +} + +func (d *Dog) Speak() string { +\treturn fmt.Sprintf("Woof! 
I am %s", d.Name) +} + +func (d *Dog) Fetch(item string) string { +\treturn strings.ToUpper(item) +} + +func main() { +\tdog := NewDog("Rex", "Labrador") +\tfmt.Println(dog.Speak()) +\tfmt.Println(dog.Fetch("ball")) +} +''' + + +def test_python_visitor_parses_models(theow_src): + """Parse theow's _core/_models.py — has classes, methods, imports.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + nodes, edges = visitor.parse_file(path, source, relative) + + names = {n.name for n in nodes} + assert "Rule" in names + assert "Fact" in names + assert "Action" in names + assert "LLMConfig" in names + + # Module node exists + kinds = {n.kind for n in nodes} + assert "module" in kinds + assert "class" in kinds + assert "function" in kinds + + +def test_python_visitor_extracts_methods(theow_src): + """Methods inside classes should have the class as parent.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + nodes, _ = visitor.parse_file(path, source, relative) + + # to_dict should be a method of Fact + fact_methods = [n for n in nodes if n.parent == f"{relative}::Fact"] + method_names = {n.name for n in fact_methods} + assert "to_dict" in method_names + assert "from_dict" in method_names + assert "matches" in method_names + + +def test_python_visitor_extracts_imports(theow_src): + """Module-level imports should produce import edges.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + _, edges = visitor.parse_file(path, source, relative) + + import_edges = [e for e in edges if e.kind == "imports"] + import_targets = {e.target for e in import_edges} + assert "yaml" in import_targets + assert "re" in import_targets + + +def test_python_visitor_extracts_inheritance(theow_src): + """Classes with bases should produce 
inherits edges.""" + visitor = PythonVisitor() + # _gateway/_base.py has LLMGateway(ABC) + path = theow_src / "_gateway" / "_base.py" + source = path.read_text() + relative = "_gateway/_base.py" + + _, edges = visitor.parse_file(path, source, relative) + + inherits = [e for e in edges if e.kind == "inherits"] + assert any(e.target == "ABC" for e in inherits) + + +def test_python_visitor_extracts_docstrings(theow_src): + """Functions and classes should have docstrings extracted.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + nodes, _ = visitor.parse_file(path, source, relative) + + rule_node = next(n for n in nodes if n.name == "Rule") + assert "production rule" in rule_node.docstring.lower() + + +def test_python_visitor_extracts_signatures(theow_src): + """Functions should have their signature extracted.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + nodes, _ = visitor.parse_file(path, source, relative) + + matches_node = next(n for n in nodes if n.name == "matches" and "Fact" in n.parent) + assert "def matches" in matches_node.signature + assert "self" in matches_node.signature + + +def test_python_visitor_extracts_calls(theow_src): + """Call sites should produce call edges.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + _, edges = visitor.parse_file(path, source, relative) + + call_edges = [e for e in edges if e.kind == "calls"] + call_targets = {e.target for e in call_edges} + # Fact.matches uses re.search + assert any("re.search" in t for t in call_targets) + + +def test_python_visitor_line_numbers(theow_src): + """Node line numbers should be positive and end_line >= line.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = 
"_core/_models.py" + + nodes, _ = visitor.parse_file(path, source, relative) + + for node in nodes: + assert node.line >= 1 + if node.end_line: + assert node.end_line >= node.line + + +def test_python_visitor_decorated_functions(theow_src): + """Decorated definitions should still be extracted.""" + visitor = PythonVisitor() + path = theow_src / "_core" / "_models.py" + source = path.read_text() + relative = "_core/_models.py" + + nodes, _ = visitor.parse_file(path, source, relative) + + # @property is_ephemeral on Rule + node_names = {n.name for n in nodes} + assert "is_ephemeral" in node_names + + +def test_python_visitor_extensions(): + visitor = PythonVisitor() + assert visitor.extensions == [".py"] + + +def test_load_visitors_python(): + from theow._codegraph._visitors import load_visitors + + visitors = load_visitors(["python"]) + assert len(visitors) == 1 + assert visitors[0].extensions == [".py"] + + +def test_load_visitors_unknown(): + import pytest + from theow._codegraph._visitors import load_visitors + + with pytest.raises(ValueError, match="Unknown language"): + load_visitors(["rust"]) + + +# --- Go visitor tests --- + + +def _parse_go(source=GO_SOURCE, filename="main.go"): + visitor = GoVisitor() + with tempfile.NamedTemporaryFile(suffix=".go", mode="w", delete=False) as f: + f.write(source) + path = Path(f.name) + nodes, edges = visitor.parse_file(path, source, filename) + path.unlink() + return nodes, edges + + +def test_go_visitor_extensions(): + assert GoVisitor().extensions == [".go"] + + +def test_go_visitor_extracts_functions(): + nodes, _ = _parse_go() + names = {n.name for n in nodes if n.kind == "function"} + assert "NewDog" in names + assert "main" in names + + +def test_go_visitor_extracts_methods(): + """Methods should have the receiver type as parent.""" + nodes, _ = _parse_go() + speak = next(n for n in nodes if n.name == "Speak") + assert "Dog" in speak.parent + assert "func (*Dog) Speak" in speak.signature or "func (Dog) Speak" in 
speak.signature + + +def test_go_visitor_extracts_structs(): + nodes, _ = _parse_go() + struct_names = {n.name for n in nodes if n.kind == "class"} + assert "Dog" in struct_names + assert "Base" in struct_names + + +def test_go_visitor_extracts_interfaces(): + nodes, _ = _parse_go() + assert any(n.name == "Animal" and n.kind == "class" for n in nodes) + + +def test_go_visitor_extracts_imports(): + _, edges = _parse_go() + import_edges = [e for e in edges if e.kind == "imports"] + targets = {e.target for e in import_edges} + assert "fmt" in targets + assert "strings" in targets + + +def test_go_visitor_extracts_calls(): + _, edges = _parse_go() + call_edges = [e for e in edges if e.kind == "calls"] + targets = {e.target for e in call_edges} + assert "NewDog" in targets + # Method calls like dog.Speak() + assert any("Speak" in t for t in targets) + + +def test_go_visitor_extracts_embedding(): + """Struct embedding (Base in Dog) should produce inherits edge.""" + _, edges = _parse_go() + inherits = [e for e in edges if e.kind == "inherits"] + assert any(e.target == "Base" and "Dog" in e.source for e in inherits) + + +def test_go_visitor_function_signatures(): + nodes, _ = _parse_go() + new_dog = next(n for n in nodes if n.name == "NewDog") + assert "func NewDog" in new_dog.signature + assert "string" in new_dog.signature + + +def test_go_visitor_method_signatures(): + nodes, _ = _parse_go() + fetch = next(n for n in nodes if n.name == "Fetch") + assert "func" in fetch.signature + assert "Dog" in fetch.signature + assert "item" in fetch.signature + + +def test_go_visitor_line_numbers(): + nodes, _ = _parse_go() + for node in nodes: + assert node.line >= 1 + if node.end_line: + assert node.end_line >= node.line + + +def test_go_visitor_contains_edges(): + """Module should contain functions and types.""" + _, edges = _parse_go() + contains = [e for e in edges if e.kind == "contains"] + assert len(contains) > 0 + # Module contains functions + assert any(e.source == 
"main.go" for e in contains) + + +def test_load_visitors_go(): + from theow._codegraph._visitors import load_visitors + + visitors = load_visitors(["go"]) + assert len(visitors) == 1 + assert visitors[0].extensions == [".go"] + + +def test_load_visitors_both(): + from theow._codegraph._visitors import load_visitors + + visitors = load_visitors(["python", "go"]) + assert len(visitors) == 2 diff --git a/tests/core/test_logging.py b/tests/core/test_logging.py index d00d2c6..eaab811 100644 --- a/tests/core/test_logging.py +++ b/tests/core/test_logging.py @@ -78,6 +78,7 @@ def test_component_map_coverage(): "engine", "chroma", "cli", + "codegraph", } actual_components = set(COMPONENT_MAP.values()) assert actual_components == expected_components diff --git a/tox.ini b/tox.ini index 366ffc4..bd8d0ed 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,7 @@ [tox] no_package = True skip_missing_interpreters = True -env_list = fmt,lint,static,unit +env_list = fmt,lint,static,unit,codegraph min_version = 4.0.0 [vars] @@ -29,9 +29,14 @@ commands = [testenv:static] description = Run static type checking with ty commands = - uv run {[vars]uv_flags} ty check {[vars]src} + uv run {[vars]uv_flags} --all-extras ty check {[vars]src} [testenv:unit] -description = Run unit tests with coverage +description = Run core unit tests with coverage commands = - uv run {[vars]uv_flags} pytest {[vars]tests} -v --cov={[vars]src}/theow --cov-report=term-missing + uv run {[vars]uv_flags} pytest {[vars]tests}/core {[vars]tests}/gateway {[vars]tests}/cli -v --cov={[vars]src}/theow --cov-report=term-missing --cov-config={tox_root}/.coveragerc-unit + +[testenv:codegraph] +description = Run codegraph tests with coverage +commands = + uv run {[vars]uv_flags} --extra codegraph pytest {[vars]tests}/codegraph -v --cov={[vars]src}/theow/_codegraph --cov-report=term-missing diff --git a/uv.lock b/uv.lock index 9f3ecb5..f1a9d3c 100644 --- a/uv.lock +++ b/uv.lock @@ -1876,6 +1876,13 @@ dependencies = [ { name = 
"typer" }, ] +[package.optional-dependencies] +codegraph = [ + { name = "tree-sitter" }, + { name = "tree-sitter-go" }, + { name = "tree-sitter-python" }, +] + [package.dev-dependencies] dev = [ { name = "pytest" }, @@ -1896,9 +1903,12 @@ requires-dist = [ { name = "pyyaml", specifier = ">=6.0" }, { name = "rich", specifier = ">=13.0" }, { name = "structlog", specifier = ">=24.0" }, + { name = "tree-sitter", marker = "extra == 'codegraph'", specifier = ">=0.23" }, + { name = "tree-sitter-go", marker = "extra == 'codegraph'", specifier = ">=0.23" }, + { name = "tree-sitter-python", marker = "extra == 'codegraph'", specifier = ">=0.23" }, { name = "typer", specifier = ">=0.12" }, ] -provides-extras = ["daemon"] +provides-extras = ["codegraph", "daemon"] [package.metadata.requires-dev] dev = [ @@ -1982,6 +1992,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] +[[package]] +name = "tree-sitter" +version = "0.25.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941, upload-time = "2025-09-25T17:37:34.813Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699, upload-time = "2025-09-25T17:37:36.349Z" }, + { url = "https://files.pythonhosted.org/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125, upload-time = "2025-09-25T17:37:37.725Z" }, + { url = "https://files.pythonhosted.org/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418, upload-time = "2025-09-25T17:37:38.922Z" }, + { url = "https://files.pythonhosted.org/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250, upload-time = "2025-09-25T17:37:40.039Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156, upload-time = "2025-09-25T17:37:41.132Z" }, + { url = "https://files.pythonhosted.org/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984, upload-time = "2025-09-25T17:37:42.074Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/67/67492014ce32729b63d7ef318a19f9cfedd855d677de5773476caf771e96/tree_sitter-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0628671f0de69bb279558ef6b640bcfc97864fe0026d840f872728a86cd6b6cd", size = 146926, upload-time = "2025-09-25T17:37:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/4e/9c/a278b15e6b263e86c5e301c82a60923fa7c59d44f78d7a110a89a413e640/tree_sitter-0.25.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f5ddcd3e291a749b62521f71fc953f66f5fd9743973fd6dd962b092773569601", size = 137712, upload-time = "2025-09-25T17:37:44.039Z" }, + { url = "https://files.pythonhosted.org/packages/54/9a/423bba15d2bf6473ba67846ba5244b988cd97a4b1ea2b146822162256794/tree_sitter-0.25.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd88fbb0f6c3a0f28f0a68d72df88e9755cf5215bae146f5a1bdc8362b772053", size = 607873, upload-time = "2025-09-25T17:37:45.477Z" }, + { url = "https://files.pythonhosted.org/packages/ed/4c/b430d2cb43f8badfb3a3fa9d6cd7c8247698187b5674008c9d67b2a90c8e/tree_sitter-0.25.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b878e296e63661c8e124177cc3084b041ba3f5936b43076d57c487822426f614", size = 636313, upload-time = "2025-09-25T17:37:46.68Z" }, + { url = "https://files.pythonhosted.org/packages/9d/27/5f97098dbba807331d666a0997662e82d066e84b17d92efab575d283822f/tree_sitter-0.25.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d77605e0d353ba3fe5627e5490f0fbfe44141bafa4478d88ef7954a61a848dae", size = 631370, upload-time = "2025-09-25T17:37:47.993Z" }, + { url = "https://files.pythonhosted.org/packages/d4/3c/87caaed663fabc35e18dc704cd0e9800a0ee2f22bd18b9cbe7c10799895d/tree_sitter-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:463c032bd02052d934daa5f45d183e0521ceb783c2548501cf034b0beba92c9b", size = 127157, upload-time = "2025-09-25T17:37:48.967Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/23/f8467b408b7988aff4ea40946a4bd1a2c1a73d17156a9d039bbaff1e2ceb/tree_sitter-0.25.2-cp313-cp313-win_arm64.whl", hash = "sha256:b3f63a1796886249bd22c559a5944d64d05d43f2be72961624278eff0dcc5cb8", size = 113975, upload-time = "2025-09-25T17:37:49.922Z" }, +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/05/727308adbbc79bcb1c92fc0ea10556a735f9d0f0a5435a18f59d40f7fd77/tree_sitter_go-0.25.0.tar.gz", hash = "sha256:a7466e9b8d94dda94cae8d91629f26edb2d26166fd454d4831c3bf6dfa2e8d68", size = 93890, upload-time = "2025-08-29T06:20:25.044Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/aa/0984707acc2b9bb461fe4a41e7e0fc5b2b1e245c32820f0c83b3c602957c/tree_sitter_go-0.25.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b852993063a3429a443e7bd0aa376dd7dd329d595819fabf56ac4cf9d7257b54", size = 47117, upload-time = "2025-08-29T06:20:14.286Z" }, + { url = "https://files.pythonhosted.org/packages/32/16/dd4cb124b35e99239ab3624225da07d4cb8da4d8564ed81d03fcb3a6ba9f/tree_sitter_go-0.25.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:503b81a2b4c31e302869a1de3a352ad0912ccab3df9ac9950197b0a9ceeabd8f", size = 48674, upload-time = "2025-08-29T06:20:17.557Z" }, + { url = "https://files.pythonhosted.org/packages/86/fb/b30d63a08044115d8b8bd196c6c2ab4325fb8db5757249a4ef0563966e2e/tree_sitter_go-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04b3b3cb4aff18e74e28d49b716c6f24cb71ddfdd66768987e26e4d0fa812f74", size = 66418, upload-time = "2025-08-29T06:20:18.345Z" }, + { url = "https://files.pythonhosted.org/packages/26/21/d3d88a30ad007419b2c97b3baeeef7431407faf9f686195b6f1cad0aedf9/tree_sitter_go-0.25.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:148255aca2f54b90d48c48a9dbb4c7faad6cad310a980b2c5a5a9822057ed145", size = 72006, upload-time = "2025-08-29T06:20:19.14Z" }, + { url = "https://files.pythonhosted.org/packages/cd/d0/0dd6442353ced8a88bbda9e546f4ea29e381b59b5a40b122e5abb586bb6c/tree_sitter_go-0.25.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4d338116cdf8a6c6ff990d2441929b41323ef17c710407abe0993c13417d6aad", size = 70603, upload-time = "2025-08-29T06:20:21.544Z" }, + { url = "https://files.pythonhosted.org/packages/01/e2/ee5e09f63504fc286539535d374d2eaa0e7d489b80f8f744bb3962aff22a/tree_sitter_go-0.25.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5608e089d2a29fa8d2b327abeb2ad1cdb8e223c440a6b0ceab0d3fa80bdeebae", size = 66088, upload-time = "2025-08-29T06:20:22.336Z" }, + { url = "https://files.pythonhosted.org/packages/6e/b6/d9142583374720e79aca9ccb394b3795149a54c012e1dfd80738df2d984e/tree_sitter_go-0.25.0-cp310-abi3-win_amd64.whl", hash = "sha256:30d4ada57a223dfc2c32d942f44d284d40f3d1215ddcf108f96807fd36d53022", size = 48152, upload-time = "2025-08-29T06:20:23.089Z" }, + { url = "https://files.pythonhosted.org/packages/9e/00/9a2638e7339236f5b01622952a4d71c1474dd3783d1982a89555fc1f03b1/tree_sitter_go-0.25.0-cp310-abi3-win_arm64.whl", hash = "sha256:d5d62362059bf79997340773d47cc7e7e002883b527a05cca829c46e40b70ded", size = 46752, upload-time = "2025-08-29T06:20:24.235Z" }, +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b8/8b/c992ff0e768cb6768d5c96234579bf8842b3a633db641455d86dd30d5dac/tree_sitter_python-0.25.0.tar.gz", hash = "sha256:b13e090f725f5b9c86aa455a268553c65cadf325471ad5b65cd29cac8a1a68ac", size = 159845, upload-time = "2025-09-11T06:47:58.159Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/64/a4e503c78a4eb3ac46d8e72a29c1b1237fa85238d8e972b063e0751f5a94/tree_sitter_python-0.25.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:14a79a47ddef72f987d5a2c122d148a812169d7484ff5c75a3db9609d419f361", size = 73790, upload-time = "2025-09-11T06:47:47.652Z" }, + { url = "https://files.pythonhosted.org/packages/e6/1d/60d8c2a0cc63d6ec4ba4e99ce61b802d2e39ef9db799bdf2a8f932a6cd4b/tree_sitter_python-0.25.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:480c21dbd995b7fe44813e741d71fed10ba695e7caab627fb034e3828469d762", size = 76691, upload-time = "2025-09-11T06:47:49.038Z" }, + { url = "https://files.pythonhosted.org/packages/aa/cb/d9b0b67d037922d60cbe0359e0c86457c2da721bc714381a63e2c8e35eba/tree_sitter_python-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:86f118e5eecad616ecdb81d171a36dde9bef5a0b21ed71ea9c3e390813c3baf5", size = 108133, upload-time = "2025-09-11T06:47:50.499Z" }, + { url = "https://files.pythonhosted.org/packages/40/bd/bf4787f57e6b2860f3f1c8c62f045b39fb32d6bac4b53d7a9e66de968440/tree_sitter_python-0.25.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be71650ca2b93b6e9649e5d65c6811aad87a7614c8c1003246b303f6b150f61b", size = 110603, upload-time = "2025-09-11T06:47:51.985Z" }, + { url = "https://files.pythonhosted.org/packages/5d/25/feff09f5c2f32484fbce15db8b49455c7572346ce61a699a41972dea7318/tree_sitter_python-0.25.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e6d5b5799628cc0f24691ab2a172a8e676f668fe90dc60468bee14084a35c16d", size = 108998, upload-time = "2025-09-11T06:47:53.046Z" }, + { url = "https://files.pythonhosted.org/packages/75/69/4946da3d6c0df316ccb938316ce007fb565d08f89d02d854f2d308f0309f/tree_sitter_python-0.25.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:71959832fc5d9642e52c11f2f7d79ae520b461e63334927e93ca46cd61cd9683", size = 107268, upload-time = "2025-09-11T06:47:54.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/a2/996fc2dfa1076dc460d3e2f3c75974ea4b8f02f6bc925383aaae519920e8/tree_sitter_python-0.25.0-cp310-abi3-win_amd64.whl", hash = "sha256:9bcde33f18792de54ee579b00e1b4fe186b7926825444766f849bf7181793a76", size = 76073, upload-time = "2025-09-11T06:47:55.773Z" }, + { url = "https://files.pythonhosted.org/packages/07/19/4b5569d9b1ebebb5907d11554a96ef3fa09364a30fcfabeff587495b512f/tree_sitter_python-0.25.0-cp310-abi3-win_arm64.whl", hash = "sha256:0fbf6a3774ad7e89ee891851204c2e2c47e12b63a5edbe2e9156997731c128bb", size = 74169, upload-time = "2025-09-11T06:47:56.747Z" }, +] + [[package]] name = "ty" version = "0.0.18"