Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions folder_desc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Folder description tool: recursive file/folder descriptions with LLM-generated annotations."""
from folder_desc.tree import get_folder_description

__all__ = ["get_folder_description"]
54 changes: 54 additions & 0 deletions folder_desc/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""JSON-based cache for file descriptions."""
from __future__ import annotations

import hashlib
import json
import os
from pathlib import Path

# Directory under the user's home where per-file description cache entries
# (one small JSON file per described source file) are stored.
CACHE_DIR = Path.home() / ".cheetahclaws" / "folder_desc_cache"


def _cache_key(file_path: str) -> str:
return hashlib.sha256(file_path.encode()).hexdigest()[:16]


def _cache_path(file_path: str) -> Path:
    """Return the on-disk location of the cache entry for *file_path*."""
    return CACHE_DIR / (_cache_key(file_path) + ".json")


def get_cached_desc(file_path: str) -> str | None:
    """Return the cached description for *file_path*, or None if missing/stale.

    An entry counts as stale when the file's current mtime or size differs
    from the values recorded when the entry was written.
    """
    entry = _cache_path(file_path)
    if not entry.exists():
        return None
    try:
        payload = json.loads(entry.read_text(encoding="utf-8"))
        current = os.stat(file_path)
    except (OSError, json.JSONDecodeError):
        # Unreadable/corrupt entry or vanished source file: treat as a miss.
        return None
    fresh = (
        payload.get("mtime") == current.st_mtime
        and payload.get("size") == current.st_size
    )
    return payload.get("desc") if fresh else None


def set_cached_desc(file_path: str, desc: str) -> None:
    """Best-effort: record *desc* for *file_path* with freshness metadata.

    Stores the file's mtime and size so get_cached_desc can detect staleness.
    Silently does nothing when the file cannot be stat'ed or the cache entry
    cannot be written — a cache failure must never break description
    generation for the caller.
    """
    try:
        stat = os.stat(file_path)
    except OSError:
        return
    data = {"desc": desc, "mtime": stat.st_mtime, "size": stat.st_size, "path": file_path}
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        _cache_path(file_path).write_text(json.dumps(data), encoding="utf-8")
    except OSError:
        # Disk full, permission denied, read-only home, etc.: the original
        # code let these propagate out of a best-effort cache write.
        return


def clear_cache() -> int:
    """Delete every cache entry and return how many files were removed."""
    if not CACHE_DIR.exists():
        return 0
    removed = 0
    for entry in CACHE_DIR.glob("*.json"):
        try:
            entry.unlink()
        except FileNotFoundError:
            # Raced with a concurrent clear/expiry; the entry is already gone,
            # so don't count it and don't crash.
            continue
        removed += 1
    return removed
106 changes: 106 additions & 0 deletions folder_desc/describer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""LLM-based file description generator with parallel execution."""
from __future__ import annotations

import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from folder_desc.cache import get_cached_desc, set_cached_desc

# Matches a `# [desc] ... [/desc]` annotation and captures the text inside.
_DESC_RE = re.compile(r"#\s*\[desc\]\s*(.+?)\s*\[/desc\]")
# How many leading lines of a file are sent to the LLM as a content preview.
_MAX_PREVIEW_LINES = 100
# Upper bound on concurrent LLM description calls.
_MAX_WORKERS = 8


def extract_inline_desc(file_path: str) -> str | None:
    """Return the `# [desc] ... [/desc]` tag on the first line, or None."""
    try:
        with open(file_path, encoding="utf-8", errors="replace") as handle:
            first_line = handle.readline()
    except OSError:
        # An unreadable file simply has no inline description.
        return None
    match = _DESC_RE.search(first_line)
    if match is None:
        return None
    return match.group(1).strip()


def _read_preview(file_path: str) -> str:
    """Return up to the first _MAX_PREVIEW_LINES lines of the file, "" on error."""
    collected: list[str] = []
    try:
        with open(file_path, encoding="utf-8", errors="replace") as handle:
            for index, text in enumerate(handle):
                if index >= _MAX_PREVIEW_LINES:
                    break
                collected.append(text)
    except OSError:
        return ""
    return "".join(collected)


def describe_file(file_path: str, config: dict | None = None) -> str:
    """Produce a one-line description for *file_path*.

    Resolution order: inline `# [desc]` tag, then cache, then an LLM call on
    a preview of the file. Inline tags and LLM results are written back to
    the cache; files with no meaningful content yield "Empty file".
    """
    tagged = extract_inline_desc(file_path)
    if tagged:
        set_cached_desc(file_path, tagged)
        return tagged

    remembered = get_cached_desc(file_path)
    if remembered:
        return remembered

    sample = _read_preview(file_path)
    if not sample.strip():
        return "Empty file"

    generated = _call_llm_for_desc(file_path, sample, config)
    set_cached_desc(file_path, generated)
    return generated


def _call_llm_for_desc(file_path: str, preview: str, config: dict | None) -> str:
try:
from auxiliary import stream_auxiliary
name = Path(file_path).name
prompt = (
f"Describe what the file '{name}' does in ONE short sentence (max 15 words). "
f"No markdown, no quotes, just the description.\n\n```\n{preview[:3000]}\n```"
)
result = stream_auxiliary(
system="You generate concise one-line file descriptions.",
messages=[{"role": "user", "content": prompt}],
config=config or {},
)
return result.strip().rstrip(".")
except Exception:
return f"({Path(file_path).suffix or 'unknown'} file)"


def describe_files_parallel(
    file_paths: list[str], config: dict | None = None,
) -> dict[str, str]:
    """Describe many files, returning a path -> description mapping.

    Inline tags and cache hits are resolved synchronously; only the remaining
    files are sent to the LLM, fanned out over a small thread pool. A file
    whose worker raises gets the placeholder "(description unavailable)".
    """
    described: dict[str, str] = {}
    pending: list[str] = []

    for path in file_paths:
        tagged = extract_inline_desc(path)
        if tagged:
            set_cached_desc(path, tagged)
            described[path] = tagged
        elif cached := get_cached_desc(path):
            described[path] = cached
        else:
            pending.append(path)

    if not pending:
        return described

    worker_count = min(_MAX_WORKERS, len(pending))
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        submitted = {pool.submit(describe_file, path, config): path for path in pending}
        for done in as_completed(submitted):
            path = submitted[done]
            try:
                described[path] = done.result()
            except Exception:
                described[path] = "(description unavailable)"

    return described
41 changes: 41 additions & 0 deletions folder_desc/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Self-registering GetFolderDescription tool."""
from __future__ import annotations

from tool_registry import ToolDef, register_tool
from folder_desc.tree import get_folder_description

# Tool schema advertised to the model; `folder_path` is the only parameter.
_SCHEMA = {
    "name": "GetFolderDescription",
    "description": (
        "Return a recursive tree of code files in a folder with their [desc] one-line "
        "descriptions. If descriptions are missing, they are generated automatically "
        "(parallel LLM calls) before the tree is returned. Useful for understanding a "
        "codebase at a glance."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "folder_path": {
                "type": "string",
                "description": "Absolute path to the folder to describe",
            },
        },
        "required": ["folder_path"],
    },
}


def _get_folder_description(params: dict, config: dict) -> str:
folder_path = params.get("folder_path", "")
if not folder_path:
return "Error: missing required parameter 'folder_path'"
return get_folder_description(folder_path, config)


# Self-register at import time: the tool is read-only (no filesystem writes
# visible to the caller) and safe to run concurrently with other tools.
register_tool(ToolDef(
    name="GetFolderDescription",
    schema=_SCHEMA,
    func=_get_folder_description,
    read_only=True,
    concurrent_safe=True,
))
111 changes: 111 additions & 0 deletions folder_desc/tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Recursive directory tree builder with file descriptions."""
from __future__ import annotations

import os
from pathlib import Path

from folder_desc.describer import describe_files_parallel

# Directory names excluded from traversal. Matching is by exact name; the
# walkers also skip any directory whose name starts with ".".
# NOTE(review): ".egg-info" only matches a dir literally named ".egg-info" —
# real "pkg.egg-info" dirs are NOT excluded by this set; confirm intent.
SKIP_DIRS = {
    ".git", "__pycache__", ".venv", "venv", "node_modules", ".tox",
    ".mypy_cache", ".pytest_cache", ".ruff_cache", "dist", "build",
    ".egg-info", ".eggs", ".nano_claude",
}

# File suffixes treated as describable code/config/doc files. Comparison is
# done via path.suffix.lower(), so the mixed-case entries ".R" and
# ".Dockerfile" can never match as written (".r" and ".dockerfile" cover them).
CODE_EXTENSIONS = {
    ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".go", ".rs", ".rb",
    ".c", ".cpp", ".h", ".hpp", ".cs", ".php", ".swift", ".kt",
    ".sh", ".bash", ".zsh", ".ps1", ".bat", ".cmd",
    ".yaml", ".yml", ".toml", ".json", ".xml", ".ini", ".cfg",
    ".md", ".rst", ".txt",
    ".html", ".css", ".scss", ".less",
    ".sql", ".r", ".R", ".lua", ".zig", ".nim",
    ".dockerfile", ".Dockerfile",
}

# Hard cap on how many files are collected for description.
MAX_FILES = 500


def _is_code_file(path: Path) -> bool:
    """True when *path* is a code/config/doc file worth describing."""
    special_names = ("Makefile", "Dockerfile", "Jenkinsfile", "Procfile", ".gitignore")
    return path.suffix.lower() in CODE_EXTENSIONS or path.name in special_names


def _collect_files(folder: Path) -> list[Path]:
    """Recursively gather code files under *folder*.

    Traversal depth is capped at 10 and at most MAX_FILES files are returned.
    Hidden directories and SKIP_DIRS entries are never entered. Within each
    directory, subdirectories come before files, sorted case-insensitively.
    """
    files: list[Path] = []

    def _walk(current: Path, depth: int = 0) -> None:
        if depth > 10:
            return
        try:
            entries = sorted(current.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower()))
        except OSError:
            return
        for entry in entries:
            # Enforce the cap per entry: the original only checked it on
            # directory entry, so a single large directory could push the
            # result well past MAX_FILES.
            if len(files) >= MAX_FILES:
                return
            if entry.is_dir():
                if entry.name in SKIP_DIRS or entry.name.startswith("."):
                    continue
                _walk(entry, depth + 1)
            elif entry.is_file() and _is_code_file(entry):
                files.append(entry)

    _walk(folder)
    return files


def _build_tree_string(folder: Path, descriptions: dict[str, str]) -> str:
    """Render an ASCII tree of *folder*, annotating files with descriptions.

    *descriptions* maps path strings (as produced by str(Path)) to one-line
    descriptions; files without an entry are listed bare. Filtering mirrors
    _collect_files: SKIP_DIRS and hidden directories are excluded, depth is
    capped at 10, and only code files appear.
    """
    lines: list[str] = []

    def _walk(current: Path, prefix: str = "", depth: int = 0) -> None:
        if depth > 10:
            return
        try:
            entries = sorted(current.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower()))
        except OSError:
            return

        # Keep only the entries that belong in the rendered tree.
        visible = []
        for entry in entries:
            if entry.is_dir():
                if entry.name in SKIP_DIRS or entry.name.startswith("."):
                    continue
                visible.append(entry)
            elif entry.is_file() and _is_code_file(entry):
                visible.append(entry)

        for i, entry in enumerate(visible):
            is_last = i == len(visible) - 1
            connector = "`-- " if is_last else "|-- "
            # Child prefixes are 4 chars wide to line up under the 4-char
            # connectors; the original used 1-/2-char prefixes, which
            # misaligned every nested level.
            child_prefix = prefix + ("    " if is_last else "|   ")

            if entry.is_dir():
                lines.append(f"{prefix}{connector}{entry.name}/")
                _walk(entry, child_prefix, depth + 1)
            else:
                desc = descriptions.get(str(entry), "")
                desc_tag = f" [desc] {desc} [/desc]" if desc else ""
                lines.append(f"{prefix}{connector}{entry.name}{desc_tag}")

    lines.append(f"{folder.name}/")
    _walk(folder)
    return "\n".join(lines)


def get_folder_description(folder_path: str, config: dict | None = None) -> str:
    """Return a described file tree for *folder_path*, or an error string.

    Collects code files, resolves their one-line descriptions (inline tag,
    cache, or LLM), and renders them as an annotated ASCII tree preceded by
    a file count.
    """
    root = Path(folder_path)
    if not root.is_dir():
        return f"Error: {folder_path} is not a directory"

    collected = _collect_files(root)
    if not collected:
        return f"{root.name}/ (empty or no code files found)"

    described = describe_files_parallel([str(p) for p in collected], config)
    rendered = _build_tree_string(root, described)
    return f"{len(collected)} code files found.\n\n{rendered}"
Loading
Loading