Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions folder_desc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Folder description tool: recursive file/folder descriptions with LLM-generated annotations."""
from folder_desc.tree import get_folder_description

__all__ = ["get_folder_description"]
54 changes: 54 additions & 0 deletions folder_desc/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""JSON-based cache for file descriptions."""
from __future__ import annotations

import hashlib
import json
import os
from pathlib import Path

# Directory under the user's home where per-file description cache entries
# (one small JSON file per described source file) are stored.
CACHE_DIR = Path.home() / ".cheetahclaws" / "folder_desc_cache"


def _cache_key(file_path: str) -> str:
return hashlib.sha256(file_path.encode()).hexdigest()[:16]


def _cache_path(file_path: str) -> Path:
    """Return the on-disk location of the cache entry for *file_path*."""
    return CACHE_DIR / (_cache_key(file_path) + ".json")


def get_cached_desc(file_path: str) -> str | None:
    """Return the cached description for *file_path*, or None if missing/stale.

    An entry counts as stale when the file's current mtime or size differs
    from the values recorded when the entry was written.
    """
    entry = _cache_path(file_path)
    if not entry.exists():
        return None
    try:
        payload = json.loads(entry.read_text(encoding="utf-8"))
        current = os.stat(file_path)
    except (OSError, json.JSONDecodeError):
        # Unreadable/corrupt entry or vanished source file: treat as a miss.
        return None
    fresh = (
        payload.get("mtime") == current.st_mtime
        and payload.get("size") == current.st_size
    )
    return payload.get("desc") if fresh else None


def set_cached_desc(file_path: str, desc: str) -> None:
    """Best-effort: record *desc* for *file_path* with freshness metadata.

    Stores the file's mtime and size so get_cached_desc can detect staleness.
    Silently does nothing when the file cannot be stat'ed or the cache entry
    cannot be written — a cache failure must never break description
    generation for the caller.
    """
    try:
        stat = os.stat(file_path)
    except OSError:
        return
    data = {"desc": desc, "mtime": stat.st_mtime, "size": stat.st_size, "path": file_path}
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        _cache_path(file_path).write_text(json.dumps(data), encoding="utf-8")
    except OSError:
        # Disk full, permission denied, read-only home, etc.: the original
        # code let these propagate out of a best-effort cache write.
        return


def clear_cache() -> int:
    """Delete every cache entry and return how many files were removed."""
    if not CACHE_DIR.exists():
        return 0
    removed = 0
    for entry in CACHE_DIR.glob("*.json"):
        try:
            entry.unlink()
        except FileNotFoundError:
            # Raced with a concurrent clear/expiry; the entry is already gone,
            # so don't count it and don't crash.
            continue
        removed += 1
    return removed
106 changes: 106 additions & 0 deletions folder_desc/describer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""LLM-based file description generator with parallel execution."""
from __future__ import annotations

import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from folder_desc.cache import get_cached_desc, set_cached_desc

# Matches a `# [desc] ... [/desc]` annotation and captures the text inside.
_DESC_RE = re.compile(r"#\s*\[desc\]\s*(.+?)\s*\[/desc\]")
# How many leading lines of a file are sent to the LLM as a content preview.
_MAX_PREVIEW_LINES = 100
# Upper bound on concurrent LLM description calls.
_MAX_WORKERS = 8


def extract_inline_desc(file_path: str) -> str | None:
    """Return the `# [desc] ... [/desc]` tag on the first line, or None."""
    try:
        with open(file_path, encoding="utf-8", errors="replace") as handle:
            first_line = handle.readline()
    except OSError:
        # An unreadable file simply has no inline description.
        return None
    match = _DESC_RE.search(first_line)
    if match is None:
        return None
    return match.group(1).strip()


def _read_preview(file_path: str) -> str:
    """Return up to the first _MAX_PREVIEW_LINES lines of the file, "" on error."""
    collected: list[str] = []
    try:
        with open(file_path, encoding="utf-8", errors="replace") as handle:
            for index, text in enumerate(handle):
                if index >= _MAX_PREVIEW_LINES:
                    break
                collected.append(text)
    except OSError:
        return ""
    return "".join(collected)


def describe_file(file_path: str, config: dict | None = None) -> str:
    """Produce a one-line description for *file_path*.

    Resolution order: inline `# [desc]` tag, then cache, then an LLM call on
    a preview of the file. Inline tags and LLM results are written back to
    the cache; files with no meaningful content yield "Empty file".
    """
    tagged = extract_inline_desc(file_path)
    if tagged:
        set_cached_desc(file_path, tagged)
        return tagged

    remembered = get_cached_desc(file_path)
    if remembered:
        return remembered

    sample = _read_preview(file_path)
    if not sample.strip():
        return "Empty file"

    generated = _call_llm_for_desc(file_path, sample, config)
    set_cached_desc(file_path, generated)
    return generated


def _call_llm_for_desc(file_path: str, preview: str, config: dict | None) -> str:
try:
from auxiliary import stream_auxiliary
name = Path(file_path).name
prompt = (
f"Describe what the file '{name}' does in ONE short sentence (max 15 words). "
f"No markdown, no quotes, just the description.\n\n```\n{preview[:3000]}\n```"
)
result = stream_auxiliary(
system="You generate concise one-line file descriptions.",
messages=[{"role": "user", "content": prompt}],
config=config or {},
)
return result.strip().rstrip(".")
except Exception:
return f"({Path(file_path).suffix or 'unknown'} file)"


def describe_files_parallel(
    file_paths: list[str], config: dict | None = None,
) -> dict[str, str]:
    """Describe many files, returning a path -> description mapping.

    Inline tags and cache hits are resolved synchronously; only the remaining
    files are sent to the LLM, fanned out over a small thread pool. A file
    whose worker raises gets the placeholder "(description unavailable)".
    """
    described: dict[str, str] = {}
    pending: list[str] = []

    for path in file_paths:
        tagged = extract_inline_desc(path)
        if tagged:
            set_cached_desc(path, tagged)
            described[path] = tagged
        elif cached := get_cached_desc(path):
            described[path] = cached
        else:
            pending.append(path)

    if not pending:
        return described

    worker_count = min(_MAX_WORKERS, len(pending))
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        submitted = {pool.submit(describe_file, path, config): path for path in pending}
        for done in as_completed(submitted):
            path = submitted[done]
            try:
                described[path] = done.result()
            except Exception:
                described[path] = "(description unavailable)"

    return described
41 changes: 41 additions & 0 deletions folder_desc/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Self-registering GetFolderDescription tool."""
from __future__ import annotations

from tool_registry import ToolDef, register_tool
from folder_desc.tree import get_folder_description

# Tool schema advertised to the model; `folder_path` is the only parameter.
_SCHEMA = {
    "name": "GetFolderDescription",
    "description": (
        "Return a recursive tree of code files in a folder with their [desc] one-line "
        "descriptions. If descriptions are missing, they are generated automatically "
        "(parallel LLM calls) before the tree is returned. Useful for understanding a "
        "codebase at a glance."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "folder_path": {
                "type": "string",
                "description": "Absolute path to the folder to describe",
            },
        },
        "required": ["folder_path"],
    },
}


def _get_folder_description(params: dict, config: dict) -> str:
folder_path = params.get("folder_path", "")
if not folder_path:
return "Error: missing required parameter 'folder_path'"
return get_folder_description(folder_path, config)


# Self-register at import time: the tool is read-only (no filesystem writes
# visible to the caller) and safe to run concurrently with other tools.
register_tool(ToolDef(
    name="GetFolderDescription",
    schema=_SCHEMA,
    func=_get_folder_description,
    read_only=True,
    concurrent_safe=True,
))
111 changes: 111 additions & 0 deletions folder_desc/tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Recursive directory tree builder with file descriptions."""
from __future__ import annotations

import os
from pathlib import Path

from folder_desc.describer import describe_files_parallel

# Directory names excluded from traversal. Matching is by exact name; the
# walkers also skip any directory whose name starts with ".".
# NOTE(review): ".egg-info" only matches a dir literally named ".egg-info" —
# real "pkg.egg-info" dirs are NOT excluded by this set; confirm intent.
SKIP_DIRS = {
    ".git", "__pycache__", ".venv", "venv", "node_modules", ".tox",
    ".mypy_cache", ".pytest_cache", ".ruff_cache", "dist", "build",
    ".egg-info", ".eggs", ".nano_claude",
}

# File suffixes treated as describable code/config/doc files. Comparison is
# done via path.suffix.lower(), so the mixed-case entries ".R" and
# ".Dockerfile" can never match as written (".r" and ".dockerfile" cover them).
CODE_EXTENSIONS = {
    ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".go", ".rs", ".rb",
    ".c", ".cpp", ".h", ".hpp", ".cs", ".php", ".swift", ".kt",
    ".sh", ".bash", ".zsh", ".ps1", ".bat", ".cmd",
    ".yaml", ".yml", ".toml", ".json", ".xml", ".ini", ".cfg",
    ".md", ".rst", ".txt",
    ".html", ".css", ".scss", ".less",
    ".sql", ".r", ".R", ".lua", ".zig", ".nim",
    ".dockerfile", ".Dockerfile",
}

# Hard cap on how many files are collected for description.
MAX_FILES = 500


def _is_code_file(path: Path) -> bool:
    """True when *path* is a code/config/doc file worth describing."""
    special_names = ("Makefile", "Dockerfile", "Jenkinsfile", "Procfile", ".gitignore")
    return path.suffix.lower() in CODE_EXTENSIONS or path.name in special_names


def _collect_files(folder: Path) -> list[Path]:
    """Recursively gather code files under *folder*.

    Traversal depth is capped at 10 and at most MAX_FILES files are returned.
    Hidden directories and SKIP_DIRS entries are never entered. Within each
    directory, subdirectories come before files, sorted case-insensitively.
    """
    files: list[Path] = []

    def _walk(current: Path, depth: int = 0) -> None:
        if depth > 10:
            return
        try:
            entries = sorted(current.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower()))
        except OSError:
            return
        for entry in entries:
            # Enforce the cap per entry: the original only checked it on
            # directory entry, so a single large directory could push the
            # result well past MAX_FILES.
            if len(files) >= MAX_FILES:
                return
            if entry.is_dir():
                if entry.name in SKIP_DIRS or entry.name.startswith("."):
                    continue
                _walk(entry, depth + 1)
            elif entry.is_file() and _is_code_file(entry):
                files.append(entry)

    _walk(folder)
    return files


def _build_tree_string(folder: Path, descriptions: dict[str, str]) -> str:
    """Render an ASCII tree of *folder*, annotating files with descriptions.

    *descriptions* maps path strings (as produced by str(Path)) to one-line
    descriptions; files without an entry are listed bare. Filtering mirrors
    _collect_files: SKIP_DIRS and hidden directories are excluded, depth is
    capped at 10, and only code files appear.
    """
    lines: list[str] = []

    def _walk(current: Path, prefix: str = "", depth: int = 0) -> None:
        if depth > 10:
            return
        try:
            entries = sorted(current.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower()))
        except OSError:
            return

        # Keep only the entries that belong in the rendered tree.
        visible = []
        for entry in entries:
            if entry.is_dir():
                if entry.name in SKIP_DIRS or entry.name.startswith("."):
                    continue
                visible.append(entry)
            elif entry.is_file() and _is_code_file(entry):
                visible.append(entry)

        for i, entry in enumerate(visible):
            is_last = i == len(visible) - 1
            connector = "`-- " if is_last else "|-- "
            # Child prefixes are 4 chars wide to line up under the 4-char
            # connectors; the original used 1-/2-char prefixes, which
            # misaligned every nested level.
            child_prefix = prefix + ("    " if is_last else "|   ")

            if entry.is_dir():
                lines.append(f"{prefix}{connector}{entry.name}/")
                _walk(entry, child_prefix, depth + 1)
            else:
                desc = descriptions.get(str(entry), "")
                desc_tag = f" [desc] {desc} [/desc]" if desc else ""
                lines.append(f"{prefix}{connector}{entry.name}{desc_tag}")

    lines.append(f"{folder.name}/")
    _walk(folder)
    return "\n".join(lines)


def get_folder_description(folder_path: str, config: dict | None = None) -> str:
    """Return a described file tree for *folder_path*, or an error string.

    Collects code files, resolves their one-line descriptions (inline tag,
    cache, or LLM), and renders them as an annotated ASCII tree preceded by
    a file count.
    """
    root = Path(folder_path)
    if not root.is_dir():
        return f"Error: {folder_path} is not a directory"

    collected = _collect_files(root)
    if not collected:
        return f"{root.name}/ (empty or no code files found)"

    described = describe_files_parallel([str(p) for p in collected], config)
    rendered = _build_tree_string(root, described)
    return f"{len(collected)} code files found.\n\n{rendered}"
Loading
Loading