From d32345edeb6fff3cd8ce8861ea3497712a767d41 Mon Sep 17 00:00:00 2001
From: John Donalson <11264689+m1rl0k@users.noreply.github.com>
Date: Tue, 2 Dec 2025 18:42:10 -0500
Subject: [PATCH] Info-Request

---
 .env                          |  17 ++
 .env.example                  |   9 +
 .gitignore                    |   2 +
 scripts/mcp_indexer_server.py | 332 ++++++++++++++++++++++++++++++++++
 4 files changed, 360 insertions(+)

diff --git a/.env b/.env
index 802b6ea8..ee501184 100644
--- a/.env
+++ b/.env
@@ -169,3 +169,20 @@ USE_GPU_DECODER=0
 
 # Development Remote Upload Configuration
 HOST_INDEX_PATH=./dev-workspace
+
+# Cross-Codebase Isolation (multi-repo search scoping)
+# When enabled, search results are automatically filtered to the current repo
+REPO_AUTO_FILTER=1
+# Explicitly set current repo (overrides auto-detection from git/directory)
+# CURRENT_REPO=
+
+# Post-rerank symbol boost: ensures exact symbol matches rank highest
+POST_RERANK_SYMBOL_BOOST=1.0
+# Rerank blend weight: ratio of rerank score vs fusion score (0.0-1.0)
+RERANK_BLEND_WEIGHT=0.6
+
+# info_request() tool settings (simplified codebase retrieval)
+INFO_REQUEST_LIMIT=10
+INFO_REQUEST_CONTEXT_LINES=5
+# INFO_REQUEST_EXPLAIN_DEFAULT=0
+# INFO_REQUEST_RELATIONSHIPS=0
diff --git a/.env.example b/.env.example
index 0e918238..cd8375c2 100644
--- a/.env.example
+++ b/.env.example
@@ -200,3 +200,12 @@ REFRAG_COMMIT_DESCRIBE=1
 
 STRICT_MEMORY_RESTORE=0
 
+# info_request() tool settings (simplified codebase retrieval)
+# Default result limit for info_request queries
+INFO_REQUEST_LIMIT=10
+# Default context lines in snippets (richer than repo_search default)
+INFO_REQUEST_CONTEXT_LINES=5
+# Enable explanation mode by default (summary, primary_locations, related_concepts)
+# INFO_REQUEST_EXPLAIN_DEFAULT=0
+# Enable relationship mapping by default (imports_from, calls, related_paths)
+# INFO_REQUEST_RELATIONSHIPS=0
diff --git a/.gitignore b/.gitignore
index 4c548379..1e578035 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,5 @@ CLAUDE.md
 /.augment
 /dev-workspace/*
 !/dev-workspace/.gitkeep
+docs/AUGMENT_COMPATIBILITY.md
+.serena/project.yml
diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py
index af96c533..c912c721 100644
--- a/scripts/mcp_indexer_server.py
+++ b/scripts/mcp_indexer_server.py
@@ -7503,6 +7503,338 @@ async def code_search(
     )
 
 
+# ---------------------------------------------------------------------------
+# info_request: Simplified codebase retrieval with explanation mode
+# ---------------------------------------------------------------------------
+
+
+def _extract_symbols_from_query(query: str) -> list[str]:
+    """Extract potential symbol names from a query string."""
+    import re
+    # Match CamelCase, snake_case, or standalone words that look like identifiers
+    patterns = [
+        r'\b[A-Z][a-z]+(?:[A-Z][a-z]+)+\b',  # CamelCase
+        r'\b[a-z_][a-z0-9_]*(?:_[a-z0-9]+)+\b',  # snake_case
+        r'\b(?:def|class|function|method|async)\s+(\w+)',  # explicit mentions
+    ]
+    symbols = set()
+    for pat in patterns:
+        for m in re.finditer(pat, query):
+            sym = m.group(1) if m.lastindex else m.group(0)
+            if len(sym) > 2:
+                symbols.add(sym)
+    return list(symbols)[:5]  # Limit to top 5
+
+
+def _extract_related_concepts(query: str, results: list) -> list[str]:
+    """Extract related technical concepts dynamically from results (codebase-agnostic)."""
+    import re
+    concepts = set()
+
+    # Extract from results - this works on any codebase
+    for r in results[:10]:
+        # From symbols: split CamelCase/snake_case into meaningful parts
+        sym = r.get("symbol", "") or ""
+        if sym and len(sym) > 2:
+            parts = [p for p in re.split(r'(?=[A-Z])|_|-', sym) if p and len(p) > 2]
+            for part in parts[:3]:
+                concepts.add(part.lower())
+
+        # From file paths: extract directory/module names
+        path = r.get("path", "") or ""
+        if path:
+            path_parts = path.replace("\\", "/").split("/")
+            for pp in path_parts[-3:]:  # Last 3 path segments
+                # Remove extension and split
+                name = pp.rsplit(".", 1)[0] if "." in pp else pp
+                if name and len(name) > 2 and not name.startswith("_"):
+                    concepts.add(name.lower())
+
+        # From kind: function, class, method, etc.
+        kind = r.get("kind", "") or ""
+        if kind and len(kind) > 2:
+            concepts.add(kind.lower())
+
+    # From query: extract significant words (skip common words)
+    skip_words = {"the", "is", "are", "how", "does", "what", "where", "find", "get", "set", "for", "and", "with"}
+    query_parts = re.split(r'\W+', query.lower())
+    for qp in query_parts:
+        if qp and len(qp) > 2 and qp not in skip_words:
+            concepts.add(qp)
+
+    # Sort by frequency in results for relevance
+    return list(concepts)[:10]
+
+
+def _format_information_field(result: dict) -> str:
+    """Generate human-readable information field for a result."""
+    path = result.get("path", "")
+    symbol = result.get("symbol", "")
+    start = result.get("start_line", 0)
+    end = result.get("end_line", 0)
+    kind = result.get("kind", "")
+
+    # Get just the filename
+    filename = path.split("/")[-1] if "/" in path else path
+
+    if symbol and kind:
+        return f"Found {kind} '{symbol}' in {filename} (lines {start}-{end})"
+    elif symbol:
+        return f"Found '{symbol}' in {filename} (lines {start}-{end})"
+    else:
+        return f"Found match in {filename} (lines {start}-{end})"
+
+
+def _extract_relationships(result: dict) -> dict:
+    """Extract relationship metadata (imports, calls) from a result."""
+    relations = result.get("relations") or {}
+    # Get from relations object if present
+    imports = relations.get("imports") or []
+    calls = relations.get("calls") or []
+    symbol_path = relations.get("symbol_path") or ""
+    # Also check top-level metadata (fallback)
+    if not imports:
+        imports = result.get("imports") or []
+    if not calls:
+        calls = result.get("calls") or []
+    # Get related paths if available
+    related_paths = result.get("related_paths") or []
+
+    return {
+        "imports_from": imports[:10] if imports else [],  # Limit to 10
+        "calls": calls[:10] if calls else [],
+        "symbol_path": symbol_path,
+        "related_paths": related_paths[:5] if related_paths else [],
+    }
+
+
+def _calculate_confidence(query: str, results: list) -> dict:
+    """Calculate confidence metrics for the search."""
+    if not results:
+        return {"level": "none", "score": 0.0, "reason": "no_results"}
+
+    avg_score = sum(r.get("score", 0) for r in results) / len(results)
+    top_score = results[0].get("score", 0) if results else 0
+
+    # Check if query terms match symbols
+    query_tokens = set(_split_ident(query.lower()))
+    symbol_matches = sum(
+        1 for r in results[:5]
+        if any(tok in _split_ident((r.get("symbol", "") or "").lower())
+               for tok in query_tokens)
+    )
+
+    if top_score > 0.8 and symbol_matches > 0:
+        level = "high"
+    elif avg_score > 0.6:
+        level = "medium"
+    elif results:
+        level = "low"
+    else:
+        level = "none"
+
+    return {
+        "level": level,
+        "score": round(avg_score, 3),
+        "top_score": round(top_score, 3),
+        "symbol_matches": symbol_matches,
+    }
+
+
+@mcp.tool()
+async def info_request(
+    # Primary parameter
+    info_request: str = None,
+    information_request: str = None,  # Alias
+    # Explanation mode
+    include_explanation: bool = None,
+    # Relationship mapping
+    include_relationships: bool = None,
+    # Optional filters (pass-through to repo_search)
+    limit: int = None,
+    language: str = None,
+    under: str = None,
+    repo: Any = None,
+    path_glob: Any = None,
+    # Additional options
+    include_snippet: bool = None,
+    context_lines: int = None,
+    kwargs: Any = None,
+) -> Dict[str, Any]:
+    """Simplified codebase retrieval with optional explanation mode.
+
+    When to use:
+    - Simple, single-parameter code search with human-readable descriptions
+    - When you want optional explanation mode for richer context
+    - Drop-in replacement for basic codebase retrieval tools
+
+    Key parameters:
+    - info_request: str. Natural language description of the code you're looking for.
+    - information_request: str. Alias for info_request.
+    - include_explanation: bool (default false). Add summary, primary_locations, related_concepts.
+    - include_relationships: bool (default false). Add imports_from, calls, related_paths to results.
+    - limit: int (default 10). Maximum results to return.
+    - language: str. Filter by programming language.
+    - under: str. Limit search to specific directory.
+    - repo: str or list[str]. Filter by repository name(s).
+
+    Returns:
+    - Compact mode (default): results with information field and relevance_score alias
+    - Explanation mode: adds summary, primary_locations, related_concepts, query_understanding
+
+    Example:
+    - {"info_request": "database connection pooling"}
+    - {"info_request": "authentication middleware", "include_explanation": true}
+    """
+    # Resolve query from either parameter
+    query = info_request or information_request
+    if not query or not str(query).strip():
+        return {"ok": False, "error": "info_request parameter is required", "results": []}
+    query = str(query).strip()
+
+    # Resolve defaults from env
+    _default_limit = safe_int(
+        os.environ.get("INFO_REQUEST_LIMIT", "10"), default=10, logger=logger
+    )
+    _default_context = safe_int(
+        os.environ.get("INFO_REQUEST_CONTEXT_LINES", "5"), default=5, logger=logger
+    )
+    _default_explain = str(
+        os.environ.get("INFO_REQUEST_EXPLAIN_DEFAULT", "0")
+    ).strip().lower() in {"1", "true", "yes", "on"}
+    _default_relationships = str(
+        os.environ.get("INFO_REQUEST_RELATIONSHIPS", "0")
+    ).strip().lower() in {"1", "true", "yes", "on"}
+
+    # Apply defaults
+    eff_limit = limit if limit is not None else _default_limit
+    eff_context = context_lines if context_lines is not None else _default_context
+    eff_snippet = include_snippet if include_snippet is not None else True
+    eff_explain = include_explanation if include_explanation is not None else _default_explain
+    eff_relationships = include_relationships if include_relationships is not None else _default_relationships
+
+    # Smart limits based on query characteristics (only if user didn't override)
+    if limit is None:
+        query_words = len(query.split())
+        query_lower = query.lower()
+        if query_words <= 2:  # Short query like "auth handler"
+            eff_limit = 15  # More results for broad queries
+        elif "how does" in query_lower or "what is" in query_lower:
+            eff_limit = 8   # Questions need focused results
+
+    # Call repo_search
+    search_result = await repo_search(
+        query=query,
+        limit=eff_limit,
+        per_path=3,  # Better default for info requests
+        include_snippet=eff_snippet,
+        context_lines=eff_context,
+        language=language,
+        under=under,
+        repo=repo,
+        path_glob=path_glob,
+        kwargs=kwargs,
+    )
+
+    # Extract results
+    results = search_result.get("results", [])
+    total = search_result.get("total", len(results))
+    used_rerank = search_result.get("used_rerank", False)
+
+    # Enhance each result with information field and optional relationships
+    enhanced_results = []
+    for r in results:
+        enhanced = dict(r)
+        enhanced["information"] = _format_information_field(r)
+        enhanced["relevance_score"] = r.get("score", 0.0)  # Alias
+        # Add relationships if requested
+        if eff_relationships:
+            enhanced["relationships"] = _extract_relationships(r)
+        enhanced_results.append(enhanced)
+
+    # Build better search strategy string
+    strategy_parts = ["hybrid"]
+    if used_rerank:
+        strategy_parts.append("rerank")
+    if repo:
+        strategy_parts.append("repo_filtered")
+    if language:
+        strategy_parts.append(f"lang:{language}")
+    if under:
+        strategy_parts.append("path_filtered")
+    search_strategy = "+".join(strategy_parts)
+
+    # Build response
+    response: Dict[str, Any] = {
+        "ok": True,
+        "results": enhanced_results,
+        "total": total,
+        "search_strategy": search_strategy,
+    }
+
+    # Add explanation if requested
+    if eff_explain:
+        # Primary locations: unique file paths
+        seen_paths = set()
+        primary_locations = []
+        for r in results:
+            p = r.get("path", "")
+            if p and p not in seen_paths:
+                seen_paths.add(p)
+                primary_locations.append(p)
+                if len(primary_locations) >= 5:
+                    break
+
+        # Related concepts
+        related_concepts = _extract_related_concepts(query, results)
+
+        # Detected symbols from query
+        detected_symbols = _extract_symbols_from_query(query)
+
+        # Summary
+        n_files = len(seen_paths)
+        summary = f"Found {total} results related to '{query}' across {n_files} file{'s' if n_files != 1 else ''}"
+
+        # Group results by file
+        files_map: Dict[str, list] = {}
+        for r in enhanced_results:
+            p = r.get("path", "")
+            if p not in files_map:
+                files_map[p] = []
+            files_map[p].append({
+                "symbol": r.get("symbol", ""),
+                "line": r.get("start_line", 0),
+                "score": r.get("score", 0.0),
+            })
+
+        grouped_results = {
+            "by_file": {
+                path: {
+                    "count": len(items),
+                    "top_symbols": [i["symbol"] for i in sorted(items, key=lambda x: -x["score"])[:3] if i["symbol"]],
+                }
+                for path, items in files_map.items()
+            }
+        }
+
+        # Calculate confidence
+        confidence = _calculate_confidence(query, enhanced_results)
+
+        response["summary"] = summary
+        response["primary_locations"] = primary_locations
+        response["related_concepts"] = related_concepts
+        response["grouped_results"] = grouped_results
+        response["confidence"] = confidence
+        response["query_understanding"] = {
+            "intent": "search_for_code",
+            "detected_language": language or None,
+            "detected_symbols": detected_symbols,
+            "search_strategy": search_strategy,
+        }
+
+    return response
+
+
 _relax_var_kwarg_defaults()
 
 if __name__ == "__main__":