Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,20 @@ USE_GPU_DECODER=0

# Development Remote Upload Configuration
HOST_INDEX_PATH=./dev-workspace

# Cross-Codebase Isolation (multi-repo search scoping)
# When enabled, search results are automatically filtered to the current repo
REPO_AUTO_FILTER=1
# Explicitly set current repo (overrides auto-detection from git/directory)
# CURRENT_REPO=

# Post-rerank symbol boost: ensures exact symbol matches rank highest
POST_RERANK_SYMBOL_BOOST=1.0
# Rerank blend weight: ratio of rerank score vs fusion score (0.0-1.0)
RERANK_BLEND_WEIGHT=0.6

# info_request() tool settings (simplified codebase retrieval)
INFO_REQUEST_LIMIT=10
INFO_REQUEST_CONTEXT_LINES=5
# INFO_REQUEST_EXPLAIN_DEFAULT=0
# INFO_REQUEST_RELATIONSHIPS=0
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,12 @@ REFRAG_COMMIT_DESCRIBE=1

STRICT_MEMORY_RESTORE=0

# info_request() tool settings (simplified codebase retrieval)
# Default result limit for info_request queries
INFO_REQUEST_LIMIT=10
# Default context lines in snippets (richer than repo_search default)
INFO_REQUEST_CONTEXT_LINES=5
# Enable explanation mode by default (summary, primary_locations, related_concepts)
# INFO_REQUEST_EXPLAIN_DEFAULT=0
# Enable relationship mapping by default (imports_from, calls, related_paths)
# INFO_REQUEST_RELATIONSHIPS=0
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@ CLAUDE.md
/.augment
/dev-workspace/*
!/dev-workspace/.gitkeep
docs/AUGMENT_COMPATIBILITY.md
.serena/project.yml
332 changes: 332 additions & 0 deletions scripts/mcp_indexer_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7503,6 +7503,338 @@ async def code_search(
)


# ---------------------------------------------------------------------------
# info_request: Simplified codebase retrieval with explanation mode
# ---------------------------------------------------------------------------


def _extract_symbols_from_query(query: str) -> list[str]:
"""Extract potential symbol names from a query string."""
import re
# Match CamelCase, snake_case, or standalone words that look like identifiers
patterns = [
r'\b[A-Z][a-z]+(?:[A-Z][a-z]+)+\b', # CamelCase
r'\b[a-z_][a-z0-9_]*(?:_[a-z0-9]+)+\b', # snake_case
r'\b(?:def|class|function|method|async)\s+(\w+)', # explicit mentions
]
symbols = set()
for pat in patterns:
for m in re.finditer(pat, query):
sym = m.group(1) if m.lastindex else m.group(0)
if len(sym) > 2:
symbols.add(sym)
return list(symbols)[:5] # Limit to top 5


def _extract_related_concepts(query: str, results: list) -> list[str]:
"""Extract related technical concepts dynamically from results (codebase-agnostic)."""
import re
concepts = set()

# Extract from results - this works on any codebase
for r in results[:10]:
# From symbols: split CamelCase/snake_case into meaningful parts
sym = r.get("symbol", "") or ""
if sym and len(sym) > 2:
parts = [p for p in re.split(r'(?=[A-Z])|_|-', sym) if p and len(p) > 2]
for part in parts[:3]:
concepts.add(part.lower())

# From file paths: extract directory/module names
path = r.get("path", "") or ""
if path:
path_parts = path.replace("\\", "/").split("/")
for pp in path_parts[-3:]: # Last 3 path segments
# Remove extension and split
name = pp.rsplit(".", 1)[0] if "." in pp else pp
if name and len(name) > 2 and not name.startswith("_"):
concepts.add(name.lower())

# From kind: function, class, method, etc.
kind = r.get("kind", "") or ""
if kind and len(kind) > 2:
concepts.add(kind.lower())

# From query: extract significant words (skip common words)
skip_words = {"the", "is", "are", "how", "does", "what", "where", "find", "get", "set", "for", "and", "with"}
query_parts = re.split(r'\W+', query.lower())
for qp in query_parts:
if qp and len(qp) > 2 and qp not in skip_words:
concepts.add(qp)

# Sort by frequency in results for relevance
return list(concepts)[:10]


def _format_information_field(result: dict) -> str:
"""Generate human-readable information field for a result."""
path = result.get("path", "")
symbol = result.get("symbol", "")
start = result.get("start_line", 0)
end = result.get("end_line", 0)
kind = result.get("kind", "")

# Get just the filename
filename = path.split("/")[-1] if "/" in path else path

if symbol and kind:
return f"Found {kind} '{symbol}' in {filename} (lines {start}-{end})"
elif symbol:
return f"Found '{symbol}' in {filename} (lines {start}-{end})"
else:
return f"Found match in {filename} (lines {start}-{end})"


def _extract_relationships(result: dict) -> dict:
"""Extract relationship metadata (imports, calls) from a result."""
relations = result.get("relations") or {}
# Get from relations object if present
imports = relations.get("imports") or []
calls = relations.get("calls") or []
symbol_path = relations.get("symbol_path") or ""
# Also check top-level metadata (fallback)
if not imports:
imports = result.get("imports") or []
if not calls:
calls = result.get("calls") or []
# Get related paths if available
related_paths = result.get("related_paths") or []

return {
"imports_from": imports[:10] if imports else [], # Limit to 10
"calls": calls[:10] if calls else [],
"symbol_path": symbol_path,
"related_paths": related_paths[:5] if related_paths else [],
}


def _calculate_confidence(query: str, results: list) -> dict:
"""Calculate confidence metrics for the search."""
if not results:
return {"level": "none", "score": 0.0, "reason": "no_results"}

avg_score = sum(r.get("score", 0) for r in results) / len(results)
top_score = results[0].get("score", 0) if results else 0

# Check if query terms match symbols
query_tokens = set(_split_ident(query.lower()))
symbol_matches = sum(
1 for r in results[:5]
if any(tok in _split_ident((r.get("symbol", "") or "").lower())
for tok in query_tokens)
)

if top_score > 0.8 and symbol_matches > 0:
level = "high"
elif avg_score > 0.6:
level = "medium"
elif results:
level = "low"
else:
level = "none"

return {
"level": level,
"score": round(avg_score, 3),
"top_score": round(top_score, 3),
"symbol_matches": symbol_matches,
}


@mcp.tool()
async def info_request(
# Primary parameter
info_request: str = None,
information_request: str = None, # Alias
# Explanation mode
include_explanation: bool = None,
# Relationship mapping
include_relationships: bool = None,
# Optional filters (pass-through to repo_search)
limit: int = None,
language: str = None,
under: str = None,
repo: Any = None,
path_glob: Any = None,
# Additional options
include_snippet: bool = None,
context_lines: int = None,
kwargs: Any = None,
) -> Dict[str, Any]:
"""Simplified codebase retrieval with optional explanation mode.

When to use:
- Simple, single-parameter code search with human-readable descriptions
- When you want optional explanation mode for richer context
- Drop-in replacement for basic codebase retrieval tools

Key parameters:
- info_request: str. Natural language description of the code you're looking for.
- information_request: str. Alias for info_request.
- include_explanation: bool (default false). Add summary, primary_locations, related_concepts.
- include_relationships: bool (default false). Add imports_from, calls, related_paths to results.
- limit: int (default 10). Maximum results to return.
- language: str. Filter by programming language.
- under: str. Limit search to specific directory.
- repo: str or list[str]. Filter by repository name(s).

Returns:
- Compact mode (default): results with information field and relevance_score alias
- Explanation mode: adds summary, primary_locations, related_concepts, query_understanding

Example:
- {"info_request": "database connection pooling"}
- {"info_request": "authentication middleware", "include_explanation": true}
"""
# Resolve query from either parameter
query = info_request or information_request
if not query or not str(query).strip():
return {"ok": False, "error": "info_request parameter is required", "results": []}
query = str(query).strip()

# Resolve defaults from env
_default_limit = safe_int(
os.environ.get("INFO_REQUEST_LIMIT", "10"), default=10, logger=logger
)
_default_context = safe_int(
os.environ.get("INFO_REQUEST_CONTEXT_LINES", "5"), default=5, logger=logger
)
_default_explain = str(
os.environ.get("INFO_REQUEST_EXPLAIN_DEFAULT", "0")
).strip().lower() in {"1", "true", "yes", "on"}
_default_relationships = str(
os.environ.get("INFO_REQUEST_RELATIONSHIPS", "0")
).strip().lower() in {"1", "true", "yes", "on"}

# Apply defaults
eff_limit = limit if limit is not None else _default_limit
eff_context = context_lines if context_lines is not None else _default_context
eff_snippet = include_snippet if include_snippet is not None else True
eff_explain = include_explanation if include_explanation is not None else _default_explain
eff_relationships = include_relationships if include_relationships is not None else _default_relationships

# Smart limits based on query characteristics (only if user didn't override)
if limit is None:
query_words = len(query.split())
query_lower = query.lower()
if query_words <= 2: # Short query like "auth handler"
eff_limit = 15 # More results for broad queries
elif "how does" in query_lower or "what is" in query_lower:
eff_limit = 8 # Questions need focused results

# Call repo_search
search_result = await repo_search(
query=query,
limit=eff_limit,
per_path=3, # Better default for info requests
include_snippet=eff_snippet,
context_lines=eff_context,
language=language,
under=under,
repo=repo,
path_glob=path_glob,
kwargs=kwargs,
)

# Extract results
results = search_result.get("results", [])
total = search_result.get("total", len(results))
used_rerank = search_result.get("used_rerank", False)

# Enhance each result with information field and optional relationships
enhanced_results = []
for r in results:
enhanced = dict(r)
enhanced["information"] = _format_information_field(r)
enhanced["relevance_score"] = r.get("score", 0.0) # Alias
# Add relationships if requested
if eff_relationships:
enhanced["relationships"] = _extract_relationships(r)
enhanced_results.append(enhanced)

# Build better search strategy string
strategy_parts = ["hybrid"]
if used_rerank:
strategy_parts.append("rerank")
if repo:
strategy_parts.append("repo_filtered")
if language:
strategy_parts.append(f"lang:{language}")
if under:
strategy_parts.append("path_filtered")
search_strategy = "+".join(strategy_parts)

# Build response
response: Dict[str, Any] = {
"ok": True,
"results": enhanced_results,
"total": total,
"search_strategy": search_strategy,
}

# Add explanation if requested
if eff_explain:
# Primary locations: unique file paths
seen_paths = set()
primary_locations = []
for r in results:
p = r.get("path", "")
if p and p not in seen_paths:
seen_paths.add(p)
primary_locations.append(p)
if len(primary_locations) >= 5:
break

# Related concepts
related_concepts = _extract_related_concepts(query, results)

# Detected symbols from query
detected_symbols = _extract_symbols_from_query(query)

# Summary
n_files = len(seen_paths)
summary = f"Found {total} results related to '{query}' across {n_files} file{'s' if n_files != 1 else ''}"

# Group results by file
files_map: Dict[str, list] = {}
for r in enhanced_results:
p = r.get("path", "")
if p not in files_map:
files_map[p] = []
files_map[p].append({
"symbol": r.get("symbol", ""),
"line": r.get("start_line", 0),
"score": r.get("score", 0.0),
})

grouped_results = {
"by_file": {
path: {
"count": len(items),
"top_symbols": [i["symbol"] for i in sorted(items, key=lambda x: -x["score"])[:3] if i["symbol"]],
}
for path, items in files_map.items()
}
}

# Calculate confidence
confidence = _calculate_confidence(query, enhanced_results)

response["summary"] = summary
response["primary_locations"] = primary_locations
response["related_concepts"] = related_concepts
response["grouped_results"] = grouped_results
response["confidence"] = confidence
response["query_understanding"] = {
"intent": "search_for_code",
"detected_language": language or None,
"detected_symbols": detected_symbols,
"search_strategy": search_strategy,
}

return response


_relax_var_kwarg_defaults()

if __name__ == "__main__":
Expand Down
Loading