diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 00000000..486385ff --- /dev/null +++ b/.cursorrules @@ -0,0 +1,664 @@ +# AI Agent Rules for Context-Engine MCP Tools + +> **These rules are NOT optional.** Favor MCP Qdrant-Indexer tooling at all costs over grep, file reading, and unrelated codebase search tools. + +## STOP — Read This First + +**DO NOT use `Read File`, `grep`, `ripgrep`, `cat`, `find`, or any filesystem search tool for code exploration.** +These tools exist in your IDE but they are WRONG for this codebase. You have MCP tools that are faster, smarter, and return ranked, contextual results. + +**If you catch yourself about to `Read` a file to understand it** → use `repo_search` or `context_answer` instead. +**If you catch yourself about to `grep` for a symbol** → use `symbol_graph` or `search_callers_for` instead. +**If you catch yourself about to `grep -r` for a concept** → use `repo_search` with a natural language query instead. + +The ONLY acceptable use of `grep`/`Read` is confirming an exact literal string you already know exists (e.g., an env var name like `REDIS_HOST`). + +## Introduction + +This document defines requirements for AI agents using Context-Engine's MCP tools. The system provides two MCP servers (Memory Server on port 8000/8002, Indexer Server on port 8001/8003) with 30+ specialized tools for semantic code search, memory storage, and codebase exploration. + +**Core Principle:** MCP Qdrant-Indexer tools are PRIMARY for exploring code and history. Start with MCP for exploration, debugging, or "where/why" questions; use literal search/file-open only for narrow exact-literal lookups. 
+ +## Glossary + +- **MCP**: Model Context Protocol - standardized interface for exposing tools to AI agents +- **Indexer Server**: MCP server for code search, indexing, symbol graphs (port 8001 SSE, 8003 HTTP) +- **Memory Server**: MCP server for knowledge storage and retrieval (port 8000 SSE, 8002 HTTP) +- **Hybrid Search**: Dense semantic vectors + lexical BM25 + neural reranking (ONNX) +- **ReFRAG**: Micro-chunking with 16-24 token windows for precise code retrieval +- **TOON**: Token-Oriented Object Notation - compact output format (60-80% token reduction) +- **Symbol Graph**: Indexed metadata for calls, imports, and definitions navigation +- **Collection**: Qdrant vector database collection storing indexed code chunks + +## Requirements + +### Requirement 1: MCP-First Tool Selection + +**User Story:** As an AI agent, I want to prioritize MCP tools over grep/file-reading, so that I get semantic understanding efficiently. + +#### Acceptance Criteria + +1. WHEN exploring code or answering "where/why" questions, THE Agent SHALL use MCP Indexer tools as the primary method +2. WHEN the agent needs semantic understanding, cross-file relationships, or ranked results with context, THE Agent SHALL use MCP tools +3. WHEN the agent knows an exact literal string AND only needs to confirm existence/location, THE Agent SHALL use grep or file-open +4. IF the agent is uncertain which approach to use, THEN THE Agent SHALL default to MCP tools +5. THE Agent SHALL NOT use `grep -r "auth"` for concepts (use MCP: "authentication mechanisms") + +### Requirement 2: Query Formulation + +**User Story:** As an AI agent, I want to write effective semantic queries, so that I retrieve relevant code spans. + +#### Acceptance Criteria + +1. WHEN writing queries for `repo_search`, THE Agent SHALL use short natural-language fragments (e.g., "database connection handling") +2. THE Agent SHALL NOT use boolean operators (OR, AND), regex syntax, or code patterns in semantic queries +3. 
WHEN searching broad concepts, THE Agent SHALL use descriptive phrases like "error reporting patterns" not `grep -r "error"` +4. THE Agent SHALL write queries as descriptions of what to find, not as literal code strings +5. WHEN searching for specific symbols, THE Agent SHALL use the `symbol` parameter alongside the query + +### Requirement 3: Performance Optimization + +**User Story:** As an AI agent, I want to minimize token usage and latency, so that I work efficiently within context limits. + +#### Acceptance Criteria + +1. WHEN starting discovery, THE Agent SHALL use `limit=3`, `compact=true`, `per_path=1` +2. WHEN needing implementation details, THE Agent SHALL increase to `limit=5`, `include_snippet=true` +3. WHEN token efficiency is critical, THE Agent SHALL use `output_format="toon"` for 60-80% reduction +4. THE Agent SHALL NOT use `limit=20` with `include_snippet=true` (excessive token waste) +5. THE Agent SHALL NOT use high `context_lines` for pure discovery (unnecessary tokens) +6. THE Agent SHALL fire independent tool calls in parallel (same message block) for 2-3x speedup +7. THE Agent SHALL prefer `output_format="toon"` as default for all discovery queries + +### Requirement 4: Core Search Tools + +**User Story:** As an AI agent, I want to use the right search tool for each task, so that I get optimal results. + +#### Acceptance Criteria + +1. WHEN finding relevant files/spans and inspecting code, THE Agent SHALL use `repo_search` or `code_search` +2. WHEN combining code hits with memory/docs, THE Agent SHALL use `context_search` with `include_memories=true` +3. WHEN needing natural-language explanations with citations, THE Agent SHALL use `context_answer` +4. WHEN needing quick discovery with summaries, THE Agent SHALL use `info_request` with `include_explanation=true` +5. 
WHEN finding structurally similar patterns across languages, THE Agent SHALL use `pattern_search` + +### Requirement 5: Symbol Graph Navigation (DEFAULT for all graph queries) + +**User Story:** As an AI agent, I want to navigate code relationships efficiently, so that I understand call graphs and dependencies. + +> **IMPORTANT:** `symbol_graph` is the DEFAULT and ALWAYS-AVAILABLE tool for graph queries. It works with the Qdrant-backed symbol index — no Neo4j required. Use `symbol_graph` FIRST for any caller/definition/importer query. Do NOT attempt `neo4j_graph_query` unless you know Neo4j is enabled. + +#### Acceptance Criteria + +1. WHEN finding who calls a function, THE Agent SHALL use `symbol_graph(symbol="name", query_type="callers")` +2. WHEN finding where a symbol is defined, THE Agent SHALL use `symbol_graph(symbol="name", query_type="definition")` +3. WHEN finding what imports a module, THE Agent SHALL use `symbol_graph(symbol="name", query_type="importers")` +4. THE Agent SHALL prefer `symbol_graph` over `search_callers_for` for structured navigation with metadata +5. THE Agent SHALL use `language` and `under` filters to narrow symbol graph results +6. WHEN needing multi-hop traversals, THE Agent SHALL use `symbol_graph` with `depth=2` or `depth=3` +7. **THE Agent SHALL default to `symbol_graph` for ALL graph/relationship queries.** It is always available regardless of Neo4j status. +8. THE Agent SHALL NOT attempt `neo4j_graph_query` unless the tool is visible in the MCP tool list (it only registers when `NEO4J_GRAPH=1`) + +### Requirement 5b: Neo4j Advanced Graph Queries (OPTIONAL — only when NEO4J_GRAPH=1) + +**User Story:** As an AI agent, I want to perform advanced graph traversals when Neo4j is available, so that I understand impact, dependencies, and circular references. + +> **NOTE:** The `neo4j_graph_query` tool is ONLY available when `NEO4J_GRAPH=1` is set. 
If this tool is not in your MCP tool list, it is NOT enabled — use `symbol_graph` instead for all graph queries. Do NOT error or warn about missing Neo4j; just use `symbol_graph`. + +#### Acceptance Criteria + +1. WHEN `neo4j_graph_query` IS available AND needing multi-hop callers, THE Agent SHALL use `neo4j_graph_query(query_type="transitive_callers", symbol="name", depth=2)` +2. WHEN `neo4j_graph_query` IS available AND analyzing "what would break if I change X?", THE Agent SHALL use `neo4j_graph_query(query_type="impact", symbol="name", depth=2)` +3. WHEN `neo4j_graph_query` IS available AND finding all dependencies, THE Agent SHALL use `neo4j_graph_query(query_type="dependencies", symbol="name")` +4. WHEN `neo4j_graph_query` IS available AND detecting circular dependencies, THE Agent SHALL use `neo4j_graph_query(query_type="cycles", symbol="name")` +5. WHEN `neo4j_graph_query` IS NOT available, THE Agent SHALL fall back to `symbol_graph` for callers/definitions/importers queries +6. THE Agent SHALL NEVER error or complain about Neo4j being unavailable — just use `symbol_graph` + +### Requirement 6: Specialized Search Tools + +**User Story:** As an AI agent, I want to use specialized tools for common search patterns, so that I find specific code types quickly. + +#### Acceptance Criteria + +1. WHEN finding test files, THE Agent SHALL use `search_tests_for` (preset test globs) +2. WHEN finding configuration files, THE Agent SHALL use `search_config_for` (preset config globs) +3. WHEN finding symbol usages heuristically, THE Agent SHALL use `search_callers_for` +4. WHEN finding import references, THE Agent SHALL use `search_importers_for` +5. THE Agent SHALL pass `language`, `under`, and `limit` filters to narrow specialized searches + +### Requirement 7: Context Answer Best Practices + +**User Story:** As an AI agent, I want high-quality explanations from context_answer, so that I understand code behavior accurately. + +#### Acceptance Criteria + +1. 
WHEN asking about specific modules, THE Agent SHALL mention filenames explicitly in queries +2. WHEN asking cross-file questions, THE Agent SHALL use behavior-describing queries without filenames +3. THE Agent SHALL use `budget_tokens` to control context size (default: MICRO_BUDGET_TOKENS env) +4. THE Agent SHALL set `temperature=0.2` or `temperature=0.3` for deterministic answers +5. THE Agent SHALL NOT use `context_answer` as a debugger for low-level helpers; prefer `repo_search` + direct reading + +### Requirement 8: Info Request Simplification + +**User Story:** As an AI agent, I want a simple interface for quick code discovery, so that I can find relevant code with minimal parameters. + +#### Acceptance Criteria + +1. WHEN needing simple search, THE Agent SHALL use `info_request(info_request="description")` +2. WHEN needing summaries and concepts, THE Agent SHALL set `include_explanation=true` +3. WHEN needing relationship data (imports/calls), THE Agent SHALL set `include_relationships=true` +4. THE Agent SHALL understand smart limits: 15 for short queries, 8 for questions, 10 default +5. THE Agent SHALL use `info_request` for quick discovery before deeper `repo_search` dives + +### Requirement 9: Pattern Search Usage + +**User Story:** As an AI agent, I want to find structurally similar code across languages, so that I detect patterns and duplication. + +#### Acceptance Criteria + +1. WHEN finding similar control flow, THE Agent SHALL use `pattern_search` with code examples OR descriptions +2. THE Agent SHALL use `query_mode="auto"` (default) to let the system detect code vs description +3. WHEN searching specific target languages, THE Agent SHALL use `target_languages` filter +4. THE Agent SHALL set `min_score=0.3` or higher to filter low-quality matches +5. 
IF pattern_search is unavailable (PATTERN_VECTORS!=1), THEN THE Agent SHALL fall back to `repo_search` + +### Requirement 10: Git History Integration + +**User Story:** As an AI agent, I want to understand code evolution, so that I answer "when/why did X change" questions. + +#### Acceptance Criteria + +1. WHEN finding current implementation, THE Agent SHALL first use `repo_search` to locate relevant files +2. WHEN summarizing recent changes, THE Agent SHALL use `change_history_for_path(path="...", include_commits=true)` +3. WHEN finding commits for specific behavior, THE Agent SHALL use `search_commits_for(query="behavior phrase", path="...")` +4. THE Agent SHALL read `lineage_goal`, `lineage_symbols`, `lineage_tags` from commit results +5. WHEN explaining current behavior after finding files, THE Agent SHALL use `context_answer` + +### Requirement 11: Memory System Usage + +**User Story:** As an AI agent, I want to store and retrieve knowledge effectively, so that I build on previous work. + +#### Acceptance Criteria + +1. WHEN storing code snippets, THE Agent SHALL use `memory_store` with metadata: `{code, language, path, kind="snippet"}` +2. WHEN storing explanations, THE Agent SHALL use `kind="explanation"` with `tags` and `topic` +3. THE Agent SHALL set `priority` (1-10) to indicate importance (higher = more important) +4. WHEN searching memories, THE Agent SHALL use `memory_find` with filters: `kind`, `language`, `tags`, `priority_min` +5. THE Agent SHALL use `context_search(include_memories=true)` to blend code + memory results + +### Requirement 12: Cross-Repo Search + +**User Story:** As an AI agent, I want to search across repositories effectively, so that I find code regardless of location. + +#### Acceptance Criteria + +1. WHEN searching a single repository, THE Agent SHALL use `repo="repo-name"` +2. WHEN searching multiple repositories, THE Agent SHALL use `repo=["frontend", "backend"]` +3. 
WHEN searching all indexed repositories, THE Agent SHALL use `repo="*"` +4. WHEN `repo` is omitted, THE Agent SHALL rely on auto-detection via CURRENT_REPO env (REPO_AUTO_FILTER=1) +5. THE Agent SHALL specify `collection` parameter when multiple collections exist + +### Requirement 13: Session Management + +**User Story:** As an AI agent, I want to maintain session context, so that I don't repeat parameters unnecessarily. + +#### Acceptance Criteria + +1. WHEN starting a session, THE Agent SHALL call `set_session_defaults` for both indexer and memory servers +2. THE Agent SHALL set default `collection` in session to avoid repeating it in every request +3. THE Agent SHALL understand precedence: explicit args > per-connection defaults > token defaults > env default +4. THE Agent SHALL use session tokens for cross-connection reuse when needed +5. THE Agent SHALL call `set_session_defaults(collection="...", output_format="toon", compact=true)` early + +### Requirement 14: Query Expansion Strategy + +**User Story:** As an AI agent, I want to use query expansion judiciously, so that I improve results without excessive overhead. + +#### Acceptance Criteria + +1. THE Agent SHALL attempt normal search BEFORE using `expand_query` or `expand=true` +2. WHEN initial search returns poor results, THE Agent SHALL use `expand=true` on `context_answer` +3. THE Agent SHALL use `max_new=2` or `max_new=3` for expansion (default 3) +4. THE Agent SHALL understand expansion uses local LLM (llama.cpp, GLM, or MiniMax) and is expensive +5. THE Agent SHALL treat `expand_query` as a last resort, not a default + +### Requirement 15: Error Handling + +**User Story:** As an AI agent, I want to handle errors gracefully, so that I recover and continue working. + +#### Acceptance Criteria + +1. WHEN MCP tools return responses, THE Agent SHALL check the `ok` field for success/failure +2. WHEN reranking times out, THE Agent SHALL accept fallback to hybrid-only results (still valid) +3. 
WHEN decoder is disabled, THE Agent SHALL accept `context_answer` returning citations without generated text +4. WHEN pattern_search is unavailable, THE Agent SHALL fall back to `repo_search` +5. THE Agent SHALL parse error messages and adjust parameters accordingly + +### Requirement 16: Indexing and Maintenance + +**User Story:** As an AI agent, I want to manage indexing operations, so that the codebase stays current. + +#### Acceptance Criteria + +1. WHEN indexing entire workspace, THE Agent SHALL use `qdrant_index_root` +2. WHEN indexing subdirectories, THE Agent SHALL use `qdrant_index(subdir="path")` +3. WHEN recreating collections from scratch, THE Agent SHALL use `recreate=true` +4. WHEN removing stale points (deleted files), THE Agent SHALL use `qdrant_prune` +5. WHEN checking index health, THE Agent SHALL use `qdrant_status` for count and timestamps + +### Requirement 17: Workspace Discovery + +**User Story:** As an AI agent, I want to discover available workspaces and collections, so that I target the right codebase. + +#### Acceptance Criteria + +1. WHEN listing all collections, THE Agent SHALL use `qdrant_list` +2. WHEN checking current workspace state, THE Agent SHALL use `workspace_info` +3. WHEN discovering multiple workspaces, THE Agent SHALL use `list_workspaces` +4. WHEN mapping collections to repos, THE Agent SHALL use `collection_map` +5. THE Agent SHALL understand workspace state includes: indexing_status, indexing_config, active_repo_slug + +### Requirement 18: Grep and File Read Anti-Patterns + +**User Story:** As an AI agent, I want to recognize when grep and file reading are inappropriate, so that I use semantic search instead. + +#### Acceptance Criteria + +1. THE Agent SHALL NOT use `grep -r "auth"` (use MCP: "authentication mechanisms") +2. THE Agent SHALL NOT use `grep -r "cache"` (use MCP: "caching strategies") +3. THE Agent SHALL NOT use `grep -r "error"` (use MCP: "error handling patterns") +4. 
THE Agent SHALL NOT use `grep -r "database"` (use MCP: "database operations") +5. THE Agent SHALL use grep ONLY for exact literals: `grep -rn "UserAlreadyExists"`, `grep -rn "REDIS_HOST"` +6. THE Agent SHALL NOT use `Read File` to understand what a file does — use `repo_search` or `context_answer` with the filename in the query +7. THE Agent SHALL NOT use `Read File` to find callers/imports — use `symbol_graph` instead +8. THE Agent SHALL NOT open files to "browse" the codebase — use `info_request` or `repo_search` for discovery +9. THE Agent SHALL NOT use `find` or `ls` to discover project structure — use `workspace_info` or `qdrant_status` +10. THE ONLY acceptable uses of `Read File` are: (a) editing a file you already located via MCP, (b) reading config files you know the exact path of + +### Requirement 19: Advanced Reranking Features + +**User Story:** As an AI agent, I want to leverage reranking effectively, so that I get the most relevant results. + +#### Acceptance Criteria + +1. THE Agent SHALL use `rerank_enabled=true` (default) for complex queries needing best relevance +2. WHEN faster results are acceptable, THE Agent SHALL set `rerank_enabled=false` +3. THE Agent SHALL understand results include `learning_score` and `refinement_iterations` from ONNX teacher +4. THE Agent SHALL use `rerank_top_n` to control candidate pool size (default 20) +5. THE Agent SHALL use `rerank_return_m` to control final result count after reranking + +### Requirement 20: Output Format and Token Efficiency + +**User Story:** As an AI agent, I want to choose appropriate output formats, so that I optimize token usage. + +#### Acceptance Criteria + +1. WHEN token efficiency is critical, THE Agent SHALL use `output_format="toon"` for 60-80% reduction +2. WHEN needing full structured data, THE Agent SHALL use `output_format="json"` (default) +3. WHEN using `compact=true`, THE Agent SHALL expect reduced result fields (path, symbol, lines, score) +4. 
THE Agent SHALL use `include_snippet=false` when only file/line references are needed +5. THE Agent SHALL combine `compact=true` + `limit=3` + `per_path=1` for minimal discovery queries + +--- + +## Tool Quick Reference + +### Search Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| `repo_search` | General code search | `query`, `limit`, `compact`, `language`, `under`, `symbol` | +| `code_search` | Alias for repo_search | Same as repo_search | +| `context_search` | Code + memory blend | `include_memories`, `per_source_limits` | +| `context_answer` | NL explanations with citations | `query`, `budget_tokens`, `temperature` | +| `info_request` | Quick discovery (multi-granular) | `info_request`, `include_explanation` (use for broad architecture/overviews) | +| `pattern_search` | Structural similarity | `query`, `query_mode`, `target_languages` | + +### Navigation Tools +| Tool | Use Case | Key Parameters | Availability | +|------|----------|----------------|--------------| +| `symbol_graph` | Call/import/definition graphs (hydrated w/ snippets) — **DEFAULT for all graph queries** | `symbol`, `query_type`, `limit`, `depth` | **Always available** | +| `neo4j_graph_query` | Advanced traversals (impact, transitive, cycles) | `symbol`, `query_type`, `depth`, `limit` | Only when `NEO4J_GRAPH=1` | +| `search_callers_for` | Symbol usages (heuristic) | `query`, `language` | Always available | +| `search_importers_for` | Import references | `query`, `language` | Always available | +| `search_tests_for` | Test files | `query`, `limit` | Always available | +| `search_config_for` | Config files | `query`, `limit` | Always available | + +### History Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| `change_history_for_path` | File change summary | `path`, `include_commits` | +| `search_commits_for` | Commit search | `query`, `path`, `limit` | + +### Memory Tools +| Tool | Use Case | Key Parameters | 
+|------|----------|----------------| +| `memory_store` | Store knowledge | `information`, `metadata` | +| `memory_find` | Search memories | `query`, `kind`, `language`, `tags` | + +### Admin Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| `qdrant_index_root` | Index workspace | `recreate` | +| `qdrant_index` | Index subdirectory | `subdir`, `recreate` | +| `qdrant_prune` | Remove stale points | - | +| `qdrant_status` | Collection health | `collection` | +| `qdrant_list` | List collections | - | +| `workspace_info` | Workspace state | - | +| `set_session_defaults` | Session config | `collection`, `language`, `under` | +| `warmup_status` | Check model warmup | - | + +--- + +## Agentic Optimization Patterns + +### Parallel Execution (CRITICAL for ROI) + +**Fire independent tool calls in a single message block** - this is the highest-ROI optimization. + +``` +# WRONG (sequential - 3x slower) +result1 = repo_search(query="authentication") +result2 = repo_search(query="error handling") +result3 = symbol_graph(symbol="authenticate") + +# CORRECT (parallel - all at once) +# In a single message, call all three: +repo_search(query="authentication", limit=3, compact=true) +repo_search(query="error handling", limit=3, compact=true) +symbol_graph(symbol="authenticate", query_type="callers") +# Results arrive together +``` + +**When to parallelize:** +- Multiple `repo_search` calls with different queries +- `repo_search` + `symbol_graph` for the same investigation +- `search_tests_for` + `search_config_for` when exploring a feature +- Any tools where results don't depend on each other + +### Two-Phase Search Strategy + +| Phase | Purpose | Parameters | When to Use | +|-------|---------|------------|-------------| +| **Discovery** | Find relevant areas quickly | `limit=3`, `compact=true`, `output_format="toon"`, `per_path=1` | Always start here | +| **Deep Dive** | Get implementation details | `limit=5-8`, `include_snippet=true`, 
`context_lines=3-5` | After identifying targets | + +``` +# Phase 1: Discovery +info_request(info_request="authentication flow", limit=3) +# Check confidence.level - if "high", proceed; if "low", refine query + +# Phase 2: Deep dive on high-value targets only +repo_search( + query="jwt token validation", + limit=5, + include_snippet=true, + context_lines=5, + under="src/auth" +) +``` + +### Session Bootstrap + +**At the start of any session, set defaults to optimize all subsequent operations:** + +``` +# Set defaults (inherited by all subsequent calls) +set_session_defaults( + output_format="toon", # 60-80% token reduction + compact=true, # Minimal result fields + limit=5 # Reasonable default +) +``` + +### Token Efficiency Defaults + +| Parameter | Discovery | Deep Dive | Notes | +|-----------|-----------|-----------|-------| +| `limit` | 3 | 5-8 | Start small, expand if needed | +| `per_path` | 1 | 2 | Prevents duplicate file results | +| `compact` | true | false | Strips verbose metadata | +| `output_format` | "toon" | "json" | TOON saves 60-80% tokens | +| `include_snippet` | false | true | Headers-only for discovery | +| `context_lines` | 0 | 3-5 | Only when reading code | +| `rerank_enabled` | true | true | Disable only for speed | + +### Fallback Chains + +When primary tools fail or timeout, use these fallback patterns: + +| Primary | Fallback | When | +|---------|----------|------| +| `context_answer` | `repo_search` + `info_request(include_explanation=true)` | Timeout or decoder unavailable | +| `pattern_search` | `repo_search` with structural query terms | PATTERN_VECTORS not enabled | +| `neo4j_graph_query` | `symbol_graph` (Qdrant-backed, ALWAYS available) | Neo4j not enabled (`NEO4J_GRAPH!=1`) or unavailable — **this is the DEFAULT** | +| `memory_find` | `context_search(include_memories=true)` | Memory server issues | +| `grep` / `Read File` | `repo_search`, `symbol_graph`, `info_request` | **ALWAYS** — do not use grep/read for exploration | + +``` +# 
Example: context_answer fallback +result = context_answer(query="how does auth work?") +if result.get("error") or result.get("answer") == "insufficient context": + # Fallback to search + explanation + search_result = repo_search(query="authentication implementation", limit=5) + explanation = info_request(info_request="authentication flow", include_explanation=true) +``` + +--- + +## Advanced Features & Examples + +### Using Confidence Metrics + +The `info_request` tool returns confidence metrics including score variance analysis to help you understand search quality: + +```json +// Example: Low confidence search triggers suggestion +{ + "query": "auth", + "confidence": { + "level": "low", + "score": 0.42, + "variance_score": 0.18, + "score_spread": 0.6, + "consistency_level": "low", + "coefficient_of_variation": 0.43, + "min_score": 0.2, + "max_score": 0.8, + "low_confidence_hint": "Try more specific terms or include function/class names for better results" + } +} +``` + +**Interpreting Confidence Metrics:** +- `level`: Overall confidence ("high", "medium", "low", "none") +- `score`: Average result score +- `consistency_level`: How similar scores are ("high" = CV<0.2, "medium" = CV 0.2-0.4, "low" = CV>0.4) +- `coefficient_of_variation`: Relative variability (higher = more diverse results) +- `low_confidence_hint`: Actionable suggestion when confidence is low + +**Agent Behavior:** +- **Low confidence + high CV**: Results are diverse but uncertain → refine query with more specific terms +- **High confidence + low CV**: Strong, consistent results → query is effective +- **Low confidence + low CV**: Consistently poor results → try different search approach + +### Symbol Suggestions on Typo + +The `symbol_graph` tool provides fuzzy matching suggestions when exact symbol match fails: + +```json +// Example: Typo in symbol name gets helpful suggestions +{ + "symbol": "getUserProf", + "query_type": "callers", + "results": [], + "count": 0, + "suggestions": 
["getUserProfile", "getUser", "UserProfile"], + "hint": "Symbol 'getUserProf' not found. Did you mean: getUserProfile?" +} +``` + +**How Suggestions Work:** +- **Edit distance ≤2**: Catches typos like "getUSerProfile" → "getUserProfile" +- **Prefix matching**: Partial names like "getUser" → "getUserProfile" +- **CamelCase/snake_case tokens**: Matches "get_user_profile" to "getUserProfile" +- **Scored ranking**: Top 3 suggestions ordered by similarity (1.0 = exact, 0.9 = prefix, 0.8 = token match, 0.6-0.8 = edit distance) + +**Agent Behavior:** +- When `suggestions` field present, try the top suggestion first +- Suggestions are cached for 60s to reduce load +- Controlled via `SYMBOL_SUGGESTIONS_LIMIT` env (default: 3) + +### Score Variance Detection + +The `repo_search` and `context_search` tools detect high score variance and automatically expand spans for better context: + +```json +// Example: High variance triggers span expansion +{ + "results": [ + {"path": "auth.py", "start_line": 10, "end_line": 45, "_adaptive_expanded": true}, + {"path": "user.py", "start_line": 20, "end_line": 35} + ], + "score_analysis": { + "cv": 0.45, + "high_variance": true, + "variance": 0.08, + "std": 0.28, + "mean": 0.62, + "adaptive_spans_used": 8 + } +} +``` + +**Variance Metrics:** +- `cv` (Coefficient of Variation): Relative score variability (std/mean) +- `high_variance`: Flag when CV > threshold (default 0.3) +- `adaptive_spans_used`: Count of spans expanded to full symbol boundaries + +**Adaptive Behavior:** +- **High variance (CV > 0.3)** + `ADAPTIVE_SPAN_SIZING=1`: Expands micro-chunks to full function/class boundaries +- Max expansion: 80 lines, up to 3 spans, 40% of token budget +- Only expands when symbol metadata available +- Logs with `DEBUG_ADAPTIVE_SPAN=1` + +**Environment Variables:** +```bash +SCORE_VARIANCE_THRESHOLD=0.3 # CV threshold for high_variance flag +VARIANCE_SPAN_EXPANSION=1.5 # Span multiplier when high variance +ADAPTIVE_SPAN_SIZING=1 # Enable/disable 
adaptive expansion +``` + +### Intent Confidence Analysis + +Intent classification logs all queries to JSONL for offline analysis: + +```bash +# Analyze intent classification quality over last 7 days +python scripts/analyze_intent_confidence.py --days 7 + +# Output example: +# ================================================================================ +# INTENT CONFIDENCE ANALYSIS +# ================================================================================ +# +# Total Events: 1,247 +# +# Strategy Distribution: +# rules : 823 (66.0%) +# ml : 424 (34.0%) +# +# Intent Distribution: +# search : 512 (41.1%) +# answer : 302 (24.2%) +# search_tests : 156 (12.5%) +# symbol_graph : 134 (10.7%) +# memory_find : 89 ( 7.1%) +# +# Average Confidence: 0.78 +# Fallback Rate (ML → search): 12% +# +# Top 10 Low-Confidence Queries: +# 1. [0.23] "show implementation..." → search (top: answer) +# 2. [0.24] "where is cache..." → search (top: answer) +# 3. [0.26] "find config for..." → search (top: search_config) +# ... 
+``` + +**Event Log Format (JSONL):** +```json +{ + "timestamp": 1704974400.0, + "query": "find tests for authentication", + "intent": "search_tests", + "confidence": 1.0, + "strategy": "rules", + "threshold": null, + "candidates": [] +} +``` + +**Environment Variables:** +```bash +INTENT_TRACKING_ENABLED=1 # Enable event logging (default: 1) +INTENT_EVENTS_DIR=./events # Log directory (default: ./events) +INTENT_LOG_ROTATE_MB=100 # Max file size before rotation +``` + +**Agent Behavior:** +- **Low confidence (<0.4)**: Query may be ambiguous → check `candidates` for alternatives +- **High fallback rate**: Rules may need tuning → review top fallback queries +- **Strategy="rules"**: Fast, deterministic classification (confidence=1.0) +- **Strategy="ml"**: Semantic embedding-based fallback (confidence=0.0-1.0) + +**Use Cases:** +- Monitor classification accuracy over time +- Identify queries that need better rule coverage +- Tune confidence thresholds based on real usage +- Debug misclassifications with full candidate scores + +### Neo4j Graph Query Types (ONLY when NEO4J_GRAPH=1) + +> **If `neo4j_graph_query` is not in your MCP tool list, skip this section entirely. Use `symbol_graph` for all graph queries instead.** + +The `neo4j_graph_query` tool provides advanced graph traversals that are **impossible with grep**: + +| Query Type | Description | Example | +|------------|-------------|---------| +| `callers` | Who calls this symbol? (depth 1) | `neo4j_graph_query(symbol="authenticate", query_type="callers")` | +| `callees` | What does this symbol call? (depth 1) | `neo4j_graph_query(symbol="main", query_type="callees")` | +| `transitive_callers` | Multi-hop callers (up to depth) | `neo4j_graph_query(symbol="get_embedding_model", query_type="transitive_callers", depth=2)` | +| `transitive_callees` | Multi-hop callees (up to depth) | `neo4j_graph_query(symbol="init", query_type="transitive_callees", depth=3)` | +| `impact` | What breaks if I change this? 
| `neo4j_graph_query(symbol="normalize_path", query_type="impact", depth=2)` | +| `dependencies` | What does this depend on? | `neo4j_graph_query(symbol="run_hybrid_search", query_type="dependencies")` | +| `cycles` | Detect circular dependencies | `neo4j_graph_query(symbol="ServiceA", query_type="cycles")` | + +**Key Parameters:** +- `symbol`: The function, class, or module to analyze +- `query_type`: One of the above query types +- `depth`: Traversal depth for transitive queries (default 1, max ~5) +- `limit`: Maximum results (default 50) +- `include_paths`: Include full traversal paths in results +- `repo`: Filter by repository name + +**ROI vs Grep:** +| Metric | Grep | Neo4j Graph | ROI | +|--------|------|-------------|-----| +| Multi-hop callers | Impossible | 80ms | ∞ | +| Impact analysis | 10+ iterations | 1 call, 3ms | 10x | +| Noise filtering | Manual | Automatic | 90%+ reduction | +| Time for simple lookup | 3-5s | 80ms | 40x faster | + +**Example Response:** +```json +{ + "ok": true, + "results": [ + {"symbol": "main", "hop": 1, "path_nodes": ["main", "normalize_path"], "repo": "work"}, + {"symbol": "evaluate_query", "hop": 2, "path_nodes": ["evaluate_query", "matches_relevant", "normalize_path"]} + ], + "total": 4, + "query": {"query_type": "impact", "symbol": "normalize_path", "depth": 2}, + "backend": "neo4j", + "query_time_ms": 3.43 +} +``` diff --git a/.env.example b/.env.example index 3317c2ed..973c937b 100644 --- a/.env.example +++ b/.env.example @@ -4,7 +4,8 @@ QDRANT_URL=http://localhost:6333 # Multi-repo mode: 0=single-repo (default), 1=multi-repo # Single-repo: All files go into one collection (COLLECTION_NAME) -# Multi-repo: Each subdirectory gets its own collection +# Multi-repo: Each subdirectory with .git gets its own collection +# Falls back to treating all subdirectories as repos if no .git found (for K8s/containers) MULTI_REPO_MODE=0 # Logical repo reuse (experimental): 0=disabled (default), 1=enable logical_repo_id-based diff --git 
a/.gitignore b/.gitignore index e25875da..33ff7589 100644 --- a/.gitignore +++ b/.gitignore @@ -65,9 +65,4 @@ ctx_config.json /deploy/eks-cdk /deploy/eks-cdk-PATHFUL .env -.contextstream/config.json -.contextstream/ignore -.cursorrules -GEMINI.md -ctx-mcp-bridge/bin/ctxce.js -/ctx-mcp-bridge/bin + diff --git a/.skills/mcp-tool-selection/SKILL.md b/.skills/mcp-tool-selection/SKILL.md index 8f1ea7df..62d777ab 100644 --- a/.skills/mcp-tool-selection/SKILL.md +++ b/.skills/mcp-tool-selection/SKILL.md @@ -7,6 +7,18 @@ description: Decision rules for when to use MCP Qdrant-Indexer semantic search v **Core principle:** MCP Qdrant-Indexer tools are primary for exploring code and history. Start with MCP for exploration, debugging, or "where/why" questions; use literal search/file-open only for narrow exact-literal lookups. +## STOP — Do NOT Use Read File or Grep for Exploration + +**DO NOT use `Read File`, `grep`, `ripgrep`, `cat`, `find`, or any filesystem search tool for code exploration.** +You have MCP tools that are faster, smarter, and return ranked, contextual results. + +- About to `Read` a file to understand it? → use `repo_search` or `context_answer` +- About to `grep` for a symbol? → use `symbol_graph` or `search_callers_for` +- About to `grep -r` for a concept? → use `repo_search` with natural language +- About to `find`/`ls` for project structure? → use `workspace_info` or `qdrant_status` + +The ONLY acceptable use of grep/Read: confirming exact literal strings (e.g., `REDIS_HOST`), or reading a file you already located via MCP for editing. 
+ ## Use MCP Qdrant-Indexer When - Exploring or don't know exact strings/symbols @@ -14,6 +26,7 @@ description: Decision rules for when to use MCP Qdrant-Indexer semantic search v - Want ranked results with surrounding context, not just line hits - Asking conceptual/architectural or "where/why" behavior questions - Need rich context/snippets around matches +- Finding callers, definitions, or importers of any symbol ## Use Literal Search/File-Open Only When @@ -27,6 +40,10 @@ grep -r "auth" . # → Use MCP: "authentication mechanisms" grep -r "cache" . # → Use MCP: "caching strategies" grep -r "error" . # → Use MCP: "error handling patterns" grep -r "database" . # → Use MCP: "database operations" +# Also DON'T: +Read File to understand a module # → Use repo_search or context_answer +Read File to find callers # → Use symbol_graph +find/ls for project structure # → Use workspace_info ``` ## Literal Search Patterns (DO) @@ -42,14 +59,18 @@ grep -rn "REDIS_HOST" . # Exact environment variable | Question Type | Tool | |--------------|------| | "Where is X implemented?" | MCP `repo_search` | -| "Who calls this and show code?" | MCP `symbol_graph` (hydrated w/ snippets) | -| "Callers of callers? Multi-hop?" | MCP `neo4j_graph_query` (transitive_callers, depth=2) | -| "What breaks if I change X?" | MCP `neo4j_graph_query` (impact, depth=2) | -| "Circular dependencies?" | MCP `neo4j_graph_query` (cycles) | +| "Who calls this and show code?" | MCP `symbol_graph` — **DEFAULT for all graph queries, always available** | +| "Where is X defined?" | MCP `symbol_graph` (query_type="definition") | +| "What imports X?" | MCP `symbol_graph` (query_type="importers") | +| "Callers of callers? Multi-hop?" | MCP `symbol_graph` (depth=2+) or `neo4j_graph_query` (if NEO4J_GRAPH=1) | +| "What breaks if I change X?" | MCP `neo4j_graph_query` (ONLY if available, else use `symbol_graph`) | +| "Circular dependencies?" | MCP `neo4j_graph_query` (ONLY if available) | | "How does authentication work?" 
| MCP `context_answer` | | "High-level module overview?" | MCP `info_request` (with explanations) | | "Does REDIS_HOST exist?" | Literal grep | | "Why did behavior change?" | `search_commits_for` + `change_history_for_path` | +> **`symbol_graph`** is ALWAYS available (Qdrant-backed). **`neo4j_graph_query`** is ONLY available when `NEO4J_GRAPH=1`. If `neo4j_graph_query` is not in your tool list, use `symbol_graph` for everything. Never error about missing Neo4j. + **If in doubt → start with MCP** diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 00000000..486385ff --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,664 @@ +# AI Agent Rules for Context-Engine MCP Tools + +> **These rules are NOT optional.** Favor MCP Qdrant-Indexer tooling at all costs over grep, file reading, and unrelated codebase search tools. + +## STOP — Read This First + +**DO NOT use `Read File`, `grep`, `ripgrep`, `cat`, `find`, or any filesystem search tool for code exploration.** +These tools exist in your IDE but they are WRONG for this codebase. You have MCP tools that are faster, smarter, and return ranked, contextual results. + +**If you catch yourself about to `Read` a file to understand it** → use `repo_search` or `context_answer` instead. +**If you catch yourself about to `grep` for a symbol** → use `symbol_graph` or `search_callers_for` instead. +**If you catch yourself about to `grep -r` for a concept** → use `repo_search` with a natural language query instead. + +The ONLY acceptable use of `grep`/`Read` is confirming an exact literal string you already know exists (e.g., an env var name like `REDIS_HOST`). + +## Introduction + +This document defines requirements for AI agents using Context-Engine's MCP tools. The system provides two MCP servers (Memory Server on port 8000/8002, Indexer Server on port 8001/8003) with 30+ specialized tools for semantic code search, memory storage, and codebase exploration. 
+ +**Core Principle:** MCP Qdrant-Indexer tools are PRIMARY for exploring code and history. Start with MCP for exploration, debugging, or "where/why" questions; use literal search/file-open only for narrow exact-literal lookups. + +## Glossary + +- **MCP**: Model Context Protocol - standardized interface for exposing tools to AI agents +- **Indexer Server**: MCP server for code search, indexing, symbol graphs (port 8001 SSE, 8003 HTTP) +- **Memory Server**: MCP server for knowledge storage and retrieval (port 8000 SSE, 8002 HTTP) +- **Hybrid Search**: Dense semantic vectors + lexical BM25 + neural reranking (ONNX) +- **ReFRAG**: Micro-chunking with 16-24 token windows for precise code retrieval +- **TOON**: Token-Oriented Object Notation - compact output format (60-80% token reduction) +- **Symbol Graph**: Indexed metadata for calls, imports, and definitions navigation +- **Collection**: Qdrant vector database collection storing indexed code chunks + +## Requirements + +### Requirement 1: MCP-First Tool Selection + +**User Story:** As an AI agent, I want to prioritize MCP tools over grep/file-reading, so that I get semantic understanding efficiently. + +#### Acceptance Criteria + +1. WHEN exploring code or answering "where/why" questions, THE Agent SHALL use MCP Indexer tools as the primary method +2. WHEN the agent needs semantic understanding, cross-file relationships, or ranked results with context, THE Agent SHALL use MCP tools +3. WHEN the agent knows an exact literal string AND only needs to confirm existence/location, THE Agent SHALL use grep or file-open +4. IF the agent is uncertain which approach to use, THEN THE Agent SHALL default to MCP tools +5. THE Agent SHALL NOT use `grep -r "auth"` for concepts (use MCP: "authentication mechanisms") + +### Requirement 2: Query Formulation + +**User Story:** As an AI agent, I want to write effective semantic queries, so that I retrieve relevant code spans. + +#### Acceptance Criteria + +1. 
WHEN writing queries for `repo_search`, THE Agent SHALL use short natural-language fragments (e.g., "database connection handling")
+2. THE Agent SHALL NOT use boolean operators (OR, AND), regex syntax, or code patterns in semantic queries
+3. WHEN searching broad concepts, THE Agent SHALL use descriptive phrases like "error reporting patterns", not `grep -r "error"`
+4. THE Agent SHALL write queries as descriptions of what to find, not as literal code strings
+5. WHEN searching for specific symbols, THE Agent SHALL use the `symbol` parameter alongside the query
+
+### Requirement 3: Performance Optimization
+
+**User Story:** As an AI agent, I want to minimize token usage and latency, so that I work efficiently within context limits.
+
+#### Acceptance Criteria
+
+1. WHEN starting discovery, THE Agent SHALL use `limit=3`, `compact=true`, `per_path=1`
+2. WHEN needing implementation details, THE Agent SHALL increase to `limit=5`, `include_snippet=true`
+3. WHEN token efficiency is critical, THE Agent SHALL use `output_format="toon"` for 60-80% reduction
+4. THE Agent SHALL NOT use `limit=20` with `include_snippet=true` (excessive token waste)
+5. THE Agent SHALL NOT use high `context_lines` for pure discovery (unnecessary tokens)
+6. THE Agent SHALL fire independent tool calls in parallel (same message block) for 2-3x speedup
+7. THE Agent SHALL prefer `output_format="toon"` as the default for all discovery queries
+
+### Requirement 4: Core Search Tools
+
+**User Story:** As an AI agent, I want to use the right search tool for each task, so that I get optimal results.
+
+#### Acceptance Criteria
+
+1. WHEN finding relevant files/spans and inspecting code, THE Agent SHALL use `repo_search` or `code_search`
+2. WHEN combining code hits with memory/docs, THE Agent SHALL use `context_search` with `include_memories=true`
+3. WHEN needing natural-language explanations with citations, THE Agent SHALL use `context_answer`
+4. 
WHEN needing quick discovery with summaries, THE Agent SHALL use `info_request` with `include_explanation=true` +5. WHEN finding structurally similar patterns across languages, THE Agent SHALL use `pattern_search` + +### Requirement 5: Symbol Graph Navigation (DEFAULT for all graph queries) + +**User Story:** As an AI agent, I want to navigate code relationships efficiently, so that I understand call graphs and dependencies. + +> **IMPORTANT:** `symbol_graph` is the DEFAULT and ALWAYS-AVAILABLE tool for graph queries. It works with the Qdrant-backed symbol index — no Neo4j required. Use `symbol_graph` FIRST for any caller/definition/importer query. Do NOT attempt `neo4j_graph_query` unless you know Neo4j is enabled. + +#### Acceptance Criteria + +1. WHEN finding who calls a function, THE Agent SHALL use `symbol_graph(symbol="name", query_type="callers")` +2. WHEN finding where a symbol is defined, THE Agent SHALL use `symbol_graph(symbol="name", query_type="definition")` +3. WHEN finding what imports a module, THE Agent SHALL use `symbol_graph(symbol="name", query_type="importers")` +4. THE Agent SHALL prefer `symbol_graph` over `search_callers_for` for structured navigation with metadata +5. THE Agent SHALL use `language` and `under` filters to narrow symbol graph results +6. WHEN needing multi-hop traversals, THE Agent SHALL use `symbol_graph` with `depth=2` or `depth=3` +7. **THE Agent SHALL default to `symbol_graph` for ALL graph/relationship queries.** It is always available regardless of Neo4j status. +8. THE Agent SHALL NOT attempt `neo4j_graph_query` unless the tool is visible in the MCP tool list (it only registers when `NEO4J_GRAPH=1`) + +### Requirement 5b: Neo4j Advanced Graph Queries (OPTIONAL — only when NEO4J_GRAPH=1) + +**User Story:** As an AI agent, I want to perform advanced graph traversals when Neo4j is available, so that I understand impact, dependencies, and circular references. 
+ +> **NOTE:** The `neo4j_graph_query` tool is ONLY available when `NEO4J_GRAPH=1` is set. If this tool is not in your MCP tool list, it is NOT enabled — use `symbol_graph` instead for all graph queries. Do NOT error or warn about missing Neo4j; just use `symbol_graph`. + +#### Acceptance Criteria + +1. WHEN `neo4j_graph_query` IS available AND needing multi-hop callers, THE Agent SHALL use `neo4j_graph_query(query_type="transitive_callers", symbol="name", depth=2)` +2. WHEN `neo4j_graph_query` IS available AND analyzing "what would break if I change X?", THE Agent SHALL use `neo4j_graph_query(query_type="impact", symbol="name", depth=2)` +3. WHEN `neo4j_graph_query` IS available AND finding all dependencies, THE Agent SHALL use `neo4j_graph_query(query_type="dependencies", symbol="name")` +4. WHEN `neo4j_graph_query` IS available AND detecting circular dependencies, THE Agent SHALL use `neo4j_graph_query(query_type="cycles", symbol="name")` +5. WHEN `neo4j_graph_query` IS NOT available, THE Agent SHALL fall back to `symbol_graph` for callers/definitions/importers queries +6. THE Agent SHALL NEVER error or complain about Neo4j being unavailable — just use `symbol_graph` + +### Requirement 6: Specialized Search Tools + +**User Story:** As an AI agent, I want to use specialized tools for common search patterns, so that I find specific code types quickly. + +#### Acceptance Criteria + +1. WHEN finding test files, THE Agent SHALL use `search_tests_for` (preset test globs) +2. WHEN finding configuration files, THE Agent SHALL use `search_config_for` (preset config globs) +3. WHEN finding symbol usages heuristically, THE Agent SHALL use `search_callers_for` +4. WHEN finding import references, THE Agent SHALL use `search_importers_for` +5. 
THE Agent SHALL pass `language`, `under`, and `limit` filters to narrow specialized searches + +### Requirement 7: Context Answer Best Practices + +**User Story:** As an AI agent, I want high-quality explanations from context_answer, so that I understand code behavior accurately. + +#### Acceptance Criteria + +1. WHEN asking about specific modules, THE Agent SHALL mention filenames explicitly in queries +2. WHEN asking cross-file questions, THE Agent SHALL use behavior-describing queries without filenames +3. THE Agent SHALL use `budget_tokens` to control context size (default: MICRO_BUDGET_TOKENS env) +4. THE Agent SHALL set `temperature=0.2` or `temperature=0.3` for deterministic answers +5. THE Agent SHALL NOT use `context_answer` as a debugger for low-level helpers; prefer `repo_search` + direct reading + +### Requirement 8: Info Request Simplification + +**User Story:** As an AI agent, I want a simple interface for quick code discovery, so that I can find relevant code with minimal parameters. + +#### Acceptance Criteria + +1. WHEN needing simple search, THE Agent SHALL use `info_request(info_request="description")` +2. WHEN needing summaries and concepts, THE Agent SHALL set `include_explanation=true` +3. WHEN needing relationship data (imports/calls), THE Agent SHALL set `include_relationships=true` +4. THE Agent SHALL understand smart limits: 15 for short queries, 8 for questions, 10 default +5. THE Agent SHALL use `info_request` for quick discovery before deeper `repo_search` dives + +### Requirement 9: Pattern Search Usage + +**User Story:** As an AI agent, I want to find structurally similar code across languages, so that I detect patterns and duplication. + +#### Acceptance Criteria + +1. WHEN finding similar control flow, THE Agent SHALL use `pattern_search` with code examples OR descriptions +2. THE Agent SHALL use `query_mode="auto"` (default) to let the system detect code vs description +3. 
WHEN searching specific target languages, THE Agent SHALL use `target_languages` filter +4. THE Agent SHALL set `min_score=0.3` or higher to filter low-quality matches +5. IF pattern_search is unavailable (PATTERN_VECTORS!=1), THEN THE Agent SHALL fall back to `repo_search` + +### Requirement 10: Git History Integration + +**User Story:** As an AI agent, I want to understand code evolution, so that I answer "when/why did X change" questions. + +#### Acceptance Criteria + +1. WHEN finding current implementation, THE Agent SHALL first use `repo_search` to locate relevant files +2. WHEN summarizing recent changes, THE Agent SHALL use `change_history_for_path(path="...", include_commits=true)` +3. WHEN finding commits for specific behavior, THE Agent SHALL use `search_commits_for(query="behavior phrase", path="...")` +4. THE Agent SHALL read `lineage_goal`, `lineage_symbols`, `lineage_tags` from commit results +5. WHEN explaining current behavior after finding files, THE Agent SHALL use `context_answer` + +### Requirement 11: Memory System Usage + +**User Story:** As an AI agent, I want to store and retrieve knowledge effectively, so that I build on previous work. + +#### Acceptance Criteria + +1. WHEN storing code snippets, THE Agent SHALL use `memory_store` with metadata: `{code, language, path, kind="snippet"}` +2. WHEN storing explanations, THE Agent SHALL use `kind="explanation"` with `tags` and `topic` +3. THE Agent SHALL set `priority` (1-10) to indicate importance (higher = more important) +4. WHEN searching memories, THE Agent SHALL use `memory_find` with filters: `kind`, `language`, `tags`, `priority_min` +5. THE Agent SHALL use `context_search(include_memories=true)` to blend code + memory results + +### Requirement 12: Cross-Repo Search + +**User Story:** As an AI agent, I want to search across repositories effectively, so that I find code regardless of location. + +#### Acceptance Criteria + +1. 
WHEN searching a single repository, THE Agent SHALL use `repo="repo-name"` +2. WHEN searching multiple repositories, THE Agent SHALL use `repo=["frontend", "backend"]` +3. WHEN searching all indexed repositories, THE Agent SHALL use `repo="*"` +4. WHEN `repo` is omitted, THE Agent SHALL rely on auto-detection via CURRENT_REPO env (REPO_AUTO_FILTER=1) +5. THE Agent SHALL specify `collection` parameter when multiple collections exist + +### Requirement 13: Session Management + +**User Story:** As an AI agent, I want to maintain session context, so that I don't repeat parameters unnecessarily. + +#### Acceptance Criteria + +1. WHEN starting a session, THE Agent SHALL call `set_session_defaults` for both indexer and memory servers +2. THE Agent SHALL set default `collection` in session to avoid repeating it in every request +3. THE Agent SHALL understand precedence: explicit args > per-connection defaults > token defaults > env default +4. THE Agent SHALL use session tokens for cross-connection reuse when needed +5. THE Agent SHALL call `set_session_defaults(collection="...", output_format="toon", compact=true)` early + +### Requirement 14: Query Expansion Strategy + +**User Story:** As an AI agent, I want to use query expansion judiciously, so that I improve results without excessive overhead. + +#### Acceptance Criteria + +1. THE Agent SHALL attempt normal search BEFORE using `expand_query` or `expand=true` +2. WHEN initial search returns poor results, THE Agent SHALL use `expand=true` on `context_answer` +3. THE Agent SHALL use `max_new=2` or `max_new=3` for expansion (default 3) +4. THE Agent SHALL understand expansion uses local LLM (llama.cpp, GLM, or MiniMax) and is expensive +5. THE Agent SHALL treat `expand_query` as a last resort, not a default + +### Requirement 15: Error Handling + +**User Story:** As an AI agent, I want to handle errors gracefully, so that I recover and continue working. + +#### Acceptance Criteria + +1. 
WHEN MCP tools return responses, THE Agent SHALL check the `ok` field for success/failure +2. WHEN reranking times out, THE Agent SHALL accept fallback to hybrid-only results (still valid) +3. WHEN decoder is disabled, THE Agent SHALL accept `context_answer` returning citations without generated text +4. WHEN pattern_search is unavailable, THE Agent SHALL fall back to `repo_search` +5. THE Agent SHALL parse error messages and adjust parameters accordingly + +### Requirement 16: Indexing and Maintenance + +**User Story:** As an AI agent, I want to manage indexing operations, so that the codebase stays current. + +#### Acceptance Criteria + +1. WHEN indexing entire workspace, THE Agent SHALL use `qdrant_index_root` +2. WHEN indexing subdirectories, THE Agent SHALL use `qdrant_index(subdir="path")` +3. WHEN recreating collections from scratch, THE Agent SHALL use `recreate=true` +4. WHEN removing stale points (deleted files), THE Agent SHALL use `qdrant_prune` +5. WHEN checking index health, THE Agent SHALL use `qdrant_status` for count and timestamps + +### Requirement 17: Workspace Discovery + +**User Story:** As an AI agent, I want to discover available workspaces and collections, so that I target the right codebase. + +#### Acceptance Criteria + +1. WHEN listing all collections, THE Agent SHALL use `qdrant_list` +2. WHEN checking current workspace state, THE Agent SHALL use `workspace_info` +3. WHEN discovering multiple workspaces, THE Agent SHALL use `list_workspaces` +4. WHEN mapping collections to repos, THE Agent SHALL use `collection_map` +5. THE Agent SHALL understand workspace state includes: indexing_status, indexing_config, active_repo_slug + +### Requirement 18: Grep and File Read Anti-Patterns + +**User Story:** As an AI agent, I want to recognize when grep and file reading are inappropriate, so that I use semantic search instead. + +#### Acceptance Criteria + +1. THE Agent SHALL NOT use `grep -r "auth"` (use MCP: "authentication mechanisms") +2. 
THE Agent SHALL NOT use `grep -r "cache"` (use MCP: "caching strategies")
+3. THE Agent SHALL NOT use `grep -r "error"` (use MCP: "error handling patterns")
+4. THE Agent SHALL NOT use `grep -r "database"` (use MCP: "database operations")
+5. THE Agent SHALL use grep ONLY for exact literals: `grep -rn "UserAlreadyExists"`, `grep -rn "REDIS_HOST"`
+6. THE Agent SHALL NOT use `Read File` to understand what a file does — use `repo_search` or `context_answer` with the filename in the query
+7. THE Agent SHALL NOT use `Read File` to find callers/imports — use `symbol_graph` instead
+8. THE Agent SHALL NOT open files to "browse" the codebase — use `info_request` or `repo_search` for discovery
+9. THE Agent SHALL NOT use `find` or `ls` to discover project structure — use `workspace_info` or `qdrant_status`
+10. THE ONLY acceptable uses of `Read File` are: (a) editing a file you already located via MCP, and (b) reading config files whose exact path you already know
+
+### Requirement 19: Advanced Reranking Features
+
+**User Story:** As an AI agent, I want to leverage reranking effectively, so that I get the most relevant results.
+
+#### Acceptance Criteria
+
+1. THE Agent SHALL use `rerank_enabled=true` (default) for complex queries needing best relevance
+2. WHEN faster results are acceptable, THE Agent SHALL set `rerank_enabled=false`
+3. THE Agent SHALL understand results include `learning_score` and `refinement_iterations` from ONNX teacher
+4. THE Agent SHALL use `rerank_top_n` to control candidate pool size (default 20)
+5. THE Agent SHALL use `rerank_return_m` to control final result count after reranking
+
+### Requirement 20: Output Format and Token Efficiency
+
+**User Story:** As an AI agent, I want to choose appropriate output formats, so that I optimize token usage.
+
+#### Acceptance Criteria
+
+1. WHEN token efficiency is critical, THE Agent SHALL use `output_format="toon"` for 60-80% reduction
+2. 
WHEN needing full structured data, THE Agent SHALL use `output_format="json"` (default) +3. WHEN using `compact=true`, THE Agent SHALL expect reduced result fields (path, symbol, lines, score) +4. THE Agent SHALL use `include_snippet=false` when only file/line references are needed +5. THE Agent SHALL combine `compact=true` + `limit=3` + `per_path=1` for minimal discovery queries + +--- + +## Tool Quick Reference + +### Search Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| `repo_search` | General code search | `query`, `limit`, `compact`, `language`, `under`, `symbol` | +| `code_search` | Alias for repo_search | Same as repo_search | +| `context_search` | Code + memory blend | `include_memories`, `per_source_limits` | +| `context_answer` | NL explanations with citations | `query`, `budget_tokens`, `temperature` | +| `info_request` | Quick discovery (multi-granular) | `info_request`, `include_explanation` (use for broad architecture/overviews) | +| `pattern_search` | Structural similarity | `query`, `query_mode`, `target_languages` | + +### Navigation Tools +| Tool | Use Case | Key Parameters | Availability | +|------|----------|----------------|--------------| +| `symbol_graph` | Call/import/definition graphs (hydrated w/ snippets) — **DEFAULT for all graph queries** | `symbol`, `query_type`, `limit`, `depth` | **Always available** | +| `neo4j_graph_query` | Advanced traversals (impact, transitive, cycles) | `symbol`, `query_type`, `depth`, `limit` | Only when `NEO4J_GRAPH=1` | +| `search_callers_for` | Symbol usages (heuristic) | `query`, `language` | Always available | +| `search_importers_for` | Import references | `query`, `language` | Always available | +| `search_tests_for` | Test files | `query`, `limit` | Always available | +| `search_config_for` | Config files | `query`, `limit` | Always available | + +### History Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| 
`change_history_for_path` | File change summary | `path`, `include_commits` | +| `search_commits_for` | Commit search | `query`, `path`, `limit` | + +### Memory Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| `memory_store` | Store knowledge | `information`, `metadata` | +| `memory_find` | Search memories | `query`, `kind`, `language`, `tags` | + +### Admin Tools +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| `qdrant_index_root` | Index workspace | `recreate` | +| `qdrant_index` | Index subdirectory | `subdir`, `recreate` | +| `qdrant_prune` | Remove stale points | - | +| `qdrant_status` | Collection health | `collection` | +| `qdrant_list` | List collections | - | +| `workspace_info` | Workspace state | - | +| `set_session_defaults` | Session config | `collection`, `language`, `under` | +| `warmup_status` | Check model warmup | - | + +--- + +## Agentic Optimization Patterns + +### Parallel Execution (CRITICAL for ROI) + +**Fire independent tool calls in a single message block** - this is the highest-ROI optimization. 
+ +``` +# WRONG (sequential - 3x slower) +result1 = repo_search(query="authentication") +result2 = repo_search(query="error handling") +result3 = symbol_graph(symbol="authenticate") + +# CORRECT (parallel - all at once) +# In a single message, call all three: +repo_search(query="authentication", limit=3, compact=true) +repo_search(query="error handling", limit=3, compact=true) +symbol_graph(symbol="authenticate", query_type="callers") +# Results arrive together +``` + +**When to parallelize:** +- Multiple `repo_search` calls with different queries +- `repo_search` + `symbol_graph` for the same investigation +- `search_tests_for` + `search_config_for` when exploring a feature +- Any tools where results don't depend on each other + +### Two-Phase Search Strategy + +| Phase | Purpose | Parameters | When to Use | +|-------|---------|------------|-------------| +| **Discovery** | Find relevant areas quickly | `limit=3`, `compact=true`, `output_format="toon"`, `per_path=1` | Always start here | +| **Deep Dive** | Get implementation details | `limit=5-8`, `include_snippet=true`, `context_lines=3-5` | After identifying targets | + +``` +# Phase 1: Discovery +info_request(info_request="authentication flow", limit=3) +# Check confidence.level - if "high", proceed; if "low", refine query + +# Phase 2: Deep dive on high-value targets only +repo_search( + query="jwt token validation", + limit=5, + include_snippet=true, + context_lines=5, + under="src/auth" +) +``` + +### Session Bootstrap + +**At the start of any session, set defaults to optimize all subsequent operations:** + +``` +# Set defaults (inherited by all subsequent calls) +set_session_defaults( + output_format="toon", # 60-80% token reduction + compact=true, # Minimal result fields + limit=5 # Reasonable default +) +``` + +### Token Efficiency Defaults + +| Parameter | Discovery | Deep Dive | Notes | +|-----------|-----------|-----------|-------| +| `limit` | 3 | 5-8 | Start small, expand if needed | +| `per_path` | 
1 | 2 | Prevents duplicate file results | +| `compact` | true | false | Strips verbose metadata | +| `output_format` | "toon" | "json" | TOON saves 60-80% tokens | +| `include_snippet` | false | true | Headers-only for discovery | +| `context_lines` | 0 | 3-5 | Only when reading code | +| `rerank_enabled` | true | true | Disable only for speed | + +### Fallback Chains + +When primary tools fail or timeout, use these fallback patterns: + +| Primary | Fallback | When | +|---------|----------|------| +| `context_answer` | `repo_search` + `info_request(include_explanation=true)` | Timeout or decoder unavailable | +| `pattern_search` | `repo_search` with structural query terms | PATTERN_VECTORS not enabled | +| `neo4j_graph_query` | `symbol_graph` (Qdrant-backed, ALWAYS available) | Neo4j not enabled (`NEO4J_GRAPH!=1`) or unavailable — **this is the DEFAULT** | +| `memory_find` | `context_search(include_memories=true)` | Memory server issues | +| `grep` / `Read File` | `repo_search`, `symbol_graph`, `info_request` | **ALWAYS** — do not use grep/read for exploration | + +``` +# Example: context_answer fallback +result = context_answer(query="how does auth work?") +if result.get("error") or result.get("answer") == "insufficient context": + # Fallback to search + explanation + search_result = repo_search(query="authentication implementation", limit=5) + explanation = info_request(info_request="authentication flow", include_explanation=true) +``` + +--- + +## Advanced Features & Examples + +### Using Confidence Metrics + +The `info_request` tool returns confidence metrics including score variance analysis to help you understand search quality: + +```json +// Example: Low confidence search triggers suggestion +{ + "query": "auth", + "confidence": { + "level": "low", + "score": 0.42, + "variance_score": 0.18, + "score_spread": 0.6, + "consistency_level": "low", + "coefficient_of_variation": 0.43, + "min_score": 0.2, + "max_score": 0.8, + "low_confidence_hint": "Try more 
specific terms or include function/class names for better results" + } +} +``` + +**Interpreting Confidence Metrics:** +- `level`: Overall confidence ("high", "medium", "low", "none") +- `score`: Average result score +- `consistency_level`: How similar scores are ("high" = CV<0.2, "medium" = CV 0.2-0.4, "low" = CV>0.4) +- `coefficient_of_variation`: Relative variability (higher = more diverse results) +- `low_confidence_hint`: Actionable suggestion when confidence is low + +**Agent Behavior:** +- **Low confidence + high CV**: Results are diverse but uncertain → refine query with more specific terms +- **High confidence + low CV**: Strong, consistent results → query is effective +- **Low confidence + low CV**: Consistently poor results → try different search approach + +### Symbol Suggestions on Typo + +The `symbol_graph` tool provides fuzzy matching suggestions when exact symbol match fails: + +```json +// Example: Typo in symbol name gets helpful suggestions +{ + "symbol": "getUserProf", + "query_type": "callers", + "results": [], + "count": 0, + "suggestions": ["getUserProfile", "getUser", "UserProfile"], + "hint": "Symbol 'getUserProf' not found. Did you mean: getUserProfile?" 
+} +``` + +**How Suggestions Work:** +- **Edit distance ≤2**: Catches typos like "getUSerProfile" → "getUserProfile" +- **Prefix matching**: Partial names like "getUser" → "getUserProfile" +- **CamelCase/snake_case tokens**: Matches "get_user_profile" to "getUserProfile" +- **Scored ranking**: Top 3 suggestions ordered by similarity (1.0 = exact, 0.9 = prefix, 0.8 = token match, 0.6-0.8 = edit distance) + +**Agent Behavior:** +- When `suggestions` field present, try the top suggestion first +- Suggestions are cached for 60s to reduce load +- Controlled via `SYMBOL_SUGGESTIONS_LIMIT` env (default: 3) + +### Score Variance Detection + +The `repo_search` and `context_search` tools detect high score variance and automatically expand spans for better context: + +```json +// Example: High variance triggers span expansion +{ + "results": [ + {"path": "auth.py", "start_line": 10, "end_line": 45, "_adaptive_expanded": true}, + {"path": "user.py", "start_line": 20, "end_line": 35} + ], + "score_analysis": { + "cv": 0.45, + "high_variance": true, + "variance": 0.08, + "std": 0.28, + "mean": 0.62, + "adaptive_spans_used": 8 + } +} +``` + +**Variance Metrics:** +- `cv` (Coefficient of Variation): Relative score variability (std/mean) +- `high_variance`: Flag when CV > threshold (default 0.3) +- `adaptive_spans_used`: Count of spans expanded to full symbol boundaries + +**Adaptive Behavior:** +- **High variance (CV > 0.3)** + `ADAPTIVE_SPAN_SIZING=1`: Expands micro-chunks to full function/class boundaries +- Max expansion: 80 lines, up to 3 spans, 40% of token budget +- Only expands when symbol metadata available +- Logs with `DEBUG_ADAPTIVE_SPAN=1` + +**Environment Variables:** +```bash +SCORE_VARIANCE_THRESHOLD=0.3 # CV threshold for high_variance flag +VARIANCE_SPAN_EXPANSION=1.5 # Span multiplier when high variance +ADAPTIVE_SPAN_SIZING=1 # Enable/disable adaptive expansion +``` + +### Intent Confidence Analysis + +Intent classification logs all queries to JSONL for offline 
analysis: + +```bash +# Analyze intent classification quality over last 7 days +python scripts/analyze_intent_confidence.py --days 7 + +# Output example: +# ================================================================================ +# INTENT CONFIDENCE ANALYSIS +# ================================================================================ +# +# Total Events: 1,247 +# +# Strategy Distribution: +# rules : 823 (66.0%) +# ml : 424 (34.0%) +# +# Intent Distribution: +# search : 512 (41.1%) +# answer : 302 (24.2%) +# search_tests : 156 (12.5%) +# symbol_graph : 134 (10.7%) +# memory_find : 89 ( 7.1%) +# +# Average Confidence: 0.78 +# Fallback Rate (ML → search): 12% +# +# Top 10 Low-Confidence Queries: +# 1. [0.23] "show implementation..." → search (top: answer) +# 2. [0.24] "where is cache..." → search (top: answer) +# 3. [0.26] "find config for..." → search (top: search_config) +# ... +``` + +**Event Log Format (JSONL):** +```json +{ + "timestamp": 1704974400.0, + "query": "find tests for authentication", + "intent": "search_tests", + "confidence": 1.0, + "strategy": "rules", + "threshold": null, + "candidates": [] +} +``` + +**Environment Variables:** +```bash +INTENT_TRACKING_ENABLED=1 # Enable event logging (default: 1) +INTENT_EVENTS_DIR=./events # Log directory (default: ./events) +INTENT_LOG_ROTATE_MB=100 # Max file size before rotation +``` + +**Agent Behavior:** +- **Low confidence (<0.4)**: Query may be ambiguous → check `candidates` for alternatives +- **High fallback rate**: Rules may need tuning → review top fallback queries +- **Strategy="rules"**: Fast, deterministic classification (confidence=1.0) +- **Strategy="ml"**: Semantic embedding-based fallback (confidence=0.0-1.0) + +**Use Cases:** +- Monitor classification accuracy over time +- Identify queries that need better rule coverage +- Tune confidence thresholds based on real usage +- Debug misclassifications with full candidate scores + +### Neo4j Graph Query Types (ONLY when NEO4J_GRAPH=1) 
+ +> **If `neo4j_graph_query` is not in your MCP tool list, skip this section entirely. Use `symbol_graph` for all graph queries instead.** + +The `neo4j_graph_query` tool provides advanced graph traversals that are **impossible with grep**: + +| Query Type | Description | Example | +|------------|-------------|---------| +| `callers` | Who calls this symbol? (depth 1) | `neo4j_graph_query(symbol="authenticate", query_type="callers")` | +| `callees` | What does this symbol call? (depth 1) | `neo4j_graph_query(symbol="main", query_type="callees")` | +| `transitive_callers` | Multi-hop callers (up to depth) | `neo4j_graph_query(symbol="get_embedding_model", query_type="transitive_callers", depth=2)` | +| `transitive_callees` | Multi-hop callees (up to depth) | `neo4j_graph_query(symbol="init", query_type="transitive_callees", depth=3)` | +| `impact` | What breaks if I change this? | `neo4j_graph_query(symbol="normalize_path", query_type="impact", depth=2)` | +| `dependencies` | What does this depend on? 
| `neo4j_graph_query(symbol="run_hybrid_search", query_type="dependencies")` | +| `cycles` | Detect circular dependencies | `neo4j_graph_query(symbol="ServiceA", query_type="cycles")` | + +**Key Parameters:** +- `symbol`: The function, class, or module to analyze +- `query_type`: One of the above query types +- `depth`: Traversal depth for transitive queries (default 1, max ~5) +- `limit`: Maximum results (default 50) +- `include_paths`: Include full traversal paths in results +- `repo`: Filter by repository name + +**ROI vs Grep:** +| Metric | Grep | Neo4j Graph | ROI | +|--------|------|-------------|-----| +| Multi-hop callers | Impossible | 80ms | ∞ | +| Impact analysis | 10+ iterations | 1 call, 3ms | 10x | +| Noise filtering | Manual | Automatic | 90%+ reduction | +| Time for simple lookup | 3-5s | 80ms | 40x faster | + +**Example Response:** +```json +{ + "ok": true, + "results": [ + {"symbol": "main", "hop": 1, "path_nodes": ["main", "normalize_path"], "repo": "work"}, + {"symbol": "evaluate_query", "hop": 2, "path_nodes": ["evaluate_query", "matches_relevant", "normalize_path"]} + ], + "total": 4, + "query": {"query_type": "impact", "symbol": "normalize_path", "depth": 2}, + "backend": "neo4j", + "query_time_ms": 3.43 +} +``` diff --git a/deploy/helm/context-engine/values.yaml b/deploy/helm/context-engine/values.yaml index ff978bb8..ffc813d6 100644 --- a/deploy/helm/context-engine/values.yaml +++ b/deploy/helm/context-engine/values.yaml @@ -356,6 +356,8 @@ watcher: WATCH_ROOT: /work WATCH_DEBOUNCE_SECS: "2.0" WATCH_USE_POLLING: "1" + # Multi-repo mode: 0=single-repo, 1=multi-repo (each .git subdir gets own collection) + # Falls back to treating all subdirectories as repos if no .git found (K8s resilience) MULTI_REPO_MODE: "0" # -- Init containers configuration initContainers: diff --git a/docker-compose.yml b/docker-compose.yml index 5fc2c90d..26913c27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,8 +3,6 @@ # Repos stored in /work/ (which 
is project root - avoiding docker volumes) and metadata are stored in /work/.codebase/repos (project root/.codebase) # Updated to use separate PVCs for workspace and codebase to eliminate circular dependencies -version: '3.8' - # YAML Anchors for common configurations x-common-config: &common-config env_file: diff --git a/docs/CLAUDE.example.md b/docs/CLAUDE.example.md index f00ea601..80cba035 100644 --- a/docs/CLAUDE.example.md +++ b/docs/CLAUDE.example.md @@ -2,6 +2,18 @@ AI agent rules for using Context‑Engine Qdrant‑Indexer and Memory MCP tools. These rules are NOT optional - favor qdrant-indexer tooling at all costs over existing file reading, grep-like searching, and unrelated codebase search tools. +## STOP — Read This First + +**DO NOT use `Read File`, `grep`, `ripgrep`, `cat`, `find`, or any filesystem search tool for code exploration.** +You have MCP tools that are faster, smarter, and return ranked, contextual results. + +- About to `Read` a file to understand it? → use `repo_search` or `context_answer` +- About to `grep` for a symbol? → use `symbol_graph` or `search_callers_for` +- About to `grep -r` for a concept? → use `repo_search` with natural language +- About to `find`/`ls` for project structure? → use `workspace_info` or `qdrant_status` + +The ONLY acceptable use of grep/Read: confirming exact literal strings (e.g., `REDIS_HOST`), or reading a file you already located via MCP for editing. + ## Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep Core Decision Rules (for AI agents) @@ -29,6 +41,9 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex grep -r "cache" . # → Use MCP: "caching strategies" grep -r "error" . # → Use MCP: "error handling patterns" grep -r "database" . 
# → Use MCP: "database operations" + Read File to understand a module # → Use repo_search or context_answer + Read File to find callers # → Use symbol_graph + find/ls for project structure # → Use workspace_info ## DO - Efficient for exact matches grep -rn "UserAlreadyExists" . # Specific error class @@ -99,15 +114,19 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex - Cross-language: Python pattern can match Go/Rust/Java with similar control flow. - Note: Returns error if pattern detection module is not available. - symbol_graph: + - **DEFAULT for ALL graph/relationship queries. Always available (Qdrant-backed, no Neo4j required).** - Use for: structural navigation (callers, definitions, importers). - Think: "who calls this function?", "where is this class defined?". - **Note**: Results are "hydrated" with ~500-char source snippets for immediate context. - Supports `depth` for multi-hop traversals (depth=2 = callers of callers). - - neo4j_graph_query: + - Use this FIRST for any graph query. Do NOT attempt neo4j_graph_query unless it's in your tool list. + - neo4j_graph_query (OPTIONAL — only when NEO4J_GRAPH=1): + - **Only available when NEO4J_GRAPH=1. If not in your tool list, use symbol_graph instead.** - Use for: advanced graph traversals that grep CANNOT do. - Query types: `callers`, `callees`, `transitive_callers`, `transitive_callees`, `impact`, `dependencies`, `cycles`. - Think: "what would break if I change X?" (impact), "callers of callers" (transitive_callers), "circular deps?" (cycles). - Example: `neo4j_graph_query(symbol="normalize_path", query_type="impact", depth=2)` → finds all code that would break. + - **Never error or warn about Neo4j being unavailable — just use symbol_graph.** - info_request: - Use for: rapid broad discovery and architectural overviews. - Good for: "how does the reranker work?", "overview of database modules". 
@@ -194,4 +213,5 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex - context_answer timeout → repo_search + info_request(include_explanation=true) - pattern_search unavailable → repo_search with structural query terms - - neo4j_graph_query empty → symbol_graph (Qdrant-backed fallback) + - neo4j_graph_query unavailable → symbol_graph (Qdrant-backed, ALWAYS available — this is the DEFAULT) + - grep / Read File → repo_search, symbol_graph, info_request (ALWAYS use MCP instead) diff --git a/pyproject.toml b/pyproject.toml index abef2155..07b78dde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "tree_sitter_html>=0.23.0", "tree_sitter_css>=0.23.0", "tree_sitter_markdown>=0.5.0", - "mcp==1.17.0", + "mcp==1.23.0", "fastmcp==2.12.4", "fastapi", "uvicorn[standard]", diff --git a/requirements.txt b/requirements.txt index d603d3c6..9465c547 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,7 +33,7 @@ tree_sitter_kotlin>=1.1.0 tree_sitter_swift>=0.0.1 tree_sitter_scala>=0.24.0 tree_sitter_php>=0.24.0 -mcp==1.25.0 +mcp==1.23.0 fastmcp==2.12.4 fastapi uvicorn[standard] diff --git a/scripts/ctx_cli/commands/reset.py b/scripts/ctx_cli/commands/reset.py index f6f5b95f..24693a9e 100644 --- a/scripts/ctx_cli/commands/reset.py +++ b/scripts/ctx_cli/commands/reset.py @@ -171,6 +171,208 @@ def _download_file(url: str, dest: Path, description: str) -> bool: return False +def _start_indexer_detached(compose_cmd: list) -> None: + """Kick off the indexer container in detached mode for background reindexing.""" + indexer_env = {} + for var in ["INDEX_MICRO_CHUNKS", "MAX_MICRO_CHUNKS_PER_FILE", "TOKENIZER_PATH", "TOKENIZER_URL", "INDEX_WORKERS"]: + if var in os.environ: + indexer_env[var] = os.environ[var] + indexer_env["PSEUDO_DEFER_TO_WORKER"] = "1" + if "INDEX_WORKERS" not in indexer_env: + indexer_env["INDEX_WORKERS"] = "4" + + subprocess.run(["docker", "rm", "-f", "ctx-reset-indexer"], capture_output=True, 
check=False) + indexer_cmd = compose_cmd + ["run", "-d", "--rm", "--name", "ctx-reset-indexer"] + for k, v in indexer_env.items(): + indexer_cmd.extend(["-e", f"{k}={v}"]) + indexer_cmd.extend(["indexer", "--root", "/work", "--recreate"]) + _run_cmd(indexer_cmd, "Starting indexer (detached)") + _print("[green]✓[/green] Indexer started in background") + _print("[dim] Monitor with: docker logs -f ctx-reset-indexer[/dim]") + + +def _reset_containers( + compose_cmd: list, + build_containers: list, + start_containers: list, + mode_desc: str, + skip_build: bool, +) -> int: + """Non-db-reset path: stop → rebuild --no-cache → up -d → indexer detached.""" + _print("\n[bold][1/4] Stopping services...[/bold]") + _run_cmd(compose_cmd + ["down", "--remove-orphans", "--timeout", "10"], "Stopping all containers", check=False) + # Bind-mount volumes (workspace_pvc, codebase_pvc) cause interactive "config mismatch" + # prompts if their compose config-hash changed. Safe to remove — data lives on host disk. 
+ for vol in ["context-engine_workspace_pvc", "context-engine_codebase_pvc"]: + subprocess.run(["docker", "volume", "rm", "-f", vol], capture_output=True, check=False) + _print("[green]✓[/green] Services stopped") + + if not skip_build: + _print("\n[bold][2/4] Building containers (no-cache)...[/bold]") + _run_cmd(compose_cmd + ["build", "--no-cache"] + build_containers, f"Building: {', '.join(build_containers)}") + _print("[green]✓[/green] Containers built") + else: + _print("\n[bold][2/4] Skipping build[/bold]") + + _print("\n[bold][3/4] Starting services...[/bold]") + cmd = compose_cmd + ["up", "-d", "--scale", "embedding=2"] + start_containers + _run_cmd(cmd, f"Starting: {', '.join(start_containers)} (embedding×2)") + _print("[green]✓[/green] Services started") + + _print("\n[bold][4/4] Starting indexer...[/bold]") + qdrant_url = get_qdrant_url_for_host() + if not _wait_for_qdrant(qdrant_url): + return 1 + _start_indexer_detached(compose_cmd) + + _print_panel( + f"[green]✓[/green] Containers rebuilt and restarted\n\n" + f"[cyan]Mode:[/cyan] {mode_desc}\n" + f"[cyan]Services:[/cyan] {', '.join(start_containers)}\n\n" + f"[dim]Run 'ctx status' to verify all services are healthy.[/dim]", + title="Reset Complete", + border_style="green" + ) + return 0 + + +def _reset_full( + compose_cmd: list, + build_containers: list, + start_containers: list, + mode_desc: str, + skip_build: bool, + skip_model: bool, + skip_tokenizer: bool, + model_url: str, + model_path: Path, + tokenizer_url: str, + tokenizer_path: Path, + neo4j_enabled: bool, + redis_enabled: bool, +) -> int: + """Full db-reset path: nuke volumes → rebuild → init_payload → tokenizer → caches → reindex → model → up.""" + import shutil + + steps_total = 7 + step = 0 + + step += 1 + _print(f"\n[bold][{step}/{steps_total}] Stopping services and removing volumes...[/bold]") + _run_cmd(compose_cmd + ["down", "-v", "--remove-orphans", "--timeout", "10"], "Stopping all containers and removing volumes", check=False) + 
_print("[green]✓[/green] Services stopped and volumes removed") + + step += 1 + if not skip_build: + _print(f"\n[bold][{step}/{steps_total}] Building containers (no-cache)...[/bold]") + _run_cmd(compose_cmd + ["build", "--no-cache"] + build_containers, f"Building: {', '.join(build_containers)}") + _print("[green]✓[/green] Containers built") + else: + _print(f"\n[bold][{step}/{steps_total}] Skipping build[/bold]") + + step += 1 + db_services = ["qdrant"] + if redis_enabled: + db_services.append("redis") + if neo4j_enabled: + db_services.append("neo4j") + db_services.append("embedding") + _print(f"\n[bold][{step}/{steps_total}] Starting {', '.join(db_services)}...[/bold]") + _run_cmd(compose_cmd + ["up", "-d", "--scale", "embedding=2"] + db_services, f"Starting {', '.join(db_services)} (embedding×2)") + + qdrant_url = get_qdrant_url_for_host() + if not _wait_for_qdrant(qdrant_url): + return 1 + if not _wait_for_embedding("http://localhost:8100"): + _print("[yellow]Warning:[/yellow] Embedding service not ready, indexer may have errors") + + step += 1 + _print(f"\n[bold][{step}/{steps_total}] Initializing payload indexes...[/bold]") + _run_cmd(compose_cmd + ["run", "--rm", "init_payload"], "Running init_payload", check=False) + + step += 1 + if not skip_tokenizer: + _print(f"\n[bold][{step}/{steps_total}] Downloading tokenizer...[/bold]") + if not _download_file(tokenizer_url, tokenizer_path, "tokenizer"): + _print("[yellow]Warning:[/yellow] Tokenizer download failed, continuing...") + else: + _print(f"\n[bold][{step}/{steps_total}] Skipping tokenizer download[/bold]") + + step += 1 + _print(f"\n[bold][{step}/{steps_total}] Clearing caches and running indexer...[/bold]") + + _print("[dim]Clearing local caches...[/dim]") + cache_cleared = 0 + codebase_dir = Path(".codebase") + if codebase_dir.exists(): + for cache_file in codebase_dir.rglob("cache.json"): + try: + cache_file.unlink() + cache_cleared += 1 + except Exception as e: + logger.debug(f"Suppressed exception: 
{e}") + for symbols_dir in codebase_dir.rglob("symbols"): + if symbols_dir.is_dir(): + try: + shutil.rmtree(symbols_dir, ignore_errors=True) + cache_cleared += 1 + except Exception as e: + logger.debug(f"Suppressed exception: {e}") + + dev_workspace = Path("dev-workspace") + if dev_workspace.exists(): + for cache_file in dev_workspace.rglob(".codebase/cache.json"): + try: + cache_file.unlink() + cache_cleared += 1 + except Exception as e: + logger.debug(f"Suppressed exception: {e}") + for symbols_dir in dev_workspace.rglob(".codebase/symbols"): + if symbols_dir.is_dir(): + try: + shutil.rmtree(symbols_dir, ignore_errors=True) + cache_cleared += 1 + except Exception as e: + logger.debug(f"Suppressed exception: {e}") + _print(f"[dim]Cleared {cache_cleared} host cache entries[/dim]") + + _print("[dim]Clearing container caches...[/dim]") + _run_cmd( + compose_cmd + ["run", "--rm", "--entrypoint", "sh", "indexer", "-c", + "find /work -path '*/.codebase/*/cache.json' -delete 2>/dev/null; " + "find /work -path '*/.codebase/cache.json' -delete 2>/dev/null; " + "find /work -path '*/.codebase/*/symbols' -type d -exec rm -rf {} + 2>/dev/null; " + "find /work -path '*/.codebase/symbols' -type d -exec rm -rf {} + 2>/dev/null; " + "echo 'Container caches cleared'"], + "Clearing container caches", + check=False, + ) + + _start_indexer_detached(compose_cmd) + + step += 1 + if not skip_model: + _print(f"\n[bold][{step}/{steps_total}] Downloading model and starting services...[/bold]") + if not _download_file(model_url, model_path, "llama model"): + _print("[yellow]Warning:[/yellow] Model download failed, continuing...") + else: + _print(f"\n[bold][{step}/{steps_total}] Starting services...[/bold]") + + cmd = compose_cmd + ["up", "-d", "--scale", "embedding=2"] + start_containers + _run_cmd(cmd, f"Starting: {', '.join(start_containers)} (embedding×2)") + _print("[green]✓[/green] Services started") + + _print_panel( + f"[green]✓[/green] Full environment reset complete!\n\n" + 
f"[cyan]Mode:[/cyan] {mode_desc}\n" + f"[cyan]Services:[/cyan] {', '.join(start_containers)}\n\n" + f"[dim]Run 'ctx status' to verify all services are healthy.[/dim]", + title="Reset Complete", + border_style="green" + ) + return 0 + + def reset( mode: str = "mcp", skip_build: bool = False, @@ -214,7 +416,6 @@ def reset( refrag_runtime = os.environ.get("REFRAG_RUNTIME", "").strip().lower() llamacpp_needed = refrag_runtime in ("", "llamacpp") - # Build docker compose command prefix (with optional neo4j compose file) compose_cmd = ["docker", "compose"] if neo4j_enabled: compose_cmd.extend(["-f", "docker-compose.yml", "-f", "docker-compose.neo4j.yml"]) @@ -269,193 +470,39 @@ def reset( _print_panel( f"[cyan]Mode:[/cyan] {mode_desc}\n" - f"[cyan]Skip Build:[/cyan] {skip_build}\n" - f"[cyan]Skip Model:[/cyan] {skip_model}\n" - f"[cyan]Skip Tokenizer:[/cyan] {skip_tokenizer}\n" - f"[cyan]DB Reset:[/cyan] {db_reset}", + f"[cyan]DB Reset:[/cyan] {db_reset}\n" + f"[cyan]Skip Build:[/cyan] {skip_build}", title="Development Environment Reset", border_style="yellow" ) - steps_total = 7 - step = 0 - try: - # Step 1: Stop all services (and optionally reset database volumes) - step += 1 - _print(f"\n[bold][{step}/{steps_total}] Stopping services...[/bold]") if db_reset: - # Full reset including database volumes (qdrant, redis, neo4j, embedding cache) - _run_cmd(compose_cmd + ["down", "-v", "--remove-orphans"], "Stopping all containers and removing volumes", check=False) - _print("[green]✓[/green] Services stopped and database volumes removed") - else: - # Stop services but preserve database volumes - _run_cmd(compose_cmd + ["down", "--remove-orphans"], "Stopping all containers", check=False) - _print("[green]✓[/green] Services stopped (database volumes preserved)") - - # Step 2: Build containers (unless skipped) - step += 1 - if not skip_build: - _print(f"\n[bold][{step}/{steps_total}] Building containers...[/bold]") - cmd = compose_cmd + ["build", "--no-cache"] + 
build_containers - _run_cmd(cmd, f"Building: {', '.join(build_containers)}") - _print("[green]✓[/green] Containers built") - else: - _print(f"\n[bold][{step}/{steps_total}] Skipping container build[/bold]") - - # Step 3: Start Qdrant, Redis (if enabled), Neo4j (if enabled), and Embedding service - step += 1 - db_services = ["qdrant"] - if redis_enabled: - db_services.append("redis") - if neo4j_enabled: - db_services.append("neo4j") - # Start embedding service early (indexer needs it) - db_services.append("embedding") - _print(f"\n[bold][{step}/{steps_total}] Starting {', '.join(db_services)}...[/bold]") - # Use --scale for embedding to get 2 replicas (deploy.replicas is Swarm-only) - _run_cmd(compose_cmd + ["up", "-d", "--scale", "embedding=2"] + db_services, f"Starting {', '.join(db_services)} (embedding×2)") - - # Use helper that normalizes Docker hostname to localhost for host CLI - qdrant_url = get_qdrant_url_for_host() - if not _wait_for_qdrant(qdrant_url): - return 1 - - # Wait for embedding service to be ready (indexer needs it) - if not _wait_for_embedding("http://localhost:8100"): - _print("[yellow]Warning:[/yellow] Embedding service not ready, indexer may have errors") - - # Step 4: Initialize payload indexes - step += 1 - _print(f"\n[bold][{step}/{steps_total}] Initializing payload indexes...[/bold]") - _run_cmd( - compose_cmd + ["run", "--rm", "init_payload"], - "Running init_payload", - check=False # May fail if collection doesn't exist yet - ) - - # Step 5: Download tokenizer - step += 1 - if not skip_tokenizer: - _print(f"\n[bold][{step}/{steps_total}] Downloading tokenizer...[/bold]") - if not _download_file(tokenizer_url, tokenizer_path, "tokenizer"): - _print("[yellow]Warning:[/yellow] Tokenizer download failed, continuing...") + return _reset_full( + compose_cmd=compose_cmd, + build_containers=build_containers, + start_containers=start_containers, + mode_desc=mode_desc, + skip_build=skip_build, + skip_model=skip_model, + 
skip_tokenizer=skip_tokenizer, + model_url=model_url, + model_path=model_path, + tokenizer_url=tokenizer_url, + tokenizer_path=tokenizer_path, + neo4j_enabled=neo4j_enabled, + redis_enabled=redis_enabled, + ) else: - _print(f"\n[bold][{step}/{steps_total}] Skipping tokenizer download[/bold]") - - # Step 6: Clear caches and run indexer with recreate - step += 1 - _print(f"\n[bold][{step}/{steps_total}] Clearing caches and running indexer...[/bold]") - - # Clear local caches (host side) - use rglob to find all cache files - _print("[dim]Clearing local caches...[/dim]") - import shutil - cache_cleared = 0 - - # Clear all cache.json files under .codebase (including repos subdirs) - codebase_dir = Path(".codebase") - if codebase_dir.exists(): - for cache_file in codebase_dir.rglob("cache.json"): - try: - cache_file.unlink() - cache_cleared += 1 - except Exception as e: - logger.debug(f"Suppressed exception: {e}") - # Clear all symbols directories - for symbols_dir in codebase_dir.rglob("symbols"): - if symbols_dir.is_dir(): - try: - shutil.rmtree(symbols_dir, ignore_errors=True) - cache_cleared += 1 - except Exception as e: - logger.debug(f"Suppressed exception: {e}") - - # Also clear dev-workspace caches (if present) - dev_workspace = Path("dev-workspace") - if dev_workspace.exists(): - for cache_file in dev_workspace.rglob(".codebase/cache.json"): - try: - cache_file.unlink() - cache_cleared += 1 - except Exception as e: - logger.debug(f"Suppressed exception: {e}") - for symbols_dir in dev_workspace.rglob(".codebase/symbols"): - if symbols_dir.is_dir(): - try: - shutil.rmtree(symbols_dir, ignore_errors=True) - cache_cleared += 1 - except Exception as e: - logger.debug(f"Suppressed exception: {e}") - - _print(f"[dim]Cleared {cache_cleared} host cache entries[/dim]") - - # Also clear caches inside the container (critical for bind-mounted workspaces) - _print("[dim]Clearing container caches...[/dim]") - _run_cmd( - compose_cmd + ["run", "--rm", "--entrypoint", "sh", 
"indexer", "-c", - "find /work -path '*/.codebase/*/cache.json' -delete 2>/dev/null; " - "find /work -path '*/.codebase/cache.json' -delete 2>/dev/null; " - "find /work -path '*/.codebase/*/symbols' -type d -exec rm -rf {} + 2>/dev/null; " - "find /work -path '*/.codebase/symbols' -type d -exec rm -rf {} + 2>/dev/null; " - "echo 'Container caches cleared'"], - "Clearing container caches", - check=False, - ) - - # Build env vars for indexer - indexer_env = {} - for var in ["INDEX_MICRO_CHUNKS", "MAX_MICRO_CHUNKS_PER_FILE", "TOKENIZER_PATH", "TOKENIZER_URL", "INDEX_WORKERS"]: - if var in os.environ: - indexer_env[var] = os.environ[var] - - indexer_env["PSEUDO_DEFER_TO_WORKER"] = "1" - if "INDEX_WORKERS" not in indexer_env: - indexer_env["INDEX_WORKERS"] = "4" - - # Run indexer detached (-d) so CLI doesn't block - # Use --rm to auto-remove container on exit; first remove any stale container with same name - # to ensure idempotent operation across multiple runs - subprocess.run( - ["docker", "rm", "-f", "ctx-reset-indexer"], - capture_output=True, - check=False, # Ignore error if container doesn't exist - ) - indexer_cmd = compose_cmd + ["run", "-d", "--rm", "--name", "ctx-reset-indexer"] - for k, v in indexer_env.items(): - indexer_cmd.extend(["-e", f"{k}={v}"]) - indexer_cmd.extend(["indexer", "--root", "/work", "--recreate"]) - - _run_cmd(indexer_cmd, "Starting indexer (detached)") - _print("[green]✓[/green] Indexer started in background (pseudo-tags deferred)") - _print("[dim] Monitor with: docker logs -f ctx-reset-indexer[/dim]") - - # Step 7: Download model and start services - step += 1 - if not skip_model: - _print(f"\n[bold][{step}/{steps_total}] Downloading model and starting services...[/bold]") - if not _download_file(model_url, model_path, "llama model"): - _print("[yellow]Warning:[/yellow] Model download failed, continuing...") - else: - _print(f"\n[bold][{step}/{steps_total}] Starting services...[/bold]") - - # Start services - # Use --scale for 
embedding service to get multiple replicas (deploy.replicas is Swarm-only) - cmd = compose_cmd + ["up", "-d", "--scale", "embedding=2"] + start_containers - _run_cmd(cmd, f"Starting: {', '.join(start_containers)} (embedding×2)") - _print("[green]✓[/green] Services started") - - _print_panel( - f"[green]✓[/green] Development environment reset complete!\n\n" - f"[cyan]Mode:[/cyan] {mode_desc}\n" - f"[cyan]Services:[/cyan] {', '.join(start_containers)}\n\n" - f"[dim]Run 'ctx status' to verify all services are healthy.[/dim]", - title="Reset Complete", - border_style="green" - ) - return 0 - - except subprocess.CalledProcessError as e: - _print(f"[red]Error:[/red] Reset failed at step {step}", error=True) + return _reset_containers( + compose_cmd=compose_cmd, + build_containers=build_containers, + start_containers=start_containers, + mode_desc=mode_desc, + skip_build=skip_build, + ) + except subprocess.CalledProcessError: + _print("[red]Error:[/red] Reset failed", error=True) return 1 except KeyboardInterrupt: _print("\n[yellow]Reset interrupted[/yellow]") diff --git a/scripts/ctx_cli/main.py b/scripts/ctx_cli/main.py index c4991d5e..c7662f2e 100644 --- a/scripts/ctx_cli/main.py +++ b/scripts/ctx_cli/main.py @@ -76,6 +76,13 @@ def _load_dotenv(): def main(): """Main CLI entry point.""" + # macOS is case-insensitive: /Users/x/desktop and /Users/x/Desktop both + # work, but Docker stores volume device paths as exact strings. Canonicalize + # once so compose always resolves ./dev-workspace to the same path. 
+ _real = os.path.realpath(os.getcwd()) + if _real != os.getcwd(): + os.chdir(_real) + parser = argparse.ArgumentParser( prog="ctx", description="Context-Engine CLI - Unified interface for MCP tools", diff --git a/scripts/indexing_admin.py b/scripts/indexing_admin.py index 52a939ae..6cc679cd 100644 --- a/scripts/indexing_admin.py +++ b/scripts/indexing_admin.py @@ -1219,7 +1219,12 @@ def spawn_ingest_code( repo_name: Optional[str], env_overrides: Optional[Dict[str, Any]] = None, clear_caches: bool = False, -) -> None: +) -> "subprocess.Popen[bytes]": + """Spawn ingest_code as a subprocess and return the Popen handle. + + Callers that need to wait for completion (e.g. lock-guarded signal + listeners) can use the returned handle to call ``proc.wait()``. + """ script_path = str((Path(__file__).resolve().parent / "ingest_code.py").resolve()) cmd = [sys.executable or "python3", script_path, "--root", root, "--no-skip-unchanged"] if recreate: @@ -1272,6 +1277,8 @@ def spawn_ingest_code( except Exception as exc: raise RuntimeError(f"Failed to spawn ingest_code for {root}: {exc}") from exc + return proc + def _determine_embedding_dim(model_name: str) -> int: if get_model_dimension: diff --git a/scripts/ingest/cli.py b/scripts/ingest/cli.py index 5a034797..985e5fe4 100644 --- a/scripts/ingest/cli.py +++ b/scripts/ingest/cli.py @@ -298,7 +298,10 @@ def main(): print("[multi_repo] Multi-repo mode enabled - will create separate collections per repository") root_path = Path(args.root).resolve() - repos = [] + + # First pass: find directories with .git (git repos) + git_repos = [] + non_git_dirs = [] try: if root_path.is_dir(): for child in sorted(root_path.iterdir()): @@ -309,15 +312,36 @@ def main(): continue if child.name in {".codebase", "__pycache__"}: continue - repos.append(child) + + if (child / ".git").exists(): + git_repos.append(child) + else: + non_git_dirs.append(child) except Exception as e: logger.debug(f"Suppressed exception, continuing: {e}") continue except 
Exception: + git_repos = [] + non_git_dirs = [] + + # Always prefer git repos, but fall back to all subdirectories if none found + # This ensures multi-repo mode works in K8s where code may be copied without .git + if git_repos: + repos = git_repos + print(f"[multi_repo] Found {len(git_repos)} git repositories") + if non_git_dirs: + print(f"[multi_repo] Ignoring {len(non_git_dirs)} non-git directories") + elif non_git_dirs: + # Fallback: treat all subdirectories as repos when no .git found + repos = non_git_dirs + print(f"[multi_repo] WARNING: No .git directories found - falling back to directory-based detection") + print(f"[multi_repo] Treating {len(non_git_dirs)} subdirectories as separate repos") + else: repos = [] if not repos: - print(f"[multi_repo] No repo directories found under: {root_path}") + print(f"[multi_repo] No repositories found under: {root_path}") + print("[multi_repo] Hint: Each subdirectory should have a .git folder for best results") return multi_flag = (os.environ.get("PSEUDO_DEFER_TO_WORKER") or "").strip().lower() diff --git a/scripts/ingest/qdrant.py b/scripts/ingest/qdrant.py index 4ad4515d..74bcb0e7 100644 --- a/scripts/ingest/qdrant.py +++ b/scripts/ingest/qdrant.py @@ -1018,15 +1018,31 @@ def _embed_local(model, texts: List[str]) -> List[List[float]]: return [vec.tolist() for vec in model.embed(texts)] +_REMOTE_MAX_BATCH = int(os.environ.get("EMBED_MAX_BATCH", "128") or 128) + + def _embed_remote(texts: List[str], model_name: str = "default") -> List[List[float]]: """Remote embedding via HTTP service with client-side load balancing. If EMBEDDING_SERVICE_URLS is set (comma-separated), round-robins across replicas for true parallel processing. Otherwise uses single EMBEDDING_SERVICE_URL. + + Large batches are automatically chunked to stay within the service's + MAX_BATCH_SIZE limit (default 128). """ global _EMBED_REPLICA_INDEX import requests + # Chunk into sub-batches that fit the service's MAX_BATCH_SIZE. 
+ # Each sub-batch gets its own HTTP request; vectors are concatenated + # in order so index alignment is preserved for callers. + if len(texts) > _REMOTE_MAX_BATCH: + all_vectors: List[List[float]] = [] + for i in range(0, len(texts), _REMOTE_MAX_BATCH): + sub = texts[i : i + _REMOTE_MAX_BATCH] + all_vectors.extend(_embed_remote(sub, model_name)) + return all_vectors + # Get URLs at call time (not import time) for test flexibility service_urls = _get_embedding_service_urls() service_url = _get_embedding_service_url() diff --git a/scripts/upload_service.py b/scripts/upload_service.py index 341f2cd5..e6f860d1 100644 --- a/scripts/upload_service.py +++ b/scripts/upload_service.py @@ -529,12 +529,32 @@ def validate_bundle_format(bundle_path: Path) -> Dict[str, Any]: raise ValueError(f"Invalid bundle format: {str(e)}") +def _publish_index_signal(workspace_path: str, collection_name: Optional[str]) -> None: + """Publish a Redis signal to tell the watcher to index files after upload.""" + try: + from scripts.workspace_state import _get_redis_client + rc = _get_redis_client() + if rc is None: + return + import json as _json + payload = _json.dumps({ + "workspace_path": workspace_path, + "collection": collection_name, + "timestamp": datetime.now().isoformat(), + }) + rc.publish("context-engine:index-signal", payload) + logger.info(f"[upload_service] Published index signal for {workspace_path} -> {collection_name}") + except Exception as e: + logger.debug(f"[upload_service] Failed to publish index signal: {e}") + + async def _process_bundle_background( workspace_path: str, bundle_path: Path, manifest: Dict[str, Any], sequence_number: Optional[int], bundle_id: Optional[str], + collection_name: Optional[str] = None, ) -> None: try: start_time = datetime.now() @@ -563,6 +583,7 @@ async def _process_bundle_background( logger.info( f"[upload_service] Finished processing bundle {bundle_id} seq {sequence_number} in {int(processing_time)}ms" ) + 
_publish_index_signal(workspace_path, collection_name) except Exception as e: logger.error(f"[upload_service] Error in background processing for bundle {bundle_id}: {e}") finally: @@ -2012,6 +2033,7 @@ async def upload_delta_bundle( manifest=manifest, sequence_number=sequence_number, bundle_id=bundle_id, + collection_name=collection_name, ) ) diff --git a/scripts/watch_index.py b/scripts/watch_index.py index 759c636a..dbcfd81d 100644 --- a/scripts/watch_index.py +++ b/scripts/watch_index.py @@ -103,6 +103,100 @@ def get_collection() -> str: return default_collection_name() +_INDEX_SIGNAL_CHANNEL = "context-engine:index-signal" +_ingest_lock = threading.Lock() + + +def _start_index_signal_listener(work_dir: str, default_collection: str) -> None: + """Listen for Redis pub/sub index signals from the upload service and spawn ingest_code.""" + from scripts.workspace_state import _get_redis_client, _redis_state_enabled + + if not _redis_state_enabled(): + print("[index_signal] Redis not enabled, skipping signal listener") + return + + def _listener(): + while True: + try: + rc = _get_redis_client() + if rc is None: + time.sleep(5) + continue + pubsub = rc.pubsub() + pubsub.subscribe(_INDEX_SIGNAL_CHANNEL) + print(f"[index_signal] Subscribed to {_INDEX_SIGNAL_CHANNEL}") + for message in pubsub.listen(): + if message["type"] != "message": + continue + try: + data = json.loads(message["data"]) + except Exception: + continue + workspace_path = data.get("workspace_path", "") + if not workspace_path: + print("[index_signal] Skipping signal with empty workspace_path") + continue + collection = data.get("collection") or default_collection + print(f"[index_signal] Received signal: workspace={workspace_path} collection={collection}") + + if not _ingest_lock.acquire(blocking=False): + print("[index_signal] Ingest already running, skipping") + continue + try: + from scripts.indexing_admin import spawn_ingest_code + repo_name = None + try: + repo_name = 
_extract_repo_name_from_path(workspace_path) + except Exception: + pass + + root = workspace_path + if not Path(root).is_absolute(): + root = str(Path(work_dir) / root) + if not Path(root).exists(): + root = work_dir + + print(f"[index_signal] Spawning ingest_code: root={root} collection={collection}") + proc = spawn_ingest_code( + root=root, + work_dir=work_dir, + collection=collection, + recreate=False, + repo_name=repo_name, + ) + + # Hold _ingest_lock until the subprocess finishes so + # concurrent Redis signals are properly skipped. + def _wait_and_release(p, lock): + try: + p.wait() + except Exception: + pass + finally: + lock.release() + print("[index_signal] Ingest process finished, lock released") + + waiter = threading.Thread( + target=_wait_and_release, + args=(proc, _ingest_lock), + daemon=True, + name="ingest-lock-waiter", + ) + waiter.start() + # Lock is now owned by the waiter thread — skip the + # finally-release below. + continue + except Exception as e: + print(f"[index_signal] Error spawning ingest: {e}") + _ingest_lock.release() + except Exception as e: + print(f"[index_signal] Listener error, reconnecting in 5s: {e}") + time.sleep(5) + + th = threading.Thread(target=_listener, daemon=True, name="index-signal-listener") + th.start() + + def main() -> None: global _watcher_healthy, _watcher_started_at from datetime import datetime, timezone @@ -251,6 +345,8 @@ def main() -> None: _watcher_healthy = True print("[health] Watcher is now healthy and monitoring for changes") + _start_index_signal_listener(str(ROOT), default_collection) + try: while True: time.sleep(1.0) diff --git a/scripts/watch_index_core/utils.py b/scripts/watch_index_core/utils.py index b48452eb..43e11f87 100644 --- a/scripts/watch_index_core/utils.py +++ b/scripts/watch_index_core/utils.py @@ -95,14 +95,56 @@ def create_observer(use_polling: bool, observer_cls: Type[Observer] = Observer) def _detect_repo_for_file(file_path: Path) -> Optional[Path]: - """Detect repository root for 
a file under WATCH root.""" + """Detect repository root for a file by finding the nearest .git parent. + + In multi-repo mode, walks up from the file to find the actual git repository + root (directory containing .git), rather than assuming the first subdirectory + under ROOT is a repo. This prevents subdirectories like src/, docs/, tests/ + from being treated as separate repositories. + + Falls back to first subdirectory behavior only when no .git is found, + unless MULTI_REPO_GIT_STRICT=1 is set. + """ try: - rel_path = file_path.resolve().relative_to(ROOT.resolve()) - except Exception: + resolved = file_path.resolve() + root_resolved = ROOT.resolve() + + # Walk up from file to find actual git repo root + for parent in [resolved] + list(resolved.parents): + try: + # Check if this directory has a .git folder + if (parent / ".git").exists(): + # Ensure it's still under our watch root + try: + parent.relative_to(root_resolved) + return parent + except ValueError: + # Parent is outside watch root, stop here + break + # Stop at workspace root + if parent == root_resolved: + break + except Exception as e: + logger.debug(f"Suppressed exception checking .git at {parent}: {e}") + continue + + # Fallback: use first subdirectory (legacy behavior for non-git dirs) + # This ensures multi-repo mode works in K8s where code may be copied without .git + try: + rel_path = resolved.relative_to(root_resolved) + except ValueError: + return None + if not rel_path.parts: + return ROOT + + fallback_repo = ROOT / rel_path.parts[0] + logger.debug( + f"[multi_repo] No .git found for {file_path}, using fallback: {fallback_repo}" + ) + return fallback_repo + except Exception as e: + logger.debug(f"Suppressed exception in _detect_repo_for_file: {e}") return None - if not rel_path.parts: - return ROOT - return ROOT / rel_path.parts[0] def _repo_name_or_none(repo_path: Optional[Path]) -> Optional[str]: diff --git a/skills/context-engine/SKILL.md b/skills/context-engine/SKILL.md index 
7c9a800a..ba2f9591 100644 --- a/skills/context-engine/SKILL.md +++ b/skills/context-engine/SKILL.md @@ -33,13 +33,14 @@ What do you need? | +-- Find relationships | | - | +-- Who calls this function --> search_callers_for OR symbol_graph - | +-- Who imports this module --> search_importers_for - | +-- Symbol graph navigation (callers/defs/importers) --> symbol_graph - | +-- Multi-hop callers (callers of callers) --> neo4j_graph_query (transitive_callers) - | +-- Impact analysis (what breaks if I change X) --> neo4j_graph_query (impact) - | +-- Dependency graph --> neo4j_graph_query (dependencies) - | +-- Circular dependency detection --> neo4j_graph_query (cycles) + | +-- Who calls this function --> symbol_graph (DEFAULT, always available) + | +-- Who imports this module --> symbol_graph OR search_importers_for + | +-- Where is this defined --> symbol_graph (query_type="definition") + | +-- Symbol graph navigation (callers/defs/importers) --> symbol_graph (ALWAYS use this first) + | +-- Multi-hop callers (callers of callers) --> symbol_graph (depth=2+) OR neo4j_graph_query (if NEO4J_GRAPH=1) + | +-- Impact analysis (what breaks if I change X) --> neo4j_graph_query (ONLY if available) + | +-- Dependency graph --> neo4j_graph_query (ONLY if available) + | +-- Circular dependency detection --> neo4j_graph_query (ONLY if available) | +-- Git history --> search_commits_for | @@ -246,7 +247,10 @@ The `query_signature` encodes control flow: `L` (loops), `B` (branches), `T` (tr - If there are no graph hits, it falls back to semantic search. - **Note**: Results are "hydrated" with ~500-char source snippets for immediate context. -**neo4j_graph_query** - Advanced graph traversals (requires NEO4J_GRAPH=1): +**neo4j_graph_query** - Advanced graph traversals (OPTIONAL — ONLY available when NEO4J_GRAPH=1): + +> **If `neo4j_graph_query` is not in your MCP tool list, it is NOT enabled. Use `symbol_graph` for all graph queries instead. 
Do NOT error or warn about missing Neo4j.** + ```json {"symbol": "normalize_path", "query_type": "impact", "depth": 2} ``` @@ -257,7 +261,7 @@ The `query_signature` encodes control flow: `L` (loops), `B` (branches), `T` (tr {"symbol": "run_hybrid_search", "query_type": "dependencies", "limit": 15} ``` -**Query types:** +**Query types (only when neo4j_graph_query is available):** | Type | Description | |------|-------------| | `callers` | Who calls this symbol? (depth 1) | @@ -400,18 +404,20 @@ Common issues: ## Best Practices -1. **Start broad, then filter** - Begin with a semantic query, add filters if too many results -2. **Use multi-query** - Pass 2-3 query variations for better recall on complex searches -3. **Include snippets** - Set `include_snippet: true` to see code context in results -4. **Store decisions** - Use `memory_store` to save architectural decisions and context for later -5. **Check index health** - Run `qdrant_status` if searches return unexpected results -6. **Prune after refactors** - Run `qdrant_prune` after moving/deleting files -7. **Index before search** - Always run `qdrant_index_root` on first use or after cloning a repo -8. **Use pattern_search for structural matching** - When looking for code with similar control flow (retry loops, error handling), use `pattern_search` instead of `repo_search` (if enabled) -9. **Describe patterns in natural language** - `pattern_search` understands "retry with backoff" just as well as actual code examples (if enabled) -10. **Fire independent searches in parallel** - Call multiple `repo_search`, `symbol_graph`, etc. in the same message block for 2-3x speedup -11. **Use TOON format for discovery** - Set `output_format: "toon"` for 60-80% token reduction on exploratory queries -12. **Bootstrap sessions with defaults** - Call `set_session_defaults(output_format="toon", compact=true)` early to avoid repeating params -13. 
**Two-phase search** - Discovery first (`limit=3, compact=true`), then deep dive (`limit=5-8, include_snippet=true`) on targets -14. **Use fallback chains** - If `context_answer` times out, fall back to `repo_search` + `info_request(include_explanation=true)` +1. **NEVER use Read File or grep for exploration** - Use MCP tools (`repo_search`, `symbol_graph`, `context_answer`) instead. The ONLY acceptable use of Read/grep is confirming exact literal strings. +2. **Default to `symbol_graph` for all graph queries** - It is always available. Only use `neo4j_graph_query` if the tool appears in your MCP tool list. +3. **Start broad, then filter** - Begin with a semantic query, add filters if too many results +4. **Use multi-query** - Pass 2-3 query variations for better recall on complex searches +5. **Include snippets** - Set `include_snippet: true` to see code context in results +6. **Store decisions** - Use `memory_store` to save architectural decisions and context for later +7. **Check index health** - Run `qdrant_status` if searches return unexpected results +8. **Prune after refactors** - Run `qdrant_prune` after moving/deleting files +9. **Index before search** - Always run `qdrant_index_root` on first use or after cloning a repo +10. **Use pattern_search for structural matching** - When looking for code with similar control flow (retry loops, error handling), use `pattern_search` instead of `repo_search` (if enabled) +11. **Describe patterns in natural language** - `pattern_search` understands "retry with backoff" just as well as actual code examples (if enabled) +12. **Fire independent searches in parallel** - Call multiple `repo_search`, `symbol_graph`, etc. in the same message block for 2-3x speedup +13. **Use TOON format for discovery** - Set `output_format: "toon"` for 60-80% token reduction on exploratory queries +14. **Bootstrap sessions with defaults** - Call `set_session_defaults(output_format="toon", compact=true)` early to avoid repeating params +15. 
**Two-phase search** - Discovery first (`limit=3, compact=true`), then deep dive (`limit=5-8, include_snippet=true`) on targets +16. **Use fallback chains** - If `context_answer` times out, fall back to `repo_search` + `info_request(include_explanation=true)` diff --git a/templates/admin/acl.html b/templates/admin/acl.html index 27c803ed..99b7fab3 100644 --- a/templates/admin/acl.html +++ b/templates/admin/acl.html @@ -69,15 +69,14 @@

Collections

- - - - - + + + + @@ -85,9 +84,6 @@

Collections

{% if collections and collections|length > 0 %} {% for c in collections %} - - - - - {% endfor %} {% else %} - + {% endif %}
ID Collection Status Progress ConfigGraphApplied HashPending HashCurrent Hash Actions
-
{{ (c.id or c.workspace_id or '—')[:16] }}...
-
{{ c.qdrant_collection }}
{% if c.container_path %} @@ -148,7 +144,7 @@

Collections

{% endif %}
+ + + +
No collections found
No collections found
@@ -627,9 +623,6 @@

New API Key - -
${idDisplay}
-
${escapeHtml(c.qdrant_collection)}
${c.container_path ? `
${escapeHtml(c.container_path)}
` : ''} @@ -647,7 +640,7 @@

New API Key - + ${c.index_graph_edges_enabled ? ` @@ -656,15 +649,15 @@

New API KeyClone: ${escapeHtml(c.graph_clone_name.slice(0, 16))}...${c.graph_clone_copied ? '' : ''}` : ''}` : ''} - +
${escapeHtml((c.applied_indexing_hash || '—').slice(0, 8))}
- + ${c.pending_indexing_hash ? `
${escapeHtml(c.pending_indexing_hash.slice(0, 8))}
` : ''} - + ${currentHash ? `
${escapeHtml(currentHash.slice(0, 8))}
` : ''} @@ -731,7 +724,7 @@

New API KeyNo collections found'; + bodyEl.innerHTML = 'No collections found'; } else { bodyEl.innerHTML = collections.map(renderRow).join(''); } diff --git a/templates/admin/base.html b/templates/admin/base.html index ad5b2023..9ae4494e 100644 --- a/templates/admin/base.html +++ b/templates/admin/base.html @@ -287,7 +287,6 @@ .card:hover { border-color: rgba(255, 255, 255, 0.1); - transform: translateY(-2px); box-shadow: var(--shadow-lg); } @@ -327,13 +326,14 @@ width: 100%; border-collapse: collapse; font-size: 13px; + table-layout: auto; } th { text-align: left; - padding: 14px 18px; + padding: 12px 14px; font-weight: 500; - font-size: 12px; + font-size: 11px; text-transform: uppercase; letter-spacing: 0.05em; color: var(--text-muted); @@ -342,10 +342,11 @@ } td { - padding: 16px 18px; + padding: 14px; border-bottom: 1px solid var(--border-subtle); color: var(--text-secondary); transition: background var(--transition); + vertical-align: top; } tr:last-child td { border-bottom: none; } diff --git a/uv.lock b/uv.lock index e75aac46..e4181a10 100644 --- a/uv.lock +++ b/uv.lock @@ -91,15 +91,15 @@ wheels = [ [[package]] name = "blessed" -version = "1.27.0" +version = "1.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jinxed", marker = "sys_platform == 'win32'" }, { name = "wcwidth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f1/3c/783f2a400e5dac56ad073997aa6aa47150c3b06a5ce8ad2f537f3691eaaa/blessed-1.27.0.tar.gz", hash = "sha256:e3064559388bd532ab6460d9b6c7d6dd699c4e0cf54d28ed6e2cab12feda13bb", size = 6761573, upload-time = "2026-01-20T04:16:56.233Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/2a/5439a7d5f543500a604b4e0ff2b4db591fd8722e1ba4999de9185be7abc4/blessed-1.28.0.tar.gz", hash = "sha256:91620abe30549a32720fb0d2359100247cb279f84c4974becd789aac4818695c", size = 13950742, upload-time = "2026-01-27T03:42:58.943Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/30/d9/11d745a88e9000729fc4d9e813789a95327beda325e04ec311e9ae23a30e/blessed-1.27.0-py3-none-any.whl", hash = "sha256:1c599969acc993bb5842bf3f638b0691e335277a9d9058cd079463a346988714", size = 101305, upload-time = "2026-01-20T04:16:54.095Z" }, + { url = "https://files.pythonhosted.org/packages/cf/b6/90a543437276ae0f1e38dac927bf09c24f8e902e00c0aec0ec30e4818635/blessed-1.28.0-py3-none-any.whl", hash = "sha256:51e386b8ec85a0ce2177ffd7269d0121218c380d793196d87333cba97271a79f", size = 100543, upload-time = "2026-01-27T03:42:56.141Z" }, ] [[package]] @@ -402,7 +402,7 @@ requires-dist = [ { name = "fastembed" }, { name = "fastmcp", specifier = "==2.12.4" }, { name = "jinja2" }, - { name = "mcp", specifier = "==1.17.0" }, + { name = "mcp", specifier = "==1.23.0" }, { name = "onnxruntime" }, { name = "openai", specifier = ">=2.11.0" }, { name = "openlit", specifier = ">=1.0.0" }, @@ -1322,7 +1322,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.17.0" +version = "1.23.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1331,15 +1331,18 @@ dependencies = [ { name = "jsonschema" }, { name = "pydantic" }, { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, { name = "python-multipart" }, { name = "pywin32", marker = "sys_platform == 'win32'" }, { name = "sse-starlette" }, { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/79/5724a540df19e192e8606c543cdcf162de8eb435077520cca150f7365ec0/mcp-1.17.0.tar.gz", hash = "sha256:1b57fabf3203240ccc48e39859faf3ae1ccb0b571ff798bbedae800c73c6df90", size = 477951, upload-time = "2025-10-10T12:16:44.519Z" } +sdist = { url = "https://files.pythonhosted.org/packages/25/1a/9c8a5362e3448d585081d6c7aa95898a64e0ac59d3e26169ae6c3ca5feaf/mcp-1.23.0.tar.gz", hash = 
"sha256:84e0c29316d0a8cf0affd196fd000487ac512aa3f771b63b2ea864e22961772b", size = 596506, upload-time = "2025-12-02T13:40:02.558Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/72/3751feae343a5ad07959df713907b5c3fbaed269d697a14b0c449080cf2e/mcp-1.17.0-py3-none-any.whl", hash = "sha256:0660ef275cada7a545af154db3082f176cf1d2681d5e35ae63e014faf0a35d40", size = 167737, upload-time = "2025-10-10T12:16:42.863Z" }, + { url = "https://files.pythonhosted.org/packages/7b/b2/28739ce409f98159c0121eab56e69ad71546c4f34ac8b42e58c03f57dccc/mcp-1.23.0-py3-none-any.whl", hash = "sha256:5a645cf111ed329f4619f2629a3f15d9aabd7adc2ea09d600d31467b51ecb64f", size = 231427, upload-time = "2025-12-02T13:40:00.738Z" }, ] [[package]] @@ -2392,6 +2395,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pyperclip" version = "1.11.0" @@ -3702,11 +3719,11 @@ wheels = [ [[package]] name = "wcwidth" -version = "0.4.0" +version = "0.5.0" source = { 
registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/0a/dc5110cc99c39df65bac29229c4b637a8304e0914850348d98974c8ecfff/wcwidth-0.4.0.tar.gz", hash = "sha256:46478e02cf7149ba150fb93c39880623ee7e5181c64eda167b6a1de51b7a7ba1", size = 237625, upload-time = "2026-01-26T02:35:58.844Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/6e/62daec357285b927e82263a81f3b4c1790215bc77c42530ce4a69d501a43/wcwidth-0.5.0.tar.gz", hash = "sha256:f89c103c949a693bf563377b2153082bf58e309919dfb7f27b04d862a0089333", size = 246585, upload-time = "2026-01-27T01:31:44.942Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/f6/da704c5e77281d71723bffbd926b754c0efd57cbcd02e74c2ca374c14cef/wcwidth-0.4.0-py3-none-any.whl", hash = "sha256:8af2c81174b3aa17adf05058c543c267e4e5b6767a28e31a673a658c1d766783", size = 88216, upload-time = "2026-01-26T02:35:57.461Z" }, + { url = "https://files.pythonhosted.org/packages/f2/3e/45583b67c2ff08ad5a582d316fcb2f11d6cf0a50c7707ac09d212d25bc98/wcwidth-0.5.0-py3-none-any.whl", hash = "sha256:1efe1361b83b0ff7877b81ba57c8562c99cf812158b778988ce17ec061095695", size = 93772, upload-time = "2026-01-27T01:31:43.432Z" }, ] [[package]] diff --git a/vscode-extension/build/publish-vscode-extension.sh b/vscode-extension/build/publish-vscode-extension.sh old mode 100644 new mode 100755 diff --git a/vscode-extension/context-engine-uploader/auth_utils.js b/vscode-extension/context-engine-uploader/auth_utils.js index 4fa3bb98..deda9f50 100644 --- a/vscode-extension/context-engine-uploader/auth_utils.js +++ b/vscode-extension/context-engine-uploader/auth_utils.js @@ -16,6 +16,20 @@ function getFetch(deps) { return null; } +function normalizeBackendUrl(raw) { + const trimmed = (raw || '').trim(); + if (!trimmed) return trimmed; + try { + const u = new URL(trimmed); + if (!trimmed.includes('/upload') && !trimmed.includes('/auth')) { + return `${u.protocol}//${u.host}`; + } + return trimmed; + } catch (_) { + 
return trimmed.replace(/\/+$/, ''); + } +} + /** * Check auth status for the given endpoint using ctxce CLI. * Returns { state: 'ok' | 'missing' | 'expired' | 'error', userId?: string } @@ -25,17 +39,10 @@ async function checkAuthStatus(endpoint, deps) { return { state: 'error' }; } const { spawn, resolveBridgeCliInvocation, getWorkspaceFolderPath } = deps; - const raw = (endpoint || '').trim(); - if (!raw) { + const backendUrl = normalizeBackendUrl(endpoint); + if (!backendUrl) { return { state: 'error' }; } - let backendUrl = raw; - try { - const u = new URL(raw); - backendUrl = `${u.protocol}//${u.host}`; - } catch (_) { - backendUrl = raw.replace(/\/+$/, ''); - } const invocation = resolveBridgeCliInvocation(); if (!invocation) { @@ -118,22 +125,14 @@ async function ensureAuthIfRequired(endpoint, deps) { } const { vscode, spawnSync, resolveBridgeCliInvocation, getWorkspaceFolderPath, log } = deps; const fetchFn = getFetch(deps); - const raw = (endpoint || '').trim(); - if (!raw) { + const baseUrl = normalizeBackendUrl(endpoint); + if (!baseUrl) { return; } if (!fetchFn) { log('Auth status probe skipped: fetch is not available in this runtime.'); return; } - - let baseUrl = raw; - try { - const u = new URL(raw); - baseUrl = `${u.protocol}//${u.host}`; - } catch (_) { - baseUrl = raw.replace(/\/+$/, ''); - } const statusUrl = `${baseUrl.replace(/\/+$/, '')}/auth/status`; let res; @@ -240,17 +239,49 @@ async function runAuthLoginFlow(explicitBackendUrl, deps) { const settings = vscode.workspace.getConfiguration('contextEngineUploader'); endpoint = (settings.get('endpoint') || '').trim(); } - let backendUrl = explicitBackendUrl || endpoint; + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); + const configuredAuthBackendUrl = (settings.get('authBackendUrl') || '').trim(); + const configuredAuthToken = (settings.get('authSharedToken') || '').trim(); + + // Prefer configured authBackendUrl over explicit URL to allow settings override + let 
backendUrl = normalizeBackendUrl(configuredAuthBackendUrl || explicitBackendUrl || endpoint); if (!backendUrl) { - vscode.window.showErrorMessage('Context Engine Uploader: backend endpoint is not configured (contextEngineUploader.endpoint).'); + vscode.window.showErrorMessage('Context Engine Uploader: backend endpoint is not configured (contextEngineUploader.endpoint or contextEngineUploader.authBackendUrl).'); return; } - try { - const u = new URL(backendUrl); - backendUrl = `${u.protocol}//${u.host}`; - } catch (_) { - backendUrl = backendUrl.replace(/\/+$/, ''); + const invocation = resolveBridgeCliInvocation(); + if (!invocation) { + vscode.window.showErrorMessage('Context Engine Uploader: unable to locate ctxce CLI for auth.'); + return; + } + const cwd = getWorkspaceFolderPath() || process.cwd(); + + if (configuredAuthToken) { + const args = [...invocation.args, 'auth', 'login']; + const env = { + ...process.env, + CTXCE_AUTH_BACKEND_URL: backendUrl, + CTXCE_AUTH_TOKEN: configuredAuthToken, + }; + await new Promise(resolve => { + const child = spawn(invocation.command, args, { cwd, env }); + attachOutput(child, 'auth'); + child.on('error', error => { + log(`ctxce auth login (configured token) failed to start: ${error instanceof Error ? error.message : String(error)}`); + vscode.window.showErrorMessage('Context Engine Uploader: auth login failed to start. See output for details.'); + resolve(); + }); + child.on('close', code => { + if (code === 0) { + vscode.window.showInformationMessage('Context Engine Uploader: auth login successful (using configured token).'); + } else { + vscode.window.showErrorMessage(`Context Engine Uploader: auth login failed with exit code ${code}. 
See output for details.`); + } + resolve(); + }); + }); + return; } const mode = await vscode.window.showQuickPick( @@ -261,13 +292,6 @@ async function runAuthLoginFlow(explicitBackendUrl, deps) { return; } - const invocation = resolveBridgeCliInvocation(); - if (!invocation) { - vscode.window.showErrorMessage('Context Engine Uploader: unable to locate ctxce CLI for auth.'); - return; - } - const cwd = getWorkspaceFolderPath() || process.cwd(); - if (mode.startsWith('Token')) { const token = await vscode.window.showInputBox({ prompt: 'Enter Context Engine shared auth token', @@ -359,21 +383,17 @@ async function runAuthLogoutFlow(explicitBackendUrl, deps) { } catch (_) { endpoint = ''; } + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); if (!endpoint) { - const settings = vscode.workspace.getConfiguration('contextEngineUploader'); endpoint = (settings.get('endpoint') || '').trim(); } - let backendUrl = explicitBackendUrl || endpoint; + // Read authBackendUrl to align with login flow - ensures logout targets the same backend as login + const configuredAuthBackendUrl = (settings.get('authBackendUrl') || '').trim(); + const backendUrl = normalizeBackendUrl(configuredAuthBackendUrl || explicitBackendUrl || endpoint); if (!backendUrl) { - vscode.window.showErrorMessage('Context Engine Uploader: backend endpoint is not configured (contextEngineUploader.endpoint).'); + vscode.window.showErrorMessage('Context Engine Uploader: backend endpoint is not configured (contextEngineUploader.endpoint or contextEngineUploader.authBackendUrl).'); return; } - try { - const u = new URL(backendUrl); - backendUrl = `${u.protocol}//${u.host}`; - } catch (_) { - backendUrl = backendUrl.replace(/\/+$/, ''); - } const invocation = resolveBridgeCliInvocation(); if (!invocation) { @@ -409,6 +429,7 @@ async function runAuthLogoutFlow(explicitBackendUrl, deps) { module.exports = { checkAuthStatus, ensureAuthIfRequired, + normalizeBackendUrl, runAuthLoginFlow, 
runAuthLogoutFlow, }; diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js index d8c0e154..36f7b3c8 100644 --- a/vscode-extension/context-engine-uploader/extension.js +++ b/vscode-extension/context-engine-uploader/extension.js @@ -386,7 +386,11 @@ function activate(context) { // Register Settings Webview try { - settingsWebviewProvider = new SettingsWebviewProvider(context.extensionUri); + const getEndpointForSettings = () => { + const cfg = getEffectiveConfig(); + return cfg.get('endpoint') || 'http://localhost:8004'; + }; + settingsWebviewProvider = new SettingsWebviewProvider(context.extensionUri, getEndpointForSettings); const openSettingsCmd = vscode.commands.registerCommand('contextEngineUploader.openSettings', () => { settingsWebviewProvider.openSettings(); }); diff --git a/vscode-extension/context-engine-uploader/package.json b/vscode-extension/context-engine-uploader/package.json index c80cba96..133491c7 100644 --- a/vscode-extension/context-engine-uploader/package.json +++ b/vscode-extension/context-engine-uploader/package.json @@ -2,7 +2,7 @@ "name": "context-engine-uploader", "displayName": "Context-Engine.AI", "description": "Supercharge your AI coding assistants with rich codebase context. Integrates with Claude Code, Windsurf, Augment, and more via MCP.", - "version": "0.1.48", + "version": "0.1.50", "publisher": "context-engine", "engines": { "vscode": "^1.85.0" @@ -376,6 +376,18 @@ "default": "http://localhost:8002/mcp", "order": 6, "description": "MCP server URL for memory/search." + }, + "contextEngineUploader.authBackendUrl": { + "type": "string", + "default": "", + "order": 7, + "markdownDescription": "Auth backend URL for remote deployments (e.g., `http://ce.example.com/upload`). Leave empty for local stack." 
+ }, + "contextEngineUploader.authSharedToken": { + "type": "string", + "default": "", + "order": 8, + "markdownDescription": "Shared authentication token for team deployments. **Keep this secret!**" } } }, diff --git a/vscode-extension/context-engine-uploader/settings-webview.js b/vscode-extension/context-engine-uploader/settings-webview.js index d3d23563..817c1e11 100644 --- a/vscode-extension/context-engine-uploader/settings-webview.js +++ b/vscode-extension/context-engine-uploader/settings-webview.js @@ -6,6 +6,13 @@ const vscode = require('vscode'); // Settings grouped by category const SETTINGS_SCHEMA = { + status: { + title: 'Status', + icon: 'pulse', + description: 'Live indexing progress and system status', + isStatus: true, // Special flag for status section rendering + settings: [] // No editable settings - purely informational + }, general: { title: 'General', icon: 'home', @@ -17,6 +24,15 @@ const SETTINGS_SCHEMA = { { key: 'pythonPath', label: 'Python Path', type: 'string', description: 'Python executable for scripts', placeholder: 'python3' }, ] }, + team: { + title: 'Team', + icon: 'organization', + description: 'Shared authentication for team deployments', + settings: [ + { key: 'authBackendUrl', label: 'Auth Backend URL', type: 'string', description: 'Upload service URL for authentication (e.g. http://ce.yourteam.com/upload)', placeholder: 'http://localhost:8004' }, + { key: 'authSharedToken', label: 'Shared API Token', type: 'password', description: 'Team-wide shared token for upload authentication. 
All team members use the same token.', placeholder: '••••••••' }, + ] + }, indexing: { title: 'Indexing', icon: 'database', @@ -106,10 +122,11 @@ const SETTINGS_SCHEMA = { class SettingsWebviewProvider { static viewType = 'contextEngineSettingsPanel'; - constructor(extensionUri) { + constructor(extensionUri, getEndpoint) { this._extensionUri = extensionUri; + this._getEndpoint = getEndpoint || (() => 'http://localhost:8004'); this._panel = undefined; - this._activeSection = 'general'; + this._activeSection = 'status'; // Default to status section } openSettings() { @@ -181,13 +198,15 @@ class SettingsWebviewProvider { const nonce = getNonce(); const values = this._getAllSettings(); const logoUri = webview.asWebviewUri(vscode.Uri.joinPath(this._extensionUri, 'assets', 'logo.jpeg')); + const endpoint = this._getEndpoint(); + const workspacePath = values.targetPath || ''; return ` - + Context Engine Settings @@ -213,7 +232,11 @@ class SettingsWebviewProvider { ${this._getSectionContent(values)} - + `; } @@ -231,6 +254,11 @@ class SettingsWebviewProvider { const section = SETTINGS_SCHEMA[this._activeSection]; if (!section) return ''; + // Special rendering for status section + if (section.isStatus) { + return this._getStatusSectionHtml(); + } + return `

${section.title}

@@ -242,6 +270,71 @@ class SettingsWebviewProvider { `; } + _getStatusSectionHtml() { + return ` +
+

Status

+

Live indexing progress and system status

+
+
+
+
+ + Indexing Status + Checking... +
+
+ +
+
+ State + -- +
+
+ Points Indexed + -- +
+
+ Watcher + -- +
+
+ Qdrant + -- +
+
+
+
+
+
+ + Server Connection + Checking... +
+
+
+ Endpoint + -- +
+
+ Last Checked + -- +
+
+
+
+ `; + } + _getSettingHtml(setting, value) { const id = `setting-${setting.key}`; let input = ''; @@ -461,12 +554,97 @@ class SettingsWebviewProvider { .toggle input:checked + .toggle-slider { background: var(--accent); } .toggle input:checked + .toggle-slider::before { transform: translateX(18px); } .toggle input:focus + .toggle-slider { box-shadow: 0 0 0 2px var(--focus-ring); } + + /* Status section styles */ + .status-section { display: flex; flex-direction: column; gap: 16px; } + .status-card { + background: var(--bg-card); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + overflow: hidden; + } + .status-card-header { + display: flex; + align-items: center; + gap: 10px; + padding: 14px 16px; + background: var(--bg-subtle); + border-bottom: 1px solid var(--border-subtle); + } + .status-card-header .codicon { font-size: 16px; color: var(--accent); } + .status-card-title { font-weight: 500; flex: 1; } + .status-badge { + padding: 3px 10px; + border-radius: 12px; + font-size: 11px; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.3px; + } + .status-badge.idle { background: var(--border); color: var(--text-secondary); } + .status-badge.indexing { background: var(--vscode-charts-yellow, #e9a700); color: #000; } + .status-badge.watching { background: var(--vscode-charts-purple, #a855f7); color: #fff; } + .status-badge.ready { background: var(--vscode-charts-green, #22c55e); color: #fff; } + .status-badge.error { background: var(--vscode-errorForeground, #f14c4c); color: #fff; } + .status-badge.offline { background: var(--vscode-errorForeground, #f14c4c); color: #fff; } + .status-badge.online { background: var(--vscode-charts-green, #22c55e); color: #fff; } + .status-card-body { padding: 16px; } + .status-row { + display: flex; + justify-content: space-between; + align-items: center; + padding: 8px 0; + border-bottom: 1px solid var(--border-subtle); + } + .status-row:last-child { border-bottom: none; } + .status-label { 
color: var(--text-secondary); font-size: 12px; } + .status-value { font-weight: 500; font-size: 13px; } + + /* Progress bar styles */ + .progress-container { margin-bottom: 16px; } + .progress-info { + display: flex; + justify-content: space-between; + margin-bottom: 8px; + font-size: 12px; + } + #progress-text { color: var(--text-primary); } + #progress-percent { color: var(--accent); font-weight: 600; } + .progress-bar { + height: 6px; + background: var(--border); + border-radius: 3px; + overflow: hidden; + } + .progress-fill { + height: 100%; + background: linear-gradient(90deg, var(--accent), var(--vscode-charts-green, #22c55e)); + border-radius: 3px; + transition: width 0.3s ease; + } + .current-file { + margin-top: 8px; + font-size: 11px; + color: var(--text-muted); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + } + .current-file::before { + content: '\\eb68'; + font-family: codicon; + margin-right: 6px; + opacity: 0.7; + } `; } _getScript() { return ` const vscode = acquireVsCodeApi(); + let pollInterval = null; + let isStatusSection = false; + function updateSetting(key, value) { vscode.postMessage({ command: 'updateSetting', key, value }); } @@ -478,6 +656,121 @@ class SettingsWebviewProvider { vscode.postMessage({ command: 'setSection', section: btn.dataset.section }); }); }); + + // Status polling functions + function formatNumber(num) { + return num != null ? 
num.toLocaleString() : '--'; + } + + function updateStatusUI(data) { + const statusBadge = document.getElementById('status-badge'); + const progressContainer = document.getElementById('progress-container'); + const progressText = document.getElementById('progress-text'); + const progressPercent = document.getElementById('progress-percent'); + const progressFill = document.getElementById('progress-fill'); + const currentFile = document.getElementById('current-file'); + const stateValue = document.getElementById('state-value'); + const pointsValue = document.getElementById('points-value'); + const watcherValue = document.getElementById('watcher-value'); + const qdrantValue = document.getElementById('qdrant-value'); + + if (!statusBadge) return; // Not on status section + + const state = data.indexing_state || 'idle'; + const progress = data.progress; + + // Update badge + statusBadge.textContent = state.charAt(0).toUpperCase() + state.slice(1); + statusBadge.className = 'status-badge ' + state; + + // Update progress bar if indexing + if (state === 'indexing' && progress) { + progressContainer.style.display = 'block'; + const processed = progress.files_processed || 0; + const total = progress.total_files || 1; + const percent = Math.round((processed / total) * 100); + + progressText.textContent = formatNumber(processed) + ' / ' + formatNumber(total) + ' files'; + progressPercent.textContent = percent + '%'; + progressFill.style.width = percent + '%'; + + if (progress.current_file) { + const shortPath = progress.current_file.split('/').slice(-2).join('/'); + currentFile.textContent = shortPath; + currentFile.style.display = 'block'; + } else { + currentFile.style.display = 'none'; + } + } else { + progressContainer.style.display = 'none'; + } + + // Update details + stateValue.textContent = state.charAt(0).toUpperCase() + state.slice(1); + pointsValue.textContent = formatNumber(data.points_count); + watcherValue.textContent = data.watcher_active ? 
'Active' : 'Inactive'; + watcherValue.style.color = data.watcher_active ? 'var(--vscode-charts-green, #22c55e)' : 'var(--text-secondary)'; + qdrantValue.textContent = data.qdrant_healthy ? 'Connected' : 'Disconnected'; + qdrantValue.style.color = data.qdrant_healthy ? 'var(--vscode-charts-green, #22c55e)' : 'var(--vscode-errorForeground, #f14c4c)'; + } + + function updateConnectionUI(isConnected, endpoint) { + const connectionBadge = document.getElementById('connection-badge'); + const endpointValue = document.getElementById('endpoint-value'); + const lastCheckedValue = document.getElementById('last-checked-value'); + + if (!connectionBadge) return; + + connectionBadge.textContent = isConnected ? 'Online' : 'Offline'; + connectionBadge.className = 'status-badge ' + (isConnected ? 'online' : 'offline'); + endpointValue.textContent = endpoint || '--'; + lastCheckedValue.textContent = new Date().toLocaleTimeString(); + } + + async function pollStatus() { + if (!STATUS_ENDPOINT) { + updateConnectionUI(false, 'Not configured'); + return; + } + + try { + const params = new URLSearchParams(); + if (WORKSPACE_PATH) params.set('workspace_path', WORKSPACE_PATH); + + const url = STATUS_ENDPOINT + '/api/v1/indexing/status' + (params.toString() ? '?' + params.toString() : ''); + const response = await fetch(url, { + method: 'GET', + headers: { 'Accept': 'application/json' } + }); + + if (response.ok) { + const data = await response.json(); + updateStatusUI(data); + updateConnectionUI(true, STATUS_ENDPOINT); + + // Poll faster during indexing + const newInterval = data.indexing_state === 'indexing' ? 
1500 : 5000; + if (pollInterval && pollInterval._interval !== newInterval) { + clearInterval(pollInterval); + pollInterval = setInterval(pollStatus, newInterval); + pollInterval._interval = newInterval; + } + } else { + updateConnectionUI(false, STATUS_ENDPOINT); + } + } catch (error) { + updateConnectionUI(false, STATUS_ENDPOINT); + console.log('Status poll error:', error.message); + } + } + + // Start polling if on status section + if (document.getElementById('indexing-status-card')) { + isStatusSection = true; + pollStatus(); + pollInterval = setInterval(pollStatus, 3000); + pollInterval._interval = 3000; + } `; } }