diff --git a/docker-compose.arm64.yml b/docker-compose.arm64.yml deleted file mode 100644 index 90a33235..00000000 --- a/docker-compose.arm64.yml +++ /dev/null @@ -1,5 +0,0 @@ -services: - qdrant: - platform: linux/arm64/v8 - llamacpp: - platform: linux/arm64/v8 diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 00000000..71172431 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,217 @@ +# Context Engine Architecture + +## Overview + +Context Engine is a production-ready MCP (Model Context Protocol) retrieval stack that unifies code indexing, hybrid search, and optional LLM decoding. It enables teams to ship context-aware AI agents by providing sophisticated semantic and lexical search capabilities with dual-transport compatibility. + +## Core Principles + +- **Research-Grade Retrieval**: Implements ReFRAG-inspired micro-chunking and span budgeting +- **Dual-Transport Support**: Supports both SSE (legacy) and HTTP RMCP (modern) protocols +- **Performance-First**: Intelligent caching, connection pooling, and async I/O patterns +- **Production-Ready**: Comprehensive health checks, monitoring, and operational tooling + +## System Architecture + +### Component Diagram + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Client Apps │◄──►│ MCP Servers │◄──►│ Qdrant DB │ +│ (IDE, CLI, Web) │ │ (SSE + HTTP) │ │ (Vector Store) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ LLM Decoder │ + │ (llama.cpp) │ + │ (Optional) │ + └─────────────────┘ +``` + +## Core Components + +### 1. 
MCP Servers + +#### Memory Server (`scripts/mcp_memory_server.py`) +- **Purpose**: Knowledge base storage and retrieval +- **Transport**: SSE (port 8000) + HTTP RMCP (port 8002) +- **Key Features**: + - Structured memory storage with rich metadata + - Hybrid search (dense + lexical) + - Dual vector support for embedding and lexical hashes + - Automatic collection management + +#### Indexer Server (`scripts/mcp_indexer_server.py`) +- **Purpose**: Code search, indexing, and management +- **Transport**: SSE (port 8001) + HTTP RMCP (port 8003) +- **Key Features**: + - Hybrid code search with multiple filtering options + - ReFRAG-inspired micro-chunking (16-token windows) + - Context-aware Q&A with local LLM integration + - Workspace and collection management + - Live indexing and pruning capabilities + +### 2. Search Pipeline + +#### Hybrid Search Engine (`scripts/hybrid_search.py`) +- **Multi-Vector Architecture**: + - **Dense Vectors**: Semantic embeddings (BAAI/bge-base-en-v1.5) + - **Lexical Vectors**: BM25-style hashing (4096 dimensions) + - **Mini Vectors**: ReFRAG gating (64 dimensions, optional) + +- **Retrieval Process**: + 1. **Query Expansion**: Generate multiple query variations + 2. **Parallel Search**: Dense + lexical search with RRF fusion + 3. **Optional Reranking**: Cross-encoder neural reranking + 4. **Result Assembly**: Format with citations and metadata + +- **Advanced Features**: + - Request deduplication + - Intelligent caching (multi-policy: LRU, LFU, TTL, FIFO) + - Connection pooling to Qdrant + - Batch processing support + +#### ReFRAG Implementation +- **Micro-chunking**: Token-level windows (16 tokens, 8 stride) +- **Span Budgeting**: Global token budget management +- **Gate-First Filtering**: Mini-vector pre-filtering for efficiency + +### 3. 
Storage Layer + +#### Qdrant Vector Database +- **Primary Storage**: Embeddings and metadata +- **Collection Management**: Automatic creation and configuration +- **Named Vectors**: Separate storage for different embedding types +- **Performance**: HNSW indexing for fast approximate nearest neighbor search + +#### Unified Cache System (`scripts/cache_manager.py`) +- **Eviction Policies**: LRU, LFU, TTL, FIFO +- **Memory Management**: Configurable size limits and monitoring +- **Thread Safety**: Proper locking for concurrent access +- **Statistics Tracking**: Hit rates, memory usage, eviction counts + +### 4. Supporting Infrastructure + +#### Async Subprocess Manager (`scripts/async_subprocess_manager.py`) +- **Process Management**: Async subprocess execution with resource cleanup +- **Connection Pooling**: Reused HTTP connections +- **Timeout Handling**: Configurable timeouts with graceful degradation +- **Resource Tracking**: Active process monitoring and statistics + +#### Deduplication System (`scripts/deduplication.py`) +- **Request Deduplication**: Prevent redundant processing +- **Cache Integration**: Works with unified cache system +- **Performance Impact**: Significant reduction in duplicate work + +#### Semantic Expansion (`scripts/semantic_expansion.py`) +- **Query Enhancement**: LLM-assisted query variation generation +- **Local LLM Integration**: llama.cpp for offline expansion +- **Caching**: Expanded query results cached for reuse + +## Data Flow Architecture + +### Search Request Flow +``` +1. Client Query → MCP Server +2. Query Expansion (optional) → Multiple Query Variations +3. Parallel Execution → Dense Search + Lexical Search +4. RRF Fusion → Combined Results +5. Reranking (optional) → Enhanced Relevance +6. Result Formatting → Structured Response with Citations +7. Return to Client → MCP Protocol Response +``` + +### Indexing Flow +``` +1. File Change Detection → File System Watcher +2. Content Processing → Tokenization + Chunking +3. 
Embedding Generation → Model Inference +4. Vector Creation → Dense + Lexical + Mini +5. Metadata Assembly → Path, symbols, language, etc. +6. Batch Upsert → Qdrant Storage +7. Cache Updates → Local Cache Refresh +``` + +## Configuration Architecture + +### Environment-Based Configuration +- **Docker-Native**: All configuration via environment variables +- **Development Support**: Local .env file configuration +- **Production Ready**: External secret management integration + +### Key Configuration Areas +- **Service Configuration**: Ports, hosts, transport protocols +- **Model Configuration**: Embedding models, reranker settings +- **Performance Tuning**: Cache sizes, batch sizes, timeouts +- **Feature Flags**: Experimental features, debug modes + +## Transport Layer Architecture + +### Dual-Transport Design +- **SSE (Server-Sent Events)**: Legacy client compatibility +- **HTTP RMCP**: Modern JSON-RPC over HTTP +- **Simultaneous Operation**: Both protocols can run together +- **Automatic Fallback**: Graceful degradation when transport fails + +### MCP Protocol Implementation +- **FastMCP Framework**: Modern MCP server implementation +- **Tool Registry**: Automatic tool discovery and registration +- **Health Endpoints**: `/readyz` and `/tools` endpoints +- **Error Handling**: Structured error responses and logging + +## Performance Architecture + +### Caching Strategy +- **Multi-Level Caching**: Embedding cache, search cache, expansion cache +- **Intelligent Invalidation**: TTL-based and LRU eviction +- **Memory Management**: Configurable limits and monitoring +- **Performance Monitoring**: Hit rates, response times, memory usage + +### Concurrency Model +- **Async I/O**: Non-blocking operations throughout +- **Connection Pooling**: Reused connections to external services +- **Batch Processing**: Efficient bulk operations +- **Resource Management**: Proper cleanup and resource limits + +## Security Architecture + +### Isolation and Safety +- **Container-Based**: 
Docker isolation for all services +- **Network Segmentation**: Internal service communication +- **Input Validation**: Comprehensive parameter validation +- **Resource Limits**: Configurable timeouts and memory limits + +### Data Protection +- **No Hardcoded Secrets**: Environment-based configuration +- **API Key Management**: External secret manager integration +- **Audit Logging**: Structured logging for security events + +## Operational Architecture + +### Health Monitoring +- **Service Health**: `/readyz` endpoints for all services +- **Tool Availability**: Dynamic tool listing and status +- **Performance Metrics**: Response times, cache statistics +- **Error Tracking**: Structured error logging and alerting + +### Deployment Patterns +- **Docker Compose**: Multi-service orchestration +- **Environment Parity**: Development ↔ Production consistency +- **Graceful Shutdown**: Proper resource cleanup on termination +- **Rolling Updates**: Zero-downtime deployment support + +## Extensibility Architecture + +### Plugin System +- **MCP Tool Extension**: Easy addition of new tools +- **Transport Flexibility**: Support for future MCP transports +- **Model Pluggability**: Support for different embedding models +- **Storage Abstraction**: Potential for alternative vector stores + +### Configuration Extension +- **Environment-Driven**: Easy configuration via environment variables +- **Feature Flags**: Experimental feature toggling +- **A/B Testing**: Multiple configuration variants support + +This architecture enables Context Engine to serve as a production-ready, scalable context layer for AI applications while maintaining the flexibility to evolve with changing requirements and technologies. 
\ No newline at end of file
diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
new file mode 100644
index 00000000..75c32172
--- /dev/null
+++ b/docs/DEVELOPMENT.md
@@ -0,0 +1,533 @@
+# Development Guide
+
+This guide covers setting up a development environment, understanding the codebase structure, and contributing to Context Engine.
+
+## Prerequisites
+
+### Required Software
+- **Python 3.11+**: Primary development language
+- **Docker & Docker Compose**: Containerized development environment
+- **Make**: Build automation (recommended)
+- **Git**: Version control
+- **Node.js & npm**: For MCP development dependencies
+
+### Optional Tools
+- **pytest**: Testing framework (included in requirements.txt)
+- **pre-commit**: Git hooks for code quality
+- **jq**: JSON processing for CLI tools
+
+## Quick Start
+
+### 1. Repository Setup
+```bash
+# Clone the repository
+git clone <repository-url>
+cd Context-Engine
+
+# Copy environment configuration
+cp .env.example .env
+
+# Install Python dependencies
+pip install -r requirements.txt
+```
+
+### 2. Development Environment
+```bash
+# Start all services in development mode
+make reset-dev-dual
+
+# This starts:
+# - Qdrant vector database (ports 6333/6334)
+# - Memory MCP server (ports 8000 SSE, 8002 HTTP)
+# - Indexer MCP server (ports 8001 SSE, 8003 HTTP)
+# - Llama.cpp decoder (port 8080, optional)
+```
+
+### 3.
Verify Setup +```bash +# Check service health +make health + +# Test MCP connectivity +curl http://localhost:8000/sse # Memory server SSE +curl http://localhost:8001/sse # Indexer server SSE +``` + +## Project Structure + +``` +Context-Engine/ +├── scripts/ # Core application code +│ ├── mcp_memory_server.py # Memory MCP server implementation +│ ├── mcp_indexer_server.py # Indexer MCP server implementation +│ ├── hybrid_search.py # Search algorithm implementation +│ ├── cache_manager.py # Unified caching system +│ ├── async_subprocess_manager.py # Process management +│ ├── deduplication.py # Request deduplication +│ ├── semantic_expansion.py # Query expansion +│ ├── utils.py # Shared utilities +│ ├── ingest_code.py # Code indexing logic +│ ├── watch_index.py # File system watcher +│ └── logger.py # Structured logging +├── tests/ # Test suite +│ ├── conftest.py # Test configuration +│ ├── test_*.py # Unit and integration tests +│ └── integration/ # Integration test helpers +├── docker/ # Docker configurations +│ ├── Dockerfile.mcp # Memory server image +│ ├── Dockerfile.mcp-indexer # Indexer server image +│ └── scripts/ # Docker build scripts +├── docs/ # Documentation +├── .env.example # Environment template +├── docker-compose.yml # Development environment +├── docker-compose.override.yml # Development overrides +├── Makefile # Development commands +├── requirements.txt # Python dependencies +└── README.md # Project overview +``` + +## Development Workflow + +### Making Changes + +1. **Create a feature branch**: +```bash +git checkout -b feature/your-feature-name +``` + +2. **Make your changes** following the coding standards outlined below. + +3. **Run tests**: +```bash +# Run all tests +pytest + +# Run specific test file +pytest tests/test_hybrid_search.py + +# Run with coverage +pytest --cov=scripts --cov-report=html +``` + +4. 
**Test changes in development environment**: +```bash +# Restart services with changes +docker-compose restart mcp mcp_indexer + +# Or rebuild if changes affect Docker images +make rebuild-dev +``` + +5. **Commit changes**: +```bash +git add . +git commit -m "feat: add your feature description" +``` + +### Code Quality Standards + +#### Python Style Guide +- **PEP 8 compliant**: Use standard Python formatting +- **Type hints**: Include type annotations for all public functions +- **Docstrings**: Google-style docstrings for all modules and public functions +- **Error handling**: Use structured error types from `scripts.logger` + +#### Example Function: +```python +from typing import List, Dict, Any, Optional +from scripts.logger import get_logger, RetrievalError + +logger = get_logger(__name__) + +def search_code( + query: str, + limit: int = 10, + filters: Optional[Dict[str, Any]] = None +) -> List[Dict[str, Any]]: + """Search code using hybrid retrieval. + + Args: + query: Search query string + limit: Maximum number of results to return + filters: Optional search filters + + Returns: + List of search results with scores and metadata + + Raises: + RetrievalError: If search operation fails + """ + try: + # Implementation + pass + except Exception as e: + logger.error(f"Search failed: {e}") + raise RetrievalError(f"Search operation failed: {e}") from e +``` + +## Adding New Features + +### 1. Adding a New MCP Tool + +Create a new tool in the appropriate MCP server file: + +```python +# In scripts/mcp_indexer_server.py or scripts/mcp_memory_server.py + +@mcp.tool() +async def my_new_tool( + param1: str, + param2: Optional[int] = None, + param3: str = "" +) -> Dict[str, Any]: + """Brief description of what this tool does. 
+ + Args: + param1: Description of required parameter + param2: Description of optional parameter + param3: Description of parameter with default + + Returns: + Dictionary containing operation result + + Example: + result = await my_new_tool("test", 42) + print(result["ok"]) # True + """ + try: + # Validate inputs + if not param1.strip(): + raise ValidationError("param1 cannot be empty") + + # Implementation logic + result = do_something(param1, param2 or 0) + + return { + "ok": True, + "result": result, + "message": "Operation completed successfully" + } + + except Exception as e: + logger.error(f"my_new_tool failed: {e}") + return { + "ok": False, + "error": str(e), + "message": "Operation failed" + } +``` + +### 2. Adding New Search Filters + +Extend the hybrid search system with new filtering capabilities: + +```python +# In scripts/hybrid_search.py + +def apply_my_filter( + results: List[Dict[str, Any]], + filter_value: str +) -> List[Dict[str, Any]]: + """Apply custom filter to search results. + + Args: + results: List of search results to filter + filter_value: Filter criteria + + Returns: + Filtered list of results + """ + filtered_results = [] + for result in results: + if matches_my_criteria(result, filter_value): + filtered_results.append(result) + return filtered_results + +# Update the main search function +def hybrid_search( + queries: Union[str, List[str]], + # ... existing parameters ... + my_filter: Optional[str] = None +) -> List[Dict[str, Any]]: + # ... existing search logic ... + + # Apply new filter + if my_filter: + results = apply_my_filter(results, my_filter) + + return results +``` + +### 3. Adding New Embedding Models + +1. **Update model mapping** in `scripts/utils.py`: +```python +def sanitize_vector_name(model_name: str) -> str: + name = (model_name or "").strip().lower() + + # Add your model mapping + if "your-new-model" in name: + return "your-new-model-alias" + + # ... existing mappings ... +``` + +2. 
**Test with new model**: +```python +# In tests/test_embedding.py +def test_new_embedding_model(): + from scripts.utils import sanitize_vector_name + + assert sanitize_vector_name("your-new-model-v1") == "your-new-model-alias" +``` + +3. **Update Docker images** if the model requires additional dependencies. + +## Testing + +### Test Organization + +#### Unit Tests (`tests/test_*.py`) +- Test individual functions and classes +- Mock external dependencies (Qdrant, embedding models) +- Fast execution, no external services required + +#### Integration Tests (`tests/test_integration_*.py`) +- Test component interactions +- Use real Qdrant via testcontainers +- Slower but more realistic testing + +### Writing Tests + +#### Test Structure +```python +import pytest +from unittest.mock import Mock, patch +from scripts.hybrid_search import hybrid_search + +class TestHybridSearch: + @pytest.fixture + def fake_embedder(self): + """Mock embedding model for deterministic tests.""" + embedder = Mock() + embedder.embed.return_value = [[0.1, 0.2, 0.3]] + return embedder + + @pytest.fixture + def mock_qdrant(self): + """Mock Qdrant client.""" + client = Mock() + client.search.return_value = [ + {"id": "1", "score": 0.9, "payload": {"text": "test"}} + ] + return client + + def test_basic_search(self, fake_embedder, mock_qdrant): + """Test basic hybrid search functionality.""" + results = hybrid_search( + queries=["test query"], + embedder=fake_embedder, + qdrant_client=mock_qdrant + ) + + assert len(results) > 0 + assert all("score" in r for r in results) + assert all(0 <= r["score"] <= 1 for r in results) + + def test_search_with_filters(self, fake_embedder, mock_qdrant): + """Test search with language and path filters.""" + results = hybrid_search( + queries=["test query"], + filters={"language": "python", "path": "src/"}, + embedder=fake_embedder, + qdrant_client=mock_qdrant + ) + + # Verify filter application + mock_qdrant.search.assert_called_once() + call_args = 
mock_qdrant.search.call_args + assert "filter" in call_args.kwargs +``` + +#### Integration Tests +```python +import pytest +from testcontainers.core.container import DockerContainer + +@pytest.mark.integration +class TestSearchIntegration: + @pytest.fixture(scope="module") + def qdrant_container(self): + """Set up real Qdrant container for integration tests.""" + container = DockerContainer("qdrant/qdrant:latest").with_exposed_ports(6333) + container.start() + yield f"http://{container.get_container_host_ip()}:{container.get_exposed_port(6333)}" + container.stop() + + def test_end_to_end_search(self, qdrant_container): + """Test complete search pipeline with real services.""" + # Set up test data + # Perform search + # Verify results + pass +``` + +### Running Tests + +```bash +# Run all tests +pytest + +# Run with verbose output +pytest -v + +# Run unit tests only (exclude integration) +pytest -m "not integration" + +# Run integration tests only +pytest -m integration + +# Run specific test file +pytest tests/test_hybrid_search.py + +# Run with coverage report +pytest --cov=scripts --cov-report=html + +# Run with specific markers +pytest -m "not slow" # Skip slow tests +``` + +## Debugging + +### Development Debugging + +#### Enable Debug Logging +```bash +# Set debug environment variables +export DEBUG_CONTEXT_ANSWER=1 +export HYBRID_DEBUG=1 +export CACHE_DEBUG=1 + +# Restart services +docker-compose restart +``` + +#### Local Development +```bash +# Run MCP servers directly for easier debugging +python scripts/mcp_indexer_server.py + +# Run with debugger +python -m pdb scripts/hybrid_search.py +``` + +### Common Debugging Scenarios + +#### Search Issues +```python +# Enable detailed search logging +import logging +logging.getLogger("hybrid_search").setLevel(logging.DEBUG) + +# Check search intermediate results +def debug_search(query): + # Check embedding generation + embeddings = embed_model.embed([query]) + print(f"Embedding shape: 
{len(list(embeddings)[0])}") + + # Check Qdrant query + results = qdrant_client.search(...) + print(f"Qdrant results: {len(results)}") + + return results +``` + +#### Cache Issues +```python +# Check cache statistics +from scripts.cache_manager import get_search_cache + +cache = get_search_cache() +print(f"Cache stats: {cache.get_stats()}") +print(f"Cache size: {len(cache._cache)}") +``` + +## Performance Profiling + +### Profiling Tools +```bash +# Profile with cProfile +python -m cProfile -o profile.stats scripts/hybrid_search.py + +# Analyze profile results +python -c " +import pstats +p = pstats.Stats('profile.stats') +p.sort_stats('cumulative') +p.print_stats(20) +" +``` + +### Memory Profiling +```bash +# Install memory_profiler +pip install memory-profiler + +# Profile memory usage +python -m memory_profiler scripts/hybrid_search.py +``` + +## Common Development Issues + +### Environment Setup Issues +```bash +# Python path issues +export PYTHONPATH="${PYTHONPATH}:/path/to/Context-Engine" + +# Docker issues +docker system prune -f # Clean up Docker +docker-compose down -v # Remove volumes +docker-compose up --build # Rebuild images +``` + +### Import Issues +```bash +# Ensure code roots are on sys.path +export WORK_ROOTS="/path/to/Context-Engine,/app" + +# Check Python path +python -c "import sys; print(sys.path)" +``` + +### MCP Server Issues +```bash +# Check MCP server connectivity +curl -H "Accept: text/event-stream" http://localhost:8000/sse + +# Check MCP tools available +curl http://localhost:18001/tools +``` + +## Contributing Guidelines + +### Before Submitting Changes +1. **Run full test suite**: `pytest` +2. **Check code style**: Use `black` and `flake8` +3. **Update documentation**: Add docstrings for new functions +4. **Test locally**: Verify changes work in development environment + +### Pull Request Process +1. **Create descriptive PR title**: "feat: add new search filter" +2. **Provide detailed description**: Explain what changes and why +3. 
**Include test coverage**: Add tests for new functionality +4. **Update documentation**: Include API changes in docs + +### Code Review Checklist +- [ ] Code follows style guidelines +- [ ] Tests pass for all changes +- [ ] Documentation is updated +- [ ] No hardcoded secrets or values +- [ ] Error handling is appropriate +- [ ] Performance impact is considered + +This development guide should help you get started with contributing to Context Engine. For more specific questions, refer to the code documentation or create an issue in the repository. \ No newline at end of file diff --git a/docs/MCP_API.md b/docs/MCP_API.md new file mode 100644 index 00000000..490c3dfc --- /dev/null +++ b/docs/MCP_API.md @@ -0,0 +1,555 @@ +# MCP API Reference + +This document provides comprehensive API documentation for all MCP (Model Context Protocol) tools exposed by Context Engine's dual-server architecture. + +## Overview + +Context Engine exposes two MCP servers: + +1. **Memory Server**: Knowledge base storage and retrieval (`port 8000` SSE, `port 8002` HTTP) +2. **Indexer Server**: Code search, indexing, and management (`port 8001` SSE, `port 8003` HTTP) + +Both servers support SSE and HTTP RMCP transports simultaneously. + +## Memory Server API + +### store() + +Store information with rich metadata for later retrieval and search. 
+ +**Parameters:** +- `information` (str, required): Clear natural language description of the content to store +- `metadata` (dict, optional): Structured metadata with the following schema: + - `kind` (str, optional): Category type - one of: + - `"snippet"`: Code snippet or pattern + - `"explanation"`: Technical explanation + - `"pattern"`: Design pattern or approach + - `"example"`: Usage example + - `"reference"`: Reference information + - `language` (str, optional): Programming language (e.g., "python", "javascript", "go") + - `path` (str, optional): File path context for code-related entries + - `tags` (list[str], optional): Searchable tags for categorization + - `priority` (int, optional): Importance ranking (1-10, higher = more important) + - `topic` (str, optional): High-level topic classification + - `code` (str, optional): Actual code content (for snippet kind) + - `author` (str, optional): Author or source attribution + - `created_at` (str, optional): ISO timestamp (auto-generated if omitted) + +**Returns:** +```json +{ + "ok": true, + "id": "uuid-string", + "message": "Successfully stored information" +} +``` + +**Example:** +```json +{ + "information": "Efficient Python pattern for processing large files using generators to minimize memory usage", + "metadata": { + "kind": "pattern", + "language": "python", + "path": "utils/file_processor.py", + "tags": ["python", "generators", "memory-efficient", "performance"], + "priority": 8, + "topic": "performance optimization", + "code": "def process_large_file(file_path):\n with open(file_path) as f:\n for line in f:\n yield process_line(line)" + } +} +``` + +### find() + +Search stored memories using hybrid retrieval (semantic + lexical search). + +**Parameters:** +- `query` (str, required): Search query or question +- `kind` (str, optional): Filter by entry kind (snippet, explanation, pattern, etc.) 
+- `language` (str, optional): Filter by programming language +- `topic` (str, optional): Filter by topic +- `tags` (str or list[str], optional): Filter by tags (comma-separated string or list) +- `limit` (int, default 10): Maximum number of results to return +- `priority_min` (int, optional): Minimum priority threshold (1-10) + +**Returns:** +```json +{ + "ok": true, + "results": [ + { + "id": "uuid-string", + "information": "Full stored information text", + "metadata": { + "kind": "pattern", + "language": "python", + "path": "utils/file_processor.py", + "tags": ["python", "generators"], + "priority": 8, + "topic": "performance", + "created_at": "2024-01-15T10:30:00Z" + }, + "score": 0.89, + "highlights": ["<> Python pattern", "<>"] + } + ], + "total": 15, + "query": "python file processing generators" +} +``` + +**Example:** +```json +{ + "query": "database connection pooling patterns", + "language": "python", + "kind": "pattern", + "limit": 5 +} +``` + +## Indexer Server API + +### repo_search() + +Perform hybrid code search combining dense semantic, lexical BM25, and optional neural reranking. 
+ +**Core Parameters:** +- `query` (str or list[str], required): Search query or list of queries for query fusion +- `limit` (int, default 10): Maximum total results to return +- `per_path` (int, default 2): Maximum results per file path + +**Content Filters:** +- `language` (str, optional): Filter by programming language +- `path_glob` (str or list[str], optional): Glob patterns for path filtering +- `under` (str, optional): Limit search to specific directory path +- `not_glob` (str or list[str], optional): Exclude paths matching these patterns + +**Code Structure Filters:** +- `symbol` (str, optional): Search for specific function, class, or variable names +- `kind` (str, optional): Filter by code construct type: + - `"function"`: Function definitions + - `"class"`: Class definitions + - `"variable"`: Variable assignments + - `"import"`: Import statements + - `"comment"`: Comments and docstrings + +**Search Options:** +- `include_snippet` (bool, default true): Include code snippet in results +- `context_lines` (int, default 3): Number of context lines around snippet +- `highlight_snippet` (bool, default true): Highlight matching tokens in snippet + +**Reranking Options:** +- `rerank_enabled` (bool, optional): Override default reranker setting +- `rerank_top_n` (int, default 50): Number of candidates to consider for reranking +- `rerank_return_m` (int, default 12): Number of results to return after reranking + +**Response Format:** +```json +{ + "ok": true, + "results": [ + { + "score": 0.89, + "path": "src/search/hybrid_search.py", + "symbol": "hybrid_search", + "start_line": 45, + "end_line": 67, + "snippet": "def hybrid_search(query, limit=10):\n # ReFRAG-inspired implementation\n results = []\n return results", + "highlights": ["<> implementation"], + "components": { + "dense_score": 0.85, + "lexical_score": 0.42, + "reranker_score": 0.91, + "final_score": 0.89 + }, + "metadata": { + "language": "python", + "kind": "function", + "complexity": "medium", + 
"tokens": 156 + } + } + ], + "total": 15, + "used_rerank": true, + "search_time_ms": 127, + "query": "asyncio subprocess management python" +} +``` + +**Examples:** + +**Basic Search:** +```json +{ + "query": "asyncio subprocess management", + "limit": 10, + "language": "python" +} +``` + +**Advanced Search with Multiple Filters:** +```json +{ + "query": ["database connection", "sqlalchemy pool"], + "language": "python", + "path_glob": "**/db/**/*.py", + "not_glob": ["**/test_*.py", "**/migrations/**"], + "kind": "function", + "limit": 20, + "per_path": 3, + "rerank_enabled": true +} +``` + +**Symbol Search:** +```json +{ + "query": "hybrid_search", + "symbol": "hybrid_search", + "language": "python", + "include_snippet": true +} +``` + +### context_search() + +Blend code search results with memory entries for comprehensive context. + +**Parameters:** +All `repo_search` parameters plus: +- `include_memories` (bool, default true): Whether to include memory results +- `memory_weight` (float, default 1.0): Weight for memory results vs code results +- `per_source_limits` (dict, optional): Limits per source type: + ```json + { + "code": 8, + "memory": 4 + } + ``` + +**Returns:** +```json +{ + "ok": true, + "results": [ + { + "source": "code", + "score": 0.89, + "path": "src/db/connection.py", + "symbol": "create_pool", + "snippet": "def create_pool(database_url):\n return create_engine(database_url, pool_size=10)" + }, + { + "source": "memory", + "score": 0.85, + "id": "uuid-string", + "information": "Database connection pooling best practices for high-concurrency applications", + "metadata": { + "kind": "pattern", + "language": "python", + "priority": 9 + } + } + ], + "total": 12, + "sources": ["code", "memory"], + "query": "database connection pooling" +} +``` + +### context_answer() + +Generate natural language answers using retrieval-augmented generation with local LLM. 
+ +**Core Parameters:** +- `query` (str or list[str], required): Question or query to answer +- `budget_tokens` (int, optional): Token budget for context assembly (default from config) +- `include_snippet` (bool, default true): Include code snippets in context + +**Retrieval Parameters:** +All `repo_search` parameters supported for context retrieval. + +**LLM Parameters:** +- `max_tokens` (int, optional): Maximum tokens in generated answer +- `temperature` (float, default 0.3): Sampling temperature (lower = more deterministic) +- `mode` (str, default "stitch"): Context assembly mode ("stitch" or "pack") +- `expand` (bool, default false): Enable query expansion + +**Response Format:** +```json +{ + "ok": true, + "answer": "Context Engine uses ReFRAG-inspired micro-chunking with 16-token windows and 8-token stride to achieve precise code retrieval. The span budgeting system ensures efficient token usage while maintaining context relevance.", + "citations": [ + { + "path": "scripts/hybrid_search.py", + "start_line": 156, + "end_line": 162, + "snippet": "# ReFRAG micro-chunking\nWINDOW_SIZE = 16\nSTRIDE = 8", + "relevance": 0.92 + }, + { + "path": "scripts/utils.py", + "start_line": 89, + "end_line": 95, + "snippet": "def micro_chunk(text, window_size=16, stride=8):", + "relevance": 0.87 + } + ], + "query": ["How does Context Engine implement micro-chunking?"], + "used_context_tokens": 1247, + "generation_time_ms": 2340, + "decoder_used": "llamacpp" +} +``` + +**Example:** +```json +{ + "query": "What is the best way to handle database connections in Python web applications?", + "budget_tokens": 2000, + "language": "python", + "expand": true, + "temperature": 0.2 +} +``` + +### qdrant_index() + +Index or reindex code from the mounted workspace. 
+ +**Parameters:** +- `subdir` (str, optional): Subdirectory to index (default: entire workspace) +- `recreate` (bool, default false): Drop and recreate collection before indexing +- `collection` (str, optional): Override default collection name + +**Returns:** +```json +{ + "ok": true, + "operation": "index", + "subdir": "", + "collection": "my-workspace", + "recreate": false, + "stats": { + "files_processed": 1250, + "chunks_created": 8432, + "vectors_generated": 8432, + "processing_time_seconds": 127, + "errors": 0 + }, + "message": "Indexing completed successfully" +} +``` + +### qdrant_prune() + +Remove stale points from the collection (files that no longer exist). + +**Parameters:** None (operates on current workspace) + +**Returns:** +```json +{ + "ok": true, + "operation": "prune", + "points_removed": 47, + "points_before": 15234, + "points_after": 15187, + "processing_time_ms": 892, + "message": "Pruning completed successfully" +} +``` + +### qdrant_status() + +Get comprehensive status information about the collection and indexing state. + +**Parameters:** +- `collection` (str, optional): Override default collection name +- `max_points` (int, default 5000): Maximum points to scan for timestamp analysis +- `batch` (int, default 1000): Batch size for scanning + +**Returns:** +```json +{ + "ok": true, + "collection": "my-workspace", + "exists": true, + "count": 15234, + "scanned_points": 5000, + "last_ingested_at": { + "unix": 1705123456, + "iso": "2024-01-13T15:30:56Z" + }, + "last_modified_at": { + "unix": 1705124123, + "iso": "2024-01-13T15:35:23Z" + }, + "vectors_config": { + "fast-bge-base-en-v1.5": 384, + "lex": 4096 + }, + "storage_size_mb": 245.7, + "status": "healthy" +} +``` + +### qdrant_list() + +List all available Qdrant collections. 
+ +**Parameters:** None + +**Returns:** +```json +{ + "ok": true, + "collections": [ + { + "name": "my-workspace", + "vectors_count": 15234, + "segments_count": 12, + "points_count": 15234, + "indexed_vectors_count": 15234, + "status": "green", + "optimizer_status": "ok" + } + ] +} +``` + +### workspace_info() + +Read workspace state and default collection information. + +**Parameters:** +- `workspace_path` (str, optional): Override workspace path (default: current workspace) + +**Returns:** +```json +{ + "ok": true, + "workspace_path": "/work", + "default_collection": "context-engine-workspace", + "source": "state_file", + "state": { + "workspace_id": "workspace-uuid", + "created_at": "2024-01-10T09:15:00Z", + "last_indexed": "2024-01-13T15:30:56Z", + "files_count": 1250, + "total_size_bytes": 52428800 + } +} +``` + +### list_workspaces() + +Scan for all workspaces with .codebase/state.json files. + +**Parameters:** +- `search_root` (str, optional): Root directory to scan (default: parent of workspace) + +**Returns:** +```json +{ + "ok": true, + "workspaces": [ + { + "workspace_path": "/work", + "collection_name": "context-engine-workspace", + "last_updated": "2024-01-13T15:30:56Z", + "indexing_state": "completed" + }, + { + "workspace_path": "/work/project-b", + "collection_name": "project-b-workspace", + "last_updated": "2024-01-12T11:20:30Z", + "indexing_state": "in_progress" + } + ] +} +``` + +### memory_store() + +Store memory entry (alias for Memory Server's `store()` tool). + +**Parameters:** Same as Memory Server `store()` method + +**Returns:** Same as Memory Server `store()` method + +### expand_query() + +Generate alternative query variations using local LLM (requires decoder enabled). 
+ +**Parameters:** +- `query` (str or list[str], required): Original query or queries to expand +- `max_new` (int, default 2): Maximum number of alternative queries to generate + +**Returns:** +```json +{ + "ok": true, + "original_query": "python asyncio subprocess", + "alternates": [ + "python asynchronous process management", + "asyncio subprocess handling in python" + ], + "total_queries": 3, + "decoder_used": "llamacpp" +} +``` + +## Error Handling + +All API methods follow consistent error handling patterns: + +### Standard Error Response +```json +{ + "ok": false, + "error": "Error type and description", + "error_code": "VALIDATION_ERROR", + "details": { + "field": "query", + "message": "Query cannot be empty" + } +} +``` + +### Common Error Codes +- `VALIDATION_ERROR`: Invalid parameter values +- `COLLECTION_NOT_FOUND`: Specified collection doesn't exist +- `INDEXING_ERROR`: Failed during indexing operation +- `SEARCH_ERROR`: Search operation failed +- `DECODER_ERROR`: LLM decoder operation failed +- `TIMEOUT_ERROR`: Operation timed out +- `RATE_LIMIT_ERROR`: Too many requests + +## Rate Limits and Quotas + +- **Default timeout**: 30 seconds per operation +- **Maximum query length**: 1000 characters +- **Maximum result limit**: 100 results per search +- **Memory storage**: Configurable per deployment +- **Batch indexing limits**: Configurable via environment variables + +## Transport-Specific Behavior + +### SSE (Server-Sent Events) +- Real-time bidirectional communication +- Automatic reconnection on disconnect +- Streaming responses for long operations + +### HTTP RMCP +- JSON-RPC over HTTP +- Request/response pattern +- Better for batch operations and integrations + +Both transports provide identical API semantics and response formats. + +This API reference should enable developers to effectively integrate Context Engine's MCP tools into their applications and workflows. 
\ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index cfbf3182..942721c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,20 @@ sys.path.insert(0, str(ROOT)) +@pytest.fixture(scope="session", autouse=True) +def _ensure_mcp_imported(): + """Ensure mcp package is properly imported before any tests run. + + This prevents import conflicts when scripts.mcp_indexer_server imports + from mcp.server.fastmcp and later tests try to import fastmcp. + """ + try: + import mcp.types # noqa: F401 + except ImportError: + pass # mcp package not available, tests will skip if needed + yield + + @pytest.fixture(scope="session", autouse=True) def _disable_tokenizers_parallelism(): """Force tokenizers to stay single-threaded to avoid fork warnings during tests.""" diff --git a/tests/test_service_context_search.py b/tests/test_service_context_search.py index 1a5e5c86..ff7b593f 100644 --- a/tests/test_service_context_search.py +++ b/tests/test_service_context_search.py @@ -2,14 +2,6 @@ import json import pytest -# Import fastmcp BEFORE scripts.mcp_indexer_server to avoid import conflicts -# (scripts.mcp_indexer_server imports from mcp.server.fastmcp which can cause -# the mcp module to be in a partially initialized state) -try: - import fastmcp -except ImportError: - fastmcp = None # Will be handled in tests that need it - srv = importlib.import_module("scripts.mcp_indexer_server") @@ -126,10 +118,14 @@ async def list_tools(self): async def call_tool(self, *a, **k): return Resp() - # fastmcp is already imported at module level - if fastmcp is None: - pytest.skip("fastmcp not available") - + # Import fastmcp inside test to avoid module-level import conflicts + # Clear any broken mcp modules from sys.modules first + import sys + mcp_modules = [k for k in sys.modules.keys() if k == 'mcp' or k.startswith('mcp.')] + for mod in mcp_modules: + if mod in sys.modules and not hasattr(sys.modules.get(mod, object()), 'types'): + del sys.modules[mod] + 
import fastmcp monkeypatch.setattr(fastmcp, "Client", lambda *a, **k: FakeClient()) res = await srv.context_search( @@ -192,10 +188,14 @@ async def list_tools(self): async def call_tool(self, *a, **k): return Resp() - # fastmcp is already imported at module level - if fastmcp is None: - pytest.skip("fastmcp not available") - + # Import fastmcp inside test to avoid module-level import conflicts + # Clear any broken mcp modules from sys.modules first + import sys + mcp_modules = [k for k in sys.modules.keys() if k == 'mcp' or k.startswith('mcp.')] + for mod in mcp_modules: + if mod in sys.modules and not hasattr(sys.modules.get(mod, object()), 'types'): + del sys.modules[mod] + import fastmcp monkeypatch.setattr(fastmcp, "Client", lambda *a, **k: FakeClient()) res = await srv.context_search(