diff --git a/api.py b/api.py
index 943294c..ea48925 100644
--- a/api.py
+++ b/api.py
@@ -5,6 +5,8 @@
 
 import os
 import platform
+import datetime as dt
+from collections import deque
 
 if platform.system() == 'Darwin':
     os.environ['TOKENIZERS_PARALLELISM'] = 'false'
@@ -82,6 +84,13 @@ class StatusResponse(BaseModel):
     loaded_repositories: List[Dict[str, Any]] = Field(default_factory=list)
 
 
+class SecurityEventIngestRequest(BaseModel):
+    """Compatibility payload for OmniLore security-sentinel ingest calls."""
+
+    event: Dict[str, Any] = Field(default_factory=dict)
+    tenant_context: Optional[Dict[str, Any]] = Field(default_factory=dict)
+
+
 # Initialize FastAPI app
 @asynccontextmanager
 
@@ -112,6 +121,20 @@ async def lifespan(app: FastAPI):
 
 # Global FastCode instance
 fastcode_instance: Optional[FastCode] = None
+# Parse the limit defensively: a malformed value in the environment must not
+# crash the module at import time, so fall back to the default of 500.
+try:
+    SECURITY_EVENT_BUFFER_LIMIT = max(
+        10,
+        int(os.getenv("FASTCODE_SECURITY_EVENT_BUFFER_LIMIT", "500")),
+    )
+except ValueError:
+    SECURITY_EVENT_BUFFER_LIMIT = 500
+# Bounded FIFO of recent ingest records: deque(maxlen=...) drops the oldest
+# entry automatically when a new one is appended at capacity.
+security_event_buffer: deque[Dict[str, Any]] = deque(
+    maxlen=SECURITY_EVENT_BUFFER_LIMIT
+)
 
 # Setup logging
 log_dir = Path("./logs")
@@ -165,6 +188,56 @@ async def health_check():
         "repo_loaded": fastcode_instance.repo_loaded,
         "repo_indexed": fastcode_instance.repo_indexed,
         "multi_repo_mode": fastcode_instance.multi_repo_mode,
+        "security_ingest_enabled": True,
+        "security_event_buffer_size": len(security_event_buffer),
+    }
+
+
+@app.post("/ingest")
+async def ingest_security_event(request: SecurityEventIngestRequest):
+    """
+    Security Sentinel compatibility endpoint.
+
+    OmniLore white-label tooling posts security events here when configured with
+    OMNILORE_SECURITY_SENTINEL_URL=http://127.0.0.1:8001.
+
+    Each accepted event is stamped with a UTC receive time, kept in the bounded
+    in-memory ``security_event_buffer`` and acknowledged with a status payload.
+    """
+    fastcode = _ensure_fastcode_initialized()
+
+    event = request.event or {}
+    tenant_context = request.tenant_context or {}
+    record = {
+        "received_at": dt.datetime.now(dt.timezone.utc).isoformat(),
+        "event": _safe_jsonable(event),
+        "tenant_context": _safe_jsonable(tenant_context),
+    }
+    # The deque is created with maxlen, so appending at capacity evicts the
+    # oldest record; no manual trimming is needed.
+    security_event_buffer.append(record)
+
+    event_type = (
+        event.get("type")
+        or event.get("event")
+        or event.get("name")
+        or "unknown"
+    )
+    tenant_id = tenant_context.get("tenant_id", "unknown")
+    logger.warning(
+        "Security ingest accepted (compat): event_type=%s tenant_id=%s",
+        event_type,
+        tenant_id,
+    )
+
+    return {
+        "status": "received",
+        "mode": "fastcode_compat",
+        "event_type": event_type,
+        "tenant_id": tenant_id,
+        "repo_loaded": fastcode.repo_loaded,
+        "repo_indexed": fastcode.repo_indexed,
+        "security_event_buffer_size": len(security_event_buffer),
     }
 
 
diff --git a/config/config.yaml b/config/config.yaml
index a2653af..b7f12ff 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -14,11 +14,54 @@ repository:
     - "__pycache__"
     - "node_modules"
     - ".git"
+    - ".git/*"
     - "*.min.js"
     - "*.bundle.js"
     - "dist/*"
     - "build/*"
     - "*.lock"
+    # OmniLore workspace size control (avoid indexing generated/runtime trees)
+    - ".backup-*"
+    - ".backup-*/*"
+    - ".omnilore"
+    - ".omnilore/*"
+    - ".omnilore_persist"
+    - ".omnilore_persist/*"
+    - ".venv"
+    - ".venv/*"
+    - ".venv-*"
+    - ".venv-*/*"
+    - "venv*"
+    - "venv*/*"
+    - "output"
+    - "output/*"
+    - "out"
+    - "out/*"
+    - "reports"
+    - "reports/*"
+    - "docs"
+    - "docs/*"
+    - "external"
+    - "external/*"
+    - "node_modules/*"
+    - ".mypy_cache"
+    - ".mypy_cache/*"
+    - ".pytest_cache"
+    - ".pytest_cache/*"
+    - ".hypothesis"
+    - ".hypothesis/*"
+    - "htmlcov"
+    - "htmlcov/*"
+    - "logs"
+    - "logs/*"
+    - "artifacts"
+    - "artifacts/*"
+    - "archives"
+    - "archives/*"
+    - "data"
+    - "data/*"
+    - "chroma"
+    - "chroma/*"
   supported_extensions:
     - .py
     - .js
@@ -38,14 +81,7 @@ repository:
     - .kt
     - .pyx
     - .toml
-    - .md
-    - .txt
     - .yaml
-    - .rst
-    - .json
-    - .html
-    - .css
-    - .xml
 
 # Parser Settings
 parser:
@@ -116,7 +152,7 @@ retrieval:
   max_files_to_search: 15
 
   # Agency mode for accurate and comprehensive retrieval
-  enable_agency_mode: true  # Enable agent-based retrieval
+  enable_agency_mode: false  # Prefer deterministic retrieval for local stability
 
 
 # Query Processing
@@ -129,8 +165,8 @@ query:
   detect_intent: true  # Detect query type (how/what/where/debug/implement)
 
   # LLM-Enhanced Processing
-  use_llm_enhancement: true  # Enable LLM-based query understanding
-  llm_enhancement_mode: "always"  # Options: "adaptive", "always", "off"
+  use_llm_enhancement: false  # Disable LLM rewrite for deterministic local routing
+  llm_enhancement_mode: "off"  # Options: "adaptive", "always", "off"
   # - adaptive: Use LLM only for complex/implementation queries (recommended)
   # - always: Use LLM for all queries (slower, more accurate)
   # - off: Disable LLM enhancement (faster, rule-based only)
diff --git a/fastcode/loader.py b/fastcode/loader.py
index a04d328..d184f35 100644
--- a/fastcode/loader.py
+++ b/fastcode/loader.py
@@ -10,10 +10,11 @@ from typing import Dict, List, Optional, Any
 
 import logging
 from git import Repo, GitCommandError
+from pathspec import PathSpec
+from pathspec.patterns import GitWildMatchPattern
 
 from .utils import (
     is_supported_file,
-    should_ignore_path,
     get_repo_name_from_url,
     normalize_path,
     ensure_dir,
@@ -193,19 +194,43 @@ def scan_files(self) -> List[Dict[str, Any]]:
         files = []
         total_size = 0
         max_file_size_bytes = self.max_file_size_mb * 1024 * 1024
-        
+        # NOTE(review): `effective_ignore` is not assigned anywhere in this hunk;
+        # confirm it is defined earlier in scan_files. The code replaced here
+        # matched against self.ignore_patterns.
+        ignore_spec = PathSpec.from_lines(GitWildMatchPattern, effective_ignore)
+
+        def is_ignored_repo_relative(rel_path: str, *, is_dir: bool = False) -> bool:
+            """Match ignore patterns against normalized repo-relative paths."""
+            normalized = normalize_path(rel_path)
+            if ignore_spec.match_file(normalized):
+                return True
+            # Directory-style patterns (e.g. "output/" or ".venv/") are most
+            # reliable with a trailing slash candidate.
+            if is_dir and ignore_spec.match_file(f"{normalized}/"):
+                return True
+            return False
+
         for root, dirs, filenames in os.walk(self.repo_path):
-            # Filter out ignored directories
-            dirs[:] = [d for d in dirs if not should_ignore_path(
-                os.path.join(root, d), self.ignore_patterns
-            )]
+            # Filter ignored directories using paths relative to repo root.
+            # Matching absolute paths can miss gitwildmatch patterns such as
+            # "output/" or ".venv/".
+            filtered_dirs = []
+            for d in dirs:
+                abs_dir_path = os.path.join(root, d)
+                rel_dir_path = normalize_path(
+                    os.path.relpath(abs_dir_path, self.repo_path)
+                )
+                if is_ignored_repo_relative(rel_dir_path, is_dir=True):
+                    continue
+                filtered_dirs.append(d)
+            dirs[:] = filtered_dirs
 
             for filename in filenames:
                 file_path = os.path.join(root, filename)
-                relative_path = os.path.relpath(file_path, self.repo_path)
-                
+                relative_path = normalize_path(os.path.relpath(file_path, self.repo_path))
+
                 # Check if should ignore
-                if should_ignore_path(relative_path, self.ignore_patterns):
+                if is_ignored_repo_relative(relative_path):
                     continue
 
                 # Check if supported extension
@@ -312,4 +337,3 @@ def cleanup(self):
     def __del__(self):
         """Cleanup on deletion"""
         self.cleanup()
-