Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import os
import platform
import datetime as dt

if platform.system() == 'Darwin':
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
Expand Down Expand Up @@ -82,6 +83,13 @@ class StatusResponse(BaseModel):
loaded_repositories: List[Dict[str, Any]] = Field(default_factory=list)


class SecurityEventIngestRequest(BaseModel):
    """Compatibility payload for OmniLore security-sentinel ingest calls.

    Neither field is required in the request body: each one falls back to a
    fresh empty dict through ``default_factory`` when the caller omits it.
    """

    # Free-form event payload; becomes {} when the field is omitted.
    event: Dict[str, Any] = Field(default_factory=dict)
    # Free-form tenant metadata. Declared Optional, so the annotation also
    # admits an explicit null in addition to an omitted field (which gets {}).
    tenant_context: Optional[Dict[str, Any]] = Field(default_factory=dict)


# Initialize FastAPI app

@asynccontextmanager
Expand Down Expand Up @@ -112,6 +120,11 @@ async def lifespan(app: FastAPI):

# Global FastCode instance
fastcode_instance: Optional[FastCode] = None
# In-memory store of accepted security-ingest records, oldest entry first.
security_event_buffer: list[Dict[str, Any]] = []


def _read_security_event_buffer_limit(default: int = 500, minimum: int = 10) -> int:
    """Return the buffer cap configured via FASTCODE_SECURITY_EVENT_BUFFER_LIMIT.

    Args:
        default: Cap used when the variable is unset or cannot be parsed.
        minimum: Lower bound applied to whatever value is chosen.

    Returns:
        The parsed integer (or ``default``), never lower than ``minimum``.

    A value that is not a valid integer (including an empty string) is
    reported with ``warnings.warn`` and replaced by ``default`` instead of
    raising ``ValueError`` while the module is being imported.
    """
    import warnings  # local import: only used on the malformed-value path

    raw = os.getenv("FASTCODE_SECURITY_EVENT_BUFFER_LIMIT")
    if raw is None:
        return max(minimum, default)
    try:
        return max(minimum, int(raw))
    except ValueError:
        warnings.warn(
            "Ignoring invalid FASTCODE_SECURITY_EVENT_BUFFER_LIMIT=%r; "
            "falling back to %d" % (raw, default),
            RuntimeWarning,
            stacklevel=2,
        )
        return max(minimum, default)


# Evaluated once at import time; a bad env value must not break startup.
SECURITY_EVENT_BUFFER_LIMIT = _read_security_event_buffer_limit()

# Setup logging
log_dir = Path("./logs")
Expand Down Expand Up @@ -165,6 +178,53 @@ async def health_check():
"repo_loaded": fastcode_instance.repo_loaded,
"repo_indexed": fastcode_instance.repo_indexed,
"multi_repo_mode": fastcode_instance.multi_repo_mode,
"security_ingest_enabled": True,
"security_event_buffer_size": len(security_event_buffer),
}


@app.post("/ingest")
async def ingest_security_event(request: SecurityEventIngestRequest):
"""
Security Sentinel compatibility endpoint.

OmniLore white-label tooling posts security events here when configured with
OMNILORE_SECURITY_SENTINEL_URL=http://127.0.0.1:8001.
"""
fastcode = _ensure_fastcode_initialized()
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calling _ensure_fastcode_initialized() here forces lazy initialization of the full FastCode system (which loads models, configuration, etc.) on every security ingest call, even when the ingest endpoint is purely a compatibility shim that buffers events. The fastcode local variable is only used to read repo_loaded and repo_indexed in the response — fields which could simply default to False (or be omitted) when the system hasn't been initialized yet. Consider using fastcode_instance directly (checking for None first) rather than triggering expensive initialization on what is meant to be a lightweight ingest path.

Copilot uses AI. Check for mistakes.

event = request.event or {}
tenant_context = request.tenant_context or {}
record = {
"received_at": dt.datetime.now(dt.timezone.utc).isoformat(),
"event": _safe_jsonable(event),
"tenant_context": _safe_jsonable(tenant_context),
}
security_event_buffer.append(record)
if len(security_event_buffer) > SECURITY_EVENT_BUFFER_LIMIT:
security_event_buffer.pop(0)
Comment on lines +203 to +205
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a plain list with pop(0) as a bounded buffer is O(n) for every eviction, because it shifts all remaining elements. With the default buffer limit of 500 entries this is acceptable, but under a burst of security events (e.g., from a misconfigured sentinel) the per-call cost accumulates. The idiomatic and efficient Python structure for a bounded FIFO is collections.deque(maxlen=SECURITY_EVENT_BUFFER_LIMIT), which makes both append and eviction O(1).

Copilot uses AI. Check for mistakes.

event_type = (
event.get("type")
or event.get("event")
or event.get("name")
or "unknown"
)
tenant_id = tenant_context.get("tenant_id", "unknown")
logger.warning(
"Security ingest accepted (compat): event_type=%s tenant_id=%s",
event_type,
tenant_id,
)
Comment on lines +214 to +218
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every security ingest is logged at WARNING level. A successful, routine ingest is not a warning condition — WARNING level is typically reserved for unexpected or degraded states. Routine events like this should use logger.info instead, to avoid polluting warning-level monitoring and alerting. The existing pattern throughout the rest of api.py (lines 284, 307, etc.) uses logger.info for successful operations.

Copilot uses AI. Check for mistakes.

return {
"status": "received",
"mode": "fastcode_compat",
"event_type": event_type,
"tenant_id": tenant_id,
"repo_loaded": fastcode.repo_loaded,
"repo_indexed": fastcode.repo_indexed,
"security_event_buffer_size": len(security_event_buffer),
}


Expand Down
56 changes: 46 additions & 10 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,54 @@ repository:
- "__pycache__"
- "node_modules"
- ".git"
- ".git/*"
- "*.min.js"
- "*.bundle.js"
- "dist/*"
- "build/*"
- "*.lock"
# OmniLore workspace size control (avoid indexing generated/runtime trees)
- ".backup-*"
- ".backup-*/*"
- ".omnilore"
- ".omnilore/*"
- ".omnilore_persist"
- ".omnilore_persist/*"
- ".venv"
- ".venv/*"
- ".venv-*"
- ".venv-*/*"
- "venv*"
- "venv*/*"
- "output"
- "output/*"
- "out"
- "out/*"
- "reports"
- "reports/*"
- "docs"
- "docs/*"
- "external"
- "external/*"
- "node_modules/*"
- ".mypy_cache"
- ".mypy_cache/*"
- ".pytest_cache"
- ".pytest_cache/*"
- ".hypothesis"
- ".hypothesis/*"
- "htmlcov"
- "htmlcov/*"
- "logs"
- "logs/*"
- "artifacts"
- "artifacts/*"
- "archives"
- "archives/*"
- "data"
- "data/*"
- "chroma"
- "chroma/*"
supported_extensions:
- .py
- .js
Expand All @@ -38,14 +81,7 @@ repository:
- .kt
- .pyx
- .toml
- .md
- .txt
- .yaml
Comment on lines 83 to 84
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The removal of .md, .txt, .rst, .json, .html, .css, and .xml from supported_extensions is a broad behavioral change that affects all repository indexing, not just OmniLore/Security Sentinel use cases. This means documentation files (READMEs, RST docs), configuration/data files (JSON), and web assets (HTML, CSS, XML) will no longer be indexed for any user of this service. The PR description frames this PR as adding a compatibility endpoint for OmniLore security sentinel calls, but this extension removal is a significant unrelated regression that reduces the system's utility. These changes should either be reverted or explicitly justified and scoped.

Copilot uses AI. Check for mistakes.
- .rst
- .json
- .html
- .css
- .xml

# Parser Settings
parser:
Expand Down Expand Up @@ -116,7 +152,7 @@ retrieval:
max_files_to_search: 15

# Agency mode for accurate and comprehensive retrieval
enable_agency_mode: true # Enable agent-based retrieval
enable_agency_mode: false # Prefer deterministic retrieval for local stability


# Query Processing
Expand All @@ -129,8 +165,8 @@ query:
detect_intent: true # Detect query type (how/what/where/debug/implement)

# LLM-Enhanced Processing
use_llm_enhancement: true # Enable LLM-based query understanding
llm_enhancement_mode: "always" # Options: "adaptive", "always", "off"
use_llm_enhancement: false # Disable LLM rewrite for deterministic local routing
llm_enhancement_mode: "off" # Options: "adaptive", "always", "off"
Comment on lines 155 to +169
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing enable_agency_mode from true to false and use_llm_enhancement / llm_enhancement_mode to false / "off" are global defaults that affect all users of this service, not just OmniLore configurations. These changes will degrade retrieval quality for all existing deployments that relied on the previous defaults. The PR description mentions "prefer deterministic retrieval for local stability" but this is not mentioned in the stated purpose of the PR (adding a security sentinel ingest compatibility endpoint). These config changes should be clearly justified or made opt-in rather than changed as global defaults.

Copilot uses AI. Check for mistakes.
# - adaptive: Use LLM only for complex/implementation queries (recommended)
# - always: Use LLM for all queries (slower, more accurate)
# - off: Disable LLM enhancement (faster, rule-based only)
Expand Down
41 changes: 31 additions & 10 deletions fastcode/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from typing import Dict, List, Optional, Any
import logging
from git import Repo, GitCommandError
from pathspec import PathSpec
from pathspec.patterns import GitWildMatchPattern

from .utils import (
is_supported_file,
should_ignore_path,
get_repo_name_from_url,
normalize_path,
ensure_dir,
Expand Down Expand Up @@ -193,19 +194,40 @@ def scan_files(self) -> List[Dict[str, Any]]:
files = []
total_size = 0
max_file_size_bytes = self.max_file_size_mb * 1024 * 1024

ignore_spec = PathSpec.from_lines(GitWildMatchPattern, effective_ignore)
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable effective_ignore used on this line is never defined anywhere in scan_files or the surrounding scope. This will raise a NameError at runtime every time scan_files() is called, completely breaking repository scanning. It should be replaced with self.ignore_patterns, which is the instance attribute populated from config and was used by the previous should_ignore_path calls.

Suggested change
ignore_spec = PathSpec.from_lines(GitWildMatchPattern, effective_ignore)
ignore_spec = PathSpec.from_lines(GitWildMatchPattern, self.ignore_patterns)

Copilot uses AI. Check for mistakes.

def is_ignored_repo_relative(rel_path: str, *, is_dir: bool = False) -> bool:
    """Tell whether a repo-relative path is excluded by the ignore spec.

    Args:
        rel_path: Path relative to the repository root.
        is_dir: Set when the path names a directory, which adds a second
            probe with a trailing slash.

    Returns:
        True if any probe matches the compiled ignore spec, else False.
    """
    candidate = normalize_path(rel_path)
    probes = [candidate]
    if is_dir:
        # Directory-style patterns (e.g. "output/" or ".venv/") match most
        # reliably against a candidate that ends with a slash.
        probes.append(f"{candidate}/")
    # Probes are tried in order and evaluation stops at the first match.
    return any(ignore_spec.match_file(probe) for probe in probes)

for root, dirs, filenames in os.walk(self.repo_path):
# Filter out ignored directories
dirs[:] = [d for d in dirs if not should_ignore_path(
os.path.join(root, d), self.ignore_patterns
)]
# Filter ignored directories using paths relative to repo root.
# Matching absolute paths can miss gitwildmatch patterns such as
# "output/" or ".venv/".
filtered_dirs = []
for d in dirs:
abs_dir_path = os.path.join(root, d)
rel_dir_path = normalize_path(
os.path.relpath(abs_dir_path, self.repo_path)
)
if is_ignored_repo_relative(rel_dir_path, is_dir=True):
continue
filtered_dirs.append(d)
dirs[:] = filtered_dirs

for filename in filenames:
file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, self.repo_path)
relative_path = normalize_path(os.path.relpath(file_path, self.repo_path))

# Check if should ignore
if should_ignore_path(relative_path, self.ignore_patterns):
if is_ignored_repo_relative(relative_path):
continue

# Check if supported extension
Expand Down Expand Up @@ -312,4 +334,3 @@ def cleanup(self):
def __del__(self):
    """Cleanup on deletion.

    Finalizer hook: delegates to ``self.cleanup()`` when the object is
    being destroyed.
    """
    # NOTE(review): assumes cleanup() is safe to call more than once and on a
    # partially constructed instance — confirm against cleanup()'s body.
    self.cleanup()