Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 46 additions & 10 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,54 @@ repository:
- "__pycache__"
- "node_modules"
- ".git"
- ".git/*"
- "*.min.js"
- "*.bundle.js"
- "dist/*"
- "build/*"
- "*.lock"
# OmniLore workspace size control (avoid indexing generated/runtime trees)
- ".backup-*"
- ".backup-*/*"
- ".omnilore"
- ".omnilore/*"
- ".omnilore_persist"
- ".omnilore_persist/*"
- ".venv"
- ".venv/*"
- ".venv-*"
- ".venv-*/*"
- "venv*"
- "venv*/*"
- "output"
- "output/*"
- "out"
- "out/*"
- "reports"
- "reports/*"
- "docs"
- "docs/*"
- "external"
- "external/*"
Comment on lines +42 to +45
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ignore pattern "docs" (line 42) will match any directory or file named "docs" anywhere in the repository tree, which may be overly broad: it could unintentionally skip important documentation directories nested inside subdirectories. If you only want to ignore the top-level docs directory, consider an anchored pattern such as "/docs" (the leading slash restricts the match to the repository root) or "docs/*". The same applies to other broad patterns like "data", "logs", "external", etc.

Copilot uses AI. Check for mistakes.
- "node_modules/*"
- ".mypy_cache"
- ".mypy_cache/*"
- ".pytest_cache"
- ".pytest_cache/*"
- ".hypothesis"
- ".hypothesis/*"
- "htmlcov"
- "htmlcov/*"
- "logs"
- "logs/*"
- "artifacts"
- "artifacts/*"
- "archives"
- "archives/*"
- "data"
- "data/*"
- "chroma"
- "chroma/*"
supported_extensions:
- .py
- .js
Expand All @@ -38,14 +81,7 @@ repository:
- .kt
- .pyx
- .toml
- .md
- .txt
- .yaml
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The removal of the documentation, markup, and data file extensions (.md, .txt, .rst, .json, .html, .css, .xml) is a potentially breaking change that will prevent these file types from being indexed. This appears intentional given the PR description's focus on "avoiding indexing generated/runtime directories," but it also excludes important documentation files like README.md and configuration files like package.json. Consider whether this tradeoff aligns with the intended use case, especially if users expect documentation files to be searchable.

Suggested change
- .yaml
- .yaml
- .md
- .txt
- .rst
- .json
- .html
- .css
- .xml

Copilot uses AI. Check for mistakes.
- .rst
- .json
- .html
- .css
- .xml

# Parser Settings
parser:
Expand Down Expand Up @@ -116,7 +152,7 @@ retrieval:
max_files_to_search: 15

# Agency mode for accurate and comprehensive retrieval
enable_agency_mode: true # Enable agent-based retrieval
enable_agency_mode: false # Prefer deterministic retrieval for local stability


# Query Processing
Expand All @@ -129,8 +165,8 @@ query:
detect_intent: true # Detect query type (how/what/where/debug/implement)

# LLM-Enhanced Processing
use_llm_enhancement: true # Enable LLM-based query understanding
llm_enhancement_mode: "always" # Options: "adaptive", "always", "off"
use_llm_enhancement: false # Disable LLM rewrite for deterministic local routing
llm_enhancement_mode: "off" # Options: "adaptive", "always", "off"
# - adaptive: Use LLM only for complex/implementation queries (recommended)
# - always: Use LLM for all queries (slower, more accurate)
# - off: Disable LLM enhancement (faster, rule-based only)
Expand Down
23 changes: 17 additions & 6 deletions fastcode/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,26 @@ def scan_files(self) -> List[Dict[str, Any]]:
max_file_size_bytes = self.max_file_size_mb * 1024 * 1024

for root, dirs, filenames in os.walk(self.repo_path):
# Filter out ignored directories
dirs[:] = [d for d in dirs if not should_ignore_path(
os.path.join(root, d), self.ignore_patterns
)]
# Filter ignored directories using paths relative to repo root.
# Matching absolute paths can miss gitwildmatch patterns such as
# "output/" or ".venv/".
filtered_dirs = []
for d in dirs:
abs_dir_path = os.path.join(root, d)
rel_dir_path = normalize_path(
os.path.relpath(abs_dir_path, self.repo_path)
)
rel_dir_with_trailing = f"{rel_dir_path}/"
if should_ignore_path(rel_dir_path, self.ignore_patterns) or should_ignore_path(
rel_dir_with_trailing, self.ignore_patterns
):
Comment on lines +208 to +210
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The directory filtering now calls should_ignore_path twice per directory. Since should_ignore_path creates a new PathSpec object from ignore_patterns on each call (see utils.py:81-87), this doubles the PathSpec creation overhead. For repositories with many directories and long ignore pattern lists, this could impact performance. Consider refactoring should_ignore_path to accept a pre-compiled PathSpec, or create the PathSpec once at the class level and reuse it throughout the scan.

Suggested change
if should_ignore_path(rel_dir_path, self.ignore_patterns) or should_ignore_path(
rel_dir_with_trailing, self.ignore_patterns
):
# Use the trailing-slash form to correctly match directory patterns
# like "output/" while avoiding redundant should_ignore_path calls.
if should_ignore_path(rel_dir_with_trailing, self.ignore_patterns):

Copilot uses AI. Check for mistakes.
continue
filtered_dirs.append(d)
dirs[:] = filtered_dirs

for filename in filenames:
file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, self.repo_path)
relative_path = normalize_path(os.path.relpath(file_path, self.repo_path))

# Check if should ignore
if should_ignore_path(relative_path, self.ignore_patterns):
Expand Down Expand Up @@ -312,4 +324,3 @@ def cleanup(self):
def __del__(self):
"""Cleanup on deletion"""
self.cleanup()

Loading